; C28x: cycles needed for a multiply-add (Y = M*X + B), fixed point vs. floating point
; C28x 32-bit fixed-point (Q-format) multiply-add: 14 cycles for both results.
; Q is the assemble-time number of fractional bits.
;
; The 64-bit product is built in ACC:P and then shifted LEFT by (32 - Q), so
; that product bits [Q+31:Q] -- i.e. (M*X) >> Q -- end up in ACC, which is the
; register the following ADDL/MOVL operate on. Shifting right by Q instead
; (ASR64 ACC:P,#Q) would leave those bits in P, and B would be added to the
; wrong half of the product.
; NOTE(review): the LSL64 immediate range is 1..16 per the C28x instruction
; set guide, so this form assumes 16 <= Q <= 31 -- confirm the value of Q.
;
; Scratch registers: XT, P, ACC.

; Y1 = (M1*X1) >> Q + B1
        MOVL    XT, @M1                 ; XT  = M1
        IMPYL   P, XT, @X1              ; P   = low  32 bits of M1*X1
        QMPYL   ACC, XT, @X1            ; ACC = high 32 bits of M1*X1
        LSL64   ACC:P, #(32-Q)          ; ACC = (M1*X1) >> Q
        ADDL    ACC, @B1                ; ACC = ACC + B1
        MOVL    @Y1, ACC                ; Y1  = ACC

; Y2 = (M2*X2) >> Q + B2
        MOVL    XT, @M2                 ; XT  = M2
        IMPYL   P, XT, @X2              ; P   = low  32 bits of M2*X2
        QMPYL   ACC, XT, @X2            ; ACC = high 32 bits of M2*X2
        LSL64   ACC:P, #(32-Q)          ; ACC = (M2*X2) >> Q
        ADDL    ACC, @B2                ; ACC = ACC + B2
        MOVL    @Y2, ACC                ; Y2  = ACC
; 14 cycles
; Unoptimized 32-bit floating point: 14 cycles
; Unscheduled 32-bit floating-point multiply-add.
; Each equation is computed start to finish before the next one begins, so the
; extra cycle that MPYF32 and ADDF32 need before their result can be used is
; filled with a NOP.
; Scratch registers: R0H, R1H.

; Y1 = M1*X1 + B1
        MOV32   R0H, @M1                ; R0H = M1
        MOV32   R1H, @X1                ; R1H = X1
        MPYF32  R1H, R1H, R0H           ; R1H = X1 * M1
     || MOV32   R0H, @B1                ; in parallel: R0H = B1
        NOP                             ; wait for the MPYF32 result
        ADDF32  R1H, R1H, R0H           ; R1H = M1*X1 + B1
        NOP                             ; wait for the ADDF32 result
        MOV32   @Y1, R1H                ; Y1  = R1H

; Y2 = M2*X2 + B2
        MOV32   R0H, @M2                ; R0H = M2
        MOV32   R1H, @X2                ; R1H = X2
        MPYF32  R1H, R1H, R0H           ; R1H = X2 * M2
     || MOV32   R0H, @B2                ; in parallel: R0H = B2
        NOP                             ; wait for the MPYF32 result
        ADDF32  R1H, R1H, R0H           ; R1H = M2*X2 + B2
        NOP                             ; wait for the ADDF32 result
        MOV32   @Y2, R1H                ; Y2  = R1H
; 32-bit floating point scheduled for the C28x+FPU: 9 cycles
; Scheduled version: the two multiply-adds are interleaved so that the cycle
; after each MPYF32/ADDF32 is spent on a load or on the other equation's
; arithmetic rather than on a NOP. Statement order is therefore significant:
; no MPYF32/ADDF32 result is read by the instruction immediately following it.
; Scratch registers: R0H, R1H, R2H, R3H, R4H.
; Y1 = M1*X1 + B1
; Y2 = M2*X2 + B2
MOV32 R2H,@X1 ; R2H = X1
MOV32 R1H,@M1 ; R1H = M1
MPYF32 R3H,R2H,R1H ; R3H = X1*M1
|| MOV32 R0H,@M2 ; in parallel: R0H = M2
MOV32 R1H,@X2 ; R1H = X2 (M1 is no longer needed); covers the wait for R3H
MPYF32 R0H,R1H,R0H ; R0H = X2*M2
|| MOV32 R4H,@B1 ; in parallel: R4H = B1
ADDF32 R1H,R4H,R3H ; R1H = B1 + M1*X1; covers the wait for R0H
|| MOV32 R2H,@B2 ; in parallel: R2H = B2
ADDF32 R0H,R2H,R0H ; R0H = B2 + M2*X2; covers the wait for R1H
MOV32 @Y1,R1H ; Y1 = R1H; covers the wait for R0H
MOV32 @Y2,R0H ; Y2 = R0H
; 9 cycles