- ;******************** (C) COPYRIGHT 2009 STMicroelectronics ********************
- ;* File Name : cr4_fft_256_stm32.s
- ;* Author : MCD Application Team
- ;* Version : V2.0.0
- ;* Date : 04/27/2009
- ;* Description : Optimized 256-point radix-4 complex FFT for Cortex-M3
- ;********************************************************************************
- ;* THE PRESENT FIRMWARE WHICH IS FOR GUIDANCE ONLY AIMS AT PROVIDING CUSTOMERS
- ;* WITH CODING INFORMATION REGARDING THEIR PRODUCTS IN ORDER FOR THEM TO SAVE TIME.
- ;* AS A RESULT, STMICROELECTRONICS SHALL NOT BE HELD LIABLE FOR ANY DIRECT,
- ;* INDIRECT OR CONSEQUENTIAL DAMAGES WITH RESPECT TO ANY CLAIMS ARISING FROM THE
- ;* CONTENT OF SUCH SOFTWARE AND/OR THE USE MADE BY CUSTOMERS OF THE CODING
- ;* INFORMATION CONTAINED HEREIN IN CONNECTION WITH THEIR PRODUCTS.
- ;*******************************************************************************/
- THUMB ; assemble as Thumb code (the file header names Cortex-M3 as the target)
- REQUIRE8 ; armasm build attribute: this code requires 8-byte stack alignment
- PRESERVE8 ; armasm build attribute: this code preserves 8-byte stack alignment
- AREA |.text|, CODE, READONLY, ALIGN=2
- ; Symbols first, then register aliases. Several aliases share one physical register and are used in different phases of the routine.
- EXPORT cr4_fft_256_stm32 ; entry point defined in this file
- EXTERN TableFFT ; referenced only by the commented-out "Coeff in RAM" alternative in the function body
- pssK RN R0 ; coefficient table pointer (takes over R0 once the first stage is done)
- pssOUT RN R0 ; output array pointer on entry, used by the first stage
- pssX RN R1 ; in-place data pointer for the passes after the first stage
- pssIN RN R1 ; input array pointer on entry
- butternbr RN R2 ; packed loop counters for the passes after the first stage
- Nbin RN R2 ; number of points on entry
- index RN R3 ; first-stage butterfly counter, later the byte stride between butterfly inputs
- Ar RN R4 ; butterfly operand A, real part
- Ai RN R5 ; butterfly operand A, imaginary part
- Br RN R6 ; butterfly operand B, real part
- Bi RN R7 ; butterfly operand B, imaginary part
- Cr RN R8 ; butterfly operand C, real part
- Ci RN R9 ; butterfly operand C, imaginary part
- Dr RN R10 ; butterfly operand D, real part
- Di RN R11 ; butterfly operand D, imaginary part
- cntrbitrev RN R12 ; RBIT of the first-stage butterfly counter
- tmp RN R12 ; scratch (same register as cntrbitrev)
- pssIN2 RN R14 ; first-stage read pointer (pssIN plus bit-reversed offset)
- tmp2 RN R14 ; scratch (same register as pssIN2; R14 is LR, which the function saves on entry)
- NPT EQU 256 ; used as the byte step between the four inputs read by BUTFLY4ZERO_OPT
- ;----------------------------- MACROS ----------------------------------------
-
- ; DEC reg : reg = reg - 1 (no S suffix, so the flags are left alone)
- MACRO
- DEC $reg
- SUB $reg, #1
- MEND
- ; INC reg : reg = reg + 1 (no S suffix, so the flags are left alone)
- MACRO
- INC $reg
- ADD $reg, #1
- MEND
- ; QUAD reg : reg = reg * 4, done as a logical shift left by two bits
- MACRO
- QUAD $reg
- LSL $reg, $reg, #2
- MEND
- ; LDR2Q re, im, ptr, step : load one complex point, then move the pointer forward.
- ;   re = sign-extended halfword at [ptr]
- ;   im = sign-extended halfword at [ptr + 2]
- ;   ptr = ptr + step (step is any ADD operand: register or immediate)
- ; re and im must be different registers from ptr.
- MACRO
- LDR2Q $sXr,$sXi, $PssX, $offset
- LDRSH $sXr, [$PssX] ; real part
- LDRSH $sXi, [$PssX, #2] ; imaginary part
- ADD $PssX, $PssX, $offset
- MEND
- ; LDR2Qm re, im, ptr, step : same load as LDR2Q, but the pointer moves backward.
- ;   re = sign-extended halfword at [ptr]
- ;   im = sign-extended halfword at [ptr + 2]
- ;   ptr = ptr - step (pass the step as a positive value; it is subtracted here)
- ; re and im must be different registers from ptr.
- MACRO
- LDR2Qm $sXr, $sXi, $PssX, $offset
- LDRSH $sXr, [$PssX] ; real part
- LDRSH $sXi, [$PssX, #2] ; imaginary part
- SUB $PssX, $PssX, $offset
- MEND
- ; STR2Q re, im, ptr, step : store one complex point, then move the pointer forward.
- ;   halfword at [ptr] = low 16 bits of re
- ;   halfword at [ptr + 2] = low 16 bits of im
- ;   ptr = ptr + step (step is any ADD operand: register or immediate)
- MACRO
- STR2Q $sXr, $sXi, $PssX, $offset
- STRH $sXr, [$PssX] ; real part
- STRH $sXi, [$PssX, #2] ; imaginary part
- ADD $PssX, $PssX, $offset
- MEND
- ; CXMUL_V7 YYr, YYi, Yr, Yi, Kr, Ki, tmp, tmp2
- ; Multiplies the complex value Y by the conjugate of a coefficient K:
- ;   YYr = Yr*Kr + Yi*Ki
- ;   YYi = Yi*Kr - Yr*Ki
- ; Only three multiplies are needed because the Kr operand is expected to
- ; hold (Kr - Ki) instead of Kr, while the Ki operand holds Ki. With
- ;   t = (Yi - Yr)*Ki
- ; the results are
- ;   YYi = Yi*(Kr - Ki) + t
- ;   YYr = Yr*((Kr - Ki) + 2*Ki) + t
- ; YYr may be the same register as the Kr operand and YYi the same register
- ; as the Ki operand: each coefficient is read for the last time before its
- ; result register is written. Both scratch registers are overwritten.
- ; The products are not shifted or rounded here.
- MACRO
- CXMUL_V7 $YYr, $YYi, $Yr, $Yi, $Kr, $Ki,$tmp,$tmp2
- RSB $tmp2, $Yr, $Yi ; Yi - Yr
- MUL $tmp, $Ki, $tmp2 ; t = Ki*(Yi - Yr)
- ADD $tmp2, $Kr, $Ki, LSL#1 ; (Kr - Ki) + 2*Ki = Kr + Ki
- MLA $YYi, $Kr, $Yi, $tmp ; YYi = Yi*Kr - Yr*Ki
- MLA $YYr, $tmp2, $Yr, $tmp ; YYr = Yr*Kr + Yi*Ki
- MEND
- ; CXADDA4 s : in-place four-point complex butterfly on registers A, B, C, D
- ; (Ar/Ai, Br/Bi, Cr/Ci, Dr/Di).
- ; B, C and D are shifted right by s on the way in; A is used as is.
- ; Every output is divided by 4 (arithmetic shifts, so low bits are dropped).
- ; With b = B>>s, c = C>>s, d = D>>s the results are
- ;   A = (A + b + c + d)/4
- ;   C = (A + b - c - d)/4
- ;   B = ((A - b) - i*(c - d))/4
- ;   D = ((A - b) + i*(c - d))/4, left with its parts swapped:
- ;       Di holds the real part and Dr holds the imaginary part.
- ; The real and imaginary chains are independent until the final cross terms.
- MACRO
- CXADDA4 $s
- ; real parts: C+D, C-D, then A+B, A-B, then A+B+C+D, A+B-C-D
- ADD Cr, Cr, Dr
- SUB Dr, Cr, Dr, LSL#1
- ASR Ar, Ar, #2
- ADD Ar, Ar, Br, ASR#(2+$s)
- SUB Br, Ar, Br, ASR#(1+$s)
- ADD Ar, Ar, Cr, ASR#(2+$s)
- SUB Cr, Ar, Cr, ASR#(1+$s)
- ; imaginary parts: same sequence
- ADD Ci, Ci, Di
- SUB Di, Ci, Di, LSL#1
- ASR Ai, Ai, #2
- ADD Ai, Ai, Bi, ASR#(2+$s)
- SUB Bi, Ai, Bi, ASR#(1+$s)
- ADD Ai, Ai, Ci, ASR#(2+$s)
- SUB Ci, Ai, Ci, ASR#(1+$s)
- ; cross terms: real of B and D use the imaginary difference Di
- ADD Br, Br, Di, ASR#(2+$s)
- SUB Di, Br, Di, ASR#(1+$s)
- ; cross terms: imaginary of B and D use the real difference Dr
- SUB Bi, Bi, Dr, ASR#(2+$s)
- ADD Dr, Bi, Dr, ASR#(1+$s)
- MEND
-
- ; BUTFLY4ZERO_OPT pIN, offset, pOUT : first-stage butterfly, no coefficients.
- ; Reads four complex points (signed halfword pairs) spaced NPT bytes apart
- ; starting at pIN, into A, C, B, D in that address order, runs the
- ; four-point butterfly with its divide-by-4 scaling (CXADDA4 with a shift
- ; of 0), and writes A, B, C, D as four consecutive points at pOUT.
- ; On exit pIN has advanced by 4*NPT bytes and pOUT by 16 bytes.
- ; The offset parameter is accepted but not used.
- ; Overwrites Ar..Di.
- MACRO
- BUTFLY4ZERO_OPT $pIN,$offset, $pOUT
- LDRSH Ar, [$pIN]
- LDRSH Ai, [$pIN, #2]
- LDRSH Cr, [$pIN, #NPT]
- LDRSH Ci, [$pIN, #(NPT+2)]
- LDRSH Br, [$pIN, #(2*NPT)]
- LDRSH Bi, [$pIN, #(2*NPT+2)]
- LDRSH Dr, [$pIN, #(3*NPT)]
- LDRSH Di, [$pIN, #(3*NPT+2)]
- ADD $pIN, $pIN, #(4*NPT)
- ; butterfly with unscaled B, C, D inputs
- CXADDA4 0
- STRH Ar, [$pOUT]
- STRH Ai, [$pOUT, #2]
- STRH Br, [$pOUT, #4]
- STRH Bi, [$pOUT, #6]
- STRH Cr, [$pOUT, #8]
- STRH Ci, [$pOUT, #10]
- ; D comes out of the butterfly with its parts swapped: Di is the real part
- STRH Di, [$pOUT, #12]
- STRH Dr, [$pOUT, #14]
- ADD $pOUT, $pOUT, #16
- MEND
- ; BUTFLY4_V7 pssDin, offset, pssDout, qformat, pssK : butterfly with coefficients.
- ; Reads four complex points at pssDin, pssDin-offset, pssDin-2*offset and
- ; pssDin-3*offset (highest address first). The first three points read are
- ; each multiplied by the next coefficient pair from pssK (CXMUL_V7) and
- ; become D, C and B; the lowest-address point is used unmultiplied as A.
- ; CXADDA4 then combines them, shifting B, C, D right by qformat.
- ; Results A, B, C are stored offset bytes apart starting at pssDout,
- ; followed by D; pssDout ends 3*offset + 4 bytes past its starting value.
- ; pssDin ends 3*offset bytes below its starting value, pssK 12 bytes above.
- ; Overwrites Ar..Di, tmp and tmp2.
- MACRO
- BUTFLY4_V7 $pssDin,$offset,$pssDout,$qformat,$pssK
- ; fourth point times first coefficient -> D
- LDR2Qm Ar,Ai,$pssDin,$offset
- LDR2Q Dr,Di,$pssK,#4
- CXMUL_V7 Dr,Di,Ar,Ai,Dr,Di,tmp,tmp2
- ; third point times second coefficient -> C
- LDR2Qm Ar,Ai,$pssDin,$offset
- LDR2Q Cr,Ci,$pssK,#4
- CXMUL_V7 Cr,Ci,Ar,Ai,Cr,Ci,tmp,tmp2
- ; second point times third coefficient -> B
- LDR2Qm Ar,Ai,$pssDin,$offset
- LDR2Q Br,Bi,$pssK,#4
- CXMUL_V7 Br,Bi,Ar,Ai,Br,Bi,tmp,tmp2
- ; first point -> A, pointer left where it is
- LDR2Q Ar,Ai,$pssDin,#0
- CXADDA4 $qformat
- STR2Q Ar,Ai,$pssDout,$offset
- STR2Q Br,Bi,$pssDout,$offset
- STR2Q Cr,Ci,$pssDout,$offset
- ; D leaves the butterfly with its parts swapped, so Di is stored as the real part
- STR2Q Di,Dr,$pssDout,#4
- MEND
- ;------------------- CODE --------------------------------
- ;===============================================================================
- ;*******************************************************************************
- ;* Function Name : cr4_fft_256_stm32
- ;* Description : complex radix-4 256 points FFT: a first stage without coefficients that reads the input in bit-reversed order, then three in-place passes over the output array
- ;* Input : - R0 = pssOUT: Output array (each point is read/written as two signed halfwords, real at +0 and imaginary at +2)
- ;* - R1 = pssIN: Input array, same layout, read only by the first stage
- ;* - R2 = Nbin: =256 number of points, this optimized FFT function
- ;* can only convert 256 points. NOTE(review): Nbin is not checked; the first stage is fixed at 256 points while Nbin sets the pointer rewind and the initial group count - confirm callers always pass 256.
- ;* Output : None (results are written to the array at pssOUT; each of the four stages divides by 4)
- ;* Return : None (R4-R11 are saved and restored; R0-R3, R12 and the flags are modified)
- ;*******************************************************************************
- cr4_fft_256_stm32
- STMFD SP!, {R4-R11, LR} ; LR is saved because R14 doubles as pssIN2/tmp2
- ; ---- First stage: 64 butterflies, each reads 4 points NPT bytes apart and writes 4 consecutive points at pssOUT ----
- MOV cntrbitrev, #0 ; bit-reversed counter for butterfly 0
- MOV index,#0 ; butterfly counter
- ; BUTFLY4ZERO_OPT receives Nbin only to fill its unused offset parameter
- preloop_v7
- ADD pssIN2, pssIN, cntrbitrev, LSR#24 ;256-pts: RBIT leaves the reversed 6-bit counter in bits 31..26, so LSR#24 gives (reversed counter)*4 bytes
- BUTFLY4ZERO_OPT pssIN2,Nbin,pssOUT
- INC index
- RBIT cntrbitrev,index
- CMP index,#64 ;256-pts: 64 butterflies of 4 points each
- BNE preloop_v7
- SUB pssX, pssOUT, Nbin, LSL#2 ; pssOUT advanced 64*16 bytes above; subtracting 4*Nbin returns to the array start when Nbin = 256
- MOV index, #16 ; byte stride between the inputs of one butterfly in the next pass
- MOVS butternbr, Nbin, LSR#4 ;dual use of register: R2 stops being Nbin and becomes the group count (16 when Nbin = 256)
- ; From here on R0 is pssK; pssOUT is no longer needed
- ;------------------------------------------------------------------------------
- ; The FFT coefficients table can be stored into Flash or RAM.
- ; The following two lines of code allow selecting the method for coefficients
- ; storage.
- ; In the case of choosing coefficients in RAM, you have to:
- ; 1. Include the file table_fft.h, which is a part of the DSP library,
- ; in your main file.
- ; 2. Uncomment the line LDR.W pssK, =TableFFT and comment out the line
- ; ADRL pssK, TableFFT_V7
- ; 3. Comment out all the TableFFT_V7 data.
- ;------------------------------------------------------------------------------
- ADRL pssK, TableFFT_V7 ; Coeff in Flash
- ;LDR.W pssK, =TableFFT ; Coeff in RAM
- ;................................ one pass: butternbr groups, each of index/4 butterflies
- passloop_v7
- STMFD SP!, {pssX,butternbr} ; keep the array start and the group count for the next pass
- ADD tmp, index, index, LSL#1 ; tmp = 3*index
- ADD pssX, pssX, tmp ; point at the highest-address input of the first butterfly (BUTFLY4_V7 reads downward)
- SUB butternbr, butternbr, #1<<16 ; upper halfword (butterfly counter) = -1, lower halfword = groups left
- ;................ one group
- grouploop_v7
- ADD butternbr,butternbr,index,LSL#(16-2) ; upper halfword += index/4, the number of butterflies in a group
- ;....... one butterfly, in place; it leaves pssX 4 bytes higher and pssK 12 bytes higher
- butterloop_v7
- BUTFLY4_V7 pssX,index,pssX,14,pssK
- SUBS butternbr,butternbr, #1<<16 ; count down the upper halfword
- BGE butterloop_v7 ; loop until the whole register goes negative (upper halfword back at -1)
- ;....... group finished
- ADD tmp, index, index, LSL#1 ; 3*index again (tmp was overwritten inside the butterfly)
- ADD pssX, pssX, tmp ; move to the highest-address input of the next group's first butterfly
- DEC butternbr
- MOVS tmp2, butternbr, LSL#16 ; sets Z when the lower halfword (groups left) is zero
- IT NE
- SUBNE pssK, pssK, tmp ; groups remain: step pssK back 3*index bytes so the next group reuses the same coefficients
- BNE grouploop_v7
- ;................ pass finished; pssK is left at the coefficients for the next pass
- LDMFD sp!, {pssX, butternbr}
- QUAD index
- MOVS butternbr, butternbr, LSR#2 ; loop nbr /= radix
- BNE passloop_v7
- ;................................
- LDMFD SP!, {R4-R11, PC} ; restore registers and return by loading the saved LR into PC
- ;=============================================================================
- TableFFT_V7 ; coefficients for the three passes after the first stage; one row = the three halfword pairs read by one BUTFLY4_V7 (for D, C, B in that order), first halfword loaded as the real operand, second as the imaginary operand
- ;N=16: 4 rows, read by the pass with index = 16. Per the CXMUL_V7 notes the first halfword of a pair is (Kr - Ki); 0x4000 presumably represents 1.0, matching the shift of 14 passed to BUTFLY4_V7 - confirm against the table generator
- DCW 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
- DCW 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
- DCW 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
- DCW 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
- ; N=64: 16 rows, read by the pass with index = 64
- DCW 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
- DCW 0x2aaa,0x1294, 0x396b,0x0646, 0x3249,0x0c7c
- DCW 0x11a8,0x238e, 0x3249,0x0c7c, 0x22a3,0x187e
- DCW 0xf721,0x3179, 0x2aaa,0x1294, 0x11a8,0x238e
- DCW 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
- DCW 0xc695,0x3fb1, 0x1a46,0x1e2b, 0xee58,0x3537
- DCW 0xb4be,0x3ec5, 0x11a8,0x238e, 0xdd5d,0x3b21
- DCW 0xa963,0x3871, 0x08df,0x289a, 0xcdb7,0x3ec5
- DCW 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
- DCW 0xa963,0x1e2b, 0xf721,0x3179, 0xb4be,0x3ec5
- DCW 0xb4be,0x0c7c, 0xee58,0x3537, 0xac61,0x3b21
- DCW 0xc695,0xf9ba, 0xe5ba,0x3871, 0xa73b,0x3537
- DCW 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
- DCW 0xf721,0xd766, 0xd556,0x3d3f, 0xa73b,0x238e
- DCW 0x11a8,0xcac9, 0xcdb7,0x3ec5, 0xac61,0x187e
- DCW 0x2aaa,0xc2c1, 0xc695,0x3fb1, 0xb4be,0x0c7c
- ; N=256: 64 rows, read by the pass with index = 256
- DCW 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
- DCW 0x3b1e,0x04b5, 0x3e69,0x0192, 0x3cc8,0x0324
- DCW 0x35eb,0x0964, 0x3cc8,0x0324, 0x396b,0x0646
- DCW 0x306c,0x0e06, 0x3b1e,0x04b5, 0x35eb,0x0964
- DCW 0x2aaa,0x1294, 0x396b,0x0646, 0x3249,0x0c7c
- DCW 0x24ae,0x1709, 0x37af,0x07d6, 0x2e88,0x0f8d
- DCW 0x1e7e,0x1b5d, 0x35eb,0x0964, 0x2aaa,0x1294
- DCW 0x1824,0x1f8c, 0x341e,0x0af1, 0x26b3,0x1590
- DCW 0x11a8,0x238e, 0x3249,0x0c7c, 0x22a3,0x187e
- DCW 0x0b14,0x2760, 0x306c,0x0e06, 0x1e7e,0x1b5d
- DCW 0x0471,0x2afb, 0x2e88,0x0f8d, 0x1a46,0x1e2b
- DCW 0xfdc7,0x2e5a, 0x2c9d,0x1112, 0x15fe,0x20e7
- DCW 0xf721,0x3179, 0x2aaa,0x1294, 0x11a8,0x238e
- DCW 0xf087,0x3453, 0x28b2,0x1413, 0x0d48,0x2620
- DCW 0xea02,0x36e5, 0x26b3,0x1590, 0x08df,0x289a
- DCW 0xe39c,0x392b, 0x24ae,0x1709, 0x0471,0x2afb
- DCW 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
- DCW 0xd74e,0x3cc5, 0x2093,0x19ef, 0xfb8f,0x2f6c
- DCW 0xd178,0x3e15, 0x1e7e,0x1b5d, 0xf721,0x3179
- DCW 0xcbe2,0x3f0f, 0x1c64,0x1cc6, 0xf2b8,0x3368
- DCW 0xc695,0x3fb1, 0x1a46,0x1e2b, 0xee58,0x3537
- DCW 0xc197,0x3ffb, 0x1824,0x1f8c, 0xea02,0x36e5
- DCW 0xbcf0,0x3fec, 0x15fe,0x20e7, 0xe5ba,0x3871
- DCW 0xb8a6,0x3f85, 0x13d5,0x223d, 0xe182,0x39db
- DCW 0xb4be,0x3ec5, 0x11a8,0x238e, 0xdd5d,0x3b21
- DCW 0xb140,0x3daf, 0x0f79,0x24da, 0xd94d,0x3c42
- DCW 0xae2e,0x3c42, 0x0d48,0x2620, 0xd556,0x3d3f
- DCW 0xab8e,0x3a82, 0x0b14,0x2760, 0xd178,0x3e15
- DCW 0xa963,0x3871, 0x08df,0x289a, 0xcdb7,0x3ec5
- DCW 0xa7b1,0x3612, 0x06a9,0x29ce, 0xca15,0x3f4f
- DCW 0xa678,0x3368, 0x0471,0x2afb, 0xc695,0x3fb1
- DCW 0xa5bc,0x3076, 0x0239,0x2c21, 0xc338,0x3fec
- DCW 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
- DCW 0xa5bc,0x29ce, 0xfdc7,0x2e5a, 0xbcf0,0x3fec
- DCW 0xa678,0x2620, 0xfb8f,0x2f6c, 0xba09,0x3fb1
- DCW 0xa7b1,0x223d, 0xf957,0x3076, 0xb74d,0x3f4f
- DCW 0xa963,0x1e2b, 0xf721,0x3179, 0xb4be,0x3ec5
- DCW 0xab8e,0x19ef, 0xf4ec,0x3274, 0xb25e,0x3e15
- DCW 0xae2e,0x1590, 0xf2b8,0x3368, 0xb02d,0x3d3f
- DCW 0xb140,0x1112, 0xf087,0x3453, 0xae2e,0x3c42
- DCW 0xb4be,0x0c7c, 0xee58,0x3537, 0xac61,0x3b21
- DCW 0xb8a6,0x07d6, 0xec2b,0x3612, 0xaac8,0x39db
- DCW 0xbcf0,0x0324, 0xea02,0x36e5, 0xa963,0x3871
- DCW 0xc197,0xfe6e, 0xe7dc,0x37b0, 0xa834,0x36e5
- DCW 0xc695,0xf9ba, 0xe5ba,0x3871, 0xa73b,0x3537
- DCW 0xcbe2,0xf50f, 0xe39c,0x392b, 0xa678,0x3368
- DCW 0xd178,0xf073, 0xe182,0x39db, 0xa5ed,0x3179
- DCW 0xd74e,0xebed, 0xdf6d,0x3a82, 0xa599,0x2f6c
- DCW 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
- DCW 0xe39c,0xe33a, 0xdb52,0x3bb6, 0xa599,0x2afb
- DCW 0xea02,0xdf19, 0xd94d,0x3c42, 0xa5ed,0x289a
- DCW 0xf087,0xdb26, 0xd74e,0x3cc5, 0xa678,0x2620
- DCW 0xf721,0xd766, 0xd556,0x3d3f, 0xa73b,0x238e
- DCW 0xfdc7,0xd3df, 0xd363,0x3daf, 0xa834,0x20e7
- DCW 0x0471,0xd094, 0xd178,0x3e15, 0xa963,0x1e2b
- DCW 0x0b14,0xcd8c, 0xcf94,0x3e72, 0xaac8,0x1b5d
- DCW 0x11a8,0xcac9, 0xcdb7,0x3ec5, 0xac61,0x187e
- DCW 0x1824,0xc850, 0xcbe2,0x3f0f, 0xae2e,0x1590
- DCW 0x1e7e,0xc625, 0xca15,0x3f4f, 0xb02d,0x1294
- DCW 0x24ae,0xc44a, 0xc851,0x3f85, 0xb25e,0x0f8d
- DCW 0x2aaa,0xc2c1, 0xc695,0x3fb1, 0xb4be,0x0c7c
- DCW 0x306c,0xc18e, 0xc4e2,0x3fd4, 0xb74d,0x0964
- DCW 0x35eb,0xc0b1, 0xc338,0x3fec, 0xba09,0x0646
- DCW 0x3b1e,0xc02c, 0xc197,0x3ffb, 0xbcf0,0x0324
-
- END
- ;******************* (C) COPYRIGHT 2009 STMicroelectronics *****END OF FILE****