无聊写个32位乘32位乘法函数
本来想用浮点数计算,但是来回转换比较麻烦,干脆用整数了。浮点数在线转换: https://www.h-schmidt.net/FloatConverter/IEEE754.html
别拍砖只是瞎写,效率很低。 没办法不会算法也学不会,只能上土法了。
三十二位乘三十二位等于六十四位
测试下 R2 0XFFFFFFFF ×R3 0XFFFFFFFF=R11 0XFFFFFFFE(高32位)R12 0X00000001 (低32位)
再测个 0X44DD22×0X44DD22=0X00001286 0X3913B884
chengfa64:
@ Unsigned 32 x 32 -> 64-bit multiply for Thumb-1 cores whose MULS only
@ yields the low 32 bits of a product.
@   In : r2 = first operand (a), r3 = second operand (b)
@   Out: r11 = high 32 bits of a*b, r12 = low 32 bits of a*b
@   r0-r7 are saved and restored; the flags are clobbered.
@
@ Both operands are split into 16-bit halves (a = aH:aL, b = bH:bL), the
@ four 16 x 16 partial products are formed with MULS and then summed with
@ their proper weights.  Worked example, 0xFFFFFFFF x 0xFFFFFFFF:
@
@                   F F F E 0 0 0 1     aL*bL
@           F F F E 0 0 0 1             aH*bL
@           F F F E 0 0 0 1             aL*bH
@   F F F E 0 0 0 1                     aH*bH
@   -------------------------------
@   F F F F F F F E 0 0 0 0 0 0 0 1
@
@ Everything is kept in registers: no scratch memory outside this
@ routine's own stack frame is touched and no helper routine is called.
        push    {r0-r7, lr}

        lsrs    r0, r2, #16             @ r0 = aH
        uxth    r1, r2                  @ r1 = aL
        lsrs    r4, r3, #16             @ r4 = bH
        uxth    r5, r3                  @ r5 = bL

        movs    r6, r1
        muls    r6, r5                  @ r6 = aL*bL  -> low word accumulator
        movs    r7, r0
        muls    r7, r4                  @ r7 = aH*bH  -> high word accumulator
        muls    r0, r5                  @ r0 = aH*bL  (weight 2^16)
        muls    r1, r4                  @ r1 = aL*bH  (weight 2^16)

        @ Add aH*bL: its low half goes to bits 31:16 of the low word, its
        @ high half plus the carry goes to the high word.
        lsls    r2, r0, #16
        lsrs    r0, r0, #16
        adds    r6, r6, r2
        adcs    r7, r0

        @ Same for aL*bH.  The true product fits in 64 bits, so the high
        @ word cannot overflow here.
        lsls    r2, r1, #16
        lsrs    r1, r1, #16
        adds    r6, r6, r2
        adcs    r7, r1

        mov     r11, r7                 @ high 32 bits
        mov     r12, r6                 @ low 32 bits
        pop     {r0-r7, pc}
gaodi16:
@ Split a 32-bit word into its two 16-bit halves.
@   In : r0 = value
@   Out: r0 = upper 16 bits (bits 31:16 moved down), r1 = lower 16 bits
        uxth    r1, r0                  @ r1 = value & 0xFFFF
        lsrs    r0, r0, #16             @ r0 = value >> 16
        bx      lr
chengfa16:
@ Multiply r0 by r1 and append the product, split into 16-bit halves, to
@ the word buffer addressed by r7 with byte index r6.
@   In : r0, r1 = factors (MULS keeps only the low 32 bits of the product)
@        r7     = buffer base, r6 = byte offset of the next free slot
@   Out: [r7 + r6]     = upper 16 bits of the product
@        [r7 + r6 + 4] = lower 16 bits of the product
@        r6 advanced by 8; r0 = upper half, r1 = lower half
@ NOTE(review): the store operands were missing from the published
@ listing; [r7, r6] is reconstructed from the original header comment
@ ("results go to R7 and R7+4, R6 is the index") - confirm.
        muls    r0, r1                  @ r0 = r0 * r1
        uxth    r1, r0                  @ r1 = low 16 bits
        lsrs    r0, r0, #16             @ r0 = high 16 bits
        str     r0, [r7, r6]
        adds    r6, #4
        str     r1, [r7, r6]
        adds    r6, #4
        bx      lr
我了个去,原来是汇编,遇到高人了。 本帖最后由 叶春勇 于 2023-4-13 10:56 编辑
无意中翻到:补个cortex-m0,64位乘法‘
@ long long __aeabi_lmul(long long r1:r0, long long r3:r2)
@
@ 64 x 64 -> 64-bit multiply built from the 32 x 32 -> 32 MULS instruction.
@   In : r1:r0 = first operand  (r0 = low word,  r1 = high word)
@        r3:r2 = second operand (r2 = low word,  r3 = high word)
@   Out: r1:r0 = low 64 bits of the product
@ Because only the low 64 bits are produced, the same code serves signed
@ and unsigned operands.  r4 is saved and restored; r2 and r3 are clobbered.
@
        .thumb_func
        .global __aeabi_lmul
__aeabi_lmul:
        push    {r4, lr}

        @ Terms that only affect the high word.
        muls    r3, r0                  @ r3 = b_hi * a_lo
        muls    r1, r2                  @ r1 = a_hi * b_lo
        adds    r1, r1, r3
        lsrs    r4, r2, #16             @ r4 = b_lo[31:16]
        lsrs    r3, r0, #16             @ r3 = a_lo[31:16]
        muls    r3, r4                  @ r3 = a_lo[31:16] * b_lo[31:16]
        adds    r1, r1, r3

        @ 16 x 16 partial products of the two low words.
        lsrs    r3, r0, #16             @ r3 = a_lo[31:16] (again)
        uxth    r2, r2                  @ r2 = b_lo[15:0]
        uxth    r0, r0                  @ r0 = a_lo[15:0]
        muls    r4, r0                  @ r4 = b_lo[31:16] * a_lo[15:0]
        muls    r3, r2                  @ r3 = a_lo[31:16] * b_lo[15:0]
        muls    r0, r2                  @ r0 = a_lo[15:0]  * b_lo[15:0]

        @ Sum the two middle products; their carry-out has weight 2^48,
        @ i.e. bit 16 of the high word.
        movs    r2, #0
        adds    r3, r3, r4
        adcs    r2, r2                  @ r2 = carry of the middle sum
        lsls    r2, r2, #16
        adds    r1, r1, r2

        @ Fold the middle sum (weight 2^16) into the 64-bit result.
        lsls    r2, r3, #16
        lsrs    r3, r3, #16
        adds    r0, r0, r2
        adcs    r1, r3
        pop     {r4, pc}
本帖最后由 yjmwxwx 于 2023-4-13 19:03 编辑
叶春勇 发表于 2023-4-13 10:54
无意中翻到:补个cortex-m0,64位乘法‘
后来我这个乘法也重新写过,不过好像还是比人家写的麻烦一些
__chengfa:
@ Unsigned 32 x 32 -> 64-bit multiply.
@   In : r0, r1 = factors
@   Out: r0 = high 32 bits of the product, r1 = low 32 bits
@   r2-r7 are saved and restored.
        push    {r2-r7, lr}
        cmp     r0, #0                  @ either factor zero -> product is zero
        beq     __cheng_fa_fan_hui
        cmp     r1, #0
        beq     __cheng_fa_fan_hui
__ji_suan_cheng_fa:
        @ Split both factors into 16-bit halves: a = r0:r2, b = r1:r3.
        uxth    r2, r0                  @ r2 = aL
        lsrs    r0, r0, #16             @ r0 = aH
        uxth    r3, r1                  @ r3 = bL
        lsrs    r1, r1, #16             @ r1 = bH

        @ Four 16 x 16 partial products.
        mov     r5, r0
        muls    r5, r5, r1              @ r5 = aH*bH   (4)
        mov     r4, r2
        muls    r4, r4, r1              @ r4 = aL*bH   (3)
        muls    r0, r0, r3              @ r0 = aH*bL   (2)
        muls    r2, r2, r3              @ r2 = aL*bL   (1)

        @ Products (2) and (3) carry weight 2^16: split each into the part
        @ that lands in the low word and the part that lands in the high word.
        lsrs    r6, r0, #16             @ r6 = (2) >> 16
        lsls    r0, r0, #16             @ r0 = (2) << 16
        lsrs    r7, r4, #16             @ r7 = (3) >> 16
        lsls    r4, r4, #16             @ r4 = (3) << 16

        movs    r1, #0
        adds    r2, r2, r0              @ low word += (2) << 16
        adcs    r6, r6, r1              @ propagate the carry
        adds    r2, r2, r4              @ low word += (3) << 16
        adcs    r6, r6, r7              @ high part of (3) plus carry
        adds    r0, r6, r5              @ r0 = high word, including (4)
        mov     r1, r2                  @ r1 = low word
        pop     {r2-r7, pc}
__cheng_fa_fan_hui:
        @ Zero-product exit.
        movs    r0, #0
        movs    r1, #0
        pop     {r2-r7, pc}
佩服佩服 64位乘法函数 yjmwxwx 发表于 2023-4-13 19:01
后来我这个乘法也重新写过,不过好像还是比人家写的麻烦一些
这些基本运算,调用人家的库就好了,也有浮点的。不过M0单片机用浮点就有点奢侈了。
https://github.com/bobbl/libaeabi-cortexm0
arm-none-eabi-gcc -mcpu=cortex-m0 -mthumb -o foo.arm foo.c -nostdlib -laeabi-cortexm0
叶春勇 发表于 2023-4-14 09:47
这些基本运算,调用人家的库就好了,也有浮点的。不过M0单片机用浮点就有点奢侈了。
https://github.com/ ...
反正闲着没事就自己写呗,别人的库还要学怎么用,他这个乘法我就看不懂怎么用,是64位乘64位还是32位乘32位,入口这四个寄存器不知道应该存什么数据,我自己写的虽然不如人家写的好,但是最起码明确知道用法就是32位乘32位得到64位,浮点数我做东西时候还没用到。
@ Debugger-driven test harness for the two multiply routines.
@ It runs each routine once and stops on a breakpoint so the result
@ registers can be inspected.
.thumb
.syntax unified
@ Two-word table: the constant zhanding followed by the address of _start
@ with bit 0 set.  NOTE(review): presumably the start of a Cortex-M vector
@ table (initial stack pointer, reset handler with the Thumb bit) - this
@ assumes the image is linked with `vectors` at its base; confirm.
vectors:
.word zhanding
.word _start + 1
@ zhanding = 0x20001000 (the name is presumably pinyin for "stack top").
.equ zhanding, 0x20001000
_start :
@ Case 1: __aeabi_lmul with r1:r0 = r3:r2 = 0x0000FFFF_0000FFFF.
ldr r0, = 0xffff
ldr r1, = 0xffff
ldr r2, = 0xffff
ldr r3, = 0xffff
bl __aeabi_lmul
@ Stop here to inspect r1:r0.
bkpt # 1
@ Case 2: __chengfa with 0xFFFFFFFF x 0xFFFFFFFF.
ldr r0, = 0xffffffff
ldr r1, = 0xffffffff
bl __chengfa
@ Stop here to inspect r0 (high word) and r1 (low word).  Nothing follows
@ this breakpoint, so resuming would run on into the code placed next.
bkpt # 2
__aeabi_lmul:
@ 64 x 64 -> 64-bit multiply built from the 32 x 32 -> 32 MULS instruction.
@   In : r1:r0 = first operand, r3:r2 = second operand (low word in r0 / r2)
@   Out: r1:r0 = low 64 bits of the product
@   r4 is saved and restored; r2 and r3 are clobbered.
        push    {r4, lr}

        @ Terms that only affect the high word.
        muls    r3, r0                  @ r3 = b_hi * a_lo
        muls    r1, r2                  @ r1 = a_hi * b_lo
        adds    r1, r1, r3
        lsrs    r4, r2, #16             @ r4 = b_lo[31:16]
        lsrs    r3, r0, #16             @ r3 = a_lo[31:16]
        muls    r3, r4                  @ r3 = a_lo[31:16] * b_lo[31:16]
        adds    r1, r1, r3

        @ 16 x 16 partial products of the two low words.
        lsrs    r3, r0, #16             @ r3 = a_lo[31:16] (again)
        uxth    r2, r2                  @ r2 = b_lo[15:0]
        uxth    r0, r0                  @ r0 = a_lo[15:0]
        muls    r4, r0                  @ r4 = b_lo[31:16] * a_lo[15:0]
        muls    r3, r2                  @ r3 = a_lo[31:16] * b_lo[15:0]
        muls    r0, r2                  @ r0 = a_lo[15:0]  * b_lo[15:0]

        @ Sum the two middle products; the carry-out has weight 2^48,
        @ i.e. bit 16 of the high word.
        movs    r2, #0
        adds    r3, r3, r4
        adcs    r2, r2                  @ r2 = carry of the middle sum
        lsls    r2, r2, #16
        adds    r1, r1, r2

        @ Fold the middle sum (weight 2^16) into the 64-bit result.
        lsls    r2, r3, #16
        lsrs    r3, r3, #16
        adds    r0, r0, r2
        adcs    r1, r3
        pop     {r4, pc}
__chengfa:
@ Unsigned 32 x 32 -> 64-bit multiply.
@   In : r0, r1 = factors
@   Out: r0 = high 32 bits of the product, r1 = low 32 bits
@   r2-r7 are saved and restored.
@
@ Worked example, 0xFFFFFFFF x 0xFFFFFFFF (numbers match the @(n) tags below):
@   (4)  F F F E 0 0 0 1                     aH*bH
@   (3)          F F F E 0 0 0 1             aL*bH
@   (2)          F F F E 0 0 0 1             aH*bL
@   (1)                  F F F E 0 0 0 1     aL*bL
@        F F F F F F F E 0 0 0 0 0 0 0 1
        push    {r2-r7, lr}
        cmp     r0, #0                  @ either factor zero -> product is zero
        beq     __cheng_fa_fan_hui
        cmp     r1, #0
        beq     __cheng_fa_fan_hui
__ji_suan_cheng_fa:
        @ Split both factors into 16-bit halves: a = r0:r2, b = r1:r3.
        uxth    r2, r0                  @ r2 = aL
        lsrs    r0, r0, #16             @ r0 = aH
        uxth    r3, r1                  @ r3 = bL
        lsrs    r1, r1, #16             @ r1 = bH

        @ Four 16 x 16 partial products.
        mov     r5, r0
        muls    r5, r5, r1              @ (4) r5 = aH*bH
        mov     r4, r2
        muls    r4, r4, r1              @ (3) r4 = aL*bH
        muls    r0, r0, r3              @ (2) r0 = aH*bL
        muls    r2, r2, r3              @ (1) r2 = aL*bL

        @ (2) and (3) carry weight 2^16: separate the bits that land in
        @ the low word from those that land in the high word.
        lsrs    r6, r0, #16             @ r6 = (2) >> 16
        lsls    r0, r0, #16             @ r0 = (2) << 16
        lsrs    r7, r4, #16             @ r7 = (3) >> 16
        lsls    r4, r4, #16             @ r4 = (3) << 16

        movs    r1, #0
        adds    r2, r2, r0              @ low word += (2) << 16
        adcs    r6, r6, r1              @ propagate the carry
        adds    r2, r2, r4              @ low word += (3) << 16
        adcs    r6, r6, r7              @ high part of (3) plus carry
        adds    r0, r6, r5              @ r0 = high word, including (4)
        mov     r1, r2                  @ r1 = low word
        pop     {r2-r7, pc}
__cheng_fa_fan_hui:
        @ Zero-product exit.
        movs    r0, #0
        movs    r1, #0
        pop     {r2-r7, pc}
yjmwxwx 发表于 2023-4-14 11:28
反正闲着没事就自己写呗,别人的库还要学怎么用,他这个乘法我就看不懂怎么用,是64位乘64位还是32位乘32 ...
前几天在写一个fft算法,当时看到stm32f103c8t6有乘累加指令。准备写一个内联汇编的函数。
结果用c语言生成汇编,编译器把+=直接编译成mac指令。看汇编的时候无意中看到了这个__lmul函数,就上网搜了下gcc内部的源代码,找到这个库。搜的过程中,正好看到你的帖子,正好把我搜到的补上去。记得你在论坛里基本都是用汇编。跟你2017年的代码差太多。
你后面的代码基本接近了。不过好像还是多了几行。
以前研究过这类算法,不过现在没什么兴趣了。你有兴趣就研究下,为啥差这么多。
我现在写得fft,是没有复数运算的,没有浮点。用的整数。
用汇编写代码的就是大佬!!! 叶春勇 发表于 2023-4-14 12:27
前几天在写一个fft算法,当时看到stm32f103c8t6有乘累加指令。准备写一个内联汇编的函数。
结果用c语言生 ...
哦 原来是这样,我现在没兴趣搞这些程序了,也就凑合玩着还没扔下,我又不能靠这些找工作,现实中也用不到,看别人搞的东西能卖钱,我就搞个免费的分享还有点乐趣,要不实在没地方用。 yjmwxwx 发表于 2023-4-14 14:17
哦 原来是这样,我现在没兴趣搞这些程序了,也就凑合玩着还没扔下,我又不能靠这些找工作,现实中也用不 ...
这是我的C代码,写出来搞个模板,准备自己插点代码。
#include<stdint.h>
/* Return the product of the two 64-bit integers that a and b point to. */
int64_t mula(int64_t *a, int64_t *b)
{
    const int64_t lhs = *a;
    const int64_t rhs = *b;

    return lhs * rhs;
}
/* Multiply the 64-bit integers that a and b point to and store the
 * product through c.  Both inputs are read before c is written. */
void mulb(int64_t *a, int64_t *b, int64_t *c)
{
    const int64_t product = (*a) * (*b);

    *c = product;
}
/* The compiler-generated assembly for the two functions follows. */
.cpu cortex-m0
.arch armv6s-m
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 4
.eabi_attribute 34, 0
.eabi_attribute 18, 4
.file "test.c"
.text
.global __aeabi_lmul
@ int64_t mula(int64_t *a, int64_t *b)
@   In : r0 = a, r1 = b
@   Out: r1:r0 = (*a) * (*b), computed by __aeabi_lmul
@ NOTE(review): the memory operands of the four loads were missing from
@ the published listing; they are reconstructed from the C source
@ (low word at offset 0, high word at offset 4, assuming the default
@ little-endian target) - confirm against a fresh compiler run.
        .align  1
        .global mula
        .syntax unified
        .code   16
        .thumb_func
        .type   mula, %function
mula:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {r4, lr}
        ldr     r2, [r1]                @ r3:r2 = *b
        ldr     r3, [r1, #4]
        ldr     r1, [r0, #4]            @ r1:r0 = *a; r0 is the base, so it is loaded last
        ldr     r0, [r0]
        bl      __aeabi_lmul
        @ sp needed
        pop     {r4, pc}
        .size   mula, .-mula
@ void mulb(int64_t *a, int64_t *b, int64_t *c)
@   In : r0 = a, r1 = b, r2 = c
@   Out: *c = (*a) * (*b), computed by __aeabi_lmul
@ NOTE(review): the memory operands of the loads and stores were missing
@ from the published listing; they are reconstructed from the C source
@ (low word at offset 0, high word at offset 4, assuming the default
@ little-endian target) - confirm against a fresh compiler run.
        .align  1
        .global mulb
        .syntax unified
        .code   16
        .thumb_func
        .type   mulb, %function
mulb:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        push    {r4, lr}
        movs    r4, r2                  @ keep c in r4 across the call (r4 is restored by the pop)
        ldr     r2, [r1]                @ r3:r2 = *b
        ldr     r3, [r1, #4]
        ldr     r1, [r0, #4]            @ r1:r0 = *a; r0 is the base, so it is loaded last
        ldr     r0, [r0]
        @ sp needed
        bl      __aeabi_lmul
        str     r0, [r4]                @ *c = product (low word, then high word)
        str     r1, [r4, #4]
        pop     {r4, pc}
        .size   mulb, .-mulb
.ident "GCC: (GNU Arm Embedded Toolchain 10.3-2021.10) 10.3.1 20210824 (release)"
我上面的mulb函数,就是汇编常用的格式。指针传递参数,指针传递输出结果。
看了下这些写库的人,汇编水平不低的呀。
叶春勇 发表于 2023-4-14 16:05
这是我的C代码,写出来搞个模板,准备自己插点代码。编译器生成的汇编代码:
我上面的mulb函数,就是汇编 ...
通过你贴的代码看懂他那__aeabi_lmul入口四个寄存器应该存什么数据了,结果R0存低32、R1存高32,感觉他这个是64位数乘一个数字但是结果要小于64位才行。
人家是正规军肯定水平高,我写的很多地方都不规范,学的时候没看过什么书,就是用调试器试错
yjmwxwx 发表于 2023-4-14 18:10
通过你贴的代码看懂他那__aeabi_lmul入口四个寄存器应该存什么数据了,结果R0存低32、R1存高32,感觉 ...
跟m0的硬件乘法指令一样,32x32=32。
r1和r3传入立即数0,就是32x32=64了,不过他这个好像是有符号数,负数可能用的是64bit补码,这个编译器里应该有函数的
这个怎么保存数据呢 cemaj 发表于 2023-4-15 19:43
这个怎么保存数据呢
叶春勇提供的库你可以进去看下,直接调用就行了吧 计算的 工作量怎么样 本帖最后由 叶春勇 于 2023-4-17 10:04 编辑
补一个stm32,FFT程序,汇编:
;******************** (C) COPYRIGHT 2009 STMicroelectronics ********************
;* File Name : cr4_fft_256_stm32.s
;* Author : MCD Application Team
;* Version : V2.0.0
;* Date : 04/27/2009
;* Description : Optimized 256-point radix-4 complex FFT for Cortex-M3
;********************************************************************************
;* THE PRESENT FIRMWARE WHICH IS FOR GUIDANCE ONLY AIMS AT PROVIDING CUSTOMERS
;* WITH CODING INFORMATION REGARDING THEIR PRODUCTS IN ORDER FOR THEM TO SAVE TIME.
;* AS A RESULT, STMICROELECTRONICS SHALL NOT BE HELD LIABLE FOR ANY DIRECT,
;* INDIRECT OR CONSEQUENTIAL DAMAGES WITH RESPECT TO ANY CLAIMS ARISING FROM THE
;* CONTENT OF SUCH SOFTWARE AND/OR THE USE MADE BY CUSTOMERS OF THE CODING
;* INFORMATION CONTAINED HEREIN IN CONNECTION WITH THEIR PRODUCTS.
;*******************************************************************************/
; Assembler setup and symbolic register names for the FFT routine.
; Directives are indented because armasm reads anything in column 1 as a
; label; the RN / EQU names stay in column 1 because they ARE labels.
        THUMB
        REQUIRE8
        PRESERVE8

        AREA    |.text|, CODE, READONLY, ALIGN=2

        EXPORT  cr4_fft_256_stm32
        EXTERN  TableFFT

; Several names deliberately share one register (R0, R1, R2, R12, R14);
; each pair is used in a different phase of the routine.
pssK            RN R0           ; coefficient (twiddle) table pointer
pssOUT          RN R0           ; output array pointer
pssX            RN R1           ; working data pointer
pssIN           RN R1           ; input array pointer
butternbr       RN R2           ; butterfly / group counters
Nbin            RN R2           ; number of points
index           RN R3           ; loop index / byte stride
Ar              RN R4           ; butterfly operand A, real part
Ai              RN R5           ;                      imaginary part
Br              RN R6           ; butterfly operand B
Bi              RN R7
Cr              RN R8           ; butterfly operand C
Ci              RN R9
Dr              RN R10          ; butterfly operand D
Di              RN R11
cntrbitrev      RN R12          ; bit-reversed counter
tmp             RN R12          ; scratch
pssIN2          RN R14          ; input pointer for the first pass
tmp2            RN R14          ; scratch

NPT             EQU 256         ; byte step between the four inputs of a first-pass butterfly
;----------------------------- MACROS ----------------------------------------
; DEC reg : reg = reg - 1 (flags are not updated).
        MACRO
        DEC     $reg
        SUB     $reg, $reg, #1
        MEND
; INC reg : reg = reg + 1 (flags are not updated).
        MACRO
        INC     $reg
        ADD     $reg, $reg, #1
        MEND
; QUAD reg : reg = reg * 4 (logical shift left by 2).
        MACRO
        QUAD    $reg
        MOV     $reg, $reg, LSL#2
        MEND
; LDR2Q sXr, sXi, PssX, offset
; Load one complex sample stored as two signed halfwords and advance:
;   sXr = halfword at [PssX], sXi = halfword at [PssX + 2] (both
;   sign-extended), then PssX += offset.
        MACRO
        LDR2Q   $sXr, $sXi, $PssX, $offset
        LDRSH   $sXi, [$PssX, #2]
        LDRSH   $sXr, [$PssX]
        ADD     $PssX, $PssX, $offset
        MEND
; LDR2Qm sXr, sXi, PssX, offset
; Same load as LDR2Q, but the pointer moves backwards afterwards
; (PssX -= offset).  Use it where the step would otherwise have to be
; passed as a negative value.
        MACRO
        LDR2Qm  $sXr, $sXi, $PssX, $offset
        LDRSH   $sXi, [$PssX, #2]
        LDRSH   $sXr, [$PssX]
        SUB     $PssX, $PssX, $offset
        MEND
; STR2Q sXr, sXi, PssX, offset
; Store one complex sample as two halfwords and advance:
;   [PssX] = sXr, [PssX + 2] = sXi, then PssX += offset.
        MACRO
        STR2Q   $sXr, $sXi, $PssX, $offset
        STRH    $sXi, [$PssX, #2]
        STRH    $sXr, [$PssX]
        ADD     $PssX, $PssX, $offset
        MEND
; CXMUL_V7 YYr, YYi, Yr, Yi, Kr, Ki, tmp, tmp2
; YY = Cplx_conjugate_mul(Y, K), with Y = Yr + i*Yi and result YYr + i*YYi.
; The coefficient is expected in the pre-arranged form
;   K = (Kr - Ki) + i*Ki
; i.e. the register passed as Kr already holds (true Kr - Ki).  With that
; form three multiplies are enough:
;   tmp  = (Yi - Yr) * Ki
;   YYi  = Yi * Kr        + tmp   = Yi*Kr_true - Yr*Ki
;   YYr  = Yr * (Kr+2*Ki) + tmp   = Yr*Kr_true + Yi*Ki
; YYi is written before YYr, and YYr reads only Yr, tmp2 and tmp, so the
; result registers may be the same registers as Kr / Ki.
        MACRO
        CXMUL_V7 $YYr, $YYi, $Yr, $Yi, $Kr, $Ki, $tmp, $tmp2
        SUB     $tmp2, $Yi, $Yr             ; sYi-sYr
        MUL     $tmp, $tmp2, $Ki            ; (sYi-sYr)*sKi
        ADD     $tmp2, $Kr, $Ki, LSL#1      ; (sKr+sKi)
        MLA     $YYi, $Yi, $Kr, $tmp        ; lYYi = sYi*sKr-sYr*sKi
        MLA     $YYr, $Yr, $tmp2, $tmp      ; lYYr = sYr*sKr+sYi*sKi
        MEND
; CXADDA4 s
; Four point complex Fast Fourier Transform, in place on the register
; pairs (Ar,Ai) (Br,Bi) (Cr,Ci) (Dr,Di).  B, C and D are additionally
; scaled down by s bits; every output carries an overall factor of 1/4.
; Each "X - 2*Y" step below recovers a difference from the sum that was
; just formed, which avoids a temporary register.
        MACRO
        CXADDA4 $s
        ; (C,D) = (C+D, C-D)
        ADD     Cr, Cr, Dr
        ADD     Ci, Ci, Di
        SUB     Dr, Cr, Dr, LSL#1
        SUB     Di, Ci, Di, LSL#1
        ; (A,B) = (A+(B>>s), A-(B>>s))/4
        MOV     Ar, Ar, ASR#2
        MOV     Ai, Ai, ASR#2
        ADD     Ar, Ar, Br, ASR#(2+$s)
        ADD     Ai, Ai, Bi, ASR#(2+$s)
        SUB     Br, Ar, Br, ASR#(1+$s)
        SUB     Bi, Ai, Bi, ASR#(1+$s)
        ; (A,C) = (A+(C>>s)/4, A-(C>>s)/4)
        ADD     Ar, Ar, Cr, ASR#(2+$s)
        ADD     Ai, Ai, Ci, ASR#(2+$s)
        SUB     Cr, Ar, Cr, ASR#(1+$s)
        SUB     Ci, Ai, Ci, ASR#(1+$s)
        ; (B,D) = (B-i*(D>>s)/4, B+i*(D>>s)/4)
        ADD     Br, Br, Di, ASR#(2+$s)
        SUB     Bi, Bi, Dr, ASR#(2+$s)
        SUB     Di, Br, Di, ASR#(1+$s)
        ADD     Dr, Bi, Dr, ASR#(1+$s)
        MEND
; BUTFLY4ZERO_OPT pIN, offset, pOUT
; First-pass radix-4 butterfly (no coefficient multiply).
;   pIN  : points at the first input sample; the other three are fetched
;          at steps of NPT bytes, in the order A, C, B, D.  pIN is
;          advanced by 4*NPT.
;   pOUT : the four results are written as consecutive 4-byte samples
;          and pOUT is advanced by 16.
;   offset is accepted for symmetry with BUTFLY4_V7 but is not used.
; Samples are signed halfword pairs: real part first, imaginary at +2.
        MACRO
        BUTFLY4ZERO_OPT $pIN, $offset, $pOUT
        LDRSH   Ai, [$pIN, #2]
        LDRSH   Ar, [$pIN]
        ADD     $pIN, #NPT
        LDRSH   Ci, [$pIN, #2]
        LDRSH   Cr, [$pIN]
        ADD     $pIN, #NPT
        LDRSH   Bi, [$pIN, #2]
        LDRSH   Br, [$pIN]
        ADD     $pIN, #NPT
        LDRSH   Di, [$pIN, #2]
        LDRSH   Dr, [$pIN]
        ADD     $pIN, #NPT
        ; (C,D) = (C+D, C-D)
        ADD     Cr, Cr, Dr
        ADD     Ci, Ci, Di
        SUB     Dr, Cr, Dr, LSL#1       ; trick
        SUB     Di, Ci, Di, LSL#1       ; trick
        ; (A,B) = (A+B)/4, (A-B)/4
        MOV     Ar, Ar, ASR#2
        MOV     Ai, Ai, ASR#2
        ADD     Ar, Ar, Br, ASR#2
        ADD     Ai, Ai, Bi, ASR#2
        SUB     Br, Ar, Br, ASR#1
        SUB     Bi, Ai, Bi, ASR#1
        ; (A,C) = (A+C)/4, (A-C)/4
        ADD     Ar, Ar, Cr, ASR#2
        ADD     Ai, Ai, Ci, ASR#2
        SUB     Cr, Ar, Cr, ASR#1
        SUB     Ci, Ai, Ci, ASR#1
        ; (B,D) = (B-i*D)/4, (B+i*D)/4
        ADD     Br, Br, Di, ASR#2
        SUB     Bi, Bi, Dr, ASR#2
        SUB     Di, Br, Di, ASR#1
        ADD     Dr, Bi, Dr, ASR#1
        ;
        STRH    Ai, [$pOUT, #2]
        STRH    Ar, [$pOUT], #4
        STRH    Bi, [$pOUT, #2]
        STRH    Br, [$pOUT], #4
        STRH    Ci, [$pOUT, #2]
        STRH    Cr, [$pOUT], #4
        STRH    Dr, [$pOUT, #2]         ; inversion here
        STRH    Di, [$pOUT], #4
        MEND
; BUTFLY4_V7 pssDin, offset, pssDout, qformat, pssK
; Radix-4 butterfly with coefficient multiply.
;   pssDin  : points at the LAST of four samples spaced `offset` bytes
;             apart; they are read walking backwards (LDR2Qm), so the
;             pointer ends on the first sample.
;   pssK    : coefficient pointer; three (Kr,Ki) halfword pairs are read
;             and the pointer is advanced by 12 bytes.
;   qformat : extra right shift handed to CXADDA4.
;   pssDout : the four results are stored `offset` bytes apart starting
;             here; the pointer ends 4 bytes past the last result.
; The three samples read first are multiplied by a coefficient each and
; become D, C and B; the fourth is used as A unmodified.
        MACRO
        BUTFLY4_V7 $pssDin, $offset, $pssDout, $qformat, $pssK
        LDR2Qm  Ar,Ai,$pssDin,$offset   ;-$offset
        LDR2Q   Dr,Di,$pssK,#4
        ; format CXMUL_V7 YYr, YYi, Yr, Yi, Kr, Ki,tmp,tmp2
        CXMUL_V7 Dr,Di,Ar,Ai,Dr,Di,tmp,tmp2
        LDR2Qm  Ar,Ai,$pssDin,$offset   ;-$offset
        LDR2Q   Cr,Ci,$pssK,#4
        CXMUL_V7 Cr,Ci,Ar,Ai,Cr,Ci,tmp,tmp2
        LDR2Qm  Ar,Ai,$pssDin,$offset   ;-$offset
        LDR2Q   Br,Bi,$pssK,#4
        CXMUL_V7 Br,Bi,Ar,Ai,Br,Bi,tmp,tmp2
        LDR2Q   Ar,Ai,$pssDin,#0
        CXADDA4 $qformat
        STRH    Ai, [$pssDout, #2]
        STRH    Ar, [$pssDout]
        ADD     $pssDout, $pssDout, $offset
        STRH    Bi, [$pssDout, #2]
        STRH    Br, [$pssDout]
        ADD     $pssDout, $pssDout, $offset
        STRH    Ci, [$pssDout, #2]
        STRH    Cr, [$pssDout]
        ADD     $pssDout, $pssDout, $offset
        STRH    Dr, [$pssDout, #2]      ; inversion here
        STRH    Di, [$pssDout], #4
        MEND
;------------------- CODE --------------------------------
;===============================================================================
;*******************************************************************************
;* Function Name: cr4_fft_256_stm32
;* Description  : complex radix-4 256 points FFT
;* Input        : - R0 = pssOUT: Output array .
;*                - R1 = pssIN: Input array
;*                - R2 = Nbin: =256 number of points, this optimized FFT function
;*                  can only convert 256 points.
;* Output       : None
;* Return       : None
;* Notes        : Samples are 4 bytes each (signed 16-bit real part followed
;*                by signed 16-bit imaginary part).  R4-R11 are saved and
;*                restored; R12 and R14 are used as scratch.
;*******************************************************************************
cr4_fft_256_stm32

        STMFD   SP!, {R4-R11, LR}

        ;---- First pass: 64 coefficient-free butterflies.  The input index
        ;     is taken from the bit-reversed loop counter, the output is
        ;     written sequentially.
        MOV     cntrbitrev, #0
        MOV     index, #0

preloop_v7
        ADD     pssIN2, pssIN, cntrbitrev, LSR#24   ;256-pts
        BUTFLY4ZERO_OPT pssIN2,Nbin,pssOUT
        INC     index
        RBIT    cntrbitrev, index
        CMP     index, #64                          ;256-pts
        BNE     preloop_v7

        ;---- Remaining passes run in place on the output buffer.
        SUB     pssX, pssOUT, Nbin, LSL#2           ; rewind to the start of the output array
        MOV     index, #16                          ; byte distance between butterfly inputs
        MOVS    butternbr, Nbin, LSR#4              ;dual use of register

;------------------------------------------------------------------------------
;   The FFT coefficients table can be stored into Flash or RAM.
;   The following two lines of code allow selecting the method for coefficients
;   storage.
;   In the case of choosing coefficients in RAM, you have to:
;   1. Include the file table_fft.h, which is a part of the DSP library,
;      in your main file.
;   2. Decomment the line LDR.W pssK, =TableFFT and comment the line
;      ADRL    pssK, TableFFT_V7
;   3. Comment all the TableFFT_V7 data.
;------------------------------------------------------------------------------
        ADRL    pssK, TableFFT_V7                   ; Coeff in Flash
        ;LDR.W  pssK, =TableFFT                     ; Coeff in RAM

        ;................................
        ; One pass.  butternbr packs two counters: the low halfword counts
        ; groups, the high halfword counts butterflies within a group.
passloop_v7
        STMFD   SP!, {pssX, butternbr}
        ADD     tmp, index, index, LSL#1            ; tmp = 3*index
        ADD     pssX, pssX, tmp                     ; point at the last of the four inputs
        SUB     butternbr, butternbr, #1<<16
        ;................
grouploop_v7
        ADD     butternbr, butternbr, index, LSL#(16-2)
        ;.......
butterloop_v7
        BUTFLY4_V7 pssX,index,pssX,14,pssK
        SUBS    butternbr, butternbr, #1<<16
        BGE     butterloop_v7
        ;.......
        ADD     tmp, index, index, LSL#1
        ADD     pssX, pssX, tmp                     ; skip to the next group
        DEC     butternbr
        MOVS    tmp2, butternbr, LSL#16             ; Z set when the group counter reaches zero
        IT      NE
        SUBNE   pssK, pssK, tmp                     ; more groups: rewind the coefficient pointer
        BNE     grouploop_v7
        ;................
        LDMFD   sp!, {pssX, butternbr}
        QUAD    index                               ; input spacing *= radix
        MOVS    butternbr, butternbr, LSR#2         ; loop nbr /= radix
        BNE     passloop_v7
        ;................................
        LDMFD   SP!, {R4-R11, PC}
;=============================================================================
; Coefficient table read sequentially through pssK by BUTFLY4_V7.
; Each butterfly consumes three (Kr, Ki) pairs of signed halfwords, which
; is one DCW row of six values below.  As noted at CXMUL_V7, the first
; value of each pair is stored in the pre-arranged form (Kr - Ki).
; NOTE(review): values look like Q14 fixed point (0x4000 = 1.0), matching
; the shift of 14 passed to BUTFLY4_V7 - confirm.
; The sub-tables are laid out in the order the passes use them.
TableFFT_V7
;N=16
DCW 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
DCW 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
DCW 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
DCW 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
; N=64
DCW 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
DCW 0x2aaa,0x1294, 0x396b,0x0646, 0x3249,0x0c7c
DCW 0x11a8,0x238e, 0x3249,0x0c7c, 0x22a3,0x187e
DCW 0xf721,0x3179, 0x2aaa,0x1294, 0x11a8,0x238e
DCW 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
DCW 0xc695,0x3fb1, 0x1a46,0x1e2b, 0xee58,0x3537
DCW 0xb4be,0x3ec5, 0x11a8,0x238e, 0xdd5d,0x3b21
DCW 0xa963,0x3871, 0x08df,0x289a, 0xcdb7,0x3ec5
DCW 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
DCW 0xa963,0x1e2b, 0xf721,0x3179, 0xb4be,0x3ec5
DCW 0xb4be,0x0c7c, 0xee58,0x3537, 0xac61,0x3b21
DCW 0xc695,0xf9ba, 0xe5ba,0x3871, 0xa73b,0x3537
DCW 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
DCW 0xf721,0xd766, 0xd556,0x3d3f, 0xa73b,0x238e
DCW 0x11a8,0xcac9, 0xcdb7,0x3ec5, 0xac61,0x187e
DCW 0x2aaa,0xc2c1, 0xc695,0x3fb1, 0xb4be,0x0c7c
; N=256
DCW 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
DCW 0x3b1e,0x04b5, 0x3e69,0x0192, 0x3cc8,0x0324
DCW 0x35eb,0x0964, 0x3cc8,0x0324, 0x396b,0x0646
DCW 0x306c,0x0e06, 0x3b1e,0x04b5, 0x35eb,0x0964
DCW 0x2aaa,0x1294, 0x396b,0x0646, 0x3249,0x0c7c
DCW 0x24ae,0x1709, 0x37af,0x07d6, 0x2e88,0x0f8d
DCW 0x1e7e,0x1b5d, 0x35eb,0x0964, 0x2aaa,0x1294
DCW 0x1824,0x1f8c, 0x341e,0x0af1, 0x26b3,0x1590
DCW 0x11a8,0x238e, 0x3249,0x0c7c, 0x22a3,0x187e
DCW 0x0b14,0x2760, 0x306c,0x0e06, 0x1e7e,0x1b5d
DCW 0x0471,0x2afb, 0x2e88,0x0f8d, 0x1a46,0x1e2b
DCW 0xfdc7,0x2e5a, 0x2c9d,0x1112, 0x15fe,0x20e7
DCW 0xf721,0x3179, 0x2aaa,0x1294, 0x11a8,0x238e
DCW 0xf087,0x3453, 0x28b2,0x1413, 0x0d48,0x2620
DCW 0xea02,0x36e5, 0x26b3,0x1590, 0x08df,0x289a
DCW 0xe39c,0x392b, 0x24ae,0x1709, 0x0471,0x2afb
DCW 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
DCW 0xd74e,0x3cc5, 0x2093,0x19ef, 0xfb8f,0x2f6c
DCW 0xd178,0x3e15, 0x1e7e,0x1b5d, 0xf721,0x3179
DCW 0xcbe2,0x3f0f, 0x1c64,0x1cc6, 0xf2b8,0x3368
DCW 0xc695,0x3fb1, 0x1a46,0x1e2b, 0xee58,0x3537
DCW 0xc197,0x3ffb, 0x1824,0x1f8c, 0xea02,0x36e5
DCW 0xbcf0,0x3fec, 0x15fe,0x20e7, 0xe5ba,0x3871
DCW 0xb8a6,0x3f85, 0x13d5,0x223d, 0xe182,0x39db
DCW 0xb4be,0x3ec5, 0x11a8,0x238e, 0xdd5d,0x3b21
DCW 0xb140,0x3daf, 0x0f79,0x24da, 0xd94d,0x3c42
DCW 0xae2e,0x3c42, 0x0d48,0x2620, 0xd556,0x3d3f
DCW 0xab8e,0x3a82, 0x0b14,0x2760, 0xd178,0x3e15
DCW 0xa963,0x3871, 0x08df,0x289a, 0xcdb7,0x3ec5
DCW 0xa7b1,0x3612, 0x06a9,0x29ce, 0xca15,0x3f4f
DCW 0xa678,0x3368, 0x0471,0x2afb, 0xc695,0x3fb1
DCW 0xa5bc,0x3076, 0x0239,0x2c21, 0xc338,0x3fec
DCW 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
DCW 0xa5bc,0x29ce, 0xfdc7,0x2e5a, 0xbcf0,0x3fec
DCW 0xa678,0x2620, 0xfb8f,0x2f6c, 0xba09,0x3fb1
DCW 0xa7b1,0x223d, 0xf957,0x3076, 0xb74d,0x3f4f
DCW 0xa963,0x1e2b, 0xf721,0x3179, 0xb4be,0x3ec5
DCW 0xab8e,0x19ef, 0xf4ec,0x3274, 0xb25e,0x3e15
DCW 0xae2e,0x1590, 0xf2b8,0x3368, 0xb02d,0x3d3f
DCW 0xb140,0x1112, 0xf087,0x3453, 0xae2e,0x3c42
DCW 0xb4be,0x0c7c, 0xee58,0x3537, 0xac61,0x3b21
DCW 0xb8a6,0x07d6, 0xec2b,0x3612, 0xaac8,0x39db
DCW 0xbcf0,0x0324, 0xea02,0x36e5, 0xa963,0x3871
DCW 0xc197,0xfe6e, 0xe7dc,0x37b0, 0xa834,0x36e5
DCW 0xc695,0xf9ba, 0xe5ba,0x3871, 0xa73b,0x3537
DCW 0xcbe2,0xf50f, 0xe39c,0x392b, 0xa678,0x3368
DCW 0xd178,0xf073, 0xe182,0x39db, 0xa5ed,0x3179
DCW 0xd74e,0xebed, 0xdf6d,0x3a82, 0xa599,0x2f6c
DCW 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
DCW 0xe39c,0xe33a, 0xdb52,0x3bb6, 0xa599,0x2afb
DCW 0xea02,0xdf19, 0xd94d,0x3c42, 0xa5ed,0x289a
DCW 0xf087,0xdb26, 0xd74e,0x3cc5, 0xa678,0x2620
DCW 0xf721,0xd766, 0xd556,0x3d3f, 0xa73b,0x238e
DCW 0xfdc7,0xd3df, 0xd363,0x3daf, 0xa834,0x20e7
DCW 0x0471,0xd094, 0xd178,0x3e15, 0xa963,0x1e2b
DCW 0x0b14,0xcd8c, 0xcf94,0x3e72, 0xaac8,0x1b5d
DCW 0x11a8,0xcac9, 0xcdb7,0x3ec5, 0xac61,0x187e
DCW 0x1824,0xc850, 0xcbe2,0x3f0f, 0xae2e,0x1590
DCW 0x1e7e,0xc625, 0xca15,0x3f4f, 0xb02d,0x1294
DCW 0x24ae,0xc44a, 0xc851,0x3f85, 0xb25e,0x0f8d
DCW 0x2aaa,0xc2c1, 0xc695,0x3fb1, 0xb4be,0x0c7c
DCW 0x306c,0xc18e, 0xc4e2,0x3fd4, 0xb74d,0x0964
DCW 0x35eb,0xc0b1, 0xc338,0x3fec, 0xba09,0x0646
DCW 0x3b1e,0xc02c, 0xc197,0x3ffb, 0xbcf0,0x0324
END
;******************* (C) COPYRIGHT 2009 STMicroelectronics *****END OF FILE****
本帖最后由 yjmwxwx 于 2023-4-17 10:49 编辑
叶春勇 发表于 2023-4-17 10:02
补一个stm32,FFT程序,汇编:
当年ST是有汇编FFT库的,我当时自己写也看过但是看不懂,后来下载了个网页版例子再结合书才看懂,很多年不用都忘记具体怎么算的了。
www.themobilestudio.net
第二页图片显示不出来了被浏览器阻止了,每一步怎么计算的都有插图比较容易看懂
yjmwxwx 发表于 2023-4-17 10:29
当年ST是有汇编FFT库的,我当时自己写也看过但是看不懂,后来下载了个网页版例子再结合书才看懂,很多年不 ...
快速傅里叶算法基本都有库的。递归算法,我自己也能写出来,递推算法,写了比较了下也没别人快。后来直接用库了。我们自己能折腾的就是纯整数计算。最近在研究数论变换。