/* GCC内嵌汇编乘除法 -- multiplication and division with GCC inline assembly (AVR) */
#include <stdlib.h>
#include <stdint.h>

/*
 * Quotient/remainder pairs returned by the division helpers.
 * The member order (quot first, rem second) is relied upon by the
 * assembly routines that fill these structures register by register.
 */
typedef struct udiv8_t {
    uint8_t quot;
    uint8_t rem;
} udiv8_t;

typedef struct div8_t {
    int8_t quot;
    int8_t rem;
} div8_t;

typedef struct udiv16_t {
    uint16_t quot;
    uint16_t rem;
} udiv16_t;

/* Signed 16-bit result reuses the C library's div_t (typedef instead of a
 * macro so the name cannot rewrite unrelated identifiers). */
typedef div_t div16_t;

typedef struct udiv32_t {
    uint32_t quot;
    uint32_t rem;
} udiv32_t;

/* Signed 32-bit result reuses the C library's ldiv_t. */
typedef ldiv_t div32_t;

/* ---- Multiplication ---------------------------------------------------- */

/* s8 * s8 -> s16 */
extern int16_t muls8(int8_t val0, int8_t val1);
/* u8 * u8 -> u16.  NOTE(review): the operands are declared int8_t although
 * they are multiplied as unsigned bytes; kept as-is for compatibility. */
extern uint16_t mulu8(int8_t val0, int8_t val1);
/* s16 * s16 -> s32 */
extern int32_t muls16(int16_t val0, int16_t val1);
/* u16 * u16 -> u32 */
extern uint32_t mulu16(uint16_t val0, uint16_t val1);
/* Signed fractional multiply: 16-bit fixed point * 16-bit fixed point,
 * giving a 32-bit fixed-point product. */
extern int32_t fmuls16(int16_t val0, int16_t val1);

/* ---- Division ---------------------------------------------------------- */

/*
 * The following six prototypes carry an asm label, so each C name is linked
 * against the symbol given in the string (presumably the libgcc divide/modulo
 * helpers of that name -- confirm against the toolchain in use).
 * __attribute__((__const__)) is spelled out instead of avr-libc's
 * __ATTR_CONST__ shorthand so the header does not depend on one C library.
 */
extern div8_t   div8(int8_t num, int8_t den)       __asm__("__divmodqi4")  __attribute__((__const__));
extern udiv8_t  udiv8(uint8_t num, uint8_t den)    __asm__("__udivmodqi4") __attribute__((__const__));
extern div16_t  div16(int16_t num, int16_t den)    __asm__("__divmodhi4")  __attribute__((__const__));
extern udiv16_t udiv16(uint16_t num, uint16_t den) __asm__("__udivmodhi4") __attribute__((__const__));
extern div32_t  div32(int32_t num, int32_t den)    __asm__("__divmodsi4")  __attribute__((__const__));
extern udiv32_t udiv32(uint32_t num, uint32_t den) __asm__("__udivmodsi4") __attribute__((__const__));

/* u16 / u8 -> {u8 quotient, u8 remainder} */
extern udiv8_t udiv16_8(uint16_t val0, uint8_t val1);
/* u32 / u16 -> {u16 quotient, u16 remainder} */
extern udiv16_t udiv32_16(uint32_t val0, uint16_t val1);

/* ---- Square root ------------------------------------------------------- */

/* Integer square root of a 32-bit value, returned as 16 bits. */
extern int16_t sqrt32(int32_t val);
#include "fmath.h"
/****************************乘法******************************/
/*
 * Signed 8-bit multiply: s8 * s8 = s16.
 *
 * val0, val1  signed 8-bit factors
 * returns     the full 16-bit signed product (cannot overflow)
 *
 * On AVR cores with a hardware multiplier a single MULS is used; every other
 * target gets the equivalent C expression.
 */
int16_t muls8(int8_t val0, int8_t val1)
{
#if defined(__AVR_HAVE_MUL__)
    int16_t result;

    __asm__ (
        "muls %1,%2"        "\n\t"   /* r1:r0 = val0 * val1 (signed)        */
        "movw %A0,r0"       "\n\t"   /* copy the product out of r1:r0       */
        "clr  __zero_reg__" "\n\t"   /* avr-gcc expects r1 == 0 at all times */
        : "=r" (result)
        : "d" (val0), "d" (val1)     /* MULS only accepts r16..r31          */
        : "r0", "r1"
    );
    return result;
#else
    return (int16_t)((int16_t)val0 * (int16_t)val1);
#endif
}
/*
 * Unsigned 8-bit multiply: u8 * u8 = u16.
 *
 * val0, val1  factors; only their 8-bit patterns are used and they are
 *             multiplied as unsigned bytes.
 *             NOTE(review): the parameters are declared int8_t to stay
 *             compatible with the existing prototype; uint8_t would describe
 *             the behaviour better.
 * returns     the full 16-bit unsigned product
 */
uint16_t mulu8(int8_t val0, int8_t val1)
{
#if defined(__AVR_HAVE_MUL__)
    uint16_t result;

    __asm__ (
        "mul  %1,%2"        "\n\t"   /* r1:r0 = val0 * val1 (unsigned)      */
        "movw %A0,r0"       "\n\t"   /* copy the product out of r1:r0       */
        "clr  __zero_reg__" "\n\t"   /* avr-gcc expects r1 == 0 at all times */
        : "=r" (result)
        : "r" (val0), "r" (val1)
        : "r0", "r1"
    );
    return result;
#else
    return (uint16_t)((uint16_t)(uint8_t)val0 * (uint8_t)val1);
#endif
}
/*
 * Signed 16-bit multiply: s16 * s16 = s32.
 *
 * val0, val1  signed 16-bit factors
 * returns     the full 32-bit signed product (cannot overflow)
 *
 * AVR implementation: the four 8x8 partial products are summed, with the two
 * signed cross products sign-extended into the top byte through SBC.
 * Notes on the assembly:
 *   - MUL leaves bit 15 of its product in the carry flag, so no carry is
 *     folded in after the low*low product (doing so corrupted results such
 *     as 255 * 255).
 *   - the result is written before all inputs have been read, hence the
 *     early-clobber ("&") on the output operand.
 *   - r1 is cleared again at the end because avr-gcc treats it as a
 *     permanent zero register.
 */
int32_t muls16(int16_t val0, int16_t val1)
{
#if defined(__AVR_HAVE_MUL__)
    int32_t result;
    uint8_t zero;

    __asm__ (
        "clr   %[zero]"          "\n\t"
        "muls  %B[a],%B[b]"      "\n\t"   /* high * high (signed)          */
        "movw  %C[res],r0"       "\n\t"
        "mul   %A[a],%A[b]"      "\n\t"   /* low * low (unsigned)          */
        "movw  %A[res],r0"       "\n\t"
        "mulsu %B[a],%A[b]"      "\n\t"   /* high(a) signed * low(b)       */
        "sbc   %D[res],%[zero]"  "\n\t"   /* sign-extend the cross product */
        "add   %B[res],r0"       "\n\t"
        "adc   %C[res],r1"       "\n\t"
        "adc   %D[res],%[zero]"  "\n\t"
        "mulsu %B[b],%A[a]"      "\n\t"   /* high(b) signed * low(a)       */
        "sbc   %D[res],%[zero]"  "\n\t"
        "add   %B[res],r0"       "\n\t"
        "adc   %C[res],r1"       "\n\t"
        "adc   %D[res],%[zero]"  "\n\t"
        "clr   __zero_reg__"     "\n\t"
        : [res] "=&r" (result), [zero] "=&r" (zero)
        : [a] "a" (val0), [b] "a" (val1)  /* MULSU needs r16..r23 */
        : "r0", "r1"
    );
    return result;
#else
    return (int32_t)val0 * (int32_t)val1;
#endif
}
/*
 * Unsigned 16-bit multiply: u16 * u16 = u32.
 *
 * val0, val1  unsigned 16-bit factors
 * returns     the full 32-bit unsigned product (cannot overflow)
 *
 * AVR implementation: sum of the four unsigned 8x8 partial products.
 * Notes on the assembly:
 *   - MUL leaves bit 15 of its product in the carry flag; that flag is not a
 *     carry-out and must not be added into the upper bytes (doing so gave
 *     wrong results, e.g. for 0x00FF * 0x00FF and 0xFF00 * 0x00FF).
 *   - the result is written before all inputs have been read, hence the
 *     early-clobber ("&") on the output operand.
 *   - r1 is cleared again at the end because avr-gcc treats it as a
 *     permanent zero register.
 */
uint32_t mulu16(uint16_t val0, uint16_t val1)
{
#if defined(__AVR_HAVE_MUL__)
    uint32_t result;
    uint8_t zero;

    __asm__ (
        "clr  %[zero]"          "\n\t"
        "mul  %B[a],%B[b]"      "\n\t"   /* high * high               */
        "movw %C[res],r0"       "\n\t"
        "mul  %A[a],%A[b]"      "\n\t"   /* low * low                 */
        "movw %A[res],r0"       "\n\t"
        "mul  %B[a],%A[b]"      "\n\t"   /* high(a) * low(b)          */
        "add  %B[res],r0"       "\n\t"
        "adc  %C[res],r1"       "\n\t"
        "adc  %D[res],%[zero]"  "\n\t"
        "mul  %B[b],%A[a]"      "\n\t"   /* high(b) * low(a)          */
        "add  %B[res],r0"       "\n\t"
        "adc  %C[res],r1"       "\n\t"
        "adc  %D[res],%[zero]"  "\n\t"
        "clr  __zero_reg__"     "\n\t"
        : [res] "=&r" (result), [zero] "=&r" (zero)
        : [a] "r" (val0), [b] "r" (val1)
        : "r0", "r1"
    );
    return result;
#else
    return (uint32_t)val0 * (uint32_t)val1;
#endif
}
/*
 * Signed fractional multiply: fs16 * fs16 = fs32.
 * Multiplies two 16-bit signed fixed-point fractions and returns the 32-bit
 * signed fixed-point product, i.e. the integer product shifted left by one
 * bit (the bit pattern of 2 * val0 * val1 truncated to 32 bits).
 *
 * val0, val1  signed 16-bit fixed-point factors
 * returns     32-bit fixed-point product; -32768 * -32768 wraps to the most
 *             negative value because +1.0 is not representable
 *
 * AVR implementation: same partial-product scheme as the integer multiply,
 * built from FMULS / FMUL / FMULSU.  The bit that FMUL shifts out of the
 * low*low product is added into byte 2 via the carry flag.
 * The output is early-clobbered because it is written before all inputs have
 * been read, and r1 is cleared at the end because avr-gcc treats it as a
 * permanent zero register.
 */
int32_t fmuls16(int16_t val0, int16_t val1)
{
#if defined(__AVR_HAVE_MUL__)
    int32_t result;
    uint8_t zero;

    __asm__ (
        "clr    %[zero]"          "\n\t"
        "fmuls  %B[a],%B[b]"      "\n\t"   /* (high * high) << 1, signed     */
        "movw   %C[res],r0"       "\n\t"
        "fmul   %A[a],%A[b]"      "\n\t"   /* (low * low) << 1, unsigned     */
        "movw   %A[res],r0"       "\n\t"   /* MOVW leaves the flags alone    */
        "adc    %C[res],%[zero]"  "\n\t"   /* add the bit shifted out above  */
        "fmulsu %B[a],%A[b]"      "\n\t"   /* (high(a) signed * low(b)) << 1 */
        "sbc    %D[res],%[zero]"  "\n\t"   /* sign-extend the cross product  */
        "add    %B[res],r0"       "\n\t"
        "adc    %C[res],r1"       "\n\t"
        "adc    %D[res],%[zero]"  "\n\t"
        "fmulsu %B[b],%A[a]"      "\n\t"   /* (high(b) signed * low(a)) << 1 */
        "sbc    %D[res],%[zero]"  "\n\t"
        "add    %B[res],r0"       "\n\t"
        "adc    %C[res],r1"       "\n\t"
        "adc    %D[res],%[zero]"  "\n\t"
        "clr    __zero_reg__"     "\n\t"
        : [res] "=&r" (result), [zero] "=&r" (zero)
        : [a] "a" (val0), [b] "a" (val1)   /* FMUL* need r16..r23 */
        : "r0", "r1"
    );
    return result;
#else
    /* Shift in unsigned arithmetic so the wrap-around case is well defined. */
    uint32_t doubled = (uint32_t)((int32_t)val0 * (int32_t)val1) << 1;
    return (int32_t)doubled;
#endif
}
/*************************************除法************************************/
// The six plain divide/modulo helpers are listed here for reference only.
// These lines are comments; the active declarations are the extern
// prototypes near the top of the file, each of which carries an asm label
// naming the symbol it links against. No C definition of them is visible in
// this part of the file.
//
// s8  / s8  = {s8,  s8 }   div8_t   div8(int8_t,int8_t)       __asm__("__divmodqi4")  __ATTR_CONST__ ;
// u8  / u8  = {u8,  u8 }   udiv8_t  udiv8(uint8_t,uint8_t)    __asm__("__udivmodqi4") __ATTR_CONST__ ;
// s16 / s16 = {s16, s16}   div16_t  div16(int16_t,int16_t)    __asm__("__divmodhi4")  __ATTR_CONST__ ;
// u16 / u16 = {u16, u16}   udiv16_t udiv16(uint16_t,uint16_t) __asm__("__udivmodhi4") __ATTR_CONST__ ;
// s32 / s32 = {s32, s32}   div32_t  div32(int32_t,int32_t)    __asm__("__divmodsi4")  __ATTR_CONST__ ;
// u32 / u32 = {u32, u32}   udiv32_t udiv32(uint32_t,uint32_t) __asm__("__udivmodsi4") __ATTR_CONST__ ;
//u16/u8={u8,u8} udiv8_t udiv16_8(uint16_t val0,uint8_t val1) { asm( "cp %B0,%1" "
" "brcc 2f" "
"
"ldi r18,0x09" "
" "mov r19,%B0" "
" "mov %B0,%A0" "
" "ldi %A0,0x7f" "
" "rjmp 1f" "
"
"0:" "adc r19,r19" "
" "cp r19,%1" "
" "brcs 1f" "
" "sub r19,%1" "
"
"1:" "adc %A0,%A0" "
" "adc %B0,%B0" "
" "dec r18" "
" "brne 0b" "
" "com %A0" "
" "mov %B0,r19" "
" "rjmp 3f" "
"
"2:" "ldi %A0,0Xff" "
" "ldi %B0,0xff" "
"
"3:" "
"
:"=r"(val0),"=r"(val1) :"0"(val0),"1"(val1) :"r18","r19" );
return *( udiv8_t *)&val0; }
//u32/u16={u16,u16} udiv16_t udiv32_16(uint32_t val0,uint16_t val1) { asm( "cp %C0,%A1" "
" "cpc %D0,%B1" "
" "brcc 2f" "
"
"ldi r20,0x11" "
" "movw r18,%C0" "
" "movw %C0,%A0" "
" "ldi %B0,0x7f" "
" "ldi %A0,0xff" "
" "rjmp 1f" "
"
"0:" "adc r18,r18" "
" "adc r19,r19" "
" "cp r18,%A1" "
" "cpc r19,%B1" "
" "brcs 1f" "
" "sub r18,%A1" "
" "sbc r19,%B1" "
"
"1:" "adc %A0,%A0" "
" "adc %B0,%B0" "
" "adc %C0,%C0" "
" "adc %D0,%D0" "
" "dec r20" "
" "brne 0b" "
" "com %A0" "
" "com %B0" "
" "movw %C0,r18" "
" "rjmp 3f" "
"
"2:" "ldi %A0,0xff" "
" "ldi %B0,0xff" "
" "ldi %C0,0xff" "
" "ldi %D0,0xff" "
"
"3:" "
"
:"=r"(val0),"=r"(val1) :"0"(val0),"1"(val1) :"r18","r19","r20" );
return *(udiv16_t *)&val0; }
/************************开方*************************************/
/*
 * Integer square root: sqrt(s32) = s16.
 *
 * val      radicand.  All 32 bits are processed as an unsigned bit pattern;
 *          the sign is not examined.
 * returns  floor(sqrt(val)) converted to int16_t.  Roots above 32767 (inputs
 *          of 0x40000000 and larger, or negative inputs) do not fit the
 *          return type.
 *
 * AVR implementation: non-restoring square root, 16 iterations, two radicand
 * bits per iteration.
 *   r5:r2  radicand, shifted out two bits at a time
 *   r9:r6  running remainder (signed)
 *   val    trial value: (root << 2) | 1 when the remainder is non-negative
 *          (next step subtracts), (root << 2) | 3 when it is negative (next
 *          step adds).  Shifted right by two at the end to leave the root.
 * LDI / ANDI / ORI / SUBI only work on r16..r31, hence the "d" constraint.
 */
int16_t sqrt32(int32_t val)
{
#if defined(__AVR_HAVE_MOVW__)
    __asm__ (
        "movw r2,%A0"     "\n\t"   /* r5:r2 = radicand              */
        "movw r4,%C0"     "\n\t"
        "clr  r6"         "\n\t"   /* r9:r6 = remainder = 0         */
        "clr  r7"         "\n\t"
        "clr  r8"         "\n\t"
        "clr  r9"         "\n\t"
        "ldi  %A0,1"      "\n\t"   /* trial value = 1               */
        "ldi  %B0,0"      "\n\t"
        "clr  %C0"        "\n\t"
        "clr  %D0"        "\n\t"
        "ldi  r23,16"     "\n"     /* 16 result bits                */
    "0:\t"
        "lsl  r2"         "\n\t"   /* shift next two radicand bits  */
        "rol  r3"         "\n\t"   /* into the remainder            */
        "rol  r4"         "\n\t"
        "rol  r5"         "\n\t"
        "rol  r6"         "\n\t"
        "rol  r7"         "\n\t"
        "rol  r8"         "\n\t"
        "rol  r9"         "\n\t"
        "lsl  r2"         "\n\t"
        "rol  r3"         "\n\t"
        "rol  r4"         "\n\t"
        "rol  r5"         "\n\t"
        "rol  r6"         "\n\t"
        "rol  r7"         "\n\t"
        "rol  r8"         "\n\t"
        "rol  r9"         "\n\t"
        "brpl 1f"         "\n\t"   /* remainder >= 0: subtract      */
        "add  r6,%A0"     "\n\t"   /* remainder < 0: add trial      */
        "adc  r7,%B0"     "\n\t"
        "adc  r8,%C0"     "\n\t"
        "adc  r9,%D0"     "\n\t"
        "rjmp 2f"         "\n"
    "1:\t"
        "sub  r6,%A0"     "\n\t"
        "sbc  r7,%B0"     "\n\t"
        "sbc  r8,%C0"     "\n\t"
        "sbc  r9,%D0"     "\n"
    "2:\t"
        "lsl  %A0"        "\n\t"   /* make room for the new bit     */
        "rol  %B0"        "\n\t"
        "rol  %C0"        "\n\t"
        "andi %A0,0xF8"   "\n\t"
        "ori  %A0,0x05"   "\n\t"   /* assume new root bit 1, ...01  */
        "sbrc r9,7"       "\n\t"   /* remainder negative?           */
        "subi %A0,2"      "\n\t"   /* then root bit 0, ...11        */
        "dec  r23"        "\n\t"
        "brne 0b"         "\n\t"
        "lsr  %C0"        "\n\t"   /* drop the two helper bits      */
        "ror  %B0"        "\n\t"
        "ror  %A0"        "\n\t"
        "lsr  %C0"        "\n\t"
        "ror  %B0"        "\n\t"
        "ror  %A0"        "\n\t"
        : "+d" (val)
        :
        : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r23"
    );
    return (int16_t)val;
#else
    /* Portable bit-by-bit integer square root with the same result. */
    uint32_t rest = (uint32_t)val;
    uint32_t root = 0;
    uint32_t bit = (uint32_t)1 << 30;

    while (bit != 0) {
        if (rest >= root + bit) {
            rest -= root + bit;
            root = (root >> 1) + bit;
        } else {
            root >>= 1;
        }
        bit >>= 2;
    }
    return (int16_t)root;
#endif
}