移植了一下coremark。坛子里coremark移植的帖子比较多,所以移植步骤就简单说一说;重点在后面:从sram运行coremark的性能数据比从flash执行的还要差,这是一件比较奇怪的事情。
先说说简单coremark移植,先clone代码
git clone https://github.com/eembc/coremark
以Examples/GPIO/GPIO_Toggle/为模板创建coremark工程
cp -a Examples/GPIO/GPIO_Toggle/ Examples/coremark
把第一步clone得到的coremark仓库里这几个文件拷贝到coremark工程目录
core_list_join.c core_matrix.c core_util.c core_main.c core_state.c coremark.h
simple/core_portme.c simple/core_portme.h
修改Examples/coremark/Source/main.c如下
extern volatile unsigned int ticks;
/*
 * Firmware entry point for the CoreMark port: bring up the board, start the
 * periodic SysTick interrupt, run the benchmark once, then idle.
 */
int main(void)
{
    /* Serial port setup; presumably CoreMark's printf output is retargeted
     * to USART1 (retarget code not shown here - TODO confirm). */
    USART1_Init(115200);

    /* Init delay function */
    Delay_Init();

    /* CMSIS SysTick_Config(n) raises the SysTick interrupt every n core
     * clocks, so SystemCoreClock / 1000 yields a 1 ms period.
     * NOTE(review): kept after Delay_Init() exactly as in the original, in
     * case Delay_Init() also programs SysTick - confirm. */
    SysTick_Config(SystemCoreClock / 1000);

    /* Run the benchmark with argc = 1 and no argv. */
    core_main(1, NULL);

    /* Benchmark finished: sleep between interrupts forever. */
    while (1) {
        __WFI();
    }
}
修改Examples/coremark/Source/core_main.c的main函数名为core_main
修改Examples/coremark/Source/core_portme.c
增加如下两行:
/* Fixed iteration count for the benchmark loop. CoreMark only accepts a run
 * as valid if it lasts at least 10 seconds, so raise this value if the report
 * complains about the run time. */
#define ITERATIONS 6000
/* Tick counter sampled by GETMYTIME; it is defined outside this file
 * (definition not shown).
 * NOTE(review): main.c declares it as `volatile unsigned int` - keep both
 * declarations identical to the definition to avoid LTO type-mismatch
 * warnings. */
extern volatile uint32_t ticks;
时间相关的改成:
/* Time base for the port: GETMYTIME stores the current value of `ticks`, and
 * NSECS_PER_SEC is the number of such ticks per second (the upstream simple
 * port derives EE_TICKS_PER_SEC from it). 1000 matches the
 * SysTick_Config(SystemCoreClock / 1000) call in main.c, assuming the SysTick
 * handler increments `ticks` once per interrupt (handler not shown - TODO
 * confirm). */
#define NSECS_PER_SEC 1000
#define CORETIMETYPE clock_t
#define GETMYTIME(_t) (*_t = ticks)
修改Examples/coremark/Source/core_portme.h,加入
/* Printed as-is in the "Compiler flags" line of the CoreMark report. Keep it
 * in sync with OPT in the Makefile, which builds with -O3 -flto. */
#define COMPILER_FLAGS "-O3 -flto"
修改Makefile
diff --git a/Makefile b/Makefile
index abd688c..78559d0 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ TARGET = apm32f411
# debug build?
DEBUG = 0
# optimization for size, enable lto
-OPT = -Os -flto
+OPT = -O3 -flto
#######################################
@@ -24,7 +24,7 @@ BUILD_DIR = build
# C sources
C_SOURCES = $(wildcard Libraries/APM32F4xx_StdPeriphDriver/src/*.c)
C_SOURCES += $(wildcard Boards/Board_APM32F411_TINY/src/*.c)
-C_SOURCES += $(wildcard Examples/GPIO/GPIO_Toggle/Source/*.c)
+C_SOURCES += $(wildcard Examples/coremark/Source/*.c)
# ASM sources
ASM_SOURCES = Libraries/Device/Geehy/APM32F4xx/Source/gcc/startup_apm32f411.S
@@ -38,7 +38,7 @@ C_INCLUDES += -ILibraries/Device/Geehy/APM32F4xx/Include
C_INCLUDES += -ILibraries/CMSIS/Include/
C_INCLUDES += -IBoards/
C_INCLUDES += -IBoards/Board_APM32F411_TINY/inc/
-C_INCLUDES += -IExamples/GPIO/GPIO_Toggle/Include
+C_INCLUDES += -IExamples/coremark/Include
C_DEFS := -DAPM32F411 -DAPM32F411_TINY
@@ -84,12 +84,12 @@ CFLAGS += -MMD -MP -MF"$(@:%.o=%.d)"
# libraries
LIBS = -lc -lm -lnosys
LIBDIR =
-LDFLAGS = $(MCU) -fsigned-char -ffunction-sections -fdata-sections -Wunused -Wuninitialized -T $(LDSCRIPT) -Wl,--gc-sections,--print-memory-usage,-Map=$(BUILD_DIR)/$(TARGET).map --specs=nano.specs $(LIBS) -flto -fuse-linker-plugin
+LDFLAGS = $(MCU) -fsigned-char -ffunction-sections -fdata-sections -Wunused -Wuninitialized -T $(LDSCRIPT) -Wl,--gc-sections,--print-memory-usage,-Map=$(BUILD_DIR)/$(TARGET).map --specs=nano.specs -u_printf_float $(LIBS) -flto -fuse-linker-plugin
编译&烧录&运行
make
pyocd load -e sector -t apm32f411ve build/apm32f411.bin
这一步编译出来的是从flash启动运行的,运行结果如下图
利用帖子https://bbs.21ic.com/icview-3375412-1-1.html 里的方法改成完全从sram运行,结果如下:
由此可以看出,完全从sram运行的性能只有从flash启动运行的75%左右,和理论预期不符,百思不得其解。难道APM32F411硬件上有flash加速,比如flash预取,或者对软件透明的cache?大家有没有什么看法?
|