移植了一下coremark。坛子里coremark移植的帖子比较多,所以移植步骤只简单说一说;重点在后面:从sram运行coremark的性能数据反而比从flash执行的要差,这是一件比较奇怪的事情。
先简单说说coremark的移植。首先clone代码:
- git clone https://github.com/eembc/coremark
以Examples/GPIO/GPIO_Toggle/为模板创建coremark工程
- cp -a Examples/GPIO/GPIO_Toggle/ Examples/coremark
把第一步clone得到的coremark仓库里这几个文件拷贝到coremark工程目录
core_list_join.c core_matrix.c core_util.c core_main.c core_state.c coremark.h
simple/core_portme.c simple/core_portme.h
修改Examples/coremark/Source/main.c如下
- extern volatile unsigned int ticks;
- int main(void)
- {
- USART1_Init(115200);
- /* Init delay function */
- Delay_Init();
- SysTick_Config(SystemCoreClock / 1000);
- core_main(1, NULL);
- for(;;) __WFI();
- }
修改Examples/coremark/Source/core_main.c的main函数名为core_main
修改Examples/coremark/Source/core_portme.c
增加如下两行:
- #define ITERATIONS 6000
- extern volatile uint32_t ticks;
时间相关的改成:
- #define NSECS_PER_SEC 1000
- #define CORETIMETYPE clock_t
- #define GETMYTIME(_t) (*_t = ticks)
修改Examples/coremark/Source/core_portme.h,加入
- #define COMPILER_FLAGS "-O3"
修改Makefile
- diff --git a/Makefile b/Makefile
- index abd688c..78559d0 100644
- --- a/Makefile
- +++ b/Makefile
- @@ -9,7 +9,7 @@ TARGET = apm32f411
- # debug build?
- DEBUG = 0
- # optimization for size, enable lto
- -OPT = -Os -flto
- +OPT = -O3 -flto
- #######################################
- @@ -24,7 +24,7 @@ BUILD_DIR = build
- # C sources
- C_SOURCES = $(wildcard Libraries/APM32F4xx_StdPeriphDriver/src/*.c)
- C_SOURCES += $(wildcard Boards/Board_APM32F411_TINY/src/*.c)
- -C_SOURCES += $(wildcard Examples/GPIO/GPIO_Toggle/Source/*.c)
- +C_SOURCES += $(wildcard Examples/coremark/Source/*.c)
- # ASM sources
- ASM_SOURCES = Libraries/Device/Geehy/APM32F4xx/Source/gcc/startup_apm32f411.S
- @@ -38,7 +38,7 @@ C_INCLUDES += -ILibraries/Device/Geehy/APM32F4xx/Include
- C_INCLUDES += -ILibraries/CMSIS/Include/
- C_INCLUDES += -IBoards/
- C_INCLUDES += -IBoards/Board_APM32F411_TINY/inc/
- -C_INCLUDES += -IExamples/GPIO/GPIO_Toggle/Include
- +C_INCLUDES += -IExamples/coremark/Include
- C_DEFS := -DAPM32F411 -DAPM32F411_TINY
- @@ -84,12 +84,12 @@ CFLAGS += -MMD -MP -MF"$(@:%.o=%.d)"
- # libraries
- LIBS = -lc -lm -lnosys
- LIBDIR =
- -LDFLAGS = $(MCU) -fsigned-char -ffunction-sections -fdata-sections -Wunused -Wuninitialized -T $(LDSCRIPT) -Wl,--gc-sections,--print-memory-usage,-Map=$(BUILD_DIR)/$(TARGET).map --specs=nano.specs $(LIBS) -flto -fuse-linker-plugin
- +LDFLAGS = $(MCU) -fsigned-char -ffunction-sections -fdata-sections -Wunused -Wuninitialized -T $(LDSCRIPT) -Wl,--gc-sections,--print-memory-usage,-Map=$(BUILD_DIR)/$(TARGET).map --specs=nano.specs -u_printf_float $(LIBS) -flto -fuse-linker-plugin
-
编译&烧录&运行
- make
- pyocd load -e sector -t apm32f411ve build/apm32f411.bin
这一步编译出来的是从flash启动运行的,运行结果如下图
利用帖子https://bbs.21ic.com/icview-3375412-1-1.html 里的方法改成完全从sram运行,结果如下:
由此可以看出,完全从sram运行的性能只有从flash启动运行的75%左右,与理论预期不符,百思不得其解。难道APM32F411硬件上有flash加速,比如flash预取?或者对软件透明的cache?大家有什么看法?
|