3.3 运行中记录出错位置
以3.2为例子,进行简单的反推。启动文件中的Hard中断处理一般如下所示,即让程序陷入这个死循环。
HardFaultException
; NOTE(review): as written, the branch below is commented out, so no
; instruction follows the label. The surrounding text describes an
; infinite loop here (presumably "B HardFaultException") -- confirm
; against the real startup file.
; B HardFaultException
现在我们要在此处记录重要数据,即此刻系统的运行情况,主要包括:此刻的堆栈情况、R0等8个寄存器的值、相关Hard硬件寄存器的值;若是任务引发的,还要记录任务的ID号。因此需要修改这个异常处理函数。
; HardFault entry stub: selects the stack pointer that was in use when the
; fault occurred, places it in R0, and branches to the C handler.
; NOTE(review): hard_fault_handler_c must be visible to the assembler
; (e.g. through an IMPORT directive), which is not shown in this excerpt.
HardFaultException
        TST     LR, #4                  ; bitwise AND of LR with 4 (tests bit 2)
        ITE     EQ                      ; result zero -> MSP, otherwise PSP
        MRSEQ   R0, MSP                 ; R0 = main stack pointer
        MRSNE   R0, PSP                 ; R0 = process stack pointer
        B       hard_fault_handler_c    ; continue in the handler written in C
/*
 * Formats one line of the form "<label> = 0x<8 hex digits>\n" into the
 * shared buffer g_cDataBuf and sends it with Usart232SendStr.
 */
static void hard_fault_print_value(const char *label, unsigned int value)
{
    sprintf((char *)g_cDataBuf, "%s = 0x%0.8x\n", label, value);
    Usart232SendStr(g_cDataBuf);
}

/*
 * C part of the HardFault handler: dumps the fault context over the serial
 * port and then terminates via exit(0).
 *
 * hardfault_args: stack pointer value passed in by the assembly stub. The
 *                 eight words it points to are read as the stacked
 *                 R0, R1, R2, R3, R12, LR, PC and PSR, in that order.
 *
 * Output order: banner, current task priority (OSPrioCur), the stack
 * pointer itself, then the eight stacked registers.
 */
void hard_fault_handler_c(unsigned int * hardfault_args)
{
    /* Labels for hardfault_args[0..7], in stacking order. */
    static const char * const reg_names[8] = {
        "R0", "R1", "R2", "R3", "R12", "LR", "PC", "PSR"
    };
    unsigned int i;

    sprintf((char *)g_cDataBuf, "[Hard fault handler]\n");
    Usart232SendStr(g_cDataBuf);

    /* Task priority, used as the task ID. */
    hard_fault_print_value("The task pri id", (unsigned int)OSPrioCur);

    /*
     * Stack address. A pointer must not be passed to a %x conversion
     * directly (mismatched argument type is undefined behaviour), so it is
     * converted to an integer explicitly first.
     */
    hard_fault_print_value("SP", (unsigned int)(unsigned long)hardfault_args);

    for (i = 0; i < 8; i++) {
        hard_fault_print_value(reg_names[i], hardfault_args[i]);
    }

    exit(0); /* terminate; does not return */
}
以3.2为例,发生异常后,串口的输出如下所示:
[Hard fault handler]
The task pri id = 0x00000014 //任务优先级是20
SP = 0x200077d8 //当前任务的堆栈地址是0x2000 77D8
R0 = 0x2000dfa0
R1 = 0x68130000
R2 = 0x2000df9c
R3 = 0x20002100
R12 = 0x00000001
LR = 0x0801c7fb //分析得出,这个地址就是出错的地方
PC = 0x68130000
PSR = 0x00000000
此时需要借助map文件分析,map文件中得出对应的代码和数据位置。
tcpip_thread 0x0801c7ad Thumb Code 190 tcpip.o(i.tcpip_thread)
i.tcpsvr_accept_20 0x0801c874 Section 64 ftpmanage.o(i.tcpsvr_accept_20)
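0x0801 c7fb应该在tcpip文件中的tcpip_thread函数里:该函数起始于0x0801 c7ac(map文件中0x0801 c7ad的最低位是Thumb标志位),长度为190(0xBE)字节,覆盖0x0801 c7ac~0x0801 c869,0x0801 c7fb落在这个范围内。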
T_LWIP_THREAD_STK 0x20007000 Data 2048 sys_arch.o(.bss)
rsuPib 0x20007800 Data 32 para.o(.bss)
堆栈空间是0x2000 77D8,是在T_LWIP_THREAD_STK这个栈空间里,这也是协议栈任务的堆栈空间,证明判断的任务优先级为20是正确的。
从0x0801 C7AD处开始的16进制文件如下图所示,再将汇编文件列出(需要Keil生成)。
tcpip_thread PROC
;;;232 static void
;;;233 tcpip_thread(void *arg)
000000 b508 PUSH {r3,lr} //开始
;;;234 {
;;;235 struct tcpip_msg *msg;
;;;236 LWIP_UNUSED_ARG(arg);
;;;237
;;;238 #if IP_REASSEMBLY
;;;239 sys_timeout(IP_TMR_INTERVAL, ip_reass_timer, NULL);
;;;240 #endif /* IP_REASSEMBLY */
;;;241 #if LWIP_ARP
;;;242 sys_timeout(ARP_TMR_INTERVAL, arp_timer, NULL);
000002 2200 MOVS r2,#0
000004 492e LDR r1,|L11.192|
000006 f2413088 MOV r0,#0x1388
00000a f7fffffe BL sys_timeout
;;;243 #endif /* LWIP_ARP */
;;;244 #if LWIP_DHCP
;;;245 sys_timeout(DHCP_COARSE_TIMER_MSECS, dhcp_timer_coarse, NULL);
;;;246 sys_timeout(DHCP_FINE_TIMER_MSECS, dhcp_timer_fine, NULL);
;;;247 #endif /* LWIP_DHCP */
;;;248 #if LWIP_AUTOIP
;;;249 sys_timeout(AUTOIP_TMR_INTERVAL, autoip_timer, NULL);
;;;250 #endif /* LWIP_AUTOIP */
;;;251 #if LWIP_IGMP
;;;252 sys_timeout(IGMP_TMR_INTERVAL, igmp_timer, NULL);
;;;253 #endif /* LWIP_IGMP */
;;;254 #if LWIP_DNS
;;;255 sys_timeout(DNS_TMR_INTERVAL, dns_timer, NULL);
;;;256 #endif /* LWIP_DNS */
;;;257
;;;258 if (tcpip_init_done != NULL) {
00000e 482d LDR r0,|L11.196|
000010 6800 LDR r0,[r0,#0] ; tcpip_init_done
000012 b128 CBZ r0,|L11.32|
;;;259 tcpip_init_done(tcpip_init_done_arg);
000014 482b LDR r0,|L11.196|
000016 1d00 ADDS r0,r0,#4
000018 6800 LDR r0,[r0,#0] ; tcpip_init_done_arg
00001a 492a LDR r1,|L11.196|
00001c 6809 LDR r1,[r1,#0] ; tcpip_init_done
00001e 4788 BLX r1
|L11.32|
;;;260 }
;;;261
;;;262 LOCK_TCPIP_CORE();
;;;263 while (1) { /* MAIN Loop */
000020 e04c B |L11.188|
|L11.34|
;;;264 sys_mbox_fetch(mbox, (void *)&msg);
000022 4669 MOV r1,sp
000024 4827 LDR r0,|L11.196|
000026 1f00 SUBS r0,r0,#4
000028 6800 LDR r0,[r0,#0] ; mbox
00002a f7fffffe BL sys_mbox_fetch
;;;265 switch (msg->type) {
00002e 9800 LDR r0,[sp,#0]
000030 7800 LDRB r0,[r0,#0]
000032 2805 CMP r0,#5
000034 d240 BCS |L11.184|
000036 e8dff000 TBB [pc,r0]
00003a 030b DCB 0x03,0x0b
00003c 222b3500 DCB 0x22,0x2b,0x35,0x00
;;;266 #if LWIP_NETCONN
;;;267 case TCPIP_MSG_API:
;;;268 //if(msg->msg.apimsg->msg.conn == NULL)
;;;269 // break;
;;;270 LWIP_DEBUGF(TCPIP_DEBUG, ("tcpip_thread: API message %p\n", (void *)msg));
;;;271 msg->msg.apimsg->function(&(msg->msg.apimsg->msg));
000040 9a00 LDR r2,[sp,#0]
000042 6892 LDR r2,[r2,#8]
000044 1d10 ADDS r0,r2,#4
000046 9a00 LDR r2,[sp,#0]
000048 6892 LDR r2,[r2,#8]
00004a 6811 LDR r1,[r2,#0]
00004c 4788 BLX r1
;;;272 break;
00004e e034 B |L11.186| //0x0801 c7fb对应的代码
;;;273 #endif /* LWIP_NETCONN */
;;;274
;;;275 case TCPIP_MSG_INPKT:
;;;276 LWIP_DEBUGF(TCPIP_DEBUG, ("tcpip_thread: PACKET %p\n", (void *)msg));
;;;277 #if LWIP_ARP
;;;278 if (msg->msg.inp.netif->flags & NETIF_FLAG_ETHARP) {
000050 9800 LDR r0,[sp,#0]
从代码看,0x0801 c7fb去掉最低位的Thumb标志位后是0x0801 c7fa,相对函数起始地址0x0801 c7ac的偏移为0x4e,对应的是00004e e034 B |L11.186| ,即switch分支的break语句。但LR中保存的是返回地址,也就是调用指令的下一条指令,所以实际出错的应该是上面的那句BLX r1,而此时R1的值是
R1 = 0x68130000,即跳转到0x6813 0000处执行,与在3.2的分析是一样的。
这种方法也只能判断出出错的位置,具体原因还是需要通过仿真调试才能找到。