21ic问答首页 - tcpip线程被mu0锁住导致网络线程无法使用问题
tcpip线程被mu0锁住导致网络线程无法使用问题 赏300家园币
各位好,我使用rtthread开发STM32F407VGT6芯片,程序有多个线程,每个线程都会创建一个socket,建立tcp连接或者udp连接,现在出现一个问题,程序长时间运行有概率死机,但是没有任何hardfault打印,我加入了cmbacktrace也没有输出任何信息,我在串口的msh里输入任何数据都不会响应,多次输入会提示接收字符缓存区满了。
后来根据检查发现,我的CAN接收发送线程和RS485接收发送线程没死,我在程序里添加了一个接收指定字符后,打印一些函数结果的代码,代码如下:
ch = uart_sample_get_char();
// LOG_D("ch = %d",ch);
if(ch == 0x01) {
mem_monitor2(NULL);
list_thread();
list_sem();
list_msgqueue();
list_mutex();
list_timer();
} else if(ch == 0x02) {
for(int i=0;i < 8; i ++) {
if(lock[i]->owner != RT_NULL) {
rt_kprintf("Mutex %s held by thread: %s\n",
lock[i]->parent.parent.name,
lock[i]->owner->name);
}else {
rt_kprintf("Mutex %s is free\n", lock[i]->parent.parent.name);
}
}
}
else if(ch == 0x03) {
msh_close_sockets();
}
这里的msh_close_sockets()作用是关闭所有的socket,函数的代码如下:
/*
 * MSH command implementation: close every client socket held in sock[0..7]
 * and then pulse the ETH_RESET pin.
 *
 * Slots whose descriptor is negative, or equals one of the server sockets
 * (server_cfg_num, server_execute_num, server_udp_num), are left untouched.
 * Each slot is processed while holding its own lock[var] mutex.
 *
 * Fix over the previous version: closesocket() used to be called only when
 * shutdown() succeeded. shutdown() can fail on a socket that is not connected
 * or is not a TCP socket, in which case the descriptor was never released.
 * The socket is now always closed; a shutdown failure is only logged.
 */
void msh_close_sockets(void) {
    for (int var = 0; var < 8; ++var) {
        rt_mutex_take(lock[var], RT_WAITING_FOREVER);

        int fd = sock[var];
        if (fd >= 0 && fd != server_cfg_num && fd != server_execute_num && fd != server_udp_num) {
            /* Best effort: a failed shutdown must not prevent the close. */
            if (shutdown(fd, SHUT_RDWR) < 0) {
                LOG_D("===== shutdown %d fail! =====",var);
            }

            if (closesocket(fd) >= 0) {
                LOG_D("===== close %d successs! =====",var);
                sock[var] = -1;
            } else {
                /* Slot keeps its old value, exactly as before, when close fails. */
                LOG_D("===== close %d fail! =====",var);
            }
        }

        rt_mutex_release(lock[var]);
    }

    /* Hold ETH_RESET low for 1000 ms, then drive it high again
     * (presumably a hardware reset of the Ethernet PHY - confirm on schematic). */
    rt_pin_write(ETH_RESET, 0);
    rt_thread_mdelay(1000);
    rt_pin_write(ETH_RESET, 1);
}
根据测试发现,list_mutex()函数,打印了一些线程锁,我看到一个tcpip线程被一个mu0锁住了,后面有4个线程在等待mu0,我估计问题就是这个情况导致的,但是我不知道该怎么查了,请问有没有人能够提供一些帮助的。。。
后来根据检查发现,我的CAN接收发送线程和RS485接收发送线程没死,我在程序里添加了一个接收指定字符后,打印一些函数结果的代码,代码如下:
ch = uart_sample_get_char();
// LOG_D("ch = %d",ch);
if(ch == 0x01) {
mem_monitor2(NULL);
list_thread();
list_sem();
list_msgqueue();
list_mutex();
list_timer();
} else if(ch == 0x02) {
for(int i=0;i < 8; i ++) {
if(lock[i]->owner != RT_NULL) {
rt_kprintf("Mutex %s held by thread: %s\n",
lock[i]->parent.parent.name,
lock[i]->owner->name);
}else {
rt_kprintf("Mutex %s is free\n", lock[i]->parent.parent.name);
}
}
}
else if(ch == 0x03) {
msh_close_sockets();
}
这里的msh_close_sockets()作用是关闭所有的socket,函数的代码如下:
/*
 * MSH command implementation: close every client socket held in sock[0..7]
 * and then pulse the ETH_RESET pin.
 *
 * Slots whose descriptor is negative, or equals one of the server sockets
 * (server_cfg_num, server_execute_num, server_udp_num), are left untouched.
 * Each slot is processed while holding its own lock[var] mutex.
 *
 * Fix over the previous version: closesocket() used to be called only when
 * shutdown() succeeded. shutdown() can fail on a socket that is not connected
 * or is not a TCP socket, in which case the descriptor was never released.
 * The socket is now always closed; a shutdown failure is only logged.
 */
void msh_close_sockets(void) {
    for (int var = 0; var < 8; ++var) {
        rt_mutex_take(lock[var], RT_WAITING_FOREVER);

        int fd = sock[var];
        if (fd >= 0 && fd != server_cfg_num && fd != server_execute_num && fd != server_udp_num) {
            /* Best effort: a failed shutdown must not prevent the close. */
            if (shutdown(fd, SHUT_RDWR) < 0) {
                LOG_D("===== shutdown %d fail! =====",var);
            }

            if (closesocket(fd) >= 0) {
                LOG_D("===== close %d successs! =====",var);
                sock[var] = -1;
            } else {
                /* Slot keeps its old value, exactly as before, when close fails. */
                LOG_D("===== close %d fail! =====",var);
            }
        }

        rt_mutex_release(lock[var]);
    }

    /* Hold ETH_RESET low for 1000 ms, then drive it high again
     * (presumably a hardware reset of the Ethernet PHY - confirm on schematic). */
    rt_pin_write(ETH_RESET, 0);
    rt_thread_mdelay(1000);
    rt_pin_write(ETH_RESET, 1);
}
根据测试发现,list_mutex()函数,打印了一些线程锁,我看到一个tcpip线程被一个mu0锁住了,后面有4个线程在等待mu0,我估计问题就是这个情况导致的,但是我不知道该怎么查了,请问有没有人能够提供一些帮助的。。。
赞0
rt_thread_t ss1_thread = rt_thread_create("ss1", sensor_task, (void*)1, 2048, 10, 200);
rt_thread_startup(ss1_thread);
rt_thread_t ss2_thread = rt_thread_create("ss2", sensor_task, (void*)2, 2048, 10, 200);
rt_thread_startup(ss2_thread);
rt_thread_t ss3_thread = rt_thread_create("ss3", sensor_task, (void*)3, 2048, 10, 200);
rt_thread_startup(ss3_thread);
rt_thread_t ss4_thread = rt_thread_create("ss4", sensor_task, (void*)4, 2048, 10, 200);
rt_thread_startup(ss4_thread);
线程方法sensor_task代码如下:
/*
 * Thread entry for one sensor connection.
 *
 * param carries the slot number as an integer packed into the pointer value.
 * After an initial 2000 ms delay the task loops forever: it tries to open the
 * slot's connection with socket_init(); on success it hands over to
 * parse_sensor_data() and reconnects as soon as that returns, on failure it
 * waits 3000 ms before the next attempt. The function never returns.
 */
static void sensor_task(void *param){
    const int slot = (int)param;

    m_msleep(2000);

    for (;;) {
        if (socket_init(slot) != RET_OK) {
            /* Connection attempt failed: back off before retrying. */
            m_msleep(3000);
            continue;
        }

        parse_sensor_data(slot);
    }
}
socket_init代码如下:
ret_t socket_init(int id){
// LOG_I("socket init:%d", id);
rt_mutex_take(lock[id], RT_WAITING_FOREVER);
if(sock[id] >= 0){
closesocket(sock[id]);
sock[id] = -1;
}
int new_sock = socket(AF_INET, SOCK_STREAM, 0);
if (new_sock == -1)
{
LOG_E("Socket[%d] error\n", id);
// rt_kprintf("Socket[%d] create error\n", id);
rt_mutex_release(lock[id]);
return RET_FAIL;
}
struct timeval tv_out = {60, 0};
if(id >= 1 && id <= 4){
tv_out.tv_sec = 60;
}else{
tv_out.tv_sec = 10;
}
// if(id == IDX_LIGHT){
// tv_out.tv_sec = 3;
// }
setsockopt(new_sock, SOL_SOCKET, SO_RCVTIMEO, &tv_out, sizeof(tv_out));
tv_out.tv_sec = 3;
setsockopt(new_sock, SOL_SOCKET, SO_CONTIMEO, &tv_out, sizeof(tv_out));
int val = 1;
setsockopt(new_sock, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val));
// 设置 KeepAlive 时间(单位:秒)
int keepidle = 5; // 5秒后开始发送探测包
setsockopt(new_sock, IPPROTO_TCP, TCP_KEEPIDLE, &keepidle, sizeof(keepidle));
// 设置 KeepAlive 探测间隔
int keepintvl = 1; // 每 1秒 发送一次探测包
setsockopt(new_sock, IPPROTO_TCP, TCP_KEEPINTVL, &keepintvl, sizeof(keepintvl));
// 设置 KeepAlive 探测次数
int keepcnt = 1; // 最多 1 次探测包
setsockopt(new_sock, IPPROTO_TCP, TCP_KEEPCNT, &keepcnt, sizeof(keepcnt));
// 直接解析IP地址
struct sockaddr_in server_addr = {0};
server_addr.sin_family = AF_INET;
server_addr.sin_port = htons(port[id]);
// 关键:使用inet_pton替代getaddrinfo
if (inet_pton(AF_INET, url[id], &server_addr.sin_addr) <= 0) {
rt_kprintf("Invalid IP address: %s\n", url[id]);
closesocket(new_sock);
rt_mutex_release(lock[id]);
return RET_FAIL;
}
// 连接服务器
if (connect(new_sock, (struct sockaddr*)&server_addr, sizeof(server_addr)) == 0) {
sock[id] = new_sock;
rt_mutex_release(lock[id]);
return RET_OK;
} else {
closesocket(new_sock);
rt_mutex_release(lock[id]);
return RET_FAIL;
}
}
parse_sensor_data方法的话,就是调用recv,判断接收到的字节,如果≤0,就return RET_IO(丢失连接);接收到数据的话,就按照对应的数据发送一些信号量,或者修改一些自己的变量,没有动底层的东西。
评论
2025-03-12
您需要登录后才可以回复 登录 | 注册