在用户进程启用了保活定时器的情况下,如果连接空闲(没有数据交互)的时间超过了设定的空闲时间,则保活定时器超时,向对端发送保活探测包:(1) 若收到正常回复,则说明对端工作正常,重置定时器,等待下一次达到空闲时间;(2) 若收到其他回复(如 RST),则确定对端已重启,关闭连接;(3) 若超过探测次数仍未得到回复,则认为对端主机已经崩溃,关闭连接。
启动定时器:
用户进程可以通过socket的SO_KEEPALIVE选项来开启或关闭保活定时器探测,TCP最终会调用tcp_set_keepalive来实现保活定时器的开启与关闭;
1 int sock_setsockopt(struct socket *sock, int level, int optname, 2 char __user *optval, unsigned int optlen) 3 { 4 struct sock *sk = sock->sk; 5 int val; 6 int valbool; 7 struct linger ling; 8 int ret = 0; 9 10 valbool = val ? 1 : 0; 11 12 lock_sock(sk); 13 14 switch (optname) { 15 case SO_KEEPALIVE: 16 if (sk->sk_prot->keepalive) 17 sk->sk_prot->keepalive(sk, valbool); 18 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool); 19 break; 20 } 21 }
1 struct proto tcp_prot = { 2 .name = "TCP", 3 /* 省略部分字段 */ 4 .keepalive = tcp_set_keepalive, 5 /* 省略部分字段 */ 6 }
1 void tcp_set_keepalive(struct sock *sk, int val) 2 { 3 if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) 4 return; 5 6 if (val && !sock_flag(sk, SOCK_KEEPOPEN)) 7 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); 8 else if (!val) 9 inet_csk_delete_keepalive_timer(sk); 10 }
定时器回调函数:
tcp_keepalive_timer函数为保活定时器和FIN_WAIT_2定时器共用,我们这里只关注保活部分;函数执行必要的状态检查,之后对空闲时间和配置空闲时间阈值进行判断,在超过阈值的情况下,若未超过探测次数和用户配置超时时间,则发送探测包,否则关闭连接;
1 static void tcp_keepalive_timer (unsigned long data) 2 { 3 struct sock *sk = (struct sock *) data; 4 struct inet_connection_sock *icsk = inet_csk(sk); 5 struct tcp_sock *tp = tcp_sk(sk); 6 u32 elapsed; 7 8 /* Only process if socket is not in use. */ 9 bh_lock_sock(sk); 10 11 /* 传输控制块被用户进程锁定 */ 12 if (sock_owned_by_user(sk)) { 13 /* Try again later. */ 14 /* 重置定时器 */ 15 inet_csk_reset_keepalive_timer (sk, HZ/20); 16 goto out; 17 } 18 19 /* 连接处于LISTEN状态,退出 */ 20 if (sk->sk_state == TCP_LISTEN) { 21 pr_err("Hmm... keepalive on a LISTEN ??? "); 22 goto out; 23 } 24 25 /* 处于fin_wait2且socket即将销毁,用作FIN_WAIT_2定时器 */ 26 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { 27 28 /* 停留在FIN_WAIT_2的停留时间>=0 */ 29 if (tp->linger2 >= 0) { 30 /* 获取在FIN_WAIT_2的剩余时间 */ 31 const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; 32 33 /* 有剩余时间则调用FIN_WAIT_2定时器 */ 34 if (tmo > 0) { 35 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); 36 goto out; 37 } 38 } 39 40 /* 发送rst */ 41 tcp_send_active_reset(sk, GFP_ATOMIC); 42 goto death; 43 } 44 45 /* 未启用保活|| 状态处于关闭或者发送syn状态,退出 */ 46 if (!sock_flag(sk, SOCK_KEEPOPEN) || 47 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) 48 goto out; 49 50 /* 获取设定的连接空闲时间 */ 51 elapsed = keepalive_time_when(tp); 52 53 /* It is alive without keepalive 8) */ 54 /* 有发送未确认的包或者还有待发送的包,不是空闲状态 */ 55 if (tp->packets_out || tcp_send_head(sk)) 56 goto resched; 57 58 /* 从上次收到包到现在的空闲时间 */ 59 elapsed = keepalive_time_elapsed(tp); 60 61 /* 连接空闲时间超过设定值 */ 62 if (elapsed >= keepalive_time_when(tp)) { 63 /* If the TCP_USER_TIMEOUT option is enabled, use that 64 * to determine when to timeout instead. 
65 */ 66 /* 67 设置了用户超时,空闲时间达到用户超时时间,已发送过探测 68 未设置用户超时,探测次数达到了保活最大探测次数 69 则发送rst关闭连接 70 */ 71 if ((icsk->icsk_user_timeout != 0 && 72 elapsed >= icsk->icsk_user_timeout && 73 icsk->icsk_probes_out > 0) || 74 (icsk->icsk_user_timeout == 0 && 75 icsk->icsk_probes_out >= keepalive_probes(tp))) { 76 /* 发送rst */ 77 tcp_send_active_reset(sk, GFP_ATOMIC); 78 79 /* 关闭连接 */ 80 tcp_write_err(sk); 81 goto out; 82 } 83 84 /* 发送保活探测包 */ 85 if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { 86 /* 探测次数增加 */ 87 icsk->icsk_probes_out++; 88 /* 下一次探测时间 */ 89 elapsed = keepalive_intvl_when(tp); 90 } else { 91 /* If keepalive was lost due to local congestion, 92 * try harder. 93 */ 94 /* 本地拥塞导致的失败,则重置定时器 */ 95 elapsed = TCP_RESOURCE_PROBE_INTERVAL; 96 } 97 } else { 98 /* It is tp->rcv_tstamp + keepalive_time_when(tp) */ 99 /* 未超过空闲时间,则计算将要达到空闲的时间 */ 100 elapsed = keepalive_time_when(tp) - elapsed; 101 } 102 103 sk_mem_reclaim(sk); 104 105 resched: 106 /* 重置定时器 */ 107 inet_csk_reset_keepalive_timer (sk, elapsed); 108 goto out; 109 110 death: 111 tcp_done(sk); 112 113 out: 114 bh_unlock_sock(sk); 115 sock_put(sk); 116 }