注:这部分还没有完全分析透彻,先在此记录,后面回顾的时候再进行补充;
启动定时器:
(1) 之前发送的数据段已经得到确认,新发出一个数据段之后设定;
(2) 新建连接发送syn之后设定;
(3) PMTU探测失败之后设定;
(4) 接收方丢弃了此前已通过SACK确认接收的段(SACK reneging)时设定;
定时器回调函数:
重传定时器超时回调函数为tcp_write_timer_handler,它根据连接控制块中记录的待处理事件类型(icsk_pending)分别调用不同的函数进行处理;这里我们只关心ICSK_TIME_RETRANS类型(重传类型),该类型会继续调用函数tcp_retransmit_timer完成具体的重传处理;
1 /* Called with bottom-half processing disabled. 2 Called by tcp_write_timer() */ 3 void tcp_write_timer_handler(struct sock *sk) 4 { 5 struct inet_connection_sock *icsk = inet_csk(sk); 6 int event; 7 8 /* 连接处于CLOSE或者LISTEN状态或者 没有指定待处理事件类型 */ 9 if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || 10 !icsk->icsk_pending) 11 goto out; 12 13 /* 超时时间未到,则重新设置定时器超时时间 */ 14 if (time_after(icsk->icsk_timeout, jiffies)) { 15 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); 16 goto out; 17 } 18 19 /* 获取事件类型 */ 20 event = icsk->icsk_pending; 21 22 switch (event) { 23 case ICSK_TIME_REO_TIMEOUT: 24 tcp_rack_reo_timeout(sk); 25 break; 26 case ICSK_TIME_LOSS_PROBE: 27 tcp_send_loss_probe(sk); 28 break; 29 /* 重传事件 */ 30 case ICSK_TIME_RETRANS: 31 icsk->icsk_pending = 0; 32 tcp_retransmit_timer(sk); 33 break; 34 case ICSK_TIME_PROBE0: 35 icsk->icsk_pending = 0; 36 tcp_probe_timer(sk); 37 break; 38 } 39 40 out: 41 sk_mem_reclaim(sk); 42 }
tcp_retransmit_timer函数即为超时重传的核心函数,其根据不同的情况决定是否进行重传,并且调整重传次数和退避指数,设定下一次重传定时器等;
1 /** 2 * tcp_retransmit_timer() - The TCP retransmit timeout handler 3 * @sk: Pointer to the current socket. 4 * 5 * This function gets called when the kernel timer for a TCP packet 6 * of this socket expires. 7 * 8 * It handles retransmission, timer adjustment and other necesarry measures. 9 * 10 * Returns: Nothing (void) 11 */ 12 void tcp_retransmit_timer(struct sock *sk) 13 { 14 struct tcp_sock *tp = tcp_sk(sk); 15 struct net *net = sock_net(sk); 16 struct inet_connection_sock *icsk = inet_csk(sk); 17 18 /* fastopen请求控制块不为空 */ 19 if (tp->fastopen_rsk) { 20 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && 21 sk->sk_state != TCP_FIN_WAIT1); 22 /* fastopen重传syn+ack */ 23 tcp_fastopen_synack_timer(sk); 24 /* Before we receive ACK to our SYN-ACK don't retransmit 25 * anything else (e.g., data or FIN segments). 26 */ 27 return; 28 } 29 30 /* 发送队列列出的段都已经得到确认 */ 31 if (!tp->packets_out) 32 goto out; 33 34 WARN_ON(tcp_write_queue_empty(sk)); 35 36 tp->tlp_high_seq = 0; 37 38 /* 39 对端窗口为0,套接口状态不是DEAD, 40 连接不是出于连接过程中的状态 41 */ 42 if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && 43 !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { 44 /* Receiver dastardly shrinks window. Our retransmits 45 * become zero probes, but we should not timeout this 46 * connection. If the socket is an orphan, time it out, 47 * we cannot allow such beasts to hang infinitely. 
48 */ 49 struct inet_sock *inet = inet_sk(sk); 50 if (sk->sk_family == AF_INET) { 51 net_dbg_ratelimited("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired) ", 52 &inet->inet_daddr, 53 ntohs(inet->inet_dport), 54 inet->inet_num, 55 tp->snd_una, tp->snd_nxt); 56 } 57 #if IS_ENABLED(CONFIG_IPV6) 58 else if (sk->sk_family == AF_INET6) { 59 net_dbg_ratelimited("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired) ", 60 &sk->sk_v6_daddr, 61 ntohs(inet->inet_dport), 62 inet->inet_num, 63 tp->snd_una, tp->snd_nxt); 64 } 65 #endif 66 /* 接收时间已经超过了TCP_RTO_MAX,出错 */ 67 if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { 68 tcp_write_err(sk); 69 goto out; 70 } 71 72 /* 进入loss状态 */ 73 tcp_enter_loss(sk); 74 75 /* 发送重传队列的第一个数据段 */ 76 tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); 77 78 /* 重置路由缓存 */ 79 __sk_dst_reset(sk); 80 goto out_reset_timer; 81 } 82 83 /* 重传检查 */ 84 if (tcp_write_timeout(sk)) 85 goto out; 86 87 /* 重传次数为0,第一次进入重传 */ 88 if (icsk->icsk_retransmits == 0) { 89 int mib_idx; 90 91 /* 不同拥塞状态的数据统计 */ 92 93 if (icsk->icsk_ca_state == TCP_CA_Recovery) { 94 if (tcp_is_sack(tp)) 95 mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; 96 else 97 mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; 98 } else if (icsk->icsk_ca_state == TCP_CA_Loss) { 99 mib_idx = LINUX_MIB_TCPLOSSFAILURES; 100 } else if ((icsk->icsk_ca_state == TCP_CA_Disorder) || 101 tp->sacked_out) { 102 if (tcp_is_sack(tp)) 103 mib_idx = LINUX_MIB_TCPSACKFAILURES; 104 else 105 mib_idx = LINUX_MIB_TCPRENOFAILURES; 106 } else { 107 mib_idx = LINUX_MIB_TCPTIMEOUTS; 108 } 109 __NET_INC_STATS(sock_net(sk), mib_idx); 110 } 111 112 /* 进入loss阶段 */ 113 tcp_enter_loss(sk); 114 115 /* 发送重传队列的第一个数据段失败 */ 116 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) { 117 /* Retransmission failed because of local congestion, 118 * do not backoff. 
119 */ 120 /* 更新重传数 */ 121 if (!icsk->icsk_retransmits) 122 icsk->icsk_retransmits = 1; 123 124 /* 复位定时器,等待下次重传 */ 125 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 126 min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), 127 TCP_RTO_MAX); 128 goto out; 129 } 130 131 /* Increase the timeout each time we retransmit. Note that 132 * we do not increase the rtt estimate. rto is initialized 133 * from rtt, but increases here. Jacobson (SIGCOMM 88) suggests 134 * that doubling rto each time is the least we can get away with. 135 * In KA9Q, Karn uses this for the first few times, and then 136 * goes to quadratic. netBSD doubles, but only goes up to *64, 137 * and clamps at 1 to 64 sec afterwards. Note that 120 sec is 138 * defined in the protocol as the maximum possible RTT. I guess 139 * we'll have to use something other than TCP to talk to the 140 * University of Mars. 141 * 142 * PAWS allows us longer timeouts and large windows, so once 143 * implemented ftp to mars will work nicely. We will have to fix 144 * the 120 second clamps though! 145 */ 146 /* 递增退避指数和重传次数 */ 147 icsk->icsk_backoff++; 148 icsk->icsk_retransmits++; 149 150 out_reset_timer: 151 /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is 152 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this 153 * might be increased if the stream oscillates between thin and thick, 154 * thus the old value might already be too high compared to the value 155 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without 156 * backoff. 
Limit to TCP_THIN_LINEAR_RETRIES before initiating 157 * exponential backoff behaviour to avoid continue hammering 158 * linear-timeout retransmissions into a black hole 159 */ 160 161 if (sk->sk_state == TCP_ESTABLISHED && 162 (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && 163 tcp_stream_is_thin(tp) && 164 icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { 165 /* 退避指数清0 */ 166 icsk->icsk_backoff = 0; 167 /* 重传超时时间不变 */ 168 icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); 169 } else { 170 /* Use normal (exponential) backoff */ 171 /* 重传超时时间*2 */ 172 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 173 } 174 175 /* 复位定时器,等待下次重传 */ 176 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 177 178 /* 重传超时重置路由缓存 */ 179 if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0, 0)) 180 __sk_dst_reset(sk); 181 182 out:; 183 }
tcp_write_timeout用于判断重传是否已经超时:函数根据不同情况获取最大重传次数,并由retransmits_timed_out根据该次数换算出最大超时时间(或直接使用用户设置的超时时间);若自首次发送以来经过的时间超过了该最大超时时间,则调用tcp_write_err报告错误并返回1,调用方随即放弃本次重传;
1 /* A write timeout has occurred. Process the after effects. */ 2 static int tcp_write_timeout(struct sock *sk) 3 { 4 struct inet_connection_sock *icsk = inet_csk(sk); 5 struct tcp_sock *tp = tcp_sk(sk); 6 struct net *net = sock_net(sk); 7 int retry_until; 8 bool do_reset, syn_set = false; 9 10 /* 连接建立过程中 */ 11 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 12 /* 已经重传过 */ 13 if (icsk->icsk_retransmits) { 14 15 /* 更新路由缓存项 */ 16 dst_negative_advice(sk); 17 18 /* fastopen缓存 */ 19 if (tp->syn_fastopen || tp->syn_data) 20 tcp_fastopen_cache_set(sk, 0, NULL, true, 0); 21 if (tp->syn_data && icsk->icsk_retransmits == 1) 22 NET_INC_STATS(sock_net(sk), 23 LINUX_MIB_TCPFASTOPENACTIVEFAIL); 24 } else if (!tp->syn_data && !tp->syn_fastopen) { 25 sk_rethink_txhash(sk); 26 } 27 28 /* 重传最大次数 */ 29 retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; 30 syn_set = true; 31 } else { 32 /* 重传次数超过retries1,黑洞? */ 33 if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0, 0)) { 34 /* Some middle-boxes may black-hole Fast Open _after_ 35 * the handshake. Therefore we conservatively disable 36 * Fast Open on this path on recurring timeouts after 37 * successful Fast Open. 
38 */ 39 if (tp->syn_data_acked) { 40 tcp_fastopen_cache_set(sk, 0, NULL, true, 0); 41 if (icsk->icsk_retransmits == net->ipv4.sysctl_tcp_retries1) 42 NET_INC_STATS(sock_net(sk), 43 LINUX_MIB_TCPFASTOPENACTIVEFAIL); 44 } 45 /* Black hole detection */ 46 47 /* PMTU探测 */ 48 tcp_mtu_probing(icsk, sk); 49 50 /* 更新路由缓存 */ 51 dst_negative_advice(sk); 52 } else { 53 sk_rethink_txhash(sk); 54 } 55 56 /* 连接已建立重传次数 */ 57 retry_until = net->ipv4.sysctl_tcp_retries2; 58 59 /* 套接口在关闭状态 */ 60 if (sock_flag(sk, SOCK_DEAD)) { 61 62 /* rto < 最大值 */ 63 const bool alive = icsk->icsk_rto < TCP_RTO_MAX; 64 65 /* 获取重传次数 */ 66 retry_until = tcp_orphan_retries(sk, alive); 67 68 /* 连接超时判断 */ 69 do_reset = alive || 70 !retransmits_timed_out(sk, retry_until, 0, 0); 71 72 /* 孤儿socket超过资源限制 */ 73 if (tcp_out_of_resources(sk, do_reset)) 74 return 1; 75 } 76 } 77 78 /* 判断连接是否超时 */ 79 if (retransmits_timed_out(sk, retry_until, 80 syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) { 81 /* Has it gone just too far? */ 82 tcp_write_err(sk); 83 return 1; 84 } 85 return 0; 86 }
1 /** 2 * retransmits_timed_out() - returns true if this connection has timed out 3 * @sk: The current socket 4 * @boundary: max number of retransmissions 5 * @timeout: A custom timeout value. 6 * If set to 0 the default timeout is calculated and used. 7 * Using TCP_RTO_MIN and the number of unsuccessful retransmits. 8 * @syn_set: true if the SYN Bit was set. 9 * 10 * The default "timeout" value this function can calculate and use 11 * is equivalent to the timeout of a TCP Connection 12 * after "boundary" unsuccessful, exponentially backed-off 13 * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if 14 * syn_set flag is set. 15 * 16 */ 17 static bool retransmits_timed_out(struct sock *sk, 18 unsigned int boundary, 19 unsigned int timeout, 20 bool syn_set) 21 { 22 unsigned int linear_backoff_thresh, start_ts; 23 24 /* 设置基础超时时间 */ 25 unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN; 26 27 /* 未发生过重传 */ 28 if (!inet_csk(sk)->icsk_retransmits) 29 return false; 30 31 /* 开始时间设置为数据包发送时间戳 */ 32 start_ts = tcp_sk(sk)->retrans_stamp; 33 34 /* 开始时间为0,则设置为第一个sk的 */ 35 if (unlikely(!start_ts)) 36 start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk)); 37 38 /* syn包timeout为0,非syn包tcp_user_timeout为0 */ 39 if (likely(timeout == 0)) { 40 41 /* 指数退避次数 */ 42 linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); 43 44 /* 根据重传次数boudany计算超时时间 */ 45 if (boundary <= linear_backoff_thresh) 46 timeout = ((2 << boundary) - 1) * rto_base; 47 else 48 timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + 49 (boundary - linear_backoff_thresh) * TCP_RTO_MAX; 50 } 51 52 /* 经过的时间是否超过了超时时间 */ 53 return (tcp_time_stamp - start_ts) >= timeout; 54 }