概述
tcp_rcv_established用于处理已连接状态下的输入,处理过程根据首部预测字段分为快速路径和慢速路径;
1. 在快路中,根据是否有数据负荷进行不同处理:
(1) 若无数据,则处理输入ack,释放该skb,检查是否有数据发送,有则发送;
(2) 若有数据,检查当前是否处于读取进程的上下文,并且该数据正是进程期望读取的数据;若是,则直接将数据复制到用户空间;若不满足直接复制到用户空间的条件,或者复制失败,则需要将数据段加入到接收队列中,加入方式包括合并到已有数据段,或者加入队列尾部;随后处理输入ack,检查是否有数据和ack需要发送,最后唤醒用户进程通知有数据可读;
2. 在慢路中,会进行更详细的校验,然后处理ack,处理紧急数据,接收数据段(其中可能包含乱序的数据段),最后检查是否有数据和ack需要发送;
源码分析
1 he first three cases are guaranteed by proper pred_flags setting, 2 * the rest is checked inline. Fast processing is turned on in 3 * tcp_data_queue when everything is OK. 4 */ 5 void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, 6 const struct tcphdr *th, unsigned int len) 7 { 8 struct tcp_sock *tp = tcp_sk(sk); 9 10 skb_mstamp_get(&tp->tcp_mstamp); 11 /* 路由为空,则重新设置路由 */ 12 if (unlikely(!sk->sk_rx_dst)) 13 inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); 14 /* 15 * Header prediction. 16 * The code loosely follows the one in the famous 17 * "30 instruction TCP receive" Van Jacobson mail. 18 * 19 * Van's trick is to deposit buffers into socket queue 20 * on a device interrupt, to call tcp_recv function 21 * on the receive process context and checksum and copy 22 * the buffer to user space. smart... 23 * 24 * Our current scheme is not silly either but we take the 25 * extra cost of the net_bh soft interrupt processing... 26 * We do checksum and copy also but from device to kernel. 27 */ 28 29 tp->rx_opt.saw_tstamp = 0; 30 31 /* pred_flags is 0xS?10 << 16 + snd_wnd 32 * if header_prediction is to be made 33 * 'S' will always be tp->tcp_header_len >> 2 34 * '?' will be 0 for the fast path, otherwise pred_flags is 0 to 35 * turn it off (when there are holes in the receive 36 * space for instance) 37 * PSH flag is ignored. 38 */ 39 40 /* 快路检查&& 序号正确 && ack序号正确 */ 41 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && 42 TCP_SKB_CB(skb)->seq == tp->rcv_nxt && 43 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { 44 /* tcp头部长度 */ 45 int tcp_header_len = tp->tcp_header_len; 46 47 /* Timestamp header prediction: tcp_header_len 48 * is automatically equal to th->doff*4 due to pred_flags 49 * match. 50 */ 51 52 /* Check timestamp */ 53 /* 有时间戳选项 */ 54 if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { 55 /* No? Slow path! 
*/ 56 /* 解析时间戳选项失败,执行慢路 */ 57 if (!tcp_parse_aligned_timestamp(tp, th)) 58 goto slow_path; 59 60 /* If PAWS failed, check it more carefully in slow path */ 61 /* 序号回转,执行慢路 */ 62 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) 63 goto slow_path; 64 65 /* DO NOT update ts_recent here, if checksum fails 66 * and timestamp was corrupted part, it will result 67 * in a hung connection since we will drop all 68 * future packets due to the PAWS test. 69 */ 70 } 71 72 /* 无数据 */ 73 if (len <= tcp_header_len) { 74 /* Bulk data transfer: sender */ 75 if (len == tcp_header_len) { 76 /* Predicted packet is in window by definition. 77 * seq == rcv_nxt and rcv_wup <= rcv_nxt. 78 * Hence, check seq<=rcv_wup reduces to: 79 */ 80 /* 81 有时间戳选项 82 && 所有接收的数据段均确认完毕 83 保存时间戳 84 */ 85 if (tcp_header_len == 86 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && 87 tp->rcv_nxt == tp->rcv_wup) 88 tcp_store_ts_recent(tp); 89 90 /* We know that such packets are checksummed 91 * on entry. 92 */ 93 /* 输入ack处理 */ 94 tcp_ack(sk, skb, 0); 95 /* 释放skb */ 96 __kfree_skb(skb); 97 98 /* 检查是否有数据要发送,并检查发送缓冲区大小 */ 99 tcp_data_snd_check(sk); 100 return; 101 } 102 /* 数据多小,比头部都小,错包 */ 103 else { /* Header too small */ 104 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 105 goto discard; 106 } 107 } 108 /* 有数据 */ 109 else { 110 int eaten = 0; 111 bool fragstolen = false; 112 113 /* 读取进程上下文 */ 114 if (tp->ucopy.task == current && 115 /* 期待读取的和期待接收的序号一致 */ 116 tp->copied_seq == tp->rcv_nxt && 117 /* 数据<= 待读取长度 */ 118 len - tcp_header_len <= tp->ucopy.len && 119 /* 控制块被用户空间锁定 */ 120 sock_owned_by_user(sk)) { 121 122 /* 设置状态为running??? */ 123 __set_current_state(TASK_RUNNING); 124 125 /* 拷贝数据到msghdr */ 126 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { 127 /* Predicted packet is in window by definition. 128 * seq == rcv_nxt and rcv_wup <= rcv_nxt. 
129 * Hence, check seq<=rcv_wup reduces to: 130 */ 131 /* 有时间戳选项&& 收到的数据段均已确认,更新时间戳 */ 132 if (tcp_header_len == 133 (sizeof(struct tcphdr) + 134 TCPOLEN_TSTAMP_ALIGNED) && 135 tp->rcv_nxt == tp->rcv_wup) 136 tcp_store_ts_recent(tp); 137 138 /* 接收端RTT估算 */ 139 tcp_rcv_rtt_measure_ts(sk, skb); 140 141 __skb_pull(skb, tcp_header_len); 142 143 /* 更新期望接收的序号 */ 144 tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); 145 NET_INC_STATS(sock_net(sk), 146 LINUX_MIB_TCPHPHITSTOUSER); 147 eaten = 1; 148 } 149 } 150 151 /* 未拷贝数据到用户空间,或者拷贝失败 */ 152 if (!eaten) { 153 /* 检查校验和 */ 154 if (tcp_checksum_complete(skb)) 155 goto csum_error; 156 157 /* skb长度> 预分配长度 */ 158 if ((int)skb->truesize > sk->sk_forward_alloc) 159 goto step5; 160 161 /* Predicted packet is in window by definition. 162 * seq == rcv_nxt and rcv_wup <= rcv_nxt. 163 * Hence, check seq<=rcv_wup reduces to: 164 */ 165 /* 有时间戳选项,且数据均已确认完毕,则更新时间戳 */ 166 if (tcp_header_len == 167 (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && 168 tp->rcv_nxt == tp->rcv_wup) 169 tcp_store_ts_recent(tp); 170 171 /* 计算RTT */ 172 tcp_rcv_rtt_measure_ts(sk, skb); 173 174 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); 175 176 /* Bulk data transfer: receiver */ 177 /* 数据加入接收队列 */ 178 eaten = tcp_queue_rcv(sk, skb, tcp_header_len, 179 &fragstolen); 180 } 181 182 tcp_event_data_recv(sk, skb); 183 184 /* 确认序号确认了数据 */ 185 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { 186 /* Well, only one small jumplet in fast path... 
*/ 187 /* 处理ack */ 188 tcp_ack(sk, skb, FLAG_DATA); 189 /* 检查是否有数据要发送,需要则发送 */ 190 tcp_data_snd_check(sk); 191 /* 没有ack要发送 */ 192 if (!inet_csk_ack_scheduled(sk)) 193 goto no_ack; 194 } 195 196 /* 检查是否有ack要发送,需要则发送 */ 197 __tcp_ack_snd_check(sk, 0); 198 no_ack: 199 /* skb已经复制到用户空间,则释放之 */ 200 if (eaten) 201 kfree_skb_partial(skb, fragstolen); 202 203 /* 唤醒用户进程有数据读取 */ 204 sk->sk_data_ready(sk); 205 return; 206 } 207 } 208 209 slow_path: 210 /* 长度错误|| 校验和错误 */ 211 if (len < (th->doff << 2) || tcp_checksum_complete(skb)) 212 goto csum_error; 213 214 /* 无ack,无rst,无syn */ 215 if (!th->ack && !th->rst && !th->syn) 216 goto discard; 217 218 /* 219 * Standard slow path. 220 */ 221 /* 种种校验 */ 222 if (!tcp_validate_incoming(sk, skb, th, 1)) 223 return; 224 225 step5: 226 /* 处理ack */ 227 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) 228 goto discard; 229 230 /* 计算rtt */ 231 tcp_rcv_rtt_measure_ts(sk, skb); 232 233 /* Process urgent data. */ 234 /* 处理紧急数据 */ 235 tcp_urg(sk, skb, th); 236 237 /* step 7: process the segment text */ 238 /* 数据段处理 */ 239 tcp_data_queue(sk, skb); 240 241 /* 发送数据检查,有则发送 */ 242 tcp_data_snd_check(sk); 243 244 /* 发送ack检查,有则发送 */ 245 tcp_ack_snd_check(sk); 246 return; 247 248 csum_error: 249 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 250 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 251 252 discard: 253 tcp_drop(sk, skb); 254 }