在调用close系统调用关闭套接字时,如果套接字对应的文件引用计数已经归零,内核会释放该socket,并向下调用传输层协议注册的close实现(sk->sk_prot->close),对TCP而言即tcp_close;本文仅介绍tcp部分,前置部分请参考本博关于close系统调用的文章;
/*
 * tcp_close - TCP's socket-close operation (sk->sk_prot->close).
 * @sk:      socket being closed
 * @timeout: linger time to wait for queued data to drain
 *           (sk->sk_lingertime for a lingering close, else 0)
 *
 * Reached from the close() syscall path once the socket's last file
 * reference is gone.  On return the socket is either destroyed here
 * (inet_csk_destroy_sock), handed to the FIN_WAIT2/TIME_WAIT timers,
 * or "reprieved" until the protocol close handshake finishes, in
 * which case a softirq/timer releases it later.
 */
void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	lock_sock(sk);
	/* No further sends or receives are allowed on this socket. */
	sk->sk_shutdown = SHUTDOWN_MASK;

	/* LISTEN-state handling */
	if (sk->sk_state == TCP_LISTEN) {
		/* Move straight to CLOSE; a listener has no peer to notify. */
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		/* Purge both the pending (SYN_RECV) and accept queues. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/* We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* Drop every skb the user process never read, counting the bytes. */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;

		/* A FIN consumes one sequence number but carries no data. */
		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
			len--;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	/* Return the freed receive-buffer memory to the accounting pools. */
	sk_mem_reclaim(sk);

	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
	if (sk->sk_state == TCP_CLOSE)
		goto adjudge_to_death;

	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost. To witness the awful effects of the old behavior of
	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
	 * GET in an FTP client, suspend the process, wait for the client to
	 * advertise a zero window, then kill -9 the FTP client, wheee...
	 * Note: timeout is always zero in such a case.
	 */
	if (unlikely(tcp_sk(sk)->repair)) {
		/* Repair mode (TCP_REPAIR): tear down quietly, no wire traffic. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);

		/* Abort: go to CLOSE and reset the peer (RFC 2525 2.17). */
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, sk->sk_allocation);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		/* SO_LINGER with zero timeout: hard disconnect, counted as
		 * an abort-on-data.
		 */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 */

		/* RED-PEN. Formally speaking, we have broken TCP state
		 * machine. State transitions:
		 *
		 * TCP_ESTABLISHED -> TCP_FIN_WAIT1
		 * TCP_SYN_RECV	-> TCP_FIN_WAIT1 (forget it, it's impossible)
		 * TCP_CLOSE_WAIT -> TCP_LAST_ACK
		 *
		 * are legal only when FIN has been sent (i.e. in window),
		 * rather than queued out of window. Purists blame.
		 *
		 * F.e. "RFC state" is ESTABLISHED,
		 * if Linux state is FIN-WAIT-1, but FIN is still not sent.
		 *
		 * The visible declinations are that sometimes
		 * we enter time-wait state, when it is not required really
		 * (harmless), do not send active resets, when they are
		 * required by specs (TCP_ESTABLISHED, TCP_CLOSE_WAIT, when
		 * they look as CLOSING or LAST_ACK for Linux)
		 * Probably, I missed some more holelets.
		 * 						--ANK
		 * XXX (TFO) - To start off we don't support SYN+ACK+FIN
		 * in a single packet! (May consider it later but will
		 * probably need API support or TCP_CORK SYN-ACK until
		 * data is written and socket is closed.)
		 */
		/* Normal path: state already advanced by tcp_close_state(),
		 * now send the FIN.
		 */
		tcp_send_fin(sk);
	}

	/* Wait (up to @timeout) for the close to make progress; relevant in
	 * FIN_WAIT_1/CLOSING/LAST_ACK with SO_LINGER set.
	 */
	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot the state before release_sock(): backlog processing may
	 * change sk->sk_state under us (checked below).
	 */
	state = sk->sk_state;
	sock_hold(sk);

	/* Mark SOCK_DEAD and detach from the owning process/wait queue. */
	sock_orphan(sk);

	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);


	/* Now socket is owned by kernel and we acquire BH lock
	   to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Account this socket as an orphan (no user-space owner). */
	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	/* NOTE(review): i.e. release_sock()'s backlog processing (or a softirq)
	 * moved us to TCP_CLOSE after the snapshot above; nothing left to do
	 * here.  (Original annotation questioned this — confirm against the
	 * backlog path.)
	 */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/* This is a (useful) BSD violating of the RFC. There is a
	 * problem with TCP as specified in that the other end could
	 * keep a socket open forever with no application left this end.
	 * We use a 1 minute timeout (about the same as BSD) then kill
	 * our end. If they send after that then tough - BUT: long enough
	 * that we won't make the old 4*rto = almost no time - whoops
	 * reset mistake.
	 *
	 * Nope, it was not mistake. It is really desired behaviour
	 * f.e. on http servers, when such sockets are useless, but
	 * consume significant resources. Let's do it with special
	 * linger2	option.					--ANK
	 */

	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);
		/* linger2 < 0: don't wait in FIN_WAIT2 at all. */
		if (tp->linger2 < 0) {

			/* Abort: CLOSE + RST, counted as abort-on-linger. */
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPABORTONLINGER);
		} else {

			/* FIN_WAIT_2 timeout (derived from linger2). */
			const int tmo = tcp_fin_time(sk);

			/* Longer than TIME_WAIT: run the FIN_WAIT_2 phase on
			 * the (reused) keepalive timer first.
			 */
			if (tmo > TCP_TIMEWAIT_LEN) {
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				/* Short enough: hand over to the time-wait
				 * machinery immediately; it owns sk now.
				 */
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}

	/* Not yet CLOSE: apply orphan/memory pressure limits. */
	if (sk->sk_state != TCP_CLOSE) {
		sk_mem_reclaim(sk);

		/* Too many orphans or out of TCP memory: kill it now. */
		if (tcp_check_oom(sk, 0)) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPABORTONMEMORY);
		}
	}

	/* CLOSE reached: the control block can be destroyed here. */
	if (sk->sk_state == TCP_CLOSE) {
		struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
		/* We could get here with a non-NULL req if the socket is
		 * aborted (e.g., closed with unread data) before 3WHS
		 * finishes.
		 */
		if (req)
			reqsk_fastopen_remove(sk, req, false);

		inet_csk_destroy_sock(sk);
	}
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	/* Drop the reference taken by sock_hold() above; if destroyed, this
	 * is the final put.
	 */
	sock_put(sk);
}