转自:http://blog.chinaunix.net/uid-20662820-id-3371081.html
关于TCP connect 返回错误99,可以能大家都会遇到,这里就分析一下这个错误的真正含义:
基于内核2.6.32
应用层调用connect,对应的系统调用的套接口实现是inet_stream_connect,对应tcp协议对应的传输层接口是tcp_v4_connect, 这里就从这个函数作为入口,该函数定义在net/ipv4/tcp_ipv4.c文件中
点击(此处)折叠或打开
- int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
- {
- struct inet_sock *inet = inet_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
- struct rtable *rt;
- __be32 daddr, nexthop;
- int tmp;
- int err;
- if (addr_len < sizeof(struct sockaddr_in))
- return -EINVAL;
- if (usin->sin_family != AF_INET)
- return -EAFNOSUPPORT;
- nexthop = daddr = usin->sin_addr.s_addr;
- if (inet->opt && inet->opt->srr) {
- if (!daddr)
- return -EINVAL;
- nexthop = inet->opt->faddr;
- }
- tmp = ip_route_connect(&rt, nexthop, inet->saddr,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
- IPPROTO_TCP,
- inet->sport, usin->sin_port, sk, 1);
- if (tmp < 0) {
- if (tmp == -ENETUNREACH)
- IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
- return tmp;
- }
- if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
- ip_rt_put(rt);
- return -ENETUNREACH;
- }
- if (!inet->opt || !inet->opt->srr)
- daddr = rt->rt_dst;
- if (!inet->saddr)
- inet->saddr = rt->rt_src;
- inet->rcv_saddr = inet->saddr;
- if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
- /* Reset inherited state */
- tp->rx_opt.ts_recent = 0;
- tp->rx_opt.ts_recent_stamp = 0;
- tp->write_seq = 0;
- }
- if (tcp_death_row.sysctl_tw_recycle &&
- !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
- struct inet_peer *peer = rt_get_peer(rt);
- /*
- * VJ's idea. We save last timestamp seen from
- * the destination in peer table, when entering state
- * TIME-WAIT * and initialize rx_opt.ts_recent from it,
- * when trying new connection.
- */
- if (peer != NULL &&
- peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
- tp->rx_opt.ts_recent = peer->tcp_ts;
- }
- }
- inet->dport = usin->sin_port;
- inet->daddr = daddr;
- inet_csk(sk)->icsk_ext_hdr_len = 0;
- if (inet->opt)
- inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
- tp->rx_opt.mss_clamp = 536;
- /* Socket identity is still unknown (sport may be zero).
- * However we set state to SYN-SENT and not releasing socket
- * lock select source port, enter ourselves into the hash tables and
- * complete initialization after this.
- */
- tcp_set_state(sk, TCP_SYN_SENT);
- err = inet_hash_connect(&tcp_death_row, sk);
- if (err)
- goto failure;
- err = ip_route_newports(&rt, IPPROTO_TCP,
- inet->sport, inet->dport, sk);
- if (err)
- goto failure;
- /* OK, now commit destination to socket. */
- sk->sk_gso_type = SKB_GSO_TCPV4;
- sk_setup_caps(sk, &rt->u.dst);
- if (!tp->write_seq)
- tp->write_seq = secure_tcp_sequence_number(inet->saddr,
- inet->daddr,
- inet->sport,
- usin->sin_port);
- inet->id = tp->write_seq ^ jiffies;
- err = tcp_connect(sk);
- rt = NULL;
- if (err)
- goto failure;
- return 0;
- failure:
- /*
- * This unhashes the socket and releases the local port,
- * if necessary.
- */
- tcp_set_state(sk, TCP_CLOSE);
- ip_rt_put(rt);
- sk->sk_route_caps = 0;
- inet->dport = 0;
- return err;
- }
点击(此处)折叠或打开
- int inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk)
- {
- return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
- __inet_check_established, __inet_hash_nolisten);
- }
点击(此处)折叠或打开
- int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
- int (*check_established)(struct inet_timewait_death_row *,
- struct sock *, __u16, struct inet_timewait_sock **),
- void (*hash)(struct sock *sk))
- {
- struct inet_hashinfo *hinfo = death_row->hashinfo;
- const unsigned short snum = inet_sk(sk)->num;
- struct inet_bind_hashbucket *head;
- struct inet_bind_bucket *tb;
- int ret;
- struct net *net = sock_net(sk);
- if (!snum) {
- int i, remaining, low, high, port;
- static u32 hint;
- u32 offset = hint + port_offset;
- struct hlist_node *node;
- struct inet_timewait_sock *tw = NULL;
- inet_get_local_port_range(&low, &high);
- remaining = (high - low) + 1;
- local_bh_disable();
- for (i = 1; i <= remaining; i++) {
- port = low + (i + offset) % remaining;
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock(&head->lock);
- /* Does not bother with rcv_saddr checks,
- * because the established check is already
- * unique enough.
- */
- inet_bind_bucket_for_each(tb, node, &head->chain) {
- if (ib_net(tb) == net && tb->port == port) {
- if (tb->fastreuse >= 0)
- goto next_port;
- WARN_ON(hlist_empty(&tb->owners));
- if (!check_established(death_row, sk,
- port, &tw))
- goto ok;
- goto next_port;
- }
- }
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
- net, head, port);
- if (!tb) {
- spin_unlock(&head->lock);
- break;
- }
- tb->fastreuse = -1;
- goto ok;
- next_port:
- spin_unlock(&head->lock);
- }
- local_bh_enable();
- return -EADDRNOTAVAIL;
- ok:
- hint += i;
- /* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, port);
- if (sk_unhashed(sk)) {
- inet_sk(sk)->sport = htons(port);
- hash(sk);
- }
- spin_unlock(&head->lock);
- if (tw) {
- inet_twsk_deschedule(tw, death_row);
- inet_twsk_put(tw);
- }
- ret = 0;
- goto out;
- }
- head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- hash(sk);
- spin_unlock_bh(&head->lock);
- return 0;
- } else {
- spin_unlock(&head->lock);
- /* No definite answer... Walk to established hash table */
- ret = check_established(death_row, sk, snum, NULL);
- out:
- local_bh_enable();
- return ret;
- }
- }
这里分析一下这个函数实现:
调用inet_get_local_port_range(&low, &high) 获取可用的端口列表,这个值就是/proc/sys/net/ipv4/ip_local_port_range 中的值。
点击(此处)折叠或打开
- void inet_get_local_port_range(int *low, int *high)
- {
- unsigned seq;
- do {
- seq = read_seqbegin(&sysctl_local_ports.lock);
- *low = sysctl_local_ports.range[0];
- *high = sysctl_local_ports.range[1];
- } while (read_seqretry(&sysctl_local_ports.lock, seq));
- }
1. bind使用的端口
2. 端口处于非TIME_WAIT状态
3. 端口处于TIME_WAIT状态,但是没有启用tcp_tw_reuse
那么就会返回EADDRNOTAVAIL错误。
一般情况下,出现这个错误应该是代码设计的问题,如果确定代码没有问题,那么根据上面的原则,可用使用如下方法解决问题:
1. 增大可选端口的范围,修改/proc/sys/net/ipv4/ip_local_port_range的值。
2. 开启tcp_tw_reuse,允许使用TIME_WAIT状态的端口。