• setsockopt的TCP层实现剖析


    应用层

    NAME

        setsockopt - set options on sockets

    SYNOPSIS

        #include <sys/types.h>

        #include <sys/socket.h>

        int setsockopt (int s, int level, int optname, const void *optval, socklen_t optlen);

    EXAMPLE

        自定义一个TCP层Socket选项:TCP_MAX_CWND。

        /* Pass the integer value 1 for the custom TCP_MAX_CWND option at level SOL_TCP. */
        int one = 1;

        setsockopt(sockfd, SOL_TCP, TCP_MAX_CWND, &one, sizeof(one));

    来看一下通用的TCP层Socket选项:

    @netinet/tcp.h:

    /* User-settable options (used with setsockopt). */
    #define TCP_NODELAY    1            /* Don't delay send to coalesce packets */
    #define TCP_MAXSEG    2           /* Set Maximum segment size */
    #define TCP_CORK    3           /* Control sending of partial frames */
    #define TCP_KEEPIDLE    4        /* Start keepalives after this period */
    #define TCP_KEEPINTVL    5        /* Interval between keepalives */
    #define TCP_KEEPCNT    6        /* Number of keepalives before death */
    #define TCP_SYNCNT    7        /* Number of SYN retransmits */
    #define TCP_LINGER2    8        /* Life time of orphaned FIN_WAIT2 state */
    #define TCP_DEFER_ACCEPT    9        /* Wake up listener only when data arrive */ 
    #define TCP_WINDOW_CLAMP    10        /* Bound advertised window */
    #define TCP_INFO    11        /* Information about this connection. Note: this option cannot be set; it is read-only. */
    #define TCP_QUICKACK    12        /* Block/reenable quick ACKs */ 

    Linux除了支持以上通用的TCP层Socket选项,还支持一些它特有的选项(较新的版本中又多了一些):

    @linux/tcp.h:

    /* TCP socket options */
    #define TCP_NODELAY    1        /* Turn off Nagle's algorithm. */
    #define TCP_MAXSEG    2        /* Limit MSS */
    #define TCP_CORK    3        /* Never send partially complete segments */
    #define TCP_KEEPIDLE    4        /* Start keepalives after this period */
    #define TCP_KEEPINTVL    5        /* Interval between keepalives */
    #define TCP_KEEPCNT    6        /* Number of keepalives before death */
    #define TCP_SYNCNT    7        /* Number of SYN retransmits */
    #define TCP_LINGER2    8        /* Life time of orphaned FIN_WAIT2 state */
    #define TCP_DEFER_ACCEPT 9        /* Wake up listener only when data arrive */
    #define TCP_WINDOW_CLAMP    10        /* Bound advertised window */
    #define TCP_INFO    11        /* Information about this connection. */
    #define TCP_QUICKACK    12        /* Block/reenable quick acks */
    #define TCP_CONGESTION    13        /* Congestion control algorithm */
    #define TCP_MD5SIG    14        /* TCP MD5 Signature (RFC2385) */
    #define TCP_COOKIE_TRANSACTIONS    15        /* TCP Cookie Transactions */
    #define TCP_THIN_LINEAR_TIMEOUTS    16        /* Use linear timeouts for thin streams */
    #define TCP_THIN_DUPACK    17        /* Fast retrans. after 1 dupack */
    

    函数关系

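    函数调用关系图如下(原图缺失;由下文代码可知:tcp_prot.setsockopt 指向 tcp_setsockopt(),level 为 SOL_TCP 时它调用 do_tcp_setsockopt(),否则转交 icsk->icsk_af_ops->setsockopt()):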

    数据结构

    /*
     * Excerpt of the TCP struct proto instance. It installs tcp_setsockopt()
     * and tcp_getsockopt() as the .setsockopt / .getsockopt handlers.
     * Members replaced by "..." are elided in this excerpt.
     */
    struct proto tcp_prot = {
        .name = "TCP",
        .owner = THIS_MODULE,
        ...
        .setsockopt = tcp_setsockopt,
        .getsockopt = tcp_getsockopt,
        ...
    };
    

    函数实现

    /*
     * Socket-option entry point for TCP sockets.
     *
     * Requests at level SOL_TCP are handled by do_tcp_setsockopt(); any other
     * level is passed to the setsockopt handler of the connection's
     * address-family operations (icsk_af_ops).
     *
     * Returns whatever the selected handler returns.
     */
    int tcp_setsockopt (struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen)
    {
        /* TCP-level option: handle it here. */
        if (level == SOL_TCP)
            return do_tcp_setsockopt(sk, level, optname, optval, optlen);

        /* Not a TCP-level option: delegate to the address-family handler. */
        return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, optname, optval, optlen);
    }
    /*
     * Apply one TCP socket option to sk (excerpt: cases replaced by "..." are
     * elided here).
     *
     * optname selects the option; optval/optlen describe the user-space buffer
     * holding its value. level is not examined in the visible code.
     *
     * TCP_CONGESTION carries a string and returns directly from the first
     * switch. For the remaining visible options an int is read from optval and
     * the option is applied in the second switch, between lock_sock() and
     * release_sock().
     *
     * Returns 0 on success, -EINVAL for a too-short optlen or an out-of-range
     * value, -EFAULT when the user buffer cannot be read, -ENOPROTOOPT for an
     * option name not handled by the second switch, or the result of
     * tcp_set_congestion_control() for TCP_CONGESTION.
     */
    static int do_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen)
    {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        int val;
        int err = 0;
    
        /* These are data/string values, all the others are ints */
        switch (optname) {
            /* Select the TCP congestion control algorithm this connection should use */
            case TCP_CONGESTION: { 
                char name[TCP_CA_NAME_MAX];
    
                if (optlen < 1)
                    return -EINVAL;
    
                /* Copy at most TCP_CA_NAME_MAX - 1 bytes so the terminator written below
                 * always fits (the original note says the name should not exceed 15 bytes) */
                val = strncpy_from_user(name, optval,  
                             min_t(long, TCP_CA_NAME_MAX - 1, optlen));
    
                if (val < 0)
                    return -EFAULT;
                name[val] = 0; /* terminate the copied name */
    
                lock_sock(sk);
                err = tcp_set_congestion_control(sk, name);
                release_sock(sk);
    
                return err;
            }
    
            case TCP_COOKIE_TRANSACTIONS: {
                ...
            }
    
            default:
                break; /* not a data/string option: continue with the int handling below */
        }
     
        if (optlen < sizeof(int))
            return -EINVAL; /* -22, Invalid argument */
    
        if (get_user(val, (int __user *) optval)) /* fetch the int value from user space */
            return -EFAULT; /* -14, Bad address */
    
        lock_sock(sk); 
    
        switch(optname) {
            case TCP_MAXSEG:
                /* Values greater than interface MTU won't take effect. However at the point
                 * when this call is done we typically don't yet know which interface is going to be used */
                if (val < TCP_MIN_MSS || val > MAX_TCP_WINDOW) { /* must be within 88 ~ 32767 (per the original note) */
                    err = -EINVAL;
                    break;
                }
                tp->rx_opt.user_mss = val; /* per the original note: from now on neither the local nor the peer MSS exceeds this value */
                break;
            ...
            case TCP_WINDOW_CLAMP:
                if (! val) {
                    /* A zero clamp is only accepted while the socket is in TCP_CLOSE */
                    if (sk->sk_state != TCP_CLOSE) {
                        err = -EINVAL;
                        break;
                    }
                    tp->window_clamp = 0; /* tp->window_clamp: Maximal window to advertise */
    
                } else /* values below SOCK_MIN_RCVBUF / 2 are raised to it; the original note gives this floor as (2048 + sizeof(struct sk_buff)) / 2 */
                    tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? SOCK_MIN_RCVBUF/2 : val;
                break;
            ...
            case TCP_THIN_DUPACK:
                /* Boolean option: only 0 and 1 are accepted */
                if (val < 0 || val > 1)
                    err = -EINVAL;
                else
                    tp->thin_dupack = val;
                break;
    
            case TCP_MAX_CWND:   // custom option added by the article's author
                /* NOTE(review): 0 passes this check and is stored as the clamp; confirm that is intended */
                if (val < 0)
                    err = -EINVAL;
                else
                    tp->snd_cwnd_clamp = val; /* change max value of snd_cwnd */
                break;
            ...
            default:
                err = -ENOPROTOOPT; /* -92, protocol option not available */
        }
    
        release_sock(sk);
        return err;
    }
    

    Author

    zhangskd @ csdn blog

  • 相关阅读:
    [ACM] POJ 3687 Labeling Balls (拓扑排序,反向生成端)
    xml和json选择奖
    android 如何分析java.lang.IllegalArgumentException: Cannot draw recycled bitmaps异常
    代码农民提高生产力
    'Basic' attribute type should not be a persistence entity/a container
    13 适配器
    密码学基础知识(四)分组密码
    PKCS #1 RSA Encryption Version 1.5 填充方式
    rsa加密--选择padding模式需要注意的问题。。。
    RSA PKCS1 填充方式
  • 原文地址:https://www.cnblogs.com/aiwz/p/6333344.html
Copyright © 2020-2023  润新知