• linux源码解读(二十):网络通信简介——socket&sock结构体介绍


      linux下的网络编程离不开socket,中文被翻译为套接字。任何网络通信都必须先建立socket,再通过socket给对方收发数据!服务端建立socket并接受连接的demo代码如下:

    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/types.h>
    #include <unistd.h>
    /* TCP port the demo server listens on. */
    #define SET_PORT 3490

    /*
     * Minimal TCP server demo: create a stream socket, bind it to SET_PORT on
     * every local address, listen, accept one connection and then close both
     * descriptors.
     *
     * Returns 0 on success, 1 if any socket call fails (the failing call is
     * reported through perror()).
     */
    int main(void)
    {
        int sockfd, new_fd;
        struct sockaddr_in my_addr;
        struct sockaddr_in their_addr;
        socklen_t sin_size;             /* accept() expects a socklen_t *, not int * */

        sockfd = socket(PF_INET, SOCK_STREAM, 0);
        if (sockfd < 0) {
            perror("socket");
            return 1;
        }

        /* Zero the whole address (including the sin_zero padding) before filling it. */
        memset(&my_addr, 0, sizeof(my_addr));
        my_addr.sin_family = AF_INET;
        my_addr.sin_port = htons(SET_PORT);
        my_addr.sin_addr.s_addr = htonl(INADDR_ANY);

        /* Bind the socket to the local address. */
        if (bind(sockfd, (struct sockaddr *)&my_addr, sizeof(my_addr)) < 0) {
            perror("bind");
            close(sockfd);
            return 1;
        }

        /* Start listening; up to 10 pending connections are queued. */
        if (listen(sockfd, 10) < 0) {
            perror("listen");
            close(sockfd);
            return 1;
        }

        /* Block until a peer connects; their_addr receives the peer address. */
        sin_size = sizeof(their_addr);
        new_fd = accept(sockfd, (struct sockaddr *)&their_addr, &sin_size);
        if (new_fd < 0) {
            perror("accept");
            close(sockfd);
            return 1;
        }

        close(new_fd);
        close(sockfd);
        return 0;
    }

          可以看出,需要先调用socket函数建立socket,再绑定套接字,最后监听并接受连接。这个socket到底是啥?linux在内核中又是怎么使用的呢?

          1、(1)socket是个结构体,字段不多,但是嵌套了其他结构体,各种嵌套的关系标识如下:

    •   proto_ops:用户层调用的各种接口就是在这里注册的(篇幅有限,截图的字段不全)
    •        wq:等待该socket的进程队列和异步通知队列;换句话说:同一个socket可能有多个进程都在等待使用!
    •        sock:应该是socket结构体最核心的嵌套结构体了(篇幅有限,截图的字段不全)!

           

           (2)socket结构体有了,接下来就是创建和初始化了!linux内核创建socket的函数是__sock_create,核心代码如下:

    /*
     * __sock_create (abridged excerpt; the "....." lines mark code the article
     * left out, including how pf is looked up and how the result is returned).
     *
     * The part shown allocates a struct socket with sock_alloc() and then lets
     * the protocol family's create hook finish the setup.
     */
    int __sock_create(struct net *net, int family, int type, int protocol,
                 struct socket **res, int kern)
    {
        int err;
        struct socket *sock;
        const struct net_proto_family *pf;
            .........
    
         /*
         *    Allocate the socket and allow the family to set things up. if
         *    the protocol is 0, the family is instructed to select an appropriate
         *    default.
            In essence: the socket structure is created together with an inode,
            so it can be looked up and managed through the superblock.
         */
        sock = sock_alloc();
            .........
            /* The family-specific part of the socket is created here. For PF_INET
              the hook that actually runs is inet_create, as registered in
              af_inet.c:
              static const struct net_proto_family inet_family_ops = {
                   .family = PF_INET,
                   .create = inet_create,
                   .owner    = THIS_MODULE,
        };*/
        err = pf->create(net, sock, protocol, kern);
        ..................
    }    

            创建socket的核心函数就2个:sock_alloc,还有pf->create!先看第一个sock_alloc,代码如下:

    /**
     *    sock_alloc    -    obtain a new socket object backed by an inode
     *
     *    A pseudo inode is taken from the superblock of the socket mount
     *    (sock_mnt->mnt_sb) and the socket paired with that inode is fetched
     *    through SOCKET_I(). The inode is then given an inode number, the
     *    S_IFSOCK mode with rwx for user/group/other, the caller's fsuid and
     *    fsgid, and the sockfs inode operations. Finally sockets_in_use is
     *    bumped by one via this_cpu_add().
     *
     *    Returns the socket, or NULL when no inode could be allocated.
     *
     *    Why does allocating a socket allocate an inode (author's notes)?
     *    1. sockets need bookkeeping too; living in an inode lets them be
     *       found and managed through the superblock
     *    2. the socket's attributes are naturally kept in the inode
     *    3. it follows the "everything is a file" philosophy
     */
    
    struct socket *sock_alloc(void)
    {
        /* Take an inode from the socket filesystem's superblock. */
        struct inode *node = new_inode_pseudo(sock_mnt->mnt_sb);
        struct socket *new_sock;
    
        if (node == NULL)
            return NULL;
    
        /* The inode and the socket are tied together; map one to the other. */
        new_sock = SOCKET_I(node);
        /* kmemcheck annotation for the bitfield named "type" (helper not shown). */
        kmemcheck_annotate_bitfield(new_sock, type);
    
        /* Fill in the inode's identity, mode, ownership and operations. */
        node->i_ino = get_next_ino();
        node->i_mode = S_IFSOCK | S_IRWXUGO;
        node->i_uid = current_fsuid();
        node->i_gid = current_fsgid();
        node->i_op = &sockfs_inode_ops;
    
        this_cpu_add(sockets_in_use, 1);
    
        return new_sock;
    }

      本质上就是分配一个inode,然后和socket结构体绑定,通过inode寻址socket结构体!socket结构体有了,接下来就是在socket内部嵌套的sock结构体了!其生成和初始化的工作都是在inet_create内部完成的,代码如下:

    /*
     * inet_create - build and initialise the struct sock behind a new socket.
     *
     * @net:      network namespace; used for the CAP_NET_RAW check, sk_alloc()
     *            and the ipv4.sysctl_ip_no_pmtu_disc setting.
     * @sock:     socket being set up; sock->type selects the inetsw[] list that
     *            is searched.
     * @protocol: requested protocol. IPPROTO_IP in the request acts as a wildcard
     *            and is replaced by the protocol of the first list entry; an
     *            entry whose protocol is IPPROTO_IP matches any request.
     * @kern:     non-zero skips the CAP_NET_RAW check for SOCK_RAW; also passed
     *            on to sk_alloc().
     *
     * Returns 0 on success or a negative errno:
     *   -EINVAL           protocol is outside [0, IPPROTO_MAX)
     *   -ESOCKTNOSUPPORT  the inetsw[] list for this type is empty
     *   -EPROTONOSUPPORT  the list has entries but none matches
     *   -EPERM            raw socket requested without CAP_NET_RAW
     *   -ENOBUFS          sk_alloc() returned NULL
     *   or whatever sk_prot->hash() / sk_prot->init() return on failure.
     */
    static int inet_create(struct net *net, struct socket *sock, int protocol,
                   int kern)
    {
        struct sock *sk;
        struct inet_protosw *answer;
        struct inet_sock *inet;
        struct proto *answer_prot;
        unsigned char answer_flags;
        int try_loading_module = 0;
        int err;
    
        if (protocol < 0 || protocol >= IPPROTO_MAX)
            return -EINVAL;
    
        sock->state = SS_UNCONNECTED;/* a new socket starts out unconnected */
    
        /* Look for the requested type/protocol pair. */
    lookup_protocol:
        err = -ESOCKTNOSUPPORT;
        rcu_read_lock();
        list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
    
            err = 0;
            /* Check the non-wild match. */
            if (protocol == answer->protocol) {
                if (protocol != IPPROTO_IP)
                    break;
            } else {
                /* Check for the two wild cases. */
                if (IPPROTO_IP == protocol) {
                    protocol = answer->protocol;
                    break;
                }
                if (IPPROTO_IP == answer->protocol)
                    break;
            }
            err = -EPROTONOSUPPORT;
        }
    
        /*
         * No match: drop the RCU read lock, ask for a module to be loaded and
         * search again. At most two attempts are made before giving up.
         */
        if (unlikely(err)) {
            if (try_loading_module < 2) {
                rcu_read_unlock();
                /*
                 * Be more specific, e.g. net-pf-2-proto-132-type-1
                 * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
                 */
                if (++try_loading_module == 1)
                    request_module("net-pf-%d-proto-%d-type-%d",
                               PF_INET, protocol, sock->type);
                /*
                 * Fall back to generic, e.g. net-pf-2-proto-132
                 * (net-pf-PF_INET-proto-IPPROTO_SCTP)
                 */
                else
                    request_module("net-pf-%d-proto-%d",
                               PF_INET, protocol);
                goto lookup_protocol;
            } else
                goto out_rcu_unlock;
        }
    
        /* Raw sockets created from user context require CAP_NET_RAW. */
        err = -EPERM;
        if (sock->type == SOCK_RAW && !kern &&
            !ns_capable(net->user_ns, CAP_NET_RAW))
            goto out_rcu_unlock;
    
        /* Copy what is needed from the matched entry before leaving the RCU section. */
        sock->ops = answer->ops;
        answer_prot = answer->prot;
        answer_flags = answer->flags;
        rcu_read_unlock();
    
        WARN_ON(!answer_prot->slab);
    
        err = -ENOBUFS;
        /*
         * Allocate the sock instance for this protocol. sk_alloc() is defined
         * elsewhere; per the article it allocates from a cache or the heap and
         * initialises the object -- TODO confirm against its definition.
         */
        sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
        if (!sk)
            goto out;
    
        err = 0;
        if (INET_PROTOSW_REUSE & answer_flags)
            sk->sk_reuse = SK_CAN_REUSE;
        /*
         * 1. View the sock as an inet_sock so the INET-specific fields can be
         *    initialised.
         * 2. inet_sk() is not shown here; presumably it is a cast, so inet and
         *    sk refer to the same memory and both pointers can be used side by
         *    side -- verify against its definition.
         */
        inet = inet_sk(sk);
        inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
    
        inet->nodefrag = 0;
    
        if (SOCK_RAW == sock->type) {
            inet->inet_num = protocol;
            if (IPPROTO_RAW == protocol)
                inet->hdrincl = 1;
        }
    
        if (net->ipv4.sysctl_ip_no_pmtu_disc)
            inet->pmtudisc = IP_PMTUDISC_DONT;
        else
            inet->pmtudisc = IP_PMTUDISC_WANT;
    
        inet->inet_id = 0;
        /*
         * sock_init_data():
         * 1. initialises the sk_buff receive, write and error queues
         * 2. links the socket and sock instances
         * 3. installs the default sock callbacks
         * 4. initialises the remaining sock fields
         */
        sock_init_data(sock, sk);
    
        sk->sk_destruct       = inet_sock_destruct;/* callback run on destruction */
        sk->sk_protocol       = protocol;/* protocol number */
        sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
        /* sk and inet are used alternately to finish the initialisation */
        inet->uc_ttl    = -1;
        inet->mc_loop    = 1;
        inet->mc_ttl    = 1;
        inet->mc_all    = 1;
        inet->mc_index    = 0;
        inet->mc_list    = NULL;
        inet->rcv_tos    = 0;
    
        /*
         * The article describes this as "reference count + 1".
         * NOTE(review): the helper is not shown; it looks like a refcount
         * debugging counter rather than sk_refcnt itself -- confirm.
         */
        sk_refcnt_debug_inc(sk);
    
        if (inet->inet_num) {
            /* It assumes that any protocol which allows
             * the user to assign a number at socket
             * creation time automatically
             * shares.
             */
            inet->inet_sport = htons(inet->inet_num);
            /* Add to protocol hash chains. */
            err = sk->sk_prot->hash(sk);
            if (err) {
                sk_common_release(sk);
                goto out;
            }
        }
    
        /* Run the protocol's own init hook, if any; release the sock if it fails. */
        if (sk->sk_prot->init) {
            err = sk->sk_prot->init(sk);
            if (err)
                sk_common_release(sk);
        }
    out:
        return err;
    out_rcu_unlock:
        /* Error path taken while the RCU read lock is still held. */
        rcu_read_unlock();
        goto out;
    }

      整个逻辑并不复杂,先是调用sk_alloc函数生成sock实例,再调用sock_init_data初始化sock实例,并和socket实例关联,所以我个人认为sock_init_data是最核心的函数,如下:

    /*
     * sock_init_data - put a struct sock into its default initial state.
     *
     * @sock: owning socket, or NULL. When non-NULL, its type and wait queue are
     *        copied into sk and sock->sk is pointed back at sk.
     * @sk:   the sock to initialise.
     *
     * What it does:
     * 1. initialises the sk_buff receive, write and error queues
     * 2. links the socket and sock instances
     * 3. installs the default sock callbacks
     * 4. initialises the remaining sock fields
     */
    void sock_init_data(struct socket *sock, struct sock *sk)
    {
        /* initialise the sk_buff receive, write and error queues */
        skb_queue_head_init(&sk->sk_receive_queue);
        skb_queue_head_init(&sk->sk_write_queue);
        skb_queue_head_init(&sk->sk_error_queue);
    
        sk->sk_send_head    =    NULL;
        /* initialise the sock's timer */
        init_timer(&sk->sk_timer);
    
        sk->sk_allocation    =    GFP_KERNEL;
        sk->sk_rcvbuf        =    sysctl_rmem_default;
        sk->sk_sndbuf        =    sysctl_wmem_default;
        sk->sk_state        =    TCP_CLOSE;
        /*
         * Link sk to its socket. sk_set_socket() is defined elsewhere; the
         * reverse link (sock->sk = sk) is made a few lines below.
         */
        sk_set_socket(sk, sock);
    
        sock_set_flag(sk, SOCK_ZAPPED);
    
        if (sock) {
            sk->sk_type    =    sock->type;
            sk->sk_wq    =    sock->wq;
            sock->sk    =    sk;
        } else
            sk->sk_wq    =    NULL;
    
        rwlock_init(&sk->sk_callback_lock);
        lockdep_set_class_and_name(&sk->sk_callback_lock,
                af_callback_keys + sk->sk_family,
                af_family_clock_key_strings[sk->sk_family]);
    
        sk->sk_state_change    =    sock_def_wakeup;/* callback: sock state changed */
        sk->sk_data_ready    =    sock_def_readable;/* callback: data is available to read */
        sk->sk_write_space    =    sock_def_write_space;/* callback: buffer space is available for writing */
        sk->sk_error_report    =    sock_def_error_report;/* callback: an I/O error occurred */
        sk->sk_destruct        =    sock_def_destruct;
    
        sk->sk_frag.page    =    NULL;
        sk->sk_frag.offset    =    0;
        sk->sk_peek_off        =    -1;
    
        sk->sk_peer_pid     =    NULL;
        sk->sk_peer_cred    =    NULL;
        sk->sk_write_pending    =    0;
        sk->sk_rcvlowat        =    1;
        sk->sk_rcvtimeo        =    MAX_SCHEDULE_TIMEOUT;
        sk->sk_sndtimeo        =    MAX_SCHEDULE_TIMEOUT;
    
        sk->sk_stamp = ktime_set(-1L, 0);
    
    #ifdef CONFIG_NET_RX_BUSY_POLL
        sk->sk_napi_id        =    0;
        sk->sk_ll_usec        =    sysctl_net_busy_read;
    #endif
    
        sk->sk_max_pacing_rate = ~0U;
        sk->sk_pacing_rate = ~0U;
        sk->sk_incoming_cpu = -1;
        /*
         * Before updating sk_refcnt, we must commit prior changes to memory
         * (Documentation/RCU/rculist_nulls.txt for details)
         */
        smp_wmb();
        atomic_set(&sk->sk_refcnt, 1);
        atomic_set(&sk->sk_drops, 0);
    }

       上面有几个回调函数,它们的实现逻辑和代码结构基本是一样的:

    /*
     *    Default Socket Callbacks
     *
     *    sock_def_wakeup is installed as sk_state_change, i.e. it handles a
     *    change of the sock's state. Under the RCU read lock it fetches the
     *    sock's wait queue and, if skwq_has_sleeper() reports a sleeper, wakes
     *    every interruptible waiter on that queue.
     */
    
    static void sock_def_wakeup(struct sock *sk)
    {
        struct socket_wq *waitq;
    
        rcu_read_lock();
    
        waitq = rcu_dereference(sk->sk_wq);
        if (!skwq_has_sleeper(waitq))
            goto unlock;    /* nobody is blocked on this socket */
    
        /* Wake all processes waiting on this socket. */
        wake_up_interruptible_all(&waitq->wait);
    
    unlock:
        rcu_read_unlock();
    }
    /*
     * sock_def_readable is installed as sk_data_ready, i.e. it runs when the
     * sock has input data available to read. Under the RCU read lock it wakes
     * sleepers on the sock's wait queue with the "readable" poll mask and then
     * always triggers the asynchronous notification through sk_wake_async().
     */
    static void sock_def_readable(struct sock *sk)
    {
        struct socket_wq *waitq;
    
        rcu_read_lock();
    
        waitq = rcu_dereference(sk->sk_wq);
        if (skwq_has_sleeper(waitq)) {
            /* Wake the processes that are waiting for data. */
            wake_up_interruptible_sync_poll(&waitq->wait,
                            POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND);
        }
    
        /*
         * Asynchronous notification queue handling. Per the article, this
         * sends SIGIO to tell the application that data is readable when it is
         * not already waiting in a recv()-style call; SOCK_WAKE_WAITD is the
         * "how" (handling mode) and POLL_IN the "band" (kind of I/O reported).
         * sk_wake_async() itself is defined elsewhere -- verify there.
         */
        sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
    
        rcu_read_unlock();
    }

      当有可读数据的时候,需要第一时间通知相应的进程来读取数据:阻塞在该socket上的进程由wake_up_interruptible_sync_poll唤醒,异步通知则通过sk_wake_async函数实现;而sk_wake_async最终调用了kill_fasync_rcu,给异步通知队列中的进程发出SIGIO信号,通知这些进程来取数据!异步的好处在这里就凸显了:进程不用在这里空转等数据,而是可以释放cpu去执行其他进程的代码;等socket有数据后,再通过类似中断的形式通知等待的进程来取数据。

    /*
     * rcu_read_lock() is held
     *
     * Despite the "kill" in its name, this function only delivers a signal
     * notification: it walks the fasync list starting at @fa and calls
     * send_sigio() for each entry that still has a file attached.
     *
     * @fa:   head of the fasync_struct list to walk.
     * @sig:  signal being reported; SIGURG is skipped for owners whose signum
     *        is 0.
     * @band: passed through to send_sigio().
     *
     * The walk stops early, after logging an error, if an entry's magic number
     * is not FASYNC_MAGIC.
     */
    static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
    {
        for (; fa; fa = rcu_dereference(fa->fa_next)) {
            unsigned long irq_state;
    
            if (fa->magic != FASYNC_MAGIC) {
                printk(KERN_ERR "kill_fasync: bad magic number in "
                       "fasync_struct!\n");
                return;
            }
    
            /* fa_lock is held while fa_file is examined and signalled. */
            spin_lock_irqsave(&fa->fa_lock, irq_state);
            if (fa->fa_file) {
                struct fown_struct *owner = &fa->fa_file->f_owner;
    
                /* Don't send SIGURG to processes which have not set a
                   queued signum: SIGURG has its own default signalling
                   mechanism. */
                if (sig != SIGURG || owner->signum != 0)
                    send_sigio(owner, fa->fa_fd, band);
            }
            spin_unlock_irqrestore(&fa->fa_lock, irq_state);
        }
    }
  • 相关阅读:
    【BZOJ3110】K大数查询(权值线段树套线段树+标记永久化,整体二分)
    【BZOJ3669】魔法森林(LCT)
    art-template前端高性能模板
    spring新心得
    工作流程
    idea操作
    log4j学习
    对实体 "useSSL" 的引用必须以 ';' 分隔符结尾。
    JUnit4学习
    maven搭建
  • 原文地址:https://www.cnblogs.com/theseventhson/p/15859467.html
Copyright © 2020-2023  润新知