• Linux内核Socket实现之------Socket创建(2) 文件描述符


    转载请注明:http://blog.chinaunix.net/uid-20788636-id-4408276.html

    1.2 sock_map_fd函数

             在用户空间创建了一个socket后,返回值是一个文件描述符,下面分析一下创建socket时怎么和文件描述符联系的。在SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)最后调用sock_map_fd进行关联,其中返回的retval就是用户空间获取的文件描述符fd,sock就是调用sock_create创建成功的socket.

             sock_map_fd()主要用于对socket的*file指针初始化,经过sock_map_fd()操作后,socket就通过其*file指针与VFS管理的文件进行了关联,便可以进行文件的各种操作,如read、write、lseek、ioctl等. 

    retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));

    /*
     * sock_map_fd - attach a socket to a freshly reserved file descriptor.
     * @sock:  socket that should become reachable through a descriptor
     * @flags: handed to both get_unused_fd_flags() and sock_alloc_file()
     *
     * Reserves a descriptor, builds the struct file for @sock and installs the
     * file in the descriptor slot.  Returns the descriptor on success.  On
     * failure returns the negative value from get_unused_fd_flags(), or
     * PTR_ERR() of the failed file after giving the reserved descriptor back.
     */
    static int sock_map_fd(struct socket *sock, int flags)
    {
            struct file *sockfile;
            int fd;

            /* Reserve an unused descriptor number honouring @flags. */
            fd = get_unused_fd_flags(flags);
            if (unlikely(fd < 0))
                    return fd;

            sockfile = sock_alloc_file(sock, flags, NULL);
            if (unlikely(IS_ERR(sockfile))) {
                    /* No file to install: release the reserved descriptor. */
                    put_unused_fd(fd);
                    return PTR_ERR(sockfile);
            }

            fd_install(fd, sockfile);
            return fd;
    }

    1.2.1   get_unused_fd_flags函数

             get_unused_fd_flags()函数调用__alloc_fd分配一个新的可用的fd

    int __alloc_fd(struct files_struct *files,

                    unsigned start, unsigned end, unsigned flags)

    {

             unsigned int fd;

             int error;

             struct fdtable *fdt;

             spin_lock(&files->file_lock);

    repeat:

    /*得到本进程的文件描述符表*/

             fdt = files_fdtable(files);

             fd = start;//从start开始,这里的start为0

    /* files->next_fd为上一次查找确定的下一个可用空闲的文件描述符,这样可以提高获取的效率,如果fd小于files->next_fd的话就可以直接使用next_fd */

             if (fd < files->next_fd)

                       fd = files->next_fd;

    /*当fd小于目前进程支持的最大的描述符号,那么可以通过fds_bits位图,从fd位开始查找,找到下一个0位,即下一个空闲描述符。*/

             if (fd < fdt->max_fds)

                       fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);

             /*

              * N.B. For clone tasks sharing a files structure, this test

              * will limit the total number of files that can be opened.

              */

             error = -EMFILE;

             if (fd >= end)

                       goto out;

    /* 如需要则扩展文件描述符表 */

             error = expand_files(files, fd);

             if (error < 0)

                       goto out;

             /*

              * If we needed to expand the fs array we

              * might have blocked - try again.

              */

             if (error)

                       goto repeat;

        /* 

         设置next_fd,用于下次加速查找空闲的fd。

         当start大于next_fd时,不会设置next_fd以避免文件描述符的不连续

         */

             if (start <= files->next_fd)

                       files->next_fd = fd + 1;

      /* 将fd添加到已打开的文件描述符表中 */

             __set_open_fd(fd, fdt);

             if (flags & O_CLOEXEC)

                       __set_close_on_exec(fd, fdt);

             else

                       __clear_close_on_exec(fd, fdt);

             error = fd;

    #if 1

             /* Sanity check */

             if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {

                       printk(KERN_WARNING "alloc_fd: slot %d not NULL! ", fd);

                       rcu_assign_pointer(fdt->fd[fd], NULL);

             }

    #endif

    out:

             spin_unlock(&files->file_lock);

             return error;

    }

    1.2.2 sock_alloc_file函数

    struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)

    {

             struct qstr name = { .name = "" };

             struct path path;

             struct file *file;

             if (dname) {//这里的dname为空

                       name.name = dname;

                       name.len = strlen(name.name);

             } else if (sock->sk) {

     /*这里的name应该是TCP 根据struct proto tcp_prot */

                       name.name = sock->sk->sk_prot_creator->name;

                       name.len = strlen(name.name);

             }

    /*申请一个新的dentry,其中sock_mnt->mnt_sb在前面已经分析过了,是一个sock_fs_type文件系统挂载点,*/

             path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);

             if (unlikely(!path.dentry))

                       return ERR_PTR(-ENOMEM);

             path.mnt = mntget(sock_mnt);

    /*将文件操作的函数绑定到inode,对于dentry是在sockfs_mount函数中sockfs_dentry_operations,该函数在sock_init是调用,在前面有分析 */

             d_instantiate(path.dentry, SOCK_INODE(sock));

             SOCK_INODE(sock)->i_fop = &socket_file_ops;

    /*申请新的file,将path,file,关联起来*/

             file = alloc_file(&path, FMODE_READ | FMODE_WRITE,

                         &socket_file_ops);

             if (unlikely(IS_ERR(file))) {

                       /* drop dentry, keep inode */

                       ihold(path.dentry->d_inode);

                       path_put(&path);

                       return file;

             }

             sock->file = file;//sock->file和刚分配的file关联起来

             file->f_flags = O_RDWR | (flags & O_NONBLOCK);//设置file的标志

             file->private_data = sock;//file的私有数据指针指向sock.

             return file;

    }

    Socket创建流程图

    附录:对于sk_alloc分配的内存大小问题分析

             在分析中经常看到这种类型的强制转换:inet = inet_sk(sk);,其中inet被定义为struct inet_sock *inet;。从结构体的定义来看,struct sock的大小小于struct inet_sock,按理说不能直接把sock指针当作inet_sock指针来使用;但在实际分配sock时,分配的大小是prot->obj_size(对TCP来说就是sizeof(struct tcp_sock)),这块内存足够大,因此这种强制转换是安全的。

    /*
     * sk_alloc - allocate a sock for @prot and initialise its basic fields.
     * @net:      network namespace; a reference is taken with get_net()
     * @family:   stored in sk->sk_family
     * @priority: allocation flags; __GFP_ZERO is always added
     * @prot:     protocol descriptor, recorded as both sk_prot and sk_prot_creator
     *
     * Returns the new sock, or NULL when sk_prot_alloc() fails.
     */
    struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
                          struct proto *prot)
    {
            struct sock *sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);

            if (!sk)
                    return NULL;

            sk->sk_family = family;
            /*
             * See comment in struct sock definition to understand
             * why we need sk_prot_creator -acme
             */
            sk->sk_prot_creator = prot;
            sk->sk_prot = prot;
            sock_lock_init(sk);
            sock_net_set(sk, get_net(net));
            atomic_set(&sk->sk_wmem_alloc, 1);

            sock_update_classid(sk);
            sock_update_netprioidx(sk);

            return sk;
    }

    /*
     * sk_prot_alloc - allocate the memory for a sock of protocol @prot.
     * @prot:     protocol descriptor; supplies slab, obj_size, clear_sk and owner
     * @priority: allocation flags; __GFP_ZERO requests a cleared object
     * @family:   passed to security_sk_alloc()
     *
     * Memory comes from one of two places: prot->slab when the protocol has a
     * slab cache (case 1), otherwise kmalloc() of prot->obj_size bytes (case 2).
     *
     * Returns the sock, or NULL when the allocation, security_sk_alloc() or
     * try_module_get() fails; in the latter two cases the object is freed.
     */
    static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
                                      int family)
    {
            struct sock *sk;
            struct kmem_cache *slab;

            slab = prot->slab;
            if (slab != NULL) {
                    /* (1) slab-cache allocation; __GFP_ZERO is masked off here */
                    sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
                    if (!sk)
                            return sk;
                    /* Clearing is done by hand when the caller asked for it. */
                    if (priority & __GFP_ZERO) {
                            if (prot->clear_sk)
                                    prot->clear_sk(sk, prot->obj_size);
                            else
                                    sk_prot_clear_nulls(sk, prot->obj_size);
                    }
            } else {
                    /* (2) plain allocation of prot->obj_size bytes */
                    sk = kmalloc(prot->obj_size, priority);
            }

            if (sk != NULL) {
                    kmemcheck_annotate_bitfield(sk, flags);

                    if (security_sk_alloc(sk, family, priority))
                            goto out_free;

                    if (!try_module_get(prot->owner))
                            goto out_free_sec;
                    sk_tx_queue_clear(sk);
            }

            return sk;

    out_free_sec:
            security_sk_free(sk);
    out_free:
            /* Give the object back to wherever it was allocated from. */
            if (slab != NULL)
                    kmem_cache_free(slab, sk);
            else
                    kfree(sk);
            return NULL;
    }

    (1)第一种情况:sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO)。这里的slab来自slab = prot->slab;,而prot就是函数参数传递进来的struct proto *prot。再看一下这个prot是怎么来的:在inet_create函数中调用sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);,其中answer_prot = answer->prot;。那么answer->prot又是如何得到的?

             在inet_create函数中,通过遍历inetsw数组获取到struct inet_protosw *answer;

    /*
     * Walk the inetsw entries registered for sock->type, looking for one that
     * matches the requested protocol.  The loop is left via break on a match;
     * an iteration that does not match ends by setting err to
     * -EPROTONOSUPPORT.
     */
    list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {

                       err = 0;

                       /* Check the non-wild match. */

                       if (protocol == answer->protocol) {

                                /* An exact match only counts when it is not the IPPROTO_IP wildcard. */
                                if (protocol != IPPROTO_IP)

                                         break;

                       } else {

                                /* Check for the two wild cases. */

                                /* Caller passed the wildcard: adopt this entry's protocol. */
                                if (IPPROTO_IP == protocol) {

                                         protocol = answer->protocol;

                                         break;

                                }

                                /* Entry itself is a wildcard: it accepts any protocol. */
                                if (IPPROTO_IP == answer->protocol)

                                         break;

                       }

                       err = -EPROTONOSUPPORT;

             }

             其中inetsw链表中的元素来自下面这个inetsw_array数组,如果是SOCK_STREAM类型的socket,这里的prot = &tcp_prot

    static struct inet_protosw inetsw_array[] =

    {

             {

                       .type =       SOCK_STREAM,

                       .protocol =   IPPROTO_TCP,

                      .prot =       &tcp_prot,

                       .ops =        &inet_stream_ops,

                       .no_check =   0,

                       .flags =      INET_PROTOSW_PERMANENT |

                                      INET_PROTOSW_ICSK,

             },

             {

                       .type =       SOCK_DGRAM,

                       .protocol =   IPPROTO_UDP,

                       .prot =       &udp_prot,

                       .ops =        &inet_dgram_ops,

                       .no_check =   UDP_CSUM_DEFAULT,

                       .flags =      INET_PROTOSW_PERMANENT,

           },

           {

                       .type =       SOCK_DGRAM,

                       .protocol =   IPPROTO_ICMP,

                       .prot =       &ping_prot,

                       .ops =        &inet_dgram_ops,

                       .no_check =   UDP_CSUM_DEFAULT,

                       .flags =      INET_PROTOSW_REUSE,

           },

           {

                    .type =       SOCK_RAW,

                    .protocol =   IPPROTO_IP,       /* wild card */

                    .prot =       &raw_prot,

                    .ops =        &inet_sockraw_ops,

                    .no_check =   UDP_CSUM_DEFAULT,

                    .flags =      INET_PROTOSW_REUSE,

           }

    };

             再看一下

    struct proto tcp_prot = {

             .name                         = "TCP",

             .owner                        = THIS_MODULE,

             .close                          = tcp_close,

             .connect            = tcp_v4_connect,

             .disconnect                = tcp_disconnect,

             .accept                       = inet_csk_accept,

             .ioctl                            = tcp_ioctl,

             .init                     = tcp_v4_init_sock,

             .destroy            = tcp_v4_destroy_sock,

             .shutdown                 = tcp_shutdown,

             .setsockopt               = tcp_setsockopt,

             .getsockopt               = tcp_getsockopt,

             .recvmsg           = tcp_recvmsg,

             .sendmsg                   = tcp_sendmsg,

             .sendpage                  = tcp_sendpage,

             .backlog_rcv              = tcp_v4_do_rcv,

             .release_cb               = tcp_release_cb,

             .mtu_reduced          = tcp_v4_mtu_reduced,

             .hash                           = inet_hash,

             .unhash                      = inet_unhash,

             .get_port          = inet_csk_get_port,

             .enter_memory_pressure       = tcp_enter_memory_pressure,

             .stream_memory_free    = tcp_stream_memory_free,

             .sockets_allocated  = &tcp_sockets_allocated,

             .orphan_count                   = &tcp_orphan_count,

             .memory_allocated = &tcp_memory_allocated,

             .memory_pressure = &tcp_memory_pressure,

             .sysctl_mem             = sysctl_tcp_mem,

             .sysctl_wmem          = sysctl_tcp_wmem,

             .sysctl_rmem            = sysctl_tcp_rmem,

             .max_header            = MAX_TCP_HEADER,

             .obj_size           = sizeof(struct tcp_sock),

             .slab_flags                 = SLAB_DESTROY_BY_RCU,

             .twsk_prot                 = &tcp_timewait_sock_ops,

             .rsk_prot           = &tcp_request_sock_ops,

             .h.hashinfo                = &tcp_hashinfo,

             .no_autobind            = true,

    #ifdef CONFIG_COMPAT

             .compat_setsockopt        = compat_tcp_setsockopt,

             .compat_getsockopt        = compat_tcp_getsockopt,

    #endif

    #ifdef CONFIG_MEMCG_KMEM

             .init_cgroup               = tcp_init_cgroup,

             .destroy_cgroup                = tcp_destroy_cgroup,

             .proto_cgroup          = tcp_proto_cgroup,

    #endif

    };

             在af_inet.c文件的inet_init函数中,通过proto_register对tcp_prot进行注册:

    /*
     * inet_init - initialise the IPv4 protocol family.
     *
     * Registers the TCP, UDP, raw and ping struct protos, registers the inet
     * socket family, adds the base protocols, fills the inetsw lists from
     * inetsw_array and then initialises the individual protocol modules.
     *
     * Returns 0 on success.  Returns -EINVAL when the reserved-ports bitmap
     * cannot be allocated, or the proto_register() error after unregistering
     * the protos registered so far and freeing the bitmap.
     */
    static int __init inet_init(void)
    {
            struct inet_protosw *q;
            struct list_head *r;
            int rc = -EINVAL;

            BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));

            sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
            if (!sysctl_local_reserved_ports)
                    goto out;

            /*
             * Register tcp_prot.  With alloc_slab == 1, proto_register() also
             * creates the slab cache stored in tcp_prot.slab.
             */
            rc = proto_register(&tcp_prot, 1);
            if (rc)
                    goto out_free_reserved_ports;

            rc = proto_register(&udp_prot, 1);
            if (rc)
                    goto out_unregister_tcp_proto;

            rc = proto_register(&raw_prot, 1);
            if (rc)
                    goto out_unregister_udp_proto;

            rc = proto_register(&ping_prot, 1);
            if (rc)
                    goto out_unregister_raw_proto;

            /*
             *      Tell SOCKET that we are alive...
             */

            (void)sock_register(&inet_family_ops);

    #ifdef CONFIG_SYSCTL
            ip_static_sysctl_init();
    #endif

            /*
             *      Add all the base protocols.
             */

            if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
                    pr_crit("%s: Cannot add ICMP protocol\n", __func__);
            if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
                    pr_crit("%s: Cannot add UDP protocol\n", __func__);
            if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
                    pr_crit("%s: Cannot add TCP protocol\n", __func__);
    #ifdef CONFIG_IP_MULTICAST
            if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
                    pr_crit("%s: Cannot add IGMP protocol\n", __func__);
    #endif

            /* Register the socket-side information for inet_create. */
            /* First initialise every inetsw list head. */
            for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
                    INIT_LIST_HEAD(r);

            /*
             * Then register each inetsw_array entry, so that inet_create can
             * find it when it walks inetsw.
             */
            for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
                    inet_register_protosw(q);

            /*
             *      Set the ARP module up
             */

            arp_init();

            /*
             *      Set the IP module up
             */

            ip_init();

            tcp_v4_init();

            /* Setup TCP slab cache for open requests. */
            tcp_init();

            /* Setup UDP memory threshold */
            udp_init();

            /* Add UDP-Lite (RFC 3828) */
            udplite4_register();

            ping_init();

            /*
             *      Set the ICMP layer up
             */

            if (icmp_init() < 0)
                    panic("Failed to create the ICMP control socket.\n");

            /*
             *      Initialise the multicast router
             */
    #if defined(CONFIG_IP_MROUTE)
            if (ip_mr_init())
                    pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
    #endif
            /*
             *      Initialise per-cpu ipv4 mibs
             */

            if (init_ipv4_mibs())
                    pr_crit("%s: Cannot init ipv4 mibs\n", __func__);

            ipv4_proc_init();

            ipfrag_init();

            dev_add_pack(&ip_packet_type);

            rc = 0;
    out:
            return rc;

            /* Error unwinding: undo the registrations in reverse order. */
    out_unregister_raw_proto:
            proto_unregister(&raw_prot);
    out_unregister_udp_proto:
            proto_unregister(&udp_prot);
    out_unregister_tcp_proto:
            proto_unregister(&tcp_prot);
    out_free_reserved_ports:
            kfree(sysctl_local_reserved_ports);
            goto out;
    }

             在proto_register函数中,主要关注对prot->slab的初始化。

    /*
     * proto_register - excerpt showing only the slab setup.
     * @prot:       protocol descriptor being registered
     * @alloc_slab: when non-zero, a kmem cache is created and stored in
     *              prot->slab
     *
     * The cache is named prot->name and its objects are prot->obj_size bytes.
     * The rest of the function, including the out label, is elided in this
     * excerpt.
     */
    int proto_register(struct proto *prot, int alloc_slab)

    {

             if (alloc_slab) {

                       prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,

                                                   SLAB_HWCACHE_ALIGN | prot->slab_flags,

                                                   NULL);// for tcp_prot, prot->obj_size is .obj_size = sizeof(struct tcp_sock)

                       /* Cache creation failed: log it and take the error path. */
                       if (prot->slab == NULL) {

                                pr_crit("%s: Can't create sock SLAB cache! ",

                                         prot->name);

                                goto out;

                       }

    ……………………..

    }

    (2)第二种情况:sk = kmalloc(prot->obj_size, priority);。这里主要看prot->obj_size,它就是struct proto tcp_prot中初始化的.obj_size = sizeof(struct tcp_sock)。

             下面是五个相关的数据结构,其中tcp_sock结构体占用的空间最大,所以为TCP socket分配内存时,分配的都是tcp_sock的大小,这样在后面进行强制类型转换时可以保证正确。

  • 相关阅读:
    聊天的时间显示
    Android下Affinities和Task
    android Notification 的使用
    Android Notification使用及取消
    类似微信发图片的样式
    Delphi---TServerSocket和TClientSocket发送和接收大数据包
    使用拷贝文件测试(BufferedInputStream,FileInputStream)
    android-getTextSize返回值是以像素(px)为单位的,setTextSize()以sp为单位
    怎样成为PHP 方向的一个合格的架构师
    mac 查看某个文件夹下所有隐藏文件(夹)的大小
  • 原文地址:https://www.cnblogs.com/feng9exe/p/7001308.html
Copyright © 2020-2023  润新知