• files_struct/fdtable解析


    files_struct/fdtable解析

    include/linux/fdtable.h

    /*
     * Open file table structure
     *
     * Bundles a pointer to the fd table currently in use (fdt) with an
     * embedded default table (fdtab) and the storage backing that default
     * table (the *_init bitmaps and fd_array), all sized for NR_OPEN_DEFAULT
     * descriptors.  dup_fd() wires fdtab to this embedded storage and only
     * allocates a separate table when more slots than that are needed.
     */
    struct files_struct {
      /*
       * read mostly part
       */
        /* user count; dup_fd() sets it to 1 for a freshly created copy */
        atomic_t count;
        /*
         * resize_in_progress / resize_wait: dup_fd() initialises these to
         * false and an empty wait queue.  Presumably they coordinate table
         * resizing -- confirm against the expand path, which is not shown
         * here.
         */
        bool resize_in_progress;
        wait_queue_head_t resize_wait;
    
        /* table currently in use; published with rcu_assign_pointer() */
        struct fdtable __rcu *fdt;
        /* embedded default table, backed by the *_init / fd_array members */
        struct fdtable fdtab;
      /*
       * written part on a separate cache line in SMP
       */
        /* taken by dup_fd() around reads of the source table */
        spinlock_t file_lock ____cacheline_aligned_in_smp;
        /* reset to 0 by dup_fd(); presumably a hint for the next fd to try */
        unsigned int next_fd;
        /* one-long bitmaps backing fdtab.close_on_exec / open_fds / full_fds_bits */
        unsigned long close_on_exec_init[1];
        unsigned long open_fds_init[1];
        unsigned long full_fds_bits_init[1];
        /* file pointer slots backing fdtab.fd */
        struct file __rcu * fd_array[NR_OPEN_DEFAULT];
    };

    上述files_struct中最关键的成员是struct fdtable的fdt指针

    fork时,如果父进程需要拷贝的fd数量(即dup_fd()中的open_files)不超过NR_OPEN_DEFAULT,则子进程的fd table直接使用files_struct里内嵌的fdtab;如果超过NR_OPEN_DEFAULT,则不使用内嵌的fdtab,而是调用alloc_fdtable()另行分配fd table。

    然后将父进程的fd table拷贝到新fork的进程的fd table。

    NR_OPEN_DEFAULT定义为BITS_PER_LONG,在64位系统上为64。对于较大的进程,fork子进程时父进程一般已经open了较多file(比如超过64个),此时就会通过alloc_fdtable()分配新的fd table,而不会使用files_struct里内嵌的default fd table;对于小进程,fork子进程时一般直接使用files_struct里的default fd table。

    fs/file.c

    /*
     * Allocate a new files structure and copy contents from the
     * passed in files structure.
     * errorp will be valid only when the returned files_struct is NULL.
     *
     * @oldf:    files structure to copy from; its file_lock is taken here.
     * @max_fds: limit forwarded to sane_fdtable_size() when working out how
     *           many slots to copy.
     * @errorp:  receives -ENOMEM or -EMFILE on failure.
     *
     * Returns the new files structure with a count of 1, or NULL on failure.
     * The copy starts out on the table embedded in the new structure
     * (NR_OPEN_DEFAULT slots) and switches to a table from alloc_fdtable()
     * only if the source needs more slots than that.
     */
    struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
    {
        struct files_struct *newf;
        struct file **old_fds, **new_fds;
        unsigned int open_files, i;
        struct fdtable *old_fdt, *new_fdt;
    
        /* Default error; also covers the kmem_cache_alloc() failure below. */
        *errorp = -ENOMEM;
        newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
        if (!newf)
            goto out;
    
        atomic_set(&newf->count, 1);
    
        spin_lock_init(&newf->file_lock);
        newf->resize_in_progress = false;
        init_waitqueue_head(&newf->resize_wait);
        newf->next_fd = 0;
        /* Start with the embedded table, backed by the embedded arrays. */
        new_fdt = &newf->fdtab;
        new_fdt->max_fds = NR_OPEN_DEFAULT;
        new_fdt->close_on_exec = newf->close_on_exec_init;
        new_fdt->open_fds = newf->open_fds_init;
        new_fdt->full_fds_bits = newf->full_fds_bits_init;
        new_fdt->fd = &newf->fd_array[0];
    
        spin_lock(&oldf->file_lock);
        old_fdt = files_fdtable(oldf);
        /* Number of slots that have to be copied from the source table. */
        open_files = sane_fdtable_size(old_fdt, max_fds);
    
        /*
         * Check whether we need to allocate a larger fd array and fd set.
         */
        while (unlikely(open_files > new_fdt->max_fds)) {
            /* The source lock is not held across the allocation. */
            spin_unlock(&oldf->file_lock);
    
            /* Drop a table from a previous iteration that turned out too small. */
            if (new_fdt != &newf->fdtab)
                __free_fdtable(new_fdt);
    
            /*
             * alloc_fdtable() sizes the table to hold more than the index it
             * is given, so passing the highest needed index is enough.
             */
            new_fdt = alloc_fdtable(open_files - 1);
            if (!new_fdt) {
                *errorp = -ENOMEM;
                goto out_release;
            }
    
            /* beyond sysctl_nr_open; nothing to do */
            if (unlikely(new_fdt->max_fds < open_files)) {
                __free_fdtable(new_fdt);
                *errorp = -EMFILE;
                goto out_release;
            }
    
            /*
             * Reacquire the oldf lock and a pointer to its fd table
             * who knows it may have a new bigger fd table. We need
             * the latest pointer.
             */
            spin_lock(&oldf->file_lock);
            old_fdt = files_fdtable(oldf);
            open_files = sane_fdtable_size(old_fdt, max_fds);
        }
    
        /* From here on oldf->file_lock is held and new_fdt is large enough. */
        copy_fd_bitmaps(new_fdt, old_fdt, open_files);
    
        old_fds = old_fdt->fd;
        new_fds = new_fdt->fd;
    
        /* Copy the file pointers, taking a reference on each one present. */
        for (i = open_files; i != 0; i--) {
            struct file *f = *old_fds++;
            if (f) {
                get_file(f);
            } else {
                /*
                 * The fd may be claimed in the fd bitmap but not yet
                 * instantiated in the files array if a sibling thread
                 * is partway through open().  So make sure that this
                 * fd is available to the new process.
                 */
                /* i counts down, so open_files - i is the slot index. */
                __clear_open_fd(open_files - i, new_fdt);
            }
            rcu_assign_pointer(*new_fds++, f);
        }
        spin_unlock(&oldf->file_lock);
    
        /* clear the remainder */
        memset(new_fds, 0, (new_fdt->max_fds - open_files) * sizeof(struct file *));
    
        /* Install the fully populated table in the new structure. */
        rcu_assign_pointer(newf->fdt, new_fdt);
    
        return newf;
    
    /* Both jumps here happen with oldf->file_lock already released. */
    out_release:
        kmem_cache_free(files_cachep, newf);
    out:
        return NULL;
    }

    fd table里的内容

    include/linux/fdtable.h

    /*
     * One file descriptor table: an array of file pointers plus the bitmaps
     * that describe its slots.
     */
    struct fdtable {
        /* number of slots in fd[]; alloc_fdtable() keeps it a multiple of BITS_PER_LONG */
        unsigned int max_fds;
        struct file __rcu **fd;      /* current fd array */
        /* bitmap with one bit per fd, same size as open_fds */
        unsigned long *close_on_exec;
        /* bitmap with one bit per fd; dup_fd() clears the bit of a slot that has no file */
        unsigned long *open_fds;
        /* bitmap with one bit per long of open_fds (sized by BITBIT_SIZE) */
        unsigned long *full_fds_bits;
        /* NOTE(review): presumably used for RCU-deferred freeing -- confirm against the free path */
        struct rcu_head rcu;
    };

    max_fds,表示此fd table最多能容纳多少个fd;

    struct file的二重指针fd,这个是指向一个数组,这个数组里的元素是struct file *指针;

    open_fds,long型指针,指向一个long型数组,数组中的每个元素的每一个bit代表一个文件,如果这个bit为1,表示此文件已open;

    close_on_exec,结构和open_fds一样,也是每个bit对应一个fd的位图,但含义不同:bit为1表示对应的fd设置了close-on-exec标志(exec时自动关闭);

    full_fds_bits,long型指针,指向一个long型数组,其中每一个bit对应open_fds数组里的一个long型元素,表示该元素的所有bit是否都为1:如果都为1,这个bit置上;只要有一个bit不为1,这个bit将被clear;

    根据alloc_fdtable(),可以看到open_fds/close_on_exec/full_fds_bits三个数组是一次性申请分配的同一块内存,内存layout顺序依次是open_fds/close_on_exec/full_fds_bits

    alloc_fdtable()

    fs/file.c

    /*
     * Allocate an fd table able to hold at least index @nr, unless
     * sysctl_nr_open caps it lower (the caller has to check max_fds).
     *
     * Three allocations are made: the fdtable itself, the array of file
     * pointers, and one buffer that is carved into the open_fds,
     * close_on_exec and full_fds_bits bitmaps, in that order.  The arrays
     * are not initialised here; dup_fd() fills them in afterwards.
     *
     * Returns the new table, or NULL if any allocation fails.
     */
    static struct fdtable * alloc_fdtable(unsigned int nr)
    {
        struct fdtable *fdt;
        void *data;
    
        /*
         * Figure out how many fds we actually want to support in this fdtable.
         * Allocation steps are keyed to the size of the fdarray, since it
         * grows far faster than any of the other dynamic data. We try to fit
         * the fdarray into comfortable page-tuned chunks: starting at 1024B
         * and growing in powers of two from there on.
         */
        /* nr -> number of whole 1024-byte chunks of file pointers below it */
        nr /= (1024 / sizeof(struct file *));
        /* the +1 makes the result strictly larger than the requested index */
        nr = roundup_pow_of_two(nr + 1);
        /* back from chunks to a slot count */
        nr *= (1024 / sizeof(struct file *));
        /*
         * Note that this can drive nr *below* what we had passed if sysctl_nr_open
         * had been set lower between the check in expand_files() and here.  Deal
         * with that in caller, it's cheaper that way.
         *
         * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
         * bitmaps handling below becomes unpleasant, to put it mildly...
         */
        if (unlikely(nr > sysctl_nr_open))
            /* sysctl_nr_open rounded up to a multiple of BITS_PER_LONG */
            nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
    
        fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
        if (!fdt)
            goto out;
        fdt->max_fds = nr;
        data = kvmalloc_array(nr, sizeof(struct file *), GFP_KERNEL_ACCOUNT);
        if (!data)
            goto out_fdt;
        fdt->fd = data;
    
        /*
         * One buffer for all three bitmaps: two of nr bits each, plus
         * BITBIT_SIZE(nr) bytes for full_fds_bits, and never smaller than
         * L1_CACHE_BYTES.
         */
        data = kvmalloc(max_t(size_t,
                     2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES),
                     GFP_KERNEL_ACCOUNT);
        if (!data)
            goto out_arr;
        /* Carve the buffer; the void * arithmetic below advances in bytes (GNU C). */
        fdt->open_fds = data;
        data += nr / BITS_PER_BYTE;
        fdt->close_on_exec = data;
        data += nr / BITS_PER_BYTE;
        fdt->full_fds_bits = data;
    
        return fdt;
    
    /* Unwind in reverse order of allocation. */
    out_arr:
        kvfree(fdt->fd);
    out_fdt:
        kfree(fdt);
    out:
        return NULL;
    }

    重点看下上述kvmalloc(),这个分配的大小是2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr)(并通过max_t保证不小于L1_CACHE_BYTES)。nr表示此fd table要容纳多少个fd;位图中一个bit表示一个fd,所以nr / BITS_PER_BYTE表示容纳nr个fd的一个位图需要多少个byte;

    *2是因为有open_fds和close_on_exec两个大小一样的数组;

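    BITBIT_SIZE(nr),这个宏定义如下。假设nr为65*64,则open_fds需要BITS_TO_LONGS(65*64) = 65个long;再为这65个long各分配一个bit,需要BITS_TO_LONGS(65) = 2个long(64位系统),所以BITBIT_SIZE(65*64)的结果为2*8字节,即两个long型。这两个long型里的每个bit对应open_fds里的一个元素(long型),bit为1表示该元素的所有bit均为1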

    /*
     * Sizing of the full_fds_bits bitmap for an nr-bit open_fds bitmap:
     * BITBIT_NR is the number of longs needed to hold one bit per long of
     * open_fds, and BITBIT_SIZE is that amount in bytes.
     */
    #define BITBIT_NR(nr)    BITS_TO_LONGS(BITS_TO_LONGS(nr))
    #define BITBIT_SIZE(nr)    (BITBIT_NR(nr) * sizeof(long))
  • 相关阅读:
    pyspark使用及原理
    谷歌地图聚合点使用(GoogleMaps MarkerCluster)
    struts2升级到2.5的配置
    二进制反码补码
    进制转换
    JAVA基础第一章
    TypeError: unhashable type: 'list' 如何解决?
    剪辑模板
    Mysql三天入门(三) 数据库设计的三范式
    Mysql三天入门(三) DBA命令
  • 原文地址:https://www.cnblogs.com/aspirs/p/15442832.html
Copyright © 2020-2023  润新知