• PROC文件系统


    1. seq_file

    参考: http://blog.chinaunix.net/uid-26084833-id-1754437.html

    seq_file的结构体定义:

       1: struct seq_file {
       2:     char *buf;
       3:     size_t size;
       4:     size_t from;
       5:     size_t count;
       6:     loff_t index;
       7:     loff_t read_pos;
       8:     u64 version;
       9:     struct mutex lock;
      10:     const struct seq_operations *op;
      11:     void *private;
      12: };

    seq_operations的定义:

       1: struct seq_operations {
       2:     void * (*start) (struct seq_file *m, loff_t *pos);
       3:     void (*stop) (struct seq_file *m, void *v);
       4:     void * (*next) (struct seq_file *m, void *v, loff_t *pos);
       5:     int (*show) (struct seq_file *m, void *v);
       6: };

    start函数

    用于指定seq_file文件的读开始位置,返回实际读开始位置,如果指定的位置超过文件末尾,应当返回NULL,start函数可以有一个特殊的返回SEQ_START_TOKEN,它用于让show函数输出文件头,但这只能在pos为0时使用;

    next函数

    用于把seq_file 文件的当前读位置移动到下一个读位置,返回实际的下一个读位置,如果已经到达文件末尾,返回NULL;

    stop函数

    用于在读完seq_file文件后调用,它类似于文件操作close,用于做一些必要的清理,如释放内存、释放锁等;

    show函数

    用于格式化输出,如果成功返回0,否则返回出错码。

    我们查看一下用来打印/proc/mounts信息对应的seq_file操作函数:

       1: const struct seq_operations mounts_op = {
       2:     .start    = m_start,
       3:     .next    = m_next,
       4:     .stop    = m_stop,
       5:     .show    = show_vfsmnt
       6: };

    依次来看各个函数的实现:

       1: static void *m_start(struct seq_file *m, loff_t *pos)
       2: {
       3:     struct proc_mounts *p = m->private;
       4:  
       5:     down_read(&namespace_sem);
       6:     return seq_list_start(&p->ns->list, *pos);
       7: }
       8:  

    down_read(&namespace_sem);

    namespace_sem是一个读写信号量(rw_semaphore),down_read以读者(共享)方式获取它:多个读者可以同时持有,而写者会被阻塞,从而保证在读取namespace相关信息期间数据不会被修改。与之配对的释放操作是up_read。

       1: static struct list_head *mount_hashtable __read_mostly;
       2: static struct kmem_cache *mnt_cache __read_mostly;
       3: static struct rw_semaphore namespace_sem;

    namespace_sem用来保护挂载命名空间中挂载树/挂载链表(例如m_start中遍历的p->ns->list)的并发读写;mount_hashtable本身在这一时期的内核中应当主要由vfsmount_lock保护(待对照对应版本的fs/namespace.c确认)。

    struct proc_mounts *p = m->private;

    这里可以知道,/proc/mounts(以及同样经由mounts_open_common打开的mountinfo等文件)所使用的seq_file的private成员用来保存proc_mounts结构体指针。

       1: struct proc_mounts {
       2:     struct seq_file m; /* must be the first element */
       3:     struct mnt_namespace *ns;
       4:     struct path root;
       5:     int event;
       6: };

    顾名思义,proc_mounts保存的是我们想要的/proc/mounts信息的数据结构。

    那么,proc_mounts结构体中的数据是从哪里得到的呢?

       1: static int mounts_open_common(struct inode *inode, struct file *file,
       2:                   const struct seq_operations *op)
       3: {
       4:     struct task_struct *task = get_proc_task(inode);
       5:     struct nsproxy *nsp;
       6:     struct mnt_namespace *ns = NULL;
       7:     struct path root;
       8:     struct proc_mounts *p;
       9:     int ret = -EINVAL;
      10:  
      11:     if (task) {
      12:         rcu_read_lock();
      13:         nsp = task_nsproxy(task);
      14:         if (nsp) {
      15:             ns = nsp->mnt_ns;
      16:             if (ns)
      17:                 get_mnt_ns(ns);
      18:         }
      19:         rcu_read_unlock();
      20:         if (ns && get_task_root(task, &root) == 0)
      21:             ret = 0;
      22:         put_task_struct(task);
      23:     }
      24:  
      25:     if (!ns)
      26:         goto err;
      27:     if (ret)
      28:         goto err_put_ns;
      29:  
      30:     ret = -ENOMEM;
      31:     p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
      32:     if (!p)
      33:         goto err_put_path;
      34:  
      35:     file->private_data = &p->m;
      36:     ret = seq_open(file, op);
      37:     if (ret)
      38:         goto err_free;
      39:  
      40:     p->m.private = p;
      41:     p->ns = ns;
      42:     p->root = root;
      43:     p->event = ns->event;
      44:  
      45:     return 0;
      46:  
      47:  err_free:
      48:     kfree(p);
      49:  err_put_path:
      50:     path_put(&root);
      51:  err_put_ns:
      52:     put_mnt_ns(ns);
      53:  err:
      54:     return ret;
      55: }

    首先看到这段代码

        p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
        if (!p)
            goto err_put_path;

        file->private_data = &p->m;
        ret = seq_open(file, op);
        if (ret)
            goto err_free;

        p->m.private = p;
        p->ns = ns;
        p->root = root;
        p->event = ns->event;

    可以确定,proc_mounts结构体是在这里创建并且初始化的。其中最重要的数据ns是怎么来的呢?

        if (task) {
            rcu_read_lock();
            nsp = task_nsproxy(task);
            if (nsp) {
                ns = nsp->mnt_ns;
                if (ns)
                    get_mnt_ns(ns);
            }
            rcu_read_unlock();
            if (ns && get_task_root(task, &root) == 0)
                ret = 0;
            put_task_struct(task);
        }

       1: /*
       2:  * A structure to contain pointers to all per-process
       3:  * namespaces - fs (mount), uts, network, sysvipc, etc.
       4:  *
       5:  * 'count' is the number of tasks holding a reference.
       6:  * The count for each namespace, then, will be the number
       7:  * of nsproxies pointing to it, not the number of tasks.
       8:  *
       9:  * The nsproxy is shared by tasks which share all namespaces.
      10:  * As soon as a single namespace is cloned or unshared, the
      11:  * nsproxy is copied.
      12:  */
      13: struct nsproxy {
      14:     atomic_t count;
      15:     struct uts_namespace *uts_ns;
      16:     struct ipc_namespace *ipc_ns;
      17:     struct mnt_namespace *mnt_ns;
      18:     struct pid_namespace *pid_ns;
      19:     struct net          *net_ns;
      20: };
    这段代码就是通过inode所对应任务(由get_proc_task(inode)得到的task)的nsproxy结构体得到mnt_ns数据,并用get_mnt_ns增加其引用计数。

    那么是哪里调到了mounts_open_common函数呢?

       1: static int mounts_open(struct inode *inode, struct file *file)
       2: {
       3:     return mounts_open_common(inode, file, &mounts_op);
       4: }
       5:  
       6: static const struct file_operations proc_mounts_operations = {
       7:     .open        = mounts_open,
       8:     .read        = seq_read,
       9:     .llseek        = seq_lseek,
      10:     .release    = mounts_release,
      11:     .poll        = mounts_poll,
      12: };

    我们又在fs/proc/base.c中有了大发现:

       1: static const struct pid_entry tgid_base_stuff[] = {
       2:     DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
       3:     DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
       4:     DIR("fdinfo",     S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
       5:     DIR("ns",      S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
       6: #ifdef CONFIG_NET
       7:     DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
       8: #endif
       9:     REG("environ",    S_IRUSR, proc_environ_operations),
      10:     INF("auxv",       S_IRUSR, proc_pid_auxv),
      11:     ONE("status",     S_IRUGO, proc_pid_status),
      12:     ONE("personality", S_IRUGO, proc_pid_personality),
      13:     INF("limits",      S_IRUGO, proc_pid_limits),
      14: #ifdef CONFIG_SCHED_DEBUG
      15:     REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
      16: #endif
      17: #ifdef CONFIG_SCHED_AUTOGROUP
      18:     REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
      19: #endif
      20:     REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
      21: #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
      22:     INF("syscall",    S_IRUGO, proc_pid_syscall),
      23: #endif
      24:     INF("cmdline",    S_IRUGO, proc_pid_cmdline),
      25:     ONE("stat",       S_IRUGO, proc_tgid_stat),
      26:     ONE("statm",      S_IRUGO, proc_pid_statm),
      27:     REG("maps",       S_IRUGO, proc_maps_operations),
      28: #ifdef CONFIG_NUMA
      29:     REG("numa_maps",  S_IRUGO, proc_numa_maps_operations),
      30: #endif
      31:     REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
      32:     LNK("cwd",        proc_cwd_link),
      33:     LNK("root",       proc_root_link),
      34:     LNK("exe",        proc_exe_link),
      35:     REG("mounts",     S_IRUGO, proc_mounts_operations),
      36:     REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
      37:     REG("mountstats", S_IRUSR, proc_mountstats_operations),
      38: #ifdef CONFIG_PROC_PAGE_MONITOR
      39:     REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
      40:     REG("smaps",      S_IRUGO, proc_smaps_operations),
      41:     REG("pagemap",    S_IRUGO, proc_pagemap_operations),
      42: #endif
      43: #ifdef CONFIG_SECURITY
      44:     DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
      45: #endif
      46: #ifdef CONFIG_KALLSYMS
      47:     INF("wchan",      S_IRUGO, proc_pid_wchan),
      48: #endif
      49: #ifdef CONFIG_STACKTRACE
      50:     ONE("stack",      S_IRUGO, proc_pid_stack),
      51: #endif
      52: #ifdef CONFIG_SCHEDSTATS
      53:     INF("schedstat",  S_IRUGO, proc_pid_schedstat),
      54: #endif
      55: #ifdef CONFIG_LATENCYTOP
      56:     REG("latency",  S_IRUGO, proc_lstats_operations),
      57: #endif
      58: #ifdef CONFIG_PROC_PID_CPUSET
      59:     REG("cpuset",     S_IRUGO, proc_cpuset_operations),
      60: #endif
      61: #ifdef CONFIG_CGROUPS
      62:     REG("cgroup",  S_IRUGO, proc_cgroup_operations),
      63: #endif
      64:     INF("oom_score",  S_IRUGO, proc_oom_score),
      65:     REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adjust_operations),
      66:     REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
      67: #ifdef CONFIG_AUDITSYSCALL
      68:     REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
      69:     REG("sessionid",  S_IRUGO, proc_sessionid_operations),
      70: #endif
      71: #ifdef CONFIG_FAULT_INJECTION
      72:     REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
      73: #endif
      74: #ifdef CONFIG_ELF_CORE
      75:     REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
      76: #endif
      77: #ifdef CONFIG_TASK_IO_ACCOUNTING
      78:     INF("io",    S_IRUSR, proc_tgid_io_accounting),
      79: #endif
      80: #ifdef CONFIG_HARDWALL
      81:     INF("hardwall",   S_IRUGO, proc_pid_hardwall),
      82: #endif
      83: };

    这里定义着在每个/proc/[pid]下面的所有目录项

    那么/proc/mounts呢,我们查看一下/proc/mounts的信息:

       1: #ls -l /proc
       2: ......
       3: lrwxrwxrwx  1 root       root               11 2014-01-26 22:11 mounts -> self/mounts
       4: ......
       5: lrwxrwxrwx  1 root       root               64 2014-01-23 01:22 self -> 10590
       6: ......

    因此,一切都明了了,/proc/mounts其实是到当前任务的/proc/self/mounts的软链接。

    proc_mounts的数据源头,以及生成数据的调用层次问题已经找到了答案,接下来再回过头来看看seq_file。

    return seq_list_start(&p->ns->list, *pos);

       1: struct list_head *seq_list_start(struct list_head *head, loff_t pos)
       2: {
       3:     struct list_head *lh;
       4:  
       5:     list_for_each(lh, head)
       6:         if (pos-- == 0)
       7:             return lh;
       8:  
       9:     return NULL;
      10: }
      11: EXPORT_SYMBOL(seq_list_start);

    其实很简单,就是返回到双链表head的第pos项的位置指针。如果pos超出了head双链表中的项目数目,就返回NULL。

    可见,这是为了读取seq_file中的内容做准备。

    对于m_next和m_stop的逻辑也很简单,不再详述。

       1: static void *m_next(struct seq_file *m, void *v, loff_t *pos)
       2: {
       3:     struct proc_mounts *p = m->private;
       4:  
       5:     return seq_list_next(v, &p->ns->list, pos);
       6: }
       7:  
       8: static void m_stop(struct seq_file *m, void *v)
       9: {
      10:     up_read(&namespace_sem);
      11: }
       1: struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
       2: {
       3:     struct list_head *lh;
       4:  
       5:     lh = ((struct list_head *)v)->next;
       6:     ++*ppos;
       7:     return lh == head ? NULL : lh;
       8: }
       9: EXPORT_SYMBOL(seq_list_next);

    总结一下,就是m_start/m_next向外界暴露proc_mounts->ns->list的位置指针,允许外界对其内容进行读取。

    m_stop用来在读取结束后做清理工作,这里是用up_read释放m_start中通过down_read获取的namespace_sem信号量。

    显示函数

       1: static int show_vfsmnt(struct seq_file *m, void *v)
       2: {
       3:     struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
       4:     int err = 0;
       5:     struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
       6:  
       7:     if (mnt->mnt_sb->s_op->show_devname) {
       8:         err = mnt->mnt_sb->s_op->show_devname(m, mnt);
       9:         if (err)
      10:             goto out;
      11:     } else {
      12:         mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
      13:     }
      14:     seq_putc(m, ' ');
      15:     seq_path(m, &mnt_path, " \t\n\\");
      16:     seq_putc(m, ' ');
      17:     show_type(m, mnt->mnt_sb);
      18:     seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
      19:     err = show_sb_opts(m, mnt->mnt_sb);
      20:     if (err)
      21:         goto out;
      22:     show_mnt_opts(m, mnt);
      23:     if (mnt->mnt_sb->s_op->show_options)
      24:         err = mnt->mnt_sb->s_op->show_options(m, mnt);
      25:     seq_puts(m, " 0 0\n");
      26: out:
      27:     return err;
      28: }

    从show函数来看,v指向某个vfsmount结构体内嵌的mnt_list链表节点,show_vfsmnt先用list_entry由它得到所属的vfsmount,再把这个挂载点的信息(设备名、挂载路径、文件系统类型、挂载选项等)以一定的格式写到seq_file的buffer里面去。

    结合m_start/m_next的实现可以确定,v实际上保存的是链表

    p->ns->list(其中p = m->private)中当前节点的指针,也就是某个vfsmount的mnt_list成员的地址,而不是ns->root本身。

    接下来我们看一下,这些简单的功能(m_start/m_next/m_stop/show_vfsmnt)是怎样发挥作用的:

       1: ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
       2: {
       3:     struct seq_file *m = file->private_data;
       4:     size_t copied = 0;
       5:     loff_t pos;
       6:     size_t n;
       7:     void *p;
       8:     int err = 0;
       9:  
      10:     mutex_lock(&m->lock);
      11:  
      12:     /* Don't assume *ppos is where we left it */
      13:     if (unlikely(*ppos != m->read_pos)) {
      14:         m->read_pos = *ppos;
      15:         while ((err = traverse(m, *ppos)) == -EAGAIN)
      16:             ;
      17:         if (err) {
      18:             /* With prejudice... */
      19:             m->read_pos = 0;
      20:             m->version = 0;
      21:             m->index = 0;
      22:             m->count = 0;
      23:             goto Done;
      24:         }
      25:     }
      26:  
      27:     /*
      28:      * seq_file->op->..m_start/m_stop/m_next may do special actions
      29:      * or optimisations based on the file->f_version, so we want to
      30:      * pass the file->f_version to those methods.
      31:      *
      32:      * seq_file->version is just copy of f_version, and seq_file
      33:      * methods can treat it simply as file version.
      34:      * It is copied in first and copied out after all operations.
      35:      * It is convenient to have it as  part of structure to avoid the
      36:      * need of passing another argument to all the seq_file methods.
      37:      */
      38:     m->version = file->f_version;
      39:     /* grab buffer if we didn't have one */
      40:     if (!m->buf) {
      41:         m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
      42:         if (!m->buf)
      43:             goto Enomem;
      44:     }
      45:     /* if not empty - flush it first */
      46:     if (m->count) {
      47:         n = min(m->count, size);
      48:         err = copy_to_user(buf, m->buf + m->from, n);
      49:         if (err)
      50:             goto Efault;
      51:         m->count -= n;
      52:         m->from += n;
      53:         size -= n;
      54:         buf += n;
      55:         copied += n;
      56:         if (!m->count)
      57:             m->index++;
      58:         if (!size)
      59:             goto Done;
      60:     }
      61:     /* we need at least one record in buffer */
      62:     pos = m->index;
      63:     p = m->op->start(m, &pos);
      64:     while (1) {
      65:         err = PTR_ERR(p);
      66:         if (!p || IS_ERR(p))
      67:             break;
      68:         err = m->op->show(m, p);
      69:         if (err < 0)
      70:             break;
      71:         if (unlikely(err))
      72:             m->count = 0;
      73:         if (unlikely(!m->count)) {
      74:             p = m->op->next(m, p, &pos);
      75:             m->index = pos;
      76:             continue;
      77:         }
      78:         if (m->count < m->size)
      79:             goto Fill;
      80:         m->op->stop(m, p);
      81:         kfree(m->buf);
      82:         m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
      83:         if (!m->buf)
      84:             goto Enomem;
      85:         m->count = 0;
      86:         m->version = 0;
      87:         pos = m->index;
      88:         p = m->op->start(m, &pos);
      89:     }
      90:     m->op->stop(m, p);
      91:     m->count = 0;
      92:     goto Done;
      93: Fill:
      94:     /* they want more? let's try to get some more */
      95:     while (m->count < size) {
      96:         size_t offs = m->count;
      97:         loff_t next = pos;
      98:         p = m->op->next(m, p, &next);
      99:         if (!p || IS_ERR(p)) {
     100:             err = PTR_ERR(p);
     101:             break;
     102:         }
     103:         err = m->op->show(m, p);
     104:         if (m->count == m->size || err) {
     105:             m->count = offs;
     106:             if (likely(err <= 0))
     107:                 break;
     108:         }
     109:         pos = next;
     110:     }
     111:     m->op->stop(m, p);
     112:     n = min(m->count, size);
     113:     err = copy_to_user(buf, m->buf, n);
     114:     if (err)
     115:         goto Efault;
     116:     copied += n;
     117:     m->count -= n;
     118:     if (m->count)
     119:         m->from = n;
     120:     else
     121:         pos++;
     122:     m->index = pos;
     123: Done:
     124:     if (!copied)
     125:         copied = err;
     126:     else {
     127:         *ppos += copied;
     128:         m->read_pos += copied;
     129:     }
     130:     file->f_version = m->version;
     131:     mutex_unlock(&m->lock);
     132:     return copied;
     133: Enomem:
     134:     err = -ENOMEM;
     135:     goto Done;
     136: Efault:
     137:     err = -EFAULT;
     138:     goto Done;
     139: }

    seq_read,显然是用来读取文件内容的,但是其接口并不是seq_file,而是file,这就表明这个接口是把seq_file的实现细节隐藏在了该函数的内部,而对于外面来说,可以通过常用的struct file接口来调用该函数。

    因此该函数起到了Adapter的作用。

    下面这段是核心代码

        pos = m->index;
        p = m->op->start(m, &pos);
        while (1) {
            err = PTR_ERR(p);
            if (!p || IS_ERR(p))
                break;
            err = m->op->show(m, p);
            if (err < 0)
                break;
            if (unlikely(err))
                m->count = 0;
            if (unlikely(!m->count)) {
                p = m->op->next(m, p, &pos);
                m->index = pos;
                continue;
            }
            if (m->count < m->size)
                goto Fill;
            m->op->stop(m, p);
            kfree(m->buf);
            m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
            if (!m->buf)
                goto Enomem;
            m->count = 0;
            m->version = 0;
            pos = m->index;
            p = m->op->start(m, &pos);
        }
        m->op->stop(m, p);
        m->count = 0;
        goto Done;

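    循环的控制逻辑取决于show的返回值err以及m->count的变化:

    err < 0, 代表出错,跳出循环,调用stop之后把错误码返回给调用者;

    err > 0, 代表跳过这条记录(SEQ_SKIP),丢弃它已经输出的内容(m->count = 0),然后调用m_next来读下一条;

    err = 0, 代表成功:如果这条记录没有产生任何输出(m->count为0),就调用m_next读下一条;如果m->count < m->size,说明记录已经完整地放进了buffer,跳转到Fill继续填充;否则说明buffer被写满(溢出),先调用stop,将buffer大小调整为原来的2倍,再从m->index处重新start并尝试重新读。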

    其中,m->count代表m->buf中已经生成、尚未拷贝给用户的字节数目,m->size代表m->buf缓冲区的容量(初始为PAGE_SIZE,放不下一条记录时翻倍)。

  • 相关阅读:
    Enumerable.Distinct方法去重
    datatable使用lambda
    jQuery获取单选框(复选框)选中的状态
    从 datetime2 数据类型到 datetime 数据类型的转换产生一个超出范围的值
    SQL Server 中WITH (NOLOCK)浅析
    C#将int型数字转换成3位字符串,不足的时候,前面补0
    window.locatin.href 中特殊字符串问题
    windows下git bash不显示中文问题解决
    python获取文件行数
    自定义异常以及触发异常
  • 原文地址:https://www.cnblogs.com/long123king/p/3534989.html
Copyright © 2020-2023  润新知