• inode cache and dentry cache


    1. inode cache

    struct inode {
        /* RCU path lookup touches following: */
        umode_t            i_mode;
        uid_t            i_uid;
        gid_t            i_gid;
        const struct inode_operations    *i_op;
        struct super_block    *i_sb;

        spinlock_t        i_lock;    /* i_blocks, i_bytes, maybe i_size */
        unsigned int        i_flags;
        unsigned long        i_state;
    #ifdef CONFIG_SECURITY
        void            *i_security;
    #endif
        struct mutex        i_mutex;


        unsigned long        dirtied_when;    /* jiffies of first dirtying */

        struct hlist_node    i_hash;
        struct list_head    i_wb_list;    /* backing dev IO list */
        struct list_head    i_lru;        /* inode LRU list */
        struct list_head    i_sb_list;
        union {
            struct list_head    i_dentry;
            struct rcu_head        i_rcu;
        };
        unsigned long        i_ino;
        atomic_t        i_count;
        unsigned int        i_nlink;
        dev_t            i_rdev;
        unsigned int        i_blkbits;
        u64            i_version;
        loff_t            i_size;
    #ifdef __NEED_I_SIZE_ORDERED
        seqcount_t        i_size_seqcount;
    #endif
        struct timespec        i_atime;
        struct timespec        i_mtime;
        struct timespec        i_ctime;
        blkcnt_t        i_blocks;
        unsigned short          i_bytes;
        struct rw_semaphore    i_alloc_sem;
        const struct file_operations    *i_fop;    /* former ->i_op->default_file_ops */
        struct file_lock    *i_flock;
        struct address_space    *i_mapping;
        struct address_space    i_data;
    #ifdef CONFIG_QUOTA
        struct dquot        *i_dquot[MAXQUOTAS];
    #endif
        struct list_head    i_devices;
        union {
            struct pipe_inode_info    *i_pipe;
            struct block_device    *i_bdev;
            struct cdev        *i_cdev;
        };

        __u32            i_generation;

    #ifdef CONFIG_FSNOTIFY
        __u32            i_fsnotify_mask; /* all events this inode cares about */
        struct hlist_head    i_fsnotify_marks;
    #endif

    #ifdef CONFIG_IMA
        atomic_t        i_readcount; /* struct files open RO */
    #endif
        atomic_t        i_writecount;
    #ifdef CONFIG_FS_POSIX_ACL
        struct posix_acl    *i_acl;
        struct posix_acl    *i_default_acl;
    #endif
        void            *i_private; /* fs or device private pointer */
    };
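
    Many of these fields map directly onto what user space sees through stat(2): i_ino becomes st_ino, i_nlink becomes st_nlink, i_size becomes st_size, i_blocks becomes st_blocks, and so on. As a quick way to relate the structure above to something observable, here is a small user-space sketch (the default path is just an arbitrary example):

    #include <stdio.h>
    #include <sys/stat.h>

    int main(int argc, char **argv)
    {
        /* any existing path will do; /etc/hostname is only an example */
        const char *path = argc > 1 ? argv[1] : "/etc/hostname";
        struct stat st;

        if (stat(path, &st) != 0) {
            perror("stat");
            return 1;
        }

        /* these values are filled in from the kernel's inode (i_ino, i_nlink, ...) */
        printf("inode number : %lu\n", (unsigned long)st.st_ino);
        printf("link count   : %lu\n", (unsigned long)st.st_nlink);
        printf("size (bytes) : %lld\n", (long long)st.st_size);
        printf("blocks       : %lld\n", (long long)st.st_blocks);
        printf("mode         : %o\n", (unsigned)st.st_mode);
        return 0;
    }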

    An inode can be in one of three states:

    1) unused: it holds no valid content and can be reused for a new purpose;

    2) in use: it is actively being used, so its i_count and i_nlink are both greater than 0; the inode is associated with a file on the filesystem (that is, on the device), but its content has not changed since the last synchronization with the device, i.e. it is not dirty;

    3) dirty: the in-memory content is no longer consistent with the file on the filesystem, i.e. the inode is dirty and needs to be written back.

    Inodes in the first two states each sit on a global list, while inodes in the third state sit on a list inside the super_block structure.

    Let us first look at one member of struct inode:

    struct list_head    i_lru;        /* inode LRU list */

    It corresponds to a global list:

    static LIST_HEAD(inode_lru);
    static DEFINE_SPINLOCK(inode_lru_lock);

    /*
     * Called when we're dropping the last reference
     * to an inode.
     *
     * Call the FS "drop_inode()" function, defaulting to
     * the legacy UNIX filesystem behaviour.  If it tells
     * us to evict inode, do so.  Otherwise, retain inode
     * in cache if fs is alive, sync and evict if fs is
     * shutting down.
     */
    static void iput_final(struct inode *inode)
    {
        struct super_block *sb = inode->i_sb;
        const struct super_operations *op = inode->i_sb->s_op;
        int drop;

        WARN_ON(inode->i_state & I_NEW);

        if (op && op->drop_inode)
            drop = op->drop_inode(inode);
        else
            drop = generic_drop_inode(inode);

        if (!drop && (sb->s_flags & MS_ACTIVE)) {
            inode->i_state |= I_REFERENCED;
            if (!(inode->i_state & (I_DIRTY|I_SYNC)))
                inode_lru_list_add(inode);
            spin_unlock(&inode->i_lock);
            return;
        }

        if (!drop) {
            inode->i_state |= I_WILL_FREE;
            spin_unlock(&inode->i_lock);
            write_inode_now(inode, 1);
            spin_lock(&inode->i_lock);
            WARN_ON(inode->i_state & I_NEW);
            inode->i_state &= ~I_WILL_FREE;
        }

        inode->i_state |= I_FREEING;
        inode_lru_list_del(inode);
        spin_unlock(&inode->i_lock);

        evict(inode);
    }

    The function iput_final is the mechanism that handles an inode once nothing references it any more, i.e. once it has become unused: it either keeps the inode cached or reclaims it.

    if (op && op->drop_inode)
            drop = op->drop_inode(inode);
        else
            drop = generic_drop_inode(inode);

    drop is non-zero when i_nlink is 0, or when the inode is no longer on an inode_hashtable hash chain; such an inode should be released. Conversely, drop == 0 means the inode still has links and is still hashed, so it is a candidate for staying in the cache.

    /*
     * Normal UNIX filesystem behaviour: delete the
     * inode when the usage count drops to zero, and
     * i_nlink is zero.
     */
    int generic_drop_inode(struct inode *inode)
    {
        return !inode->i_nlink || inode_unhashed(inode);
    }
    EXPORT_SYMBOL_GPL(generic_drop_inode);

    if (!drop && (sb->s_flags & MS_ACTIVE)) {
            inode->i_state |= I_REFERENCED;
            if (!(inode->i_state & (I_DIRTY|I_SYNC)))
               inode_lru_list_add(inode);
            spin_unlock(&inode->i_lock);
            return;
        }

    If drop is 0 and the superblock is still active in the system, inode_lru_list_add adds the inode to the unused list, i.e. the inode is cached.

    Otherwise, the inode is first written back to disk with write_inode_now (when drop is 0 but the filesystem is shutting down), then inode_lru_list_del removes it from the cached list if it was there, and finally evict removes the inode completely.

    static void inode_lru_list_add(struct inode *inode)
    {
        spin_lock(&inode_lru_lock);
        if (list_empty(&inode->i_lru)) {
            list_add(&inode->i_lru, &inode_lru);
            inodes_stat.nr_unused++;
        }
        spin_unlock(&inode_lru_lock);
    }

    So inode_lru is the global list of unused inodes, kept in "Least Recently Used" order.
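
    To make that order concrete, here is a minimal user-space sketch (not kernel code; the names lru_add_head, lru_del and prune are made up) of the discipline inode_lru follows: callers in the iput_final role add entries at the head, a reclaim pass (this is what prune_icache, shown next, does) scans from the tail, and a recently referenced entry gets one more pass instead of being freed right away:

    #include <stdio.h>
    #include <stdbool.h>

    struct node {
        const char *name;
        bool referenced;            /* analogous to I_REFERENCED */
        struct node *prev, *next;
    };

    /* list head: head.next is the most recently added, head.prev the oldest */
    static struct node lru = { .prev = &lru, .next = &lru };

    static void lru_add_head(struct node *n)
    {
        n->next = lru.next;
        n->prev = &lru;
        lru.next->prev = n;
        lru.next = n;
    }

    static void lru_del(struct node *n)
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
    }

    /* scan up to nr entries from the old end of the list */
    static void prune(int nr)
    {
        while (nr-- > 0 && lru.prev != &lru) {
            struct node *n = lru.prev;

            lru_del(n);
            if (n->referenced) {
                /* recently used: clear the flag and give it another pass */
                n->referenced = false;
                lru_add_head(n);
                printf("kept   %s (referenced)\n", n->name);
            } else {
                printf("freed  %s\n", n->name);
            }
        }
    }

    int main(void)
    {
        struct node a = { .name = "a" }, b = { .name = "b" }, c = { .name = "c" };

        lru_add_head(&a);           /* oldest */
        lru_add_head(&b);
        lru_add_head(&c);           /* newest */
        b.referenced = true;        /* pretend b was touched again */

        prune(3);                   /* frees a, keeps b for one more pass, frees c */
        return 0;
    }

    This "second chance" for referenced entries mirrors the lazy LRU updates described in the prune_icache comment below.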

    Besides that, another function that operates on inode_lru is prune_icache:

    /*
     * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
     * temporary list and then are freed outside inode_lru_lock by dispose_list().
     *
     * Any inodes which are pinned purely because of attached pagecache have their
     * pagecache removed.  If the inode has metadata buffers attached to
     * mapping->private_list then try to remove them.
     *
     * If the inode has the I_REFERENCED flag set, then it means that it has been
     * used recently - the flag is set in iput_final(). When we encounter such an
     * inode, clear the flag and move it to the back of the LRU so it gets another
     * pass through the LRU before it gets reclaimed. This is necessary because of
     * the fact we are doing lazy LRU updates to minimise lock contention so the
     * LRU does not have strict ordering. Hence we don't want to reclaim inodes
     * with this flag set because they are the inodes that are out of order.
     */
    static void prune_icache(int nr_to_scan)
    {
        LIST_HEAD(freeable);
        int nr_scanned;
        unsigned long reap = 0;

        down_read(&iprune_sem);
        spin_lock(&inode_lru_lock);
        for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
            struct inode *inode;

            if (list_empty(&inode_lru))
                break;

            inode = list_entry(inode_lru.prev, struct inode, i_lru);

            /*
             * we are inverting the inode_lru_lock/inode->i_lock here,
             * so use a trylock. If we fail to get the lock, just move the
             * inode to the back of the list so we don't spin on it.
             */
            if (!spin_trylock(&inode->i_lock)) {
                list_move(&inode->i_lru, &inode_lru);
                continue;
            }

            /*
             * Referenced or dirty inodes are still in use. Give them
             * another pass through the LRU as we canot reclaim them now.
             */
            if (atomic_read(&inode->i_count) ||
                (inode->i_state & ~I_REFERENCED)) {
                list_del_init(&inode->i_lru);
                spin_unlock(&inode->i_lock);
                inodes_stat.nr_unused--;
                continue;
            }

            /* recently referenced inodes get one more pass */
            if (inode->i_state & I_REFERENCED) {
                inode->i_state &= ~I_REFERENCED;
                list_move(&inode->i_lru, &inode_lru);
                spin_unlock(&inode->i_lock);
                continue;
            }
            if (inode_has_buffers(inode) || inode->i_data.nrpages) {
                __iget(inode);
                spin_unlock(&inode->i_lock);
                spin_unlock(&inode_lru_lock);
                if (remove_inode_buffers(inode))
                    reap += invalidate_mapping_pages(&inode->i_data,
                                    0, -1);
                iput(inode);
                spin_lock(&inode_lru_lock);

                if (inode != list_entry(inode_lru.next,
                            struct inode, i_lru))
                    continue;    /* wrong inode or list_empty */
                /* avoid lock inversions with trylock */
                if (!spin_trylock(&inode->i_lock))
                    continue;
                if (!can_unuse(inode)) {
                    spin_unlock(&inode->i_lock);
                    continue;
                }
            }
            WARN_ON(inode->i_state & I_NEW);
            inode->i_state |= I_FREEING;
            spin_unlock(&inode->i_lock);

            list_move(&inode->i_lru, &freeable);
            inodes_stat.nr_unused--;
        }
        if (current_is_kswapd())
            __count_vm_events(KSWAPD_INODESTEAL, reap);
        else
            __count_vm_events(PGINODESTEAL, reap);
        spin_unlock(&inode_lru_lock);

        dispose_list(&freeable);
        up_read(&iprune_sem);
    }

    The job of this function is to free memory when memory pressure is high, by shrinking the cached inode list inode_lru.

    It takes inodes off the old end of inode_lru (the tail, since inode_lru_list_add inserts at the head), performs a few simple checks, and if an inode still has a reason to stay in the cache it is moved back to the fresh end of the list; then the next inode is examined.

    The reasons an inode survives a pass include: its i_lock could not be taken (the trylock failed, so it is simply moved back); it was referenced recently (I_REFERENCED, which is cleared so the inode gets one more pass). Inodes whose usage count i_count is non-zero, or whose state shows they are dirty or otherwise busy, are taken off the LRU list entirely, because they are still in use and cannot be reclaimed here.

    There is also a practical point here: iput_final is only reached when the usage count i_count drops to zero (it then decides, based on i_nlink and whether the inode is still hashed, whether to cache or evict it), and prune_icache in turn only reclaims inodes whose i_count is zero.

    In other words, if the file on disk behind an inode has already been deleted but some process is still working on it, the inode is not destroyed right away; it stays around in memory, so the process can keep operating on the cached data pages.

    Only when no process is using it any more can it be cleared out of the cache.
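
    This behaviour is easy to observe from user space. The sketch below (the scratch file name is arbitrary) unlinks a file while keeping it open: fstat() then reports a link count of 0, yet the data can still be read back, because the inode and its cached pages stay alive until the last reference is dropped:

    #include <stdio.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <sys/stat.h>

    int main(void)
    {
        const char *path = "/tmp/icache-demo.txt";  /* arbitrary scratch file */
        char buf[32] = { 0 };
        struct stat st;

        int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0600);
        if (fd < 0) { perror("open"); return 1; }

        write(fd, "still here", 10);

        /* remove the directory entry: i_nlink drops to 0 ... */
        unlink(path);

        /* ... but the inode is still pinned by our open file descriptor */
        fstat(fd, &st);
        printf("links after unlink: %lu\n", (unsigned long)st.st_nlink);

        lseek(fd, 0, SEEK_SET);
        read(fd, buf, sizeof(buf) - 1);
        printf("read back         : %s\n", buf);    /* prints "still here" */

        close(fd);  /* last reference dropped here; now the inode can be evicted */
        return 0;
    }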

    The inode also has two list_head members, i_sb_list and i_wb_list: i_sb_list links the inode into the super_block->s_inodes list, while i_wb_list links it into the backing device's writeback list.

    2. dentry cache

    The purpose of the dentry cache is to reduce accesses to the slow disk: whenever the VFS looks something up in the underlying filesystem, the result of that lookup is cached as a dentry object.

    The organization and management of dentry objects is very similar to the inode cache: there is also a hash table and an LRU list.

    And when memory pressure is high, prune_dcache is likewise called to try to free the lower-priority dentries on the LRU.

    The difference is that an inode does not need to maintain directory relationships, whereas a dentry does, so the organization of dentries is more complex than that of inodes.
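
    A rough way to see the dentry cache from user space is to time repeated lookups of the same path: the first stat() may have to consult the underlying filesystem, while later ones are normally answered from the dcache. The sketch below is only illustrative; the numbers depend heavily on the system, and the path chosen is just an example that may already be cached:

    #include <stdio.h>
    #include <time.h>
    #include <sys/stat.h>

    static long long stat_ns(const char *path)
    {
        struct timespec t0, t1;
        struct stat st;

        clock_gettime(CLOCK_MONOTONIC, &t0);
        stat(path, &st);
        clock_gettime(CLOCK_MONOTONIC, &t1);
        return (t1.tv_sec - t0.tv_sec) * 1000000000LL + (t1.tv_nsec - t0.tv_nsec);
    }

    int main(void)
    {
        const char *path = "/usr/share/doc";   /* arbitrary example path */

        for (int i = 0; i < 3; i++)
            printf("lookup %d: %lld ns\n", i, stat_ns(path));
        return 0;
    }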

    static struct hlist_bl_head *dentry_hashtable __read_mostly;

    And in super_block:

    /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
    struct list_head    s_dentry_lru;    /* unused dentry lru */

    So the data structure holding the global dentry hash table is itself global, while the structure that caches unused dentries lives inside the super_block.

    /*
     * dentry_lru_(add|del|move_tail) must be called with d_lock held.
     */
    static void dentry_lru_add(struct dentry *dentry)
    {
        if (list_empty(&dentry->d_lru)) {
            spin_lock(&dcache_lru_lock);
            list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
            dentry->d_sb->s_nr_dentry_unused++;
            dentry_stat.nr_unused++;
            spin_unlock(&dcache_lru_lock);
        }
    }

    The dentry_lru_add function adds a dentry that is being released to the dentry cache; it is called from dput.

    /*
     * This is dput
     *
     * This is complicated by the fact that we do not want to put
     * dentries that are no longer on any hash chain on the unused
     * list: we'd much rather just get rid of them immediately.
     *
     * However, that implies that we have to traverse the dentry
     * tree upwards to the parents which might _also_ now be
     * scheduled for deletion (it may have been only waiting for
     * its last child to go away).
     *
     * This tail recursion is done by hand as we don't want to depend
     * on the compiler to always get this right (gcc generally doesn't).
     * Real recursion would eat up our stack space.
     */

    /*
     * dput - release a dentry
     * @dentry: dentry to release
     *
     * Release a dentry. This will drop the usage count and if appropriate
     * call the dentry unlink method as well as removing it from the queues and
     * releasing its resources. If the parent dentries were scheduled for release
     * they too may now get deleted.
     */
    void dput(struct dentry *dentry)
    {
        if (!dentry)
            return;

    repeat:
        if (dentry->d_count == 1)
            might_sleep();
        spin_lock(&dentry->d_lock);
        BUG_ON(!dentry->d_count);
        if (dentry->d_count > 1) {
            dentry->d_count--;
            spin_unlock(&dentry->d_lock);
            return;
        }

        if (dentry->d_flags & DCACHE_OP_DELETE) {
            if (dentry->d_op->d_delete(dentry))
                goto kill_it;
        }

        /* Unreachable? Get rid of it */
        if (d_unhashed(dentry))
            goto kill_it;

        /* Otherwise leave it cached and ensure it's on the LRU */
        dentry->d_flags |= DCACHE_REFERENCED;
        dentry_lru_add(dentry);

        dentry->d_count--;
        spin_unlock(&dentry->d_lock);
        return;

    kill_it:
        dentry = dentry_kill(dentry, 1);
        if (dentry)
            goto repeat;
    }
    EXPORT_SYMBOL(dput);

    All dentry instances together form a network that mirrors the structure of the filesystem.

    The d_subdirs member is the head of the list made up of all subdirectories and files under this directory.

    The d_child member is the anchor through which this dentry is linked into its parent directory's dentry.

    These two members are the basic machinery that builds up the hierarchical structure of the filesystem.
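
    The following is a minimal user-space sketch of the same idea (struct dentry_like and its field names are invented for illustration): each node keeps a pointer to its parent, a list of its children, and a link that threads it into the parent's child list, which is exactly the role d_parent, d_subdirs and d_child play for a dentry:

    #include <stdio.h>

    struct dentry_like {
        const char *name;
        struct dentry_like *parent;     /* like d_parent  */
        struct dentry_like *children;   /* like d_subdirs (head of the child list) */
        struct dentry_like *sibling;    /* like d_child   (link in the parent's list) */
    };

    static void add_child(struct dentry_like *dir, struct dentry_like *child)
    {
        child->parent = dir;
        child->sibling = dir->children; /* push onto the parent's child list */
        dir->children = child;
    }

    static void dump(const struct dentry_like *d, int depth)
    {
        printf("%*s%s\n", depth * 2, "", d->name);
        for (const struct dentry_like *c = d->children; c; c = c->sibling)
            dump(c, depth + 1);
    }

    int main(void)
    {
        struct dentry_like root = { .name = "/" };
        struct dentry_like etc  = { .name = "etc" };
        struct dentry_like host = { .name = "hostname" };
        struct dentry_like usr  = { .name = "usr" };

        add_child(&root, &etc);
        add_child(&root, &usr);
        add_child(&etc, &host);

        dump(&root, 0);                 /* prints the little tree */
        return 0;
    }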

    if (dentry->d_count == 1)
            might_sleep();

    Reference: http://yuxu9710108.blog.163.com/blog/static/23751534201011715413404/

    might_sleep() is a debugging aid: it marks code that may sleep, so that calling it from atomic context can be reported.

    Now let us analyze the logic of dput:

    If the dentry's reference count is greater than 1, somewhere else is still using it, so dput only decrements the count and returns;

    If DCACHE_OP_DELETE is set in dentry->d_flags, the d_op->d_delete hook is called; if it says the dentry should go, dput jumps to kill_it and lets dentry_kill handle it;

    [How do d_op->d_delete and dentry_kill differ in function? In short: d_delete is a per-filesystem policy callback that only tells dput whether the dentry should be discarded, while dentry_kill is the common code that actually takes it off the LRU and hash and frees it.]

    If the dentry can no longer be found in the global hash table (d_unhashed), dentry_kill is likewise called directly;

    If the reference count is 1 and neither of the two cases above that require dentry_kill applies, the dentry is kept cached on the super_block's LRU list.
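
    Condensed into a toy model, the decision dput makes looks roughly like the sketch below (plain user-space C; struct dentry_toy and dput_toy are invented names, and locking, parent handling and the actual freeing are left out):

    #include <stdio.h>
    #include <stdbool.h>

    struct dentry_toy {
        const char *name;
        int count;                 /* like d_count */
        bool delete_policy;        /* like d_op->d_delete() returning true */
        bool hashed;               /* like !d_unhashed() */
        bool referenced;           /* like DCACHE_REFERENCED */
        bool on_lru;
    };

    static void dput_toy(struct dentry_toy *d)
    {
        if (d->count > 1) {                     /* still used elsewhere: just drop a ref */
            d->count--;
            printf("%s: ref dropped, count=%d\n", d->name, d->count);
            return;
        }
        if (d->delete_policy || !d->hashed) {   /* fs says kill it, or it is unreachable */
            d->count--;
            printf("%s: killed\n", d->name);
            return;
        }
        /* otherwise keep it cached on the superblock LRU */
        d->referenced = true;
        d->on_lru = true;
        d->count--;
        printf("%s: cached on LRU\n", d->name);
    }

    int main(void)
    {
        struct dentry_toy a = { "a", 2, false, true };  /* still referenced elsewhere */
        struct dentry_toy b = { "b", 1, true,  true };  /* fs policy says delete      */
        struct dentry_toy c = { "c", 1, false, true };  /* idle: goes onto the LRU    */

        dput_toy(&a);
        dput_toy(&b);
        dput_toy(&c);
        return 0;
    }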

    Let us look at one possible implementation of d_delete, the NFS one:

    /*
     * This is called from dput() when d_count is going to 0.
     */
    static int nfs_dentry_delete(const struct dentry *dentry)
    {
        dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
            dentry->d_parent->d_name.name, dentry->d_name.name,
            dentry->d_flags);

        /* Unhash any dentry with a stale inode */
        if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
            return 1;

        if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
            /* Unhash it, so that ->d_iput() would be called */
            return 1;
        }
        if (!(dentry->d_sb->s_flags & MS_ACTIVE)) {
            /* Unhash it, so that ancestors of killed async unlink
             * files will be cleaned up during umount */
            return 1;
        }
        return 0;
    }

    As we can see, the function makes a few filesystem-internal checks to decide whether the dentry should be removed from the global hash table.

    if (dentry->d_flags & DCACHE_OP_DELETE) {
            if (dentry->d_op->d_delete(dentry))
                goto kill_it;
        }

    /*
     * Finish off a dentry we've decided to kill.
     * dentry->d_lock must be held, returns with it unlocked.
     * If ref is non-zero, then decrement the refcount too.
     * Returns dentry requiring refcount drop, or NULL if we're done.
     */
    static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
        __releases(dentry->d_lock)
    {
        struct inode *inode;
        struct dentry *parent;

        inode = dentry->d_inode;
        if (inode && !spin_trylock(&inode->i_lock)) {
    relock:
            spin_unlock(&dentry->d_lock);
            cpu_relax();
            return dentry; /* try again with same dentry */
        }
        if (IS_ROOT(dentry))
            parent = NULL;
        else
            parent = dentry->d_parent;
        if (parent && !spin_trylock(&parent->d_lock)) {
            if (inode)
                spin_unlock(&inode->i_lock);
            goto relock;
        }

        if (ref)
            dentry->d_count--;
        /* if dentry was on the d_lru list delete it from there */
        dentry_lru_del(dentry);
        /* if it was on the hash then remove it */
        __d_drop(dentry);
        return d_kill(dentry, parent);
    }
  • Original article: https://www.cnblogs.com/long123king/p/3536486.html