• open系统调用flow之do_last()


    open系统调用flow之do_last()

    本文章来分析下open file系统调用最后一个主要的函数do_last()

    open系统调用来到do_last()时,主要看其nd参数里的path、last两个成员:此时path表示待打开文件所在的目录(即完整路径去掉最后一级文件名后的部分),last表示待打开文件的文件名。比如open /data/test/test.txt,此时path结构体表示/data/test这个路径,而last(struct qstr)表示test.txt。

    有了这个前提之后再来分析下do_last()

    和之前link_path_walk()里lookup dentry类似,do_last同样会去查找last file对应的dentry。先调用lookup_fast()看看last所表示的file是否有对应的dentry,如果有,则会直接跳到finish_lookup,不会再走lookup_open() flow;如果没有则会走lookup_open() flow。

    不管dcache里是否有对应的dentry,都会call到step_into(),这个函数根据path结构体更新nd.path,即nd.path表示last文件。

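    如果是走的lookup_open() flow,并且文件没有在lookup_open()里就被打开(即file->f_mode没有设置FMODE_OPENED),则接下来会调用vfs_open()。如果是在dcache里找到了对应的dentry,lookup_fast()之后会跳到finish_lookup,从下面do_last()的代码看,之后同样会依次经过finish_open、finish_open_created,最终也会走到vfs_open()(这一点原文没有实际跑过确认)。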

    /*
     * Handle the last step of open(): look up the final path component
     * (nd->last) relative to the directory nd->path.dentry and open it
     * into @file.
     *
     * @nd:   lookup state.  nd->path.dentry is used as the parent directory,
     *        nd->last as the final component and nd->last_type to tell a
     *        normal name from the other kinds of last component.
     * @file: the file being opened.  Its f_mode is inspected for
     *        FMODE_OPENED / FMODE_CREATED after lookup_open() returns.
     * @op:   supplies open_flag, acc_mode and intent.
     *
     * Returns 0 on success, otherwise the error from the failing step.  A
     * positive value that reaches the "out" label is reported with WARN_ON
     * and converted to -EINVAL.
     */
    static int do_last(struct nameidata *nd,
               struct file *file, const struct open_flags *op)
    {
        struct dentry *dir = nd->path.dentry;
        /* Snapshot of the parent's owner and mode, used by may_create_in_sticky() below. */
        kuid_t dir_uid = nd->inode->i_uid;
        umode_t dir_mode = nd->inode->i_mode;
        int open_flag = op->open_flag;
        bool will_truncate = (open_flag & O_TRUNC) != 0;
        /* Set whenever mnt_want_write() succeeded and has not been dropped yet. */
        bool got_write = false;
        int acc_mode = op->acc_mode;
        unsigned seq;
        struct inode *inode;
        struct path path;
        int error;
    
        /* From here on we resolve the last component itself, with the caller's intent bits. */
        nd->flags &= ~LOOKUP_PARENT;
        nd->flags |= op->intent;
    
        /* Last component is not a normal name: handle_dots() deals with it, no lookup needed. */
        if (nd->last_type != LAST_NORM) {
            error = handle_dots(nd, nd->last_type);
            if (unlikely(error))
                return error;
            goto finish_open;
        }
    
        if (!(open_flag & O_CREAT)) { /* the article analyses the "file already exists" case, so O_CREAT is not set here */
            /* A non-NUL byte right after the name means something (e.g. a slash) follows it. */
            if (nd->last.name[nd->last.len])
                nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
            /* we _can_ be in RCU mode here */
            error = lookup_fast(nd, &path, &inode, &seq);
            /* Positive return: path/inode/seq are usable, skip lookup_open() entirely. */
            if (likely(error > 0))
                goto finish_lookup;
    
            if (error < 0)
                return error;
    
            /* Zero return: fall through to lookup_open() below. */
            BUG_ON(nd->inode != dir->d_inode);
            BUG_ON(nd->flags & LOOKUP_RCU);
        } else {
            /* create side of things */
            /*
             * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
             * has been cleared when we got to the last component we are
             * about to look up
             */
            error = complete_walk(nd);
            if (error)
                return error;
    
            audit_inode(nd->name, dir, LOOKUP_PARENT);
            /* trailing slashes? */
            if (unlikely(nd->last.name[nd->last.len]))
                return -EISDIR;
        }
    
        if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
            error = mnt_want_write(nd->path.mnt);
            if (!error)
                got_write = true;
            /*
             * do _not_ fail yet - we might not need that or fail with
             * a different error; let lookup_open() decide; we'll be
             * dropping this one anyway.
             */
        }
        /* Directory inode lock around lookup_open(): inode_lock() for O_CREAT, inode_lock_shared() otherwise. */
        if (open_flag & O_CREAT)
            inode_lock(dir->d_inode);
        else
            inode_lock_shared(dir->d_inode);
        error = lookup_open(nd, &path, file, op, got_write);
        if (open_flag & O_CREAT)
            inode_unlock(dir->d_inode);
        else
            inode_unlock_shared(dir->d_inode);
    
        if (error)
            goto out;
    
        /* lookup_open() already opened the file: jump past may_open()/vfs_open(). */
        if (file->f_mode & FMODE_OPENED) {
            if ((file->f_mode & FMODE_CREATED) ||
                !S_ISREG(file_inode(file)->i_mode))
                will_truncate = false;
    
            audit_inode(nd->name, file->f_path.dentry, 0);
            goto opened;
        }
    
        /* File was created but not opened yet: open it without the access/truncate handling. */
        if (file->f_mode & FMODE_CREATED) {
            /* Don't check for write permission, don't truncate */
            open_flag &= ~O_TRUNC;
            will_truncate = false;
            acc_mode = 0;
            path_to_nameidata(&path, nd);
            goto finish_open_created;
        }
    
        /*
         * If atomic_open() acquired write access it is dropped now due to
         * possible mount and symlink following (this might be optimized away if
         * necessary...)
         */
        if (got_write) {
            mnt_drop_write(nd->path.mnt);
            got_write = false;
        }
    
        error = follow_managed(&path, nd);
        if (unlikely(error < 0))
            return error;
    
        if (unlikely(d_is_negative(path.dentry))) {
            path_to_nameidata(&path, nd);
            return -ENOENT;
        }
    
        /*
         * create/update audit record if it already exists.
         */
        audit_inode(nd->name, path.dentry, 0);
    
        /* Reaching this point with both O_EXCL and O_CREAT is reported as -EEXIST. */
        if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) {
            path_to_nameidata(&path, nd);
            return -EEXIST;
        }
    
        seq = 0;    /* out of RCU mode, so the value doesn't matter */
        inode = d_backing_inode(path.dentry);
    finish_lookup:
        /* Both the lookup_fast() hit and the lookup_open() result converge here. */
        error = step_into(nd, &path, 0, inode, seq);
        if (unlikely(error))
            return error;
    finish_open:
        /* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
        error = complete_walk(nd);
        if (error)
            return error;
        audit_inode(nd->name, nd->path.dentry, 0);
        if (open_flag & O_CREAT) {
            /* O_CREAT on something that turned out to be a directory. */
            error = -EISDIR;
            if (d_is_dir(nd->path.dentry))
                goto out;
            error = may_create_in_sticky(dir_mode, dir_uid,
                             d_backing_inode(nd->path.dentry));
            if (unlikely(error))
                goto out;
        }
        /* LOOKUP_DIRECTORY was requested but the result cannot be looked up in. */
        error = -ENOTDIR;
        if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
            goto out;
        /* Only regular files get truncated. */
        if (!d_is_reg(nd->path.dentry))
            will_truncate = false;
    
        if (will_truncate) {
            error = mnt_want_write(nd->path.mnt);
            if (error)
                goto out;
            got_write = true;
        }
    finish_open_created:
        error = may_open(&nd->path, acc_mode, open_flag);
        if (error)
            goto out;
        BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
        error = vfs_open(&nd->path, file);
        if (error)
            goto out;
    opened:
        error = ima_file_check(file, op->acc_mode);
        if (!error && will_truncate)
            error = handle_truncate(file);
    out:
        /* Callers expect 0 or a negative value; a positive one is flagged and replaced. */
        if (unlikely(error > 0)) {
            WARN_ON(1);
            error = -EINVAL;
        }
        /* Balance any mnt_want_write() that is still outstanding. */
        if (got_write)
            mnt_drop_write(nd->path.mnt);
        return error;
    }

    lookup_open()首先调用了d_lookup(),这个lookup同样是在dcache里查找last所表示的文件是否有对应的dentry,这个lookup是需要操作rcu锁以及dentry的d_lock自旋锁的。如果没有找到,说明这个文件之前没有被open过,还没有建立对应的dentry,则调用d_alloc_parallel给last分配一个dentry。

    然后会走到no_open label执行dir_inode->i_op->lookup,这个lookup是具体文件系统的lookup函数。这部分alloc dentry和lookup和之前在分析link_path_walk flow时分析的lookup_slow() flow一样,这里不再赘述。

    最后设置下path结构体,将path结构体里的dentry、mnt成员更新为表示当前文件:

    /*
     * Look up the last component (nd->last) in the directory nd->path.dentry
     * and, depending on the open flags and on what the directory inode
     * implements, create it or hand it to atomic_open().
     *
     * @nd:        lookup state; provides the parent directory, the last
     *             component and the lookup flags.
     * @path:      filled with the resulting dentry and nd->path.mnt when the
     *             function returns 0 through the out_no_open label.
     * @file:      FMODE_CREATED is cleared on entry and set again if the
     *             ->create() path below is taken.
     * @op:        supplies open_flag and mode.
     * @got_write: when false, O_CREAT is stripped and -EROFS is remembered
     *             as the creation error (presumably the caller passes whether
     *             it holds write access on the mount - confirm at call site).
     *
     * Returns 0 with *path set, a negative error code, or - if the directory
     * inode implements ->atomic_open - whatever atomic_open() returned (with
     * -ENOENT replaced by the remembered creation error, if any).
     */
    static int lookup_open(struct nameidata *nd, struct path *path,
                struct file *file,
                const struct open_flags *op,
                bool got_write)
    {
        struct dentry *dir = nd->path.dentry;
        struct inode *dir_inode = dir->d_inode;
        int open_flag = op->open_flag;
        struct dentry *dentry;
        int error, create_error = 0;
        umode_t mode = op->mode;
        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

        if (unlikely(IS_DEADDIR(dir_inode)))
            return -ENOENT;

        file->f_mode &= ~FMODE_CREATED;
        dentry = d_lookup(dir, &nd->last);
        /*
         * Loop until we hold either a dentry that is still in lookup or one
         * that passed revalidation; a dentry that fails revalidation with 0
         * is invalidated, dropped and allocated afresh.
         */
        for (;;) {
            if (!dentry) {
                dentry = d_alloc_parallel(dir, &nd->last, &wq);
                if (IS_ERR(dentry))
                    return PTR_ERR(dentry);
            }
            if (d_in_lookup(dentry))
                break;

            error = d_revalidate(dentry, nd->flags);
            if (likely(error > 0))
                break;
            if (error)
                goto out_dput;
            d_invalidate(dentry);
            dput(dentry);
            dentry = NULL;
        }
        if (dentry->d_inode) {
            /* Cached positive dentry: will open in f_op->open */
            goto out_no_open;
        }

        /*
         * Checking write permission is tricky, because we don't know if we are
         * going to actually need it: O_CREAT opens should work as long as the
         * file exists.  But checking existence breaks atomicity.  The trick is
         * to check access and if not granted clear O_CREAT from the flags.
         *
         * Another problem is returning the "right" error value (e.g. for an
         * O_EXCL open we want to return EEXIST not EROFS).
         */
        if (open_flag & O_CREAT) {
            if (!IS_POSIXACL(dir->d_inode))
                mode &= ~current_umask();
            if (unlikely(!got_write)) {
                create_error = -EROFS;
                open_flag &= ~O_CREAT;
                if (open_flag & (O_EXCL | O_TRUNC))
                    goto no_open;
                /* No side effects, safe to clear O_CREAT */
            } else {
                create_error = may_o_create(&nd->path, dentry, mode);
                if (create_error) {
                    open_flag &= ~O_CREAT;
                    if (open_flag & O_EXCL)
                        goto no_open;
                }
            }
        } else if ((open_flag & (O_TRUNC|O_WRONLY|O_RDWR)) &&
               unlikely(!got_write)) {
            /*
             * No O_CREATE -> atomicity not a requirement -> fall
             * back to lookup + open
             */
            goto no_open;
        }

        /* Atomic open path: taken only when the directory inode provides ->atomic_open. */
        if (dir_inode->i_op->atomic_open) {
            error = atomic_open(nd, dentry, path, file, op, open_flag,
                        mode);
            if (unlikely(error == -ENOENT) && create_error)
                error = create_error;
            return error;
        }

    no_open:
        /* Dentry is still in lookup: ask the filesystem's ->lookup() to resolve it. */
        if (d_in_lookup(dentry)) {
            struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
                                     nd->flags);
            d_lookup_done(dentry);
            if (unlikely(res)) {
                if (IS_ERR(res)) {
                    error = PTR_ERR(res);
                    goto out_dput;
                }
                /* ->lookup() returned a different dentry: use it instead of ours. */
                dput(dentry);
                dentry = res;
            }
        }

        /* Negative dentry, just create the file */
        if (!dentry->d_inode && (open_flag & O_CREAT)) {
            file->f_mode |= FMODE_CREATED;
            audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
            if (!dir_inode->i_op->create) {
                error = -EACCES;
                goto out_dput;
            }
            error = dir_inode->i_op->create(dir_inode, dentry, mode,
                            open_flag & O_EXCL);
            if (error)
                goto out_dput;
            fsnotify_create(dir_inode, dentry);
        }
        /* Creation was refused earlier and the file does not exist: report that error now. */
        if (unlikely(create_error) && !dentry->d_inode) {
            error = create_error;
            goto out_dput;
        }
    out_no_open:
        path->dentry = dentry;
        path->mnt = nd->path.mnt;
        return 0;

    out_dput:
        dput(dentry);
        return error;
    }

    vfs_open,将表示file的path赋值给此file的f_path成员,之后就可以根据file struct得到此file的dentry(f_path.dentry)以及此文件所在的文件系统vfsmount(f_path.mnt)了。根据这个dentry又可以得到这个file的inode struct。

    然后调用do_dentry_open()

    /*
     * Record @path in @file->f_path, then let do_dentry_open() finish the open
     * using the backing inode of path->dentry and no explicit open callback.
     *
     * Returns whatever do_dentry_open() returns.
     */
    int vfs_open(const struct path *path, struct file *file)
    {
        struct inode *backing;

        file->f_path = *path;
        backing = d_backing_inode(path->dentry);

        return do_dentry_open(file, backing, NULL);
    }

    do_dentry_open参数说明:

    f: 即表示此file的file struct;

    inode: 即此file的inode struct;

    open: 这里它为null

    do_dentry_open()里会调用fops_get(inode->i_fop),并将返回值赋值给f->f_op。关于fops_get()的返回值:对于文件系统来说,其file_operations一般没有设置struct module类型的owner成员;同时CONFIG_MODULES和CONFIG_MODULE_UNLOAD(表示是否支持卸载已经加载的ko)一般是define了的,所以使用的是module.c里的try_module_get(),在这个函数里,如果module指针是null,则直接return true。因此对于文件系统来说,相当于直接将inode->i_fop赋值给f->f_op,即设置file的file_operations。这个file_operations是具体文件系统提供的,以ext4 fs为例,它是ext4_file_operations。然后,如果具体文件系统提供的file_operations函数集里有open函数,则调用这个open函数;ext4 fs是有提供的,它是ext4_file_open(),传给它的参数是inode和f,即此file的inode、file struct。至此open系统调用过程基本结束!

    /*
     * Finish opening @f on @inode: bind the file to the inode and its
     * mapping, pick up the inode's file_operations, run the security and
     * lease checks, call the open callback and set the resulting f_mode bits.
     *
     * @f:     file being opened; f->f_path must already be set by the caller
     *         (a reference on it is taken here with path_get()).
     * @inode: inode to bind to @f.
     * @open:  open callback to invoke; when NULL, f->f_op->open is used if
     *         the file_operations provide one.
     *
     * Returns 0 on success or an error code.  On the cleanup_all /
     * cleanup_file paths the path reference is dropped and f_path / f_inode
     * are reset to NULL.
     */
    static int do_dentry_open(struct file *f,
                  struct inode *inode,
                  int (*open)(struct inode *, struct file *))
    {
        /* Operations table with no callbacks, installed for O_PATH opens. */
        static const struct file_operations empty_fops = {};
        int error;
    
        path_get(&f->f_path);
        f->f_inode = inode;
        f->f_mapping = inode->i_mapping;
    
        /* Ensure that we skip any errors that predate opening of the file */
        f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
    
        /* O_PATH: mark the file opened with empty operations and stop here. */
        if (unlikely(f->f_flags & O_PATH)) {
            f->f_mode = FMODE_PATH | FMODE_OPENED;
            f->f_op = &empty_fops;
            return 0;
        }
    
        /* Any file opened for execve()/uselib() has to be a regular file. */
        if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) {
            error = -EACCES;
            goto cleanup_file;
        }
    
        /*
         * Writable open of a non-special file: take write access on the inode
         * and on the mount.  FMODE_WRITER records that both are held so that
         * cleanup_all knows to release them.
         */
        if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
            error = get_write_access(inode);
            if (unlikely(error))
                goto cleanup_file;
            error = __mnt_want_write(f->f_path.mnt);
            if (unlikely(error)) {
                put_write_access(inode);
                goto cleanup_file;
            }
            f->f_mode |= FMODE_WRITER;
        }
    
        /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
        if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
            f->f_mode |= FMODE_ATOMIC_POS;
    
        /* Install the inode's file_operations; a NULL result is treated as -ENODEV. */
        f->f_op = fops_get(inode->i_fop);
        if (unlikely(WARN_ON(!f->f_op))) {
            error = -ENODEV;
            goto cleanup_all;
        }
    
        error = security_file_open(f);
        if (error)
            goto cleanup_all;
    
        error = break_lease(locks_inode(f), f->f_flags);
        if (error)
            goto cleanup_all;
    
        /* normally all 3 are set; ->open() can clear them if needed */
        f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
        /* No explicit callback supplied: fall back to the one in f_op, if any. */
        if (!open)
            open = f->f_op->open;
        if (open) {
            error = open(inode, f);
            if (error)
                goto cleanup_all;
        }
        f->f_mode |= FMODE_OPENED;
        /* Read-only open (FMODE_READ without FMODE_WRITE): bump the inode's reader count. */
        if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
            i_readcount_inc(inode);
        /* FMODE_CAN_READ / FMODE_CAN_WRITE require a matching method in f_op. */
        if ((f->f_mode & FMODE_READ) &&
             likely(f->f_op->read || f->f_op->read_iter))
            f->f_mode |= FMODE_CAN_READ;
        if ((f->f_mode & FMODE_WRITE) &&
             likely(f->f_op->write || f->f_op->write_iter))
            f->f_mode |= FMODE_CAN_WRITE;
    
        f->f_write_hint = WRITE_LIFE_NOT_SET;
        /* These flags only matter while opening; strip them from the stored f_flags. */
        f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
    
        file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
    
        /* NB: we're sure to have correct a_ops only after f_op->open */
        if (f->f_flags & O_DIRECT) {
            /*
             * NOTE(review): this failure returns directly, after FMODE_OPENED
             * was set and without going through the cleanup labels below.
             */
            if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
                return -EINVAL;
        }
        return 0;
    
    cleanup_all:
        /* A positive error here is unexpected: warn once and report -EINVAL. */
        if (WARN_ON_ONCE(error > 0))
            error = -EINVAL;
        fops_put(f->f_op);
        if (f->f_mode & FMODE_WRITER) {
            put_write_access(inode);
            __mnt_drop_write(f->f_path.mnt);
        }
    cleanup_file:
        /* Undo path_get() and detach the file from the path and inode. */
        path_put(&f->f_path);
        f->f_path.mnt = NULL;
        f->f_path.dentry = NULL;
        f->f_inode = NULL;
        return error;
    }
  • 相关阅读:
    Codeforces Round #362 (Div. 2) C
    poj1655 树重心
    poj1985 树直径
    Codeforces Round #403 (based on Technocup 2017 Finals)
    uva 10054 The necklacr
    bfs codeforces 754B Ilya and tic-tac-toe game
    矩阵快速幂专题
    CodeForces 863E Turn Off The TV 思维,扫描线
    CodeForces 803F Coprime Subsequences 莫比乌斯,容斥
    CodeForces 803C Maximal GCD 思维
  • 原文地址:https://www.cnblogs.com/aspirs/p/15734298.html
Copyright © 2020-2023  润新知