• Linux uevent分析、用户接收uevent以及mdev分析【转】


    转自:https://www.cnblogs.com/arnoldlu/p/11246204.html

    关键词:uevent、netlink、ADD/REMOVE/CHANGE、uevent_helper、hotplug、usermode helper、mdev、mdev.conf等等。

     本文从三方面了解uevent相关内容:内核中uevent如何传送、用户空间如何处理uevent、如何通过mdev实现热插拔功能。

    1. Linux uevent分析

    kobject_action定义了Linux下的uevent类型;struct kobj_uevent_env表示一个待发送的uevent。

    uevent_net_init()创建发送uevent所需要的socket等信息。

    内核驱动通过kobject_uevent()/kobject_uevent_env()发送uevent到用户空间,主要包括两部分工作:一是通过netlink_broadcast_filtered()发送netlink消息;另一是通过call_usermodehelper_setup()/call_usermodehelper_exec()调用用户空间程序处理uevent消息。

    1.1 uevent数据结构

    kobject_action定义了kobject的动作,包括ADD、REMOVE、CHANGE等等。用户空间根据ADD或者REMOVE处理热插拔事件,电池模块根据CHANGE处理电量更新。

    kobj_uevent_env用于表示一个kobject事件,argv是用户空间执行的helper参数;envp和buf组成发送uevent字符串信息。

    复制代码
    /* Actions a kobject can report to user space through a uevent. */
    enum kobject_action {
        KOBJ_ADD,       /* object added */
        KOBJ_REMOVE,    /* object removed */
        KOBJ_CHANGE,    /* device state or content changed */
        KOBJ_MOVE,      /* renamed or re-parented, i.e. the directory layout changed */
        KOBJ_ONLINE,    /* device brought online (often means "enabled") */
        KOBJ_OFFLINE,   /* device taken offline (often means "disabled") */
        KOBJ_MAX
    };
    
    /* Action strings, indexed by enum kobject_action. */
    static const char *kobject_actions[] = {
        [KOBJ_ADD]     = "add",
        [KOBJ_REMOVE]  = "remove",
        [KOBJ_CHANGE]  = "change",
        [KOBJ_MOVE]    = "move",
        [KOBJ_ONLINE]  = "online",
        [KOBJ_OFFLINE] = "offline",
    };
    
    struct kobj_uevent_env {
        char *argv[3];------------------------------用户空间可执行文件路径,以及参数等。
        char *envp[UEVENT_NUM_ENVP];----------------指针数组,保存每个环境变量的地址。
        int envp_idx;
        char buf[UEVENT_BUFFER_SIZE];---------------环境变量内容。
        int buflen;
    };
    复制代码

    1.2 uevent初始化

    uevent_net_init()创建类型为NETLINK_KOBJECT_UEVENT的socket,并将其放入uevent_sock_list链表上。uevent_net_exit()则将其从uevent_sock_list中摘除,并且释放socket相关资源。

    复制代码
    static int uevent_net_init(struct net *net)
    {
        struct uevent_sock *ue_sk;
        struct netlink_kernel_cfg cfg = {
            .groups    = 1,
            .flags    = NL_CFG_F_NONROOT_RECV,
        };
    
        ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
        if (!ue_sk)
            return -ENOMEM;
    
        ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, &cfg);------------创建NETLINK_KOBJECT_UEVENT类型的socket。
        if (!ue_sk->sk) {
            printk(KERN_ERR
                   "kobject_uevent: unable to create netlink socket!
    ");
            kfree(ue_sk);
            return -ENODEV;
        }
        mutex_lock(&uevent_sock_mutex);
        list_add_tail(&ue_sk->list, &uevent_sock_list);-----------------------------------将创建的uevent_sock加入到uevent_sock_list中。
        mutex_unlock(&uevent_sock_mutex);
        return 0;
    }
    
    static void uevent_net_exit(struct net *net)
    {
        struct uevent_sock *ue_sk;
    
        mutex_lock(&uevent_sock_mutex);
        list_for_each_entry(ue_sk, &uevent_sock_list, list) {
            if (sock_net(ue_sk->sk) == net)
                goto found;
        }
        mutex_unlock(&uevent_sock_mutex);
        return;
    
    found:
        list_del(&ue_sk->list);
        mutex_unlock(&uevent_sock_mutex);
    
        netlink_kernel_release(ue_sk->sk);
        kfree(ue_sk);
    }
    
    /* Per-network-namespace hooks that create and tear down the uevent socket. */
    static struct pernet_operations uevent_net_ops = {
        .init = uevent_net_init,
        .exit = uevent_net_exit,
    };
    
    /*
     * Boot-time entry point: registers uevent_net_ops as a pernet subsystem.
     * Returns whatever register_pernet_subsys() returns.
     */
    static int __init kobject_uevent_init(void)
    {
        return register_pernet_subsys(&uevent_net_ops);
    }
    
    postcore_initcall(kobject_uevent_init);
    复制代码

    1.3 对uevent_helper设置

    设置uevent_helper时,只需向/proc/sys/kernel/hotplug写入可执行文件路径即可。

    然后在内核触发uevent事件之后,就会调用相关可执行文件进行处理。

    复制代码
    static struct ctl_table kern_table[] = {
    ...
    #ifdef CONFIG_UEVENT_HELPER
        {
            .procname    = "hotplug",
            .data        = &uevent_helper,
            .maxlen        = UEVENT_HELPER_PATH_LEN,
            .mode        = 0644,
            .proc_handler    = proc_dostring,
        },
    #endif
    ...
        { }
    };
    复制代码

    或者还可以对/sys/kernel/uevent_helper写入可执行文件路径。

    复制代码
    static ssize_t uevent_helper_show(struct kobject *kobj,
                      struct kobj_attribute *attr, char *buf)
    {
        return sprintf(buf, "%s
    ", uevent_helper);
    }
    /*
     * Store handler for the uevent_helper attribute: copies @count bytes from
     * @buf into uevent_helper, NUL-terminates it and strips one trailing
     * newline.
     *
     * Returns @count on success, or -ENOENT when the input plus terminator does
     * not fit in UEVENT_HELPER_PATH_LEN bytes.
     */
    static ssize_t uevent_helper_store(struct kobject *kobj,
                       struct kobj_attribute *attr,
                       const char *buf, size_t count)
    {
        if (count+1 > UEVENT_HELPER_PATH_LEN)
            return -ENOENT;
        memcpy(uevent_helper, buf, count);
        uevent_helper[count] = '\0';
        /* "echo path > uevent_helper" appends a newline; drop it. */
        if (count && uevent_helper[count-1] == '\n')
            uevent_helper[count-1] = '\0';
        return count;
    }
    KERNEL_ATTR_RW(uevent_helper);
    复制代码

    1.4 usermode helper

    usermode helper用于帮助在内核空间启动一个用户空间程序。首先通过call_usermodehelper_setup()初始化一个struct subprocess_info实例;然后调用call_usermodehelper_exec()执行,通过kernel_thread()创建线程,入口函数call_usermodehelper_exec_async()调用do_execve()加载用户空间程序。

    这里有几种不同的等待方式:UMH_NO_WAIT在将work放入system_unbound_wq之后,不等待直接返回;UMH_WAIT_EXEC只等待do_execve()执行完毕,而不等待进程结束;UMH_WAIT_PROC等待进程执行完毕;UMH_KILLABLE是附加标志,表示上述等待可以被致命信号打断(对应wait_for_completion_killable())。

    struct subprocess_info表示一个usermode helper执行的实例。

    复制代码
    #define UMH_NO_WAIT    0    /* don't wait at all */
    #define UMH_WAIT_EXEC    1    /* wait for the exec, but not the process */
    #define UMH_WAIT_PROC    2    /* wait for the process to complete */
    #define UMH_KILLABLE    4    /* wait for EXEC/PROC killable */
    
    /* One pending invocation of a usermode helper. */
    struct subprocess_info {
        struct work_struct work;        /* work item queued on system_unbound_wq */
        struct completion *complete;    /* signalled by umh_complete(); NULL when nobody waits */
        char *path;                     /* path of the user-space executable */
        char **argv;                    /* argument vector for the executable */
        char **envp;                    /* environment for the executable */
        int wait;                       /* UMH_* wait flags */
        int retval;                     /* result reported back to the caller */
        int (*init)(struct subprocess_info *info, struct cred *new);   /* optional hook run before the program is executed */
        void (*cleanup)(struct subprocess_info *info);                 /* optional hook run when this structure is freed */
        void *data;                     /* opaque pointer for the init/cleanup hooks */
    };
    复制代码

    call_usermodehelper()首先创建struct subprocess_info,然后执行用户空间程序。

    复制代码
    int call_usermodehelper(char *path, char **argv, char **envp, int wait)
    {
        struct subprocess_info *info;
        gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
    
        info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
                         NULL, NULL, NULL);-------------------------------需要用户空间执行的程序路径以及参数,内存分配gfp_mask等等,填充倒struc subprocess_info中。
        if (info == NULL)
            return -ENOMEM;
    
        return call_usermodehelper_exec(info, wait);----------------------将subprocess_info->work放入system_unbound_eq执行。
    }
    复制代码

    call_usermodehelper_setup()初始化struct subprocess_info实例,包括程序路径、参数等等,还有初始化一个work,对应的执行函数是call_usermodehelper_exec_work()。

    复制代码
    /*
     * Allocate and fill a subprocess_info describing one helper invocation.
     *
     * The work item is bound to call_usermodehelper_exec_work(); @path, @argv,
     * @envp, @init, @cleanup and @data are stored as given (pointers are not
     * copied). Returns the new structure, or NULL when the allocation fails.
     */
    struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
            char **envp, gfp_t gfp_mask,
            int (*init)(struct subprocess_info *info, struct cred *new),
            void (*cleanup)(struct subprocess_info *info),
            void *data)
    {
        struct subprocess_info *helper;

        helper = kzalloc(sizeof(*helper), gfp_mask);
        if (helper == NULL)
            return NULL;

        INIT_WORK(&helper->work, call_usermodehelper_exec_work);

        helper->path = path;
        helper->argv = argv;
        helper->envp = envp;

        helper->init = init;
        helper->cleanup = cleanup;
        helper->data = data;

        return helper;
    }
    
    /*
     * Workqueue callback for a subprocess_info: runs the helper synchronously
     * when UMH_WAIT_PROC is set, otherwise spawns it and returns immediately.
     */
    static void call_usermodehelper_exec_work(struct work_struct *work)
    {
        struct subprocess_info *sub_info =
            container_of(work, struct subprocess_info, work);

        if (sub_info->wait & UMH_WAIT_PROC) {
            call_usermodehelper_exec_sync(sub_info);
        } else {
            pid_t pid;
            /*
             * Use CLONE_PARENT to reparent it to kthreadd; we do not
             * want to pollute current->children, and we need a parent
             * that always ignores SIGCHLD to ensure auto-reaping.
             * With CLONE_PARENT the new task becomes a sibling of the
             * creating task rather than its child.
             */
            pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
                        CLONE_PARENT | SIGCHLD);
            if (pid < 0) {
                /* Spawn failed: report the error and wake or free. */
                sub_info->retval = pid;
                umh_complete(sub_info);
            }
        }
    }
    复制代码

    异步路径(call_usermodehelper_exec_work()的else分支)和call_usermodehelper_exec_sync()都以call_usermodehelper_exec_async()作为线程入口,最大的区别是创建进程的flags:前者使用CLONE_PARENT,导致新创建的进程和创建它的进程变成兄弟关系;而后者还保持父子关系。

    复制代码
    /*
     * Synchronous variant: spawn the helper as a child of the current task,
     * wait for it to exit, record the result in sub_info->retval and then
     * signal completion through umh_complete().
     */
    static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info)
    {
        pid_t pid;

        /* If SIGCLD is ignored sys_wait4 won't populate the status. */
        kernel_sigaction(SIGCHLD, SIG_DFL);
        pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD);
        if (pid < 0) {
            sub_info->retval = pid;
        } else {
            int ret = -ECHILD;

            /*
             * Wait for the child to exit; this wait is the main
             * difference between the sync and the async path.
             */
            sys_wait4(pid, (int __user *)&ret, 0, NULL);
            if (ret)
                sub_info->retval = ret;
        }

        /* Restore the "ignore SIGCHLD" disposition. */
        kernel_sigaction(SIGCHLD, SIG_IGN);

        umh_complete(sub_info);
    }
    
    static int call_usermodehelper_exec_async(void *data)
    {
        struct subprocess_info *sub_info = data;
        struct cred *new;
        int retval;
    
        spin_lock_irq(&current->sighand->siglock);
        flush_signal_handlers(current, 1);------------------------------------------进行signal、nice、credential准备工作。
        spin_unlock_irq(&current->sighand->siglock);
    
        set_user_nice(current, 0);
    
        retval = -ENOMEM;
        new = prepare_kernel_cred(current);
        if (!new)
            goto out;
    
        spin_lock(&umh_sysctl_lock);
        new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
        new->cap_inheritable = cap_intersect(usermodehelper_inheritable,
                             new->cap_inheritable);
        spin_unlock(&umh_sysctl_lock);
    
        if (sub_info->init) {
            retval = sub_info->init(sub_info, new);--------------------------------为进程创建进行初始化工作。
            if (retval) {
                abort_creds(new);
                goto out;
            }
        }
    
        commit_creds(new);
    
        retval = do_execve(getname_kernel(sub_info->path),
                   (const char __user *const __user *)sub_info->argv,
                   (const char __user *const __user *)sub_info->envp);------------调用usermode程序替代当前进程。
    ...
    }
    复制代码

    call_usermodehelper_exec()最主要的工作就是将一个usermode helper命令放入system_unbound_wq执行,然后根据wait类型进行不同条件的等待。

    复制代码
    /*
     * Queue @sub_info on system_unbound_wq and wait according to @wait.
     *
     * Returns -EINVAL when no path was set, -EBUSY when usermode helpers are
     * disabled, 0 for UMH_NO_WAIT, the nonzero result of
     * wait_for_completion_killable() when a killable wait ends early, and
     * otherwise sub_info->retval. @sub_info is freed here unless ownership was
     * handed over to the worker (UMH_NO_WAIT, or a killable wait that ended
     * before umh_complete() ran).
     */
    int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
    {
        DECLARE_COMPLETION_ONSTACK(done);
        int retval = 0;

        if (!sub_info->path) {
            call_usermodehelper_freeinfo(sub_info);
            return -EINVAL;
        }
        helper_lock();
        if (usermodehelper_disabled) {
            retval = -EBUSY;
            goto out;
        }

        sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
        sub_info->wait = wait;

        /* Run the helper from the unbound workqueue (not tied to one CPU). */
        queue_work(system_unbound_wq, &sub_info->work);
        /* UMH_NO_WAIT skips the completion wait below entirely. */
        if (wait == UMH_NO_WAIT)    /* task has freed sub_info */
            goto unlock;

        if (wait & UMH_KILLABLE) {
            /* Killable wait; a nonzero result means it ended early. */
            retval = wait_for_completion_killable(&done);
            if (!retval)
                goto wait_done;

            /* umh_complete() will see NULL and free sub_info */
            if (xchg(&sub_info->complete, NULL))
                goto unlock;
            /* fallthrough, umh_complete() was already called */
        }

        wait_for_completion(&done);
    wait_done:
        retval = sub_info->retval;
    out:
        call_usermodehelper_freeinfo(sub_info);
    unlock:
        helper_unlock();
        return retval;
    }
    
    /*
     * Finish a helper run: atomically take the completion pointer. If a waiter
     * is still registered, wake it; otherwise nobody will free @sub_info, so
     * free it here.
     */
    static void umh_complete(struct subprocess_info *sub_info)
    {
        struct completion *waiter = xchg(&sub_info->complete, NULL);

        if (!waiter) {
            call_usermodehelper_freeinfo(sub_info);
            return;
        }

        complete(waiter);
    }
    
    static void call_usermodehelper_freeinfo(struct subprocess_info *info)
    {
        if (info->cleanup)
            (*info->cleanup)(info);
        kfree(info);
    }
    复制代码

    1.5 uevent发送

    uevent发送可以通过kobject_uevent(),或者通过kobject_uevent_env()附加更多uevent信息。

    kobject_uevent_env()主要分为两部分,一是通过netlink_broadcast_filtered()将uevent以netlink消息的形式广播出去;另一个是通过usermode helper调用指定的uevent_helper对uevent进行处理,通常是热插拔程序mdev、udevd等。

    复制代码
    /*
     * Send a uevent for @kobj with no extra environment variables; simply
     * forwards to kobject_uevent_env() and returns its result.
     */
    int kobject_uevent(struct kobject *kobj, enum kobject_action action)
    {
        char **no_extra_env = NULL;

        return kobject_uevent_env(kobj, action, no_extra_env);
    }
    
    int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
                   char *envp_ext[])
    {
        struct kobj_uevent_env *env;
        const char *action_string = kobject_actions[action];------------将action转换成字符串。
        const char *devpath = NULL;
        const char *subsystem;
        struct kobject *top_kobj;
        struct kset *kset;
        const struct kset_uevent_ops *uevent_ops;
        int i = 0;
        int retval = 0;
    #ifdef CONFIG_NET
        struct uevent_sock *ue_sk;
    #endif

        top_kobj = kobj;
        while (!top_kobj->kset && top_kobj->parent)
            top_kobj = top_kobj->parent;
    ...
        kset = top_kobj->kset;
        uevent_ops = kset->uevent_ops;
    ...
        /* originating subsystem */
        if (uevent_ops && uevent_ops->name)
            subsystem = uevent_ops->name(kset, kobj);
        else
            subsystem = kobject_name(&kset->kobj);
    ...
        env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
        if (!env)
            return -ENOMEM;
    
        /* complete object path */
        devpath = kobject_get_path(kobj, GFP_KERNEL);
        if (!devpath) {
            retval = -ENOENT;
            goto exit;
        }
    
        /* default keys */
        retval = add_uevent_var(env, "ACTION=%s", action_string);-----------默认添加ACTION、DEVPATH、SUBSYSTEM三个键值。
        if (retval)
            goto exit;
    ...
        if (envp_ext) {-----------------------------------------------------将自定义的键值附上。
            for (i = 0; envp_ext[i]; i++) {
                retval = add_uevent_var(env, "%s", envp_ext[i]);
                if (retval)
                    goto exit;
            }
        }
    
        /* let the kset specific function add its stuff */
        if (uevent_ops && uevent_ops->uevent) {
            retval = uevent_ops->uevent(kset, kobj, env);
            if (retval) {
                pr_debug("kobject: '%s' (%p): %s: uevent() returned "
                     "%d
    ", kobject_name(kobj), kobj,
                     __func__, retval);
                goto exit;
            }
        }
    
        if (action == KOBJ_ADD)
            kobj->state_add_uevent_sent = 1;
        else if (action == KOBJ_REMOVE)
            kobj->state_remove_uevent_sent = 1;
    
        mutex_lock(&uevent_sock_mutex);
    ...
    #if defined(CONFIG_NET)
        /* send netlink message */
        list_for_each_entry(ue_sk, &uevent_sock_list, list) {------------------遍历uevent_sock_list上所有的socket。
            struct sock *uevent_sock = ue_sk->sk;
            struct sk_buff *skb;
            size_t len;
    
            if (!netlink_has_listeners(uevent_sock, 1))
                continue;
    
            /* allocate message with the maximum possible size */
            len = strlen(action_string) + strlen(devpath) + 2;
            skb = alloc_skb(len + env->buflen, GFP_KERNEL);--------------------为下面消息发送创建sk_buff实例。
            if (skb) {
                char *scratch;
    
                /* add header */
                scratch = skb_put(skb, len);
                sprintf(scratch, "%s@%s", action_string, devpath);-------------在已有键值基础上添加action_string@devpath。
    
                /* copy keys to our continuous event payload buffer */
                for (i = 0; i < env->envp_idx; i++) {
                    len = strlen(env->envp[i]) + 1;
                    scratch = skb_put(skb, len);
                    strcpy(scratch, env->envp[i]);
                }
    
                NETLINK_CB(skb).dst_group = 1;
                retval = netlink_broadcast_filtered(uevent_sock, skb,
                                    0, 1, GFP_KERNEL,
                                    kobj_bcast_filter,
                                    kobj);--------------------------------------通过netlink_broadcast_filtered()发送skb数据。
                /* ENOBUFS should be handled in userspace */
                if (retval == -ENOBUFS || retval == -ESRCH)
                    retval = 0;
            } else
                retval = -ENOMEM;
        }
    #endif
        mutex_unlock(&uevent_sock_mutex);
    
    #ifdef CONFIG_UEVENT_HELPER
        /* call uevent_helper, usually only enabled during early boot */
        if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
            struct subprocess_info *info;
    
            retval = add_uevent_var(env, "HOME=/");
            if (retval)
                goto exit;
            retval = add_uevent_var(env,
                        "PATH=/sbin:/bin:/usr/sbin:/usr/bin");
            if (retval)
                goto exit;
            retval = init_uevent_argv(env, subsystem);
            if (retval)
                goto exit;
    
            retval = -ENOMEM;
            info = call_usermodehelper_setup(env->argv[0], env->argv,
                             env->envp, GFP_KERNEL,
                             NULL, cleanup_uevent_env, env);
            if (info) {
                retval = call_usermodehelper_exec(info, UMH_NO_WAIT);
                env = NULL;    /* freed by cleanup_uevent_env */
            }
        }
    #endif
    ...
    }
    
    int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
    {
        va_list args;
        int len;
    
        if (env->envp_idx >= ARRAY_SIZE(env->envp)) {
            WARN(1, KERN_ERR "add_uevent_var: too many keys
    ");
            return -ENOMEM;
        }
    
        va_start(args, format);
        len = vsnprintf(&env->buf[env->buflen],
                sizeof(env->buf) - env->buflen,
                format, args);
        va_end(args);
    
        if (len >= (sizeof(env->buf) - env->buflen)) {
            WARN(1, KERN_ERR "add_uevent_var: buffer size too small
    ");
            return -ENOMEM;
        }
    
        env->envp[env->envp_idx++] = &env->buf[env->buflen];
        env->buflen += len + 1;
        return 0;
    }
    
    /*
     * Broadcast filter callback. @data is the kobject the uevent is about.
     *
     * Looks up the namespace operations of the kobject (falling back to the
     * parent of its kset's kobject). When both a netlink_ns and a namespace
     * callback exist, returns nonzero if the namespace of socket @dsk differs
     * from the kobject's namespace; otherwise returns 0.
     */
    static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
    {
        struct kobject *kobj = data;
        const struct kobj_ns_type_operations *ops = kobj_ns_ops(kobj);
        const void *obj_ns;
        const void *sock_ns;

        if (!ops && kobj->kset) {
            struct kobject *kset_kobj = &kobj->kset->kobj;

            if (kset_kobj->parent != NULL)
                ops = kobj_ns_ops(kset_kobj->parent);
        }

        if (!ops || !ops->netlink_ns || !kobj->ktype->namespace)
            return 0;

        obj_ns = kobj->ktype->namespace(kobj);
        sock_ns = ops->netlink_ns(dsk);

        return sock_ns != obj_ns;
    }
    /*
     * Helper filter: when @kobj has namespace operations, returns nonzero if
     * its namespace is not the initial one; returns 0 otherwise.
     */
    static int kobj_usermode_filter(struct kobject *kobj)
    {
        const struct kobj_ns_type_operations *ops = kobj_ns_ops(kobj);
        const void *obj_ns;
        const void *initial;

        if (!ops)
            return 0;

        obj_ns = kobj->ktype->namespace(kobj);
        initial = ops->initial_ns();

        return obj_ns != initial;
    }
    
    static int init_uevent_argv(struct kobj_uevent_env *env, const char *subsystem)
    {
        int len;
    
        len = strlcpy(&env->buf[env->buflen], subsystem,
                  sizeof(env->buf) - env->buflen);
        if (len >= (sizeof(env->buf) - env->buflen)) {
            WARN(1, KERN_ERR "init_uevent_argv: buffer size too small
    ");
            return -ENOMEM;
        }
    
        env->argv[0] = uevent_helper;
        env->argv[1] = &env->buf[env->buflen];
        env->argv[2] = NULL;
    
        env->buflen += len + 1;
        return 0;
    }
    
    /* Cleanup hook for the uevent helper: frees the data attached to @info. */
    static void cleanup_uevent_env(struct subprocess_info *info)
    {
        void *env = info->data;

        kfree(env);
    }
    复制代码

    kobject_uevent_env()详细解释参考《设备模型的uevent机制》。

    2. 用户空间处理uevent

    2.1 kernel发送uevent

    通过内核发送uevent很简单,将代表环境变量的字符串组装好后,选择合适的action,指定对应的kobject设备即可。

    复制代码
    static int user_cooling_set_cur_state(struct thermal_cooling_device *cdev,
                     unsigned long new_target_ratio)
    {
        int ret = 0, i = 0, temperature = 0;
        char *thermal_prop[4];
        struct thermal_instance *instance;
    
        list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
            if (instance->tz->temperature > temperature)
                temperature = instance->tz->temperature;
        }
    
        user_cooling_state = new_target_ratio;
        thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s", cdev->type);
        thermal_prop[1] = kasprintf(GFP_KERNEL, "STATE=%lu", new_target_ratio);
        thermal_prop[2] = kasprintf(GFP_KERNEL, "TEMP=%d", temperature);
        thermal_prop[3] = NULL;
        kobject_uevent_env(&cdev->device.kobj, KOBJ_CHANGE, thermal_prop);
        for (i = 0; i < 3; ++i)
            kfree(thermal_prop[i]);
    
        return ret;
    }
    复制代码

    通过kobject_uevent_env()可以添加自定义环境变量,用户空间就会收到如下uevent消息。

    复制代码
    change@/devices/virtual/thermal/cooling_device0
    ACTION=change
    DEVPATH=/devices/virtual/thermal/cooling_device0
    SUBSYSTEM=thermal
    NAME=user_cooling
    STATE=1
    TEMP=90
    SEQNUM=747
    复制代码

    2.2 用户空间uevent处理

    用户空间首先创建一个AF_NETLINK类型的socket并绑定,然后通过recv()接收消息,再处理收到的字符串。

    复制代码
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <unistd.h>
    #include <stdlib.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>
    
    #define UEVENT_MSG_LEN 2048
    #define USER_COOLING_DEV "/devices/virtual/thermal/cooling_device0"
    
    struct cooling_device {
        const char *name;
        const char *action;
        const char *path;
        int state;
        int temp;    
    };
    
    static int open_uevent_socket(void);
    static void parse_uevent(const char *msg, struct cooling_device *cdev);
    
    int main(int argc, char* argv[])
    {
        int socket_fd = -1;
        char msg[UEVENT_MSG_LEN+2];
        int n;
    
        socket_fd = open_uevent_socket();--------------------------------------创建socket。
        printf("socket_fd = %d
    ", socket_fd);
    
        do {
            while((n = recv(socket_fd, msg, UEVENT_MSG_LEN, 0)) > 0) {---------接收uevent信息。
                struct cooling_device cdev;
                memset(&cdev, 0x0, sizeof(cdev));
    
                if(n == UEVENT_MSG_LEN)
                    continue;
    
                msg[n] = '';
                msg[n+1] = '';
    
                parse_uevent(msg, &cdev);---------------------------------------解析收到的uevent字符。
            }
        } while(1);
    }
    
    static int open_uevent_socket(void)
    {
        struct sockaddr_nl addr;
        int sz = 64*1024;
        int s = 0;
    
        memset(&addr, 0, sizeof(addr));
        addr.nl_family = AF_NETLINK;
        addr.nl_pid = getpid();
        addr.nl_groups = 0xffffffff;
    
        s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);-------------地址族是AF_NETLINK类型的socket,协议类型是NETLINK_KOBJECT_UEVENT。
        if (s < 0) {
            return -1;
        }
    
        setsockopt(s, SOL_SOCKET, SO_RCVBUFFORCE, &sz, sizeof(sz));
    
        if (bind(s, (struct sockaddr *) &addr, sizeof(addr)) < 0) {-------------将当前socket绑定到AF_NETLINK地址族,并且设置本进程为处理消息的进程。
            close(s);
            return -1;
        }
    
        return s;
    }
    
    /*
     * Parse a uevent payload and print it when it is a "change" event for
     * USER_COOLING_DEV.
     *
     * @msg is a sequence of NUL-terminated strings ended by an empty string.
     * Recognised keys (NAME, ACTION, DEVPATH, STATE, TEMP) are stored in @cdev;
     * the string fields point into @msg and are not copied. Messages that lack
     * DEVPATH or ACTION are ignored.
     */
    static void parse_uevent(const char *msg, struct cooling_device *cdev)
    {
        while (*msg) {
            //printf("%s\n", msg);
            if (!strncmp(msg, "NAME=", 5)) {
                msg += 5;
                cdev->name = msg;
            } else if (!strncmp(msg, "ACTION=", 7)) {
                msg += 7;
                cdev->action = msg;
            } else if (!strncmp(msg, "DEVPATH=", 8)) {
                msg += 8;
                cdev->path = msg;
            } else if (!strncmp(msg, "STATE=", 6)) {
                msg += 6;
                cdev->state = atoi(msg);
            } else if (!strncmp(msg, "TEMP=", 5)) {
                msg += 5;
                cdev->temp = atoi(msg);
            }

            /* Advance past the terminating NUL of the current string. */
            while(*msg++);
        }

        /* Not every message carries these keys; never compare NULL pointers. */
        if (!cdev->path || !cdev->action)
            return;

        /* Exact matches only: a prefix such as "chang" must not qualify. */
        if(!strcmp(cdev->path, USER_COOLING_DEV) && !strcmp(cdev->action, "change"))
            printf("event { name=%s, action=%s, path=%s, state=%d, temp=%d}\n",
                cdev->name ? cdev->name : "(null)",
                cdev->action, cdev->path, cdev->state, cdev->temp);
    }
    复制代码

    3. mdev

    3.1 busybox下mdev分析

    mdev有两种使用方式:一种是附加-s参数,主动遍历/sys/dev下的设备;另一种是作为hotplug处理程序,被内核通过uevent_helper调用。

    mdev作为hotplug程序处理时,从环境变量中获取参数,创建或者删除设备,或者加载firmware。

    复制代码
    int mdev_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
    int mdev_main(int argc UNUSED_PARAM, char **argv)
    {
        RESERVE_CONFIG_BUFFER(temp, PATH_MAX + SCRATCH_SIZE);
    
        INIT_G();
    
    #if ENABLE_FEATURE_MDEV_CONF
        G.filename = "/etc/mdev.conf";
    #endif
    
        bb_sanitize_stdio();
    
        umask(0);
    
        xchdir("/dev");--------------------------------------------------当前工作目录切换到/dev下。
    
        if (argv[1] && strcmp(argv[1], "-s") == 0) {---------------------mdev -s情况下遍历/sys/dev下面所有设备。
            /*
             * Scan: mdev -s
             */
            struct stat st;
    
    #if ENABLE_FEATURE_MDEV_CONF
            /* Same as xrealloc_vector(NULL, 4, 0): */
            G.rule_vec = xzalloc((1 << 4) * sizeof(*G.rule_vec));
    #endif
            xstat("/", &st);
            G.root_major = major(st.st_dev);
            G.root_minor = minor(st.st_dev);
    
            putenv((char*)"ACTION=add");
    
            /* Create all devices from /sys/dev hierarchy */
            recursive_action("/sys/dev",
                     ACTION_RECURSE | ACTION_FOLLOWLINKS,
                     fileAction, dirAction, temp, 0);----------------这个函数是递归函数,扫描/sys/dev下所有文件,如果发现dev文件,则按照/etc/mdev.conf文件进行相应的设置。
        } else {
            char *fw;
            char *seq;
            char *action;
            char *env_devname;
            char *env_devpath;
            unsigned my_pid;
            unsigned seqnum = seqnum; /* for compiler */
            int seq_fd;
            smalluint op;
    
            /* Hotplug:
             * env ACTION=... DEVPATH=... SUBSYSTEM=... [SEQNUM=...] mdev
             * ACTION can be "add", "remove", "change"
             * DEVPATH is like "/block/sda" or "/class/input/mice"
             */
            env_devname = getenv("DEVNAME"); /* can be NULL */----------在内核的kobject_uevent_env()中已经将参数和环境变量作为参数传入do_execve()中。这里mdev可以通过getenv来解析。
            G.subsystem = getenv("SUBSYSTEM");
            action = getenv("ACTION");
            env_devpath = getenv("DEVPATH");
            if (!action || !env_devpath /*|| !G.subsystem*/)
                bb_show_usage();
            fw = getenv("FIRMWARE");
            seq = getenv("SEQNUM");
            op = index_in_strings(keywords, action);--------------------keywords仅包含add和remove,所以op也仅有OP_add和OP_remove。
    ...
            snprintf(temp, PATH_MAX, "/sys%s", env_devpath);
            if (op == OP_remove) {
                /* Ignoring "remove firmware". It was reported
                 * to happen and to cause erroneous deletion
                 * of device nodes. */
                if (!fw)
                    make_device(env_devname, temp, op);-----------------op为OP_remove:删除名称为env_devname的设备节点(temp为该设备对应的/sys路径)。
            }
            else {
                make_device(env_devname, temp, op);---------------------其他情况(如OP_add):创建名称为env_devname的设备节点(temp为该设备对应的/sys路径)。
                if (ENABLE_FEATURE_MDEV_LOAD_FIRMWARE) {
                    if (op == OP_add && fw)
                        load_firmware(fw, temp);------------------------将fw文件加载到temp路径中。
                }
            }
    ...
        }
    
        if (ENABLE_FEATURE_CLEAN_UP)
            RELEASE_CONFIG_BUFFER(temp);
    
        return EXIT_SUCCESS;
    }
    复制代码

    3.2 mdev.conf规则

    下面是mdev.conf配置文件的基本格式:

    <device regex> <uid>:<gid> <permissions> [=path] [@|$|*<command>]
    <device regex> <uid>:<gid> <permissions> [>path] [@|$|*<command>]
    <device regex> <uid>:<gid> <permissions> [!] [@|$|*<command>] 

    <device regex>:设备名称,支持正则表达式如hd[a-z][0-9]*等等。

    <uid>:<gid>:用户ID和组ID。

    <permissions>:表示设备的属性。

    [=path]:如果path是个目录(比如drivers/),则将设备节点移动到目录下;如果path是个名称,则将设备节点重命名为这个名称。

      hda 0:3 660 =drivers/:移动hda到drivers目录下。

      hdb 0:3 660 =cdrom:将hdb重命名为cdrom。

    [>path]:重命名或者移动设备节点,类似于[=path]。但是同时会在/dev/下创建指向该设备节点的符号链接。

    [!]:则不会创建设备节点。

    [@<command>]:在创建设备节点之后执行command。

    [$<command>]:在移除设备之前执行command。

    [*<command>]:在创建设备之后以及移除设备之前都执行command。

    上面的<command>通过system()调用执行,并且stdin/stdout/stderr都被重定向到/dev/null中。同时环境变量$MDEV指向匹配成功的设备节点名称,$ACTION表示uevent动作。

    3.3 mdev和udev的区别

    udev和mdev都是使用uevent机制处理热插拔的用户空间程序。

    但是udev通过监听内核发送的uevent消息,解析后进行相应的热插拔操作,包括创建/删除设备节点,加载/卸载驱动程序,加载Firmware等等。

    mdev则是基于uevent_helper机制,内核在发送uevent的时候,同时调用uevent_helper指向的用户空间程序进行热插拔处理。

    另外udev是作为一个daemon常驻内存的,一直在监听uevent;mdev只是在需要的时候被调用。

    4. 小结

    uevent是内核发送消息到用户空间的一种途径,这种技术基于netlink实现。

    内核中通过kobject_uevent()/kobject_uevent_env()发送uevent消息。

    用户空间使用标准的socket接口,等待接收消息,然后进行解析处理;或者通过usermode helper调用用户空间进程mdev进行热插拔处理,处理的方式遵循mdev.conf规则。

    联系方式:arnoldlu@qq.com
  • 相关阅读:
    MySQL 简单查询(实验回顾)
    PicGo + Gitee 创建图床,Typora竟还有这种功能
    SQL 条件判断
    django ORM中的复选MultiSelectField的使用
    with的用法
    django update-or-create的用法
    获取异常文件
    支付宝支付
    单例模式创建
    数据类模型
  • 原文地址:https://www.cnblogs.com/sky-heaven/p/14042128.html
Copyright © 2020-2023  润新知