• Linux Kernel Design and Implementation — Summary Notes (Chapter 3): Processes


    Process Management

    Process: a program in the midst of execution.

    Thread: the object of activity within a process.

    Two virtualization mechanisms:

    Virtualized processor: multiple processes share a single processor.

    Virtual memory: multiple threads share the virtual memory of their process.

    I. The Process Descriptor and the Task Structure

    The kernel stores processes in a circular doubly linked list called the task list. Each element of the list is a task_struct, known as the process descriptor, defined in the header <linux/sched.h>.

    struct task_struct {
        volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
        void *stack;
        atomic_t usage;
        unsigned int flags; /* per process flags, defined below */
        unsigned int ptrace;
    
    #ifdef CONFIG_SMP
        struct task_struct *wake_entry;
        int on_cpu;
    #endif
        int on_rq;
    
        int prio, static_prio, normal_prio;
        unsigned int rt_priority;
        const struct sched_class *sched_class;
        struct sched_entity se;
        struct sched_rt_entity rt;
    
    #ifdef CONFIG_PREEMPT_NOTIFIERS
        /* list of struct preempt_notifier: */
        struct hlist_head preempt_notifiers;
    #endif
    
        /*
         * fpu_counter contains the number of consecutive context switches
         * that the FPU is used. If this is over a threshold, the lazy fpu
         * saving becomes unlazy to save the trap. This is an unsigned char
         * so that after 256 times the counter wraps and the behavior turns
         * lazy again; this to deal with bursty apps that only use FPU for
         * a short time
         */
        unsigned char fpu_counter;
    #ifdef CONFIG_BLK_DEV_IO_TRACE
        unsigned int btrace_seq;
    #endif
    
        unsigned int policy;
        cpumask_t cpus_allowed;
    
    #ifdef CONFIG_PREEMPT_RCU
        int rcu_read_lock_nesting;
        char rcu_read_unlock_special;
    #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
        int rcu_boosted;
    #endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
        struct list_head rcu_node_entry;
    #endif /* #ifdef CONFIG_PREEMPT_RCU */
    #ifdef CONFIG_TREE_PREEMPT_RCU
        struct rcu_node *rcu_blocked_node;
    #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
    #ifdef CONFIG_RCU_BOOST
        struct rt_mutex *rcu_boost_mutex;
    #endif /* #ifdef CONFIG_RCU_BOOST */
    
    #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
        struct sched_info sched_info;
    #endif
    
        struct list_head tasks;
    #ifdef CONFIG_SMP
        struct plist_node pushable_tasks;
    #endif
    
        struct mm_struct *mm, *active_mm;
    #ifdef CONFIG_COMPAT_BRK
        unsigned brk_randomized:1;
    #endif
    #if defined(SPLIT_RSS_COUNTING)
        struct task_rss_stat    rss_stat;
    #endif
    /* task state */
        int exit_state;
        int exit_code, exit_signal;
        int pdeath_signal;  /*  The signal sent when the parent dies  */
        unsigned int group_stop;    /* GROUP_STOP_*, siglock protected */
        /* ??? */
        unsigned int personality;
        unsigned did_exec:1;
        unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                     * execve */
        unsigned in_iowait:1;
    
    
        /* Revert to default priority/policy when forking */
        unsigned sched_reset_on_fork:1;
        unsigned sched_contributes_to_load:1;
    
        pid_t pid;
        pid_t tgid;
    
    #ifdef CONFIG_CC_STACKPROTECTOR
        /* Canary value for the -fstack-protector gcc feature */
        unsigned long stack_canary;
    #endif
    
        /* 
         * pointers to (original) parent process, youngest child, younger sibling,
         * older sibling, respectively.  (p->father can be replaced with 
         * p->real_parent->pid)
         */
        struct task_struct *real_parent; /* real parent process */
        struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */
        /*
         * children/sibling forms the list of my natural children
         */
        struct list_head children;  /* list of my children */
        struct list_head sibling;   /* linkage in my parent's children list */
        struct task_struct *group_leader;   /* threadgroup leader */
    
        /*
         * ptraced is the list of tasks this task is using ptrace on.
         * This includes both natural children and PTRACE_ATTACH targets.
         * p->ptrace_entry is p's link on the p->parent->ptraced list.
         */
        struct list_head ptraced;
        struct list_head ptrace_entry;
    
        /* PID/PID hash table linkage. */
        struct pid_link pids[PIDTYPE_MAX];
        struct list_head thread_group;
    
        struct completion *vfork_done;      /* for vfork() */
        int __user *set_child_tid;      /* CLONE_CHILD_SETTID */
        int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */
    
        cputime_t utime, stime, utimescaled, stimescaled;
        cputime_t gtime;
    #ifndef CONFIG_VIRT_CPU_ACCOUNTING
        cputime_t prev_utime, prev_stime;
    #endif
        unsigned long nvcsw, nivcsw; /* context switch counts */
        struct timespec start_time;         /* monotonic time */
        struct timespec real_start_time;    /* boot based time */
    /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
        unsigned long min_flt, maj_flt;
    
        struct task_cputime cputime_expires;
        struct list_head cpu_timers[3];
    /* process credentials */
        const struct cred __rcu *real_cred; /* objective and real subjective task
                         * credentials (COW) */
        const struct cred __rcu *cred;  /* effective (overridable) subjective task
                         * credentials (COW) */
        struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
    
        char comm[TASK_COMM_LEN]; /* executable name excluding path
                         - access with [gs]et_task_comm (which lock
                           it with task_lock())
                         - initialized normally by setup_new_exec */
    /* file system info */
        int link_count, total_link_count;
    #ifdef CONFIG_SYSVIPC
    /* ipc stuff */
        struct sysv_sem sysvsem;
    #endif
    #ifdef CONFIG_DETECT_HUNG_TASK
    /* hung task detection */
        unsigned long last_switch_count;
    #endif
    /* CPU-specific state of this task */
        struct thread_struct thread;
    /* filesystem information */
        struct fs_struct *fs;
    /* open file information */
        struct files_struct *files;
    /* namespaces */
        struct nsproxy *nsproxy;
    /* signal handlers */
        struct signal_struct *signal;
        struct sighand_struct *sighand;
    
        sigset_t blocked, real_blocked;
        sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
        struct sigpending pending;
    
        unsigned long sas_ss_sp;
        size_t sas_ss_size;
        int (*notifier)(void *priv);
        void *notifier_data;
        sigset_t *notifier_mask;
        struct audit_context *audit_context;
    #ifdef CONFIG_AUDITSYSCALL
        uid_t loginuid;
        unsigned int sessionid;
    #endif
        seccomp_t seccomp;
    
    /* Thread group tracking */
        u32 parent_exec_id;
        u32 self_exec_id;
    /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
     * mempolicy */
        spinlock_t alloc_lock;
    
    #ifdef CONFIG_GENERIC_HARDIRQS
        /* IRQ handler threads */
        struct irqaction *irqaction;
    #endif
    
        /* Protection of the PI data structures: */
        raw_spinlock_t pi_lock;
    
    #ifdef CONFIG_RT_MUTEXES
        /* PI waiters blocked on a rt_mutex held by this task */
        struct plist_head pi_waiters;
        /* Deadlock detection and priority inheritance handling */
        struct rt_mutex_waiter *pi_blocked_on;
    #endif
    
    #ifdef CONFIG_DEBUG_MUTEXES
        /* mutex deadlock detection */
        struct mutex_waiter *blocked_on;
    #endif
    #ifdef CONFIG_TRACE_IRQFLAGS
        unsigned int irq_events;
        unsigned long hardirq_enable_ip;
        unsigned long hardirq_disable_ip;
        unsigned int hardirq_enable_event;
        unsigned int hardirq_disable_event;
        int hardirqs_enabled;
        int hardirq_context;
        unsigned long softirq_disable_ip;
        unsigned long softirq_enable_ip;
        unsigned int softirq_disable_event;
        unsigned int softirq_enable_event;
        int softirqs_enabled;
        int softirq_context;
    #endif
    #ifdef CONFIG_LOCKDEP
    # define MAX_LOCK_DEPTH 48UL
        u64 curr_chain_key;
        int lockdep_depth;
        unsigned int lockdep_recursion;
        struct held_lock held_locks[MAX_LOCK_DEPTH];
        gfp_t lockdep_reclaim_gfp;
    #endif
    
    /* journalling filesystem info */
        void *journal_info;
    
    /* stacked block device info */
        struct bio_list *bio_list;
    
    #ifdef CONFIG_BLOCK
    /* stack plugging */
        struct blk_plug *plug;
    #endif
    
    /* VM state */
        struct reclaim_state *reclaim_state;
    
        struct backing_dev_info *backing_dev_info;
    
        struct io_context *io_context;
    
        unsigned long ptrace_message;
        siginfo_t *last_siginfo; /* For ptrace use.  */
        struct task_io_accounting ioac;
    #if defined(CONFIG_TASK_XACCT)
        u64 acct_rss_mem1;  /* accumulated rss usage */
        u64 acct_vm_mem1;   /* accumulated virtual memory usage */
        cputime_t acct_timexpd; /* stime + utime since last update */
    #endif
    #ifdef CONFIG_CPUSETS
        nodemask_t mems_allowed;    /* Protected by alloc_lock */
        int mems_allowed_change_disable;
        int cpuset_mem_spread_rotor;
        int cpuset_slab_spread_rotor;
    #endif
    #ifdef CONFIG_CGROUPS
        /* Control Group info protected by css_set_lock */
        struct css_set __rcu *cgroups;
        /* cg_list protected by css_set_lock and tsk->alloc_lock */
        struct list_head cg_list;
    #endif
    #ifdef CONFIG_FUTEX
        struct robust_list_head __user *robust_list;
    #ifdef CONFIG_COMPAT
        struct compat_robust_list_head __user *compat_robust_list;
    #endif
        struct list_head pi_state_list;
        struct futex_pi_state *pi_state_cache;
    #endif
    #ifdef CONFIG_PERF_EVENTS
        struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
        struct mutex perf_event_mutex;
        struct list_head perf_event_list;
    #endif
    #ifdef CONFIG_NUMA
        struct mempolicy *mempolicy;    /* Protected by alloc_lock */
        short il_next;
        short pref_node_fork;
    #endif
        atomic_t fs_excl;   /* holding fs exclusive resources */
        struct rcu_head rcu;
    
        /*
         * cache last used pipe for splice
         */
        struct pipe_inode_info *splice_pipe;
    #ifdef  CONFIG_TASK_DELAY_ACCT
        struct task_delay_info *delays;
    #endif
    #ifdef CONFIG_FAULT_INJECTION
        int make_it_fail;
    #endif
        struct prop_local_single dirties;
    #ifdef CONFIG_LATENCYTOP
        int latency_record_count;
        struct latency_record latency_record[LT_SAVECOUNT];
    #endif
        /*
         * time slack values; these are used to round up poll() and
         * select() etc timeout values. These are in nanoseconds.
         */
        unsigned long timer_slack_ns;
        unsigned long default_timer_slack_ns;
    
        struct list_head    *scm_work_list;
    #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        /* Index of current stored address in ret_stack */
        int curr_ret_stack;
        /* Stack of return addresses for return function tracing */
        struct ftrace_ret_stack *ret_stack;
        /* time stamp for last schedule */
        unsigned long long ftrace_timestamp;
        /*
         * Number of functions that haven't been traced
         * because of depth overrun.
         */
        atomic_t trace_overrun;
        /* Pause for the tracing */
        atomic_t tracing_graph_pause;
    #endif
    #ifdef CONFIG_TRACING
        /* state flags for use by tracers */
        unsigned long trace;
        /* bitmask and counter of trace recursion */
        unsigned long trace_recursion;
    #endif /* CONFIG_TRACING */
    #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
        struct memcg_batch_info {
            int do_batch;   /* incremented when batch uncharge started */
            struct mem_cgroup *memcg; /* target memcg of uncharge */
            unsigned long nr_pages; /* uncharged usage */
            unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
        } memcg_batch;
    #endif
    #ifdef CONFIG_HAVE_HW_BREAKPOINT
        atomic_t ptrace_bp_refcnt;
    #endif
    };
    task_struct

    The task_struct is allocated by the slab allocator. Since 2.6 the slab allocator creates the task_struct dynamically, so only a new struct thread_info needs to be created at the bottom of the kernel stack (for stacks that grow down) or at the top (for stacks that grow up).

    It lives in the header <asm/thread_info.h>; after some digging, the ARM version turns out to be in arch/arm/include/asm/thread_info.h:

    /*
     * low level task data that entry.S needs immediate access to.
     * __switch_to() assumes cpu_context follows immediately after cpu_domain.
     */
    struct thread_info {
        unsigned long       flags;      /* low level flags */
        int         preempt_count;  /* 0 => preemptable, <0 => bug */
        mm_segment_t        addr_limit; /* address limit */
        struct task_struct  *task;      /* main task structure */
        struct exec_domain  *exec_domain;   /* execution domain */
        __u32           cpu;        /* cpu */
        __u32           cpu_domain; /* cpu domain */
        struct cpu_context_save cpu_context;    /* cpu context */
        __u32           syscall;    /* syscall number */
        __u8            used_cp[16];    /* thread used copro */
        unsigned long       tp_value;
        struct crunch_state crunchstate;
        union fp_state      fpstate __attribute__((aligned(8)));
        union vfp_state     vfpstate;
    #ifdef CONFIG_ARM_THUMBEE
        unsigned long       thumbee_state;  /* ThumbEE Handler Base register */
    #endif
        struct restart_block    restart_block;
    };
    thread_info

    At the end of the kernel stack sits the thread_info structure; its task field holds a pointer to that task's actual task_struct.
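
    How the kernel gets from the stack to the task: a minimal sketch mirroring the ARM implementation of this era (THREAD_SIZE is the fixed, aligned size of the kernel stack; treat this as an illustration, not book text):

    /* thread_info sits at the stack's end, so masking the stack pointer
     * with the stack-size alignment recovers its address */
    static inline struct thread_info *current_thread_info(void)
    {
        register unsigned long sp asm("sp");
        return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
    }

    /* current then dereferences the task field, roughly: */
    #define get_current() (current_thread_info()->task)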

    II. The Process Descriptor

    A PID is a number, of type pid_t, which in practice is a plain int. The kernel stores each process's PID in its process descriptor.
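
    For reference, a sketch of the typedef chain behind pid_t in kernel headers of this era (the paths are indicative):

    typedef int __kernel_pid_t;      /* include/asm-generic/posix_types.h */
    typedef __kernel_pid_t pid_t;    /* include/linux/types.h */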

    The default maximum is 32768 (the range of a short int). The compile-time limit can be changed in <linux/threads.h>, or the ceiling can be raised at runtime by writing to /proc/sys/kernel/pid_max, e.g. echo 65535 > /proc/sys/kernel/pid_max.

    2.1 Process States

    TASK_RUNNING: the process is runnable; it is either currently running or sitting on a run queue waiting to run.

    TASK_INTERRUPTIBLE: the process is sleeping (blocked); it wakes when the condition it is waiting for becomes true or when it receives a signal.

    TASK_UNINTERRUPTIBLE: like the above, except the process does not wake up even if it receives a signal.

    __TASK_TRACED: the process is being traced by another process, such as a debugger via ptrace.

    __TASK_STOPPED: the process has stopped executing; it is neither running nor eligible to run.

    2.2 Setting the Process State

    The kernel adjusts the state of a given process with:

    set_task_state(task, state);        /* set task's state to state */
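
    A minimal sketch (illustrative, not from the book) of the usual sleep pattern built on this: mark the current task as sleeping, then invoke the scheduler; a later wake_up_process() on the task makes it runnable again.

    #include <linux/sched.h>

    static void demo_sleep(void)
    {
        set_current_state(TASK_INTERRUPTIBLE); /* same as set_task_state(current, TASK_INTERRUPTIBLE) */
        schedule();                            /* yield the CPU; sleep until woken */
        /* execution resumes here, back in TASK_RUNNING */
    }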

    Process context: when a program executes a system call or triggers an exception, it traps into kernel space. The kernel is then executing on behalf of the process and is said to be in process context.

    The process family tree: all processes that share the same parent are called siblings.

    Every task_struct contains a pointer named parent to the parent's task_struct, and a list of its children named children.

    /* obtain the process descriptor of the parent */
    struct task_struct *my_parent = current->parent;  
    
    /* iterate over the children in turn */
    struct task_struct *task;
    struct list_head *list;
    list_for_each(list, &current->children) {
        task = list_entry(list, struct task_struct, sibling);
        /* task now points to one of the current process's children */
    }
    
    /* walk up the parent chain all the way to init */
    struct task_struct *task;
    for(task = current; task != &init_task; task = task->parent)
        ;
    
    /* the next and the previous task in the system-wide task list */
    list_entry(task->tasks.next, struct task_struct, tasks)
    list_entry(task->tasks.prev, struct task_struct, tasks)
    
    struct task_struct *task;
    for_each_process(task) {
        /* print the name and PID of each task */
        printk("%s[%d]\n", task->comm, task->pid);
    }
    /* note: iterating over every task is expensive */
    task_struct traversal
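
    For reference, in kernels of this era these helpers are defined in <linux/sched.h> roughly as follows — the task list is circular and anchored at init_task:

    #define next_task(p) \
            list_entry_rcu((p)->tasks.next, struct task_struct, tasks)

    #define for_each_process(p) \
            for (p = &init_task ; (p = next_task(p)) != &init_task ; )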

    III. Process Creation

    First, fork() creates a child process by copying the current process. The child differs from the parent only in its PID, its PPID, and certain resources and statistics (such as pending signals) that are not inherited.

    exec() then reads an executable file, loads it into the address space, and begins running it.

    Copy-on-write: a technique that can defer or altogether avoid copying data. Instead of duplicating the whole process address space, parent and child share a single copy.

    Data is copied only when it is written to; until then it is shared read-only.
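
    A small user-space demonstration of my own (not from the book): after fork(), both tasks see the same value, but the child's first write triggers a private page copy, so the parent's data is untouched.

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        int value = 1;
        pid_t pid = fork();        /* child gets a COW view of the parent's pages */

        if (pid < 0)
            exit(1);
        if (pid == 0) {            /* child */
            value = 42;            /* first write: the kernel copies the page */
            printf("child:  value = %d\n", value);
            return 0;
        }
        wait(NULL);                /* parent waits for the child to finish */
        printf("parent: value = %d (unchanged)\n", value);
        return 0;
    }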

    3.1 fork()

    Linux implements fork() via the clone() system call. do_fork(), defined in kernel/fork.c, does the bulk of the creation work.

    It calls copy_process() and then sets the new process running.

    /*
     *  Ok, this is the main fork-routine.
     *
     * It copies the process, and if successful kick-starts
     * it and waits for it to finish using the VM if required.
     */
    long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
              struct pt_regs *regs,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr)
    {
        struct task_struct *p;
        int trace = 0;
        long nr;
    
        /*
         * Do some preliminary argument and permissions checking before we
         * actually start allocating stuff
         */
        if (clone_flags & CLONE_NEWUSER) {
            if (clone_flags & CLONE_THREAD)
                return -EINVAL;
            /* hopefully this check will go away when userns support is
             * complete
             */
            if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SETUID) ||
                    !capable(CAP_SETGID))
                return -EPERM;
        }
    
        /*
         * When called from kernel_thread, don't do user tracing stuff.
         */
        if (likely(user_mode(regs)))
            trace = tracehook_prepare_clone(clone_flags);
    
        /* 1) Call copy_process(), then set the new process running */
    
        p = copy_process(clone_flags, stack_start, regs, stack_size,
                 child_tidptr, NULL, trace);
        /*
         * Do this prior waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
         */
        if (!IS_ERR(p)) {
            struct completion vfork;
    
            trace_sched_process_fork(current, p);
    
            nr = task_pid_vnr(p);
    
            if (clone_flags & CLONE_PARENT_SETTID)
                put_user(nr, parent_tidptr);
    
            if (clone_flags & CLONE_VFORK) {
                p->vfork_done = &vfork;
                init_completion(&vfork);
            }
    
            audit_finish_fork(p);
            tracehook_report_clone(regs, clone_flags, nr, p);
    
            /*
             * We set PF_STARTING at creation in case tracing wants to
             * use this to distinguish a fully live task from one that
             * hasn't gotten to tracehook_report_clone() yet.  Now we
             * clear it and set the child going.
             */
            p->flags &= ~PF_STARTING;
    
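        /* Wake the child; the kernel intends the child to run first, since
         * a child that immediately calls exec() avoids the copy-on-write
         * overhead the parent would incur by writing to the address space */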
            wake_up_new_task(p);
    
            tracehook_report_clone_complete(trace, regs,
                            clone_flags, nr, p);
    
            if (clone_flags & CLONE_VFORK) {
                freezer_do_not_count();
                wait_for_completion(&vfork);
                freezer_count();
                tracehook_report_vfork_done(p, nr);
            }
        } else {
            nr = PTR_ERR(p);
        }
        return nr;
    }
    
    
    /*
     * This creates a new process as a copy of the old one,
     * but does not actually start it yet.
     *
     * It copies the registers, and all the appropriate
     * parts of the process environment (as per the clone
     * flags). The actual kick-off is left to the caller.
     */
    static struct task_struct *copy_process(unsigned long clone_flags,
                        unsigned long stack_start,
                        struct pt_regs *regs,
                        unsigned long stack_size,
                        int __user *child_tidptr,
                        struct pid *pid,
                        int trace)
    {
        int retval;
        struct task_struct *p;
        int cgroup_callbacks_done = 0;
    
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
            return ERR_PTR(-EINVAL);
    
        /*
         * Thread groups must share signals as well, and detached threads
         * can only be started up within the thread group.
         */
        if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
            return ERR_PTR(-EINVAL);
    
        /*
         * Shared signal handlers imply shared VM. By way of the above,
         * thread groups also imply shared VM. Blocking this case allows
         * for various simplifications in other code.
         */
        if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
            return ERR_PTR(-EINVAL);
    
        /*
         * Siblings of global init remain as zombies on exit since they are
         * not reaped by their parent (swapper). To solve this and to avoid
         * multi-rooted process trees, prevent global and container-inits
         * from creating siblings.
         */
        if ((clone_flags & CLONE_PARENT) &&
                    current->signal->flags & SIGNAL_UNKILLABLE)
            return ERR_PTR(-EINVAL);
    
        retval = security_task_create(clone_flags);
        if (retval)
            goto fork_out;
    
        /* 1) dup_task_struct() creates a kernel stack, a thread_info and a
         * task_struct for the new process, with the same values as the
         * current task's. At this point the child and parent descriptors
         * are identical.
         */
        retval = -ENOMEM;
        p = dup_task_struct(current);
        if (!p)
            goto fork_out;
    
        ftrace_graph_init_task(p);
    
        rt_mutex_init_task(p);
    
    #ifdef CONFIG_PROVE_LOCKING
        DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
        DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
    #endif
        
        /* The first if checks whether the user's process count exceeds the
         * limit; the second if checks whether the user has sufficient
         * privileges to exceed it
         */
        retval = -EAGAIN;
        if (atomic_read(&p->real_cred->user->processes) >=
                task_rlimit(p, RLIMIT_NPROC)) {
            if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
                p->real_cred->user != INIT_USER)
                goto bad_fork_free;
        }
    
        retval = copy_creds(p, clone_flags);
        if (retval < 0)
            goto bad_fork_free;
    
        /*
         * If multiple threads are within copy_process(), then this check
         * triggers too late. This doesn't hurt, the check is only there
         * to stop root fork bombs.
         * 2) Ensure that, with this new child created, the current user
         * does not exceed the number of processes allotted to it
         */
        retval = -EAGAIN;
        if (nr_threads >= max_threads)
            goto bad_fork_cleanup_count;
    
        /* take a reference so the exec_domain's module stays loaded */
        if (!try_module_get(task_thread_info(p)->exec_domain->module))      
            goto bad_fork_cleanup_count;
    
        p->did_exec = 0;
        delayacct_tsk_init(p);    /* Must remain after dup_task_struct() */
        /* 5) copy_flags() updates the flags member of the task_struct:
         * the PF_SUPERPRIV flag, which marks whether the task used superuser
         * privileges, is cleared, and the PF_FORKNOEXEC flag, which marks a
         * process that has not yet called exec(), is set
         */
        copy_flags(clone_flags, p);
        INIT_LIST_HEAD(&p->children);
        INIT_LIST_HEAD(&p->sibling);
        rcu_copy_process(p);
        p->vfork_done = NULL;
        spin_lock_init(&p->alloc_lock);
    
        init_sigpending(&p->pending);
    
        p->utime = cputime_zero;
        p->stime = cputime_zero;
        p->gtime = cputime_zero;
        p->utimescaled = cputime_zero;
        p->stimescaled = cputime_zero;
    #ifndef CONFIG_VIRT_CPU_ACCOUNTING
        p->prev_utime = cputime_zero;
        p->prev_stime = cputime_zero;
    #endif
    #if defined(SPLIT_RSS_COUNTING)
        memset(&p->rss_stat, 0, sizeof(p->rss_stat));
    #endif
    
        p->default_timer_slack_ns = current->timer_slack_ns;
    
        task_io_accounting_init(&p->ioac);
        acct_clear_integrals(p);
    
        posix_cpu_timers_init(p);
    
        do_posix_clock_monotonic_gettime(&p->start_time);
        p->real_start_time = p->start_time;
        monotonic_to_bootbased(&p->real_start_time);
        p->io_context = NULL;
        p->audit_context = NULL;
        if (clone_flags & CLONE_THREAD)
            threadgroup_fork_read_lock(current);
        cgroup_fork(p);
    #ifdef CONFIG_NUMA
        p->mempolicy = mpol_dup(p->mempolicy);
         if (IS_ERR(p->mempolicy)) {
             retval = PTR_ERR(p->mempolicy);
             p->mempolicy = NULL;
             goto bad_fork_cleanup_cgroup;
         }
        mpol_fix_fork_child_flag(p);
    #endif
    #ifdef CONFIG_TRACE_IRQFLAGS
        p->irq_events = 0;
    #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
        p->hardirqs_enabled = 1;
    #else
        p->hardirqs_enabled = 0;
    #endif
        p->hardirq_enable_ip = 0;
        p->hardirq_enable_event = 0;
        p->hardirq_disable_ip = _THIS_IP_;
        p->hardirq_disable_event = 0;
        p->softirqs_enabled = 1;
        p->softirq_enable_ip = _THIS_IP_;
        p->softirq_enable_event = 0;
        p->softirq_disable_ip = 0;
        p->softirq_disable_event = 0;
        p->hardirq_context = 0;
        p->softirq_context = 0;
    #endif
    #ifdef CONFIG_LOCKDEP
        p->lockdep_depth = 0; /* no locks held yet */
        p->curr_chain_key = 0;
        p->lockdep_recursion = 0;
    #endif
    
    #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
    #endif
    #ifdef CONFIG_CGROUP_MEM_RES_CTLR
        p->memcg_batch.do_batch = 0;
        p->memcg_batch.memcg = NULL;
    #endif
    
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p);
    
        retval = perf_event_init_task(p);
        if (retval)
            goto bad_fork_cleanup_policy;
    
        if ((retval = audit_alloc(p)))
            goto bad_fork_cleanup_policy;
        
        /* In the calls below, process state is either duplicated for the
         * child or shared with the parent, depending on which clone flags
         * were passed
         */
        /* copy all the process information */
        if ((retval = copy_semundo(clone_flags, p)))
            goto bad_fork_cleanup_audit;
        if ((retval = copy_files(clone_flags, p)))
            goto bad_fork_cleanup_semundo;
        if ((retval = copy_fs(clone_flags, p)))
            goto bad_fork_cleanup_files;
        if ((retval = copy_sighand(clone_flags, p)))
            goto bad_fork_cleanup_fs;
        if ((retval = copy_signal(clone_flags, p)))
            goto bad_fork_cleanup_sighand;
        if ((retval = copy_mm(clone_flags, p)))
            goto bad_fork_cleanup_signal;
        if ((retval = copy_namespaces(clone_flags, p)))
            goto bad_fork_cleanup_mm;
        if ((retval = copy_io(clone_flags, p)))
            goto bad_fork_cleanup_namespaces;
    
        /* set up the child's kernel stack and register state */
        retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
        if (retval)
            goto bad_fork_cleanup_io;
    
        /* 6) Allocate a valid PID for the new process */
        if (pid != &init_struct_pid) {
            retval = -ENOMEM;
            pid = alloc_pid(p->nsproxy->pid_ns);
            if (!pid)
                goto bad_fork_cleanup_io;
        }
    
        p->pid = pid_nr(pid);
        p->tgid = p->pid;
        if (clone_flags & CLONE_THREAD)
            p->tgid = current->tgid;
    
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
        /*
         * Clear TID on mm_release()?
         */
        p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
    #ifdef CONFIG_BLOCK
        p->plug = NULL;
    #endif
    #ifdef CONFIG_FUTEX
        p->robust_list = NULL;
    #ifdef CONFIG_COMPAT
        p->compat_robust_list = NULL;
    #endif
        INIT_LIST_HEAD(&p->pi_state_list);
        p->pi_state_cache = NULL;
    #endif
        /*
         * sigaltstack should be cleared when sharing the same VM
         */
        if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
            p->sas_ss_sp = p->sas_ss_size = 0;
    
        /*
         * Syscall tracing and stepping should be turned off in the
         * child regardless of CLONE_PTRACE.
         */
        user_disable_single_step(p);
        clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
    #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
    #endif
        clear_all_latency_tracing(p);
    
        /* ok, now we should be set up.. */
        p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
        p->pdeath_signal = 0;
        p->exit_state = 0;
    
        /*
         * Ok, make it visible to the rest of the system.
         * We dont wake it up yet.
         */
        p->group_leader = p;
        INIT_LIST_HEAD(&p->thread_group);
    
        /* Now that the task is set up, run cgroup callbacks if
         * necessary. We need to run them before the task is visible
         * on the tasklist. */
        cgroup_fork_callbacks(p);
        cgroup_callbacks_done = 1;
    
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
    
        /* CLONE_PARENT re-uses the old parent */
        if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
            p->real_parent = current->real_parent;
            p->parent_exec_id = current->parent_exec_id;
        } else {
            p->real_parent = current;
            p->parent_exec_id = current->self_exec_id;
        }
    
        spin_lock(&current->sighand->siglock);
    
        /*
         * Process group and session signals need to be delivered to just the
         * parent before the fork or both the parent and the child after the
         * fork. Restart if a signal comes in before we add the new process to
         * it's process group.
         * A fatal signal pending means that current will exit, so the new
         * thread can't slip out of an OOM kill (or normal SIGKILL).
          */
        recalc_sigpending();
        if (signal_pending(current)) {
            spin_unlock(&current->sighand->siglock);
            write_unlock_irq(&tasklist_lock);
            retval = -ERESTARTNOINTR;
            goto bad_fork_free_pid;
        }
    
        if (clone_flags & CLONE_THREAD) {
            current->signal->nr_threads++;
            atomic_inc(&current->signal->live);
            atomic_inc(&current->signal->sigcnt);
            p->group_leader = current->group_leader;
            list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
        }
    
        /* 8) Finish up and return a pointer to the child */
        if (likely(p->pid)) {
            tracehook_finish_clone(p, clone_flags, trace);
    
            if (thread_group_leader(p)) {
                if (is_child_reaper(pid))
                    p->nsproxy->pid_ns->child_reaper = p;
    
                p->signal->leader_pid = pid;
                p->signal->tty = tty_kref_get(current->signal->tty);
                attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
                attach_pid(p, PIDTYPE_SID, task_session(current));
                list_add_tail(&p->sibling, &p->real_parent->children);
                list_add_tail_rcu(&p->tasks, &init_task.tasks);
                __this_cpu_inc(process_counts);
            }
            attach_pid(p, PIDTYPE_PID, pid);
            nr_threads++;
        }
    
        total_forks++;
        spin_unlock(&current->sighand->siglock);
        write_unlock_irq(&tasklist_lock);
        proc_fork_connector(p);
        cgroup_post_fork(p);
        if (clone_flags & CLONE_THREAD)
            threadgroup_fork_read_unlock(current);
        perf_event_fork(p);
        return p;
    
    bad_fork_free_pid:
        if (pid != &init_struct_pid)
            free_pid(pid);
    bad_fork_cleanup_io:
        if (p->io_context)
            exit_io_context(p);
    bad_fork_cleanup_namespaces:
        exit_task_namespaces(p);
    bad_fork_cleanup_mm:
        if (p->mm) {
            task_lock(p);
            if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
                atomic_dec(&p->mm->oom_disable_count);
            task_unlock(p);
            mmput(p->mm);
        }
    bad_fork_cleanup_signal:
        if (!(clone_flags & CLONE_THREAD))
            free_signal_struct(p->signal);
    bad_fork_cleanup_sighand:
        __cleanup_sighand(p->sighand);
    bad_fork_cleanup_fs:
        exit_fs(p); /* blocking */
    bad_fork_cleanup_files:
        exit_files(p); /* blocking */
    bad_fork_cleanup_semundo:
        exit_sem(p);
    bad_fork_cleanup_audit:
        audit_free(p);
    bad_fork_cleanup_policy:
        perf_event_free_task(p);
    #ifdef CONFIG_NUMA
        mpol_put(p->mempolicy);
    bad_fork_cleanup_cgroup:
    #endif
        if (clone_flags & CLONE_THREAD)
            threadgroup_fork_read_unlock(current);
        cgroup_exit(p, cgroup_callbacks_done);
        delayacct_tsk_free(p);
        module_put(task_thread_info(p)->exec_domain->module);
    bad_fork_cleanup_count:
        atomic_dec(&p->cred->user->processes);
        exit_creds(p);
    bad_fork_free:
        free_task(p);
    fork_out:
        return ERR_PTR(retval);
    }
    do_fork() and copy_process()

    3.2 Creating Threads

    Thread creation resembles ordinary process creation, except that clone() must be passed flags specifying which resources the parent and child share:

    clone(CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, 0);

    These flags are defined in <linux/sched.h>; the list below summarizes them (a user-space sketch follows the list).

    CLONE_FILES    Parent and child share open files
    CLONE_FS    Parent and child share filesystem information
    CLONE_IDLETASK    Set PID to zero (used only by the idle task)
    CLONE_NEWNS    Create a new namespace for the child
    CLONE_PARENT    Child is to have the same parent as its parent
    CLONE_PTRACE    Continue tracing the child
    CLONE_SETTID    Write the TID back to user space
    CLONE_SETTLS    Create a new TLS for the child
    CLONE_SIGHAND    Parent and child share signal handlers and blocked signals
    CLONE_SYSVSEM    Parent and child share System V SEM_UNDO semantics
    CLONE_THREAD    Parent and child are placed in the same thread group
    CLONE_VFORK    vfork() was used, so the parent sleeps until the child wakes it
    CLONE_UNTRACED    Do not let the tracing process force CLONE_PTRACE on the child
    CLONE_STOP    Start the process in the TASK_STOPPED state
    CLONE_CHILD_CLEARTID    Clear the TID in the child
    CLONE_CHILD_SETTID    Set the TID in the child
    CLONE_PARENT_SETTID    Set the TID in the parent
    CLONE_VM    Parent and child share the address space
    clone() flags
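
    As a concrete illustration, a user-space sketch of my own (not from the book) using the glibc clone() wrapper with the flag combination shown above; the caller must allocate the child's stack, and SIGCHLD is added so the parent can reap the child with waitpid():

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #define STACK_SIZE (1024 * 1024)

    static int thread_fn(void *arg)
    {
        printf("child %d shares the parent's VM, files and signal handlers\n",
               getpid());
        return 0;
    }

    int main(void)
    {
        char *stack = malloc(STACK_SIZE);
        if (!stack)
            return 1;

        /* stacks grow down on most architectures, so pass the top of the area */
        pid_t pid = clone(thread_fn, stack + STACK_SIZE,
                          CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | SIGCHLD,
                          NULL);
        if (pid == -1)
            return 1;

        waitpid(pid, NULL, 0);     /* reap the child */
        free(stack);
        return 0;
    }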

    3.3 Kernel Threads

    Kernel threads differ from normal processes in that they have no address space of their own (their mm pointer is NULL); they run only in kernel space, and like ordinary processes they can be scheduled and preempted.

    Running ps -ef on a Linux system shows the kernel threads. The interface is declared in <linux/kthread.h>. A new kernel thread is created with:

    struct task_struct *kthread_create(int (*threadfn)(void *data),
        void *data,
        const char namefmt[],
        ...)
    threadfn: the function the thread will run
    data: the argument passed to threadfn
    namefmt: a printf()-style format string (with the variadic arguments that follow) that names the thread

    The newly created thread is in an unrunnable state; it does not start running until explicitly woken with wake_up_process().

    A helper that combines creation and wake-up in one step:

    struct task_struct *kthread_run(int (*threadfn)(void *data),
        void *data,
        const char namefmt[],
        ...)

    Once started, a kernel thread keeps running until it calls do_exit(), or until another part of the kernel calls kthread_stop(), passing the task_struct address returned when the thread was created.

    int kthread_stop(struct task_struct *k)
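
    Putting the pieces together, a minimal module sketch of my own (names such as worker_fn are illustrative) that starts a kernel thread with kthread_run() and stops it on unload:

    #include <linux/delay.h>
    #include <linux/err.h>
    #include <linux/kthread.h>
    #include <linux/module.h>

    static struct task_struct *worker;

    static int worker_fn(void *data)
    {
        /* loop until someone calls kthread_stop() on us */
        while (!kthread_should_stop()) {
            /* ... periodic work would go here ... */
            msleep(1000);
        }
        return 0;
    }

    static int __init demo_init(void)
    {
        worker = kthread_run(worker_fn, NULL, "demo-worker");
        return IS_ERR(worker) ? PTR_ERR(worker) : 0;
    }

    static void __exit demo_exit(void)
    {
        kthread_stop(worker);   /* wakes the thread and waits for it to return */
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");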

    IV. Process Termination

    Most tasks terminate through do_exit(), defined in kernel/exit.c.

    NORET_TYPE void do_exit(long code)
    {
        struct task_struct *tsk = current;
        int group_dead;
    
        profile_task_exit(tsk);
    
        WARN_ON(atomic_read(&tsk->fs_excl));
        WARN_ON(blk_needs_flush_plug(tsk));
    
        if (unlikely(in_interrupt()))
            panic("Aiee, killing interrupt handler!");
        if (unlikely(!tsk->pid))
            panic("Attempted to kill the idle task!");
    
        /*
         * If do_exit is called because this processes oopsed, it's possible
         * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
         * continuing. Amongst other possible reasons, this is to prevent
         * mm_release()->clear_child_tid() from writing to a user-controlled
         * kernel address.
         */
        set_fs(USER_DS);
    
        tracehook_report_exit(&code);
    
        validate_creds_for_do_exit(tsk);
    
        /*
         * We're taking recursive faults here in do_exit. Safest is to just
         * leave this task alone and wait for reboot.
         */
        if (unlikely(tsk->flags & PF_EXITING)) {
        printk(KERN_ALERT
            "Fixing recursive fault but reboot is needed!\n");
            /*
             * We can do this unlocked here. The futex code uses
             * this flag just to verify whether the pi state
             * cleanup has been done or not. In the worst case it
             * loops once more. We pretend that the cleanup was
             * done as there is no way to return. Either the
             * OWNER_DIED bit is set by now or we push the blocked
             * task into the wait for ever nirwana as well.
             */
            tsk->flags |= PF_EXITPIDONE;
            set_current_state(TASK_UNINTERRUPTIBLE);
            schedule();
        }
    
        exit_irq_thread();
    
        /* 1) Set the PF_EXITING flag in the task's flags member */
        exit_signals(tsk);  /* sets PF_EXITING */
        /*
         * tsk->flags are checked in the futex code to protect against
         * an exiting task cleaning up the robust pi futexes.
         */
        smp_mb();
        raw_spin_unlock_wait(&tsk->pi_lock);
    
        if (unlikely(in_atomic()))
        printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
                    current->comm, task_pid_nr(current),
                    preempt_count());
    
        acct_update_integrals(tsk);
        /* sync mm's RSS info before statistics gathering */
        if (tsk->mm)
            sync_mm_rss(tsk, tsk->mm);
        /* 2) Remove any remaining kernel timers (cf. del_timer_sync()) */
        group_dead = atomic_dec_and_test(&tsk->signal->live);
        if (group_dead) {
            hrtimer_cancel(&tsk->signal->real_timer);
            exit_itimers(tsk->signal);
            if (tsk->mm)
                setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
        }
        acct_collect(code, group_dead);
        if (group_dead)
            tty_audit_exit();
        if (unlikely(tsk->audit_context))
            audit_free(tsk);
    
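        /* 7) The exit code supplied by exit() (or dictated by some other
         * kernel mechanism) is stored in the exit_code member of the
         * task_struct, where the parent can retrieve it at any time */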
        tsk->exit_code = code;
        taskstats_exit(tsk, group_dead);
    
        /* 4) exit_mm() releases the mm_struct held by the process; if no other process is using it, it is freed for good */
        exit_mm(tsk);
    
        if (group_dead)
            acct_process();
        trace_sched_process_exit(tsk);
    
        /* 5) If the process is queued waiting on an IPC semaphore, dequeue it */
        exit_sem(tsk);
        /* 6) Decrement the usage counts of objects related to file descriptors and
         * filesystem data; when a count reaches zero, no process is using the
         * resource and it can be released */
        exit_files(tsk);
        exit_fs(tsk);
        check_stack_usage();
        exit_thread();
    
        /*
         * Flush inherited counters to the parent - before the parent
         * gets woken up by child-exit notifications.
         *
         * because of cgroup mode, must be called before cgroup_exit()
         */
        perf_event_exit_task(tsk);
    
        cgroup_exit(tsk, 1);
    
        if (group_dead)
            disassociate_ctty(1);
    
        module_put(task_thread_info(tsk)->exec_domain->module);
    
        proc_exit_connector(tsk);
    
        /*
         * FIXME: do that only when needed, using sched_exit tracepoint
         */
        ptrace_put_breakpoints(tsk);
    
        /* 8) Signal the parent, reparent this task's children to another thread
         * in its thread group or to the init process, and set the exit state to EXIT_ZOMBIE */
        exit_notify(tsk, group_dead);
    #ifdef CONFIG_NUMA
        task_lock(tsk);
        mpol_put(tsk->mempolicy);
        tsk->mempolicy = NULL;
        task_unlock(tsk);
    #endif
    #ifdef CONFIG_FUTEX
        if (unlikely(current->pi_state_cache))
            kfree(current->pi_state_cache);
    #endif
        /*
         * Make sure we are holding no locks:
         */
        debug_check_no_locks_held(tsk);
        /*
         * We can do this unlocked here. The futex code uses this flag
         * just to verify whether the pi state cleanup has been done
         * or not. In the worst case it loops once more.
         */
        tsk->flags |= PF_EXITPIDONE;
    
        if (tsk->io_context)
            exit_io_context(tsk);
    
        if (tsk->splice_pipe)
            __free_pipe_info(tsk->splice_pipe);
    
        validate_creds_for_do_exit(tsk);
    
        preempt_disable();
        exit_rcu();
        /* causes final put_task_struct in finish_task_switch(). */
        tsk->state = TASK_DEAD;
        /* Call schedule() to switch to a new process; as EXIT_ZOMBIE the task is never scheduled again, so this is the last code it runs */
        schedule();
        BUG();
        /* Avoid "noreturn function does return".  */
        for (;;)
            cpu_relax();    /* For when BUG is null */
    }
    do_exit

    4.1 Removing the Process Descriptor

    After do_exit() has been called, the task_struct is released only after the parent has retrieved the dead child's information, or has told the kernel it is not interested in it.
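
    A user-space illustration of my own (not from the book): between the child's exit and the parent's wait, the child is a zombie — visible as state Z in ps — and its task_struct is still held by the kernel:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        pid_t pid = fork();
        int status;

        if (pid == 0)
            exit(7);                  /* child terminates via exit() -> do_exit() */

        sleep(1);                     /* child is now EXIT_ZOMBIE; try `ps` here */
        waitpid(pid, &status, 0);     /* reaping lets the kernel release_task() it */
        if (WIFEXITED(status))
            printf("child exit code: %d\n", WEXITSTATUS(status));
        return 0;
    }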

    When the process descriptor is finally released, release_task() is invoked:

    void release_task(struct task_struct * p)
    {
        struct task_struct *leader;
        int zap_leader;
    repeat:
        tracehook_prepare_release_task(p);
        /* don't need to get the RCU readlock here - the process is dead and
         * can't be modifying its own credentials. But shut RCU-lockdep up */
        rcu_read_lock();
        atomic_dec(&__task_cred(p)->user->processes);
        rcu_read_unlock();
    
        proc_flush_task(p);
    
        write_lock_irq(&tasklist_lock);
        tracehook_finish_release_task(p);
        /* 1) __exit_signal() calls __unhash_process(), which calls detach_pid()
         * to remove the process from the pidhash and from the task list
         * 2) It also releases any remaining resources the zombie was using and
         * performs final statistics and accounting */
        __exit_signal(p);
    
        /*
         * If we are the last non-leader member of the thread
         * group, and the leader is zombie, then notify the
         * group leader's parent process. (if it wants notification.)
         */
         /* 3) If this is the last process in the thread group and the leader
          * is already dead, notify the zombie leader's parent */
        zap_leader = 0;
        leader = p->group_leader;
        if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
            BUG_ON(task_detached(leader));
            do_notify_parent(leader, leader->exit_signal);
            /*
             * If we were the last child thread and the leader has
             * exited already, and the leader's parent ignores SIGCHLD,
             * then we are the one who should release the leader.
             *
             * do_notify_parent() will have marked it self-reaping in
             * that case.
             */
            zap_leader = task_detached(leader);
    
            /*
             * This maintains the invariant that release_task()
             * only runs on a task in EXIT_DEAD, just for sanity.
             */
            if (zap_leader)
                leader->exit_state = EXIT_DEAD;
        }
    
        write_unlock_irq(&tasklist_lock);
        /* 4) Free the pages containing the process's kernel stack and
         * thread_info, and return the task_struct to the slab cache */
        release_thread(p);
        call_rcu(&p->rcu, delayed_put_task_struct);
    
        p = leader;
        if (unlikely(zap_leader))
            goto repeat;
    }
    release_task