• do_fork实现


    重点结构体学习

    struct task_struct {
        struct thread_info thread_info;
     
        void* stack;
     
       /* CPU-specific state of this task: */
        struct thread_struct        thread;
    }
     

    上次在学thread_info和内核栈的时候介绍过thread_info和stack的关系,今天还需要再介绍一个结构体:struct thread_struct。从注释上看,这个结构体是与CPU体系结构相关的。

    struct cpu_context {
        unsigned long x19;
        unsigned long x20;
        unsigned long x21;
        unsigned long x22;
        unsigned long x23;
        unsigned long x24;
        unsigned long x25;
        unsigned long x26;
        unsigned long x27;
        unsigned long x28;
        unsigned long fp;
        unsigned long sp;
        unsigned long pc;
    };
     
     
    struct thread_struct {
        struct cpu_context    cpu_context;    /* cpu context */
     
        unsigned int        fpsimd_cpu;
        void            *sve_state;    /* SVE registers, if any */
        unsigned int        sve_vl;        /* SVE vector length */
        unsigned int        sve_vl_onexec;    /* SVE vl after next exec */
        unsigned long        fault_address;    /* fault info */
        unsigned long        fault_code;    /* ESR_EL1 value */
        struct debug_info    debug;        /* debugging */
    };
     

    当然了我们目前只关注struct cpu_context结构,此结构会在进程切换时用来保存上一个进程的寄存器的值。一般需要将被切换出去的进程的x19-x28以及fp、sp、lr寄存器保存到cpu_context中(其中lr保存在pc成员里)。这个会在进程调度文章中有详细描述。

    再看一个结构体:

    struct user_pt_regs {
        __u64        regs[31];
        __u64        sp;
        __u64        pc;
        __u64        pstate;
    };
     
    /*
     * This struct defines the way the registers are stored on the stack during an
     * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for
     * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs.
     */
    struct pt_regs {
        union {
            struct user_pt_regs user_regs;
            struct {
                u64 regs[31];
                u64 sp;
                u64 pc;
                u64 pstate;
            };
        };
        u64 orig_x0;
    #ifdef __AARCH64EB__
        u32 unused2;
        s32 syscallno;
    #else
        s32 syscallno;
        u32 unused2;
    #endif
     
        u64 orig_addr_limit;
        u64 unused;    // maintain 16 byte alignment
        u64 stackframe[2];
    };
     

    从注释上看,struct pt_regs的主要作用是:当用户空间的进程发生异常(系统调用、中断等)进入内核模式时,将用户进程当前的寄存器状态保存到内核栈上的pt_regs中。

    struct thread_struct & struct pt_regs的区别

    thread_struct结构体主要是在内核态两个进程发生切换时,thread_struct用来保存上一个进程的相关寄存器。
    pt_regs结构体主要是当用户态的进程陷入到内核态时,需要使用pt_regs来保存用户态进程的寄存器状态。 

    ThreadInfo结构和内核栈的两种关系

    ThreadInfo结构在内核栈中

    Threadinfo结构存储在内核栈中,这种方式是最经典的。因为task_struct结构从1.0到现在5.0内核此结构一直在增大。如果将此结构放在内核栈中则很浪费内核栈的空间,则在threadinfo结构中有一个task_struct的指针就可以避免。

    struct thread_info {
        unsigned long        flags;        /* low level flags */
        mm_segment_t        addr_limit;    /* address limit */
        struct task_struct    *task;        /* main task structure */
        int            preempt_count;    /* 0 => preemptable, <0 => bug */
        int            cpu;        /* cpu */
    };

    可以看到thread_info结构中存在一个struct task_struct的指针。

    我们接着看下struct task_struct结构体

    struct task_struct {
        volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
        void *stack;
        atomic_t usage;
        unsigned int flags;    /* per process flags, defined below */
        unsigned int ptrace;
     
    #ifdef CONFIG_SMP
        struct llist_node wake_entry;
        int on_cpu;
        unsigned int wakee_flips;
        unsigned long wakee_flip_decay_ts;
        struct task_struct *last_wakee;
     
        int wake_cpu;
    #endif
        int on_rq;
        ......

    可以看到struct task_struct结构体中有一个stack成员,此stack指针就是内核栈的指针。

    接下来再看看内核stack和thread_info结构的关系

    union thread_union {
        struct thread_info thread_info;
        unsigned long stack[THREAD_SIZE/sizeof(long)];
    };
     
    #define THREAD_SIZE        16384
    #define THREAD_START_SP        (THREAD_SIZE - 16)

    arm64 ThreadInfo在task_struct结构中

    上面的一种方式是thread_info结构和内核栈共用一块存储区域,而另一种方式是thread_info结构存储在task_struct结构中。

    struct task_struct {
    #ifdef CONFIG_THREAD_INFO_IN_TASK
        /*
         * For reasons of header soup (see current_thread_info()), this
         * must be the first element of task_struct.
         */
        struct thread_info        thread_info;
    #endif
        /* -1 unrunnable, 0 runnable, >0 stopped: */
        volatile long            state;
     
        /*
         * This begins the randomizable portion of task_struct. Only
         * scheduling-critical items should be added above here.
         */
        randomized_struct_fields_start
     
        void                *stack;
        atomic_t            usage;
        /* Per task flags (PF_*), defined further below: */
        unsigned int            flags;
        unsigned int            ptrace;

    可以看到必须打开CONFIG_THREAD_INFO_IN_TASK这个配置,这时候thread_info就会在task_struct的第一个成员。而task_struct中依然存在void* stack结构

    接着看下thread_info结构,如下是ARM64架构定义的thread_info结构

    struct thread_info {
        unsigned long        flags;        /* low level flags */
        mm_segment_t        addr_limit;    /* address limit */
    #ifdef CONFIG_ARM64_SW_TTBR0_PAN
        u64            ttbr0;        /* saved TTBR0_EL1 */
    #endif
        union {
            u64        preempt_count;    /* 0 => preemptible, <0 => bug */
            struct {
    #ifdef CONFIG_CPU_BIG_ENDIAN
                u32    need_resched;
                u32    count;
    #else
                u32    count;
                u32    need_resched;
    #endif
            } preempt;
        };
    };
    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/sched.h>
    
    //#define task_stack_page(task)    ((void *)(task)->stack)
    //#define task_pt_regs(p) \
    //    ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
    
    static void print_task_info(struct task_struct *task)
    {
        printk(KERN_NOTICE "%10s %5d task_struct (%p) / stack(%p~%p) / thread_info(%p)",
            task->comm, 
            task->pid,
            task,
            task->stack,
            ((unsigned long *)task->stack) + THREAD_SIZE,
            task_thread_info(task));
      
         printk(KERN_NOTICE "thread_info(%p)", &(task->thread_info));
         struct pt_regs *regs = ((struct pt_regs *)(THREAD_SIZE + task->stack) - 1);
        printk(KERN_NOTICE "pc(%lu),  sp(%lu, fp(%lu)",regs->user_regs.pc, regs->user_regs.sp, regs->user_regs.pstate);
        // struct thread_struct  struct cpu_context 
        printk(KERN_NOTICE "saved pc(%lu), saved sp(%lu,saved fp(%lu)",task->thread.cpu_context.pc, task->thread.cpu_context.sp, task->thread.cpu_context.fp);
          
    }
    
    /*
     * Module entry point.
     *
     * Logs the task that is loading the module, then follows ->parent upward,
     * logging each ancestor, and stops after the task whose pid is 0 has been
     * printed.  Always returns 0.
     *
     * NOTE(review): ->parent is followed without any locking here; confirm
     * whether rcu_read_lock()/tasklist_lock is required for this walk.
     */
    static int __init task_init(void)
    {
        struct task_struct *cur = current;

        printk(KERN_INFO "task module init\n");

        /* The loading task itself is reported before any ancestor. */
        print_task_info(cur);

        for (;;) {
            cur = cur->parent;
            print_task_info(cur);
            if (cur->pid == 0)
                break;
        }

        return 0;
    }
    module_init(task_init);
    
    static void __exit task_exit(void)
    {
        printk(KERN_INFO "task module exit\n ");
    }
    module_exit(task_exit);
    [root@centos7 SimplestLKM]# cat Makefile 
    # If KERNELRELEASE is defined, we've been invoked from the
    # kernel build system and can use its language.
    ifneq ($(KERNELRELEASE),)
            obj-m := hello.o
    
    # Otherwise we were called directly from the command
    # line; invoke the kernel build system.
    else
            KERNELDIR ?= /lib/modules/$(shell uname -r)/build
            PWD := $(shell pwd)
    # "default" is a command name, not a file to be built.
    .PHONY: default
    default:
            # Use $(MAKE) rather than a literal "make" so that options such as
            # -j / -n are passed on to the kernel build.
            $(MAKE) -C $(KERNELDIR) M=$(PWD) modules
    endif
    [5977098.807472] task module init
    [5977098.815288]     insmod 35864 task_struct (ffffa05ee640cc00) / stack(ffff000255ce0000~ffff000255d60000) / thread_info(ffffa05ee640cc00)
    [5977098.815289] thread_info(ffffa05ee640cc00)
    [5977098.827500] pc(281473453696900),  sp(281474013337664, fp(2147483648)
    [5977098.831664] saved pc(18446462598867607252), saved sp(18446462608762403696,saved fp(18446462608762403696)
    [5977098.838168]       bash 34602 task_struct (ffffa05e96c63100) / stack(ffff000256580000~ffff000256600000) / thread_info(ffffa05e96c63100)
    [5977098.847780] thread_info(ffffa05e96c63100)
    [5977098.859985] pc(281473840816960),  sp(281474351498656, fp(1610612736)
    [5977098.864149] saved pc(18446462598867607252), saved sp(18446462608771447792,saved fp(18446462608771447792)
    [5977098.870652]       sshd 34600 task_struct (ffffa03f8b12db00) / stack(ffff0002555e0000~ffff000255660000) / thread_info(ffffa03f8b12db00)
    [5977098.880263] thread_info(ffffa03f8b12db00)
    [5977098.892467] pc(281473631283180),  sp(281474922455808, fp(536870912)
    [5977098.896636] saved pc(18446462598867607252), saved sp(18446462608755062512,saved fp(18446462608755062512)
    [5977098.903047]       sshd 102252 task_struct (ffffa03fccec3300) / stack(ffff000262720000~ffff0002627a0000) / thread_info(ffffa03fccec3300)
    [5977098.912659] thread_info(ffffa03fccec3300)
    [5977098.924950] pc(281473593010156),  sp(281474592600848, fp(536870912)
    [5977098.929118] saved pc(18446462598867607252), saved sp(18446462608974477040,saved fp(18446462608974477040)
    [5977098.935530]    systemd     1 task_struct (ffff805fc3280000) / stack(ffff00000b260000~ffff00000b2e0000) / thread_info(ffff805fc3280000)
    [5977098.945141] thread_info(ffff805fc3280000)
    [5977098.957345] pc(281472955906148),  sp(281474866521616, fp(2147483648)
    [5977098.961508] saved pc(18446462598867607252), saved sp(18446462598919945168,saved fp(18446462598919945168)
    [5977098.968010]  swapper/0     0 task_struct (ffff000008dc5600) / stack(ffff000008d80000~ffff000008e00000) / thread_info(ffff000008dc5600)
    [5977098.977621] thread_info(ffff000008dc5600)
    [5977098.989824] pc(0),  sp(35596688531008, fp(264)
    [5977098.993988] saved pc(18446462598867607252), saved sp(18446462598881279536,saved fp(18446462598881279536)

    copy_process继续分析

    /*
     * Thin wrapper around copy_thread(): clone_flags, sp, arg and p are
     * forwarded unchanged and the result of copy_thread() is returned.
     * The tls argument is not used by this variant.
     */
    static inline int copy_thread_tls(
            unsigned long clone_flags, unsigned long sp, unsigned long arg,
            struct task_struct *p, unsigned long tls)
    {
        return copy_thread(clone_flags, sp, arg, p);
    }
     
    //代码路径:arch/arm64/kernel/process.c
    int copy_thread(unsigned long clone_flags, unsigned long stack_start,
            unsigned long stk_sz, struct task_struct *p)
    {
        struct pt_regs *childregs = task_pt_regs(p);
     
        memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
     

    在copy_thread就会涉及到我们刚才上面学习的两个结构体,我们来做简单的分析下。

    • struct pt_regs *childregs = task_pt_regs(p);   获取到新创建进程的pt_regs结构,看下是如何获取的。
    #define task_stack_page(task)    ((void *)(task)->stack)
     
    #define task_pt_regs(p) \
        ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
     

    很清楚,通过进程的task_struct结构获取到内核栈stack成员(内核栈的起始地址),加上内核栈的大小THREAD_SIZE得到内核栈的最高地址,再减去一个struct pt_regs的大小,所以pt_regs是存储在内核栈的栈底(最高地址处)的。
    memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); 将新创建进程的thread_struct结构清空

     
     

    我们继续分析copy_thread函数,为了更清楚的表述,我们分段描述copy_thread函数

    if (likely(!(p->flags & PF_KTHREAD))) {       //用户进程
        *childregs = *current_pt_regs();
        childregs->regs[0] = 0;
     
        /*
         * Read the current TLS pointer from tpidr_el0 as it may be
         * out-of-sync with the saved value.
         */
        *task_user_tls(p) = read_sysreg(tpidr_el0);
     
        if (stack_start) {
            if (is_compat_thread(task_thread_info(p)))
                childregs->compat_sp = stack_start;
            else
                childregs->sp = stack_start;
        }
     
        /*
         * If a TLS pointer was passed to clone (4th argument), use it
         * for the new thread.
         */
        if (clone_flags & CLONE_SETTLS)
            p->thread.uw.tp_value = childregs->regs[3];
     
    • 接着就会去判断当前进程是不是内核线程,很明显没有设置PF_KTHREAD标志
    • 通过current_pt_regs获取当前进程的pt_regs, 然后将当前进程的pt_regs结构的值赋值给新创建进程的pt_regs
    • childregs->regs[0] = 0; 这里操作的原因是,一般用户态通过系统调用陷入到内核态,处理完毕后会通过x0寄存器传递返回值,这里首先将返回值设置为0(即子进程从fork返回时得到0)
    • stack_start是在clone时传递进来的参数,它的含义取决于创建的是什么:创建内核线程时,它是内核线程的回调处理函数(见下面else分支中对x19的设置);通过pthread_create创建用户线程时,它是新线程的用户态栈地址
    • 如果stack_start设置了,则这里是pthread_create创建的用户线程,则设置用户态的SP_EL0指针,childregs->sp = stack_start;
    •  
    } else {
        memset(childregs, 0, sizeof(struct pt_regs));
        childregs->pstate = PSR_MODE_EL1h;
        if (IS_ENABLED(CONFIG_ARM64_UAO) &&
            cpus_have_const_cap(ARM64_HAS_UAO))
            childregs->pstate |= PSR_UAO_BIT;
     
        if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
            childregs->pstate |= PSR_SSBS_BIT;
     
        p->thread.cpu_context.x19 = stack_start;
        p->thread.cpu_context.x20 = stk_sz;
    
     
    • 走到这里,则当前创建的是一个内核线程。如果是内核线程的话则不需要pt_regs结构,则需要清空memset(childregs, 0, sizeof(struct pt_regs));
    • childregs->pstate = PSR_MODE_EL1h; 设置当前进程是pstate是在EL1模式下,ARM64架构中使用pstate来描述当前处理器模式.
    • p->thread.cpu_context.x19 = stack_start; 创建内核线程的时候会传递内核线程的回调函数到stack_start的参数,将其设置到x19寄存器。
    • p->thread.cpu_context.x20 = stk_sz; 同样,创建内核线程的时候也会传递回调函数的参数,将其设置到x20寄存器
    p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
        p->thread.cpu_context.sp = (unsigned long)childregs;
     
    asmlinkage void ret_from_fork(void) asm("ret_from_fork")
     

    设置新创建进程的pc指针为ret_from_fork,当新创建的进程运行时会从ret_from_fork运行,ret_from_fork是个汇编语言编写的
    设置新创建进程的SP_EL1的值为childregs, SP_EL1则是指向内核栈的栈底处
    我们用一张图简单的总结下:

     

    至此分析完毕了copy_thread函数。

     cpu_switch_to进程切换 和thread.cpu_context

    DEFINE(THREAD_CPU_CONTEXT,    offsetof(struct task_struct, thread.cpu_context));
    cpu_switch_to  //arch/arm64/kernel/entry.S
    SYM_FUNC_START(cpu_switch_to)
            mov     x10, #THREAD_CPU_CONTEXT
            add     x8, x0, x10
            mov     x9, sp
            stp     x19, x20, [x8], #16             // store callee-saved registers
            stp     x21, x22, [x8], #16
            stp     x23, x24, [x8], #16
            stp     x25, x26, [x8], #16
            stp     x27, x28, [x8], #16
            stp     x29, x9, [x8], #16
            str     lr, [x8]
            add     x8, x1, x10
            ldp     x19, x20, [x8], #16             // restore callee-saved registers
            ldp     x21, x22, [x8], #16
            ldp     x23, x24, [x8], #16
            ldp     x25, x26, [x8], #16
            ldp     x27, x28, [x8], #16
            ldp     x29, x9, [x8], #16
            ldr     lr, [x8]
            mov     sp, x9
            msr     sp_el0, x1
            ptrauth_keys_install_kernel x1, x8, x9, x10
            scs_save x0, x8
            scs_load x1, x8
            ret
    SYM_FUNC_END(cpu_switch_to)

    这里传递过来的x0为prev进程的进程描述符(struct task_struct)地址,x1为next进程的进程描述符地址。这段代码会将prev进程的x19-x28、fp、sp、lr保存到prev进程的tsk.thread.cpu_context中,再把next进程的这些寄存器值从next进程的tsk.thread.cpu_context中恢复到相应寄存器。这里还把sp_el0设置为next进程描述符的地址,以便通过current宏找到当前的任务。

    需要注意的是:

    1. mov sp, x9 做了切换进程内核栈的操作。
    2. ldr lr, [x8] 设置了链接寄存器,然后ret的时候会将lr恢复到pc从而真正完成了执行流的切换

    新创建的进程第一次运行

    当copy_process返回新创建进程的task_struct结构后,会调用wake_up_new_task来唤醒进程。此函数中设置进程的状态为TASK_RUNNING,选择需要在哪个cpu上运行,然后将此进程加入到该cpu对应的就绪队列中,等待CPU的调度。

    当调度器选择此进程运行时,则就会运行之前在copy_thread中设置的ret_from_fork函数
     

    /* GPRs used by entry code */
    tsk    .req    x28        // current thread_info
     
    /*
     * Return the current thread_info.
     */
        .macro    get_thread_info, rd
        mrs    \rd, sp_el0
        .endm
     
     
    /*
     * This is how we return from a fork.
     */
    ENTRY(ret_from_fork)
        bl    schedule_tail
        cbz    x19, 1f                // not a kernel thread
        mov    x0, x20
        blr    x19 // kernel thread start func 
    1:    get_thread_info tsk
        b    ret_to_user
    ENDPROC(ret_from_fork)
     

    schedule_tail 此函数主要是为上一个切换出去的进程做一个扫尾的工作,在进程切换小节详解
    接着就判断x19的值是不是为0,在copy_thread中如果是一个内核线程会设置x19的。
    如果x19的值不为0,则会通过blr x19,去处理内核线程的回调函数的。其中x20要赋值给x0, x0一般当做参数传递
    如果x19的值是为0的话,则会跳到标号1处。
    get_thread_info会去读SP_EL0的值,SP_EL0的值存储的是当前进程的thread_info的值。
    tsk代表的是x28,则使用x28存储当前进程thread_info的值,然后跳转到ret_to_user处返回用户空间
     

    
    
     struct pt_regs *regs = ((struct pt_regs *)(THREAD_SIZE + task->stack) - 1);
        printk(KERN_NOTICE "pc(%lu),  sp(%lu, fp(%lu)",regs->user_regs.pc, regs->user_regs.sp, regs->user_regs.pstate);

    ret_to_user分析

    /*
     * Ok, we need to do extra processing, enter the slow path.
     */
    work_pending:
        mov    x0, sp                // 'regs'
        bl    do_notify_resume
    #ifdef CONFIG_TRACE_IRQFLAGS
        bl    trace_hardirqs_on        // enabled while in userspace
    #endif
        ldr    x1, [tsk, #TSK_TI_FLAGS]    // re-check for single-step
        b    finish_ret_to_user
    /*
     * "slow" syscall return path.
     */
    ret_to_user:
        disable_daif
        ldr    x1, [tsk, #TSK_TI_FLAGS]
        and    x2, x1, #_TIF_WORK_MASK
        cbnz    x2, work_pending
    finish_ret_to_user:
        enable_step_tsk x1, x2
    #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
        bl    stackleak_erase
    #endif
        kernel_exit 0
    ENDPROC(ret_to_user)
    • ret_to_user的第一句就是disable_daif, daif是ARM64 PSTATE代表当前处理器状态的,disable_daif则就会关闭DAIF各个位,D(debug)A(Serror)I(IRQ)F(FIQ)
    • ldr x1, [tsk, #TSK_TI_FLAGS], 将thread_info.flags的值赋值给X1
    • and x2, x1, #_TIF_WORK_MASK, 将X1的值和_TIF_WORK_MASK的值按位与,结果放入X2。_TIF_WORK_MASK是一个宏,里面包含了很多标志位,比如是否需要调度的标志_TIF_NEED_RESCHED等
    • cbnz x2, work_pending 当X2的值不等于0时,则跳转到work_pending做一个慢速的ret过程,在do_notify_resume中检查是否要对pending的任务做进一步的操作
    • 然后调用kernel_exit 0返回到用户空间

    kernel_exit分析

    kernel_exit的代码有点长,分段来简单看下。

    struct pt_regs 和kernel_exit

      DEFINE(S_X6,                  offsetof(struct pt_regs, regs[6]));
      DEFINE(S_X8,                  offsetof(struct pt_regs, regs[8]));
      DEFINE(S_X10,                 offsetof(struct pt_regs, regs[10]));
      DEFINE(S_X12,                 offsetof(struct pt_regs, regs[12]));
      DEFINE(S_X14,                 offsetof(struct pt_regs, regs[14]));
      DEFINE(S_X16,                 offsetof(struct pt_regs, regs[16]));
      DEFINE(S_X18,                 offsetof(struct pt_regs, regs[18]));
      DEFINE(S_X20,                 offsetof(struct pt_regs, regs[20]));
      DEFINE(S_X22,                 offsetof(struct pt_regs, regs[22]));
      DEFINE(S_X24,                 offsetof(struct pt_regs, regs[24]));
      DEFINE(S_X26,                 offsetof(struct pt_regs, regs[26]));
      DEFINE(S_X28,                 offsetof(struct pt_regs, regs[28]));
      DEFINE(S_FP,                  offsetof(struct pt_regs, regs[29]));
      DEFINE(S_LR,                  offsetof(struct pt_regs, regs[30]));
      DEFINE(S_SP,                  offsetof(struct pt_regs, sp));
      DEFINE(S_PSTATE,              offsetof(struct pt_regs, pstate));
      DEFINE(S_PC,                  offsetof(struct pt_regs, pc));
      DEFINE(S_SYSCALLNO,           offsetof(struct pt_regs, syscallno));
      DEFINE(S_SDEI_TTBR1,          offsetof(struct pt_regs, sdei_ttbr1));
      DEFINE(S_PMR_SAVE,            offsetof(struct pt_regs, pmr_save));
      DEFINE(S_STACKFRAME,          offsetof(struct pt_regs, stackframe));
    .macro  kernel_exit, el
        .if \el != 0
        /* Restore the task's original addr_limit. */
        ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
        str x20, [tsk, #TI_ADDR_LIMIT]
        .endif
    
        ldp x21, x22, [sp, #S_PC]       // load ELR, SPSR
        .if \el == 0
        ct_user_enter
        ldr x23, [sp, #S_SP]        // load return stack pointer
        msr sp_el0, x23
    #ifdef CONFIG_ARM64_ERRATUM_845719
    alternative_if_not ARM64_WORKAROUND_845719
        nop
        nop
    #ifdef CONFIG_PID_IN_CONTEXTIDR
        nop
    #endif
    alternative_else
        tbz x22, #4, 1f
    #ifdef CONFIG_PID_IN_CONTEXTIDR
        mrs x29, contextidr_el1
        msr contextidr_el1, x29
    #else
        msr contextidr_el1, xzr
    #endif
    1:
    alternative_endif
    #endif
        .endif
        msr elr_el1, x21            // set up the return data
        msr spsr_el1, x22
        ldp x0, x1, [sp, #16 * 0]
        ldp x2, x3, [sp, #16 * 1]
        ldp x4, x5, [sp, #16 * 2]
        ldp x6, x7, [sp, #16 * 3]
        ldp x8, x9, [sp, #16 * 4]
        ldp x10, x11, [sp, #16 * 5]
        ldp x12, x13, [sp, #16 * 6]
        ldp x14, x15, [sp, #16 * 7]
        ldp x16, x17, [sp, #16 * 8]
        ldp x18, x19, [sp, #16 * 9]
        ldp x20, x21, [sp, #16 * 10]
        ldp x22, x23, [sp, #16 * 11]
        ldp x24, x25, [sp, #16 * 12]
        ldp x26, x27, [sp, #16 * 13]
        ldp x28, x29, [sp, #16 * 14]
        ldr lr, [sp, #S_LR]
        add sp, sp, #S_FRAME_SIZE       // restore sp
        eret                    // return to kernel
        .endm

    .执行指令eret的时候,处理器自动使用寄存器SPSR_EL1保存的值恢复处理器状态,使用寄存器ELR_EL1保存的返回地址恢复程序计数器(PC).

    .macro    kernel_exit, el
    .if    \el != 0
    disable_daif
     
    /* Restore the task's original addr_limit. */
    ldr    x20, [sp, #S_ORIG_ADDR_LIMIT]
    str    x20, [tsk, #TSK_TI_ADDR_LIMIT]
     
    /* No need to restore UAO, it will be restored from SPSR_EL1 */
    .endif
     
    ldp    x21, x22, [sp, #S_PC]        // load ELR, SPSR
    .if    \el == 0
    ct_user_enter
    .endif
    • 当el不等于0时,此时还是调用disable_daif来关闭中断,debug等功能
    • 恢复task原始的addr_limit(这里没有深入研究它的作用,暂不关心)。
    • ldp x21, x22, [sp, #S_PC] ,其中SP是在copy_thread的时候设置了,sp是指向了struct pt_regs结构的。而此条指令是load pt_regs结构中的PC=X21, PSTATE=X22寄存器
    .if    \el == 0
    ldr    x23, [sp, #S_SP]        // load return stack pointer
    msr    sp_el0, x23
    tst    x22, #PSR_MODE32_BIT        // native task?
    b.eq    3f
    • 如果el=0的话,ldr x23, [sp, #S_SP] 这条指令把struct pt_regs结构中保存的sp读取到X23
    • msr sp_el0, x23 #将x23的值设置到SP_EL0寄存器中,SP_EL0就是用户态EL0的堆栈寄存器
    3:
        apply_ssbd 0, x0, x1
        .endif
     
        msr    elr_el1, x21            // set up the return data
        msr    spsr_el1, x22
        ldp    x0, x1, [sp, #16 * 0]
        ldp    x2, x3, [sp, #16 * 1]
        ldp    x4, x5, [sp, #16 * 2]
        ldp    x6, x7, [sp, #16 * 3]
        ldp    x8, x9, [sp, #16 * 4]
        ldp    x10, x11, [sp, #16 * 5]
        ldp    x12, x13, [sp, #16 * 6]
        ldp    x14, x15, [sp, #16 * 7]
        ldp    x16, x17, [sp, #16 * 8]
        ldp    x18, x19, [sp, #16 * 9]
        ldp    x20, x21, [sp, #16 * 10]
        ldp    x22, x23, [sp, #16 * 11]
        ldp    x24, x25, [sp, #16 * 12]
        ldp    x26, x27, [sp, #16 * 13]
        ldp    x28, x29, [sp, #16 * 14]
        ldr    lr, [sp, #S_LR]
        add    sp, sp, #S_FRAME_SIZE        // restore sp
     
    DEFINE(S_LR,            offsetof(struct pt_regs, regs[30]));
    DEFINE(S_FRAME_SIZE,        sizeof(struct pt_regs))
    • 刚才已经从x21,x22获取了pc和pstate的值,则通过msr指令将x21和x22的设置到elr_el1,spsr_el1寄存器中。
    • 接着就是从pt_regs结构体中恢复x0-x29寄存器的值。这些寄存器都是从用户态陷入到内核态时保存的,现在把它们恢复回去
    • ldr lr, [sp, #S_LR] 获取LR寄存器的值,LR就是连接返回地址。
    • add sp, sp, #S_FRAME_SIZE, 给sp加上pt_regs结构体的大小,则恢复SP堆栈指针的值
    .if    \el == 0
    alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
    #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
        bne    4f
        msr    far_el1, x30
        tramp_alias    x30, tramp_exit_native
        br    x30
    4:
        tramp_alias    x30, tramp_exit_compat
        br    x30
    #endif
        .else
        eret
        .endif
        sb
        .endm
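    • 当el=0时,默认直接执行eret返回;如果启用了ARM64_UNMAP_KERNEL_AT_EL0,eret会被替换成nop,随后用tramp_alias把tramp_exit_native(或tramp_exit_compat)跳板的地址放入x30,再通过br x30跳转到跳板代码。这里x30只是被临时用来存放跳转地址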
    • 否则通过eret返回。

    这个是kernel_exit的实现,大家有兴趣的话可以看看kernel_entry的实现,里面会有保存寄存器的过程。这里就不分析了。

    至此我们关于do_fork的实现分析完毕,总结下我们都涉及的内容

    • copy_process的实现,有几个重点
      • sched_fork
      • copy_mm
      • copy_thread
      • 这三个函数是重点,调度会在后面学习调度的时候分析。mm会在内存管理的时候分析
    • 新创建进程的第一次运行
    • ret_to_user的解释
    • kernel_exit的解释
  • 相关阅读:
    [kuangbin带你飞]专题1-23
    ES code study
    ES特点
    CENTOS7命令
    ES单机版安装
    ES安装手册
    数据库三大范式(1NF,2NF,3NF)及ER图
    win10下Spark的环境搭建
    MySQL安装详细图解整理
    MySQL中format()函数
  • 原文地址:https://www.cnblogs.com/dream397/p/16012205.html
Copyright © 2020-2023  润新知