• linux thread_info 与thread_struct


     有个同事在看3.10内核代码时,容易把这两个结构混淆,所以我简单答复了一下。

    thread_info是和内核栈放一块的,网上到处都是thread_info的资料,但thread_struct的资料比较少,在此记录下,以备忘

    struct thread_info {
        struct task_struct    *task;        /* main task structure */
        struct exec_domain    *exec_domain;    /* execution domain */
        __u32            flags;        /* low level flags */
        __u32            status;        /* thread synchronous flags */
        __u32            cpu;        /* current CPU */
        int            preempt_count;    /* 0 => preemptable,
                               <0 => BUG */
        mm_segment_t        addr_limit;
        struct restart_block    restart_block;
        void __user        *sysenter_return;
    #ifdef CONFIG_X86_32
        unsigned long           previous_esp;   /* ESP of the previous stack in
                               case of nested (IRQ) stacks
                            */
        __u8            supervisor_stack[0];
    #endif
        unsigned int        sig_on_uaccess_error:1;
        unsigned int        uaccess_err:1;    /* uaccess failed */
    };

     thread_info 位于 task_struct 的 stack 成员所指向的那块内存中,它为啥能和内核栈组成 union 呢?按道理 union 里面的成员不会同时有效,也就是既然用作了A成员,B成员就不能再使用,但是明显我们的 thread_info 结构和内核栈是同时使用的。其实可以理解为 thread_info 放在了这块内存的最低地址处,而栈是从高地址向低地址增长的,所以两者不冲突。这也间接说明了,内核栈实际可用的空间没有整个 union 那么大,要被 thread_info 占据一部分。放在一起还有个好处,就是根据 esp 能够快速地查找到 task_struct 的指针:把 esp 的低若干位清零就得到 thread_info 的地址,而 thread_info 的第一个成员就是 task_struct 指针。到底清零多少位,是与栈的大小相关的。

    比如64位的x86,默认内核栈大小为:

    #define THREAD_SIZE_ORDER    2
    #define THREAD_SIZE  (PAGE_SIZE << THREAD_SIZE_ORDER)
    #define get_current() (current_thread_info()->task)
    #define current get_current()
    
    static inline struct thread_info *current_thread_info(void)
    {
        register unsigned long sp asm ("sp");
        return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
    }

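    所以经常可以看到代码中使用 current 宏,就是通过 sp 指针来找到 task_struct。(注:这种直接屏蔽 sp 低位的写法是 arm 上的实现;3.10 的 x86_64 是通过 per-cpu 变量 kernel_stack 计算出 thread_info 的地址,但 thread_info 位于内核栈最低地址处这一点是一样的。)看下面的一个例子更能理解: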

    crash> bt
    PID: 178838  TASK: ffff88290f7ddee0  CPU: 9   COMMAND: "kthread_send/9"----------------当前task指针为ffff88290f7ddee0
     #0 [ffff882fbe843a70] machine_kexec at ffffffff8105d77b
     #1 [ffff882fbe843ad0] __crash_kexec at ffffffff8110aca2
     #2 [ffff882fbe843ba0] panic at ffffffff816ad52f
     #3 [ffff882fbe843c20] watchdog_timer_fn at ffffffff81135a51
     #4 [ffff882fbe843c58] __hrtimer_run_queues at ffffffff810b93a6
     #5 [ffff882fbe843cb0] hrtimer_interrupt at ffffffff810b993f
     #6 [ffff882fbe843cf8] local_apic_timer_interrupt at ffffffff8105467b
     #7 [ffff882fbe843d10] smp_apic_timer_interrupt at ffffffff816c9e83
     #8 [ffff882fbe843d28] apic_timer_interrupt at ffffffff816c6732
     #9 [ffff882fbe843dc8] queued_spin_lock_slowpath at ffffffff816adeee
    #10 [ffff882fbe843dd8] _raw_spin_lock at ffffffff816bb080
    #11 [ffff882fbe843de8] dev_watchdog at ffffffff815bca52
    #12 [ffff882fbe843e28] call_timer_fn at ffffffff8109a9c8
    #13 [ffff882fbe843e60] run_timer_softirq at ffffffff8109ceed
    #14 [ffff882fbe843ed8] __do_softirq at ffffffff8109404d
    #15 [ffff882fbe843f48] call_softirq at ffffffff816c8afc
    #16 [ffff882fbe843f60] do_softirq at ffffffff8102d435
    #17 [ffff882fbe843f80] irq_exit at ffffffff81094495
    #18 [ffff882fbe843f98] smp_apic_timer_interrupt at ffffffff816c9e88
    #19 [ffff882fbe843fb0] apic_timer_interrupt at ffffffff816c6732
    --- <IRQ stack> ---
    #20 [ffff882b680d3c28] apic_timer_interrupt at ffffffff816c6732
        [exception RIP: ixgbe_xmit_frame_ring+83]
        RIP: ffffffffc01299e3  RSP: ffff882b680d3cd0  RFLAGS: 00000212---------------------在中断之前的rsp
        RAX: 0000000000000562  RBX: 0000000000000001  RCX: 000000000000403d
        RDX: ffff882fb9331c00  RSI: ffff8828d7b8fac0  RDI: 0000000000000001
        RBP: ffff882b680d3d48   R8: 0000000000000008   R9: 0000a0a5447b9d78
        R10: ffff8828c6e84f00  R11: 000000002b3000b8  R12: ffff8828c0291b00
        R13: 0000000022300000  R14: 0000000000000001  R15: ffff882b680d3cc0
        ORIG_RAX: ffffffffffffff10  CS: 0010  SS: 0018
    #21 [ffff882b680d3d50] ixgbe_xmit_frame at ffffffffc012a918 [ixgbe]
    #22 [ffff882b680d3d80] wit_send_tasklet at ffffffffc043b63c [witdriver]
    #23 [ffff882b680d3e78] wit_kthread_xmit_fn at ffffffffc043ba95 [witdriver]
    #24 [ffff882b680d3ec8] kthread at ffffffff810b5241
    #25 [ffff882b680d3f50] ret_from_fork at ffffffff816c5577

    根据task_struct 找stack:

    crash> task_struct.stack ffff88290f7ddee0
      stack = 0xffff882b680d0000
    crash> rd 0xffff882b680d0000
    ffff882b680d0000:  ffff88290f7ddee0----------------------stack中的第一个成员就是指向task_struct的

    再看看 rsp 的值 ffff882b680d3cd0 与 stack 的值 0xffff882b680d0000,两者只有低14位不同,也就是只在 16K(THREAD_SIZE)范围内的偏移不同,把 rsp 的低14位清零就得到 stack 的地址。

    有时候我们会遇到内核堆栈越界的情况,越界就是栈变量向下扩展的时候,踩到了thread_info结构的成员。

    这时会遇到:Thread overran stack, or stack corrupted 这样的打印。判断的依据是紧挨着 thread_info 结构之后(也就是栈可用空间的最低处)放了一个 magic 特征字,如果它被改写了,就说明栈已经越界:

    #define STACK_END_MAGIC        0x57AC6E9D

    以下面例子来说明:

    crash> struct thread_info
    struct thread_info {
        struct task_struct *task;
        struct exec_domain *exec_domain;
        __u32 flags;
        __u32 status;
        __u32 cpu;
        int preempt_count;
        mm_segment_t addr_limit;
        struct restart_block restart_block;
        void *sysenter_return;
        unsigned int sig_on_uaccess_error : 1;
        unsigned int uaccess_err : 1;
    }
    SIZE: 104

    crash> px 0xffff882b680d0000 + 104
    $8 = 0xffff882b680d0068

    crash> rd 0xffff882b680d0068
    ffff882b680d0068: 0000000057ac6e9d .n.W.... -----------------对应的magic特征字

     

    在一些服务器中,经常会使用 echo 1 > /proc/sys/kernel/stack_tracer_enabled 的方式来监控内核栈的使用情况,开启之后就可以查看到目前为止记录到的最深的一次栈调用:

    cat /sys/kernel/debug/tracing/stack_trace
            Depth    Size   Location    (41 entries)
            -----    ----   --------
      0)     4120      16   mempool_alloc_slab+0x15/0x20
      1)     4104     128   mempool_alloc+0x6e/0x170
      2)     3976      16   sg_pool_alloc+0x45/0x50
      3)     3960      88   __sg_alloc_table+0xd6/0x140
      4)     3872      40   sg_alloc_table_chained+0x3c/0x90
      5)     3832      40   scsi_init_sgtable+0x26/0x70
      6)     3792      72   scsi_init_io+0x4e/0x200
      7)     3720      80   sd_setup_read_write_cmnd+0x3d/0x950 [sd_mod]
      8)     3640      16   sd_init_command+0x2f/0xc0 [sd_mod]
      9)     3624      32   scsi_setup_cmnd+0x111/0x1c0
     10)     3592      56   scsi_prep_fn+0xdb/0x180
     11)     3536      40   blk_peek_request+0x16a/0x290
     12)     3496     104   scsi_request_fn+0x48/0x680
     13)     3392      24   __blk_run_queue+0x39/0x50
     14)     3368     192   cfq_insert_request+0x384/0x550
     15)     3176      56   __elv_add_request+0x1a2/0x2e0
     16)     3120      72   blk_queue_bio+0x35b/0x3a0
     17)     3048      88   generic_make_request+0x10b/0x320
     18)     2960      88   submit_bio+0x70/0x150
     19)     2872      48   _submit_bh+0x127/0x160
     20)     2824      16   submit_bh+0x10/0x20
     21)     2808      88   ext4_read_block_bitmap_nowait+0x48c/0x5f0 [ext4]
     22)     2720     152   ext4_mb_init_cache+0x181/0x6e0 [ext4]
     23)     2568      72   ext4_mb_load_buddy+0x2b6/0x340 [ext4]
     24)     2496     160   ext4_mb_regular_allocator+0x1d7/0x470 [ext4]
     25)     2336     176   ext4_mb_new_blocks+0x658/0xa20 [ext4]
     26)     2160     232   ext4_alloc_branch+0x3b9/0x430 [ext4]
     27)     1928     248   ext4_ind_map_blocks+0x34f/0x7b0 [ext4]
     28)     1680     136   ext4_map_blocks+0x2a5/0x6f0 [ext4]
     29)     1544     104   _ext4_get_block+0x1df/0x220 [ext4]
     30)     1440      16   ext4_get_block+0x16/0x20 [ext4]
     31)     1424     184   __block_write_begin+0x17d/0x4b0
     32)     1240     136   ext4_write_begin+0x18f/0x440 [ext4]
     33)     1104     200   generic_file_buffered_write+0x124/0x2c0
     34)      904     128   __generic_file_aio_write+0x1e2/0x400
     35)      776      64   generic_file_aio_write+0x59/0xa0
     36)      712     184   ext4_file_write+0xdb/0x470 [ext4]
     37)      528     216   do_sync_write+0x93/0xe0
     38)      312      64   vfs_write+0xc0/0x1f0
     39)      248      72   SyS_write+0x7f/0xe0
     40)      176     176   system_call_fastpath+0x1c/0x21

    如果新增加了内核模块,测试时最好能够监控起来,保证不会栈越界。

    如果说 thread_info 在进程运行时访问很多,比如取当前 task_struct 指针、设置是否能够抢占的 preempt_count,保存的是一些跟 arch 体系关系不大的参数,那么 thread_struct 就是与体系强相关的一个结构了。比如x86的架构如下,32位和64位用一些宏来控制。

    struct thread_struct {
        /* Cached TLS descriptors: */
        struct desc_struct    tls_array[GDT_ENTRY_TLS_ENTRIES];
        unsigned long        sp0;
        unsigned long        sp;
    #ifdef CONFIG_X86_32
        unsigned long        sysenter_cs;
    #else
        unsigned long        usersp;    /* Copy from PDA */
        unsigned short        es;
        unsigned short        ds;
        unsigned short        fsindex;
        unsigned short        gsindex;
    #endif
    #ifdef CONFIG_X86_32
        unsigned long        ip;
    #endif
    #ifdef CONFIG_X86_64
        unsigned long        fs;
    #endif
        unsigned long        gs;
        /* Save middle states of ptrace breakpoints */
        struct perf_event    *ptrace_bps[HBP_NUM];
        /* Debug status used for traps, single steps, etc... */
        unsigned long           debugreg6;
        /* Keep track of the exact dr7 value set by the user */
        unsigned long           ptrace_dr7;
        /* Fault info: */
        unsigned long        cr2;
        unsigned long        trap_nr;
        unsigned long        error_code;
        /* floating point and extended processor state */
        struct fpu        fpu;
    #ifdef CONFIG_X86_32
        /* Virtual 86 mode info */
        struct vm86_struct __user *vm86_info;
        unsigned long        screen_bitmap;
        unsigned long        v86flags;
        unsigned long        v86mask;
        unsigned long        saved_sp0;
        unsigned int        saved_fs;
        unsigned int        saved_gs;
    #endif
        /* IO permissions: */
        unsigned long        *io_bitmap_ptr;
        unsigned long        iopl;
        /* Max allowed port in the bitmap, in bytes: */
        unsigned        io_bitmap_max;
    };

     arm32的长成这样:

    struct thread_struct {
                                /* fault info      */
        unsigned long        address;
        unsigned long        trap_no;
        unsigned long        error_code;
                                /* debugging      */
        struct debug_info    debug;
    };

    arm64的长成这样:

    struct cpu_context {
        unsigned long x19;
        unsigned long x20;
        unsigned long x21;
        unsigned long x22;
        unsigned long x23;
        unsigned long x24;
        unsigned long x25;
        unsigned long x26;
        unsigned long x27;
        unsigned long x28;
        unsigned long fp;
        unsigned long sp;
        unsigned long pc;
    };
    
    struct thread_struct {
        struct cpu_context    cpu_context;    /* cpu context */
        unsigned long        tp_value;
        struct fpsimd_state    fpsimd_state;
        unsigned long        fault_address;    /* fault info */
        struct debug_info    debug;        /* debugging */
    };

    因为不同的体系结构,寄存器明显不一样,所以cpu的上下文显然也不一样。thread_struct 这个结构就是用来在进程切换的时候,保存和恢复这些特定于 arch 的进程上下文的。

    水平有限,如果有错误,请帮忙提醒我。如果您觉得本文对您有帮助,可以点击下面的 推荐 支持一下我。版权所有,需要转发请带上本文源地址,博客一直在更新,欢迎 关注 。
  • 相关阅读:
    C# Winform 运行异常 CefSharp.core.dll 找不到指定的模块
    WCF TCP通信方式 通过IIS承载调试
    [译]Modern Core Graphics with Swift系列
    博客搬家
    [ios] 定位报错Error Domain=kCLErrorDomain Code=0 "The operation couldn’t be completed. (kCLErrorDomain error 0.)"
    [IOS] 'Double' is not convertible to 'CGFloat'
    [IOS]swift 使用AVOS的API
    [IOS]使用了cocoapods 抱错Pods was rejected as an implicit dependency for ‘libPods.a’ because its architectures ......
    [IOS]cocoapos 两个ruby源的对比
    [IOS]Swift 遍历预制的本地资源文件
  • 原文地址:https://www.cnblogs.com/10087622blog/p/9752568.html
Copyright © 2020-2023  润新知