• Analyzing a kernel deadlock with the crash utility


    When you get hold of a frozen-screen device, first take a ramdump, then load the dump file together with vmlinux into the crash utility.
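
    A typical invocation looks roughly like the sketch below (illustrative only: depending on how the ramdump was captured, it usually has to be converted to a core file first or passed to crash with vendor-specific memory-layout arguments):

    $ crash vmlinux vmcore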

    Once inside the crash environment, start by listing all threads in the uninterruptible (D, shown as UN) state:

    crash_ARM64> ps |grep UN
         59      2   1  ffffffd774998000  UN   0.0       0      0  [kworker/u16:1]
        161      2   0  ffffffd7738e6400  UN   0.0       0      0  [mdss_dsi_event]
        428      2   3  ffffffd772fb8000  UN   0.0       0      0  [mmc-cmdqd/0]
        494      2   1  ffffffd772943e80  UN   0.0       0      0  [msm-core:sampli]
       1119   1473   1  ffffffd6ae10f080  UN   2.7 2089184 222932  Binder:1651_5
       1643      1   6  ffffffd757d65780  UN   0.2  133072  15584  Binder:1474_2
       1651   1473   2  ffffffd728b8b200  UN   2.7 2089184 222932  tencent.karaoke
       1659   1473   0  ffffffd728b89900  UN   2.7 2089184 222932  JDWP
       1663   1473   2  ffffffd64894b200  UN   2.7 2089184 222932  HeapTaskDaemon
       1711   1473   2  ffffffd627cff080  UN   2.7 2089184 222932  ccess.Collector
       1749   1473   5  ffffffd705b4f080  UN   2.7 2089184 222932  TcmReceiver
       1805   1473   1  ffffffd728b8e400  UN   2.7 2089184 222932  ReporterMachine
       1818   1473   2  ffffffd64fbe3e80  UN   2.7 2089184 222932  pool-2-thread-1
       1862   1473   2  ffffffd71ba99900  UN   2.7 2089184 222932  pool-4-thread-1
       1864   1473   0  ffffffd7071de400  UN   2.7 2089184 222932  pool-4-thread-3
       1869   1473   1  ffffffd6c3f6a580  UN   2.7 2089184 222932  TcmReceiver
       1872   1472   4  ffffffd75a8abe80  UN   4.2 2678340 350496  android.bg
       1873   1472   6  ffffffd75a8acb00  UN   4.2 2678340 350496  ActivityManager
       1909   1472   6  ffffffd7587e1900  UN   4.2 2678340 350496  android.display
       2107   1473   1  ffffffd75ef54b00  UN   2.7 2089184 222932  PackageProcesso
       2650   1472   0  ffffffd758db2580  UN   4.2 2678340 350496  ConnectivitySer
       2767    601   2  ffffffd6e2180c80  UN   0.0   19556   3888  sdcard
       2980   1472   0  ffffffd63ad2e400  UN   4.2 2678340 350496  watchdog
       3337   1472   0  ffffffd65bb72580  UN   4.2 2678340 350496  Binder:1817_18
      18694      2   3  ffffffd60443a580  UN   0.0       0      0  [mdss_fb0]
      28387      1   6  ffffffd616f6cb00  UN   0.0    9656   1936  ps
      29239      1   2  ffffffd6156f8c80  UN   0.0    9656   1944  ps
      30079      1   6  ffffffd659b72580  UN   0.0    9656   1936  ps
      30271      1   6  ffffffd626a1be80  UN   0.0    9656   1940  ps

    A frozen UI is usually tied to system_server, so look at a few of its key threads first:

    crash_ARM64> bt 1909
    PID: 1909   TASK: ffffffd7587e1900  CPU: 6   COMMAND: "android.display"
     #0 [ffffffd7587ef8d0] __switch_to at ffffff99a6685560
     #1 [ffffffd7587ef900] __schedule at ffffff99a74df9f0
     #2 [ffffffd7587ef960] schedule at ffffff99a74dfd54
     #3 [ffffffd7587ef980] schedule_preempt_disabled at ffffff99a74e009c
     #4 [ffffffd7587ef9a0] __mutex_lock_slowpath at ffffff99a74e15d4
     #5 [ffffffd7587efa00] mutex_lock at ffffff99a74e1664
     #6 [ffffffd7587efa20] binder_alloc_new_buf at ffffff99a71b9f9c
     #7 [ffffffd7587efab0] binder_transaction at ffffff99a71b5400
     #8 [ffffffd7587efbe0] binder_thread_write at ffffff99a71b7500
     #9 [ffffffd7587efcd0] binder_ioctl_write_read at ffffff99a71b7f38
    #10 [ffffffd7587efd50] binder_ioctl at ffffff99a71b834c
    #11 [ffffffd7587efdf0] do_vfs_ioctl at ffffff99a67c4820
    #12 [ffffffd7587efe80] sys_ioctl at ffffff99a67c4958
    
    crash_ARM64> bt 1873
    PID: 1873   TASK: ffffffd75a8acb00  CPU: 6   COMMAND: "ActivityManager"
     #0 [ffffffd75ca378d0] __switch_to at ffffff99a6685560
     #1 [ffffffd75ca37900] __schedule at ffffff99a74df9f0
     #2 [ffffffd75ca37960] schedule at ffffff99a74dfd54
     #3 [ffffffd75ca37980] schedule_preempt_disabled at ffffff99a74e009c
     #4 [ffffffd75ca379a0] __mutex_lock_slowpath at ffffff99a74e15d4
     #5 [ffffffd75ca37a00] mutex_lock at ffffff99a74e1664
     #6 [ffffffd75ca37a20] binder_alloc_new_buf at ffffff99a71b9f9c
     #7 [ffffffd75ca37ab0] binder_transaction at ffffff99a71b5400
     #8 [ffffffd75ca37be0] binder_thread_write at ffffff99a71b7500
     #9 [ffffffd75ca37cd0] binder_ioctl_write_read at ffffff99a71b7f38
    #10 [ffffffd75ca37d50] binder_ioctl at ffffff99a71b834c
    #11 [ffffffd75ca37df0] do_vfs_ioctl at ffffff99a67c4820
    #12 [ffffffd75ca37e80] sys_ioctl at ffffff99a67c4958

    Both are sleeping inside binder_alloc_new_buf, obviously on the same mutex, so the first job is to find that mutex's address.
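
    For reference, in binder code of this era binder_alloc_new_buf() takes a per-process allocator mutex around buffer allocation, roughly as sketched below (upstream shape, trimmed; the vendor kernel may differ, which is why the analysis keeps working from the binary and recovers the actual lock address from the stack):

    struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
                                               size_t data_size,
                                               size_t offsets_size,
                                               size_t extra_buffers_size,
                                               int is_async)
    {
        struct binder_buffer *buffer;

        mutex_lock(&alloc->mutex);          /* per-process binder allocator lock */
        buffer = binder_alloc_new_buf_locked(alloc, data_size, offsets_size,
                                             extra_buffers_size, is_async);
        mutex_unlock(&alloc->mutex);
        return buffer;
    }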

    Here is how to find the lock.

    Start from the disassembly of mutex_lock:

    crash_ARM64> p mutex_lock
    mutex_lock = $26 = 
     {void (struct mutex *)} 0xffffff99a74e163c
    
    crash_ARM64> dis mutex_lock
    0xffffff99a74e163c <mutex_lock>:        stp     x29, x30, [sp,#-32]!
    0xffffff99a74e1640 <mutex_lock+4>:      mov     x29, sp
    0xffffff99a74e1644 <mutex_lock+8>:      str     x19, [sp,#16]
    0xffffff99a74e1648 <mutex_lock+12>:     mov     x19, x0
    0xffffff99a74e164c <mutex_lock+16>:     prfm    pstl1strm, [x0]
    0xffffff99a74e1650 <mutex_lock+20>:     ldaxr   w1, [x0]
    0xffffff99a74e1654 <mutex_lock+24>:     sub     w1, w1, #0x1
    0xffffff99a74e1658 <mutex_lock+28>:     stxr    w2, w1, [x0]
    0xffffff99a74e165c <mutex_lock+32>:     cbnz    w2, 0xffffff99a74e1650
    0xffffff99a74e1660 <mutex_lock+36>:     tbz     w1, #31, 0xffffff99a74e1668
    0xffffff99a74e1664 <mutex_lock+40>:     bl      0xffffff99a74e14ec
    0xffffff99a74e1668 <mutex_lock+44>:     mrs     x0, sp_el0
    0xffffff99a74e166c <mutex_lock+48>:     ldr     x0, [x0,#16]
    0xffffff99a74e1670 <mutex_lock+52>:     str     x0, [x19,#24]
    0xffffff99a74e1674 <mutex_lock+56>:     ldr     x19, [sp,#16]
    0xffffff99a74e1678 <mutex_lock+60>:     ldp     x29, x30, [sp],#32
    0xffffff99a74e167c <mutex_lock+64>:     ret

    The first argument of mutex_lock is exactly the struct mutex we are after. At the function entry it is copied into register x19:

    0xffffff99a74e1648 <mutex_lock+12>:     mov     x19, x0

    A bit further down, the contended path calls __mutex_lock_slowpath:

    0xffffff99a74e1664 <mutex_lock+40>:     bl      0xffffff99a74e14ec

    x19 is a callee-saved register, so the callee has to spill it into its own stack frame before reusing it, for example:

    crash_ARM64> dis __mutex_lock_slowpath
    0xffffff99a74e14ec <__mutex_lock_slowpath>:     stp     x29, x30, [sp,#-96]!
    0xffffff99a74e14f0 <__mutex_lock_slowpath+4>:   mov     x29, sp
    0xffffff99a74e14f4 <__mutex_lock_slowpath+8>:   stp     x19, x20, [sp,#16]
    0xffffff99a74e14f8 <__mutex_lock_slowpath+12>:  stp     x21, x22, [sp,#32]
    0xffffff99a74e14fc <__mutex_lock_slowpath+16>:  stp     x23, x24, [sp,#48]
    0xffffff99a74e1500 <__mutex_lock_slowpath+20>:  mrs     x20, sp_el0
    0xffffff99a74e1504 <__mutex_lock_slowpath+24>:  ldr     w1, [x20,#24]
    0xffffff99a74e1508 <__mutex_lock_slowpath+28>:  mov     x19, x0
    ...

    So all we need to do is find the saved x19 in __mutex_lock_slowpath's stack frame: that value is the struct mutex pointer.

    From the bt output, __mutex_lock_slowpath's stack frame spans ffffffd75ca379a0 ~ ffffffd75ca37a00:

    crash_ARM64> bt 1873
    PID: 1873   TASK: ffffffd75a8acb00  CPU: 6   COMMAND: "ActivityManager"
     #0 [ffffffd75ca378d0] __switch_to at ffffff99a6685560
     #1 [ffffffd75ca37900] __schedule at ffffff99a74df9f0
     #2 [ffffffd75ca37960] schedule at ffffff99a74dfd54
     #3 [ffffffd75ca37980] schedule_preempt_disabled at ffffff99a74e009c
     #4 [ffffffd75ca379a0] __mutex_lock_slowpath at ffffff99a74e15d4
     #5 [ffffffd75ca37a00] mutex_lock at ffffff99a74e1664
     #6 [ffffffd75ca37a20] binder_alloc_new_buf at ffffff99a71b9f9c
     #7 [ffffffd75ca37ab0] binder_transaction at ffffff99a71b5400
     #8 [ffffffd75ca37be0] binder_thread_write at ffffff99a71b7500
     #9 [ffffffd75ca37cd0] binder_ioctl_write_read at ffffff99a71b7f38
    #10 [ffffffd75ca37d50] binder_ioctl at ffffff99a71b834c
    #11 [ffffffd75ca37df0] do_vfs_ioctl at ffffff99a67c4820
    #12 [ffffffd75ca37e80] sys_ioctl at ffffff99a67c4958

    Dump that range with the rd command:

    crash_ARM64> rd ffffffd75ca379a0 -e ffffffd75ca37a00
    ffffffd75ca379a0:  ffffffd75ca37a00 ffffff99a74e1668   .z.....h.N.....
    ffffffd75ca379b0:  ffffffd6dfa02200 ffffffd6dfa02200   ."......."......
    ffffffd75ca379c0:  0000000000000000 ffffffd75ca37c80   .........|.....
    ffffffd75ca379d0:  ffffff99a9109000 ffffffd75c1cbc00   ...............
    ffffffd75ca379e0:  ffffffd700000000 ffffffd7587ef9e8   ..........~X....
    ffffffd75ca379f0:  ffffffd758dc39e8 ffffffd75a8acb00   .9.X.......Z....

    Now line this up with __mutex_lock_slowpath's prologue:

    crash_ARM64> dis __mutex_lock_slowpath
    0xffffff99a74e14ec <__mutex_lock_slowpath>:     stp     x29, x30, [sp,#-96]!
    0xffffff99a74e14f0 <__mutex_lock_slowpath+4>:   mov     x29, sp
    0xffffff99a74e14f4 <__mutex_lock_slowpath+8>:   stp     x19, x20, [sp,#16]
    0xffffff99a74e14f8 <__mutex_lock_slowpath+12>:  stp     x21, x22, [sp,#32]
    0xffffff99a74e14fc <__mutex_lock_slowpath+16>:  stp     x23, x24, [sp,#48]
    ...

    The saved registers sit in the frame at the following positions:

    ffffffd75ca379a0:  ffffffd75ca37a00 ffffff99a74e1668   .z.....h.N.....
                x29              x30
    ffffffd75ca379b0:  ffffffd6dfa02200 ffffffd6dfa02200   ."......."......
                       x19              x20
    ffffffd75ca379c0:  0000000000000000 ffffffd75ca37c80   .........|.....
                       x21              x22
    ffffffd75ca379d0:  ffffff99a9109000 ffffffd75c1cbc00   ...............
                       x23              x24
    ffffffd75ca379e0:  ffffffd700000000 ffffffd7587ef9e8   ..........~X....
    ffffffd75ca379f0:  ffffffd758dc39e8 ffffffd75a8acb00   .9.X.......Z....

    The struct mutex we are after is the saved x19, i.e. ffffffd6dfa02200.

    This works for more than just mutexes: with the same approach almost any argument can potentially be recovered from the stack. This is the most basic form of stack derivation.
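
    In condensed form the recipe is: dis the caller to see which callee-saved register (x19..x28) carries the wanted value at the call site, dis the callee to see at which offset its prologue spills that register (a "stp x19, x20, [sp,#16]" puts x19 at frame offset 16), then read it back out of the callee's frame. As a sketch:

    crash_ARM64> dis <caller>
    crash_ARM64> dis <callee>
    crash_ARM64> rd <callee frame address>+0x10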

    Now inspect the mutex at ffffffd6dfa02200 with crash:

    crash_ARM64> struct mutex ffffffd6dfa02200
    struct mutex {
      count = {
        counter = -4
      }, 
      ...
      owner = 0xffffffd65bb72580, 
      ...
    }

    Here owner is the task_struct pointer of the thread currently holding the lock. Its pid:

    crash_ARM64> task_struct.pid 0xffffffd65bb72580
      pid = 3337

    Look at that thread's call stack:

    crash_ARM64> bt 3337
    PID: 3337   TASK: ffffffd65bb72580  CPU: 0   COMMAND: "Binder:1817_18"
     #0 [ffffffd66ccf7870] __switch_to at ffffff99a6685560
     #1 [ffffffd66ccf78a0] __schedule at ffffff99a74df9f0
     #2 [ffffffd66ccf7900] schedule at ffffff99a74dfd54
     #3 [ffffffd66ccf7920] rwsem_down_write_failed at ffffff99a74e2388
     #4 [ffffffd66ccf7990] down_write at ffffff99a74e1a20
     #5 [ffffffd66ccf79b0] binder_update_page_range at ffffff99a71b9938
     #6 [ffffffd66ccf7a20] binder_alloc_new_buf at ffffff99a71ba238
     #7 [ffffffd66ccf7ab0] binder_transaction at ffffff99a71b5400
     #8 [ffffffd66ccf7be0] binder_thread_write at ffffff99a71b7500
     #9 [ffffffd66ccf7cd0] binder_ioctl_write_read at ffffff99a71b7f38
    #10 [ffffffd66ccf7d50] binder_ioctl at ffffff99a71b834c
    #11 [ffffffd66ccf7df0] do_vfs_ioctl at ffffff99a67c4820
    #12 [ffffffd66ccf7e80] sys_ioctl at ffffff99a67c4958

    So pid 3337 (Binder:1817_18) took the mutex and then got stuck itself, waiting on a rw_semaphore (down_write).

    Next we need to find out who holds that rw_semaphore.

    Same method again, starting from the down_write function:

    crash_ARM64> p down_write
    down_write = $27 = 
     {void (struct rw_semaphore *)} 0xffffff99a74e19f0
    
    crash_ARM64> dis down_write
    0xffffff99a74e19f0 <down_write>:        stp     x29, x30, [sp,#-32]!
    0xffffff99a74e19f4 <down_write+4>:      mov     x1, #0xffffffff00000001         // #-4294967295
    0xffffff99a74e19f8 <down_write+8>:      mov     x29, sp
    0xffffff99a74e19fc <down_write+12>:     str     x19, [sp,#16]
    0xffffff99a74e1a00 <down_write+16>:     mov     x19, x0
    0xffffff99a74e1a04 <down_write+20>:     prfm    pstl1strm, [x0]
    0xffffff99a74e1a08 <down_write+24>:     ldaxr   x2, [x0]
    0xffffff99a74e1a0c <down_write+28>:     add     x2, x2, x1
    0xffffff99a74e1a10 <down_write+32>:     stxr    w3, x2, [x0]
    0xffffff99a74e1a14 <down_write+36>:     cbnz    w3, 0xffffff99a74e1a08
    0xffffff99a74e1a18 <down_write+40>:     cmp     x2, x1
    0xffffff99a74e1a1c <down_write+44>:     b.eq    0xffffff99a74e1a24
    0xffffff99a74e1a20 <down_write+48>:     bl      0xffffff99a74e20b4
    0xffffff99a74e1a24 <down_write+52>:     mrs     x0, sp_el0
    0xffffff99a74e1a28 <down_write+56>:     ldr     x0, [x0,#16]
    0xffffff99a74e1a2c <down_write+60>:     str     x0, [x19,#32]
    0xffffff99a74e1a30 <down_write+64>:     ldr     x19, [sp,#16]
    0xffffff99a74e1a34 <down_write+68>:     ldp     x29, x30, [sp],#32
    0xffffff99a74e1a38 <down_write+72>:     ret

    Its argument is likewise stashed in x19, so we can dig the saved x19 out of the next function down, rwsem_down_write_failed.

    crash_ARM64> dis 0xffffff99a74e20b4
    0xffffff99a74e20b4 <rwsem_down_write_failed>:   stp     x29, x30, [sp,#-112]!
    0xffffff99a74e20b8 <rwsem_down_write_failed+4>: mov     x29, sp
    0xffffff99a74e20bc <rwsem_down_write_failed+8>: stp     x19, x20, [sp,#16]
    0xffffff99a74e20c0 <rwsem_down_write_failed+12>:        stp     x21, x22, [sp,#32]
    0xffffff99a74e20c4 <rwsem_down_write_failed+16>:        mov     x19, x0
    0xffffff99a74e20c8 <rwsem_down_write_failed+20>:        str     x25, [sp,#64]
    0xffffff99a74e20cc <rwsem_down_write_failed+24>:        stp     x23, x24, [sp,#48]
    0xffffff99a74e20d0 <rwsem_down_write_failed+28>:        mov     x0, #0xffffffff                 // #4294967295
    0xffffff99a74e20d4 <rwsem_down_write_failed+32>:        prfm    pstl1strm, [x19]
    ...

    x19 is spilled at offset 16 of rwsem_down_write_failed's frame, and from bt that frame starts at ffffffd66ccf7920:

     #3 [ffffffd66ccf7920] rwsem_down_write_failed at ffffff99a74e2388
     #4 [ffffffd66ccf7990] down_write at ffffff99a74e1a20

    So x19 can be read straight off the stack; its value is ffffffd76e349a68:

    crash_ARM64> rd ffffffd66ccf7920 -e ffffffd66ccf7990
    ffffffd66ccf7920:  ffffffd66ccf7990 ffffff99a74e1a24   .y.l....$.N.....
                       x29              x30
    ffffffd66ccf7930:  ffffffd76e349a68 ffffffd6dfa02200   h.4n....."......
                       x19              x20
    ffffffd66ccf7940:  ffffffd76e349a00 0000000000000000   ..4n............
    ffffffd66ccf7950:  ffffff801250dee8 0000000000000000   ..P.............
    ffffffd66ccf7960:  ffffff801250e000 ffffff99a68e2340   ..P.....@#......
    ffffffd66ccf7970:  ffffffd6a3937d70 ffffffd70e1abd70   p}......p.......
    ffffffd66ccf7980:  ffffffd65bb72580 ffffff9900000000   .%.[............

    That is, the struct rw_semaphore pointer is ffffffd76e349a68:

    crash_ARM64> struct -x rw_semaphore ffffffd76e349a68
    struct rw_semaphore {
      count = 0xffffffff00000001, 
      ...
      owner = 0x0
    }

    Because CONFIG_RWSEM_SPIN_ON_OWNER is not enabled on this kernel, the owner field is 0.

    The same goes for mutexes, by the way: the owner field is only recorded when CONFIG_DEBUG_MUTEXES or CONFIG_MUTEX_SPIN_ON_OWNER is enabled.

    With no owner recorded, finding out which thread holds the lock gets considerably harder.
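
    For reference, this is roughly how those owner fields are gated in mainline kernels of this generation (a trimmed sketch; the vendor kernel may differ slightly):

    struct mutex {
        atomic_t              count;      /* 1: unlocked, 0: locked, <0: locked with waiters */
        spinlock_t            wait_lock;
        struct list_head      wait_list;
    #if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
        struct task_struct    *owner;     /* recorded only with one of these configs */
    #endif
        ...
    };

    struct rw_semaphore {
        long                  count;
        struct list_head      wait_list;
        raw_spinlock_t        wait_lock;
    #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        struct optimistic_spin_queue osq;
        struct task_struct    *owner;     /* write owner, recorded only with this config */
    #endif
        ...
    };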

    First, check in the source which semaphore this actually is:

    static int binder_update_page_range(struct binder_alloc *alloc, int allocate,
                        void *start, void *end,
                        struct vm_area_struct *vma)
    {
        ...
        if (vma)
            mm = NULL;
        else
            mm = get_task_mm(alloc->tsk);
    
        if (mm) {
            down_write(&mm->mmap_sem);
            vma = alloc->vma;
            ...

    It is mm->mmap_sem, a coarse lock that a great many code paths take.

    One option is to script a walk over every task's stack and look for references to this semaphore to work out who holds it; that idea is covered later. Here, let's try a different approach first.
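
    (As an aside, crash's built-in search command gives a crude version of that scan with no scripting: it reports every task whose kernel stack contains a given value, which then still has to be filtered by hand to tell the holder from the waiters. With the rw_semaphore address found above it would look like this:)

    crash_ARM64> search -t ffffffd76e349a68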

    Let's first go through all the D-state threads and see which locks each of them is blocked on.

    Checking their stacks one by one with bt shows that almost all of them are blocked either on mm->mmap_sem or on the mutex found earlier, for example:

    crash_ARM64> bt 1749
    PID: 1749   TASK: ffffffd705b4f080  CPU: 5   COMMAND: "TcmReceiver"
     #0 [ffffffd6db78fc90] __switch_to at ffffff99a6685560
     #1 [ffffffd6db78fcc0] __schedule at ffffff99a74df9f0
     #2 [ffffffd6db78fd20] schedule at ffffff99a74dfd54
     #3 [ffffffd6db78fd40] rwsem_down_read_failed at ffffff99a74e2094
     #4 [ffffffd6db78fd90] down_read at ffffff99a74e19e4
     #5 [ffffffd6db78fda0] do_page_fault at ffffff99a6696f00
     #6 [ffffffd6db78fe10] do_mem_abort at ffffff99a6680ad8

    Only one thread, an sdcard thread, is blocked on some other lock:

    crash_ARM64> bt 2767
    PID: 2767   TASK: ffffffd6e2180c80  CPU: 2   COMMAND: "sdcard"
     #0 [ffffffd6e218fac0] __switch_to at ffffff99a6685560
     #1 [ffffffd6e218faf0] __schedule at ffffff99a74df9f0
     #2 [ffffffd6e218fb50] schedule at ffffff99a74dfd54
     #3 [ffffffd6e218fb70] schedule_preempt_disabled at ffffff99a74e009c
     #4 [ffffffd6e218fb90] __mutex_lock_slowpath at ffffff99a74e15d4
     #5 [ffffffd6e218fbf0] mutex_lock at ffffff99a74e1664
     #6 [ffffffd6e218fc10] do_truncate at ffffff99a67b35bc
     #7 [ffffffd6e218fc80] path_openat at ffffff99a67c1938
     #8 [ffffffd6e218fd50] do_filp_open at ffffff99a67c26f8
     #9 [ffffffd6e218fe50] do_sys_open at ffffff99a67b44c4
    #10 [ffffffd6e218feb0] sys_openat at ffffff99a67b4594
    #11 [ffffffd6e218fed0] el0_svc_naked at ffffff99a668462c

    Using the method from before, the mutex pointer sits at __mutex_lock_slowpath's frame address + 16, which here is ffffffd6e218fba0:

    crash_ARM64> rd ffffffd6e218fba0
    ffffffd6e218fba0:  ffffffd6948f4090

    So the mutex here is ffffffd6948f4090, and since owner tracking is enabled for mutexes on this kernel, the owner is easy to get:

    crash_ARM64> struct mutex ffffffd6948f4090
    struct mutex {
      count = {
        counter = -1
      }, 
      ...
      owner = 0xffffffd771be3e80, 
      ...
    }

    As noted before, owner is the task_struct pointer of the thread holding this mutex; its pid:

    crash_ARM64> struct task_struct.pid 0xffffffd771be3e80
      pid = 2124

    Its call stack:

    crash_ARM64> bt 2124
    PID: 2124   TASK: ffffffd771be3e80  CPU: 0   COMMAND: "wnloader.impl.c"
     #0 [ffffffd6d396b4b0] __switch_to at ffffff99a6685560
     #1 [ffffffd6d396b4e0] __schedule at ffffff99a74df9f0
     #2 [ffffffd6d396b540] schedule at ffffff99a74dfd54
     #3 [ffffffd6d396b560] request_wait_answer at ffffff99a68c0350
     #4 [ffffffd6d396b5d0] __fuse_request_send at ffffff99a68c2f60
     #5 [ffffffd6d396b600] fuse_request_send at ffffff99a68c2fb8
     #6 [ffffffd6d396b610] fuse_send_readpages at ffffff99a68c8ef8
     #7 [ffffffd6d396b640] fuse_readpages at ffffff99a68c8fe8
     #8 [ffffffd6d396b6c0] __do_page_cache_readahead at ffffff99a676cfc0
     #9 [ffffffd6d396b780] filemap_fault at ffffff99a6762fc4
    #10 [ffffffd6d396b7f0] __do_fault at ffffff99a6789c38
    #11 [ffffffd6d396b860] handle_mm_fault at ffffff99a678d540
    #12 [ffffffd6d396b920] do_page_fault at ffffff99a6696f4c
    #13 [ffffffd6d396b990] do_mem_abort at ffffff99a6680ad8
    #14 [ffffffd6d396bb80] el1_da at ffffff99a6683cf8
         PC: ffffffd7017056c0  [unknown or invalid address]
         LR: ffffff99a695da6c  [iov_iter_fault_in_readable+64]
         SP: 0000000080000145  PSTATE: 00000000
        X29: ffffffd6d396bb70  X28: ffffff99a67619a0  X27: ffffffd6d396bb80
        X26: 0000000000001000  X25: ffffff99a7604000  X24: 0000000000000000
        X23: ffffff99a761b580  X22: ffffffd623366500  X21: 0000000000001000
        X20: ffffffd6948f4140  X19: 0000000000000000  X18: 0000000000000000
        X17: ffffffd6d396be08  X16: 0000000000000000  X15: 0000000000000000
        X14: ffffff99a668f4a0  X13: 0000000000000000  X12: 00000000ee04c905
        X11: 00000000d0bf8f50  X10: 0000000000000018   X9: 0101010101010101
         X8: 7f7f7f7f7f7f7f7f   X7: 722433ffff787368   X6: 7f7f7f7f7f7f7fff
         X5: 0000000000002800   X4: 0000008080000000   X3: ffffffd6d396bdd0
         X2: 0000000000002800   X1: 00000000d5d3e000   X0: 0000000000000000
    #15 [ffffffd6d396bc00] __generic_file_write_iter at ffffff99a6762af8
    #16 [ffffffd6d396bc50] ext4_file_write_iter at ffffff99a68201f4
    #17 [ffffffd6d396bd00] fuse_passthrough_write_iter at ffffff99a68cd4d0
    #18 [ffffffd6d396bd50] fuse_file_write_iter at ffffff99a68ca1f4
    #19 [ffffffd6d396bdb0] __vfs_write at ffffff99a67b49a0
    #20 [ffffffd6d396be30] vfs_write at ffffff99a67b515c
    #21 [ffffffd6d396be70] sys_pwrite64 at ffffff99a67b5a1c

    Note the do_page_fault frame here: from thread 1749's stack earlier we know do_page_fault has to take mm->mmap_sem,

    yet this thread is not blocked there, so the owner of mm->mmap_sem is most likely thread 2124 itself. The code confirms this:

    static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
                       struct pt_regs *regs)
    {
        struct task_struct *tsk;
        struct mm_struct *mm;
        ...
        if (!down_read_trylock(&mm->mmap_sem)) {
           if (!user_mode(regs) && !search_exception_tables(regs->pc))
                goto no_context;
    retry:
            down_read(&mm->mmap_sem);
       ...
       fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
       ...
    
    
    static int __do_page_fault(struct mm_struct *mm, unsigned long addr,
                   unsigned int mm_flags, unsigned long vm_flags,
                   struct task_struct *tsk)
    {
        ...
        return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags);

    Since execution has already proceeded down past handle_mm_fault, we can be certain mm->mmap_sem is held at this point.

    So all of the threads above are ultimately blocked by thread 2124; the remaining question is why thread 2124 itself is stuck.

    crash_ARM64> ps 2124
       PID    PPID  CPU       TASK        ST  %MEM     VSZ    RSS  COMM
       2124   1473   0  ffffffd771be3e80  IN   2.7 2089184 222932  wnloader.impl.c

    Thread 2124 is in the IN (interruptible sleep) state, which means it is not itself blocked on a lock.

    Its call stack shows it is actually waiting for a fuse request to complete. Stack derivation, done the same way as before, gives the fuse_req address as ffffffd6db690d00:

    crash_ARM64> struct -x fuse_req ffffffd6db690d00
    struct fuse_req {
      list = {
        next = 0xffffffd7063d8000, 
        prev = 0xffffffd76f868680
      }, 
      intr_entry = {
        next = 0xffffffd6db690d10, 
        prev = 0xffffffd6db690d10
      }, 
      count = {
        counter = 0x2
      }, 
      intr_unique = 0x0, 
      flags = 0x89, 
      ...

    Here flags = 0x89 = 0b10001001,

    and in the code the flag bits are defined as:

    enum fuse_req_flag {
    	FR_ISREPLY,	/* bit 0 = 1 */
    	FR_FORCE,	/* bit 1 = 0 */
    	FR_BACKGROUND,	/* bit 2 = 0 */
    	FR_WAITING,	/* bit 3 = 1 */
    	FR_ABORTED,	/* bit 4 = 0 */
    	FR_INTERRUPTED,	/* bit 5 = 0 */
    	FR_LOCKED,	/* bit 6 = 0 */
    	FR_PENDING,	/* bit 7 = 1 */
    	FR_SENT,
    	FR_FINISHED,
    	FR_PRIVATE,
    };

    So the request's current state is FR_ISREPLY | FR_WAITING | FR_PENDING. Note that it is still pending:

    that means the fuse daemon has not yet taken this request off the queue; in other words, the daemon is still busy with an earlier request.

    The fuse daemon here is the sdcard process; fuse requests are normally serviced by sdcard.

    Let's look at the state of the sdcard process:

    crash_ARM64> ps -g 2767
    PID: 2755   TASK: ffffffd75b29e400  CPU: 5   COMMAND: "sdcard"
      PID: 2764   TASK: ffffffd75b29f080  CPU: 0   COMMAND: "sdcard"
      PID: 2765   TASK: ffffffd75b29be80  CPU: 2   COMMAND: "sdcard"
      PID: 2766   TASK: ffffffd6e2180000  CPU: 4   COMMAND: "sdcard"
      PID: 2767   TASK: ffffffd6e2180c80  CPU: 2   COMMAND: "sdcard"
    crash_ARM64> bt 2765
    PID: 2765   TASK: ffffffd75b29be80  CPU: 2   COMMAND: "sdcard"
     #0 [ffffffd6e217fbf0] __switch_to at ffffff99a6685560
     #1 [ffffffd6e217fc20] __schedule at ffffff99a74df9f0
     #2 [ffffffd6e217fc80] schedule at ffffff99a74dfd54
     #3 [ffffffd6e217fca0] fuse_dev_do_read at ffffff99a68c1f50
     #4 [ffffffd6e217fd50] fuse_dev_read at ffffff99a68c2568
     #5 [ffffffd6e217fdb0] __vfs_read at ffffff99a67b48d8
     #6 [ffffffd6e217fe30] vfs_read at ffffff99a67b5038
     #7 [ffffffd6e217fe70] sys_read at ffffff99a67b5888
     #8 [ffffffd6e217fed0] el0_svc_naked at ffffff99a668462c
    
    crash_ARM64> bt 2766
    PID: 2766   TASK: ffffffd6e2180000  CPU: 4   COMMAND: "sdcard"
     #0 [ffffffd6e218bbf0] __switch_to at ffffff99a6685560
     #1 [ffffffd6e218bc20] __schedule at ffffff99a74df9f0
     #2 [ffffffd6e218bc80] schedule at ffffff99a74dfd54
     #3 [ffffffd6e218bca0] fuse_dev_do_read at ffffff99a68c1f50
     #4 [ffffffd6e218bd50] fuse_dev_read at ffffff99a68c2568
     #5 [ffffffd6e218bdb0] __vfs_read at ffffff99a67b48d8
     #6 [ffffffd6e218be30] vfs_read at ffffff99a67b5038
     #7 [ffffffd6e218be70] sys_read at ffffff99a67b5888
     #8 [ffffffd6e218bed0] el0_svc_naked at ffffff99a668462c
    
    crash_ARM64> bt 2767
    PID: 2767   TASK: ffffffd6e2180c80  CPU: 2   COMMAND: "sdcard"
     #0 [ffffffd6e218fac0] __switch_to at ffffff99a6685560
     #1 [ffffffd6e218faf0] __schedule at ffffff99a74df9f0
     #2 [ffffffd6e218fb50] schedule at ffffff99a74dfd54
     #3 [ffffffd6e218fb70] schedule_preempt_disabled at ffffff99a74e009c
     #4 [ffffffd6e218fb90] __mutex_lock_slowpath at ffffff99a74e15d4
     #5 [ffffffd6e218fbf0] mutex_lock at ffffff99a74e1664
     #6 [ffffffd6e218fc10] do_truncate at ffffff99a67b35bc
     #7 [ffffffd6e218fc80] path_openat at ffffff99a67c1938
     #8 [ffffffd6e218fd50] do_filp_open at ffffff99a67c26f8
     #9 [ffffffd6e218fe50] do_sys_open at ffffff99a67b44c4
    #10 [ffffffd6e218feb0] sys_openat at ffffff99a67b4594
    #11 [ffffffd6e218fed0] el0_svc_naked at ffffff99a668462c

    Of its three worker threads 2765, 2766 and 2767, the first two are idle waiting for requests, while 2767 is the one blocked on the mutex owned by thread 2124.

    Thread 2124 is waiting on thread 2767, and thread 2767 is in turn blocked by thread 2124. A deadlock!

    Let's take a closer look at 2767 first:

    crash_ARM64> bt 2767
    PID: 2767   TASK: ffffffd6e2180c80  CPU: 2   COMMAND: "sdcard"
     #0 [ffffffd6e218fac0] __switch_to at ffffff99a6685560
     #1 [ffffffd6e218faf0] __schedule at ffffff99a74df9f0
     #2 [ffffffd6e218fb50] schedule at ffffff99a74dfd54
     #3 [ffffffd6e218fb70] schedule_preempt_disabled at ffffff99a74e009c
     #4 [ffffffd6e218fb90] __mutex_lock_slowpath at ffffff99a74e15d4
     #5 [ffffffd6e218fbf0] mutex_lock at ffffff99a74e1664
     #6 [ffffffd6e218fc10] do_truncate at ffffff99a67b35bc
     #7 [ffffffd6e218fc80] path_openat at ffffff99a67c1938
     #8 [ffffffd6e218fd50] do_filp_open at ffffff99a67c26f8
     #9 [ffffffd6e218fe50] do_sys_open at ffffff99a67b44c4
    #10 [ffffffd6e218feb0] sys_openat at ffffff99a67b4594
    #11 [ffffffd6e218fed0] el0_svc_naked at ffffff99a668462c

    The call stack shows the lock is being taken in do_truncate(); its source:

    int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, struct file *filp)
    {
    	...
    	mutex_lock(&dentry->d_inode->i_mutex);

    Time for another round of stack derivation, this time to recover the dentry:

    First, look at do_truncate's disassembly:

    crash_ARM64> dis do_truncate
    0xffffff99a67b3554 <do_truncate>:       stp     x29, x30, [sp,#-112]!
    0xffffff99a67b3558 <do_truncate+4>:     mov     w4, #0xffffffea                 // #-22
    0xffffff99a67b355c <do_truncate+8>:     mov     x29, sp
    0xffffff99a67b3560 <do_truncate+12>:    stp     x19, x20, [sp,#16]
    0xffffff99a67b3564 <do_truncate+16>:    mov     x19, x0
    0xffffff99a67b3568 <do_truncate+20>:    tbnz    x1, #63, 0xffffff99a67b35e4
    0xffffff99a67b356c <do_truncate+24>:    orr     w0, w2, #0x8
    0xffffff99a67b3570 <do_truncate+28>:    str     x1, [x29,#48]
    0xffffff99a67b3574 <do_truncate+32>:    str     w0, [x29,#32]
    0xffffff99a67b3578 <do_truncate+36>:    cbz     x3, 0xffffff99a67b358c
    0xffffff99a67b357c <do_truncate+40>:    mov     w0, #0x2008                     // #8200
    0xffffff99a67b3580 <do_truncate+44>:    str     x3, [x29,#104]
    0xffffff99a67b3584 <do_truncate+48>:    orr     w2, w2, w0
    0xffffff99a67b3588 <do_truncate+52>:    str     w2, [x29,#32]
    0xffffff99a67b358c <do_truncate+56>:    mov     x0, x19
    0xffffff99a67b3590 <do_truncate+60>:    bl      0xffffff99a67cb704
    0xffffff99a67b3594 <do_truncate+64>:    cmp     w0, wzr
    0xffffff99a67b3598 <do_truncate+68>:    mov     w4, w0
    0xffffff99a67b359c <do_truncate+72>:    b.lt    0xffffff99a67b35e4
    0xffffff99a67b35a0 <do_truncate+76>:    b.eq    0xffffff99a67b35b4
    0xffffff99a67b35a4 <do_truncate+80>:    ldr     w1, [x29,#32]
    0xffffff99a67b35a8 <do_truncate+84>:    orr     w1, w1, #0x200
    0xffffff99a67b35ac <do_truncate+88>:    orr     w0, w1, w0
    0xffffff99a67b35b0 <do_truncate+92>:    str     w0, [x29,#32]
    0xffffff99a67b35b4 <do_truncate+96>:    ldr     x0, [x19,#48]
    0xffffff99a67b35b8 <do_truncate+100>:   add     x0, x0, #0xa8
    0xffffff99a67b35bc <do_truncate+104>:   bl      0xffffff99a74e163c
    ...

    The first argument, the struct dentry pointer arriving in x0, is copied into x19 at do_truncate+16.

    x19 is then left untouched all the way up to the mutex_lock call (the bl at 0xffffff99a67b35bc),

    so we can pull this x19 out of mutex_lock's stack frame.

    crash_ARM64> dis mutex_lock
    0xffffff99a74e163c <mutex_lock>:        stp     x29, x30, [sp,#-32]!
    0xffffff99a74e1640 <mutex_lock+4>:      mov     x29, sp
    0xffffff99a74e1644 <mutex_lock+8>:      str     x19, [sp,#16]
    ...

    mutex_lock's frame starts at 0xffffffd6e218fbf0, so x19 is saved at 0xffffffd6e218fbf0 + 0x10 = 0xffffffd6e218fc00:

    crash_ARM64> rd 0xffffffd6e218fc00
    ffffffd6e218fc00:  ffffffd7017056c0
    
    crash_ARM64> struct dentry ffffffd7017056c0
    struct dentry {
      d_flags = 4718784, 
      ...
      d_parent = 0xffffffd6aac46e40, 
      d_name = {
        name = 0xffffffd7017056f8 "test1001"
      }, 
      d_inode = 0xffffffd6948f3fe8, 
      ...

    In other words, thread 2767 is opening (and truncating) a file named test1001 and is blocked on that file's inode mutex.

    Walking d_parent gives the parent dentry and its name at each level, so the full path can be reconstructed:

    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6aac46e40
      d_parent = 0xffffffd6aac46540
      d_name.name = 0xffffffd6aac46e78 "speedMeasure"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6aac46540
      d_parent = 0xffffffd6aac466c0
      d_name.name = 0xffffffd6aac46578 "files"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6aac466c0
      d_parent = 0xffffffd75d5a0000
      d_name.name = 0xffffffd6aac466f8 "com.tencent.karaoke"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd75d5a0000
      d_parent = 0xffffffd6473103c0
      d_name.name = 0xffffffd75d5a0038 "data"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6473103c0
      d_parent = 0xffffffd771750cc0
      d_name.name = 0xffffffd6473103f8 "Android"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd771750cc0
      d_parent = 0xffffffd7715c8a80
      d_name.name = 0xffffffd771750cf8 "0"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd7715c8a80
      d_parent = 0xffffffd6743d2180
      d_name.name = 0xffffffd7715c8ab8 "media"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6743d2180
      d_parent = 0xffffffd6743d2180
      d_name.name = 0xffffffd6743d21b8 "/"

    The complete path is /media/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001.
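
    (Depending on the crash version, the files command can also resolve a dentry to its pathname directly, which saves walking d_parent by hand; treat this as an optional shortcut rather than a given:)

    crash_ARM64> files -d ffffffd7017056c0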

    The same file name can also be recovered by stack derivation across the do_sys_open and do_filp_open frames in the call stack:

     #8 [ffffffd6e218fd50] do_filp_open at ffffff99a67c26f8
     #9 [ffffffd6e218fe50] do_sys_open at ffffff99a67b44c4

    do_filp_open is declared as:

    crash_ARM64> p do_filp_open
    do_filp_open = $31 = 
     {struct file *(int, struct filename *, const struct open_flags *)} 0xffffff99a67c26c0

    We want do_filp_open's second argument, the struct filename pointer, i.e. the value of x1 at the call.

    So look at the caller, do_sys_open, just before it calls do_filp_open, and in particular at how x1 gets its value:

    crash_ARM64> dis ffffff99a67b44b4 5
    0xffffff99a67b44b4 <do_sys_open+336>:   tbnz    w21, #31, 0xffffff99a67b453c
    0xffffff99a67b44b8 <do_sys_open+340>:   mov     w0, w20
    0xffffff99a67b44bc <do_sys_open+344>:   mov     x1, x19
    0xffffff99a67b44c0 <do_sys_open+348>:   add     x2, x29, #0x48
    0xffffff99a67b44c4 <do_sys_open+352>:   bl      0xffffff99a67c26c0

    x1 is copied from x19, so it is enough to find the saved x19 in do_filp_open's stack frame.

    First check do_filp_open's prologue to see where it spills x19:

    crash_ARM64> dis do_filp_open
    0xffffff99a67c26c0 <do_filp_open>:      stp     x29, x30, [sp,#-256]!
    0xffffff99a67c26c4 <do_filp_open+4>:    mov     w4, w0
    0xffffff99a67c26c8 <do_filp_open+8>:    mov     x3, x1
    0xffffff99a67c26cc <do_filp_open+12>:   mov     x29, sp
    0xffffff99a67c26d0 <do_filp_open+16>:   stp     x19, x20, [sp,#16]
    ...

    Again it goes at frame start + 16, i.e. 0xffffffd6e218fd50 + 0x10 = 0xffffffd6e218fd60:

    crash_ARM64> rd 0xffffffd6e218fd60
    ffffffd6e218fd60:  ffffffd680d36000

    So x19, i.e. the struct filename pointer, is ffffffd680d36000:

    crash_ARM64> struct filename ffffffd680d36000
    struct filename {
      name = 0xffffffd680d3601c "/data/media/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001", 
      uptr = 0x7f81401380 <Address 0x7f81401380 out of bounds>, 
      aname = 0x0, 
      refcnt = 1, 
      iname = 0xffffffd680d3601c "/data/media/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001"
    }

    This matches the earlier result, except for the extra /data prefix, which I won't dig into here (I'm not sure why it appears; possibly mount namespaces or the process's working directory).

    Either way, it's clear that sdcard worker thread 2767 is opening a file on the sdcard belonging to the com.tencent.karaoke app.

    Meanwhile the com.tencent.karaoke thread it is deadlocked with, 2124, is in the middle of a write() on a file, and its fuse_req is still in the pending state.

    From this we can infer that some other thread in the com.tencent.karaoke process is opening the file test1001 with the O_TRUNC flag (otherwise open would never go through do_truncate).

    To verify that, list all threads of the com.tencent.karaoke process:

    crash_ARM64> ps -g 2124
    PID: 1651   TASK: ffffffd728b8b200  CPU: 2   COMMAND: "tencent.karaoke"
      PID: 1119   TASK: ffffffd6ae10f080  CPU: 1   COMMAND: "Binder:1651_5"
      PID: 1657   TASK: ffffffd6e942f080  CPU: 0   COMMAND: "Jit thread pool"
      PID: 1658   TASK: ffffffd728b88000  CPU: 4   COMMAND: "Signal Catcher"
      PID: 1659   TASK: ffffffd728b89900  CPU: 0   COMMAND: "JDWP"
      PID: 1660   TASK: ffffffd728b8be80  CPU: 0   COMMAND: "ReferenceQueueD"
      PID: 1661   TASK: ffffffd64894a580  CPU: 0   COMMAND: "FinalizerDaemon"
      PID: 1662   TASK: ffffffd64fbe1900  CPU: 0   COMMAND: "FinalizerWatchd"
      PID: 1663   TASK: ffffffd64894b200  CPU: 2   COMMAND: "HeapTaskDaemon"
      PID: 1664   TASK: ffffffd64894f080  CPU: 4   COMMAND: "Binder:1651_1"
      PID: 1665   TASK: ffffffd627caa580  CPU: 4   COMMAND: "Binder:1651_2"
      PID: 1667   TASK: ffffffd64894e400  CPU: 4   COMMAND: "FileObserver"
      PID: 1673   TASK: ffffffd627ca9900  CPU: 4   COMMAND: "Profile Saver"
      PID: 1680   TASK: ffffffd64fbe6400  CPU: 4   COMMAND: "Binder:1651_3"
      PID: 1690   TASK: ffffffd66d819900  CPU: 1   COMMAND: "ent.File.Tracer"
      PID: 1693   TASK: ffffffd66d818000  CPU: 0   COMMAND: "Binder:1651_4"
      PID: 1707   TASK: ffffffd6e9428000  CPU: 1   COMMAND: "pool-1-thread-1"
      PID: 1708   TASK: ffffffd6e942be80  CPU: 0   COMMAND: ".Event.Notifier"
      PID: 1709   TASK: ffffffd627caf080  CPU: 0   COMMAND: "Service.Invoker"
      PID: 1710   TASK: ffffffd627cfcb00  CPU: 0   COMMAND: "Timeout.Monitor"
      PID: 1711   TASK: ffffffd627cff080  CPU: 2   COMMAND: "ccess.Collector"
      PID: 1712   TASK: ffffffd627cfd780  CPU: 1   COMMAND: "pool-1-thread-2"
      PID: 1713   TASK: ffffffd6b6e45780  CPU: 5   COMMAND: "priority-thread"
      PID: 1730   TASK: ffffffd70ea54b00  CPU: 5   COMMAND: "le.common.b$1$1"
      PID: 1731   TASK: ffffffd632759900  CPU: 6   COMMAND: "le.common.b$1$1"
      PID: 1732   TASK: ffffffd63275cb00  CPU: 0   COMMAND: "TbsHandlerThrea"
      PID: 1734   TASK: ffffffd64fbe2580  CPU: 0   COMMAND: "SDKStarter"
      PID: 1735   TASK: ffffffd63275e400  CPU: 0   COMMAND: "NetworkWatcher"
      PID: 1745   TASK: ffffffd6d7fbf080  CPU: 6   COMMAND: "priority-thread"
      PID: 1747   TASK: ffffffd705b4d780  CPU: 0   COMMAND: "priority-thread"
      PID: 1749   TASK: ffffffd705b4f080  CPU: 5   COMMAND: "TcmReceiver"
      PID: 1794   TASK: ffffffd6e20aa580  CPU: 0   COMMAND: "Okio Watchdog"
      PID: 1804   TASK: ffffffd648948c80  CPU: 0   COMMAND: "YunYingReporter"
      PID: 1805   TASK: ffffffd728b8e400  CPU: 1   COMMAND: "ReporterMachine"
      PID: 1810   TASK: ffffffd6d7fbe400  CPU: 1   COMMAND: "business-defaul"
      PID: 1811   TASK: ffffffd6cd39be80  CPU: 4   COMMAND: "priority-thread"
      PID: 1812   TASK: ffffffd683c81900  CPU: 4   COMMAND: "tencent.karaoke"
      PID: 1818   TASK: ffffffd64fbe3e80  CPU: 2   COMMAND: "pool-2-thread-1"
      PID: 1830   TASK: ffffffd648948000  CPU: 5   COMMAND: "RenderThread"
      PID: 1831   TASK: ffffffd6d20f8c80  CPU: 0   COMMAND: "ConnectivityThr"
      PID: 1833   TASK: ffffffd61bfba580  CPU: 5   COMMAND: "resolver_thread"
      PID: 1842   TASK: ffffffd6d7fb8c80  CPU: 0   COMMAND: "CheckDropFrame"
      PID: 1850   TASK: ffffffd6c6bdcb00  CPU: 7   COMMAND: "hwuiTask1"
      PID: 1856   TASK: ffffffd6b6d6d780  CPU: 7   COMMAND: "business-defaul"
      PID: 1858   TASK: ffffffd633b20000  CPU: 7   COMMAND: "pool-3-thread-1"
      PID: 1862   TASK: ffffffd71ba99900  CPU: 2   COMMAND: "pool-4-thread-1"
      PID: 1863   TASK: ffffffd729b89900  CPU: 2   COMMAND: "pool-4-thread-2"
      PID: 1864   TASK: ffffffd7071de400  CPU: 0   COMMAND: "pool-4-thread-3"
      PID: 1869   TASK: ffffffd6c3f6a580  CPU: 1   COMMAND: "TcmReceiver"
      PID: 1878   TASK: ffffffd6d1c48000  CPU: 0   COMMAND: "pool-1-thread-3"
      PID: 1903   TASK: ffffffd6cfba6400  CPU: 0   COMMAND: "ent.File.Tracer"
      PID: 1914   TASK: ffffffd66a407080  CPU: 5   COMMAND: "pool-5-thread-1"
      PID: 1915   TASK: ffffffd66a405780  CPU: 0   COMMAND: "MidService"
      PID: 1920   TASK: ffffffd6d1c49900  CPU: 0   COMMAND: "t.handlerthread"
      PID: 1921   TASK: ffffffd67358f080  CPU: 1   COMMAND: "SDK_SUB"
      PID: 1923   TASK: ffffffd62cbe3200  CPU: 7   COMMAND: "pool-8-thread-1"
      PID: 1925   TASK: ffffffd6373ab200  CPU: 7   COMMAND: "pool-6-thread-1"
      PID: 1969   TASK: ffffffd648f03200  CPU: 0   COMMAND: "HttpManager"
      PID: 1988   TASK: ffffffd71619be80  CPU: 7   COMMAND: "pool-9-thread-1"
      PID: 2102   TASK: ffffffd690635780  CPU: 1   COMMAND: "SenderManager"
      PID: 2107   TASK: ffffffd75ef54b00  CPU: 1   COMMAND: "PackageProcesso"
      PID: 2120   TASK: ffffffd75ef50c80  CPU: 2   COMMAND: "wnloader.impl.c"
      PID: 2122   TASK: ffffffd690630c80  CPU: 0   COMMAND: "resolver_thread"
      PID: 2124   TASK: ffffffd771be3e80  CPU: 0   COMMAND: "wnloader.impl.c"
      PID: 2125   TASK: ffffffd690636400  CPU: 1   COMMAND: "resolver_thread"
      PID: 2126   TASK: ffffffd68ac03200  CPU: 2   COMMAND: "pool-2-thread-2"
      PID: 2128   TASK: ffffffd6becd5780  CPU: 4   COMMAND: "le.common.b$1$1"
      PID: 2134   TASK: ffffffd6cfacb200  CPU: 0   COMMAND: "karaoke_image_d"
      PID: 2135   TASK: ffffffd737c43e80  CPU: 2   COMMAND: "hwuiTask2"
      PID: 2247   TASK: ffffffd75b30be80  CPU: 4   COMMAND: "database-thread"
      PID: 2282   TASK: ffffffd6e5e05780  CPU: 2   COMMAND: "database-thread"
      PID: 2395   TASK: ffffffd7059a0c80  CPU: 0   COMMAND: "service_process"
      PID: 2684   TASK: ffffffd6a39fbe80  CPU: 2   COMMAND: "Wns.File.Tracer"
      PID: 2719   TASK: ffffffd6a39f8c80  CPU: 3   COMMAND: "pool-11-thread-"
      PID: 2725   TASK: ffffffd66a403200  CPU: 6   COMMAND: "RenderThread"
      PID: 2726   TASK: ffffffd66a404b00  CPU: 6   COMMAND: "RenderThread"
      PID: 2727   TASK: ffffffd66a402580  CPU: 7   COMMAND: "RenderThread"
      PID: 2728   TASK: ffffffd6ba9ccb00  CPU: 6   COMMAND: "RenderThread"
      PID: 2732   TASK: ffffffd6ca225780  CPU: 6   COMMAND: "RenderThread"
      PID: 2733   TASK: ffffffd76a711900  CPU: 6   COMMAND: "RenderThread"
      PID: 2743   TASK: ffffffd65bb73e80  CPU: 6   COMMAND: "RenderThread"
      PID: 2744   TASK: ffffffd6dff60000  CPU: 3   COMMAND: "RenderThread"
      PID: 4224   TASK: ffffffd7035dcb00  CPU: 0   COMMAND: "NanoHttpd Main "
      PID: 4228   TASK: ffffffd637fae400  CPU: 1   COMMAND: "delete-song-cac"
      PID: 4328   TASK: ffffffd6e52ebe80  CPU: 1   COMMAND: "business-extra-"
      PID: 6490   TASK: ffffffd6d86dcb00  CPU: 4   COMMAND: "TaskSchedulerSe"
      PID: 6491   TASK: ffffffd6d86df080  CPU: 2   COMMAND: "TaskSchedulerBa"
      PID: 6492   TASK: ffffffd6d86dbe80  CPU: 2   COMMAND: "TaskSchedulerBa"
      PID: 6493   TASK: ffffffd6d86dd780  CPU: 2   COMMAND: "TaskSchedulerFo"
      PID: 6494   TASK: ffffffd63df65780  CPU: 2   COMMAND: "TaskSchedulerFo"
      PID: 6495   TASK: ffffffd6ca224b00  CPU: 0   COMMAND: "Chrome_DBThread"
      PID: 6496   TASK: ffffffd6734a5780  CPU: 0   COMMAND: "Chrome_FileThre"
      PID: 6497   TASK: ffffffd6a7b96400  CPU: 1   COMMAND: "Chrome_FileUser"
      PID: 6498   TASK: ffffffd6a7b90000  CPU: 0   COMMAND: "Chrome_ProcessL"
      PID: 6499   TASK: ffffffd6a7b92580  CPU: 0   COMMAND: "Chrome_CacheThr"
      PID: 6500   TASK: ffffffd6a7b93200  CPU: 2   COMMAND: "Chrome_IOThread"
      PID: 6503   TASK: ffffffd6a7b94b00  CPU: 5   COMMAND: "IndexedDB"
      PID: 6504   TASK: ffffffd71b2b7080  CPU: 2   COMMAND: "Thread-45"
      PID: 6505   TASK: ffffffd72bd84b00  CPU: 1   COMMAND: "Thread-46"
      PID: 6508   TASK: ffffffd71b2b3e80  CPU: 6   COMMAND: "CleanupReferenc"
      PID: 6510   TASK: ffffffd71b2b1900  CPU: 2   COMMAND: "ConnectivityMan"
      PID: 6511   TASK: ffffffd71b2b6400  CPU: 0   COMMAND: "CookieMonsterCl"
      PID: 6512   TASK: ffffffd71b2b5780  CPU: 5   COMMAND: "CookieMonsterBa"
      PID: 6513   TASK: ffffffd72bd83e80  CPU: 2   COMMAND: "Chrome_InProcRe"
      PID: 6514   TASK: ffffffd681bf5780  CPU: 5   COMMAND: "Chrome_ChildIOT"
      PID: 6515   TASK: ffffffd6e2e25780  CPU: 7   COMMAND: "GpuMemoryThread"
      PID: 6520   TASK: ffffffd61b65e400  CPU: 1   COMMAND: "Compositor"
      PID: 6521   TASK: ffffffd61b65be80  CPU: 6   COMMAND: "Renderer::FILE"
      PID: 6522   TASK: ffffffd61b65f080  CPU: 6   COMMAND: "CompositorTileW"
      PID: 6523   TASK: ffffffd72bd81900  CPU: 5   COMMAND: "CompositorTileW"
      PID: 6524   TASK: ffffffd70e808c80  CPU: 1   COMMAND: "Chrome_InProcGp"
      PID: 6525   TASK: ffffffd6ae108000  CPU: 3   COMMAND: "SimpleCacheWork"
      PID: 6526   TASK: ffffffd71b2b3200  CPU: 3   COMMAND: "Chrome_ChildIOT"
      PID: 6529   TASK: ffffffd6ae109900  CPU: 3   COMMAND: "SimpleCacheWork"
      PID: 6546   TASK: ffffffd758750c80  CPU: 6   COMMAND: "HTMLParserThrea"
      PID: 6556   TASK: ffffffd6ae7f8000  CPU: 1   COMMAND: "ScriptStreamerT"
      PID: 8502   TASK: ffffffd76ed58000  CPU: 1   COMMAND: "KaraProxyPlayer"
      PID: 15621  TASK: ffffffd68116cb00  CPU: 5   COMMAND: "le.common.b$1$1"
      PID: 15689  TASK: ffffffd687cea580  CPU: 2   COMMAND: "resolver_thread"
      PID: 15822  TASK: ffffffd68ac11900  CPU: 2   COMMAND: "resolver_thread"
      PID: 28112  TASK: ffffffd603048c80  CPU: 7   COMMAND: "resolver_thread"

    (Note: another way to walk the thread group is crash_ARM64> list task_struct.thread_group -s task_struct.pid -h ffffffd728b8b200)

    Going through them one by one with bt, we eventually land on:

    crash_ARM64> bt 2120
    PID: 2120 TASK: ffffffd75ef50c80 CPU: 2 COMMAND: "wnloader.impl.c"
    #0 [ffffffd6e08478f0] __switch_to at ffffff99a6685560
    #1 [ffffffd6e0847920] __schedule at ffffff99a74df9f0
    #2 [ffffffd6e0847980] schedule at ffffff99a74dfd54
    #3 [ffffffd6e08479a0] request_wait_answer at ffffff99a68c0350
    #4 [ffffffd6e0847a10] __fuse_request_send at ffffff99a68c2f60
    #5 [ffffffd6e0847a40] fuse_request_send at ffffff99a68c2fb8
    #6 [ffffffd6e0847a50] fuse_simple_request at ffffff99a68c3604
    #7 [ffffffd6e0847a80] fuse_send_open at ffffff99a68c64d8
    #8 [ffffffd6e0847b40] fuse_do_open at ffffff99a68c7d54
    #9 [ffffffd6e0847bb0] fuse_open_common at ffffff99a68c8010
    #10 [ffffffd6e0847bf0] fuse_open at ffffff99a68c8058
    #11 [ffffffd6e0847c00] do_dentry_open at ffffff99a67b309c
    #12 [ffffffd6e0847c50] vfs_open at ffffff99a67b4144
    #13 [ffffffd6e0847c80] path_openat at ffffff99a67c1870
    #14 [ffffffd6e0847d50] do_filp_open at ffffff99a67c26f8
    #15 [ffffffd6e0847e50] do_sys_open at ffffff99a67b44c4
    #16 [ffffffd6e0847eb0] compat_sys_openat at ffffff99a67fbb68

    Using the same stack-derivation technique as before, we can recover the file name it is opening:

    crash_ARM64> struct filename ffffffd680d33000
    struct filename {
      name = 0xffffffd680d3301c "/storage/emulated/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001", 
      uptr = 0xd1dbe960 <Address 0xd1dbe960 out of bounds>, 
      aname = 0x0, 
      refcnt = 1, 
      iname = 0xffffffd680d3301c "/storage/emulated/0/Android/data/com.tencent.karaoke/files/speedMeasure/test1001"
    }

    Notice that the path prefixes seen by the app and by sdcard differ slightly: the app issues its request against the fuse mount while sdcard issues its request against ext4, but both refer to the same file.

    Now check the flags of this open request. From do_filp_open's declaration, its third argument is the struct open_flags pointer we need.

    crash_ARM64> p do_filp_open
    do_filp_open = $33 = 
     {struct file *(int, struct filename *, const struct open_flags *)} 0xffffff99a67c26c0

    Stack derivation as before gives its value as ffffffd6e0847e98:

    crash_ARM64> struct -x open_flags ffffffd6e0847e98
    struct open_flags {
      open_flag = 0x20241, 
      mode = 0x81b6, 
      acc_mode = 0x22, 
      intent = 0x300, 
      lookup_flags = 0x1
    }

    Convert open_flag to octal:

    crash_ARM64> p /o 0x20241
    $9 = 0401101

    Compare against the flag macro definitions:

    #define O_TRUNC 00001000
    #define O_CREAT 00000100
    #define O_WRONLY 00000001

    0401101 contains 00001000 (O_TRUNC), 00000100 (O_CREAT) and 00000001 (O_WRONLY), which confirms that sdcard worker thread 2767 is servicing thread 2120's open("test1001", O_WRONLY|O_CREAT|O_TRUNC) request.

    And while servicing that open, do_truncate blocks on the inode mutex of test1001, which is held by com.tencent.karaoke's thread 2124.

    Next, let's look at thread 2124 more closely.

    The first thing to establish is where in the code it acquired test1001's inode mutex; from the earlier analysis that mutex lives at ffffffd6948f4090.

    A general-purpose trick here: scan thread 2124's stack for the mutex value ffffffd6948f4090.

    First get 2124's stack range from bt: ffffffd6d396b4b0 ~ ffffffd6d396be70:

    crash_ARM64> bt 2124
    PID: 2124   TASK: ffffffd771be3e80  CPU: 0   COMMAND: "wnloader.impl.c"
     #0 [ffffffd6d396b4b0] __switch_to at ffffff99a6685560
     #1 [ffffffd6d396b4e0] __schedule at ffffff99a74df9f0
     #2 [ffffffd6d396b540] schedule at ffffff99a74dfd54
     #3 [ffffffd6d396b560] request_wait_answer at ffffff99a68c0350
     #4 [ffffffd6d396b5d0] __fuse_request_send at ffffff99a68c2f60
     #5 [ffffffd6d396b600] fuse_request_send at ffffff99a68c2fb8
     #6 [ffffffd6d396b610] fuse_send_readpages at ffffff99a68c8ef8
     #7 [ffffffd6d396b640] fuse_readpages at ffffff99a68c8fe8
     #8 [ffffffd6d396b6c0] __do_page_cache_readahead at ffffff99a676cfc0
     #9 [ffffffd6d396b780] filemap_fault at ffffff99a6762fc4
    #10 [ffffffd6d396b7f0] __do_fault at ffffff99a6789c38
    #11 [ffffffd6d396b860] handle_mm_fault at ffffff99a678d540
    #12 [ffffffd6d396b920] do_page_fault at ffffff99a6696f4c
    #13 [ffffffd6d396b990] do_mem_abort at ffffff99a6680ad8
    #14 [ffffffd6d396bb80] el1_da at ffffff99a6683cf8
         PC: ffffffd7017056c0  [unknown or invalid address]
         LR: ffffff99a695da6c  [iov_iter_fault_in_readable+64]
         SP: 0000000080000145  PSTATE: 00000000
        X29: ffffffd6d396bb70  X28: ffffff99a67619a0  X27: ffffffd6d396bb80
        X26: 0000000000001000  X25: ffffff99a7604000  X24: 0000000000000000
        X23: ffffff99a761b580  X22: ffffffd623366500  X21: 0000000000001000
        X20: ffffffd6948f4140  X19: 0000000000000000  X18: 0000000000000000
        X17: ffffffd6d396be08  X16: 0000000000000000  X15: 0000000000000000
        X14: ffffff99a668f4a0  X13: 0000000000000000  X12: 00000000ee04c905
        X11: 00000000d0bf8f50  X10: 0000000000000018   X9: 0101010101010101
         X8: 7f7f7f7f7f7f7f7f   X7: 722433ffff787368   X6: 7f7f7f7f7f7f7fff
         X5: 0000000000002800   X4: 0000008080000000   X3: ffffffd6d396bdd0
         X2: 0000000000002800   X1: 00000000d5d3e000   X0: 0000000000000000
    #15 [ffffffd6d396bc00] __generic_file_write_iter at ffffff99a6762af8
    #16 [ffffffd6d396bc50] ext4_file_write_iter at ffffff99a68201f4
    #17 [ffffffd6d396bd00] fuse_passthrough_write_iter at ffffff99a68cd4d0
    #18 [ffffffd6d396bd50] fuse_file_write_iter at ffffff99a68ca1f4
    #19 [ffffffd6d396bdb0] __vfs_write at ffffff99a67b49a0
    #20 [ffffffd6d396be30] vfs_write at ffffff99a67b515c
    #21 [ffffffd6d396be70] sys_pwrite64 at ffffff99a67b5a1c

    Then dump everything in that range:

    crash_ARM64> rd ffffffd6d396b4b0 -e ffffffd6d396be70
    ffffffd6d396b4b0:  ffffffd6d396b4e0 ffffff99a74df9f4   ..........M.....
    ffffffd6d396b4c0:  ffffffd771be3e80 ffffff99a74dfc3c   .>.q....<.M.....
    ffffffd6d396b4d0:  ffffffd771be3e80 ffffff99a8a15740   .>.q....@W......
    ffffffd6d396b4e0:  ffffffd6d396b540 ffffff99a74dfd58   @.......X.M.....
    ffffffd6d396b4f0:  ffffffd6d3968000 ffffffd66d943800   .........8.m....
    ffffffd6d396b500:  0000000000000000 ffffffd6db690dd0   ..........i.....
    ...
    ffffffd6d396bc00:  ffffffd6d396bc50 ffffff99a68201f8   P...............
    ffffffd6d396bc10:  ffffffd6948f3fe8 0000000000002800   .?.......(......
    ffffffd6d396bc20:  0000000000000000 ffffffd6d396bde0   ................
    ffffffd6d396bc30:  ffffffd6d396be08 ffffffd623366500   .........e6#....
    ffffffd6d396bc40:  ffffffd6948f4090 0000000000000000   .@..............
    ffffffd6d396bc50:  ffffffd6d396bd00 ffffff99a68cd4d4   ................
    ffffffd6d396bc60:  ffffffd623366500 fffffffffffffffb   .e6#............
    ffffffd6d396bc70:  ffffffd6891b9340 ffffffd6948f3fe8   @........?......
    ffffffd6d396bc80:  ffffffd7060c7100 ffffffd6d396bde0   .q..............
    ffffffd6d396bc90:  ffffffd66d943800 ffffffd6891b93e8   .8.m............
    ffffffd6d396bca0:  ffffff99a7604000 0000000000000000   .@`.............
    ffffffd6d396bcb0:  00000000d396bde0 ffffffd6891b9340   ........@.......
    ...

    The mutex value shows up at address ffffffd6d396bc40, which lies inside __generic_file_write_iter's stack frame:

    #15 [ffffffd6d396bc00] __generic_file_write_iter at ffffff99a6762af8
    #16 [ffffffd6d396bc50] ext4_file_write_iter at ffffff99a68201f4

    So the lock must have been taken before entering __generic_file_write_iter, most likely in ext4_file_write_iter. Its source confirms it:

    static ssize_t
    ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
    {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(iocb->ki_filp);
        ...
        mutex_lock(&inode->i_mutex);
        ...
        ret = __generic_file_write_iter(iocb, from);
        mutex_unlock(&inode->i_mutex);

    Stack derivation gives the struct kiocb pointer as 0xffffffd6d396bde0:

    crash_ARM64> struct kiocb ffffffd6d396bde0
    struct kiocb {
      ki_filp = 0xffffffd623366500, 
      ki_pos = 0, 
      ki_complete = 0x0, 
      private = 0xffffffd6d396bcb4, 
      ki_flags = 0
    }

    Its ki_filp, the struct file, is 0xffffffd623366500:

    crash_ARM64> struct file 0xffffffd623366500
    struct file {
      ...
      f_path = {
        mnt = 0xffffffd776661b20, 
        dentry = 0xffffffd7017056c0
      }, 
      f_inode = 0xffffffd6948f3fe8,
    }

    And the corresponding struct dentry is 0xffffffd7017056c0:

    crash_ARM64> struct dentry ffffffd7017056c0
    struct dentry {
      d_parent = 0xffffffd6aac46e40, 
      d_name = {
        name = 0xffffffd7017056f8 "test1001"
      }, 
      d_inode = 0xffffffd6948f3fe8, 
      ...

    This is the very same inode that sdcard worker 2767 is operating on, so we can be certain it is ext4_file_write_iter that holds test1001's inode mutex.

    One puzzle remains: the app's write should go to fuse as a fuse request, yet here it is operating on the ext4 inode directly.

    The reason is a fuse optimization called passthrough: the application process obtains the file's underlying ext4 file/inode at open time and then reads and writes it directly.

    static ssize_t fuse_passthrough_read_write_iter(struct kiocb *iocb,
                            struct iov_iter *iter, int do_write)
    {
        ssize_t ret_val;
        struct fuse_file *ff;
        struct file *fuse_file, *passthrough_filp;
        struct inode *fuse_inode, *passthrough_inode;
        struct fuse_conn *fc;
    
        ff = iocb->ki_filp->private_data;         /* get private_data from the fuse-side file */
        fuse_file = iocb->ki_filp;                /* keep the original fuse-side file */
        passthrough_filp = ff->passthrough_filp;  /* the ext4-backed file the fuse daemon returned at open time */
        fc = ff->fc;
    
        get_file(passthrough_filp);
        iocb->ki_filp = passthrough_filp;         /* swap the fuse-side file for the ext4-backed file */
        fuse_inode = fuse_file->f_path.dentry->d_inode;
        passthrough_inode = file_inode(passthrough_filp);
    
        if (do_write) {
            if (!passthrough_filp->f_op->write_iter)
                return -EIO;
            ret_val = passthrough_filp->f_op->write_iter(iocb, iter);   /* call ext4's file_operations->write_iter */

    The corresponding code on the open path:

    int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir)
    {
        struct fuse_file *ff;
        struct file *passthrough_filp = NULL;
        int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
    
        ff = fuse_file_alloc(fc);
    
        ff->fh = 0;
        ff->open_flags = FOPEN_KEEP_CACHE;
        if (!fc->no_open || isdir) {
            struct fuse_open_out outarg;
            int err;
    
        err = fuse_send_open(fc, nodeid, file, opcode, &outarg, &(passthrough_filp)); /* the fuse daemon returns the ext4-backed file */
            if (!err) {
                ff->fh = outarg.fh;
                ff->open_flags = outarg.open_flags;
            ff->passthrough_filp = passthrough_filp;   /* stash the ext4-backed file in ff->passthrough_filp */
        ...
    
        file->private_data = fuse_file_get(ff);   /* stash ff in the fuse-side file's private_data */
        return 0;
    }

    This skips the round trip through the fuse daemon and so improves read/write throughput, but the optimization lets the app and sdcard contend on the same ext4 inode.

    Normally such contention is harmless: at worst one side waits a while on the mutex.

    What makes this case special is that while ext4_file_write_iter is running, a page fault fires, and that fault in turn generates a fuse read request.

    And because sdcard worker thread 2767 is already blocked on that very ext4 inode mutex, the read request never gets serviced.

    This leaves two questions:

    1. sdcard has three worker threads, and the other two are sitting idle; why don't they handle this read request?

    2. Why does the write trigger a page fault at all, and why does that page fault turn into a fuse read request?

    First question: sdcard does have three worker threads:

            || pthread_create(&thread_default, NULL, start_handler, &handler_default)
            || pthread_create(&thread_read, NULL, start_handler, &handler_read)
            || pthread_create(&thread_write, NULL, start_handler, &handler_write)) {

    These worker threads are badly named and easy to misread.

    I originally assumed thread_default handled requests like open and close, thread_read handled read requests, and thread_write handled write requests.

    Which made 2124's call stack very confusing: why does it start as a write, then turn into a read, and then end up waiting on the thread_write thread?

    After talking to a colleague it turned out all three threads can handle every type of request; it is just that each app is bound to one particular worker thread.

    The binding is decided at app startup, based on the app's media/sdcard access permissions:

            mountServiceInternal.addExternalStoragePolicy(
                    new MountServiceInternal.ExternalStorageMountPolicy() {
                @Override
                public int getMountMode(int uid, String packageName) {
                    if (Process.isIsolated(uid)) {
                        return Zygote.MOUNT_EXTERNAL_NONE;
                    }
                    if (checkUidPermission(WRITE_MEDIA_STORAGE, uid) == PERMISSION_GRANTED) {
                        return Zygote.MOUNT_EXTERNAL_DEFAULT;
                    }
                    if (checkUidPermission(READ_EXTERNAL_STORAGE, uid) == PERMISSION_DENIED) {
                        return Zygote.MOUNT_EXTERNAL_DEFAULT;
                    }
                    if (checkUidPermission(WRITE_EXTERNAL_STORAGE, uid) == PERMISSION_DENIED) {
                        return Zygote.MOUNT_EXTERNAL_READ;
                    }
                    return Zygote.MOUNT_EXTERNAL_WRITE;
                }

    So with thread 2120's open request blocked, it is expected that thread 2124 of the same com.tencent.karaoke process stays pending: both requests go to the same worker thread.

    Second question:

    Why does the write trigger a page fault, and why does that page fault generate a read request?

    From the call stack:

    #13 [ffffffd6d396b990] do_mem_abort at ffffff99a6680ad8
    #14 [ffffffd6d396bb80] el1_da at ffffff99a6683cf8
         PC: ffffffd7017056c0  [unknown or invalid address]
         LR: ffffff99a695da6c  [iov_iter_fault_in_readable+64]
         ...
    #15 [ffffffd6d396bc00] __generic_file_write_iter at ffffff99a6762af8
    #16 [ffffffd6d396bc50] ext4_file_write_iter at ffffff99a68201f4

    The page fault is taken while __generic_file_write_iter calls iov_iter_fault_in_readable, which deliberately touches the user buffer up front (so the later atomic copy does not fault while page locks are held).

    Looking at the source:

    ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
    {
        struct file *file = iocb->ki_filp;
        struct address_space * mapping = file->f_mapping;
        struct inode     *inode = mapping->host;
        ...
        if (iocb->ki_flags & IOCB_DIRECT) {
            ...
            status = generic_perform_write(file, from, pos = iocb->ki_pos);
    
    
    ssize_t generic_perform_write(struct file *file, struct iov_iter *i, loff_t pos)
    {
        struct address_space *mapping = file->f_mapping;
        const struct address_space_operations *a_ops = mapping->a_ops;
        long status = 0;
        ssize_t written = 0;
        unsigned int flags = 0;
        ...
        do {
            struct page *page;
            unsigned long offset;    /* Offset into pagecache page */
            unsigned long bytes;    /* Bytes to write to page */
            size_t copied;        /* Bytes copied from user */
            void *fsdata;
    
            offset = (pos & (PAGE_CACHE_SIZE - 1));
            bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, iov_iter_count(i));
    
    again:
            ...
            if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
                status = -EFAULT;
                break;
            }
    
            if (fatal_signal_pending(current)) {
                status = -EINTR;
                break;
            }
    
            status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata);
    
            if (mapping_writably_mapped(mapping))
                flush_dcache_page(page);
    
            copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
            flush_dcache_page(page);
    
            status = a_ops->write_end(file, mapping, pos, bytes, copied,page, fsdata);
            if (unlikely(status < 0))
                break;
            copied = status;
    
            cond_resched();
    
            iov_iter_advance(i, copied);
            ...
            }
            pos += copied;
            written += copied;
    
            balance_dirty_pages_ratelimited(mapping);
        } while (iov_iter_count(i));
    
        return written ? written : status;
    }
    EXPORT_SYMBOL(generic_perform_write);

    The faulting call is iov_iter_fault_in_readable(i, bytes), and i is the from argument passed into __generic_file_write_iter.

    In other words, the page fault is on the buffer that from points to.

    For an ordinary write(fd, buf, count) call, from is essentially buf.

    So why does a fault on this buffer turn into a fuse read request? The only explanation is that the buffer itself is a file on the sdcard, mmap'ed into the current process.

    How to prove it? fuse_readpages takes a struct file pointer among its arguments:

    crash_ARM64> p fuse_readpages
    fuse_readpages = $36 = 
     {int (struct file *, struct address_space *, struct list_head *, unsigned int)} 0xffffff99a68c8f24

    Stack derivation gives that struct file pointer as ffffffd7060c6800:

    crash_ARM64> struct file ffffffd7060c6800
    struct file {
      ...
      f_path = {
        mnt = 0xffffffd648f13820, 
        dentry = 0xffffffd7017a1900
      }, 
      ...

    And the same method as before recovers the full path:

    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd7017a1900
    d_parent = 0xffffffd75d695600
    d_name.name = 0xffffffd7017a1938 "-111111870"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd75d695600
    d_parent = 0xffffffd77179ec00
    d_name.name = 0xffffffd75d695638 "tmp"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd77179ec00
    d_parent = 0xffffffd6470033c0
    d_name.name = 0xffffffd77179ec38 "file"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6470033c0
    d_parent = 0xffffffd6aac460c0
    d_name.name = 0xffffffd6470033f8 "cache"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd6aac460c0
    d_parent = 0xffffffd759bc1780
    d_name.name = 0xffffffd6aac460f8 "com.tencent.karaoke"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd759bc1780
    d_parent = 0xffffffd759bc1600
    d_name.name = 0xffffffd759bc17b8 "data"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd759bc1600
    d_parent = 0xffffffd75995b780
    d_name.name = 0xffffffd759bc1638 "Android"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd75995b780
    d_parent = 0xffffffd75d6869c0
    d_name.name = 0xffffffd75995b7b8 "0"
    crash_ARM64> struct dentry.d_parent,d_name.name 0xffffffd75d6869c0
    d_parent = 0xffffffd75d6869c0
    d_name.name = 0xffffffd75d6869f8 "/"

    The full path is /0/Android/data/com.tencent.karaoke/cache/file/tmp/-111111870.
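
    For reference, the manual walk above can be expressed as a small kernel-style loop. This is only an illustrative sketch (a real kernel implementation would use dentry_path_raw() and take the proper locks); it simply follows d_parent until it reaches the root dentry, which is its own parent:

    #include <linux/dcache.h>
    #include <linux/printk.h>

    /*
     * Illustrative sketch: print each dentry name from the leaf up to the
     * root, mirroring the manual d_parent walk done in crash above.
     */
    static void print_dentry_chain(struct dentry *dentry)
    {
        while (dentry != dentry->d_parent) {     /* "/" is its own parent */
            pr_info("%s\n", dentry->d_name.name);
            dentry = dentry->d_parent;
        }
        pr_info("/\n");
    }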

    The problem is now clear: in the com.tencent.karaoke application, thread 2120 is doing

    open("test1001",O_WRONLY|O_CREAT|O_TRUNC)

    while, at the same time, thread 2124 is doing

    fd = open("test1001")
    addr = mmap("-111111870")
    write(fd,addr)

    With the mechanism understood, the next step is to write a test program that reproduces these conditions:

    #include <fcntl.h>
    #include <pthread.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Thread 1: keep re-opening test2.txt with O_TRUNC so the sdcard daemon
     * repeatedly truncates the underlying file. */
    static void* fuse_test1(void* data)
    {
        int fd = -1;

        while (1) {
            fd = open("/sdcard/test2.txt", O_WRONLY|O_CREAT|O_TRUNC, 0644);
            close(fd);
        }
        return NULL;
    }

    int main(int argc, char** argv)
    {
        int ret = 0;
        int fd1 = -1;
        int fd2 = -1;
        pthread_t thread1;
        unsigned char * base = NULL;

        /* test1.txt should already exist and be large enough to back the mapping */
        fd1 = open("/sdcard/test1.txt", O_RDWR);
        fd2 = open("/sdcard/test2.txt", O_RDWR);
        pthread_create(&thread1, NULL, fuse_test1, NULL);

        while (1) {
            /* mmap one FUSE-backed file and write the mapping into another
             * FUSE file: the write faults the mapping in while the backing
             * inode's i_mutex is held. */
            base = (unsigned char *)mmap(0, 4096*50000, PROT_READ|PROT_WRITE, MAP_SHARED, fd1, 0);
            ret = write(fd2, base, 4096*50000);
            munmap(base, 4096*50000);
            usleep(1000);
        }

        return 0;
    }

    After pushing the test program to the phone, the problem reproduces every time; the call stacks look like this:

    # ps -t 4225
    USER      PID   PPID  VSIZE  RSS   WCHAN              PC  NAME
    root      4225  4169  210128 1676  request_wa 7f9d038358 S fusetest
    root      4226  4225  210128 1676  request_wa 7f9d037338 S fusetest
    
    # cat proc/4225/stack
    [<0000000000000000>] __switch_to+0x98/0xa4
    [<0000000000000000>] request_wait_answer+0x64/0x248
    [<0000000000000000>] __fuse_request_send+0x84/0x98
    [<0000000000000000>] fuse_request_send+0x44/0x4c
    [<0000000000000000>] fuse_send_readpages+0x90/0xb8
    [<0000000000000000>] fuse_readpages+0xc8/0xf0
    [<0000000000000000>] __do_page_cache_readahead+0x144/0x210
    [<0000000000000000>] ondemand_readahead+0x214/0x230
    [<0000000000000000>] page_cache_async_readahead+0xc8/0xdc
    [<0000000000000000>] filemap_fault+0xc4/0x418
    [<0000000000000000>] __do_fault+0x3c/0x8c
    [<0000000000000000>] handle_mm_fault+0x648/0x10b4
    [<0000000000000000>] do_page_fault+0x168/0x30c
    [<0000000000000000>] do_mem_abort+0x40/0x9c
    [<0000000000000000>] el1_da+0x18/0x78
    [<0000000000000000>] __generic_file_write_iter+0xcc/0x16c
    [<0000000000000000>] ext4_file_write_iter+0x230/0x2f4
    [<0000000000000000>] fuse_passthrough_write_iter+0x68/0xec
    [<0000000000000000>] fuse_file_write_iter+0x11c/0x210
    [<0000000000000000>] __vfs_write+0xa0/0xd0
    [<0000000000000000>] vfs_write+0xac/0x144
    [<0000000000000000>] SyS_write+0x48/0x84
    [<0000000000000000>] el0_svc_naked+0x24/0x28
    [<0000000000000000>] 0xffffffffffffffff
    
    # cat /proc/4226/stack
    [<0000000000000000>] __switch_to+0x98/0xa4
    [<0000000000000000>] request_wait_answer+0x64/0x248
    [<0000000000000000>] __fuse_request_send+0x84/0x98
    [<0000000000000000>] fuse_request_send+0x44/0x4c
    [<0000000000000000>] fuse_simple_request+0x120/0x170
    [<0000000000000000>] fuse_send_open+0x9c/0xb8
    [<0000000000000000>] fuse_do_open+0x80/0x100
    [<0000000000000000>] fuse_open_common+0x7c/0xb4
    [<0000000000000000>] fuse_open+0x10/0x18
    [<0000000000000000>] do_dentry_open+0x1d0/0x2ec
    [<0000000000000000>] vfs_open+0x6c/0x78
    [<0000000000000000>] path_openat+0x948/0xbd4
    [<0000000000000000>] do_filp_open+0x3c/0x84
    [<0000000000000000>] do_sys_open+0x164/0x1fc
    [<0000000000000000>] SyS_openat+0x10/0x18
    [<0000000000000000>] el0_svc_naked+0x24/0x28
    [<0000000000000000>] 0xffffffffffffffff
    
    # ps -t 2086
    USER      PID   PPID  VSIZE  RSS   WCHAN              PC  NAME
    media_rw  2086  569   15460  2652  wait_woken 7fb3d1bd70 S /system/bin/sdcard
    media_rw  2099  2086  15460  2652   pipe_wait 7fb3d1bd70 S sdcard
    media_rw  2100  2086  15460  2652  do_truncat 7fb3d1b338 D sdcard
    media_rw  2101  2086  15460  2652  fuse_dev_d 7fb3d1bd70 S sdcard
    media_rw  2102  2086  15460  2652  fuse_dev_d 7fb3d1bd70 S sdcard
                                                                                                                                                                               
    # cat /proc/2100/stack
    [<0000000000000000>] __switch_to+0x98/0xa4
    [<0000000000000000>] do_truncate+0x6c/0xa0
    [<0000000000000000>] path_openat+0xa10/0xbd4
    [<0000000000000000>] do_filp_open+0x3c/0x84
    [<0000000000000000>] do_sys_open+0x164/0x1fc
    [<0000000000000000>] SyS_openat+0x10/0x18
    [<0000000000000000>] el0_svc_naked+0x24/0x28
    [<0000000000000000>] 0xffffffffffffffff

    Putting the stacks together, the flow is roughly this: fusetest thread 4225 enters the FUSE passthrough write path, ext4_file_write_iter takes the backing file's inode->i_mutex, and the page fault on the mmap'ed buffer then sends a FUSE READ request that has to be answered by the sdcard daemon; meanwhile sdcard thread 2100, which is handling thread 4226's open with O_TRUNC, is blocked in do_truncate waiting for that same i_mutex. Neither side can make progress.

    Solution:

    The root cause is that the FUSE passthrough write path takes the inode lock first and only then touches the buf address; the resulting page fault has to be serviced through the FUSE daemon, which leads to the deadlock.

    If the buffer is faulted in before the lock is taken, the deadlock can be avoided:

    static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
    {
        ...
    
        if (ff && ff->passthrough_enabled && ff->passthrough_filp) {
    +        size_t bytes = iov_iter_count(from);
    +        /*
    +         * Fault the user pages in before calling fuse_passthrough_write_iter.
    +         * Otherwise a page fault taken while inode->i_mutex is held may
    +         * need to send a fuse request to the fuse daemon and deadlock.
    +         */
    +        iov_iter_fault_in_multipages_readable(from, bytes);
            written = fuse_passthrough_write_iter(iocb, from);
            goto out;
        }
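
    The pre-fault uses the multipages variant so that every page of the user buffer is touched, not just the first and last. In the common case the page faults, and any FUSE READ requests they generate towards the sdcard daemon, are therefore taken before the passthrough path grabs the backing file's inode->i_mutex, which breaks the wait cycle described above.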
  • Original article: https://www.cnblogs.com/YYPapa/p/7152867.html