• gdb调试分析多线程死锁


    转载:

    http://blog.chinaunix.net/uid-30343738-id-5757210.html

    #include <stdio.h>
    #include <pthread.h>
    #include <stdlib.h>
    #include <unistd.h>
    
    /* Counters incremented by func1()/func2(); thread1() and thread2() compare
     * the returned sequence1 value against 100000 to decide when to exit. */
    static int sequence1 = 0;
    static int sequence2 = 0;
    
    /* Mutexes initialised and destroyed in main(). func1() locks lock1 then
     * lock2, func2() locks lock2 then lock1. */
    pthread_mutex_t lock1;
    pthread_mutex_t lock2;
    
    /*
     * One work step for thread1(): acquires lock1 first, then lock2.
     *
     * Increments sequence1 under lock1, sleeps for one second while still
     * holding lock1, then acquires lock2 to increment sequence2. func2()
     * acquires the same two mutexes in the opposite order, so the two
     * functions can deadlock when run concurrently.
     *
     * Returns the value of sequence1, read after both mutexes are released.
     */
    int func1()
    {
        pthread_mutex_lock(&lock1); 
        ++sequence1; 
        /* lock1 stays held across the sleep -- presumably to give the other
         * thread time to take lock2 and make the deadlock reproducible. */
        sleep(1); 
        /* Blocks here if another thread already holds lock2. */
        pthread_mutex_lock(&lock2); 
        ++sequence2; 
        pthread_mutex_unlock(&lock2); 
        pthread_mutex_unlock(&lock1); 
    
        return sequence1; 
    }
    
    /*
     * One work step for thread2(): acquires lock2 first, then lock1.
     *
     * This is the reverse of the order used by func1(); the inversion is kept
     * on purpose because it is what produces the deadlock this program
     * demonstrates.
     *
     * Returns the value of sequence1 as observed while lock1 was held. Reading
     * it after the unlock would race with the ++sequence1 that func1()
     * performs under lock1.
     */
    int func2()
    {
        int observed;

        pthread_mutex_lock(&lock2);
        ++sequence2;
        /* lock2 stays held across the sleep -- presumably to give the other
         * thread time to take lock1 and make the deadlock reproducible. */
        sleep(1);
        /* Blocks here if another thread already holds lock1. */
        pthread_mutex_lock(&lock1);
        /* NOTE(review): this second increment also targets sequence2 rather
         * than sequence1; left unchanged, confirm whether that is intended. */
        ++sequence2;
        observed = sequence1;
        pthread_mutex_unlock(&lock1);
        pthread_mutex_unlock(&lock2);

        return observed;
    }
    
    
    /*
     * Thread entry point: calls func1() in an endless loop and terminates the
     * calling thread as soon as func1() returns exactly 100000.
     * The arg parameter is not used.
     */
    void* thread1(void *arg)
    {
        for (;;)
        {
            int latest = func1();

            if (latest != 100000)
            {
                continue;
            }
            pthread_exit(NULL);
        }
    }
    
    /*
     * Thread entry point: calls func2() in an endless loop and terminates the
     * calling thread as soon as func2() returns exactly 100000.
     * The arg parameter is not used.
     */
    void* thread2(void *arg)
    {
        for (;;)
        {
            int latest = func2();

            if (latest != 100000)
            {
                continue;
            }
            pthread_exit(NULL);
        }
    }
    
    /*
     * Thread entry point that takes no locks: sleeps one second per iteration
     * and terminates the calling thread on the first iteration where the
     * number of previously completed iterations exceeds 10000.
     * The arg parameter is not used.
     */
    void* thread3(void *arg)
    {
        int ticks;

        for (ticks = 0; ; ++ticks)
        {
            sleep(1);
            if (ticks > 10000)
            {
                pthread_exit(NULL);
            }
        }
    }
    
    /*
     * Thread entry point that takes no locks: sleeps one second per iteration
     * and terminates the calling thread on the first iteration where the
     * number of previously completed iterations exceeds 10000.
     * The arg parameter is not used.
     */
    void* thread4(void *arg)
    {
        int ticks;

        for (ticks = 0; ; ++ticks)
        {
            sleep(1);
            if (ticks > 10000)
            {
                pthread_exit(NULL);
            }
        }
    }
    
    
    
    /* Prints which pthread call failed and its error code, then terminates the
     * process with status 1 (the same status the original bare _exit(1) used). */
    static void fatal_pthread_error(const char *what, int rc)
    {
        /* stderr is unbuffered, so the message is written before _exit(). */
        fprintf(stderr, "%s failed: error %d\n", what, rc);
        _exit(1);
    }

    /*
     * Entry point of the deadlock demonstration.
     *
     * Initialises lock1 and lock2, starts thread1..thread4, sleeps five
     * seconds, then joins all four threads and destroys the mutexes.
     * thread1 and thread2 take the two mutexes in opposite order, so the
     * joins are expected to block once those two threads deadlock.
     *
     * Returns 0 on success, 1 if a join or mutex destroy reported an error.
     * A failed mutex init or thread creation terminates the process with
     * status 1 after printing a diagnostic.
     */
    int main(void)
    {
        enum { WORKER_COUNT = 4 };
        void *(*const workers[WORKER_COUNT])(void *) =
            { thread1, thread2, thread3, thread4 };
        pthread_t tid[WORKER_COUNT];
        int status = 0;
        int rc;
        int i;

        rc = pthread_mutex_init(&lock1, NULL);
        if (rc != 0)
        {
            fatal_pthread_error("pthread_mutex_init(lock1)", rc);
        }

        rc = pthread_mutex_init(&lock2, NULL);
        if (rc != 0)
        {
            fatal_pthread_error("pthread_mutex_init(lock2)", rc);
        }

        /* Same creation order as before: thread1, thread2, thread3, thread4. */
        for (i = 0; i < WORKER_COUNT; ++i)
        {
            rc = pthread_create(&tid[i], NULL, workers[i], NULL);
            if (rc != 0)
            {
                fatal_pthread_error("pthread_create", rc);
            }
        }

        sleep(5);

        for (i = 0; i < WORKER_COUNT; ++i)
        {
            rc = pthread_join(tid[i], NULL);
            if (rc != 0)
            {
                fprintf(stderr, "pthread_join(tid[%d]) failed: error %d\n", i, rc);
                status = 1;
            }
        }

        rc = pthread_mutex_destroy(&lock1);
        if (rc != 0)
        {
            fprintf(stderr, "pthread_mutex_destroy(lock1) failed: error %d\n", rc);
            status = 1;
        }

        rc = pthread_mutex_destroy(&lock2);
        if (rc != 0)
        {
            fprintf(stderr, "pthread_mutex_destroy(lock2) failed: error %d\n", rc);
            status = 1;
        }

        return status;
    }

    编译执行程序。

    gcc -o main main17.c -lpthread -g

     

    使用 pstack 和 gdb 工具对死锁程序进行分析

    1、使用pstack 

    查找测试程序的进程号

    root 5383 1 0 06:31 ? 00:00:43 gedit /root/Project/xa/main17.c
    root 7197 7179 0 10:04 pts/1 00:00:00 ./main
    root 7218 7206 0 10:04 pts/2 00:00:00 grep --color=auto main

    对死锁进程第一次执行 pstack(pstack 进程号)的输出结果

     Thread 5 (Thread 0x41e37940 (LWP 6722)): 
     #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
     #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
     #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
     #3  0x0000000000400a9b in func1() () 
     #4  0x0000000000400ad7 in thread1(void*) () 
     #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 4 (Thread 0x42838940 (LWP 6723)): 
     #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
     #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
     #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
     #3  0x0000000000400a17 in func2() () 
     #4  0x0000000000400a53 in thread2(void*) () 
     #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 3 (Thread 0x43239940 (LWP 6724)): 
     #0  0x0000003d19c9a541 in nanosleep () from /lib64/libc.so.6 
     #1  0x0000003d19c9a364 in sleep () from /lib64/libc.so.6 
     #2  0x00000000004009bc in thread3(void*) () 
     #3  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #4  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 2 (Thread 0x43c3a940 (LWP 6725)): 
     #0  0x0000003d19c9a541 in nanosleep () from /lib64/libc.so.6 
     #1  0x0000003d19c9a364 in sleep () from /lib64/libc.so.6 
     #2  0x0000000000400976 in thread4(void*) () 
     #3  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #4  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 1 (Thread 0x2b984ecabd90 (LWP 6721)): 
     #0  0x0000003d1a807b35 in pthread_join () from /lib64/libpthread.so.0 
     #1  0x0000000000400900 in main ()  

     对死锁进程第二次执行 pstack(pstack 进程号)的输出结果

     Thread 5 (Thread 0x40bd6940 (LWP 6722)): 
     #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
     #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
     #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
     #3  0x0000000000400a87 in func1() () 
     #4  0x0000000000400ac3 in thread1(void*) () 
     #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 4 (Thread 0x415d7940 (LWP 6723)): 
     #0  0x0000003d1a80d4c4 in __lll_lock_wait () from /lib64/libpthread.so.0 
     #1  0x0000003d1a808e1a in _L_lock_1034 () from /lib64/libpthread.so.0 
     #2  0x0000003d1a808cdc in pthread_mutex_lock () from /lib64/libpthread.so.0 
     #3  0x0000000000400a03 in func2() () 
     #4  0x0000000000400a3f in thread2(void*) () 
     #5  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #6  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 3 (Thread 0x41fd8940 (LWP 6724)): 
     #0  0x0000003d19c7aec2 in memset () from /lib64/libc.so.6 
     #1  0x00000000004009be in thread3(void*) () 
     #2  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #3  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 2 (Thread 0x429d9940 (LWP 6725)): 
     #0  0x0000003d19c7ae0d in memset () from /lib64/libc.so.6 
     #1  0x0000000000400982 in thread4(void*) () 
     #2  0x0000003d1a80673d in start_thread () from /lib64/libpthread.so.0 
     #3  0x0000003d19cd40cd in clone () from /lib64/libc.so.6 
     Thread 1 (Thread 0x2af906fd9d90 (LWP 6721)): 
     #0  0x0000003d1a807b35 in pthread_join () from /lib64/libpthread.so.0 
     #1  0x0000000000400900 in main () 

     

    连续多次查看这个进程的函数调用关系堆栈进行分析:当进程吊死时,多次使用 pstack 查看进程的函数调用堆栈,死锁线程将一直处于等锁的状态,对比多次的函数调用堆栈输出结果,

    确定哪两个线程(或者几个线程)一直没有变化且一直处于等锁的状态(可能存在两个线程 一直没有变化)。

    输出分析:

    根据上面的输出对比可以发现,线程 2 和线程 3 由第一次 pstack 输出的处在 sleep 函数变化为第二次 pstack 输出的处在 memset 函数(线程 1 是主线程,一直阻塞在 pthread_join 中等待子线程结束)。但是线程 4 和线程 5 一直处在等锁状态(pthread_mutex_lock),

    在连续两次的 pstack 信息输出中没有变化,所以我们可以推测线程 4 和线程 5 发生了死锁

     

    2、使用gdb进行进一步的分析

    查找测试程序的进程号

    root 5383 1 0 06:31 ? 00:00:43 gedit /root/Project/xa/main17.c
    root 7197 7179 0 10:04 pts/1 00:00:00 ./main
    root 7218 7206 0 10:04 pts/2 00:00:00 grep --color=auto main

    使用gdb 的attach功能

    gdb attach 7197

    查看当前进程的线程信息

    (gdb) info thread
    Id Target Id Frame
    5 Thread 0xb7539b40 (LWP 7198) "main" 0xb7717424 in __kernel_vsyscall ()
    4 Thread 0xb6d38b40 (LWP 7199) "main" 0xb7717424 in __kernel_vsyscall ()
    3 Thread 0xb6537b40 (LWP 7200) "main" 0xb7717424 in __kernel_vsyscall ()
    2 Thread 0xb5d36b40 (LWP 7201) "main" 0xb7717424 in __kernel_vsyscall ()
    * 1 Thread 0xb753a6c0 (LWP 7197) "main" 0xb7717424 in __kernel_vsyscall ()

     

     切换到线程 5 的输出

    (gdb) thread  5
    [Switching to thread 5 (Thread 0xb7539b40 (LWP 7198))]
    #0 0xb7717424 in __kernel_vsyscall ()
    (gdb) where
    #0 0xb7717424 in __kernel_vsyscall ()
    #1 0xb76f25a2 in __lll_lock_wait () from /lib/i386-linux-gnu/libpthread.so.0
    #2 0xb76edead in _L_lock_686 () from /lib/i386-linux-gnu/libpthread.so.0
    #3 0xb76edcf3 in pthread_mutex_lock ()
    from /lib/i386-linux-gnu/libpthread.so.0
    #4 0x0804864b in func1 () at main17.c:17
    #5 0x080486ef in thread1 (arg=0x0) at main17.c:44
    #6 0xb76ebd4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
    #7 0xb762adde in clone () from /lib/i386-linux-gnu/libc.so.6
    (gdb) f  4
    #4 0x0804864b in func1 () at main17.c:17
    warning: Source file is more recent than executable.
    17 pthread_mutex_lock(&lock2);     ////线程 5 正试图获得锁 lock2

     

    切换到线程4的输出

    (gdb) thread 4
    [Switching to thread 4 (Thread 0xb6d38b40 (LWP 7199))]
    #0 0xb7717424 in __kernel_vsyscall ()
    (gdb) where
    #0 0xb7717424 in __kernel_vsyscall ()
    #1 0xb76f25a2 in __lll_lock_wait () from /lib/i386-linux-gnu/libpthread.so.0
    #2 0xb76edead in _L_lock_686 () from /lib/i386-linux-gnu/libpthread.so.0
    #3 0xb76edcf3 in pthread_mutex_lock ()
    from /lib/i386-linux-gnu/libpthread.so.0
    #4 0x080486ae in func2 () at main17.c:30
    #5 0x0804871c in thread2 (arg=0x0) at main17.c:58
    #6 0xb76ebd4c in start_thread () from /lib/i386-linux-gnu/libpthread.so.0
    #7 0xb762adde in clone () from /lib/i386-linux-gnu/libc.so.6
    (gdb) f 4
    #4 0x080486ae in func2 () at main17.c:30
    30 pthread_mutex_lock(&lock1);      //线程 4 正试图获得锁 lock1

    打印锁的信息

    (gdb) p lock1
    $1 = {__data = {__lock = 2, __count = 0, __owner = 7198, __kind = 0,
    __nusers = 1, {__spins = 0, __list = {__next = 0x0}}},
    __size = "0200000000000000363400000000000001000000000000", __align = 2}
    (gdb) p lock2
    $2 = {__data = {__lock = 2, __count = 0, __owner = 7199, __kind = 0,
    __nusers = 1, {__spins = 0, __list = {__next = 0x0}}},
    __size = "0200000000000000373400000000000001000000000000", __align = 2}

     

    从上面可以发现,线程 4 正试图获得锁 lock1,但是锁 lock1已经被 LWP 为 7198的线程得到(__owner = 7198),

    线程 5 正试图获得锁 lock2,但是锁 lock2 已经被 LWP 为 7199 的线程得到(__owner = 7199),从 gdb 的 info thread 输出可以发现,LWP 7198 与线程 5 是对应的,LWP 7199 与线程 4 是对应的。

    所以我们可以得出, 线程 4 和线程 5 发生了交叉持锁的死锁现象。查看线程的源代码发现,线程 4 和线程 5 同时使用 lock1 和 lock2,且申请顺序相反(线程 5 先申请 lock1 再申请 lock2,线程 4 先申请 lock2 再申请 lock1)。解决办法是让所有线程按照相同的顺序申请这两把锁。

     

  • 相关阅读:
    民族、学历学位、所学专业、、专业技术职务 对应表
    Spring企业业务快速开发平台应该具备的基本框架
    winform中与asp.net中的 TreeView节点处理对比
    GB85611988《专业技术职务代码》
    ASP.NET获取文件名,后缀名
    各种国家标准代码表
    同样的门通向同样的结果要想得到没有的就要做不同的事
    哈佛MBA生是这样找工作的
    富爸爸,穷爸爸 总结财务自由
    WebSite和Web Application\网站与Web项目的区别
  • 原文地址:https://www.cnblogs.com/zhangxuan/p/6385329.html
Copyright © 2020-2023  润新知