• Valgrind的多线程调试工具


    Valgrind的多线程调试工具 
    Helgrind是Valgrind的一个重点功能。本节主要针对多线程的基本安全问题进行检测:【所有的代码环境都是在POSIX_THREAD模式下】

    写线程代码时 经常碰到如下问题
    1)  资源不安全访问 【就是多个线程在没有同步的情况下写某个资源体】
    2) 死锁问题
    3)  POSIX pthreads API的错误使用
    4)  在前面几个问题都能安全无误的基础上,对于多线程程序,还要能够很好地将同步块尽量缩到最小  【这是一个很大的课题】
      解决问题:
        问题1:   调用Helgrind能够很好地解决掉,以基本例子为例:
        ​

    /* Data-race demo: two threads increment the same global without any synchronization. */
    1. #include <pthread.h>

    2. int var = 0;   /* global written by both threads started in main */

    3. void* child_fn ( void* arg ) {

    4.          var++;   /* read-modify-write of the shared global; no lock is held here */

    5.          return NULL;

    6. }

    7. int main ( void ) {

    8.          pthread_t child;

    9.          pthread_t child2;

    10.          pthread_create(&child,NULL, child_fn, NULL);   /* first thread running child_fn */

    11.          pthread_create(&child2,NULL,child_fn,NULL);   /* second thread running the same child_fn */

    12.          pthread_join(child,NULL);   /* wait for both threads before returning */

    13.          pthread_join(child2,NULL);

    14.          return 0;

    15. }

    明显var是共享的 不安全访问,调用Helgrind看看怎么能够检测出来
    gcc -g thread_helgrind.c  -o thread_helgrind -lpthread
    valgrind --tool=helgrind ./thread_helgrind 
    可以看出valgrind弹出如下输出信息:

    ==25516== Helgrind, a thread error detector

    ==25516== Copyright (C) 2007-2013, and GNU GPL'd, by OpenWorks LLP et al.

    ==25516== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info

    ==25516== Command: ./thread_helgrind

    ==25516==

    ==25516== ---Thread-Announcement------------------------------------------

    ==25516==

    ==25516== Thread #3 was created

    ==25516==    at 0x415B3C8: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==25516==

    ==25516== ---Thread-Announcement------------------------------------------

    ==25516==

    ==25516== Thread #2 was created

    ==25516==    at 0x415B3C8: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==25516==

    ==25516== ----------------------------------------------------------------

    ==25516==

    ==25516== Possible data race during read of size 4 at 0x804A028 by thread #3

    ==25516== Locks held: none

    ==25516==    at 0x804851F: child_fn (thread_helgrind.c:12)

    ==25516==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==25516==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==25516==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==25516==

    ==25516== This conflicts with a previous write of size 4 by thread #2

    ==25516== Locks held: none

    ==25516==    at 0x8048527: child_fn (thread_helgrind.c:12)

    ==25516==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==25516==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==25516==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==25516==

    ==25516== ----------------------------------------------------------------

    ==25516==

    ==25516== Possible data race during write of size 4 at 0x804A028 by thread #3

    ==25516== Locks held: none

    ==25516==    at 0x8048527: child_fn (thread_helgrind.c:12)

    ==25516==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==25516==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==25516==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==25516==

    ==25516== This conflicts with a previous write of size 4 by thread #2

    ==25516== Locks held: none

    ==25516==    at 0x8048527: child_fn (thread_helgrind.c:12)

    ==25516==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==25516==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==25516==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==25516==

    ==25516==

    ==25516== For counts of detected and suppressed errors, rerun with: -v

    ==25516== Use --history-level=approx or =none to gain increased speed, at

    ==25516== the cost of reduced accuracy of conflicting-access information

    ==25516== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)

    可以看出,输出中(原文以绿色标出的部分)显示的就是data race错误,可以直接定位到 thread_helgrind.c:12 处对var的访问:访问前后没有持有任何锁(Locks held: none)。

    问题2:
        死锁问题要尽量避免。helgrind可以检测出加锁、解锁顺序出现问题而导致的死锁,这个问题我们可以好好看下:
        首先看一个正常的程序
        ​

    /* Baseline demo: the shared counter is incremented while holding mut_thread. */
    #include <pthread.h>

    pthread_mutex_t mut_thread;   /* guards var; initialized in main before the threads start */

    int var = 0;   /* global incremented by both threads */

    void* child_fn ( void* arg ) {

    pthread_mutex_lock(&mut_thread);   /* take the lock before touching var */

    var++;

    pthread_mutex_unlock(&mut_thread);   /* release it so the other thread can proceed */

             return NULL;

    }

    int main ( void ) {

             pthread_t child;

             pthread_t child2;

             pthread_mutex_init(&mut_thread,NULL);   /* NULL attributes: default mutex type */

             pthread_create(&child,NULL, child_fn, NULL);

             pthread_create(&child2,NULL,child_fn,NULL);

             pthread_join(child,NULL);   /* wait for both threads before returning */

             pthread_join(child2,NULL);

             return 0;

    }

    正常加锁解锁  没有问题 
    再看下连续加2次锁的情况:

    /* Self-deadlock demo: child_fn locks mut_thread twice and never unlocks it. */
    #include <pthread.h>

    pthread_mutex_t mut_thread;   /* initialized in main with NULL (default) attributes */

    int var = 0;   /* global incremented by the threads */

    void* child_fn ( void* arg ) {

    pthread_mutex_lock(&mut_thread);   /* first acquisition */

    var++;

    pthread_mutex_lock(&mut_thread);   /* second lock on the same mutex with no unlock in between (intentional for the demo) */

             return NULL;

    }

    int main ( void ) {

             pthread_t child;

             pthread_t child2;

             pthread_mutex_init(&mut_thread,NULL);

             pthread_create(&child,NULL, child_fn, NULL);

             pthread_create(&child2,NULL,child_fn,NULL);

             pthread_join(child,NULL);   /* joins the thread that runs child_fn above */

             pthread_join(child2,NULL);

             return 0;

    }

        看下这个helgrind打印出来的东西 【当然要用Ctrl+C杀死进程,不然会一直卡在那里动都不动一下】

    ==26534== Helgrind, a thread error detector

    ==26534== Copyright (C) 2007-2013, and GNU GPL'd, by OpenWorks LLP et al.

    ==26534== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info

    ==26534== Command: ./deadlock_helgrind

    ==26534==

    ==26534== ---Thread-Announcement------------------------------------------

    ==26534==

    ==26534== Thread #2 was created

    ==26534==    at 0x415B3C8: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==26534==

    ==26534== ----------------------------------------------------------------

    ==26534==

    ==26534== Thread #2: Attempt to re-lock a non-recursive lock I already hold

    ==26534==    at 0x402E8E5: pthread_mutex_lock (hg_intercepts.c:507)

    ==26534==    by 0x80485C6: child_fn (deadlock_helgrind.c:14)

    ==26534==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==26534==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26534==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==26534==  Lock was previously acquired

    ==26534==    at 0x402E95D: pthread_mutex_lock (hg_intercepts.c:518)

    ==26534==    by 0x80485AD: child_fn (deadlock_helgrind.c:12)

    ==26534==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==26534==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26534==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==26534==

    ^C==26534== ----------------------------------------------------------------

    ==26534==

    ==26534== Thread #2: Exiting thread still holds 1 lock

    ==26534==    at 0x4001182: ??? (in /lib/i386-linux-gnu/ld-2.17.so)

    ==26534==    by 0x405B4D1: __lll_lock_wait (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26534==    by 0x4056ED3: _L_lock_776 (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26534==    by 0x4056D11: pthread_mutex_lock (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26534==    by 0x402E914: pthread_mutex_lock (hg_intercepts.c:510)

    ==26534==    by 0x80485C6: child_fn (deadlock_helgrind.c:14)

    ==26534==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==26534==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26534==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==26534==

    ==26534==

    ==26534== For counts of detected and suppressed errors, rerun with: -v

    ==26534== Use --history-level=approx or =none to gain increased speed, at

    ==26534== the cost of reduced accuracy of conflicting-access information

    ==26534== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
    “Lock was previously acquired”:对于线程自身来讲,这样的做法明显有问题——它已经持有这个lock,却还在尝试再次获取它,这样就导致了死锁的发生。可以定位到 ==26534==    by 0x80485C6: child_fn (deadlock_helgrind.c:14) 出问题了,所以helgrind在解决这类问题时还是非常厉害的~!
    接下来看一个2 mutex导致的问题:

    /* Lock-order demo: child_fn and child_fn1 acquire the same two mutexes in opposite order. */
    #include <pthread.h>

    pthread_mutex_t mut_thread;

    pthread_mutex_t mut_thread1;

    int var = 0;   /* global incremented by both threads while holding both mutexes */

    void* child_fn ( void* arg ) {

    pthread_mutex_lock(&mut_thread);   /* order here: mut_thread first ... */

    pthread_mutex_lock(&mut_thread1);   /* ... then mut_thread1 */

    var++;

    pthread_mutex_unlock(&mut_thread);

    pthread_mutex_unlock(&mut_thread1);

             return NULL;

    }

    void* child_fn1(void *arg)

    {

    pthread_mutex_lock(&mut_thread1);   /* opposite order: mut_thread1 first ... */

    pthread_mutex_lock(&mut_thread);   /* ... then mut_thread (intentional inversion for the demo) */

    var++;

    pthread_mutex_unlock(&mut_thread1);

    pthread_mutex_unlock(&mut_thread);

             return NULL;

    }

    int main ( void ) {

             pthread_t child;

             pthread_t child2;

             pthread_mutex_init(&mut_thread,NULL);   /* both mutexes use NULL (default) attributes */

             pthread_mutex_init(&mut_thread1,NULL);

             pthread_create(&child,NULL, child_fn, NULL);   /* thread using the mut_thread -> mut_thread1 order */

             pthread_create(&child2,NULL,child_fn1,NULL);   /* thread using the mut_thread1 -> mut_thread order */

             pthread_join(child,NULL);

             pthread_join(child2,NULL);

             return 0;

    }

    加锁顺序导致死锁问题

    ==26785== Helgrind, a thread error detector

    ==26785== Copyright (C) 2007-2013, and GNU GPL'd, by OpenWorks LLP et al.

    ==26785== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info

    ==26785== Command: ./deadlock_helgrind

    ==26785==

    ==26785== ---Thread-Announcement------------------------------------------

    ==26785==

    ==26785== Thread #3 was created

    ==26785==    at 0x415B3C8: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==26785==

    ==26785== ----------------------------------------------------------------

    ==26785==

    ==26785== Thread #3: lock order "0x804A038 before 0x804A050" violated

    ==26785==

    ==26785== Observed (incorrect) order is: acquisition of lock at 0x804A050

    ==26785==    at 0x402E95D: pthread_mutex_lock (hg_intercepts.c:518)

    ==26785==    by 0x8048637: child_fn1 (deadlock_helgrind.c:22)

    ==26785==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==26785==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26785==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==26785==

    ==26785==  followed by a later acquisition of lock at 0x804A038

    ==26785==    at 0x402E95D: pthread_mutex_lock (hg_intercepts.c:518)

    ==26785==    by 0x8048643: child_fn1 (deadlock_helgrind.c:23)

    ==26785==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==26785==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26785==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==26785==

    ==26785== Required order was established by acquisition of lock at 0x804A038

    ==26785==    at 0x402E95D: pthread_mutex_lock (hg_intercepts.c:518)

    ==26785==    by 0x80485ED: child_fn (deadlock_helgrind.c:13)

    ==26785==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==26785==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26785==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==26785==

    ==26785==  followed by a later acquisition of lock at 0x804A050

    ==26785==    at 0x402E95D: pthread_mutex_lock (hg_intercepts.c:518)

    ==26785==    by 0x80485F9: child_fn (deadlock_helgrind.c:14)

    ==26785==    by 0x402E5F6: mythread_wrapper (hg_intercepts.c:233)

    ==26785==    by 0x4054D77: start_thread (in /lib/i386-linux-gnu/libpthread-2.17.so)

    ==26785==    by 0x415B3DD: clone (in /lib/i386-linux-gnu/libc-2.17.so)

    ==26785==

    ==26785==

    ==26785== For counts of detected and suppressed errors, rerun with: -v

    ==26785== Use --history-level=approx or =none to gain increased speed, at

    ==26785== the cost of reduced accuracy of conflicting-access information

    ==26785== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 9 from 9)

    这是helgrind通过观察各线程实际的加锁顺序(Observed order)得出的:两个线程的加锁顺序不一致(加锁顺序出错),导致了这种情况的发生。
    POSIX pthreads API的错误使用(posix_thread error)这里就不列举了,这个完全是看基础。

    下篇待续

  • 相关阅读:
    java基础:9.2 接口implements,Comparable,Cloneable接口
    java基础:9.1 抽象类
    java基础:9.4 web爬虫
    java基础:6.0 ArrayList
    java基础:9.3 从web上读取数据
    java基础:12.1 文本I/O(一)
    3.2 FPGA 配置电路/主模式 从模式 JTAG模式
    谷歌浏览器查看HTTP协议
    MIME类型说明
    常见的响应码说明
  • 原文地址:https://www.cnblogs.com/sfwtoms/p/3931719.html
Copyright © 2020-2023  润新知