• [原]调试实战——使用windbg调试TerminateThread导致的死锁


    前言

    项目里的一个升级程序偶尔会死锁,查看dump后发现是死在了ShellExecuteExW里。经验少,不知道为什么,于是在高端调试论坛里发帖求助,链接如下http://advdbg.org/forums/6520/ShowPost.aspx

    根据张银奎老师的描述可知,应该是拥有关键段的线程意外结束了。仔细检查项目中的代码,发现程序中有使用TerminateThread()来强制杀线程的代码。很可疑,于是写了一个测试程序,还原了这个问题。

    {% note info %}

    这也是几年前在项目中遇到的一个问题,我对之前的笔记进行了整理重新发布于此。

    {% endnote %}

    问题重现

    重现方法

    主程序会加载一个DLL,并调用该DLL的导出函数创建一个线程,然后调用TerminateThread()强制杀死这个线程,接着调用RunProcess()(内部封装了对ShellExecuteEx()的调用)执行一个新进程,此时会卡死在ShellExecuteEx()里。为了让问题更容易重现,特地在DllMain()的参数ul_reason_for_call为DLL_THREAD_DETACH时,强制睡眠了5秒。

    代码摘录

    主工程 testTerminateThread

    //testTerminateThread.cpp
    #include "stdafx.h"
    #include "windows.h"
    #include "process.h"
    
    typedef HANDLE (*pfnGenerateThread)();
    
    // Starts `app_name` with the command line `cmd` via ShellExecuteEx, using
    // the "open" verb, a normal show state and no explicit working directory.
    //
    // Returns the hProcess value filled in by ShellExecuteEx (requested with
    // SEE_MASK_NOCLOSEPROCESS), or INVALID_HANDLE_VALUE when the call fails.
    HANDLE RunProcess(const TCHAR* app_name, const TCHAR* cmd)
    {
        // Zero the whole structure first, then fill in the fields we need.
        SHELLEXECUTEINFO info = {0};
        info.cbSize       = sizeof(info);
        info.fMask        = SEE_MASK_NOCLOSEPROCESS;
        info.lpVerb       = _T("open");
        info.lpFile       = app_name;
        info.lpParameters = cmd;
        info.lpDirectory  = NULL;
        info.nShow        = SW_NORMAL;

        const BOOL launched = ::ShellExecuteEx(&info);
        return launched ? info.hProcess : INVALID_HANDLE_VALUE;
    }
    
    int _tmain(int argc, _TCHAR* argv[])
    {
        while ( 1 )
        {
            HMODULE hModule = LoadLibrary(_T("testDll.dll"));
            if ( NULL == hModule )
                return 0;
    
            pfnGenerateThread pfn = (pfnGenerateThread)GetProcAddress(hModule, "GenerateThread");
            if ( NULL == pfn )
                return 0;
    
            HANDLE hThread = pfn();
    
            // give thread time to start up
            Sleep(1000);
            
            // terminate thread.
            BOOL bOk = TerminateThread(hThread, 0);
    
            // dead lock in this function...
            RunProcess(argv[0], NULL);
    
            FreeLibrary(hModule);
        }
    
        return 0;
    }

    DLL工程 testDll

    // DllMain.cpp
    #include "stdafx.h"
    #include "windows.h"
    
    // Entry point of testDll.
    //
    // Writes one debug-output line per loader notification so the sequence can
    // be followed in DbgView. On DLL_THREAD_DETACH it additionally sleeps for
    // five seconds on purpose, to widen the window in which the exiting thread
    // can be terminated from outside.
    //
    // Parameters:
    //   hModule            - module handle of this DLL (unused).
    //   ul_reason_for_call - which notification is being delivered.
    //   lpReserved         - unused.
    // Returns TRUE for every notification.
    BOOL APIENTRY DllMain( HMODULE hModule,
                           DWORD  ul_reason_for_call,
                           LPVOID lpReserved
                         )
    {

        switch (ul_reason_for_call)
        {
        case DLL_PROCESS_ATTACH:
            OutputDebugString(L"====> DLL_PROCESS_ATTACH called.\n");
            break;
        case DLL_THREAD_ATTACH:
            OutputDebugString(L"----> DLL_THREAD_ATTACH called.\n");
            break;
        case DLL_THREAD_DETACH:
            OutputDebugString(L"<---- DLL_THREAD_DETACH called.\n");
            // Runs with LdrpLoaderLock held! Sleep 5 seconds so the thread is
            // still inside this notification when it gets terminated.
            Sleep(5000);
            break;
        case DLL_PROCESS_DETACH:
            OutputDebugString(L"<==== DLL_PROCESS_DETACH called.\n");
            break;
        }
        return TRUE;
    }
    // testDll.cpp
    #include "stdafx.h"
    #include "stdio.h"
    #include "process.h"
    #include "windows.h"
    
    void OutputCurrentThreadId()
    {
        TCHAR szBuffer[1024];
        swprintf_s(szBuffer, L"thread [0x%x], running & exiting...
    ", GetCurrentThreadId());
        OutputDebugString(szBuffer);
        return;
    }
    
    // Thread routine handed to _beginthreadex: logs the id of the thread it
    // runs on and finishes immediately with exit code 0.
    unsigned __stdcall testProc(void * /*unused*/)
    {
        const unsigned exitCode = 0;
        OutputCurrentThreadId();
        return exitCode;
    }
    
    // Starts a new thread running testProc (default security attributes and
    // stack size, no argument, not suspended, thread id not requested) and
    // returns the _beginthreadex result converted to a HANDLE.
    HANDLE GenerateThread()
    {
        return reinterpret_cast<HANDLE>(
            _beginthreadex(NULL, 0, testProc, NULL, 0, NULL));
    }
    

    debug-deadlock-caused-by-TerminateThread-Demo.zip 10.63 KB

    问题分析

    运行测试程序前先打开DbgView监视调试信息,然后运行测试程序。

    DebugView
    DebugView

    从日志可知,我们启动的测试线程的线程id是0x1400。

    当程序hang住后,使用windbg附加。附加成功后,先运行~*kvn查看线程及每个线程的调用栈信息。发现只有一个0号线程(1号线程是windbg附加到进程时产生的)。

    0:001> ~*kvn
    
       0  Id: 18c0.1008 Suspend: 1 Teb: 7ffdf000 Unfrozen
     # ChildEBP RetAddr  Args to Child              
    00 002bf614 775a6a64 77592278 00000064 00000000 ntdll!KiFastSystemCallRet (FPO: [0,0,0])
    01 002bf618 77592278 00000064 00000000 00000000 ntdll!NtWaitForSingleObject+0xc (FPO: [3,0,0])
    02 002bf67c 7759215c 00000000 00000000 00000001 ntdll!RtlpWaitOnCriticalSection+0x13e (FPO: [Non-Fpo])
    03 002bf6a4 775c00e1 77637340 77bf1b77 00000000 ntdll!RtlEnterCriticalSection+0x150 (FPO: [Non-Fpo])
    04 002bf6dc 75587bc3 00000001 00000000 002bf704 ntdll!LdrLockLoaderLock+0xe4 (FPO: [Non-Fpo])
    05 002bf728 7679215d 00000000 002bf73c 00000104 KERNELBASE!GetModuleFileNameW+0x75 (FPO: [Non-Fpo])
    06 002bf948 76792112 002bfbb0 002bf968 7ffdb000 SHELL32!InRunDllProcess+0x39 (FPO: [Non-Fpo])
    *** WARNING: Unable to verify checksum for C:\Users\BianChengNan\Documents\Visual Studio 2012\Projects\testTerminateThread\Debug\testTerminateThread.exe
    07 002bf95c 013714db 002bfa44 002bfcbc 002bfbc0 SHELL32!ShellExecuteExW+0x51 (FPO: [Non-Fpo])
    08 002bfbb0 01371685 000ac518 00000000 00000000 testTerminateThread!RunProcess+0xdb (FPO: [Non-Fpo]) (CONV: cdecl) [c:\users\bianchengnan\documents\visual studio 2012\projects\testterminatethread\testterminatethread\testterminatethread.cpp @ 28]
    09 002bfcbc 01371c69 00000001 000ac510 000ae660 testTerminateThread!wmain+0xc5 (FPO: [Non-Fpo]) (CONV: cdecl) [c:\users\bianchengnan\documents\visual studio 2012\projects\testterminatethread\testterminatethread\testterminatethread.cpp @ 59]
    0a 002bfd0c 01371e5d 002bfd20 758ced6c 7ffdb000 testTerminateThread!__tmainCRTStartup+0x199 (FPO: [Non-Fpo]) (CONV: cdecl) [f:\dd\vctools\crt_bld\self_x86\crt\src\crtexe.c @ 533]
    0b 002bfd14 758ced6c 7ffdb000 002bfd60 775c37eb testTerminateThread!wmainCRTStartup+0xd (FPO: [Non-Fpo]) (CONV: cdecl) [f:\dd\vctools\crt_bld\self_x86\crt\src\crtexe.c @ 377]
    0c 002bfd20 775c37eb 7ffdb000 77bf10cb 00000000 kernel32!BaseThreadInitThunk+0xe (FPO: [Non-Fpo])
    0d 002bfd60 775c37be 01371082 7ffdb000 00000000 ntdll!__RtlUserThreadStart+0x70 (FPO: [Non-Fpo])
    0e 002bfd78 00000000 01371082 7ffdb000 00000000 ntdll!_RtlUserThreadStart+0x1b (FPO: [Non-Fpo])
    
    #  1  Id: 18c0.193c Suspend: 1 Teb: 7ffde000 Unfrozen
     # ChildEBP RetAddr  Args to Child              
    00 0133fbac 775ff20f 76a71677 00000000 00000000 ntdll!DbgBreakPoint (FPO: [0,0,0])
    01 0133fbdc 758ced6c 00000000 0133fc28 775c37eb ntdll!DbgUiRemoteBreakin+0x3c (FPO: [Non-Fpo])
    02 0133fbe8 775c37eb 00000000 76a71183 00000000 kernel32!BaseThreadInitThunk+0xe (FPO: [Non-Fpo])
    03 0133fc28 775c37be 775ff1d3 00000000 00000000 ntdll!__RtlUserThreadStart+0x70 (FPO: [Non-Fpo])
    04 0133fc40 00000000 775ff1d3 00000000 00000000 ntdll!_RtlUserThreadStart+0x1b (FPO: [Non-Fpo])

    通过调用栈,我们发现程序卡在了ShellExecuteExW里。

    运行!cs -l看下输出结果:

    0:001> !cs -l
    -----------------------------------------
    DebugInfo          = 0x77637540
    Critical section   = 0x77637340 (ntdll!LdrpLoaderLock+0x0)
    LOCKED
    LockCount          = 0x1
    WaiterWoken        = No
    OwningThread       = 0x00001400
    RecursionCount     = 0x1
    LockSemaphore      = 0x64
    SpinCount          = 0x00000000

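    注意OwningThread的值0x00001400正是我们生成的测试线程,与我们在DbgView里看到的线程id一致。但是该线程已经被我们杀死了,它在被杀死前获得了进程加载锁0x77637340 (ntdll!LdrpLoaderLock+0x0)。原因是:测试线程的线程函数很快就返回了,线程退出时系统会在持有加载锁的情况下调用DllMain()的DLL_THREAD_DETACH分支,而我们在这里睡眠了5秒;主线程只等待了1秒就调用TerminateThread()把它杀死,此时它仍然持有加载锁,这把锁再也不会被释放。之后主线程在ShellExecuteExW里调用GetModuleFileNameW时需要获取同一把锁,于是永远等待下去。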

    至此,真相大白。

    总结

    • 不要随便用TerminateThread来强行杀死线程!
    • windbg真是windows下的调试神器。
    • !cs -l可以帮助我们快速的查找到死锁的关键段。

    参考资料

  • 相关阅读:
    Swing程序最佳架构设计—以业务对象为中心的MVC模式(转)
    股市投资策略总结(转)
    php学习笔记--高级教程--读取文件、创建文件、写入文件
    史上最简单的Hibernate入门简单介绍
    Java中StringBuilder的清空方法比較
    DHCP Option 60 的理解
    ICMP报文分析
    软件測试自学指南---从入门到精通
    Qt多线程学习:创建多线程
    Bulk Insert命令具体
  • 原文地址:https://www.cnblogs.com/bianchengnan/p/12158723.html
Copyright © 2020-2023  润新知