• pythonGPU状态pynvml


    python-pynvml

    简介

    深度学习过程中经常需要和GPU打交道,需要在训练之前确定GPU工作状态。

    Nvidia的显卡提供了 NVML(英伟达显卡管理库)以及构建在其上的 nvidia-smi(显卡系统管理界面),可以方便地查询显卡的信息和工作状况。在python中同样可以利用pynvml库来实现显卡信息的获取。

    官方指导文件

    https://docs.nvidia.com/deploy/nvml-api/index.html

    安装

    pip install nvidia-ml-py
    
    pip install nvidia-ml-py3  # python3
    

    使用

    
    import pynvml
    
    pynvml.nvmlInit() # 初始化
    ... # 函数调用
    pynvml.nvmlShutdown() # 最后要关闭管理工具
    

    获取驱动版本号

    #GPU驱动信息
    
    pynvml.nvmlSystemGetDriverVersion()
    =>
    b'426.00' # 版本号426
    

    获取显卡句柄

    获得GPU数量

    #获取Nvidia GPU块数
    pynvml.nvmlDeviceGetCount()
    -> 1
    

    获得GPU名称

    # Index of the GPU to query; valid values are 0 .. nvmlDeviceGetCount() - 1.
    gpu_id = 0
    handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)

    # Look up the product name of the device behind the handle.
    name = pynvml.nvmlDeviceGetName(handle)

    print(name)
    
    ->  b'GeForce GTX 1650 6GB'
    

    获取GPU 0 内存信息

    # Number of bytes per MiB; used below to convert the raw byte counts.
    NUM_EXPAND = 1024 * 1024
    
    # Query the memory counters of GPU 0.
    gpu_id=0
    handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
    info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    
    gpu_Total = info.total   # total device memory, in bytes
    gpu_Free = info.free   # memory still available, in bytes
    gpu_Used = info.used   # memory currently in use, in bytes
    
    print(gpu_Total)  # total device memory, e.g. 6442450944 bytes
    print(gpu_Free)  # free (remaining) device memory, e.g. 4401950720 bytes
    print(gpu_Used)  # used device memory, e.g. 2040500224 bytes
    
    # Same three values converted from bytes to MiB.
    print(gpu_Total / NUM_EXPAND)
    print(gpu_Free / NUM_EXPAND)
    print(gpu_Used / NUM_EXPAND)
    
    # meminfo.used / 1024 / 1024  
    #    4198 M
    
    

    获得GPU 进程信息

    获取正在运行的计算进程,以及每个进程占用的显存大小

    # 
    # List the compute processes currently running on GPU 0.
    gpu_id=0
    
    handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
    info_list = pynvml.nvmlDeviceGetComputeRunningProcesses(handle)
    
    for each_pidinfo in info_list:
        # Each entry describes one process; see the sample output below.
        print(type(each_pidinfo))
        print(each_pidinfo.__dict__)   # dump every attribute of the process entry
        print(each_pidinfo.pid)
    
        
    
    -> <class 'pynvml.nvml.nvmlFriendlyObject'>    
        
    -> {'pid': 31566, 'usedGpuMemory': 1259339776, 'gpuInstanceId': 4294967295, 'computeInstanceId': 4294967295}
        
    -> 31566
        
    
    

    案例

    
    # Basic usage: print the driver version, list every GPU, then report
    # memory, temperature, fan speed and power state for GPU 0.
    from pynvml import *

    nvmlInit()  # NVML must be initialised before any other call
    try:
        print("Driver: ", nvmlSystemGetDriverVersion())  # driver version
        # >>> Driver: 384.xxx

        # Enumerate the devices.
        deviceCount = nvmlDeviceGetCount()
        for i in range(deviceCount):
            handle = nvmlDeviceGetHandleByIndex(i)
            print("GPU", i, ":", nvmlDeviceGetName(handle))
        # >>>
        # GPU 0 : b'GeForce GTX 1080 Ti'
        # GPU 1 : b'GeForce GTX 1080 Ti'

        # Memory, temperature, fan and power state of the first GPU.
        handle = nvmlDeviceGetHandleByIndex(0)
        info = nvmlDeviceGetMemoryInfo(handle)
        print("Memory Total: ", info.total)
        print("Memory Free: ", info.free)
        print("Memory Used: ", info.used)

        # NVML_TEMPERATURE_GPU (== 0) selects the on-die GPU temperature sensor.
        print("Temperature is %d C" % nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU))
        print("Fan speed is ", nvmlDeviceGetFanSpeed(handle))
        print("Power status", nvmlDeviceGetPowerState(handle))
    finally:
        # Always release NVML, even if one of the queries above raised.
        nvmlShutdown()
    
    
    #nvmlDeviceXXX有一系列函数可以调用,包括了NVML的大多数函数。
    #具体可以参考:https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries
    
    

    其他方法

    手动解析 nvidia-smi 命令

    以下是GPU p40的命令解析

    
    
    import subprocess
    def get_p40_pid(smi_output=None):
        """Return the GPU processes listed in the ``nvidia-smi`` process table.

        Args:
            smi_output: Optional captured ``nvidia-smi`` output (``str`` or
                ``bytes``). When ``None`` (the default) the command is run
                and its output is parsed.

        Returns:
            A dict keyed by PID (as a string). Each value is a dict with the
            keys ``"pid_id"``, ``"pid_name"`` and ``"pid_gpu_memory"``; the
            memory value is the usage column with a trailing ``MiB`` removed.
            If one PID appears on several GPUs, the last row wins. An empty
            dict is returned when no process rows are present.

        Raises:
            subprocess.CalledProcessError: if ``nvidia-smi`` has to be run
                and exits with a non-zero status.
        """
        import re  # local import so the function does not depend on file-level imports

        if smi_output is None:
            # Constant command string, so shell=True carries no injection risk;
            # kept so a missing binary still surfaces as CalledProcessError.
            smi_output = subprocess.check_output('nvidia-smi', shell=True)
        if isinstance(smi_output, bytes):
            # Decode properly instead of parsing the repr() of a bytes object.
            smi_output = smi_output.decode("utf-8", errors="replace")

        lines = [line.strip() for line in smi_output.splitlines()]

        # The process rows follow the last "|====...====|" header separator.
        # Matching on the prefix keeps this independent of the table width.
        start = 0
        for index, line in enumerate(lines):
            if line.startswith("|==="):
                start = index + 1

        # One process row: GPU index, optional GI/CI columns (newer drivers),
        # PID, type, process name (may contain spaces) and memory usage.
        row_pattern = re.compile(
            r"^\|\s*(\d+)\s+"
            r"(?:\S+\s+\S+\s+)?"
            r"(\d+)\s+"
            r"(\S+)\s+"
            r"(.+?)\s+"
            r"(\S+)\s*\|\s*$"
        )

        processes = {}
        for line in lines[start:]:
            match = row_pattern.match(line)
            if match is None:
                # Borders, headers and "No running processes found" rows.
                continue
            pid_id, pid_name, pid_gpu_memory = match.group(2, 4, 5)
            if pid_gpu_memory.endswith("MiB"):
                pid_gpu_memory = pid_gpu_memory[:-3]  # drop the unit
            processes[pid_id] = {"pid_id": pid_id,
                                 "pid_name": pid_name,
                                 "pid_gpu_memory": pid_gpu_memory}
        return processes
    
  • 相关阅读:
    C++学习笔记32:泛型编程拓展1
    C++学习笔记31:术语翻译
    Sqrt(x)
    Search a 2D Matrix
    Pascal's Triangle II
    Pascal's Triangle
    Climbing Stairs
    Linux实用命令
    Binary Tree Inorder Traversal
    Minimum Depth of Binary Tree
  • 原文地址:https://www.cnblogs.com/tian777/p/15250993.html
Copyright © 2020-2023  润新知