• Torch速查_CPU和GPU的mnist预测训练_模型导出_模型导入再预测_导出script并预测_导出onnx并预测


    需要做点什么

    为了方便广大研究生(烟酒生)和算法工程师(人工智障炼丹师)快速上手 torch,特写此文。本文默认读者已具备基本的深度学习概念和数据集概念。

    系统环境

    python 3.7.4
    torch 1.9.0+cu111
    onnx 1.9.0
    onnxruntime-gpu 1.9.0

    数据准备

    MNIST数据集csv文件是一个42000x785的矩阵
    42000表示有42000张图片
    785中第一列是图片的类别(0,1,2,..,9),第二列到最后一列是图片数据向量 (28x28的图片张成784的向量), 数据集长这个样子:

    1 0 0 0 0 0 0 0 0 0 ..
    0 0 0 0 0 0 0 0 0 0
    1 0 0 0 0 0 0 0 0 0
    4 0 0 0 0 0 0 0 0 0
    0 0 0 0 0 0 0 0 0 0
    0 0 0 0 0 0 0 0 0 0
    7 0 0 0 0 0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0
    5 0 0 0 0 0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0
    8 0 0 0 0 0 0 0 0 0
    9 0 0 0 0 0 0 0 0 0
    1 0 0 0 0 0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0
    3 0 0 0 0 0 0 0 0 0
    1 0 0 0 0 0 0 0 0 0
    2 0 0 0 0 0 0 0 0 0
    0 0 0 0 0 0 0 0 0 0

    1. 导入需要的包

    import os
    import time
    import onnx
    import torch
    import numpy as np
    import pandas as pd
    import torch.nn as nn
    import onnxruntime as ort
    import torch.nn.functional as F
    from sklearn.metrics import accuracy_score
    from torch.utils.data import Dataset, DataLoader
    

    2. 参数准备

    # ---- Training hyper-parameters ----
    N_EPOCH = 1        # number of passes over the training split
    N_BATCH = 128      # mini-batch size (also embedded in the ONNX file name)
    N_BATCH_NUM = 250  # the first N_BATCH * N_BATCH_NUM rows are used for training

    # ---- File locations, relative to the working directory ----
    S_DATA_PATH = "mnist_train.csv"
    S_TORCH_MODEL_FULL_PATH = "cnn_model.pth"
    S_TORCH_MODEL_PARAMS_PATH = "cnn_model_state.pth"
    S_TORCH_MODEL_SCRIPT_PATH = "cnn_model.torch_script.pt"
    S_ONNX_MODEL_PATH = "cnn_model_batch%d.onnx" % N_BATCH

    # ---- Target device: short name, device index, fully qualified name ----
    S_DEVICE = "cuda"
    N_DEVICE_ID = 0
    S_DEVICE_FULL = "cuda:0"
    # To run everything on the CPU instead, use:
    # S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cpu", 0, "cpu"
    

    3. 读取数据

    # Read the CSV without a header row: column 0 holds the digit label,
    # the remaining columns hold the flattened pixel values.
    df = pd.read_csv(S_DATA_PATH, header=None)
    print(df.shape)
    np_mat = np.array(df)
    print(np_mat.shape)

    # Labels are taken as read; pixels are converted to float32 and divided
    # by 255.
    Y = np_mat[:, 0]
    X = np_mat[:, 1:].astype(np.float32) / 255

    # The first N_BATCH * N_BATCH_NUM rows form the training split, the
    # remaining rows the test split.
    n_train = N_BATCH * N_BATCH_NUM
    X_train, X_test = X[:n_train], X[n_train:]
    Y_train, Y_test = Y[:n_train], Y[n_train:]

    # Restore the image layout: (num_samples, 1 channel, 28, 28).
    X_train = X_train.reshape(len(X_train), 1, 28, 28)
    X_test = X_test.reshape(len(X_test), 1, 28, 28)

    for np_split in (X_train, Y_train, X_test, Y_test):
        print(np_split.shape)
    
    
    class MnistDataSet(Dataset):
        """Map-style dataset pairing each sample in ``X`` with its label in ``Y``.

        Samples and labels are kept in two parallel Python lists, one entry
        per index along the first axis of ``X``.
        """

        def __init__(self, X, Y):
            # X is indexed with four subscripts below, so it must be 4-D;
            # its first axis enumerates the samples.
            n_samples = X.shape[0]
            self.l_data = [X[k, :, :, :] for k in range(n_samples)]
            self.l_label = [Y[k] for k in range(n_samples)]

        def __getitem__(self, index):
            """Return the ``(sample, label)`` pair stored at ``index``."""
            return self.l_data[index], self.l_label[index]

        def __len__(self):
            """Return the number of stored samples."""
            return len(self.l_data)
    
    
    # Wrap both splits in datasets, then batch them. Training batches are
    # shuffled; test batches keep their original order.
    train_set = MnistDataSet(X_train, Y_train)
    test_set = MnistDataSet(X_test, Y_test)
    train_loader = DataLoader(dataset=train_set, batch_size=N_BATCH, shuffle=True)
    test_loader = DataLoader(dataset=test_set, batch_size=N_BATCH, shuffle=False)
    
    

    运行输出

    (42000, 785)
    (42000, 785)
    (32000, 1, 28, 28)
    (32000,)
    (10000, 1, 28, 28)
    (10000,)
    

    4. 模型构建

    class Net(nn.Module):
        """Small CNN classifier: Conv2d -> MaxPool2d -> Flatten -> Linear -> ReLU -> Linear.

        Maps a ``(N, 1, 28, 28)`` batch to ``(N, 10)`` raw (un-normalised) scores.
        """

        def __init__(self):
            super().__init__()
            # A 3x3 convolution without padding turns 28x28 into 26x26 with
            # 16 channels; 2x2 max-pooling halves that to 13x13, so the
            # flattened feature size is 16 * 13 * 13 = 2704.
            layers = [
                nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1),
                nn.MaxPool2d(kernel_size=2),
                nn.Flatten(start_dim=1),
                nn.Linear(in_features=2704, out_features=128),
                nn.ReLU(),
                nn.Linear(in_features=128, out_features=10),
            ]
            self.encoder = nn.Sequential(*layers)

        def forward(self, x):
            """Run ``x`` through the layer stack and return its output."""
            return self.encoder(x)
    
    
    # Build the network on the configured device and print its layer summary
    # (the summary is what the article shows as this step's output).
    net = Net().to(S_DEVICE)
    print(net)
    # Adam over all model parameters with a learning rate of 1e-3.
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    # Cross-entropy between the network's raw scores and integer class labels.
    loss_fun = nn.CrossEntropyLoss()
    

    运行输出

    Net(
      (encoder): Sequential(
        (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
        (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (2): Flatten(start_dim=1, end_dim=-1)
        (3): Linear(in_features=2704, out_features=128, bias=True)
        (4): ReLU()
        (5): Linear(in_features=128, out_features=10, bias=True)
      )
    )
    

    5. 模型训练

    print("model train")
    for _epoch in range(N_EPOCH):
        net.train()
        # Running sum of the per-batch losses, reported once per epoch.
        t_loss = 0.
        # Per-batch predictions / labels; joined once after the epoch instead
        # of re-copying a growing array on every batch.
        l_pred, l_y = [], []
        for t_x_b, t_y_b in train_loader:
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = net(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)

            optimizer.zero_grad()
            t_loss_b.backward()
            optimizer.step()

            # Detach before accumulating: summing the grad-tracking loss
            # tensor would chain every batch's autograd history into the
            # running total. The total therefore prints as a plain tensor
            # without a grad_fn.
            t_loss += t_loss_b.detach()
            l_pred.append(torch.argmax(t_logits_b, -1).detach().cpu().numpy())
            l_y.append(t_y_b.cpu().numpy())

        np_pred = np.concatenate(l_pred, 0)
        np_y = np.concatenate(l_y, 0)
        f_acc = accuracy_score(np_y, np_pred)
        print("train ", t_loss, f_acc)
        print()
    

    运行输出

    model train
    train  tensor(113.2757, device='cuda:0', grad_fn=<AddBackward0>) 0.8743125
    
    

    6.模型预测

    # Evaluate the trained network once on the test split. A single pass is
    # enough: with fixed weights and an unshuffled loader, repeating it per
    # epoch would only reproduce the same numbers.
    net.eval()
    with torch.no_grad():
        # Running sum of the per-batch losses.
        t_loss = 0.
        # Per-batch predictions / labels, concatenated once at the end.
        l_pred, l_y = [], []
        for t_x_b, t_y_b in test_loader:
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = net(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)
            t_loss += t_loss_b

            l_pred.append(torch.argmax(t_logits_b, -1).cpu().numpy())
            l_y.append(t_y_b.cpu().numpy())

        np_pred = np.concatenate(l_pred, 0)
        np_y = np.concatenate(l_y, 0)
        f_acc = accuracy_score(np_y, np_pred)
        print("test ", t_loss, f_acc)
        print()
    

    运行输出

    test  tensor(17.3666, device='cuda:0') 0.9334
    

    7.模型保存

    # Write two checkpoints:
    #   * the whole module object      -> S_TORCH_MODEL_FULL_PATH
    #   * only the module's state_dict -> S_TORCH_MODEL_PARAMS_PATH
    torch.save(obj=net, f=S_TORCH_MODEL_FULL_PATH)
    torch.save(obj=net.state_dict(), f=S_TORCH_MODEL_PARAMS_PATH)
    

    8.模型加载和加载模型使用

    print("load torch model and pred test data")
    # NOTE: torch.load unpickles arbitrary Python objects -- only load
    # checkpoint files you trust. On recent PyTorch releases, loading a fully
    # pickled module may additionally require passing weights_only=False.
    # The map_location callable returns each storage unchanged, so tensors
    # are deserialized on the CPU; the module is moved to S_DEVICE afterwards.
    net_load = torch.load(S_TORCH_MODEL_FULL_PATH,
                          map_location=lambda storage, loc: storage)
    net_load = net_load.to(S_DEVICE)
    print("load model ok")

    # Evaluate the reloaded model once on the test split; repeating the
    # identical pass per epoch would only reproduce the same numbers.
    net_load.eval()
    with torch.no_grad():
        # Running sum of the per-batch losses.
        t_loss = 0.
        # Per-batch predictions / labels, concatenated once at the end.
        l_pred, l_y = [], []
        for t_x_b, t_y_b in test_loader:
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = net_load(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)
            t_loss += t_loss_b

            l_pred.append(torch.argmax(t_logits_b, -1).cpu().numpy())
            l_y.append(t_y_b.cpu().numpy())

        np_pred = np.concatenate(l_pred, 0)
        np_y = np.concatenate(l_y, 0)
        f_acc = accuracy_score(np_y, np_pred)
        print("load torch model ", t_loss, f_acc)
        print()
    

    运行输出

    load torch model and pred test data
    load model ok
    load torch model  tensor(17.3666, device='cuda:0') 0.9334
    

    9. 导出Torch Script

    # Trace with an explicit example batch instead of relying on whatever
    # tensor an earlier evaluation loop happened to leave behind in a loop
    # variable. Tracing records the operations executed for this input.
    net_load.eval()
    t_example = torch.randn(N_BATCH, 1, 28, 28, device=S_DEVICE)
    torch_script_trace = torch.jit.trace(net_load, t_example)
    print(torch_script_trace)
    torch_script_trace.save(S_TORCH_MODEL_SCRIPT_PATH)
    

    运行输出

    Net(
      original_name=Net
      (encoder): Sequential(
        original_name=Sequential
        (0): Conv2d(original_name=Conv2d)
        (1): MaxPool2d(original_name=MaxPool2d)
        (2): Flatten(original_name=Flatten)
        (3): Linear(original_name=Linear)
        (4): ReLU(original_name=ReLU)
        (5): Linear(original_name=Linear)
      )
    )
    

    10. 加载Torch Script并预测

    # Reload the saved TorchScript module and move it to the configured
    # device, then show its structure and generated forward() code.
    torch_script_load = torch.jit.load(S_TORCH_MODEL_SCRIPT_PATH)
    torch_script_load = torch_script_load.to(S_DEVICE)
    print(torch_script_load)
    print(torch_script_load.code)
    print("load scirpt model ok")

    # Evaluate the TorchScript module once on the test split; repeating the
    # identical pass per epoch would only reproduce the same numbers.
    torch_script_load.eval()
    with torch.no_grad():
        # Running sum of the per-batch losses.
        t_loss = 0.
        # Per-batch predictions / labels, concatenated once at the end.
        l_pred, l_y = [], []
        for t_x_b, t_y_b in test_loader:
            t_y_b = t_y_b.long().to(S_DEVICE)
            t_x_b = t_x_b.float().to(S_DEVICE)

            t_logits_b = torch_script_load(t_x_b)
            t_loss_b = loss_fun(t_logits_b, t_y_b)
            t_loss += t_loss_b

            l_pred.append(torch.argmax(t_logits_b, -1).cpu().numpy())
            l_y.append(t_y_b.cpu().numpy())

        np_pred = np.concatenate(l_pred, 0)
        np_y = np.concatenate(l_y, 0)
        f_acc = accuracy_score(np_y, np_pred)
        print("load scirpt torch model ", t_loss, f_acc)
        print()
    

    运行输出

    RecursiveScriptModule(
      original_name=Net
      (encoder): RecursiveScriptModule(
        original_name=Sequential
        (0): RecursiveScriptModule(original_name=Conv2d)
        (1): RecursiveScriptModule(original_name=MaxPool2d)
        (2): RecursiveScriptModule(original_name=Flatten)
        (3): RecursiveScriptModule(original_name=Linear)
        (4): RecursiveScriptModule(original_name=ReLU)
        (5): RecursiveScriptModule(original_name=Linear)
      )
    )
    def forward(self,
        x: Tensor) -> Tensor:
      return (self.encoder).forward(x, )
    
    load scirpt model ok
    load scirpt torch model  tensor(17.3666, device='cuda:0') 0.9334
    

    11.导出ONNX

    # Random placeholder batch on the CPU, used as the example input for the
    # export.
    dummy_in = torch.randn(N_BATCH, 1, 28, 28)
    # Declare axis 0 of both the input and the output as a named dynamic
    # axis ("batch_size") rather than fixing it to the placeholder's size.
    d_dynamic_axes = {
        'data': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    }
    # NOTE: Module.cpu() moves net_load itself to the CPU; it is not a copy.
    net_cpu = net_load.cpu()
    torch.onnx.export(
        net_cpu,
        dummy_in,
        S_ONNX_MODEL_PATH,
        verbose=True,
        input_names=["data"],
        output_names=["output"],
        dynamic_axes=d_dynamic_axes,
    )
    

    运行输出

    graph(%data : Float(*, 1, 28, 28, strides=[784, 784, 28, 1], requires_grad=0, device=cpu),
          %encoder.0.weight : Float(16, 1, 3, 3, strides=[9, 9, 3, 1], requires_grad=1, device=cpu),
          %encoder.0.bias : Float(16, strides=[1], requires_grad=1, device=cpu),
          %encoder.3.weight : Float(128, 2704, strides=[2704, 1], requires_grad=1, device=cpu),
          %encoder.3.bias : Float(128, strides=[1], requires_grad=1, device=cpu),
          %encoder.5.weight : Float(10, 128, strides=[128, 1], requires_grad=1, device=cpu),
          %encoder.5.bias : Float(10, strides=[1], requires_grad=1, device=cpu)):
      %7 : Float(*, 16, 26, 26, strides=[10816, 676, 26, 1], requires_grad=1, device=cpu) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[0, 0, 0, 0], strides=[1, 1]](%data, %encoder.0.weight, %encoder.0.bias) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\modules\conv.py:440:0
      %8 : Float(*, 16, 13, 13, strides=[2704, 169, 13, 1], requires_grad=1, device=cpu) = onnx::MaxPool[kernel_shape=[2, 2], pads=[0, 0, 0, 0], strides=[2, 2]](%7) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:718:0
      %9 : Float(*, 2704, strides=[2704, 1], requires_grad=1, device=cpu) = onnx::Flatten[axis=1](%8) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\modules\flatten.py:40:0
      %10 : Float(*, 128, strides=[128, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1](%9, %encoder.3.weight, %encoder.3.bias) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:1847:0
      %11 : Float(*, 128, strides=[128, 1], requires_grad=1, device=cpu) = onnx::Relu(%10) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:1298:0
      %output : Float(*, 10, strides=[10, 1], requires_grad=1, device=cpu) = onnx::Gemm[alpha=1., beta=1., transB=1](%11, %encoder.5.weight, %encoder.5.bias) # D:\Dev_Utils\Anaconda3\lib\site-packages\torch\nn\functional.py:1847:0
      return (%output)
    
    

    12. 加载ONNX并运行

    # Load the exported graph, run the ONNX checker on it and print a
    # human-readable dump of the graph.
    model = onnx.load(S_ONNX_MODEL_PATH)
    print(onnx.checker.check_model(model))
    print(onnx.helper.printable_graph(model.graph))
    ls_input_name = [node.name for node in model.graph.input]
    ls_output_name = [node.name for node in model.graph.output]
    print("input name ", ls_input_name)
    print("output name ", ls_output_name)
    s_input_name = ls_input_name[0]

    # Take two batches' worth of training images and wrap them in an
    # OrtValue created for the configured device / device id.
    n_rows = N_BATCH * 2
    x_input = X_train[:n_rows, :, :, :].astype(np.float32)
    ort_val = ort.OrtValue.ortvalue_from_numpy(x_input, S_DEVICE, N_DEVICE_ID)
    print("val device ", ort_val.device_name())
    print("val shape ", ort_val.shape())
    print("val data type ", ort_val.data_type())
    print("is_tensor ", ort_val.is_tensor())
    print("array_equal ", np.array_equal(ort_val.numpy(), x_input))

    # Pick the execution provider that matches the configured device.
    if S_DEVICE == "cuda":
        providers = 'CUDAExecutionProvider'
    else:
        providers = 'CPUExecutionProvider'
    print("providers ", providers)

    # Create the session with that provider (GPU when CUDA is selected) and
    # run the model on the prepared input.
    ort_session = ort.InferenceSession(S_ONNX_MODEL_PATH, providers=[providers])
    ort_session.set_providers([providers])
    outputs = ort_session.run(None, {s_input_name: ort_val})
    print("sess env ", ort_session.get_providers())
    print(type(outputs))
    print(outputs[0])
    # A provider list such as ['CUDAExecutionProvider', 'CPUExecutionProvider']
    # means: execute a node with CUDAExecutionProvider if it is capable,
    # otherwise fall back to CPUExecutionProvider.
    

    运行输出

    None
    graph torch-jit-export (
      %data[FLOAT, batch_sizex1x28x28]
    ) initializers (
      %encoder.0.weight[FLOAT, 16x1x3x3]
      %encoder.0.bias[FLOAT, 16]
      %encoder.3.weight[FLOAT, 128x2704]
      %encoder.3.bias[FLOAT, 128]
      %encoder.5.weight[FLOAT, 10x128]
      %encoder.5.bias[FLOAT, 10]
    ) {
      %7 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], pads = [0, 0, 0, 0], strides = [1, 1]](%data, %encoder.0.weight, %encoder.0.bias)
      %8 = MaxPool[kernel_shape = [2, 2], pads = [0, 0, 0, 0], strides = [2, 2]](%7)
      %9 = Flatten[axis = 1](%8)
      %10 = Gemm[alpha = 1, beta = 1, transB = 1](%9, %encoder.3.weight, %encoder.3.bias)
      %11 = Relu(%10)
      %output = Gemm[alpha = 1, beta = 1, transB = 1](%11, %encoder.5.weight, %encoder.5.bias)
      return %output
    }
    input name  ['data']
    output name  ['output']
    val device  cuda
    val shape  [256, 1, 28, 28]
    val data type  tensor(float)
    is_tensor  True
    array_equal  True
    providers  CUDAExecutionProvider
    sess env  ['CUDAExecutionProvider', 'CPUExecutionProvider']
    <class 'list'>
    [[ -3.5930414    8.179376     1.1969751  ...  -2.913561     2.5138445
       -2.2389767 ]
     [ 11.716089   -11.836465     2.8341749  ...  -1.8803438    0.31916314
       -1.637662  ]
     [ -6.1383176    7.9563417    0.18428418 ...   0.2816238    0.55466944
       -1.2241261 ]
     ...
     [  0.02245945  -5.2462187   -2.9979806  ...   1.0633407   -0.07040683
       -0.49605215]
     [ -7.219374    -3.159672    -0.64644974 ...   5.7991867   -1.9511163
        1.4337606 ]
     [ -4.0595794    7.265975     0.7286219  ...  -0.5744688    0.522286
       -1.5456666 ]]
    

    你甚至不愿意Star的GitHub

    ai_fast_handbook

  • 相关阅读:
    [crontab]修改默认编辑器
    [mysql]忘记用户密码或者误删用户账号
    [vim]多行注释和多行删除
    [mysql]my.cnf在哪里
    [python]有中文字符程序异常的解决方案
    [Linux]虚拟机无法安装deepin15.9的解决方案
    Elasticsearch5.X IN Windows 10 系列文章(2)
    Elasticsearch5.X IN Windows 10 系列文章(1)
    HTTP Error 502.5
    centos7 yum install redis
  • 原文地址:https://www.cnblogs.com/Kalafinaian/p/16037680.html
Copyright © 2020-2023  润新知