• PyTorch教程 | 1 图片数据建模流程范例


    构建数据流程是实践过程中的核心环节。熟悉pipeline的构建过程,有助于理解不同代码的结构,也是实现自主创建网络的第一步。

    使用Pytorch实现神经网络模型的一般流程包括:1,准备数据 2,定义模型 3,训练模型 4,评估模型 5,使用模型 6,保存模型。

    1- 数据加载
    在Pytorch中构建图片数据管道通常有两种方法。
    第一种是使用 torchvision中的datasets.ImageFolder来读取图片然后用 DataLoader来并行加载。
    第二种是通过继承 torch.utils.data.Dataset 实现用户自定义读取逻辑然后用 DataLoader来并行加载。
    其中第二种方法是读取用户自定义数据集的通用方法,既可以读取图片数据集,也可以读取文本数据集。
     
    2- 定义模型
    使用Pytorch通常有三种方式构建模型:使用nn.Sequential按层顺序构建模型,继承nn.Module基类构建自定义模型,继承nn.Module基类构建模型并辅助应用模型容器(nn.Sequential,nn.ModuleList,nn.ModuleDict)进行封装。
     
    3- 训练模型
    Pytorch通常需要用户编写自定义训练循环,大致分为脚本形式训练循环,函数形式训练循环,类形式训练循环。
     
    以下为详细示例代码
     
    #1 准备数据
    
    import torch
    from torch import nn
    from torch.utils.data import Dataset, DataLoader
    from torchvision import transforms, datasets

    # Training-time augmentation. The geometric transforms run on the decoded
    # PIL image and ToTensor comes last, an order that does not depend on
    # tensor support inside the individual transforms. transforms.Resize
    # replaces transforms.Scale, which is no longer part of torchvision.
    transforms_train = transforms.Compose([
        transforms.Resize(40),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32),
        transforms.ToTensor(),
    ])
    # Validation images are only converted to tensors (no augmentation).
    transforms_val = transforms.Compose([transforms.ToTensor()])

    # Loading. target_transform wraps the integer class index in a float
    # tensor of shape (1,) so a batch of targets has the same (batch, 1)
    # layout as the network output that BCELoss compares it with.
    ds_train = datasets.ImageFolder(
        "./data/cifar2/train/",
        transform=transforms_train,
        target_transform=lambda t: torch.tensor([t]).float())
    ds_valid = datasets.ImageFolder(
        "./data/cifar2/test/",
        transform=transforms_val,
        target_transform=lambda t: torch.tensor([t]).float())

    print(ds_train.class_to_idx)

    dl_train = DataLoader(ds_train, batch_size=50, shuffle=True, num_workers=3)
    # NOTE(review): shuffling is kept on for validation as well. The per-batch
    # AUC metric used during evaluation needs both classes in every batch, and
    # ImageFolder presumably lists samples class by class - confirm before
    # switching it off.
    dl_valid = DataLoader(ds_valid, batch_size=50, shuffle=True, num_workers=3)
    
    
    #显示
    %matplotlib inline
    %config InlineBackend.figure_format = 'svg'
    
    #查看部分样本
    from matplotlib import pyplot as plt 
    
    plt.figure(figsize=(8,8)) 
    for i in range(9):
        img,label = ds_train[i]
        img = img.permute(1,2,0)
        ax=plt.subplot(3,3,i+1)
        ax.imshow(img.numpy())
        ax.set_title("label = %d"%label.item())
        ax.set_xticks([])
        ax.set_yticks([]) 
    plt.show()
    
    
    # Pytorch的图片默认顺序是 Batch,Channel,Width,Height
    for x,y in dl_train:
        print(x.shape,y.shape) 
        break
    
    #2- 定义模型
    #此处为继承nn.Module基类
    class Net(nn.Module):
        """Small CNN that maps a 3-channel image batch to one probability each.

        Two convolution + max-pool stages are followed by 2-D dropout, an
        adaptive max pool that reduces each of the 64 feature maps to a single
        value, and a two-layer fully connected head ending in a sigmoid. The
        output therefore has shape (batch, 1) with values in [0, 1].
        """

        def __init__(self):
            super(Net, self).__init__()
            # The keyword is ``in_channels`` and the layer class is
            # ``nn.MaxPool2d``; any other spelling fails at construction time.
            self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
            self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
            self.dropout = nn.Dropout2d(p=0.1)
            # Output size (1, 1) makes the head independent of the spatial
            # size left after the convolutions.
            self.adaptive_pool = nn.AdaptiveMaxPool2d((1, 1))
            self.flatten = nn.Flatten()
            self.linear1 = nn.Linear(64, 32)
            self.relu = nn.ReLU()
            self.linear2 = nn.Linear(32, 1)
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            """Return the sigmoid output of the network for the batch ``x``."""
            # Feature extractor; the same pooling layer is reused after both
            # convolutions.
            x = self.conv1(x)
            x = self.pool(x)
            x = self.conv2(x)
            x = self.pool(x)
            x = self.dropout(x)
            # Collapse the spatial dimensions, then flatten to (batch, 64).
            x = self.adaptive_pool(x)
            x = self.flatten(x)
            # Classification head.
            x = self.linear1(x)
            x = self.relu(x)
            x = self.linear2(x)
            y = self.sigmoid(x)
            return y
    
    # torchkeras is only needed for the summary call below.
    import torchkeras

    # Build the network, print its module tree, then ask torchkeras to
    # summarise it for a single 3 x 32 x 32 input.
    net = Net()
    print(net)
    torchkeras.summary(net, input_shape=(3, 32, 32))
    
    #3 训练模型
    #此处为函数形式训练循环
    import pandas as pd
    from sklearn.metrics import roc_auc_score


    def _batch_auc(y_pred, y_true):
        """Return the ROC AUC of the scores ``y_pred`` against ``y_true``."""
        targets = y_true.data.numpy()
        scores = y_pred.data.numpy()
        return roc_auc_score(targets, scores)


    # The training configuration is stored as attributes on the network
    # object; the step functions read it back from the model they receive.
    model = net
    model.metric_name = "auc"
    model.metric_func = _batch_auc
    model.loss_func = torch.nn.BCELoss()
    model.optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    
    
    def train_step(model, features, label):
        """Run one optimisation step on a single batch.

        Args:
            model: network carrying ``optimizer``, ``loss_func`` and
                ``metric_func`` attributes.
            features: input batch passed to ``model``.
            label: target batch passed to the loss and to the metric.

        Returns:
            Tuple ``(loss, metric)`` for this batch, both as Python floats.
        """
        # Training mode, so dropout layers are active.
        model.train()

        # Clear the gradients accumulated by the previous step.
        model.optimizer.zero_grad()

        # Forward pass, loss and metric. Both are computed against the
        # ``label`` argument, never against a module-level variable.
        predictions = model(features)
        loss = model.loss_func(predictions, label)
        metric = model.metric_func(predictions, label)

        # Backward pass, then parameter update.
        loss.backward()
        model.optimizer.step()

        # float() accepts 0-dim tensors, NumPy scalars and plain floats, so
        # the metric function is free to return any of them.
        return loss.item(), float(metric)
    
    def valid_step(model, features, labels):
        """Evaluate one batch without updating the model.

        Args:
            model: network carrying ``loss_func`` and ``metric_func``
                attributes.
            features: input batch passed to ``model``.
            labels: target batch passed to the loss and to the metric.

        Returns:
            Tuple ``(loss, metric)`` for this batch, both as Python floats.
        """
        # Evaluation mode, so dropout layers are inactive.
        model.eval()
        # No gradients are needed for validation.
        with torch.no_grad():
            predictions = model(features)
            loss = model.loss_func(predictions, labels)
            # The metric scores the model predictions against the targets.
            metric = model.metric_func(predictions, labels)

        # float() accepts 0-dim tensors, NumPy scalars and plain floats.
        return loss.item(), float(metric)
    
    # Smoke-test train_step on the first batch of the training loader.
    first_batch = next(iter(dl_train))
    features, labels = first_batch
    train_step(model, features, labels)
    
    
    def train_model(model, epochs, dl_train, dl_valid, log_step_freq):
        """Train ``model`` for ``epochs`` epochs and return the epoch history.

        Each epoch runs ``train_step`` over every batch of ``dl_train`` and
        ``valid_step`` over every batch of ``dl_valid``. Reported losses and
        metrics are the means of the per-batch values.

        Args:
            model: network carrying a ``metric_name`` attribute plus whatever
                ``train_step`` / ``valid_step`` read from it.
            epochs: number of passes over ``dl_train``.
            dl_train: iterable of ``(features, labels)`` training batches.
            dl_valid: iterable of ``(features, labels)`` validation batches.
            log_step_freq: print the running training averages every this
                many batches.

        Returns:
            pandas.DataFrame with one row per epoch and the columns
            ``epoch``, ``loss``, ``<metric>``, ``val_loss``, ``val_<metric>``.
        """
        # Imported here because the log timestamps are the only use of
        # datetime and the surrounding script does not import it.
        import datetime

        metric_name = model.metric_name
        dfhistory = pd.DataFrame(
            columns=["epoch", "loss", metric_name, "val_loss", "val_" + metric_name])
        print("Start Training...")
        nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("==========" * 8 + "%s" % nowtime)

        for epoch in range(1, epochs + 1):
            # 1. Training loop -----------------------------------------
            loss_sum = 0.0
            metric_sum = 0.0
            # Pre-set so the averages below stay defined for an empty loader.
            step = 1

            for step, (features, labels) in enumerate(dl_train, 1):
                loss, metric = train_step(model, features, labels)

                # Batch-level log: running means since the epoch started.
                loss_sum += loss
                metric_sum += metric
                if step % log_step_freq == 0:
                    print(("[step = %d] loss: %.3f, " + metric_name + ": %.3f") %
                          (step, loss_sum / step, metric_sum / step))

            # 2. Validation loop ---------------------------------------
            val_loss_sum = 0.0
            val_metric_sum = 0.0
            val_step = 1

            for val_step, (features, labels) in enumerate(dl_valid, 1):
                val_loss, val_metric = valid_step(model, features, labels)

                val_loss_sum += val_loss
                val_metric_sum += val_metric

            # 3. Record the epoch --------------------------------------
            info = (epoch, loss_sum / step, metric_sum / step,
                    val_loss_sum / val_step, val_metric_sum / val_step)
            dfhistory.loc[epoch - 1] = info

            # Epoch-level log. The leading "\n" separates it from the batch
            # logs printed above.
            print(("\nEPOCH = %d, loss = %.3f," + metric_name +
                   "  = %.3f, val_loss = %.3f, " + "val_" + metric_name + " = %.3f")
                  % info)
            nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("\n" + "==========" * 8 + "%s" % nowtime)

        print('Finished Training...')

        return dfhistory
    
    
    # Train for 20 epochs, printing the running averages every 50 batches.
    epochs = 20
    dfhistory = train_model(
        model=model,
        epochs=epochs,
        dl_train=dl_train,
        dl_valid=dl_valid,
        log_step_freq=50,
    )
    
    
    # 4 Evaluate the model
    # Print the per-epoch history returned by train_model.
    print(dfhistory)
    
    # Plot settings for the loss/metric curves (IPython magics): draw figures
    # inline, rendered as SVG.
    %matplotlib inline
    %config InlineBackend.figure_format = 'svg'
    
    import matplotlib.pyplot as plt
    
    def plot_metric(dfhistory, metric):
        """Plot the training and validation curves of ``metric`` per epoch.

        Args:
            dfhistory: table indexable by column name that holds the columns
                ``metric`` and ``'val_' + metric``.
            metric: name of the training column to plot.
        """
        val_key = 'val_' + metric
        train_series = dfhistory[metric]
        val_series = dfhistory[val_key]
        # Epochs are numbered from 1 on the x axis.
        epoch_axis = range(1, len(train_series) + 1)

        # Training curve: blue dashed with dots; validation curve: red solid.
        for series, line_format in ((train_series, 'bo--'), (val_series, 'ro-')):
            plt.plot(epoch_axis, series, line_format)

        plt.title('Training and validation ' + metric)
        plt.xlabel("Epochs")
        plt.ylabel(metric)
        plt.legend(["train_" + metric, val_key])
        plt.show()
    
    # Draw the loss curve first, then the AUC curve.
    for curve_name in ("loss", "auc"):
        plot_metric(dfhistory, curve_name)
    
    #5 使用模型
    def predict(model, dl):
        """Return the outputs of ``model`` for every batch of ``dl``.

        Args:
            model: network to evaluate; it is switched to eval mode here.
            dl: iterable of batches whose first element is the input tensor.

        Returns:
            Tensor holding the outputs of all batches concatenated in
            iteration order, detached from the autograd graph.
        """
        model.eval()
        with torch.no_grad():
            # Call the module itself rather than .forward() so that hooks
            # registered on it still run.
            outputs = [model(batch[0]) for batch in dl]
        return torch.cat(outputs).detach()
    
    
    # Predicted probabilities for the validation loader.
    y_pred_probs = predict(model, dl_valid)
    print(y_pred_probs)

    # Hard 0/1 predictions: 1 where the probability is above 0.5, else 0,
    # kept in the same dtype as the probabilities.
    above_threshold = y_pred_probs > 0.5
    y_pred = above_threshold.to(y_pred_probs.dtype)
    print(y_pred)
    
    
    #6 保存模型
    # Persist only the parameters (state dict), not the whole module object.
    param_path = "./data/model_parameter.pkl"
    torch.save(model.state_dict(), param_path)

    # Rebuild the architecture, restore the saved parameters, and run the
    # restored network on the validation loader.
    net_clone = Net()
    saved_state = torch.load(param_path)
    net_clone.load_state_dict(saved_state)

    predict(net_clone, dl_valid)

  • 相关阅读:
    windows 创建python独立开发环境
    sql多列排序
    mysql 导入sql脚本中文乱码问题
    廖雪峰Python教学课后作业---datetime
    poj 1004:Financial Management(水题,求平均数)
    【POJ水题完成表】
    poj 1003:Hangover(水题,数学模拟)
    ytu 2558: 游起来吧!超妹!(水题,趣味数学题)
    poj 1005:I Think I Need a Houseboat(水题,模拟)
    hdu 2393:Higher Math(计算几何,水题)
  • 原文地址:https://www.cnblogs.com/geo-will/p/13549484.html
Copyright © 2020-2023  润新知