• PyTorch学习笔记7--案例3:基于CNN的CIFAR10数据集的图像分类


    基于CNN的CIFAR10图像分类

    完整代码如下:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim
    from torch.utils.data.sampler import SubsetRandomSampler
    import torchvision
    import torchvision.transforms as transforms
    import numpy as np
    import time
    from matplotlib import pyplot as plt
    
    # ===========================================================================================
    # 准备数据
    # Compose的意思是将多个transform组合在一起用,ToTensor 将像素转化为[0,1]的数字,Normalize则正则化变为 [-1,1]
    tf = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # 下载数据集,训练集:需要训练;测试集:不需要训练
    train_set = torchvision.datasets.CIFAR10(root='./cifar10', train=True, download=True, transform=tf)
    test_set = torchvision.datasets.CIFAR10(root='./cifar10', train=False, download=True, transform=tf)
    # 指定十个类别的标签,有的数据集很大的回加载相应的标签文件(groundtruth)
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'truck', 'ship')
    
    # Training 共50000,取前20000作为训练集
    n_training_sample = 50000
    train_sample = SubsetRandomSampler(np.arange(n_training_sample, dtype=np.int64))
    # Validation 取训练集中的[20000,20000+5000]作为验证集
    # n_validation_sample = 5000
    # validation_sample = SubsetRandomSampler(np.arange(n_training_sample, n_training_sample + n_validation_sample,dtype=np.int64))
    # Testing 共10000,取前5000作为测试集
    n_test_sample = 10000
    test_sample = SubsetRandomSampler(np.arange(n_test_sample, dtype=np.int64))
    
    
    # 开启shuffle就等于全集使用SubsetRandomSampler,都是随机采样,num_workers代表多线程加载数据,Windows上不能用(必须0),Linux可用
    train_batch_size = 100
    test_batch_size = 4
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=train_batch_size, sampler=train_sample, num_workers=0)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=test_batch_size, sampler=test_sample, num_workers=0)
    #val_loader = torch.utils.data.DataLoader(train_set, batch_size=500, sampler=validation_sample, num_workers=0)
    
    # ================================================================================================
    # 2 建立模型 
    #  MNIST案例的网络是卷积+全连接层的形式,这种结构的网络效果其实不好:
    # 因为全连接层传递效率较低,同时会干扰到卷积层提取出的局部特征。
    # 并且也没有用到BatchNorm和Dropout来防止过拟合的问题。
    # 现在流行的网络结构大多采用全卷积层的结构:
    class CNN(nn.Module):
        def __init__(self):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv2d(3, 64, 3, padding = 1)
            self.conv2 = nn.Conv2d(64, 64, 3, padding = 1)
            self.conv3 = nn.Conv2d(64, 128, 3, padding = 1)
            self.conv4 = nn.Conv2d(128, 128, 3, padding = 1)
            self.conv5 = nn.Conv2d(128, 256, 3, padding = 1)
            self.conv6 = nn.Conv2d(256, 256, 3, padding = 1)
            self.maxpool = nn.MaxPool2d(2, 2)
            self.avgpool = nn.AvgPool2d(2, 2)
            self.globalavgpool = nn.AvgPool2d(8, 8)
            self.bn1 = nn.BatchNorm2d(64)
            self.bn2 = nn.BatchNorm2d(128)
            self.bn3 = nn.BatchNorm2d(256)
            self.dropout50 = nn.Dropout(0.5)
            self.dropout10 = nn.Dropout(0.1)
            self.fc = nn.Linear(256, 10)
     
        def forward(self, x):
            x = self.bn1(F.relu(self.conv1(x)))
            x = self.bn1(F.relu(self.conv2(x)))
            x = self.maxpool(x)
            x = self.dropout10(x)
            x = self.bn2(F.relu(self.conv3(x)))
            x = self.bn2(F.relu(self.conv4(x)))
            x = self.avgpool(x)
            x = self.dropout10(x)
            x = self.bn3(F.relu(self.conv5(x)))
            x = self.bn3(F.relu(self.conv6(x)))
            x = self.globalavgpool(x)
            x = self.dropout50(x)
            x = x.view(x.size(0), -1)
            x = self.fc(x)
            return x
    cnn = CNN()
    # 如有GPU则自动使用GPU计算
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
    cnn.to(device)
    # ===================================================================================
    # 损失函数
    loss_func = torch.nn.CrossEntropyLoss()  # 交叉熵损失函数
    # 优化器
    optimizer = optim.Adam(cnn.parameters(), lr=0.001)     # Adam 优化算法是随机梯度下降算法的扩展式
     
    # ==========================================================================================
    def trainNet(epoch):
        print('Epoch {}'.format(epoch))
        # 加载数据集上边的方法解释了获取训练数据
        training_start_time = time.time()   # 开始时间,为了后边统计一个训练花费时间
        #循环训练 n_epochs是5,也就是重复扫 五遍样本数据,CIFAR10数据集将50000条训练数据分为了五个batch,所以这个地方不要有疑惑
        start_time = time.time()
        train_loss = 0
        for step,(x_batch, y_batch) in enumerate(train_loader):
            x_batch = x_batch.cuda()
            y_batch = y_batch.cuda()
            # forward:前向传播
            outputs = cnn(x_batch)  #
            loss = loss_func(outputs, y_batch)
            train_loss += loss.item()
            # 在一个epoch里。每十组batchsize大小的数据输出一次结果,即以batch_size大小的数据为一组,到第10组,20组,30组...的时候输出
            if step % (len(train_loader)/100) == 0:
                print("epoch{}, {:d}% 	 loss:{:.6f} took:{:.2f}s".format(epoch, int(100 * (step) / len(train_loader)),loss.item(), time.time()-start_time))
                start_time = time.time()
            #backward:后向传播
            optimizer.zero_grad()  # 将所有的梯度置零,原因是防止每次backward的时候梯度会累加
            loss.backward()  # 根据反向传播更新所有的参数
            optimizer.step()
        print("Training loss={}, took {:.2f}s".format(train_loss/(len(train_loader)),time.time() - training_start_time))  # 所有的Epoch结束,也就是训练结束,计算花费的时间
    
    #使用以下方法保存和恢复网络参数
    #torch.save(cnn, 'cifar10.pkl')
    #cnn = torch.load('cifar10.pkl')
    
    def test():
        correct = 0
        test_loss = 0
        cnn.eval()
        with torch.no_grad():
            for data in test_loader:
                # Forward pass
                x_batch,y_batch = data
                x_batch = x_batch.cuda()
                y_batch = y_batch.cuda()
                out = cnn(x_batch)
                loss = loss_func(out, y_batch)
                predicted = torch.max(out, 1)[1]
                correct += (predicted == y_batch).sum().item()
                test_loss += loss.item()
            print("test loss = {:.2f}, Accuracy={:.6f}".format(test_loss / len(test_loader),correct/len(test_loader)/test_batch_size))  # 求验证集的平均损失是多少
        
    
    
    # 执行整个训练过程
    for epoch in range(1,11):
        trainNet(epoch)
        test()
    
    # 统计每类的分类准确率
    cnn.eval()
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    with torch.no_grad():
        for data in test_loader:
            x_batch, y_batch = data
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            out = cnn(x_batch)
            predicted = torch.max(out, 1)[1]
            c = (predicted == y_batch).squeeze()
            # 
            for i in range(test_batch_size):
                label = y_batch[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1
        for i in range(10):
            print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))
    

    cifar10教程补充内容

    更优选的网络,类似VGG

    这个网络比前面那个准确率更高一些.

    class Net(nn.Module):
        def __init__(self):
            super(Net,self).__init__()
            self.conv1 = nn.Conv2d(3,64,3,padding=1)
            self.conv2 = nn.Conv2d(64,64,3,padding=1)
            self.pool1 = nn.MaxPool2d(2, 2)
            self.bn1 = nn.BatchNorm2d(64)
            self.relu1 = nn.ReLU()
    
            self.conv3 = nn.Conv2d(64,128,3,padding=1)
            self.conv4 = nn.Conv2d(128, 128, 3,padding=1)
            self.pool2 = nn.MaxPool2d(2, 2, padding=1)
            self.bn2 = nn.BatchNorm2d(128)
            self.relu2 = nn.ReLU()
    
            self.conv5 = nn.Conv2d(128,128, 3,padding=1)
            self.conv6 = nn.Conv2d(128, 128, 3,padding=1)
            self.conv7 = nn.Conv2d(128, 128, 1,padding=1)
            self.pool3 = nn.MaxPool2d(2, 2, padding=1)
            self.bn3 = nn.BatchNorm2d(128)
            self.relu3 = nn.ReLU()
    
            self.conv8 = nn.Conv2d(128, 256, 3,padding=1)
            self.conv9 = nn.Conv2d(256, 256, 3, padding=1)
            self.conv10 = nn.Conv2d(256, 256, 1, padding=1)
            self.pool4 = nn.MaxPool2d(2, 2, padding=1)
            self.bn4 = nn.BatchNorm2d(256)
            self.relu4 = nn.ReLU()
    
            self.conv11 = nn.Conv2d(256, 512, 3, padding=1)
            self.conv12 = nn.Conv2d(512, 512, 3, padding=1)
            self.conv13 = nn.Conv2d(512, 512, 1, padding=1)
            self.pool5 = nn.MaxPool2d(2, 2, padding=1)
            self.bn5 = nn.BatchNorm2d(512)
            self.relu5 = nn.ReLU()
    
            self.fc14 = nn.Linear(512*4*4,1024)
            self.drop1 = nn.Dropout2d()
            self.fc15 = nn.Linear(1024,1024)
            self.drop2 = nn.Dropout2d()
            self.fc16 = nn.Linear(1024,10)
    
    
        def forward(self,x):
            x = self.conv1(x)
            x = self.conv2(x)
            x = self.pool1(x)
            x = self.bn1(x)
            x = self.relu1(x)
    
    
            x = self.conv3(x)
            x = self.conv4(x)
            x = self.pool2(x)
            x = self.bn2(x)
            x = self.relu2(x)
    
            x = self.conv5(x)
            x = self.conv6(x)
            x = self.conv7(x)
            x = self.pool3(x)
            x = self.bn3(x)
            x = self.relu3(x)
    
            x = self.conv8(x)
            x = self.conv9(x)
            x = self.conv10(x)
            x = self.pool4(x)
            x = self.bn4(x)
            x = self.relu4(x)
    
            x = self.conv11(x)
            x = self.conv12(x)
            x = self.conv13(x)
            x = self.pool5(x)
            x = self.bn5(x)
            x = self.relu5(x)
            # print(" x shape ",x.size())
            x = x.view(-1,512*4*4)
            x = F.relu(self.fc14(x))
            x = self.drop1(x)
            x = F.relu(self.fc15(x))
            x = self.drop2(x)
            x = self.fc16(x)
    
            return x
    

    显示图片及标签

    显示一些训练集中的照片:

    import matplotlib.pyplot as plt
    import numpy as np
    
    def imshow(img):
        img = img / 2 + 0.5     # unnormalize
        npimg = img.numpy()
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
        plt.show()
    dataiter = iter(trainloader)
    images, labels = dataiter.next()
    imshow(torchvision.utils.make_grid(images))
    print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
    

    显示预测结果和实际结果:

    dataiter = iter(testloader)
    images, labels = dataiter.next()
    imshow(torchvision.utils.make_grid(images))
    print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))
    outputs = net(images)
    predicted = torch.max(outputs, 1)[1]
    print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))
    
  • 相关阅读:
    MvcScaffolding: OnetoMany Relationships
    未能从程序集“System.ServiceModel, Version=3.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089”中加载
    MVC中使用Scaffold生成代码
    项目开发的三个阶段 我的一点体会
    在Eclipse工作空间中创建新项目的方法
    Visual Studio 使用常见问题
    Matlab代码:为图像添加信噪比为SNR db的高斯噪声
    《Ogre 3D 游戏开发框架指南》配套光盘的一个小瑕疵
    如何阅读C++源代码
    在Eclipse工作空间中移除再添加项目的方法
  • 原文地址:https://www.cnblogs.com/charleechan/p/12320557.html
Copyright © 2020-2023  润新知