版本:PyTorch 1.0。代码是在 Jupyter 中执行的。
导包:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
设置超参:
# Training hyper-parameters and the compute device.
BATCH_SIZE = 512  # samples per mini-batch; the author estimates roughly 2 GB of GPU memory
EPOCHS = 20  # number of full passes over the training set
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
加载数据:
# Per-channel mean / std used to normalise MNIST after ToTensor() has scaled
# pixels to [0, 1]. These are the commonly quoted MNIST statistics; the mean
# was previously written as 0.1037, a digit transposition of 0.1307.
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)

# Both splits must be preprocessed identically, so the pipeline is built once.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])

# Training split: downloaded into ./data on first use and reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=mnist_transform),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Test split: evaluation metrics do not depend on sample order, so no shuffling.
# download=True keeps this loader usable even if the training loader was not
# created first.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, download=True, transform=mnist_transform),
    batch_size=BATCH_SIZE,
    shuffle=False,
)
构建网络:方式一
class ConvNet(nn.Module):
    """Two-convolution, two-linear-layer classifier for 1x28x28 images.

    ``forward`` returns log-probabilities (the output of ``log_softmax``)
    of shape ``(batch, 10)``.
    """

    def __init__(self):
        super().__init__()
        # Shapes below are for an input of (N, 1, 28, 28).
        self.conv1 = nn.Conv2d(1, 10, 5)   # -> (N, 10, 24, 24)
        self.conv2 = nn.Conv2d(10, 20, 3)  # applied after 2x2 pooling -> (N, 20, 10, 10)
        self.fc1 = nn.Linear(20 * 10 * 10, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Map a batch of images ``x`` to per-class log-probabilities."""
        in_size = x.size(0)                # batch size, kept for the flatten below
        out = self.conv1(x)                # (N, 10, 24, 24)
        out = F.relu(out)
        out = F.max_pool2d(out, 2, 2)      # (N, 10, 12, 12)
        out = self.conv2(out)              # (N, 20, 10, 10)
        out = F.relu(out)
        out = out.view(in_size, -1)        # flatten to (N, 2000)
        out = self.fc1(out)                # (N, 500)
        out = F.relu(out)
        out = self.fc2(out)                # (N, 10)
        out = F.log_softmax(out, dim=1)    # normalise over the class dimension
        return out
构建网络:方式二——把更多的内容放在了Sequential里面,觉得网络会显得清楚一些
class MyNet(nn.Module):
    """Two conv/ReLU/max-pool stages followed by one linear layer.

    Each stage is grouped in an ``nn.Sequential``. ``forward`` returns raw,
    un-normalised class scores (logits) of shape ``(batch, 10)``; no softmax
    is applied here.
    """

    def __init__(self):
        super().__init__()
        # Shapes below are for an input of (N, 1, 28, 28).
        self.conv1 = nn.Sequential(
            # padding = (kernel_size - 1) / 2 keeps the spatial size unchanged
            # at stride 1: (N, 1, 28, 28) -> (N, 16, 28, 28).
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),   # -> (N, 16, 14, 14)
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),    # -> (N, 32, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),               # -> (N, 32, 7, 7)
        )
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Map a batch of images ``x`` to per-class logits."""
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)          # flatten (N, 32, 7, 7) to (N, 32*7*7)
        output = self.out(x)
        return output
定义优化器:
# Build the model on the selected device, then the optimizer over its parameters.
model = MyNet().to(DEVICE)  # replace MyNet with ConvNet to train the other architecture
optimizer = optim.Adam(model.parameters())  # Adam with its default hyper-parameters
定义训练和测试函数:
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch over ``train_loader``.

    Args:
        model: network to optimise; it is switched to training mode.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches that also exposes
            ``__len__`` (number of batches) and ``.dataset`` (for its size).
        optimizer: optimizer that owns ``model``'s parameters.
        epoch: epoch number, used only in the progress message.

    A progress line is printed after every 30th batch.
    """
    model.train()
    total_samples = len(train_loader.dataset)
    total_batches = len(train_loader)
    seen = 0  # samples consumed so far in this epoch
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        output = model(data)   # forward pass
        # cross_entropy applies log_softmax itself, so it accepts raw logits;
        # it is also correct for outputs that are already log-probabilities,
        # because log_softmax leaves those unchanged.
        loss = F.cross_entropy(output, target)
        loss.backward()        # back-propagate
        optimizer.step()       # update the parameters
        seen += len(data)
        if (batch_idx + 1) % 30 == 0:
            # Report the batches actually completed, including the current one.
            print('Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                epoch, seen, total_samples,
                100. * (batch_idx + 1) / total_batches, loss.item()))


def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        device: device each batch is moved to before the forward pass.
        test_loader: iterable of ``(data, target)`` batches that also exposes
            ``.dataset`` (for its size).
    """
    model.eval()
    test_loss = 0.0  # summed per-sample loss, kept as a Python float
    correct = 0      # number of correctly classified samples
    with torch.no_grad():  # inference only: no autograd bookkeeping needed
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Use the same criterion as train(). nll_loss would only be valid
            # for outputs that are already log-probabilities and gives a
            # meaningless value on raw logits.
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)  # index of the highest-scoring class
            correct += pred.eq(target).sum().item()

    test_loss /= len(test_loader.dataset)
    print(" Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%) ".format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)
    ))


# Despite its name this is an evaluation routine, not a unit test; the marker
# keeps pytest from collecting it when it is imported into a test module.
test.__test__ = False
主程序(训练与测试循环):
# Train for EPOCHS epochs, evaluating on the test set after each one.
for epoch in range(1, EPOCHS + 1):
    train(model, DEVICE, train_loader, optimizer, epoch)
    test(model, DEVICE, test_loader)