1 导入实验所需要的包
import torch import torch.nn as nn import numpy as np import torchvision import torchvision.transforms as transforms import matplotlib.pyplot as plt from torch.utils.data import DataLoader,TensorDataset
2 下载MNIST数据集和读取数据
# Download MNIST into ../Datasets/MNIST (skipped when already present).
train_dataset = torchvision.datasets.MNIST(
    root="../Datasets/MNIST",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root="../Datasets/MNIST",
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Move the full image/label tensors to the GPU once, up front.
# Reading `.data` / `.targets` directly bypasses the `transform` given above,
# which is why the pixel values are scaled by 1/255 by hand here.
train_x = train_dataset.data.cuda().float() / 255
train_y = train_dataset.targets.cuda().long()
test_x = test_dataset.data.cuda().float() / 255
test_y = test_dataset.targets.cuda().long()

# Re-wrap the GPU tensors so the loaders yield (image, label) batches.
train_dataset = TensorDataset(train_x, train_y)
test_dataset = TensorDataset(test_x, test_y)

# Single source of truth for the mini-batch size (previously the loaders
# hard-coded 32 and ignored this variable).
batch_size = 32
train_iter = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Notebook-style sanity checks of one batch's image shape; the values are
# only displayed when run as the last expression of a notebook cell.
next(iter(train_iter))[0].shape
next(iter(test_iter))[0].shape
3 定义模型参数
# Number of training epochs and learning rate.
num_epochs, lr = 50, 0.01
# Flattened 28x28 input image size and number of output classes.
num_inputs, num_outputs = 28 * 28, 10
4 定义模型
第一种:定义一个有 三层 的前馈神经网络
class LinearNet_1(nn.Module):
    """Feed-forward classifier with one hidden layer.

    Architecture: Linear(num_inputs, num_hiddens) -> ReLU ->
    Linear(num_hiddens, num_outputs).

    Args:
        num_inputs: Size of each flattened input sample.
        num_outputs: Number of output classes.
        num_hiddens: Width of the hidden layer.
    """

    def __init__(self, num_inputs=784, num_outputs=10, num_hiddens=100):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, num_hiddens)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(num_hiddens, num_outputs)

    def forward(self, x):
        """Return raw class scores (logits) for a batch of flattened inputs."""
        x = self.relu(self.linear1(x))
        # No activation on the output layer: nn.CrossEntropyLoss expects
        # unnormalized logits, and a ReLU here would clamp every negative
        # score to zero.
        return self.linear2(x)
第二种:定义一个有 四层 的前馈神经网络
class LinearNet_2(nn.Module):
    """Feed-forward classifier with two hidden layers.

    Architecture: Linear(num_inputs, num_hiddens1) -> ReLU ->
    Linear(num_hiddens1, num_hiddens2) -> ReLU ->
    Linear(num_hiddens2, num_outputs).

    Args:
        num_inputs: Size of each flattened input sample.
        num_outputs: Number of output classes.
        num_hiddens1: Width of the first hidden layer.
        num_hiddens2: Width of the second hidden layer.
    """

    def __init__(self, num_inputs=784, num_outputs=10,
                 num_hiddens1=100, num_hiddens2=100):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, num_hiddens1)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(num_hiddens1, num_hiddens2)
        self.linear3 = nn.Linear(num_hiddens2, num_outputs)

    def forward(self, x):
        """Return raw class scores (logits) for a batch of flattened inputs."""
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        # No activation on the output layer: nn.CrossEntropyLoss expects
        # unnormalized logits, and a ReLU here would clamp every negative
        # score to zero.
        return self.linear3(x)
第三种:定义一个有 五层 的前馈神经网络
class LinearNet_3(nn.Module):
    """Feed-forward classifier with three hidden layers.

    Architecture: Linear(num_inputs, num_hiddens1) -> ReLU ->
    Linear(num_hiddens1, num_hiddens2) -> ReLU ->
    Linear(num_hiddens2, num_hiddens3) -> ReLU ->
    Linear(num_hiddens3, num_outputs).

    Args:
        num_inputs: Size of each flattened input sample.
        num_outputs: Number of output classes.
        num_hiddens1: Width of the first hidden layer.
        num_hiddens2: Width of the second hidden layer.
        num_hiddens3: Width of the third hidden layer.
    """

    def __init__(self, num_inputs=784, num_outputs=10,
                 num_hiddens1=100, num_hiddens2=100, num_hiddens3=100):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, num_hiddens1)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(num_hiddens1, num_hiddens2)
        self.linear3 = nn.Linear(num_hiddens2, num_hiddens3)
        self.linear4 = nn.Linear(num_hiddens3, num_outputs)

    def forward(self, x):
        """Return raw class scores (logits) for a batch of flattened inputs."""
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.relu(self.linear3(x))
        # No activation on the output layer: nn.CrossEntropyLoss expects
        # unnormalized logits, and a ReLU here would clamp every negative
        # score to zero.
        return self.linear4(x)
5 定义训练模型
def train(net, train_iter, test_iter, loss, num_epochs, batch_size,
          params=None, lr=None, optimizer=None):
    """Train `net` and record the mean train/test loss of every epoch.

    Each batch `X` is flattened to `(batch, features)` before being fed to
    `net`. The test loss is computed after each epoch without tracking
    gradients. Progress is printed every 5 epochs.

    Args:
        net: Model called as `net(X)`.
        train_iter: Iterable of `(X, y)` training batches.
        test_iter: Iterable of `(X, y)` evaluation batches.
        loss: Callable `loss(prediction, y)` returning a scalar tensor.
            The per-sample averages below assume it returns the batch mean
            (the default reduction of `nn.CrossEntropyLoss`).
        num_epochs: Number of passes over `train_iter`.
        batch_size: Unused; kept for backward compatibility with callers.
        params: Parameters (or a callable returning them) used only to build
            a default SGD optimizer when `optimizer` is None. Defaults to
            `net.parameters()`.
        lr: Learning rate for the default SGD optimizer; ignored when
            `optimizer` is given.
        optimizer: Optimizer to step. When None, plain SGD is created from
            `params` and `lr`.

    Returns:
        Tuple `(train_ls, test_ls)`: lists with one mean per-sample loss per
        epoch for the training and test data respectively.

    Raises:
        ValueError: If both `optimizer` and `lr` are None.
    """
    if optimizer is None:
        if lr is None:
            raise ValueError("train() needs either an optimizer or a learning rate")
        if params is None:
            params = net.parameters()
        elif callable(params):
            # Tolerate callers that pass the bound `net.parameters` method.
            params = params()
        optimizer = torch.optim.SGD(params, lr=lr)

    train_ls, test_ls = [], []
    for epoch in range(num_epochs):
        # ---- training pass ----
        ls, count = 0.0, 0
        for X, y in train_iter:
            X = X.reshape(X.shape[0], -1)  # e.g. [32, 28, 28] -> [32, 784]
            batch_loss = loss(net(X), y)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
            # Weight by batch size so a smaller final batch is averaged fairly.
            ls += batch_loss.item() * y.shape[0]
            count += y.shape[0]
        train_ls.append(ls / count if count else 0.0)

        # ---- evaluation pass (no gradients needed) ----
        ls, count = 0.0, 0
        with torch.no_grad():
            for X, y in test_iter:
                X = X.reshape(X.shape[0], -1)
                batch_loss = loss(net(X), y)
                ls += batch_loss.item() * y.shape[0]
                count += y.shape[0]
        test_ls.append(ls / count if count else 0.0)

        if (epoch + 1) % 5 == 0:
            print('epoch: %d, train loss: %f, test loss: %f'%(epoch+1,train_ls[-1],test_ls[-1]))
    return train_ls, test_ls
6 模型训练
# Model classes to compare (one, two and three hidden layers).
total_net = [LinearNet_1, LinearNet_2, LinearNet_3]
# Per-model loss curves, in the same order as `total_net`.
Train_loss, Test_loss = [], []
# Loss function
loss = nn.CrossEntropyLoss()
for cur_net in total_net:
    net = cur_net()
    # Initialise every parameter (weights and biases) from N(0, 0.01^2).
    for param in net.parameters():
        nn.init.normal_(param, mean=0, std=0.01)
    # Move the model to the GPU explicitly, before the optimizer is built,
    # rather than as a side effect inside the train() argument list.
    net = net.cuda()
    # NOTE(review): this step size (0.001) differs from the module-level `lr`
    # that is also passed to train(); confirm which value is intended.
    optimizer = torch.optim.SGD(net.parameters(), lr=0.001)
    train_ls, test_ls = train(
        net, train_iter, test_iter, loss, num_epochs, batch_size,
        net.parameters(), lr, optimizer,
    )
    Train_loss.append(train_ls)
    Test_loss.append(test_ls)
7 绘制不同隐藏层数损失图
# One x position per recorded epoch, numbered 1..N so ticks match epochs
# (linspace(0, N, N) produced non-integer positions).
x = np.arange(1, len(train_ls) + 1)
plt.figure(figsize=(10, 8))
# Plot one training-loss curve per trained model; curve i belongs to the
# network with i+1 hidden layers.
for i, curve in enumerate(Train_loss):
    plt.plot(x, curve, label=f'with {i+1} hiddens layers:', linewidth=1.5)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.title('train loss')
plt.show()