1 导入实验所需要的包
import torch
import numpy as np
import random
from IPython import display
from matplotlib import pyplot as plt
from torch import nn
import torch.utils.data as Data
import torch.optim as optim
from torch.nn import init
import torchvision
import torchvision.transforms as transforms
2 下载MNIST数据集
# Load the MNIST handwritten-digit training and test splits from
# ../Datasets/MNIST (download=True is requested for both); every image goes
# through transforms.ToTensor().
mnist_train = torchvision.datasets.MNIST(
    root='../Datasets/MNIST',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = torchvision.datasets.MNIST(
    root='../Datasets/MNIST',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
3 读取数据
# Wrap both splits in mini-batch loaders. Only the training split is shuffled;
# num_workers=0 keeps data loading in the main process.
batch_size = 32
train_iter = Data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
test_iter = Data.DataLoader(
    mnist_test,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)
4 利用torch.nn构建模型以及初始化参数
- 使用ReLU函数
class FlattenLayer(torch.nn.Module):
    """Flatten every sample of a batch into a single feature vector.

    The first dimension is kept as the batch dimension; all remaining
    dimensions are collapsed into one.
    """

    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        """Return ``x`` reshaped to ``(x.shape[0], -1)``.

        ``reshape`` is used rather than ``view`` so that non-contiguous
        inputs (e.g. transposed or permuted tensors) are accepted instead of
        raising a RuntimeError. Contiguous inputs yield the same values as
        before.
        """
        return x.reshape(x.shape[0], -1)
# Layer sizes used by every model builder: flattened input features,
# hidden units, and output classes.
num_inputs = 784
num_hiddens = 256
num_outputs = 10
def use_ReLU():
    """Build a one-hidden-layer classifier that uses a ReLU activation.

    Returns:
        An ``nn.Sequential`` of FlattenLayer -> Linear(num_inputs, num_hiddens)
        -> ReLU -> Linear(num_hiddens, num_outputs).
    """
    layers = [
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs),
    ]
    return nn.Sequential(*layers)
- 使用ELU函数
def use_ELU():
    """Build a one-hidden-layer classifier that uses an ELU activation.

    Returns:
        An ``nn.Sequential`` of FlattenLayer -> Linear(num_inputs, num_hiddens)
        -> ELU -> Linear(num_hiddens, num_outputs).
    """
    # The two linear layers are created in the same order as they appear in
    # the network (hidden first, then output).
    hidden = nn.Linear(num_inputs, num_hiddens)
    output = nn.Linear(num_hiddens, num_outputs)
    return nn.Sequential(FlattenLayer(), hidden, nn.ELU(), output)
- 使用Sigmoid函数
def use_Sigmoid():
    """Build a one-hidden-layer classifier that uses a Sigmoid activation.

    Returns:
        An ``nn.Sequential`` of FlattenLayer -> Linear(num_inputs, num_hiddens)
        -> Sigmoid -> Linear(num_hiddens, num_outputs).
    """
    stack = (
        FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.Sigmoid(),
        nn.Linear(num_hiddens, num_outputs),
    )
    model = nn.Sequential(*stack)
    return model
初始化参数
def init_params(net, learning_rate=None, std=0.01):
    """Re-initialise every parameter of ``net`` and build an SGD optimizer for it.

    Every tensor yielded by ``net.parameters()`` (weights and biases alike)
    is overwritten in place with samples from a normal distribution with
    mean 0 and standard deviation ``std``.

    Args:
        net: Model whose parameters are initialised and then optimised.
        learning_rate: SGD learning rate. When ``None`` the module-level
            ``lr`` is used, which is the original behaviour.
        std: Standard deviation of the initial parameter values.

    Returns:
        A ``torch.optim.SGD`` instance over ``net.parameters()``.
    """
    if learning_rate is None:
        # Resolved at call time, so the global ``lr`` only has to be defined
        # before the first call, not before this function definition.
        learning_rate = lr
    for param in net.parameters():
        init.normal_(param, mean=0, std=std)
    return torch.optim.SGD(net.parameters(), learning_rate)
5 定义训练超参数和交叉熵损失函数
# Training hyper-parameters: learning rate handed to SGD and the number of
# passes over the training set.
lr = 0.01
num_epochs = 50
# Cross-entropy criterion used for both training and evaluation.
loss_fn = nn.CrossEntropyLoss()
6 定义计算测试集 loss 和准确率的函数
def evaluate_testset(data_iter, net, criterion=None):
    """Compute the accuracy and mean per-sample loss of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` mini-batches.
        net: Model called as ``net(X)``; its output is arg-maxed over dim 1
            to obtain predicted labels.
        criterion: Loss callable ``criterion(y_hat, y)``. When ``None`` the
            module-level ``loss_fn`` is used, which is the original behaviour.
            Its result is multiplied by the batch size, so it is assumed to
            be a batch mean.

    Returns:
        ``(accuracy, mean_loss)``, both averaged over all samples seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    if criterion is None:
        criterion = loss_fn

    # Put modules in eval mode for the duration of the evaluation, then
    # restore whatever mode the caller had set.
    is_module = isinstance(net, torch.nn.Module)
    was_training = is_module and net.training
    if is_module:
        net.eval()

    correct, loss_sum, n = 0, 0.0, 0
    try:
        # No gradients are needed here; skipping autograd bookkeeping saves
        # memory and time.
        with torch.no_grad():
            for X, y in data_iter:
                y_hat = net(X)
                batch = y.shape[0]
                correct += (y_hat.argmax(dim=1) == y).sum().item()
                # Re-weight the batch loss by batch size so that a smaller
                # final batch does not skew the average.
                loss_sum += criterion(y_hat, y).item() * batch
                n += batch
    finally:
        if is_module:
            net.train(was_training)
    return correct / n, loss_sum / n
7 定义模型训练函数
# Model training loop.
def train(model,train_loader,test_loader,loss_fn,num_epochs,batch_size,params=None,lr=None,optimizer=None):
    """Train ``model`` for ``num_epochs`` epochs and record per-epoch losses.

    Args:
        model: Network to optimise; called as ``model(x)``.
        train_loader: Iterable of ``(x, y)`` training mini-batches.
        test_loader: Iterable of ``(x, y)`` batches, passed to
            ``evaluate_testset`` after every epoch.
        loss_fn: Callable ``loss_fn(y_pred, y)`` returning the batch loss.
        num_epochs: Number of passes over ``train_loader``.
        batch_size: Unused; kept for interface compatibility.
        params: Parameters to optimise when no ``optimizer`` is given. May be
            an iterable or a zero-argument callable returning one; defaults
            to ``model.parameters()``.
        lr: Learning rate used when no ``optimizer`` is given.
        optimizer: Optimizer to step. When ``None`` an SGD optimizer is built
            from ``params`` and ``lr``.

    Returns:
        ``(train_ls, test_ls)``: lists with one mean per-sample loss per epoch.

    Raises:
        ValueError: If neither ``optimizer`` nor ``lr`` is supplied.
    """
    if optimizer is None:
        if lr is None:
            raise ValueError("train() requires either an optimizer or a learning rate")
        if params is None:
            params = model.parameters()
        elif callable(params):
            params = params()
        optimizer = torch.optim.SGD(params, lr=lr)

    train_ls = []
    test_ls = []
    for epoch in range(num_epochs):
        # Make sure the model is in training mode at the start of each epoch.
        model.train()
        train_loss_sum, train_acc_num, total_examples = 0.0, 0, 0
        for x, y in train_loader:
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            n = y.shape[0]
            total_examples += n
            # Weight the batch loss by batch size so the epoch value is a
            # per-sample mean, on the same scale as the test loss.
            train_loss_sum += loss.item() * n
            train_acc_num += (y_pred.argmax(dim=1) == y).sum().item()
        train_ls.append(train_loss_sum / total_examples)
        test_acc, test_loss = evaluate_testset(test_loader, model)
        test_ls.append(test_loss)
        print('epoch %d, train_loss %.6f,test_loss %f,train_acc %.6f,test_acc %.6f'%(epoch+1, train_ls[epoch],test_ls[epoch],train_acc_num/total_examples,test_acc))
    # Callers unpack the two loss histories, so they must be returned.
    return train_ls, test_ls
8 定义绘制训练集和测试集 loss 曲线的方法
def show_plots(mytrain_loss,mytest_loss):
    """Plot training and test loss curves against the epoch number.

    Args:
        mytrain_loss: Sequence with one training-loss value per epoch.
        mytest_loss: Sequence with one test-loss value per epoch.
    """
    # Plot the arguments rather than same-named globals, and give each curve
    # its own 1-based epoch axis so differing lengths cannot break the plot.
    train_epochs = np.arange(1, len(mytrain_loss) + 1)
    test_epochs = np.arange(1, len(mytest_loss) + 1)
    plt.plot(train_epochs, mytrain_loss, label="train_loss", linewidth=1.5)
    plt.plot(test_epochs, mytest_loss, label="test_loss", linewidth=1.5)
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.legend()
    plt.show()
9 开始训练模型
使用 ReLU 激活函数的损失分析以及训练集和测试集loss曲线
# ReLU experiment: build the model, initialise it, train, and plot the losses.
net = use_ReLU()
optimizer = init_params(net)
# Pass the criterion ``loss_fn`` (the name ``loss`` is not defined at module
# level) and the parameter iterable rather than the bound method.
train_loss, test_loss = train(
    net,
    train_iter,
    test_iter,
    loss_fn,
    num_epochs,
    batch_size,
    params=net.parameters(),
    lr=lr,
    optimizer=optimizer,
)
show_plots(train_loss, test_loss)
使用 ELU 激活函数的损失分析以及训练集和测试集loss曲线
# ELU experiment: build the model, initialise it, train, and plot the losses.
net = use_ELU()
optimizer = init_params(net)
# ``params`` receives the parameter iterable, not the bound method itself.
train_loss, test_loss = train(
    net,
    train_iter,
    test_iter,
    loss_fn,
    num_epochs,
    batch_size,
    params=net.parameters(),
    lr=lr,
    optimizer=optimizer,
)
show_plots(train_loss, test_loss)
使用 Sigmoid 激活函数的损失分析以及训练集和测试集loss曲线
# Sigmoid experiment: build the model, initialise it, train, and plot the losses.
net = use_Sigmoid()
optimizer = init_params(net)
# ``params`` receives the parameter iterable, not the bound method itself.
train_loss, test_loss = train(
    net,
    train_iter,
    test_iter,
    loss_fn,
    num_epochs,
    batch_size,
    params=net.parameters(),
    lr=lr,
    optimizer=optimizer,
)
show_plots(train_loss, test_loss)