1 导入所需要的包
import numpy as np
import torch
from torch import nn
import matplotlib.pyplot as plt
from IPython import display
from torch.utils.data import TensorDataset ,DataLoader
from sklearn.model_selection import train_test_split
2 自定义数据
要求:
1、生成单个数据集。
2、数据集的大小为10000且训练集大小为7000,测试集大小为3000。
3、数据集的样本特征维度p为500,且服从如下的高维线性函数:$y=0.028+\sum \limits _{i=1}^{p} 0.0056 x_{i}+\epsilon$
num_input,num_example = 500,10000
true_w = torch.tensor(np.ones((num_input,1)),dtype = torch.float32)*0.0056
# true_w = torch.ones(500,1)*0.0056
true_b = 0.028
x_data = torch.tensor(np.random.randn(num_example,num_input),dtype = torch.float32)
y_data = torch.mm(x_data ,true_w)+true_b
y_data += torch.normal(mean = 0,std = 0.001,size=y_data.size())
# y_dat += torch.tensor(np.random.normal(0, 0.01, size=y_data.size()), dtype=torch.float)
train_x,test_x,train_y,test_y = train_test_split(x_data,y_data,shuffle=True,test_size=0.3)
print(train_x.shape)
print(train_y.shape)
print(test_x.shape)
print(test_y.shape)
3 读取数据
batch_size = 50
train_datasets = TensorDataset(train_x,train_y)
train_iter = DataLoader(
dataset = train_datasets,
batch_size = batch_size,
shuffle = True,
num_workers = 0
)
test_datasets = TensorDataset(test_x,test_y)
test_iter = DataLoader(
dataset = test_datasets,
batch_size = batch_size,
shuffle = True,
num_workers = 0
)
4 初始化参数
num_hiddens , num_output = 256,1
w1 = torch.normal(mean = 0,std = 0.001,size = (num_hiddens,num_input), dtype=torch.float32)
b1 = torch.ones(1,dtype = torch.float32)
w2 = torch.normal(mean = 0,std = 0.001,size = (num_output,num_hiddens), dtype=torch.float32)
b2 = torch.ones(1,dtype = torch.float32)
params = [w1,w2,b1,b2]
for param in params:
param.requires_grad_(requires_grad=True)
5 定义隐藏层的激活函数
def ReLU(X):
return torch.max(X,other = torch.tensor(0.0))
6 定义模型
def DNN(x):
H = ReLU(torch.matmul(x,w1.t())+b1)
H = ReLU(torch.matmul(H,w2.t())+b2)
return H
7 定义最小化均方误差以及随机梯度下降法
loss = torch.nn.MSELoss()
def SGD(paras,lr,batch_size):
for param in params:
param.data -= lr*param.grad/batch_size
8 定义训练函数
def train(model,train_iter,loss,num_epochs,batch_size,lr,optimizer =None):
train_ls ,test_ls = [],[]
for epoch in range(num_epochs):
train_ls_sum,train_acc_num,n = 0,0,0
for x ,y in train_iter:
y_pred = model(x)
l = loss(y_pred,y)
if params is not None and params[0].grad is not None:
for param in params:
param.grad.data.zero_()
l.backward()
SGD(params,lr,batch_size)
train_ls.append(loss(model(train_x),train_y).item())
test_ls.append(loss(model(test_x),test_y).item())
print('epoch %d, train_loss %.6f,test_loss %f'%(epoch+1, train_ls[epoch],test_ls[epoch]))
return train_ls,test_ls
9 训练
lr = 0.001
num_epochs = 500
train_loss ,test_lss =train(DNN,train_iter,loss,num_epochs,batch_size,lr)
10 可视化
x = np.linspace(0,len(train_loss),len(train_loss))
plt.plot(x,train_loss,label="train_loss",linewidth=1.5)
plt.plot(x,test_loss,label="test_loss",linewidth=1.5)
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()