• Choosing an optimizer for deep learning (SGD, Momentum, RMSprop, and Adam)
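For reference, the four optimizers being compared update a parameter $\theta$ from its gradient $g$ roughly as follows (standard textbook forms, matching PyTorch's conventions up to minor details; $\eta$ is the learning rate and $\epsilon$ a small stability constant):

$$
\begin{aligned}
\text{SGD:}\quad & \theta \leftarrow \theta - \eta\, g \\
\text{Momentum:}\quad & v \leftarrow \mu v + g, \qquad \theta \leftarrow \theta - \eta\, v \\
\text{RMSprop:}\quad & s \leftarrow \alpha s + (1-\alpha)\, g^{2}, \qquad \theta \leftarrow \theta - \frac{\eta}{\sqrt{s}+\epsilon}\, g \\
\text{Adam:}\quad & m \leftarrow \beta_1 m + (1-\beta_1)\, g, \quad v \leftarrow \beta_2 v + (1-\beta_2)\, g^{2}, \quad \theta \leftarrow \theta - \eta\, \frac{\hat m}{\sqrt{\hat v}+\epsilon}
\end{aligned}
$$

where $\hat m$ and $\hat v$ are the bias-corrected moment estimates.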


    
    
    import torch
    import torch.utils.data as Data
    import torch.nn.functional as F
    import matplotlib.pyplot as plt
    import torch.nn as nn
    
    LR = 0.01        # learning rate shared by all four optimizers
    BATCH_SIZE = 32
    EPOCH = 5
    
    # toy regression data: y = x^2 plus Gaussian noise
    x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)  # unsqueeze turns the 1-D tensor into a [1000, 1] column
    y = x.pow(2) + 0.1 * torch.normal(torch.zeros(*x.size()))
    
    torch_dataset = Data.TensorDataset(x, y)
    loader = Data.DataLoader(dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True)
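    # the DataLoader shuffles the 1000 samples and serves them in mini-batches
    # of BATCH_SIZE=32, so one epoch is ceil(1000/32) = 32 steps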
    # build the network: one hidden layer with ReLU, then a linear output
    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.hidden = nn.Linear(1, 20)    # hidden layer: 1 input feature -> 20 units
            self.predict = nn.Linear(20, 1)   # output layer: 20 units -> 1 prediction
        def forward(self, x):
            x = F.relu(self.hidden(x))        # ReLU activation on the hidden layer
            x = self.predict(x)               # no activation on the output: this is regression
            return x
    
    net_SGD      = Net()
    net_Momentum = Net()
    net_RMSProp  = Net()
    net_Adam     = Net()
    nets = [net_SGD, net_Momentum, net_RMSProp, net_Adam]
    
    opt_SGD      = torch.optim.SGD(net_SGD.parameters(), lr=LR)
    opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.9)
    opt_RMSProp  = torch.optim.RMSprop(net_RMSProp.parameters(), lr=LR, alpha=0.9)
    opt_Adam     = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
    optimizers = [opt_SGD, opt_Momentum, opt_RMSProp, opt_Adam]
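    # note: each Net() call creates an independently initialized copy, and each
    # copy gets its own optimizer, so the four loss curves recorded below are
    # trained on the same mini-batch stream and are directly comparable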
    
    loss_func = torch.nn.MSELoss()
    loss_his = [[], [], [], []]   # one loss history per optimizer
    for epoch in range(EPOCH):
        for step, (batch_x, batch_y) in enumerate(loader):
            # train all four networks on the same mini-batch
            for net, opt, l_his in zip(nets, optimizers, loss_his):
                output = net(batch_x)
                loss = loss_func(output, batch_y)
                opt.zero_grad()   # clear gradients left over from the previous step
                loss.backward()   # backpropagate
                opt.step()        # apply this optimizer's update rule
                l_his.append(loss.item())   # record the scalar loss
    labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
    
    print(loss_his)   # dump the raw loss histories (long output)
    for i, l_his in enumerate(loss_his):
        plt.plot(l_his, label=labels[i])
    
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0,0.4))
    plt.show()

    The final training comparison plot is shown below; it makes each optimizer's convergence speed easy to compare:
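    In typical runs of this script (the exact curves vary with the random noise and weight initialization), plain SGD descends most slowly, Momentum speeds it up considerably, and RMSprop and Adam drive the loss down fastest, thanks to their per-parameter adaptive step sizes.

    The raw per-step loss is noisy, so the four curves can be hard to tell apart. If that happens, a small moving-average pass makes the comparison clearer; here is a minimal sketch to append to the script above (the smooth helper and the window size of 20 are my own additions, not part of the original post):

    import numpy as np

    def smooth(values, window=20):
        # simple moving average; 'valid' mode shortens the curve by window-1 points
        kernel = np.ones(window) / window
        return np.convolve(values, kernel, mode='valid')

    for i, l_his in enumerate(loss_his):
        plt.plot(smooth(np.array(l_his, dtype=float)), label=labels[i])
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Smoothed Loss')
    plt.show()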

  • Original post: https://www.cnblogs.com/gao109214/p/13864862.html