• Neural Network Model Complexity: Dropout Implementation in Python


    • Background
      The complexity of a Neural Network model depends mainly on the number of optimized parameters and the range over which those parameters vary. The number of parameters can be adjusted by hand, while the range of parameter values can be constrained with regularization techniques. Starting from the number of optimized parameters, this article takes the dropout technique as an example and briefly demonstrates how the drop probability affects the complexity of a Neural Network model.

    • Algorithm Characteristics
      ①. During training, drop data points with a given probability; ②. During testing, keep all data points.
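
      To illustrate this train/test asymmetry, the following minimal sketch uses PyTorch's built-in nn.Dropout (not the custom layer implemented later in this article): elements are zeroed and rescaled only in training mode, while evaluation mode passes the input through unchanged.

      import torch
      from torch import nn

      drop = nn.Dropout(p=0.5)   # drop probability 0.5
      x = torch.ones(8)

      drop.train()               # training mode: roughly half the entries become 0, the rest become 2.0
      print(drop(x))

      drop.eval()                # evaluation mode: the input is returned unchanged
      print(drop(x))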

    • Algorithm Derivation
      A data point \(x\) is transformed with probability \(p\) as follows,

      \[\begin{equation*} x' = \left\{\begin{split} &0 &\quad\text{with probability $p$,} \\ &\frac{x}{1-p} &\quad\text{otherwise,} \end{split}\right. \end{equation*} \]

      That is, the data point \(x\) is set to zero with probability \(p\), and is scaled up by a factor of \(1/(1-p)\) with probability \(1-p\). It then follows that,

      \[\begin{equation*} \mathbf{E}[x'] = p\mathbf{E}[0] + (1-p)\mathbf{E}[\frac{x}{1-p}] = \mathbf{E}[x], \end{equation*} \]

      so this transformation does not change the mean of the data point, i.e. it is unbiased.
      If the data point \(x\) is the input to a linear transformation, setting it to zero means it contributes nothing to that transformation; this is equivalent to disabling the data point together with its associated weight parameters, which reduces the number of optimized parameters and thus lowers the model complexity.
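
      As a quick numerical check of this unbiasedness, the sketch below applies the transformation above to a constant input and verifies that the sample mean is preserved (dropout_transform is a hypothetical helper written only for this check):

      import numpy

      def dropout_transform(x, p, rng):
          # zero each element with probability p, otherwise scale it by 1/(1-p)
          mask = rng.uniform(size=x.shape) > p
          return x * mask / (1 - p)

      rng = numpy.random.default_rng(0)
      x = numpy.full(1_000_000, 2.0)             # constant data points with E[x] = 2
      x_prime = dropout_transform(x, p=0.3, rng=rng)
      print(x_prime.mean())                      # close to 2.0: the mean is unchanged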

    • Data, Model, and Loss Function
      The data are generated according to,

      \[\begin{equation*} \left\{\begin{aligned} x &= r + 2g + 3b \\ y &= r^2 + 2g^2 + 3b^2 \\ lv &= -3r - 4g - 5b \end{aligned}\right. \end{equation*} \]

      The Neural Network model is structured as follows,

      where the input layer is \((r, g, b)\), the hidden layer uses the \(\tanh\) activation function, and the output layer is \((x, y, lv)\) with no activation function.
      The loss function is, \[\begin{equation*} L = \sum_i\left[\frac{1}{2}(\bar{x}^{(i)}-x^{(i)})^2+\frac{1}{2}(\bar{y}^{(i)}-y^{(i)})^2+\frac{1}{2}(\bar{lv}^{(i)}-lv^{(i)})^2\right] \end{equation*} \] where \(i\) is the index of the data sample and \((\bar{x}, \bar{y}, \bar{lv})\) are the corresponding observed values.
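
      As a tiny sanity check of the generation formulas above (illustrative only, not part of the original script), the three targets for a single sample can be computed directly:

      r, g, b = 1.0, 0.5, -0.5
      x = r + 2 * g + 3 * b                    # 1 + 1 - 1.5 = 0.5
      y = r ** 2 + 2 * g ** 2 + 3 * b ** 2     # 1 + 0.5 + 0.75 = 2.25
      lv = -3 * r - 4 * g - 5 * b              # -3 - 2 + 2.5 = -2.5
      print(x, y, lv)                          # 0.5 2.25 -2.5
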
    • Code Implementation
      The number of hidden-layer nodes is set to 300 so that the model has relatively high complexity. The drop probability \(p\) is then increased step by step, lowering the model complexity, in order to observe how the generalization error changes. The full implementation is as follows,

      import numpy
      import torch
      from torch import nn
      from torch import optim
      from torch.utils import data
      from matplotlib import pyplot as plt
      
      
      # Generate and wrap the data
      def xFunc(r, g, b):
          x = r + 2 * g + 3 * b
          return x
      
      
      def yFunc(r, g, b):
          y = r ** 2 + 2 * g ** 2 + 3 * b ** 2
          return y
      
      
      def lvFunc(r, g, b):
          lv = -3 * r - 4 * g - 5 * b
          return lv
      
      
      class GeneDataset(data.Dataset):
          
          def __init__(self, rRange=[-1, 1], gRange=[-1, 1], bRange=[-1, 1], num=100, transform=None,\
                       target_transform=None):
              self.__rRange = rRange
              self.__gRange = gRange
              self.__bRange = bRange
              self.__num = num
              self.__transform = transform
              self.__target_transform = target_transform
              
              self.__X = self.__build_X()
              self.__Y_ = self.__build_Y_()
              
          
          def __build_X(self):
              rArr = numpy.random.uniform(*self.__rRange, (self.__num, 1))
              gArr = numpy.random.uniform(*self.__gRange, (self.__num, 1))
              bArr = numpy.random.uniform(*self.__bRange, (self.__num, 1))
              X = numpy.hstack((rArr, gArr, bArr))
              return X
          
          
          def __build_Y_(self):
              rArr = self.__X[:, 0:1]
              gArr = self.__X[:, 1:2]
              bArr = self.__X[:, 2:3]
              xArr = xFunc(rArr, gArr, bArr)
              yArr = yFunc(rArr, gArr, bArr)
              lvArr = lvFunc(rArr, gArr, bArr)
              Y_ = numpy.hstack((xArr, yArr, lvArr))
              return Y_
          
          
          def __len__(self):
              return self.__num
          
          
          def __getitem__(self, idx):
              x = self.__X[idx]
              y_ = self.__Y_[idx]
              if self.__transform:
                  x = self.__transform(x)
              if self.__target_transform:
                  y_ = self.__target_transform(y_)
              return x, y_
      
      
      # Build the model
      class Linear(nn.Module):
          
          def __init__(self, dim_in, dim_out):
              super(Linear, self).__init__()
              
              self.__dim_in = dim_in
              self.__dim_out = dim_out
              self.weight = nn.Parameter(torch.randn((dim_in, dim_out)))
              self.bias = nn.Parameter(torch.randn((dim_out,)))
              
              
          def forward(self, X):
              X = torch.matmul(X, self.weight) + self.bias
              return X
          
          
      class Tanh(nn.Module):
          
          def __init__(self):
              super(Tanh, self).__init__()
              
              
          def forward(self, X):
              X = torch.tanh(X)
              return X
      
      
      class Dropout(nn.Module):
          
          def __init__(self, p):
              super(Dropout, self).__init__()
              
              assert 0 <= p <= 1
              self.__p = p     # probability of zeroing an element
              
              
          def forward(self, X):
              if self.__p == 0:
                  return X
              if self.__p == 1:
                  return torch.zeros_like(X)
              # keep each element with probability 1 - p, then rescale so the expectation is unchanged
              mark = (torch.rand(X.shape) > self.__p).type(torch.float)
              X = X * mark / (1 - self.__p)
              return X
          
      
      class MLP(nn.Module):
          
          def __init__(self, dim_hidden=50, p=0):
              super(MLP, self).__init__()
              
              self.__dim_hidden = dim_hidden
              self.__p = p
              # nn.Module's built-in training flag (toggled by model.train()/model.eval()) is used in forward
              self.__dim_in = 3
              self.__dim_out = 3
              
              self.lin1 = Linear(self.__dim_in, self.__dim_hidden)
              self.tanh = Tanh()
              self.drop = Dropout(self.__p)
              self.lin2 = Linear(self.__dim_hidden, self.__dim_out)
      
              
          def forward(self, X):
              X = self.tanh(self.lin1(X))
              if self.training:
                  X = self.drop(X)
              X = self.lin2(X)
              return X
      
      
      # Build the loss function
      class MSE(nn.Module):
              
          def __init__(self):
              super(MSE, self).__init__()
              
              
          def forward(self, Y, Y_):
              loss = torch.sum((Y - Y_) ** 2) / 2
              return loss
      
      
      # Training and testing units
      def train_epoch(trainLoader, model, loss_fn, optimizer):
          model.train()
          loss = 0
          
          with torch.enable_grad():
              for X, Y_ in trainLoader:
                  optimizer.zero_grad()
                  Y = model(X)
                  loss_tmp = loss_fn(Y, Y_)
                  loss_tmp.backward()
                  optimizer.step()
                  
                  loss += loss_tmp.item()
          return loss
                  
              
      def test_epoch(testLoader, model, loss_fn):
          model.eval()
          loss = 0
          
          with torch.no_grad():
              for X, Y_ in testLoader:
                  Y = model(X)
                  loss_tmp = loss_fn(Y, Y_)
                  loss += loss_tmp.item()
                  
          return loss
      
      
      # Run training and testing
      def train(trainLoader, testLoader, model, loss_fn, optimizer, epochs):
          minLoss = numpy.inf
          for epoch in range(epochs):
              trainLoss = train_epoch(trainLoader, model, loss_fn, optimizer) / len(trainLoader.dataset)
              testLoss = test_epoch(testLoader, model, loss_fn) / len(testLoader.dataset)
              if testLoss < minLoss:
                  minLoss = testLoss
                  torch.save(model.state_dict(), "./mlp.params")
      #         if epoch % 100 == 0:
      #             print(f"epoch = {epoch:8}, trainLoss = {trainLoss:15.9f}, testLoss = {testLoss:15.9f}")
          return minLoss
      
      
      numpy.random.seed(0)
      torch.random.manual_seed(0)
      
      def search_dropout():
          trainData = GeneDataset(num=50, transform=torch.Tensor, target_transform=torch.Tensor)
          trainLoader = data.DataLoader(trainData, batch_size=50, shuffle=True)
          testData = GeneDataset(num=1000, transform=torch.Tensor, target_transform=torch.Tensor)
          testLoader = data.DataLoader(testData, batch_size=1000, shuffle=False)
      
          dim_hidden1 = 300
          p = 0.005
          model = MLP(dim_hidden1, p)
          loss_fn = MSE()
          optimizer = optim.Adam(model.parameters(), lr=0.003)
          train(trainLoader, testLoader, model, loss_fn, optimizer, 100000)
      
          pRange = numpy.linspace(0, 1, 101)
          lossList = list()
          for idx, p in enumerate(pRange):
              model = MLP(dim_hidden1, p)
              loss_fn = MSE()
              optimizer = optim.Adam(model.parameters(), lr=0.003)
              model.load_state_dict(torch.load("./mlp.params"))
              loss = train(trainLoader, testLoader, model, loss_fn, optimizer, 100000)
              lossList.append(loss)
              print(f"p = {p:10f}, loss = {loss:15.9f}")
      
          minIdx = numpy.argmin(lossList)
          pBest = pRange[minIdx]
          lossBest = lossList[minIdx]
      
          fig = plt.figure(figsize=(5, 4))
          ax1 = fig.add_subplot(1, 1, 1)
          ax1.plot(pRange, lossList, ".--", lw=1, markersize=5, label="testing error", zorder=1)
          ax1.scatter(pBest, lossBest, marker="*", s=30, c="red", label="optimal", zorder=2)
          ax1.set(xlabel="$p$", ylabel="error", title="optimal dropout probability = {:.5f}".format(pBest))
          ax1.legend()
          fig.tight_layout()
          fig.savefig("search_p.png", dpi=100)
          # plt.show()
      
      
      
      if __name__ == "__main__":
          search_dropout()
      
    • Results

      [Figure: search_p.png, testing error versus drop probability p]
      As the drop probability increases, the generalization error first decreases and then increases, which roughly corresponds to the model moving from overfitting to underfitting as its complexity is reduced.
    • Usage Recommendations
      ①. Because dropout is meant to disable an entire node, it is usually applied to the node's final output (i.e. after the activation function);
      ②. Dropout is suited to the fully connected layers of a neural network (see the placement sketch below).
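
      A minimal sketch of this placement, assuming PyTorch's built-in layers rather than the custom classes above (the drop probability 0.2 is purely illustrative):

      from torch import nn

      # dropout follows the hidden activation, so it acts on whole nodes of a fully connected layer
      mlp = nn.Sequential(
          nn.Linear(3, 300),
          nn.Tanh(),
          nn.Dropout(p=0.2),
          nn.Linear(300, 3),
      )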

    • References
      ①. Dive into Deep Learning (《动手学深度学习》), Mu Li (李沐) et al.
