• Multilayer Perceptron


    Non-linear Hypotheses

    With a large number of features, linear regression and logistic regression become computationally expensive, which motivates neural networks.
    A simple three-layer neural network model uses the following notation:

    $$a_i^{(j)} = \text{"activation" of unit } i \text{ in layer } j$$
    $$\Theta^{(j)} = \text{matrix of weights controlling function mapping from layer } j \text{ to layer } j+1$$

    For a network with inputs $x_0, x_1, x_2, x_3$ (where $x_0$ is the bias unit), three hidden units, and one output unit:

    $$a_1^{(2)} = g(\Theta_{10}^{(1)}x_0 + \Theta_{11}^{(1)}x_1 + \Theta_{12}^{(1)}x_2 + \Theta_{13}^{(1)}x_3)$$
    $$a_2^{(2)} = g(\Theta_{20}^{(1)}x_0 + \Theta_{21}^{(1)}x_1 + \Theta_{22}^{(1)}x_2 + \Theta_{23}^{(1)}x_3)$$
    $$a_3^{(2)} = g(\Theta_{30}^{(1)}x_0 + \Theta_{31}^{(1)}x_1 + \Theta_{32}^{(1)}x_2 + \Theta_{33}^{(1)}x_3)$$
    $$h_\Theta(x) = a_1^{(3)} = g(\Theta_{10}^{(2)}a_0^{(2)} + \Theta_{11}^{(2)}a_1^{(2)} + \Theta_{12}^{(2)}a_2^{(2)} + \Theta_{13}^{(2)}a_3^{(2)})$$

    Vectorized Implementation

    Replacing the argument of $g$ in the formulas above with $z$:

    $$a_1^{(2)} = g(z_1^{(2)}), \quad a_2^{(2)} = g(z_2^{(2)}), \quad a_3^{(2)} = g(z_3^{(2)})$$

    Writing $x = a^{(1)}$, each layer is computed from the previous one:

    $$z^{(j)} = \Theta^{(j-1)}a^{(j-1)}$$

    which gives:

    $$z^{(j)} = \begin{bmatrix} z_1^{(j)} \\ z_2^{(j)} \\ \vdots \\ z_n^{(j)} \end{bmatrix}, \qquad a^{(j)} = g(z^{(j)})$$
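
    As a concrete illustration of this vectorized forward pass, here is a minimal sketch in PyTorch. The sizes and weight values are made up for illustration (they are not taken from the original figure):

    import torch

    def g(z):
        # sigmoid activation
        return torch.sigmoid(z)

    # Hypothetical weights for a 3-input, 3-hidden-unit, 1-output network.
    Theta1 = torch.randn(3, 4) * 0.01   # Theta^(1): maps a^(1) (bias + 3 inputs) to layer 2
    Theta2 = torch.randn(1, 4) * 0.01   # Theta^(2): maps a^(2) (bias + 3 units) to layer 3

    x = torch.tensor([0.2, -1.0, 0.7])        # x_1, x_2, x_3
    a1 = torch.cat([torch.ones(1), x])        # a^(1) = x with bias unit x_0 = 1
    z2 = Theta1 @ a1                          # z^(2) = Theta^(1) a^(1)
    a2 = torch.cat([torch.ones(1), g(z2)])    # a^(2) = g(z^(2)), plus bias a_0^(2) = 1
    z3 = Theta2 @ a2                          # z^(3) = Theta^(2) a^(2)
    h = g(z3)                                 # h_Theta(x) = a_1^(3)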

    There is a lot of notation here, so let's sort it out with an example:
    a neural network that computes logical AND, using a single neuron with
    inputs $x_0 = 1$ and $x_1, x_2 \in \{0, 1\}$. With the classic choice of
    weights $\Theta^{(1)} = \begin{bmatrix} -30 & 20 & 20 \end{bmatrix}$ we have:

    $$h_\Theta(x) = g(-30 + 20x_1 + 20x_2)$$

    Since $g(10) \approx 1$ while $g(-10) \approx 0$ and $g(-30) \approx 0$, the output
    is close to 1 only when $x_1 = x_2 = 1$, i.e. $h_\Theta(x) \approx x_1 \text{ AND } x_2$.
    Now a multilayer example: XNOR, whose output is 1 only when both inputs are 0 or
    both are 1. It decomposes as $x_1 \text{ XNOR } x_2 = (x_1 \text{ AND } x_2) \text{ OR } ((\text{NOT } x_1) \text{ AND } (\text{NOT } x_2))$.
    The basic neurons:

    • Logical AND: $h_\Theta(x) = g(-30 + 20x_1 + 20x_2)$
    • Logical OR: $h_\Theta(x) = g(-10 + 20x_1 + 20x_2)$
    • Logical NOT: $h_\Theta(x) = g(10 - 20x_1)$
      First construct a neuron for the latter half, $(\text{NOT } x_1) \text{ AND } (\text{NOT } x_2)$,
      which is $g(10 - 20x_1 - 20x_2)$ and fires only when $x_1 = x_2 = 0$.
      Then combine it with the first half: the hidden layer computes
      $a_1^{(2)} = x_1 \text{ AND } x_2$ and $a_2^{(2)} = (\text{NOT } x_1) \text{ AND } (\text{NOT } x_2)$,
      and the output neuron ORs them: $h_\Theta(x) = g(-10 + 20a_1^{(2)} + 20a_2^{(2)})$.
      A small sketch verifying this construction follows below.
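
    A minimal sketch checking the XNOR construction (the $\pm 20$ weights are the classic values listed above; `g` is the sigmoid):

    import math

    def g(z):
        # sigmoid activation
        return 1.0 / (1.0 + math.exp(-z))

    # Single neurons with the weights from the list above.
    def AND(x1, x2): return g(-30 + 20 * x1 + 20 * x2)
    def OR(x1, x2):  return g(-10 + 20 * x1 + 20 * x2)
    def NOR(x1, x2): return g(10 - 20 * x1 - 20 * x2)   # (NOT x1) AND (NOT x2)

    def XNOR(x1, x2):
        a1 = AND(x1, x2)   # hidden unit a_1^(2)
        a2 = NOR(x1, x2)   # hidden unit a_2^(2)
        return OR(a1, a2)  # output neuron ORs the hidden activations

    for x1 in (0, 1):
        for x2 in (0, 1):
            print(x1, x2, round(XNOR(x1, x2)))   # 1 for (0,0) and (1,1), else 0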

    Multiclass Classification

    For multiclass problems the output layer has one unit per class, so $h_\Theta(x) \in \mathbb{R}^K$; the training labels are one-hot vectors, and the predicted class is the unit with the largest output.
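
    For example (a minimal sketch with made-up numbers):

    import torch

    h = torch.tensor([0.1, 0.7, 0.15, 0.05])   # hypothetical outputs for K = 4 classes
    y = torch.tensor([0.0, 1.0, 0.0, 0.0])     # one-hot target for class 1
    predicted = h.argmax().item()               # predict the largest output unit -> 1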

    Motivation

    Implementation

    import torch
    from torchvision.datasets import FashionMNIST
    from torch.utils.data import DataLoader
    import torchvision.transforms as transforms
    import numpy as np
    
    
    def load_data():
        # My data is in ../data/FashionMNIST/raw & ../data/FashionMNIST/processed
        train_data = FashionMNIST(root='../data', train=True, download=False, transform=transforms.ToTensor())
        test_data = FashionMNIST(root='../data', train=False, download=False, transform=transforms.ToTensor())
        train_iter = DataLoader(train_data, batch_size=256, shuffle=True, num_workers=0)
        test_iter = DataLoader(test_data, batch_size=256, shuffle=False, num_workers=0)
        return train_iter, test_iter
    
    
    # Parameters of a 784-256-10 MLP, initialized with small Gaussian noise.
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    W1 = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_hiddens)), dtype=torch.float)
    b1 = torch.zeros(num_hiddens, dtype=torch.float)
    W2 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_outputs)), dtype=torch.float)
    b2 = torch.zeros(num_outputs, dtype=torch.float)
    # W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens, requires_grad=True) * 0.01)
    # b1 = nn.Parameter(torch.zeros(num_hiddens, requires_grad=True))
    # W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs, requires_grad=True) * 0.01)
    # b2 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
    params = [W1, b1, W2, b2]
    for param in params:
        param.requires_grad_(requires_grad=True)
    
    
    def relu(X):
        # Element-wise max(x, 0).
        return torch.max(X, torch.zeros_like(X))
    
    
    def softmax(X):
        # Subtract the row-wise max before exponentiating for numerical stability.
        X_exp = torch.exp(X - X.max(dim=1, keepdim=True).values)
        partition = X_exp.sum(1, keepdim=True)
        return X_exp / partition
    
    
    def net(X):
        X = X.reshape((-1, num_inputs))
        H = relu(torch.matmul(X, W1) + b1)
        output = torch.matmul(H, W2) + b2
        return softmax(output)
    
    
    def cross_entropy(y_predict, y):
        # Negative log-probability of the true class for each example.
        return -torch.log(y_predict[range(len(y_predict)), y])
    
    
    def sgd(params, lr, batch_size):
        # Mini-batch SGD; updating .data keeps the step out of the autograd graph.
        for param in params:
            param.data -= lr * param.grad / batch_size
    
    
    def evaluate_accuracy(net, data_iter):
        acc_sum, n = 0.0, 0
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
        return acc_sum / n
    
    
    def train(net, train_iter, test_iter, loss, num_epochs, batch_size=256, lr=0.1, params=None, optimizer=None):
        for epoch in range(num_epochs):
            train_loss_sum, train_acc_sum, n = 0.0, 0.0, 0
            for X, y in train_iter:
                y_predict = net(X)
                l = loss(y_predict, y).sum()
                l.backward()
                sgd(params, lr, batch_size)
    
                # Zero the gradients so they don't accumulate across batches.
                W1.grad.data.zero_()
                b1.grad.data.zero_()
                W2.grad.data.zero_()
                b2.grad.data.zero_()
    
                train_loss_sum += l.item()
                train_acc_sum += (y_predict.argmax(dim=1) == y).sum().item()
                n += y.shape[0]
            test_acc = evaluate_accuracy(net, test_iter)
            print('epoch %d, loss %.4f, train_acc %.4f, test_acc %.4f'
                  % (epoch + 1, train_loss_sum / n, train_acc_sum / n, test_acc))
    
    
    if __name__ == "__main__":
        # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        train_iter, test_iter = load_data()
        train(net, train_iter, test_iter, cross_entropy, 10, lr=0.1, params=params)
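
    The same model can be written much more concisely with nn.Module, a built-in loss function, and a built-in optimizer: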
    
    
    import torch
    import torch.nn as nn
    import torch.nn.init as init
    from torchvision.datasets import FashionMNIST
    from torch.utils.data import DataLoader
    import torchvision.transforms as transforms
    
    
    def load_data():
        # My data is in ../data/FashionMNIST/raw & ../data/FashionMNIST/processed
        train_data = FashionMNIST(root='../data', train=True, download=False, transform=transforms.ToTensor())
        test_data = FashionMNIST(root='../data', train=False, download=False, transform=transforms.ToTensor())
        train_iter = DataLoader(train_data, batch_size=256, shuffle=True, num_workers=0)
        test_iter = DataLoader(test_data, batch_size=256, shuffle=False, num_workers=0)
        return train_iter, test_iter
    
    
    class Net(nn.Module):
        def __init__(self, num_inputs, num_outputs, num_hiddens):
            super(Net, self).__init__()
            self.l1 = nn.Linear(num_inputs, num_hiddens)
            self.relu1 = nn.ReLU()
            self.l2 = nn.Linear(num_hiddens, num_outputs)
    
        def forward(self, X):
            X = X.view(X.shape[0], -1)   # flatten each image to a 784-vector
            o1 = self.relu1(self.l1(X))
            o2 = self.l2(o1)
            return o2                    # raw logits; softmax is handled by the loss
    
        def init_params(self):
            for param in self.parameters():
                init.normal_(param, mean=0, std=0.01)
    
    
    def evaluate_accuracy(net, data_iter):
        acc_sum, n = 0.0, 0
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
        return acc_sum / n
    
    
    def train(net, train_iter, test_iter, loss, num_epochs, batch_size=256, lr=0.1, params=None, optimizer=None):
        for epoch in range(num_epochs):
            train_loss_sum, train_acc_sum, n = 0.0, 0.0, 0
            for X, y in train_iter:
                y_predict = net(X)
                l = loss(y_predict, y)  # nn.CrossEntropyLoss averages over the batch
                l.backward()

                optimizer.step()
                optimizer.zero_grad()

                train_loss_sum += l.item() * y.shape[0]  # undo the averaging for per-example reporting
                train_acc_sum += (y_predict.argmax(dim=1) == y).sum().item()
                n += y.shape[0]
            test_acc = evaluate_accuracy(net, test_iter)
            print('epoch %d, loss %.4f, train_acc %.4f, test_acc %.4f'
                  % (epoch + 1, train_loss_sum / n, train_acc_sum / n, test_acc))
    
    
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    net = Net(num_inputs, num_outputs, num_hiddens)
    net.init_params()
    
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
    
    
    if __name__ == "__main__":
        train_iter, test_iter = load_data()
        train(net, train_iter, test_iter, loss, 10, optimizer=optimizer)
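
    Note that nn.CrossEntropyLoss combines log-softmax with the negative log-likelihood, which is why Net.forward returns raw logits rather than applying softmax as the from-scratch version does.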
        
    