• BP优化


    import numpy as np
    import struct
    import random
    import matplotlib.pyplot as plt
    import pandas as pd
    import math
    
    
    class Dataset:
        """Pairs an indexable collection of images with a parallel collection of labels."""

        def __init__(self, images, labels):
            self.images, self.labels = images, labels

        def __len__(self):
            """Return the number of samples, measured on the image collection."""
            return len(self.images)

        def __getitem__(self, index):
            """Return the ``(image, label)`` pair stored at ``index``."""
            image = self.images[index]
            label = self.labels[index]
            return image, label
    
    
    class DataLoaderIterator:
        """Single-pass iterator that yields mini-batches for a data loader.

        The loader object must expose ``dataset``, ``count_data``,
        ``batch_size`` and ``shuffle``.  Each call to ``__next__`` returns a
        list with one stacked array per field of a dataset sample (for
        ``(image, label)`` samples: ``[images, labels]``).  The last batch may
        be smaller than ``batch_size``.
        """

        def __init__(self, dataloader):
            self.dataloader = dataloader
            self.cursor = 0  # position of the next unread entry in ``indexs``
            self.indexs = list(range(self.dataloader.count_data))
            if self.dataloader.shuffle:
                # Shuffle the visiting order once per pass over the data.
                np.random.shuffle(self.indexs)

        def __iter__(self):
            # Required by the iterator protocol so the object itself can be
            # used in ``for`` loops, ``list(...)`` and ``iter(...)``.
            return self

        def __next__(self):
            total = self.dataloader.count_data
            if self.cursor >= total:
                raise StopIteration()

            # Take up to ``batch_size`` indices, fewer for the final batch.
            stop = min(self.cursor + self.dataloader.batch_size, total)
            dataset = self.dataloader.dataset
            samples = [dataset[i] for i in self.indexs[self.cursor:stop]]
            self.cursor = stop

            # Regroup per-sample tuples into per-field columns, then stack each
            # column once instead of concatenating incrementally.
            return [np.vstack(column) for column in zip(*samples)]
    
    
    class DataLoader:
        """Iterable wrapper that serves a dataset in mini-batches.

        ``shuffle`` controls whether each pass visits the samples in a random
        order; ``count_data`` caches the dataset length at construction time.
        """

        def __init__(self, dataset, batch_size, shuffle):
            self.count_data = len(dataset)
            self.batch_size = batch_size
            self.shuffle = shuffle
            self.dataset = dataset

        def __iter__(self):
            # A fresh iterator (and a fresh shuffle, if enabled) per pass.
            iterator = DataLoaderIterator(self)
            return iterator
    
    
    class Module:
        """Base class for network components.

        Child modules and parameters are discovered by scanning the instance
        ``__dict__``: assigning a ``Module`` or ``Parameter`` to an attribute
        is enough to register it.  Subclasses provide ``forward``.
        """

        def __init__(self, name):
            self.name = name
            self.train_mode = False  # modules start out in evaluation mode

        def __call__(self, *args):
            # Calling a module is shorthand for running its forward pass.
            return self.forward(*args)

        def train(self):
            """Put this module and everything below it into training mode."""
            self.train_mode = True
            for child in self.modules():
                child.train()

        def eval(self):
            """Put this module and everything below it into evaluation mode."""
            self.train_mode = False
            for child in self.modules():
                child.eval()

        def modules(self):
            """Return the direct child modules, in attribute-definition order."""
            return [value for value in self.__dict__.values() if isinstance(value, Module)]

        def params(self):
            """Return own parameters followed by those of all descendants."""
            found = [value for value in self.__dict__.values() if isinstance(value, Parameter)]
            for child in self.modules():
                found.extend(child.params())
            return found

        def info(self, n):
            """Return a multi-line description; ``n`` is the current nesting depth."""
            pad = '  ' * (n + 1)
            lines = [f"{self.name}"]
            lines.extend(f"{pad}{child.info(n + 1)}" for child in self.modules())
            return "\n".join(lines)

        def __repr__(self):
            return self.info(0)
    
    
    class Initializer:
        """Base class for weight initializers; subclasses provide ``apply``."""

        def __init__(self, name):
            self.name = name

        def __call__(self, *args):
            # Calling the initializer forwards all arguments to ``apply``.
            apply = self.apply
            return apply(*args)
    
    
    class GaussInitializer(Initializer):
        """Fills an array in place with samples from a normal distribution.

        ``mu`` is the mean and ``sigma`` the standard deviation (the square of
        the standard deviation, ``sigma ** 2``, is the variance).
        """

        def __init__(self, mu, sigma):
            # Run the base initializer so the inherited ``name`` attribute
            # exists on every instance.
            super().__init__("GaussInitializer")
            self.mu = mu
            self.sigma = sigma

        def apply(self, value):
            """Overwrite ``value`` in place with N(mu, sigma) samples of the same shape."""
            value[...] = np.random.normal(self.mu, self.sigma, value.shape)
    
    
    class Parameter:
        """A trainable array (``value``) together with its gradient buffer (``delta``)."""

        def __init__(self, value):
            self.value = value
            # Gradient accumulator with the same shape as the value.
            self.delta = np.zeros(value.shape)

        def zero_grad(self):
            """Reset the accumulated gradient in place."""
            self.delta.fill(0)
    
    
    class Linear(Module):
        """Fully connected layer computing ``x @ W + b``."""

        def __init__(self, input_feature, output_feature):
            super().__init__("Linear")
            self.input_feature = input_feature
            self.output_feature = output_feature
            self.weights = Parameter(np.zeros((input_feature, output_feature)))
            self.bias = Parameter(np.zeros((1, output_feature)))

            # Weights are drawn from N(0, sqrt(2 / input_feature)); the bias
            # stays at zero.
            weight_std = np.sqrt(2 / input_feature)
            GaussInitializer(0, weight_std).apply(self.weights.value)

        def forward(self, x):
            # Keep a private copy of the input: backward needs it and later
            # layers may modify their input in place.
            self.x_save = x.copy()
            projected = x @ self.weights.value
            return projected + self.bias.value

        def backward(self, G):
            """Accumulate parameter gradients and return the gradient w.r.t. the input.

            For C = A @ B with upstream gradient G: dB = A.T @ G and
            dA = G @ B.T.
            """
            self.weights.delta += self.x_save.T @ G
            # Summing over the batch axis undoes the broadcast of the bias.
            self.bias.delta += np.sum(G, axis=0)
            return G @ self.weights.value.T
    
    
    class ReLU(Module):
        """Rectified linear unit; with ``inplace=True`` the input array is modified directly."""

        def __init__(self, inplace=True):
            super().__init__("ReLU")
            self.inplace = inplace

        def forward(self, x):
            # Remember where the input was negative; backward blocks the
            # gradient at exactly those positions.
            self.negative_position = x < 0
            out = x if self.inplace else x.copy()
            out[self.negative_position] = 0
            return out

        def backward(self, G):
            grad = G if self.inplace else G.copy()
            grad[self.negative_position] = 0
            return grad
    
    
    def sigmoid(x):
        """Numerically stable logistic sigmoid of a NumPy array.

        Negative entries use ``exp(x) / (1 + exp(x))`` and non-negative entries
        use ``1 / (1 + exp(-x))`` so that ``exp`` never receives a large
        positive argument.  The input is not modified.

        Integer and boolean inputs are converted to float64 first; writing the
        results back into an integer array would truncate every value to 0.
        Floating-point inputs keep their dtype.
        """
        negative = x < 0
        if x.dtype.kind in "biu":
            out = x.astype(np.float64)
        else:
            out = x.copy()

        exp_neg = np.exp(out[negative])
        out[negative] = exp_neg / (1 + exp_neg)
        out[~negative] = 1 / (1 + np.exp(-out[~negative]))
        return out
    
    
    class SWish(Module):
        """Swish activation: ``x * sigmoid(x)``."""

        def __init__(self):
            super().__init__("SWish")

        def forward(self, x):
            # Cache the input and its sigmoid; both are needed by backward.
            self.x_save = x.copy()
            self.sx = sigmoid(x)
            activated = x * self.sx
            return activated

        def backward(self, G):
            # d/dx [x * s(x)] = s(x) + x * s(x) * (1 - s(x))
            local_grad = self.sx + self.x_save * self.sx * (1 - self.sx)
            return G * local_grad
    
    
    class Dropout(Module):
        """Inverted dropout.

        In training mode each element is zeroed with probability
        ``1 - prob_keep`` and the survivors are scaled by ``1 / prob_keep``.
        In evaluation mode the layer is the identity.  With ``inplace=True``
        the input (and the incoming gradient) is modified directly.
        """

        def __init__(self, prob_keep=0.5, inplace=True):
            super().__init__("Dropout")
            self.prob_keep = prob_keep
            self.inplace = inplace
            # Boolean drop mask of the last training forward pass; None when
            # the last forward pass was an identity (evaluation mode).
            self.mask = None

        def forward(self, x):
            if not self.train_mode:
                self.mask = None
                return x

            # True marks an element to drop.  The mask MUST be boolean: indexing
            # with the raw 0/1 integer samples would be integer fancy indexing
            # and would zero whole rows 0 and 1 instead of the sampled elements.
            self.mask = np.random.binomial(size=x.shape, p=1 - self.prob_keep, n=1).astype(bool)
            if not self.inplace:
                x = x.copy()

            x[self.mask] = 0
            x *= 1 / self.prob_keep
            return x

        def backward(self, G):
            # Mirror the last forward pass: identity when no mask was applied.
            if self.mask is None:
                return G

            if not self.inplace:
                G = G.copy()
            G[self.mask] = 0
            G *= 1 / self.prob_keep
            return G
    
    
    class ModuleList(Module):
        """Runs a fixed sequence of modules one after another."""

        def __init__(self, *args):
            super().__init__("ModuleList")
            self.ms = list(args)

        def modules(self):
            # Children live in a list, not in attributes, so report it directly.
            return self.ms

        def forward(self, x):
            out = x
            for layer in self.ms:
                out = layer(out)
            return out

        def backward(self, G):
            # Propagate the gradient through the layers in reverse order.
            grad = G
            for layer in reversed(self.ms):
                grad = layer.backward(grad)
            return grad
    
    
    class SigmoidCrossEntropy(Module):
        """Sigmoid + binary cross-entropy loss with a weight-norm penalty.

        ``params`` is the list of parameters the penalty applies to and
        ``weight_decay`` its coefficient.
        """

        def __init__(self, params, weight_decay=1e-5):
            super().__init__("CrossEntropyLoss")
            # NOTE: on this instance the attribute replaces the inherited
            # ``params()`` method with the given list.
            self.params = params
            self.weight_decay = weight_decay

        def sigmoid(self, x):
            """Stable sigmoid: each branch only exponentiates non-positive values."""
            negative = x < 0
            non_negative = ~negative
            out = x.copy()
            out[negative] = np.exp(x[negative]) / (1 + np.exp(x[negative]))
            out[non_negative] = 1 / (1 + np.exp(-x[non_negative]))
            return out

        def decay_loss(self):
            """Sum over parameters of ``||p||_2 / (2 * p.size) * weight_decay``."""
            return sum(
                np.sqrt(np.sum(p.value ** 2)) / (2 * p.value.size) * self.weight_decay
                for p in self.params
            )

        def decay_backward(self):
            """Add the gradient of the penalty term to every parameter's ``delta``."""
            eps = 1e-8  # keeps the division finite when a parameter is all zeros
            for p in self.params:
                norm = np.sqrt(np.sum(p.value ** 2))
                scale = 1 / (2 * norm + eps) / (2 * p.value.size) * self.weight_decay * 2
                p.delta += scale * p.value

        def forward(self, x, label_onehot):
            eps = 1e-6
            self.label_onehot = label_onehot
            # Clip the probabilities away from 0 and 1 so the logs stay finite.
            self.predict = np.clip(self.sigmoid(x), a_max=1 - eps, a_min=eps)
            self.batch_size = self.predict.shape[0]
            positive_term = label_onehot * np.log(self.predict)
            negative_term = (1 - label_onehot) * np.log(1 - self.predict)
            data_loss = -np.sum(positive_term + negative_term) / self.batch_size
            return data_loss + self.decay_loss()

        def backward(self):
            """Apply the penalty gradient and return the gradient w.r.t. the logits."""
            self.decay_backward()
            return (self.predict - self.label_onehot) / self.batch_size
    
    
    class SoftmaxCrossEntropy(Module):
        """Softmax followed by cross-entropy against one-hot targets."""

        def __init__(self):
            super().__init__("SoftmaxCrossEntropy")

        def softmax(self, x):
            # Subtracting the row maximum leaves the result unchanged but keeps
            # every exponent <= 0, so exp cannot overflow.
            shifted = x - np.max(x, axis=1, keepdims=True)
            exp_x = np.exp(shifted)
            row_totals = np.sum(exp_x, axis=1, keepdims=True)
            return exp_x / row_totals

        def forward(self, x, label_onehot):
            eps = 1e-6
            self.label_onehot = label_onehot
            # Clip the probabilities away from 0 and 1 so the log stays finite.
            self.predict = np.clip(self.softmax(x), a_max=1 - eps, a_min=eps)
            self.batch_size = self.predict.shape[0]
            log_likelihood = np.sum(label_onehot * np.log(self.predict))
            return -log_likelihood / self.batch_size

        def backward(self):
            """Return the gradient w.r.t. the logits, averaged over the batch."""
            return (self.predict - self.label_onehot) / self.batch_size
    
    
    class Optimizer:
        """Base class for optimizers; subclasses implement ``step``.

        The parameter list is collected once from ``model.params()`` at
        construction time.
        """

        def __init__(self, name, model, lr):
            self.name, self.model, self.lr = name, model, lr
            self.params = model.params()

        def zero_grad(self):
            """Reset the gradient of every tracked parameter."""
            for parameter in self.params:
                parameter.zero_grad()

        def set_lr(self, lr):
            """Replace the learning rate used by subsequent steps."""
            self.lr = lr
    
    
    class SGD(Optimizer):
        """Plain gradient descent: ``value -= lr * delta``."""

        def __init__(self, model, lr=1e-3):
            super().__init__("SGD", model, lr)

        def step(self):
            """Update every parameter in place from its accumulated gradient."""
            for parameter in self.params:
                update = self.lr * parameter.delta
                parameter.value -= update
    
    
    class SGDMomentum(Optimizer):
        """Gradient descent with a momentum (velocity) term per parameter."""

        def __init__(self, model, lr=1e-3, momentum=0.9):
            super().__init__("SGDMomentum", model, lr)
            self.momentum = momentum

            # The velocity is stored on each parameter object and starts at zero.
            for parameter in self.params:
                parameter.v = 0

        def step(self):
            """Decay the previous velocity, subtract the scaled gradient, then move."""
            for parameter in self.params:
                velocity = self.momentum * parameter.v - self.lr * parameter.delta
                parameter.v = velocity
                parameter.value += velocity
    
    
    class Adam(Optimizer):
        """Adam optimizer with an optional shrinkage term on the weights.

        ``l2_regularization`` multiplies the parameter value directly (it is
        not scaled by the learning rate) and is subtracted on every step.
        """

        def __init__(self, model, lr=1e-3, beta1=0.9, beta2=0.999, l2_regularization=0):
            super().__init__("Adam", model, lr)
            self.beta1 = beta1
            self.beta2 = beta2
            self.l2_regularization = l2_regularization
            self.t = 0  # number of steps taken so far

            # First and second moment estimates live on the parameter objects.
            for parameter in self.params:
                parameter.m = 0
                parameter.v = 0

        def step(self):
            """Update the moment estimates and apply one bias-corrected Adam step."""
            eps = 1e-8
            self.t += 1
            # The bias-correction denominators depend only on the step count.
            correction1 = 1 - self.beta1 ** self.t
            correction2 = 1 - self.beta2 ** self.t
            for parameter in self.params:
                grad = parameter.delta
                parameter.m = self.beta1 * parameter.m + (1 - self.beta1) * grad
                parameter.v = self.beta2 * parameter.v + (1 - self.beta2) * grad ** 2
                m_hat = parameter.m / correction1
                v_hat = parameter.v / correction2
                adaptive_step = self.lr * m_hat / (np.sqrt(v_hat) + eps)
                shrinkage = self.l2_regularization * parameter.value
                parameter.value -= adaptive_step + shrinkage
    
    
    class Model(Module):
        """Two-layer perceptron: Linear -> ReLU -> Dropout -> Linear."""

        def __init__(self, num_feature, num_hidden, num_classes):
            super().__init__("Model")
            # Layers are built in network order, so the hidden layer's weights
            # are drawn from the random generator before the output layer's.
            hidden = Linear(num_feature, num_hidden)
            activation = ReLU()
            dropout = Dropout()
            output = Linear(num_hidden, num_classes)
            self.backbone = ModuleList(hidden, activation, dropout, output)

        def forward(self, x):
            logits = self.backbone(x)
            return logits

        def backward(self, G):
            """Propagate the loss gradient ``G`` back through the backbone."""
            return self.backbone.backward(G)
    
    
    def sigmoid(x):
        """Logistic sigmoid ``1 / (1 + exp(-x))`` for scalars or arrays.

        Evaluated as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
        large negative inputs do not overflow inside ``exp``.  This definition
        replaces any earlier module-level ``sigmoid`` and is the one resolved
        by later callers, so it has to be numerically safe as well.
        """
        return np.exp(-np.logaddexp(0, -x))
    
    
    def estimate_val(predict, gt_labels, classes, loss_func):
        """Return ``(accuracy, loss)`` of ``predict`` against the labels.

        The predicted class is the arg-max of each row of ``predict``; the
        loss is ``loss_func`` evaluated on the raw predictions and the one-hot
        encoded labels.
        """
        hits = predict.argmax(1) == gt_labels
        accuracy = sum(hits) / predict.shape[0]
        loss = loss_func(predict, one_hot(gt_labels, classes))
        return accuracy, loss
    
    
    def lr_schedule_cosine(lr_min, lr_max, per_epochs):
        """Build a cosine learning-rate schedule.

        The returned function maps an epoch to a learning rate: ``lr_max`` at
        epoch 0, ``lr_min`` at epoch ``per_epochs``, following a cosine of
        period ``2 * per_epochs`` in between and beyond.
        """

        def compute(epoch):
            phase = epoch / per_epochs * np.pi
            cosine_factor = 1 + np.cos(phase)
            return lr_min + 0.5 * (lr_max - lr_min) * cosine_factor

        return compute
    
    def load_images(file):
        """Read an IDX3 image file and return one flattened uint8 row per image.

        The 16-byte header holds four big-endian int32 values; the first is
        the magic number 2051 and the second the number of images.  On a magic
        number mismatch a message is printed and ``None`` is returned.
        """
        with open(file, "rb") as f:
            data = f.read()

        header = struct.unpack_from(">iiii", data[:16])
        magic_number, num_samples = header[0], header[1]
        if magic_number == 2051:  # 0x00000803
            pixels = np.frombuffer(data[16:], dtype=np.uint8)
            return pixels.reshape(num_samples, -1)

        print(f"magic number mismatch {magic_number} != 2051")
        return None
    
    
    def one_hot(labels, classes, label_smoothing=0):
        """Encode integer labels as float32 one-hot rows with optional smoothing.

        Every entry starts at ``label_smoothing / classes``; the entry of the
        true class is set to ``1 - label_smoothing + label_smoothing / classes``.
        """
        n = len(labels)
        eoff = label_smoothing / classes
        output = np.ones((n, classes), dtype=np.float32) * eoff
        hot_value = 1 - label_smoothing + eoff
        # Each ``encoded`` is a view onto one row of ``output``.
        for encoded, label in zip(output, labels):
            encoded[label] = hot_value
        return output
    def load_labels(file):
        """Read an IDX1 label file and return the labels as an integer array.

        The 8-byte header holds two big-endian int32 values: the magic number
        2049 and the number of labels.  On a magic number mismatch a message
        is printed and ``None`` is returned.

        The label bytes are viewed directly as uint8 (no per-byte Python
        objects are created) and then widened to NumPy's default integer
        dtype, so the result is a writable integer array as before.
        """
        with open(file, "rb") as f:
            data = f.read()

        magic_number, num_samples = struct.unpack(">ii", data[:8])
        if magic_number != 2049:  # 0x00000801
            print(f"magic number mismatch {magic_number} != 2049")
            return None

        # ``astype`` copies, so the returned array owns writable memory.
        labels = np.frombuffer(data, dtype=np.uint8, offset=8).astype(int)
        return labels
    # ---- Validation split ----
    # NOTE(review): the dataset paths are hard-coded; sizes in the comments below
    # assume the standard MNIST files (10000 test / 60000 training images of 784 pixels).
    val_labels = load_labels("E:/杜老师课程/dataset/t10k-labels-idx1-ubyte")  # presumably (10000,)
    val_images = load_images("E:/杜老师课程/dataset/t10k-images-idx3-ubyte")  # presumably (10000, 784)
    numdata = val_images.shape[0]  # number of validation images
    # Scale the uint8 pixels to [-0.5, 0.5] and append a constant column of ones.
    val_images = np.hstack((val_images / 255 - 0.5, np.ones((numdata, 1))))  # one extra column, presumably (10000, 785)
    val_pd = pd.DataFrame(val_labels, columns=["label"])

    # ---- Training split (same preprocessing as the validation split) ----
    train_labels = load_labels("E:/杜老师课程/dataset/train-labels-idx1-ubyte")  # presumably (60000,)
    train_images = load_images("E:/杜老师课程/dataset/train-images-idx3-ubyte")  # presumably (60000, 784)
    numdata = train_images.shape[0]  # number of training images
    train_images = np.hstack((train_images / 255 - 0.5, np.ones((numdata, 1))))  # one extra column, presumably (60000, 785)
    train_pd = pd.DataFrame(train_labels, columns=["label"])
    np.random.seed(3)  # fixes NumPy's global random state for everything created below
    classes = 10  # number of target classes
    batch_size = 64  # samples per mini-batch
    epochs = 20  # stopping rule: at most this many passes over the training data
    lr = 1e-2  # initial learning rate
    numdata, data_dims = train_images.shape  # data_dims includes the appended ones column

    # Build the dataset / dataloader used to fetch shuffled mini-batches.
    train_data = DataLoader(Dataset(train_images, one_hot(train_labels, classes)), batch_size, shuffle=True)
    model = Model(data_dims, 1024, classes)
    # loss_func = SoftmaxCrossEntropy()
    loss_func = SigmoidCrossEntropy(model.params(), 0)  # weight_decay = 0
    optim = Adam(model, lr)
    iters = 0  # total number of optimizer steps taken (an x-axis for a loss curve)

    # Step schedule: at the start of the given epoch, switch to the given learning rate.
    lr_schedule = {
        5: 1e-3,
        15: 1e-4,
        18: 1e-5
    }

    # Main loop: run `epochs` passes over the training data.
    for epoch in range(epochs):

        if epoch in lr_schedule:
            lr = lr_schedule[epoch]
            optim.set_lr(lr)

        model.train()
        # Each iteration processes one mini-batch of up to `batch_size` samples.
        for index, (images, labels) in enumerate(train_data):
            x = model(images)

            # Compute the loss; this also caches what loss_func.backward() needs.
            loss = loss_func(x, labels)

            # Clear old gradients before backpropagating the new ones.
            optim.zero_grad()
            G = loss_func.backward()
            model.backward(G)
            optim.step()  # apply the gradients to update the parameters
            iters += 1

        # `loss` here is the loss of the last mini-batch of the epoch.
        print(f"Iter {iters}, {epoch} / {epochs}, Loss {loss:.3f}, LR {lr:g}")

        # Evaluate on the full validation set in evaluation mode.
        model.eval()
        val_accuracy, val_loss = estimate_val(model(val_images), val_labels, classes, loss_func)
        print(f"Val set, Accuracy: {val_accuracy:.6f}, Loss: {val_loss:.3f}")
    def load_labels(file):
        """Read an IDX1 label file and return the labels as a uint8 array.

        The 8-byte header holds two big-endian int32 values: the magic number
        2049 and the number of labels.  On a magic number mismatch a message
        is printed and ``None`` is returned.
        """
        with open(file, "rb") as f:
            data = f.read()

        header = struct.unpack_from(">ii", data[:8])
        magic_number = header[0]
        if magic_number == 2049:  # 0x00000801
            return np.frombuffer(data[8:], dtype=np.uint8)

        print(f"magic number mismatch {magic_number} != 2049")
        return None
    View Code
  • 相关阅读:
    linux安装JRE和Tomcat
    微信公众号授权登录
    linux Nginx设置多级域名
    bootstrap制作收藏夹导航
    js下拉菜单
    QQ授权登录
    centos7.6下安装LNMP环境(linux+nginx+mysql5.7+PHP)
    typora快捷键(转载)
    旋转魔方(2)-添加照片
    test
  • 原文地址:https://www.cnblogs.com/xiaoruirui/p/16821210.html
Copyright © 2020-2023  润新知