• NaN appearing when using certain activation functions


    import numpy as np
    import struct
    import random
    import matplotlib.pyplot as plt
    import pandas as pd
    import math
    
    
    def load_labels(file):
        with open(file, "rb") as f:
            data = f.read()
    
        magic_number, num_samples = struct.unpack(">ii", data[:8])
        if magic_number != 2049:  # 0x00000801
            print(f"magic number mismatch {magic_number} != 2049")
            return None
    
        labels = np.array(list(data[8:]))
        return labels
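    # An equivalent, allocation-friendly parse of the same bytes (a sketch):
    #   labels = np.frombuffer(data, dtype=np.uint8, offset=8)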
    
    
    def load_images(file):
        with open(file, "rb") as f:
            data = f.read()
    
        magic_number, num_samples, image_width, image_height = struct.unpack(">iiii", data[:16])
        if magic_number != 2051:  # 0x00000803
            print(f"magic number mismatch {magic_number} != 2051")
            return None
    
        image_data = np.asarray(list(data[16:]), dtype=np.uint8).reshape(num_samples, -1)
        return image_data
    
    def one_hot(labels, classes):
        n = len(labels)
        output = np.zeros((n, classes), dtype=np.int32)
        for row, label in enumerate(labels):
            output[row, label] = 1
        return output
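    # A vectorized equivalent for non-negative integer labels (a sketch):
    #   output = np.eye(classes, dtype=np.int32)[labels]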
    
    
    val_labels = load_labels("E:/杜老师课程/dataset/t10k-labels-idx1-ubyte")  # 10000,
    val_images = load_images("E:/杜老师课程/dataset/t10k-images-idx3-ubyte")  # 10000, 784
    numdata = val_images.shape[0]  # 10000
    val_images = np.hstack((val_images / 255 - 0.5, np.ones((numdata, 1))))  # 10000, 785
    val_pd = pd.DataFrame(val_labels, columns=["label"])
    
    train_labels = load_labels("E:/杜老师课程/dataset/train-labels-idx1-ubyte")  # 60000,
    train_images = load_images("E:/杜老师课程/dataset/train-images-idx3-ubyte")  # 60000, 784
    numdata = train_images.shape[0]  # 60000
    train_images = np.hstack((train_images / 255 - 0.5, np.ones((numdata, 1))))  # 60000, 785
    train_pd = pd.DataFrame(train_labels, columns=["label"])
    
    
    def show_hist(labels, num_classes):
        label_map = {key: 0 for key in range(num_classes)}
        for label in labels:
            label_map[label] += 1
    
        labels_hist = [label_map[key] for key in range(num_classes)]
        pd.DataFrame(labels_hist, columns=["label"]).plot(kind="bar")
    
    
    show_hist(train_labels, 10)
    
    
    class Dataset:
        def __init__(self, images, labels):
            self.images = images
            self.labels = labels
    
        # Fetch one item:  dataset = Dataset(...),   dataset[index]
        def __getitem__(self, index):
            return self.images[index], self.labels[index]
    
        # Number of samples in the dataset
        def __len__(self):
            return len(self.images)
    
    
    class DataLoaderIterator:
        def __init__(self, dataloader):
            self.dataloader = dataloader
            self.cursor = 0
            self.indexs = list(range(self.dataloader.count_data))  # 0 ... count_data - 1
            if self.dataloader.shuffle:
                # randomize the visit order
                random.shuffle(self.indexs)
    
        # Merge one sample's fields into the batch container
        def merge_to(self, container, b):
            if len(container) == 0:
                for index, data in enumerate(b):
                    if isinstance(data, np.ndarray):
                        container.append(data)
                    else:
                        container.append(np.array([data], dtype=type(data)))
            else:
                for index, data in enumerate(b):
                    container[index] = np.vstack((container[index], data))
            return container
    
        def __next__(self):
            if self.cursor >= self.dataloader.count_data:
                raise StopIteration()
    
            batch_data = []
            remain = min(self.dataloader.batch_size, self.dataloader.count_data - self.cursor)  # batch_size, or fewer for the last batch
            for n in range(remain):
                index = self.indexs[self.cursor]
                data = self.dataloader.dataset[index]
                batch_data = self.merge_to(batch_data, data)
                self.cursor += 1
            return batch_data
    
    
    class DataLoader:
    
        # shuffle: randomize the sample order every epoch
        def __init__(self, dataset, batch_size, shuffle):
            self.dataset = dataset
            self.shuffle = shuffle
            self.count_data = len(dataset)
            self.batch_size = batch_size
    
        def __iter__(self):
            return DataLoaderIterator(self)
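    # Usage sketch (mirrors the training setup further down):
    #   loader = DataLoader(Dataset(train_images, one_hot(train_labels, 10)), 512, shuffle=True)
    #   for images, labels in loader:
    #       ...  # images: (<=512, 785), labels: (<=512, 10)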
    
    
    def estimate(plabel, gt_labels, class_id):
        # Per-class binary metrics: treat class_id as the positive class,
        # everything else as negative.
        plabel = plabel.copy()
        gt_labels = gt_labels.copy()
        match_mask = plabel == class_id
        mismatch_mask = plabel != class_id
        plabel[match_mask] = 1
        plabel[mismatch_mask] = 0

        gt_mask = gt_labels == class_id
        gt_mismatch_mask = gt_labels != class_id
        gt_labels[gt_mask] = 1
        gt_labels[gt_mismatch_mask] = 0
    
        TP = sum(plabel & gt_labels)
        FP = sum(plabel & (1 - gt_labels))
        FN = sum((1 - plabel) & gt_labels)
        TN = sum((1 - plabel) & (1 - gt_labels))
    
        precision = TP / (TP + FP)
        recall = TP / (TP + FN)
        accuracy = (TP + TN) / (TP + FP + FN + TN)
        F1 = 2 * (precision * recall) / (precision + recall)
        return precision, recall, accuracy, F1
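    # Usage sketch (estimate is defined here but never called in this script):
    #   precision, recall, accuracy, F1 = estimate(plabel, val_labels, 3)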
    
    
    def estimate_val(images, gt_labels, theta, classes):
        predict = sigmoid(images @ theta)
        plabel = predict.argmax(1)
        correct = plabel == gt_labels
        total_images = images.shape[0]
        accuracy = sum(correct) / total_images
        return accuracy, cross_entropy(predict, one_hot(gt_labels, classes))
    
    
    def cross_entropy(predict, gt):
        eps = 1e-4
        predict = np.clip(predict, a_max=1 - eps, a_min=eps)  # clip so log() never sees exactly 0 or 1
        batch_size = predict.shape[0]
        return -np.sum(gt * np.log(predict) + (1 - gt) * np.log(1 - predict)) / batch_size
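    # Without the clip above, a sigmoid output saturated to exactly 0.0 or 1.0
    # breaks this loss (a sketch of the mechanism, pure numpy semantics):
    #   np.log(0.0)        -> -inf  (RuntimeWarning: divide by zero)
    #   0.0 * np.log(0.0)  -> nan   (RuntimeWarning: invalid value in multiply)
    # which is exactly the pair of warnings in the second run shown below.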
    
    
    def lr_schedule_cosine(lr_min, lr_max, per_epochs):
        def compute(epoch):
            return lr_min + 0.5 * (lr_max - lr_min) * (1 + np.cos(epoch / per_epochs * np.pi))
    
        return compute
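    # Sanity check of the schedule (a sketch, not part of training): with
    # lr_min=1e-4, lr_max=1e-1, per_epochs=3, the three in-period epochs give
    # the base rates seen in the log below, before the warm-up factor:
    #   sched = lr_schedule_cosine(1e-4, 1e-1, 3)
    #   [f"{sched(e):.5f}" for e in range(3)]  # ['0.10000', '0.07503', '0.02508']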
    
    
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))
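    # Note: np.exp(-x) overflows for large negative x (a RuntimeWarning; the
    # result still rounds to 0.0). A numerically stable variant, offered as a
    # sketch and not used by the rest of this script:
    def stable_sigmoid(x):
        out = np.empty_like(x, dtype=np.float64)
        pos = x >= 0
        out[pos] = 1 / (1 + np.exp(-x[pos]))  # exponent <= 0, cannot overflow
        ex = np.exp(x[~pos])                  # exponent < 0, cannot overflow
        out[~pos] = ex / (1 + ex)
        return out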
    
    
    classes = 10  # number of classes
    batch_size = 512  # samples per batch
    lr_warm_up_alpha = 1e-2  # initial value of the warm-up factor
    lr_min = 1e-4  # minimum of the cosine learning rate
    lr_max = 1e-1  # maximum of the cosine learning rate
    epochs = 10  # stopping criterion: see the full dataset at most 10 times
    numdata, data_dims = train_images.shape  # 60000, 785

    # Dataset and DataLoader for fetching batches
    train_data = DataLoader(Dataset(train_images, one_hot(train_labels, classes)), batch_size, shuffle=True)

    # Initialize theta from a standard normal distribution,
    # shape: data dimensions as rows, classes as columns
    theta = np.random.normal(size=(data_dims, classes))
    iters = 0  # iteration counter; it serves as the x-axis of the loss curves

    # Warm-up schedule: alpha starts at 0.01, becomes 0.1 at epoch 1,
    # and returns to 1 (pure cosine rate) from epoch 2 onward
    lr_warm_up_schedule = {
        1: 1e-1,
        2: 1
    }

    cosine_total_epoch = 3  # period of the cosine schedule, fixed at 3 epochs
    cosine_itepoch = 0  # epoch index within the current cosine period
    lr_cosine_schedule = lr_schedule_cosine(lr_min, lr_max, cosine_total_epoch)  # cosine schedule with its parameters bound
    train_losses = []  # collected train losses (iters, loss), for plotting later
    val_losses = []  # collected val metrics (iters, accuracy, loss), for plotting later
    
    # Epoch loop: make at most `epochs` passes over the data
    for epoch in range(epochs):

        # When the in-period epoch index reaches the end of the cosine period,
        # reset it to 0: a periodic warm restart
        if cosine_itepoch == cosine_total_epoch:
            cosine_itepoch = 0

        # Compute the learning rate for the current in-period epoch index
        lr_select = lr_cosine_schedule(cosine_itepoch)

        # Advance the in-period index
        cosine_itepoch += 1

        # If this epoch is a warm-up change point, update the warm-up alpha
        if epoch in lr_warm_up_schedule:
            lr_warm_up_alpha = lr_warm_up_schedule[epoch]

        # The final learning rate is the cosine rate times the warm-up alpha
        lr = lr_select * lr_warm_up_alpha
        print(f"Set learning rate to {lr:.5f}")

        # Iterate over the training data, one batch (batch_size = 512) per step
        for index, (images, labels) in enumerate(train_data):

            # Compute the raw scores
            predict = images @ theta  # n x 785  dot  785 x 10  =  n x 10

            # Convert to probabilities
            predict = sigmoid(predict)

            # Compute the loss
            loss = cross_entropy(predict, labels)

            # Gradient of theta: for sigmoid plus this cross entropy,
            # dL/dz = predict - labels, hence X^T (predict - labels)
            d_theta = images.T @ (predict - labels)  # 785 x n  dot  n x 10  =  785 x 10

            # Update theta; dividing by batch_size averages the gradient
            theta = theta - lr * d_theta / batch_size
            iters += 1

            # Record the iteration index and loss for the plots later
            train_losses.append([iters, loss])

            if index % 100 == 0:
                print(f"Iter {iters}. {epoch} / {epochs}, Loss: {loss:.3f}, Learning Rate: {lr:.5f}")

        # After each full pass over the data, evaluate theta on the validation set
        val_accuracy, val_loss = estimate_val(val_images, val_labels, theta, classes)

        # Record the val accuracy and loss for the plots later
        val_losses.append([iters, val_accuracy, val_loss])
        print(f"Val set, Accuracy: {val_accuracy}, Loss: {val_loss}")

    The output of the run above:

    E:\anaconda\python.exe E:/dustartlearnproject/minstLearn/minst.py
    Set learning rate to 0.00100
    Iter 1. 0 / 10, Loss: 24.275, Learning Rate: 0.00100
    Iter 101. 0 / 10, Loss: 16.112, Learning Rate: 0.00100
    Val set, Accuracy: 0.0967, Loss: 15.854472258208313
    Set learning rate to 0.00750
    Iter 119. 1 / 10, Loss: 16.177, Learning Rate: 0.00750
    Iter 219. 1 / 10, Loss: 10.643, Learning Rate: 0.00750
    Val set, Accuracy: 0.1025, Loss: 10.272491602919825
    Set learning rate to 0.02508
    Iter 237. 2 / 10, Loss: 9.819, Learning Rate: 0.02508
    Iter 337. 2 / 10, Loss: 7.824, Learning Rate: 0.02508
    Val set, Accuracy: 0.2056, Loss: 7.8075571751523665
    Set learning rate to 0.10000
    Iter 355. 3 / 10, Loss: 7.838, Learning Rate: 0.10000
    Iter 455. 3 / 10, Loss: 3.850, Learning Rate: 0.10000
    Val set, Accuracy: 0.5677, Loss: 3.6745011642839085
    Set learning rate to 0.07503
    Iter 473. 4 / 10, Loss: 3.915, Learning Rate: 0.07503
    Iter 573. 4 / 10, Loss: 3.122, Learning Rate: 0.07503
    Val set, Accuracy: 0.658, Loss: 2.8318372736284445
    Set learning rate to 0.02508
    Iter 591. 5 / 10, Loss: 3.012, Learning Rate: 0.02508
    Iter 691. 5 / 10, Loss: 2.516, Learning Rate: 0.02508
    Val set, Accuracy: 0.6794, Loss: 2.663056994692587
    Set learning rate to 0.10000
    Iter 709. 6 / 10, Loss: 2.822, Learning Rate: 0.10000
    Iter 809. 6 / 10, Loss: 2.314, Learning Rate: 0.10000
    Val set, Accuracy: 0.7327, Loss: 2.230728843855682
    Set learning rate to 0.07503
    Iter 827. 7 / 10, Loss: 2.326, Learning Rate: 0.07503
    Iter 927. 7 / 10, Loss: 2.068, Learning Rate: 0.07503
    Val set, Accuracy: 0.7574, Loss: 2.0282295027093924
    Set learning rate to 0.02508
    Iter 945. 8 / 10, Loss: 2.028, Learning Rate: 0.02508
    Iter 1045. 8 / 10, Loss: 2.148, Learning Rate: 0.02508
    Val set, Accuracy: 0.7653, Loss: 1.9720897257607903
    Set learning rate to 0.10000
    Iter 1063. 9 / 10, Loss: 2.035, Learning Rate: 0.10000
    Iter 1163. 9 / 10, Loss: 1.891, Learning Rate: 0.10000
    Val set, Accuracy: 0.7832, Loss: 1.8004302841913162
    
    Process finished with exit code 0

    This run is basically fine. But if the code is changed so that the clipping in cross_entropy is removed:

    E:\anaconda\python.exe E:/dustartlearnproject/minstLearn/minst.py
    Set learning rate to 0.00100
    Iter 1. 0 / 10, Loss: nan, Learning Rate: 0.00100
    E:\dustartlearnproject\minstLearn\minst.py:168: RuntimeWarning: divide by zero encountered in log
      return -np.sum(gt * np.log(predict) + (1 - gt) * np.log(1 - predict)) / batch_size
    E:\dustartlearnproject\minstLearn\minst.py:168: RuntimeWarning: invalid value encountered in multiply
      return -np.sum(gt * np.log(predict) + (1 - gt) * np.log(1 - predict)) / batch_size
    Iter 101. 0 / 10, Loss: inf, Learning Rate: 0.00100
    Val set, Accuracy: 0.0895, Loss: 48.24693390577862
    Set learning rate to 0.00750
    Iter 119. 1 / 10, Loss: 49.018, Learning Rate: 0.00750
    Iter 219. 1 / 10, Loss: 12.323, Learning Rate: 0.00750
    Val set, Accuracy: 0.1176, Loss: 12.242563352955944
    Set learning rate to 0.02508
    Iter 237. 2 / 10, Loss: 11.778, Learning Rate: 0.02508
    Iter 337. 2 / 10, Loss: 9.340, Learning Rate: 0.02508
    Val set, Accuracy: 0.2352, Loss: 8.752362999505012
    Set learning rate to 0.10000
    Iter 355. 3 / 10, Loss: 8.544, Learning Rate: 0.10000
    Iter 455. 3 / 10, Loss: 4.175, Learning Rate: 0.10000
    Val set, Accuracy: 0.5513, Loss: 3.981317252906817
    Set learning rate to 0.07503
    Iter 473. 4 / 10, Loss: 4.012, Learning Rate: 0.07503
    Iter 573. 4 / 10, Loss: 3.343, Learning Rate: 0.07503
    Val set, Accuracy: 0.648, Loss: 3.0855568357946415
    Set learning rate to 0.02508
    Iter 591. 5 / 10, Loss: 3.164, Learning Rate: 0.02508
    Iter 691. 5 / 10, Loss: 3.096, Learning Rate: 0.02508
    Val set, Accuracy: 0.6668, Loss: 2.901230304990213
    Set learning rate to 0.10000
    Iter 709. 6 / 10, Loss: 2.972, Learning Rate: 0.10000
    Iter 809. 6 / 10, Loss: 2.644, Learning Rate: 0.10000
    Val set, Accuracy: 0.7176, Loss: 2.42363543726263
    Set learning rate to 0.07503
    Iter 827. 7 / 10, Loss: 2.563, Learning Rate: 0.07503
    Iter 927. 7 / 10, Loss: 2.370, Learning Rate: 0.07503
    Val set, Accuracy: 0.7415, Loss: 2.19418632803101
    Set learning rate to 0.02508
    Iter 945. 8 / 10, Loss: 2.054, Learning Rate: 0.02508
    Iter 1045. 8 / 10, Loss: 2.033, Learning Rate: 0.02508
    Val set, Accuracy: 0.7482, Loss: 2.1308324327122503
    Set learning rate to 0.10000
    Iter 1063. 9 / 10, Loss: 2.322, Learning Rate: 0.10000
    Iter 1163. 9 / 10, Loss: 1.971, Learning Rate: 0.10000
    Val set, Accuracy: 0.7704, Loss: 1.940373967677279
    
    Process finished with exit code 0

    It looks as if the NaN did not appear this time, but it actually did: the very first iteration printed Loss: nan, together with the divide-by-zero and invalid-value warnings from np.log. Without the clip, a sigmoid output saturated to exactly 0 or 1 turns np.log(predict) or np.log(1 - predict) into -inf, and multiplying that -inf by a 0 in gt yields NaN. Training still converges because the gradient, images.T @ (predict - labels), never goes through the log, so the NaN only contaminates the reported loss, not the weight update.
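    The mechanism is easy to reproduce in isolation. Below is a minimal sketch (pure numpy; bce_with_logits is a hypothetical helper, not part of the script above) showing the NaN and two standard fixes: clipping, which cross_entropy already applies, and evaluating the loss from the raw logits so that log(0) is never formed:

        import numpy as np

        gt = np.array([1.0, 0.0])
        predict = np.array([1.0, 0.0])  # fully saturated sigmoid outputs

        # Naive binary cross entropy: 0 * log(0) = 0 * -inf = nan
        print(-np.sum(gt * np.log(predict) + (1 - gt) * np.log(1 - predict)))  # nan

        # Fix 1: clip, exactly as cross_entropy above does
        eps = 1e-4
        p = np.clip(predict, eps, 1 - eps)
        print(-np.sum(gt * np.log(p) + (1 - gt) * np.log(1 - p)))  # small and finite

        # Fix 2: compute the loss from the logits z instead of sigmoid(z), using
        # -log(sigmoid(z)) = log(1 + exp(-z)); the stable rearrangement
        # max(z, 0) - z * y + log1p(exp(-|z|)) never takes the log of 0
        def bce_with_logits(z, y):
            return np.sum(np.maximum(z, 0) - z * y + np.log1p(np.exp(-np.abs(z))))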

  • Original post: https://www.cnblogs.com/xiaoruirui/p/16817839.html