• Implementing handwritten digit recognition in Python by hand (logistic regression)



     Here I wrote a Python program that recognizes the handwritten digits 0-9, using logistic regression with regularization and the one-vs-all technique (if you are not familiar with these two concepts, see my earlier blog posts).

    Data Preparation and Environment

     The data is a 5000 × 400 matrix: 5000 images, each a 20 × 20 grayscale image unrolled into a row of 400 pixels. y is the label vector, a 5000 × 1 matrix whose values range over 1-10, with 10 standing for the digit 0. The data is stored as a .mat file (the format MATLAB uses to save variables).
     For convenient matrix arithmetic, file reading, and plotting, three libraries are needed: numpy, matplotlib, and scipy.
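
     Before diving into the main program, a quick inspection sketch helps confirm what the file contains (the relative path is the same one the main program uses below):

    from scipy.io import loadmat

    data = loadmat("../machine-learning-ex3/ex3/ex3data1.mat")
    print(data.keys())      # includes 'X' and 'y' alongside the mat metadata keys
    print(data["X"].shape)  # (5000, 400)
    print(data["y"].shape)  # (5000, 1)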

    Main Program (orchestration)

    # ================== Multi-class Classification | part 1: one vs all ========================
    from scipy.io import loadmat  # read matfile
    import numpy as np
    import displayData
    import lrCostFunction
    import oneVsAll
    import Predict
    import matplotlib.pyplot as plt
    import predictOneVsAll
    # ================== load Data =============================================================
    data = loadmat("../machine-learning-ex3/ex3/ex3data1.mat")
    x_data = data["X"]   # x_data here is a 5000 * 400 matrix (5000 training examples, 20 * 20 grid of pixel is unrolled into 4000-deminsional vector
    y_data = data.get("y")  # y_data here is a 5000 * 1 matrix(label)
    # hint: reshape each row to 20 x 20 and transpose so the images are oriented correctly (the .mat data is unrolled column-major)
    x_data = np.array([im.reshape((20, 20)).T for im in x_data])
    x_data = np.array([im.reshape((400, )) for im in x_data])
    print(y_data)
    # can use data.keys() and a debugger to discover these key names
    # ================== load end ==============================================================
    # set some params
    input_layer_size = 400
    num_labels = 10
    # ================== visualize the data ====================================================
    rand = np.random.randint(0, 5000, (100, ))  # [0, 5000)
    displayData.data_display(x_data[rand, :])   # get 100 images randomly
    # ======================= Test case for lrCostFunction =============================
    theta_t = np.array([-2, -1, 1, 2])
    t = np.linspace(1, 15, 15) / 10
    t = t.reshape((3, 5))
    x_t = np.column_stack((np.ones((5, 1)), t.T))
    y_t = np.array([1, 0, 1, 0, 1])
    l_t = 3
    cost = lrCostFunction.cost_reg(theta_t, x_t, y_t, l_t)
    grad = lrCostFunction.grad_reg(theta_t, x_t, y_t, l_t)
    print("cost is {}".format(cost))
    print("expected cost is 2.534819")
    print("grad is {}".format(grad))
    print("expected grad is 0.146561 -0.548558 0.724722 1.398003")
    # ============================ test end =============================================
    # ============================ one vs all: predict ===========================================
    l = 0.1
    theta = oneVsAll.one_vs_all(x_data, y_data, l, num_labels)
    result = Predict.pred_lr(theta, x_data[1500, :])
    np.set_printoptions(precision=2, suppress=True)  # don't use  scientific notation
    print("this number is {}".format(result))  # 10 here is 0
    plt.imshow(x_data[1500, :].reshape((20, 20)), cmap='gray', vmin=-1, vmax=1)
    plt.show()
    accuracy = predictOneVsAll.pred_accuracy(theta, x_data, y_data)
    print("test 5000 images, accuracy is {:%}".format(accuracy))
    # ============================ predict  end ======================================================
    

    Data Visualization (displayData)

     One thing to note: each row of x_data must be reshaped into a 20 × 20 matrix and then transposed before plotting, or the digits come out mirrored; the .mat file unrolls each image in column-major (MATLAB) order, while numpy's reshape fills row-major.
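
     Equivalently, numpy can undo the column-major unrolling in one step; a small self-contained check of the equivalence (img_a and img_b are my own illustrative names):

    import numpy as np
    from scipy.io import loadmat

    raw = loadmat("../machine-learning-ex3/ex3/ex3data1.mat")["X"]
    img_a = raw[0].reshape((20, 20)).T           # reshape row-major, then transpose
    img_b = raw[0].reshape((20, 20), order="F")  # reshape column-major directly
    assert np.array_equal(img_a, img_b)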

    import numpy as np
    import matplotlib.pyplot as plt
    
    
    def data_display(data, image_width=20):
        # compute image_height from the number of pixels per example
        m, n = data.shape
        image_height = n // image_width
        # compute a roughly square grid layout
        display_rows = int(np.round(np.sqrt(m)))
        display_cols = int(np.ceil(m / display_rows))
        # padding (in pixels) between neighbouring images
        pad = 1
        rows_pixel = display_rows * image_height + (display_rows + 1) * pad
        cols_pixel = display_cols * image_width + (display_cols + 1) * pad
        # initialize the display matrix with -1 (rendered black)
        display_matrix = -np.ones((rows_pixel, cols_pixel))
        # the top-left pixel of the image in grid row r, column c sits at
        # (pad + (image_height + pad) * r, pad + (image_width + pad) * c)
        for i in range(data.shape[0]):
            image_data = data[i, :].reshape((image_height, image_width))
            row_index = i // display_cols
            cols_index = i % display_cols
            row_position = pad + (image_height + pad) * row_index
            cols_position = pad + (image_width + pad) * cols_index
            display_matrix[row_position:row_position + image_height, cols_position:cols_position + image_width] = image_data
        plt.imshow(display_matrix, cmap='gray', vmin=-1, vmax=1)
        plt.show()
    

     When rendering the handwritten images, a single loop suffices: the trick is to use an arithmetic progression to compute where each image's top-left pixel lands in the big display matrix, as shown in the formula below.
    (figure: the rendered 10 × 10 grid of randomly sampled digits)
     This is what the output looks like; adjusting pad controls the width of the black gaps between images.
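
     Concretely, writing p for pad, h for image_height, and w for image_width, the image placed at grid row r and column c (both 0-indexed) gets its top-left pixel at

    $$\text{row}(r) = p + (h + p)\,r, \qquad \text{col}(c) = p + (w + p)\,c$$

     which is exactly what row_position and cols_position compute inside the loop.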

    Cost and Gradient (lrCostFunction)
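
     For reference, the two functions below compute the regularized logistic-regression cost and gradient, with $h_\theta(x) = \sigma(\theta^\top x)$ and $\theta_0$ left unpenalized:

    $$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log h_\theta(x^{(i)}) - \big(1 - y^{(i)}\big)\log\big(1 - h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

    $$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)} + \frac{\lambda}{m}\theta_j \qquad (\text{drop the } \tfrac{\lambda}{m}\theta_j \text{ term for } j = 0)$$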

    import numpy as np
    
    
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))
    
    
    def cost_reg(theta, x_data, y_data, l):
        m, n = x_data.shape
        theta = theta.reshape((-1, 1))
        y_data = y_data.reshape((-1, 1))
        hx = sigmoid(np.dot(x_data, theta))
        ln_h = np.log(hx)
        part1 = -np.dot(ln_h.T, y_data) / m
        ln_1h = np.log(1 - hx)
        part2 = -np.dot(ln_1h.T, 1-y_data) / m
        # do NOT write theta[0, 0] = 0 here to skip penalizing theta0: theta is a view of the caller's array, so the assignment would leak out; slicing theta[1:, :] below avoids the problem
        reg = l * np.dot(theta[1:, :].T, theta[1:, :]) / (2 * m)
        return (part1+part2+reg).flatten()
    
    
    def grad_reg(theta, x_data, y_data, l):
        m, n = x_data.shape
        theta_tempt = theta.reshape((-1, 1)).copy()
        y_data = y_data.reshape((-1, 1))
        hx = sigmoid(np.dot(x_data, theta_tempt))
        theta_tempt[0, 0] = 0  # don't penalize theta0
        part1 = np.dot(x_data.T, hx - y_data)
        part2 = l * theta_tempt
        return ((part1+part2) / m).flatten()
    

     A real pitfall here: in the gradient function you must copy theta before zeroing its first element. Operating directly on the reshaped reference and assigning 0 to element [0, 0] would mutate the theta that was passed in from outside, as demonstrated below.
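
     A minimal demonstration of the aliasing (numpy's reshape returns a view onto the same memory whenever it can, so writes go through to the original):

    import numpy as np

    theta = np.array([-2.0, -1.0, 1.0, 2.0])
    view = theta.reshape((-1, 1))  # a view, not a copy
    view[0, 0] = 0                 # writes through to theta
    print(theta)                   # [ 0. -1.  1.  2.] -- the caller's theta is mutated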

    One-vs-All Training (oneVsAll)

    import numpy as np
    import fmincg
    
    def one_vs_all(x_data, y_data, l, num_labels):
        # initialize some params
        m, n = x_data.shape
        x_data = np.column_stack((np.ones((m, 1)), x_data))  # prepend the bias column
        # one row of parameters per classifier
        initial_theta = np.zeros((num_labels, n + 1))
        theta = fmincg.fmincg(initial_theta, x_data, y_data, l, num_labels)
        return theta
    
    

    This is the one-vs-all training: to do k-way classification we train k binary classifiers, each treating one class as positive and all the others as negative; the formula below shows how their outputs combine at prediction time.
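
    In symbols, classifier $k$ learns parameters $\theta^{(k)}$ from the binarized labels $\mathbb{1}[y = k]$, and a new sample $x$ is assigned to the class whose classifier is most confident:

    $$\hat{y} = \arg\max_{k} \; \sigma\big((\theta^{(k)})^{\top} x\big)$$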

    Optimizing theta for Every Classifier (fmincg)

    import scipy.optimize as op
    import numpy as np
    import lrCostFunction
    
    
    def fmincg(theta, x_data, y_data, l, num_labels):
        for i in range(num_labels):
            y_tempt = y_data.copy()  # hint: must copy!
            # binarize the labels for classifier i (the digit 0 is stored as label 10)
            target = 10 if i == 0 else i
            pos = np.where(y_data == target)
            neg = np.where(y_data != target)
            y_tempt[pos] = 1
            y_tempt[neg] = 0
            result = op.minimize(lrCostFunction.cost_reg, theta[i, :].T, args=(x_data, y_tempt, l), method="TNC", jac=lrCostFunction.grad_reg)
            print("{} : {}".format(i, result.success))
            theta[i, :] = result.x
        return theta
    
    

    This returns a 10 × 401 theta matrix (one row per classifier). It relies on scipy's advanced optimization routines, which converge faster than plain gradient descent; you normally should not implement such algorithms yourself unless you are very confident in your numerical computing. I have also put a hand-written gradient-descent optimizer on my GitHub that can optimize theta as well, though it is certainly slower.
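
    For comparison, a minimal batch-gradient-descent sketch of that slower route (my own illustration, not the version in the repo; alpha and num_iters are hypothetical tuning knobs):

    import numpy as np
    import lrCostFunction


    def gradient_descent(theta, x_data, y_data, l, alpha=0.1, num_iters=500):
        # repeatedly step opposite the regularized gradient
        theta = theta.copy()
        for _ in range(num_iters):
            theta = theta - alpha * lrCostFunction.grad_reg(theta, x_data, y_data, l)
        return theta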

    Prediction (Predict)

    import numpy as np
    import lrCostFunction
    
    
    def pred_lr(theta, x_data):
        x_data = x_data.reshape((1, -1))
        x_data = np.column_stack((np.ones((1, 1)), x_data))  # prepend the bias term
        result = lrCostFunction.sigmoid(x_data @ theta.T)    # scores from all 10 classifiers
        predict = np.argmax(result.flatten())
        if predict == 0:
            predict = 10  # classifier 0 detects the digit 0, which the data labels as 10
        return predict
    
    

    The prediction code scores a sample against all ten classifiers and returns the argmax; index 0 is mapped back to label 10, which stands for the digit 0.

    Accuracy Check (predictOneVsAll)

    import Predict
    
    def pred_accuracy(theta, x_data, y_data):
        right_num = 0
        m, _ = x_data.shape
        for i in range(m):
            pred = Predict.pred_lr(theta, x_data[i, :])
            if pred == y_data[i, :]:
                right_num += 1
        return right_num / m
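
    The loop above checks the 5,000 samples one at a time. As a hedged sketch, the same accuracy can be computed in one vectorized pass (pred_accuracy_vec is my own name, not from the original repo):

    import numpy as np
    import lrCostFunction


    def pred_accuracy_vec(theta, x_data, y_data):
        m = x_data.shape[0]
        x = np.column_stack((np.ones((m, 1)), x_data))  # prepend the bias column
        scores = lrCostFunction.sigmoid(x @ theta.T)    # m x 10 matrix of class scores
        pred = np.argmax(scores, axis=1)                # winning classifier index, 0-9
        pred[pred == 0] = 10                            # map index 0 back to label 10 (the digit 0)
        return np.mean(pred == y_data.flatten())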
    
    

    There is also a neural-network version of this exercise; I have put it on my GitHub:
    https://github.com/lsl1229840757/machine_learning/tree/master/programming_ex3/ex3
