• 稀疏自编码器和矢量化编程


     相关的公式

    证明参考PPT: http://wenku.baidu.com/link?url=dBZZq7TYJOnIw2mwilKsJT_swT52I0OoikmvmgBaYE_NvP_KChFZ-HOURH5LMiLEuSVFcGmJ0bQfkG-ZYk-IRJf7D-w6P9PBec8EZ9IxgFS

    Python实现代码参考(数据文件 IMAGES.mat 与 train-images.idx3-ubyte 需放在与脚本相同的文件夹下)

    @author: Paul Rothnie
    email : paul.rothnie@googlemail.com
    https://github.com/siddharth950/Sparse-Autoencoder

    # coding: utf8
    # Refer to https://github.com/siddharth950/Sparse-Autoencoder
    
    import numpy as np
    import scipy.io
    import scipy.optimize
    import matplotlib.pyplot
    import struct
    import array
    
    
    class sparse_autoencoder(object):
        """Sparse autoencoder with one hidden layer and sigmoid activations.

        All parameters live in a single flat vector ``theta`` laid out as
        ``[W1, W2, b1, b2]`` so that it can be handed to
        ``scipy.optimize.minimize``.  The ``idx_*`` attributes hold the slice
        boundaries of the four segments inside that vector.
        """

        def __init__(self, visible_size, hidden_size, lambda_, rho, beta):
            """Store the hyper-parameters and draw the initial parameter vector.

            visible_size -- number of input (and output) units
            hidden_size  -- number of hidden units
            lambda_      -- weight of the L2 penalty on W1 and W2
            rho          -- target mean activation of each hidden unit
            beta         -- weight of the KL sparsity penalty
            """
            self.visible_size = visible_size
            self.hidden_size = hidden_size
            self.lambda_ = lambda_
            self.rho = rho
            self.beta = beta

            # Weights are drawn uniformly from [-bound, bound); W1 is drawn
            # before W2, biases start at zero.
            bound = np.sqrt(6.0 / (visible_size + hidden_size + 1.0))
            encoder_w = 2.0 * bound * np.random.random_sample(
                size=(hidden_size, visible_size)) - bound
            decoder_w = 2.0 * bound * np.random.random_sample(
                size=(visible_size, hidden_size)) - bound

            # Slice boundaries of W1 | W2 | b1 | b2 inside the flat vector.
            weight_count = hidden_size * visible_size
            self.idx_0 = 0
            self.idx_1 = weight_count                  # end of W1 (hidden x visible)
            self.idx_2 = self.idx_1 + weight_count     # end of W2 (visible x hidden)
            self.idx_3 = self.idx_2 + hidden_size      # end of b1 (hidden)
            self.idx_4 = self.idx_3 + visible_size     # end of b2 (visible)

            self.initial_theta = np.concatenate([
                encoder_w.ravel(),
                decoder_w.ravel(),
                np.zeros(hidden_size),
                np.zeros(visible_size),
            ])

        def sigmoid(self, x):
            """Return the element-wise logistic function of ``x``."""
            return 1.0 / (np.exp(-x) + 1.0)

        def unpack_theta(self, theta):
            """Split the flat vector ``theta`` into ``(W1, W2, b1, b2)``.

            W1 has shape (hidden, visible), W2 has shape (visible, hidden);
            the biases are returned as column vectors so they broadcast over
            the example axis.
            """
            edges = (self.idx_0, self.idx_1, self.idx_2, self.idx_3,
                     self.idx_4)
            shapes = (
                (self.hidden_size, self.visible_size),
                (self.visible_size, self.hidden_size),
                (self.hidden_size, 1),
                (self.visible_size, 1),
            )
            W1, W2, b1, b2 = [
                np.reshape(theta[start:stop], shape)
                for start, stop, shape in zip(edges[:-1], edges[1:], shapes)
            ]
            return W1, W2, b1, b2

        def cost(self, theta, visible_input):
            """Evaluate the objective and its gradient at ``theta``.

            ``visible_input`` holds one example per column (its second axis
            is used as the example count).  The objective is the mean
            half-squared reconstruction error, plus ``lambda_ / 2`` times the
            squared weights, plus ``beta`` times the KL divergence between
            ``rho`` and the mean hidden activations.

            Returns the two-element list ``[cost, gradient]`` where the
            gradient is flattened in the same layout as ``theta``.
            """
            W1, W2, b1, b2 = self.unpack_theta(theta)

            # Forward pass: layer = sigmoid(W . previous + b).
            hidden = self.sigmoid(W1.dot(visible_input) + b1)
            output = self.sigmoid(W2.dot(hidden) + b2)
            example_count = visible_input.shape[1]

            # Reconstruction term.
            residual = -(visible_input - output)
            reconstruction = np.mean(
                0.5 * np.sum(np.square(residual), axis=0))

            # L2 weight decay (biases are not penalised).
            weight_decay = self.lambda_ * (
                np.sum(np.square(W1)) + np.sum(np.square(W2))) / 2.0

            # Sparsity term: KL divergence against the mean activation of
            # every hidden unit.
            mean_activation = hidden.mean(axis=1)
            sparsity = np.sum(
                self.rho * np.log(self.rho / mean_activation) +
                (1 - self.rho) * np.log((1 - self.rho) /
                                        (1 - mean_activation)))

            total = reconstruction + weight_decay + self.beta * sparsity

            # Backward pass.  The sparsity derivative is added to the hidden
            # layer's error before applying the sigmoid derivative.
            sparsity_slope = self.beta * (
                - self.rho / mean_activation +
                (1 - self.rho) / (1 - mean_activation))
            delta_out = residual * output * (1.0 - output)
            delta_hidden = (W2.T.dot(delta_out) +
                            sparsity_slope[:, np.newaxis])
            delta_hidden = delta_hidden * (hidden * (1 - hidden))

            grad_W1 = (delta_hidden.dot(visible_input.T) / example_count +
                       self.lambda_ * W1)
            grad_W2 = (delta_out.dot(hidden.T) / example_count +
                       self.lambda_ * W2)
            grad_b1 = delta_hidden.mean(axis=1)
            grad_b2 = delta_out.mean(axis=1)

            flat_gradient = np.concatenate([
                grad_W1.ravel(), grad_W2.ravel(),
                grad_b1.ravel(), grad_b2.ravel(),
            ])
            return [total, flat_gradient]

        def train(self, data, max_iterations):
            """Minimise ``cost`` over ``data`` with L-BFGS-B.

            Starts from ``self.initial_theta``, runs at most
            ``max_iterations`` iterations and returns the optimiser's final
            flat parameter vector.
            """
            result = scipy.optimize.minimize(
                self.cost,
                self.initial_theta,
                args=(data,),
                method='L-BFGS-B',
                jac=True,  # cost() returns the gradient alongside the value
                options={'maxiter': max_iterations},
            )
            return result.x
    
    
    def normalize_data(data):
        """Rescale ``data`` into the range [0.1, 0.9].

        The global mean is removed, values are truncated to +/- 3 standard
        deviations, divided by that bound (giving [-1, 1]) and finally mapped
        linearly onto [0.1, 0.9].

        Constant input has zero standard deviation; instead of dividing by
        zero (which would produce NaNs) every element is mapped to the
        midpoint 0.5.

        Returns a new array; the argument is not modified.
        """
        centred = data - np.mean(data)
        pstd = 3 * np.std(centred)
        if pstd == 0:
            # Every centred value is exactly 0, so skip the 0 / 0 division.
            scaled = np.zeros_like(centred)
        else:
            scaled = np.clip(centred, -pstd, pstd) / pstd
        # [-1, 1] -> [0.1, 0.9]
        return (scaled + 1.0) * 0.4 + 0.1
    
    
    def loadMNISTImages(file_name):
        """Read an MNIST-style IDX image file into a float matrix.

        The file starts with four big-endian unsigned 32-bit integers; the
        first is skipped and the remaining three are read as the image count,
        row count and column count.  The pixel bytes follow, one image after
        another.

        Returns an array of shape (rows * cols, num_images) with one image
        per column and pixel values divided by 255.

        Raises ``struct.error`` if the header is incomplete and ``ValueError``
        if the file holds fewer pixel bytes than the header announces.
        """
        # The context manager closes the file even if parsing fails.
        with open(file_name, 'rb') as image_file:
            header = image_file.read(16)
            _, num_examples, num_rows, num_cols = struct.unpack('>IIII',
                                                                header)
            pixels_per_image = num_rows * num_cols
            expected_bytes = num_examples * pixels_per_image
            raw = image_file.read(expected_bytes)

        if len(raw) != expected_bytes:
            raise ValueError(
                'expected %d pixel bytes in %r but found only %d'
                % (expected_bytes, file_name, len(raw)))

        # Images are stored consecutively, so each row of the reshaped buffer
        # is one image; transpose to put one image per column.
        images = np.frombuffer(raw, dtype=np.uint8).reshape(
            num_examples, pixels_per_image)
        return np.ascontiguousarray(images.T / 255.0)
    
    
    def load_data(num_patches, patch_side):
        """Sample square patches from ``IMAGES.mat`` and normalise them.

        ``num_patches`` patches of ``patch_side`` x ``patch_side`` pixels are
        cut at pseudo-random positions from pseudo-randomly chosen images.
        The generator is seeded with a fixed value, so repeated calls return
        the same patches.

        Returns an array of shape (patch_side * patch_side, num_patches), one
        flattened patch per column, passed through ``normalize_data``.
        """
        # The index bounds below assume the 'IMAGES' variable is a
        # 512 x 512 x 10 array -- TODO confirm against the data file.
        images = scipy.io.loadmat('IMAGES.mat')['IMAGES']
        patches = np.zeros((patch_side * patch_side, num_patches))

        rand = np.random.RandomState(1234)  # fixed seed for reproducibility
        # NOTE(review): random_integers is deprecated in NumPy; it is kept so
        # the sampled patch positions stay the same as before.
        # Top-left corner of each patch; the upper bound is inclusive, so the
        # patch always ends inside the image.
        image_index = rand.random_integers(0, 512 - patch_side,
                                           size=(num_patches, 2))
        image_number = rand.random_integers(0, 10 - 1, size=num_patches)

        # range() instead of the Python-2-only xrange() so that the function
        # runs on both Python 2 and Python 3.
        for i in range(num_patches):
            row = image_index[i, 0]
            col = image_index[i, 1]
            patch = images[row:row + patch_side, col:col + patch_side,
                           image_number[i]]
            patches[:, i] = patch.flatten()

        return normalize_data(patches)
    
    
    def visualizeW1(opt_W1, vis_patch_side, hid_patch_side):
        """Show the rows of ``opt_W1`` as grey-scale images in a square grid.

        opt_W1         -- 2-D array; row ``k`` is reshaped to
                          (vis_patch_side, vis_patch_side) and drawn in the
                          k-th cell of the grid
        vis_patch_side -- side length of each displayed image
        hid_patch_side -- number of rows and of columns in the grid

        Opens a matplotlib window via ``show()``.
        """
        # squeeze=False makes subplots() return a 2-D array of axes even for
        # a 1 x 1 grid; without it a single Axes object comes back and
        # ``.flat`` is not available.
        figure, axes = matplotlib.pyplot.subplots(nrows=hid_patch_side,
                                                  ncols=hid_patch_side,
                                                  squeeze=False)
        for index, axis in enumerate(axes.flat):
            tile = opt_W1[index, :].reshape(vis_patch_side, vis_patch_side)
            axis.imshow(tile, cmap=matplotlib.pyplot.cm.gray,
                        interpolation='nearest')
            # Hide frame and ticks so only the image is visible.
            axis.set_frame_on(False)
            axis.set_axis_off()
        matplotlib.pyplot.show()
    
    
    def run_sparse_ae():
        """Train a sparse autoencoder on image patches and plot its W1.

        Uses 10000 patches of 8 x 8 pixels from ``load_data``, 25 hidden
        units, and at most 400 optimiser iterations.
        """
        patch_side, grid_side = 8, 5
        n_visible = patch_side * patch_side
        n_hidden = grid_side * grid_side

        patches = load_data(num_patches=10000, patch_side=patch_side)
        model = sparse_autoencoder(n_visible, n_hidden,
                                   lambda_=0.0001, rho=0.01, beta=3.0)
        trained_theta = model.train(patches, 400)

        # The encoder weights are the first segment of the flat vector.
        encoder_weights = model.unpack_theta(trained_theta)[0]
        visualizeW1(encoder_weights, patch_side, grid_side)
    
    
    def run_sparse_ae_MNIST():
        """Train a sparse autoencoder on MNIST digits and plot its W1.

        Uses the first 10000 images of ``train-images.idx3-ubyte`` as
        28 x 28 inputs, 196 hidden units, and at most 400 optimiser
        iterations.
        """
        image_side, grid_side = 28, 14
        n_visible = image_side * image_side
        n_hidden = grid_side * grid_side
        sample_count = 10000

        digits = loadMNISTImages('train-images.idx3-ubyte')
        digits = digits[:, :sample_count]  # one image per column

        model = sparse_autoencoder(n_visible, n_hidden,
                                   lambda_=3e-3, rho=0.1, beta=3.0)
        trained_theta = model.train(digits, 400)

        # The encoder weights are the first segment of the flat vector.
        encoder_weights = model.unpack_theta(trained_theta)[0]
        visualizeW1(encoder_weights, image_side, grid_side)
    
    
    if __name__ == "__main__":
        # Script entry point: by default run the image-patch experiment.
        run_sparse_ae()
        # Uncomment to run the MNIST experiment as well.
        # run_sparse_ae_MNIST()



  • 相关阅读:
    larbin结构分析
    《钱不要存银行》OneNote
    全局变量、extern/static/const区别与联系
    GIS网址,转自别处
    MSDN无法显示页面的解决
    人生没有奇迹
    开源GIS系统
    推荐:GDAL学习资源
    中国农科院资源区划所MODIS的遥感信息地面接收站
    泡沫产生的特点
  • 原文地址:https://www.cnblogs.com/qw12/p/5817530.html
Copyright © 2020-2023  润新知