稀疏自编码器和矢量化编程

相关的公式

证明参考PPT: http://wenku.baidu.com/link?url=dBZZq7TYJOnIw2mwilKsJT_swT52I0OoikmvmgBaYE_NvP_KChFZ-HOURH5LMiLEuSVFcGmJ0bQfkG-ZYk-IRJf7D-w6P9PBec8EZ9IxgFS

Python实现代码参考如下（数据文件 IMAGES.mat 和 train-images.idx3-ubyte 需与脚本放在同一文件夹）

@author: Paul Rothnie
email : paul.rothnie@googlemail.com
https://github.com/siddharth950/Sparse-Autoencoder

# coding: utf8
# Refer to https://github.com/siddharth950/Sparse-Autoencoder

import numpy as np
import scipy.io
import scipy.optimize
import matplotlib.pyplot
import struct
import array


class sparse_autoencoder(object):
    """Sparse autoencoder with one hidden layer and a flat parameter vector.

    All parameters live in a single 1-D array laid out as
    ``W1 | W2 | b1 | b2`` so that it can be handed to
    ``scipy.optimize.minimize``.  ``W1`` has shape
    ``(hidden_size, visible_size)`` and ``W2`` has shape
    ``(visible_size, hidden_size)``.
    """

    def __init__(self, visible_size, hidden_size, lambda_, rho, beta):
        """Store the hyper-parameters and draw the initial parameters.

        Args:
            visible_size: number of input (and output) units.
            hidden_size: number of hidden units.
            lambda_: weight-decay (L2) coefficient.
            rho: target mean activation of each hidden unit.
            beta: weight of the KL sparsity penalty.
        """
        self.visible_size = visible_size
        self.hidden_size = hidden_size
        self.lambda_ = lambda_
        self.rho = rho
        self.beta = beta

        # Offsets delimiting W1, W2, b1 and b2 inside the flat vector.
        weights_per_layer = hidden_size * visible_size
        self.idx_0 = 0
        self.idx_1 = weights_per_layer
        self.idx_2 = self.idx_1 + weights_per_layer
        self.idx_3 = self.idx_2 + hidden_size
        self.idx_4 = self.idx_3 + visible_size

        # Weights are uniform in [-w_max, w_max); biases start at zero.
        # W1 is drawn before W2 from the global NumPy random state.
        w_max = np.sqrt(6.0 / (visible_size + hidden_size + 1.0))
        w_min = -w_max
        span = w_max - w_min
        encoder_w = span * np.random.random_sample(
            size=(hidden_size, visible_size)) + w_min
        decoder_w = span * np.random.random_sample(
            size=(visible_size, hidden_size)) + w_min
        pieces = [encoder_w.ravel(), decoder_w.ravel(),
                  np.zeros(hidden_size), np.zeros(visible_size)]
        self.initial_theta = np.concatenate(pieces)

    def sigmoid(self, x):
        """Return the element-wise logistic function of ``x``."""
        denominator = 1.0 + np.exp(-x)
        return 1.0 / denominator

    def unpack_theta(self, theta):
        """Split the flat vector ``theta`` into ``(W1, W2, b1, b2)``.

        The biases are returned as column vectors so that they broadcast
        across the example axis.
        """
        n_hid, n_vis = self.hidden_size, self.visible_size
        W1 = theta[self.idx_0: self.idx_1].reshape(n_hid, n_vis)
        W2 = theta[self.idx_1: self.idx_2].reshape(n_vis, n_hid)
        b1 = theta[self.idx_2: self.idx_3].reshape(n_hid, 1)
        b2 = theta[self.idx_3: self.idx_4].reshape(n_vis, 1)
        return W1, W2, b1, b2

    def cost(self, theta, visible_input):
        """Evaluate the objective and its gradient at ``theta``.

        Args:
            theta: flat parameter vector (layout described on the class).
            visible_input: array of shape ``(visible_size, m)``, one
                example per column.

        Returns:
            A two-element list ``[cost, theta_grad]`` where ``theta_grad``
            is flat and has the same layout as ``theta``.
        """
        W1, W2, b1, b2 = self.unpack_theta(theta)
        n_examples = visible_input.shape[1]

        # Forward pass: layer = sigmoid(W . previous_layer + b).
        hidden = self.sigmoid(W1.dot(visible_input) + b1)
        recon = self.sigmoid(W2.dot(hidden) + b2)

        # Reconstruction term: mean over examples of half the squared error.
        residual = -(visible_input - recon)
        per_example_error = 0.5 * np.sum(residual * residual, axis=0)
        reconstruction_cost = np.mean(per_example_error)

        # L2 weight decay on both weight matrices (biases are excluded).
        decay_cost = self.lambda_ * (np.sum(W1 * W1) + np.sum(W2 * W2)) / 2.0

        # Sparsity term: KL divergence between rho and the mean activation
        # of every hidden unit.
        mean_activation = hidden.mean(axis=1)
        kl_terms = (self.rho * np.log(self.rho / mean_activation) +
                    (1 - self.rho) *
                    np.log((1 - self.rho) / (1 - mean_activation)))
        sparsity_cost = np.sum(kl_terms)

        total_cost = reconstruction_cost + decay_cost + self.beta * sparsity_cost

        # Backward pass: delta terms for the output and hidden layers.
        sparsity_grad = self.beta * (- self.rho / mean_activation +
                                     (1 - self.rho) / (1 - mean_activation))
        delta_out = residual * recon * (1.0 - recon)
        delta_hidden = np.dot(W2.T, delta_out) + sparsity_grad[:, np.newaxis]
        delta_hidden *= hidden * (1 - hidden)

        # Average over examples; weight decay applies to the weights only.
        grad_W1 = delta_hidden.dot(visible_input.T) / n_examples
        grad_W1 += self.lambda_ * W1
        grad_W2 = delta_out.dot(hidden.T) / n_examples
        grad_W2 += self.lambda_ * W2
        grad_b1 = delta_hidden.mean(axis=1)
        grad_b2 = delta_out.mean(axis=1)

        theta_grad = np.concatenate([grad_W1.ravel(), grad_W2.ravel(),
                                     grad_b1.ravel(), grad_b2.ravel()])
        return [total_cost, theta_grad]

    def train(self, data, max_iterations):
        """Minimise ``cost`` on ``data`` with L-BFGS-B and return the
        optimised flat parameter vector.

        The search starts from ``self.initial_theta`` and is capped at
        ``max_iterations`` iterations.
        """
        solver_options = {'maxiter': max_iterations}
        result = scipy.optimize.minimize(self.cost,
                                         self.initial_theta,
                                         args=(data,),
                                         method='L-BFGS-B',
                                         jac=True,
                                         options=solver_options)
        return result.x


def normalize_data(data):
    """Rescale ``data`` into the interval [0.1, 0.9].

    The global mean is removed, values are truncated at plus/minus three
    standard deviations, and the result is mapped linearly so that
    -3 std -> 0.1 and +3 std -> 0.9.
    """
    centered = data - np.mean(data)
    limit = 3 * np.std(centered)
    capped_high = np.minimum(centered, limit)
    clipped = np.maximum(capped_high, -limit)
    unit = clipped / limit  # now within [-1, 1]
    return 0.1 + 0.4 * (unit + 1.0)


def loadMNISTImages(file_name):
    """Read an IDX3 image file and return its pixels scaled to [0, 1].

    The 16-byte header holds four big-endian unsigned 32-bit integers:
    a magic number (read but not checked), the number of images, the
    number of rows and the number of columns.  One byte per pixel follows.

    Args:
        file_name: path of the IDX3 file.

    Returns:
        A float array of shape ``(num_rows * num_cols, num_examples)``
        with one flattened image per column and values divided by 255.

    Raises:
        struct.error: if the header is shorter than 16 bytes.
        ValueError: if the file holds fewer pixel bytes than the header
            announces.
    """
    # The context manager closes the file even when parsing fails.
    with open(file_name, 'rb') as image_file:
        header = image_file.read(16)
        _magic, num_examples, num_rows, num_cols = struct.unpack('>IIII',
                                                                 header)
        pixels_per_image = num_rows * num_cols
        expected_bytes = pixels_per_image * num_examples
        # Bytes beyond the announced pixel data are ignored.
        raw = image_file.read(expected_bytes)

    if len(raw) != expected_bytes:
        raise ValueError('expected %d pixel bytes in %r but found only %d'
                         % (expected_bytes, file_name, len(raw)))

    # Images are stored back to back, so each row of the reshaped buffer is
    # one image; transpose to put one image per column.
    pixels = np.frombuffer(raw, dtype=np.uint8)
    images = pixels.reshape(num_examples, pixels_per_image).T
    # Divide by a float so the result is true division on Python 2 as well.
    return images / 255.0


def load_data(num_patches, patch_side):
    """Sample square patches from ``IMAGES.mat`` and normalise them.

    The sampling uses a fixed seed (1234), so repeated calls with the same
    arguments return the same patches.

    Args:
        num_patches: number of patches to draw.
        patch_side: side length, in pixels, of each square patch.

    Returns:
        An array of shape ``(patch_side * patch_side, num_patches)`` with
        one flattened patch per column, passed through ``normalize_data``.
    """
    # The sampling bounds below assume the 'IMAGES' variable is a
    # 512 x 512 x 10 stack of images -- TODO confirm against the data file.
    image_side = 512
    num_images = 10
    images = scipy.io.loadmat('IMAGES.mat')['IMAGES']

    rand = np.random.RandomState(1234)
    # randint's upper bound is exclusive, hence the "+ 1".  This replaces
    # the deprecated random_integers(low, high) without changing the draws.
    corners = rand.randint(0, image_side - patch_side + 1,
                           size=(num_patches, 2))
    image_number = rand.randint(0, num_images, size=num_patches)

    patches = np.zeros((patch_side * patch_side, num_patches))
    # range works on both Python 2 and 3 (xrange is Python 2 only).
    for i in range(num_patches):
        row, col = corners[i, 0], corners[i, 1]
        patch = images[row:row + patch_side, col:col + patch_side,
                       image_number[i]]
        patches[:, i] = patch.flatten()
    return normalize_data(patches)


def visualizeW1(opt_W1, vis_patch_side, hid_patch_side):
    """Show the rows of ``opt_W1`` as a square grid of grey-scale tiles.

    Args:
        opt_W1: 2-D weight array; row ``k`` is drawn in the ``k``-th cell
            of the grid (row-major order).
        vis_patch_side: each row is reshaped to
            ``(vis_patch_side, vis_patch_side)`` before drawing.
        hid_patch_side: the grid has ``hid_patch_side`` rows and columns.

    Blocks in ``matplotlib.pyplot.show()`` until the window is closed
    (when an interactive backend is in use).
    """
    # squeeze=False keeps ``axes`` a 2-D array even for a 1 x 1 grid; the
    # default returns a bare Axes object there, which has no ``.flat``.
    _, axes = matplotlib.pyplot.subplots(nrows=hid_patch_side,
                                         ncols=hid_patch_side,
                                         squeeze=False)
    for index, axis in enumerate(axes.flat):
        tile = opt_W1[index, :].reshape(vis_patch_side, vis_patch_side)
        axis.imshow(tile, cmap=matplotlib.pyplot.cm.gray,
                    interpolation='nearest')
        axis.set_frame_on(False)
        axis.set_axis_off()
    matplotlib.pyplot.show()


def run_sparse_ae():
    """Train the sparse autoencoder on image patches and plot its W1.

    Uses 10000 patches of 8 x 8 pixels, a 5 x 5 grid of hidden units and
    at most 400 optimiser iterations.
    """
    patch_side = 8
    hidden_grid = 5
    n_visible = patch_side * patch_side
    n_hidden = hidden_grid * hidden_grid

    patches = load_data(num_patches=10000, patch_side=patch_side)
    sae = sparse_autoencoder(n_visible, n_hidden,
                             lambda_=0.0001, rho=0.01, beta=3.0)
    theta = sae.train(patches, 400)

    # W1 occupies the first n_visible * n_hidden entries of theta.
    encoder_weights = theta[0: n_visible * n_hidden]
    encoder_weights = encoder_weights.reshape(n_hidden, n_visible)
    visualizeW1(encoder_weights, patch_side, hidden_grid)


def run_sparse_ae_MNIST():
    """Train the sparse autoencoder on MNIST digits and plot its W1.

    Uses the first 10000 images of ``train-images.idx3-ubyte`` (28 x 28
    pixels each), a 14 x 14 grid of hidden units and at most 400 optimiser
    iterations.
    """
    image_side = 28
    hidden_grid = 14
    n_visible = image_side * image_side
    n_hidden = hidden_grid * hidden_grid
    n_examples = 10000

    digits = loadMNISTImages('train-images.idx3-ubyte')[:, 0:n_examples]
    sae = sparse_autoencoder(n_visible, n_hidden,
                             lambda_=3e-3, rho=0.1, beta=3.0)
    theta = sae.train(digits, 400)

    # W1 occupies the first n_visible * n_hidden entries of theta.
    encoder_weights = theta[0: n_visible * n_hidden]
    encoder_weights = encoder_weights.reshape(n_hidden, n_visible)
    visualizeW1(encoder_weights, image_side, hidden_grid)


if __name__ == "__main__":
    # Script entry point: run the image-patch demo by default.  Swap the
    # two calls below to train on MNIST instead.
    run_sparse_ae()
    # run_sparse_ae_MNIST()

相关阅读:
larbin结构分析
 《钱不要存银行》OneNote
全局变量、extern/static/const区别与联系
 GIS网址，转自别处
 MSDN无法显示页面的解决
 人生没有奇迹
 开源GIS系统
 推荐：GDAL学习资源
 中国农科院资源区划所MODIS的遥感信息地面接收站
 泡沫产生的特点
原文地址：https://www.cnblogs.com/qw12/p/5817530.html