• Deep learning: deriving the RBM formulas + source code ----- C++


    I spent quite a few days deriving the RBM formulas; all I can say is that math is my weak point. A rough version of the derivation is given at the end. After reading yusugomori's Java source, I spent another day writing this C++ version, whose overall approach follows yusugomori's. Java and C++ turn out to be quite similar in many places. I am just a beginner, so mistakes are inevitable; corrections are welcome.

    Source: http://www.cnblogs.com/wn19910213/p/3441707.html

    RBM.h

    #ifndef RBM_H
    #define RBM_H

    #include <iostream>

    using namespace std;

    class RBM
    {
        public:
        size_t N;            //number of training samples
        size_t n_visible;    //number of visible units
        size_t n_hidden;     //number of hidden units
        double **W;          //weight matrix, n_hidden x n_visible
        double *hbias;       //hidden-layer biases
        double *vbias;       //visible-layer biases

        RBM(size_t,size_t,size_t,double**,double*,double*);
        ~RBM();

        void contrastive_divergence(int*,double,int);
        void sample_h_given_v(int*,double*,int*);
        double sigmoid(double);
        double Vtoh_sigm(int*,double*,double);
        void gibbs_hvh(int*,double*,int*,double*,int*);
        double HtoV_sigm(int*,int,double);
        void sample_v_given_h(int*,double*,int*);
        void reconstruct(int*,double*);

        private:
    };

    #endif // RBM_H

    RBM.cpp

    #include <iostream>
    #include <stdlib.h>
    #include <stdio.h>
    #include <cmath>
    #include <cstring>
    #include "RBM.h"
    
    using namespace std;
    
    void test_rbm();
    double uniform(double,double);
    int binomial(double);
    
    int main()
    {
        test_rbm();
        return 0;
    }
    
    //start
    void test_rbm()
    {
        srand(0);
        size_t train_N = 6;
        size_t test_N = 2;
        size_t n_visible = 6;
        size_t n_hidden = 3;
    
        double learning_rate = 0.1;
        size_t training_num = 1000;
        int k = 1;
    
        int train_data[6][6] = {
        {1, 1, 1, 0, 0, 0},
        {1, 0, 1, 0, 0, 0},
        {1, 1, 1, 0, 0, 0},
        {0, 0, 1, 1, 1, 0},
        {0, 0, 1, 0, 1, 0},
        {0, 0, 1, 1, 1, 0}
        };
    
        RBM rbm(train_N,n_visible,n_hidden,NULL,NULL,NULL);    //Step 1: construct the RBM
    
        for(size_t j=0;j<training_num;j++)
        {
            for(size_t i=0;i<train_N;i++)
            {
                rbm.contrastive_divergence(train_data[i],learning_rate,k);  //Step 2: train on the data set
            }
        }
    
        //test data
        int test_data[2][6] = {
            {1,1,0,0,0,0},
            {0,0,0,1,1,0}
        };
    
        double reconstructed_data[2][6];
    
        for(size_t i=0;i<test_N;i++)
        {
            rbm.reconstruct(test_data[i],reconstructed_data[i]);    //Step 3: reconstruct the data using the learned weights and biases: map the test sample to the hidden layer, then map it back to the visible layer
            for(size_t j=0;j<n_visible;j++)
            {
                cout << reconstructed_data[i][j] << "  ";
            }
            cout << endl;
        }
    }
    
    void RBM::reconstruct(int* test_data,double* reconstructed_data)
    {
        double* h = new double[n_hidden];
        double temp;
    
        for(size_t i=0;i<n_hidden;i++)
        {
            h[i] = Vtoh_sigm(test_data,W[i],hbias[i]);
        }
    
        for(size_t i=0;i<n_visible;i++)
        {
            temp = 0.0;
            for(size_t j=0;j<n_hidden;j++)
            {
                temp += W[j][i] * h[j];
            }
            temp += vbias[i];
            reconstructed_data[i] =  sigmoid(temp);
        }
        delete[] h;
    }
    
    //Step 2.1: CD-k
    void RBM::contrastive_divergence(int *train_data,double learning_rate,int k)
    {
        double* ph_sigm_out = new double[n_hidden];
        int* ph_sample = new int[n_hidden];
        double* nv_sigm_outs = new double[n_visible];
        int* nv_samples = new int[n_visible];
        double* nh_sigm_outs = new double[n_hidden];
        int* nh_samples = new int[n_hidden];
    
        sample_h_given_v(train_data,ph_sigm_out,ph_sample);        //obtain h0
    
        for(int i=0;i<k;i++)             //Hinton notes that sampling only as far as v1 already gives a good enough approximation, so k=1
        {
            if(i == 0)
            {
                gibbs_hvh(ph_sample,nv_sigm_outs,nv_samples,nh_sigm_outs,nh_samples); //obtain v1, h1
            }
            else
            {
                gibbs_hvh(nh_samples,nv_sigm_outs,nv_samples,nh_sigm_outs,nh_samples);
            }
        }
    
        //Update the weights and both bias vectors. In Hinton's CD-k, v0 is the original input x,
        //h0 is ph_sigm_out, approximately the probability of h given v0,
        //v1 is x after one round trip, approximately the probability of v given h0,
        //and h1 likewise. CD-k boils down to computing these three quantities, which approximate the gradient well. As for why, I don't know either.
        for(size_t i=0;i<n_hidden;i++)
        {
            for(size_t j=0;j<n_visible;j++)
            {
                //from the weight-update formula: P(hi=1|v) here is h0, while P(hi=1|vk) and vk correspond to h1 and v1
                W[i][j] += learning_rate * (ph_sigm_out[i] * train_data[j] - nh_sigm_outs[i] * nv_samples[j]) / N;
            }
            hbias[i] += learning_rate * (ph_sample[i] - nh_sigm_outs[i]) / N;
        }
    
        for(size_t i=0;i<n_visible;i++)
        {
            vbias[i] += learning_rate * (train_data[i] - nv_samples[i]) / N;
        }
    
        delete[] ph_sigm_out;
        delete[] ph_sample;
        delete[] nv_sigm_outs;
        delete[] nv_samples;
        delete[] nh_sigm_outs;
        delete[] nh_samples;
    }
    
    void RBM::gibbs_hvh(int* ph_sample,double* nv_sigm_outs,int* nv_samples,double* nh_sigm_outs,int* nh_samples)
    {
        sample_v_given_h(ph_sample,nv_sigm_outs,nv_samples);
        sample_h_given_v(nv_samples,nh_sigm_outs,nh_samples);
    }
    
    void RBM::sample_v_given_h(int* h0_sample,double* nv_sigm_outs,int* nv_samples)
    {
        for(size_t i=0;i<n_visible;i++)
        {
            nv_sigm_outs[i] =  HtoV_sigm(h0_sample,i,vbias[i]);
            nv_samples[i] = binomial(nv_sigm_outs[i]);
        }
    }
    
    double RBM::HtoV_sigm(int* h0_sample,int i,double vbias)
    {
        double temp = 0.0;
        for(size_t j=0;j<n_hidden;j++)
        {
            temp += W[j][i] * h0_sample[j];
        }
        temp += vbias;
        return sigmoid(temp);
    }
    
    void RBM::sample_h_given_v(int* train_data,double* ph_sigm_out,int* ph_sample)
    {
        for(size_t i=0;i<n_hidden;i++)
        {
            ph_sigm_out[i] = Vtoh_sigm(train_data,W[i],hbias[i]);
            ph_sample[i] = binomial(ph_sigm_out[i]);
        }
    }
    
    int binomial(double p)
    {
        if(p<0 || p>1){
            return 0;
        }
        double r = rand()/(RAND_MAX + 1.0);
        if(r < p)
        {
            return 1;
        }
        else
        {
            return 0;
        }
    }
    
    double RBM::Vtoh_sigm(int* train_data,double* W,double hbias)
    {
        double temp = 0.0;
        for(size_t i=0;i<n_visible;i++)
        {
            temp += W[i] * train_data[i];
        }
        temp += hbias;
        return sigmoid(temp);
    }
    
    double RBM::sigmoid(double x)
    {
        return 1.0/(1.0 + exp(-x));
    }
    
    RBM::RBM(size_t train_N,size_t n_v,size_t n_h,double **w,double *hb,double *vb)
    {
        N = train_N;
        n_visible = n_v;
        n_hidden = n_h;
    
        if(w == NULL)
        {
            W = new double*[n_hidden];
            double a = 1.0/n_visible;
            for(size_t i=0;i<n_hidden;i++)
            {
                W[i] = new double[n_visible];
            }
            for(size_t i=0;i<n_hidden;i++)
            {
                for(size_t j=0;j<n_visible;j++)
                {
                    W[i][j] = uniform(-a,a);
                }
            }
        }
        else
        {
            W = w;
        }
    
        if(hb == NULL)
        {
            hbias = new double[n_hidden];
            for(size_t i=0;i<n_hidden;i++)
            {
                hbias[i] = 0.0;
            }
        }
        else
        {
            hbias = hb;
        }
    
        if(vb == NULL)
        {
            vbias = new double[n_visible];
            for(size_t i=0;i<n_visible;i++)
            {
                vbias[i] = 0.0;
            }
        }
        else
        {
            vbias = vb;
        }
    }
    
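    //note: the destructor also frees parameters that were supplied externally through the constructor; the class takes ownership of W, hbias and vbias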
    RBM::~RBM()
    {
        for(size_t i=0;i<n_hidden;i++)
        {
            delete[] W[i];
        }
        delete[] W;
        delete[] hbias;
        delete[] vbias;
    }
    
    double uniform(double min,double max)
    {
        return rand() / (RAND_MAX + 1.0) * (max - min) + min;
    }

    Derivation:
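
    The hand-written derivation itself is not reproduced here (it appeared as images in the original post). As a hedged summary, these are the standard RBM/CD-k formulas that the code above computes, writing b for vbias, c for hbias, \eta for the learning rate and N for the number of training samples:

    % Energy and joint distribution of an RBM
    E(v,h) = -\sum_j b_j v_j - \sum_i c_i h_i - \sum_{i,j} h_i W_{ij} v_j,
    \qquad P(v,h) = \frac{e^{-E(v,h)}}{Z}

    % Conditionals implemented by Vtoh_sigm / HtoV_sigm
    P(h_i = 1 \mid v) = \sigma\!\Big(\sum_j W_{ij} v_j + c_i\Big), \qquad
    P(v_j = 1 \mid h) = \sigma\!\Big(\sum_i W_{ij} h_i + b_j\Big)

    % CD-1 updates applied in contrastive_divergence
    % (v^0: input; h^0: sample from P(h|v^0); v^1, h^1: after one Gibbs step)
    \Delta W_{ij} = \frac{\eta}{N}\Big(P(h_i{=}1 \mid v^0)\,v_j^0 - P(h_i{=}1 \mid v^1)\,v_j^1\Big)

    \Delta c_i = \frac{\eta}{N}\Big(h_i^0 - P(h_i{=}1 \mid v^1)\Big), \qquad
    \Delta b_j = \frac{\eta}{N}\Big(v_j^0 - v_j^1\Big)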
