• Discrete AdaBoost 的 C++ 实现


    参考之前的博文,下面给出 AdaBoost 算法的 C++ 实现代码。

    //adaboost.h
    #ifndef ADABOOST_H
    #define ADABOOST_H
    
    #include<cmath>
    #include<iostream>
    #include<vector>
    #include<assert.h>
    
    
    using namespace std;
    
    // Scalar type used for every feature value.
    #define FEATURETYPE double
    
    
    // One sample: a list of feature values plus its class label.
    //
    // The constructor only records the declared dimensionality; it does not
    // size `fea`. Callers append the feature values themselves.
    struct FeaVec
    {
    	unsigned int dim;             // declared number of features
    	std::vector<FEATURETYPE>fea;  // feature values, empty after construction
    	int label;                    // class label; only -1 and 1 are expected, 0 means "not assigned yet"
    
    	// d: declared number of features for this sample.
    	// `label` is zero-initialised so a fresh sample never carries an
    	// indeterminate value.
    	FeaVec(unsigned int d) :dim(d), label(0)
    	{
    	}
    };
    
    class weakclassfier;
    
    // Boosted classifier built from weakclassfier decision stumps.
    //
    // Owns the sample-weight array `W` and every weak classifier stored in
    // `classfier` through raw pointers, so instances must not be copied.
    class adaboost
    {
    
    public:
    	friend class weakclassfier;
    	adaboost();
    	virtual ~adaboost();
    	// Fits the weak classifiers on the samples given to load_trainset().
    	void train();
    	// Returns the predicted label (1 or -1) for a single sample.
    	int classify(FeaVec data);
    	// Copies *data into the internal training set.
    	void load_trainset(vector<FeaVec>*data);
    
    protected:
    
    private:
    	// Copying is disabled: a member-wise copy would share W and the weak
    	// classifier pointers, and both destructors would then free them.
    	// Declared but intentionally never defined.
    	adaboost(const adaboost&);
    	adaboost& operator=(const adaboost&);
    
    	double*W;                               // per-sample weights, allocated by train()
    	int dim;                                // feature dimensionality
    	std::vector<FeaVec>trainset;            // private copy of the training samples
    	std::vector<weakclassfier*>classfier;   // owned weak classifiers
    	double aggri_error;
    
    };
    
    #endif // ADABOOST_H


    //adaboost.cpp
    #include "stdafx.h"
    #include "adaboost.h"
    
    // Decision stump used as the weak learner: thresholds one feature and
    // predicts 1 on one side of the threshold and -1 on the other.
    class weakclassfier
    {
    public:
    	friend class adaboost;
    	// ada: the boosting object whose training set and weights build() reads.
    	//
    	// Every member is given a defined value here. In particular `predicted`
    	// must start as NULL because build() and the destructor both inspect it.
    	weakclassfier(adaboost*ada)
    		: greaterthan(true), dim(0), threshold(0), min_error_rate(1000000),
    		predicted(NULL), alpha(0), ada(ada)
    	{
    	}
    	// Searches for the stump with the lowest weighted error on the training set.
    	void build();
    	// Classifies every sample in `data` by comparing feature k to `threshold`.
    	// Returns a heap-allocated vector that the caller must delete.
    	std::vector<int>* stumpclassify(int const k, double const threshold,
    		vector<FeaVec>& data, bool greatthan);
    	~weakclassfier();
    private:
    	// Copying is disabled: a copy would share `predicted` and both
    	// destructors would delete it. Declared but intentionally never defined.
    	weakclassfier(const weakclassfier&);
    	weakclassfier& operator=(const weakclassfier&);
    
    	bool greaterthan;            // selects the direction of the inequality
    	int dim;                     // feature index this stump splits on
    	double threshold;
    	double min_error_rate;       // weighted error of this stump on the training set
    	std::vector<int>*predicted;  // predictions on the training set (owned)
    	double alpha;                // weight of this stump in the strong classifier
    	adaboost* ada;
    };
    // Releases the cached training-set predictions, if any are still held.
    weakclassfier::~weakclassfier()
    {
    	// delete on a null pointer does nothing, so no explicit check is needed.
    	delete predicted;
    }
    void weakclassfier::build()
    {
    	double minerror = 100000;
    	for (int i = 0; i < ada->dim; i++)//外循环次数少
    	{
    		double min = 100000;
    		double max = -100000;
    		for (int j = 0; j<ada->trainset.size(); j++)
    		{
    			if (ada->trainset[j].fea[i]>max)
    				max = ada->trainset[j].fea[i];
    			if (ada->trainset[j].fea[i] < min)
    				min = ada->trainset[j].fea[i];
    		}
    
    		double step = (max - min) / double(10);
    		for (double j = min; j < max;)
    		{
    			j += step;
    			double current_error = 0;
    			bool flag = false;
    			vector<int>*aa = stumpclassify(i, j, ada->trainset, true);
    			for (int k = 0; k < ada->trainset.size(); k++)
    				current_error += ((*aa)[k] != ada->trainset[k].label) ? ada->W[k] : 0;
    			if (current_error < min_error_rate)
    			{
    				min_error_rate = current_error;
    				threshold = j;
    				greaterthan = true;
    				dim = i;
    				if (predicted != NULL)
    					delete predicted;
    				predicted = aa;
    				flag = true;
    			}
    			current_error = 0;
    			aa = stumpclassify(i, j, ada->trainset, false);
    			for (int k = 0; k < ada->trainset.size(); k++)
    				current_error += ((*aa)[k] != ada->trainset[k].label) ? ada->W[k] : 0;
    			//current_error += abs((*aa)[k] -ada->trainset[k].label) *ada->W[k];
    			if (current_error < min_error_rate)
    			{
    				min_error_rate = current_error;
    				threshold = j;
    				greaterthan = false;
    				dim = i;
    				if (predicted != NULL)
    					delete predicted;
    				predicted = aa;
    				flag = true;
    			}
    			if (!flag)//new和delete必须配套使用
    				delete aa;
    		}
    	}
    	assert(min_error_rate < 0.5);
    }
    
    // Applies a single-feature threshold test to every sample in `data`.
    //
    // k:         index of the feature to test.
    // threshold: value the feature is compared against.
    // greatthan: when true, samples with feature < threshold get -1;
    //            when false, samples with feature > threshold get -1.
    // Every other sample (including feature == threshold) gets 1.
    //
    // Returns a newly allocated vector with one label per sample; the caller
    // owns it and must delete it.
    std::vector<int>* weakclassfier::stumpclassify(int const k, double const threshold,
    	vector<FeaVec>&data, bool greatthan)
    {
    	// Start with every sample on the positive side.
    	std::vector<int>*labels = new std::vector<int>(data.size(), 1);
    
    	for (std::size_t idx = 0; idx < data.size(); ++idx)
    	{
    		const double value = data[idx].fea[k];
    		const bool wrongSide = greatthan ? (value < threshold) : (value > threshold);
    		if (wrongSide)
    			(*labels)[idx] = -1;
    	}
    	return labels;
    }
    
    
    
    // Creates an empty, untrained booster.
    //
    // W must start as NULL: the destructor releases it unconditionally, and it
    // is only allocated once train() runs.
    adaboost::adaboost()
    	: W(NULL), dim(0), aggri_error(0)
    {
    }
    
    // Frees every stored weak classifier and the sample-weight array.
    adaboost::~adaboost()
    {
    	for (std::vector<weakclassfier*>::iterator it = classfier.begin(); it != classfier.end(); ++it)
    		delete *it;
    	// delete[] on a null pointer does nothing, so no explicit check is needed.
    	delete[] W;
    }
    
    void adaboost::train()
    {
    	W = new double[trainset.size()];
    	//全部初始化为0,用memset可以,但某一特定值,只能用循环了
    	//memset(W, double(1) / double(trainset.size()), trainset.size()*sizeof(double));
    	for (int i = 0; i < trainset.size(); i++)
    		W[i] = double(1) / double(trainset.size());
    	vector<double> aggrigate;
    	aggrigate.resize(trainset.size());
    
    	while (classfier.size() < 4)
    	{
    		aggri_error = 0;
    		weakclassfier*weak = new weakclassfier(this);
    		weak->build();
    		if (weak->min_error_rate < 0.5)
    		{
    			//弱分类器的准确率越高,其权重也越大
    			weak->alpha = (0.5*log((1.0 - weak->min_error_rate) / (weak->min_error_rate + 1e-16)));
    			classfier.push_back(weak);
    			double sumW = 0;
    			for (int j = 0; j < trainset.size(); j++)
    			{
    				//根据当前弱分类器分类结果将错分样本的权重提升
    				W[j] *= exp(weak->alpha*((*weak->predicted)[j] == trainset[j].label ? -1 : 1));
    				sumW += W[j];
    			}
    			for (int j = 0; j < trainset.size(); j++)
    			{
    				W[j] /= (sumW + 0.00000001);
    				//	aggrigate[j] += weak->alpha*(*weak->predicted)[j];
    				//aggri_error += ((aggrigate[j] > 0) ? 1 : -1) == trainset[j].label ? 0 : 1;
    			}
    			//aggri_error /= double(trainset.size());
    			//	if (aggri_error == 0)
    			//	break;
    		}
    		delete weak->predicted;
    	}
    }
    
    // Classifies one sample with the trained ensemble.
    //
    // Each weak classifier votes 1 or -1 and the votes are summed with the
    // classifier's alpha as weight. Returns 1 when the sum is strictly
    // positive, otherwise -1 (which is also the result when no classifier
    // has been trained).
    int adaboost::classify(FeaVec data)
    {
    	// stumpclassify works on a batch, so wrap the sample in a one-element vector.
    	vector<FeaVec> single(1, data);
    	double score = 0;
    
    	for (std::vector<weakclassfier*>::iterator it = classfier.begin(); it != classfier.end(); ++it)
    	{
    		weakclassfier*stump = *it;
    		vector<int>*vote = stump->stumpclassify(stump->dim, stump->threshold,
    			single, stump->greaterthan);
    		score += (*vote)[0] * stump->alpha;
    		delete vote;
    	}
    
    	if (score > 0)
    		return 1;
    	return -1;
    }
    
    
    // Copies the given samples into the internal training set.
    //
    // data: samples to train on. The feature dimensionality is taken from the
    //       `dim` field of the last sample. A null pointer or an empty vector
    //       leaves the booster with no samples and a dimensionality of 0,
    //       rather than calling back() on an empty container.
    void adaboost::load_trainset(vector<FeaVec>*data)
    {
    	if (data == NULL || data->empty())
    	{
    		trainset.clear();
    		dim = 0;
    		return;
    	}
    	trainset = *data;
    	dim = static_cast<int>(data->back().dim);
    }
    
    







    //main
    #include "stdafx.h"
    #include"adaboost.h"
    
    int _tmain(int argc, _TCHAR* argv[])
    {
    	cout << double(1) / double(5) << endl;
    	FeaVec aa(2), bb(2), cc(2), dd(2),ee(2);
    	aa.fea.push_back(2);
    	aa.fea.push_back(1.1);
    	aa.label = 1;
    	bb.fea.push_back(1.3);
    	bb.fea.push_back(1.0);
    	bb.label = -1;
    	cc.fea.push_back(1.0);
    	cc.fea.push_back(1.0);
    	cc.label = -1;
    	dd.fea.push_back(2);
    	dd.fea.push_back(1.0);
    	dd.label = 1;
    	ee.fea.push_back(1);
    	ee.fea.push_back(2.1);
    	ee.label = 1;
    	vector<FeaVec>pp;
    	pp.push_back(aa);
    	pp.push_back(bb);
    	pp.push_back(cc);
    	pp.push_back(dd);
    	pp.push_back(ee);
    	adaboost ada;
    	ada.load_trainset(&pp);
    	ada.train();
    	FeaVec ff(2);
    	ff.fea.push_back(0.9);
    	ff.fea.push_back(1.1);
    	int a = ada.classify(ff);
    
    	return 0;
    }
    


    版权声明:

  • 相关阅读:
    Java反射机制
    dd命令
    分区工具fdisk,gdisk,parted
    硬盘初识
    shell脚本之算术运算和逻辑运算
    linux防火墙简单的使用
    压缩解压打包工具基础
    find命令基础讲解
    个人数据备份方案
    数据库的表名字段名大小写问题
  • 原文地址:https://www.cnblogs.com/walccott/p/4956870.html
Copyright © 2020-2023  润新知