/*
 * Builds a stump classifier from the given training data.
 *
 * For every component (feature) the sample indices are sorted with
 * icvSortIndexedValArray_32s and the threshold-search routine selected by
 * trainParams->error is invoked.  Whenever that routine returns non-zero the
 * current component index is stored in stump->compidx.
 *
 * Parameters:
 *   trainData              - CV_32FC1 matrix of feature values (must not be NULL).
 *   flags                  - layout selector tested with CV_IS_ROW_SAMPLE: when it
 *                            holds, every row is a sample and every column a
 *                            component; otherwise every column is a sample.
 *   trainClasses           - CV_32FC1 vector with one entry per sample (1 x m, or
 *                            m rows).
 *   (typeMask)             - unused.
 *   missedMeasurementsMask - must be NULL.
 *   compIdx                - must be NULL.
 *   sampleIdx              - optional CV_32FC1 vector of sample indices; each value
 *                            is truncated to int.  When NULL, all m samples are
 *                            used in order 0..m-1.
 *   weights                - CV_32FC1 vector with one weight per sample (1 x m, or
 *                            m rows).
 *   trainParams            - must not be NULL; accessed as CvStumpTrainParams
 *                            (fields `error` and `type` are read).
 *
 * Returns a newly allocated CvStumpClassifier cast to CvClassifier*; its
 * `release` callback is set to cvReleaseStumpClassifier.
 *
 * All argument checks use CV_Assert so that they stay active in builds where
 * plain assert() is compiled out, and they all run before any allocation.
 * NOTE(review): the index values stored in sampleIdx are not range-checked
 * here - callers must supply values in [0, m).
 */
CV_BOOST_IMPL
CvClassifier* cvCreateStumpClassifier( CvMat* trainData,
                                       int flags,
                                       CvMat* trainClasses,
                                       CvMat* /*typeMask*/,
                                       CvMat* missedMeasurementsMask,
                                       CvMat* compIdx,
                                       CvMat* sampleIdx,
                                       CvMat* weights,
                                       CvClassifierTrainParams* trainParams )
{
    CvStumpClassifier* stump = NULL;
    int m = 0; /* number of samples */
    int n = 0; /* number of components */
    uchar* data = NULL;
    int cstep   = 0; /* byte stride between consecutive components of one sample */
    int sstep   = 0; /* byte stride between consecutive samples of one component */
    uchar* ydata = NULL;
    int ystep    = 0;
    uchar* idxdata = NULL;
    int idxstep    = 0;
    int l = 0; /* number of indices */
    uchar* wdata = NULL;
    int wstep    = 0;

    int* idx = NULL;
    int i = 0;

    float sumw   = FLT_MAX;
    float sumwy  = FLT_MAX;
    float sumwyy = FLT_MAX;

    CV_Assert( trainData != NULL );
    CV_Assert( CV_MAT_TYPE( trainData->type ) == CV_32FC1 );
    CV_Assert( trainClasses != NULL );
    CV_Assert( CV_MAT_TYPE( trainClasses->type ) == CV_32FC1 );
    CV_Assert( missedMeasurementsMask == NULL );
    CV_Assert( compIdx == NULL );
    CV_Assert( weights != NULL );
    CV_Assert( CV_MAT_TYPE( weights->type ) == CV_32FC1 );
    CV_Assert( trainParams != NULL );

    data = trainData->data.ptr;
    if( CV_IS_ROW_SAMPLE( flags ) )
    {
        /* one row per sample: components are adjacent elements within a row */
        cstep = CV_ELEM_SIZE( trainData->type );
        sstep = trainData->step;
        m = trainData->rows;
        n = trainData->cols;
    }
    else
    {
        /* one column per sample: samples are adjacent elements within a row */
        sstep = CV_ELEM_SIZE( trainData->type );
        cstep = trainData->step;
        m = trainData->cols;
        n = trainData->rows;
    }

    /* class labels: either a single row of m elements or m rows */
    ydata = trainClasses->data.ptr;
    if( trainClasses->rows == 1 )
    {
        CV_Assert( trainClasses->cols == m );
        ystep = CV_ELEM_SIZE( trainClasses->type );
    }
    else
    {
        CV_Assert( trainClasses->rows == m );
        ystep = trainClasses->step;
    }

    /* weights: same two layouts as the class labels */
    wdata = weights->data.ptr;
    if( weights->rows == 1 )
    {
        CV_Assert( weights->cols == m );
        wstep = CV_ELEM_SIZE( weights->type );
    }
    else
    {
        CV_Assert( weights->rows == m );
        wstep = weights->step;
    }

    /* without sampleIdx every sample is used */
    l = m;
    if( sampleIdx != NULL )
    {
        CV_Assert( CV_MAT_TYPE( sampleIdx->type ) == CV_32FC1 );

        idxdata = sampleIdx->data.ptr;
        if( sampleIdx->rows == 1 )
        {
            l = sampleIdx->cols;
            idxstep = CV_ELEM_SIZE( sampleIdx->type );
        }
        else
        {
            l = sampleIdx->rows;
            idxstep = sampleIdx->step;
        }
        CV_Assert( l <= m );
    }

    idx = (int*) cvAlloc( l * sizeof( int ) );
    stump = (CvStumpClassifier*) cvAlloc( sizeof( CvStumpClassifier) );

    /* START */
    memset( (void*) stump, 0, sizeof( CvStumpClassifier ) );

    stump->eval = cvEvalStumpClassifier;
    stump->tune = NULL;
    stump->save = NULL;
    stump->release = cvReleaseStumpClassifier;

    stump->lerror = FLT_MAX;
    stump->rerror = FLT_MAX;
    stump->left  = 0.0F;
    stump->right = 0.0F;

    /* copy indices (stored as float in sampleIdx, truncated to int) */
    if( sampleIdx != NULL )
    {
        for( i = 0; i < l; i++ )
        {
            idx[i] = (int) *((float*) (idxdata + i*idxstep));
        }
    }
    else
    {
        for( i = 0; i < l; i++ )
        {
            idx[i] = i;
        }
    }

    for( i = 0; i < n; i++ )
    {
        CvValArray va;

        /* view of component i across all samples */
        va.data = data + i * ((size_t) cstep);
        va.step = sstep;
        /* presumably reorders idx by the values of component i - confirm
           against icvSortIndexedValArray_32s */
        icvSortIndexedValArray_32s( idx, l, &va );

        /* NOTE(review): trainParams->error selects the search routine from the
           findStumpThreshold_32s table; the original comment says it chooses
           among four impurity measures (entropy, Gini, misclassification,
           least squares) - confirm against the table definition.  The routine
           presumably returns non-zero when it updated the outputs with a
           better split. */
        if( findStumpThreshold_32s[(int) ((CvStumpTrainParams*) trainParams)->error]
              ( data + i * ((size_t) cstep), sstep,
                wdata, wstep, ydata, ystep, (uchar*) idx, sizeof( int ), l,
                &(stump->lerror), &(stump->rerror),
                &(stump->threshold), &(stump->left), &(stump->right),
                &sumw, &sumwy, &sumwyy ) )
        {
            stump->compidx = i;
        }
    } /* for each component */

    /* END */

    cvFree( &idx );

    if( ((CvStumpTrainParams*) trainParams)->type == CV_CLASSIFICATION_CLASS )
    {
        /* map each branch value to +1 when it is >= 0.5, otherwise to -1 */
        stump->left = 2.0F * (stump->left >= 0.5F) - 1.0F;
        stump->right = 2.0F * (stump->right >= 0.5F) - 1.0F;
    }

    return (CvClassifier*) stump;
}