先贴上我对Opencv3.1中sift源码的注释吧,虽然还有很多没看懂。先从detectAndCompute看起
void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints) { int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0; Mat image = _image.getMat(), mask = _mask.getMat(); if( image.empty() || image.depth() != CV_8U ) CV_Error( Error::StsBadArg, "image is empty or has incorrect depth (!=CV_8U)" ); if( !mask.empty() && mask.type() != CV_8UC1 ) CV_Error( Error::StsBadArg, "mask has incorrect type (!=CV_8UC1)" ); if( useProvidedKeypoints ) { firstOctave = 0; int maxOctave = INT_MIN; for( size_t i = 0; i < keypoints.size(); i++ ) { int octave, layer; float scale; unpackOctave(keypoints[i], octave, layer, scale); firstOctave = std::min(firstOctave, octave); maxOctave = std::max(maxOctave, octave); actualNLayers = std::max(actualNLayers, layer-2); } firstOctave = std::min(firstOctave, 0); CV_Assert( firstOctave >= -1 && actualNLayers <= nOctaveLayers ); actualNOctaves = maxOctave - firstOctave + 1; } Mat base = createInitialImage(image, firstOctave < 0, (float)sigma); //是否double size再模糊,返回模糊后的float型Mat std::vector<Mat> gpyr, dogpyr; // nOcatves = actualNOctaves 或者 cvRound[log(min(cols, rows)) / log(2) - 2] + 1 -- 如果是500 则大概返回8 int nOctaves = actualNOctaves > 0 ? actualNOctaves : cvRound(std::log( (double)std::min( base.cols, base.rows ) ) / std::log(2.) - 2) - firstOctave; //double t, tf = getTickFrequency(); //t = (double)getTickCount(); buildGaussianPyramid(base, gpyr, nOctaves); //构建尺度金字塔 buildDoGPyramid(gpyr, dogpyr); //构建DOG金字塔 //t = (double)getTickCount() - t; //printf("pyramid construction time: %g ", t*1000./tf); if( !useProvidedKeypoints ) { //t = (double)getTickCount(); findScaleSpaceExtrema(gpyr, dogpyr, keypoints); //找到极值点,过滤极值点,根据极值点主方向计算keypoints的angle KeyPointsFilter::removeDuplicated( keypoints ); //删去坐标相同,size相同(layer_id),角度相同的点 if( nfeatures > 0 ) KeyPointsFilter::retainBest(keypoints, nfeatures); //takes keypoints and culls them by the response //t = (double)getTickCount() - t; //printf("keypoint detection time: %g ", t*1000./tf); if( firstOctave < 0 ) for( size_t i = 0; i < keypoints.size(); i++ ) { KeyPoint& kpt = keypoints[i]; float scale = 1.f/(float)(1 << -firstOctave); //这里具体在做什么也不清楚 kpt.octave = (kpt.octave & ~255) | ((kpt.octave + firstOctave) & 255); kpt.pt *= scale; kpt.size *= scale; } if( !mask.empty() ) KeyPointsFilter::runByPixelsMask( keypoints, mask ); } else { // filter keypoints by mask //KeyPointsFilter::runByPixelsMask( keypoints, mask ); } if( _descriptors.needed() ) //如果要计算描述子 { //t = (double)getTickCount(); int dsize = descriptorSize(); _descriptors.create((int)keypoints.size(), dsize, CV_32F); //初始化内存 Mat descriptors = _descriptors.getMat(); calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave); //传入 //t = (double)getTickCount() - t; //printf("descriptor extraction time: %g ", t*1000./tf); } }
该函数分别调用了
createInitialImage
buildGaussianPyramid
buildDoGPyramid
findScaleSpaceExtrema 找到极值点
adjustLocalExtrema 去掉不好的极值点
calcOrientationHist 计算主方向
KeyPointsFilter::removeDuplicated 删除重复的点
KeyPointsFilter::retainBest 根据响应保留最好的点
calcDescriptors 计算最终的方向描述符
然后各自的源码注释: createInitialImage就是将初始图片进行两倍放大然后模糊作为高斯金字塔的底。
//如果是要doubleImageSize就resize一下再模糊,否则直接模糊 static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma ) //image, firstOctave < 0, (float)sigma { Mat gray, gray_fpt; if( img.channels() == 3 || img.channels() == 4 ) cvtColor(img, gray, COLOR_BGR2GRAY); //转成灰度图像 else img.copyTo(gray); //拷贝到gray中 //转成float型进行高斯模糊,但是为什么不转到0-1 gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0); // SIFT_FIXPT_SCALE -> 1 float sig_diff; if( doubleImageSize ) { sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f) ); // SIFT_INIT_SIGMA = 0.5f Mat dbl; resize(gray_fpt, dbl, Size(gray.cols*2, gray.rows*2), 0, 0, INTER_LINEAR); //resize成两倍 GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff); //sig_diff=sigma^2-1, 0.01f // can differ but they both must be positive and odd. Or, they can be zero’s and then they are computed from sigma* return dbl; } else { sig_diff = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01f) ); GaussianBlur(gray_fpt, gray_fpt, Size(), sig_diff, sig_diff); return gray_fpt; } }
buildGaussianPyramid 就是构建高斯金字塔,模拟人眼的尺度空间吧,这几个还比较好理解。
void SIFT_Impl::buildGaussianPyramid( const Mat& base, std::vector<Mat>& pyr, int nOctaves ) const //base, gpyr, nOctaves { std::vector<double> sig(nOctaveLayers + 3); //nOctaveLayers是输入, +3是为了保证尺度连续 pyr.resize(nOctaves*(nOctaveLayers + 3)); // precompute Gaussian sigmas using the following formula: // sigma_{total}^2 = sigma_{i}^2 + sigma_{i-1}^2 sig[0] = sigma;//sig每层的基础sigma double k = std::pow( 2., 1. / nOctaveLayers ); //k因子,== 2^(1/s) s is number of layers for( int i = 1; i < nOctaveLayers + 3; i++ ) { double sig_prev = std::pow(k, (double)(i-1))*sigma; // k^(i-1) * sigma 每层的sigma double sig_total = sig_prev*k; //k^i * sigma sig[i] = std::sqrt(sig_total*sig_total - sig_prev*sig_prev); //sqrt(k^2-1) * sig_prev 这里不知道为什么这样算,和博客中的计算方法不一样 } for( int o = 0; o < nOctaves; o++ ) { for( int i = 0; i < nOctaveLayers + 3; i++ ) { Mat& dst = pyr[o*(nOctaveLayers + 3) + i]; //取出第o个金字塔的第i层 if( o == 0 && i == 0 ) //如果是第0个金字塔的第0层就直接赋值就好 dst = base; // base of new octave is halved image from end of previous octave else if( i == 0 ) //如果是下一个金字塔的第0层,需要用第nOctaveLayers层进行缩放 { const Mat& src = pyr[(o-1)*(nOctaveLayers + 3) + nOctaveLayers]; resize(src, dst, Size(src.cols/2, src.rows/2), //缩小一倍 0, 0, INTER_NEAREST); } else { const Mat& src = pyr[o*(nOctaveLayers + 3) + i-1]; //平常的层就由其上一层进行模糊得到 GaussianBlur(src, dst, Size(), sig[i], sig[i]); } } } }
buildDoGPyramid,计算DOG
void SIFT_Impl::buildDoGPyramid( const std::vector<Mat>& gpyr, std::vector<Mat>& dogpyr ) const { int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3); //从高斯尺度金字塔中计算出有多少座金字塔 dogpyr.resize( nOctaves*(nOctaveLayers + 2) ); //DOG金字塔的层数是高斯金字塔层数-1 for( int o = 0; o < nOctaves; o++ ) { for( int i = 0; i < nOctaveLayers + 2; i++ ) { const Mat& src1 = gpyr[o*(nOctaveLayers + 3) + i]; //取该层 const Mat& src2 = gpyr[o*(nOctaveLayers + 3) + i + 1]; //取上面一层 Mat& dst = dogpyr[o*(nOctaveLayers + 2) + i]; subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type); //做差 src2-src1 -> dst 上面一层减去下面一层然后存到dst中 } } }
findScaleSpaceExtrema,就是求尺度空间的极大极小点
// // Detects features at extrema in DoG scale space. Bad features are discarded // based on contrast and ratio of principal curvatures. void SIFT_Impl::findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const std::vector<Mat>& dog_pyr, std::vector<KeyPoint>& keypoints ) const { int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3); //获得金字塔的个数 int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE); //contrastThreshold由用户给出 //threshold其实和contrastThreshold成正比和nOctaveLayers成反比 const int n = SIFT_ORI_HIST_BINS; //SIFT_ORI_HIST_BINS = 36; 方向的bin的个数 float hist[n]; //直方图 KeyPoint kpt; //关键点 keypoints.clear(); //清除原本的keypoints, //用clear方法并没有释放内存,详情见:http://blog.jobbole.com/37700/ //可以考虑vector<KeyPoint>(keypoints).swap(keypoints); for( int o = 0; o < nOctaves; o++ ) //从第0个金字塔开始 for( int i = 1; i <= nOctaveLayers; i++ ) //这个范围需要举个例子就明白了5+3层的G层就有5+2的DOG层,然后去头去尾从第1到第6层,第6层的标号就是nOctaveLayers { int idx = o*(nOctaveLayers+2)+i; const Mat& img = dog_pyr[idx]; //取出中间一层 const Mat& prev = dog_pyr[idx-1]; //取出下面一层 const Mat& next = dog_pyr[idx+1]; //取出上面一层 int step = (int)img.step1(); int rows = img.rows, cols = img.cols; //SIFT_IMG_BORDER = 5; 这个仅仅只是为了防止越界 //从5开始到rows-5,防止越界 for( int r = SIFT_IMG_BORDER; r < rows-SIFT_IMG_BORDER; r++) { const sift_wt* currptr = img.ptr<sift_wt>(r); //获取img的第r行的float指针 const sift_wt* prevptr = prev.ptr<sift_wt>(r); const sift_wt* nextptr = next.ptr<sift_wt>(r); for( int c = SIFT_IMG_BORDER; c < cols-SIFT_IMG_BORDER; c++) { sift_wt val = currptr[c]; //取得img的 第r行第c列的像素值 // find local extrema with pixel accuracy //极大极小值都需要,是在上层下层和周围3*3的像素正方体下判断极值 if( std::abs(val) > threshold && //绝对值大于阈值要响应强烈 ((val > 0 && val >= currptr[c-1] && val >= currptr[c+1] && val >= currptr[c-step-1] && val >= currptr[c-step] && val >= currptr[c-step+1] && val >= currptr[c+step-1] && val >= currptr[c+step] && val >= currptr[c+step+1] && val >= nextptr[c] && val >= nextptr[c-1] && val >= nextptr[c+1] && val >= nextptr[c-step-1] && val >= nextptr[c-step] && val >= nextptr[c-step+1] && val >= nextptr[c+step-1] && val >= nextptr[c+step] && val >= nextptr[c+step+1] && val >= prevptr[c] && val >= prevptr[c-1] && val >= prevptr[c+1] && val >= prevptr[c-step-1] && val >= prevptr[c-step] && val >= prevptr[c-step+1] && val >= prevptr[c+step-1] && val >= prevptr[c+step] && val >= prevptr[c+step+1]) || (val < 0 && val <= currptr[c-1] && val <= currptr[c+1] && val <= currptr[c-step-1] && val <= currptr[c-step] && val <= currptr[c-step+1] && val <= currptr[c+step-1] && val <= currptr[c+step] && val <= currptr[c+step+1] && val <= nextptr[c] && val <= nextptr[c-1] && val <= nextptr[c+1] && val <= nextptr[c-step-1] && val <= nextptr[c-step] && val <= nextptr[c-step+1] && val <= nextptr[c+step-1] && val <= nextptr[c+step] && val <= nextptr[c+step+1] && val <= prevptr[c] && val <= prevptr[c-1] && val <= prevptr[c+1] && val <= prevptr[c-step-1] && val <= prevptr[c-step] && val <= prevptr[c-step+1] && val <= prevptr[c+step-1] && val <= prevptr[c+step] && val <= prevptr[c+step+1]))) { int r1 = r, c1 = c, layer = i; //取得r行号,c列号,layer在DOG的层号 if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1, //过滤不好的极值点 nOctaveLayers, (float)contrastThreshold, (float)edgeThreshold, (float)sigma) ) continue; //kpt.size = sigma*powf(2.f, (layer + xi) / nOctaveLayers)*(1 << octv)*2; 后面这两项(1 << octv)*2不是和0.5f/(1 << o)低效了吗 float scl_octv = kpt.size*0.5f/(1 << o); //o越大就要缩小1/2^o倍,由于金字塔之间是有resize的 float omax = calcOrientationHist(gauss_pyr[o*(nOctaveLayers+3) + layer], Point(c1, r1), //对于极值点求方向直方图 cvRound(SIFT_ORI_RADIUS * scl_octv), //SIFT_ORI_RADIUS = 3 * SIFT_ORI_SIG_FCTR; SIFT_ORI_SIG_FCTR * scl_octv, // SIFT_ORI_SIG_FCTR = 1.5f; hist, n); float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO); // SIFT_ORI_PEAK_RATIO = 0.8f; for( int j = 0; j < n; j++ ) { int l = j > 0 ? j - 1 : n - 1; int r2 = j < n-1 ? j + 1 : 0; if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr ) //比较该bin和周边两个bin的值大小,并且要大于等于最大方向幅值的0.8倍 { //这里的插值求极值的公式,估计是通过Newton等距插值然后求导得到,我算了一早上没算出来,但是应该是这样推导没错 //Adds features to an array for every orientation in a histogram greater than a specified threshold. float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]); //Interpolates a histogram peak from left, center, and right values bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin; //控制在[0,n]当中 kpt.angle = 360.f - (float)((360.f/n) * bin); if(std::abs(kpt.angle - 360.f) < FLT_EPSILON) //FLT_EPSILON是浮点数非常小的一个数1.19209290E-07F kpt.angle = 0.f; keypoints.push_back(kpt); } } } } } } }
adjustLocalExtrema,使用泰勒展开然后求导得到极值点的逼近算法,不断逼近可能的极值点,如果所走的步长小于0.5,在离散的条件下就是该点为极值点。并且计算极值点的幅值并且要求该极值要足够的大,然后还有主曲率用Hassian矩阵来判断就像Harris角点检测一样,取出边界响应的极值点。最后还对keypoint的坐标等进行编码(完全看不懂),可以使用unpackOctave进行解码。
// // Interpolates a scale-space extremum's location and scale to subpixel // accuracy to form an image feature. Rejects features with low contrast. // Based on Section 4 of Lowe's paper. static bool adjustLocalExtrema( const std::vector<Mat>& dog_pyr, KeyPoint& kpt, int octv, //octv,layer,r,c显示了关键点的位置 int& layer, int& r, int& c, int nOctaveLayers, //还有四组参数nOctaveLayers,contrastThreshold,edgeThreshold,sigma float contrastThreshold, float edgeThreshold, float sigma ) { const float img_scale = 1.f/(255*SIFT_FIXPT_SCALE); //pixel的scale因子 const float deriv_scale = img_scale*0.5f; const float second_deriv_scale = img_scale; const float cross_deriv_scale = img_scale*0.25f; float xi=0, xr=0, xc=0, contr=0; int i = 0; for( ; i < SIFT_MAX_INTERP_STEPS; i++ ) // maximum steps of keypoint interpolation before failure { int idx = octv*(nOctaveLayers+2) + layer; //找到该层 const Mat& img = dog_pyr[idx];//分别获得该层和上下两层 const Mat& prev = dog_pyr[idx-1]; const Mat& next = dog_pyr[idx+1]; Vec3f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale, //列上的差分/2就是一阶导数,然后还要归一化到0-1 (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale, //行上的差分 (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale); //尺度上的差分 float v2 = (float)img.at<sift_wt>(r, c)*2; float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale; //求导 float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale; float dss = (next.at<sift_wt>(r, c) + prev.at<sift_wt>(r, c) - v2)*second_deriv_scale; float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) - img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1))*cross_deriv_scale; float dxs = (next.at<sift_wt>(r, c+1) - next.at<sift_wt>(r, c-1) - prev.at<sift_wt>(r, c+1) + prev.at<sift_wt>(r, c-1))*cross_deriv_scale; float dys = (next.at<sift_wt>(r+1, c) - next.at<sift_wt>(r-1, c) - prev.at<sift_wt>(r+1, c) + prev.at<sift_wt>(r-1, c))*cross_deriv_scale; Matx33f H(dxx, dxy, dxs, dxy, dyy, dys, dxs, dys, dss); Vec3f X = H.solve(dD, DECOMP_LU); //LU分解 xi = -X[2]; xr = -X[1]; xc = -X[0]; if( std::abs(xi) < 0.5f && std::abs(xr) < 0.5f && std::abs(xc) < 0.5f ) //发现只能移动小于1/2,那其实极值点就是该点,在离散情况下就不需要移动了 break; if( std::abs(xi) > (float)(INT_MAX/3) || //如果太过于大了就直接抛弃 std::abs(xr) > (float)(INT_MAX/3) || //什么情况会发生这个,我也不太清楚 std::abs(xc) > (float)(INT_MAX/3) ) //这个问题需要看数值分析 return false; c += cvRound(xc); r += cvRound(xr); layer += cvRound(xi); if( layer < 1 || layer > nOctaveLayers || c < SIFT_IMG_BORDER || c >= img.cols - SIFT_IMG_BORDER || r < SIFT_IMG_BORDER || r >= img.rows - SIFT_IMG_BORDER ) //这个条件根本不可能发生。是为了鲁棒吧 return false; } // ensure convergence of interpolation if( i >= SIFT_MAX_INTERP_STEPS ) //如果移动次数太多也抛弃,代表其实可能是局部像素抖动使得恰好其满足了极值条件 return false; { int idx = octv*(nOctaveLayers+2) + layer; const Mat& img = dog_pyr[idx]; const Mat& prev = dog_pyr[idx-1]; const Mat& next = dog_pyr[idx+1]; Matx31f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale, (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale, (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale); float t = dD.dot(Matx31f(xc, xr, xi)); contr = img.at<sift_wt>(r, c)*img_scale + t * 0.5f; if( std::abs( contr ) * nOctaveLayers < contrastThreshold ) //判断新的极值点的极值要大 return false; // principal curvatures are computed using the trace and det of Hessian float v2 = img.at<sift_wt>(r, c)*2.f; //类似Harris角点检测去除线条的极值点和 float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale; float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale; float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) - img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1)) * cross_deriv_scale; float tr = dxx + dyy; float det = dxx * dyy - dxy * dxy; if( det <= 0 || tr*tr*edgeThreshold >= (edgeThreshold + 1)*(edgeThreshold + 1)*det ) return false; } kpt.pt.x = (c + xc) * (1 << octv); //不明白, (c+xc) * (2^octv) 这里编码了,估计是使用unpackOctave进行解码 kpt.pt.y = (r + xr) * (1 << octv); kpt.octave = octv + (layer << 8) + (cvRound((xi + 0.5)*255) << 16); kpt.size = sigma*powf(2.f, (layer + xi) / nOctaveLayers)*(1 << octv)*2; kpt.response = std::abs(contr); //响应就是泰勒展开后求导后的带入的极值 return true; }
calcOrientationHist 计算极值点的主方向,在findScaleSpaceExtrema中最后有个细节就是它还保存一些方向较好的极值点,结果就是同一个极值点坐标可能有两个angle
/* @img: 极值点所处的高斯金字塔的第layer层的图像 @pt: 极值点的列号和行号 @radius: radius -> SIFT_ORI_RADIUS*scl_octv -> SIFT_ORI_RADIUS*pt.size -> SIFT_ORI_RADIUS*sigma*powf(2.f, (layer + xi) / nOctaveLayers) @sigma: SIFT_ORI_SIG_FCTR * scl_octv, // SIFT_ORI_SIG_FCTR = 1.5f; @hist: 直方图 float hist[n]; @n: 直方图bin的个数 */ // Computes a gradient orientation histogram at a specified pixel static float calcOrientationHist( const Mat& img, Point pt, int radius, float sigma, float* hist, int n ) { int i, j, k, len = (radius*2+1)*(radius*2+1); //len直径+1变奇数 float expf_scale = -1.f/(2.f * sigma * sigma); //与sigma^2成反比的一个东西 AutoBuffer<float> buf(len*4 + n+4); //n个bin, float *X = buf, *Y = X + len, *Mag = X, *Ori = Y + len, *W = Ori + len; //X指向0, Y指向len, Mag指向X, Ori指向2len, W指向3len float* temphist = W + len + 2; //temphist指向4len+2, 后面4len 和 4len+1有用处, 以及4len+n 和 4len+n+1也有用处, 用来平滑 for( i = 0; i < n; i++ ) //初始化为0 temphist[i] = 0.f; for( i = -radius, k = 0; i <= radius; i++ ) //在特征点pt.x,pt.y周围按radius取一块正方块 { int y = pt.y + i; //取该正方块左上的点 if( y <= 0 || y >= img.rows - 1 ) //如果越界就该块的下一个像素 continue; for( j = -radius; j <= radius; j++ ) { int x = pt.x + j; if( x <= 0 || x >= img.cols - 1 ) //如果越界就下一个 continue; float dx = (float)(img.at<sift_wt>(y, x+1) - img.at<sift_wt>(y, x-1)); //计算该块所取出的像素点的x方向梯度 float dy = (float)(img.at<sift_wt>(y-1, x) - img.at<sift_wt>(y+1, x)); //y方向梯度 X[k] = dx; Y[k] = dy; W[k] = (i*i + j*j)*expf_scale; //X存该像素的dx, Y存dy, W存距离与极值点的距离的平方的expf_scale因子倍 k++; //记录取出了多少块,其实就是没有越界的像素个数 } } len = k; // compute gradient values, orientations and the weights over the pixel neighborhood cv::hal::exp32f(W, W, len); //计算exp^W存到W中 cv::hal::fastAtan2(Y, X, Ori, len, true); //计算arctan(y/x)存到Ori中, true是以角度返回, false以弧度返回, 范围是0-360 cv::hal::magnitude32f(X, Y, Mag, len); //计算幅值 存入到 Mag中 for( k = 0; k < len; k++ ) { int bin = cvRound((n/360.f)*Ori[k]); //计算所属的bin号 if( bin >= n ) bin -= n; if( bin < 0 ) bin += n; temphist[bin] += W[k]*Mag[k]; //根据距离进行加权 } // smooth the histogram temphist[-1] = temphist[n-1]; temphist[-2] = temphist[n-2]; temphist[n] = temphist[0]; temphist[n+1] = temphist[1]; for( i = 0; i < n; i++ ) { hist[i] = (temphist[i-2] + temphist[i+2])*(1.f/16.f) + //其实就是加权平滑,用左右2个距离的直方图幅值进行平滑 (temphist[i-1] + temphist[i+1])*(4.f/16.f) + temphist[i]*(6.f/16.f); } float maxval = hist[0]; for( i = 1; i < n; i++ ) maxval = std::max(maxval, hist[i]); //返回最大的方向的直方图幅值 return maxval; }
KeyPointsFilter::removeDuplicated( keypoints ); //删去坐标相同,size相同(layer_id),角度相同的点,可以去看源码
KeyPointsFilter::retainBest(keypoints, nfeatures); //takes keypoints and culls them by the response
https://github.com/opencv/opencv/blob/master/modules/features2d/src/keypoint.cpp
calcDescriptors 就是计算极值点的梯度方向描述符
static void calcDescriptors(const std::vector<Mat>& gpyr, const std::vector<KeyPoint>& keypoints, Mat& descriptors, int nOctaveLayers, int firstOctave ) { int d = SIFT_DESCR_WIDTH, n = SIFT_DESCR_HIST_BINS; for( size_t i = 0; i < keypoints.size(); i++ ) { KeyPoint kpt = keypoints[i]; int octave, layer; float scale; unpackOctave(kpt, octave, layer, scale); CV_Assert(octave >= firstOctave && layer <= nOctaveLayers+2); float size=kpt.size*scale; Point2f ptf(kpt.pt.x*scale, kpt.pt.y*scale); //用unpackOctave进行解码,恢复了x,y坐标 const Mat& img = gpyr[(octave - firstOctave)*(nOctaveLayers + 3) + layer]; float angle = 360.f - kpt.angle; if(std::abs(angle - 360.f) < FLT_EPSILON) angle = 0.f; calcSIFTDescriptor(img, ptf, angle, size*0.5f, d, n, descriptors.ptr<float>((int)i)); } }
calcSIFTDescriptor 很难懂,尤其是在旋转到主方向那里,还有插值那里。最后那里有个细节就是这个描述符输出的是一连串uchar整数,(应该是可以直接使用的吧,毕竟同一个计算算法而且没有随机的因素在里面所得到的描述符肯定可以进行比较,待验证),原因到时候可以参考我前面的一个SIFT博客,主要是为了节省空间。
static void calcSIFTDescriptor( const Mat& img, Point2f ptf, float ori, float scl, int d, int n, float* dst ) { Point pt(cvRound(ptf.x), cvRound(ptf.y)); float cos_t = cosf(ori*(float)(CV_PI/180)); float sin_t = sinf(ori*(float)(CV_PI/180)); float bins_per_rad = n / 360.f; float exp_scale = -1.f/(d * d * 0.5f); float hist_width = SIFT_DESCR_SCL_FCTR * scl; // determines the size of a single descriptor orientation histogram SIFT_DESCR_SCL_FCTR = 3.f; int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f); // Clip the radius to the diagonal of the image to avoid autobuffer too large exception radius = std::min(radius, (int) sqrt((double) img.cols*img.cols + img.rows*img.rows)); cos_t /= hist_width; sin_t /= hist_width; int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2); //d==4 , n==8 int rows = img.rows, cols = img.cols; AutoBuffer<float> buf(len*6 + histlen); float *X = buf, *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len; float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len; for( i = 0; i < d+2; i++ ) { for( j = 0; j < d+2; j++ ) for( k = 0; k < n+2; k++ ) hist[(i*(d+2) + j)*(n+2) + k] = 0.; } for( i = -radius, k = 0; i <= radius; i++ ) for( j = -radius; j <= radius; j++ ) { // Calculate sample's histogram array coords rotated relative to ori. // Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e. // r_rot = 1.5) have full weight placed in row 1 after interpolation. float c_rot = j * cos_t - i * sin_t; //乘以旋转矩阵 并且还放缩了 hist_width?? float r_rot = j * sin_t + i * cos_t; float rbin = r_rot + d/2 - 0.5f; //没看懂 float cbin = c_rot + d/2 - 0.5f; int r = pt.y + i, c = pt.x + j; //j是x方向,i是y方向 if( rbin > -1 && rbin < d && cbin > -1 && cbin < d && r > 0 && r < rows - 1 && c > 0 && c < cols - 1 ) { float dx = (float)(img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1)); //就算旋转后,不影响计算梯度 float dy = (float)(img.at<sift_wt>(r-1, c) - img.at<sift_wt>(r+1, c)); X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin; W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale; k++; } } len = k; cv::hal::fastAtan2(Y, X, Ori, len, true); //计算梯度方向 cv::hal::magnitude32f(X, Y, Mag, len); //计算幅值 cv::hal::exp32f(W, W, len); //计算权重 for( k = 0; k < len; k++ ) { float rbin = RBin[k], cbin = CBin[k]; float obin = (Ori[k] - ori)*bins_per_rad; float mag = Mag[k]*W[k]; int r0 = cvFloor( rbin ); int c0 = cvFloor( cbin ); int o0 = cvFloor( obin ); rbin -= r0; cbin -= c0; obin -= o0; if( o0 < 0 ) o0 += n; if( o0 >= n ) o0 -= n; // histogram update using tri-linear interpolation float v_r1 = mag*rbin, v_r0 = mag - v_r1; //没看懂, tri-linear float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11; float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01; float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111; float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101; float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011; float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001; int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0; hist[idx] += v_rco000; hist[idx+1] += v_rco001; hist[idx+(n+2)] += v_rco010; hist[idx+(n+3)] += v_rco011; hist[idx+(d+2)*(n+2)] += v_rco100; hist[idx+(d+2)*(n+2)+1] += v_rco101; hist[idx+(d+3)*(n+2)] += v_rco110; hist[idx+(d+3)*(n+2)+1] += v_rco111; } // finalize histogram, since the orientation histograms are circular for( i = 0; i < d; i++ ) for( j = 0; j < d; j++ ) { int idx = ((i+1)*(d+2) + (j+1))*(n+2); hist[idx] += hist[idx+n]; hist[idx+1] += hist[idx+n+1]; for( k = 0; k < n; k++ ) dst[(i*d + j)*n + k] = hist[idx+k]; } // copy histogram to the descriptor, // apply hysteresis thresholding // and scale the result, so that it can be easily converted // to byte array float nrm2 = 0; len = d*d*n; for( k = 0; k < len; k++ ) nrm2 += dst[k]*dst[k]; float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR; for( i = 0, nrm2 = 0; i < k; i++ ) { float val = std::min(dst[i], thr); dst[i] = val; nrm2 += val*val; } nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON); #if 1 for( k = 0; k < len; k++ ) { dst[k] = saturate_cast<uchar>(dst[k]*nrm2); } #else float nrm1 = 0; for( k = 0; k < len; k++ ) { dst[k] *= nrm2; nrm1 += dst[k]; } nrm1 = 1.f/std::max(nrm1, FLT_EPSILON); for( k = 0; k < len; k++ ) { dst[k] = std::sqrt(dst[k] * nrm1);//saturate_cast<uchar>(std::sqrt(dst[k] * nrm1)*SIFT_INT_DESCR_FCTR); } #endif }