opencv3.3.1
https://github.com/alantrrs/OpenTLD
https://github.com/arthurv/OpenTLD
problem
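OpenCV 3.0 and later versions dropped the legacy module, so cv::PatchGenerator (which OpenTLD depends on) is no longer available.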
solution
Implement the cv::PatchGenerator class yourself and build it as part of the project (header and source below).
patchgenerator.h in folder include
//#pragma once #ifndef PATCH_GENERATOR_H #define PATCH_GENERATOR_H #include "opencv2/opencv.hpp" namespace cv{ class CV_EXPORTS PatchGenerator //class PatchGenerator { public: PatchGenerator(); PatchGenerator(double _backgroundMin, double _backgroundMax, double _noiseRange, bool _randomBlur=true, double _lambdaMin=0.6, double _lambdaMax=1.5, double _thetaMin=-CV_PI, double _thetaMax=CV_PI, double _phiMin=-CV_PI, double _phiMax=CV_PI ); void operator()(const Mat& image, Point2f pt, Mat& patch, Size patchSize, RNG& rng) const; void operator()(const Mat& image, const Mat& transform, Mat& patch, Size patchSize, RNG& rng) const; void warpWholeImage(const Mat& image, Mat& matT, Mat& buf, CV_OUT Mat& warped, int border, RNG& rng) const; void generateRandomTransform(Point2f srcCenter, Point2f dstCenter, CV_OUT Mat& transform, RNG& rng, bool inverse=false) const; void setAffineParam(double lambda, double theta, double phi); double backgroundMin, backgroundMax; double noiseRange; bool randomBlur; double lambdaMin, lambdaMax; double thetaMin, thetaMax; double phiMin, phiMax; }; }; #endif
patchgenerator.cpp in folder src
// patchgenerator.cpp
//
// Implementation of cv::PatchGenerator for OpenCV 3.x builds.
#include <climits>
#include <cmath>

#include "opencv2/opencv.hpp"
#include "patchgenerator.h"

namespace cv
{

//////////////////////////// Patch Generator //////////////////////////////////

static const double DEFAULT_BACKGROUND_MIN = 0;
static const double DEFAULT_BACKGROUND_MAX = 256;
static const double DEFAULT_NOISE_RANGE = 5;
static const double DEFAULT_LAMBDA_MIN = 0.6;
static const double DEFAULT_LAMBDA_MAX = 1.5;
static const double DEFAULT_THETA_MIN = -CV_PI;
static const double DEFAULT_THETA_MAX = CV_PI;
static const double DEFAULT_PHI_MIN = -CV_PI;
static const double DEFAULT_PHI_MAX = CV_PI;

// Builds a generator with the DEFAULT_* parameters above and random blur enabled.
PatchGenerator::PatchGenerator()
    : backgroundMin(DEFAULT_BACKGROUND_MIN), backgroundMax(DEFAULT_BACKGROUND_MAX),
      noiseRange(DEFAULT_NOISE_RANGE), randomBlur(true),
      lambdaMin(DEFAULT_LAMBDA_MIN), lambdaMax(DEFAULT_LAMBDA_MAX),
      thetaMin(DEFAULT_THETA_MIN), thetaMax(DEFAULT_THETA_MAX),
      phiMin(DEFAULT_PHI_MIN), phiMax(DEFAULT_PHI_MAX)
{
}

// Builds a generator with every parameter supplied by the caller.
PatchGenerator::PatchGenerator(double _backgroundMin, double _backgroundMax,
                               double _noiseRange, bool _randomBlur,
                               double _lambdaMin, double _lambdaMax,
                               double _thetaMin, double _thetaMax,
                               double _phiMin, double _phiMax)
    : backgroundMin(_backgroundMin), backgroundMax(_backgroundMax),
      noiseRange(_noiseRange), randomBlur(_randomBlur),
      lambdaMin(_lambdaMin), lambdaMax(_lambdaMax),
      thetaMin(_thetaMin), thetaMax(_thetaMax),
      phiMin(_phiMin), phiMax(_phiMax)
{
}

// Draws lambda1, lambda2, theta and phi (in that order) from their configured
// ranges and writes the resulting 2x3 CV_64F affine matrix to `transform`.
// The matrix maps srcCenter to dstCenter; with `inverse` set, the inverted
// matrix is returned instead.
void PatchGenerator::generateRandomTransform(Point2f srcCenter, Point2f dstCenter,
                                             Mat& transform, RNG& rng, bool inverse) const
{
    double lambda1 = rng.uniform(lambdaMin, lambdaMax);
    double lambda2 = rng.uniform(lambdaMin, lambdaMax);
    double theta = rng.uniform(thetaMin, thetaMax);
    double phi = rng.uniform(phiMin, phiMax);

    // Calculate random parameterized affine transformation A,
    // A = T(patch center) * R(theta) * R(phi)' *
    //     S(lambda1, lambda2) * R(phi) * T(-pt)
    double st = sin(theta);
    double ct = cos(theta);
    double sp = sin(phi);
    double cp = cos(phi);
    double c2p = cp*cp;
    double s2p = sp*sp;

    // Entries of the symmetric matrix R(phi)' * S(lambda1, lambda2) * R(phi).
    double A = lambda1*c2p + lambda2*s2p;
    double B = (lambda2 - lambda1)*sp*cp;
    double C = lambda1*s2p + lambda2*c2p;

    double Ax_plus_By = A*srcCenter.x + B*srcCenter.y;
    double Bx_plus_Cy = B*srcCenter.x + C*srcCenter.y;

    transform.create(2, 3, CV_64F);
    // Typed row pointers replace the former C-style downcast of Mat& to
    // Mat_<double>&; the values written are the same.
    double* row0 = transform.ptr<double>(0);
    double* row1 = transform.ptr<double>(1);
    row0[0] = A*ct - B*st;
    row0[1] = B*ct - C*st;
    row0[2] = -ct*Ax_plus_By + st*Bx_plus_Cy + dstCenter.x;
    row1[0] = A*st + B*ct;
    row1[1] = B*st + C*ct;
    row1[2] = -st*Ax_plus_By - ct*Bx_plus_Cy + dstCenter.y;

    if( inverse )
        invertAffineTransform(transform, transform);
}

// Generates a patch in which image point `pt` is mapped to the patch centre,
// using a freshly drawn random transform.
void PatchGenerator::operator ()(const Mat& image, Point2f pt, Mat& patch,
                                 Size patchSize, RNG& rng) const
{
    // The transform lives in a stack buffer; T is only a header over it.
    double buffer[6];
    Mat_<double> T(2, 3, buffer);

    generateRandomTransform(pt, Point2f((patchSize.width-1)*0.5f, (patchSize.height-1)*0.5f), T, rng);
    (*this)(image, T, patch, patchSize, rng);
}

// Warps `image` into `patch` with the affine matrix T, then applies the
// optional random Gaussian blur and additive Gaussian noise.
void PatchGenerator::operator ()(const Mat& image, const Mat& T,
                                 Mat& patch, Size patchSize, RNG& rng) const
{
    patch.create( patchSize, image.type() );
    if( backgroundMin != backgroundMax )
    {
        // Random background: fill the patch first, then warp on top of it,
        // leaving pixels outside the source image untouched.
        rng.fill(patch, RNG::UNIFORM, Scalar::all(backgroundMin), Scalar::all(backgroundMax));
        warpAffine(image, patch, T, patchSize, INTER_LINEAR, BORDER_TRANSPARENT);
    }
    else
        warpAffine(image, patch, T, patchSize, INTER_LINEAR, BORDER_CONSTANT, Scalar::all(backgroundMin));

    // Draw a value in [-5, 3]; only positive values blur, with an odd kernel
    // of size 3, 5 or 7. The arithmetic is done in int so that it does not
    // depend on unsigned wrap-around. No random number is consumed when
    // randomBlur is off.
    int ksize = 0;
    if( randomBlur )
        ksize = static_cast<int>(static_cast<unsigned>(rng) % 9) - 5;
    if( ksize > 0 )
    {
        ksize = ksize*2 + 1;
        GaussianBlur(patch, patch, Size(ksize, ksize), 0, 0);
    }

    if( noiseRange > 0 )
    {
        // Size computed in size_t so that width*height cannot overflow int.
        AutoBuffer<uchar> _noiseBuf( static_cast<size_t>(patchSize.width)*patchSize.height*image.elemSize() );
        Mat noise(patchSize, image.type(), static_cast<uchar*>(_noiseBuf));
        // Unsigned depths cannot hold negative noise, so the noise is centred
        // on mid-range and the offset is subtracted again when it is added.
        int delta = image.depth() == CV_8U ? 128 : image.depth() == CV_16U ? 32768 : 0;
        rng.fill(noise, RNG::NORMAL, Scalar::all(delta), Scalar::all(noiseRange));
        if( backgroundMin != backgroundMax )
            addWeighted(patch, 1, noise, 1, -delta, patch);
        else
        {
            // Constant background: add noise only to pixels that differ from
            // the background value, so the background stays flat.
            // NOTE(review): rows are read as single bytes here, which looks
            // like it expects 8-bit single-channel images - confirm with callers.
            for( int i = 0; i < patchSize.height; i++ )
            {
                uchar* prow = patch.ptr<uchar>(i);
                const uchar* nrow = noise.ptr<uchar>(i);
                for( int j = 0; j < patchSize.width; j++ )
                    if( prow[j] != backgroundMin )
                        prow[j] = saturate_cast<uchar>(prow[j] + nrow[j] - delta);
            }
        }
    }
}

// Warps the whole image with matT into `warped`, a header over `buf`, leaving
// `border` pixels of margin on every side. The translation of the transform is
// shifted so that the warped image starts at (border, border).
void PatchGenerator::warpWholeImage(const Mat& image, Mat& matT, Mat& buf,
                                    Mat& warped, int border, RNG& rng) const
{
    Mat_<double> T = matT;

    // Bounding box of the four transformed image corners. While scanning,
    // width/height temporarily hold the maximum x/y coordinates.
    Rect roi(INT_MAX, INT_MAX, INT_MIN, INT_MIN);

    for( int k = 0; k < 4; k++ )
    {
        Point2f pt0, pt1;
        pt0.x = (float)(k == 0 || k == 3 ? 0 : image.cols);
        pt0.y = (float)(k < 2 ? 0 : image.rows);
        pt1.x = (float)(T(0,0)*pt0.x + T(0,1)*pt0.y + T(0,2));
        pt1.y = (float)(T(1,0)*pt0.x + T(1,1)*pt0.y + T(1,2));

        roi.x = std::min(roi.x, cvFloor(pt1.x));
        roi.y = std::min(roi.y, cvFloor(pt1.y));
        roi.width = std::max(roi.width, cvCeil(pt1.x));
        roi.height = std::max(roi.height, cvCeil(pt1.y));
    }

    // Convert the maximum coordinates into an inclusive width and height.
    roi.width -= roi.x - 1;
    roi.height -= roi.y - 1;
    int dx = border - roi.x;
    int dy = border - roi.y;

    const int warpedCols = roi.width + border*2;
    const int warpedRows = roi.height + border*2;

    // buf is used as a single-row pool; grow it only when it is too small.
    if( warpedCols*warpedRows > buf.cols )
        buf.create(1, warpedCols*warpedRows, image.type());

    warped = Mat(warpedRows, warpedCols, image.type(), buf.data);

    T(0,2) += dx;
    T(1,2) += dy;
    (*this)(image, T, warped, warped.size(), rng);

    // T is a converted copy when matT is not CV_64F; write the shifted
    // transform back in that case.
    if( T.data != matT.data )
        T.convertTo(matT, matT.type());
}

// Params are assumed to be symmetrical: lambda w.r.t. 1, theta and phi w.r.t. 0
void PatchGenerator::setAffineParam(double lambda, double theta, double phi)
{
    lambdaMin = 1. - lambda;
    lambdaMax = 1. + lambda;
    thetaMin = -theta;
    thetaMax = theta;
    phiMin = -phi;
    phiMax = phi;
}

} // namespace cv
CMakeLists.txt
# Minimum CMake version required.
# Current CMake releases no longer accept projects that request compatibility
# with versions older than 3.5. Policy CMP0003, which used to be set by hand
# here, is NEW by default at this version.
cmake_minimum_required(VERSION 3.5)

# Set project name
project(TLD)

# Default to an optimized build, but let the user override it with
# -DCMAKE_BUILD_TYPE=... on the command line.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release)
endif()

# Append the source directory to the module path
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})

# OpenCV
find_package(OpenCV 3.3.1 REQUIRED)

# Built executables go to the "bin" directory
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/../bin)

# Built libraries go to the "lib" directory
set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/../lib)

# Include directories
include_directories(${PROJECT_SOURCE_DIR}/../include ${OpenCV_INCLUDE_DIRS})

# Libraries
add_library(tld_utils tld_utils.cpp)
add_library(LKTracker LKTracker.cpp)
add_library(ferNN FerNNClassifier.cpp)
# patchgenerator.cpp supplies cv::PatchGenerator.
add_library(tld TLD.cpp patchgenerator.cpp)
#add_library(tld TLD.cpp)

# Executables
add_executable(run_tld run_tld.cpp)

# Link the libraries
target_link_libraries(run_tld tld LKTracker ferNN tld_utils ${OpenCV_LIBS})
run
cd OpenTLD
mkdir build
cd build
cmake ../src/
make
cd ../bin/

# To run from camera
./run_tld -p ../parameters.yml -tl

# To run from file
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -tl

# To init bounding box from file
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -b ../datasets/06_car/init.txt -tl

# To train only in the first frame (no tracking, no learning)
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -b ../datasets/06_car/init.txt

# To test the final detector (repeat the video: the first pass learns, the second pass detects)
./run_tld -p ../parameters.yml -s ../datasets/06_car/car.mpg -b ../datasets/06_car/init.txt -tl -r
When no initial bounding box file is given, select a bounding box around the object you want to track.
evaluation
=====================================
Evaluation
=====================================
The output of the program is a file called bounding_boxes.txt, which contains all the detections made throughout the video. Compare this file with the ground-truth file to evaluate the performance of the algorithm. This is done with a Python script:

python ../datasets/evaluate_vis.py ../datasets/06_car/car.mpg bounding_boxes.txt ../datasets/06_car/gt.txt
result
$ python2 ../datasets/evaluate_vis.py ../datasets/06_car/car.mpg bounding_boxes.txt ../datasets/06_car/gt.txt
../datasets/evaluate_vis.py:22: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  frame[y1:y2,x1:x2]=(100,200,100)
detections = 916.000000
true detections = 860.000000
correct detections = 850.000000
precision=0.927948
recall=0.988372
f-measure= 0.957207
re:
1. https://github.com/alantrrs/OpenTLD
2. https://github.com/arthurv/OpenTLD
3. http://blog.sina.com.cn/s/blog_b30296270102wbbw.html
end