煤矸石分类项目:提取煤矸石的灰度均值和灰度方差作为特征进行分类。SVM的简单代码如下,使用的是二次封装的OpenCV库,在其他机器上运行时,将头文件和条件编译宏替换成OpenCV自带的即可。
#include "sv.h"

#ifdef _DEBUG
#pragma comment(lib,"BoxCV100d.lib")
#pragma comment(lib,"SvRunTimed.lib")
#else
#pragma comment(lib,"BoxCV100.lib")
#pragma comment(lib,"SvRunTime.lib")
#endif

using namespace cv;
using namespace cv::ml;

// Minimal linear-SVM demo with hard-coded training data.
// Trains a C_SVC classifier on two-feature samples, paints the predicted
// class of every pixel coordinate of a 720x720 canvas, draws the training
// samples on top and shows the result in a window.
int main(int argc, char** argv)
{
    // Three-channel canvas used for visualisation.
    const int width = 720, height = 720;
    Mat image = Mat::zeros(height, width, CV_8UC3);

    // A single sample count keeps the label array, the feature array and
    // both Mat headers in agreement (the headers previously used 10 rows
    // for 11 samples, silently dropping the last one).
    const int kSampleCount = 11;

    int labels[kSampleCount] = { 1,1,1,1,1,1, -1,-1,-1,-1,-1 };
    Mat LabelsMat(kSampleCount, 1, CV_32SC1, labels);

    float TrainingData[kSampleCount][2] =
    {
        {291.4,252},{115.3,186},{277.7,695},{367.5,620},{352.4,645},{117.2,203},
        {117.2,129},{122.8,112},{116.4,175},{132.5,78},{106.3,125}
    };
    // The Mat header wraps the stack array without copying; the array
    // stays alive for the whole of main, so this is safe.
    Mat TrainMat(kSampleCount, 2, CV_32FC1, TrainingData);

    // Create the classifier and set its parameters.
    Ptr<SVM> model = SVM::create();
    model->setType(SVM::C_SVC);
    model->setKernel(SVM::LINEAR);

    // One sample per row.
    Ptr<TrainData> tData = TrainData::create(TrainMat, ROW_SAMPLE, LabelsMat);

    // Train the classifier.
    model->train(tData);

    Vec3b ColorGreen(0, 255, 0), ColorBlue(255, 0, 0);

    // Show the decision regions: classify every pixel coordinate (x = j, y = i).
    for (int i = 0; i < image.rows; ++i)
    {
        for (int j = 0; j < image.cols; ++j)
        {
            Mat SampleMat = (Mat_<float>(1, 2) << j, i);
            float response = model->predict(SampleMat);

            if (1 == response)
                image.at<Vec3b>(i, j) = ColorGreen;
            else if (-1 == response)
                image.at<Vec3b>(i, j) = ColorBlue;
        }
    }

    // Draw the training samples: label 1 in black, every other label in white.
    int thickness = -1;   // negative thickness => filled circle
    int lineType = 8;
    Scalar c1 = Scalar::all(0);
    Scalar c2 = Scalar::all(255);

    for (int i = 0; i < LabelsMat.rows; ++i)
    {
        // Row pointer of the i-th sample.
        const float* v = TrainMat.ptr<float>(i);
        Point pt = Point((int)v[0], (int)v[1]);
        if (1 == labels[i])
            circle(image, pt, 5, c1, thickness, lineType);
        else
            circle(image, pt, 5, c2, thickness, lineType);
    }

    imshow("SVM Simple Example", image);
    waitKey();
    return 0;
}
结果图
但是这个程序是将训练数据写死的。在项目中分类器是一个独立的模块,分类器要达到更高的精度也需要更多的训练数据;当训练数据大到一定程度时,全写在程序的数组里是不现实的。还有一种情况是更换分类任务时需要重新训练,这时就需要分类器能够自己读取训练数据。
训练数据x1,x2用“,”隔开,标签用“#”隔开。首先进行文件读取,先读进来一行数据,然后解析读进来的字符串,用string.find(',')函数来找到“,”的位置,用string.find('#')来找到标记的位置。当训练数据中包含多种特征时(用一个逗号分隔不够用,需要用多个逗号),可以自己写一个循环,遍历这一行
flag="a"; position=0; int i=1; while((position=s.find(flag,position))!=string::npos) { cout<<"position "<<i<<" : "<<position<<endl; position++; i++; }
这个代码以后有需要再加上,其中字符串操作参考https://www.cnblogs.com/lifexy/p/8642163.html
字符串读进来后进行分割,然后进行类型转换储存在vector或者数组中,string类型转数字类型可以使用模板函数,参考https://www.cnblogs.com/lyggqm/p/4562727.html
代码如下
/// Converts the leading text of `str` to a value of type `Type` using
/// stream extraction (`operator>>`).
///
/// @tparam Type  Any type that can be value-initialised and extracted from
///               a std::istream (e.g. int, float, double).
/// @param  str   Text to parse; leading whitespace is skipped by the stream.
/// @return The parsed value, or a value-initialised `Type` (0 for arithmetic
///         types) when nothing could be extracted.
template <class Type>
Type stringToNum(const std::string& str)
{
    std::istringstream iss(str);
    // Value-initialise: on an empty string the extraction never writes to
    // `num`, so an uninitialised variable would be returned as garbage.
    Type num{};
    iss >> num;
    return num;
}
从文件中读取训练数据的完整代码如下
#include<cstdlib>
#include<fstream>
#include<sstream>
#include<string>
#include<vector>
#include<iostream>

using namespace std;

// Converts the leading text of `str` to `Type` via stream extraction.
// Returns a value-initialised `Type` (0 for numbers) when nothing can be
// parsed, instead of an indeterminate value.
template <class Type>
Type stringToNum(const string& str)
{
    istringstream iss(str);
    Type num{};
    iss >> num;
    return num;
}

// Reads training samples from "com.txt" and echoes them to stdout.
// Each line is expected to look like `x1,x2#label`.
// Returns 1 when the file cannot be opened, 0 otherwise.
int main(void)
{
    ifstream in("com.txt");
    if (!in)
    {
        cerr << "Failed to open com.txt" << endl;
        return 1;
    }

    string s;
    vector<float> Feature1;
    vector<float> Feature2;
    vector<int> TrainLabel;

    // Read line by line until the whole file has been consumed.
    while (getline(in, s))
    {
        if (s.empty())
            continue;

        // Keep the unsigned result of find(): storing npos in an int would
        // turn a missing delimiter into -1 and yield garbage substrings.
        const string::size_type FeaIndex = s.find(',');
        const string::size_type LabelIndex = s.find('#');
        if (FeaIndex == string::npos || LabelIndex == string::npos || LabelIndex < FeaIndex)
        {
            cerr << "Skipping malformed line: " << s << endl;
            continue;
        }

        const string FeaString1 = s.substr(0, FeaIndex);
        const string FeaString2 = s.substr(FeaIndex + 1, LabelIndex - FeaIndex - 1);
        const string Label = s.substr(LabelIndex + 1);

        Feature1.push_back(stringToNum<float>(FeaString1));
        Feature2.push_back(stringToNum<float>(FeaString2));
        TrainLabel.push_back(stringToNum<int>(Label));
    }
    in.close();

    for (size_t i = 0; i < Feature1.size(); ++i)
    {
        cout << "Feature 1: " << Feature1[i] << endl;
        cout << "Feature 2: " << Feature2[i] << endl;
        cout << "Label: " << TrainLabel[i] << endl;
        cout << endl;
    }

    system("pause");
    return 0;
}
训练数据从文件读入后以vector的形式(Feature1、Feature2两个向量)存储在内存中,我先将它转存到float二维指针(float**)指向的数组中
float** TrainingData = new float*[TLabel.size()]; for (int i = 0; i < TLabel.size(); ++i) TrainingData[i] = new float[2]; for (int i = 0; i < TLabel.size(); ++i) { TrainingData[i][0] = TData.Feature1[i]*10; TrainingData[i][1] = TData.Feature2[i]*100; /*cout << TrainingData[i][0] << endl << TrainingData[i][1] << endl;*/ }
在构造训练数据时出现了问题:传入SVM的数据需要是Mat格式,而Mat的构造函数只能接收一块连续内存的首地址;float**是指向各行首地址的指针数组,各行内存并不连续,无法直接传入,所以会导致训练数据没有被正确传入
Mat赋值方法如下
Mat TrainMat1(TrainLabel.size(), 2, CV_32FC1); for (int i = 0; i < TrainLabel.size(); ++i) { TrainMat1.at<float>(i, 0) = TrainingData.Feature1[i] * 10; TrainMat1.at<float>(i, 1) = TrainingData.Feature2[i] * 100; }
下面是完整代码
#include "sv.h"
#include<vector>
#include<iostream>
#include<fstream>
#include<sstream>
#include<string>

#ifdef _DEBUG
#pragma comment(lib,"BoxCV100d.lib")
#pragma comment(lib,"SvRunTimed.lib")
#else
#pragma comment(lib,"BoxCV100.lib")
#pragma comment(lib,"SvRunTime.lib")
#endif

using namespace std;
using namespace cv;
using namespace cv::ml;

//------------------------------------------------------------------
// Training data: two parallel feature columns, one entry per sample
//------------------------------------------------------------------
typedef struct _TRAINING_DATA
{
    vector<float> Feature1;
    vector<float> Feature2;
}TRAINING_DATA;

//------------------------------------------------------------------
// Template helper: converts the leading text of `str` to `Type` via
// stream extraction. Returns a value-initialised `Type` (0 for numbers)
// when nothing can be parsed, instead of an indeterminate value.
//------------------------------------------------------------------
template <class Type>
Type stringToNum(const string& str)
{
    istringstream iss(str);
    Type num{};
    iss >> num;
    return num;
}

//------------------------------------------------------------------
// Reads training data from a text file and appends it to the vectors.
//   Dir          - path of the file; each line looks like `x1,x2#label`
//   TrainingData - receives x1 in Feature1 and x2 in Feature2
//   TrainLabel   - receives the integer label of each sample
// Blank lines are ignored and lines missing a delimiter are skipped with
// a message on stderr. If the file cannot be opened, an error is printed
// and the outputs are left untouched. All samples are echoed to stdout
// after reading.
//------------------------------------------------------------------
void ReadTrainingData(const string& Dir, TRAINING_DATA &TrainingData, vector<int> &TrainLabel)
{
    // Open the file; the stream closes itself when it goes out of scope.
    ifstream TrainingDataFile(Dir);
    if (!TrainingDataFile)
    {
        cerr << "ReadTrainingData: cannot open " << Dir << endl;
        return;
    }

    string FileLineData;
    // Read line by line until the whole file has been consumed.
    while (getline(TrainingDataFile, FileLineData))
    {
        if (FileLineData.empty())
            continue;

        // Locate the separators. Keep the unsigned result of find():
        // storing npos in an int would turn a missing delimiter into -1
        // and yield garbage substrings.
        const string::size_type FeaIndex = FileLineData.find(',');
        const string::size_type LabelIndex = FileLineData.find('#');
        if (FeaIndex == string::npos || LabelIndex == string::npos || LabelIndex < FeaIndex)
        {
            cerr << "ReadTrainingData: skipping malformed line: " << FileLineData << endl;
            continue;
        }

        // x1: everything before the comma.
        const string FeaString1 = FileLineData.substr(0, FeaIndex);
        // x2: between the comma and the '#'.
        const string FeaString2 = FileLineData.substr(FeaIndex + 1, LabelIndex - FeaIndex - 1);
        // label: everything after the '#'.
        const string Label = FileLineData.substr(LabelIndex + 1);

        TrainingData.Feature1.push_back(stringToNum<float>(FeaString1));
        TrainingData.Feature2.push_back(stringToNum<float>(FeaString2));
        TrainLabel.push_back(stringToNum<int>(Label));
    }

    // Echo what is now stored so the input can be checked by eye.
    for (size_t i = 0; i < TrainLabel.size(); ++i)
    {
        cout << "Feature 1: " << TrainingData.Feature1[i] << endl;
        cout << "Feature 2: " << TrainingData.Feature2[i] << endl;
        cout << "Label: " << TrainLabel[i] << endl;
        cout << endl;
    }
}
//------------------------------------------------------------------ // 训练SVM分类器 //------------------------------------------------------------------ void TrainSVM(Ptr<SVM> &Model, TRAINING_DATA &TrainingData, vector<int> &TrainLabel) { //创建显示窗口 int width = 720, height = 720; //三通道可视化窗口 Mat image = Mat::zeros(height, width, CV_8UC3); int* labels = new int[TrainLabel.size()]; for (int i = 0; i < TrainLabel.size(); ++i) { labels[i] = TrainLabel[i]; cout << labels[i] << endl; } Mat LabelsMat(TrainLabel.size(), 1, CV_32SC1, labels); Mat TrainMat1(TrainLabel.size(), 2, CV_32FC1); for (int i = 0; i < TrainLabel.size(); ++i) { TrainMat1.at<float>(i, 0) = TrainingData.Feature1[i] * 10; TrainMat1.at<float>(i, 1) = TrainingData.Feature2[i] * 100; } //创建分类器设置参数 Ptr<SVM> model = SVM::create(); model->setType(SVM::C_SVC); model->setKernel(SVM::LINEAR); //设置训练参数 Ptr<TrainData> tData = TrainData::create(TrainMat1, ROW_SAMPLE, LabelsMat); //训练分类器 model->train(tData); Vec3b ColorGreen(0, 255, 0), ColorBlue(255, 0, 0); //显示svm决策边界 for (int i = 0; i < image.rows; ++i) for (int j = 0; j < image.cols; ++j) { //生成测试数据 Mat SampleMat = (Mat_<float>(1, 2) << j, i); //predict float response = model->predict(SampleMat); if (1 == response) image.at<Vec3b>(i, j) = ColorGreen; else if (-1 == response) image.at<Vec3b>(i, j) = ColorBlue; } //绘图 //显示结果 int thickness = -1; int lineType = 8; //两类结果一个黑色一个白色 Scalar c1 = Scalar::all(0); Scalar c2 = Scalar::all(255); for (int i = 0; i < LabelsMat.rows; ++i) { //取出每行指针 const float* v = TrainMat1.ptr<float>(i); Point pt = Point((int)v[0], (int)v[1]); if (1 == labels[i]) circle(image, pt, 5, c1, thickness, lineType); else circle(image, pt, 5, c2, thickness, lineType); } imshow("SVM Simple Example", image); waitKey(); } int main(int argc, char** argv) { static string dir = "train_data.txt"; Ptr<SVM> SvmModel; TRAINING_DATA TData; vector<int> TLabel; ReadTrainingData(dir, TData, TLabel); TrainSVM(SvmModel, TData, TLabel); waitKey(); }