#include<stdio.h> #include<Windows.h> #include<map> #include<vector> #include<string> #include <set> #include <time.h> #define MAX_RESULT 256 using namespace std; /************************************************************************/ /* 获得directory 下的所有文件名的map */ /************************************************************************/ void enumFiles(const char *directory,map<int ,string> &result) { WIN32_FIND_DATA findFileData; HANDLE hFind; //map<int,string> result; char pattern[MAX_PATH]; int i=0; //开始查找 strcpy(pattern,directory); strcat(pattern,"\*"); hFind =FindFirstFile(pattern,&findFileData); if(hFind==INVALID_HANDLE_VALUE) { return ; } else { do { string tmp(findFileData.cFileName); if (tmp.find_first_of('.')==string::npos) { result.insert(make_pair(i++,tmp)); } } while (FindNextFile(hFind,&findFileData)!=0); } //查找结束 } int main(int argc,char *argv[]) { //1. 获取文件夹下的所有文件名 string baseDir("E:\\研究生课程\\下学期\\机器学习\\作业\\2_MLKD-Project2-Release\\MLKD-Project2-Release\\tc"); map<int,string> result; string sourceDir=baseDir+"\\train\\"; enumFiles(sourceDir.c_str(),result); srand(time(NULL)); int swapIndex; string tmpFileName; //1. 随机化 for (int i=0;i<result.size();i++) { swapIndex=rand()%(result.size()); if(i!=swapIndex) { pos1 =result.find(i); tmpFileName = pos1->second; pos2=result.find(swapIndex); pos1->second = pos2->second; pos2->second = tmpFileName; } } FILE *rfp; rfp=fopen("E:\\研究生课程\\下学期\\机器学习\\作业\\2_MLKD-Project2-Release\\MLKD-Project2-Release\\tc\\train.doc.label","r"); if (rfp==NULL) { printf("open train.doc.label error!\n"); return -1; } //3. 读入label map<int,int> trainLabels; map<int,int>::iterator pos3; int tmpFileNo,label; for(int i=0;i<4500;i++) { fscanf(rfp,"%d\t%d",&tmpFileNo,&label); trainLabels.insert(make_pair(tmpFileNo,label)); } fclose(rfp); //2. 随机生成3组不相同的文件名 复制文件到文件夹 int part=3; int size=result.size()/part; string desDir; char tmp='A'; map<int,string>::iterator pos1,pos2; for(int i=0;i<part;i++) { FILE *fLabels; string fLabel=baseDir+"\\train"; desDir=baseDir+"\\train"; desDir.push_back(tmp); fLabel.push_back(tmp); fLabel.append(".doc.label"); fLabels=fopen(fLabel.c_str(),"w+"); if (fLabels==NULL) { printf("write to train.doc.label error!\n"); return -1; } //创建文件夹 bool isCorrect = CreateDirectory(desDir.c_str(),NULL); if (!isCorrect) { printf("copying error!\n"); return -1; } tmp=tmp+1; //在每个文件夹中复制size个文件 for (int j=0;j<size;j++) { pos1 = result.find(j+size*i); string sourceFile = sourceDir+pos1->second; string desFile = desDir+"\\"+pos1->second; //复制文件到trainA\B\C中 isCorrect=CopyFile(sourceFile.c_str(),desFile.c_str(),FALSE); if(!isCorrect) { printf("copying error!\n"); return -1; } printf("copying %d \n",j+size*i+1); //复制label到文件trainA\B\C.doc.label中 int fileNo = atoi(pos1->second.c_str()); pos3 = trainLabels.find(fileNo); fprintf(fLabels,"%d\t%d\n",pos3->first,pos3->second); } fclose(fLabels); } return 0; }