2013-07-11 22:00:28
第二章 算法
本章围绕三个问题进行算法讨论,包括元素的查找、字符串的旋转、以及变位词的查找。
下面给出了实现代码、以及测试结果。
问题一 查找不存在的元素
思路一:用位图;
思路二:用二分搜索法,这种方法需要先对数组排序;
若用二分搜索法,有以下限制:
满足以下假设:
1)数组中没有重复元素
2)数组中必有缺失元素
书中给出的问题也正是具有这样的特殊性,因此可以使用二分法进行缺失元素的查找。
下面的代码给出了上述两种方法的实现代码,以及测试代码所需要的数据产生程序、位图排序程序,后面附上了测试结果。
注意:此处的代码没有数据的合法性检查,假设数据满足上面的条件。
完整代码:
1 #include <iostream> 2 #include <bitset> 3 #include <ctime> 4 using namespace std; 5 6 const int MAX_NUMBER = 10000010; 7 const int NUMBER_OF_DATA = 10000000; 8 9 //产生在区间[lowerBound,upperBound]内的一个随机数 10 int RandomInt(const int lowerBound,const int upperBound) 11 { 12 return ( lowerBound + ( RAND_MAX * rand() + rand() ) % (upperBound - lowerBound + 1) ); 13 } 14 15 //产生在[0,maxOfRandomInt]范围内的numberOfRandomInt个随机数 16 //结果存入数组randomInt中 17 void RandomIntGen(const int numberOfRandomInt,const int maxOfRandomInt,int randomInt[]) 18 { 19 int i; 20 int tmp; 21 int randomTmp; 22 int *sequenceInt = new int[maxOfRandomInt]; 23 24 srand((unsigned) time(NULL)); 25 26 for (i = 0; i < maxOfRandomInt; i++) 27 { 28 sequenceInt[i] = i; 29 } 30 31 for (i = 0; i < numberOfRandomInt; i++) 32 { 33 randomTmp = RandomInt(i, maxOfRandomInt - 1); 34 tmp = sequenceInt[randomTmp]; 35 sequenceInt[randomTmp] = sequenceInt[i]; 36 sequenceInt[i] = tmp; 37 randomInt[i] = sequenceInt[i]; //随机数保存在数组中 38 } 39 40 delete [] sequenceInt; //释放内存 41 } 42 43 //注意写初始化函数 44 void Init(int BitmapArray[],const int n) 45 { 46 for (int i = 0;i < n;++i) 47 { 48 BitmapArray[i] = 0; 49 } 50 } 51 52 //用int型模拟位图,实现的位图排序 53 const int BitsPerInt = 32; 54 const int Div32Shift = 5; 55 const int Mod32Mask = 0x1F; 56 57 //BitmapSort_2:用位运算实现置位、清零、判断是否为1 58 void SetBit(int BitmapArray[],const int BitToSet) 59 { 60 //BitmapArray[ (BitToSet >> Div32Shift) ] | = ( 1 << (BitToSet & Mod32Mask) ); //error C2059: syntax error : '=' 61 BitmapArray[ (BitToSet >> Div32Shift) ] = BitmapArray[ (BitToSet >> Div32Shift) ] 62 | ( 1 << (BitToSet & Mod32Mask) ); 63 } 64 65 void ClearBit(int BitmapArray[],const int BitToClear) 66 { 67 BitmapArray[ (BitToClear >> Div32Shift) ] = BitmapArray[ (BitToClear >> Div32Shift) ] 68 & ~( 1 << (BitToClear & Mod32Mask) ); 69 } 70 71 bool IsBitSet(const int BitmapArray[],const int BitToCheck) //定义为const类型,在不小心改变时,会给出报错信息 72 { 73 return ( BitmapArray[ (BitToCheck >> Div32Shift) ] & ( 1 << (BitToCheck & Mod32Mask) ) ); 74 } 75 76 void BitmapSort_2(int ArrayUnsorted[],const int n) 77 { 78 int i ; 79 int cnt = 0; 80 int *BitmapArray = new int[1 + MAX_NUMBER/BitsPerInt]; //当MaxRandomInt较大时,必须从对上分配内存,否则内存不够用,导致出错 81 82 if (NULL == BitmapArray) 83 { 84 cout<<"memory allocation error !"<<endl; 85 exit(0); 86 } 87 88 Init(BitmapArray,1 + MAX_NUMBER/BitsPerInt); //初始化为0 89 90 for (i = 0;i < n;++i) 91 { 92 SetBit(BitmapArray,ArrayUnsorted[i]); 93 } 94 95 for (i = 0;i < MAX_NUMBER;++i) //不是for (i = 0;i < n;++i) 96 { 97 if ( IsBitSet(BitmapArray,i) ) 98 { 99 ArrayUnsorted[cnt++] = i; 100 } 101 } 102 103 delete [] BitmapArray; 104 } 105 106 //用位图的方法查找丢失元素 107 //满足以下假设: 108 //1)数组中没有重复元素 109 //2)数组中必有缺失元素 110 int FindLostByBitmap(int arrayToFind[],int numberOfData) 111 { 112 int i ; 113 int cnt = 0; 114 int *BitmapArray = new int[1 + MAX_NUMBER/BitsPerInt]; //当MaxRandomInt较大时,必须从对上分配内存,否则内存不够用,导致出错 115 116 if (NULL == BitmapArray) 117 { 118 cout<<"memory allocation error !"<<endl; 119 exit(0); 120 } 121 122 Init(BitmapArray,1 + MAX_NUMBER/BitsPerInt); //初始化为0 123 124 for (i = 0;i < numberOfData;++i) 125 { 126 SetBit(BitmapArray,arrayToFind[i]); 127 } 128 129 for (i = 0;i < MAX_NUMBER;++i) //不是for (i = 0;i < n;++i) 130 { 131 if ( !IsBitSet(BitmapArray,i) ) 132 { 133 return i; 134 } 135 } 136 137 if (MAX_NUMBER == i) 138 { 139 cout<<"no element is lost!"<<endl; 140 return -1; 141 } 142 143 delete [] BitmapArray; 144 } 145 146 //二分搜索法 147 //满足以下假设: 148 //1)数组中没有重复元素 149 //2)数组中必有缺失元素 150 int FindLostByBinarySearch(int arrayToFind[],int numberOfData) 151 { 152 int cnt = 0; 153 int begin = 0; 154 int end = numberOfData - 1; 155 int mid = 0; 156 157 if (NULL == arrayToFind || numberOfData < 2) 158 { 159 cout<<"the input array is invalid!"<<endl; 160 return -1; 161 } 162 163 while (begin < end - 1) 164 { 165 mid = (begin + end)/2; 166 if (arrayToFind[mid] - arrayToFind[begin] > mid - begin) 167 { 168 end = mid; 169 } 170 else 171 { 172 begin = mid; 173 } 174 } 175 176 if (arrayToFind[end] - arrayToFind[begin] > 1) 177 { 178 return (arrayToFind[begin] + 1); 179 } 180 else 181 { 182 cout<<"no element is lost!"<<endl; 183 return -1; 184 } 185 } 186 187 //将数组写入txt中,便于检查结果的正确性 188 void WriteArrayToUnsortedTxt(int array[],int n) 189 { 190 FILE *fout; 191 int i; 192 193 fout = fopen("array_unsorted.txt","wt"); 194 195 if(fout == NULL) 196 { 197 printf("forward_i.txt open error! "); 198 exit(0); 199 } 200 201 printf("file open success! "); 202 203 for (i = 0;i < n;i++) 204 { 205 fprintf(fout,"%d ",array[i]); 206 } 207 208 fclose(fout); 209 } 210 211 //将数组写入txt中,便于检查结果的正确性 212 void WriteArrayToSortedTxt(int array[],int n) 213 { 214 FILE *fout; 215 int i; 216 217 fout = fopen("array_sorted.txt","wt"); 218 219 if(fout == NULL) 220 { 221 printf("forward_i.txt open error! "); 222 exit(0); 223 } 224 225 printf("file open success! "); 226 227 for (i = 0;i < n;i++) 228 { 229 fprintf(fout,"%d ",array[i]); 230 } 231 232 fclose(fout); 233 } 234 235 //显示数组元素 236 void DisplayArray(int ArrayUnsorted[],int n) 237 { 238 for (int i = 0;i < n;++i) 239 { 240 cout<<ArrayUnsorted[i]<<" "; 241 } 242 cout<<endl; 243 } 244 245 //测试代码 246 int main(void) 247 { 248 const int numberOfData = NUMBER_OF_DATA; 249 const int maxOfRandomInt = MAX_NUMBER; 250 int *arrayToFind = new int[NUMBER_OF_DATA]; 251 int StartTime = 0; 252 int TimeCost = 0; 253 254 cout<<"the max number of integer is : "<<maxOfRandomInt<<endl; 255 cout<<"the number of integer is : "<<numberOfData<<endl; 256 257 RandomIntGen(numberOfData,maxOfRandomInt,arrayToFind); 258 WriteArrayToUnsortedTxt(arrayToFind,numberOfData); 259 /*cout<<"the randomInt array is :"<<endl; 260 DisplayArray(arrayToFind,numberOfData);*/ 261 262 //Test FindLostByBitmap 263 cout<<"Test FindLostByBitmap..."<<endl; 264 StartTime = clock(); 265 cout<<"the lost number is : "<<FindLostByBitmap(arrayToFind,numberOfData)<<endl; 266 TimeCost = 1e6 * ( clock() - StartTime ) / CLOCKS_PER_SEC; 267 cout<<"the time cost of FindLostByBitmap is : "<<TimeCost<<"ms"<<endl; 268 269 //BitmapSort_2,为使用基于二分法的搜索进行排序的预处理 270 cout<<"Test BitmapSort_2..."<<endl; 271 272 StartTime = clock(); 273 BitmapSort_2(arrayToFind,numberOfData); 274 TimeCost = 1e6 * ( clock() - StartTime ) / CLOCKS_PER_SEC; 275 cout<<"the time cost of BitmapSort_2 is : "<<TimeCost<<"ms"<<endl; 276 277 WriteArrayToSortedTxt(arrayToFind,numberOfData); 278 /*cout<<"the sorted array is : "<<endl; 279 DisplayArray(arrayToFind,numberOfData); */ 280 281 //Test FindLostByBinarySearch 282 cout<<"Test FindLostByBinarySearch..."<<endl; 283 StartTime = clock(); 284 cout<<"the lost number is : "<<FindLostByBinarySearch(arrayToFind,numberOfData)<<endl; 285 TimeCost = 1e6 * ( clock() - StartTime ) / CLOCKS_PER_SEC; 286 cout<<"the time cost of FindLostByBinarySearch is : "<<TimeCost<<"ms"<<endl; 287 288 delete [] arrayToFind; //delete与new要配对出现 289 return 0; 290 }
测试结果,1000,0010个数据中缺失10个时:
(可见,在该规模下,用二分搜索法的速度约为位图法的1000+倍)
the max number of integer is : 10000010 the number of integer is : 10000000 file open success! Test FindLostByBitmap... the lost number is : 7952 the time cost of FindLostByBitmap is : 1486000ms Test BitmapSort_2... the time cost of BitmapSort_2 is : 2078000ms file open success! Test FindLostByBinarySearch... the lost number is : 7952 the time cost of FindLostByBinarySearch is : 8000ms 请按任意键继续. . .
上面的结果只是程序运行一次的结果,如果要得到比较靠靠的分析,应该多次测试,而且测试的结果可能与数据集有关,若要固定数据集,将随机数产生函数RandomIntGen中的//srand((unsigned) time(NULL));注释掉即可。
问题二 数组的旋转
对于该问题,大多数人都能想到比较直观的解决方法,书中已有说明,此处不再赘述,另外,书中给出了一种比较巧妙的解法,称为杂技法,并给出了代码实现,但对于该方法的原理没有介绍,杂技法不易理解,此处暂时不做讨论。
问题三 变位词的查找
可借助于[标识,单词]对,将变位词进行标识,根据标识划分变位词。
借助于C++的工具vector、以及map,可以方便地实现。
注意几点:
- 数据结构的选择,存储单词采用 vector <string> ,因为每个单词是string类型 ;
- 存储[标识,单词],采用map < string ,vector <string> >,因为一个标识sign对应多个string类型的单词;
- 标识通过对单词的排序得到,此处采用C++的排序函数sort,sort( sign.begin(),sign.end() );
完整代码如下:
1 //显示vector <string>类型对象的元素 2 void DisplayVector(vector <string> stringVector) 3 { 4 vector <string> ::iterator iterVector; 5 6 for (iterVector = stringVector.begin();iterVector != stringVector.end();++iterVector) 7 { 8 cout<<*iterVector<<" "; 9 } 10 cout<<endl; 11 } 12 13 //变位词的查找 14 void FindAnagrams() 15 { 16 map < string ,vector <string> > signWordMap; 17 vector <string> dictVector; 18 string word; 19 string sign; 20 21 cout<<"please enter a word ,end with ctrl+z: "<<endl; 22 while (cin>>word) //输入字典中单词 23 { 24 dictVector.push_back(word); 25 cout<<"please enter a word ,end with ctrl+z: "<<endl; 26 } 27 cout<<"the dictionary is : "<<endl; 28 DisplayVector(dictVector); 29 30 vector <string> :: iterator iterVector; 31 32 for (iterVector = dictVector.begin();iterVector != dictVector.end();++iterVector) //制作标识、单词对 33 { 34 sign = *iterVector; 35 sort( sign.begin(),sign.end() ); 36 signWordMap[sign].push_back(*iterVector); 37 } 38 39 map < string ,vector <string> > :: iterator iterMap; 40 41 for (iterMap = signWordMap.begin();iterMap != signWordMap.end();++iterMap)//输出变位词 42 { 43 cout<<"the anagrams signed by "<<(*iterMap).first<<" is :"<<endl; 44 for (iterVector = (*iterMap).second.begin() ;iterVector != (*iterMap).second.end();++iterVector) 45 { 46 cout<<*iterVector<<" "; 47 } 48 cout<<endl; 49 } 50 } 51 52 int main(void) 53 { 54 //test FindAnagrams 55 FindAnagrams(); 56 return 0; 57 }
运行结果:
please enter a word ,end with ctrl+z: tops stop snap pans top ant at please enter a word ,end with ctrl+z: please enter a word ,end with ctrl+z: please enter a word ,end with ctrl+z: please enter a word ,end with ctrl+z: please enter a word ,end with ctrl+z: please enter a word ,end with ctrl+z: please enter a word ,end with ctrl+z: ^Z the dictionary is : tops stop snap pans top ant at the anagrams signed by anps is : snap pans the anagrams signed by ant is : ant the anagrams signed by at is : at the anagrams signed by opst is : tops stop the anagrams signed by opt is : top 请按任意键继续. . .