• 【编程珠玑】读书笔记 第二章 算法


    2013-07-11 22:00:28

    第二章 算法

    本章围绕三个问题进行算法讨论,包括元素的查找、字符串的旋转、以及变位词的查找。

    下面给出了实现代码、以及测试结果。

    问题一 查找不存在的元素

    思路一:用位图;

    思路二:用二分搜索法,这种方法需要先对数组排序;

    若用二分搜索法,数据需满足以下假设:
    1)数组中没有重复元素
    2)数组中必有缺失元素

    书中给出的问题也正是具有这样的特殊性,因此可以使用二分法进行缺失元素的查找。

    下面的代码给出了上述两种方法的实现代码,以及测试代码所需要的数据产生程序、位图排序程序,后面附上了测试结果。

    注意:此处的代码没有数据的合法性检查,假设数据满足上面的条件。

    完整代码:

      #include <bitset>
      #include <cstdio>
      #include <cstdlib>
      #include <ctime>
      #include <iostream>
      4 using namespace std;
      5 
      // How many distinct integers the experiment generates.
      const int NUMBER_OF_DATA = 10000000;
      // Size of the value range [0, MAX_NUMBER); it is ten larger than NUMBER_OF_DATA,
      // so ten values of the range are always absent from the generated data.
      const int MAX_NUMBER = NUMBER_OF_DATA + 10;
      8 
      // Returns a pseudo-random integer in the closed interval [lowerBound, upperBound].
      //
      // Two rand() draws are combined so that ranges wider than RAND_MAX can be covered.
      // The combination is evaluated in unsigned 64-bit arithmetic: when RAND_MAX is
      // 2^31 - 1 the int expression RAND_MAX * rand() + rand() overflows, which is
      // undefined behaviour and in practice can produce a negative offset, i.e. a
      // result below lowerBound.
      //
      // Precondition: lowerBound <= upperBound.
      // The generator state is that of rand(); call srand() beforehand to choose a seed.
      int RandomInt(const int lowerBound,const int upperBound)
      {
          // Width of the interval, computed in 64 bits so upperBound - lowerBound + 1 cannot overflow.
          const unsigned long long span =
              static_cast<unsigned long long>( static_cast<long long>(upperBound) - lowerBound ) + 1ULL;

          const unsigned long long high = static_cast<unsigned long long>( std::rand() );
          const unsigned long long low = static_cast<unsigned long long>( std::rand() );
          const unsigned long long combined = static_cast<unsigned long long>(RAND_MAX) * high + low;

          // combined % span is in [0, span - 1], so the sum is in [lowerBound, upperBound].
          return static_cast<int>( lowerBound + static_cast<long long>(combined % span) );
      }
     14 
     15 //产生在[0,maxOfRandomInt]范围内的numberOfRandomInt个随机数
     16 //结果存入数组randomInt中
     17 void RandomIntGen(const int numberOfRandomInt,const int maxOfRandomInt,int randomInt[])
     18 {
     19     int i;
     20     int tmp;
     21     int randomTmp;
     22     int *sequenceInt = new int[maxOfRandomInt];
     23 
     24     srand((unsigned) time(NULL));
     25 
     26     for (i = 0; i < maxOfRandomInt; i++)
     27     {
     28         sequenceInt[i] = i;    
     29     }
     30 
     31     for (i = 0; i < numberOfRandomInt; i++) 
     32     {
     33         randomTmp = RandomInt(i, maxOfRandomInt - 1);
     34         tmp = sequenceInt[randomTmp];
     35         sequenceInt[randomTmp] = sequenceInt[i];
     36         sequenceInt[i] = tmp;
     37         randomInt[i] = sequenceInt[i];    //随机数保存在数组中
     38     }
     39 
     40     delete [] sequenceInt;   //释放内存
     41 }
     42 
     // Zero-fills the first n words of BitmapArray so that every bit starts out clear.
     // Memory obtained with plain new[] is not initialised, so this must run before the
     // bitmap is used. Does nothing when n <= 0.
     void Init(int BitmapArray[],const int n)
     {
         int *cursor = BitmapArray;
         int remaining = n;

         while (remaining > 0)
         {
             *cursor = 0;
             ++cursor;
             --remaining;
         }
     }
     51 
     // Layout of the bitmap that is simulated with an array of int (used by the bitmap sort).
     const int Div32Shift = 5;                  // shifting a bit number right by 5 divides it by 32
     const int BitsPerInt = 1 << Div32Shift;    // 32 bits are stored in each array element
     const int Mod32Mask = BitsPerInt - 1;      // 0x1F: masking a bit number yields it modulo 32
     56 
     57 //BitmapSort_2:用位运算实现置位、清零、判断是否为1
     58 void SetBit(int BitmapArray[],const int BitToSet)
     59 {
     60     //BitmapArray[ (BitToSet >> Div32Shift) ] | = ( 1 << (BitToSet & Mod32Mask) );  //error C2059: syntax error : '='
     61     BitmapArray[ (BitToSet >> Div32Shift) ] = BitmapArray[ (BitToSet >> Div32Shift) ] 
     62     | ( 1 << (BitToSet & Mod32Mask) ); 
     63 }
     64 
     65 void ClearBit(int BitmapArray[],const int BitToClear)
     66 {
     67     BitmapArray[ (BitToClear >> Div32Shift) ] = BitmapArray[ (BitToClear >> Div32Shift) ] 
     68     & ~( 1 << (BitToClear & Mod32Mask) ); 
     69 }
     70 
     71 bool IsBitSet(const int BitmapArray[],const int BitToCheck)  //定义为const类型,在不小心改变时,会给出报错信息
     72 {
     73     return ( BitmapArray[ (BitToCheck >> Div32Shift) ] & ( 1 << (BitToCheck & Mod32Mask) ) ); 
     74 }
     75 
     76 void BitmapSort_2(int ArrayUnsorted[],const int n)
     77 {
     78     int i ;
     79     int cnt = 0;
     80     int *BitmapArray = new int[1 + MAX_NUMBER/BitsPerInt];   //当MaxRandomInt较大时,必须从对上分配内存,否则内存不够用,导致出错
     81 
     82     if (NULL == BitmapArray)
     83     {
     84         cout<<"memory allocation error !"<<endl;
     85         exit(0);
     86     }
     87 
     88     Init(BitmapArray,1 + MAX_NUMBER/BitsPerInt);   //初始化为0
     89 
     90     for (i = 0;i < n;++i)
     91     {
     92         SetBit(BitmapArray,ArrayUnsorted[i]);
     93     }
     94 
     95     for (i = 0;i < MAX_NUMBER;++i)  //不是for (i = 0;i < n;++i)
     96     {
     97         if ( IsBitSet(BitmapArray,i) )
     98         {
     99             ArrayUnsorted[cnt++] = i;
    100         }
    101     }
    102 
    103     delete [] BitmapArray;
    104 }
    105 
    106 //用位图的方法查找丢失元素
    107 //满足以下假设:
    108 //1)数组中没有重复元素
    109 //2)数组中必有缺失元素
    110 int FindLostByBitmap(int arrayToFind[],int numberOfData)
    111 {
    112     int i ;
    113     int cnt = 0;
    114     int *BitmapArray = new int[1 + MAX_NUMBER/BitsPerInt];   //当MaxRandomInt较大时,必须从对上分配内存,否则内存不够用,导致出错
    115 
    116     if (NULL == BitmapArray)
    117     {
    118         cout<<"memory allocation error !"<<endl;
    119         exit(0);
    120     }
    121 
    122     Init(BitmapArray,1 + MAX_NUMBER/BitsPerInt);   //初始化为0
    123 
    124     for (i = 0;i < numberOfData;++i)
    125     {
    126         SetBit(BitmapArray,arrayToFind[i]);
    127     }
    128 
    129     for (i = 0;i < MAX_NUMBER;++i)  //不是for (i = 0;i < n;++i)
    130     {
    131         if ( !IsBitSet(BitmapArray,i) )
    132         {
    133             return i;
    134         }
    135     }
    136 
    137     if (MAX_NUMBER == i)
    138     {
    139         cout<<"no element is lost!"<<endl;
    140         return -1;
    141     }
    142 
    143     delete [] BitmapArray;
    144 }
    145 
    // Finds a missing value by binary search.
    //
    // arrayToFind[0 .. numberOfData - 1] must be sorted in ascending order and contain
    // no duplicates. The search repeatedly keeps the half in which the values span more
    // than the number of index steps (so a gap must lie inside it) and returns the first
    // value after the left edge of the leftmost gap.
    //
    // Returns -1 and prints a message when the input is invalid (null array or fewer
    // than two elements) or when the elements are consecutive, i.e. there is no gap
    // between the first and the last element.
    //
    // Value differences are evaluated in 64 bits because the difference of two ints can
    // exceed the int range, and the midpoint is computed without adding the two indices.
    int FindLostByBinarySearch(int arrayToFind[],int numberOfData)
    {
        if (NULL == arrayToFind || numberOfData < 2)
        {
            std::cout<<"the input array is invalid!"<<std::endl;
            return -1;
        }

        int begin = 0;
        int end = numberOfData - 1;

        // Narrow [begin, end] down to two adjacent positions.
        while (begin < end - 1)
        {
            const int mid = begin + (end - begin)/2;
            const long long valueSpan = static_cast<long long>(arrayToFind[mid]) - arrayToFind[begin];

            if (valueSpan > mid - begin)
            {
                end = mid;      // more values than positions on the left: a gap is in [begin, mid]
            }
            else
            {
                begin = mid;    // left half is consecutive: any gap is in [mid, end]
            }
        }

        const long long lastSpan = static_cast<long long>(arrayToFind[end]) - arrayToFind[begin];
        if (lastSpan > 1)
        {
            return (arrayToFind[begin] + 1);
        }

        std::cout<<"no element is lost!"<<std::endl;
        return -1;
    }
    186 
    // Writes array[0 .. n - 1] to "array_unsorted.txt" in the current directory,
    // one decimal value per line, so the generated data can be inspected by hand.
    // Prints "file open success!" once the file has been opened.
    // If the file cannot be opened, prints an error naming the file and terminates
    // the process with a non-zero exit status.
    void WriteArrayToUnsortedTxt(int array[],int n)
    {
        std::FILE *fout = std::fopen("array_unsorted.txt","w");

        if (fout == NULL)
        {
            std::printf("array_unsorted.txt open error!\n");
            std::exit(EXIT_FAILURE);
        }

        std::printf("file open success!\n");

        for (int i = 0;i < n;i++)
        {
            std::fprintf(fout,"%d\n",array[i]);
        }

        std::fclose(fout);
    }
    210 
    // Writes array[0 .. n - 1] to "array_sorted.txt" in the current directory,
    // one decimal value per line, so the sorted result can be inspected by hand.
    // Prints "file open success!" once the file has been opened.
    // If the file cannot be opened, prints an error naming the file and terminates
    // the process with a non-zero exit status.
    void WriteArrayToSortedTxt(int array[],int n)
    {
        std::FILE *fout = std::fopen("array_sorted.txt","w");

        if (fout == NULL)
        {
            std::printf("array_sorted.txt open error!\n");
            std::exit(EXIT_FAILURE);
        }

        std::printf("file open success!\n");

        for (int i = 0;i < n;i++)
        {
            std::fprintf(fout,"%d\n",array[i]);
        }

        std::fclose(fout);
    }
    234 
    // Prints the first n elements of ArrayUnsorted on one line of standard output,
    // each followed by a tab, then ends the line.
    void DisplayArray(int ArrayUnsorted[],int n)
    {
        int index = 0;

        while (index < n)
        {
            std::cout<<ArrayUnsorted[index]<<"\t";
            ++index;
        }

        std::cout<<std::endl;
    }
    244 
    245 //测试代码
    246 int main(void)
    247 {
    248     const int numberOfData = NUMBER_OF_DATA;
    249     const int maxOfRandomInt = MAX_NUMBER;
    250     int *arrayToFind = new int[NUMBER_OF_DATA];
    251     int StartTime = 0;
    252     int TimeCost = 0; 
    253 
    254     cout<<"the max number of integer is : "<<maxOfRandomInt<<endl;
    255     cout<<"the number of integer is : "<<numberOfData<<endl;
    256 
    257     RandomIntGen(numberOfData,maxOfRandomInt,arrayToFind);
    258     WriteArrayToUnsortedTxt(arrayToFind,numberOfData);
    259     /*cout<<"the randomInt array is :"<<endl;
    260     DisplayArray(arrayToFind,numberOfData);*/
    261     
    262     //Test FindLostByBitmap
    263     cout<<"Test FindLostByBitmap..."<<endl;
    264     StartTime = clock();
    265     cout<<"the lost number is : "<<FindLostByBitmap(arrayToFind,numberOfData)<<endl;
    266     TimeCost = 1e6 * ( clock() - StartTime ) / CLOCKS_PER_SEC;
    267     cout<<"the time cost of FindLostByBitmap is : "<<TimeCost<<"ms"<<endl;
    268 
    269     //BitmapSort_2,为使用基于二分法的搜索进行排序的预处理
    270     cout<<"Test BitmapSort_2..."<<endl;
    271 
    272     StartTime = clock();
    273     BitmapSort_2(arrayToFind,numberOfData);
    274     TimeCost = 1e6 * ( clock() - StartTime ) / CLOCKS_PER_SEC;
    275     cout<<"the time cost of BitmapSort_2 is : "<<TimeCost<<"ms"<<endl;
    276 
    277     WriteArrayToSortedTxt(arrayToFind,numberOfData);
    278     /*cout<<"the sorted array is : "<<endl;
    279     DisplayArray(arrayToFind,numberOfData);  */
    280 
    281     //Test FindLostByBinarySearch
    282     cout<<"Test FindLostByBinarySearch..."<<endl;
    283     StartTime = clock();
    284     cout<<"the lost number is : "<<FindLostByBinarySearch(arrayToFind,numberOfData)<<endl;
    285     TimeCost = 1e6 * ( clock() - StartTime ) / CLOCKS_PER_SEC;
    286     cout<<"the time cost of FindLostByBinarySearch is : "<<TimeCost<<"ms"<<endl;
    287 
    288     delete [] arrayToFind;   //delete与new要配对出现
    289     return 0;
    290 }

    测试结果(取值范围共 10000010 个整数,其中缺失 10 个时):

    (按下面这次运行的数据,二分搜索本身约比位图法快 180 多倍(1486000 / 8000 ≈ 186),但这一对比没有计入二分搜索所需的排序预处理时间;另外,输出中标注为"ms"的数值实际单位是微秒,例如 1486000 即约 1.486 秒)

    the max number of integer is : 10000010
    the number of integer is : 10000000
    file open success!
    Test FindLostByBitmap...
    the lost number is : 7952
    the time cost of FindLostByBitmap is : 1486000ms
    Test BitmapSort_2...
    the time cost of BitmapSort_2 is : 2078000ms
    file open success!
    Test FindLostByBinarySearch...
    the lost number is : 7952
    the time cost of FindLostByBinarySearch is : 8000ms
    请按任意键继续. . .

    上面的结果只是程序运行一次的结果,如果要得到比较可靠的分析,应该多次测试;而且测试结果可能与数据集有关,若要固定数据集,将随机数产生函数 RandomIntGen 中的 srand((unsigned) time(NULL)); 这一行注释掉即可。


    问题二 数组的旋转

    对于该问题,大多数人都能想到比较直观的解决方法,书中已有说明,此处不再赘述,另外,书中给出了一种比较巧妙的解法,称为杂技法,并给出了代码实现,但对于该方法的原理没有介绍,杂技法不易理解,此处暂时不做讨论。


    问题三 变位词的查找

    可借助于[标识,单词]对,将变位词进行标识,根据标识划分变位词。

    借助于C++的工具vector、以及map,可以方便地实现。

    注意几点:

    1. 数据结构的选择,存储单词采用 vector <string> ,因为每个单词是string类型 ;
    2. 存储[标识,单词],采用map < string ,vector <string> >,因为一个标识sign对应多个string类型的单词;
    3. 标识通过对单词的排序得到,此处采用C++的排序函数sort,sort( sign.begin(),sign.end() );

    完整代码如下:

     // Prints every string of stringVector on one line of standard output, each followed
     // by a tab, then ends the line.
     // The vector is taken by const reference: printing does not modify it, and passing
     // it by value copied every string of the dictionary on each call.
     void DisplayVector(const std::vector<std::string> &stringVector)
     {
         std::vector<std::string>::const_iterator iterVector;

         for (iterVector = stringVector.begin();iterVector != stringVector.end();++iterVector)
         {
             std::cout<<*iterVector<<"\t";
         }
         std::cout<<std::endl;
     }
    12 
    13 //变位词的查找
    14 void FindAnagrams()
    15 {
    16     map < string ,vector <string> > signWordMap;  
    17     vector <string> dictVector;
    18     string word;
    19     string sign;
    20     
    21     cout<<"please enter a word ,end with ctrl+z: "<<endl;
    22     while (cin>>word)        //输入字典中单词
    23     {
    24         dictVector.push_back(word);
    25         cout<<"please enter a word ,end with ctrl+z: "<<endl;
    26     }
    27     cout<<"the dictionary is : "<<endl;
    28     DisplayVector(dictVector);
    29 
    30     vector <string> :: iterator iterVector;
    31 
    32     for (iterVector = dictVector.begin();iterVector != dictVector.end();++iterVector)  //制作标识、单词对
    33     {
    34         sign = *iterVector;
    35         sort( sign.begin(),sign.end() );
    36         signWordMap[sign].push_back(*iterVector);
    37     }
    38 
    39     map < string ,vector <string> > :: iterator iterMap;
    40 
    41     for (iterMap = signWordMap.begin();iterMap != signWordMap.end();++iterMap)//输出变位词
    42     {
    43         cout<<"the anagrams signed by "<<(*iterMap).first<<" is :"<<endl;
    44         for (iterVector = (*iterMap).second.begin() ;iterVector != (*iterMap).second.end();++iterVector)
    45         {
    46             cout<<*iterVector<<"	";
    47         }
    48         cout<<endl;
    49     }
    50 }
    51 
    52 int main(void)
    53 {
    54     //test FindAnagrams
    55     FindAnagrams();
    56     return 0;
    57 }

    运行结果:

    please enter a word ,end with ctrl+z:
    tops  stop snap pans top ant at
    please enter a word ,end with ctrl+z:
    please enter a word ,end with ctrl+z:
    please enter a word ,end with ctrl+z:
    please enter a word ,end with ctrl+z:
    please enter a word ,end with ctrl+z:
    please enter a word ,end with ctrl+z:
    please enter a word ,end with ctrl+z:
    ^Z
    the dictionary is :
    tops    stop    snap    pans    top     ant     at
    the anagrams signed by anps is :
    snap    pans
    the anagrams signed by ant is :
    ant
    the anagrams signed by at is :
    at
    the anagrams signed by opst is :
    tops    stop
    the anagrams signed by opt is :
    top
    请按任意键继续. . .
  • 相关阅读:
    error LNK1123: 转换到 COFF 期间失败: 文件无效或损坏 E:OCX
    JQUERY解析XML IE8的兼容问题
    IE11下,IIS7.5不自动生成__doPostBack事件
    IE11无法支持Forms认证,,,也就是无法保存COOKIE
    IIS7.5真变态,服务器时间格式导致不生成WebResource.axd
    django ajax MTV与MVC 多对多创建方式
    django F与Q查询 事务 only与defer
    模型层
    模板层(模板语法)
    django 路由层 伪静态网页 虚拟环境 视图层
  • 原文地址:https://www.cnblogs.com/youngforever/p/3185149.html
Copyright © 2020-2023  润新知