• 【字符串问题】求一个字符串中重复出现的最长的子串


    2013-09-14 15:34:16

    用后缀数组求一个字符串中重复出现的最长的子串。

    1. 用C++中的string类可以很方便地进行操作,需将后缀数组保存在vector<string>中,如下面代码中的string版本所示,但每个后缀都是一份独立的string拷贝,时间和空间开销都很大;
    2. 直接用字符指针指向后缀字符串的首地址,可以节省很大的空间,如下面代码中的char *版本所示.
    3. 注意使用char *版本时,用qsort函数对后缀字符串数组排序,需要提供comp函数,该函数的写法如下:
    1 int pStrcmp(const void *p,const void *q)
    2 {
    3     return ( strcmp( *(char **)p , *(char **)q ) );
    4 }

    更多关于该函数的说明,详见博文http://www.cnblogs.com/youngforever/articles/3321469.html

    代码(string版本):

      1 #include <iostream>
      2 #include <cassert>
      3 #include <string>        //用string类模板,必须包含该头文件
      4 #include <vector>        //用vector模板,必须包含该头文件
      5 #include <algorithm>   //用sort函数,必须包含该头文件
      6 using namespace std;
      7 
      // Returns the length of the longest common prefix of str1 and str2,
      // i.e. the number of leading positions at which both strings hold the
      // same character.
      size_t GetLCS(const std::string &str1,const std::string &str2)
      {
          // The scan can never run past the end of the shorter string.
          const size_t limit = std::min(str1.length(),str2.length());

          size_t matched = 0;
          while (matched < limit && str1[matched] == str2[matched])
          {
              ++matched;
          }

          return matched;
      }
     33 
     34 //获取字符串中重复出现且最长的子串
     35 size_t FindLongeStringApearTwice(const string &srcStr,string &subStr)
     36 {
     37     vector<string> vecStr;
     38 
     39     size_t index;
     40     size_t end = srcStr.length();
     41 
     42     string tmpStr;
     43     size_t tmpLen = 0;
     44     size_t maxLen = 0;
     45 
     46     for ( index = 0;index < end;++index)   //生成后缀数组
     47     {
     48         tmpStr = srcStr.substr(index,(end - 1 - index));
     49         vecStr.push_back(tmpStr);
     50         //tmpStr.clear();  //此处的清楚是不必要的,可以删去
     51     }
     52 
     53     sort(vecStr.begin(),vecStr.end());  //对后缀字符串数组排序
     54 
     55     vector<string>::iterator iter;
     56 
     57     for (iter = vecStr.begin();(iter + 1) != vecStr.end();++iter)  //求相邻string的LCS
     58     {
     59         tmpLen = GetLCS(*iter,*(iter + 1));
     60 
     61         if(tmpLen > maxLen)
     62         {
     63             maxLen = tmpLen;
     64             subStr = (*iter).substr(0,maxLen);
     65         }
     66     }
     67 
     68     return maxLen;
     69 }
     70 
     71 
     72 
     73 
     74 //测试FindLongeStringApearTwice
     75 void TestDriver()
     76 {
     77     string strArray[] = {"0123456","yyabcdabjcabceg","abcbcbcabc","hello,li mei! hello,li lei!"};
     78     size_t arrayLength = 4;
     79 
     80     string srcStr;
     81     string subStr;
     82     size_t maxLen = 0;
     83 
     84     for (size_t index = 0;index < arrayLength;++index)
     85     {
     86         //srcStr.clear();  //此处的清楚是不必要的,可以删去
     87         srcStr = strArray[index];
     88         maxLen = FindLongeStringApearTwice(srcStr,subStr);
     89 
     90         cout<<"the source string is : "<<srcStr<<endl;
     91         cout<<"the longest sub string is : "<<subStr<<endl;
     92         cout<<"the max length is : "<<maxLen<<endl;
     93         cout<<endl;
     94     }
     95 }
     96 
     97 int main()
     98 {
     99     TestDriver();
    100     return 0;
    101 }


    代码(char *版本):

      #include <algorithm>
      #include <cassert>
      #include <cstdlib>   // qsort
      #include <cstring>   // strlen, strcmp
      #include <iostream>
      #include <string>
      #include <vector>
      using namespace std;
      7 
      // Returns how many leading characters the two NUL-terminated strings
      // have in common. Both pointers must be non-null (checked by assert).
      size_t GetLCS(const char *str1,const char *str2)
      {
          assert(str1 != NULL && str2 != NULL);

          size_t matched = 0;

          // Stop at the first difference or at the end of str1. A shorter
          // str2 also ends the scan: its terminator cannot equal the non-NUL
          // character str1 holds at that position.
          while (str1[matched] != '\0' && str1[matched] == str2[matched])
          {
              ++matched;
          }

          return matched;
      }
     34 
     // Alias for char *, a pointer to a C string.
     typedef char *pCHAR;

     // Comparison callback for qsort over an array of char * elements.
     // p and q point at two array elements (not at the strings themselves),
     // so each is dereferenced once to reach the string; the strings are then
     // ordered with strcmp and its result is returned unchanged.
     int pStrcmp(const void *p,const void *q)
     {
         const char *lhs = *static_cast<char *const *>(p);
         const char *rhs = *static_cast<char *const *>(q);

         return strcmp(lhs,rhs);
     }
     41 
     // Writes the NUL-terminated string pStr to cout one character at a time,
     // then ends the line with endl.
     void DisplayString(const char *pStr)
     {
         for (const char *cursor = pStr;*cursor != '\0';++cursor)
         {
             std::cout<<*cursor;
         }

         std::cout<<std::endl;
     }
     53 
     54 size_t FindLongestStringApearTwice(const char *pSrcStr,char *&pSubStr)
     55 {
     56     assert(pSrcStr != NULL && pSubStr != NULL);
     57 
     58     const size_t lenOfSrcStr  = strlen(pSrcStr);
     59     pCHAR *pSuffixArray = new pCHAR[lenOfSrcStr];
     60 
     61     size_t index;
     62     size_t end = strlen(pSrcStr);
     63 
     64     size_t tmpLen = 0;
     65     size_t maxLen = 0;
     66 
     67     for ( index = 0;index < end;++index)
     68     {
     69         pSuffixArray[index] = (char *)pSrcStr + index;
     70         //DisplayString(pSuffixArray[index]);
     71     }
     72 
     73     qsort(pSuffixArray,lenOfSrcStr,sizeof(pCHAR),pStrcmp);
     74 /*
     75     for ( index = 0;index < end;++index )
     76     {
     77         DisplayString(pSuffixArray[index]);
     78     }*/
     79 
     80     for (index = 0;index + 1 < end;++index) 
     81     {
     82         tmpLen = GetLCS(pSuffixArray[index],pSuffixArray[index + 1]);
     83 
     84         if(tmpLen > maxLen)
     85         {
     86             maxLen = tmpLen;
     87             pSubStr = pSuffixArray[index];
     88         }
     89     }
     90 
     91     return maxLen;
     92 }
     93 
     94 
     95 void TestDriver()
     96 {
     97     pCHAR strArray[] = {"0123456","yyabcdabjcabceg","abcbcbcabc","hello,li mei! hello,li lei!"};
     98     size_t arrayLength = 4;
     99 
    100     pCHAR srcStr;
    101     pCHAR subStr;
    102     size_t maxLen = 0;
    103 
    104     for (size_t index = 0;index < arrayLength;++index)
    105     {
    106         srcStr = strArray[index];
    107         maxLen = FindLongestStringApearTwice(srcStr,subStr);
    108 
    109         cout<<"the source string is : "<<srcStr<<endl;
    110         cout<<"the longest sub string is : ";
    111 
    112         for (size_t i = 0;i < maxLen;++i)
    113         {
    114             cout<<*(subStr + i);
    115         }
    116         cout<<endl;
    117 
    118         cout<<"the max length is : "<<maxLen<<endl;
    119         cout<<endl;
    120     }
    121 }
    122 
    123 
    124 int main()
    125 {
    126     TestDriver();
    127     return 0;
    128 }

    测试结果:

    the source string is : 0123456
    the longest sub string is :
    the max length is : 0
    
    the source string is : yyabcdabjcabceg
    the longest sub string is : abc
    the max length is : 3
    
    the source string is : abcbcbcabc
    the longest sub string is : bcbc
    the max length is : 4
    
    the source string is : hello,li mei! hello,li lei!
    the longest sub string is : hello,li
    the max length is : 9
    
    请按任意键继续. . .
  • 相关阅读:
    Spacemacs配置yasnippe插件
    设置SSH只允许指定IP才能访问
    firewall的规则设置与命令(白名单设置)
    Docker存储驱动介绍
    elasticsearch启动常见错误
    ES系列:解决Exception in thread “main” java.nio.file.AccessDeniedException: /usr/local/elasticsearch
    ELK6.3.2搭建配置文件篇(filebeat版)
    Linux:sudo,没有找到有效的 sudoers 资源。
    Filebeat安装及使用
    elk + filebeat,6.3.2版本简单搭建,实现我们自己的集中式日志系统
  • 原文地址:https://www.cnblogs.com/youngforever/p/3321470.html
Copyright © 2020-2023  润新知