上一篇中讲到了暴力法字符串匹配算法,但暴力法明显存在这样一个问题:每次失配后只移动一个字符。实际上,针对不同的匹配情况,每次移动的间隔可以更大,没有必要每次只移动一位。
关于KMP算法的描述,推荐一篇博客:https://blog.csdn.net/weixin_36604953/article/details/78576637
该博客详细地描述了KMP算法的原理。下面的代码实现了KMP算法:
// String matching demo: brute-force search and the KMP (Knuth-Morris-Pratt)
// algorithm. Both report every starting index at which the pattern occurs in
// the text, including overlapping occurrences.
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

vector<int>& BFmatch(string&, string&, vector<int>&);
vector<int>& KMPStrMatch(string&, string&, vector<int>&);
void ShowPos(vector<int>&);

// Reads the text and the pattern from standard input, runs the KMP search and
// prints the positions that were found.
int main()
{
    string ModelStr, SonStr;
    vector<int> pos;
    cout << "请输入待匹配字符串:";
    cin >> ModelStr;
    cout << endl;
    cout << "请输入子字符串:";
    cin >> SonStr;
    cout << endl;
    //BFmatch(ModelStr, SonStr, pos);
    KMPStrMatch(ModelStr, SonStr, pos);
    ShowPos(pos);
    system("pause");  // Windows-only console pause, kept from the original program.
}

// Brute-force matcher: tries every start position in ModelStr at which SonStr
// can still fit and compares character by character.
//
// ModelStr: text to search in.
// SonStr:   pattern to search for.
// pos:      output; the 0-based start index of each occurrence is appended
//           (existing contents are kept).
// Returns a reference to pos. An empty pattern, or a pattern longer than the
// text, yields no matches.
vector<int>& BFmatch(string& ModelStr, string& SonStr, vector<int>& pos)
{
    const size_t textLen = ModelStr.size();
    const size_t patLen = SonStr.size();
    if (patLen == 0 || patLen > textLen)
        return pos;

    // Only start positions where the whole pattern fits are examined, so no
    // index ever runs past the end of the text.
    for (size_t start = 0; start + patLen <= textLen; ++start)
    {
        size_t k = 0;
        while (k < patLen && ModelStr[start + k] == SonStr[k])
            ++k;
        if (k == patLen)
            pos.push_back(static_cast<int>(start));
    }
    return pos;
}

// Prints the collected match positions separated by spaces, or a
// "no such string!" message when pos is empty.
void ShowPos(vector<int>& pos)
{
    if (pos.empty())
    {
        cout << "no such string!" << endl;
        return;
    }
    cout << "the first position of MatchingStr:";
    for (int p : pos)
        cout << p << " ";
    cout << endl;
}

// Builds the KMP failure table for pattern: fail[i] is the length of the
// longest proper prefix of pattern[0..i] that is also a suffix of it.
static vector<size_t> BuildFailureTable(const string& pattern)
{
    vector<size_t> fail(pattern.size(), 0);
    size_t k = 0;  // length of the currently matched prefix
    for (size_t i = 1; i < pattern.size(); ++i)
    {
        // Fall back to shorter borders until the next character extends one.
        while (k > 0 && pattern[i] != pattern[k])
            k = fail[k - 1];
        if (pattern[i] == pattern[k])
            ++k;
        fail[i] = k;
    }
    return fail;
}

// KMP matcher: scans ModelStr once from left to right and, on a mismatch,
// uses the failure table to reuse the part of SonStr that is already known to
// match instead of restarting the comparison.
//
// ModelStr: text to search in.
// SonStr:   pattern to search for.
// pos:      output; the 0-based start index of each occurrence is appended
//           (existing contents are kept). Overlapping occurrences are reported.
// Returns a reference to pos. An empty pattern, or a pattern longer than the
// text, yields no matches.
vector<int>& KMPStrMatch(string& ModelStr, string& SonStr, vector<int>& pos)
{
    const size_t textLen = ModelStr.size();
    const size_t patLen = SonStr.size();
    // Guarding here also avoids the unsigned wrap-around of
    // textLen - patLen when the pattern is longer than the text.
    if (patLen == 0 || patLen > textLen)
        return pos;

    const vector<size_t> fail = BuildFailureTable(SonStr);
    size_t matched = 0;  // number of pattern characters currently matched
    for (size_t j = 0; j < textLen; ++j)
    {
        while (matched > 0 && ModelStr[j] != SonStr[matched])
            matched = fail[matched - 1];
        if (ModelStr[j] == SonStr[matched])
            ++matched;
        if (matched == patLen)
        {
            pos.push_back(static_cast<int>(j + 1 - patLen));
            // Continue from the longest border so overlapping matches are found.
            matched = fail[matched - 1];
        }
    }
    return pos;
}
总之,KMP的核心思想在于:利用已匹配部分的最长公共前后缀长度(即部分匹配值)来决定每次失配后的移动距离,而不是每次只移动一位。