这货居然还有个学名,Brute Force search,其实翻译过来就是简单粗暴法。
/// <summary>
/// Determines whether <paramref name="b"/> occurs somewhere inside <paramref name="a"/>,
/// using a partial-match table to skip alignments that cannot match.
/// </summary>
/// <param name="a">The text to search in.</param>
/// <param name="b">The pattern to search for.</param>
/// <returns>
/// <c>true</c> when <paramref name="b"/> is found in <paramref name="a"/>; <c>false</c> when it is not,
/// or when either argument is null or empty, or when <paramref name="b"/> is longer than <paramref name="a"/>.
/// </returns>
public static bool IsSubString(string a, string b)
{
    if (string.IsNullOrEmpty(a) || string.IsNullOrEmpty(b) || b.Length > a.Length)
    {
        return false;
    }

    int[] partialMatchTable = BuildPartialMatchTable(b);

    int startPosInA = 0;
    int currPosInB = 0;
    while (startPosInA <= a.Length - b.Length)
    {
        // Extend the current match as far as it goes. currPosInB is NOT reset here:
        // after a shift it already holds the number of chars known to match.
        while (currPosInB < b.Length && a[startPosInA + currPosInB] == b[currPosInB])
        {
            currPosInB++;
        }

        if (currPosInB == b.Length)
        {
            return true;
        }

        if (currPosInB == 0)
        {
            // Nothing matched at this alignment, so just slide the pattern by one char.
            startPosInA++;
        }
        else
        {
            // currPosInB is the number of chars of b already matched.
            // partialMatchTable[currPosInB - 1] is the longest prefix of that matched part
            // which is also its suffix, so (currPosInB - that value) chars can be skipped.
            //
            //            |
            //   a: ABCDAB ABCDABCDABDE
            //   b: ABCDABD
            //            |
            //
            // Here "ABCDAB" is matched and "AB" is its longest shared prefix/suffix,
            // so the leading "ABCD" can be skipped:
            //
            //            |
            //   a: ABCDAB ABCDABCDABDE
            //   b:     ABCDABD
            //            |
            int reusable = partialMatchTable[currPosInB - 1];
            startPosInA += currPosInB - reusable;

            // Indexing starts at 0, so the shared length is also the next index of b to compare.
            currPosInB = reusable;
        }
    }

    return false;
}

/// <summary>
/// Builds the partial-match table of <paramref name="pattern"/>: entry i is the length of the
/// longest proper prefix of the first i + 1 chars that is also a suffix of them.
/// </summary>
/// <remarks>
/// Each entry is derived from the earlier ones in a single pass, so no substrings are allocated.
/// The values are the same as calling <see cref="GetPartialMatchCount"/> on every prefix.
/// </remarks>
private static int[] BuildPartialMatchTable(string pattern)
{
    int[] table = new int[pattern.Length];
    int matched = 0;
    for (int i = 1; i < pattern.Length; i++)
    {
        // On a mismatch fall back to the next shorter prefix that is still a suffix.
        while (matched > 0 && pattern[i] != pattern[matched])
        {
            matched = table[matched - 1];
        }

        if (pattern[i] == pattern[matched])
        {
            matched++;
        }

        table[i] = matched;
    }

    return table;
}

/// <summary>
/// Returns the length of the longest proper prefix of <paramref name="str"/> that is also a suffix of it.
/// </summary>
/// <param name="str">The string to inspect; must not be null.</param>
/// <returns>The shared prefix/suffix length, or 0 when there is none (including for strings shorter than 2 chars).</returns>
public static int GetPartialMatchCount(string str)
{
    // Try the longest candidate first so the first hit is the answer;
    // CompareOrdinal compares the two ranges in place without allocating substrings.
    for (int length = str.Length - 1; length >= 1; length--)
    {
        if (string.CompareOrdinal(str, 0, str, str.Length - length, length) == 0)
        {
            return length;
        }
    }

    return 0;
}
- KMP算法实现 -- 代码可读性较好,感觉主要还是参考了Linux中的实现
- Linux KMP 源码
/// <summary>
/// Builds the KMP jump table for <paramref name="str"/>.
/// </summary>
/// <remarks>
/// Each entry can be read in two ways:
/// 1. It is the number of chars shared by the prefix and the suffix of the pattern up to that position.
/// 2. Because indexing starts at 0, it is also the index at which matching should continue
///    when that position was the last one that matched.
/// </remarks>
/// <param name="str">The pattern; must not be null. An empty pattern yields an empty table.</param>
/// <returns>An array with one entry per char of <paramref name="str"/>.</returns>
public static int[] BuildJumpTable(string str)
{
    // A new int[] is zero-filled, so entry 0 (a single char shares nothing) is already correct.
    // Not writing next[0] explicitly also keeps an empty pattern from indexing out of range.
    int[] next = new int[str.Length];

    // i only ever moves forward; j is both the count of chars currently matched
    // and the index of the next prefix char to compare against.
    for (int i = 1, j = 0; i < str.Length; i++)
    {
        // Reuse the part of the table built so far: when str[i] does not match str[j],
        // jump to where the last matched char (j - 1) points and try again.
        while (j > 0 && str[i] != str[j])
        {
            j = next[j - 1];
        }

        if (str[i] == str[j])
        {
            j++;
        }

        next[i] = j;
    }

    return next;
}
- 这里的局部匹配表其实有两层含义,必须明白这两层含义才能够理解这段算法。一个是阮一峰博客中指的最长的匹配字符串的长度,我姑且叫做匹配表;而另一个含义则利用了数组下标从0开始的这个特点,表达的意思是:当某个位置是最后一个被匹配的字符时,下一次匹配应该从哪个位置开始,姑且叫做跳跃表。
j = next[j - 1]
这个地方就是当做跳跃表来使用,而next[i] = j
则是表示匹配表,我觉得只有区分开来才能够更好地理解。
- i是一直增长的数,表示已经匹配到哪个位置。而j表达的是下一个要匹配的位置,同时又表示已经匹配了多少个,这也是因为数组从0开始才会导致这两个值相等。
- 比较巧妙的一块代码就是while循环那里,这段代码其实在算这个jump table的时候也已经用了一些kmp算法的思想在里面。如果当前的str[i]和str[j]不匹配,并且j比0大,就说明j - 1那个位置一定是匹配的,而跳跃表next[j - 1]里面存着的恰恰就是下一个要尝试匹配的位置。就这样一直回溯,直到找到相等的那个字符,或者回到第0个位置。
/// <summary>
/// Checks whether <paramref name="b"/> appears inside <paramref name="a"/> by scanning
/// <paramref name="a"/> once and falling back through the jump table of <paramref name="b"/> on mismatches.
/// </summary>
/// <param name="a">The text to search in.</param>
/// <param name="b">The pattern to search for.</param>
/// <returns>
/// <c>true</c> when the pattern is found; <c>false</c> otherwise, including when either
/// argument is null or empty or the pattern is longer than the text.
/// </returns>
public static bool IsSubString(string a, string b)
{
    if (string.IsNullOrEmpty(a) || string.IsNullOrEmpty(b))
    {
        return false;
    }

    if (a.Length < b.Length)
    {
        return false;
    }

    int[] fallback = BuildJumpTable(b);

    // Number of leading chars of b matched so far; also the index of the next char of b to compare.
    int matched = 0;
    foreach (char current in a)
    {
        // With at least one char matched, a mismatch means resuming from the position
        // the jump table records for the last matched char.
        while (matched > 0 && b[matched] != current)
        {
            matched = fallback[matched - 1];
        }

        if (b[matched] == current)
        {
            matched++;

            // The whole pattern has been consumed.
            if (matched == b.Length)
            {
                return true;
            }
        }
    }

    return false;
}
可以看到这段代码和算jump table的代码非常相似,不做解释~