后缀数组又被称为字符串处理神器;
http://blog.csdn.net/xymscau/article/details/8798046 这里讲的非常好
实现rank排名用到了倍增法和计数排序(基数排序),时间复杂度是O(n log n)
height[i]存放的是排名第i的后缀与排名第i-1的后缀的最长公共前缀的长度,
sa[i]存的是排名第i的后缀是第几位开头的
rk[i]存放第i个位置开头的后缀的字典序排名
Poj 2774,Poj1743,Poj3294,Poj3261,Poj2758
题意:给你两串字符,要你找出在这两串字符中都出现过的最长子串.........
思路:先用个分隔符将两个字符串连接起来,再用后缀数组求出height数组的值,找出一个height值最大并且i与i-1的sa值分别在两串字符中就好.....
正确性证明:设一个后缀是i,与它拥有最长公共前缀的后缀是j,那么i和j的排名一定是相邻的。
我们将两个字符串用空格连接起来之后,假设答案出现在i和j这两个位置,如果这两个位置的rank不相邻,设排在它们中间的某个后缀是k,那么显然i与k,或者j与k的公共前缀不会更短,也就是一个不劣的解,所以只需要检查排名相邻的后缀。
#include <algorithm>
#include <iostream>
#include <string.h>
#include <stdio.h>

// POJ 2774: length of the longest common substring of two strings.
// The two strings are joined with a ' ' separator, a suffix array is built over
// the joined text, and the answer is the largest height[] value between two
// rank-adjacent suffixes that start in different strings.

// Named MAXN rather than `size`: a global `size` combined with
// `using namespace std` is ambiguous with std::size under C++17.
const int MAXN = 200005;

int rk[MAXN], sa[MAXN], height[MAXN], w[MAXN], wa[MAXN], res[MAXN];

// Builds sa[0..len-1] for w[0..len-1] by prefix doubling with counting sorts.
// Requires every w[i] to lie in [0, up) and w[len-1] to be a unique smallest
// sentinel, so that all suffixes are distinct and the loop terminates.
// rk, height, res and wa are used as scratch space; rk is not left holding the
// final ranks (getHeight recomputes it from sa).
void getSa(int len, int up) {
    int *k = rk, *id = height, *r = res, *cnt = wa;

    // Initial order: counting sort of the single characters.
    for (int i = 0; i < up; i++) cnt[i] = 0;
    for (int i = 0; i < len; i++) cnt[k[i] = w[i]]++;
    for (int i = 1; i < up; i++) cnt[i] += cnt[i - 1];
    for (int i = len - 1; i >= 0; i--) sa[--cnt[k[i]]] = i;

    int d = 1, p = 0;
    while (p < len) {
        // id[] = suffixes ordered by second key (the rank of the suffix d
        // positions later). The last d suffixes have no second key and go first.
        for (int i = len - d; i < len; i++) id[p++] = i;
        for (int i = 0; i < len; i++) {
            if (sa[i] >= d) id[p++] = sa[i] - d;
        }

        // Stable counting sort by first key, keeping the second-key order.
        for (int i = 0; i < len; i++) r[i] = k[id[i]];
        for (int i = 0; i < up; i++) cnt[i] = 0;
        for (int i = 0; i < len; i++) cnt[r[i]]++;
        for (int i = 1; i < up; i++) cnt[i] += cnt[i - 1];
        for (int i = len - 1; i >= 0; i--) sa[--cnt[r[i]]] = id[i];

        // After the swap r holds the ranks of the previous round and k
        // receives the ranks of this round.
        std::swap(k, r);
        p = 0;
        k[sa[0]] = p++;
        for (int i = 0; i + 1 < len; i++) {
            const int x = sa[i], y = sa[i + 1];
            const bool same = x + d < len && y + d < len &&
                              r[x] == r[y] && r[x + d] == r[y + d];
            if (same) {
                k[y] = p - 1;
            } else {
                k[y] = p++;
            }
        }
        if (p >= len) return;  // every suffix has its own rank
        d *= 2;
        up = p;
        p = 0;
    }
}

// Fills rk[] (rank of each suffix) and height[] (LCP length between the
// suffixes of rank i and rank i-1) for the text w[0..len-1]; height[0] is 0.
// The sentinel at index len-1 has rank 0, so rk[i]-1 is valid for i < len-1.
void getHeight(int len) {
    for (int i = 0; i < len; i++) rk[sa[i]] = i;
    height[0] = 0;
    for (int i = 0, p = 0; i < len - 1; i++) {
        const int j = sa[rk[i] - 1];
        while (i + p < len && j + p < len && w[i + p] == w[j + p]) {
            p++;
        }
        height[rk[i]] = p;
        p = std::max(0, p - 1);
    }
}

// Copies the NUL-terminated string s into w[], appends a 0 sentinel and builds
// sa[], rk[] and height[]. Returns the text length including the sentinel,
// i.e. strlen(s) + 1, which is also the number of entries in sa[].
int getSuffix(char s[]) {
    int len = strlen(s), up = 0;
    for (int i = 0; i < len; i++) {
        // Go through unsigned char so a byte >= 0x80 cannot become a negative
        // index into the counting array.
        w[i] = static_cast<unsigned char>(s[i]);
        up = std::max(up, w[i]);
    }
    w[len++] = 0;
    getSa(len, up + 1);
    getHeight(len);
    return len;
}

// Two strings of up to 100000 characters, one separator and the terminating
// NUL need 200002 bytes; the previous 200001 was one byte short.
const int maxa = MAXN;
char str[maxa];

int main() {
    while (scanf("%s", str) == 1) {
        const int l = strlen(str);  // index of the separator
        str[l] = ' ';
        // Stop if the second string is missing: the buffer would not be
        // NUL-terminated after the separator.
        if (scanf("%s", str + l + 1) != 1) break;

        const int total = getSuffix(str);  // number of suffixes incl. sentinel
        int ans = 0;
        // Ranks run from 0 to total-1, so the last adjacent pair is
        // (total-2, total-1); stopping at strlen(str)-1 would skip it.
        for (int i = 1; i < total; i++) {
            const bool leftThenRight = sa[i - 1] < l && sa[i] > l;
            const bool rightThenLeft = sa[i - 1] > l && sa[i] < l;
            if (leftThenRight || rightThenLeft) {
                ans = std::max(ans, height[i]);
            }
        }
        // NOTE(review): the trailing space may be a mangled "\n" from when this
        // file was flattened; confirm against the judge's output format.
        printf("%d ", ans);
    }
}
/* sample input: abcde bcde */
2.poj1743
题意:给一串数字,求变化相同,且不重叠的最长字符串
变化相同就是将字符串s[i]变成s[i]-s[i-1]
对差分后的序列求后缀数组,height[i]表示差分序列中长度为height[i]的公共前缀,对应原序列中长度为height[i]+1的两段变化相同。差分序列中两个位置的间距是n的话,它们在原序列中的间距也是n,所以如果一组后缀的height最小值是n,那么其中两个起始位置的间距至少要是n+1(即大于n),对应的两段原序列才不会重叠。
二分答案的方法这里讲的很好http://blog.sina.com.cn/s/blog_6635898a0102e0me.html
#include <algorithm>
#include <iostream>
#include <string.h>
#include <stdio.h>

// POJ 1743: longest pattern of consecutive differences that occurs twice
// without overlapping. The note sequence is turned into its difference
// sequence, a suffix array is built over it, and the answer is binary searched.

// Named MAXN rather than `size`: a global `size` combined with
// `using namespace std` is ambiguous with std::size under C++17.
const int MAXN = 200005;

int rk[MAXN], sa[MAXN], height[MAXN], w[MAXN], wa[MAXN], res[MAXN];

// Builds sa[0..len-1] for w[0..len-1] by prefix doubling with counting sorts.
// Requires every w[i] to lie in [0, up) and w[len-1] to be a unique smallest
// sentinel, so that all suffixes are distinct and the loop terminates.
// rk, height, res and wa are used as scratch space.
void getSa(int len, int up) {
    int *k = rk, *id = height, *r = res, *cnt = wa;

    // Initial order: counting sort of the single symbols.
    for (int i = 0; i < up; i++) cnt[i] = 0;
    for (int i = 0; i < len; i++) cnt[k[i] = w[i]]++;
    for (int i = 1; i < up; i++) cnt[i] += cnt[i - 1];
    for (int i = len - 1; i >= 0; i--) sa[--cnt[k[i]]] = i;

    int d = 1, p = 0;
    while (p < len) {
        // id[] = suffixes ordered by second key; the last d suffixes have an
        // empty second key and therefore come first.
        for (int i = len - d; i < len; i++) id[p++] = i;
        for (int i = 0; i < len; i++) {
            if (sa[i] >= d) id[p++] = sa[i] - d;
        }

        // Stable counting sort by first key, keeping the second-key order.
        for (int i = 0; i < len; i++) r[i] = k[id[i]];
        for (int i = 0; i < up; i++) cnt[i] = 0;
        for (int i = 0; i < len; i++) cnt[r[i]]++;
        for (int i = 1; i < up; i++) cnt[i] += cnt[i - 1];
        for (int i = len - 1; i >= 0; i--) sa[--cnt[r[i]]] = id[i];

        // r now holds the previous round's ranks, k receives the new ones.
        std::swap(k, r);
        p = 0;
        k[sa[0]] = p++;
        for (int i = 0; i + 1 < len; i++) {
            const int x = sa[i], y = sa[i + 1];
            const bool same = x + d < len && y + d < len &&
                              r[x] == r[y] && r[x + d] == r[y + d];
            if (same) {
                k[y] = p - 1;
            } else {
                k[y] = p++;
            }
        }
        if (p >= len) return;  // every suffix has its own rank
        d *= 2;
        up = p;
        p = 0;
    }
}

// Fills rk[] and height[] (LCP length between the suffixes of rank i and
// rank i-1) for w[0..len-1]; height[0] is 0.
void getHeight(int len) {
    for (int i = 0; i < len; i++) rk[sa[i]] = i;
    height[0] = 0;
    for (int i = 0, p = 0; i < len - 1; i++) {
        const int j = sa[rk[i] - 1];
        while (i + p < len && j + p < len && w[i + p] == w[j + p]) {
            p++;
        }
        height[rk[i]] = p;
        p = std::max(0, p - 1);
    }
}

// Copies s[0..n-1] into w[], appends a 0 sentinel and builds sa[], rk[] and
// height[]. All s[i] must be positive so the sentinel stays the unique minimum.
// Returns n + 1, the number of entries in sa[].
int getSuffix(int s[], int n) {
    int len = n, up = 0;
    for (int i = 0; i < len; i++) {
        w[i] = s[i];
        up = std::max(up, w[i]);
    }
    w[len++] = 0;
    getSa(len, up + 1);
    getHeight(len);
    return len;
}

const int maxa = 100000 * 2 + 1;
int str[maxa];  // difference sequence, shifted to be positive
int a[maxa];    // input notes

// Returns 1 if some run of consecutive ranks with height >= ans contains two
// suffix start positions more than `ans` apart, else 0. n is the length of the
// difference sequence, so ranks 0..n are scanned.
int judge(int ans, int n) {
    int l = sa[0], r = sa[0];  // min / max start position in the current run
    for (int i = 0; i <= n; i++) {
        if (height[i] >= ans) {
            l = std::min(l, sa[i]);
            r = std::max(r, sa[i]);
            if (r - l > ans) return 1;
        } else {
            l = r = sa[i];  // a new run starts at this rank
        }
    }
    return 0;
}

int main() {
    int n;
    // Stop on EOF, on unparsable input and on the terminating 0; comparing
    // only against EOF would spin forever when scanf returns 0.
    while (scanf("%d", &n) == 1 && n > 0) {
        for (int i = 0; i < n; i++) {
            scanf("%d", &a[i]);
        }
        const int m = n - 1;  // length of the difference sequence
        for (int i = 0; i < m; i++) {
            // presumably notes are small enough that +100 keeps every value
            // positive - verify against the problem limits
            str[i] = a[i + 1] - a[i] + 100;
        }
        getSuffix(str, m);

        // Smallest difference-length for which judge fails; this equals the
        // longest feasible length in notes.
        int l = 0, r = m;
        while (l < r) {
            const int mid = (l + r) / 2;
            if (judge(mid, m)) {
                l = mid + 1;
            } else {
                r = mid;
            }
        }
        // Results shorter than five notes are reported as 0.
        // NOTE(review): the trailing space may be a mangled "\n"; confirm
        // against the judge's output format.
        if (l < 5) {
            printf("0 ");
        } else {
            printf("%d ", l);
        }
    }
}
3.poj3261
题意:找出一个序列中出现次数不小于K的最长子串的长度:
依旧是二分:
#include <iostream>
#include <string.h>
#include <vector>
#include <map>
#include <set>
#include <stdio.h>
#include <algorithm>

// POJ 3261: length of the longest substring that occurs at least K times.
// Values are compressed to 1..m, a suffix array is built, and the length is
// binary searched using runs of height[] values.

// Named MAXN rather than `size`: a global `size` combined with
// `using namespace std` is ambiguous with std::size under C++17.
const int MAXN = 200005;

int rk[MAXN], sa[MAXN], height[MAXN], w[MAXN], wa[MAXN], res[MAXN];

// Builds sa[0..len-1] for w[0..len-1] by prefix doubling with counting sorts.
// Requires every w[i] to lie in [0, up) and w[len-1] to be a unique smallest
// sentinel, so that all suffixes are distinct and the loop terminates.
// rk, height, res and wa are used as scratch space.
void getSa(int len, int up) {
    int *k = rk, *id = height, *r = res, *cnt = wa;

    // Initial order: counting sort of the single symbols.
    for (int i = 0; i < up; i++) cnt[i] = 0;
    for (int i = 0; i < len; i++) cnt[k[i] = w[i]]++;
    for (int i = 1; i < up; i++) cnt[i] += cnt[i - 1];
    for (int i = len - 1; i >= 0; i--) sa[--cnt[k[i]]] = i;

    int d = 1, p = 0;
    while (p < len) {
        // id[] = suffixes ordered by second key; the last d suffixes have an
        // empty second key and therefore come first.
        for (int i = len - d; i < len; i++) id[p++] = i;
        for (int i = 0; i < len; i++) {
            if (sa[i] >= d) id[p++] = sa[i] - d;
        }

        // Stable counting sort by first key, keeping the second-key order.
        for (int i = 0; i < len; i++) r[i] = k[id[i]];
        for (int i = 0; i < up; i++) cnt[i] = 0;
        for (int i = 0; i < len; i++) cnt[r[i]]++;
        for (int i = 1; i < up; i++) cnt[i] += cnt[i - 1];
        for (int i = len - 1; i >= 0; i--) sa[--cnt[r[i]]] = id[i];

        // r now holds the previous round's ranks, k receives the new ones.
        std::swap(k, r);
        p = 0;
        k[sa[0]] = p++;
        for (int i = 0; i + 1 < len; i++) {
            const int x = sa[i], y = sa[i + 1];
            const bool same = x + d < len && y + d < len &&
                              r[x] == r[y] && r[x + d] == r[y + d];
            if (same) {
                k[y] = p - 1;
            } else {
                k[y] = p++;
            }
        }
        if (p >= len) return;  // every suffix has its own rank
        d *= 2;
        up = p;
        p = 0;
    }
}

// Fills rk[] and height[] (LCP length between the suffixes of rank i and
// rank i-1) for w[0..len-1]; height[0] is 0.
void getHeight(int len) {
    for (int i = 0; i < len; i++) rk[sa[i]] = i;
    height[0] = 0;
    for (int i = 0, p = 0; i < len - 1; i++) {
        const int j = sa[rk[i] - 1];
        while (i + p < len && j + p < len && w[i + p] == w[j + p]) {
            p++;
        }
        height[rk[i]] = p;
        p = std::max(0, p - 1);
    }
}

// Copies s[0..n-1] into w[], appends a 0 sentinel and builds sa[], rk[] and
// height[]. All s[i] must be positive so the sentinel stays the unique minimum.
// Returns n + 1, the number of entries in sa[].
int getSuffix(int s[], int n) {
    int len = n, up = 0;
    for (int i = 0; i < len; i++) {
        w[i] = s[i];
        up = std::max(up, w[i]);
    }
    w[len++] = 0;
    getSa(len, up + 1);
    getHeight(len);
    return len;
}

const int maxa = 100000 * 2 + 5;
int num[maxa];   // input values, later replaced by their compressed ids
int snum[maxa];  // sorted distinct input values

// Returns 1 if some run of rank-adjacent suffixes whose pairwise height is at
// least mid contains K or more suffixes, else 0. n is the sequence length, so
// ranks 0..n are scanned.
int judge(int n, int mid, int K) {
    int siz = 0;  // number of suffixes in the current run
    for (int i = 0; i <= n; i++) {
        if (height[i] >= mid) {
            siz++;
        } else {
            if (siz >= K) return 1;
            siz = 1;  // this rank opens a new run
        }
    }
    return siz >= K ? 1 : 0;
}

int main() {
    int n, K;
    while (scanf("%d%d", &n, &K) == 2) {
        for (int i = 0; i < n; i++) {
            scanf("%d", &num[i]);
            snum[i] = num[i];
        }

        // Coordinate compression to 1..m. The sorted copy is what defines the
        // ids; previously it was sorted but never consulted.
        std::sort(snum, snum + n);
        const int m = std::unique(snum, snum + n) - snum;
        for (int i = 0; i < n; i++) {
            num[i] = std::lower_bound(snum, snum + m, num[i]) - snum + 1;
        }

        getSuffix(num, n);

        // The answer never exceeds n, so n + 1 is a sufficient exclusive bound.
        // A looser bound reports a length greater than n when K == 1, because
        // judge then accepts every mid.
        int low = 0;
        int high = n + 1;
        while (low < high) {
            const int mid = (low + high) / 2;
            if (judge(n, mid, K)) {
                low = mid + 1;
            } else {
                high = mid;
            }
        }
        // low is the first infeasible length, so low - 1 is the answer.
        // NOTE(review): the trailing space may be a mangled "\n"; confirm
        // against the judge's output format.
        printf("%d ", low - 1);
    }
}
4.poj3294
题意:给n个字符串,找出出现在超过一半字符串中的最长子串,如果有多个按字典序输出
没看到按字典序输出卡了两天啊啊啊啊啊啊啊啊啊啊啊
用互不相同的分隔符去连接所有字符串,二分答案长度mid,找出height连续不小于mid的每一组后缀,如果组内后缀所属的不同字符串个数超过一半就成立
#include <algorithm>
#include <iostream>
#include <string.h>
#include <vector>
#include <set>
#include <stdio.h>

// POJ 3294: longest substrings shared by more than half of n strings, printed
// in suffix-array (lexicographic) order. The strings are joined with distinct
// separators 500+i, a suffix array is built, and the length is binary searched.

// Named MAXN rather than `size`: a global `size` combined with
// `using namespace std` is ambiguous with std::size under C++17.
const int MAXN = 200005;

int rk[MAXN], sa[MAXN], height[MAXN], w[MAXN], wa[MAXN], res[MAXN];

// Builds sa[0..len-1] for w[0..len-1] by prefix doubling with counting sorts.
// Requires every w[i] to lie in [0, up) and w[len-1] to be a unique smallest
// sentinel, so that all suffixes are distinct and the loop terminates.
// rk, height, res and wa are used as scratch space.
void getSa(int len, int up) {
    int *k = rk, *id = height, *r = res, *cnt = wa;

    // Initial order: counting sort of the single symbols.
    for (int i = 0; i < up; i++) cnt[i] = 0;
    for (int i = 0; i < len; i++) cnt[k[i] = w[i]]++;
    for (int i = 1; i < up; i++) cnt[i] += cnt[i - 1];
    for (int i = len - 1; i >= 0; i--) sa[--cnt[k[i]]] = i;

    int d = 1, p = 0;
    while (p < len) {
        // id[] = suffixes ordered by second key; the last d suffixes have an
        // empty second key and therefore come first.
        for (int i = len - d; i < len; i++) id[p++] = i;
        for (int i = 0; i < len; i++) {
            if (sa[i] >= d) id[p++] = sa[i] - d;
        }

        // Stable counting sort by first key, keeping the second-key order.
        for (int i = 0; i < len; i++) r[i] = k[id[i]];
        for (int i = 0; i < up; i++) cnt[i] = 0;
        for (int i = 0; i < len; i++) cnt[r[i]]++;
        for (int i = 1; i < up; i++) cnt[i] += cnt[i - 1];
        for (int i = len - 1; i >= 0; i--) sa[--cnt[r[i]]] = id[i];

        // r now holds the previous round's ranks, k receives the new ones.
        std::swap(k, r);
        p = 0;
        k[sa[0]] = p++;
        for (int i = 0; i + 1 < len; i++) {
            const int x = sa[i], y = sa[i + 1];
            const bool same = x + d < len && y + d < len &&
                              r[x] == r[y] && r[x + d] == r[y + d];
            if (same) {
                k[y] = p - 1;
            } else {
                k[y] = p++;
            }
        }
        if (p >= len) return;  // every suffix has its own rank
        d *= 2;
        up = p;
        p = 0;
    }
}

// Fills rk[] and height[] (LCP length between the suffixes of rank i and
// rank i-1) for w[0..len-1]; height[0] is 0.
void getHeight(int len) {
    for (int i = 0; i < len; i++) rk[sa[i]] = i;
    height[0] = 0;
    for (int i = 0, p = 0; i < len - 1; i++) {
        const int j = sa[rk[i] - 1];
        while (i + p < len && j + p < len && w[i + p] == w[j + p]) {
            p++;
        }
        height[rk[i]] = p;
        p = std::max(0, p - 1);
    }
}

// Copies s[0..n-1] into w[], appends a 0 sentinel and builds sa[], rk[] and
// height[]. All s[i] must be positive so the sentinel stays the unique minimum.
// Returns n + 1, the number of entries in sa[].
int getSuffix(int s[], int n) {
    int len = n, up = 0;
    for (int i = 0; i < len; i++) {
        w[i] = s[i];
        up = std::max(up, w[i]);
    }
    w[len++] = 0;
    getSa(len, up + 1);
    getHeight(len);
    return len;
}

const int maxa = 100000 * 2 + 5;
int num[maxa];   // joined text: characters plus separators 500+i
char str[maxa];  // most recently read input string
int mp[maxa];    // mp[pos] = index of the string that position pos belongs to
int vis[200];    // vis[s] == current stamp means string s was seen in this run
int que[maxa], qq[maxa];  // start positions of the answers (final / scratch)
int o;                    // number of valid entries in que[]

// Scans ranks 0..n and splits them into runs whose adjacent heights are all at
// least mid. For every run whose suffixes come from more than K different
// strings, one start position from that run is recorded. On success the
// positions are copied to que[0..o-1] and 1 is returned; otherwise que/o are
// left untouched and 0 is returned.
int judge(int n, int mid, int K) {
    int found = 0;     // runs accepted so far
    int distinct = 0;  // different source strings in the current run
    int last = sa[0];  // start position of the latest suffix that added a string
    int stamp = 1;     // identifies the current run in vis[]
    memset(vis, 0, sizeof(vis));
    for (int i = 0; i <= n; i++) {
        const int owner = mp[sa[i]];
        if (height[i] >= mid) {
            if (vis[owner] != stamp) {
                vis[owner] = stamp;
                distinct++;
                last = sa[i];
            }
        } else {
            if (distinct > K) {
                qq[found++] = last;
            }
            // A new run begins with this suffix; bumping the stamp clears
            // vis[] without a memset per run.
            ++stamp;
            distinct = 1;
            vis[owner] = stamp;
        }
    }
    if (distinct > K) {
        qq[found++] = last;
    }
    if (found == 0) return 0;
    o = found;
    for (int i = 0; i < found; i++) {
        que[i] = qq[i];
    }
    return 1;
}

int main() {
    int n;
    bool firstCase = true;
    // Stop on the terminating 0 and also on EOF or unparsable input; the comma
    // form `scanf(...), n` keeps looping on a stale n once input is exhausted.
    while (scanf("%d", &n) == 1 && n) {
        int l = 0;        // length of the joined text
        int longest = 0;  // length of the longest input string
        for (int i = 0; i < n; i++) {
            scanf("%s", str);
            int k = 0;
            for (; str[k]; k++) {
                num[l] = static_cast<unsigned char>(str[k]);
                mp[l] = i;
                l++;
            }
            longest = std::max(longest, k);
            mp[l] = i;
            num[l++] = 500 + i;  // unique separator, larger than any character
        }
        // judge() also reads mp[] at the sentinel position appended by
        // getSuffix; give it a defined owner instead of a stale value.
        mp[l] = n - 1;

        // Test cases are separated by an empty line.
        if (firstCase) {
            firstCase = false;
        } else {
            puts("");
        }
        // A single string is its own answer; no suffix array needed.
        if (n == 1) {
            printf("%s ", str);
            continue;
        }

        getSuffix(num, l);

        // No common substring is longer than the longest input, so
        // longest + 1 is a sufficient exclusive bound.
        int low = 0;
        int high = longest + 1;
        while (low < high) {
            const int mid = (low + high) / 2;
            if (judge(l, mid, n / 2)) {
                low = mid + 1;
            } else {
                high = mid;
            }
        }

        // low is the first infeasible length. The last successful judge call
        // was made with mid == low - 1, so que[] holds that length's answers.
        if (low > 1) {
            for (int i = 0; i < o; i++) {
                for (int k = que[i]; k < que[i] + low - 1; k++) {
                    printf("%c", num[k]);
                }
                puts("");
            }
        } else {
            puts("?");
        }
    }
}