Common Substrings
Time Limit: 5000MS | Memory Limit: 65536K | |
Total Submissions: 12378 | Accepted: 4172 |
Description
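A substring of a string T is defined as: $T(i, k) = T_i T_{i+1} \dots T_{i+k-1}$, where $1 \le i \le i+k-1 \le |T|$.
Given two strings A, B and one integer K, we define S, a set of triples (i, j, k): $S = \{(i, j, k) \mid k \ge K,\ A(i, k) = B(j, k)\}$.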
You are to give the value of |S| for specific A, B and K.
Input
The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.
$1 \le |A|, |B| \le 10^5$
1 ≤ K ≤ min{|A|, |B|}
Characters of A and B are all Latin letters.
Output
For each case, output an integer |S|.
Sample Input
2
aababaa
abaabaa
1
xx
xx
0
Sample Output
22
5
Source
POJ Monthly--2007.10.06, wintokk
题目大意:求在两个字符串中都出现的并且长度≥K的子串个数(位置不同也算不同).
分析:做法太神了.
这道题显然可以用后缀数组来做. 对于多个字符串的问题,先把它们拼在一起,求出ht,sa,rk数组. 然后考虑怎么统计.
这道题统计答案并不只是统计相邻的后缀,既然能够不相邻,那么就要分组(按后缀数组的顺序,把height ≥ K的连续后缀分在同一组). 分别求出每一组的贡献就好了.
假设后缀A,B在同一组里,那么A,B的贡献就是LCP - K + 1. 要求出在同一组中,对于所有的A,它前面的B与它的贡献和,对于B也要求一次. 如果两个后缀是相邻的就好处理了,不相邻的话求LCP就要用RMQ. 每次枚举两个端点复杂度太高,怎么办呢?
一个非常神奇的做法是利用单调栈,A,B分开处理.单调栈维护ht数组.变量tot动态维护答案,如果当前枚举到的后缀在字符串B中,那么就把tot统计进答案里. cnt维护当前元素本身和它弹掉的元素的个数(包括被弹掉的元素自己弹掉的元素,实际上就是把许多元素绑定在了一起,因为rmq要取最小的限制). 每次弹出元素的时候把弹掉的元素和当前元素绑定在一起,并维护tot就好了.这样就保证了其中的ht是最小的(满足rmq),还能顺便求和.
真是一道好题!这种单调栈的利用方法要记住了.
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>

// Counts triples (i, j, k) with k >= K such that the length-k substring of A
// starting at i equals the length-k substring of B starting at j.
//
// Approach: concatenate A + '&' + B, build the suffix array and the height
// (adjacent-LCP) array, then sweep the suffix array twice with a monotonic
// stack so that every (A-suffix, B-suffix) pair contributes LCP - K + 1.
//
// All arrays are 1-indexed.

const int maxn = 200010;

char s1[maxn], s2[maxn], s[maxn];   // A, B and the concatenation A + '&' + B
int len1, len2, len;                // |A|, |B| and the length of s
int sett[maxn], a[maxn], cnt;       // sorted distinct characters, compressed text, alphabet size
int fir[maxn], sec[maxn];           // first / second sort key of the current doubling round
int tong[maxn], pos[maxn];          // counting-sort buckets, order after sorting by second key
int rk[maxn], sa[maxn], ht[maxn];   // rank, suffix array, ht[i] = LCP(sa[i - 1], sa[i])
int poss, K;                        // index of the '&' separator in s, minimum substring length
int sta[maxn][2];                   // stack entries: [0] = height, [1] = number of merged suffixes
long long ans, tot, top;            // answer, running contribution of the stack, stack size

// Builds sa, rk and ht for s[1..len] with prefix doubling and two counting
// sorts per round, then fills ht with the usual "drop by at most one" scan.
void solve()
{
    // Only the buckets have to start from zero: rk, sa, ht, fir, sec and pos
    // are all written over the whole range [1, len] before they are read.
    std::fill(tong, tong + len + 1, 0);

    // Compress the characters to 1..cnt.
    std::copy(s + 1, s + len + 1, sett + 1);
    std::sort(sett + 1, sett + 1 + len);
    cnt = static_cast<int>(std::unique(sett + 1, sett + 1 + len) - sett - 1);
    for (int i = 1; i <= len; i++)
        a[i] = static_cast<int>(std::lower_bound(sett + 1, sett + 1 + cnt, s[i]) - sett);

    // Initial rank = 1 + number of strictly smaller characters.
    for (int i = 1; i <= len; i++)
        tong[a[i]]++;
    for (int i = 1; i <= len; i++)
        tong[i] += tong[i - 1];
    for (int i = 1; i <= len; i++)
        rk[i] = tong[a[i] - 1] + 1;

    for (int t = 1; t <= len; t *= 2)
    {
        // Keys of suffix i: rank of its first t characters and of the next t
        // characters (0 when the suffix is too short).
        for (int i = 1; i <= len; i++)
            fir[i] = rk[i];
        for (int i = 1; i <= len; i++)
            sec[i] = (i + t > len) ? 0 : rk[i + t];

        // Counting sort by the second key; pos ends up ordered by
        // decreasing second key.
        std::fill(tong, tong + 1 + len, 0);
        for (int i = 1; i <= len; i++)
            tong[sec[i]]++;
        for (int i = 1; i <= len; i++)
            tong[i] += tong[i - 1];
        for (int i = 1; i <= len; i++)
            pos[len - --tong[sec[i]]] = i;

        // Counting sort by the first key. Buckets are filled from the back
        // while walking pos, so larger second keys land later in a bucket.
        std::fill(tong, tong + 1 + len, 0);
        for (int i = 1; i <= len; i++)
            tong[fir[i]]++;
        for (int i = 1; i <= len; i++)
            tong[i] += tong[i - 1];
        for (int i = 1; i <= len; i++)
        {
            const int suffix = pos[i];
            sa[tong[fir[suffix]]--] = suffix;
        }

        // Re-rank; suffixes with identical key pairs share a rank.
        bool all_distinct = true;
        int last = 0;
        for (int i = 1; i <= len; i++)
        {
            const int suffix = sa[i];
            if (!last)
                rk[suffix] = 1;
            else if (fir[suffix] == fir[last] && sec[suffix] == sec[last])
            {
                rk[suffix] = rk[last];
                all_distinct = false;
            }
            else
                rk[suffix] = rk[last] + 1;
            last = suffix;
        }
        if (all_distinct)
            break;
    }

    // Height array: the match length carried over from suffix i - 1 shrinks
    // by at most one, so k is decremented instead of being reset.
    int k = 0;
    for (int i = 1; i <= len; i++)
    {
        if (rk[i] == 1)
            k = 0;
        else
        {
            if (k)
                k--;
            const int j = sa[rk[i] - 1];
            while (i + k <= len && j + k <= len && a[i + k] == a[j + k])
                k++;
        }
        ht[rk[i]] = k;
    }
}

// One sweep over the suffix array.
//
// With stack_holds_a == true the stack collects suffixes that start in A
// (index < poss) and every suffix that starts in B (index > poss) adds the
// running total to ans; with false the two roles are swapped.
//
// tot is the sum of (min height - K + 1) over all collected suffixes that
// precede the current position inside the current run of ht >= K. A stack
// entry bundles suffixes that share the same minimum height, so lowering the
// minimum costs one multiplication per popped entry.
static void count_pairs(bool stack_holds_a)
{
    top = tot = 0;
    for (int i = 2; i <= len; i++)
    {
        if (ht[i] < K)
        {
            // A height below K ends the run: nothing before i can pair with
            // anything at or after i.
            top = tot = 0;
            continue;
        }

        int merged = 0;
        const bool prev_collected = stack_holds_a ? (sa[i - 1] < poss) : (sa[i - 1] > poss);
        if (prev_collected)
        {
            merged++;
            tot += ht[i] - K + 1;
        }

        while (top && ht[i] <= sta[top][0])
        {
            // Popped suffixes are now capped by ht[i]. Widen before
            // multiplying: both factors are ints and a crude bound on their
            // product (about 2.5e9) does not fit in 32 bits.
            tot += static_cast<long long>(ht[i] - sta[top][0]) * sta[top][1];
            merged += sta[top][1];
            top--;
        }
        sta[++top][0] = ht[i];
        sta[top][1] = merged;

        const bool cur_queries = stack_holds_a ? (sa[i] > poss) : (sa[i] < poss);
        if (cur_queries)
            ans += tot;
    }
}

// Pairs in which the A-suffix comes before the B-suffix in suffix-array order.
// Also resets ans, so it has to run before solve2().
void solve1()
{
    ans = 0;
    count_pairs(true);
}

// Pairs in which the B-suffix comes before the A-suffix in suffix-array order.
void solve2()
{
    count_pairs(false);
}

// Reads blocks of "K, A, B" until K == 0 or the input ends, and prints |S|
// for each block on its own line.
int main()
{
    // Stop on end of input and also on a token that is not a number.
    while (std::scanf("%d", &K) == 1 && K)
    {
        if (std::scanf("%s", s1 + 1) != 1)
            break;
        len = 0;
        ans = 0;
        len1 = static_cast<int>(std::strlen(s1 + 1));
        if (std::scanf("%s", s2 + 1) != 1)
            break;
        len2 = static_cast<int>(std::strlen(s2 + 1));

        // '&' sorts below every Latin letter and occurs exactly once, so no
        // LCP between two suffixes can run across the boundary between A and B.
        for (int i = 1; i <= len1; i++)
            s[++len] = s1[i];
        s[++len] = '&';
        poss = len;
        for (int i = 1; i <= len2; i++)
            s[++len] = s2[i];

        solve();
        solve1();
        solve2();
        std::printf("%lld\n", ans);
    }
    return 0;
}