Given a string, we need to find the total number of its distinct substrings.
Input
T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000
Output
For each test case output one number saying the number of distinct substrings.
Example
Sample Input:
2
CCCCC
ABABA
Sample Output:
5
9
Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.
题目大意:给定一个字符串,求不同的子串数,子串是连续的.
分析:每个子串是对应后缀的前缀,利用后缀数组.
求出sa和ht数组. 对于每个sa[i],它能和它本身以及后面的字符形成子串,如果固定sa[i]为子串的左端点,那么它能形成n - sa[i] + 1个子串(下标从1开始,与下面的代码一致). 所有的子串加起来等于 $\sum_{i=1}^{n} (n - sa[i] + 1) = n(n+1)/2$.
这样统计会将某些子串重复统计. 因为ht数组计算的是排好序的两个相邻后缀的LCP,如果有重叠部分,那么一定是最大的. 对于每一个sa[i],它会重复计算ht[i]个子串(左端点固定,这一段的右端点也相同,所以这些子串已经在前一个后缀中统计过). 把它们减掉即可,即答案为 $n(n+1)/2 - \sum_{i=1}^{n} ht[i]$.
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>

using namespace std;

// Counts the distinct substrings of each input string.
//
// Approach: build the suffix array (prefix doubling with counting sort) and
// the height array (LCP of adjacent suffixes in sorted order). A string of
// length n has n * (n + 1) / 2 substrings counted with multiplicity; the
// suffix of rank r repeats exactly ht[r] substrings already contributed by
// the suffix of rank r - 1, so the answer is n * (n + 1) / 2 - sum(ht).
//
// All arrays are 1-based: positions and ranks run over 1..n.

// Capacity of every working array and of the input buffer.
const int maxn = 2010;

int n;            // length of the current string
int ans;          // result for the current test case
int fir[maxn];    // first sort key of a position in the current doubling round
int sec[maxn];    // second sort key (rank of position i + t, or 0 past the end)
int pos[maxn];    // positions ordered by second key, descending
int sa[maxn];     // sa[r]  = start position of the suffix with rank r
int rk[maxn];     // rk[i]  = rank of the suffix starting at position i
int tong[maxn];   // bucket counters for the counting sorts
int ht[maxn];     // ht[r]  = LCP of the suffixes with ranks r - 1 and r (ht[1] = 0)
int sett[maxn];   // sorted distinct character codes of the current string
int a[maxn];      // a[i]   = 1-based index of s[i] among the distinct characters
int cnt;          // number of distinct characters
int K;            // not referenced by the code in this file
int T;            // number of remaining test cases
char s[maxn];     // input string, stored from s[1]

// Builds sa[1..n], rk[1..n] and ht[1..n] for the string s[1..n].
// Reads the globals n and s; overwrites every working array above.
void solve() {
    const int len = n;

    memset(rk, 0, sizeof(rk));
    memset(sa, 0, sizeof(sa));
    memset(ht, 0, sizeof(ht));
    memset(fir, 0, sizeof(fir));
    memset(sec, 0, sizeof(sec));
    memset(pos, 0, sizeof(pos));
    memset(tong, 0, sizeof(tong));

    // Compress the characters to 1..cnt so they can index the bucket array.
    copy(s + 1, s + len + 1, sett + 1);
    sort(sett + 1, sett + 1 + len);
    cnt = unique(sett + 1, sett + 1 + len) - sett - 1;
    for (int i = 1; i <= len; i++) {
        a[i] = lower_bound(sett + 1, sett + 1 + cnt, s[i]) - sett;
    }

    // Initial rank of a position: 1 + number of characters strictly smaller
    // than its own (equal characters share a rank).
    for (int i = 1; i <= len; i++) tong[a[i]]++;
    for (int i = 1; i <= len; i++) tong[i] += tong[i - 1];
    for (int i = 1; i <= len; i++) rk[i] = tong[a[i] - 1] + 1;

    // Prefix doubling: after the round with step t, rk orders the suffixes
    // by their first 2 * t characters.
    for (int t = 1; t <= len; t *= 2) {
        for (int i = 1; i <= len; i++) {
            fir[i] = rk[i];
            sec[i] = (i + t > len) ? 0 : rk[i + t];
        }

        // Counting sort on the second key. Writing to index len - (slot)
        // leaves pos ordered by *descending* second key.
        fill(tong, tong + 1 + len, 0);
        for (int i = 1; i <= len; i++) tong[sec[i]]++;
        for (int i = 1; i <= len; i++) tong[i] += tong[i - 1];
        for (int i = 1; i <= len; i++) pos[len - --tong[sec[i]]] = i;

        // Counting sort on the first key. Each bucket is filled from its
        // last slot backwards, so walking pos in descending second-key order
        // yields ascending (first, second) order inside sa.
        fill(tong, tong + 1 + len, 0);
        for (int i = 1; i <= len; i++) tong[fir[i]]++;
        for (int i = 1; i <= len; i++) tong[i] += tong[i - 1];
        for (int i = 1; i <= len; i++) {
            const int p = pos[i];
            sa[tong[fir[p]]--] = p;
        }

        // Re-rank: equal key pairs share a rank. If no two pairs were equal
        // the order is final and the doubling can stop.
        bool all_distinct = true;
        int prev = 0;
        for (int i = 1; i <= len; i++) {
            const int cur = sa[i];
            if (!prev) {
                rk[cur] = 1;
            } else if (fir[cur] == fir[prev] && sec[cur] == sec[prev]) {
                rk[cur] = rk[prev];
                all_distinct = false;
            } else {
                rk[cur] = rk[prev] + 1;
            }
            prev = cur;
        }
        if (all_distinct) break;
    }

    // Height array, visiting positions in text order so the previous match
    // length minus one can be reused as the starting point.
    int k = 0;
    for (int i = 1; i <= len; i++) {
        if (rk[i] == 1) {
            k = 0;  // the smallest suffix has no predecessor
        } else {
            if (k) k--;
            const int j = sa[rk[i] - 1];
            while (i + k <= len && j + k <= len && a[i + k] == a[j + k]) k++;
        }
        ht[rk[i]] = k;
    }
}

// Reads T, then T strings, and prints the number of distinct substrings of
// each string on its own line.
int main() {
    if (scanf("%d", &T) != 1) return 0;

    while (T--) {
        // The text starts at s[1] and s holds maxn = 2010 bytes, so at most
        // 2008 characters plus the terminating NUL fit; the field width keeps
        // an over-long token from overrunning the buffer.
        if (scanf("%2008s", s + 1) != 1) break;  // input ended early

        n = static_cast<int>(strlen(s + 1));
        solve();

        ans = n * (n + 1) / 2;
        for (int i = 1; i <= n; i++) ans -= ht[i];
        printf("%d\n", ans);
    }
    return 0;
}