前言:最近在按照罗穗骞的《后缀数组——处理字符串的有力工具》论文学习后缀数组,我打算将论文中出现的例题自己打一遍,作为部分模板。
后缀数组
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<cmath>
#define max_n 100005

// Suffix array template: reads one word, builds its suffix array and height
// array, then prints every suffix in sorted order together with its height.
//
// The rank array is called rk on purpose: a global named `rank` together with
// `using namespace std;` is ambiguous with std::rank on C++11 and later.

char s[max_n];                 // input text
int t1[max_n],t2[max_n];       // scratch key arrays used by make_sa
int cc[max_n];                 // counting-sort buckets
int sa[max_n];                 // sa[r]     = start of the suffix with rank r
int rk[max_n];                 // rk[i]     = rank of the suffix starting at i
int height[max_n];             // height[r] = LCP of suffixes sa[r-1] and sa[r]
int len;                       // length of s

// True when suffixes a and b carry the same (first half, second half) key pair
// for the current doubling step; a second half starting at or past len is -1.
inline bool cmp(int *y,int a,int b,int step)
{
    int arank1=y[a];
    int brank1=y[b];
    int arank2=a+step>=len?-1:y[a+step];
    int brank2=b+step>=len?-1:y[b+step];
    return arank1==brank1&&arank2==brank2;
}

// Fills sa[0..len-1] by prefix doubling with counting sort.
inline void make_sa()
{
    int *x=t1,*y=t2;
    // Keys are raw byte values first (256 buckets), so any character is a valid
    // bucket index, not only 'a'..'z'.
    int m=256;
    for (int i=0;i<m;i++) cc[i]=0;
    for (int i=0;i<len;i++) ++cc[x[i]=static_cast<unsigned char>(s[i])];
    for (int i=1;i<m;i++) cc[i]+=cc[i-1];
    for (int i=len-1;i>=0;i--) sa[--cc[x[i]]]=i;
    for (int step=1;step<len;step<<=1)
    {
        // y lists the suffixes ordered by their second key: suffixes whose
        // second half is empty come first, the rest follow in current sa order.
        int p=0;
        for (int i=len-step;i<len;i++) y[p++]=i;
        for (int i=0;i<len;i++)
            if (sa[i]>=step) y[p++]=sa[i]-step;
        // Stable counting sort of y by the first key x.
        for (int i=0;i<m;i++) cc[i]=0;
        for (int i=0;i<len;i++) ++cc[x[y[i]]];
        for (int i=1;i<m;i++) cc[i]+=cc[i-1];
        for (int i=len-1;i>=0;i--) sa[--cc[x[y[i]]]]=y[i];
        // Re-number: equal key pairs share a rank; m becomes the rank count.
        std::swap(x,y);
        m=1; x[sa[0]]=0;
        for (int i=1;i<len;i++)
            x[sa[i]]=cmp(y,sa[i],sa[i-1],step)?m-1:m++;
        if (m>=len) break;     // all ranks distinct: order is final
    }
}

// Fills rk[] and height[] from sa[]; the match length h is carried from one
// text position to the next and shrinks by at most one per step.
inline void make_height()
{
    for (int i=0;i<len;i++) rk[sa[i]]=i;
    height[0]=0;
    int h=0;
    for (int i=0;i<len;i++)
    {
        if (!rk[i]) {h=0; continue;}   // smallest suffix has no predecessor
        int j=sa[rk[i]-1];
        if (h) h--;
        // Explicit bounds instead of relying on the terminating NUL.
        while (i+h<len&&j+h<len&&s[i+h]==s[j+h]) h++;
        height[rk[i]]=h;
    }
}

int main()
{
    // The field width keeps the read inside s (max_n-1 characters plus NUL).
    if (std::scanf("%100004s",s)!=1) s[0]='\0';
    len=static_cast<int>(std::strlen(s));
    make_sa();
    make_height();
    for (int i=0;i<len;i++) std::printf("%s %d ",s+sa[i],height[i]);
    std::putchar('\n');
    return 0;
}
求两个后缀的最长公共前缀(LCP)
#include<iostream>
#include<cstdio>
#include<cstdlib>
#include<algorithm>
#include<cmath>
#include<cstring>
#define N 100005

// Longest common prefix of two suffixes: suffix array + height array + sparse
// table over height. Input: a word, a query count, then that many pairs of
// 0-based start positions. Output: one LCP per query, separated by spaces.
//
// The rank array is called rk on purpose: a global named `rank` together with
// `using namespace std;` is ambiguous with std::rank on C++11 and later.

int len,sum_ask;               // text length, number of queries
int t1[N],t2[N];               // scratch key arrays used by make_sa
int sa[N];                     // sa[r]     = start of the suffix with rank r
int cc[N];                     // counting-sort buckets
int height[N];                 // height[r] = LCP of suffixes sa[r-1] and sa[r]
int rk[N];                     // rk[i]     = rank of the suffix starting at i
int f[N][20];                  // f[i][j]   = min of height[i .. i+2^j-1]
char s[N];                     // input text

// Reads one optionally signed decimal integer from stdin.
// Returns 0 if the input ends before any digit is seen.
inline int read()
{
    int a=0,sign=1;
    int c=std::getchar();      // int, so EOF is distinguishable from a char
    while (c!=EOF&&(c<'0'||c>'9')) {if (c=='-') sign=-1; c=std::getchar();}
    while (c>='0'&&c<='9') {a=a*10+c-'0'; c=std::getchar();}
    return a*sign;
}

// True when suffixes a and b carry the same (first half, second half) key pair
// for the current doubling step; a second half starting at or past len is -1.
inline bool cmp(int *y,int a,int b,int step)
{
    int arank1=y[a];
    int brank1=y[b];
    int arank2=a+step>=len?-1:y[a+step];
    int brank2=b+step>=len?-1:y[b+step];
    return arank1==brank1&&arank2==brank2;
}

// Fills sa[0..len-1] by prefix doubling with counting sort.
inline void make_sa()
{
    int *x=t1,*y=t2;
    // Keys are raw byte values first (256 buckets), so any character is a valid
    // bucket index, not only 'a'..'z'.
    int m=256;
    for (int i=0;i<m;i++) cc[i]=0;
    for (int i=0;i<len;i++) ++cc[x[i]=static_cast<unsigned char>(s[i])];
    for (int i=1;i<m;i++) cc[i]+=cc[i-1];
    for (int i=len-1;i>=0;i--) sa[--cc[x[i]]]=i;
    for (int step=1;step<len;step<<=1)
    {
        // y lists the suffixes ordered by their second key.
        int p=0;
        for (int i=len-step;i<len;i++) y[p++]=i;
        for (int i=0;i<len;i++)
            if (sa[i]>=step) y[p++]=sa[i]-step;
        // Stable counting sort of y by the first key x.
        for (int i=0;i<m;i++) cc[i]=0;
        for (int i=0;i<len;i++) ++cc[x[y[i]]];
        for (int i=1;i<m;i++) cc[i]+=cc[i-1];
        for (int i=len-1;i>=0;i--) sa[--cc[x[y[i]]]]=y[i];
        // Re-number: equal key pairs share a rank; m becomes the rank count.
        std::swap(x,y);
        m=1; x[sa[0]]=0;
        for (int i=1;i<len;i++)
            x[sa[i]]=cmp(y,sa[i],sa[i-1],step)?m-1:m++;
        if (m>=len) break;     // all ranks distinct: order is final
    }
}

// Fills rk[] and height[] from sa[].
inline void make_height()
{
    for (int i=0;i<len;i++) rk[sa[i]]=i;
    height[0]=0;
    int h=0;
    for (int i=0;i<len;i++)
    {
        if (!rk[i]) {h=0; continue;}   // smallest suffix has no predecessor
        int j=sa[rk[i]-1];
        if (h) h--;
        // Explicit bounds instead of relying on the terminating NUL.
        while (i+h<len&&j+h<len&&s[i+h]==s[j+h]) h++;
        height[rk[i]]=h;
    }
}

// Builds the sparse table of range minima over height[].
inline void make_st()
{
    for (int i=0;i<len;i++) f[i][0]=height[i];
    for (int j=1;j<20;j++)
        for (int i=0;i<len;i++)
            if (i+(1<<j)-1<len)
                f[i][j]=std::min(f[i][j-1],f[i+(1<<(j-1))][j-1]);
}

// LCP of the suffixes starting at text positions a and b.
// A position outside [0,len) is treated as an empty suffix (LCP 0); identical
// positions return the full suffix length, since the range query below needs
// two different ranks.
inline int lcp(int a,int b)
{
    if (a<0||b<0||a>=len||b>=len) return 0;
    if (a==b) return len-a;
    int l=rk[a],r=rk[b];
    if (l>r) std::swap(l,r);
    // Integer floor(log2(r-l)); avoids floating-point log and log(0).
    int k=0;
    while ((1<<(k+1))<=r-l) ++k;
    return std::min(f[l+1][k],f[r-(1<<k)+1][k]);
}

int main()
{
    // The field width keeps the read inside s (N-1 characters plus NUL).
    if (std::scanf("%100004s",s)!=1) s[0]='\0';
    len=static_cast<int>(std::strlen(s));
    make_sa();
    make_height();
    make_st();
    if (std::scanf("%d",&sum_ask)!=1) sum_ask=0;
    while (sum_ask-->0)
    {
        int l=read();
        int r=read();
        std::printf("%d ",lcp(l,r));
    }
    return 0;
}
可重叠最长重复子串
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<cmath>
#include<vector>
#define N 100005

// Longest repeated substring, occurrences may overlap: the answer is the
// largest entry of the height array.
//
// The rank array is called rk on purpose: a global named `rank` together with
// `using namespace std;` is ambiguous with std::rank on C++11 and later.

int len;                       // text length
int cc[N];                     // counting-sort buckets
int t1[N],t2[N];               // scratch key arrays used by make_sa
int height[N];                 // height[r] = LCP of suffixes sa[r-1] and sa[r]
int rk[N];                     // rk[i]     = rank of the suffix starting at i
int sa[N];                     // sa[r]     = start of the suffix with rank r
char s[N];                     // input text

// True when suffixes a and b carry the same (first half, second half) key pair
// for the current doubling step; a second half starting at or past len is -1.
inline bool cmp(int *y,int a,int b,int step)
{
    int arank1=y[a];
    int brank1=y[b];
    int arank2=a+step>=len?-1:y[a+step];
    int brank2=b+step>=len?-1:y[b+step];
    return arank1==brank1&&arank2==brank2;
}

// Fills sa[0..len-1] by prefix doubling with counting sort.
inline void make_sa()
{
    // Keys are raw byte values first (256 buckets), so any character is a valid
    // bucket index, not only 'a'..'z'.
    int m=256;
    int *x=t1,*y=t2;
    for (int i=0;i<m;i++) cc[i]=0;
    for (int i=0;i<len;i++) ++cc[x[i]=static_cast<unsigned char>(s[i])];
    for (int i=1;i<m;i++) cc[i]+=cc[i-1];
    for (int i=len-1;i>=0;i--) sa[--cc[x[i]]]=i;
    for (int step=1;step<len;step<<=1)
    {
        // y lists the suffixes ordered by their second key.
        int p=0;
        for (int i=len-step;i<len;i++) y[p++]=i;
        for (int i=0;i<len;i++)
            if (sa[i]>=step) y[p++]=sa[i]-step;
        // Stable counting sort of y by the first key x.
        for (int i=0;i<m;i++) cc[i]=0;
        for (int i=0;i<len;i++) ++cc[x[y[i]]];
        for (int i=1;i<m;i++) cc[i]+=cc[i-1];
        for (int i=len-1;i>=0;i--) sa[--cc[x[y[i]]]]=y[i];
        // Re-number: equal key pairs share a rank; m becomes the rank count.
        std::swap(x,y);
        m=1; x[sa[0]]=0;
        for (int i=1;i<len;i++)
            x[sa[i]]=cmp(y,sa[i],sa[i-1],step)?m-1:m++;
        if (m>=len) break;     // all ranks distinct: order is final
    }
}

// Fills rk[] and height[] from sa[].
inline void make_height()
{
    for (int i=0;i<len;i++) rk[sa[i]]=i;
    int h=0; height[0]=0;
    for (int i=0;i<len;i++)
    {
        if (!rk[i]) {h=0; continue;}   // smallest suffix has no predecessor
        if (h) h--;
        int j=sa[rk[i]-1];
        // Explicit bounds instead of relying on the terminating NUL.
        while (i+h<len&&j+h<len&&s[i+h]==s[j+h]) h++;
        height[rk[i]]=h;
    }
}

int main()
{
    // The field width keeps the read inside s (N-1 characters plus NUL).
    if (std::scanf("%100004s",s)!=1) s[0]='\0';
    len=static_cast<int>(std::strlen(s));
    make_sa();
    make_height();
    // An empty text has no repeated substring; avoid scanning an empty range.
    std::printf("%d",len>0?*std::max_element(height,height+len):0);
    return 0;
}
不可重叠最长重复子串
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<cstdlib>
#include<cmath>
#define N 100005

// Longest repeated substring whose two occurrences do not overlap: binary
// search on the length, checked on groups of adjacent suffixes whose height is
// at least that length.
//
// The rank array is called rk on purpose: a global named `rank` together with
// `using namespace std;` is ambiguous with std::rank on C++11 and later.

int cc[N];                     // counting-sort buckets
int sa[N];                     // sa[r]     = start of the suffix with rank r
int t1[N],t2[N];               // scratch key arrays used by make_sa
int rk[N];                     // rk[i]     = rank of the suffix starting at i
int height[N];                 // height[r] = LCP of suffixes sa[r-1] and sa[r]
char s[N];                     // input text
int len;                       // text length

// True when suffixes a and b carry the same (first half, second half) key pair
// for the current doubling step; a second half starting at or past len is -1.
inline bool cmp(int *y,int a,int b,int step)
{
    int arank1=y[a];
    int brank1=y[b];
    int arank2=a+step>=len?-1:y[a+step];
    int brank2=b+step>=len?-1:y[b+step];
    return arank1==brank1&&arank2==brank2;
}

// Fills sa[0..len-1] by prefix doubling with counting sort.
inline void make_sa()
{
    // Keys are raw byte values first (256 buckets), so any character is a valid
    // bucket index, not only 'a'..'z'.
    int m=256;
    int *x=t1,*y=t2;
    for (int i=0;i<m;i++) cc[i]=0;
    for (int i=0;i<len;i++) ++cc[x[i]=static_cast<unsigned char>(s[i])];
    for (int i=1;i<m;i++) cc[i]+=cc[i-1];
    for (int i=len-1;i>=0;i--) sa[--cc[x[i]]]=i;
    for (int step=1;step<len;step<<=1)
    {
        // y lists the suffixes ordered by their second key.
        int p=0;
        for (int i=len-step;i<len;i++) y[p++]=i;
        for (int i=0;i<len;i++)
            if (sa[i]>=step) y[p++]=sa[i]-step;
        // Stable counting sort of y by the first key x.
        for (int i=0;i<m;i++) cc[i]=0;
        for (int i=0;i<len;i++) ++cc[x[y[i]]];
        for (int i=1;i<m;i++) cc[i]+=cc[i-1];
        for (int i=len-1;i>=0;i--) sa[--cc[x[y[i]]]]=y[i];
        std::swap(x,y);
        // m must be the function-level bucket count: it sizes the counting
        // sort of the next round. Declaring a fresh `int m` here would leave
        // the sort with too few buckets once ranks exceed the alphabet size.
        m=1; x[sa[0]]=0;
        for (int i=1;i<len;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],step)?m-1:m++;
        if (m>=len) break;     // all ranks distinct: order is final
    }
}

// Fills rk[] and height[] from sa[].
inline void make_height()
{
    for (int i=0;i<len;i++) rk[sa[i]]=i;
    int h=0; height[0]=0;
    for (int i=0;i<len;i++)
    {
        if (!rk[i]) {h=0; continue;}   // smallest suffix has no predecessor
        int j=sa[rk[i]-1];
        if (h) h--;
        // Explicit bounds instead of relying on the terminating NUL.
        while (i+h<len&&j+h<len&&s[i+h]==s[j+h]) h++;
        height[rk[i]]=h;
    }
}

// Returns true when some substring of length x occurs at two start positions
// that are at least x apart. Walks sa in order, tracking the smallest and
// largest start position inside each run of heights >= x.
inline bool judge(int x)
{
    int mn=sa[0],mx=sa[0];
    for (int i=1;i<len;i++)
        if (height[i]<x) mn=mx=sa[i];   // run broken: start a new group
        else
        {
            mn=std::min(mn,sa[i]);
            mx=std::max(mx,sa[i]);
            if (mx-mn>=x) return 1;
        }
    return 0;
}

int main()
{
    // The field width keeps the read inside s (N-1 characters plus NUL).
    if (std::scanf("%100004s",s)!=1) s[0]='\0';
    len=static_cast<int>(std::strlen(s));
    make_sa();
    make_height();
    // Length 0 is always feasible, so search [1, len/2]; r ends as the largest
    // feasible length and is 0 (never -1) when nothing repeats.
    int l=1,r=len/2;
    while (l<=r)
    {
        int mid=(l+r)>>1;
        if (judge(mid)) l=mid+1; else r=mid-1;
    }
    std::printf("%d",r);
    return 0;
}
可重叠的k次最长重复子串
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<cmath>
#define N 100005

// Longest substring that occurs at least k times (occurrences may overlap):
// binary search on the length, checked by counting consecutive suffixes whose
// height is at least that length.
//
// The rank array is called rk on purpose: a global named `rank` together with
// `using namespace std;` is ambiguous with std::rank on C++11 and later.

int sa[N];                     // sa[r]     = start of the suffix with rank r
int cc[N];                     // counting-sort buckets
int rk[N];                     // rk[i]     = rank of the suffix starting at i
int height[N];                 // height[r] = LCP of suffixes sa[r-1] and sa[r]
int t1[N],t2[N];               // scratch key arrays used by make_sa
char s[N];                     // input text
int k,len;                     // required occurrence count, text length

// True when suffixes a and b carry the same (first half, second half) key pair
// for the current doubling step; a second half starting at or past len is -1.
inline bool cmp(int *y,int a,int b,int step)
{
    int arank1=y[a];
    int brank1=y[b];
    int arank2=a+step>=len?-1:y[a+step];
    int brank2=b+step>=len?-1:y[b+step];
    return arank1==brank1&&arank2==brank2;
}

// Fills sa[0..len-1] by prefix doubling with counting sort.
// The doubling step is named `step` so it does not hide the global k.
inline void make_sa()
{
    // Keys are raw byte values first (256 buckets), so any character is a valid
    // bucket index, not only 'a'..'z'.
    int m=256;
    int *x=t1,*y=t2;
    for (int i=0;i<m;i++) cc[i]=0;
    for (int i=0;i<len;i++) ++cc[x[i]=static_cast<unsigned char>(s[i])];
    for (int i=1;i<m;i++) cc[i]+=cc[i-1];
    for (int i=len-1;i>=0;i--) sa[--cc[x[i]]]=i;
    for (int step=1;step<len;step<<=1)
    {
        // y lists the suffixes ordered by their second key.
        int p=0;
        for (int i=len-step;i<len;i++) y[p++]=i;
        for (int i=0;i<len;i++)
            if (sa[i]>=step) y[p++]=sa[i]-step;
        // Stable counting sort of y by the first key x.
        for (int i=0;i<m;i++) cc[i]=0;
        for (int i=0;i<len;i++) ++cc[x[y[i]]];
        for (int i=1;i<m;i++) cc[i]+=cc[i-1];
        for (int i=len-1;i>=0;i--) sa[--cc[x[y[i]]]]=y[i];
        // Re-number every suffix sa[i] (not just sa[1]): equal key pairs share
        // a rank; m becomes the rank count.
        std::swap(x,y);
        m=1; x[sa[0]]=0;
        for (int i=1;i<len;i++)
            x[sa[i]]=cmp(y,sa[i],sa[i-1],step)?m-1:m++;
        if (m>=len) break;     // all ranks distinct: order is final
    }
}

// Fills rk[] and height[] from sa[].
inline void make_height()
{
    for (int i=0;i<len;i++) rk[sa[i]]=i;
    int h=0; height[0]=0;
    for (int i=0;i<len;i++)
    {
        if (!rk[i]) {h=0; continue;}   // smallest suffix has no predecessor
        int j=sa[rk[i]-1];
        if (h) h--;
        // Explicit bounds instead of relying on the terminating NUL.
        while (i+h<len&&j+h<len&&s[i+h]==s[j+h]) h++;
        height[rk[i]]=h;
    }
}

// Returns true when some substring of length x occurs at least k times.
inline bool judge(int x)
{
    // A single occurrence is enough when k <= 1, and any length up to len has
    // one; the run counting below only ever reports groups of two or more.
    if (k<=1) return x<=len;
    int nowl=1;                         // suffixes in the current run
    for (int i=1;i<len;i++)
        if (height[i]<x) nowl=1;
        else
        {
            nowl++;
            if (nowl>=k) return 1;
        }
    return 0;
}

int main()
{
    // The field width keeps the read inside s (N-1 characters plus NUL).
    if (std::scanf("%100004s",s)!=1) s[0]='\0';
    if (std::scanf("%d",&k)!=1) k=0;
    len=static_cast<int>(std::strlen(s));
    make_sa();
    make_height();
    // Length 0 is always feasible, so search [1, len]; r ends as the largest
    // feasible length and is 0 (never -1) when no substring occurs k times.
    int l=1,r=len;
    while (l<=r)
    {
        int mid=(l+r)>>1;
        if (judge(mid)) l=mid+1; else r=mid-1;
    }
    std::printf("%d",r);
    return 0;
}
不相同的子串的个数
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<cmath>
#define N 100005

// Number of distinct substrings: the suffix with rank r contributes
// len - sa[r] prefixes, of which height[r] were already counted for the
// previous suffix in sorted order.
//
// The rank array is called rk on purpose: a global named `rank` together with
// `using namespace std;` is ambiguous with std::rank on C++11 and later.

int sa[N];                     // sa[r]     = start of the suffix with rank r
int cc[N];                     // counting-sort buckets
int t1[N],t2[N];               // scratch key arrays used by make_sa
int rk[N];                     // rk[i]     = rank of the suffix starting at i
int height[N];                 // height[r] = LCP of suffixes sa[r-1] and sa[r]
int len;                       // text length
long long ans;                 // 64-bit: the count can reach len*(len+1)/2
char s[N];                     // input text

// True when suffixes a and b carry the same (first half, second half) key pair
// for the current doubling step; a second half starting at or past len is -1.
inline bool cmp(int *y,int a,int b,int step)
{
    int arank1=y[a];
    int brank1=y[b];
    int arank2=a+step>=len?-1:y[a+step];
    int brank2=b+step>=len?-1:y[b+step];
    return arank1==brank1&&arank2==brank2;
}

// Fills sa[0..len-1] by prefix doubling with counting sort.
inline void make_sa()
{
    // Keys are raw byte values first (256 buckets), so any character is a valid
    // bucket index, not only 'a'..'z'.
    int m=256;
    int *x=t1,*y=t2;
    for (int i=0;i<m;i++) cc[i]=0;
    for (int i=0;i<len;i++) ++cc[x[i]=static_cast<unsigned char>(s[i])];
    for (int i=1;i<m;i++) cc[i]+=cc[i-1];
    for (int i=len-1;i>=0;i--) sa[--cc[x[i]]]=i;
    for (int step=1;step<len;step<<=1)
    {
        // y lists the suffixes ordered by their second key.
        int p=0;
        for (int i=len-step;i<len;i++) y[p++]=i;
        for (int i=0;i<len;i++)
            if (sa[i]>=step) y[p++]=sa[i]-step;
        // Stable counting sort of y by the first key x.
        for (int i=0;i<m;i++) cc[i]=0;
        for (int i=0;i<len;i++) ++cc[x[y[i]]];
        for (int i=1;i<m;i++) cc[i]+=cc[i-1];
        for (int i=len-1;i>=0;i--) sa[--cc[x[y[i]]]]=y[i];
        // Re-number: equal key pairs share a rank; m becomes the rank count.
        std::swap(x,y);
        m=1; x[sa[0]]=0;
        for (int i=1;i<len;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],step)?m-1:m++;
        if (m>=len) break;     // all ranks distinct: order is final
    }
}

// Fills rk[] and height[] from sa[].
inline void make_height()
{
    for (int i=0;i<len;i++) rk[sa[i]]=i;
    int h=0; height[0]=0;
    for (int i=0;i<len;i++)
    {
        if (!rk[i]) {h=0; continue;}   // smallest suffix has no predecessor
        int j=sa[rk[i]-1];
        if (h) h--;
        // Explicit bounds instead of relying on the terminating NUL.
        while (i+h<len&&j+h<len&&s[i+h]==s[j+h]) h++;
        height[rk[i]]=h;
    }
}

int main()
{
    // The field width keeps the read inside s (N-1 characters plus NUL).
    if (std::scanf("%100004s",s)!=1) s[0]='\0';
    len=static_cast<int>(std::strlen(s));
    make_sa();
    make_height();
    for (int i=0;i<len;i++) ans+=len-sa[i]-height[i];
    std::printf("%lld",ans);
    return 0;
}
最长回文子串
#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<cmath>
#include<cstdlib>
#define N 100005
// Work buffers hold text + '#' + reversed text + NUL, i.e. 2*n+2 characters
// for an input of n <= N-1 characters.
#define M (2*N+2)

// Longest palindromic substring: build text + '#' + reverse(text), then for
// every centre compare the suffix going right with the mirrored suffix (which
// spells the text going left) through an LCP query.
// NOTE(review): assumes the input itself contains no '#'; otherwise matches
// could run across the separator.
//
// The rank array is called rk on purpose: a global named `rank` together with
// `using namespace std;` is ambiguous with std::rank on C++11 and later.

int sa[M];                     // sa[r]     = start of the suffix with rank r
int cc[M];                     // counting-sort buckets
int rk[M];                     // rk[i]     = rank of the suffix starting at i
int height[M];                 // height[r] = LCP of suffixes sa[r-1] and sa[r]
int t1[M],t2[M];               // scratch key arrays used by make_sa
int f[M][20];                  // f[i][j]   = min of height[i .. i+2^j-1]
int len;                       // length of the combined string
char s[M];                     // combined string

// True when suffixes a and b carry the same (first half, second half) key pair
// for the current doubling step; a second half starting at or past len is -1.
inline bool cmp(int *y,int a,int b,int step)
{
    int arank1=y[a];
    int brank1=y[b];
    int arank2=a+step>=len?-1:y[a+step];
    int brank2=b+step>=len?-1:y[b+step];
    return arank1==brank1&&arank2==brank2;
}

// Fills sa[0..len-1] by prefix doubling with counting sort.
inline void make_sa()
{
    int m=256;
    int *x=t1,*y=t2;
    for (int i=0;i<m;i++) cc[i]=0;
    // unsigned char: a plain char may be negative and must not index cc.
    for (int i=0;i<len;i++) ++cc[x[i]=static_cast<unsigned char>(s[i])];
    for (int i=1;i<m;i++) cc[i]+=cc[i-1];
    for (int i=len-1;i>=0;i--) sa[--cc[x[i]]]=i;
    for (int step=1;step<len;step<<=1)
    {
        // y lists the suffixes ordered by their second key.
        int p=0;
        for (int i=len-step;i<len;i++) y[p++]=i;
        for (int i=0;i<len;i++)
            if (sa[i]>=step) y[p++]=sa[i]-step;
        // Stable counting sort of y by the first key x.
        for (int i=0;i<m;i++) cc[i]=0;
        for (int i=0;i<len;i++) ++cc[x[y[i]]];
        for (int i=1;i<m;i++) cc[i]+=cc[i-1];
        for (int i=len-1;i>=0;i--) sa[--cc[x[y[i]]]]=y[i];
        // Re-number: equal key pairs share a rank; m becomes the rank count.
        std::swap(x,y);
        m=1; x[sa[0]]=0;
        for (int i=1;i<len;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],step)?m-1:m++;
        if (m>=len) break;     // all ranks distinct: order is final
    }
}

// Fills rk[] and height[] from sa[].
inline void make_height()
{
    for (int i=0;i<len;i++) rk[sa[i]]=i;
    int h=0; height[0]=0;
    for (int i=0;i<len;i++)
    {
        if (!rk[i]) {h=0; continue;}   // smallest suffix has no predecessor
        int j=sa[rk[i]-1];
        if (h) h--;
        // Explicit bounds instead of relying on the terminating NUL.
        while (i+h<len&&j+h<len&&s[i+h]==s[j+h]) h++;
        height[rk[i]]=h;
    }
}

// Builds the sparse table of range minima over height[].
inline void make_st()
{
    for (int i=0;i<len;i++) f[i][0]=height[i];
    for (int j=1;j<20;j++)
        for (int i=0;i<len;i++)
            if (i+(1<<j)-1<len)
                f[i][j]=std::min(f[i][j-1],f[i+(1<<(j-1))][j-1]);
}

// LCP of the suffixes starting at positions l and r of the combined string.
// A position outside [0,len) is treated as an empty suffix (LCP 0); identical
// positions return the full suffix length, since the range query below needs
// two different ranks.
inline int lcp(int l,int r)
{
    if (l<0||r<0||l>=len||r>=len) return 0;
    if (l==r) return len-l;
    l=rk[l]; r=rk[r];
    if (l>r) std::swap(l,r);
    // Integer floor(log2(r-l)); avoids floating-point log and log(0).
    int k=0;
    while ((1<<(k+1))<=r-l) ++k;
    return std::min(f[l+1][k],f[r-(1<<k)+1][k]);
}

int main()
{
    // The field width limits the input to N-1 characters.
    if (std::scanf("%100004s",s)!=1) s[0]='\0';
    const int n=static_cast<int>(std::strlen(s));
    s[n]='#';
    for (int i=n+1;i<=2*n;i++) s[i]=s[2*n-i];
    s[2*n+1]='\0';
    len=2*n+1;
    make_sa();
    make_height();
    make_st();
    // Text position j sits at combined position mirror-j in the reversed half.
    const int mirror=2*n;
    int pos=0,ans=0;
    // Only positions of the original text are centres; the separator and the
    // mirrored half are skipped.
    for (int i=0;i<n;i++)
    {
        // Odd length, centred on i.
        int LCP=lcp(i,mirror-i);
        if (LCP*2-1>ans) {ans=LCP*2-1; pos=i-LCP+1;}
        // Even length, centred between i-1 and i; needs a character on the left.
        if (i>0)
        {
            LCP=lcp(i,mirror-i+1);
            if (LCP*2>ans) {ans=LCP*2; pos=i-LCP;}
        }
    }
    for (int i=pos;i<pos+ans;i++) std::putchar(s[i]);
    return 0;
}
连续重复子串
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<cmath>
#define N 100005

// Repeated-string period: a length i dividing len is a period of the text
// exactly when the LCP of suffix 0 and suffix i equals len-i. Prints the
// smallest such proper period (the one giving the most repetitions), or 0 when
// the text is not a repetition of a shorter block.
//
// The rank array is called rk on purpose: a global named `rank` together with
// `using namespace std;` is ambiguous with std::rank on C++11 and later.

char s[N];                     // input text
int cc[N];                     // counting-sort buckets
int sa[N];                     // sa[r]     = start of the suffix with rank r
int height[N];                 // height[r] = LCP of suffixes sa[r-1] and sa[r]
int rk[N];                     // rk[i]     = rank of the suffix starting at i
int t1[N],t2[N];               // scratch key arrays used by make_sa
int f[N][20];                  // f[i][j]   = min of height[i .. i+2^j-1]
int len;                       // text length

// True when suffixes a and b carry the same (first half, second half) key pair
// for the current doubling step; a second half starting at or past len is -1.
inline bool cmp(int *y,int a,int b,int step)
{
    int arank1=y[a];
    int brank1=y[b];
    int arank2=a+step>=len?-1:y[a+step];
    int brank2=b+step>=len?-1:y[b+step];
    return arank1==brank1&&arank2==brank2;
}

// Fills sa[0..len-1] by prefix doubling with counting sort.
inline void make_sa()
{
    // Keys are raw byte values first (256 buckets), so any character is a valid
    // bucket index, not only 'a'..'z'.
    int m=256;
    int *x=t1,*y=t2;
    for (int i=0;i<m;i++) cc[i]=0;
    for (int i=0;i<len;i++) ++cc[x[i]=static_cast<unsigned char>(s[i])];
    for (int i=1;i<m;i++) cc[i]+=cc[i-1];
    for (int i=len-1;i>=0;i--) sa[--cc[x[i]]]=i;
    for (int step=1;step<len;step<<=1)
    {
        // y lists the suffixes ordered by their second key.
        int p=0;
        for (int i=len-step;i<len;i++) y[p++]=i;
        for (int i=0;i<len;i++)
            if (sa[i]>=step) y[p++]=sa[i]-step;
        // Stable counting sort of y by the first key x.
        for (int i=0;i<m;i++) cc[i]=0;
        for (int i=0;i<len;i++) ++cc[x[y[i]]];
        for (int i=1;i<m;i++) cc[i]+=cc[i-1];
        for (int i=len-1;i>=0;i--) sa[--cc[x[y[i]]]]=y[i];
        // Re-number: equal key pairs share a rank; m becomes the rank count.
        std::swap(x,y);
        m=1; x[sa[0]]=0;
        for (int i=1;i<len;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],step)?m-1:m++;
        if (m>=len) break;     // all ranks distinct: order is final
    }
}

// Fills rk[] and height[] from sa[].
inline void make_height()
{
    for (int i=0;i<len;i++) rk[sa[i]]=i;
    int h=0; height[0]=0;
    for (int i=0;i<len;i++)
    {
        if (!rk[i]) {h=0; continue;}   // smallest suffix has no predecessor
        int j=sa[rk[i]-1];
        if (h) h--;
        // Explicit bounds instead of relying on the terminating NUL.
        while (i+h<len&&j+h<len&&s[i+h]==s[j+h]) h++;
        height[rk[i]]=h;
    }
}

// Builds the sparse table of range minima over height[].
inline void make_st()
{
    for (int i=0;i<len;i++) f[i][0]=height[i];
    for (int j=1;j<20;j++)
        for (int i=0;i<len;i++)
            if (i+(1<<j)-1<len)
                f[i][j]=std::min(f[i][j-1],f[i+(1<<(j-1))][j-1]);
}

// LCP of the suffixes starting at text positions l and r.
// A position outside [0,len) is treated as an empty suffix (LCP 0); identical
// positions return the full suffix length, since the range query below needs
// two different ranks.
inline int lcp(int l,int r)
{
    if (l<0||r<0||l>=len||r>=len) return 0;
    if (l==r) return len-l;
    l=rk[l]; r=rk[r];
    if (l>r) std::swap(l,r);
    // Integer floor(log2(r-l)); avoids floating-point log and log(0).
    int k=0;
    while ((1<<(k+1))<=r-l) ++k;
    return std::min(f[l+1][k],f[r-(1<<k)+1][k]);
}

int main()
{
    // The field width keeps the read inside s (N-1 characters plus NUL).
    if (std::scanf("%100004s",s)!=1) s[0]='\0';
    len=static_cast<int>(std::strlen(s));
    make_sa();
    make_height();
    make_st();
    int ans=0;
    // Stop at the first hit: candidates are tried in increasing order, so the
    // first valid period is the smallest one.
    for (int i=1;i<len;i++)
        if (len%i==0&&lcp(0,i)==len-i) {ans=i; break;}
    std::printf("%d",ans);
    return 0;
}