后缀数组
按照惯例,先上模板:
/* *suffix array *倍增算法 O(n*logn) *待排序数组长度为n,放在0~n-1中,在最后面补一个0 *build_sa( ,n+1, );//注意是n+1; *getHeight(,n); *例如: *n = 8; *num[] = { 1, 1, 2, 1, 1, 1, 1, 2, $ };注意num最后一位为0,其他大于0 *rank[] = { 4, 6, 8, 1, 2, 3, 5, 7, 0 };rank[0~n-1]为有效值,rank[n]必定为0无效值 *sa[] = { 8, 3, 4, 5, 0, 6, 1, 7, 2 };sa[1~n]为有效值,sa[0]必定为n是无效值 *height[]= { 0, 0, 3, 2, 3, 1, 2, 0, 1 };height[2~n]为有效值 * */ int sa[MAXN];//SA数组,表示将S的n个后缀从小到大排序后把排好序的 //的后缀的开头位置顺次放入SA中 int t1[MAXN],t2[MAXN],c[MAXN];//求SA数组需要的中间变量,不需要赋值 int rank[MAXN],height[MAXN]; //待排序的字符串放在s数组中,从s[0]到s[n-1],长度为n,且最大值小于m, //除s[n-1]外的所有s[i]都大于0,r[n-1]=0 //函数结束以后结果放在sa数组中 void build_sa(int s[],int n,int m) { int i,j,p,*x=t1,*y=t2; //第一轮基数排序,如果s的最大值很大,可改为快速排序 for(i=0;i<m;i++)c[i]=0; for(i=0;i<n;i++)c[x[i]=s[i]]++; for(i=1;i<m;i++)c[i]+=c[i-1]; for(i=n-1;i>=0;i--)sa[--c[x[i]]]=i; for(j=1;j<=n;j<<=1) { p=0; //直接利用sa数组排序第二关键字 for(i=n-j;i<n;i++)y[p++]=i;//后面的j个数第二关键字为空的最小 for(i=0;i<n;i++)if(sa[i]>=j)y[p++]=sa[i]-j; //这样数组y保存的就是按照第二关键字排序的结果 //基数排序第一关键字 for(i=0;i<m;i++)c[i]=0; for(i=0;i<n;i++)c[x[y[i]]]++; for(i=1;i<m;i++)c[i]+=c[i-1]; for(i=n-1;i>=0;i--)sa[--c[x[y[i]]]]=y[i]; //根据sa和x数组计算新的x数组 swap(x,y); p=1;x[sa[0]]=0; for(i=1;i<n;i++) x[sa[i]]=y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++; if(p>=n)break; m=p;//下次基数排序的最大值 } } void getHeight(int s[],int n) { int i,j,k=0; for(i=0;i<=n;i++)rank[sa[i]]=i; for(i=0;i<n;i++) { if(k)k--; j=sa[rank[i]-1]; while(s[i+k]==s[j+k])k++; height[rank[i]]=k; } }
参考资料:
(某神论文): http://wenku.baidu.com/link?url=en6joFZZqv55q8AfAh8abnHbSyE6lu7C5maUFR5fTTOSUvus-tdZazcFOMxwv9nv-DvxxkGhFzhtSo4gen2l0ozeCTVywD52-1nOjCMjZdq
height[i]=LCP(suf(sa[i]),suf(sa[i-1])).
1,询问两个子串的最长公共前缀。
由height数组的性质,设rank[i]<rank[j],则LCP(suf(i),suf(j))=min(height[rank[i]+1..rank[j]])。转化为RMQ问题,O(nlogn)预处理,O(1)查询。
2,求最长可重叠的重复子串。
由于后缀数组按字典序构造,最长重复子串一定是名次相邻的两个后缀的公共前缀,因此只需求height的最大值即可。
3,求最长不可重叠的重复子串长度。
二分查找长度len,把height>=len的名次连续的后缀分为一组,看同一组内是否存在两个不重叠的后缀,即sa[j]-sa[k]>=len。下面的代码是针对poj1743的差分数组写的,长度为len的差分串对应len+1个原始元素,所以用的是sa[j]-sa[k]>len。
// Feasibility test used by the binary search below.
// Scans sa[1..n] in rank order; consecutive entries whose height is at least
// k form one group.  Returns 1 as soon as the largest and smallest start
// positions seen in the current group differ by more than k, otherwise 0.
bool check(int k)
{
    int Max=sa[1],Min=sa[1];
    REP(i,2,n){
        if(height[i]<k) Max=Min=sa[i]; // a new group starts at rank i
        else{
            Max=max(sa[i],Max);
            Min=min(sa[i],Min);
            if(Max-Min>k) return 1; // note: strictly '>' here, not '>='
        }
    }
    return 0;
}

// Binary search for the largest k in [1, n/2] accepted by check();
// the best accepted value is kept in ans.
int l=1,r=n/2;
while(l<=r){
    int m=(l+r)>>1;
    if(check(m)) ans=max(ans,m),l=m+1;
    else r=m-1;
}
poj1743:
求某一变化出现多次的子串长度。
前后作差,转化为求最长不可重叠的重复子串长度。
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
using namespace std;

// poj1743: longest non-overlapping repeated substring of the sequence of
// adjacent differences, found by binary search over a suffix array.

const int maxn=1000100;

int a[maxn],n;   // input values (1-based) and the current sequence length
int str[maxn];   // shifted adjacent differences followed by a 0 sentinel
// The rank array is called rnk: with `using namespace std` the identifier
// `rank` is ambiguous with std::rank on C++11 and later compilers.
int sa[maxn],height[maxn],rnk[maxn];
int t1[maxn],t2[maxn],c[maxn];

// Prefix-doubling suffix array of s[0..n-1].  s[n-1] must be the unique 0
// sentinel and every symbol must be smaller than m.  Result goes to sa.
void build_sa(int *s,int n,int m)
{
    int *x=t1,*y=t2;
    // Counting sort on single symbols.
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=s[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(int j=1;j<=n;j<<=1){
        int p=0;
        // Order by second key: the last j suffixes have an empty one.
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort by first key.
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        // Recompute the equivalence classes for prefixes of length 2j.
        swap(x,y);
        p=1;
        x[sa[0]]=0;
        for(int i=1;i<n;i++)
            x[sa[i]]=(y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j])?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

// Fills rnk[] and height[] for the n real symbols of s (sentinel at s[n]).
void getHeight(int *s,int n)
{
    int k=0;
    for(int i=0;i<=n;i++) rnk[sa[i]]=i;
    for(int i=0;i<n;i++){
        if(k) k--;
        int j=sa[rnk[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rnk[i]]=k;
    }
}

// Returns true when some run of rank-adjacent suffixes with height >= k
// contains two start positions that are more than k apart.
bool check(int k)
{
    int Max=sa[1],Min=sa[1];
    for(int i=2;i<=n;i++){
        if(height[i]<k){
            Max=Min=sa[i];
        }
        else{
            Max=max(sa[i],Max);
            Min=min(sa[i],Min);
            // Strictly '>' because k differences describe k+1 original values.
            if(Max-Min>k) return true;
        }
    }
    return false;
}

int main()
{
    // Debug convenience: read from in.txt only when it exists.  A failed
    // freopen would leave stdin closed and the program would read nothing.
    if(FILE *probe=fopen("in.txt","r")){
        fclose(probe);
        freopen("in.txt","r",stdin);
    }
    // Stop on the terminating 0 and also on end of input / read failure.
    while(cin>>n&&n){
        for(int i=1;i<=n;i++) scanf("%d",&a[i]);
        // Work on adjacent differences; +100 keeps them inside (0,200) as
        // build_sa requires -- assumes inputs lie in 1..88 (TODO confirm).
        for(int i=2;i<=n;i++) str[i-2]=a[i]-a[i-1]+100;
        n--;
        str[n]=0;
        build_sa(str,n+1,200);
        getHeight(str,n);
        int ans=0;
        int l=1,r=n/2;
        while(l<=r){
            int m=(l+r)>>1;
            if(check(m)) ans=max(ans,m),l=m+1;
            else r=m-1;
        }
        // ans counts differences; the answer in original values is ans+1.
        if(ans+1>=5) cout<<ans+1<<'\n';
        else cout<<0<<'\n';
    }
    return 0;
}
poj3261:
求至少出现k次的子串,可重叠。
二分查找长度,看长度是否符合条件即出现k次或以上。对height分组,是否有一组区间长度>=k-1(k-1个height对应k个后缀)。
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
using namespace std;

// poj3261: longest substring that occurs at least k times (overlap allowed),
// found by binary search on the length over the height array.

const int maxn=1000100;

int n,k;         // sequence length and required number of occurrences
int a[maxn];     // input values shifted by +1, followed by a 0 sentinel
// The rank array is called rnk: with `using namespace std` the identifier
// `rank` is ambiguous with std::rank on C++11 and later compilers.
int sa[maxn],height[maxn],rnk[maxn];
int t1[maxn],t2[maxn],c[maxn];

// Prefix-doubling suffix array of s[0..n-1].  s[n-1] must be the unique 0
// sentinel and every symbol must be smaller than m.  Result goes to sa.
void build_sa(int *s,int n,int m)
{
    int *x=t1,*y=t2;
    // Counting sort on single symbols.
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=s[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(int j=1;j<=n;j<<=1){
        int p=0;
        // Order by second key: the last j suffixes have an empty one.
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort by first key.
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        // Recompute the equivalence classes for prefixes of length 2j.
        swap(x,y);
        p=1;
        x[sa[0]]=0;
        for(int i=1;i<n;i++)
            x[sa[i]]=(y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j])?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

// Fills rnk[] and height[] for the n real symbols of s (sentinel at s[n]).
void getHeight(int *s,int n)
{
    int h=0;
    for(int i=0;i<=n;i++) rnk[sa[i]]=i;
    for(int i=0;i<n;i++){
        if(h) h--;
        int j=sa[rnk[i]-1];
        while(s[i+h]==s[j+h]) h++;
        height[rnk[i]]=h;
    }
}

// Returns true when height[2..n] contains at least k-1 consecutive entries
// that are >= len, i.e. k rank-adjacent suffixes share a prefix of that length.
bool check(int len)
{
    int cnt=0;
    for(int i=2;i<=n;i++){
        if(height[i]>=len) cnt++;
        else cnt=0;
        if(cnt+1>=k) return true;
    }
    return false;
}

int main()
{
    // Debug convenience: read from in.txt only when it exists.  A failed
    // freopen would leave stdin closed and the program would read nothing.
    if(FILE *probe=fopen("in.txt","r")){
        fclose(probe);
        freopen("in.txt","r",stdin);
    }
    while(cin>>n>>k){
        for(int i=0;i<n;i++){
            scanf("%d",&a[i]);
            // build_sa needs every real symbol to be > 0 so the trailing 0
            // stays a unique sentinel; shift by one in case the input
            // contains 0 (presumably allowed by the problem -- confirm).
            a[i]++;
        }
        a[n]=0;
        // Alphabet bound raised by one to account for the +1 shift.
        build_sa(a,n+1,1000011);
        getHeight(a,n);
        int ans=0;
        int l=1,r=n;
        while(l<=r){
            int m=(l+r)>>1;
            if(check(m)) ans=max(m,ans),l=m+1;
            else r=m-1;
        }
        cout<<ans<<'\n';
    }
    return 0;
}
求一个字符串中不同子串的个数。每个子串都是某个后缀的前缀。按sa的顺序依次加入后缀sa[i]时,它共有n-sa[i]个前缀,其中前height[i]个与排名前一位的后缀重复,其余的都是新的子串,因此只要求所有n-sa[i]-height[i]的和就行了。详情见上面某神论文。
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
using namespace std;

// Counts the distinct substrings of each input string as the sum over all
// ranks i of n - sa[i] - height[i].

const int maxn=1000100;

char s[maxn];    // input string
int a[maxn],n;   // s widened to int (terminating 0 is the sentinel); length
// The rank array is called rnk: with `using namespace std` the identifier
// `rank` is ambiguous with std::rank on C++11 and later compilers.
int sa[maxn],height[maxn],rnk[maxn];
int t1[maxn],t2[maxn],c[maxn];

// Prefix-doubling suffix array of s[0..n-1].  s[n-1] must be the unique 0
// sentinel and every symbol must be smaller than m.  Result goes to sa.
void build_sa(int *s,int n,int m)
{
    int *x=t1,*y=t2;
    // Counting sort on single symbols.
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=s[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(int j=1;j<=n;j<<=1){
        int p=0;
        // Order by second key: the last j suffixes have an empty one.
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort by first key.
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        // Recompute the equivalence classes for prefixes of length 2j.
        swap(x,y);
        p=1;
        x[sa[0]]=0;
        for(int i=1;i<n;i++)
            x[sa[i]]=(y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j])?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

// Fills rnk[] and height[] for the n real symbols of s (sentinel at s[n]).
void getHeight(int *s,int n)
{
    int k=0;
    for(int i=0;i<=n;i++) rnk[sa[i]]=i;
    for(int i=0;i<n;i++){
        if(k) k--;
        int j=sa[rnk[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rnk[i]]=k;
    }
}

int main()
{
    // Debug convenience: read from in.txt only when it exists.  A failed
    // freopen would leave stdin closed and the program would read nothing.
    if(FILE *probe=fopen("in.txt","r")){
        fclose(probe);
        freopen("in.txt","r",stdin);
    }
    int T;
    scanf("%d",&T);
    while(T--){
        scanf("%s",s);
        n=strlen(s);
        // Copy including the terminating '\0', which serves as the sentinel.
        // Go through unsigned char so a byte >= 128 cannot become a negative
        // counting-sort index where plain char is signed.
        for(int i=0;i<=n;i++) a[i]=(unsigned char)s[i];
        build_sa(a,n+1,300);
        getHeight(a,n);
        long long ans=0;
        // Suffix sa[i] has n-sa[i] prefixes; height[i] of them were already
        // counted with the previous suffix in rank order.
        for(int i=1;i<=n;i++) ans+=n-sa[i]-height[i];
        cout<<ans<<'\n';
    }
    return 0;
}
poj2406:
找字符串的循环节。
kmp水过。。。
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
using namespace std;

// poj2406: number of times the shortest period repeats to form the whole
// string, computed with the KMP failure function.

const int maxn=1000100;

char s[maxn];
int Next[maxn];   // Next[i]: length of the longest proper border of s[0..i-1]; Next[0] = -1

// Fills Next[0..n] for the string s of length n.
void getNext(char *s,int n)
{
    Next[0]=-1;
    int i=0,j=-1;
    // j always stays below i, so checking i alone bounds both indices.
    while(i<n){
        if(j==-1||s[i]==s[j]) Next[++i]=++j;
        else j=Next[j];
    }
}

int main()
{
    // Debug convenience: read from in.txt only when it exists.  A failed
    // freopen would leave stdin closed and the program would read nothing.
    if(FILE *probe=fopen("in.txt","r")){
        fclose(probe);
        freopen("in.txt","r",stdin);
    }
    while(scanf("%s",s)==1){
        if(strcmp(s,".")==0) break;
        int n=strlen(s);
        getNext(s,n);
        // n - Next[n] is the shortest period; it only yields a whole number
        // of repetitions when it divides n.
        int period=n-Next[n];
        if(n%period==0) cout<<n/period<<'\n';
        else cout<<1<<'\n';
    }
    return 0;
}
后缀数组解法,很简单,枚举长度k,如果能构成循环,则LCP(suf(0),suf(k))==n-k。由于suf(0)是固定的,所以不需要用RMQ,预处理一下就行了。
然而倍增的nlogn预处理被卡了。。。
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
using namespace std;

// poj2406 with a suffix array: the string is k-periodic exactly when
// LCP(suf(0), suf(k)) == n - k.  Because one side is always suf(0), the LCPs
// are precomputed by a running minimum outward from rank[0] instead of an RMQ.

const int maxn=1000100;

char s[maxn];
int str[maxn],n;
// The rank array is called rnk: with `using namespace std` the identifier
// `rank` is ambiguous with std::rank on C++11 and later compilers.
int sa[maxn],height[maxn],rnk[maxn];
int t1[maxn],t2[maxn],c[maxn];
int LCP[maxn];   // LCP[r]: longest common prefix of suf(0) and the suffix of rank r

// Prefix-doubling suffix array of s[0..n-1].  s[n-1] must be the unique 0
// sentinel and every symbol must be smaller than m.  Result goes to sa.
void build_sa(int *s,int n,int m)
{
    int *x=t1,*y=t2;
    // Counting sort on single symbols.
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=s[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(int j=1;j<=n;j<<=1){
        int p=0;
        // Order by second key: the last j suffixes have an empty one.
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort by first key.
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        // Recompute the equivalence classes for prefixes of length 2j.
        swap(x,y);
        p=1;
        x[sa[0]]=0;
        for(int i=1;i<n;i++)
            x[sa[i]]=(y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j])?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

// Fills rnk[] and height[] for the n real symbols of s (sentinel at s[n]).
void getHeight(int *s,int n)
{
    int k=0;
    for(int i=0;i<=n;i++) rnk[sa[i]]=i;
    for(int i=0;i<n;i++){
        if(k) k--;
        int j=sa[rnk[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rnk[i]]=k;
    }
}

int main()
{
    // Debug convenience: read from in.txt only when it exists.  A failed
    // freopen would leave stdin closed and the program would read nothing.
    if(FILE *probe=fopen("in.txt","r")){
        fclose(probe);
        freopen("in.txt","r",stdin);
    }
    while(scanf("%s",s)==1){
        if(strcmp(s,".")==0) return 0;
        n=strlen(s);
        // Copy including the terminating '\0', which serves as the sentinel.
        for(int i=0;i<=n;i++) str[i]=(unsigned char)s[i];
        build_sa(str,n+1,300);
        getHeight(str,n);

        const int r0=rnk[0];
        // Running minimum of height towards larger ranks.  Seeding with n
        // (larger than any height) avoids touching height[r0+1] when r0==n.
        int running=n;
        for(int i=r0+1;i<=n;i++) LCP[i]=running=min(height[i],running);
        // Same towards smaller ranks.
        running=n;
        for(int i=r0;i>=2;i--) LCP[i-1]=running=min(height[i],running);

        int ans=1;
        for(int k=1;k<=n/2;k++){
            // The smallest period k dividing n gives the most repetitions.
            if(n%k==0&&LCP[rnk[k]]==n-k){
                ans=n/k;
                break;
            }
        }
        cout<<ans<<'\n';
    }
}
后缀数组+RMQ查询。不会ST,怒建一棵线段树!!!1890ms线段树暴力水过。。
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
using namespace std;

// Maximum number of consecutive repetitions of any substring: suffix array
// plus range-minimum queries on height answered by a segment tree.

const int maxn=1000100;
const int INF=(1<<29);

char s[maxn];
int str[maxn],n;
// The rank array is called rnk: with `using namespace std` the identifier
// `rank` is ambiguous with std::rank on C++11 and later compilers.
int sa[maxn],height[maxn],rnk[maxn];
int t1[maxn],t2[maxn],c[maxn];

// Prefix-doubling suffix array of s[0..n-1].  s[n-1] must be the unique 0
// sentinel and every symbol must be smaller than m.  Result goes to sa.
void build_sa(int *s,int n,int m)
{
    int *x=t1,*y=t2;
    // Counting sort on single symbols.
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=s[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(int j=1;j<=n;j<<=1){
        int p=0;
        // Order by second key: the last j suffixes have an empty one.
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort by first key.
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        // Recompute the equivalence classes for prefixes of length 2j.
        swap(x,y);
        p=1;
        x[sa[0]]=0;
        for(int i=1;i<n;i++)
            x[sa[i]]=(y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j])?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

// Fills rnk[] and height[] for the n real symbols of s (sentinel at s[n]).
void getHeight(int *s,int n)
{
    int k=0;
    for(int i=0;i<=n;i++) rnk[sa[i]]=i;
    for(int i=0;i<n;i++){
        if(k) k--;
        int j=sa[rnk[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rnk[i]]=k;
    }
}

int Min[maxn<<2];   // segment tree over height[1..n] storing range minima

// Recomputes node rt from its two children.
void push_up(int rt)
{
    Min[rt]=min(Min[rt<<1],Min[rt<<1|1]);
}

// Builds the subtree rooted at rt for the index range [l, r].
void build(int l,int r,int rt)
{
    if(l==r){
        Min[rt]=height[l];
        return;
    }
    int m=(l+r)>>1;
    build(l,m,rt<<1);
    build(m+1,r,rt<<1|1);
    push_up(rt);
}

// Minimum of height[L..R]; node rt covers [l, r].
int query(int L,int R,int l,int r,int rt)
{
    if(L<=l&&r<=R) return Min[rt];
    int m=(l+r)>>1;
    int res=INF;
    if(L<=m) res=min(res,query(L,R,l,m,rt<<1));
    if(R>m) res=min(res,query(L,R,m+1,r,rt<<1|1));
    return res;
}

// Longest common prefix of the suffixes starting at a and b (a != b).
int lcp(int a,int b)
{
    a=rnk[a];b=rnk[b];
    if(a>b) swap(a,b);
    return query(a+1,b,1,n,1);
}

int main()
{
    int T;
    scanf("%d",&T);
    while(T--){
        scanf("%d",&n);
        for(int i=0;i<n;i++) cin>>s[i];
        for(int i=0;i<n;i++) str[i]=(unsigned char)s[i];
        str[n]=0;
        build_sa(str,n+1,300);
        getHeight(str,n);
        build(1,n,1);
        int ans=1;
        // For every period L look only at positions that are multiples of L;
        // any run of period L with at least two repetitions covers one.
        for(int L=1;L<=n;L++){
            for(int i=0;i+L<n;i+=L){
                int K=0;
                if(str[i]==str[i+L]) K=lcp(i,i+L);
                int cnt=K/L+1;
                const int rem=K%L;
                if(rem){
                    // The run may start up to L-1 positions before i.  It
                    // gains one more repetition only if it reaches back
                    // L-rem positions, i.e. suffixes t and t+L agree on at
                    // least those L-rem symbols.  Comparing against K instead
                    // accepts shorter, unrelated matches when K < L-rem.
                    const int t=i-(L-rem);
                    if(t>=0&&lcp(t,t+L)>=L-rem) cnt++;
                }
                ans=max(ans,cnt);
            }
        }
        cout<<ans<<'\n';
    }
    return 0;
}
抄了个ST的模板,330ms,果然快了很多
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<set>
#include<map>
#include<string>
#include<math.h>
#include<cctype>
using namespace std;

// Maximum number of consecutive repetitions of any substring: suffix array
// plus range-minimum queries on height answered by a sparse table.

const int maxn=1000100;

char s[maxn];
int str[maxn],n;
// The rank array is called rnk: with `using namespace std` the identifier
// `rank` is ambiguous with std::rank on C++11 and later compilers.
int sa[maxn],height[maxn],rnk[maxn];
int t1[maxn],t2[maxn],c[maxn];

// Prefix-doubling suffix array of s[0..n-1].  s[n-1] must be the unique 0
// sentinel and every symbol must be smaller than m.  Result goes to sa.
void build_sa(int *s,int n,int m)
{
    int *x=t1,*y=t2;
    // Counting sort on single symbols.
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=s[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(int j=1;j<=n;j<<=1){
        int p=0;
        // Order by second key: the last j suffixes have an empty one.
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort by first key.
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        // Recompute the equivalence classes for prefixes of length 2j.
        swap(x,y);
        p=1;
        x[sa[0]]=0;
        for(int i=1;i<n;i++)
            x[sa[i]]=(y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j])?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

// Fills rnk[] and height[] for the n real symbols of s (sentinel at s[n]).
void getHeight(int *s,int n)
{
    int k=0;
    for(int i=0;i<=n;i++) rnk[sa[i]]=i;
    for(int i=0;i<n;i++){
        if(k) k--;
        int j=sa[rnk[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rnk[i]]=k;
    }
}

int dp[maxn][20];   // dp[i][j]: minimum of b[i .. i+2^j-1]
int mm[maxn];       // mm[i]: floor(log2(i)); mm[0] = -1

// Builds the sparse table over b[1..n].
void initRMQ(int n,int *b)
{
    mm[0]=-1;
    for(int i=1;i<=n;i++){
        mm[i]=((i&(i-1))==0)?mm[i-1]+1:mm[i-1];
        dp[i][0]=b[i];
    }
    for(int j=1;j<=mm[n];j++){
        for(int i=1;i+(1<<j)-1<=n;i++){
            dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);
        }
    }
}

// Minimum of b[x..y]; requires 1 <= x <= y.
int rmq(int x,int y)
{
    int k=mm[y-x+1];
    return min(dp[x][k],dp[y-(1<<k)+1][k]);
}

// Longest common prefix of the suffixes starting at a and b (a != b).
int lcp(int a,int b)
{
    a=rnk[a];b=rnk[b];
    if(a>b) swap(a,b);
    return rmq(a+1,b);
}

int main()
{
    int T;
    scanf("%d",&T);
    while(T--){
        scanf("%d",&n);
        for(int i=0;i<n;i++) cin>>s[i];
        for(int i=0;i<n;i++) str[i]=(unsigned char)s[i];
        str[n]=0;
        build_sa(str,n+1,300);
        getHeight(str,n);
        initRMQ(n,height);
        int ans=1;
        // For every period L look only at positions that are multiples of L;
        // any run of period L with at least two repetitions covers one.
        for(int L=1;L<=n;L++){
            for(int i=0;i+L<n;i+=L){
                int K=0;
                if(str[i]==str[i+L]) K=lcp(i,i+L);
                int cnt=K/L+1;
                const int rem=K%L;
                if(rem){
                    // The run may start up to L-1 positions before i.  It
                    // gains one more repetition only if it reaches back
                    // L-rem positions, i.e. suffixes t and t+L agree on at
                    // least those L-rem symbols.  Comparing against K instead
                    // accepts shorter, unrelated matches when K < L-rem.
                    const int t=i-(L-rem);
                    if(t>=0&&lcp(t,t+L)>=L-rem) cnt++;
                }
                ans=max(ans,cnt);
            }
        }
        cout<<ans<<'\n';
    }
    return 0;
}
poj3693:
求字符串中具有循环节且循环次数最多的子串,输出字典序最小的。
找循环次数最多的方法和上面一样,关键是找字典序最小的,而字典序首先应该想到sa数组,因为sa数组就是后缀的字典序排名。方法是找到循环次数最大的所有可能的循环节长度L,然后按sa数组的顺序枚举,判断lcp(sa[i],sa[i]+L)是否大于等于循环节长度*(循环次数-1),枚举到的第一个符合条件的一定是字典序最小的。
#include<iostream>
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<set>
#include<string>
using namespace std;

// poj3693: among the substrings with the largest number of consecutive
// repetitions, print the lexicographically smallest one.

const int maxn=1000100;

char s[maxn];
int str[maxn],n;
// The rank array is called rnk: with `using namespace std` the identifier
// `rank` is ambiguous with std::rank on C++11 and later compilers.
int sa[maxn],height[maxn],rnk[maxn];
int t1[maxn],t2[maxn],c[maxn];

// True when positions a and b have equal classes for both halves of length l.
bool cmp(int *r,int a,int b,int l)
{
    return r[a]==r[b]&&r[a+l]==r[b+l];
}

// Prefix-doubling suffix array of s[0..n-1].  s[n-1] must be the unique 0
// sentinel and every symbol must be smaller than m.  Result goes to sa.
void build_sa(int *s,int n,int m)
{
    int *x=t1,*y=t2;
    // Counting sort on single symbols.
    for(int i=0;i<m;i++) c[i]=0;
    for(int i=0;i<n;i++) c[x[i]=s[i]]++;
    for(int i=1;i<m;i++) c[i]+=c[i-1];
    for(int i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
    for(int j=1;j<=n;j<<=1){
        int p=0;
        // Order by second key: the last j suffixes have an empty one.
        for(int i=n-j;i<n;i++) y[p++]=i;
        for(int i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        // Stable counting sort by first key.
        for(int i=0;i<m;i++) c[i]=0;
        for(int i=0;i<n;i++) c[x[y[i]]]++;
        for(int i=1;i<m;i++) c[i]+=c[i-1];
        for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
        // Recompute the equivalence classes for prefixes of length 2j.
        swap(x,y);
        p=1;
        x[sa[0]]=0;
        for(int i=1;i<n;i++) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        if(p>=n) break;
        m=p;
    }
}

// Fills rnk[] and height[] for the n real symbols of s (sentinel at s[n]).
void getHeight(int *s,int n)
{
    int k=0;
    for(int i=0;i<=n;i++) rnk[sa[i]]=i;
    for(int i=0;i<n;i++){
        if(k) k--;
        int j=sa[rnk[i]-1];
        while(s[i+k]==s[j+k]) k++;
        height[rnk[i]]=k;
    }
}

int dp[maxn][20];   // dp[i][j]: minimum of b[i .. i+2^j-1]
int mm[maxn];       // mm[i]: floor(log2(i)); mm[0] = -1

// Builds the sparse table over b[1..n].
void initRMQ(int n,int *b)
{
    mm[0]=-1;
    for(int i=1;i<=n;i++){
        mm[i]=((i&(i-1))==0)?mm[i-1]+1:mm[i-1];
        dp[i][0]=b[i];
    }
    for(int j=1;j<=mm[n];j++){
        for(int i=1;i+(1<<j)-1<=n;i++)
            dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);
    }
}

// Minimum of b[x..y]; requires 1 <= x <= y.
int rmq(int x,int y)
{
    int k=mm[y-x+1];
    return min(dp[x][k],dp[y-(1<<k)+1][k]);
}

// Longest common prefix of the suffixes starting at a and b.
// Both positions must be distinct and at most n.
int lcp(int a,int b)
{
    a=rnk[a];b=rnk[b];
    if(a>b) swap(a,b);
    return rmq(a+1,b);
}

int main()
{
    // Debug convenience: read from in.txt only when it exists.  A failed
    // freopen would leave stdin closed and the program would read nothing.
    if(FILE *probe=fopen("in.txt","r")){
        fclose(probe);
        freopen("in.txt","r",stdin);
    }
    int casen=1;
    while(scanf("%s",s)==1&&strcmp(s,"#")){
        n=strlen(s);
        // Copy including the terminating '\0', which serves as the sentinel.
        for(int i=0;i<=n;i++) str[i]=(unsigned char)s[i];
        build_sa(str,n+1,300);
        getHeight(str,n);
        initRMQ(n,height);

        int maxcnt=1;
        set<int> maxL;   // every period length that reaches maxcnt repetitions
        for(int L=1;L<=n;L++){
            for(int i=0;i+L<n;i+=L){
                if(str[i]!=str[i+L]) continue;
                const int K=lcp(i,i+L);
                int cnt=K/L+1;
                const int rem=K%L;
                const int t=i-(L-rem);
                // One more repetition is gained only if the run reaches back
                // L-rem positions before i, i.e. suffixes t and t+L agree on
                // at least those L-rem symbols.  Comparing against K instead
                // accepts shorter, unrelated matches when K < L-rem.
                if(rem&&t>=0&&lcp(t,t+L)>=L-rem) cnt++;
                if(cnt>maxcnt){
                    maxcnt=cnt;
                    maxL.clear();   // periods recorded for a smaller count are stale
                }
                if(cnt==maxcnt) maxL.insert(L);
            }
        }

        printf("Case %d: ",casen++);
        if(maxcnt==1){
            // No substring repeats: the answer is the smallest single character.
            char ans='z';
            for(int i=0;i<n;i++){
                if(s[i]<ans) ans=s[i];
            }
            cout<<ans<<endl;
        }
        else{
            bool flag=false;
            string ans="";
            // Walk the suffixes in lexicographic order; the first one that
            // starts a maxcnt-fold repetition is the answer.  Periods are
            // tried in increasing order, so the shortest candidate (a prefix
            // of the longer ones) wins for a given start.
            for(int i=1;i<=n&&!flag;i++){
                for(set<int>::iterator it=maxL.begin();it!=maxL.end();++it){
                    const int L=*it;
                    const int len=L*maxcnt;
                    // The whole repetition must fit in the string; this also
                    // keeps sa[i]+L inside the range where rnk[] is valid.
                    if(sa[i]+len>n) continue;
                    if(lcp(sa[i],sa[i]+L)>=len-L){
                        ans.assign(s+sa[i],len);
                        flag=true;
                        break;
                    }
                }
            }
            cout<<ans<<endl;
        }
    }
    return 0;
}
poj2774:
求两个串的最长连续公共子串。
把两个串接在一起,中间加个'}',合成新串,再处理,本来加'#'的,居然WA了。。
一开始的思路是对长度进行二分,复杂度nlogn,因为如果直接找height数组的话不一定有两个排名相邻的串是原来在两个串中的,然而完全没有必要,因为一定至少存在排名相邻的两个串在不同的原串中,因此只要扫一遍height数组就可以了,复杂度n。
二分解法:
#include<iostream> #include<cstdio> #include<cstdlib> #include<cstring> #include<algorithm> #define REP(i,a,b) for(int i=a;i<=b;i++) #define rep(i,a,b) for(int i=a;i>=b;i--) #define MS0(a) memset(a,0,sizeof(a)) using namespace std; const int maxn=1000100; char s[maxn],t[maxn]; int str[maxn],n; int sa[maxn],height[maxn],rank[maxn]; int t1[maxn],t2[maxn],c[maxn]; int ls,lt; bool cmp(int *r,int a,int b,int l) { return r[a]==r[b]&&r[a+l]==r[b+l]; } void build_sa(int *s,int n,int m) { int i,j,p,*x=t1,*y=t2; REP(i,0,m-1) c[i]=0; REP(i,0,n-1) c[x[i]=s[i]]++; REP(i,1,m-1) c[i]+=c[i-1]; rep(i,n-1,0) sa[--c[x[i]]]=i; for(j=1;j<=n;j<<=1){ p=0; REP(i,n-j,n-1) y[p++]=i; REP(i,0,n-1) if(sa[i]>=j) y[p++]=sa[i]-j; REP(i,0,m-1) c[i]=0; REP(i,0,n-1) c[x[y[i]]]++; REP(i,1,m-1) c[i]+=c[i-1]; rep(i,n-1,0) sa[--c[x[y[i]]]]=y[i]; swap(x,y); p=1;x[sa[0]]=0; REP(i,1,n-1) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++; if(p>=n) break; m=p; } } void getHeight(int *s,int n) { int i,j,k=0; REP(i,0,n) rank[sa[i]]=i; REP(i,0,n-1){ if(k) k--; j=sa[rank[i]-1]; while(s[i+k]==s[j+k]) k++; height[rank[i]]=k; } } bool check(int k) { int Max=sa[1],Min=sa[1]; REP(i,2,n){ if(height[i]>=k){ Max=max(sa[i],Max); Min=min(sa[i],Min); } else{ if(Max>ls&&Min<ls) return 1; Max=sa[i]; Min=sa[i]; } } return 0; } int main() { freopen("in.txt","r",stdin); while(~scanf("%s%s",s,t)){ ls=strlen(s); lt=strlen(t); s[ls]='}';s[ls+1]='