• 后缀数组练习题


     
    Milk Patterns
    Time Limit: 5000MS   Memory Limit: 65536K
    Total Submissions: 17079   Accepted: 7553
    Case Time Limit: 2000MS

    Description

    Farmer John has noticed that the quality of milk given by his cows varies from day to day. On further investigation, he discovered that although he can't predict the quality of milk from one day to the next, there are some regular patterns in the daily milk quality.

    To perform a rigorous study, he has invented a complex classification scheme by which each milk sample is recorded as an integer between 0 and 1,000,000 inclusive, and has recorded data from a single cow over N (1 ≤ N ≤ 20,000) days. He wishes to find the longest pattern of samples which repeats identically at least K (2 ≤ K ≤ N) times. This may include overlapping patterns -- 1 2 3 2 3 2 3 1 repeats 2 3 2 3 twice, for example.

    Help Farmer John by finding the longest repeating subsequence in the sequence of samples. It is guaranteed that at least one subsequence is repeated at least K times.

    Input

    Line 1: Two space-separated integers: N and K
    Lines 2..N+1: N integers, one per line, the quality of the milk on day i appears on the ith line.

    Output

    Line 1: One integer, the length of the longest pattern which occurs at least K times

    Sample Input

    8 2
    1
    2
    3
    2
    3
    2
    3
    1

    Sample Output

    4

    求最长可重复至少出现k次的子串长度

    #include<cstdio>
    #include<cstring>
    #include<algorithm>
    using namespace std;
    // N bounds the arrays indexed by a key value: main() hands da() the bound
    // m = (largest sample + 1) + 1, and ws[] is indexed by key.
    const int N=1e6+88;
    // M bounds the arrays indexed by a position in the sequence.
    const int M=2e4+88;
    // wa: one of the two rank buffers; wv: keys of the current sorting pass;
    // ws: counting-sort buckets.
    int wa[N],wv[N],ws[N];
    // sa: suffix array; rank: inverse of sa; wb: the other rank buffer;
    // height[i]: common prefix length of suffixes sa[i-1] and sa[i];
    // num: the input samples, each shifted up by one.
    // NOTE(review): with `using namespace std;` the name `rank` may collide with
    // std::rank on C++11-or-later toolchains — confirm on the target compiler.
    int sa[M],rank[M],wb[M],height[M],num[M];
    // True when positions a and b carry the same key pair (r[p], r[p+l]).
    bool cmp(int *r,int a,int b,int l){
        if(r[a]!=r[b]) return false;
        return r[a+l]==r[b+l];
    }
    // Builds the suffix array of r[0..n) by prefix doubling with counting sorts,
    // then fills rank[] and height[].
    //   r: the keys; every key must lie in [0, m). main() passes a sequence whose
    //      last key is 0 while all other keys are at least 1.
    //   n: number of keys, including that trailing 0.
    //   m: exclusive upper bound of the keys (number of buckets for the first pass).
    // Results are written to the globals sa, rank and height.
    void da(int *r,int n,int m){
        int *x=wa,*y=wb;
        // First pass: counting sort of the positions by their single key.
        for(int i=0;i<m;++i) ws[i]=0;
        for(int i=0;i<n;++i) ++ws[x[i]=r[i]];
        for(int i=1;i<m;++i) ws[i]+=ws[i-1];
        for(int i=0;i<n;++i) sa[--ws[x[i]]]=i;
        int p=1;
        // Each round doubles the compared length j; p ends a round as the number of
        // distinct ranks, becomes the bucket count m of the next round, and the
        // loop stops once p reaches n.
        for(int j=1;p<n;j<<=1,m=p) {
            p=0;
            // y = positions ordered by the second half of the key: the last j
            // positions go first, the rest follow in the order given by sa.
            for(int i=n-j;i<n;++i) y[p++]=i;
            for(int i=0;i<n;++i) if(sa[i]>=j) y[p++]=sa[i]-j;
            // Counting sort of y by the first-half rank x; the backward fill keeps
            // the second-half order among equal first halves.
            for(int i=0;i<n;++i) wv[i]=x[y[i]];
            for(int i=0;i<m;++i) ws[i]=0;
            for(int i=0;i<n;++i) ++ws[wv[i]];
            for(int i=1;i<m;++i) ws[i]+=ws[i-1];
            for(int i=n-1;i>=0;--i) sa[--ws[wv[i]]]=y[i];
            // After the swap y holds the previous ranks; x receives the new ranks,
            // with neighbours in sa sharing a rank when cmp() finds their pairs equal.
            swap(x,y),x[sa[0]]=0,p=1;
            for(int i=1;i<n;++i) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        }
        // rank is the inverse of sa; rank[sa[0]] is not written here.
        for(int i=1;i<n;++i) rank[sa[i]]=i;
        int k=0;
        // height[rank[i]] = common prefix length of suffix i and the suffix just
        // before it in sa. k is carried from suffix i to i+1, reduced by one.
        for(int i=0;i<n-1;height[rank[i++]]=k){
            if(k) --k;
            for(int j=sa[rank[i]-1];r[i+k]==r[j+k];++k);
        }
    }
    // Returns true when height[2..lim] contains a run of at least ci consecutive
    // entries that are all >= k.
    bool Ju(int k,int lim,int ci){
        int run=0;
        int idx=2;
        while(idx<=lim){
            // A smaller entry breaks the current run.
            run=(height[idx]>=k)?run+1:0;
            if(run>=ci) return true;
            ++idx;
        }
        return false;
    }
    // Reads N, K and the N samples, builds the suffix array of the sequence and
    // binary-searches the largest length accepted by Ju().
    int main(){
        int n,k,maxx=0;
        if(scanf("%d%d",&n,&k)!=2) return 0;
        for(int i=0;i<n;++i) {
            scanf("%d",num+i);
            // Shift every sample up by one so that 0 is free for the terminator.
            ++num[i];
            maxx=max(maxx,num[i]);
        }
        num[n]=0;
        da(num,n+1,maxx+1);
        // ans starts at 0 so it is defined even if no length is accepted.
        int ans=0,l=1,r=n;
        while(l<=r) {
            int mid=(l+r)>>1;
            // K occurrences correspond to K-1 consecutive height entries >= mid.
            if(Ju(mid,n,k-1)) ans=mid,l=mid+1;
            else r=mid-1;
        }
        printf("%d\n",ans);
        return 0;
    }

    SPOJ SUBST1

    Given a string, we need to find the total number of its distinct substrings.

    Input

    T- number of test cases. T<=20; Each test case consists of one string, whose length is <= 50000

    Output

    For each test case output one number saying the number of distinct substrings.

    Example

    Input:
    2
    CCCCC
    ABABA
    
    Output:
    5
    9
    

    #include<cstdio>
    #include<cstring>
    #include<algorithm>
    using namespace std;
    // Capacity of every array below.
    const int N=5e4+88;
    // wa/wb: the two rank buffers; sa: suffix array; rank: inverse of sa;
    // height[i]: common prefix length of suffixes sa[i-1] and sa[i];
    // wv: keys of the current sorting pass.
    int wa[N],sa[N],rank[N],height[N],wb[N],wv[N];
    // ws: counting-sort buckets; n: length of the current string; T: test cases left.
    int ws[N],n,T;
    // The current input string; main() overwrites its terminator with 'A'-1.
    char s[N];
    // Reports whether the key pairs (r[a], r[a+l]) and (r[b], r[b+l]) are equal.
    bool cmp(int *r,int a,int b,int l){
        const bool sameFirst=(r[a]==r[b]);
        // The second keys are only read when the first keys already match.
        return sameFirst?(r[a+l]==r[b+l]):false;
    }
    // Builds the suffix array of the characters r[0..n) by prefix doubling, then
    // fills rank[] and height[].
    //   r: the text; main() places 'A'-1 at r[n-1] as the terminator.
    //   n: number of characters including the terminator.
    //   m: exclusive upper bound of the mapped keys (r[i]-'A'+1).
    // NOTE(review): a character below 'A'-1 would give a negative bucket index —
    // confirm the input alphabet.
    void da(char *r,int n,int m){
        int *x=wa,*y=wb;
        // First pass: counting sort by the single mapped character ('A'-1 maps to 0).
        for(int i=0;i<m;++i) ws[i]=0;
        for(int i=0;i<n;++i) ++ws[x[i]=(r[i]-'A'+1)];
        for(int i=1;i<m;++i) ws[i]+=ws[i-1];
        for(int i=0;i<n;++i) sa[--ws[x[i]]]=i;
        int p=1;
        // Doubling rounds: j is the half length; p (distinct ranks) becomes the
        // bucket count of the next round and the loop ends when p reaches n.
        for(int j=1;p<n;j<<=1,m=p){
            p=0;
            // y: positions ordered by the second half (the last j positions first).
            for(int i=n-j;i<n;++i) y[p++]=i;
            for(int i=0;i<n;++i) if(sa[i]>=j) y[p++]=sa[i]-j;
            // Counting sort of y by first-half rank; the backward fill preserves
            // the order of y among equal keys.
            for(int i=0;i<n;++i) wv[i]=x[y[i]];
            for(int i=0;i<m;++i) ws[i]=0;
            for(int i=0;i<n;++i) ++ws[wv[i]];
            for(int i=1;i<m;++i) ws[i]+=ws[i-1];
            for(int i=n-1;i>=0;--i) sa[--ws[wv[i]]]=y[i];
            // y now holds the old ranks, x gets the new ones.
            swap(x,y),x[sa[0]]=0,p=1;
            for(int i=1;i<n;++i) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        }
        // Inverse of sa; the entry for sa[0] is not written.
        for(int i=1;i<n;++i) rank[sa[i]]=i;
        int k=0;
        // height[rank[i]]: common prefix of suffix i and its predecessor in sa;
        // k carries over to the next i, reduced by one.
        for(int i=0;i<n-1;height[rank[i++]]=k){
            if(k) --k;
            for(int j=sa[rank[i]-1];r[i+k]==r[j+k];++k) ;
        }    
    }
    // For each test case, prints the number of distinct substrings: every suffix
    // sa[i] contributes its length minus the prefix it shares with sa[i-1].
    int main(){
        if(scanf("%d",&T)!=1) return 0;
        while(T--){
            // Stop instead of reprocessing the old buffer when the input ends early.
            if(scanf("%s",s)!=1) break;
            n=strlen(s);
            // Replace the terminator by a character da() maps to key 0.
            s[n]='A'-1;
            da(s,n+1,199);
            long long ans=0;
            for(int i=1;i<=n;++i) ans+=n-sa[i]-height[i];
            printf("%lld\n",ans);
        }
        return 0;
    }

     URAL 1297 

    最长回文子串

    马拉车做法

    #include<cstdio>
    #include<cstring>
    #include<algorithm>
    using namespace std;
    // Buffer capacity.
    const int N=1008;
    // The text, stored from index 1; main() writes guard characters at s[0] and s[n+1].
    char s[N];
    // p: palindrome radius per centre (reused by both passes); n: text length.
    int p[N],n;
    // Prints the longest palindromic substring of the input word, using one
    // radius pass for odd lengths and one for even lengths.
    int main(){
        scanf("%s",s+1);
        n=strlen(s+1);
        // Two different guard characters stop every expansion at the borders.
        s[0]='-';
        s[n+1]='+';
        int reach=0,centre=0;
        int oddAt,evenAt,oddBest=-1,evenBest=-1;
        // Pass 1: odd palindromes; p[c] counts the centre plus one arm.
        for(int c=1;c<=n;++c){
            p[c]=(reach>c)?min(reach-c,p[2*centre-c]):1;
            while(s[c-p[c]]==s[c+p[c]]) ++p[c];
            if(c+p[c]>reach){
                reach=c+p[c];
                centre=c;
            }
            if(p[c]>oddBest){
                oddBest=p[c];
                oddAt=c;
            }
        }
        reach=0;
        centre=0;
        // Pass 2: even palindromes centred between c and c+1; p[c] is the arm length.
        for(int c=1;c<=n;++c){
            p[c]=(reach>c)?min(reach-c,p[2*centre-c]):0;
            while(s[c-p[c]]==s[c+p[c]+1]) ++p[c];
            if(c+p[c]>reach){
                reach=c+p[c];
                centre=c;
            }
            if(p[c]&&p[c]>evenBest){
                evenBest=p[c];
                evenAt=c;
            }
        }
        // An odd palindrome of radius a spans 2a-1 characters, an even one 2a,
        // so comparing the radii decides which one is longer.
        int from,to;
        if(oddBest>evenBest){
            from=oddAt-oddBest+1;
            to=oddAt+oddBest-1;
        }
        else {
            from=evenAt-evenBest+1;
            to=evenAt+evenBest;
        }
        for(int i=from;i<=to;++i) putchar(s[i]);
        puts("");
    }

     后缀数组法

    #include<cstdio>
    #include<cstring>
    #include<algorithm>
    // Capacity of the arrays; main() builds a text of 2*len+2 keys.
    const int N=2111;
    // r: the combined key sequence; wa/wb: rank buffers; wv: keys of the current
    // sorting pass; tmp: counting-sort buckets (da() aliases it as ws); sa: suffix array.
    int r[N],wa[N],wb[N],wv[N],tmp[N],sa[N];
    // rank: inverse of sa; height[i]: common prefix length of suffixes sa[i-1] and sa[i].
    int rank[N],height[N];
    // Returns 1 when (r[a], r[a+l]) equals (r[b], r[b+l]), otherwise 0.
    int cmp(int *r,int a,int b,int l){
        if(r[a]!=r[b]) return 0;
        if(r[a+l]!=r[b+l]) return 0;
        return 1;
    }
    // Builds the suffix array of r[0..n) by prefix doubling, then fills rank[] and
    // height[].
    //   r: keys in [0, m); main() ends the sequence with a 0.
    //   n: number of keys including that trailing 0.
    //   m: exclusive upper bound of the keys.
    void da(int *r,int n,int m){
        // ws aliases the global tmp[] and serves as the bucket array.
        int i,j,p,*x=wa,*y=wb,*ws=tmp;
        // First pass: counting sort by single key.
        for(i=0;i<m;++i) ws[i]=0;
        for(i=0;i<n;++i) ++ws[x[i]=r[i]];
        for(i=1;i<m;++i) ws[i]+=ws[i-1];
        for(i=0;i<n;++i) sa[--ws[x[i]]]=i;
        // Doubling rounds; p (distinct ranks) becomes the next bucket count and
        // the loop ends when p reaches n.
        for(j=1,p=1;p<n;j<<=1,m=p){
            // y: positions ordered by second half, the last j positions first.
            for(p=0,i=n-j;i<n;++i) y[p++]=i;
            for(i=0;i<n;++i) if(sa[i]>=j) y[p++]=sa[i]-j;
            // Counting sort of y by first-half rank, filled backwards so the
            // order of y survives among equal keys.
            for(i=0;i<n;++i) wv[i]=x[y[i]];
            for(i=0;i<m;++i) ws[i]=0;
            for(i=0;i<n;++i) ++ws[wv[i]];
            for(i=1;i<m;++i) ws[i]+=ws[i-1];
            for(i=n-1;i>=0;--i) sa[--ws[wv[i]]]=y[i];
            // y holds the old ranks after the swap; x receives the new ones.
            std::swap(x,y),p=1,x[sa[0]]=0;
            for(i=1;i<n;++i) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        }
        // Inverse of sa; rank[sa[0]] is left untouched.
        for(i=1;i<n;++i) rank[sa[i]]=i;
        int k=0;
        // height[rank[i]]: common prefix of suffix i with its predecessor in sa;
        // k is carried to the next i, reduced by one.
        for(i=0;i<n-1;height[rank[i++]]=k) 
        {
            if(k) --k;
            for(j=sa[rank[i]-1];r[i+k]==r[j+k];++k);
        }
    }
    // Log[v]: table filled by main() with Log[v]=Log[v>>1]+1;
    // mi[j][i]: minimum of height over the 2^i entries starting at j (built by rmq()).
    int Log[N],mi[N][20];
    void rmq(int n){
        for(int i=1;i<=n;++i) mi[i][0]=height[i];
        int m=Log[n];
        for(int i=1;i<=m;++i) for(int j=1;j<=n;++j) {
            mi[j][i]=mi[j][i-1];
            if(j+(1<<(i-1))<=n) mi[j][i]=std::min(mi[j][i],mi[j+(1<<(i-1))][i-1]);
        }
    }
    int lcp(int a,int b){
        a=rank[a],b=rank[b];
        if(a>b) std::swap(a,b);
        ++a;
        int t=Log[b-a+1];
        return std::min(mi[a][t],mi[b-(1<<t)+1][t]);
    }
    // The input word as read by main().
    char s[N<<1];
    // Prints the longest palindromic substring: the word, a separator and the
    // reversed word are joined, and each centre is answered by an lcp() query.
    int main(){
        Log[1]=0;
        for(int v=2;v<N;++v) Log[v]=Log[v>>1]+1;
        scanf("%s",s);
        const int len=strlen(s);
        // Layout of r: word, 128, reversed word, 0.
        const int n=2*len+1;
        for(int i=0;i<len;++i){
            r[i]=(int)s[i];
            r[n-1-i]=(int)s[i];
        }
        r[len]=128;
        r[n]=0;
        da(r,n+1,130);
        rmq(n);
        int ans=0,pos;
        for(int c=0;c<len;++c){
            // Odd length: the arm starting at c in the word and in the reversal.
            int arm=lcp(c,n-c-1);
            if(2*arm-1>ans){
                ans=2*arm-1;
                pos=c-arm+1;
            }
            // Even length: centre between c-1 and c.
            arm=lcp(c,n-c);
            if(2*arm>ans){
                ans=2*arm;
                pos=c-arm;
            }
        }
        for(int i=0;i<ans;++i) putchar(s[pos+i]);
        puts("");
    }

    POJ 2406  

    给定一个字符串S,已知该串是由某串重复K次 连接得到的。

    求最大的k

    kmp做法

    #include<cstdio>
    #include<cstring>
    #include<algorithm>
    // Buffer capacity.
    const int N=1e6+88;
    // The current input string.
    char s[N];
    // len: length of s; nxt: the table filled by get(), with nxt[0] = -1.
    int len,nxt[N];
    // Fills nxt[0..len] for s: nxt[0] is -1 and nxt[i] is the value reached by
    // following the fallback chain while matching s against itself.
    void get(){
        nxt[0]=-1;
        int pos=0,border=-1;
        while(pos<len){
            if(border!=-1&&s[pos]!=s[border]){
                // Mismatch: fall back and retry the same position.
                border=nxt[border];
                continue;
            }
            ++pos;
            ++border;
            nxt[pos]=border;
        }
    }
    // For every input string up to the line ".", prints how many times its
    // shortest period repeats (1 when the period does not divide the length).
    int main(){
        // Also stop at end of input; otherwise the last string would be
        // processed forever when the "." line is missing.
        while(scanf("%s",s)==1&&strcmp(s,".")){
            len=strlen(s);
            get();
            // Candidate period: length minus the value get() stored for the whole string.
            int mi=len-nxt[len];
            if(len%mi==0) printf("%d\n",len/mi);
            else puts("1");
        }
        return 0;
    }

     DC3极限卡过。。。思想和kmp一样

    #include<cstdio>
    #include<cstdlib>
    #include<cstring>
    #include<cmath>
    #include<vector>
    #include<algorithm>
    using namespace std;
    // Capacity of every array of this program.
    const int maxn = int(3e6)+10;
    // F maps a text position x (with x%3 != 0) to its index in the reduced
    // sequence: x/3 when x%3==1, otherwise x/3+tb. Uses the caller's local tb.
    #define F(x) ((x)/3+((x)%3==1?0:tb))
    // G maps a reduced index back to a text position: indices below tb become
    // 3*x+1, the others 3*(x-tb)+2. Uses the caller's local tb.
    #define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2)
    // wa/wb: position lists; wv: keys of the current sort() pass and, inside
    // dc3(), ranks read by c12(); ws: counting-sort buckets; sa: suffix array.
    int wa[maxn],wb[maxn],wv[maxn],ws[maxn],sa[maxn];
    // Returns 1 when the three keys starting at a equal the three keys starting
    // at b, otherwise 0.
    int c0(int *r,int a,int b)
    {
        for(int d=0;d<3;++d)
            if(r[a+d]!=r[b+d]) return 0;
        return 1;
    }
    // Returns 1 when the suffix at a orders before the suffix at b. The first key
    // decides; on a tie with k==2 the next position is compared the same way, and
    // otherwise the ranks stored in wv[] for the following positions decide.
    int c12(int k,int *r,int a,int b)
    {
        if(r[a]!=r[b]) return r[a]<r[b];
        if(k==2) return c12(1,r,a+1,b+1);
        return wv[a+1]<wv[b+1];
    }
    // Counting sort: writes the n entries of a into b ordered by the key
    // r[a[i]], keeping the order of a among equal keys. Keys must lie in [0, m).
    void sort(int *r,int *a,int *b,int n,int m)
    {
        int idx=0;
        while(idx<n){
            wv[idx]=r[a[idx]];
            ++idx;
        }
        for(int key=0;key<m;++key) ws[key]=0;
        for(idx=0;idx<n;++idx) ++ws[wv[idx]];
        for(int key=1;key<m;++key) ws[key]+=ws[key-1];
        // Filling from the back keeps equal keys in their original order.
        for(idx=n;idx-->0;) b[--ws[wv[idx]]]=a[idx];
    }
    // Builds the suffix array of r[0..n) into sa by recursing on the positions
    // that are not multiples of three.
    //   r:  keys in [0, m); r[n] and r[n+1] are overwritten with 0, and the area
    //       starting at r+n is used for the reduced sequence of the recursion.
    //   sa: output; the area starting at sa+n is used by the recursion.
    //   n:  number of keys; m: exclusive upper bound of the keys.
    void dc3(int *r,int *sa,int n,int m) // parameters have the same meaning as in DA
    {
        int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p;
        // Two zero keys so that every triple read below is defined.
        r[n]=r[n+1]=0;
        // Collect the positions with i%3 != 0.
        for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i;
        // Three stable passes (third, second, first key) order them by triple.
        sort(r+2,wa,wb,tbc,m);
        sort(r+1,wb,wa,tbc,m);
        sort(r,wa,wb,tbc,m);
        // Name the triples in sorted order; equal triples share a name. The name
        // of position x is stored at rn[F(x)].
        for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;i++)
        rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++;
        // Names not yet distinct: recurse on the reduced sequence. Otherwise the
        // names are already the ranks and san is their inverse.
        if(p<tbc) dc3(rn,san,tbc,p);
        else for(i=0;i<tbc;i++) san[rn[i]]=i;
        // Positions that are multiples of three, taken in the order of the
        // suffix that follows each of them (san entries below tb).
        for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3;
        if(n%3==1) wb[ta++]=n-1;
        // One more stable pass by first key orders them completely.
        sort(r,wb,wa,ta,m);
        // Convert san back to text positions (into wb) and record each one's rank in wv.
        for(i=0;i<tbc;i++) wv[wb[i]=G(san[i])]=i;
        // Merge the two sorted lists, c12() deciding each comparison.
        for(i=0,j=0,p=0;i<ta && j<tbc;p++)
        sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
        for(;i<ta;p++) sa[p]=wa[i++];
        for(;j<tbc;p++) sa[p]=wb[j++];
        return;
    }
    // rank: inverse of sa; height[i]: common prefix length of suffixes sa[i-1]
    // and sa[i]; minn: per-suffix values filled by calheight(); r: the text as ints.
    int rank[maxn],height[maxn],minn[maxn],r[maxn];
    // Fills rank[], height[] and minn[] from the suffix array.
    //   r:  the text, n characters followed by a terminator that is smaller than
    //       every other character.
    //   sa: suffix array over n+1 entries; sa[0] is the terminator position.
    //   n:  text length without the terminator.
    // After the call minn[i] is the longest common prefix of suffix 0 and suffix i
    // for every i in 1..n-1; minn[0] and minn[n] are left at the large initial value.
    void calheight(int *r,int *sa,int n)
    {
        int i,j,k=0;
        for(i=1;i<=n;i++) rank[sa[i]]=i;
        // height[rank[i]]: common prefix of suffix i and its predecessor in sa;
        // k carries over to the next i, reduced by one.
        for(i=0;i<n;height[rank[i++]]=k)
        for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
        const int INF=0x3f3f3f3f;
        // Only the entries of the current text are reset; main() reads minn[1..n].
        for(i=0;i<=n;i++) minn[i]=INF;
        // The common prefix with suffix 0 is the minimum of height over the whole
        // stretch of sa between the two suffixes, so a running minimum is carried
        // outwards from rank[0]. Storing only the adjacent height reports periods
        // that do not exist (e.g. period 3 for "zababa").
        int cur=INF;
        for(i=rank[0]+1;i<=n;i++){
            cur=min(cur,height[i]);
            minn[sa[i]]=cur;
        }
        cur=INF;
        for(i=rank[0]-1;i>0;i--){
            cur=min(cur,height[i+1]);
            minn[sa[i]]=cur;
        }
    }
    // The current input string; main() overwrites its terminator with 'a'-1.
    char s[maxn];
    // For every input string up to the line ".", prints how many times its
    // shortest period repeats (1 when that period does not divide the length).
    int main(){
        // Also stop at end of input instead of looping on the last string.
        while(scanf("%s",s)==1&&strcmp(s,".")){
            int len=strlen(s);
            // Terminator smaller than every lowercase letter.
            s[len]='a'-1;
            // Copy exactly the text and its terminator; bytes left behind by a
            // longer previous string are not part of this test case.
            for(int i=0;i<=len;++i) r[i]=s[i];
            dc3(r,sa,len+1,280);
            calheight(r,sa,len);
            // The smallest shift i whose suffix matches the start of the string
            // over its full length len-i is the shortest period.
            int period=len;
            for(int i=1;i<len;++i) {
                if(minn[i]==len-i) {
                    period=i;
                    break;
                }
            }
            printf("%d\n",len%period==0?len/period:1);
        }
        return 0;
    }

     POJ 3693

    题解链接

    题目大意就是求重复次数最多的连续重复子串。例如abababc 答案就是ababab  因为ab连续出现的次数最多

    并且题目还要求输出字典序最小的

    比如abababcdcdcd 

    ababab和cdcdcd都符合要求

    但是ababab字典序小

    #include<cstdio>
    #include<cstring>
    #include<algorithm>
    using namespace std;
    // Capacity of every array of this program.
    const int N=1e5+88;
    // r: the text as ints; wa/wb: rank buffers; wv: keys of the current sorting
    // pass; tmp: counting-sort buckets (da() aliases it as ws); sa: suffix array.
    int r[N],wa[N],wb[N],wv[N],tmp[N],sa[N];
    // Tells whether positions a and b agree on both keys r[p] and r[p+l].
    bool cmp(int *r,int a,int b,int l){
        bool equal=(r[a]==r[b]);
        // The second key is read only after the first one matched.
        if(equal) equal=(r[a+l]==r[b+l]);
        return equal;
    }
    // Builds the suffix array of r[0..n) into sa by prefix doubling.
    //   r:  keys in [0, m); main() ends the sequence with a 0.
    //   sa: output array with at least n entries.
    //   n:  number of keys including the trailing 0.
    //   m:  exclusive upper bound of the keys.
    void da(int *r,int *sa,int n,int m){
        // ws aliases the global tmp[] and serves as the bucket array.
        int i,j,p,*x=wa,*y=wb,*ws=tmp;
        // First pass: counting sort by single key.
        for(i=0;i<m;++i) ws[i]=0;
        for(i=0;i<n;++i) ++ws[x[i]=r[i]];
        for(i=1;i<m;++i) ws[i]+=ws[i-1];
        for(i=0;i<n;++i) sa[--ws[x[i]]]=i;
        // Doubling rounds; p (distinct ranks) becomes the next bucket count and
        // the loop ends when p reaches n.
        for(j=1,p=1;p<n;j<<=1,m=p){
            // y: positions ordered by second half, the last j positions first.
            for(p=0,i=n-j;i<n;++i) y[p++]=i;
            for(i=0;i<n;++i) if(sa[i]>=j) y[p++]=sa[i]-j;
            // Counting sort of y by first-half rank, filled backwards so the
            // order of y survives among equal keys.
            for(i=0;i<n;++i) wv[i]=x[y[i]];
            for(i=0;i<m;++i) ws[i]=0;
            for(i=0;i<n;++i) ++ws[wv[i]];
            for(i=1;i<m;++i) ws[i]+=ws[i-1];
            for(i=n-1;i>=0;--i) sa[--ws[wv[i]]]=y[i];
            // y holds the old ranks after the swap; x receives the new ones.
            swap(x,y),p=1,x[sa[0]]=0;
            for(i=1;i<n;++i) x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        }
    }
    // rank: inverse of sa; height[i]: common prefix length of suffixes sa[i-1]
    // and sa[i]. Both are filled by calheight().
    int rank[N],height[N];
    // Fills rank[] and height[] from the suffix array.
    //   r:  the text, n keys followed by a terminator key.
    //   sa: suffix array over n+1 entries; sa[0] is the terminator position.
    //   n:  text length without the terminator.
    void calheight(int *r,int *sa,int n){
        int i,j,k=0;
        // Start at 0 so the terminator suffix gets rank 0 as well. The array is
        // reused for every test case, and lcp() would otherwise read the rank a
        // previous, longer string left at that position.
        for(i=0;i<=n;++i) rank[sa[i]]=i;
        // height[rank[i]]: common prefix of suffix i and its predecessor in sa;
        // k carries over to the next i, reduced by one.
        for(i=0;i<n;height[rank[i++]]=k){
            if(k) --k;
            for(j=sa[rank[i]-1];r[i+k]==r[j+k];++k);
        }
    }
    // Log[v]: table filled by main() with Log[v]=Log[v>>1]+1;
    // mi[j][i]: minimum of height over the 2^i entries starting at j (built by rmq()).
    int Log[N],mi[N][20];
    void rmq(int n){
        for(int i=1;i<=n;++i) mi[i][0]=height[i];
        int m=Log[n];
        for(int i=1;i<=m;++i) for(int j=1;j<=n;++j) {
            mi[j][i]=mi[j][i-1];
            if(j+(1<<(i-1))<=n) mi[j][i]=min(mi[j][i],mi[j+(1<<(i-1))][i-1]);
        }
    }
    int lcp(int a,int b){
        a=rank[a],b=rank[b];
        if(a>b) swap(a,b);
        ++a;
        int t=Log[b-a+1]; 
        return min(mi[a][t],mi[b-(1<<t)+1][t]);
    }
    // The current input string.
    char s[N];
    // Period lengths at which main() found the best repetition count.
    int ans[N];
    // For every input string up to the line "#", prints the substring made of the
    // largest number of consecutive repetitions of a block, taking the
    // lexicographically smallest one among equals.
    int main(){
        int cas=0;
        Log[1]=0;
        for(int i=2;i<N;++i) Log[i]=Log[i>>1]+1;
        // Also stop at end of input instead of looping on the last string.
        while(scanf("%s",s)==1&&strcmp(s,"#")){
            int n=strlen(s);
            for(int i=0;i<n;++i) r[i]=s[i];
            r[n]=0;
            da(r,sa,n+1,130);
            calheight(r,sa,n);
            rmq(n);
            int cnt=0,mx=-1;
            // Phase 1: for each period l, probe the positions 0, l, 2l, ... and
            // record every l that reaches the best repetition count mx.
            for(int l=1;l<n;l++)
            {
                for(int i=0;i+l<n;i+=l)
                {
                    int k=lcp(i,i+l);
                    int p=k/l+1;
                    // Shifting the start left by l-k%l may complete one more block.
                    int t=i-(l-k%l);
                    if(t>=0&&k%l)
                    {
                        int tk=lcp(t,t+l);
                        if(tk/l+1>p) p=tk/l+1;
                    }
                    if(p>mx) cnt=0,mx=p;
                    // l only grows, so comparing with the last entry keeps each
                    // period once and cnt below n; storing one entry per probe
                    // can exceed the capacity of ans[].
                    if(p==mx&&(cnt==0||ans[cnt-1]!=l)) ans[cnt++]=l;
                }
            }
            // Phase 2: walk the suffixes in sorted order and take the first start
            // at which some recorded period repeats mx times.
            int pos=0,outLen=n;
            bool found=false;
            for(int i=1;i<=n&&!found;i++)
            {
                for(int j=0;j<cnt;j++)
                {
                    int k=ans[j];
                    // The repetition must fit inside the string; this also keeps
                    // lcp() away from positions at or beyond n.
                    if(sa[i]+mx*k>n) continue;
                    // With mx == 1 any start qualifies and no query is needed.
                    if(mx>1&&lcp(sa[i],sa[i]+k)<(mx-1)*k) continue;
                    pos=sa[i];
                    outLen=mx*k;
                    found=true;
                    break;
                }
            }
            printf("Case %d: ",++cas);
            for(int i=0;i<outLen;++i) printf("%c",s[pos+i]);
            puts("");
        }
        return 0;
    }
  • 相关阅读:
    springboot搭建环境访问Controller层返回404
    SpringMVC使用注解@RequestMapping映射请求
    Redis数据类型
    mysql小结
    将数据四舍五入到十位
    Repeated DNA Sequences
    Reverse Linked List II
    Shortest Palindrome
    Single Number
    Sort Colors
  • 原文地址:https://www.cnblogs.com/mfys/p/8438460.html
Copyright © 2020-2023  润新知