• POJ-3450 Corporate Identity (KMP+后缀数组)


    Description

    Beside other services, ACM helps companies to clearly state their “corporate identity”, which includes company logo but also other signs, like trademarks. One of such companies is Internet Building Masters (IBM), which has recently asked ACM for a help with their new identity. IBM do not want to change their existing logos and trademarks completely, because their customers are used to the old ones. Therefore, ACM will only change existing trademarks instead of creating new ones.

    After several other proposals, it was decided to take all existing trademarks and find the longest common sequence of letters that is contained in all of them. This sequence will be graphically emphasized to form a new logo. Then, the old trademarks may still be used while showing the new identity.

    Your task is to find such a sequence.

    Input

    The input contains several tasks. Each task begins with a line containing a positive integer N, the number of trademarks (2 ≤ N ≤ 4000). The number is followed by N lines, each containing one trademark. Trademarks will be composed only from lowercase letters, the length of each trademark will be at least 1 and at most 200 characters.

    After the last trademark, the next task begins. The last task is followed by a line containing zero.

    Output

    For each task, output a single line containing the longest string contained as a substring in all trademarks. If there are several strings of the same length, print the one that is lexicographically smallest. If there is no such non-empty string, output the words “IDENTITY LOST” instead.

    Sample Input

    3
    aabbaabb
    abbababb
    bbbbbabb
    2
    xyz
    abc
    0

    Sample Output

    abb
    IDENTITY LOST

    题目大意:给多个字符串,找出最长的并且字典序最小的公共子串.
    两种做法:
      (1).使用KMP.每一个子串都是某个后缀的前缀.枚举最短字符串的每一个后缀suff(i),让其他所有的字符串去跟suff(i)做kmp匹配,会得到一个suff(i)的公共前缀pre_suff(i),选一个最长的pre_suff即为答案.枚举后缀时按照字典序枚举,可以省略掉比较长度相同的pre_suff这一过程.
      (2).使用后缀数组.首先将所有的字符串连接成一条长串,求出height数组之后,二分枚举最长公共子串的长度mid,然后根据height值是否不小于mid将数组分成若干个连续的区间.查看每一个区间,只要某个区间中的后缀来自所有的字符串,那么存在长度为mid的公共子串.但是我一直TLE.

    代码如下(第一种做法):
    #include <iostream>
    #include <cstdio>
    #include <cstring>
    #include <algorithm>
    using namespace std;
    
    // Capacity constant: row count of the trademark table, and also the
    // "infinity" start value for the running minimums in input()/getLongestPre().
    const int N=4005;
    
    // Work arrays used by buildSA(); each is sized for a single string of at most ~200 characters.
    int SA[205];   // suffix array: SA[r] is the start index of the suffix placed at sorted position r
    int tSA[205];  // second buffer buildSA() alternates with rk (second-key order / previous ranks)
    int rk[205];   // first rank buffer; buildSA() swaps its role with tSA each round
    int cnt[205];  // bucket counters for the counting sorts inside buildSA()
    
    // Decides whether the suffixes starting at i and j compare equal on their
    // first 2*k characters, given y = ranks by the first k characters.
    // A suffix whose second half would start at or beyond n has no second key;
    // two suffixes only match when both or neither have one.
    bool isSame(int *y,int i,int j,int k,int n)
    {
        const bool iHasTail=(i+k<n);
        const bool jHasTail=(j+k<n);
        if(y[i]!=y[j]||iHasTail!=jHasTail)
            return false;
        return y[i+k]==y[j+k];
    }
    
    // Builds the suffix array of str[0..n-1] into the global SA by prefix doubling,
    // using counting sort in every round.
    //   str : text to index; characters are mapped with (c - 'a'), so lowercase letters are expected
    //   n   : number of characters of str to index
    // Uses the globals rk/tSA as rank/key buffers and cnt as the bucket array.
    void buildSA(char* str,int n)
    {
        int *x=rk;   // x: current rank of each suffix
        int *y=tSA;  // y: scratch (second-key order, then previous ranks after the swap)
        int m=26;    // number of distinct rank values; starts as the alphabet size
        // Round 0: counting sort of the suffixes by their first character.
        for(int i=0;i<m;++i) cnt[i]=0;
        for(int i=0;i<n;++i) ++cnt[x[i]=(str[i]-'a')];
        for(int i=1;i<m;++i) cnt[i]+=cnt[i-1];
        for(int i=n-1;i>=0;--i) SA[--cnt[x[i]]]=i;
    
        // Each round extends the sorted prefix length from k to 2*k.
        for(int k=1;k<=n;k<<=1){
            int p=0;
            // Suffixes starting in the last k positions have no second half: they come first.
            for(int i=n-k;i<n;++i) y[p++]=i;
            // The rest follow in the order of their second half, read off the current SA.
            for(int i=0;i<n;++i) if(SA[i]>=k) y[p++]=SA[i]-k;
    
            // Counting sort by first-half rank; walking y backwards keeps the second-key order within a bucket.
            for(int i=0;i<m;++i) cnt[i]=0;
            for(int i=0;i<n;++i) ++cnt[x[y[i]]];
            for(int i=1;i<m;++i) cnt[i]+=cnt[i-1];
            for(int i=n-1;i>=0;--i) SA[--cnt[x[y[i]]]]=y[i];
    
            // Recompute ranks: after the swap, y holds the old ranks and x receives the new ones.
            p=1;
            swap(x,y);
            x[SA[0]]=0;
            for(int i=1;i<n;++i)
                x[SA[i]]=isSame(y,SA[i],SA[i-1],k,n)?p-1:p++;
    
            // p is the number of distinct ranks; once it reaches n the order is final.
            if(p>=n) break;
            m=p;  // the next counting sort only needs p buckets
        }
    }
    
    // Trademarks of the current task, one NUL-terminated string per row (filled by input()).
    char tdmks[N][205];
    // KMP failure table for the current pattern, written by getNext() and read by match().
    int nxt[205];
    
    // Computes the KMP failure table of the pattern str[0..str_len-1] into the
    // global nxt: nxt[i] is the fallback state used after a mismatch in state i.
    // nxt[0] and nxt[1] are always set to 0.
    void getNext(char* str,int str_len)
    {
        nxt[0]=0;
        nxt[1]=0;
        int pos=1;
        while(pos<str_len){
            // Start from the fallback of the current state and keep falling
            // back until the next character extends the border (or we hit 0).
            int border=nxt[pos];
            while(border!=0&&str[pos]!=str[border])
                border=nxt[border];
            if(str[pos]==str[border])
                nxt[pos+1]=border+1;
            else
                nxt[pos+1]=0;
            ++pos;
        }
    }
    
    // Runs the KMP automaton of the pattern str[0..str_len-1] (failure table must
    // already be in the global nxt, see getNext) over the NUL-terminated text ptr.
    // Returns the length of the longest prefix of str that occurs somewhere in ptr.
    int match(char* str,int str_len,char* ptr)
    {
        int best=0,k=0;
        // Walk the text until its terminator; no separate strlen pass is needed.
        for(int i=0;ptr[i]!='\0';++i){
            while(k&&ptr[i]!=str[k]) k=nxt[k];
            if(str[k]==ptr[i]){
                ++k;
                if(k>best) best=k;
                // The whole pattern matched: nothing longer is possible, and
                // returning here also avoids ever reading str[str_len].
                if(k==str_len) return str_len;
            }
        }
        return best;
    }
    
    // Returns the length of the longest prefix of str[0..str_len-1] that occurs
    // as a substring in every one of the first cnt_tdmks trademarks.
    // Returns N when cnt_tdmks is 0 (no trademark constrains the answer).
    int getLongestPre(char* str,int str_len,int cnt_tdmks)
    {
        getNext(str,str_len);
        int long_pre=N;
        // Once the common length drops to 0 it cannot recover, so stop scanning.
        for(int i=0;i<cnt_tdmks&&long_pre>0;++i){
            long_pre=min(match(str,str_len,tdmks[i]),long_pre);
        }
        return long_pre;
    }
    
    // Reads cnt_tdmks trademarks from stdin into tdmks[0..cnt_tdmks-1].
    // Returns the index of the shortest one (the first such index on ties),
    // or 0 when no trademark was read.
    int input(int cnt_tdmks)
    {
        int minLen=N,id_minLen=0;
        for(int i=0;i<cnt_tdmks;++i){
            // Width 204 keeps the token plus its NUL inside the 205-byte row.
            // On a failed read, store an empty string instead of keeping stale data.
            if(scanf("%204s",tdmks[i])!=1) tdmks[i][0]='\0';
            const int len=static_cast<int>(strlen(tdmks[i]));
            if(len<minLen){
                minLen=len;
                id_minLen=i;
            }
        }
        return id_minLen;
    }
    
    // Solves one task and prints its answer line.
    //   p         : index of the shortest trademark (every common substring is a substring of it)
    //   cnt_tdmks : number of trademarks in tdmks
    // Suffixes of tdmks[p] are tried in lexicographic order (via the suffix array),
    // and only a strictly longer common prefix replaces the current answer, so
    // among answers of equal length the lexicographically smallest one is kept.
    void solve(int p,int cnt_tdmks)
    {
        int m=strlen(tdmks[p]);
        buildSA(tdmks[p],m);
        int ans_len=0,ans_p=0;
        for(int i=0;i<m;++i){
            // A suffix no longer than the current answer cannot improve it.
            if(m-SA[i]<=ans_len) continue;
            int len=getLongestPre(tdmks[p]+SA[i],m-SA[i],cnt_tdmks);
            if(len>ans_len){
                ans_len=len;
                ans_p=SA[i];
            }
        }
        if(ans_len){
            // Print exactly ans_len characters starting at ans_p.
            printf("%.*s\n",ans_len,tdmks[p]+ans_p);
        }else{
            printf("IDENTITY LOST\n");
        }
    }
    
    // Reads tasks until the terminating 0 (or end of input) and solves each one.
    int main()
    {
        //freopen("in.txt","r",stdin);
        int n=0;
        // scanf returns EOF (-1, which is truthy) at end of input, so require
        // exactly one successful conversion before trusting n.
        while(scanf("%d",&n)==1&&n>0)
        {
            solve(input(n),n);
        }
        return 0;
    }
    
  • 相关阅读:
    使用正向proxy 连调部署在k8s 中的spring cloud 中的rest服务
    goflow golang 的基于flow的编程库
    gvm golang 的多版本工具
    jvm-profiler 学习试用
    httpdiff http 请求diff 工具
    tengine lua 模块docker 镜像集成
    tengine 支持dubbo 的docker镜像
    openresty ngx.location.capture http2 问题
    systemd 使用rc.local 说明
    revel golang的全栈开发框架
  • 原文地址:https://www.cnblogs.com/20143605--pcx/p/6347606.html
Copyright © 2020-2023  润新知