• 暑假集训 || AC自动机


    HDU 2222

    题意:给n个模式串和一个字符串,求有多少个模式串在这个字符串中出现

    思路:裸题,注意数组开的大小

    #include <iostream>
    #include <cstdio>
    #include <cstdlib>
    #include <cmath>
    #include <cstring>
    #include <algorithm>
    #include <queue>
    using namespace std;
    typedef long long LL;
    // Capacity (maximum number of trie nodes) of the automaton arrays below.
    const int SZ = 500100;
    // Buffer main() reads each pattern into before inserting it.
    char keystr[55];
    // Index of the most recently allocated trie node; node 0 is the root.
    int tot;
    // fail[v]  : failure link of node v.
    // ch[v][c] : node reached from v on letter index c (0 = no child until
    //            getfail() replaces missing edges with fallback transitions).
    // sum[v]   : number of inserted patterns that end at node v.
    int fail[SZ], ch[SZ][33], sum[SZ];
    // Buffer main() reads the text to be searched into.
    char str[1000005];
    void insert(char s[])
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'a';
            if(!ch[p][c]) ch[p][c] = ++tot;
            p = ch[p][c];
        }
        sum[p]++;
    }
    queue<int> q;
    void getfail()
    {
        fail[0] = 0;
        q.push(0);
        while(q.size())
        {
            int u = q.front();
            q.pop();
            for(int i = 0; i < 26; i++)
            {
                if(ch[u][i])
                {
                    if(u == 0) fail[ch[u][i]] = 0;
                    else
                    {
                        int v = fail[u];
                        while(v && ch[v][i] == 0) v = fail[v];
                        fail[ch[u][i]] = ch[v][i];
                    }
                    q.push(ch[u][i]);
                }
                else ch[u][i] = ch[fail[u]][i];
            }
        }
    }
    
    // Returns how many inserted patterns occur at least once in the text s.
    //
    // The automaton is advanced one character at a time; after each step the
    // failure chain of the current node is walked and the pattern counts found
    // there are added to the answer. Every node is harvested at most once:
    // once visited its sum is set to -1, which both prevents double counting
    // and lets later walks stop early (everything further up the chain was
    // already harvested by the walk that set the marker).
    //
    // Note: the walk must NOT stop at a node whose sum is 0. Such a node
    // merely ends no pattern itself; nodes further along its failure chain may
    // still end one (e.g. patterns "abcd" and "c" with text "abc").
    //
    // Characters outside 'a'..'z' cannot be part of any pattern, so they reset
    // the automaton to the root instead of indexing outside the table.
    //
    // Side effect: sum[] is consumed (visited nodes are left at -1); call
    // init() and rebuild the automaton before reusing it.
    int match(char s[])
    {
        int ans = 0, p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'a';
            if(c < 0 || c >= 26)
            {
                p = 0;
                continue;
            }
            while(p && ch[p][c] == 0) p = fail[p];
            p = ch[p][c];
            for(int v = p; v && sum[v] != -1; v = fail[v])
            {
                ans += sum[v];
                sum[v] = -1;    // mark as harvested
            }
        }
        return ans;
    }
    // Resets the automaton to an empty trie: clears the failure links, the
    // transition table and the per-node pattern counts, and rewinds the node
    // counter so that only the root (node 0) exists.
    void init()
    {
        memset(fail, 0, sizeof fail);
        memset(ch, 0, sizeof ch);
        memset(sum, 0, sizeof sum);
        tot = 0;
    }
    int main()
    {
        int T;
        scanf("%d", &T);
        while(T--)
        {
            int n;
            scanf("%d", &n);
            init();
            for(int i = 0; i < n; i++)
            {
                scanf("%s", keystr);
                insert(keystr);
            }
            getfail();
            scanf("%s", str);
            printf("%d
    ", match(str));
        }
        return 0;
    }
    View Code

    HDU 3065

    题意:给n个模式串,一个字符串,输出在这个字符串中出现的模式串的出现次数

    思路:裸题,记录idx[p] = id;//以p结尾的是第id个模式串

    #include <iostream>
    #include <cstdio>
    #include <cstdlib>
    #include <cmath>
    #include <cstring>
    #include <algorithm>
    #include <queue>
    using namespace std;
    typedef long long LL;
    typedef long double LD;
    // Capacity (maximum number of trie nodes) of the automaton arrays below.
    const int SZ = 100100;
    // keystr[i] stores the i-th pattern (main() uses indices 1..n) so it can be
    // printed together with its count.
    char keystr[1010][55];
    // Index of the most recently allocated trie node; node 0 is the root.
    int tot;
    // fail[v]: failure link of node v.
    int fail[SZ];
    // ch[v][c] : node reached from v on letter index c; match() uses index 26
    //            for every character outside 'A'..'Z'.
    // idx[v]   : id of the pattern that ends at node v (0 = none).
    // cnt[id]  : number of occurrences found for pattern id.
    int ch[SZ][30], idx[SZ], cnt[SZ];
    // Buffer main() reads the text to be searched into.
    char str[2000005];
    void insert(char s[], int id)
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'A';
            if(!ch[p][c]) ch[p][c] = ++tot;
            p = ch[p][c];
        }
        idx[p] = id;//以p结尾的是第id个模式串
    }
    queue<int> q;
    void getfail()
    {
        fail[0] = 0;
        q.push(0);
        while(q.size())
        {
            int u = q.front();
            q.pop();
            for(int i = 0; i < 26; i++)
            {
                if(ch[u][i])
                {
                    if(u == 0) fail[ch[u][i]] = 0;
                    else
                    {
                        int v = fail[u];
                        while(v && ch[v][i] == 0) v = fail[v];
                        fail[ch[u][i]] = ch[v][i];
                    }
                    q.push(ch[u][i]);
                }
                else ch[u][i] = ch[fail[u]][i];
            }
        }
    }
    
    // Scans the text s and, for every position, increments cnt[id] for each
    // pattern id found on the failure chain of the current automaton node
    // (i.e. each pattern that ends at that position). Any character outside
    // 'A'..'Z' is mapped to letter index 26.
    void match(char s[])
    {
        int state = 0;
        const int n = strlen(s);
        for(int k = 0; k < n; ++k)
        {
            const char letter = s[k];
            const int c = (letter >= 'A' && letter <= 'Z') ? letter - 'A' : 26;
            while(state != 0 && ch[state][c] == 0) state = fail[state];
            state = ch[state][c];
            for(int v = state; v != 0; v = fail[v])
                if(idx[v] > 0) ++cnt[idx[v]];
        }
    }
    
    // Resets the automaton to an empty trie: clears the pattern ids, failure
    // links, transition table and occurrence counters, and rewinds the node
    // counter so that only the root (node 0) exists.
    void init()
    {
        memset(idx, 0, sizeof idx);
        memset(fail, 0, sizeof fail);
        memset(ch, 0, sizeof ch);
        memset(cnt, 0, sizeof cnt);
        tot = 0;
    }
    // Processes test cases until the input ends. Each case consists of a
    // pattern count n, n patterns and one text; for every pattern that occurs
    // at least once, prints "<pattern>: <count>" in input order.
    // The loop stops on EOF *and* on a non-numeric token (scanf returning 0),
    // and all %s reads are bounded by the size of their destination buffer.
    int main()
    {
        int n;
        while(scanf("%d", &n) == 1)
        {
            init();
            for(int i = 1; i <= n; i++)
            {
                if(scanf("%54s", keystr[i]) != 1) return 0;
                insert(keystr[i], i);
            }
            getfail();
            if(scanf("%2000004s", str) != 1) return 0;
            match(str);
            for(int i = 1; i <= n; i++)
                if(cnt[i]) printf("%s: %d\n", keystr[i], cnt[i]);
        }
        return 0;
    }
    View Code

    ZOJ 3228

    题意:给n个模式串,一个字符串,分别求每个模式串在字符串中出现的次数,其中输入中0表示可以覆盖着出现,1表示不能

    思路:如果没有1那种情况就是裸题;对于1(不允许重叠)的情况,需要额外维护下面两个数组:

    用last[i]记录Trie节点i在上一次匹配时所对应的字符在文本串中的位置。

    用pos[i]记录Trie节点i所对应的字符在模式串中的位置。

    没有重叠的判断 —— 当前字符位置 - last[当前节点] >= pos[当前节点]。

    abababac - aba

    pos[1] = 1  pos[2] = 2  pos[3] = 3

    i = 4时,last[3] = 2 pos[3] = 3 而i - last[3] < pos[3] 所以不行

    3再往前到1,last[1] = 2 pos[1] = 1 可以了,然后last[1] = 4,节点1匹配到了文本串中的第4位

    太高端了。。。。

    卡数组大小,记得开n*len的

    #include <iostream>
    #include <cstdio>
    #include <cstdlib>
    #include <cmath>
    #include <cstring>
    #include <algorithm>
    #include <queue>
    using namespace std;
    typedef long long LL;
    typedef long double LD;
    // Capacity of the per-node and per-query arrays below.
    const int SZ = 600100;
    // Buffer main() reads each pattern into before inserting it.
    char keystr[10];
    // Index of the most recently allocated trie node; node 0 is the root.
    int tot;
    // fail[v]: failure link of node v.
    int fail[SZ];
    // ch[v][c]  : node reached from v on letter index c.
    // idx[id]   : trie node at which query pattern id ends.
    // cnt[0][v] : number of text positions at which node v was matched
    //             (overlapping occurrences allowed).
    // cnt[1][v] : number of matches of node v counted without overlap.
    int ch[SZ][30], idx[SZ], cnt[2][SZ];
    // last[v] : text index of the most recent match of v counted in cnt[1]
    //           (init() sets it to -1).
    // pos[v]  : depth of node v, i.e. length of the string it represents.
    // typ[i]  : type flag read for query i; used as the first index into cnt.
    int last[SZ], pos[SZ], typ[SZ];
    // Buffer main() reads the text to be searched into.
    char str[100005];
    void insert(char s[], int id)
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'a';
            if(!ch[p][c]) ch[p][c] = ++tot;
            p = ch[p][c];
            pos[p] = i+1;
        }
        idx[id] = p;
    }
    queue<int> q;
    void getfail()
    {
        fail[0] = 0;
        q.push(0);
        while(q.size())
        {
            int u = q.front();
            q.pop();
            for(int i = 0; i < 26; i++)
            {
                if(ch[u][i])
                {
                    if(u == 0) fail[ch[u][i]] = 0;
                    else
                    {
                        int v = fail[u];
                        while(v && ch[v][i] == 0) v = fail[v];
                        fail[ch[u][i]] = ch[v][i];
                    }
                    q.push(ch[u][i]);
                }
                else ch[u][i] = ch[fail[u]][i];
            }
        }
    }
    
    void match(char s[])
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = s[i] - 'a';
            p = ch[p][c];
            int v = p;
            while(v)
            {
                cnt[0][v]++;
                if(i - last[v] >= pos[v])
                {
                    cnt[1][v]++;
                    last[v] = i;
                }
                v = fail[v];
            }
        }
    }
    
    // Prepares the globals for a new test case: clears node depths, the
    // transition table and both occurrence counters, sets every last[] entry
    // to -1 (memset with -1 writes all-ones bytes), and rewinds the node
    // counter so that only the root (node 0) exists.
    void init()
    {
        memset(pos, 0, sizeof pos);
        memset(last, -1, sizeof last);
        memset(ch, 0, sizeof ch);
        memset(cnt, 0, sizeof cnt);
        tot = 0;
    }
    int main()
    {
        int n, tt = 0;
        while(~scanf("%s", str))
        {
            init();
            scanf("%d", &n);
            for(int i = 0; i < n; i++)
            {
                scanf("%d %s", &typ[i], keystr);
                insert(keystr, i);
            }
            getfail();
            match(str);
            printf("Case %d
    ", ++tt);
            for(int i = 0; i < n; i++)
                printf("%d
    ", cnt[typ[i]][idx[i]]);
            printf("
    ");
        }
        return 0;
    }
    View Code

    HDU 2457 AC自动机+DP

    题意:给n种病毒序列(只含ACGT),给一串基因,问最少修改多少个碱基(?)能使得其中不含病毒,如果不能做到则输出-1

    思路:考虑神仙DP

    f[i][j] 表示处理完文本串的前 i 个字符、当前位于 Trie 图的节点 j 且不经过终止节点(危险节点)时的最少修改个数

    判断每个点是否是终止节点:如果它自身,或者它顺着 fail 指针能到达的点中有一个是终止节点,那么它也是终止节点

    这个讲的蛮明白的:https://blog.csdn.net/human_ck/article/details/6577142

    转移方程:dp[i+1][ch[p][j]] = min(dp[i+1][ch[p][j]], dp[i][p] + (j == c ? 0 : 1)); //j表示枚举把这个非危险节点的点修改成什么,如果和原来相同则操作数不变,如果不同则+1

    #include <iostream>
    #include <cstdio>
    #include <cstdlib>
    #include <cmath>
    #include <cstring>
    #include <algorithm>
    #include <queue>
    using namespace std;
    typedef long long LL;
    // Capacity (maximum number of trie nodes) of the automaton arrays below.
    const int SZ = 1010;
    // Sentinel stored in dp[][] for states that have not been reached.
    const int INF = 1000000100;
    // Buffer main() reads each pattern into before inserting it.
    char keystr[22];
    // Index of the most recently allocated trie node; node 0 is the root.
    int tot;
    // fail[v]: failure link of node v.
    int fail[SZ];
    // ch[v][c] : node reached from v on letter index c (indices 1..4 are
    //            produced by tran() for 'A','G','C','T').
    // dp[i][v] : minimum number of changed characters among the first i text
    //            characters such that the automaton ends in node v without
    //            ever entering a flagged node (INF if impossible).
    int ch[SZ][6], dp[1010][SZ];
    // Buffer main() reads the text into.
    char str[1005];
    // flag[v]: true if a pattern ends at node v or, after getfail(), if the
    // failure node of v is flagged.
    bool flag[SZ];
    // Maps a nucleotide letter to its 1-based letter index:
    // 'A' -> 1, 'G' -> 2, 'C' -> 3, 'T' -> 4.
    // Any other character yields 0, an index none of the four letters map to.
    // (Previously control fell off the end of the function for such input,
    // which is undefined behaviour.)
    int tran(char c)
    {
        switch(c)
        {
            case 'A': return 1;
            case 'G': return 2;
            case 'C': return 3;
            case 'T': return 4;
            default:  return 0;
        }
    }
    void insert(char s[])
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i < len; i++)
        {
            int c = tran(s[i]);
            if(!ch[p][c]) ch[p][c] = ++tot;
            p = ch[p][c];
        }
        flag[p] = true;
    }
    queue<int> q;
    void getfail()
    {
        fail[0] = 0;
        q.push(0);
        while(q.size())
        {
            int u = q.front();
            q.pop();
            for(int i = 1; i <= 4; i++)
            {
                if(ch[u][i])
                {
                    if(u == 0) fail[ch[u][i]] = 0;
                    else
                    {
                        int v = fail[u];
                        while(v && ch[v][i] == 0) v = fail[v];
                        fail[ch[u][i]] = ch[v][i];
                    }
                    if(flag[fail[ch[u][i]]]) flag[ch[u][i]] = true;
                    q.push(ch[u][i]);
                }
                else ch[u][i] = ch[fail[u]][i];
            }
        }
    }
    
    void match(char s[])
    {
        int p = 0;
        int len = strlen(s);
        for(int i = 0; i <= len; i++)
            for(int j = 0; j <= tot; j++)
                dp[i][j] = INF;
        dp[0][0] = 0;
        for(int i = 0; i < len; i++)
        {
            int c = tran(s[i]);
            for(p = 0; p <= tot; p++)
            {
                if(dp[i][p] == INF) continue;
                for(int j = 1; j <= 4; j++)
                {
                    if(flag[ch[p][j]]) continue;
                    dp[i+1][ch[p][j]] =  min(dp[i+1][ch[p][j]], dp[i][p] + (j == c ? 0 : 1));
                }
            }
        }
    }
    
    // Resets the automaton to an empty trie: clears the node flags, failure
    // links and transition table, and rewinds the node counter so that only
    // the root (node 0) exists.
    void init()
    {
        memset(flag, false, sizeof flag);
        memset(fail, 0, sizeof fail);
        memset(ch, 0, sizeof ch);
        tot = 0;
    }
    // Processes test cases until a pattern count of 0 (or the end of input).
    // Each case consists of n patterns followed by one text; prints
    // "Case k: m" where m is the smallest dp[len][v] over all unflagged nodes
    // v, or -1 if every such state is unreachable.
    // The loop condition compares scanf's result with 1: on EOF scanf returns
    // a negative (truthy) value, which previously kept the loop running
    // forever on input without the terminating 0. All %s reads are bounded by
    // the size of their destination buffer.
    int main()
    {
        int n, tt = 0;
        while(scanf("%d", &n) == 1 && n)
        {
            init();
            for(int i = 1; i <= n; i++)
            {
                if(scanf("%21s", keystr) != 1) return 0;
                insert(keystr);
            }
            getfail();
            if(scanf("%1004s", str) != 1) return 0;
            int len = strlen(str);
            match(str);
            int minn = INF;
            for(int i = 0; i <= tot; i++)
                if(!flag[i]) minn = min(minn, dp[len][i]);
            if(minn == INF) minn = -1;
            printf("Case %d: %d\n", ++tt, minn);
        }
        return 0;
    }
    View Code

    OTZ

  • 相关阅读:
    Linux报错:“/bin/bash^M: 坏的解释器
    搭建单向HTTPS
    Wamp Apache 启动失败检测方法
    Excel 日常操作
    apache https 双向认证
    android搭建
    我身为程序员踩过的坑
    windows 2008 安装 apache + mysql + php
    Svn
    工具软件类
  • 原文地址:https://www.cnblogs.com/pinkglightning/p/9550772.html
Copyright © 2020-2023  润新知