• Keywords Search(AC自动机)


     In the modern time, Search engine came into the life of everybody like Google, Baidu, etc. 
     Wiskey also wants to bring this feature to his image retrieval system. 
     Every image have a long description, when users type some keywords to find the image, the system will match the keywords with description of image and show the image which the most keywords be matched. 
     To simplify the problem, giving you a description of image, and some keywords, you should tell me how many keywords will be match. 

    Input

     First line will contain one integer means how many cases will follow by. 
     Each case will contain two integers N means the number of keywords and N keywords follow. (N <= 10000) 
     Each keyword will only contains characters 'a'-'z', and the length will be not longer than 50. 
     The last line is the description, and the length will be not longer than 1000000. 
    Output

     Print how many keywords are contained in the description.

    Sample Input

    1
    5
    she
    he
    say
    shr
    her
    yasherhs

    Sample Output

    3

    多模匹配,AC自动机的模板题。

     1 #include <bits/stdc++.h>
     2 
     3 using namespace std;
     4 
     5 int tree[500005][28], counword[500005], fail[500005];
     6 int coun;
     7 
     8 void AddWord(char a[], int len)   //建树,把每一个模式串加到树上
     9 {
    10     int root, i;
    11     root = 0;
    12     for(i=0;i<len;i++)
    13     {
    14         if(tree[root][a[i]-'a']==0) tree[root][a[i]-'a'] = ++coun;   
    //tree为0表示为空(就算是root也没表示字母),
    //那么就给当前节点赋上++coun,一一对应的标记节点
    15 root = tree[root][a[i]-'a']; 16 } 17 counword[root]++; //当前节点表示的单词个数 18 } 19 20 void getfail() //fail指针的意义类似于KMP的next数组,当前节点匹配失败的时候,
    //我们可以借助fail到达前缀能对应上当前串后缀的节点,匹配继续
    21 { 22 queue <int> q; //每个节点都有自己的fail指针,用队列挨个看 23 int now, i; 24 fail[0] = 0; 25 for(i=0;i<26;i++) //第二层不为空的全部垫在队列里 26 { 27 if(tree[0][i]) 28 { 29 fail[tree[0][i]] = 0; 30 q.push(tree[0][i]); 31 } 32 }
    33 while(!q.empty()) 34 { 35 now = q.front(); 36 q.pop(); 37 for(i=0;i<26;i++) 38 { 39 if(tree[now][i])
    //now的该子节点不为空,fail指向的是now(i的父亲)的fail的i儿子,节点不为空,有意义,进队列
    40 { 41 fail[tree[now][i]] = tree[fail[now]][i]; 42 q.push(tree[now][i]); 43 } 44 else tree[now][i] = tree[fail[now]][i];
    //该节点为空,该节点不要,其编号被赋成now(i的父亲)的fail的i儿子,
    //节点本来为空,不要进队列,否则会有相同节点重复进队列
    45 } 46 } 47 } 48 49 long long query(char a[], int len) //树和fail准备就绪,跑一遍文本串出结果 50 { 51 int now, i, j; 52 long long re; 53 now = 0; 54 re = 0; 55 for(i=0;i<len;i++) 56 { 57 now = tree[now][a[i]-'a']; 58 for(j=now;j&&counword[j]!=-1;j=fail[j]) 59 { 60 re+=counword[j]; 61 counword[j] = -1; 62 } 63 } 64 return re; 65 } 66 67 int main() 68 { 69 int t, n, len, i; 70 long long re; 71 char a[55], txt[1000005]; 72 scanf("%d", &t); 73 while(t--) 74 { 75 coun = 0; 76 memset(counword, 0, sizeof(counword)); 77 memset(tree, 0, sizeof(tree)); 78 scanf("%d", &n); 79 getchar(); 80 for(i=0;i<n;i++) 81 { 82 scanf("%s", a); 83 len = strlen(a); 84 AddWord(a, len); 85 } 86 getfail(); 87 scanf("%s", txt); 88 len = strlen(txt); 89 re = query(txt, len); 90 printf("%lld ", re); 91 } 92 return 0; 93 }

    单模匹配KMP,多模用字典树+fail



  • 相关阅读:
    【洛谷5052】[COCI2017-2018#7] Go(区间DP)
    【洛谷6564】[POI2007] 堆积木KLO(树状数组优化DP)
    【洛谷6940】[ICPC2017 WF] Visual Python++(扫描线)
    【洛谷6939】[ICPC2017 WF] Tarot Sham Boast(PGF结论题)
    【洛谷4123】[CQOI2016] 不同的最小割(最小割树)
    初学最小割树
    【洛谷6122】[NEERC2016] Mole Tunnels(模拟费用流)
    【洛谷6936】[ICPC2017 WF] Scenery(思维)
    【洛谷2805】[NOI2009] 植物大战僵尸(最大权闭合子图)
    【洛谷1393】Mivik 的标题(容斥+border性质)
  • 原文地址:https://www.cnblogs.com/0xiaoyu/p/11300966.html
Copyright © 2020-2023  润新知