后缀自动机:可以识别一个字符串的所有子串。把在原串中出现位置的右端点集合相同的子串归为一类,记为一个状态,则状态最多有 $O(n)$ 个。
在后缀自动机上,一个状态 $p$ 代表了长度在区间 $(l[fa[p]],\ l[p]]$ 内的若干个子串。
区分SAM的转移 & parent 树的边:
在SAM上状态的转移 = 在保证所代表的字符串左端点不右移的情况下,是否存在可以添加 c 在末尾的串
在 parent 树上,父亲与儿子的关系为:儿子的右端点为父亲右端点的子集,而儿子的左端点在保证右端点不变的情况下尽量向右延伸
(SAM上跳parent, AC自动机 & KMP 跳 fail 都是基于贪心匹配的原则)
*打标记,记录贡献时要记得上传&下放
一点点个人有关广义SAM的心得与体会:
1.对多个串建立SAM:在每插入一个新的串的时候把 $last$ 更新为根
2.对trie树建立SAM:每个节点的 $last$ 为其父亲所建立的 $np$ 节点
考虑插入一个新的节点的过程:$last$ 为上一个长度最长的、具有 $right = \{n\}$ 的状态,不断跳转 $fa[p]$ 直到找到可以转移的地方以插入新的子串。由于 $\{n + 1\}$ 这个右端点从未出现过,所以必然是新建了一个新的状态。
但是在广义SAM上呢?这时候所找到的点 $p$ 和 $q$,完全有可能是由其它串所建立的、已经包含了 $\{n + 1\}$ 这个端点的一个状态。那么这个时候我们所新建的节点 $np$ 等于是加入了一个本来就存在的状态,又因为此时 $l[np] = l[fa[np]]$,所以这个节点等效于没有出现过(它代表的子串数为 0)。其他的与后缀自动机基本没有区别。
1.[COCI2011-2012#5] POPLOCAVANJE
#include <bits/stdc++.h>
using namespace std;

const int maxn = 300100;  // capacity for text positions (and pattern buffer)
const int maxm = 600100;  // capacity for automaton states

int n, m, ans, cnt = 1, last = 1;  // state 1 is the automaton root
int t[maxn], id[maxm], l[maxm], rec[maxm];
int ch[maxm][26], pos[maxm], fa[maxm];
char s[maxn];

// Reads one (optionally negative) decimal integer from stdin via getchar.
int read() {
    int sign = 1, value = 0;
    char c = getchar();
    for (; c < '0' || c > '9'; c = getchar())
        if (c == '-') sign = -1;
    for (; c >= '0' && c <= '9'; c = getchar())
        value = value * 10 + c - '0';
    return value * sign;
}

// Appends character c to the suffix automaton; `id` is the 1-based text
// position, stored in pos[] for the freshly created (non-clone) state.
void Ins(int c, int id) {
    const int np = ++cnt;
    int p = last;
    pos[np] = id;
    l[np] = l[p] + 1;
    last = np;

    while (p && !ch[p][c]) {
        ch[p][c] = np;
        p = fa[p];
    }
    if (!p) {
        fa[np] = 1;
        return;
    }

    const int q = ch[p][c];
    if (l[p] + 1 == l[q]) {
        fa[np] = q;
        return;
    }

    // q is too long to serve as the link target: split off a clone of
    // length l[p] + 1 and redirect the affected transitions to it.
    const int nq = ++cnt;
    l[nq] = l[p] + 1;
    memcpy(ch[nq], ch[q], sizeof(ch[q]));
    fa[nq] = fa[q];
    fa[q] = fa[np] = nq;
    while (ch[p][c] == q) {
        ch[p][c] = nq;
        p = fa[p];
    }
}

// Counting sort of the states by l[]; id[1..cnt] lists them in
// non-decreasing length order, so a parent precedes its children.
void Rsort() {
    for (int v = 1; v <= cnt; ++v) ++t[l[v]];
    for (int len = 1; len <= n; ++len) t[len] += t[len - 1];
    for (int v = 1; v <= cnt; ++v) {
        id[t[l[v]]] = v;
        --t[l[v]];
    }
}

// Pushes rec[] (longest matched pattern length) down the parent tree,
// marks the covered interval of every text position with a difference
// array, and counts the positions left uncovered into ans.
void Cal() {
    Rsort();
    memset(t, 0, sizeof(t));

    for (int i = 1; i <= cnt; ++i) {
        const int x = id[i];
        rec[x] = max(rec[x], rec[fa[x]]);
    }

    for (int x = 1; x <= cnt; ++x) {
        if (!pos[x]) continue;  // clones carry no text position
        --t[pos[x] + 1];
        ++t[pos[x] - rec[x] + 1];
    }

    for (int i = 1; i <= n; ++i) {
        t[i] += t[i - 1];
        if (!t[i]) ++ans;
    }
}

// Walks the pattern currently stored in s[1..] through the automaton; if
// the whole pattern is matched, records its length on the reached state.
void Get() {
    const int len = strlen(s + 1);
    int p = 1;
    for (int i = 1; i <= len; ++i) {
        p = ch[p][s[i] - 'a'];
        if (!p) return;
    }
    rec[p] = max(rec[p], len);
}

int main() {
    n = read();
    scanf("%s", s + 1);
    for (int i = 1; i <= n; ++i) Ins(s[i] - 'a', i);

    m = read();
    for (int k = 0; k < m; ++k) {
        scanf("%s", s + 1);
        Get();
    }

    Cal();
    printf("%d ", ans);
    return 0;
}
2.[SDOI2016]生成魔咒
#include <bits/stdc++.h>
using namespace std;
#define maxn 400000
#define LL long long

int n, tot = 1, last = 1, fa[maxn];  // state 1 is the automaton root
LL ans, l[maxn];                     // ans = number of distinct non-empty substrings so far
map <int, int> ch[maxn];             // sparse transitions (the alphabet is arbitrary integers)

// Reads one (optionally negative) decimal integer from stdin via getchar.
int read() {
    int x = 0, k = 1;
    char c = getchar();
    while (c < '0' || c > '9') {
        if (c == '-') k = -1;
        c = getchar();
    }
    while (c >= '0' && c <= '9') x = x * 10 + c - '0', c = getchar();
    return x * k;
}

// Returns the target of transition (p, c), or 0 when there is none.
// Uses find() so that a pure lookup never inserts an entry into the map.
static int go(int p, int c) {
    map <int, int>::const_iterator it = ch[p].find(c);
    return it == ch[p].end() ? 0 : it->second;
}

// Appends character c to the suffix automaton and updates ans.
void Ins(int c) {
    int np = ++ tot, p = last;
    l[np] = l[p] + 1;
    last = np;
    for (; p && !go(p, c); p = fa[p]) ch[p][c] = np;
    if (!p) fa[np] = 1;
    else {
        int q = go(p, c);
        if (l[p] + 1 == l[q]) fa[np] = q;
        else {
            // Split q: the clone nq takes over the strings of length <= l[p] + 1.
            int nq = ++ tot;
            l[nq] = l[p] + 1;
            ch[nq] = ch[q];
            fa[nq] = fa[q];
            fa[q] = fa[np] = nq;
            for (; p && go(p, c) == q; p = fa[p]) ch[p][c] = nq;
        }
    }
    // Splitting q into q/nq does not change the number of substrings they
    // represent together, so the only net gain is the contribution of np.
    ans += l[np] - l[fa[np]];
}

int main() {
    n = read();
    for (int i = 1; i <= n; i ++) {
        int x = read();
        Ins(x);
        // One answer per line, after every appended character.
        printf("%lld\n", ans);
    }
    return 0;
}
3.[ZJOI2015]诸神眷顾的幻想乡
#include <bits/stdc++.h>
using namespace std;
#define maxn 2000000  // capacity of the tree-side arrays (nodes / edge slots)
// Every Ins() call creates one state and possibly one clone. There is one
// call per DFS visit, i.e. (number of leaves) * n <= 20 * 100000 calls, so
// up to 2 * 2000000 + 1 states can exist. NOTE(review): the bounds
// n <= 100000 and <= 20 leaves are taken from the problem statement.
#define maxs 4000010
// NOTE(review): colours are assumed to lie in [0, 10) as the problem
// statement promises (c <= 10) - confirm before reusing on other data.
#define maxc 10

int n, C, last = 1, cnt = 1, fa[maxs];  // state 1 is the automaton root
int rec[maxn], s[maxn], deg[maxn];      // rec[u]: state created for tree node u in the current DFS
int ch[maxs][maxc], l[maxs];
long long ans;

// Reads one (optionally negative) decimal integer from stdin via getchar.
int read() {
    int x = 0, k = 1;
    char c = getchar();
    while (c < '0' || c > '9') {
        if (c == '-') k = -1;
        c = getchar();
    }
    while (c >= '0' && c <= '9') x = x * 10 + c - '0', c = getchar();
    return x * k;
}

// Undirected adjacency lists; edge ids start at 2 so that 0 ends a list.
struct edge {
    int cnp, to[maxn], last[maxn], head[maxn];
    edge() { cnp = 2; }
    void add(int u, int v) {
        to[cnp] = v, last[cnp] = head[u], head[u] = cnp ++;
        to[cnp] = u, last[cnp] = head[v], head[v] = cnp ++;
    }
}E1;

// Extends the automaton from state `last` with character c on behalf of
// tree node x and remembers the new state in rec[x]. When the transition
// already exists the new state ends up with l[np] == l[fa[np]], so it
// contributes nothing to the final sum.
void Ins(int c, int x) {
    int np = ++ cnt, p = last;
    rec[x] = np;
    l[np] = l[p] + 1;
    while (p && !ch[p][c]) ch[p][c] = np, p = fa[p];
    if (!p) fa[np] = 1;
    else {
        int q = ch[p][c];
        if (l[q] == l[p] + 1) fa[np] = q;
        else {
            int nq = ++ cnt;
            l[nq] = l[p] + 1;
            memcpy(ch[nq], ch[q], sizeof(ch[q]));
            fa[nq] = fa[q], fa[q] = fa[np] = nq;
            while (ch[p][c] == q) ch[p][c] = nq, p = fa[p];
        }
    }
}

// Walks the tree from u (coming from `parent`), inserting each node's
// colour after the state that was created for its parent.
void dfs(int u, int parent) {
    last = rec[parent];
    Ins(s[u], u);
    for (int i = E1.head[u]; i; i = E1.last[i]) {
        int v = E1.to[i];
        if (v != parent) dfs(v, u);
    }
}

int main() {
    n = read(), C = read();
    for (int i = 1; i <= n; i ++) s[i] = read();
    for (int i = 1; i < n; i ++) {
        int u = read(), v = read();
        E1.add(u, v);
        deg[u] ++, deg[v] ++;
    }
    rec[0] = 1;  // the virtual parent of every DFS root maps to the automaton root
    for (int i = 1; i <= n; i ++)
        if (deg[i] == 1) dfs(i, 0);
    // Each state contributes the number of distinct strings it represents.
    for (int i = 1; i <= cnt; i ++) ans += l[i] - l[fa[i]];
    printf("%lld ", ans);
    return 0;
}
4.[CTSC2012]熟悉的文章
#include <bits/stdc++.h>
using namespace std;
#define maxn 4000000

int n, m, last, tot = 1, ch[maxn][2], l[maxn], fa[maxn];  // state 1 is the automaton root
int head, tail, g[maxn], f[maxn], q[maxn];                // q[head..tail]: monotone queue used by DP
char s[maxn];

// Reads one (optionally negative) decimal integer from stdin via getchar.
int read() {
    int x = 0, k = 1;
    char c = getchar();
    while (c < '0' || c > '9') {
        if (c == '-') k = -1;
        c = getchar();
    }
    while (c >= '0' && c <= '9') x = x * 10 + c - '0', c = getchar();
    return x * k;
}

// Appends bit x to the automaton, extending from state `last`.
void Ins(int x) {
    int np = ++ tot, p = last;
    last = np, l[np] = l[p] + 1;
    for (; p && !ch[p][x]; p = fa[p]) ch[p][x] = np;
    if (!p) fa[np] = 1;
    else {
        int old = ch[p][x];
        if (l[old] == l[p] + 1) fa[np] = old;
        else {
            int nq = ++ tot;
            l[nq] = l[p] + 1;
            memcpy(ch[nq], ch[old], sizeof(ch[old]));
            fa[nq] = fa[old], fa[old] = fa[np] = nq;
            for (; ch[p][x] == old; p = fa[p]) ch[p][x] = nq;
        }
    }
}

// Runs s[1..len] through the automaton. For every i, g[i] becomes the
// start index of the longest substring ending at i that the automaton
// accepts (g[i] == i + 1 when nothing matches).
void Get_pos(int len) {
    int p = 1, matched = 0;
    for (int i = 1; i <= len; i ++) {
        int x = s[i] - '0';
        while (p && !ch[p][x]) p = fa[p], matched = l[p];
        if (ch[p][x]) p = ch[p][x], matched ++;
        else p = 1, matched = 0;
        g[i] = i - matched + 1;
    }
}

// Pushes index x into the queue, keeping f[j] - j decreasing from head to tail.
void Push_in(int x) {
    while (head <= tail && f[q[tail]] - q[tail] <= f[x] - x) tail --;
    q[++ tail] = x;
}

// f[i] = max(f[i-1], max over j in [g[i]-1, i-L] of f[j] + i - j), i.e. the
// most characters of s[1..i] coverable by matched pieces of length >= L.
// Returns whether at least 90% of the len characters can be covered.
bool DP(int len, int L) {
    // ceil(0.9 * len) in exact integer arithmetic.
    const int need = (9 * len + 9) / 10;
    f[0] = 0;
    head = 1, tail = 0;
    for (int i = 1, nxt = 0; i <= len; i ++) {
        while (nxt <= i - L) Push_in(nxt), nxt ++;             // j = i - L becomes usable
        while (head <= tail && q[head] < g[i] - 1) head ++;    // drop j that start before the match
        f[i] = max(head <= tail ? f[q[head]] - q[head] + i : 0, f[i - 1]);
    }
    return f[len] >= need;
}

// Binary-searches the largest L in [1, len] for which DP succeeds; 0 if none.
int Binary(int len) {
    int lo = 1, hi = len, best = 0;
    while (lo <= hi) {
        int mid = (lo + hi) >> 1;
        if (DP(len, mid)) best = mid, lo = mid + 1;
        else hi = mid - 1;
    }
    return best;
}

int main() {
    // n = number of query strings, m = number of library strings.
    n = read(), m = read();
    for (int i = 1; i <= m; i ++) {
        scanf("%s", s + 1);
        int len = strlen(s + 1);
        last = 1;  // every library string is inserted starting from the root
        for (int j = 1; j <= len; j ++) Ins(s[j] - '0');
    }
    for (int i = 1; i <= n; i ++) {
        scanf("%s", s + 1);
        int len = strlen(s + 1);
        Get_pos(len);
        // One answer per line, one line per query string.
        printf("%d\n", Binary(len));
    }
    return 0;
}