FFT在字符串匹配中的应用.
能解决大概这种问题:
给定长度为(m)的A串,长度为(n)的B串。问A串在B串中的匹配数
我们设一个函数(下标从(0)开始)
(C(x,y) = A(x) - B(y)),若为(0),表示A串的第(x)个字符与B串的第(y)个字符可以匹配
(P(x) = \sum_{i = 0}^{m - 1}C(i,x - m + i + 1))
但是很遗憾,当(P(x))等于零时,只能说明两个串的字符编码之和相同(例如 ab 与 ba),并不能说明它们逐位匹配.
为什么?因为负数的存在!
我们考虑怎么去掉负数,平方!
(P(x) = \sum_{i = 0}^{m - 1}(A(i) - B(x - m + i + 1))^2)
这时候,如果上式为(0),就能证明B串中(x)结尾的串可以与A匹配.
老样子设
(f(i) = A(m - i - 1))
(g(i) = B(i))
则有
(P(x) = \sum_{i = 0}^{m - 1}f(m - i - 1)^2 - \sum_{i = 0}^{m - 1}2f(m - i - 1)g(x - m + i + 1) + \sum_{i = 0}^{m - 1}g(x - m + i + 1)^2)
发现第一项和第三项是可以通过处理前缀和搞出来的!
而第二项是个卷积,我们只需要求(P(x))是否为零就好了。
我们终于回到了题目上.
这道题目中含有通配符,上式很明显不再成立
但大体思路还是不变的
(C(x))与(P(x))的意义不变
我们设
(P(x) = \sum_{i = 0}^{m - 1}(A(i) - B(x - m + i + 1))^2A(i)B(x - m + i + 1))
即当B串(x)的位置为通配符时,(B(x) = 0),A同理
这样我们就又能用(P(x))表示能否匹配了
同理,设(f(x))与(g(x))意义同上
(P(x) = \sum_{i = 0}^{m - 1}f(m - i - 1)^3g(x - m + i + 1) - 2\sum_{i = 0}^{m - 1}f(m - i - 1)^2g(x - m + i + 1)^2 + \sum_{i = 0}^{m - 1}f(m - i - 1)g(x - m + i + 1)^3)
然后发现
上式三项都是卷积!
所以我们跑7遍FFT就好了
#include<cstdio>
#include<iostream>
#include<cctype>
#include<algorithm>
#include<cstring>
#include<cmath>
#include<vector>
using namespace std;
// Capacity of the per-string buffers (input text plus terminator).
// NOTE(review): 3e5 is presumably the problem's maximum string length - confirm.
const int N = 300000 + 3;
// The constant pi, used to build the FFT roots of unity.
const double Pi = std::acos(-1.0);
// Small tolerance; not referenced by any code visible in this file.
const double eps = 1e-12;
// Minimal complex number for the FFT: x is the real part, y the imaginary
// part (operator* combines them with the complex-product formula).
struct point{
    double x,y;
    // Both components default to 0, so point() is the complex zero.
    // Both parameters are double to match the members; the previous
    // `long double yy` only added a pointless widen-then-narrow conversion.
    point(double xx = 0,double yy = 0):x(xx),y(yy){}
}a[N << 2],b[N << 2],c[N << 2];
// a, b: FFT work buffers for the pattern-side and text-side sequences.
// c:    accumulator for the combined frequency-domain sum (filled in main).
// Raw input strings as read by main: s1 is the pattern, s2 the text.
char s1[N];
char s2[N];
// Numeric encodings built in main: c1 holds the pattern reversed, c2 the
// text; a wildcard '*' is stored as 0 and a letter as (letter - 'a' + 1).
int c1[N];
int c2[N];
// Index permutation applied by fftle before the butterfly passes
// (main fills it with the bit-reversal of each index).
int r[N << 2];
// n: text length, m: pattern length (both read from input).
int n;
int m;
// FFT size; main doubles it until it exceeds n + m.
int limit = 1;
// Number of doublings applied to limit, i.e. log2(limit).
int l;
// 1-based start positions in the text where the pattern matches.
vector<int> G;
// Complex addition: adds real and imaginary parts independently.
point operator + (point lhs,point rhs){
    const double re = lhs.x + rhs.x;
    const double im = lhs.y + rhs.y;
    return point(re,im);
}
// Complex subtraction: subtracts real and imaginary parts independently.
point operator - (point lhs,point rhs){
    const double re = lhs.x - rhs.x;
    const double im = lhs.y - rhs.y;
    return point(re,im);
}
// Complex multiplication: (x1 + y1*i)(x2 + y2*i) = (x1*x2 - y1*y2) + (x1*y2 + y1*x2)*i.
point operator * (point lhs,point rhs){
    const double re = lhs.x * rhs.x - lhs.y * rhs.y;
    const double im = lhs.x * rhs.y + lhs.y * rhs.x;
    return point(re,im);
}
// In-place iterative radix-2 FFT over A[0 .. limit-1].
//
// A    - buffer of at least `limit` complex values, transformed in place.
// type - +1 for the forward transform, -1 for the inverse transform.
//
// Relies on the globals `limit` (transform size, built as a power of two in
// main) and `r` (the index permutation prepared in main for that size).
// The inverse transform divides every element by `limit`; both the real and
// the imaginary part are normalised so the result is a true inverse DFT
// (previously only .x was scaled, which was enough for main but left .y
// too large by a factor of `limit`).
inline void fftle(point *A,int type){
    // Reorder the input; the i < r[i] guard swaps each pair exactly once.
    for(int i = 0;i < limit;++i)
        if(i < r[i]) swap(A[i],A[r[i]]);
    // Combine blocks of half-length `mid` into blocks of length 2 * mid.
    for(int mid = 1;mid < limit;mid <<= 1){
        const int R = mid << 1;
        // Primitive R-th root of unity; the sign of the angle selects
        // forward vs. inverse.
        const point Wn = point(cos(Pi / mid),type * sin(Pi / mid));
        for(int j = 0;j < limit;j += R){
            point w(1,0);
            for(int k = 0;k < mid;++k,w = w * Wn){
                // Butterfly on the k-th element of the two halves.
                const point x = A[j + k],y = A[j + mid + k] * w;
                A[j + k] = x + y;
                A[j + mid + k] = x - y;
            }
        }
    }
    if(type == -1){
        for(int i = 0;i < limit;++i){
            A[i].x = A[i].x / limit;
            A[i].y = A[i].y / limit;
        }
    }
}
int main(){
scanf("%d%d",&m,&n);
scanf("%s%s",s1,s2);
point zero = point(0,0);
for(int i = 0;i < m;++i) c1[i] = s1[m - i - 1] == '*' ? 0 : s1[m - i - 1] - 'a' + 1;
for(int i = 0;i < n;++i) c2[i] = s2[i] == '*' ? 0 : s2[i] - 'a' + 1;
while(limit <= (n + m)) limit <<= 1,l++;
for(int i = 0;i < limit;++i) r[i] = (r[i >> 1] >> 1) | ((i & 1) << (l - 1));
for(int i = 0;i < m;++i) a[i].x = c1[i] * c1[i] * c1[i];
for(int i = 0;i < n;++i) b[i].x = c2[i];
fftle(a,1);fftle(b,1);
for(int i = 0;i < limit;++i) c[i] = c[i] + (a[i] * b[i]),a[i] = b[i] = zero;
for(int i = 0;i < m;++i) a[i].x = c1[i] * c1[i];
for(int i = 0;i < n;++i) b[i].x = c2[i] * c2[i];
fftle(a,1);fftle(b,1);
point w(2,0);
for(int i = 0;i < limit;++i) c[i] = c[i] - ((a[i] * b[i]) * w),a[i] = b[i] = zero;
for(int i = 0;i < m;++i) a[i].x = c1[i];
for(int i = 0;i < n;++i) b[i].x = c2[i] * c2[i] * c2[i];
fftle(a,1);fftle(b,1);
for(int i = 0;i < limit;++i) c[i] = c[i] + (a[i] * b[i]);
fftle(c,-1);
//for(int i = m - 1;i < n;++i) printf("%lf ",fabs(c[i].x / limit));puts("");
for(int i = m - 1;i < n;++i) if((fabs)(c[i].x) < 0.5) G.push_back(i + 2 - m);
printf("%d
",(int)G.size());
for(int i = 0;i < (int)G.size();++i) printf("%d ",G[i]);
}