分治FFT
目的
解决这样一类式子:
\[f[n] = \sum_{i = 0}^{n - 1} f[i]\,g[n - i]
\]
算法
看上去跟普通卷积式子挺像的,但是由于计算\(f\)的每一项时都在利用它前面的项来产生贡献,所以不能一次FFT搞完。如果每算一项就做一次FFT,复杂度是\(O(n^2 \log n)\),比直接枚举的\(O(n^2)\)还高……
考虑优化这个算法:如果我们要计算区间\([l, r]\)内的\(f\)值,并且可以快速算出区间\([l, mid]\)内的\(f\)值对区间\([mid + 1, r]\)内的\(f\)值产生了怎样的影响,就可以采取CDQ分治,不断递归下去算。
考虑\(x \in [mid + 1, r]\),\([l, mid]\)给它的贡献是:
\[h[x] = \sum_{i = l}^{mid} f[i]\,g[x - i]
\]
为了方便,我们将求和范围扩充到\([l, x - 1]\)(假设此时\(f[mid + 1], \dots, f[r]\)都为\(0\),所以多出来的项没有贡献),因此有:
\[h[x] = \sum_{i = l}^{x - 1} f[i]\,g[x - i]
\]
为了便于FFT计算,将枚举改成从\(0\)开始。(把表达式中的\(i\)改成\(i + l\),因为原来的\(i\)等于现在的\(i + l\))
\[h[x] = \sum_{i = 0}^{x - l - 1} f[i + l]\,g[x - l - i]
\]
为了表示成卷积形式,我们令:
\[a[i] = f[i + l], \quad b[i - 1] = g[i]
\]
再在原式中用\(a[i]\)代替\(f[i + l]\),用\(b[x - l - 1 - i]\)代替\(g[x - l - i]\):
\[h[x] = \sum_{i = 0}^{x - l - 1} a[i]\,b[x - l - 1 - i]
\]
观察到后面刚好就是多项式乘法中某一项的系数,即
\[h[x] = (a * b)[x - l - 1]
\]
在cdq分治的过程中用FFT/NTT计算即可。
代码
洛谷上的模板,因为要取模,所以用的NTT
#include<bits/stdc++.h>
using namespace std;
// Shorthand for loop-counter declarations. It used to expand to `register int`,
// but the `register` storage class was removed in C++17 (clang rejects it
// outright), and it was only ever an optimisation hint, so plain `int` is
// equivalent.
#define R int
// Prime modulus for all arithmetic: 998244353 = 119 * 2^23 + 1.
#define p 998244353
// Capacity of every global work array below.
#define AC 400100
#define LL long long
#define ld double
// G is the root used for the forward transform, Gi the one used for the
// inverse; they are multiplicative inverses modulo p (3 * 332748118 = p + 1).
const int G = 3, Gi = 332748118;
// n: number of terms to compute; lim / len: current transform size and its
// log2, both maintained by init().
int n, lim, len;
// f: the sequence being solved for; g: the given sequence;
// a, b: scratch buffers for one polynomial multiplication;
// rev: bit-reversal permutation for the current transform size.
int f[AC], g[AC], a[AC], b[AC], rev[AC];
/**
 * Reads the next non-negative decimal integer from stdin.
 *
 * Every character that is not an ASCII digit is skipped (a leading '-' is
 * skipped too, so negative numbers are not supported), then consecutive
 * digits are accumulated.
 *
 * @return the parsed value, or 0 if end-of-input is reached before any digit.
 */
inline int read()
{
    int x = 0;
    // Keep the character in an int so EOF stays distinguishable from data.
    int c = getchar();
    // Stop at EOF as well: EOF is "not a digit", so without this check the
    // skip loop would spin forever once the input is exhausted.
    while(c != EOF && (c > '9' || c < '0')) c = getchar();
    while(c >= '0' && c <= '9') x = x * 10 + c - '0', c = getchar();
    return x;
}
/**
 * Adds b to a in place, modulo p.
 *
 * At most one +p and one -p correction is applied, so the result lands in
 * [0, p) whenever the raw sum a + b lies in [-p, 2p).
 */
inline void up(int &a, int b)
{
    int sum = a + b;
    if(sum < 0) sum += p;
    if(sum >= p) sum -= p;
    a = sum;
}
/**
 * Modular exponentiation by repeated squaring.
 *
 * @param x     base
 * @param have  exponent; consumed one bit at a time from the low end
 * @return x raised to the power `have`, reduced modulo p
 */
inline int qpow(int x, int have)
{
    int result = 1;
    for(; have; have >>= 1)
    {
        // Multiply in the current power of x when this exponent bit is set.
        if(have & 1) result = 1LL * result * x % p;
        x = 1LL * x * x % p;
    }
    return result;
}
/**
 * Prepares the global transform state for one multiplication.
 *
 * Sets lim to the smallest power of two strictly greater than `length` and
 * len to log2(lim), rebuilds the bit-reversal table rev[0..lim), and zeroes
 * the scratch buffers a[0..lim) and b[0..lim).
 *
 * Original author's note (translated): "length here is already the combined
 * length of the two arrays".
 * NOTE(review): the visible caller passes the sub-range size r - l + 1 rather
 * than a combined operand length -- confirm this is intended.
 */
void init(int length)
{
    lim = 1;
    len = 0;
    while(lim <= length)
    {
        lim <<= 1;
        ++len;
    }
    const int top = len - 1; // position of the highest bit of an index
    for(int i = 0; i < lim; ++i)
    {
        // rev[i] is rev[i / 2] shifted down one bit, with i's lowest bit
        // moved to the top position.
        rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << top);
        a[i] = 0;
        b[i] = 0;
    }
}
void NTT(int *A, int opt)
{
for(R i = 0; i < lim; i ++)
if(i < rev[i]) swap(A[i], A[rev[i]]);
for(R i = 1; i < lim; i <<= 1)
{
LL W = qpow((opt > 0) ? G : Gi, (p - 1) / (i << 1));
for(R r = i << 1, j = 0; j < lim; j += r)
for(R k = 0, w = 1; k < i; k ++, w = 1LL * w * W % p)
{
int x = A[j + k], y = 1LL * w * A[j + k + i] % p;
A[j + k] = (x + y) % p, A[j + k + i] = (x - y) % p;
}
}
if(opt == -1)
{
int inv = qpow(lim, p - 2);
for(R i = 0; i < lim; i ++) A[i] = 1LL * A[i] * inv % p;
}
}
/**
 * Reads the input: first n, then g[1] .. g[n - 1].
 * Also seeds f[0] = 1, the starting value the recurrence builds on.
 */
void pre()
{
    n = read();
    f[0] = 1;
    int i = 1;
    while(i < n)
    {
        g[i] = read();
        ++i;
    }
}
/**
 * Multiplies two polynomials through the transform (cyclic convolution of
 * size lim).
 *
 * Both arrays are forward-transformed, multiplied point by point, and A is
 * transformed back. On return A holds the product coefficients modulo p and
 * B is left in its transformed state.
 *
 * @param A  first operand, at least lim entries; receives the product.
 * @param B  second operand, at least lim entries; overwritten.
 */
void cal(int *A, int *B)
{
    NTT(A, 1), NTT(B, 1);
    // The transform covers exactly indices [0, lim). The bound used to be
    // `i <= lim`, which also multiplied the stale element at index lim.
    for(int i = 0; i < lim; i ++) A[i] = 1LL * A[i] * B[i] % p;
    NTT(A, -1);
}
/**
 * Divide-and-conquer step that completes f[l..r].
 *
 * After the left half [l, mid] has been solved, its contribution to every
 * x in [mid + 1, r], namely the sum over i in [l, mid] of f[i] * g[x - i],
 * is obtained with one polynomial multiplication and added into f[x].
 * The right half is then solved recursively.
 *
 * @param l  first index of the range (inclusive)
 * @param r  last index of the range (inclusive)
 */
void cdq(int l, int r)
{
    // A single element has nothing left to receive. An empty range (l > r,
    // e.g. when n == 0) must stop too: the old `l == r` test let it recurse
    // forever.
    if(l >= r) return;
    const int mid = (l + r) >> 1, length = r - l + 1;
    cdq(l, mid);
    init(length);
    // a[i] = f[i + l] for the solved left half; the rest of a stays zero.
    for(int i = l; i <= mid; i ++) a[i - l] = f[i];
    // b is g shifted down by one so that the coefficient needed for x sits
    // at index x - l - 1 of the product.
    for(int i = 1; i < length; i ++) b[i - 1] = g[i];
    cal(a, b);
    // Only product indices mid - l .. r - l - 1 are read. Since lim > length,
    // cyclic wrap-around lands on indices below mid - l and cannot corrupt them.
    for(int i = mid + 1; i <= r; i ++) up(f[i], a[i - l - 1]);
    cdq(mid + 1, r);
}
/**
 * Entry point: reads the input, runs the divide-and-conquer solver over
 * [0, n - 1], and prints f[0] .. f[n - 1] separated by spaces.
 */
int main()
{
    // Local-debugging convenience: take input from "in.in" when it exists.
    // Probe for the file first, because a failed freopen() leaves stdin
    // unusable and the program would then have no input at all (for example
    // on an online judge, where the file is absent).
    if(FILE *probe = fopen("in.in", "r"))
    {
        fclose(probe);
        if(!freopen("in.in", "r", stdin)) return 1;
    }
    pre();
    cdq(0, n - 1);
    // Reduce once more on output so every printed value is in [0, p).
    for(int i = 0; i < n; i ++) printf("%d ", ((f[i] % p) + p) % p);
    // The newline escape had been split into a raw line break inside the
    // string literal, which does not compile.
    printf("\n");
    fclose(stdin);
    return 0;
}