普通版大概 2s 左右
目前洛谷上能稳定跑到1s,更快可能还要再卡
#include <queue>
#include <vector>
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cmath>
#define MP make_pair
#define ll long long
#define fi first
#define se second
using namespace std;
template <typename T>
void read(T &x) {
x = 0; bool f = 0;
char c = getchar();
for (;!isdigit(c);c=getchar()) if (c=='-') f=1;
for (;isdigit(c);c=getchar()) x=x*10+(c^48);
if (f) x=-x;
}
template<typename F>
inline void write(F x, char ed = '
')
{
static short st[30];short tp=0;
if(x<0) putchar('-'),x=-x;
do st[++tp]=x%10,x/=10; while(x);
while(tp) putchar('0'|st[tp--]);
putchar(ed);
}
template <typename T>
inline void Mx(T &x, T y) { x < y && (x = y); }
template <typename T>
inline void Mn(T &x, T y) { x > y && (x = y); }
#define op com operator
#define con const
typedef double db;
const int N = 3000005;
const int P = 998244353;
int A[N], B[N]; ll E[N];
int r[N], lim = 1, L;
inline int add(int x, int y) {
return x + y >= P ? x + y - P : x + y;
}
void dft(int *A) {
for (int i = 1;i < lim; i++)
if (r[i] > i) swap(A[i], A[r[i]]);
if (lim >= 2)
for (int j = 0;j < lim; j += 2) {
int x = A[j], y = A[j+1];
A[j] = add(x, y), A[j+1] = add(x, P - y);
}
if (lim >= 4)
for (int j = 0;j < lim; j += 4) {
int x = A[j], y = A[j+2];
A[j] = add(x, y), A[j+2] = add(x , P - y);
x = A[j+1], y = E[3] * A[j+3] % P;
A[j+1] = add(x, y), A[j+3] = add(x , P - y);
}
if (lim >= 8)
for (int j = 0;j < lim; j += 8) {
int x = A[j], y = A[j+4];
A[j] = add(x, y), A[j+4] = add(x , P - y);
x = A[j+1], y = E[5] * A[j+5] % P;
A[j+1] = add(x, y), A[j+5] = add(x , P - y);
x = A[j+2], y = E[6] * A[j+6] % P;
A[j+2] = add(x, y), A[j+6] = add(x , P - y);
x = A[j+3], y = E[7] * A[j+7] % P;
A[j+3] = add(x, y), A[j+7] = add(x , P - y);
}
for (int i = 8;i < lim; i <<= 1) {
for (int j = 0;j < lim; j += (i << 1)) {
int *f = A + j, *g = f + i; ll *e = E + i;
for (int k = 0;k < i; k++) {
int x = f[k], y = e[k] * g[k] % P;
f[k] = add(x, y), g[k] = add(x , P - y);
k++;
x = f[k], y = e[k] * g[k] % P;
f[k] = add(x, y), g[k] = add(x , P - y);
}
}
}
}
ll fpw(ll x, ll mi) {
ll res = 1;
for (; mi; mi >>= 1, x = x * x % P)
if (mi & 1) res = res * x % P;
return res;
}
int m, n;
int main() {
read(n), read(m);
for (int i = 0;i <= n; i++) read(A[i]);
for (int i = 0;i <= m; i++) read(B[i]);
while (lim <= (n + m)) lim <<= 1, L++;
int len = lim >> 1;
for (int i = 1;i < lim; i++)
r[i] = r[i >> 1] >> 1 | ((i & 1) ? len : 0);
E[1] = 1;
for (int i = 2;i < lim; i <<= 1) {
ll *e0 = E + i / 2, *e1 = E + i;
ll w = fpw(3, (P - 1) / (i << 1));
for (int j = 0;j < i; j += 2)
e1[j] = e0[j>>1], e1[j+1] = e1[j] * w % P;
}
dft(A), dft(B);
for (int i = 0;i < lim; i++) A[i] = (ll)A[i] * B[i] % P;
dft(A); reverse(A + 1, A + lim); int inv = fpw(lim, P - 2);
for (int i = 0;i <= n + m; i++) write(1ll * A[i] * inv % P, ' ');
return 0;
}
下面是 fft
#include <queue>
#include <vector>
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <cmath>
#define MP make_pair
#define ll long long
#define fi first
#define se second
using namespace std;
int read(void) {
int x = 0; bool f = 0;
char c = getchar();
for (;!isdigit(c);c=getchar()) if (c=='-') f=1;
for (;isdigit(c);c=getchar()) x=x*10+(c^48);
if (f) x=-x; return x;
}
inline void write(int x, char ed = '
')
{
static short st[30];short tp=0;
if(x<0) putchar('-'),x=-x;
do st[++tp]=x%10,x/=10; while(x);
while(tp) putchar('0'|st[tp--]);
putchar(ed);
}
template <typename T>
inline void Mx(T &x, T y) { x < y && (x = y); }
template <typename T>
inline void Mn(T &x, T y) { x > y && (x = y); }
#define op com operator
#define con const
typedef double db;
const int N = 3000005;
const double Pi = acos(-1.0);
struct com {
db x, y;
com(db a = 0, db b = 0) : x(a) , y(b) {}
op + (con com &w) con { return com(x + w.x, y + w.y); }
op - (con com &w) con { return com(x - w.x, y - w.y); }
op * (con com &w) con { return com(x * w.x - y * w.y, x * w.y + y * w.x); }
op - (void) con { return com(-x, -y); }
com mi() con { return com(-y, x); }
}A[N], E[N];
int r[N], lim = 1, L;
void dft(com *A) {
for (int i = 1;i < lim; i++)
if (r[i] > i) swap(A[i], A[r[i]]);
if (lim >= 2)
for (int j = 0;j < lim; j += 2) {
com x = A[j], y = A[j+1];
A[j] = x + y, A[j+1] = x - y;
}
if (lim >= 4)
for (int j = 0;j < lim; j += 4) {
com x = A[j], y = A[j+2];
A[j] = x + y, A[j+2] = x - y;
x = A[j+1], y = A[j+3].mi();
A[j+1] = x + y, A[j+3] = x - y;
}
if (lim >= 8)
for (int j = 0;j < lim; j += 8) {
com x = A[j], y = A[j+4];
A[j] = x + y, A[j+4] = x - y;
x = A[j+1], y = A[j+5] * E[5];
A[j+1] = x + y, A[j+5] = x - y;
x = A[j+2], y = A[j+6].mi();
A[j+2] = x + y, A[j+6] = x - y;
x = A[j+3], y = A[j+7] * E[7];
A[j+3] = x + y, A[j+7] = x - y;
}
for (int i = 8;i < lim; i <<= 1) {
for (int j = 0;j < lim; j += (i << 1)) {
com *f = A + j, *g = f + i, *e = E + i;
for (int k = 0;k < i; k++) {
com x = f[k], y = g[k] * e[k];
f[k] = x + y, g[k] = x - y;
k++;
x = f[k], y = g[k] * e[k];
f[k] = x + y, g[k] = x - y;
}
}
}
}
int m, n;
int main() {
n = read(), m = read();
for (int i = 0;i <= n; i++) A[i].x = read();
for (int i = 0;i <= m; i++) A[i].y = read();
while (lim <= (n + m)) lim <<= 1, L++;
int len = lim >> 1;
for (int i = 1;i < lim; i++)
r[i] = r[i >> 1] >> 1 | ((i & 1) ? len : 0);
E[1] = com(1, 0);
for (int i = 2;i < lim; i <<= 1) {
com *e0 = E + i / 2, *e1 = E + i;
com w(cos(Pi / i), sin(Pi / i));
for (int j = 0;j < i; j += 2)
e1[j] = e0[j>>1], e1[j+1] = e1[j] * w;
}
dft(A);
for (int i = 0;i < lim; i++) A[i] = A[i] * A[i];
dft(A); reverse(A + 1, A + lim); lim *= 2;
for (int i = 0;i <= n + m; i++)
write((int)(A[i].y / lim + 0.5), ' ');
return 0;
}