• 轻卡常NTT


    普通版(未卡常)大概 2s 左右;
    目前这份代码在洛谷上能稳定跑到 1s,想更快可能还要继续卡常。

    #include <queue>
    #include <vector>
    #include <iostream>
    #include <cstdio>
    #include <cstring>
    #include <algorithm>
    #include <cmath>
    #define MP make_pair
    #define ll long long
    #define fi first
    #define se second
    using namespace std;
    
    /*
     * Reads one decimal integer from stdin into x.
     *
     * Every character before the first digit is skipped; if any of the
     * skipped characters is '-', the result is negated. Reading stops at the
     * first non-digit after the number, and that character is consumed.
     * If the input ends before a digit is found, x is left at 0 (the original
     * stored getchar() in a char, could not recognise EOF and looped forever).
     * Overflow of T is not detected.
     */
    template <typename T>
    void read(T &x) {
        x = 0;
        bool negative = false;
        // Keep the character as int so that EOF stays distinguishable, and
        // compare ranges directly instead of calling isdigit on a char.
        int c = getchar();
        while (c != EOF && (c < '0' || c > '9')) {
            if (c == '-') negative = true;
            c = getchar();
        }
        while (c >= '0' && c <= '9') {
            x = x * 10 + (c - '0');
            c = getchar();
        }
        if (negative) x = -x;
    }
    
    /*
     * Prints the integer x in decimal to stdout, followed by the character ed
     * (a newline by default).
     *
     * The default argument is the escape sequence '\n'; the published listing
     * had it split into a raw line break, which does not compile.
     * Digits are taken from the remainders of the still-signed value, so the
     * most negative value of F prints correctly instead of overflowing on
     * negation.
     */
    template <typename F>
    inline void write(F x, char ed = '\n')
    {
        char digits[40];  // enough for any built-in integer type
        int count = 0;
        const bool negative = x < 0;
        do {
            const int digit = static_cast<int>(x % 10);
            digits[count++] = static_cast<char>('0' + (digit < 0 ? -digit : digit));
            x /= 10;
        } while (x);
        if (negative) putchar('-');
        // Digits were produced least-significant first; emit them reversed.
        while (count) putchar(digits[--count]);
        putchar(ed);
    }
    
    // Raises x to y when y is larger ("x = max(x, y)" in place).
    template <typename T>
    inline void Mx(T &x, T y) {
        if (x < y) {
            x = y;
        }
    }
    
    // Lowers x to y when y is smaller ("x = min(x, y)" in place).
    template <typename T>
    inline void Mn(T &x, T y) {
        if (x > y) {
            x = y;
        }
    }
    
    // Helper macros and typedef that the NTT code below never references
    // (op expands to "com operator", con to "const", db is double).
    #define op com operator 
    #define con const
    typedef double db;
    // Capacity of the global work arrays below.
    const int N = 3000005;
    // Prime modulus of all arithmetic; main derives its roots of unity as
    // powers of 3 modulo P.
    const int P = 998244353;
    // A, B: coefficient arrays of the two input polynomials, transformed in
    // place by dft. E: twiddle-factor table filled by main and read by dft.
    int A[N], B[N]; ll E[N];
    // r: index permutation applied at the start of dft (filled by main).
    // lim: transform length, starts at 1 and is only ever doubled.
    // L: number of times lim was doubled; main increments it, nothing reads it.
    int r[N], lim = 1, L; 
    
    // Returns x + y with a single conditional subtraction of P, i.e. the sum
    // modulo P when both operands lie in [0, P].
    inline int add(int x, int y) {
        int sum = x + y;
        if (sum >= P) {
            sum -= P;
        }
        return sum;
    }
    
    /*
     * In-place forward number-theoretic transform of A[0 .. lim) modulo P.
     *
     * Relies on globals prepared by main before the first call:
     *   - lim : transform length (a power of two),
     *   - r[] : bit-reversal permutation of the indices 0 .. lim-1,
     *   - E[] : twiddle table; E[i + k] (0 <= k < i) is the k-th power of
     *           3^((P-1)/(2i)), so the butterfly layer of half-width i reads
     *           its factors contiguously starting at E + i.
     * There is no inverse flag: main obtains the inverse transform by calling
     * dft again, reversing A[1 .. lim) and scaling by the inverse of lim.
     * NOTE(review): the add() calls assume every input element is already in
     * [0, P) -- confirm at the call sites.
     */
    void dft(int *A) {
    	// Reorder into bit-reversed order; the r[i] > i test swaps each pair once.
    	for (int i = 1;i < lim; i++)
    		if (r[i] > i) swap(A[i], A[r[i]]);
    	// Layer of half-width 1: its only twiddle is E[1] = 1, so no multiplication.
    	// add(x, P - y) is x - y modulo P.
    	if (lim >= 2) 
    	for (int j = 0;j < lim; j += 2) {
    		int x = A[j], y = A[j+1];
    		A[j] = add(x, y), A[j+1] = add(x, P - y);
    	}
    	// Layer of half-width 2, written out by hand: E[2] = 1 is skipped,
    	// only the second butterfly multiplies (by E[3]).
    	if (lim >= 4)
    	for (int j = 0;j < lim; j += 4) {
    		int x = A[j], y = A[j+2];
    		A[j] = add(x, y), A[j+2] = add(x , P - y);
    		x = A[j+1], y = E[3] * A[j+3] % P;
    		A[j+1] = add(x, y), A[j+3] = add(x , P - y);
    	}
    	// Layer of half-width 4, written out by hand: E[4] = 1 is skipped,
    	// the other three butterflies use E[5], E[6], E[7].
    	if (lim >= 8)
    	for (int j = 0;j < lim; j += 8) {
    		int x = A[j], y = A[j+4];
    		A[j] = add(x, y), A[j+4] = add(x , P - y);
    		x = A[j+1], y = E[5] * A[j+5] % P;
    		A[j+1] = add(x, y), A[j+5] = add(x , P - y);
    		x = A[j+2], y = E[6] * A[j+6] % P;
    		A[j+2] = add(x, y), A[j+6] = add(x , P - y);
    		x = A[j+3], y = E[7] * A[j+7] % P;
    		A[j+3] = add(x, y), A[j+7] = add(x , P - y);
    	}
    	// Remaining layers (half-width 8 and up): f is the lower half of the
    	// current group, g the upper half, e the twiddles of this layer.
    	for (int i = 8;i < lim; i <<= 1) {
    		for (int j = 0;j < lim; j += (i << 1)) {
    			int *f = A + j, *g = f + i; ll *e = E + i;
    			// Two butterflies per iteration (the extra k++ in the body);
    			// i is even here, so the second one never runs past i.
    			for (int k = 0;k < i; k++) {
    				int x = f[k], y = e[k] * g[k] % P;
    				f[k] = add(x, y), g[k] = add(x , P - y);
    				k++;
    				x = f[k], y = e[k] * g[k] % P;
    				f[k] = add(x, y), g[k] = add(x , P - y);
    			}
    		}
    	}
    }
    
    // Binary exponentiation: returns x raised to the power mi, reducing every
    // product modulo P. x is not reduced before the first multiplication.
    ll fpw(ll x, ll mi) {
        ll result = 1;
        while (mi) {
            if (mi & 1) {
                result = result * x % P;
            }
            x = x * x % P;
            mi >>= 1;
        }
        return result;
    }
    
    int m, n;
    int main() {
    	read(n), read(m);
    	for (int i = 0;i <= n; i++) read(A[i]);
    	for (int i = 0;i <= m; i++) read(B[i]);
    	while (lim <= (n + m)) lim <<= 1, L++;
    	int len = lim >> 1;
    	for (int i = 1;i < lim; i++)
    		r[i] = r[i >> 1] >> 1 | ((i & 1) ? len : 0);
    	E[1] = 1;
    	for (int i = 2;i < lim; i <<= 1) {
    		ll *e0 = E + i / 2, *e1 = E + i;
    		ll w = fpw(3, (P - 1) / (i << 1));
    		for (int j = 0;j < i; j += 2) 
    			e1[j] = e0[j>>1], e1[j+1] = e1[j] * w % P;
    	}
    	dft(A), dft(B);
    	for (int i = 0;i < lim; i++) A[i] = (ll)A[i] * B[i] % P;
    	dft(A); reverse(A + 1, A + lim); int inv = fpw(lim, P - 2);
    	for (int i = 0;i <= n + m; i++) write(1ll * A[i] * inv % P, ' ');
    	return 0;
    }
    

    下面是 fft

    #include <queue>
    #include <vector>
    #include <iostream>
    #include <cstdio>
    #include <cstring>
    #include <algorithm>
    #include <cmath>
    #define MP make_pair
    #define ll long long
    #define fi first
    #define se second
    using namespace std;
    
    /*
     * Reads one decimal integer from stdin and returns it.
     *
     * Every character before the first digit is skipped; if any of the
     * skipped characters is '-', the result is negated. Reading stops at the
     * first non-digit after the number, and that character is consumed.
     * Returns 0 if the input ends before a digit is found (the original
     * stored getchar() in a char, could not recognise EOF and looped forever).
     * Overflow of int is not detected.
     */
    int read(void) {
        int value = 0;
        bool negative = false;
        // Keep the character as int so that EOF stays distinguishable, and
        // compare ranges directly instead of calling isdigit on a char.
        int c = getchar();
        while (c != EOF && (c < '0' || c > '9')) {
            if (c == '-') negative = true;
            c = getchar();
        }
        while (c >= '0' && c <= '9') {
            value = value * 10 + (c - '0');
            c = getchar();
        }
        return negative ? -value : value;
    }
    
    /*
     * Prints the integer x in decimal to stdout, followed by the character ed
     * (a newline by default).
     *
     * The default argument is the escape sequence '\n'; the published listing
     * had it split into a raw line break, which does not compile.
     * Digits are taken from the remainders of the still-signed value, so
     * INT_MIN prints correctly instead of overflowing on negation.
     */
    inline void write(int x, char ed = '\n')
    {
        char digits[16];  // an int has at most 10 decimal digits
        int count = 0;
        const bool negative = x < 0;
        do {
            const int digit = x % 10;
            digits[count++] = static_cast<char>('0' + (digit < 0 ? -digit : digit));
            x /= 10;
        } while (x);
        if (negative) putchar('-');
        // Digits were produced least-significant first; emit them reversed.
        while (count) putchar(digits[--count]);
        putchar(ed);
    }
    
    // Raises x to y when y is larger ("x = max(x, y)" in place).
    template <typename T>
    inline void Mx(T &x, T y) {
        if (x < y) {
            x = y;
        }
    }
    
    // Lowers x to y when y is smaller ("x = min(x, y)" in place).
    template <typename T>
    inline void Mn(T &x, T y) {
        if (x > y) {
            x = y;
        }
    }
    
    // Shorthand used by the operator declarations of struct com:
    // "op +" expands to "com operator +" (return type plus keyword).
    #define op com operator 
    // Shorthand for the const qualifier.
    #define con const
    // Floating-point type of the complex components.
    typedef double db;
    // Capacity of the global work arrays.
    const int N = 3000005;
    // Pi, obtained at start-up as acos(-1.0).
    const double Pi = acos(-1.0);
    /*
     * Minimal complex number: x is the real part, y the imaginary part.
     * Provides only what the transform needs: +, -, *, unary minus, and
     * mi(), which returns the value multiplied by the imaginary unit.
     * A is the global work array that dft transforms in place; E is the
     * table of twiddle factors.
     */
    struct com {
        double x, y;

        com(double a = 0, double b = 0) : x(a), y(b) {}

        com operator+(const com &w) const { return com(x + w.x, y + w.y); }

        com operator-(const com &w) const { return com(x - w.x, y - w.y); }

        com operator*(const com &w) const {
            return com(x * w.x - y * w.y, x * w.y + y * w.x);
        }

        com operator-() const { return com(-x, -y); }

        // (x + yi) * i = -y + xi: a rotation that needs no multiplication.
        com mi() const { return com(-y, x); }
    } A[N], E[N];
    
    // r: index permutation applied at the start of dft (filled by main).
    // lim: transform length, starts at 1 and is only ever doubled by main
    //      before the transforms run.
    // L: number of times lim was doubled; main increments it, nothing reads it.
    int r[N], lim = 1, L; 
    /*
     * In-place forward complex FFT of A[0 .. lim).
     *
     * Relies on globals prepared by main before the first call:
     *   - lim : transform length (a power of two),
     *   - r[] : bit-reversal permutation of the indices 0 .. lim-1,
     *   - E[] : twiddle table; E[i + k] (0 <= k < i) is the k-th power of
     *           cos(Pi / i) + i*sin(Pi / i), so the butterfly layer of
     *           half-width i reads its factors contiguously from E + i.
     * There is no inverse flag: main obtains the inverse transform by calling
     * dft again, reversing A[1 .. lim) and dividing by the length on output.
     */
    void dft(com *A) {
    	// Reorder into bit-reversed order; the r[i] > i test swaps each pair once.
    	for (int i = 1;i < lim; i++)
    		if (r[i] > i) swap(A[i], A[r[i]]);
    	// Layer of half-width 1: its only twiddle is E[1] = 1, so no multiplication.
    	if (lim >= 2) 
    	for (int j = 0;j < lim; j += 2) {
    		com x = A[j], y = A[j+1];
    		A[j] = x + y, A[j+1] = x - y;
    	}
    	// Layer of half-width 2, written out by hand. The second butterfly
    	// uses mi() (multiply by the imaginary unit) in place of E[3], which
    	// main computes from cos(Pi / 2) and sin(Pi / 2).
    	if (lim >= 4)
    	for (int j = 0;j < lim; j += 4) {
    		com x = A[j], y = A[j+2];
    		A[j] = x + y, A[j+2] = x - y;
    		x = A[j+1], y = A[j+3].mi();
    		A[j+1] = x + y, A[j+3] = x - y;
    	}
    	// Layer of half-width 4, written out by hand: E[4] = 1 is skipped and
    	// mi() stands in for E[6] (a copy of E[3]); E[5] and E[7] are real
    	// complex multiplications.
    	if (lim >= 8)
    	for (int j = 0;j < lim; j += 8) {
    		com x = A[j], y = A[j+4];
    		A[j] = x + y, A[j+4] = x - y;
    		x = A[j+1], y = A[j+5] * E[5];
    		A[j+1] = x + y, A[j+5] = x - y;
    		x = A[j+2], y = A[j+6].mi();
    		A[j+2] = x + y, A[j+6] = x - y;
    		x = A[j+3], y = A[j+7] * E[7];
    		A[j+3] = x + y, A[j+7] = x - y;
    	}
    	// Remaining layers (half-width 8 and up): f is the lower half of the
    	// current group, g the upper half, e the twiddles of this layer.
    	for (int i = 8;i < lim; i <<= 1) {
    		for (int j = 0;j < lim; j += (i << 1)) {
    			com *f = A + j, *g = f + i, *e = E + i;
    			// Two butterflies per iteration (the extra k++ in the body);
    			// i is even here, so the second one never runs past i.
    			for (int k = 0;k < i; k++) {
    				com x = f[k], y = g[k] * e[k];
    				f[k] = x + y, g[k] = x - y;
    				k++;
    				x = f[k], y = g[k] * e[k];
    				f[k] = x + y, g[k] = x - y;
    			}
    		}
    	}
    }
    
    int m, n;
    int main() {
    	n = read(), m = read();
    	for (int i = 0;i <= n; i++) A[i].x = read();
    	for (int i = 0;i <= m; i++) A[i].y = read();
    	while (lim <= (n + m)) lim <<= 1, L++;
    	int len = lim >> 1;
    	for (int i = 1;i < lim; i++)
    		r[i] = r[i >> 1] >> 1 | ((i & 1) ? len : 0);
    	E[1] = com(1, 0);
    	for (int i = 2;i < lim; i <<= 1) {
    		com *e0 = E + i / 2, *e1 = E + i;
    		com w(cos(Pi / i), sin(Pi / i));
    		for (int j = 0;j < i; j += 2) 
    			e1[j] = e0[j>>1], e1[j+1] = e1[j] * w;
    	}
    	dft(A);
    	for (int i = 0;i < lim; i++) A[i] = A[i] * A[i];
    	dft(A); reverse(A + 1, A + lim); lim *= 2;
    	for (int i = 0;i <= n + m; i++)
    		write((int)(A[i].y / lim + 0.5), ' ');
    	return 0;
    }
    
  • 相关阅读:
    文件操作小练习
    阶段练习1
    copy小练习
    小练习
    str 小列题
    条款50:使用自定义的new以及delete的时机会
    条款49:了解new-handle行为
    简单地说一下:traits技法就是一种模板元编程,它可以将本来处于运行期的事拉到编译期来做,增加了运行效率。 看一个非模板元编程的例子,就是前面的那个例子:
    条款47:请使用traits class表示类型信息
    条款46:需要类型转换的时候请为模板定义非成员函数
  • 原文地址:https://www.cnblogs.com/Hs-black/p/13414751.html
Copyright © 2020-2023  润新知