• luogu5282 【模板】快速阶乘算法


    由于巨佬 shadowice1984 卡时限,本代码已经 T 请不要粘上去交

    退役之后再写一个常数小的多项式取模吧

    一句话题意:NP问题,求N!%P

    吐槽:出题人太毒瘤...必须写任意模数NTT,而且加法取模还溢出...

    我常数太大,粘的好久以前写的多项式取模,卡了卡常才A,大家1e3 1e4不要写vector,不要参考下面的代码

    orz shadowice1984 写 (O(sqrt nlog n)) 吊打我的 (O(sqrt nlog^2 n))

    以下是 (O(sqrt nlog^2 n)) 的题解

    前置芝士: 多项式多点求值、多项式取模、多项式求逆

    出门左转你谷模板区,包教不包会

    前置芝士: 任意模数NTT

    出门左转你谷模板区,包教不包会

    本题题解

    首先我们发现p是2^31-1的

    你可以考虑像分段打表那样根号分块,把1~p分成 (O(sqrt p))

    然后你求出每一份的值来,最后边角暴力就行了

    那么怎么求呢

    你会发现第一块是 ((1*2*...*s)), 第二块是 (((s+1)*(s+2)*...*(s+s)))

    第i块就是 ((s(i-1)+1)*(s(i-1)+2)*(s(i-1)+3)*(s(i-1)+s))

    我们发现这是一个关于i的多项式,可以用分治+NTT在 (O(sqrt p log^2p))的时间内求出这个多项式

    然后你要求出第i=1...s的每一个数的值,也就是每一块数的积,你会发现是一个多项式多点求值,复杂度也是(O(sqrt plog ^2p))

    直接去隔壁模板区把多项式多点求值板子粘过来就行了

    由于出题人故意卡模数,需要把FFT换成任意模数NTT...

    然后你就在线A题了...

    代码太丑,用vector xjb写的

    #include <bits/stdc++.h>
    using namespace std;
    
    #define int long long
    
    int n, p, s;
    const int sb = 32768, sb2 = 1073741824;
    const double pi = acos(-1);
    
    int qpow(int x, int y)
    {
    	int res = 1;
    	for (x %= p; y > 0; y >>= 1, x = x * (long long)x % p)
    		if (y & 1) res = res * (long long)x % p;
    	return res;
    }
    
    struct Complex { double real, imag; Complex(double r = 0, double i = 0) : real(r), imag(i) { } };
    Complex a1[600000], a2[600000], b1[600000], b2[600000], a1b1[600000], ab[600000], a2b2[600000];
    Complex operator+(const Complex &a, const Complex &b) { return Complex(a.real + b.real, a.imag + b.imag); }
    Complex operator-(const Complex &a, const Complex &b) { return Complex(a.real - b.real, a.imag - b.imag); }
    Complex operator*(const Complex &a, const Complex &b) { return Complex(a.real * b.real - a.imag * b.imag, a.real * b.imag + b.real * a.imag); }
    Complex *w[22];
    Complex getw(int x, int y, int falg) { return Complex(w[x][y].real, falg * w[x][y].imag); }
    int *r[22];
    
    void fftinit()
    {
    	for (int i = 0; i < 19; i++)
    	{
    		w[i] = new Complex[1 << i], r[i] = new int[1 << i];
    		for (int j = 0; j < (1 << i); j++) w[i][j] = Complex(cos(pi * j / (1 << i)), sin(pi * j / (1 << i)));
    		r[i][0] = 0;
    		for (int j = 1; j < (1 << i); j++) r[i][j] = (r[i][j >> 1] >> 1) | ((j & 1) * (1 << (i - 1)));
    	}
    }
    
    void fft(Complex *a, int len, int loglen, int falg)
    {
    	Complex w, t;
    	for (int i = 0; i < len; i++) if (r[loglen][i] < i) swap(a[i], a[r[loglen][i]]);
    	for (int i = 1, logi = 0; i < len; logi++, i <<= 1) for (int j = 0; j < len; j += i << 1) for (int k = 0; k < i; k++)
    		w = getw(logi, k, falg), t = a[j + k + i] * w, a[j + k + i] = a[j + k] - t, a[j + k] = a[j + k] + t;
    	if (falg == -1) for (int i = 0; i < len; i++) a[i].real /= len, a[i].imag /= len;
    }
    
    int toint(Complex x) { return (((long long)(round(x.real) + 0.5)) % p + p) % p; }
    
    vector<int> operator*(vector<int> a, vector<int> b)
    {
    	int len = 1, loglen = 0; int sz = a.size() + b.size() - 1; while (len < sz) len <<= 1, loglen++;
    	a.resize(len), b.resize(len);
    	vector<int> res;
    	for (int i = 0; i < len; i++) a1[i] = a[i] / sb, a2[i] = a[i] % sb, b1[i] = b[i] / sb, b2[i] = b[i] % sb;
    	fft(a1, len, loglen, 1), fft(a2, len, loglen, 1), fft(b1, len, loglen, 1), fft(b2, len, loglen, 1);
    	for (int i = 0; i < len; i++) a1b1[i] = a1[i] * b1[i], ab[i] = a1[i] * b2[i] + a2[i] * b1[i], a2b2[i] = a2[i] * b2[i];
    	fft(a1b1, len, loglen, -1), fft(ab, len, loglen, -1), fft(a2b2, len, loglen, -1);
    	for (int i = 0; i < len; i++)
    		res.push_back(((toint(a1b1[i]) * (long long)sb2 % p + toint(ab[i]) * (long long)sb % p) % p + toint(a2b2[i])) % p);
    	res.resize(sz);
    	return res;
    }
    
    vector<int> operator+(vector<int> a, vector<int> b)
    {
    	vector<int> res; res.resize(max(a.size(), b.size()));
    	a.resize(res.size()); b.resize(res.size());
    	for (int i = 0; i < (int)res.size(); i++) res[i] = (a[i] + b[i]) % p;
    	return res;
    }
    
    vector<int> operator-(vector<int> a, vector<int> b)
    {
    	vector<int> res; res.resize(max(a.size(), b.size()));
    	a.resize(res.size()); b.resize(res.size());
    	for (int i = 0; i < (int)res.size(); i++) res[i] = ((a[i] - b[i]) % p + p) % p;
    	return res;
    }
    
    vector<int> poly_inv(vector<int> a)
    {
    	if (a.size() == 1) { a[0] = qpow(a[0], p - 2); return a; }
    	int n = a.size(), newsz = (n + 1) >> 1;
    	vector<int> b(a); b.resize(newsz); b = poly_inv(b);
    	vector<int> c(a * b);
    	for (int &i: c) i = (p - i) % p;
    	c[0] = (c[0] + 2) % p; a = c * b; a.resize(n); return a;
    }
    
    // vector<int> poly_r(vector<int> a) { reverse(a.begin(), a.end());  return a; }
    
    void div(vector<int> f, vector<int> g, vector<int> &q, vector<int> &r)
    {
    	int n = f.size() - 1, m = g.size() - 1;
    	vector<int> gr = g; reverse(gr.begin(), gr.end());
    	gr.resize(n - m + 1);
    	q = f;
    	reverse(q.begin(), q.end());
    	q = q * poly_inv(gr);
    	q.resize(n - m + 1);
    	reverse(q.begin(), q.end());
    	r = f - g * q;
    	r.resize(m);
    	// vector<int> gq = g * q;
    	// r.resize(m);
    	// gq.resize(m);
    	// f.resize(m);
    	// for (int i = 0; i < m; i++)
    		// r[i] = ((f[i] - gq[i]) % p + p) % p;
    }
    
    vector<int> zz[200010];
    int res[100010];
    
    vector<int> fuck(int l, int r)
    {
    	if (l == r) { vector<int> res; res.push_back(l), res.push_back(s); return res; }
    	int mid = (l + r) / 2;
    
    	return fuck(l, mid) * fuck(mid + 1, r);
    }
    
    void prework(int x, int cl, int cr)
    {
    	if (cl == cr)
    	{
    		zz[x].push_back((p - cl) % p), zz[x].push_back(1);
    		return;
    	}
    	int mid = (cl + cr) / 2;
    	prework(x * 2, cl, mid), prework(x * 2 + 1, mid + 1, cr);
    	zz[x] = zz[x * 2] * zz[x * 2 + 1];
    }
    
    void work(int x, int cl, int cr, vector<int> poly)
    {
    	if (cr - cl <= 400)
    	{
    		int sb = poly.size();
    		for (int t = cl; t <= cr; t++)
    		{
    			int tmp = 1;
    			for (int i = 0; i < sb; i++)
    				res[t] = (res[t] + tmp * (long long)poly[i] % p) % p, tmp = tmp * (long long)t % p;
    		}
    		return;
    	}
    	vector<int> tmp, rel, rer;
    	div(poly, zz[x * 2], tmp, rel);
    	div(poly, zz[x * 2 + 1], tmp, rer);
    	int mid = (cl + cr) / 2;
    	work(x * 2, cl, mid, rel), work(x * 2 + 1, mid + 1, cr, rer);
    }
    
    signed main()
    {
    	fftinit();
    	scanf("%lld%lld", &n, &p);
    	// n = 998244353, p = 2147483647;
    	s = sqrt(p) + 1;
    	vector<int> poly = fuck(1, s);
    	prework(1, 0, s);
    	// printf("prework ok
    ");
    	work(1, 0, s, poly);
    	// printf("work ok
    ");
    	int ans = 1;
    	for (int i = n / s * s + 1; i <= n; i++) ans = ans * (long long)i % p;
    	for (int i = 0; i < n / s; i++) ans = ans * (long long)res[i] % p;
    	printf("%lld
    ", ans);
    	return 0; //拜拜程序~
    }
    
  • 相关阅读:
    Codeforces Beta Round #25 (Div. 2 Only) D. Roads not only in Berland(DFS+并查集)
    HDU 3473 Minimum Sum(划分树)
    Oracle锁表与解锁 对象锁与解锁
    Java实现多个客户端聊天程序
    Javacript实现颜色渐变的效果
    生成对象的模式(PHP)
    C语言实现的简单通讯录例子
    C语言实现加密通讯录雏形
    TP中Model
    php整合Apache
  • 原文地址:https://www.cnblogs.com/oier/p/10652775.html
Copyright © 2020-2023  润新知