• 任意模数NTT和FFT的玄学优化学习笔记


    本来一直都是写 7 次 FFT 的 MTT 的……然后被 shadowice 巨巨调教了一通之后只好去学一下 4 次 FFT 的了……

    简单来说就是我们现在需要处理一类模数不是 NTT 模数的多项式乘法

    这里是板子

    三模 NTT

    跑的很慢而且我也不会,这里就不说了

    拆系数 FFT

    对于两个多项式 \(P(z),Q(z)\),我们把它们的每个系数按 \(2^{15}\) 拆成高位和低位两部分:

    \[A(z)=\sum_{i=0}^{\infty} (P_i \gg 15)z^i,\quad B(z)=\sum_{i=0}^{\infty} (P_i \mathbin{\&} 32767)z^i \]

    \[C(z)=\sum_{i=0}^{\infty} (Q_i \gg 15)z^i,\quad D(z)=\sum_{i=0}^{\infty} (Q_i \mathbin{\&} 32767)z^i \]

    那么我们只要计算出 \((A\times C)\ll 30\),\((A\times D+B\times C)\ll 15\),\(B\times D\),然后把它们的对应系数加起来并对模数取模就可以了

    //minamoto
    #include<bits/stdc++.h>
    #define R register
    #define ll long long
    #define fp(i,a,b) for(R int i=a,I=b+1;i<I;++i)
    #define fd(i,a,b) for(R int i=a,I=b-1;i>I;--i)
    #define go(u) for(int i=head[u],v=e[i].v;i;i=e[i].nx,v=e[i].v)
    using namespace std;
    // Buffered stdin reader: buf holds the current chunk and [p1,p2) is its unread part.
    char buf[1<<21],*p1=buf,*p2=buf;
    // Returns the next input byte as a value in 0..255, or EOF once stdin is exhausted.
    // The buffer is refilled with a single fread whenever the unread window is empty.
    inline int nextByte(){
        if(p1==p2){
            p2=(p1=buf)+fread(buf,1,sizeof buf,stdin);
            if(p1==p2)return EOF;
        }
        return (unsigned char)*p1++;
    }
    // Char-returning form of nextByte(), kept with its original signature (EOF is narrowed to char).
    inline char getc(){return (char)nextByte();}
    // Parses the next decimal integer from stdin.
    // Every non-digit byte before the number is skipped; if a '-' is among them the result is negated.
    // The byte that terminates the digit run is consumed as well.
    // Returns 0 when the input ends before any digit is found (the old loop never terminated there).
    int read(){
        int c=nextByte(),sign=1;
        while(c!=EOF&&(c<'0'||c>'9')){
            if(c=='-')sign=-1;
            c=nextByte();
        }
        if(c==EOF)return 0;
        // Accumulate in unsigned: "res*10+ch-'0'" overflowed int for values close to INT_MAX.
        unsigned res=0;
        for(;c>='0'&&c<='9';c=nextByte())res=res*10u+(unsigned)(c-'0');
        return (int)(sign<0?0u-res:res);
    }
    // Output buffer: sr[0..K] holds pending bytes (K==-1 means empty); z/Z is a scratch digit stack.
    char sr[1<<21],z[20];int K=-1,Z=0;
    // Writes the pending bytes to stdout and marks the buffer empty.
    inline void Ot(){fwrite(sr,1,K+1,stdout),K=-1;}
    // Appends the decimal form of x followed by one space to the output buffer,
    // flushing first when more than 2^20 bytes are already pending.
    void print(int x){
        if(K>1<<20)Ot();
        // Take the magnitude in unsigned arithmetic: "x=-x" is undefined for INT_MIN.
        unsigned v=x;
        if(x<0)sr[++K]='-',v=0u-v;
        // Digits come out least-significant first, so stack them and then pop in reverse.
        do z[++Z]=char('0'+v%10);while(v/=10);
        while(Z)sr[++K]=z[Z--];
        sr[++K]=' ';
    }
    // Capacity of every work array, and the constant pi.
    const int N=500005;
    const double Pi=std::acos(-1.0);
    // Minimal complex number (x = real part, y = imaginary part) used by the FFT.
    struct cp{
        double x,y;
        // Both parts default to zero, so cp() is 0 and cp(a) is the real number a.
        cp(double re=0,double im=0):x(re),y(im){}
        // Component-wise sum.
        inline cp operator +(const cp &o)const{return cp(x+o.x,y+o.y);}
        // Component-wise difference.
        inline cp operator -(const cp &o)const{return cp(x-o.x,y-o.y);}
        // Complex product: (x+yi)(o.x+o.y i).
        inline cp operator *(const cp &o)const{
            return cp(x*o.x-y*o.y,x*o.y+y*o.x);
        }
        // Scaling by a real factor.
        inline cp operator *(const double &k)const{return cp(x*k,y*k);}
    }A[N],B[N],C[N],D[N],H[N],F[N],G[N],w[N];
    // Shared state filled in by main: r is the bit-reversal permutation, lim the transform
    // length (a power of two), l = log2(lim); n and m are the two degrees, P the modulus and
    // x a scratch variable for the value just read.
    int r[N];
    int lim=1,l;
    int n,m,P,x;
    // In-place iterative radix-2 FFT of A[0..lim-1] using the twiddle table w
    // (w[half+k] is the k-th twiddle for butterflies of half-width `half`).
    // ty==-1 selects the inverse transform: the forward result is reversed on [1,lim)
    // and every entry is divided by lim. Any other ty gives the forward transform.
    void FFT(cp *A,int ty){
        // Reorder the input into bit-reversed order (each pair is swapped once).
        for(int i=0;i<lim;++i){
            const int j=r[i];
            if(i<j)std::swap(A[i],A[j]);
        }
        for(int half=1;half<lim;half*=2){
            const cp *tw=w+half;
            for(int base=0;base<lim;base+=half*2){
                cp *lower=A+base,*upper=lower+half;
                for(int k=0;k<half;++k){
                    const cp u=lower[k],v=tw[k]*upper[k];
                    lower[k]=u+v;
                    upper[k]=u-v;
                }
            }
        }
        if(ty!=-1)return;
        std::reverse(A+1,A+lim);
        const double scale=1.0/lim;
        for(int i=0;i<lim;++i)A[i]=A[i]*scale;
    }
    int main(){
    //	freopen("testdata.in","r",stdin);
        n=read(),m=read(),P=read();
        while(lim<=n+m)lim<<=1,++l;
        fp(i,0,lim-1)r[i]=(r[i>>1]>>1)|((i&1)<<(l-1));
        for(R int i=1;i<lim;i<<=1)fp(k,0,i-1)w[i+k]=cp(cos(Pi*k/i),sin(Pi*k/i));
        fp(i,0,n)x=read(),A[i].x=x>>15,B[i].x=x&32767;
        fp(i,0,m)x=read(),C[i].x=x>>15,D[i].x=x&32767;
        FFT(A,1),FFT(B,1),FFT(C,1),FFT(D,1);
        fp(i,0,lim-1)
            F[i]=A[i]*C[i],G[i]=A[i]*D[i]+B[i]*C[i],H[i]=B[i]*D[i];
        FFT(F,-1),FFT(G,-1),FFT(H,-1);
        fp(i,0,n+m)print((((ll)(F[i].x+0.5)%P<<30)+((ll)(G[i].x+0.5)<<15)+((ll)(H[i].x+0.5)))%P);
        return Ot(),0;
    }
    

    FFT 的优化

    设 \(A(x),B(x)\) 是两个实系数多项式,令

    \[\begin{aligned} P(x)&=A(x)+iB(x) \\ Q(x)&=A(x)-iB(x) \end{aligned} \]

    \(P'[k]\)、\(Q'[k]\) 分别表示 \(P(x)\)、\(Q(x)\) 进行 DFT 之后得到的序列

    \(P'[k]=P(\omega_n^k),\ Q'[k]=Q(\omega_n^k)\),即代入 \(n\) 次单位根之后的点值

    显然有

    \[A'[k]=\frac{P'[k]+Q'[k]}{2} \]

    \[B'[k]=\frac{P'[k]-Q'[k]}{2i} \]

    这有啥用啊,不还是两次 FFT 么……

    但实际上我们是可以通过 \(P'[k]\) 直接求出 \(Q'[k]\) 的

    推导什么的太长了就直接拉过来好了

    \(\text{conj}(x)\) 表示 \(x\) 的共轭复数(实部相等,虚部相反),\(A_i\) 表示 \(A(x)\) 的第 \(i\) 次项系数

    \[\begin{aligned}
    P'[k] &= A(\omega_{n}^{k}) + i B(\omega_{n}^{k}) \\
    &= \sum_{j=0}^{n-1} \left(A_{j} \omega_{n}^{jk} + i B_{j} \omega_{n}^{jk}\right) \\
    &= \sum_{j=0}^{n-1} (A_{j} + i B_{j}) \left(\cos \left(\frac{2 \pi jk}{n}\right) + i \sin \left(\frac{2 \pi jk}{n}\right)\right) \\
    \\
    Q'[k] &= A(\omega_{n}^{k}) - i B(\omega_{n}^{k}) \\
    &= \sum_{j=0}^{n-1} \left(A_{j} \omega_{n}^{jk} - i B_{j} \omega_{n}^{jk}\right) \\
    &= \sum_{j=0}^{n-1} (A_{j} - i B_{j}) \left(\cos \left(\frac{2 \pi jk}{n}\right) + i \sin \left(\frac{2 \pi jk}{n}\right)\right) \\
    &= \sum_{j=0}^{n-1} \left[\left(A_{j} \cos \left(\frac{2 \pi jk}{n}\right) + B_{j} \sin \left(\frac{2 \pi jk}{n}\right)\right) + i \left(A_{j} \sin \left(\frac{2 \pi jk}{n}\right) - B_{j} \cos \left(\frac{2 \pi jk}{n}\right)\right)\right] \\
    &= \text{conj} \left( \sum_{j=0}^{n-1} \left[\left(A_{j} \cos \left(\frac{2 \pi jk}{n}\right) + B_{j} \sin \left(\frac{2 \pi jk}{n}\right)\right) - i \left(A_{j} \sin \left(\frac{2 \pi jk}{n}\right) - B_{j} \cos \left(\frac{2 \pi jk}{n}\right)\right)\right] \right) \\
    &= \text{conj} \left( \sum_{j=0}^{n-1} \left[\left(A_{j} \cos \left(\frac{-2 \pi jk}{n}\right) - B_{j} \sin \left(\frac{-2 \pi jk}{n}\right)\right) + i \left(A_{j} \sin \left(\frac{-2 \pi jk}{n}\right) + B_{j} \cos \left(\frac{-2 \pi jk}{n}\right)\right)\right] \right) \\
    &= \text{conj} \left( \sum_{j=0}^{n-1} (A_{j} + i B_{j}) \left(\cos \left(\frac{-2 \pi jk}{n}\right) + i \sin \left(\frac{-2 \pi jk}{n}\right)\right) \right) \\
    &= \text{conj} \left( \sum_{j=0}^{n-1} (A_{j} + i B_{j}) \omega_{n}^{-jk} \right) \\
    &= \text{conj} \left( \sum_{j=0}^{n-1} (A_{j} + i B_{j}) \omega_{n}^{(n-k)j} \right) \\
    &= \text{conj} (P'[n-k])
    \end{aligned} \]

    注意这里的下标是在模 \(n\) 意义下的(因为 \(\omega_n^n=1\),也就是多项式是在模 \(x^n-1\) 意义下的),所以 \(k=0\) 时需要特殊判断:\(Q'[0]=\text{conj}(P'[n])=\text{conj}(P'[0])\)

    于是我们就可以通过 \(P'[k]\) 得到 \(Q'[k]\),进而得到 \(A'[k]\) 和 \(B'[k]\),总共只要一次 FFT 就可以了

    IDFT 的话同理:令 \(P'[k]=A'[k]+iB'[k]\),我们只要把 \(P'\) IDFT 一下,结果的实部和虚部就分别对应 \(A(x)\) 和 \(B(x)\) 的系数

    实际上这个东西可以在任意两个多项式做乘法的时候用到,可以优化到 2 次 FFT(然而在 NTT 模数下 FFT 比 NTT 慢的不是一点点……)

    具体细节可以看代码理解

    //minamoto
    #include<bits/stdc++.h>
    #define R register
    #define ll long long
    #define fp(i,a,b) for(R int i=(a),I=(b)+1;i<I;++i)
    #define fd(i,a,b) for(R int i=(a),I=(b)-1;i>I;--i)
    #define go(u) for(int i=head[u],v=e[i].v;i;i=e[i].nx,v=e[i].v)
    using namespace std;
    // Buffered stdin reader: buf holds the current chunk and [p1,p2) is its unread part.
    char buf[1<<21],*p1=buf,*p2=buf;
    // Returns the next input byte as a value in 0..255, or EOF once stdin is exhausted.
    // The buffer is refilled with a single fread whenever the unread window is empty.
    inline int nextByte(){
        if(p1==p2){
            p2=(p1=buf)+fread(buf,1,sizeof buf,stdin);
            if(p1==p2)return EOF;
        }
        return (unsigned char)*p1++;
    }
    // Char-returning form of nextByte(), kept with its original signature (EOF is narrowed to char).
    inline char getc(){return (char)nextByte();}
    // Parses the next decimal integer from stdin.
    // Every non-digit byte before the number is skipped; if a '-' is among them the result is negated.
    // The byte that terminates the digit run is consumed as well.
    // Returns 0 when the input ends before any digit is found (the old loop never terminated there).
    int read(){
        int c=nextByte(),sign=1;
        while(c!=EOF&&(c<'0'||c>'9')){
            if(c=='-')sign=-1;
            c=nextByte();
        }
        if(c==EOF)return 0;
        // Accumulate in unsigned: "res*10+ch-'0'" overflowed int for values close to INT_MAX.
        unsigned res=0;
        for(;c>='0'&&c<='9';c=nextByte())res=res*10u+(unsigned)(c-'0');
        return (int)(sign<0?0u-res:res);
    }
    // Output buffer: sr[0..C] holds pending bytes (C==-1 means empty); z/Z is a scratch digit stack.
    char sr[1<<21],z[20];int C=-1,Z=0;
    // Writes the pending bytes to stdout and marks the buffer empty.
    inline void Ot(){fwrite(sr,1,C+1,stdout),C=-1;}
    // Appends the decimal form of x followed by one space to the output buffer,
    // flushing first when more than 2^20 bytes are already pending.
    void print(int x){
        if(C>1<<20)Ot();
        // Take the magnitude in unsigned arithmetic: "x=-x" is undefined for INT_MIN.
        unsigned v=x;
        if(x<0)sr[++C]='-',v=0u-v;
        // Digits come out least-significant first, so stack them and then pop in reverse.
        do z[++Z]=char('0'+v%10);while(v/=10);
        while(Z)sr[++C]=z[Z--];
        sr[++C]=' ';
    }
    // Capacity of every work array (2^18 plus slack), and the constant pi.
    const int N=(1<<18)+5;const double Pi=std::acos(-1.0);
    // Minimal complex number (x = real part, y = imaginary part) used by the FFT.
    struct cp{
        double x,y;
        // Leaves x and y untouched; the arrays declared below have static storage and so start at zero.
        inline cp(){}
        // Parameters were declared `register`, which is ill-formed since C++17.
        inline cp(double xx,double yy):x(xx),y(yy){}
        // Component-wise sum.
        inline cp operator +(const cp &b)const{return cp(x+b.x,y+b.y);}
        // Component-wise difference.
        inline cp operator -(const cp &b)const{return cp(x-b.x,y-b.y);}
        // Complex product.
        inline cp operator *(const cp &b)const{return cp(x*b.x-y*b.y,x*b.y+y*b.x);}
        // Scaling by a real factor.
        inline cp operator *(const double &b)const{return cp(x*b,y*b);}
        // Complex conjugate.
        inline cp operator ~()const{return cp(x,-y);}
    }w[2][N],a[N],b[N],f[N],g[N];
    // Shared state filled in by main: r is the bit-reversal permutation, n and m the two
    // degrees, lim the transform length (a power of two), d = log2(lim), P the modulus and
    // iv = 1/lim.
    int r[N];
    int n,m,lim,d,P;
    double iv;
    // In-place iterative radix-2 FFT of A[0..lim-1].
    // ty selects the twiddle table w[ty]: main fills w[1] with the forward roots and w[0]
    // with their conjugates. With ty==0 the result is additionally scaled by iv, which makes
    // it the inverse transform.
    void FFT(cp *A,int ty){
        // Reorder the input into bit-reversed order (each pair is swapped once).
        for(int i=0;i<lim;++i){
            const int j=r[i];
            if(i<j)std::swap(A[i],A[j]);
        }
        for(int half=1;half<lim;half*=2){
            const cp *tw=w[ty]+half;
            for(int base=0;base<lim;base+=half*2){
                cp *lower=A+base,*upper=lower+half;
                for(int k=0;k<half;++k){
                    const cp t=tw[k]*upper[k];
                    upper[k]=lower[k]-t;
                    lower[k]=lower[k]+t;
                }
            }
        }
        if(ty)return;
        for(int i=0;i<lim;++i)A[i]=A[i]*iv;
    }
    int main(){
    //	freopen("testdata.in","r",stdin);
    	n=read(),m=read(),P=read();
    	lim=1,d=0;while(lim<=n+m)lim<<=1,++d;iv=1.0/lim;
    	fp(i,0,lim-1)r[i]=(r[i>>1]>>1)|((i&1)<<(d-1));
    	for(R int i=0,x;i<=n;++i)x=read(),f[i]=cp(x>>15,x&32767);
    	for(R int i=0,x;i<=m;++i)x=read(),g[i]=cp(x>>15,x&32767);
    	for(R int i=1;i<lim;i<<=1)fp(k,0,i-1)
    		w[1][i+k]=cp(cos(Pi*k/i),sin(Pi*k/i)),w[0][i+k]=cp(cos(Pi*k/i),-sin(Pi*k/i));
    	fp(i,n+1,lim-1)f[i]=cp(0,0);fp(i,m+1,lim-1)g[i]=cp(0,0);
    	FFT(f,1),FFT(g,1);
    	fp(i,0,lim-1){
    		static cp q,f0,f1,g0,g1;
    		q=~f[i?lim-i:0],f0=(f[i]-q)*cp(0,-0.5),f1=(f[i]+q)*0.5,
    		q=~g[i?lim-i:0],g0=(g[i]-q)*cp(0,-0.5),g1=(g[i]+q)*0.5;
    		a[i]=f1*g1,b[i]=f0*g1+f1*g0+f0*g0*cp(0,1);
    	}
    	FFT(a,0),FFT(b,0);
    	fp(i,0,n+m)print((((ll)(a[i].x+0.5)%P<<30)+((ll)(b[i].x+0.5)<<15)+((ll)(b[i].y+0.5)))%P);
    	return Ot(),0;
    }
    

    顺便放一下多项式求逆的好了

    板子题

    //minamoto
    #include<bits/stdc++.h>
    #define R register
    #define ll long long
    #define fp(i,a,b) for(R int i=(a),I=(b)+1;i<I;++i)
    #define fd(i,a,b) for(R int i=(a),I=(b)-1;i>I;--i)
    #define go(u) for(int i=head[u],v=e[i].v;i;i=e[i].nx,v=e[i].v)
    using namespace std;
    // Buffered stdin reader: buf holds the current chunk and [p1,p2) is its unread part.
    char buf[1<<21],*p1=buf,*p2=buf;
    // Returns the next input byte as a value in 0..255, or EOF once stdin is exhausted.
    // The buffer is refilled with a single fread whenever the unread window is empty.
    inline int nextByte(){
        if(p1==p2){
            p2=(p1=buf)+fread(buf,1,sizeof buf,stdin);
            if(p1==p2)return EOF;
        }
        return (unsigned char)*p1++;
    }
    // Char-returning form of nextByte(), kept with its original signature (EOF is narrowed to char).
    inline char getc(){return (char)nextByte();}
    // Parses the next decimal integer from stdin.
    // Every non-digit byte before the number is skipped; if a '-' is among them the result is negated.
    // The byte that terminates the digit run is consumed as well.
    // Returns 0 when the input ends before any digit is found (the old loop never terminated there).
    int read(){
        int c=nextByte(),sign=1;
        while(c!=EOF&&(c<'0'||c>'9')){
            if(c=='-')sign=-1;
            c=nextByte();
        }
        if(c==EOF)return 0;
        // Accumulate in unsigned: "res*10+ch-'0'" overflowed int for values close to INT_MAX.
        unsigned res=0;
        for(;c>='0'&&c<='9';c=nextByte())res=res*10u+(unsigned)(c-'0');
        return (int)(sign<0?0u-res:res);
    }
    // Output buffer: sr[0..C] holds pending bytes (C==-1 means empty); z/Z is a scratch digit stack.
    char sr[1<<21],z[20];int C=-1,Z=0;
    // Writes the pending bytes to stdout and marks the buffer empty.
    inline void Ot(){fwrite(sr,1,C+1,stdout),C=-1;}
    // Appends the decimal form of x followed by one space to the output buffer,
    // flushing first when more than 2^20 bytes are already pending.
    void print(int x){
        if(C>1<<20)Ot();
        // Take the magnitude in unsigned arithmetic: "x=-x" is undefined for INT_MIN.
        unsigned v=x;
        if(x<0)sr[++C]='-',v=0u-v;
        // Digits come out least-significant first, so stack them and then pop in reverse.
        do z[++Z]=char('0'+v%10);while(v/=10);
        while(Z)sr[++C]=z[Z--];
        sr[++C]=' ';
    }
    // N: capacity of the work arrays; P: the fixed prime modulus 1e9+7; Pi: the constant pi.
    const int N=(1<<18)+5,P=1000000007;const double Pi=std::acos(-1.0);
    // (x+y) mod P for x,y already in [0,P).
    inline int add(int x,int y){return x+y>=P?x+y-P:x+y;}
    // (x-y) mod P for x,y already in [0,P).
    inline int dec(int x,int y){return x-y<0?x-y+P:x-y;}
    // x*y mod P, computed in 64 bits.
    inline int mul(int x,int y){return (int)(1ll*x*y%P);}
    // x^y mod P by binary exponentiation; y must be non-negative.
    // The base is first normalised into [0,P), so a negative base now yields a result in
    // [0,P) instead of a negative residue.
    int ksm(int x,int y){
        x%=P;
        if(x<0)x+=P;
        int res=1;
        for(;y;y>>=1,x=mul(x,x))
            if(y&1)res=mul(res,x);
        return res;
    }
    // Minimal complex number (x = real part, y = imaginary part) used by the FFT.
    struct cp{
        double x,y;
        // Leaves x and y untouched; the table declared below has static storage and so starts at zero.
        inline cp(){}
        // Parameters were declared `register`, which is ill-formed since C++17.
        inline cp(double xx,double yy):x(xx),y(yy){}
        // Component-wise sum.
        inline cp operator +(const cp &b)const{return cp(x+b.x,y+b.y);}
        // Component-wise difference.
        inline cp operator -(const cp &b)const{return cp(x-b.x,y-b.y);}
        // Complex product.
        inline cp operator *(const cp &b)const{return cp(x*b.x-y*b.y,x*b.y+y*b.x);}
        // Scaling by a real factor.
        inline cp operator *(const double &b)const{return cp(x*b,y*b);}
        // Complex conjugate.
        inline cp operator ~()const{return cp(x,-y);}
    }w[2][N];
    int r[21][N],lg[N],lim,d;double inv[21];
    void Pre(){
    	fp(d,1,18){
    		fp(i,1,(1<<d)-1)r[d][i]=(r[d][i>>1]>>1)|((i&1)<<(d-1));
    		lg[1<<d]=d,inv[d]=1.0/(1<<d);
    	}
    	for(R int i=1;i<262144;i<<=1)fp(k,0,i-1)
    		w[1][i+k]=cp(cos(Pi*k/i),sin(Pi*k/i)),w[0][i+k]=cp(cos(Pi*k/i),-sin(Pi*k/i));
    }
    // In-place iterative radix-2 FFT of A[0..lim-1], using the tables for the current size
    // (globals lim and d). ty selects the twiddle table w[ty]: Pre fills w[1] with the
    // forward roots and w[0] with their conjugates. With ty==0 the result is additionally
    // scaled by inv[d] = 1/lim, which makes it the inverse transform.
    void FFT(cp *A,int ty){
        // Reorder the input into bit-reversed order (each pair is swapped once).
        const int *rev=r[d];
        for(int i=0;i<lim;++i){
            const int j=rev[i];
            if(i<j)std::swap(A[i],A[j]);
        }
        for(int half=1;half<lim;half*=2){
            const cp *tw=w[ty]+half;
            for(int base=0;base<lim;base+=half*2){
                cp *lower=A+base,*upper=lower+half;
                for(int k=0;k<half;++k){
                    const cp t=tw[k]*upper[k];
                    upper[k]=lower[k]-t;
                    lower[k]=lower[k]+t;
                }
            }
        }
        if(ty)return;
        for(int i=0;i<lim;++i)A[i]=A[i]*inv[d];
    }
    void MTT(int *a,int *b,int len,int *c){
    	static cp f[N],g[N],p[N],q[N];
    	lim=(len<<1),d=lg[lim];
    	fp(i,0,len-1)f[i]=cp(a[i]>>15,a[i]&32767),g[i]=cp(b[i]>>15,b[i]&32767);
    	fp(i,len,lim-1)f[i]=g[i]=cp(0,0);
    	FFT(f,1),FFT(g,1);
    	fp(i,0,lim-1){
    		cp t,f0,f1,g0,g1;
    		t=~f[i?lim-i:0],f0=(f[i]-t)*cp(0,-0.5),f1=(f[i]+t)*0.5;
    		t=~g[i?lim-i:0],g0=(g[i]-t)*cp(0,-0.5),g1=(g[i]+t)*0.5;
    		p[i]=f1*g1,q[i]=f1*g0+f0*g1+f0*g0*cp(0,1);
    	}
    	FFT(p,0),FFT(q,0);
    	fp(i,0,lim-1)c[i]=(((ll)(p[i].x+0.5)%P<<30)+((ll)(q[i].x+0.5)<<15)+((ll)(q[i].y+0.5)))%P;
    }
    void Inv(int *a,int *b,int len){
    	if(len==1)return b[0]=ksm(a[0],P-2),void();
    	Inv(a,b,len>>1);
    	static int c[N],d[N];
    	MTT(a,b,len,c),MTT(c,b,len,d);
    	fp(i,0,len-1)b[i]=dec(add(b[i],b[i]),d[i]);
    }
    int A[N],B[N],n,len;
    int main(){
    //	freopen("testdata.in","r",stdin);
    	n=read(),Pre();
    	len=1;while(len<n)len<<=1;
    	fp(i,0,n-1)A[i]=read();
    	Inv(A,B,len);
    	fp(i,0,n-1)print(B[i]);
    	return Ot(),0;
    }
    
  • 相关阅读:
    算法图解-散列表
    算法图解-笔记
    ERROR:cannot read property 'getAttribute' of null 报错处理
    Error: Cannot find module 'node-sass' 报错处理
    一、Spring Cloud概述
    十、ActiveMQ多节点集群
    九、ActiveMQ的消息存储和持久化
    八、ActiveMQ的传输协议
    七、SpringBoot整合ActiveMQ
    六、Spring整合ActiveMQ
  • 原文地址:https://www.cnblogs.com/bztMinamoto/p/10653220.html
Copyright © 2020-2023  润新知