Description
[sum_{j=1}^nsum_{k=1}^nmu(j k)pmod{2^{32}}
]
其中(nle10^9.)
Solution
首先是莫比乌斯反演经典套路。
[egin{align*}
sum_{j=1}^nsum_{k=1}^nmu(j k)&=sum_{j=1}^nsum_{k=1}^nmu(j)mu(k)[gcd(j,k)=1]\
&=sum_{j=1}^nsum_{k=1}^nmu(j)mu(k)sum_{d|j,d|k}mu(d)\
&=sum_{d=1}^nmu(d)sum_{j=1}^{leftlfloorfrac{n}{d}
ight
floor}mu(j d)sum_{k=1}^{leftlfloorfrac{n}{d}
ight
floor}mu(k d)\
&=sum_{d=1}^nmu(d)sum_{j=1}^{leftlfloorfrac{n}{d}
ight
floor}mu(j)[gcd(j,d)=1]sum_{k=1}^{leftlfloorfrac{n}{d}
ight
floor}mu(k)[gcd(k,d)=1]
end{align*}
]
我们记
[f(n,a)=sum_{k=1}^nmu(k)[gcd(k,a)=1]
]
那么就有
[sum_{j=1}^nsum_{k=1}^nmu(j k)=sum_{d=1}^nmu(d)fleft(leftlfloorfrac{n}{d}
ight
floor,d
ight)^2
]
哪怕不考虑求(f)的开销,这也是(O(n)),必须进行优化。
观察到当(d)很大时,(leftlfloorfrac{n}{d} ight floor)很小,不妨设一个阈值(B),和端点(L=leftlfloorfrac{n}{B+1} ight floor)
[egin{align*}
sum_{j=1}^nsum_{k=1}^nmu(j k)&=sum_{d=1}^Bmu(d)fleft(leftlfloorfrac{n}{d}
ight
floor,d
ight)^2+sum_{B+1}^nmu(d)sum_{j=1}^{leftlfloorfrac{n}{d}
ight
floor}mu(j)[gcd(j,d)=1]sum_{k=1}^{leftlfloorfrac{n}{d}
ight
floor}mu(k)[gcd(k,d)=1]\
&=sum_{d=1}^Bmu(d)fleft(leftlfloorfrac{n}{d}
ight
floor,d
ight)^2+sum_{j=1}^Lsum_{k=1}^Lmu(j)mu(k)sum_{i=B+1}^{min{{n/j,n/k}}}mu(i)[gcd(i,j)=1][gcd(i,k)=1]\
&=sum_{d=1}^Bmu(d)fleft(leftlfloorfrac{n}{d}
ight
floor,d
ight)^2+2sum_{j=1}^Lsum_{k=1}^{j-1}mu(j)mu(k)sum_{i=B+1}^{leftlfloorfrac{n}{j}
ight
floor}mu(i)[gcd(i,jk)=1]\
&qquadqquadqquadqquadqquadqquadqquad+sum_{j=1}^Lmu(j)^2sum_{i=B+1}^{leftlfloorfrac{n}{j}
ight
floor}mu(i)[gcd(i,j)=1] \
&=sum_{d=1}^Bmu(d)fleft(leftlfloorfrac{n}{d}
ight
floor,d
ight)^2+2sum_{j=1}^Lsum_{k=1}^{j-1}mu(j)mu(k)left(fleft(leftlfloorfrac{n}{d}
ight
floor,jk
ight)-f(B,jk)
ight)\
&qquadqquadqquadqquadqquadqquadqquad+sum_{j=1}^Lmu(j)^2left(fleft(leftlfloorfrac{n}{d}
ight
floor,j
ight)-f(B,j)
ight)
end{align*}
]
总时间复杂度(不考虑(f))为(O(B+left(frac{n}{B} ight)^2)),显然当(B=n^frac{2}{3})时取到最小值,为(O(n^frac{2}{3}).)
那么问题就变成如何快速地求出(f(n,a).)
[egin{align*}
f(n,a)&=sum_{k=1}^nmu(k)[gcd(k,a)=1]\
&=sum_{k=1}^nmu(k)sum_{t|k,t|a}mu(t)\
&=sum_{t|a}mu(t)sum_{k=1}^{leftlfloorfrac{n}{t}
ight
floor}mu(k t)\
&=sum_{t|a}mu(t)^2sum_{k=1}^{leftlfloorfrac{n}{t}
ight
floor}mu(k)[gcd(k,t)=1]\
&=sum_{t|a}mu(t)^2fleft(leftlfloorfrac{n}{t}
ight
floor,t
ight)
end{align*}
]
用这个递推式就可以比较快速地计算(f)了。
对于边界:(f(n,1)),就是(mu)的前缀和,用min_25筛预处理出整除分块得出的所有(2sqrt{n})个位置的前缀和。
(原因:(leftlfloorfrac{leftlfloorfrac{n}{a} ight floor}{b} ight floor=leftlfloorfrac{n}{a b} ight floor))
同时,我们可以在(O(A log A))的时间内预处理出所有(n ale A)的(f(n,a)),以卡常。
至此,问题解决。总复杂度是亚线性的。
Code
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
const int N=1e9+1,M=1e6+1;
int n,b,l,cnt,pri[M],frm[M];
bool cm[M];
uint ans,mu[M],mt[1<<8];
vector<uint> pre[M];
inline uint sqr(uint x){return x*x;}
void sieve(){
mu[1]=1;
for(int i=2;i<M;i++){
if(!cm[i]){pri[++cnt]=i;frm[i]=i;mu[i]=-1;}
for(int j=1;j<=cnt&&i*pri[j]<M;j++){
cm[i*pri[j]]=true;
frm[i*pri[j]]=pri[j];
if(i%pri[j])mu[i*pri[j]]=-mu[i];
else break;
}
}
}
namespace min_25{
int m,w[M],id1[M],id2[M];
uint g[M],s[M];
inline int id(int x){
if(x<M)return id1[x];
else return id2[n/x];
}
void work(){
for(int l=1,r;l<=n;l=r+1){
r=n/(n/l);
w[++m]=n/l;
g[m]=-w[m]+1;
if(w[m]<M)id1[w[m]]=m;
else id2[r]=m;
}
for(int j=1;j<=cnt;j++){
for(int i=1;i<=m&&(ll)pri[j]*pri[j]<=w[i];i++){
int k=id(w[i]/pri[j]);
g[i]-=g[k]+j-1;
}
}
for(int i=1;i<=m;i++)s[i]=g[i];
for(int j=cnt;j>=1;j--)for(int i=1;i<=m&&(ll)pri[j]*pri[j]<=w[i];i++)s[i]-=s[id(w[i]/pri[j])]+j;
}
inline uint smu(int x){return s[id(x)]+1;}
}
void init(){
for(int i=1;i<M;i++)pre[i].resize((M-1)/i+1);
pre[1][0]=1;
for(int i=1;i<M;i++)for(int j=1;j<=(M-1)/i;j++)pre[i][j]=(i<=j?pre[i][j-i]:pre[j][i]);
for(int i=1;i<M;i++)for(int j=0;j<=(M-1)/i;j++)pre[i][j]=pre[i][j-1]+mu[j]*pre[i][j];
}
uint f(int n,int a){
if((ll)a*n<=(M-1))return pre[a][n];
if(n==1||a==1)return min_25::smu(n);
vector<int>sum;
while(a!=1){
int p=frm[a];
sum.push_back(p);
while(a%p==0)a/=p;
}
int sz=sum.size();
mt[0]=1;
uint ans=min_25::smu(n);
for(int i=1;i<(1<<sz);i++){
int x=i&(-i);
mt[i]=mt[i^x]*sum[__builtin_ctz(i)];
ans+=f(n/mt[i],mt[i]);
}
return ans;
}
int main(){
sieve();
scanf("%d",&n);
min_25::work();
init();
b=n/(n/min(n,1000000)+1);
l=n/(b+1);
for(int i=1;i<=b;i++)if(mu[i])ans+=mu[i]*sqr(f(n/i,i));
for(int i=1;i<=l;i++)for(int j=1;j<i;j++)if(mu[i]&&mu[j])ans+=2*mu[i]*mu[j]*(f(n/i,i*j)-f(b,i*j));
for(int i=1;i<=l;i++)if(mu[i])ans+=sqr(mu[i])*(f(n/i,i)-f(b,i));
printf("%u
",ans);
}