Description
给定 (n,m) ,求
数据范围 (1le n,mle 10^9),答案对 (998244353) 取模。
Solution
约定
(S_k(n)=sumlimits_{i=1}^{n} i^k)
若 (n>m) ,则交换 (n,m) ,保证 (nle m)。
拆上下指标
先对上下指标做一下处理,得:
考虑拆开 ([x_1<x_2][y_1<y_2]) :
回带到原来式子,得到:
(2[0le x_1<x_2][0le y_1<y_2])
(-2([0=x_1<x_2][0le y_1<y_2]+[0le x_1<x_2][0=y_1<y_2]))
(+2[0=x_1<x_2][0=y_1<y_2])
(+2[x_1>x_2][y_1<y_2])
(+2([x_1<x_2][y_1=y_2]+[x_1=x_2][y_1<y_2]))
(+[x_1=x_2][y_1=y_2])
所以分别求出这 (6) 种情形即可,下面开始推导。
1. ([0le x_1<x_2][0le y_1<y_2])
即求
- 引理1
对于所有的 (a in [0,y), bin[0,x)),((ax+by) mod (xy)) 将集合 (S={ (a,b) | 0le a<y, 0le b<x}) 映射到 (T={ k imes gcd(a,b) | 0le k< frac{ab}{gcd(a,b)}}) ,并且对于 (T) 中的每一元素恰好有 (gcd(a,b)) 个原象。
感性理解一下即可,故:
将前半部分拎出来:
其中 (G_1(n)=n^3 sumlimits_{d|n} mu(d)d)
因此
2. ([0= x_1<x_2][0le y_1<y_2])
即求
其实还有对称的 ([0le x_1<x_2][0=y_1<y_2]) ,推导类似。
3. ([0=x_1<x_2][0=y_1<y_2])
4. ([x_1>x_2][y_1<y_2])
其中 (G_2(n)=n^2sumlimits_{d|n}mu(d)d)
(G_3(n)=nsumlimits_{d|n}mu(d)d)
5. ([x_1=x_2][y_1<y_2])
可以发现 (f_5) 和 (f_2) 的式子完全一致,且符号相反,因此可以抵消。
显然对于另一半对称的式子也是如此。
6. ([x_1=x_2][y_1=y_2])
合并
答案为
因此我们只需要快速求出 (G_1(n),G_2(n),G_3(n)) 的前缀和即可。
杜教筛
考虑杜教筛,以 (G_1(n)) 为例。
(f(n)=n^3sumlimits_{d|n}mu(d)d)
记 (S(n)=sumlimits_{i=1}^{n}f(i))
则 (S(n)=sumlimits_{i=1}^{n}i^3sumlimits_{d|i}mu(d)d)
(=sumlimits_{d=1}^{n}mu(d)d^4S_3(lfloor frac{n}{d} floor))
所以我们需要杜教筛求 (f'(i)=mu(i)i^4) 的前缀和。
考虑 (g=ID^4) ,则 (h(n)=f'*g=[n==1])
故 (S(n)=1-sumlimits_{i=2}^{n}i^4 imes S(lfloor frac{n}{i} floor))
实现
时间复杂度:(O(n^{frac{5}{6}}))
空间复杂度:(O(n^{frac{2}{3}}))
#pragma GCC optimize(2)
#pragma GCC optimize(3)
#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-fwhole-program")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-fstrict-overflow")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-skip-blocks")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-fhoist-adjacent-loads")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("-funsafe-loop-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")
#include<bits/stdc++.h>
using namespace std;
#define rint register int
#define rep(i,l,r) for(rint i=l;i<=r;i++)
#define per(i,l,r) for(rint i=l;i>=r;i--)
#define ll long long
#define ull unsigned long long
#define pii pair<int,int>
#define pll pair<ll,ll>
#define pb push_back
#define fir first
#define sec second
#define mset(s,t) memset(s,t,sizeof(s))
template<typename T1,typename T2>void ckmin(T1 &a,T2 b){if(a>b)a=b;}
template<typename T1,typename T2>void ckmax(T1 &a,T2 b){if(a<b)a=b;}
template<typename T>T gcd(T a,T b){return b?gcd(b,a%b):a;}
int read(){
int x=0,f=0;
char ch=getchar();
while(!isdigit(ch))f|=ch=='-',ch=getchar();
while(isdigit(ch))x=10*x+ch-'0',ch=getchar();
return f?-x:x;
}
const int N=10000005;
const int mod=998244353;
ll qpow(ll a,ll b=mod-2){
ll res=1;
a%=mod;
while(b>0){
if(b&1)res=res*a%mod;
a=a*a%mod;
b>>=1;
}
return res;
}
const ll inv2=qpow(2);
const ll inv4=qpow(4);
const ll inv6=qpow(6);
const ll inv8=qpow(8);
const ll inv30=qpow(30);
int mu[N],g1[N],g2[N],g3[N];
bool vis[N];
int pr[N>>3],len,L=1e7;
void init(int n){
mu[1]=1;
for(register int i=2;i<=n;i++){
if(!vis[i])pr[len++]=i,mu[i]=-1;
for(register int j=0;j<len&&pr[j]*i<=n;j++){
vis[pr[j]*i]=1;
if(i%pr[j]==0)break;
mu[pr[j]*i]=-mu[i];
}
}
rep(i,1,n){
g1[i]=(g1[i-1]+1ll*i*i%mod*i%mod*i%mod*mu[i])%mod;
g2[i]=(g2[i-1]+1ll*i*i%mod*i%mod*mu[i])%mod;
g3[i]=(g3[i-1]+1ll*i*i%mod*mu[i])%mod;
}
}
ll S1(ll x){
x%=mod;
return x*(x+1)%mod*inv2%mod;
}
ll S2(ll x){
x%=mod;
return x*(x+1)%mod*(2*x+1)%mod*inv6%mod;
}
ll S3(ll x){
x%=mod;
return S1(x)*S1(x)%mod;
}
ll S4(ll x){
x%=mod;
return x*(x+1)%mod*(2*x+1)%mod*(3*x*x%mod+3*x-1)%mod*inv30%mod;
}
int n,m;
unordered_map<int,ll>Map1,Map2,Map3;
ll GG1(int n){
if(n<=L)return g1[n];
if(Map1[n])return Map1[n];
ll ans=1;
for(int i=2,j;i<=n;i=j+1){
j=n/(n/i);
ans=(ans-GG1(n/i)*(S4(j)-S4(i-1)+mod))%mod;
}
return Map1[n]=(ans%mod+mod)%mod;
}
ll GG2(ll n){
if(n<=L)return g2[n];
if(Map2[n])return Map2[n];
ll ans=1;
for(int i=2,j;i<=n;i=j+1){
j=n/(n/i);
ans=(ans-GG2(n/i)*(S3(j)-S3(i-1)+mod))%mod;
}
return Map2[n]=(ans%mod+mod)%mod;
}
ll GG3(ll n){
if(n<=L)return g3[n];
if(Map3[n])return Map3[n];
ll ans=1;
for(int i=2,j;i<=n;i=j+1){
j=n/(n/i);
ans=(ans-GG3(n/i)*(S2(j)-S2(i-1)+mod))%mod;
}
return Map3[n]=(ans%mod+mod)%mod;
}
unordered_map<int,ll>map4,map5,map6;
ll G1(int n){
if(map4[n])return map4[n];
ll ans=0;
for(int i=1,j;i<=n;i=j+1){
j=n/(n/i);
ans=(ans+(GG1(j)-GG1(i-1)+mod)*S3(n/i))%mod;
}
return map4[n]=ans;
}
ll G2(int n){
if(map5[n])return map5[n];
ll ans=0;
for(int i=1,j;i<=n;i=j+1){
j=n/(n/i);
ans=(ans+(GG2(j)-GG2(i-1)+mod)*S2(n/i))%mod;
}
return map5[n]=ans;
}
ll G3(int n){
if(map6[n])return map6[n];
ll ans=0;
for(int i=1,j;i<=n;i=j+1){
j=n/(n/i);
ans=(ans+(GG3(j)-GG3(i-1)+mod)*S1(n/i))%mod;
}
return map6[n]=ans;
}
ll f1(){
ll ans=0;
for(int i=1,j;i<=n;i=j+1){
j=min(n/(n/i),m/(m/i));
ans=(ans+S2(n/i)*S2(m/i)%mod*(G1(j)-G1(i-1)+mod))%mod;
}
ans=ans*inv2%mod;
ans=(ans+mod-S1(n)*S1(m)%mod*inv2%mod)%mod;
return (ans+mod)%mod;
}
ll f4(){
ll ans=0;
for(int i=1,j;i<=n;i=j+1){
j=min(n/(n/i),m/(m/i));
ans=(ans+S2(n/i)*S2(m/i)%mod*(G1(j)-G1(i-1)+mod))%mod;
ans=(ans-(S1(n/i)*S2(m/i)+S2(n/i)*S1(m/i))%mod*(G2(j)-G2(i-1)+mod))%mod;
ans=(ans+S1(n/i)*S1(m/i)%mod*(G3(j)-G3(i-1)+mod))%mod;
}
ans=ans*inv4%mod;
return (ans+mod)%mod;
}
int main(){
init(L);
//n=1e9,m=1e9;
scanf("%d%d",&n,&m);
if(n>m)swap(n,m);
printf("%lld
",2ll*(f1()+f4())%mod);
return 0;
}
关于对 ([0= x_1<x_2][0le y_1<y_2]) 的彩(推)蛋(导)
qwq 其实是因为我一开始没意料到能相互抵消,所以推了不少,这里就保留一下好了。
右边 (y_2) 的式子,令 (t=m/dk) ,经化简得
因此:
然后令 (T=dk) 化简一下就行了。。。