沙雕题
打一下发现状态很少
可以矩乘优化
但是复杂度不对
考虑一个行向量乘矩阵是 $O(S^2)$ 的（$S$ 为状态数）
于是把转移矩阵的 $2^i$ 次幂预处理出来，每次询问用行向量按二进制位依次去乘即可
有些卡常
#include<bits/stdc++.h>
using namespace std;
#define re register
#define pb push_back
#define cs const
#define ll long long
#define pii pair<int,int>
#define fi first
#define se second
// Capacity, in bytes, of the stdin buffer that gc() refills.
const int RLEN=(1<<20)|1;

// Returns the next byte of stdin, refilling an internal buffer with fread()
// whenever it has been fully consumed. Yields EOF (narrowed to char) once
// fread() delivers no more data.
inline char gc(){
    static char buffer[RLEN];
    static char *cursor; // next unread byte; zero-initialised
    static char *limit;  // one past the last valid byte; zero-initialised
    if(cursor==limit){
        // Buffer drained (or never filled): fetch the next chunk.
        cursor=buffer;
        limit=buffer+fread(buffer,1,RLEN,stdin);
        if(cursor==limit)return EOF;
    }
    return *cursor++;
}
// Reads the next decimal integer from stdin via gc().
//
// Characters preceding the first digit are skipped; every '-' among them flips
// the sign of the result. Parsing stops at the first non-digit after the
// number. If the input ends before any digit is seen, the function returns 0
// instead of spinning forever on EOF.
inline int read(){
    // gc() returns char, so EOF has to be compared in its narrowed form.
    const char eof=static_cast<char>(EOF);
    char ch=gc();
    bool positive=true;
    // isdigit() is only defined for unsigned-char values (or EOF), hence the
    // cast: bytes >= 0x80 would otherwise be passed as negative ints.
    while(ch!=eof&&!isdigit(static_cast<unsigned char>(ch))){
        if(ch=='-')positive=!positive;
        ch=gc();
    }
    int res=0;
    while(ch!=eof&&isdigit(static_cast<unsigned char>(ch))){
        res=res*10+(ch-'0');
        ch=gc();
    }
    return positive?res:-res;
}
inline ll readl(){
char ch=gc();
ll res=0;bool f=1;
while(!isdigit(ch))f^=ch=='-',ch=gc();
while(isdigit(ch))res=(res+(res<<2)<<1)+(ch^48),ch=gc();
return f?res:-res;
}
// "Check max": overwrites `a` with `b` when `b` is strictly greater.
template<class tp>inline void chemx(tp &a,tp b){
    if(a<b)a=b;
}
// "Check min": overwrites `a` with `b` when `b` is strictly smaller.
template<class tp>inline void chemn(tp &a,tp b){
    if(a>b)a=b;
}
// Modulus used by every arithmetic helper below.
const int mod=998244353;
// 5*mod*mod. The matrix/vector products in this file subtract it from their
// 64-bit accumulators whenever they reach it, so a further product of two
// residues can still be added without overflowing.
const long long lim=5LL*mod*mod;

// Returns a+b reduced with one conditional subtraction (assumes a,b in [0,mod)).
inline int add(int a,int b){
    const int sum=a+b;
    return sum>=mod?sum-mod:sum;
}

// Returns a-b corrected with one conditional addition (assumes a,b in [0,mod)).
inline int dec(int a,int b){
    const int diff=a-b;
    return diff<0?diff+mod:diff;
}

// Returns a*b % mod, computed in 64 bits.
inline int mul(int a,int b){
    return static_cast<int>(static_cast<long long>(a)*b%mod);
}

// In-place version of add().
inline void Add(int &a,int b){
    a+=b;
    if(a>=mod)a-=mod;
}

// In-place version of dec().
inline void Dec(int &a,int b){
    a-=b;
    if(a<0)a+=mod;
}

// In-place version of mul().
inline void Mul(int &a,int b){
    a=static_cast<int>(static_cast<long long>(a)*b%mod);
}

// Binary exponentiation: returns res * a^b (mod `mod`); `res` defaults to 1.
inline int ksm(int a,int b,int res=1){
    while(b){
        if(b&1)res=mul(res,a);
        a=mul(a,a);
        b>>=1;
    }
    return res;
}

// Returns x^(mod-2) mod `mod`.
inline int Inv(int x){
    return ksm(x,mod-2);
}
// Side length of every matrix/vector buffer in this file.
// NOTE(review): presumably the largest state count plus one accumulator slot
// for the biggest supported input -- confirm against the problem limits.
const int M=166;
// Highest row/column index in use; every matrix and vector loop runs 0..all.
int all;
// Parameters read once in main(); `m` selects which transition table is built.
int m;
int k;
// 64-bit scratch accumulator used by mat's operator*.
long long ss[M][M];
struct mat{
int a[M][M];
inline void clear(){memset(a,0,sizeof(a));}
friend inline mat operator *(cs mat &a,cs mat &b){
mat c;
memset(ss,0,sizeof(ss));
for(int i=0;i<=all;i++)
for(int k=0;k<=all;k++)
for(int j=0;j<=all;j++)
ss[i][j]+=1ll*a.a[i][k]*b.a[k][j],(ss[i][j]>=lim)&&(ss[i][j]-=lim);
for(int i=0;i<=all;i++)
for(int j=0;j<=all;j++)
c.a[i][j]=ss[i][j]%mod;
// Add(c.a[i][j],mul(a.a[i][k],b.a[k][j]));
return c;
}
}trans,pw[70];
// inv[i]: modular inverse of i, filled by init_inv().
int inv[M];
// State numbering tables filled in main(): id for the two-counter case,
// id2 for the three-counter case.
int id[10][10];
int id2[10][10][10];
// now: working row vector for the current query; tp: initial row vector.
int now[M];
int tp[M];
// Fills inv[0..M-1] with modular inverses using the linear recurrence
//   mod = (mod/i)*i + mod%i   =>   i^{-1} = -(mod/i) * (mod%i)^{-1}  (mod `mod`).
// inv[0] is set to 1 as a placeholder alongside inv[1].
inline void init_inv(){
    inv[0]=1;
    inv[1]=1;
    for(int i=2;i<M;++i){
        const int negQuot=mod-mod/i; // -(mod/i) as a non-negative residue
        inv[i]=mul(negQuot,inv[mod%i]);
    }
}
inline void Mult(int *tp,int p){
static ll tmp[M];
memset(tmp,0,sizeof(ll)*(all+1));
for(int i=0;i<=all;i++)
for(int j=0;j<=all;j++){
tmp[i]+=1ll*tp[j]*pw[p].a[j][i];
if(tmp[i]>=lim)tmp[i]-=lim;
}
// Add(tmp[i],mul(tp[j],pw[p].a[j][i]));
for(int i=0;i<=all;i++)tp[i]=tmp[i]%mod;
}
inline void solve(){
ll n=readl();
memcpy(now,tp,sizeof(tp));
// for(int i=0;i<=all;i++)cout<<now[i]<<" ";puts("");
for(int i=60;i>=0;i--)if(n&(1ll<<i))Mult(now,i);
cout<<now[all]<<'
';
}
// Entry point. Reads T, m and k, builds the one-step transition matrix `trans`
// for the chosen m (1, 2 or 3), precomputes pw[i] by squaring pw[i-1] for
// i = 1..60, then answers T queries with solve().
//
// In every variant a state is a tuple of small counters whose sum is at most k,
// and column `all` is an extra accumulator: each state adds 1/(total+1) to it
// per step, and the accumulator maps to itself with weight 1.
// NOTE(review): the counters presumably count units by remaining hit points
// (1, 2, 3) -- confirm against the problem statement.
int main(){
#ifdef Stargazer
    freopen("lx.cpp","r",stdin);
//  freopen("my.out","w",stdout);
#endif
    int T=read();m=read(),k=read();
    init_inv(),trans.clear();
    if(m==1){
        // States 0..k hold a single counter; the accumulator is column k+1.
        tp[1]=1;
        all=k+1;
        for(int i=0;i<=k;i++){
            trans.a[i][i]=inv[i+1],trans.a[i][all]=inv[i+1];
            if(i)trans.a[i][i-1]=mul(i,inv[i+1]);
        }
        trans.a[all][all]=1;
    }
    else if(m==2){
        // Number the states (i,j) with i+j<=k; afterwards `all` equals the
        // state count and doubles as the accumulator index.
        for(int i=0;i<=k;i++)
            for(int j=0;i+j<=k;j++)id[i][j]=all++;
        tp[id[0][1]]=1;
        for(int i=0;i<=k;i++)
            for(int j=0;i+j<=k;j++){
                const int cur=id[i][j];
                const int w=inv[i+j+1];
                Add(trans.a[cur][cur],w);
                Add(trans.a[cur][all],w);
                if(i)Add(trans.a[cur][id[i-1][j]],mul(i,w));
                // (i,j) -> (i+1,j): one j moves to i and a fresh j is added,
                // unless the total is already k, in which case j drops by one.
                if(j)Add(trans.a[cur][id[i+1][j-(i+j==k)]],mul(j,w));
            }
        trans.a[all][all]=1;
    }
    else if(m==3){
        // Number the states (i,j,p) with i+j+p<=k, as in the m==2 case.
        for(int i=0;i<=k;i++)
            for(int j=0;i+j<=k;j++)
                for(int p=0;i+j+p<=k;p++)
                    id2[i][j][p]=all++;
        tp[id2[0][0][1]]=1;
        for(int i=0;i<=k;i++)
            for(int j=0;i+j<=k;j++)
                for(int p=0;i+j+p<=k;p++){
                    const int cur=id2[i][j][p];
                    const int w=inv[i+j+p+1];
                    Add(trans.a[cur][cur],w);
                    Add(trans.a[cur][all],w);
                    if(i)Add(trans.a[cur][id2[i-1][j][p]],mul(w,i));
                    // One j moves to i; a fresh p is added while there is room.
                    if(j)Add(trans.a[cur][id2[i+1][j-1][p+(i+j+p<k)]],mul(w,j));
                    // One p moves to j; a fresh p replaces it while there is room.
                    // The guard must test p, not the global k: with p==0 and
                    // i+j==k the index p-1 would read id2[i][j+1][-1].
                    if(p)Add(trans.a[cur][id2[i][j+1][p-(i+j+p==k)]],mul(w,p));
                }
        trans.a[all][all]=1;
    }
    // pw[i] = trans^(2^i), obtained by repeated squaring.
    pw[0]=trans;
    for(int i=1;i<=60;i++)pw[i]=pw[i-1]*pw[i-1];
    while(T--)solve();
}