Link
考虑枚举每一对 $(a_i,a_j)$ 然后计算贡献,此时序列被分为了 $(?\ a_i\ ?\ a_j\ ?)$ 三部分,交换 $k$ 次后只有 $(AB,BA,A?,?A,B?,?B,??)$ 总共七种情况,那么我们就可以矩阵快速幂计算出概率然后计算贡献了。
然后枚举 $j$ 计算所有 $i$ 的贡献,用 BIT 维护即可。
#include<cstdio>
#include<cctype>
#include<cstring>
namespace IO
{
// Buffered reader over stdin: ibuf holds the most recently read chunk and
// [iS, iT) is the part of it that has not been consumed yet.
char ibuf[(1<<21)+1],*iS,*iT;
// Returns the next input byte as a non-negative int, or EOF once stdin is exhausted.
// The byte is converted through unsigned char so that bytes >= 0x80 are never
// confused with EOF and are always valid arguments for isdigit().
int Get()
{
	if(iS==iT)
	{
		// Buffer drained: refill it; an empty refill means end of input.
		iT=(iS=ibuf)+fread(ibuf,1,sizeof ibuf,stdin);
		if(iS==iT) return EOF;
	}
	return static_cast<unsigned char>(*iS++);
}
// Skips bytes until a decimal digit is seen, then parses the run of digits as a
// non-negative int (no sign handling, no overflow check).
// Returns 0 if the input ends before any digit is found, instead of looping forever.
int read()
{
	int c=Get();
	while(c!=EOF&&!isdigit(c)) c=Get();
	int x=0;
	for(;c!=EOF&&isdigit(c);c=Get()) x=x*10+(c-'0');
	return x;
}
}
using IO::read;
// N: capacity of the input array and of each Fenwick tree; P: the prime modulus.
const int N=1000007,P=1000000007;
// Folds a value from (-P, P) into [0, P) by adding P when it is negative.
int mod(int x)
{
	return x<0?x+P:x;
}
// (a + b) mod P for a, b in [0, P); the sum is shifted down by P first so it fits in int.
int inc(int a,int b)
{
	const int shifted=a+b-P;
	return mod(shifted);
}
// (a - b) mod P for a, b in [0, P).
int dec(int a,int b)
{
	const int diff=a-b;
	return mod(diff);
}
// a * b reduced with %, computed in 64 bits. A negative operand yields a
// non-positive result because the remainder keeps the sign of the product.
int mul(int a,int b)
{
	const long long wide=static_cast<long long>(a)*b;
	return static_cast<int>(wide%P);
}
// a^k mod P by binary exponentiation; k is expected to be non-negative.
int pow(int a,int k)
{
	int r=1;
	while(k)
	{
		if(k&1) r=mul(a,r);
		a=mul(a,a);
		k>>=1;
	}
	return r;
}
int n,k,a[N];
// 7x7 matrix of ints. A default-constructed matrix is all zeros.
// E and I are the two global instances used by main and upd.
struct matrix
{
	int a[7][7];
	// Value-initialise the storage so every entry starts at 0.
	matrix():a(){}
	// Row accessor, so m[i][j] addresses entry (i, j).
	int*operator[](int x){return a[x];}
}E,I;
// Product of two 7x7 matrices; every entry is accumulated with inc/mul, i.e. modulo P.
// Operands are taken by non-const reference because matrix::operator[] is non-const.
// The result is built in a separate matrix, so a and b may be the same object.
matrix operator*(matrix&a,matrix&b)
{
	matrix prod;
	for(int row=0;row<7;++row)
	{
		for(int col=0;col<7;++col)
		{
			int acc=prod[row][col];   // starts at 0
			for(int mid=0;mid<7;++mid)
				acc=inc(acc,mul(a[row][mid],b[mid][col]));
			prod[row][col]=acc;
		}
	}
	return prod;
}
struct BIT
{
int c[N];BIT(){memset(c,0,sizeof c);}
void add(int p,int v){for(;p;p^=p&-p)c[p]=inc(c[p],v);}
int ask(int p){int r=0;for(;p<=n;p+=p&-p)r=inc(r,c[p]);return r;}
}t[3];
// Adds the seven values a..g to columns 0..6 of row p of the global matrix E.
// Plain integer addition: the entries are not reduced modulo P here.
void upd(int p,int a,int b,int c,int d,int e,int f,int g)
{
	const int delta[7]={a,b,c,d,e,f,g};
	int*row=E[p];
	for(int col=0;col<7;++col) row[col]+=delta[col];
}
int main()
{
n=read(),k=read();int in=pow(n-2,P-2),i2=500000004,s1=0,s2=0,ans=0;
for(int i=0;i<7;++i) I[i][i]=1,E[i][i]=1ll*(n-2)*(n-3)/2%P;
upd(0,0,n-2,0,n-2,0,1,0);
upd(1,1,n-3,1,0,n-3,0,1);
upd(2,0,1,n-3,1,n-3,1,0);
upd(3,1,0,1,n-3,n-3,0,1);
upd(4,0,1,1,1,2*n-7,0,1);
upd(5,1,0,n-2,0,0,0,n-2);
upd(6,0,1,0,1,n-3,1,n-3);
for(;k;k>>=1,E=E*E)if(k&1)I=I*E;
for(int i=1;i<=n;++i) a[i]=read();
for(int i=1;i<=n;++i)
{
int x=t[0].ask(a[i]),X=dec(i-1,x),y=t[1].ask(a[i]),Y=dec(s1,y),z=t[2].ask(a[i]),Z=dec(s2,z),now=0;
ans=inc(ans,mul(x,I[0][0]));
ans=inc(ans,mul(X,I[0][5]));
now=inc(now,mul(inc(y,Z),I[0][1]));
now=inc(now,mul(inc(Y,z),I[0][2]));
now=inc(now,mul(inc(mul(i-2,x),mul(n-i,X)),I[0][3]));
now=inc(now,mul(inc(mul(i-2,X),mul(n-i,x)),I[0][6]));
s1=inc(s1,n-i-1),s2=inc(s2,i-1);
t[0].add(a[i],1);
t[1].add(a[i],n-i-1);
t[2].add(a[i],i-1);
ans=inc(ans,mul(now,in));
}
ans=inc(ans,1ll*n*(n-1)/2%P*I[0][4]%P*i2%P);
printf("%d",ans);
}