\(\text{Solution}\)
当 \(m=2\) 时,\(ans=2n\sum a_{i,j}\)
当 \(m=3\) 时
当然先套路地考虑某一行的贡献,记为第 \(x\) 行
则当第 \(x\) 行取到 \(\max\) 时有 \(a_{x,i}+a_{x,j}>a_{y,i}+a_{y,j},a_{x,i}+a_{x,j}>a_{z,i}+a_{z,j}\)
即 \(a_{x,i}-a_{y,i}>a_{y,j}-a_{x,j},a_{x,i}-a_{z,i}>a_{z,j}-a_{x,j}\)
把第 \(j\) 列看作二元组 \((a_{y,j}-a_{x,j},a_{z,j}-a_{x,j})\),第 \(i\) 列的询问则是两维都 \(<(a_{x,i}-a_{y,i},a_{x,i}-a_{z,i})\) 的二元组数量
二维偏序,排序后用树状数组即可
当 \(m=4\) 时,可以同 \(m=3\) 那样,转化为三维偏序,用 \(\text{cdq}\) 分治
当然 \(\text{Min-max}\) 容斥更妙,因为
\[\min_{i\in S} x_i = \sum_{\varnothing\neq T\subseteq S} (-1)^{|T|-1} \max_{i\in T} x_i
\]
\(\max\) 同理
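把 \(\min+\max\) 展开,注意 \(|S|=4\) 时 \(T=S\) 这一项恰为 \(-(\max_{i\in S} x_i+\min_{i\in S} x_i)\),移项后可以发现 \(2ans=\sum_{\varnothing\neq T\subsetneq S} (-1)^{|T|-1} \left(\max_{i\in T} x_i + \min_{i\in T} x_i\right)\)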
于是沿用 \(m=2\) 和 \(m=3\) 的做法即可
注意到这里用了严格大于小于符号,但需要考虑取等,这东西很容易算重
一个笨点的方法就是当不等式取等时按行号打破平局:与第 \(y\) 行比较时,若 \(x<y\) 则允许取等,否则要求严格不等(与第 \(z\) 行比较时同理,看是否 \(x<z\)),即将贡献放到最早的有相等的行
这需要在修改和查询时讨论相等情况
\(\text{Code}\)
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <iostream>
// RE used to expand to `register`. That storage-class specifier was removed
// in C++17, so RE is now an empty macro: every existing `RE int i`
// declaration keeps compiling and means plain `int i`.
#define RE
// IN marks the small helper functions below as inline.
#define IN inline
using namespace std;
using LL = long long;
// N   : capacity of one row of `a` (columns are indexed from 1); Q holds 2 * N events.
// len : largest index used inside the Fenwick tree.
// D   : offset added to every coordinate before it is used as a Fenwick index,
//       so that negative coordinates map to positive indices.
const int N = 200005, len = 400007, D = 200003;
// m rows and n columns are read in main; a[i][j] is filled for 0 <= i < m, 1 <= j <= n.
int m, n, a[5][N];
// SUM accumulates every input value; ans is the value printed at the end.
LL SUM, ans;
// Reads the next decimal integer from stdin into x.
//
// Every character that is not a decimal digit is skipped. The value is
// negated only when the character immediately before the first digit is '-'.
// If the input ends before any digit is seen, x is set to 0 and the function
// returns instead of spinning forever on EOF.
inline void read(int &x)
{
    x = 0;
    int sign = 1;
    // Keep the character in an int so that EOF stays distinguishable from
    // real characters and no negative char value is ever classified.
    int ch = getchar();
    while (ch != EOF && (ch < '0' || ch > '9'))
    {
        sign = (ch == '-') ? -1 : 1;
        ch = getchar();
    }
    for (; ch >= '0' && ch <= '9'; ch = getchar())
        x = x * 10 + (ch - '0');
    x *= sign;
}
// One event of the offline sweep performed by calc.
//   ty : 1 for a point to insert, 0 for a counting query (as built in calc)
//   x  : first coordinate, used as the sort key
//   y  : second coordinate, used as the Fenwick-tree coordinate
//   v  : weight multiplied by the count of a query (left 0 for inserts)
// The struct stays an aggregate because calc brace-initialises it.
struct node{
    int ty, x, y, v;

    // Ascending by x; among equal x the event with the smaller ty comes first.
    bool operator < (const node &rhs) const
    {
        if (x != rhs.x) return x < rhs.x;
        return ty < rhs.ty;
    }
}Q[N * 2];
struct BIT{
int c[len + 3];
IN void init(){memset(c, 0, sizeof c);}
IN int lowbit(int x){return x & (-x);}
IN void add(int x){x += D; for(; x <= len; x += lowbit(x)) c[x]++;}
IN int Query(int x){x += D; int s = 0; for(; x; x -= lowbit(x)) s += c[x]; return s;}
}T;
// Weighted 2-D dominance count for row x against rows y and z.
//
// For every column i two events are created:
//   insert (ty = 1) at (a[y][i] - a[x][i], a[z][i] - a[x][i])
//   query  (ty = 0) at (a[x][i] - a[y][i], a[x][i] - a[z][i]) with weight a[x][i]
// Pass 1 sweeps the first coordinate upwards and, for each query, counts the
// inserted points lying below it in both coordinates. Pass 2 sweeps downwards
// and counts the points lying above it in both coordinates. Each count is
// multiplied by the query's weight and accumulated.
// Ties are resolved by row index: equality in the first coordinate is counted
// only when x < y, equality in the second only when x < z.
// Presumably pass 1 covers the column pairs where row x attains the maximum
// and pass 2 those where it attains the minimum - confirm against the write-up.
//
// Returns twice the accumulated sum. Relies on T being all-zero on entry and
// leaves it all-zero on exit; overwrites Q[1 .. 2n].
IN LL calc(int x, int y, int z)
{
int cnt = 0; LL res = 0;
// Build the 2n events; the insert's v is left value-initialised to 0.
for(RE int i = 1; i <= n; i++)
Q[++cnt] = node{1, a[y][i] - a[x][i], a[z][i] - a[x][i]},
Q[++cnt] = node{0, a[x][i] - a[y][i], a[x][i] - a[z][i], a[x][i]};
// Ascending by x; inside an equal-x group queries (ty = 0) precede inserts (ty = 1).
sort(Q + 1, Q + cnt + 1);
// Pass 1: handle groups [i, j] of equal x in ascending order of x.
for(RE int i = 1, j; i <= cnt; i = j + 1)
{
j = i;
while (j < cnt && Q[j + 1].x == Q[i].x) ++j;
// Note the else pairing: the first `else` belongs to `if (Q[k].ty)`, the
// second to `if (x < y)`.
// x < y: walk the group backwards so its inserts are applied before its
// queries, i.e. points with equal x are counted.
if (x < y) for(RE int k = j; k >= i; k--) if (Q[k].ty) T.add(Q[k].y);
// Counts stored y <= Q[k].y - 1, or <= Q[k].y when x < z.
else res += (LL)Q[k].v * T.Query(Q[k].y - 1 + (x < z));
// Otherwise walk forwards: queries run before the group's inserts, so points
// with equal x are not counted.
else for(RE int k = i; k <= j; k++) if (Q[k].ty) T.add(Q[k].y);
else res += (LL)Q[k].v * T.Query(Q[k].y - 1 + (x < z));
}
T.init();
// Pass 2: handle groups [j, i] of equal x in descending order of x.
for(RE int i = cnt, j; i; i = j - 1)
{
j = i;
while (j > 1 && Q[j - 1].x == Q[i].x) --j;
// Same tie rule as pass 1: backwards (inserts first) when x < y, forwards
// (queries first) otherwise.
if (x < y) for(RE int k = i; k >= j; k--) if (Q[k].ty) T.add(Q[k].y);
// T.Query(len - D) is the number of points stored so far; subtracting the
// prefix leaves stored y > Q[k].y, or >= Q[k].y when x < z.
else res += (LL)Q[k].v * (T.Query(len - D) - T.Query(Q[k].y - (x < z)));
else for(RE int k = j; k <= i; k++) if (Q[k].ty) T.add(Q[k].y);
else res += (LL)Q[k].v * (T.Query(len - D) - T.Query(Q[k].y - (x < z)));
}
// Leave the shared tree empty for the next call.
T.init();
return res * 2;
}
IN LL solve3(int i, int j, int k){return calc(i, j, k) + calc(j, i, k) + calc(k, i, j);}
IN LL solve2(int i, int j)
{
LL res = 0;
for(RE int x = 1; x <= n; x++) res += a[i][x] + a[j][x];
return res * n * 2;
}
// Reads m and n followed by an m-by-n table, then prints the answer.
//   m == 2 : solve2 on rows 0 and 1
//   m == 3 : solve3 on rows 0, 1, 2
//   else   : inclusion-exclusion over rows 0..3: 4 * n * SUM, minus solve2 of
//            every pair, plus solve3 of every triple, and the result is halved.
int main()
{
    read(m);
    read(n);
    for (int row = 0; row < m; ++row)
    {
        for (int col = 1; col <= n; ++col)
        {
            read(a[row][col]);
            SUM += a[row][col];
        }
    }

    switch (m)
    {
    case 2:
        ans = solve2(0, 1);
        break;
    case 3:
        ans = solve3(0, 1, 2);
        break;
    default:
        // Single-row terms.
        ans = SUM * n * 4;
        // Pair terms enter with a minus sign.
        for (int p = 0; p < 4; ++p)
            for (int q = p + 1; q < 4; ++q)
                ans -= solve2(p, q);
        // Triple terms enter with a plus sign.
        for (int p = 0; p < 4; ++p)
            for (int q = p + 1; q < 4; ++q)
                for (int r = q + 1; r < 4; ++r)
                    ans += solve3(p, q, r);
        // The accumulated value is twice the answer.
        ans >>= 1;
        break;
    }

    printf("%lld\n", ans);
    return 0;
}