提供一种不用树分块的做法。
考虑使用 bitset,对值域建 bitset,显然答案就是 bitset 中 \(1\) 的数量。
那么现在的问题就是怎么把一条路径上的 bitset 并起来,可以使用轻重链剖分维护,询问时再把经过的若干条重链上的 bitset 并起来即可。
现在只要求出一条重链上某区间内的 bitset,变成了序列上的问题,对序列分块,预处理出每一块中所有 bitset 的并,询问时散块暴力将数插入 bitset 内,整块直接并上预处理出的信息。时间复杂度 \(\Theta(m \log n \cdot \sqrt{n} \cdot \dfrac{n}{w})\),显然无法通过本题。
发现这么做的瓶颈在于对整块的合并,最坏有 \(\sqrt{n}\) 个整块要合并,每次合并复杂度是 \(\Theta(\dfrac{n}{w})\) 的,合起来就是 \(\Theta(\sqrt{n} \cdot \dfrac{n}{w})\) 的。
考虑优化这一部分,由于没有修改,并且或运算是可重复贡献的,故可以使用 ST 表加速合并整块的过程,这一部分时间复杂度由 \(\Theta(\sqrt{n} \cdot \dfrac{n}{w})\) 变为 \(\Theta(\dfrac{n}{w})\)。总时间复杂度变为 \(\Theta(m \log n \cdot \dfrac{n}{w})\),乍一看好像还是过不去,但是这东西跑不满,而且常数小,因此可以通过本题。甚至跑到了最优解。
代码:
#include <bits/stdc++.h>
using namespace std;
/// Reads one decimal integer into `a`.
///
/// Characters are skipped until the first digit; a '-' met while skipping
/// makes the result negative. Digits are then accumulated until the first
/// non-digit, which is consumed.
///
/// @param a  receives the parsed value (set to 0 when no digit is found)
/// @param f  initial sign flag; pass true to negate the parsed value
/// @param ch first character to examine; defaults to the next one on stdin
template <class T> void fr(T &a, bool f = 0, char ch = getchar()) {
  a = 0;
  while (ch < '0' || ch > '9') {
    // Without this guard the loop would call getchar() forever once the
    // input is exhausted, because EOF is never a digit.
    if (feof(stdin) || ferror(stdin)) return;
    if (ch == '-') f = 1;
    ch = getchar();
  }
  for (; ch >= '0' && ch <= '9'; ch = getchar()) a = a * 10 + (ch - '0');
  if (f) a = -a;
}

/// Reads several integers in argument order: fr(x, y, z) fills x, y, then z.
template <class T, class... Y> void fr(T &t, Y &... a) {
  fr(t);
  fr(a...);
}

/// Reads one int and returns it by value.
int fr() {
  int a;
  fr(a);
  return a;
}
// Upper bound on the number of vertices; every array below gets 10 spare slots.
const int N = 40000;

int n;            // number of vertices (read first in main)
int m;            // number of queries (read second in main)
int up;           // highest Bitset word index in use; main sets it to n >> 6
int etot;         // number of arcs stored so far in the adjacency list
int bsz;          // block length for the DFS-order array; main sets it to sqrt(n)
int lg2[N + 10];  // lg2[i] = floor(log2(i)), filled in main for block counts
int bid[N + 10];  // bid[i] = 1-based block id of DFS-order position i
int w[N + 10];    // w[u] = value of vertex u, replaced in main by its rank
int arr[N + 10];  // scratch copy of the values used for coordinate compression
int a[N + 10];    // a[i] = compressed value of the vertex at DFS-order position i
int hd[N + 10];   // hd[u] = index of the first arc leaving u, 0 if none
// One directed arc of the linked adjacency list: `to` is the arc's endpoint,
// `nxt` is the index of the next arc leaving the same vertex (0 ends the list).
struct Edge {
  int to;
  int nxt;
};

// Arc pool. Each tree edge is inserted in both directions, hence twice N;
// slot 0 is never used because arc indices start at 1.
Edge e[2 * N + 10];
// Inserts the directed arc x -> y at the front of x's adjacency list.
// (`register` was dropped from the parameters: the specifier is ill-formed
// since C++17 and never had an effect on modern compilers.)
inline void adde(int x, int y) {
  e[++etot] = {y, hd[x]};
  hd[x] = etot;
}
// Fixed-capacity bitset over the compressed value range.
//
// Every bulk operation only walks words 0..up (main sets `up` to n >> 6), so
// the cost follows the actual input size rather than the capacity. Words
// above `up` are never read by any member.
struct Bitset {
  // 626 words = 40064 bits, enough for compressed values 1..40000.
  unsigned long long A[626];

  // Returns the union of *this and b, computed on words 0..up only.
  // Takes b by const reference: the argument is about 5 KB, and passing it by
  // value copied it on every call.
  inline Bitset operator|(const Bitset &b) const {
    Bitset c;
    for (int i = 0; i <= up; i++) c.A[i] = A[i] | b.A[i];
    return c;
  }

  // Clears words 0..up.
  inline void reset() {
    for (int i = 0; i <= up; i++) A[i] = 0;
  }

  // Sets bit x.
  inline void set(int x) { A[x >> 6] |= 1ull << (x & 63); }

  // Number of set bits in words 0..up.
  inline int count() const {
    int ret = 0;
    for (int i = 0; i <= up; i++) ret += __builtin_popcountll(A[i]);
    return ret;
  }
};

// Accumulator for the query currently being answered.
Bitset ansor;

// Sparse table over blocks: f[j][i] is the union of blocks i .. i + 2^j - 1.
// Block ids are 1-based and reach 201 when n = 39999 (block length 199), so a
// second dimension of 201 is one column short; 204 covers it with some slack.
Bitset f[9][204];
void work(register int l, register int r) {
if (bid[l] == bid[r]) {
for (register int i = l; i <= r; i++) ansor.set(a[i]);
return;
}
for (register int i = l; bid[i] == bid[l]; i++) ansor.set(a[i]);
for (register int i = r; bid[i] == bid[r]; i--) ansor.set(a[i]);
if (bid[l] + 1 < bid[r]) {
register int t = lg2[bid[r] - bid[l] - 1];
ansor = ansor | f[t][bid[l] + 1] | f[t][bid[r] - (1 << t)];
}
}
// Heavy-light decomposition of the tree, plus the path query built on it.
// (`register` was removed throughout: the specifier is ill-formed in C++17.)
namespace TreeLink {
int timer;         // last DFS-order position handed out by dfs2
int dfn[N + 10];   // dfn[u]  = DFS-order position of u (1-based)
int sz[N + 10];    // sz[u]   = size of u's subtree
int fa[N + 10];    // fa[u]   = parent of u
int dep[N + 10];   // dep[u]  = dep[parent] + 1
int ltop[N + 10];  // ltop[u] = topmost vertex of the heavy chain containing u
int ch[N + 10];    // ch[u]   = heavy child of u, 0 when u has no child

// First pass: records parent, depth and subtree size of every vertex below
// u and picks the child with the largest subtree as the heavy child.
void dfs1(int u, int p) {
  sz[u] = 1;
  fa[u] = p;
  dep[u] = dep[p] + 1;
  for (int i = hd[u]; i; i = e[i].nxt) {
    const int v = e[i].to;
    if (v == p) continue;
    dfs1(v, u);
    sz[u] += sz[v];
    // sz[0] is 0, so the first child visited always becomes the candidate.
    if (sz[v] > sz[ch[u]]) ch[u] = v;
  }
}

// Second pass: numbers the vertices so that each heavy chain occupies a
// contiguous range of positions, and fills the per-position data:
// the value array `a`, the block id `bid`, and level 0 of the table `f`.
// `p` is the top of the chain that u belongs to.
void dfs2(int u, int p) {
  ltop[u] = p;
  dfn[u] = ++timer;
  a[dfn[u]] = w[u];
  bid[dfn[u]] = (dfn[u] - 1) / bsz + 1;
  f[0][bid[dfn[u]]].set(w[u]);

  if (ch[u] == 0) return;
  // Heavy child first, so it gets the next position and stays on this chain.
  dfs2(ch[u], p);
  for (int i = hd[u]; i; i = e[i].nxt) {
    const int v = e[i].to;
    if (v == fa[u] || v == ch[u]) continue;
    dfs2(v, v);  // every light child starts a chain of its own
  }
}

// Leaves in `ansor` the set of values on the tree path between u and v.
void query(int u, int v) {
  ansor.reset();
  while (ltop[u] != ltop[v]) {
    // Always lift the endpoint whose chain top is deeper.
    if (dep[ltop[u]] < dep[ltop[v]]) std::swap(u, v);
    work(dfn[ltop[u]], dfn[u]);
    u = fa[ltop[u]];
  }
  // Both endpoints are on one chain now; the shallower one comes first.
  if (dep[u] > dep[v]) std::swap(u, v);
  work(dfn[u], dfn[v]);
}
}  // namespace TreeLink
using namespace TreeLink;
// Good-luck charm: main increments both counters once at start-up.
struct OI {
  int RP;
  int score;
};

OI FJOI2022;
signed main() {
FJOI2022.RP++, FJOI2022.score++;
fr(n, m), bsz = sqrt(n), up = n >> 6;
for (register int i = 1; i <= n; i++) fr(w[i]), arr[i] = w[i];
sort(arr + 1, arr + 1 + n), arr[0] = unique(arr + 1, arr + 1 + n) - arr - 1;
for (register int i = 1; i <= n; i++) w[i] = lower_bound(arr + 1, arr + 1 + arr[0], w[i]) - arr;
for (register int i = 1, x, y; i < n; i++) fr(x, y), adde(x, y), adde(y, x);
dfs1(1, 0), dfs2(1, 1);
for (register int i = 2; i <= bid[n]; i++) lg2[i] = lg2[i >> 1] + 1;
for (register int j = 1; j <= lg2[bid[n]]; j++) {
auto *x = f[j], *y = f[j - 1];
for (register int i = 1; i + (1 << j) - 1 <= bid[n]; i++) x[i] = y[i] | y[i + (1 << (j - 1))];
}
for (register int i = 1, x, y, lstans = 0; i <= m; i++) fr(x, y), x ^= lstans, query(x, y), printf("%d\n", lstans = ansor.count());
return 0;
}