# BFSLEN - Editorial

Author: Abhinav Sharma
Testers: Takuki Kurokawa, Utkarsh Gupta
Editorialist: Nishank Suresh

2715

# PREREQUISITES:

DFS, BFS and intuition for how it operates on a tree, finding lowest common ancestor

# PROBLEM:

Given a tree, let (u_1, u_2, \ldots, u_N) be a bfs ordering of it with u_1 = 1.
Find the minimum possible value of \sum_{i=1}^{N-1}dist(u_i, u_{i+1}) across all possible bfs orderings.

# EXPLANATION:

Let’s first define something that will be useful later: let h_u denote the height of vertex u, i.e, the height of the subtree of which u is the root. This can also be thought of as the length of the longest path from u to a leaf in its subtree.
h_u = 1 if u is a leaf.

Now, look at the problem a bit differently. Instead of thinking about distances, the final answer can also be thought of as the number of times we cross each edge, summed up across all the edges.

So, consider a vertex u and its parent p. How many times do we cross this edge at least?
Well, we move down from p \to u sometimes and up from u \to p sometimes, so let’s consider them separately.

Moving down

Moving down means we enter u from outside.

An arbitrary bfs process will visit u, then visit vertices at the same depth as u, then visit the children of u, then other vertices at the same depth as these children, and so on.

You can see that we will thus enter u exactly h_u times: once for every depth contained in the subtree of u.

Moving up

The analysis here is the exact same as the moving down case: we move up across this edge exactly once for each depth contained in the subtree of u, making h_u moves in total.

This gives us an immediate upper bound on the answer: 2\sum_{i=2}^N h_i.

However, this isn’t the final answer by itself: there is one more thing to consider, which is the fact the we need not make all the moves described above.

In particular, depending on the bfs order chosen, there are some edges that we won’t move up across, or won’t move down across h_i times — for example, we won’t move up across the last edge we visit.
But which edges can satisfy this property?

Once again consider a vertex u and its parent p. Suppose u is at a depth of k. When can we ensure that we never move up along this edge?
The answer is quite simple: if there exists a vertex at depth k+1 that is not a child of u, then we must move up along this edge to visit it.

On the other hand, if every vertex at depth k+1 is indeed a child of u, then there always exists a way to not have to move up from u to p: simply choose the bfs order so that u is the last depth-k vertex visited.
Notice that this also ensures we only go down p \to u exactly once.

Of course, this isn’t the only case where we don’t perform some moves, but nevertheless it is enough to form an intuition for the process.

This should immediately give a somewhat natural greedy solution: the deeper the subtree of a vertex is, the later we should visit it.

That is, suppose we compute the height h_i of each vertex, as described above.
Then, start a bfs from 1, but push children of 1 into the queue in increasing of h_i values.

This process gives us a bfs order (u_1, u_2, \ldots, u_N), after which the final answer is \sum_{i=1}^{N-1} d(u_i, u_{i+1}).

Computing pairwise distances is a standard technique, and can be done in \mathcal{O}(1) or \mathcal{O}(\log N) by finding LCA for example.

# TIME COMPLEXITY

\mathcal{O}(N\log N) per test case.

# CODE:

Setter's code (C++)
#include<bits/stdc++.h>
using namespace std;

#define int long long

vector<vector<int> > g;
vector<int> mxd, mxd1;

void dfs1(int c, int p, int d){
mxd[c] = d;
mxd1[c] = d-1;
for(auto h:g[c]){
if(h!=p){
dfs1(h,c,d+1);
if(mxd[h]>mxd[c]){
if(mxd[c] > d) mxd1[c] = mxd[c];
mxd[c] = mxd[h];
}
else if(mxd[h]>mxd1[c]){
mxd1[c] = mxd[h];
}
}
}
}

int ans;

void dfs2(int c, int p, int d, int ancd){
if(p!=-1){
ans += max(1ll, (min(mxd[c], ancd)-d+1))*2;
}

for(auto h:g[c]){
if(h!=p){
int tancd;
if(mxd[h]==mxd[c]) tancd = max(ancd, mxd1[c]);
else tancd = max(ancd, mxd[c]);
dfs2(h,c,d+1,tancd);
}
}
}

signed main(){

ios_base::sync_with_stdio(false);
cin.tie(0);
cout.tie(0);
#ifndef ONLINE_JUDGE
freopen("input.txt", "r" , stdin);
freopen("output.txt", "w" , stdout);
#endif

int tc;
cin>>tc;

while(tc--){
int n;
cin>>n;

g.assign(n, vector<int>());
mxd.assign(n, 0);
mxd1.assign(n, 0);
ans = 0;
int x,y;

for(int i=0; i<n-1; i++){
cin>>x>>y;
x--, y--;
g[x].push_back(y);
g[y].push_back(x);
}

dfs1(0,-1,0);
dfs2(0,-1,0, -1);

ans -= mxd[0];

cout<<ans<<'\n';

}

return 0;
}

Tester's code (C++)
//Utkarsh.25dec
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cmath>
#include <vector>
#include <set>
#include <map>
#include <unordered_set>
#include <unordered_map>
#include <queue>
#include <ctime>
#include <cassert>
#include <complex>
#include <string>
#include <cstring>
#include <chrono>
#include <random>
#include <bitset>
#include <array>
#define ll long long int
#define pb push_back
#define mp make_pair
#define mod 1000000007
#define vl vector <ll>
#define all(c) (c).begin(),(c).end()
using namespace std;
ll power(ll a,ll b) {ll res=1;a%=mod; assert(b>=0); for(;b;b>>=1){if(b&1)res=res*a%mod;a=a*a%mod;}return res;}
ll modInverse(ll a){return power(a,mod-2);}
const int N=500023;
bool vis[N];
const int k=25;
int up[N][k];
int tin[N],tout[N];
int timer=1;
int limit[N],level[N];
long long readInt(long long l,long long r,char endd){
long long x=0;
int cnt=0;
int fi=-1;
bool is_neg=false;
while(true){
char g=getchar();
if(g=='-'){
assert(fi==-1);
is_neg=true;
continue;
}
if('0'<=g && g<='9'){
x*=10;
x+=g-'0';
if(cnt==0){
fi=g-'0';
}
cnt++;
assert(fi!=0 || cnt==1);
assert(fi!=0 || is_neg==false);

assert(!(cnt>19 || ( cnt==19 && fi>1) ));
} else if(g==endd){
if(is_neg){
x= -x;
}

if(!(l <= x && x <= r))
{
cerr << l << ' ' << r << ' ' << x << '\n';
assert(1 == 0);
}

return x;
} else {
assert(false);
}
}
}
string ret="";
int cnt=0;
while(true){
char g=getchar();
assert(g!=-1);
if(g==endd){
break;
}
cnt++;
ret+=g;
}
assert(l<=cnt && cnt<=r);
return ret;
}
long long readIntSp(long long l,long long r){
}
long long readIntLn(long long l,long long r){
}
}
}
void dfs(int curr,int par)
{
level[curr]=level[par]+1;
tin[curr]=timer++;
up[curr][0]=par;
for(int i=1;i<k;i++)
{
up[curr][i]=up[up[curr][i-1]][i-1];
}
{
if(it==par)
continue;
dfs(it,curr);
}
limit[curr]=1;
{
if(it==par)
continue;
limit[curr]=max(limit[curr],limit[it]+1);
}
tout[curr]=timer++;
}
bool is_ancestor(int u, int v)
{
return tin[u] <= tin[v] && tout[u] >= tout[v];
}
int lca(int u, int v)
{
if (is_ancestor(u, v))
return u;
if (is_ancestor(v, u))
return v;
for (int i = k-1; i >= 0; --i)
{
if(is_ancestor(up[u][i], v))
continue;
else
u=up[u][i];
}
return up[u][0];
}
void checktree(int curr)
{
vis[curr]=1;
{
if(vis[it])
continue;
checktree(it);
}
}
int dist(int a, int b)
{
int L=lca(a,b);
int ans=(level[a]+level[b]-2*level[L]);
return ans;
}
bool cmp(int a, int b)
{
return limit[a]<limit[b];
}
void solve()
{
for(int i=1;i<=n;i++)
{
vis[i]=0;
}
timer=0;
if(n==1)
{
cout<<0<<'\n';
return;
}
for(int i=1;i<n;i++)
{
assert(u!=v);
}
checktree(1);
for(int i=1;i<=n;i++)
{
assert(vis[i]==1);
vis[i]=0;
}
dfs(1,1);
for(int i=1;i<=n;i++)
queue <int> q;
q.push(1);
vis[1]=1;
vector <int> order;
while(!q.empty())
{
auto node=q.front();
q.pop();
order.pb(node);
{
if(vis[it])
continue;
q.push(it);
vis[it]=1;
}
}
ll ans=0;
for(int i=0;i<order.size()-1;i++)
ans+=dist(order[i],order[i+1]);
cout<<ans<<'\n';
}
int main()
{
#ifndef ONLINE_JUDGE
freopen("input.txt", "r", stdin);
freopen("output.txt", "w", stdout);
#endif
ios_base::sync_with_stdio(false);
cin.tie(NULL),cout.tie(NULL);
while(T--)
solve();
assert(getchar()==-1);
cerr << "Time : " << 1000 * ((double)clock()) / (double)CLOCKS_PER_SEC << "ms\n";
}

Editorialist's code (C++)
#include "bits/stdc++.h"
// #pragma GCC optimize("O3,unroll-loops")
// #pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt")
using namespace std;
using ll = long long int;
mt19937_64 rng(chrono::high_resolution_clock::now().time_since_epoch().count());

template<class T>
struct RMQ {
vector<vector<T>> jmp;
RMQ(const vector<T>& V) : jmp(1, V) {
for (int pw = 1, k = 1; pw * 2 <= (int)size(V); pw *= 2, ++k) {
jmp.emplace_back(size(V) - pw * 2 + 1);
for (int j = 0; j < (int)size(jmp[k]); ++j)
jmp[k][j] = min(jmp[k - 1][j], jmp[k - 1][j + pw]);
}
}
T query(int a, int b) {
assert(a < b); // or return inf if a == b
int dep = 31 - __builtin_clz(b - a);
return min(jmp[dep][a], jmp[dep][b - (1 << dep)]);
}
};

struct LCA {
int T = 0;
vector<int> time, path, ret, depth;
RMQ<int> rmq;

LCA(vector<vector<int>>& C) : time(size(C)), depth(size(C)), rmq((dfs(C,0,-1), ret)) {}
void dfs(vector<vector<int>>& C, int v, int par) {
time[v] = T++;
for (int y : C[v]) if (y != par) {
path.push_back(v), ret.push_back(time[v]);
depth[y] = 1 + depth[v];
dfs(C, y, v);
}
}

int lca(int a, int b) {
if (a == b) return a;
tie(a, b) = minmax(time[a], time[b]);
return path[rmq.query(a, b)];
}
int dist(int a, int b) {
return depth[a] + depth[b] - 2*depth[lca(a,b)];
}
};

int main()
{
ios::sync_with_stdio(false); cin.tie(0);

int t; cin >> t;
while (t--) {
int n; cin >> n;
vector<vector<int>> g(n);
for (int i = 0; i < n-1; ++i) {
int u, v; cin >> u >> v;
g[--u].push_back(--v);
g[v].push_back(u);
}
vector<int> ht(n);
auto dfs = [&] (const auto &self, int u, int p) -> void {
for (int v : g[u]) {
if (v == p) continue;
self(self, v, u);
ht[u] = max(ht[u], ht[v]);
}
++ht[u];
};
auto bfs = [&] (int src) {
vector<int> ord;
queue<int> q;
q.push(src);
while (!q.empty()) {
int u = q.front(); q.pop();
ord.push_back(u);
for (int v : g[u]) {
if (ht[v] > ht[u]) continue;
q.push(v);
}
}
return ord;
};
dfs(dfs, 0, 0);
LCA L(g);
for (auto &v : g) {
sort(begin(v), end(v), [&] (int x, int y) {return ht[x] < ht[y];});
}
auto order = bfs(0);
ll ans = 0;
for (int i = 0; i+1 < n; ++i) ans += L.dist(order[i], order[i+1]);
cout << ans << '\n';
}
}


Can anyone please explain setter’s solution? I think he is doing tree dp or smth