# GNSTWOK - Editorial

Author: satyam_343
Testers: apoorv_me, tabr
Editorialist: iceknight1093

TBD

None

# PROBLEM:

Let f(B) be the lexicographically minimum array that can be obtained as a result of the following process:

• Start at any element of B with an empty array C.
• Append the current element to C, and mark it.
• Move to any unmarked element of B that has at least one marked neighbor.

You’re given a permutation P.
Order all the arrays f(P[1, i]).

# EXPLANATION:

Observe that when the array B consists of distinct elements, computing f(B) is quite simple: the optimal strategy is to start at \min(B), and then repeatedly move to whichever side is smaller.

With this in mind, let’s see how f(P[1, i]) differs from f(P[1, i+1]).

First, for convenience, let’s define the following:

• s_i is the index of the minimum element among [P_1, P_2, \ldots, P_i].
• m_i is the index of the maximum element among [P_1, P_2, \ldots, P_i].

Now, observe that:

• If s_{i+1} = i+1, meaning that P_{i+1} is smaller than all of the first i elements, f(P[1, i+1]) will be smaller than every f(P[1, j]) for j \leq i.
This is, of course, because the first element of both arrays will be different (and the one at i+1 is smaller).
• Otherwise, we have s_{i+1} = s_i, so both prefixes have the same starting position.
There are now once again two cases.
• If m_{i+1} \lt s_{i+1}, i.e the maximum occurs to the left of the minimum, we’ll have
f(P[1, i+1]) \lt f(P[1, i]).
This is because the optimal solution for i+1 will be able to avoid the maximum for one step more, and hence will be lexicographically smaller.
• If m_{i+1} \gt s_{i+1}, the opposite is true: f(P[1, i+1]) \gt f(P[1, i]).
This is because the optimal solution will first visit all indices to the left of index m_{i+1}, and only then index m_{i+1}; after which there is no choice but to visit things in order till i+1 is reached.
In particular, this means that f(P[1, i+1]) is obtained by just appending P_{i+1} to f(P[1, i]), justifying the earlier claim of it being larger.

All that’s left is to build the answer array using these observation.

For a simple way of doing that, note that the prefix minima of P are ‘breakpoints’: each time a new prefix minimum is reached, all further indices should be placed strictly before all earlier ones.
Further, if i and j (i \lt j) are consecutive prefix minimums, we seen that:

• If there is no prefix maximum between i and j, we simply have
f(P[1, i]) \gt f(P[1, i+1]) \gt \ldots \gt f(P[1, j-1]).
• If a prefix maximum does exist between i and j, let k be the leftmost prefix maximum in the range [i, j).
Then, we have f(P[1, i]) \gt f(P[1, i+1]) \gt \ldots \gt f(P[1, k-1]), followed by
f(P[1, k-1]) \lt f(P[1, k]) \lt \ldots \lt f(P[1, j-1]).
We also have f(P[1, j-1]) \lt f(P[1, k-2]), connecting the two chains into a single one.

So, once prefix minimums and maximums are precomputed in linear time, we’re able to find the order of indices between two consecutive prefix minimums in linear time (proportional to the distance between the minimums) too.
This allows for the entire answer to be built in \mathcal{O}(N) time in reverse, and the task is solved.

It’s also possible to “simplify” implementation (or at least thinking about it) with the help of a data structure that allows for inserting an element at an arbitrary point of a sequence - for example a treap.
This allows you to build the answer sequence in an essentially brute-force fashion, since each new insertion point is either adjacent to the previous one, or the very start of the sequence; so simply maintaining the index of the last inserted value will suffice.

# TIME COMPLEXITY:

\mathcal{O}(N) per testcase.

# CODE:

Author's code (C++)
#include <bits/stdc++.h>
using namespace std;
#define ll long long
#define nline "\n"
#define all(x) x.begin(),x.end()
const ll MAX=200200;
const ll MOD=998244353;
class ST {
public:
vector<ll> segs;
ll size = 0;

ST(ll sz) {
segs.assign(2 * sz, ID);
size = sz;
}

ll comb(ll a, ll b) {
return max(a, b);
}

void upd(ll idx, ll val) {
ll y=query(idx,idx);
if(y>=val){
return;
}
segs[idx += size] = val;
for(idx /= 2; idx; idx /= 2) segs[idx] = comb(segs[2 * idx], segs[2 * idx + 1]);
}

ll query(ll l, ll r) {
ll lans = ID, rans = ID;
for(l += size, r += size + 1; l < r; l /= 2, r /= 2) {
if(l & 1) lans = comb(lans, segs[l++]);
if(r & 1) rans = comb(segs[--r], rans);
}
return comb(lans, rans);
}
};
ll track[MAX];
void solve(ll l,ll r){
if(l==r){
cout<<l<<" ";
return;
}
if(track[l]==0){
cout<<l<<" ";
solve(l+1,r);
}
else{
solve(l+1,r);
cout<<l<<" ";
}
}
void solve(){
ll n; cin>>n;
vector<ll> p(n+5);
ST segment_tree(n+5);
for(ll i=1;i<=n;i++){
cin>>p[i];
segment_tree.upd(i,p[i]);
track[i-1]=0;
}
vector<ll> pref_min;
for(ll i=1;i<=n;i++){
if(p[i]<nin){
pref_min.push_back(i);
pos=i;
track[i-1]=1;
nin=p[i];
}
else{
ll l=1,r=pos-1,till=pos;
while(l<=r){
ll mid=(l+r)/2;
if(segment_tree.query(mid,pos-1)<p[i]){
till=mid;
r=mid-1;
}
else{
l=mid+1;
}
}
pos=till;
if(pos!=1){
track[i-1]=1;
}
}
}
ll r=n+1;
while(!pref_min.empty()){
ll l=pref_min.back();
pref_min.pop_back();
solve(l,r-1);
r=l;
}
cout<<nline;
}
int main()
{
ios_base::sync_with_stdio(false);
cin.tie(NULL);
ll test_cases=1;
cin>>test_cases;
while(test_cases--){
solve();
}
cout<<fixed<<setprecision(10);
cerr<<"Time:"<<1000*((double)clock())/(double)CLOCKS_PER_SEC<<"ms\n";
}

Tester's code (apoorv_me, C++)
#include<bits/stdc++.h>
using namespace std;

#ifdef LOCAL
#include "../debug.h"
#else
#define dbg(...)
#endif

int32_t main() {
ios_base::sync_with_stdio(0);
cin.tie(0);

auto __solve_testcase = [&](int test) {
int N;  cin >> N;
vector<int> A(N), pfmx(N + 1), pfmn(N + 1, N + 1);
for(auto &i: A) cin >> i;

for(int i = 0 ; i < N ; ++i) {
pfmx[i + 1] = max(pfmx[i], A[i]);
pfmn[i + 1] = min(pfmn[i], A[i]);
}

int M = N, cnt = 0;
vector<int> res(N);
for(int i = N - 1 ; i >= 0 ; --i) {
if(pfmn[i + 1] != A[i]) continue;
int L = i;
for(int j = i ; j < M ; ++j) {
if(pfmx[j + 1] == A[j])
break;
L = j;
}
for(int j = L ; j < M ; ++j) {
res[j] = cnt++;
}
for(int j = L - 1 ; j >= i ; --j) {
res[j] = cnt++;
}
M = i;
}
vector<int> sol(N);
for(int i = 0 ; i < N ; ++i) {
sol[res[i]] = i + 1;
}
for(int i = 0 ; i < N ; ++i)
cout << sol[i] << " \n"[i == N - 1];
};

int NumTest = 1;
cin >> NumTest;
for(int testno = 1; testno <= NumTest ; ++testno) {
__solve_testcase(testno);
}

return 0;
}

Tester's code (tabr, C++)
#include <bits/stdc++.h>
using namespace std;

#ifdef tabr
#include "library/debug.cpp"
#else
#define debug(...)
#endif

void solve(int n, vector<int>& p) {
int mx = -1;
int mn = n + 1;
int ok = 0;
deque<int> ans, tmp;
for (int i = 0; i < n; i++) {
if (mn > p[i]) {
mn = p[i];
mx = max(mx, p[i]);
ok = 1;
while (tmp.size()) {
ans.emplace_front(tmp.back());
tmp.pop_back();
}
tmp.emplace_back(i);
} else {
if (ok && mx < p[i]) {
ok = 0;
while (tmp.size() >= 2) {
ans.emplace_front(tmp.back());
tmp.pop_back();
}
}
mx = max(mx, p[i]);
if (ok) {
tmp.emplace_front(i);
} else {
tmp.emplace_back(i);
}
}
}
while (tmp.size()) {
ans.emplace_front(tmp.back());
tmp.pop_back();
}
for (int i : ans) {
cout << i + 1 << " ";
}
cout << "\n";
}

////////////////////////////////////////

#define IGNORE_CR

struct input_checker {
string buffer;
int pos;

const string all = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
const string number = "0123456789";
const string upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const string lower = "abcdefghijklmnopqrstuvwxyz";

input_checker() {
pos = 0;
while (true) {
int c = cin.get();
if (c == -1) {
break;
}
#ifdef IGNORE_CR
if (c == '\r') {
continue;
}
#endif
buffer.push_back((char) c);
}
}

assert(pos < (int) buffer.size());
string res;
while (pos < (int) buffer.size() && buffer[pos] != ' ' && buffer[pos] != '\n') {
assert(!isspace(buffer[pos]));
res += buffer[pos];
pos++;
}
return res;
}

string readString(int min_len, int max_len, const string& pattern = "") {
assert(min_len <= max_len);
assert(min_len <= (int) res.size());
assert((int) res.size() <= max_len);
for (int i = 0; i < (int) res.size(); i++) {
assert(pattern.empty() || pattern.find(res[i]) != string::npos);
}
return res;
}

int readInt(int min_val, int max_val) {
assert(min_val <= max_val);
assert(min_val <= res);
assert(res <= max_val);
return res;
}

long long readLong(long long min_val, long long max_val) {
assert(min_val <= max_val);
assert(min_val <= res);
assert(res <= max_val);
return res;
}

vector<int> readInts(int size, int min_val, int max_val) {
assert(min_val <= max_val);
vector<int> res(size);
for (int i = 0; i < size; i++) {
if (i != size - 1) {
}
}
return res;
}

vector<long long> readLongs(int size, long long min_val, long long max_val) {
assert(min_val <= max_val);
vector<long long> res(size);
for (int i = 0; i < size; i++) {
if (i != size - 1) {
}
}
return res;
}

assert((int) buffer.size() > pos);
assert(buffer[pos] == ' ');
pos++;
}

assert((int) buffer.size() > pos);
assert(buffer[pos] == '\n');
pos++;
}

assert((int) buffer.size() == pos);
}
};

int main() {
input_checker in;
cerr << tt << endl;
int sn = 0;
while (tt--) {
sn += n;
auto p = in.readInts(n, 1, n);
vector<int> count(n);
for (int i = 0; i < n; i++) {
count[p[i] - 1]++;
}
for (int i = 0; i < n; i++) {
assert(count[i] == 1);
}
solve(n, p);
}
cerr << sn << endl;
assert(sn <= 2e5);
return 0;
}

Editorialist's code (Python)
for _ in range(int(input())):
n = int(input())
p = list(map(int, input().split())) + [0]
mn, mx = n+1, 0
a1, a2 = [], []
ans = []
for i in range(n+1):
mn = min(mn, p[i])
mx = max(mx, p[i])

if p[i] == mn:
ans += a1[:-1]
ans += a2[::-1]
if len(a1) > 0: ans += [a1[-1]]
a1, a2 = [i+1], []
elif p[i] == mx:
a2.append(i+1)
else:
if len(a2) > 0: a2.append(i+1)
else: a1.append(i+1)
print(*reversed(ans[:n]))


Editorialist's code (C++, treap)
// #pragma GCC optimize("O3,unroll-loops")
// #pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt")
#include "bits/stdc++.h"
using namespace std;
using ll = long long int;
mt19937_64 RNG(chrono::high_resolution_clock::now().time_since_epoch().count());

struct Node {
Node *l = 0, *r = 0;
int val, y, c = 1;
Node(int _val) : val(_val), y(RNG()) {}
void recalc();
};
int cnt(Node* n) { return n ? n->c : 0; }
void Node::recalc() { c = cnt(l) + cnt(r) + 1; }
pair<Node*, Node*> split(Node* n, int k) {
if (!n) return {};
if (cnt(n->l) >= k) { // "n->val >= k" for lower_bound(k)
auto pa = split(n->l, k);
n->l = pa.second; n->recalc();
return {pa.first, n};
} else {
auto pa = split(n->r, k - cnt(n->l) - 1); // and just "k"
n->r = pa.first; n->recalc();
return {n, pa.second};
}
}
Node* merge(Node* l, Node* r) {
if (!l) return r;
if (!r) return l;
if (l->y > r->y) {
l->r = merge(l->r, r); l->recalc();
return l;
} else {
r->l = merge(l, r->l); r->recalc();
return r;
}
}
Node* ins(Node* t, Node* n, int pos) {
auto pa = split(t, pos);
return merge(merge(pa.first, n), pa.second);
}

void dfs(Node *t) {
if (!t) return;
dfs(t->l);
cout << t->val << ' ';
dfs(t->r);
}

int main()
{
ios::sync_with_stdio(false); cin.tie(0);

int t; cin >> t;
while (t--) {
int n; cin >> n;
vector<int> p(n);
for (int &x : p) cin >> x;

int mn = 0, mx = 0, cur = 0;
Node *root = new Node(1);
for (int i = 1; i < n; ++i) {
if (p[mx] < p[i]) mx = i;

if (p[mn] > p[i]) {
root = ins(root, new Node(i+1), 0);
mn = i;
cur = 0;
}
else {
if (mn < mx) {
root = ins(root, new Node(i+1), cur + 1);
++cur;
}
else {
root = ins(root, new Node(i+1), cur);
}
}
}
dfs(root);
cout << '\n';
}
}