# MEX_ARRAY - Editorial

Author: sho358
Tester: tabr
Editorialist: iceknight1093

1965

Frequency arrays

# PROBLEM:

You have an array A, and an initially empty array B.
You can perform the following operation:

• Pick a non-empty subsequence of A.
• Append the mex of this subsequence to B.
• Delete the subsequence from A.

Find the lexicographically maximum possible value of B.

# EXPLANATION:

Lexicographic minimization/maximization problems often require us to make greedy choices; and this one is no different.

In order to obtain the lexicographically largest array, the first step is to extract the maximum possible mex we can.
That is, find the largest possible integer k such that all of 0, 1, 2, \ldots, k exist in the array; giving us a mex of k+1.

It’s easy to see that this process can just be repeated:
While the array has at least one element, find the largest possible mex; and delete one instance of every element less than this mex.

We only need to implement this to run quickly enough.
That can be done with the help of a frequency array.

Let \text{freq}[x] be the number of times x occurs in A.
Then, in each step we can do the following:

• Iterate k starting from 0. Stop when \text{freq}[k] = 0.
k is the largest mex we can obtain.
• If k \gt 0, reduce \text{freq}[x] by 1 for every 0 \leq x \lt k, to simulate deleting the subsequence \{0, 1, 2, \ldots, k-1\} from the array.
• Otherwise, k = 0; meaning that 0 is no longer present in the array.
This means all further mexes will only be 0.
So, if there are m elements remaining in the array, append 0 to B, m times; then break out since we’ve processed everything we can.

The complexity of this is in fact \mathcal{O}(N):

• Building the frequency array takes \mathcal{O}(N) time, obviously.
• Whenever we find a mex of k \gt 0, we iterated k+1 times (and deleted k elements) to do so.
This means the total sum of non-zero mexes we obtain cannot exceed N; meaning the total number of iterations is also \mathcal{O}(N).
• When k = 0, we append a bunch of elements to B and immediately broke out; obviously this is linear time as well.

# TIME COMPLEXITY

\mathcal{O}(N) per testcase.

# CODE:

Author's code (C++)
#include <bits/stdc++.h> //Andrei Alexandru a.k.a Sho
using ll=long long;
using ld=long double;
int const INF=1000000005;
ll const LINF=1000000000000000005;
ll const mod=1e9+7;
ld const PI=3.14159265359;
ll const NMAX=3e6+5;
ld const eps=0.0000001;
#pragma GCC optimize("O3")
#pragma GCC optimize("Ofast")
#define f first
#define s second
#define pb push_back
#define mp make_pair
#define endl '\n'
#define CODE_START  ios_base::sync_with_stdio(false);cin.tie(0);cout.tie(0);
using namespace std;
ll n,a,cnt,pref;
void testcase(){
cin>>n;
for(ll i=1;i<=n;i++)
{
cin>>a[i];
cnt[a[i]]++;
}
pref=cnt;
for(ll i=1;i<=n;i++)
{
pref[i]=min(cnt[i],pref[i-1]);
}
vector<ll>v;
ll sum=0;
for(ll i=n;i>=0;i--){
while(pref[i]-sum>=1){
sum++;
v.pb(i+1);
}
cnt[i]-=sum;
}
for(ll i=1;i<=n;i++)
{
while(cnt[i]){
v.pb(0);
cnt[i]--;
}
}
cout<<v.size()<<endl;
for(auto it : v){
cout<<it<<' ';
}
cout<<endl;
for(ll i=0;i<=n;i++)
{
cnt[i]=0;
pref[i]=0;
}
}
int32_t main(){
CODE_START;
#ifdef LOCAL
freopen("input.in", "r", stdin);
#endif
ll t=1;
cin>>t;
while(t--){
testcase();
}
}

Tester's code (C++)
#include <bits/stdc++.h>
using namespace std;
#ifdef tabr
#include "library/debug.cpp"
#else
#define debug(...)
#endif

struct input_checker {
string buffer;
int pos;

const string all = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
const string number = "0123456789";
const string upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const string lower = "abcdefghijklmnopqrstuvwxyz";

input_checker() {
pos = 0;
while (true) {
int c = cin.get();
if (c == -1) {
break;
}
buffer.push_back((char) c);
}
}

assert(pos < (int) buffer.size());
string res;
while (pos < (int) buffer.size() && buffer[pos] != ' ' && buffer[pos] != '\n') {
res += buffer[pos];
pos++;
}
return res;
}

string readString(int min_len, int max_len, const string& pattern = "") {
assert(min_len <= max_len);
assert(min_len <= (int) res.size());
assert((int) res.size() <= max_len);
for (int i = 0; i < (int) res.size(); i++) {
assert(pattern.empty() || pattern.find(res[i]) != string::npos);
}
return res;
}

int readInt(int min_val, int max_val) {
assert(min_val <= max_val);
assert(min_val <= res);
assert(res <= max_val);
return res;
}

long long readLong(long long min_val, long long max_val) {
assert(min_val <= max_val);
assert(min_val <= res);
assert(res <= max_val);
return res;
}

vector<int> readInts(int size, int min_val, int max_val) {
assert(min_val <= max_val);
vector<int> res(size);
for (int i = 0; i < size; i++) {
if (i != size - 1) {
}
}
return res;
}

vector<long long> readLongs(int size, long long min_val, long long max_val) {
assert(min_val <= max_val);
vector<long long> res(size);
for (int i = 0; i < size; i++) {
if (i != size - 1) {
}
}
return res;
}

assert((int) buffer.size() > pos);
assert(buffer[pos] == ' ');
pos++;
}

assert((int) buffer.size() > pos);
assert(buffer[pos] == '\n');
pos++;
}

assert((int) buffer.size() == pos);
}
};

int main() {
input_checker in;
int sn = 0;
while (tt--) {
cerr << tt << endl;
sn += n;
cerr << tt << endl;
auto a = in.readInts(n, 0, n);
vector<int> c(n + 1);
for (int i = 0; i < n; i++) {
c[a[i]]++;
}
vector<int> b;
while (c > 0) {
b.emplace_back(0);
for (int i = 0; i <= n; i++) {
if (c[i] == 0) {
break;
}
c[i]--;
b.back()++;
}
}
for (int i = 1; i <= n; i++) {
while (c[i] > 0) {
c[i]--;
b.emplace_back(0);
}
}
int m = (int) b.size();
cout << m << '\n';
for (int i = 0; i < m; i++) {
cout << b[i] << " \n"[i == m - 1];
}
}
assert(sn <= (int) 2e5);
return 0;
}

Editorialist's code (Python)
for _ in range(int(input())):
n = int(input())
a = list(map(int, input().split()))
freq = *(n+2)
for x in a:
freq[x] += 1
ans = []
for i in range(n):
mex = 0
while True:
if freq[mex] == 0: break
mex += 1
if mex > 0:
ans.append(mex)
for x in range(mex): freq[x] -= 1
else:
for x in range(sum(freq)): ans.append(0)
break
print(len(ans))
print(*ans)