SORTPERM - Editorial

PROBLEM LINK:

Practice
Contest: Division 1
Contest: Division 2
Contest: Division 3
Contest: Division 4

Authors: satyam_343
Testers: iceknight1093, mexomerf
Editorialist: iceknight1093

DIFFICULTY:

3198

PREREQUISITES:

None

PROBLEM:

You are given a permutation P.
In one move, you can swap P_i and P_j for a cost of |i-j|.

Find the minimum cost to sort P, and a sequence of swaps that achieves this minimum.

EXPLANATION:

First, let’s try to find the minimum cost.

For each 1 \leq i \leq N, define d_i to be the signed distance that element i needs to travel to reach position i. The sign is negative if it needs to move left and positive otherwise.
For example, if P = [3, 2, 1], then d_1 = -2, d_2 = 0, d_3 = 2.

In particular, if P_x = i, then d_i = i - x.
Note that for any permutation P, we have \sum_{i=1}^N d_i = 0.
This tells us that the sum of positive d_i equals the (negative of) the sum of the negative d_i.

So, let S be the sum of positive d_i, i.e, S = \sum_{i=1}^N \max(0, d_i).

It turns out that the minimum cost is exactly S.

Proof of lower bound

Suppose we swap P_i and P_j. Let P_i = x and P_j = y, and i \lt j.

When we perform this swap, it can be seen that d_x decreases by j-i, while d_y increases by j-i.

Note that, in particular, we can only decrease at most one positive d_x in each move.
This means that a lower bound on the cost is clearly the sum of all positive d_x, which is S.

So, if we are able to construct a sequence of swaps that achieves a cost of S, we’ll be done.

There are several ways to achieve this, here’s one using the d_i from above.
The idea is to try and move elements left while we can; while always reducing positive d_i and increasing negative ones.

While the permutation is not sorted, repeat the following:

  • Compute the d_x values for each x.
  • Find the smallest index i such that d_i \lt 0.
  • Find the largest index j such that j \lt i and d_j \gt 0 (such a j will always exist)
  • Perform the swap on positions i and j.

Each step takes \mathcal{O}(N) time since we run a couple of loops, and we do this \mathcal{O}(N^2) times in total, so the algorithm runs in \mathcal{O}(N^3) time.

TIME COMPLEXITY:

\mathcal{O}(N^3) per testcase.

CODE:

Setter's code (C++)
// #pragma GCC optimize("O3")
// #pragma GCC optimize("Ofast,unroll-loops")

#include <bits/stdc++.h>   
#include <ext/pb_ds/tree_policy.hpp>
#include <ext/pb_ds/assoc_container.hpp>
using namespace __gnu_pbds;   
using namespace std;  
#define ll long long  
const ll INF_MUL=1e13;
const ll INF_ADD=1e18;    
#define pb push_back                 
#define mp make_pair          
#define nline "\n"                           
#define f first                                          
#define s second                                             
#define pll pair<ll,ll> 
#define all(x) x.begin(),x.end()     
#define vl vector<ll>           
#define vvl vector<vector<ll>>    
#define vvvl vector<vector<vector<ll>>>          
#ifndef ONLINE_JUDGE    
#define debug(x) cerr<<#x<<" "; _print(x); cerr<<nline;
#else
#define debug(x);  
#endif       
void _print(ll x){cerr<<x;}  
void _print(char x){cerr<<x;}   
void _print(string x){cerr<<x;}    
mt19937 rng(chrono::steady_clock::now().time_since_epoch().count());   
template<class T,class V> void _print(pair<T,V> p) {cerr<<"{"; _print(p.first);cerr<<","; _print(p.second);cerr<<"}";}
template<class T>void _print(vector<T> v) {cerr<<" [ "; for (T i:v){_print(i);cerr<<" ";}cerr<<"]";}
template<class T>void _print(set<T> v) {cerr<<" [ "; for (T i:v){_print(i); cerr<<" ";}cerr<<"]";}
template<class T>void _print(multiset<T> v) {cerr<< " [ "; for (T i:v){_print(i);cerr<<" ";}cerr<<"]";}
template<class T,class V>void _print(map<T, V> v) {cerr<<" [ "; for(auto i:v) {_print(i);cerr<<" ";} cerr<<"]";} 
typedef tree<ll, null_type, less<ll>, rb_tree_tag, tree_order_statistics_node_update> ordered_set;
typedef tree<ll, null_type, less_equal<ll>, rb_tree_tag, tree_order_statistics_node_update> ordered_multiset;
typedef tree<pair<ll,ll>, null_type, less<pair<ll,ll>>, rb_tree_tag, tree_order_statistics_node_update> ordered_pset;
//--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
const ll MOD=998244353;     
const ll MAX=500500;
void solve(){
    ll n; cin>>n;
    vector<ll> p(n+5,0);
    ll ans=0;
    for(ll i=1;i<=n;i++){
        cin>>p[i];
        ans+=max(0LL,p[i]-i);
    }
    cout<<ans<<nline;
    vector<pair<ll,ll>> track;
    for(ll i=1;i<=n;i++){
        while(p[i]!=i){
            ll cur=i;
            for(ll j=i+1;j<=n;j++){
                if(p[j]==i){  
                    swap(p[j],p[cur]);
                    ans+=j-cur;
                    track.push_back({j,cur}); 
                    break; 
                }
                if(p[j]>p[cur]){
                    cur=j; 
                }
            }
        }
    }
    cout<<track.size()<<nline;
    for(auto it:track){
        cout<<it.f<<" "<<it.s<<nline;
    }
}                         
int main()                                                                                             
{      
    ios_base::sync_with_stdio(false);                         
    cin.tie(NULL);    
    #ifndef ONLINE_JUDGE                   
    freopen("input.txt", "r", stdin);                                              
    freopen("output.txt", "w", stdout);  
    freopen("error.txt", "w", stderr);                        
    #endif                          
    ll test_cases=1;               
    cin>>test_cases; 
    while(test_cases--){
        solve();
    } 
    cout<<fixed<<setprecision(10);
    cerr<<"Time:"<<1000*((double)clock())/(double)CLOCKS_PER_SEC<<"ms\n"; 
}  
Tester's code (C++)
#include <bits/stdc++.h>
using namespace std;
#define ll long long
#define N 30
void st_c() {
#ifndef ONLINE_JUDGE
  freopen("input.txt", "r", stdin);
  freopen("output.txt", "w", stdout);
#endif
}

int main() {
  st_c();
  int t; cin >> t;
  while (t--) {
    int n; cin >> n;
    int p[n+1];
    vector<pair<int, int>> v;
    ll ans = 0;
    for (int i = 1; i <= n; ++i) {
      cin >> p[i];
    }
    int cnt = 0;
    for(int i = 1; i <= n; i++){
      if(p[i] == i && cnt == i - 1){
        cnt++;
      }else{
        cnt = 0;
      }
      if(p[i] < i){
        for(int j = i - 1; j > 0; j--){
          if(p[j] > j){
            swap(p[j], p[i]);
            ans += abs(i - j);
            v.push_back({i, j});
            break;
          }
        }
        i = 0;
      }
    }
    cout<<ans<<"\n";
    cout<<v.size()<<"\n";
    for(int i = 0; i < v.size(); i++){
      cout<<v[i].first<<" "<<v[i].second<<"\n";
    }
  }
  return 0;
}
Editorialist's code (C++)
#include "bits/stdc++.h"
// #pragma GCC optimize("O3,unroll-loops")
// #pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt")
using namespace std;
using ll = long long int;
mt19937_64 rng(chrono::high_resolution_clock::now().time_since_epoch().count());

int main()
{
	ios::sync_with_stdio(false); cin.tie(0);

	int t; cin >> t;
	while (t--) {
		int n; cin >> n;
		vector<int> p(n+1);
		ll ans = 0;
		for (int i = 1; i <= n; ++i) {
			cin >> p[i];
			ans += abs(p[i] - i);
		}
		cout << ans/2 << '\n';

		vector<array<int, 2>> swaps;
		vector<int> dif(n+1), pos(n+1);
		while (true) {
			bool done = true;
			for (int i = 1; i <= n; ++i) {
				done &= p[i] == i;
				dif[p[i]] = p[i] - i;
				pos[p[i]] = i;
			}
			if (done) break;
			for (int i = 1; i <= n; ++i) {
				if (dif[p[i]] >= 0) continue;
				for (int j = i-1; ; --j) {
					if (dif[p[j]] > 0) {
						swaps.push_back({i, j});
						swap(p[i], p[j]);
						break;
					}
				}
				break;
			}
		}
		cout << size(swaps) << '\n';
		for (auto [x, y] : swaps) cout << x << ' ' << y << '\n';
	}
}