Setter: Cozma Tiberiu-Stefan
Tester: Nishank Suresh, Satyam
Editorialist: Devendra Singh

Easy-medium

# PROBLEM:

Once in the magic city of Lulympus, students were preparing for the final stage of Lulympiad, the national team selection for the IOI. After solving 3SAT in polynomial time complexity, learning the 4D convex hull trick and many other trivial techniques, Chef Lulus AKed the â€śBaraj Senioriâ€ť and decided to take a break and solve the following trivial problem:

You are given a positive integer N. Let P be the set of all permutations of length N. Find the value of the following expression: \sum_{p \in P} \sum_{i = 1}^{N} \texttt{gcd}(p_1, p_2, \dots, p_i).

Here \texttt{gcd}(p_1, p_2, \dots, p_i) denotes their greatest common divisor.

Since the answer can be large, print it modulo MOD.

# QUICK EXPLANATION:

Fix the length of the prefix of the permutation and then calculate the number of times a particular gcd value g occurs for this length and multiply it with g. Calculate this for all prefix lengths and output the sum.

# EXPLANATION:

O(Nlog^2(N)) approach
Let i be a particular length of prefix of the permutation and g be a possible gcd value for this prefix length. This means that all element values from index 1 to index i are divisible by g.
\because N/g\ge i
\therefore N/i\ge g
For a prefix length i the possible value of gcd g goes from 1 upto N/i. Take any i elements from N/g elements which are divisible by g and rearrange them in the prefix and rearrange other elements in the suffix to obtain the total number of prefixes such that the gcd of the prefix of length i is divisible by g. To calculate the number of prefixes of length i such that the gcd is exactly g we need to subtract the number of prefixes of length i such that the gcd is exactly 2\cdot g,3\cdot g,.... and so on from the above calculated permutations. Then multiply this number by g and add it to the final\: ans.
Time Complexity
We run a loop for each prefix length from i=1 to N and inside each loop we iterate gcd values from g=N/i to 1 in decreasing order. To remove overcounting we now run a loop from 2\cdot g to N/i.
This totals to \sum_{i=1}^{N} \sum_{g=N/i}^{1}N/(i\cdot g) \leq N log(N)\sum_{i=1}^{N}1/i \leq Nlog^2(N)

O(Nlog(N)) approach
The core idea of the problem is still same but now to avoid overcounting we wonâ€™t run a loop from 2\cdot g,3\cdot g,...... instead we will use the property of Eulerâ€™s totient function:
\sum_{d|n} \phi{(d)} = n
Here the sum is over all positive divisors d of n.

For instance the divisors of 10 are 1, 2, 5\: and\: 10. \phi{(1)} + \phi{(2)} + \phi{(5)} + \phi{(10)} = 1 + 1 + 4 + 4 = 10
In the previous approach we multiplied the the number of prefixes of length i such that the gcd is exactly g by g. Here we will multiply the number of prefixes of length i such that the gcd is divisible g by \phi{(g)}. Let the number of prefixes of length i such that the gcd is exactly g be a then we need to add a\cdot g into our answer instead we have added a\cdot \phi{(g)} into our answer. If we observe carefully we can see that these a prefixes will be repeated in the answer for every divisor of g which in the end would add upto a\cdot (\sum_{d|g} \phi{(d)} = g)
Time Complexity
We run a loop for each prefix length from i=1 to N and inside each loop we iterate gcd values from g=N/i to 1 in decreasing order. Euler phi values are precalculated.
This totals to \sum_{i=1}^{N}N/i \leq N log(N)

# TIME COMPLEXITY:

O(Nlog^2(N)) or O(Nlog(N)) for each test case.

# SOLUTION:

Setter's Solution

#include <bits/stdc++.h>
using namespace std;
int n, MOD;
int powmod(int a, int b) {
if (b == 0) {
return 1;
}
if (b % 2 == 1) {
return (1ll * a * powmod(a, b - 1)) % MOD;
}
int P = powmod(a, b / 2);
return (1ll * P * P) % MOD;
}
int inv(int n) {
return powmod(n, MOD - 2);
}
int32_t main() {
cin.tie(nullptr)->sync_with_stdio(false);
cin >> n >> MOD;
vector phi(n + 1);
for (int i = 1; i <= n; ++i) {
phi[i] = i;
}
for (int i = 2; i <= n; ++i) {
if (phi[i] == i) {
for (int j = i; j <= n; j += i) {
phi[j] /= i;
phi[j] *= (i - 1);
}
}
}
for (int i = 1; i <= n; ++i) {
phi[i] += phi[i - 1];
if (phi[i] >= MOD) {
phi[i] -= MOD;
}
}
auto query = [&](int st, int dr) {
return (phi[dr] - phi[st - 1] + MOD) % MOD;
};
vector fact(n + 1);
fact[0] = 1;
for (int i = 1; i <= n; ++i) {
fact[i] = (1ll * fact[i - 1] * i) % MOD;
}
vector finv(n + 1);
finv[n] = inv(fact[n]);
for (int i = n - 1; i >= 0; --i) {
finv[i] = (1ll * (i + 1) * finv[i + 1]) % MOD;
}
vector<pair<int, int>> segms;
int lim = sqrt(n);
for (int i = 1; i <= lim; ++i) {
segms.push_back({ i,i });
}
for (int i = 1; i <= lim; ++i) {
int st = max(lim + 1, n / (i + 1) + 1);
int dr = n / i;
if (st <= dr) {
segms.push_back({ st,dr });
}
}
sort(segms.begin(), segms.end());
int ans = 0;
for (int i = 1; i <= n; ++i) {
int rep = 0;
for (auto j : segms) {
int st = j.first, dr = j.second;
if (i * st > n) {
break;
}
int first = (1ll * fact[n - i] * query(st, dr)) % MOD;
int second = (1ll * fact[n / st] * finv[n / st - i]) % MOD;
rep += (1ll * first * second) % MOD;
if (rep >= MOD) {
rep -= MOD;
}
}
ans += rep;
if (ans >= MOD) {
ans -= MOD;
}
}
cout << ans;
}

Tester-1's Solution(Python)
n, mod = map(int, input().split())
phi = [i for i in range(n+1)]
for i in range(2, n+1):
if phi[i] != i:
continue
for j in range(i, n+1, i):
phi[j] -= phi[j]//i

fact = [i for i in range(n+1)]
fact[0] = 1
for i in range(1, n+1):
fact[i] *= fact[i-1]
if fact[i] >= mod:
fact[i] %= mod

inv = [1]*(n+1)
inv[n] = pow(fact[n], mod-2, mod)
for i in range(n-1, 0, -1):
inv[i] = (i+1)*inv[i+1]
if inv[i] >= mod:
inv[i] %= mod

ans = 0
for i in range(1, n+1):
j = 1
while i*j <= n:
ans += ((fact[n//j] * fact[n - i])%mod) * ((phi[j] * inv[n//j - i])%mod)
ans %= mod
j += 1
print(ans)

Tester-2's Solution
// #pragma GCC target("popcnt")
// #pragma GCC target("avx,avx2,fma")
// #pragma GCC optimize("Ofast,unroll-loops")
#include <bits/stdc++.h>
#include <ext/pb_ds/tree_policy.hpp>
#include <ext/pb_ds/assoc_container.hpp>
using namespace __gnu_pbds;
using namespace std;
#define ll long long
const ll INF_MUL=1e13;
#define pb push_back
#define mp make_pair
#define nline "\n"
#define f first
#define s second
#define pll pair<ll,ll>
#define all(x) x.begin(),x.end()
#define vl vector<ll>
#define vvl vector<vector<ll>>
#define vvvl vector<vector<vector<ll>>>
#ifndef ONLINE_JUDGE
#define debug(x) cerr<<#x<<" "; _print(x); cerr<<nline;
#else
#define debug(x);
#endif
void _print(ll x){cerr<<x;}
void _print(char x){cerr<<x;}
void _print(string x){cerr<<x;}
template<class T,class V> void _print(pair<T,V> p) {cerr<<"{"; _print(p.first);cerr<<","; _print(p.second);cerr<<"}";}
template<class T>void _print(vector<T> v) {cerr<<" [ "; for (T i:v){_print(i);cerr<<" ";}cerr<<"]";}
template<class T>void _print(set<T> v) {cerr<<" [ "; for (T i:v){_print(i); cerr<<" ";}cerr<<"]";}
template<class T>void _print(multiset<T> v) {cerr<< " [ "; for (T i:v){_print(i);cerr<<" ";}cerr<<"]";}
template<class T,class V>void _print(map<T, V> v) {cerr<<" [ "; for(auto i:v) {_print(i);cerr<<" ";} cerr<<"]";}
typedef tree<ll, null_type, less<ll>, rb_tree_tag, tree_order_statistics_node_update> ordered_set;
typedef tree<ll, null_type, less_equal<ll>, rb_tree_tag, tree_order_statistics_node_update> ordered_multiset;
typedef tree<pair<ll,ll>, null_type, less<pair<ll,ll>>, rb_tree_tag, tree_order_statistics_node_update> ordered_pset;
//--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
const ll MOD=1e9+7;
const ll MAX=10000002;
vector<ll> dp(MAX,0);
vector<ll> fact(MAX+2,1),inv_fact(MAX+2,1);
ll binpow(ll a,ll b,ll MOD){
ll ans=1;
a%=MOD;
while(b){
if(b&1)
ans=(ans*a)%MOD;
b/=2;
a=(a*a)%MOD;
}
return ans;
}
ll inverse(ll a,ll MOD){
return binpow(a,MOD-2,MOD);
}
void precompute(ll n,ll MOD){
for(ll i=2;i<=n;i++){
fact[i]=fact[i-1]*i;
if(fact[i]>=MOD){
fact[i]%=MOD;
}
}
inv_fact[n-1]=inverse(fact[n-1],MOD);
for(ll i=n-2;i>=0;i--){
inv_fact[i]=inv_fact[i+1]*(i+1);
if(inv_fact[i]>=MOD){
inv_fact[i]%=MOD;
}
}
}
void solve(){
ll n,mod; cin>>n>>mod;
ll ans=0;
assert(n!=mod);
precompute(n,mod);
vector<ll> phi(n + 5);
for (int i = 1; i <= n; ++i) {
phi[i] = i;
}
for (int i = 2; i <= n; ++i) {
if (phi[i] == i) {
for (int j = i; j <= n; j += i) {
phi[j] -= phi[j]/i;
}
}
}
for (int i = 1; i <= n; ++i) {
ll rep = 0;
for (int j = 1; i * j <= n; ++j) {
ll l=(fact[n/j]*inv_fact[n/j-i]),r=(fact[n-i]*phi[j]);
if(l>=mod){
l%=mod;
}
if(r>=mod){
r%=mod;
}
rep = (rep + l*r);
if(rep>=mod){
rep%=mod;
}
}
ans+=rep;
if(ans>=mod){
ans-=mod;
}
}
cout << ans;
return;
}
int main()
{
ios_base::sync_with_stdio(false);
cin.tie(NULL);
#ifndef ONLINE_JUDGE
freopen("input.txt", "r", stdin);
freopen("output.txt", "w", stdout);
freopen("error.txt", "w", stderr);
#endif
ll test_cases=1;
//cin>>test_cases;
while(test_cases--){
solve();
}
cout<<fixed<<setprecision(10);
cerr<<"Time:"<<1000*((double)clock())/(double)CLOCKS_PER_SEC<<"ms\n";
}

Editorialist's Solution
#pragma GCC optimize "Ofast"
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt")
#include "bits/stdc++.h"
using namespace std;
#define ll long long
#define pb push_back
#define all(_obj) _obj.begin(), _obj.end()
#define F first
#define S second
#define pll pair<ll, ll>
#define vll vector<ll>
const int N = 1e6 + 11;
ll max(ll a, ll b) { return ((a > b) ? a : b); }
ll min(ll a, ll b) { return ((a > b) ? b : a); }
ll n, m, ans;
ll factorial[N], inverse_factorial[N], NumInverse[N];
long long binomial_coefficient(int n, int k)
{
return factorial[n] * inverse_factorial[k] % m * inverse_factorial[n - k] % m;
}
void sol(void)
{
cin >> n >> m;
NumInverse[0] = NumInverse[1] = factorial[0] = factorial[1] = inverse_factorial[0] = inverse_factorial[1] = 1;
for (int i = 2; i < N; i++)
{
NumInverse[i] = NumInverse[m % i] * (m - m / i) % m;
factorial[i] = factorial[i - 1] * i % m;
inverse_factorial[i] = (NumInverse[i] * inverse_factorial[i - 1]) % m;
}
for (int i = 1; i <= n; i++)
{
ll gcdsum[(n / i) + 11];
for (int g = (n / i); g >= 1; g--)
{
ll calc = binomial_coefficient(n / g, i);
calc *= factorial[i];
calc %= m;
calc *= factorial[n - i];
calc %= m;
gcdsum[g] = calc;
for (int gg = g + g; gg <= (n/i); gg += g)
gcdsum[g] = (gcdsum[g] + m - gcdsum[gg]) % m;
ans += (gcdsum[g]*g)%m;
ans %= m;
}
}
cout << ans;
return;
}
int main()
{
ios_base::sync_with_stdio(false);
cin.tie(NULL), cout.tie(NULL);
int test = 1;
// cin>>test;
while (test--)
sol();
}

1 Like