PROBLEM LINK:
Tester: Kasra Mazaheri
Editorialist: Hussain Kara Fallah
PROBLEM EXPLANATION
You are given a string S with length N. You should find two non-intersecting substrings of S such that the second one is a substring of the first one and the product of these substrings’ lengths is the maximum possible. More formally, let’s denote a contiguous substring S_l,S_{l+1},…,S_r by S[l,r]; you should find integers l_1, r_1, l_2 and r_2
which satisfy these four criteria:
- 1≤l_1≤r_1≤N and 1≤l_2≤r_2≤N
- r_2<l_1 or r_1<l_2
- S[l_2,r_2] is a contiguous substring of S[l_1,r_1]
- the product J=(r_2−l_2+1)⋅(r_1−l_1+1) is maximum possible
Find the maximum value of J
PREREQUISITES:
String suffix structure (suffix array/suffix automata)
DIFFICULTY:
Hard
CONSTRAINTS
1 \leq N \leq 10^6
EXPLANATION:
The solution I will describe will be based on suffix automaton. It’s required that you know it well to be able to understand the editorial. If you are not familiar with it, please make sure you read this great TUTORIAL. Also, I will be using definitions from this tutorial.
First of all, let’s build our suffix automaton by extending it character by character using our string.
This block is quoted form the editorial.
Consider any non-empty substring t of the string s. We will denote with endpos(t) the set of all positions in the string s, in which the occurrences of t end.
We will call two substrings t_1 and t_2 endpos-equivalent, if their ending sets coincide: endpos(t_1)=endpos(t_2). Thus all non-empty substrings of the string s can be decomposed into several equivalence classes according to their sets endpos.
It turns out, that in a suffix automaton endpos -equivalent substrings correspond to the same state . In other words, the number of states in a suffix automaton is equal to the number of equivalence classes among all substrings, plus the initial state. Each state of a suffix automaton corresponds to one or more substrings having the same value endpos.
Now, let’s find for each equivalence class (state) the size of its endpos set (more frankly, the number of positions where the occurrences of the string corresponding to this equivalence class end at). Also, among these positions, let’s keep the rightmost position and the leftmost position (we will get to that later).
Finding these values is easy, after each extension of our automaton we set the number of ending positions at the current node to 1. After finishing the automaton, we propagate all values to the top, adding the value at each node to the node which the suffix link points to.
Propagating rightmost-ending-position and left-most-ending position is also easy, instead of summation we use max/min functions respectively.
Now, let’s suppose our solution consists of 2 substrings P,Q such that |P| \leq |Q|. Of course, P is a substring of Q. It’s obvious that P must occur at least 2 times as a substring in S since P,Q are not intersecting.
For achieving maximum P, the other string Q can be either of the following 2 cases:
- The substring L of all characters to the left of P if P occurs as a substring in L.
- The substring R of all characters to the right of P if P occurs as a substring in R
Assuming that our answer’s substring P is known, the solution is either:
- P is equal to the first occurrence of P in S and Q is the string of all characters to the right of it.
- P is equal to the last occurrence of P in S and Q is the string of all characters to the left of it.
Assume that we have a node x in our automata such that |endpos(x)| \geq 2, then any suffix of the string corresponding to x (let’s denote it by Z) can be a candidate for P. For having a maximum J it’s always better to pick the whole suffix. Since we know the first and last ending positions of Z in S we can try both cases mentioned in the previous paragraph.
However, the first and last occurrence of Z may be intersecting and we need to handle this case. Assume that the first and last occurrence are ending at positions mn,mx respectively then the maximum length of P is equal to take=min(|Z|,mx-mn).
As a result:
J = max(J , max( take * (N - mn) , take * (mx - take)));
and that’s for every node in our automaton.
Complexity: O(N)
Editorialist solution
#include<bits/stdc++.h>
using namespace std;
//be careful with MAXLEN limits
const int MAXLEN = (2000005);
const int Alpha = 28;
long long MOD = 1e9 + 7;
struct state {
int len, link;
int next[Alpha];
int mn , mx , cnt;
state(){
len = link = cnt = 0;
mn = (1<<30) , mx = -1;
memset(next , -1 , sizeof(next));
}
};
class SAutomata{
public:
state st[MAXLEN * 2];
int sz, last;
int totlen;
void init() {
for(int j = 1 ; j < MAXLEN * 2 ; j++) st[j] = state();
st[0].len = 0;
st[0].link = -1;
sz = 1;
last = 0;
totlen = 0;
}
void extend(char ccc , int which) {
int c = ccc - 'a';
int cur = sz++;
st[cur].len = st[last].len + 1;
int p = last;
while (p != -1 && st[p].next[c] == -1) {
st[p].next[c] = cur;
p = st[p].link;
}
if (p == -1) {
st[cur].link = 0;
}
else {
int q = st[p].next[c];
if (st[p].len + 1 == st[q].len) {
st[cur].link = q;
} else {
int clone = sz++;
st[clone].len = st[p].len + 1;
for(int j = 0 ; j < Alpha ; j++)
st[clone].next[j] = st[q].next[j];
st[clone].link = st[q].link;
while (p != -1 && st[p].next[c] == q) {
st[p].next[c] = clone;
p = st[p].link;
}
st[q].link = st[cur].link = clone;
}
}
st[cur].mn = st[cur].mx = which;
st[cur].cnt = 1;
++totlen;
last = cur;
}
vector < int > v[MAXLEN*2] , dfsorder;
long long process(){
long long ret = 0;
for(int j = 0 ; j < sz ; j++)
if(st[j].link != -1)
v[st[j].link].push_back(j);
dfs(0);
for(auto x : dfsorder){
int p = st[x].link;
if(p == -1) continue;
st[p].cnt += st[x].cnt;
st[p].mn = min(st[p].mn , st[x].mn);
st[p].mx = max(st[p].mx , st[x].mx);
if(st[x].cnt < 2) continue;
int take = min(st[x].len , st[x].mx - st[x].mn);
ret = max(ret , max(1ll * take * (totlen - st[x].mn) , 1ll * take * (st[x].mx - take)));
// cout<<x<<' '<<st[x].mn<<' '<<st[x].mx<<endl;
}
return ret;
}
void dfs(int x){
for(auto nxt : v[x])
dfs(nxt);
dfsorder.push_back(x);
}
}SA;
int main(){
int n;
string str;
cin>>n>>str; str = "#" + str;
SA.init();
for(int j = 1 ; j <= n ; j++)
SA.extend(str[j] , j);
cout<<SA.process()<<endl;
}
Tester Solution
// In The Name Of The Queen
#include<bits/stdc++.h>
using namespace std;
const int N = 500005, LG = 19;
int n, pw, R[LG][N], P[N], Lcp[N];
inline bool CMP(int i, int j)
{
if (R[pw][i] != R[pw][j])
return (R[pw][i] < R[pw][j]);
i += 1 << pw; j += 1 << pw;
if (i < n && j < n)
return (R[pw][i] < R[pw][j]);
return (i > j);
}
inline int GetLcp(int l, int r, int le, int ri)
{
int lcp = 0;
for (int i = LG - 1; ~ i; i --)
if ((1 << i) <= min(r - l, ri - le))
if (R[i][l] == R[i][le])
l += 1 << i, le += 1 << i, lcp += 1 << i;
return (lcp);
}
inline void Build(char * S)
{
memset(R, 0, sizeof(R));
for (int i = 0; i < n; i ++)
P[i] = i, R[0][i] = S[i];
for (pw = 0; pw + 1 < LG; pw ++)
{
sort(P, P + n, CMP);
for (int i = 1; i < n; i ++)
R[pw + 1][P[i]] = R[pw + 1][P[i - 1]] + CMP(P[i - 1], P[i]);
}
for (int i = 0; i < n - 1; i ++)
Lcp[i] = GetLcp(P[i], n, P[i + 1], n);
}
int rev[N], LCP[LG][N], MX[LG][N], Log[N];
char A[N];
inline int GLcp(int l, int r)
{
int lg = Log[r - l];
return (min(LCP[lg][l], LCP[lg][r - (1 << lg)]));
}
inline int GMax(int l, int r)
{
int lg = Log[r - l];
return (max(MX[lg][l], MX[lg][r - (1 << lg)]));
}
inline bool Check(int l, int r)
{
int i = rev[l];
int a, b, le, ri, mid;
le = i; ri = n;
while (ri - le > 1)
{
mid = (le + ri) >> 1;
if (GLcp(i, mid) >= r - l)
le = mid;
else
ri = mid;
}
b = le;
le = -1; ri = i;
while (ri - le > 1)
{
mid = (le + ri) >> 1;
if (GLcp(mid, i) >= r - l)
ri = mid;
else
le = mid;
}
a = ri;
return (GMax(a, b + 1) >= r);
}
int main()
{
scanf("%d%s", &n, &A);
assert(1 <= n && n <= 500000);
assert(strlen(A) == n);
long long Mx = 0;
for (int w = 0; w <= 1; w ++)
{
Build(A);
for (int i = 0; i < n; i ++)
rev[P[i]] = i;
for (int i = 2; i < N; i ++)
Log[i] = Log[i >> 1] + 1;
for (int i = 0; i < n - 1; i ++)
LCP[0][i] = Lcp[i];
for (int j = 1; j < LG; j ++)
for (int i = 0; i + (1 << j) <= n - 1; i ++)
LCP[j][i] = min(LCP[j - 1][i], LCP[j - 1][i + (1 << (j - 1))]);
for (int i = 0; i < n; i ++)
MX[0][i] = P[i];
for (int j = 1; j < LG; j ++)
for (int i = 0; i + (1 << j) <= n; i ++)
MX[j][i] = max(MX[j - 1][i], MX[j - 1][i + (1 << (j - 1))]);
int r = -1;
for (int i = 0; i < n; i ++)
{
r = max(r, i);
while (r < n && Check(i, r))
r ++;
r --; Mx = max(Mx, (r - i) * 1LL * (n - r));
}
reverse(A, A + n);
memset(LCP, 0, sizeof(LCP));
memset(MX, 0, sizeof(MX));
memset(Lcp, 0, sizeof(Lcp));
memset(rev, 0, sizeof(rev));
memset(R, 0, sizeof(R));
}
return !printf("%lld\n", Mx);
}