PROBLEM LINK:
Author: Jatin Nagpal
Tester: Taranpreet Singh
Editorialist: Jatin Nagpal
DIFFICULTY:
MEDIUM
PREREQUISITES:
Map, Strings, String Hashing, Suffix Array Construction Link-1 or Link-2. Refer Google and Youtube for more on prerequisites.
PROBLEM:
Given 2 strings A and B, you have to find a Substring of length K such that it is common in both the strings and the sum of occurrences in both the strings is maximum. If there exist multiple such substrings, then find the one which is lexicographically smaller among them.
QUICK EXPLANATION:
It’s can be divided into 4 parts:-
- Find Hash of all substrings of String N and M of length K in O(N+M), and create 2 arrays for N and M to store the hash values of each substring of length K at the starting index of substring.
- With the help of map of Frequencies of Hash values, find all the substrings which are common in both strings and have sum of occurrences in both strings maximum.
- Construct a suffix array for one of the string let’s say N either with Suffix tree or with any methods available, an O(N*logN*logN) also works
- With the help of suffix array, u can find the lexicographically smallest substring.
EXPLANATION:
The 4 parts mentioned in quick explanation are explained below as:
Part 1
Why do we need Hash?
Because with the help of a Hash, we can compare if 2 strings are equal just by looking at their Hash values, which is O(1) complexity, while without it can take upto O(N) complexity, where N is length of string.
How do we calculate all Hash of length K in O(N+M)?
Either u can follow this link, or u can first calculate the Hash of string starting from index 0 of length K, and then for every index,
Why do we need to store the Hash values of each substring of length K at the starting index of substring?
We’d need it in the part 4
It is recommended to make a double Hash instead of single Hash, since it reduces the chances of collision.
From the next Part onwards, I’d refer to Hash of All the Substrings of Length K of string N as ASLKN.
Part 2
Make 2 maps which stores the frequencies of ASLKN and ASLKM respectively.
With the help of these 2 maps, we make a single map which stores the frequencies of ASLK(N&M) collectively such that the string is common in both i.e. the hashes which have frequency value positive in ASLKN & ASLKM.
With the map ASLK(N&M) , we can know the max value of frequency. With that value, we can make a special map let’s say SP_MAP which stores only those values of map ASLK(N&M), which have max frequency.
Part 3
U need to create suffix array of one of the strings of ur choice, i.e. N or M
For creating suffix array, u can refer to Prerequsites.
Part 4
U know from Part 3, that suffix array is the sorted array of suffixes of a string, and element of array is the starting index of the suffix string.
Now, U should be able to observe that sorting suffixes of atleast length K of a string is equivalent to sorting all the Substrings of length K of string N.
With that observation, we can iterate over the suffix array from start to end, which means, we are iterating in lexicographical order, and at each point, we check that the hash value at that Index (which we stored int Part 1) is actually present in SP_MAP or not. If it’s present, u can simply stop iterating print the string starting at that index, since U’ve got the required answer.
ALTERNATE SOLUTION:
In part 3 and 4, Instead of using a Suffix Array, u could have used Rolling Hashes to reach the answer.
SOLUTIONS:
Setter's Solution
#include <bits/stdc++.h>
#define ff first
#define ss second
using namespace std;
#define MP make_pair
#define PB push_back
#define ll long long
#define int long long
#define f(i,x,n) for(int i=x;i<n;i++)
#define ld long double
#define SZ 200005
char a[2][200005];
char txt[SZ];
int suffixArr[SZ];
struct suffix
{
int index; // To store original index
int rank[2]; // To store ranks and next rank pair
};
int cmp(struct suffix a, struct suffix b)
{
return (a.rank[0] == b.rank[0])? (a.rank[1] < b.rank[1] ?1: 0):
(a.rank[0] < b.rank[0] ?1: 0);
}
void buildSuffixArray(int n)
{
struct suffix suffixes[n];
for (int i = 0; i < n; i++)
{
suffixes[i].index = i;
suffixes[i].rank[0] = txt[i] - 'a';
suffixes[i].rank[1] = ((i+1) < n)? (txt[i + 1] - 'a'): -1;
}
sort(suffixes, suffixes+n, cmp);
int ind[n];
for (int k = 4; k < 2*n; k = k*2)
{
int rank = 0;
int prev_rank = suffixes[0].rank[0];
suffixes[0].rank[0] = rank;
ind[suffixes[0].index] = 0;
for (int i = 1; i < n; i++)
{
if (suffixes[i].rank[0] == prev_rank && suffixes[i].rank[1] == suffixes[i-1].rank[1])
{
prev_rank = suffixes[i].rank[0];
suffixes[i].rank[0] = rank;
}
else
{
prev_rank = suffixes[i].rank[0];
suffixes[i].rank[0] = ++rank;
}
ind[suffixes[i].index] = i;
}
for (int i = 0; i < n; i++)
{
int nextindex = suffixes[i].index + k/2;
suffixes[i].rank[1] = (nextindex < n)?suffixes[ind[nextindex]].rank[0]: -1;
}
sort(suffixes, suffixes+n, cmp);
}
for (int i = 0; i < n; i++)
suffixArr[i] = suffixes[i].index;
return;
}
int Mi[2]={758620695,838709685}; // Mod-inverse Values
int M[2]={1000000007,1000000009};
int D[2]={29,31};
int n[2],k;
pair <int,int> h[2][SZ]; //Double Hash
map < pair <int,int> , pair <int,int> > mp;
void compute_hash(int i)
{
pair <int,int> val={0,0};
pair <int,int> power={1,1};
f(j,0,k)
{
val.ff=(val.ff+power.ff*(a[i][j]-'a'+1))%M[0];
val.ss=(val.ss+power.ss*(a[i][j]-'a'+1))%M[1];
power.ff=(power.ff*D[0])%M[0];
power.ss=(power.ss*D[1])%M[1];
}
h[i][0]=val;
if(i==0)
mp[val].ff++;
else
mp[val].ss++;
f(j,k,n[i])
{
val.ff=(val.ff + power.ff*(a[i][j]-'a'+1) - (a[i][j-k]-'a'+1) + M[0])%M[0];
val.ss=(val.ss + power.ss*(a[i][j]-'a'+1) - (a[i][j-k]-'a'+1) + M[1])%M[1];
val.ff=(val.ff*Mi[0])%M[0];
val.ss=(val.ss*Mi[1])%M[1];
h[i][j-k+1]=val;
if(i==0)
mp[val].ff++;
else
mp[val].ss++;
}
}
int32_t main()
{
ios_base::sync_with_stdio(false);
cin.tie(NULL);
cout.tie(NULL);
int t;
cin>>t;
while(t--)
{
mp.clear();
cin>>n[0]>>n[1]>>k>>a[0]>>a[1];
compute_hash(0);
compute_hash(1);
int mx=0;
for(auto i: mp)
{
if(i.ss.ff>0&&i.ss.ss>0)
{
mx=max(mx,i.ss.ff+i.ss.ss);
}
}
if(mx==0)
{
cout<<"-1\n";
continue;
}
set <pair <int,int> > se;
for(auto i: mp)
{
if(i.ss.ff>0&&i.ss.ss>0&&i.ss.ff+i.ss.ss==mx)
{
se.insert({i.ff.ff,i.ff.ss});
}
}
int l=0;
if(n[1]<n[0])
l=1;
f(i,0,n[0])
txt[i]=a[l][i];
buildSuffixArray(n[l]);
f(i,0,n[l])
{
if(suffixArr[i]<=n[l]-k)
{
if(se.count(h[l][suffixArr[i]])==1)
{
f(j,0,k)
{
cout<<a[l][j+suffixArr[i]];
}
cout<<'\n';
break;
}
}
}
}
return 0;
}
Tester's Solution
import java.util.*;
import java.io.*;
import java.text.*;
public class Main{
//SOLUTION BEGIN
//Into the Hardware Mode
void pre() throws Exception{}
void solve(int TC)throws Exception{
int n = ni(), m = ni(), k = ni();
String a = n(), b = n();
long p1 = 31, p2 = 37;
long m1 = (long)1e8+7, m2 = (long)1e9+7;
int mx = 1+Math.max(n, m);
long[][] pip1 = pow(mx, p1, m1), pip2 = pow(mx, p2, m2);
long[] a1 = new long[1+n], a2 = new long[1+n], b1 = new long[1+m], b2 = new long[1+m];
for(int i = 1; i<= n; i++){
a1[i] = add(a1[i-1], mul(a.charAt(i-1)-'a'+1, pip1[0][i], m1), m1);
a2[i] = add(a2[i-1], mul(a.charAt(i-1)-'a'+1, pip2[0][i], m2), m2);
}
for(int i = 1; i<= m; i++){
b1[i] = add(b1[i-1], mul(b.charAt(i-1)-'a'+1, pip1[0][i], m1), m1);
b2[i] = add(b2[i-1], mul(b.charAt(i-1)-'a'+1, pip2[0][i], m2), m2);
}
TreeMap<Pair, Integer> map1 = new TreeMap<>(), map2 = new TreeMap<>();
for(int i = 0; i<= n-k; i++){
Pair hash = new Pair(hash(a1, pip1, m1, i+1, i+k), hash(a2, pip2, m2, i+1, i+k));
map1.put(hash, map1.getOrDefault(hash, 0)+1);
}
for(int i = 0; i<= m-k; i++){
Pair hash = new Pair(hash(b1, pip1, m1, i+1, i+k), hash(b2, pip2, m2, i+1, i+k));
map2.put(hash, map2.getOrDefault(hash, 0)+1);
}
int maxCount = 0;
TreeSet<Pair> set = new TreeSet<>();
for(Map.Entry<Pair, Integer> e:map1.entrySet()){
int c = map2.getOrDefault(e.getKey(), 0);
if(c == 0)continue;
c += e.getValue();
if(c > maxCount){
maxCount = c;
set = new TreeSet<>();
}
if(c == maxCount)set.add(e.getKey());
}
if(maxCount == 0){
pn(-1);
return;
}
int[] sufArray = suffixArray(a);
for(int i = 0; i< sufArray.length; i++){
if(sufArray[i]+k > a.length())continue;
int ind = sufArray[i];
Pair hash = new Pair(hash(a1, pip1, m1, ind+1, ind+k), hash(a2, pip2, m2, ind+1, ind+k));
if(set.contains(hash)){
pn(a.substring(ind, ind+k));
return;
}
}
hold(false);
}
int[] suffixArray(String s){
int n = s.length();
Suffix[] su = new Suffix[n];
for(int i = 0; i< n; i++){
su[i] = new Suffix(i, s.charAt(i)-'$', 0);
}
for(int i = 0; i< n; i++)su[i].next = (i+1 < n?su[i+1].rank:-1);
Arrays.sort(su);
int[] ind = new int[n];
for(int length = 4; length < 2*n; length<<=1){
int rank = 0, prev = su[0].rank;
su[0].rank = rank;
ind[su[0].index] = 0;
for(int i = 1; i< n; i++){
if(su[i].rank == prev && su[i].next == su[i-1].next){
prev = su[i].rank;
su[i].rank = rank;
}else{
prev = su[i].rank;
su[i].rank = ++rank;
}
ind[su[i].index] = i;
}
for(int i = 0; i< n; i++){
int nextP = su[i].index+length/2;
su[i].next = nextP<n?su[ind[nextP]].rank:-1;
}
Arrays.sort(su);
}
int[] suf = new int[n];
for(int i = 0; i< n; i++)suf[i] = su[i].index;
return suf;
}
class Suffix implements Comparable<Suffix>{
int index, rank, next;
public Suffix(int ind, int r, int nr){
index = ind; rank = r; next = nr;
}
public int compareTo(Suffix s){
if(rank != s.rank)return Integer.compare(rank, s.rank);
return Integer.compare(next, s.next);
}
}
class Pair implements Comparable<Pair>{
long h1, h2;
public Pair(long a, long b){
h1 = a;h2 = b;
}
public int compareTo(Pair p){
if(h1 != p.h1)return Long.compare(h1, p.h1);
return Long.compare(h2, p.h2);
}
}
long hash(long[] h, long[][] p, long m, int l, int r){
return mul(add(h[r], m-h[l-1], m), p[1][l-1], m);
}
long[][] pow(int mx, long p, long mod){
long[] P = new long[mx], IP = new long[mx];
P[0] = 1;
for(int i = 1; i< mx; i++)P[i] = (P[i-1]*p)%mod;
long M = mod;
long y = 0, x = 1;
long a = P[mx-1];
while(a> 1){
long q = a/M;
long t = M;
M = a%M;
a = t;
t = y;
y = x-q*y;
x = t;
}
if(x<0)x+=mod;
IP[mx-1] = x;
for(int i = mx-2; i>= 0; i--)IP[i] = (IP[i+1]*p)%mod;
return new long[][]{P, IP};
}
long mul(long a, long b, long m){
if(a>=m)a%=m;
if(b>=m)b%=m;
a*=b;
if(a>=m)a%=m;
return a;
}
long add(long a, long b, long mod){
if(Math.abs(a)>=mod)a%=mod;
if(a<0)a+=mod;
if(Math.abs(b)>=mod)b%=mod;
if(b<0)b+=mod;
a+=b;
if(Math.abs(a)>=mod)a%=mod;
return a;
}
long pow(long a, long p, long mod){
long o = 1;
while(p>0){
if(p%2==1)o = (a*o)%mod;
a = (a*a)%mod;
p>>=1;
}
return o;
}
//SOLUTION END
void hold(boolean b)throws Exception{if(!b)throw new Exception("Hold right there, Sparky!");}
void exit(boolean b){if(!b)System.exit(0);}
long IINF = (long)1e18, mod = (long)1e9+7;
final int INF = (int)1e9, MX = (int)2e6+5;
DecimalFormat df = new DecimalFormat("0.00000000");
double PI = 3.141592653589793238462643383279502884197169399, eps = 1e-6;
static boolean multipleTC = true, memory = false, fileIO = false;
FastReader in;PrintWriter out;
void run() throws Exception{
if(fileIO){
in = new FastReader("input.txt");
out = new PrintWriter("output.txt");
}else {
in = new FastReader();
out = new PrintWriter(System.out);
}
//Solution Credits: Taranpreet Singh
int T = (multipleTC)?ni():1;
pre();
for(int t = 1; t<= T; t++)solve(t);
out.flush();
out.close();
}
public static void main(String[] args) throws Exception{
if(memory)new Thread(null, new Runnable() {public void run(){try{new Main().run();}catch(Exception e){e.printStackTrace();}}}, "1", 1 << 28).start();
else new Main().run();
}
int digit(long s){int ans = 0;while(s>0){s/=10;ans++;}return ans;}
long gcd(long a, long b){return (b==0)?a:gcd(b,a%b);}
int gcd(int a, int b){return (b==0)?a:gcd(b,a%b);}
int bit(long n){return (n==0)?0:(1+bit(n&(n-1)));}
void p(Object o){out.print(o);}
void pn(Object o){out.println(o);}
void pni(Object o){out.println(o);out.flush();}
String n()throws Exception{return in.next();}
String nln()throws Exception{return in.nextLine();}
int ni()throws Exception{return Integer.parseInt(in.next());}
long nl()throws Exception{return Long.parseLong(in.next());}
double nd()throws Exception{return Double.parseDouble(in.next());}
class FastReader{
BufferedReader br;
StringTokenizer st;
public FastReader(){
br = new BufferedReader(new InputStreamReader(System.in));
}
public FastReader(String s) throws Exception{
br = new BufferedReader(new FileReader(s));
}
String next() throws Exception{
while (st == null || !st.hasMoreElements()){
try{
st = new StringTokenizer(br.readLine());
}catch (IOException e){
throw new Exception(e.toString());
}
}
return st.nextToken();
}
String nextLine() throws Exception{
String str = "";
try{
str = br.readLine();
}catch (IOException e){
throw new Exception(e.toString());
}
return str;
}
}
}
I’d also like to know if there exist other solutions for this problem, I even wanted to know how it’d be implemented with rolling Hashes, since I simply have no Idea about it.