CENS20C - Editorial

akshitm16 · August 20, 2020, 11:59pm

We can create a count_arr of size 2 \cdot \mid txt \mid where count_arr[i] will store the number of occurrences of pat in (txt+txt)[0 ... i].
There are two cases:

When there is no full string txt present in level i. This means i< \mid txt \mid. There can be some proper prefix of txt whose length can be calculated as i \%\mid txt \mid. So, ans will be simply count_arr[ (i \%\mid txt \mid ) -1].
When i>= \mid txt \mid. There will be (i/\mid txt \mid) full strings txt concatenated. The extra part will be some proper prefix of txt whose length can be calculated as i \%\mid txt \mid. So, contribution of extra part in ans can be calculated as follows. Since the extra part has a full txt preceding it.
We need occurrences of pat in txt+txt[0... (i \%\mid txt \mid ) -1] but we need to subtract the occurrences which are only in txt as we must have already calculated them (This is just contribution due to extra part not the full ans). Note the following code line. extra is nothing but i \%\mid txt \mid .

        if(val){
        	ans=val*count1 + (val-1)*count3;
        	int extra = line % txt.size();
        	ans+= count_arr[txt.size()-1+extra]-count_arr[txt.size()-1];
        }
        else{
        	int extra = line % txt.size();
        	// cout<<extra<<endl;
        	ans= count_arr[extra-1];
        }

To create count_arr, you just need to change the KMP function a little. I feel you can do this. You may look at the code for the details.

CPP Code

#include <bits/stdc++.h> 
using namespace std;

typedef vector <int> vi;

vi computeLPSArray(string pat, int M); 

vector<int> KMPSearch(string pat, string txt, int start) 
{ 
	int M = pat.size(); 
	int N = txt.size(); 
	vector<int> count_arr(N, 0); 
	vi lps = computeLPSArray(pat, M); 
	int i = 0;
	int j = 0;
	// int count = 0;
	while (i < N) { 
		if(i){count_arr[i]=count_arr[i-1];}
		if (pat[j] == txt[i]) { 
			j++; 
			i++; 
		} 

		if (j == M && i>start) { 
			count_arr[i-1]+=1;
			// count+=1;
			j = lps[j - 1]; 
		} 

		else if (i < N && pat[j] != txt[i]) { 
			if (j != 0) 
				j = lps[j - 1]; 
			else
				i = i + 1; 
		} 
	}
	return count_arr;
} 

vi computeLPSArray(string pat, int M) 
{ 
	int len = 0; 
	vi lps;
	lps.push_back(0); 

	int i = 1; 
	while (i < M) { 
		if (pat[i] == pat[len]) { 
			len++; 
			lps.push_back(len); 
			i++; 
		} 
		else // (pat[i] != pat[len]) 
		{ 
			if (len != 0) { 
				len = lps.at(len - 1); 
			} 
			else // if (len == 0) 
			{ 
				lps.push_back(0); 
				i++; 
			} 
		} 
	} 
	return lps;
} 
int main() 
{ 
    string pat, txt;
    cin >> txt >> pat;
    vector<int> count_arr;  // count_arr[i] will tell occurances of pat in (txt+txt)[0...i]
    int count1 = KMPSearch(pat, txt, 0)[txt.size()-1];
    count_arr=KMPSearch(pat, txt + txt, 0);
    int count2 = count_arr[2*txt.size()-1];
    int count3 = count2 - 2*count1;
    // cout<<count1<<" "<<count2<<endl;
    int q;
    cin >> q;
    while(q--){
        int line;
        cin >> line;
        int val=line/txt.size();
        int ans=0;
        if(val){
        	ans=val*count1 + (val-1)*count3;
        	int extra = line % txt.size();
        	ans+= count_arr[txt.size()-1+extra]-count_arr[txt.size()-1];
        }
        else{
        	int extra = line % txt.size();
        	// cout<<extra<<endl;
        	ans= count_arr[extra-1];
        }
        cout<<ans<<endl;
    }
	return 0; 
}

Python Code

# Python program for KMP Algorithm 
def KMPSearch(pat, txt, lps):
    M = len(pat)
    N = len(txt)
    count_arr=[0 for x in range(N)]
    # # create lps[] that will hold the longest prefix suffix
    # # values for pattern
    # lps = [0] * M
    j = 0  # index for pat[]

    # # Preprocess the pattern (calculate lps[] array)
    # computeLPSArray(pat, M, lps)

    i = 0  # index for txt[]
    while i < N:
        print(i)
        count_arr[i]=count_arr[i-1]
        print(count_arr[i-1])
        if pat[j] == txt[i]:
            i += 1
            j += 1

        if j == M:
            # print(i)
            count_arr[i-1]+=1
            j = lps[j - 1]

            # mismatch after j matches
        elif i < N and pat[j] != txt[i]:
            # Do not match lps[0..lps[j-1]] characters,
            # they will match anyway
            if j != 0:
                j = lps[j - 1]
            else:
                i += 1
    return count_arr


def computeLPSArray(pat, M):
    lps=[0 for x in range(M)]
    len = 0  # length of the previous longest prefix suffix
    i = 1

    # the loop calculates lps[i] for i = 1 to M-1
    while i < M:
        if pat[i] == pat[len]:
            len += 1
            lps[i] = len
            i += 1
        else:
            # This is tricky. Consider the example.
            # AAACAAAA and i = 7. The idea is similar
            # to search step.
            if len != 0:
                len = lps[len - 1]

                # Also, note that we do not increment i here
            else:
                lps[i] = 0
                i += 1
    return lps


txt=input()
pat=input()
lps=computeLPSArray(pat,len(pat))
# print(lps)
count1=KMPSearch(pat,txt,lps)[-1]
count_arr=KMPSearch(pat,txt+txt,lps)
count2=count_arr[-1]
count3=count2-(2*count1)
q=int(input())
for _ in range(q):
    n=int(input())
    val=n//len(txt)
    if val:
        ans=val*count1 + (val-1)*count3
        extra = n % len(txt)
        ans+= count_arr[len(txt)-1+extra]-count_arr[len(txt)-1]
    else:
        extra=n%len(txt)
        ans=count_arr[extra-1]
    print(ans)

Also, to understand KMP (it’s tough actually!!) in detail, here is one good explanation-