CHEFRAIL - TLE even after using prime factorization sieve

#include <iostream>
#include<vector>
#include<map>
#define MAX 100007
using namespace std;
long long int ans = 0;

struct primeFactorization 
{
	int countOfPf, primeFactor;
};
int spf[MAX];
void calcSpf()
{
	spf[1] = 1;
	for (int i = 1; i < MAX; i++)
		spf[i] = i;
	for (int i = 4; i < MAX; i += 2)
		spf[i] = 2;

	for (int i = 3; i*i < MAX; i++)
	{
		if (spf[i] == i)
		{
			for (int j = i * i; j < MAX; j += i) // j+=i;
			{
				
				if ( j == spf[j] )
				{
					spf[j] = i;
				}
				
			}
		}
	}
}

void genDiv(vector<primeFactorization> a,long long int c,long long int d, map<int, int>* m1, map<int, int>* m2,int n) 
{

	if (c == a.size())
	{
		if ((*m1)[d] > 0 && (*m2)[n / d] > 0) // running twice !!
		{
			ans++;
		}
		//cout << d << " ";   // if factor is 2 then remove n/2 also!!
		return;
	}

	for (int i = 0; i <= a[c].countOfPf; i++)
	{
		genDiv(a, c + 1, d,m1,m2,n);
		d *= a[c].primeFactor;
	}
}

void find(int n, map<int, int>* m1, map<int, int>* m2)
{
	long long int temp = (long long int)n * n;
	vector<primeFactorization> a;
	int count = 0;
	for (int i = spf[n]; n != 1; i = spf[n])
	{
		count = 0;
		while (n % i == 0)
		{
			n /= i;
			count++;
		}
		a.push_back({ 2 * count, i });
	}
	long long int curIndex = 0, curDiv = 1;
	genDiv(a, curIndex, curDiv, m1, m2, temp);
}

void solve(vector<int> v,map<int,int>* m1,map<int,int>* m2)
{
	for (int i = 0; i < v.size(); i++)
	{
		find(v[i],m1,m2);
	}
}

int main()
{
	calcSpf();
	int t;
	cin >> t;
	while (t--)
	{

		int n,m;
		cin >> n >> m;
		int xflag = 0,yflag = 0;
		map<int, int> mxp, myp, mxn, myn;
		vector<int> vxp, vxn, vyp, vyn;
		//memset(mxp, 0, sizeof(mxp));
		ans = 0;
		for (int i = 0; i < n; i++)
		{
			int x;
			cin >> x;
			if (x > 0)
			{
				mxp[x]++;
				vxp.push_back(x);
			}
			else if (x < 0)
			{
				mxn[-x]++;
				vxn.push_back(-x);
			}
			else
				xflag = 1;
		}
		for (int i = 0; i < m; i++)
		{
			int x;
			cin >> x;
			if (x > 0)
			{
				myp[x]++;
				vyp.push_back(x);
			}
			else if (x < 0)
			{
				myn[-x]++;
				vyn.push_back(-x);
			}
			else
				yflag = 1;
		}
		
		solve(vxp, &myp, &myn);
		solve(vxn, &myp, &myn);
		solve(vyp, &mxp, &mxn);
		solve(vyn, &mxp, &mxn);
		if (xflag == 1)
		{
			ans += ((long long int)n - 1) * m;
		}
		else if (yflag == 1)
		{
			ans += n * ((long long int)m - 1);
		}
		cout << ans << endl;
	}
}

Can someone tell me why the time limit is exceeding for this code?
Problem Link - https://www.codechef.com/problems/CHEFRAIL

1 Like

It would be much easier if you just explained your approach

I precomputed an array which stores the smallest prime factor for a number.

In my main code, i stored xpositive,xnegative,ypositive,ynegative points in 4 different arrays and ran my solve function 4 times(once for each side of the axis).
Then for each point p on the axes, i calculate its prime factors and the count of its prime factors. But i store the 2*count instead because i actually need to get the factors of p squared.
After storing the prime factors, i create all possible factors of p square from different combinations of its prime factors and check whether the pair of factors lie on the opposite axes or not. If they lie on the opposite axes, i increment ans.
I have incremented value ans if the user inputs origin as a point too.

My code gets 40 points buts TLEs for the last 4 testcases.

1 Like

If the same point exists on all 4 axis, you’ll compute it’s factors 4 times, effectively increasing runtime by a large margin. I reduced the number of times a number was factorized similarly in my submission to remove the TLE.

1 Like

Are you suggesting I use two arrays?(one for x axis and one for y axis) and make the frequency 2 if a point exists at positive as well as negative of an axis ( example -4,0 and 4,0 )? or create a separate array which stores the factors of numbers and then check whether the factors of a number is already computed and use the already computed factors? Because the second option would take a lot of space and time ( I think)

Using the 4 arrays is fine. My point is, instead of solving for each point in the 4 arrays, run a loop for each point, from 1 to Max_Val (10^5). If the point exists in at least 1 array, factorize it if you haven’t done so before. Instead of 4 loops, this will work in one iteration only.

2 Likes

Got it. Thanks!!

I think another thing you can do to optimize your solution is that instead of calling genDiv every time, you can just first gen all suitable divisors of all number from 1 to 100000. The total size of all vectors would be small enough to store.

1 Like

Got AC with running a loop from 1 to 10^5 and checking whether that point exists in any of the 4 arrays. Thanks a lot for that!

I changed maps to arrays as map had log(n) search time and i was checking for the value in a map 4*10^5 times. I changed my code, but the implementation was a bit finicky and I still checked for every factor twice ( one for x axis and one for y ). Is there a better way to implement this without running the factor loop twice?

Here is my code -
factor is a global variable of type vector <long long int
And mxp,myp,mxn,myn are global variables of type vector <int of size (MAX)

for (int i = 1; i < MAX; i++)
		{
			factor.clear();
			if (mxp[i] > 0 || mxn[i] > 0 || myp[i] > 0 || myn[i] > 0)
			{
				find(i);
				long long int r = (long long)i * i;
				int total = 0;
				if (mxp[i] > 0)
					total++;
				if (mxn[i] > 0)
				total++;
			
			if (total > 0)
			{
				for (int j = 0; j < factor.size(); j++)
				{
					long long int w1 = factor[j];
					long long int w2 = r / w1;
					if(w1<MAX && w2<MAX)
					if (myp[w1] > 0 && myn[w2] > 0)
						ans += total;
				}
			}

			total = 0;
			if (myp[i] > 0)
				total++;
			if (myn[i] > 0)
				total++;

			if (total > 0)
			{
				for (int j = 0; j < factor.size(); j++)
				{
					long long int w1 = factor[j];
					long long int w2 = r / w1;
					if (w1 < MAX && w2 < MAX)
					if (mxp[w1] > 0 && mxn[w2] > 0)
						ans += total;
				}
			}
		}

	}

To do this in one loop (xn, xp, yn, yp are arrays storing the points):

for i in range (1,M):
factors = md[i]
y = i*i
for a in factors:
b = y//a
if b<M and a<M:
tx = xn[a]xp[b](yp[i]+yn[i])
ty = yn[a]yp[b](xp[i]+xn[i])
ans += xn[a]xp[b](yp[i]+yn[i])
ans += yn[a]yp[b](xp[i]+xn[i])

1 Like

Instead of checking for every triplet x^2 = -yz on both the axis, I took the squarefree representation of each number on x-axis st x_i = a_i^2 b_i

Now, we had to find all x_j and x_k so that x_jx_k is a perfect square and \sqrt{x_jx_k} existed on y-axis.

Note that this is very easy to do as x_j and x_k will only form such a pair iff their squarefree parts are the same.

Now we iterate over all possible squarefree parts and binary search for \sqrt{x_jx_k} exists on y-axis

This is not O(n^2) even if it seems to be. This can be explained mathematically using Combinatorial Arguements and Cauchy-Swatchz inequality.

Here is my code with gives 100pt AC

Code
#include <bits/stdc++.h>
using namespace std;
#define int long long
#define rep(i,n) for(int i(0);i<n;i++)
#define fast  ios_base::sync_with_stdio(0); cin.tie(0);
#define watch(x) cout<<(#x)<<" is "<<x<<"\n";
#define out(x) cout<<x<<"\n";
#define pb push_back

const int N = 100004;
int lp[N+1], sqfree[N+1];

vector<int> pr;

void sieve(){
	for (int i=2; i<=N; ++i) {
	    if (lp[i] == 0) {
	        lp[i] = i;
	        pr.push_back (i);
	    }
	    for (int j=0; j<(int)pr.size() && pr[j]<=lp[i] && i*pr[j]<=N; ++j)
	        lp[i * pr[j]] = pr[j];
	}

}

void sqf(){
	for(int n(1);n<100005;n++){
		int temp=n,i,ans=1;
		map<int, int> freq;
		map<int,int>::iterator it;
		while(temp>1){
			i = lp[temp];
			freq[i]++;
			temp /=i;
		}
		for(it = freq.begin();it!=freq.end();++it){
			if(it->second % 2 == 1)ans *= it->first;
		}
		sqfree[n] = ans;
	}
}

int32_t main(){
	
	fast;
	sieve();
	sqf();

	int t;
	cin>>t;

	while(t--){
		int n,m;
		cin>>n>>m;
		vector<int> xpos[100005],xneg[100005],yneg[100005] ,ypos[100005],x,y;
		bool xzero=false, yzero=false;
		rep(i,n){
			int r;
			cin>>r;
			x.pb(r);
			if(r==0)xzero=true;
			else if(r>0){
				xpos[sqfree[r]].pb(sqrt(r/sqfree[r]));
			}
			else{
				r *= -1;
				xneg[sqfree[r]].pb(sqrt(r/sqfree[r]));
			}
		}
		rep(i,m){
			int r;
			cin>>r;
			if(r==0)yzero=true;
			else if(r>0){
				y.pb(r);
				ypos[sqfree[r]].pb(sqrt(r/sqfree[r]));
			}
			else{
				y.pb(r);
				r *= -1;
				yneg[sqfree[r]].pb(sqrt(r/sqfree[r]));
			}
		}

		sort(x.begin(), x.end());
		sort(y.begin(), y.end());

		vector<int> u,v,a,b;
		int tempx,tempy,ans=0;

		if(xzero)ans += (n-1)*m;
		if(yzero)ans += (m-1)*n;

		for(int i(1);i<100005;i++){
			u = xpos[i];
			v = xneg[i];
			for(int j=0 ; j<v.size();j++){
				for(int k=0;k<u.size();k++){
					tempx = v[j]*u[k]*i;
					if(tempx!=0){
						if(binary_search(y.begin() , y.end() , tempx))
							ans++;
						if(binary_search(y.begin() , y.end() , -1*tempx))
							ans++;
					}
				}
			}

			a = ypos[i];
			b = yneg[i];
			for(int j=0 ; j<b.size();j++){
				for(int k=0;k<a.size();k++){
					tempy = b[j]*a[k]*i;
					if(tempy!=0){
						if(binary_search(x.begin() , x.end() , tempy))
							ans++;
						if(binary_search(x.begin() , x.end() , -1*tempy))
							ans++;
					}
				}
			}
		}

		cout<<ans<<"\n";		

	}


	return 0;
}