# CHEFRAIL - TLE even after using prime factorization sieve

#include <iostream>
#include<vector>
#include<map>
#define MAX 100007
using namespace std;
long long int ans = 0;

struct primeFactorization
{
int countOfPf, primeFactor;
};
int spf[MAX];
void calcSpf()
{
spf[1] = 1;
for (int i = 1; i < MAX; i++)
spf[i] = i;
for (int i = 4; i < MAX; i += 2)
spf[i] = 2;

for (int i = 3; i*i < MAX; i++)
{
if (spf[i] == i)
{
for (int j = i * i; j < MAX; j += i) // j+=i;
{

if ( j == spf[j] )
{
spf[j] = i;
}

}
}
}
}

void genDiv(vector<primeFactorization> a,long long int c,long long int d, map<int, int>* m1, map<int, int>* m2,int n)
{

if (c == a.size())
{
if ((*m1)[d] > 0 && (*m2)[n / d] > 0) // running twice !!
{
ans++;
}
//cout << d << " ";   // if factor is 2 then remove n/2 also!!
return;
}

for (int i = 0; i <= a[c].countOfPf; i++)
{
genDiv(a, c + 1, d,m1,m2,n);
d *= a[c].primeFactor;
}
}

void find(int n, map<int, int>* m1, map<int, int>* m2)
{
long long int temp = (long long int)n * n;
vector<primeFactorization> a;
int count = 0;
for (int i = spf[n]; n != 1; i = spf[n])
{
count = 0;
while (n % i == 0)
{
n /= i;
count++;
}
a.push_back({ 2 * count, i });
}
long long int curIndex = 0, curDiv = 1;
genDiv(a, curIndex, curDiv, m1, m2, temp);
}

void solve(vector<int> v,map<int,int>* m1,map<int,int>* m2)
{
for (int i = 0; i < v.size(); i++)
{
find(v[i],m1,m2);
}
}

int main()
{
calcSpf();
int t;
cin >> t;
while (t--)
{

int n,m;
cin >> n >> m;
int xflag = 0,yflag = 0;
map<int, int> mxp, myp, mxn, myn;
vector<int> vxp, vxn, vyp, vyn;
//memset(mxp, 0, sizeof(mxp));
ans = 0;
for (int i = 0; i < n; i++)
{
int x;
cin >> x;
if (x > 0)
{
mxp[x]++;
vxp.push_back(x);
}
else if (x < 0)
{
mxn[-x]++;
vxn.push_back(-x);
}
else
xflag = 1;
}
for (int i = 0; i < m; i++)
{
int x;
cin >> x;
if (x > 0)
{
myp[x]++;
vyp.push_back(x);
}
else if (x < 0)
{
myn[-x]++;
vyn.push_back(-x);
}
else
yflag = 1;
}

solve(vxp, &myp, &myn);
solve(vxn, &myp, &myn);
solve(vyp, &mxp, &mxn);
solve(vyn, &mxp, &mxn);
if (xflag == 1)
{
ans += ((long long int)n - 1) * m;
}
else if (yflag == 1)
{
ans += n * ((long long int)m - 1);
}
cout << ans << endl;
}
}


Can someone tell me why the time limit is exceeding for this code?

1 Like

It would be much easier if you just explained your approach

I precomputed an array which stores the smallest prime factor for a number.

In my main code, i stored xpositive,xnegative,ypositive,ynegative points in 4 different arrays and ran my solve function 4 times(once for each side of the axis).
Then for each point p on the axes, i calculate its prime factors and the count of its prime factors. But i store the 2*count instead because i actually need to get the factors of p squared.
After storing the prime factors, i create all possible factors of p square from different combinations of its prime factors and check whether the pair of factors lie on the opposite axes or not. If they lie on the opposite axes, i increment ans.
I have incremented value ans if the user inputs origin as a point too.

My code gets 40 points buts TLEs for the last 4 testcases.

1 Like

If the same point exists on all 4 axis, you’ll compute it’s factors 4 times, effectively increasing runtime by a large margin. I reduced the number of times a number was factorized similarly in my submission to remove the TLE.

1 Like

Are you suggesting I use two arrays?(one for x axis and one for y axis) and make the frequency 2 if a point exists at positive as well as negative of an axis ( example -4,0 and 4,0 )? or create a separate array which stores the factors of numbers and then check whether the factors of a number is already computed and use the already computed factors? Because the second option would take a lot of space and time ( I think)

Using the 4 arrays is fine. My point is, instead of solving for each point in the 4 arrays, run a loop for each point, from 1 to Max_Val (10^5). If the point exists in at least 1 array, factorize it if you haven’t done so before. Instead of 4 loops, this will work in one iteration only.

2 Likes

Got it. Thanks!!

I think another thing you can do to optimize your solution is that instead of calling genDiv every time, you can just first gen all suitable divisors of all number from 1 to 100000. The total size of all vectors would be small enough to store.

1 Like

Got AC with running a loop from 1 to 10^5 and checking whether that point exists in any of the 4 arrays. Thanks a lot for that!

I changed maps to arrays as map had log(n) search time and i was checking for the value in a map 4*10^5 times. I changed my code, but the implementation was a bit finicky and I still checked for every factor twice ( one for x axis and one for y ). Is there a better way to implement this without running the factor loop twice?

Here is my code -
factor is a global variable of type vector <long long int
And mxp,myp,mxn,myn are global variables of type vector <int of size (MAX)

for (int i = 1; i < MAX; i++)
{
factor.clear();
if (mxp[i] > 0 || mxn[i] > 0 || myp[i] > 0 || myn[i] > 0)
{
find(i);
long long int r = (long long)i * i;
int total = 0;
if (mxp[i] > 0)
total++;
if (mxn[i] > 0)
total++;

if (total > 0)
{
for (int j = 0; j < factor.size(); j++)
{
long long int w1 = factor[j];
long long int w2 = r / w1;
if(w1<MAX && w2<MAX)
if (myp[w1] > 0 && myn[w2] > 0)
ans += total;
}
}

total = 0;
if (myp[i] > 0)
total++;
if (myn[i] > 0)
total++;

if (total > 0)
{
for (int j = 0; j < factor.size(); j++)
{
long long int w1 = factor[j];
long long int w2 = r / w1;
if (w1 < MAX && w2 < MAX)
if (mxp[w1] > 0 && mxn[w2] > 0)
ans += total;
}
}
}

}

To do this in one loop (xn, xp, yn, yp are arrays storing the points):

for i in range (1,M):
factors = md[i]
y = i*i
for a in factors:
b = y//a
if b<M and a<M:
tx = xn[a]xp[b](yp[i]+yn[i])
ty = yn[a]yp[b](xp[i]+xn[i])
ans += xn[a]xp[b](yp[i]+yn[i])
ans += yn[a]yp[b](xp[i]+xn[i])

1 Like

Instead of checking for every triplet x^2 = -yz on both the axis, I took the squarefree representation of each number on x-axis st x_i = a_i^2 b_i

Now, we had to find all x_j and x_k so that x_jx_k is a perfect square and \sqrt{x_jx_k} existed on y-axis.

Note that this is very easy to do as x_j and x_k will only form such a pair iff their squarefree parts are the same.

Now we iterate over all possible squarefree parts and binary search for \sqrt{x_jx_k} exists on y-axis

This is not O(n^2) even if it seems to be. This can be explained mathematically using Combinatorial Arguements and Cauchy-Swatchz inequality.

Here is my code with gives 100pt AC

Code
#include <bits/stdc++.h>
using namespace std;
#define int long long
#define rep(i,n) for(int i(0);i<n;i++)
#define fast  ios_base::sync_with_stdio(0); cin.tie(0);
#define watch(x) cout<<(#x)<<" is "<<x<<"\n";
#define out(x) cout<<x<<"\n";
#define pb push_back

const int N = 100004;
int lp[N+1], sqfree[N+1];

vector<int> pr;

void sieve(){
for (int i=2; i<=N; ++i) {
if (lp[i] == 0) {
lp[i] = i;
pr.push_back (i);
}
for (int j=0; j<(int)pr.size() && pr[j]<=lp[i] && i*pr[j]<=N; ++j)
lp[i * pr[j]] = pr[j];
}

}

void sqf(){
for(int n(1);n<100005;n++){
int temp=n,i,ans=1;
map<int, int> freq;
map<int,int>::iterator it;
while(temp>1){
i = lp[temp];
freq[i]++;
temp /=i;
}
for(it = freq.begin();it!=freq.end();++it){
if(it->second % 2 == 1)ans *= it->first;
}
sqfree[n] = ans;
}
}

int32_t main(){

fast;
sieve();
sqf();

int t;
cin>>t;

while(t--){
int n,m;
cin>>n>>m;
vector<int> xpos[100005],xneg[100005],yneg[100005] ,ypos[100005],x,y;
bool xzero=false, yzero=false;
rep(i,n){
int r;
cin>>r;
x.pb(r);
if(r==0)xzero=true;
else if(r>0){
xpos[sqfree[r]].pb(sqrt(r/sqfree[r]));
}
else{
r *= -1;
xneg[sqfree[r]].pb(sqrt(r/sqfree[r]));
}
}
rep(i,m){
int r;
cin>>r;
if(r==0)yzero=true;
else if(r>0){
y.pb(r);
ypos[sqfree[r]].pb(sqrt(r/sqfree[r]));
}
else{
y.pb(r);
r *= -1;
yneg[sqfree[r]].pb(sqrt(r/sqfree[r]));
}
}

sort(x.begin(), x.end());
sort(y.begin(), y.end());

vector<int> u,v,a,b;
int tempx,tempy,ans=0;

if(xzero)ans += (n-1)*m;
if(yzero)ans += (m-1)*n;

for(int i(1);i<100005;i++){
u = xpos[i];
v = xneg[i];
for(int j=0 ; j<v.size();j++){
for(int k=0;k<u.size();k++){
tempx = v[j]*u[k]*i;
if(tempx!=0){
if(binary_search(y.begin() , y.end() , tempx))
ans++;
if(binary_search(y.begin() , y.end() , -1*tempx))
ans++;
}
}
}

a = ypos[i];
b = yneg[i];
for(int j=0 ; j<b.size();j++){
for(int k=0;k<a.size();k++){
tempy = b[j]*a[k]*i;
if(tempy!=0){
if(binary_search(x.begin() , x.end() , tempy))
ans++;
if(binary_search(x.begin() , x.end() , -1*tempy))
ans++;
}
}
}
}

cout<<ans<<"\n";

}

return 0;
}