Why am I getting Time limit for this optimized code(NCC1904-different combinations)?

#include<bits/stdc++.h>
using namespace std;
#define ll long long
#define fast ios_base::sync_with_stdio(false);cin.tie(NULL);cout.tie(NULL);
#define f(i,a,b) for(i=a;i<=b;i++)
#define fd(i,a,b) for(i=a;i>=b;i–)
#define mod 1000000007

int main(){
fast;
ll t,n,i;

ll phi[1000001]; 
for (ll i=1; i<=1000000; i++) 
    phi[i] = i;
for (ll p=2; p<=1000000; p++) 
{ 
    if (phi[p] == p) 
    { 
        phi[p] = p-1; 
        for (ll i = 2*p; i<=1000000; i += p)
        {
           phi[i] = (phi[i]/p) * (p-1); 
        } 
    } 
}
f(i,2,1000000) phi[i]=(phi[i]+phi[i-1])%mod;
cin>>t;
while(t--){
    cin>>n;
    //ll ans=0;
    cout<<(phi[n]*phi[n])%mod<<endl;
}
return 0;

}

replace endl with “\n”. Accepted solution (https://www.codechef.com/submit/complete/23180205)