Help me on problem FINDINGSUM

#include <bits/stdc++.h>
#define ll long long
// #define int long long 
using namespace std;
const ll P=998244353,MAX=500005;
vector<ll> f(MAX+5,1),inv_f(MAX+5,1);
char buf[1<<23],*p1=buf,*p2=buf,obuf[1<<23],*O=obuf;
#define gc() (p1==p2&&(p2=(p1=buf)+fread(buf,1,1<<21,stdin),p1==p2)?EOF:*p1++)
long long n,x;
template <typename T>
inline void read(T &f){
	f=1;
	T x=0;
	char ch=gc();
	while(ch<'0'||ch>'9'){
		if(ch=='-')
			f=-1;
		ch=gc();
	}
	while(ch>='0'&&ch<='9'){
		x=x*10+ch-'0';
		ch=gc();
	}
	f*=x;
}
template <typename T>
inline void print(T x){
	if(x<0){
		putchar('-');
		x=-x;
	}
	if(x>9)
		print(x/10);
	putchar(x%10+'0');
}
int qmi(int a,int b,int p){
	int t=1;
	while(b){
		if(b&1)
			t=(ll)t*a%p;
		a=(ll)a*a%p;
		b>>=1;
	}
	return t;
}
void init(){
    for(ll i=2;i<MAX;i++)
        f[i]=(f[i-1]*i)%P;
    inv_f[MAX-1]=qmi(f[MAX-1],P-2,P);
    for(ll i=MAX-2;i>=0;i--)
        inv_f[i]=(inv_f[i+1]*(i+1))%P;
}  
ll C(ll a,ll b,ll p){
    if(a==b)
        return 1; 
    if((a<0)||(a<b)||(b<0))      
        return 0;  
    return ((inv_f[b]*inv_f[a-b])%p*f[a])%p;     
}
long long Lucas(long long n,long long m,long long p){
	if(m==0)
		return 1;
	return (C(n%p,m%p,p)*Lucas(n/p,m/p,p))%p;
} 
ll ans=0;
signed main(){
// 	freopen("sequence.in","r",stdin);
// 	freopen("sequence.out","w",stdout);
    // ios::sync_with_stdio(0);
    // cin.tie(0);
    // cout.tie(0);
    int T;
    read(T);
    while(T--){
        init();
    	ll n,m;
    	read(n),read(m);
        ll ans=0;
        for(ll i=0;i<=m;i++)
            for(ll j=0;j<=n;j++)
                ans=(ans+((((Lucas(n,j,P)*Lucas(m-i-1,j-1,P))%P)*Lucas(m-i+n-j-1,n-j-1,P))%P)*((((Lucas(n+i-1,n-1,P)*(2ll*(m-i)))%P)*(2ll*(m-i)))%P))%P;
        printf("%lld\n",ans);
    }

	return 0;
}
	

I think my code has a correct time complexity, but it get TLE on #3.