How can I further optimize this code?

This code is giving TLE for some cases. How can I optimize it further?
Refer to this question → Contest Page | CodeChef

#include <bits/stdc++.h>
#define ll long long
#define li long int

using namespace std;

int main(){
    ios_base::sync_with_stdio(false);
    cin.tie(0);
    cout.tie(0);
    
    int t; cin>>t;
    while(t--){
        int n , q; cin>>n>>q;
        int odds=0; int evens=0;
        
        vector<int> arr(n);
        for(int i=0; i<n; i++){
            cin>>arr[i];
        }
        for(int i=0; i<n; i++){         
            int cnt=0;
            while(arr[i]){             // counting the set bits 
                cnt+=arr[i]&1;
                arr[i]>>=1;
            }
            if(cnt%2==0) evens++;
            else odds++;
        }
        
        while(q--){              // taking queries
            int p; cin>>p;
            int cnt=0;
            while(p){            // counting the set bits 
                cnt+=p&1;
                p>>=1;
            }
            if(cnt%2==0){
                cout<<evens<<" "<<odds<<endl;
            }
            else cout<<odds<<" "<<evens<<endl;
        }
    }
    
    
    return 0;
}