SIMILARSUBSQ-Editorial

PROBLEM LINK:

Contest Division 1
Contest Division 2
Contest Division 3
Contest Division 4

Setter: Utkarsh Gupta
Testers: Jatin Garg, Tejas Pandey
Editorialist: Devendra Singh

DIFFICULTY:

2689

PREREQUISITES:

Fast Fourier Transform

PROBLEM:

Chef is given an array A of length N.

He considers a subarray [L, R] to be good if and only if there exists a subsequence [i_1, i_2, \ldots, i_{R-L+1} ] such that:

  • A_{i_k} = A_{L+k-1} for each 1 \leq k \leq R-L+1 (i.e. the subsequence is same as the subarray element-wise)
  • For at least one value of k, i_k \neq L+k-1 (i.e. the subsequence differs from the subarray index-wise).

For each i (1 \leq i \leq N), determine the number of good subarrays of length i.

EXPLANATION:

O(N^2) Brute Force approach : Let us iterate over each subarray of array A and check whether it is good or bad. A subarray A[L,R] can only be good if at least one of the following is true:

  • There is an index i<L such that A_i=A_L.
  • There is an index j>R such that A_R=A_j.
    If none of them is true then the subsequence has to start from L and end at R which violates the second condition for a subarray to be good. If any one of them is true we can just take either [i,L+1.....R] as a subsequence or [L,.....R-1,j].
    This can be implemented easily using Boolean arrays. Both the conditions above can be verified in O(1) time and if any one of them is true we include the subarray in our answer. There are total N\cdot (\frac{N+1}{2}) subarrays hence this approach takes O(N^2) time.

O(Nlog(N)) FFT based approach:
Let us begin with some definitions. Let us define an array goodleft where goodleft_i is true if There is an index j<i such that A_j=A_i. Similarly define an array goodright where goodright_i is true if There is an index j>i such that A_i=A_j. Using these two arrays construct two polynomials P_1 and P_2
where P_1(X)=\sum_{i=1}^{N}X^{-i+1}*goodleft_i and P_2(X)=\sum_{i=1}^{N}X^{i}*goodright_i.
The multiplication of polynomial P_1 with identity polynomial (\sum_{i=1}^{N}X^{i}) results in a polynomial in which the coefficient of X^i represents number of good subarrays of length i which start from one of the indices which goodleft true (not first occurrence of themselves).
The negative exponents should be ignored as they are formed by invalid selection of L and R (L>R). Add all the coefficients of positive exponents to the answer.
The multiplication of polynomial P_2 with a polynomial similar to identity polynomial with negative exponents (\sum_{i=1}^{N}X^{-i+1}) results in a polynomial in which the coefficient of X^i represents number of good subarrays of length i which end at one of the indices which goodright true (not last occurrence of themselves).
The negative exponents should be ignored as they are formed by invalid selection of L and R (L>R). Add all the coefficients of positive exponents to the answer.
Now let’s find number of subarrays which start at an index which is goodleft true and and end at an index which is goodright true. It is easy to see that the multiplication of the polynomial P_1 with P_2 results in a polynomial in which the coefficient of X^i represents number of good subarrays of length i which end at one of the indices which is goodright true (not last occurrence of themselves) and start at an index which is goodleft true (not first occurrence of themselves).
The negative exponents should be ignored as they are formed by invalid selection of L and R (L>R). Subtract all the coefficients of positive exponents to the answer as they are included twice in the answer.
Multiplication of two N length polynomials takes O(Nlog(N)) time using FFT.
For easier implementation we can shift(offset) the exponents by N-1 (mutlitply by X^{N-1}) to avoid dealing with negative exponents each time we multiply two polynomials.
For details of implementation please refer to the solutions attached.

TIME COMPLEXITY:

O(Nlog(N)) for each test case.

SOLUTION:

Setter's solution
//Utkarsh.25dec
#include <bits/stdc++.h>
#define ll long long int
#define pb push_back
#define mp make_pair
#define mod 1000000007
#define vl vector <ll>
#define all(c) (c).begin(),(c).end()
using namespace std;
ll power(ll a,ll b) {ll res=1;a%=mod; assert(b>=0); for(;b;b>>=1){if(b&1)res=res*a%mod;a=a*a%mod;}return res;}
ll modInverse(ll a){return power(a,mod-2);}
const int N=500023;
bool vis[N];
vector <int> adj[N];
long long readInt(long long l,long long r,char endd){
    long long x=0;
    int cnt=0;
    int fi=-1;
    bool is_neg=false;
    while(true){
        char g=getchar();
        if(g=='-'){
            assert(fi==-1);
            is_neg=true;
            continue;
        }
        if('0'<=g && g<='9'){
            x*=10;
            x+=g-'0';
            if(cnt==0){
                fi=g-'0';
            }
            cnt++;
            assert(fi!=0 || cnt==1);
            assert(fi!=0 || is_neg==false);

            assert(!(cnt>19 || ( cnt==19 && fi>1) ));
        } else if(g==endd){
            if(is_neg){
                x= -x;
            }

            if(!(l <= x && x <= r))
            {
                cerr << l << ' ' << r << ' ' << x << '\n';
                assert(1 == 0);
            }

            return x;
        } else {
            assert(false);
        }
    }
}
string readString(int l,int r,char endd){
    string ret="";
    int cnt=0;
    while(true){
        char g=getchar();
        assert(g!=-1);
        if(g==endd){
            break;
        }
        cnt++;
        ret+=g;
    }
    assert(l<=cnt && cnt<=r);
    return ret;
}
long long readIntSp(long long l,long long r){
    return readInt(l,r,' ');
}
long long readIntLn(long long l,long long r){
    return readInt(l,r,'\n');
}
string readStringLn(int l,int r){
    return readString(l,r,'\n');
}
string readStringSp(int l,int r){
    return readString(l,r,' ');
}
namespace FFT {
    using float_t = double;

    struct cd {
        float_t x, y;
        cd(float_t x = 0, float_t y = 0): x(x), y(y) {}
        friend ostream& operator<<(ostream& os, const cd& c) { 
            return os << '(' << c.x << ", " << c.y << ')';
        }

        float_t real() const { return x; }
        float_t imag() const { return y; }

        cd& operator+=(const cd& o) { x += o.x, y += o.y; return *this; }
        cd operator+(const cd& o) const { cd ret = *this; ret += o; return ret; }

        cd& operator-=(const cd& o) { x -= o.x, y -= o.y; return *this; }
        cd operator-(const cd& o) const { cd ret = *this; ret -= o; return ret; }

        cd& operator*=(const cd& o) { tie(x, y) = make_pair(x * o.x - y * o.y, x * o.y + y * o.x); return *this; }
        cd operator*(const cd& o) const { cd ret = *this; ret *= o; return ret; }

        cd& operator/=(const float_t& r) { x /= r, y /= r; return *this; }
        cd operator/(const float_t& r) const { cd ret = *this; ret /= r; return ret; }

    };

    const float_t PI = acos(float_t(-1));
    const int FFT_CUTOFF = 150;
    vector<cd> roots = {{0, 0}, {1, 0}}, inv_roots = {{0, 0}, {1, 0}};
    vector<int> bit_order;

    void prepare_roots(int n) {
        if(roots.size() >= n) return;
        int len = roots.size();
        roots.resize(n); inv_roots.resize(n);
        while(n > len) {
            const cd w(cos(PI / len), sin(PI / len)), inv_w(w.real(), -w.imag());
            for(int i = len >> 1; i < len; i++) {
                roots[i << 1] = roots[i];
                roots[i << 1 | 1] = roots[i] * w;
                inv_roots[i << 1] = inv_roots[i];
                inv_roots[i << 1 | 1] = inv_roots[i] * inv_w;
            } len <<= 1;
        }
    }

    void reorder_bits(int n, vector<cd>& a) {
        if(bit_order.size() != n) {
            bit_order.assign(n, 0);
            int len = __builtin_ctz(n);
            for(int i = 0; i < n; i++)
                bit_order[i] = (bit_order[i >> 1] >> 1) + ((i & 1) << len-1);
        }
        for(int i = 0; i < n; i++)
            if(i > bit_order[i]) swap(a[i], a[bit_order[i]]);
    }

    void fft(int n, vector<cd>& a, bool invert = false) {
        prepare_roots(n); reorder_bits(n, a);
        const auto& ws = invert? inv_roots : roots;
        for(int len = 1; len < n; len <<= 1)
            for(int i = 0; i < n; i += len << 1)
                for(int j = 0; j < len; j++) {
                    const cd& even = a[i + j];
                    cd odd = a[i + len + j] * ws[len + j];
                    a[i + len + j] = even - odd; a[i + j] = even + odd;
                }
        if(invert)
            for(auto& x: a) x /= n;
    }

    template<typename T>
    vector<T> multiply(const vector<T>& a, const vector<T>& b, bool trim = false) {
        int n = a.size(), m = b.size();
        if(n == 0 or m == 0)
            return vector<T>{0};

        if(min(n, m) < FFT_CUTOFF) {
            vector<T> res(n + m - 1);
            for(int i = 0; i < n; i++)
                for(int j = 0; j < m; j++)
                    res[i + j] += a[i] * b[j];
            
            if(trim) {
                while(!res.empty() && res.back() == 0)
                    res.pop_back();
            }

            return res;
        }

        int N = [](int x) { while(x & x-1) x = (x | x-1) + 1; return x; }(n + m - 1);
        vector<cd> fa(a.begin(), a.end()), fb(b.begin(), b.end());
        fa.resize(N); fb.resize(N);
        
        bool equal = n == m and a == b;
        fft(N, fa);

        if(equal) fb = fa;
        else fft(N, fb);

        for(int i = 0; i < N; i++)
            fa[i] *= fb[i];

        fft(N, fa, true);

        fa.resize(n + m - 1);
        vector<T> res(n + m - 1);
        for(int i = 0; i < n + m - 1; i++)
            res[i] = is_integral<T>::value? round(fa[i].real()) : fa[i].real();

        if(trim) {
            while(!res.empty() && res.back() == 0)
                res.pop_back();
        }

        return res;
    }

    template<typename T>
    T expo(T A, int64_t B) {
        T res{1}; while(B) {
            if(B & 1) res = res * A;
            B >>= 1; A = A * A;
        } return res;
    }

    template<typename T>
    vector<T> expo(const vector<T>& a, int e, bool trim = false) {
        int n = a.size();
        int N = [](int x) { while(x & x-1) x = (x | x-1) + 1; return x; }((n-1) * e + 1);
        vector<cd> fa(a.begin(), a.end()); fa.resize(N);

        fft(N, fa);

        for(int i = 0; i < N; i++)
            fa[i] = expo(fa[i], e);

        fft(N, fa, true);

        fa.resize((n-1) * e + 1);
        vector<T> res((n-1) * e + 1);
        for(int i = 0; i < res.size(); i++)
            res[i] = is_integral<T>::value? round(fa[i].real()) : fa[i].real();

        if(trim) {
            while(!res.empty() && res.back() == 0)
                res.pop_back();
        }

        return res;
    }

} // namespace FFT 
int sumN=0;
void solve()
{
    int n=readInt(1,100000,'\n');
    sumN+=n;
    assert(sumN<=100000);
    int A[n+1]={0};
    int goodleft[n+1]={0};
    int goodright[n+1]={0};
    for(int i=1;i<=n;i++)
    {
        if(i==n)
            A[i]=readInt(1,n,'\n');
        else
            A[i]=readInt(1,n,' ');
    }
    int vis[n+1]={0};
    for(int i=1;i<=n;i++)
    {
        goodleft[i]=vis[A[i]];
        vis[A[i]]=1;
    }
    memset(vis,0,sizeof(vis));
    for(int i=n;i>=1;i--)
    {
        goodright[i]=vis[A[i]];
        vis[A[i]]=1;
    }
    vector <int> goodleftpol,goodrightpol;
    for(int i=0;i<=n;i++)
    {
        goodleftpol.pb(goodleft[n-i]);
        goodrightpol.pb(goodright[i]);
    }
    vector <int> identityleft, identityright;
    for(int i=0;i<=n;i++)
    {
        if(i==0)
            identityright.pb(0);
        else
            identityright.pb(1);
    }
    for(int i=0;i<=n;i++)
    {
        if(i==n)
            identityleft.pb(0);
        else
            identityleft.pb(1);
    }
    vector <int> answerpol=FFT::multiply(goodleftpol,identityright);
    vector <int> tmp=FFT::multiply(identityleft,goodrightpol);
    for(int i=0;i<answerpol.size();i++)
        answerpol[i]+=tmp[i];
    tmp.clear();
    tmp=FFT::multiply(goodleftpol,goodrightpol);
    for(int i=0;i<answerpol.size();i++)
        answerpol[i]-=tmp[i];
    for(int i=n;i<=2*n-1;i++)
        cout<<answerpol[i]<<' ';
    cout<<'\n';
}
int main()
{
    #ifndef ONLINE_JUDGE
    freopen("input.txt", "r", stdin);
    freopen("output.txt", "w", stdout);
    #endif
    ios_base::sync_with_stdio(false);
    cin.tie(NULL),cout.tie(NULL);
    int T=readInt(1,5000,'\n');
    while(T--)
        solve();
    assert(getchar()==-1);
    cerr << "Time : " << 1000 * ((double)clock()) / (double)CLOCKS_PER_SEC << "ms\n";
}

Tester-1's Solution
// Jai Shree Ram  
  
#include<bits/stdc++.h>
using namespace std;

#define rep(i,a,n)     for(int i=a;i<n;i++)
#define ll             long long
#define int            long long
#define pb             push_back
#define all(v)         v.begin(),v.end()
#define endl           "\n"
#define x              first
#define y              second
#define gcd(a,b)       __gcd(a,b)
#define mem1(a)        memset(a,-1,sizeof(a))
#define mem0(a)        memset(a,0,sizeof(a))
#define sz(a)          (int)a.size()
#define pii            pair<int,int>
#define hell           1000000007
#define elasped_time   1.0 * clock() / CLOCKS_PER_SEC



template<typename T1,typename T2>istream& operator>>(istream& in,pair<T1,T2> &a){in>>a.x>>a.y;return in;}
template<typename T1,typename T2>ostream& operator<<(ostream& out,pair<T1,T2> a){out<<a.x<<" "<<a.y;return out;}
template<typename T,typename T1>T maxs(T &a,T1 b){if(b>a)a=b;return a;}
template<typename T,typename T1>T mins(T &a,T1 b){if(b<a)a=b;return a;}

// -------------------- Input Checker Start --------------------
 
long long readInt(long long l, long long r, char endd)
{
    long long x = 0;
    int cnt = 0, fi = -1;
    bool is_neg = false;
    while(true)
    {
        char g = getchar();
        if(g == '-')
        {
            assert(fi == -1);
            is_neg = true;
            continue;
        }
        if('0' <= g && g <= '9')
        {
            x *= 10;
            x += g - '0';
            if(cnt == 0)
                fi = g - '0';
            cnt++;
            assert(fi != 0 || cnt == 1);
            assert(fi != 0 || is_neg == false);
            assert(!(cnt > 19 || (cnt == 19 && fi > 1)));
        }
        else if(g == endd)
        {
            if(is_neg)
                x = -x;
            if(!(l <= x && x <= r))
            {
                cerr << l << ' ' << r << ' ' << x << '\n';
                assert(false);
            }
            return x;
        }
        else
        {
            assert(false);
        }
    }
}
 
string readString(int l, int r, char endd)
{
    string ret = "";
    int cnt = 0;
    while(true)
    {
        char g = getchar();
        assert(g != -1);
        if(g == endd)
            break;
        cnt++;
        ret += g;
    }
    assert(l <= cnt && cnt <= r);
    return ret;
}
 
long long readIntSp(long long l, long long r) { return readInt(l, r, ' '); }
long long readIntLn(long long l, long long r) { return readInt(l, r, '\n'); }
string readStringLn(int l, int r) { return readString(l, r, '\n'); }
string readStringSp(int l, int r) { return readString(l, r, ' '); }
void readEOF() { assert(getchar() == EOF); }
 
vector<int> readVectorInt(int n, long long l, long long r)
{
    vector<int> a(n);
    for(int i = 0; i < n - 1; i++)
        a[i] = readIntSp(l, r);
    a[n - 1] = readIntLn(l, r);
    return a;
}
 
// -------------------- Input Checker End --------------------
long long sum_n = 0;





const int MOD = 998244353;

 
struct mod_int {
    int val;
 
    mod_int(long long v = 0) {
        if (v < 0)
            v = v % MOD + MOD;
 
        if (v >= MOD)
            v %= MOD;
 
        val = v;
    }
 
    static int mod_inv(int a, int m = MOD) {
        int g = m, r = a, x = 0, y = 1;
 
        while (r != 0) {
            int q = g / r;
            g %= r; swap(g, r);
            x -= q * y; swap(x, y);
        }
 
        return x < 0 ? x + m : x;
    }
 
    explicit operator int() const {
        return val;
    }
 
    mod_int& operator+=(const mod_int &other) {
        val += other.val;
        if (val >= MOD) val -= MOD;
        return *this;
    }
 
    mod_int& operator-=(const mod_int &other) {
        val -= other.val;
        if (val < 0) val += MOD;
        return *this;
    }
 
    static unsigned fast_mod(uint64_t x, unsigned m = MOD) {
           #if !defined(_WIN32) || defined(_WIN64)
                return x % m;
           #endif
           unsigned x_high = x >> 32, x_low = (unsigned) x;
           unsigned quot, rem;
           asm("divl %4\n"
            : "=a" (quot), "=d" (rem)
            : "d" (x_high), "a" (x_low), "r" (m));
           return rem;
    }
 
    mod_int& operator*=(const mod_int &other) {
        val = fast_mod((uint64_t) val * other.val);
        return *this;
    }
 
    mod_int& operator/=(const mod_int &other) {
        return *this *= other.inv();
    }
 
    friend mod_int operator+(const mod_int &a, const mod_int &b) { return mod_int(a) += b; }
    friend mod_int operator-(const mod_int &a, const mod_int &b) { return mod_int(a) -= b; }
    friend mod_int operator*(const mod_int &a, const mod_int &b) { return mod_int(a) *= b; }
    friend mod_int operator/(const mod_int &a, const mod_int &b) { return mod_int(a) /= b; }
 
    mod_int& operator++() {
        val = val == MOD - 1 ? 0 : val + 1;
        return *this;
    }
 
    mod_int& operator--() {
        val = val == 0 ? MOD - 1 : val - 1;
        return *this;
    }
 
    mod_int operator++(int32_t) { mod_int before = *this; ++*this; return before; }
    mod_int operator--(int32_t) { mod_int before = *this; --*this; return before; }
 
    mod_int operator-() const {
        return val == 0 ? 0 : MOD - val;
    }
 
    bool operator==(const mod_int &other) const { return val == other.val; }
    bool operator!=(const mod_int &other) const { return val != other.val; }
 
    mod_int inv() const {
        return mod_inv(val);
    }
 
    mod_int pow(long long p) const {
        assert(p >= 0);
        mod_int a = *this, result = 1;
 
        while (p > 0) {
            if (p & 1)
                result *= a;
 
            a *= a;
            p >>= 1;
        }
 
        return result;
    }
 
    friend ostream& operator<<(ostream &stream, const mod_int &m) {
        return stream << m.val;
    }
    friend istream& operator >> (istream &stream, mod_int &m) {
        return stream>>m.val;   
    }
};


typedef mod_int base;


namespace algebra {
	const int inf = 1e9;
	const int magic = 200; // threshold for sizes to run the naive algo
	
	namespace NTT {
	vector<mod_int> roots = {0, 1};
		vector<int> bit_reverse;
		int max_size = -1;
		mod_int root;
	 
		bool is_power_of_two(int n) {
			return (n & (n - 1)) == 0;
		}
	 
		int round_up_power_two(int n) {
			while (n & (n - 1))
				n = (n | (n - 1)) + 1;
	 
			return max(n, 1LL);
		}
	 
		// Given n (a power of two), finds k such that n == 1 << k.
		int get_length(int n) {
			assert(is_power_of_two(n));
			return __builtin_ctz(n);
		}
	 
		// Rearranges the indices to be sorted by lowest bit first, then second lowest, etc., rather than highest bit first.
		// This makes even-odd div-conquer much easier.
		void bit_reorder(int n, vector<mod_int> &values) {
			if ((int) bit_reverse.size() != n) {
				bit_reverse.assign(n, 0);
				int length = get_length(n);
	 
				for (int i = 0; i < n; i++)
					bit_reverse[i] = (bit_reverse[i >> 1] >> 1) + ((i & 1) << (length - 1));
			}
	 
			for (int i = 0; i < n; i++)
				if (i < bit_reverse[i])
					swap(values[i], values[bit_reverse[i]]);
		}
	 
		void find_root() {
			max_size = 1 << __builtin_ctz(MOD - 1);
			root = 2;
	 
			// Find a max_size-th primitive root of MOD.
			while (!(root.pow(max_size) == 1 && root.pow(max_size / 2) != 1))
				root++;
		}
	 
		void prepare_roots(int n) {
			if (max_size < 0)
				find_root();
	 
			assert(n <= max_size);
	 
			if ((int) roots.size() >= n)
				return;
	 
			int length = get_length(roots.size());
			roots.resize(n);
	 
			// The roots array is set up such that for a given power of two n >= 2, roots[n / 2] through roots[n - 1] are
			// the first half of the n-th primitive roots of MOD.
			while (1 << length < n) {
				// z is a 2^(length + 1)-th primitive root of MOD.
				mod_int z = root.pow(max_size >> (length + 1));
	 
				for (int i = 1 << (length - 1); i < 1 << length; i++) {
					roots[2 * i] = roots[i];
					roots[2 * i + 1] = roots[i] * z;
				}
	 
				length++;
			}
		}
	 
		void fft_iterative(int N, vector<mod_int> &values) {
			assert(is_power_of_two(N));
			prepare_roots(N);
			bit_reorder(N, values);
	 
			for (int n = 1; n < N; n *= 2)
				for (int start = 0; start < N; start += 2 * n)
					for (int i = 0; i < n; i++) {
						mod_int even = values[start + i];
						mod_int odd = values[start + n + i] * roots[n + i];
						values[start + n + i] = even - odd;
						values[start + i] = even + odd;
					}
		}
	 
		const int FFT_CUTOFF = magic;
	 
		vector<mod_int> mod_multiply(vector<mod_int> left, vector<mod_int> right) {
			int n = left.size();
			int m = right.size();
	 
			// Brute force when either n or m is small enough.
			if (min(n, m) < FFT_CUTOFF) {
				const uint64_t ULL_BOUND = numeric_limits<uint64_t>::max() - (uint64_t) MOD * MOD;
				vector<uint64_t> result(n + m - 1, 0);
	 
				for (int i = 0; i < n; i++)
					for (int j = 0; j < m; j++) {
						result[i + j] += (uint64_t) ((int) left[i]) * ((int) right[j]);
	 
						if (result[i + j] > ULL_BOUND)
							result[i + j] %= MOD;
					}
	 
				for (uint64_t &x : result)
					if (x >= MOD)
						x %= MOD;
	 
				return vector<mod_int>(result.begin(), result.end());
			}
	 
			int N = round_up_power_two(n + m - 1);
			left.resize(N);
			right.resize(N);
	 
			bool equal = left == right;
			fft_iterative(N, left);
	 
			if (equal)
				right = left;
			else
				fft_iterative(N, right);
	 
			mod_int inv_N = mod_int(N).inv();
	 
			for (int i = 0; i < N; i++)
				left[i] *= right[i] * inv_N;
	 
			reverse(left.begin() + 1, left.end());
			fft_iterative(N, left);
			left.resize(n + m - 1);
			return left;
		}
		template<typename T>
		void mul(vector<T> &a,const vector<T> &b){
			a = mod_multiply(a,b);
		}
	}
	
	
	template<typename T>
	struct poly {
		vector<T> a;
		
		void normalize() { // get rid of leading zeroes
			while(!a.empty() && a.back() == T(0)) {
				a.pop_back();
			}
		}
		
		poly(){}
		poly(T a0) : a{a0}{normalize();}
		poly(vector<T> t) : a(t){normalize();}
		
		poly operator += (const poly &t) {
			a.resize(max(a.size(), t.a.size()));
			for(size_t i = 0; i < t.a.size(); i++) {
				a[i] += t.a[i];
			}
			normalize();
			return *this;
		}
		poly operator -= (const poly &t) {
			a.resize(max(a.size(), t.a.size()));
			for(size_t i = 0; i < t.a.size(); i++) {
				a[i] -= t.a[i];
			}
			normalize();
			return *this;
		}
		poly operator + (const poly &t) const {return poly(*this) += t;}
		poly operator - (const poly &t) const {return poly(*this) -= t;}
		
		poly mod_xk(size_t k) const { // get same polynomial mod x^k
			k = min(k, a.size());
			return vector<T>(begin(a), begin(a) + k);
		}
		poly mul_xk(size_t k) const { // multiply by x^k
			poly res(*this);
			res.a.insert(begin(res.a), k, 0);
			return res;
		}
		poly div_xk(size_t k) const { // divide by x^k, dropping coefficients
			k = min(k, a.size());
			return vector<T>(begin(a) + k, end(a));
		}
		poly substr(size_t l, size_t r) const { // return mod_xk(r).div_xk(l)
			l = min(l, a.size());
			r = min(r, a.size());
			return vector<T>(begin(a) + l, begin(a) + r);
		}
		poly inv(size_t n) const { // get inverse series mod x^n
			assert(!is_zero());
			poly ans = a[0].inv();
			size_t a = 1;
			while(a < n) {
				poly C = (ans * mod_xk(2 * a)).substr(a, 2 * a);
				ans -= (ans * C).mod_xk(a).mul_xk(a);
				a *= 2;
			}
			ans.a.resize(n);
			return ans;
		}
		poly sqrt(size_t n) const { 
			int m = a.size();
			poly b ={1};
			size_t s = 1;
			while(s<n){
				vector<T> x(a.begin(),a.begin()+min(a.size(),b.a.size()<<1));
				b.a.resize(b.a.size()<<1);
				poly c(x);
				c *=b.inv(b.a.size());
				T inv2 = static_cast<T>(1)/static_cast<T>(2);
				for(int i = b.a.size()>>1;i<min(c.a.size(),b.a.size());i++){
					b.a[i] = c.a[i]*inv2;
				}
				s *= 2;
			}
			b.a.resize(n);
			return b;
		}
		
		poly operator *= (const poly &t) {NTT::mul(a, t.a); normalize(); return *this;}
		poly operator * (const poly &t) const {return poly(*this) *= t;}
		
		poly reverse(size_t n, bool rev = 0) const { // reverses and leaves only n terms
			poly res(*this);
			if(rev) { // If rev = 1 then tail goes to head
				res.a.resize(max(n, res.a.size()));
			}
			std::reverse(res.a.begin(), res.a.end());
			return res.mod_xk(n);
		}
		
		pair<poly, poly> divmod_slow(const poly &b) const { // when divisor or quotient is small
			vector<T> A(a);
			vector<T> res;
			while(A.size() >= b.a.size()) {
				res.push_back(A.back() / b.a.back());
				if(res.back() != T(0)) {
					for(size_t i = 0; i < b.a.size(); i++) {
						A[A.size() - i - 1] -= res.back() * b.a[b.a.size() - i - 1];
					}
				}
				A.pop_back();
			}
			std::reverse(begin(res), end(res));
			return {res, A};
		}
		
		pair<poly, poly> divmod(const poly &b) const { // returns quotiend and remainder of a mod b
			if(deg() < b.deg()) {
				return {poly{0}, *this};
			}
			int d = deg() - b.deg();
			if(min(d, b.deg()) < magic) {
				return divmod_slow(b);
			}
			poly D = (reverse(d + 1) * b.reverse(d + 1).inv(d + 1)).mod_xk(d + 1).reverse(d + 1, 1);
			return {D, *this - D * b};
		}
		
		poly operator / (const poly &t) const {return divmod(t).first;}
		poly operator % (const poly &t) const {return divmod(t).second;}
		poly operator /= (const poly &t) {return *this = divmod(t).first;}
		poly operator %= (const poly &t) {return *this = divmod(t).second;}
		poly operator *= (const T &x) {
			for(auto &it: a) {
				it *= x;
			}
			normalize();
			return *this;
		}
		poly operator /= (const T &x) {
			for(auto &it: a) {
				it /= x;
			}
			normalize();
			return *this;
		}
		poly operator * (const T &x) const {return poly(*this) *= x;}
		poly operator / (const T &x) const {return poly(*this) /= x;}
		
		void print() const {
			for(auto it: a) {
				cout << it << ' ';
			}
			cout << endl;
		}
		T eval(T x) const { // evaluates in single point x
			T res(0);
			for(int i = sz(a) - 1; i >= 0; i--) {
				res *= x;
				res += a[i];
			}
			return res;
		}
		
		T& lead() { // leading coefficient
			return a.back();
		}
		int deg() const { // degree
			return a.empty() ? -inf : a.size() - 1;
		}
		bool is_zero() const { // is polynomial zero
			return a.empty();
		}
		T operator [](int idx) const {
			return idx >= (int)a.size() || idx < 0 ? T(0) : a[idx];
		}
		
		T& coef(size_t idx) { // mutable reference at coefficient
			return a[idx];
		}
		bool operator == (const poly &t) const {return a == t.a;}
		bool operator != (const poly &t) const {return a != t.a;}
		
		poly deriv() { // calculate derivative
			vector<T> res;
			for(int i = 1; i <= deg(); i++) {
				res.push_back(T(i) * a[i]);
			}
			return res;
		}
		poly integr() { // calculate integral with C = 0
			vector<T> res = {0};
			for(int i = 0; i <= deg(); i++) {
				res.push_back(a[i] / T(i + 1));
			}
			return res;
		}
		size_t leading_xk() const { // Let p(x) = x^k * t(x), return k
			if(is_zero()) {
				return inf;
			}
			int res = 0;
			while(a[res] == T(0)) {
				res++;
			}
			return res;
		}
		poly log(size_t n) { // calculate log p(x) mod x^n
			assert(a[0] == T(1));
			return (deriv().mod_xk(n) * inv(n)).integr().mod_xk(n);
		}
		poly exp(size_t n) { // calculate exp p(x) mod x^n
			if(is_zero()) {
				return T(1);
			}
			assert(a[0] == T(0));
			poly ans = T(1);
			size_t a = 1;
			while(a < n) {
				poly C = ans.log(2 * a).div_xk(a) - substr(a, 2 * a);
				ans -= (ans * C).mod_xk(a).mul_xk(a);
				a *= 2;
			}
			return ans.mod_xk(n);
			
		}
		poly pow_slow(size_t k, size_t n) { // if k is small
			return k ? k % 2 ? (*this * pow_slow(k - 1, n)).mod_xk(n) : (*this * *this).mod_xk(n).pow_slow(k / 2, n) : T(1);
		}
		poly pow(size_t k, size_t n) { // calculate p^k(n) mod x^n
			if(is_zero()) {
				return *this;
			}
			if(k < magic) {
				return pow_slow(k, n);
			}
			int i = leading_xk();
			T j = a[i];
			poly t = div_xk(i) / j;
			return bpow(j, k) * (t.log(n) * T(k)).exp(n).mul_xk(i * k).mod_xk(n);
		}
		poly mulx(T x) { // component-wise multiplication with x^k
			T cur = 1;
			poly res(*this);
			for(int i = 0; i <= deg(); i++) {
				res.coef(i) *= cur;
				cur *= x;
			}
			return res;
		}
		poly mulx_sq(T x) { // component-wise multiplication with x^{k^2}
			T cur = x;
			T total = 1;
			T xx = x * x;
			poly res(*this);
			for(int i = 0; i <= deg(); i++) {
				res.coef(i) *= total;
				total *= cur;
				cur *= xx;
			}
			return res;
		}
		vector<T> chirpz_even(T z, int n) { // P(1), P(z^2), P(z^4), ..., P(z^2(n-1))
			int m = deg();
			if(is_zero()) {
				return vector<T>(n, 0);
			}
			vector<T> vv(m + n);
			T zi = z.inv();
			T zz = zi * zi;
			T cur = zi;
			T total = 1;
			for(int i = 0; i <= max(n - 1, m); i++) {
				if(i <= m) {vv[m - i] = total;}
				if(i < n) {vv[m + i] = total;}
				total *= cur;
				cur *= zz;
			}
			poly w = (mulx_sq(z) * vv).substr(m, m + n).mulx_sq(z);
			vector<T> res(n);
			for(int i = 0; i < n; i++) {
				res[i] = w[i];
			}
			return res;
		}
		vector<T> chirpz(T z, int n) { // P(1), P(z), P(z^2), ..., P(z^(n-1))
			auto even = chirpz_even(z, (n + 1) / 2);
			auto odd = mulx(z).chirpz_even(z, n / 2);
			vector<T> ans(n);
			for(int i = 0; i < n / 2; i++) {
				ans[2 * i] = even[i];
				ans[2 * i + 1] = odd[i];
			}
			if(n % 2 == 1) {
				ans[n - 1] = even.back();
			}
			return ans;
		}
		template<typename iter>
		vector<T> eval(vector<poly> &tree, int v, iter l, iter r) { // auxiliary evaluation function
			if(r - l == 1) {
				return {eval(*l)};
			} else {
				auto m = l + (r - l) / 2;
				auto A = (*this % tree[2 * v]).eval(tree, 2 * v, l, m);
				auto B = (*this % tree[2 * v + 1]).eval(tree, 2 * v + 1, m, r);
				A.insert(end(A), begin(B), end(B));
				return A;
			}
		}
		vector<T> eval(vector<T> x) { // evaluate polynomial in (x1, ..., xn)
			int n = x.size();
			if(is_zero()) {
				return vector<T>(n, T(0));
			}
			vector<poly> tree(4 * n);
			build(tree, 1, begin(x), end(x));
			return eval(tree, 1, begin(x), end(x));
		}
		template<typename iter>
		poly inter(vector<poly> &tree, int v, iter l, iter r, iter ly, iter ry) { // auxiliary interpolation function
			if(r - l == 1) {
				return {*ly / a[0]};
			} else {
				auto m = l + (r - l) / 2;
				auto my = ly + (ry - ly) / 2;
				auto A = (*this % tree[2 * v]).inter(tree, 2 * v, l, m, ly, my);
				auto B = (*this % tree[2 * v + 1]).inter(tree, 2 * v + 1, m, r, my, ry);
				return A * tree[2 * v + 1] + B * tree[2 * v];
			}
		}
	};
	template<typename T>
	poly<T> operator * (const T& a, const poly<T>& b) {
		return b * a;
	}
	
	template<typename T>
	poly<T> xk(int k) { // return x^k
		return poly<T>{1}.mul_xk(k);
	}

	template<typename T>
	T resultant(poly<T> a, poly<T> b) { // computes resultant of a and b
		if(b.is_zero()) {
			return 0;
		} else if(b.deg() == 0) {
			return bpow(b.lead(), a.deg());
		} else {
			int pw = a.deg();
			a %= b;
			pw -= a.deg();
			T mul = bpow(b.lead(), pw) * T((b.deg() & a.deg() & 1) ? -1 : 1);
			T ans = resultant(b, a);
			return ans * mul;
		}
	}
	template<typename iter>
	poly<typename iter::value_type> kmul(iter L, iter R) { // computes (x-a1)(x-a2)...(x-an) without building tree
		if(R - L == 1) {
			return vector<typename iter::value_type>{-*L, 1};
		} else {
			iter M = L + (R - L) / 2;
			return kmul(L, M) * kmul(M, R);
		}
	}
	template<typename T, typename iter>
	poly<T> build(vector<poly<T>> &res, int v, iter L, iter R) { // builds evaluation tree for (x-a1)(x-a2)...(x-an)
		if(R - L == 1) {
			return res[v] = vector<T>{-*L, 1};
		} else {
			iter M = L + (R - L) / 2;
			return res[v] = build(res, 2 * v, L, M) * build(res, 2 * v + 1, M, R);
		}
	}
	template<typename T>
	poly<T> inter(vector<T> x, vector<T> y) { // interpolates minimum polynomial from (xi, yi) pairs
		int n = x.size();
		vector<poly<T>> tree(4 * n);
		return build(tree, 1, begin(x), end(x)).deriv().inter(tree, 1, begin(x), end(x), begin(y), end(y));
	}
};

using namespace algebra;

typedef poly<base> polyn;

using namespace algebra;



int solve(){
 		
		int n = readIntLn(1,1e5);
		sum_n += n;
		vector<int> a = readVectorInt(n,1,n);
		vector<int> occ(n + 1);
		polyn p1,p2;
		p1.a.resize(n + 1);
		p2.a.resize(n + 1);
		for(int i = 1; i <= n; i++){
			if(occ[a[i - 1]]){
				p1.a[n - i] = 1;
			}
			occ[a[i - 1]] = 1;
		}
		fill(all(occ),0);
		for(int i = n; i >= 1; i--){
			if(occ[a[i - 1]]){
				p2.a[i] = 1;
			}
			occ[a[i - 1]] = 1;
		}
		polyn q1,q2;
		q1.a.resize(n + 1,1);
		q2.a.resize(n + 1,1);
		q1.a[n] = 0;
		q2.a[0] = 0;
		polyn ans;
		ans = q1*p2 + q2*p1 - p1*p2;
		while(ans.a.size() < 2*n)ans.a.push_back(0);
		for(int i = n; i < ans.a.size(); i++){
			cout << ans.a[i] << " ";
		}
		cout << endl;
 return 0;
}
signed main(){
    ios_base::sync_with_stdio(0);cin.tie(0);cout.tie(0);
    //freopen("input.txt", "r", stdin);
    //freopen("output.txt", "w", stdout);
    #ifdef SIEVE
    sieve();
    #endif
    #ifdef NCR
    init();
    #endif
    int t = readIntLn(1,5e3);
    while(t--){
        solve();
    }
    assert(sum_n <= 2e5);
    return 0;
}

Can someone explain the approach without using FFT?

1 Like