I’m trying to use (almost a copy paste but rewritten) Segrey Makagonov’s code for Ukken’s algorithm for suffix tree construction and then dfs to construct suffix array.
However for N \sim 10^6 my program uses \sim 1.4 GB of space, and when used on problems on CF it gives memory limit exceeded.
Can someone suggest some changes so that I can bring the memory usage down?
Thanks.
This is the code I’m using:
#include <bits/stdc++.h>
using namespace std;
#define MAXN 1000000 //change as needed
#define alphabet 256 //(use 26 for lowercase latin etc)
//suffix tree implementation mostly based on sergey makagonov's work
const int upper_limit = 1 << 25;
int root; //pos of root
int last_added, pos, needSL;
int rem, active_node, active_edge, active_length;
struct node{
//corresponds also to edge ending at this node
int start;
int end;
int suffix_link;
int next[alphabet];
int edge_length(){
return min(end,pos+1) - start; //distance from start
}
};
node suffix_tree[2*MAXN];
char text[MAXN];
int new_node(int start, int end = upper_limit){
node n;
n.start = start;
n.end = end;
n.suffix_link = 0;
for(int i = 0; i < alphabet; i++)
n.next[i] = 0;
suffix_tree[++last_added] = n;
return last_added;
}
char active_edg(){
return text[active_edge];
}
void add_suffix_link(int nod){
if(needSL > 0) suffix_tree[needSL].suffix_link = nod;
needSL = nod;
}
bool walk_down(int nod){
if(active_length >= suffix_tree[nod].edge_length()){
active_edge += suffix_tree[nod].edge_length();
active_length -= suffix_tree[nod].edge_length();
active_node = nod;
return true;
}
return false;
}
void suffix_tree_init(){
needSL = 0, last_added = 0, pos = -1, rem = 0, active_node = 0;
active_edge = 0, active_length = 0;
root = active_node = new_node(-1,-1);
}
void suffix_tree_extend(char c){
text[++pos] = c;
needSL = 0;
rem++;
while(rem > 0){
if(active_length == 0) active_edge = pos;
if(suffix_tree[active_node].next[active_edg()] == 0){
int leaf = new_node(pos);
suffix_tree[active_node].next[active_edg()] = leaf;
add_suffix_link(active_node);
}
else{
int nxt = suffix_tree[active_node].next[active_edg()];
if(walk_down(nxt)) continue;
if(text[suffix_tree[nxt].start + active_length] == c){
active_length++;
add_suffix_link(active_node);
break;
}
int split = new_node(suffix_tree[nxt].start, suffix_tree[nxt].start + active_length);
suffix_tree[active_node].next[active_edg()] = split;
int leaf = new_node(pos);
suffix_tree[split].next[c] = leaf;
suffix_tree[nxt].start += active_length;
suffix_tree[split].next[text[suffix_tree[nxt].start]] = nxt;
add_suffix_link(split);
}
rem--;
if(active_node == root && active_length > 0){
active_length--;
active_edge = pos - rem + 1;
}
else{
active_node = suffix_tree[active_node].suffix_link > 0? suffix_tree[active_node].suffix_link : root;
}
}
}
int suffix_array[MAXN];
int temp, temp2, ins = 0;
void dfs(int cur, int curlen){
//cout << suffix_tree[cur].start << ' ' << suffix_tree[cur].end << ' ' << curlen << endl;
temp = 0;
if(suffix_tree[cur].next['$']){
temp2 = suffix_tree[cur].next['$'];
dfs(temp2, curlen + suffix_tree[cur].end - suffix_tree[cur].start);
}
for(char c = 'a'; c <= 'z'; c++){
if(suffix_tree[cur].next[c]){
//cout << c << endl;
temp2 = suffix_tree[cur].next[c];
dfs(temp2, curlen + suffix_tree[cur].end - suffix_tree[cur].start);
//cout << '~' << c << endl;
}
}
if(temp == 0){
suffix_array[ins++] = suffix_tree[cur].start - curlen;
}
temp = 1;
}
int main(){
ios_base::sync_with_stdio(false);
cin.tie(NULL);
string S;
cin >> S;
suffix_tree_init();
for(int i = 0; i < S.size(); i++)
suffix_tree_extend(S[i]);
suffix_tree_extend('$');
dfs(root,0);
for(int i = 1; i <= S.size(); i++)
cout << suffix_array[i] << ' ';
cout << endl;
return 0;
}