Need optimization help

I have one function to be more optimized. I’m posting one more (allocate) just to be clearer how it works. Running time of allocate on my computer is ~0.1 sec, and running time of preprocess is ~5 ses for input R = 100 and C = 100. One similar function, which have 2 similar nested for loops [O(n^4) too, but with a little bigger hidden constant], finishes in less than 2 secs. I’m confused, how function which should be faster, runs more than 2 times slower. I don’t know how fast is working with dynamic allocated pointers, but in whole program I’m using same pointers, and difference in running time is abnormal. please help me what I’m missing. COde is below.

Thanks.

int ****memo;
 
//macros
#define SUM(r1,r2,c1,c2) ( B[((r2)+1)][((c2)+1)] - B[((r2)+1)][(c1)] - B[(r1)][((c2)+1)] + B[(r1)][(c1)] )
#define MEMO(r1,r2,c1,c2) memo[(r1)][((r2)-(r1))][(c1)][((c2)-(c1))]
#define VALUE(r1,r2,c1,c2) value[(r1)][((r2)-(r1))][(c1)][((c2)-(c1))]
 
inline void allocate() {
    memo = new int***[101];
 
    for (int i = 0; i <101; ++i) {
        memo[i] = new int**[101-i];
 
        for (int j = 0; j < 101-i; ++j){
            memo[i][j] = new int*[101];
 
            for (int k = 0; k < 101; ++k) {
                memo[i][j][k] = new int[101-k];
            }
        }
    }
}
 
void preprocess() {
    for (int i = 0; i < R; ++i) {
        for (int j = 0; j < C; ++j) {
            memo[i][0][j][0] = A[i][j];
        }
    }
 
    for (int dx = 1; dx < R; ++dx) {
        int lim = R-dx;
        for (int i1 = 0; i1 < lim; ++i1) {
            int i2 = i1+dx;
            for (int j = 0; j < C; ++j) {
                int k = i2-i1;
                memo[i1][k][j][0] = SUM(i1, i2, j, j);
                checkmax(memo[i1][k][j][0], memo[i1][k-1][j][0]);
                checkmax(memo[i1][k][j][0], memo[i1+1][k-1][j][0]);
            }
        }
    }
 
    for (int dy = 1; dy < C; ++dy){
        int lim = C - dy;
        for (int j1 = 0; j1 < lim; ++j1) {
            int j2 = j1+dy;
            for(int i = 0; i < R; ++i) {
                int k = j2-j1;
                memo[i][0][j1][k] = SUM(i, i, j1, j2);
                checkmax(memo[i][0][j1][k], memo[i][0][j1][k-1]);
                checkmax(memo[i][0][j1][k], memo[i][0][j1+1][k-1]);
            }
        }
    }
 
    for (int di = 1; di < R; ++di) {
        for (int dj = 1; dj < C; ++dj) {
            int lim1 = R-di;
            for (int i1 = 0; i1 < lim1; ++i1) {
                int lim2 = C-dj;
                for (int j1 = 0; j1 < lim2; ++j1) {
                    int i2 = i1 + di;
                    int j2 = j1 + dj;
                    int k1 = i2 - i1;
                    int k2 = j2 - j1;
                    memo[i1][k1][j1][k2] = SUM(i1, i2, j1, j2);
                    checkmax(memo[i1][k1][j1][k2], max( max(memo[i1+1][k1-1][j1][k2], memo[i1][k1-1][j1][k2]),
                                                        max(memo[i1][k1][j1+1][k2-1], memo[i1][k1][j1][k2-1])));
                }
            }
        }
    }
}