void add1 (int A[N][N], int B[N][N], int C[N][N]) {
int i, j, k, sum;
for (j=0; jfor (i=0; iC[i][j] = A[i][j] + B[i][j];
}
}
}
void add2 (int A[N][N], int B[N][N], int C[N][N]) {
int i, j, k, sum;
for (i=0; ifor (j=0; jC[i][j] = A[i][j] + B[i][j];
}
}
}
$ ./matrix
add1
164.947510 nanoseconds per access
add2
34.484863 nanoseconds per access
add2 is more than 4x faster?!