%run -i ipython_memory_usage_perf.py
In [3]: ones_c = np.ones((1e4,1e4)); v=np.ones(1e4) # C memory layout
In [5]: %timeit v*ones_c[:,0] # cache-unfriendly Fortran-ordered layout
1000 loops, best of 3: 211 µs per loop
Used 0.1445 MiB RAM in 1.02s, ... perf value for cache-misses averages to 230,318/second
In [6]: %timeit v*ones_c[0,:] # cache-friendly C-ordered layout
100000 loops, best of 3: 14.2 µs per loop
Used 0.0000 MiB RAM in 5.99s, ... perf value for cache-misses averages to 5,591/second