= src[i*32+j]; }

#pragma unroll
for (int j=0; j<8; ++j) {
    uchar4 v = scatter_in_be(hs[j]);
    vstore4(v, 0, &dst[(i*8+j)*4]);
}

Hashの計算はフルアンローリング

#pragma unroll
for (int j=0; j<64; ++j) {
    const uint s1 = rotate_right(e, 6) ^ rotate_right(e, 11) ^ rotate_right(e, 25);
    const uint ch = (e & f) ^ (~e & g);
    const uint temp1 = h + s1 + ch + keys[j] + ws[j];
    const uint s0 = rotate_right(a, 2) ^ rotate_right(a, 13) ^ rotate_right(a, 22);
    const uint maj = (a & b) ^ (a & c) ^ (b & c);
    const uint temp2 = s0 + maj;
    …
}