Upgrade to PRO for Only $50/Year—Limited-Time Offer! 🔥

Антон Шишкин, Иван Комаров: Ускорение программ ...

Avatar for ekbcpp ekbcpp
November 14, 2013
260

Антон Шишкин, Иван Комаров: Ускорение программ с помощью SIMD-операций

Avatar for ekbcpp

ekbcpp

November 14, 2013
Tweet

Transcript

  1. Naive

    short a[256] __attribute__ ((aligned(16)));
    short b[256] __attribute__ ((aligned(16)));
    int sum = 0;
    int j = 0;
    for(int y=0;y<16;y++){
      for(int x=0;x<16;x++){
        short v = a[j] - b[j];
        sum += v*v;
        j++;
      }
    }

    T = 1.660s
  2. Intrinsic

    int j = 0;
    __m128i xsum = _mm_setzero_si128();
    while(j<256)
    {
      __m128i a16 = _mm_load_si128((__m128i*)&a[j]);
      __m128i b16 = _mm_load_si128((__m128i*)&b[j]);
      __m128i diff16 = _mm_sub_epi16(a16, b16);
      __m128i madd = _mm_madd_epi16(diff16, diff16);
      xsum = _mm_add_epi32(xsum, madd);
      j += 8;
    }
    __m128i shft = _mm_srli_si128(xsum, 8);
    xsum = _mm_add_epi32(xsum, shft);
    shft = _mm_srli_si128(xsum, 4);
    xsum = _mm_add_epi32(xsum, shft);
    sum = _mm_cvtsi128_si32(xsum);

    T = 0.384s
  3. VectorClass lib

    int sum = 0, j=0;
    Vec8s xsum = 0;
    while (j < 256) {
      Vec8s va, vb;
      va.load(&a[j]);
      vb.load(&b[j]);
      Vec8s d = va - vb;
      xsum += d * d;
      j += 8;
    }
    sum = horizontal_add_x(xsum);

    T = 0.386s