Upgrade to Pro — share decks privately, control downloads, hide ads and more …

SwiftのSIMDとその利用方法

kntk
August 22, 2024

 SwiftのSIMDとその利用方法

iOSDC2024 Day 1 15:40~ Track D

kntk

August 22, 2024
Tweet

More Decks by kntk

Other Decks in Technology

Transcript

  1. •Swiftにも存在 •RealityKitで使う •高速に演算ができる固定長配列 はじめに let a = SIMD4<Float>(1.0, 2.0, 3.0,

    4.0) let b = SIMD4<Float>(5.0, 6.0, 7.0, 8.0) let add = a + b // 6.0, 8.0, 10.0, 12.0
  2. SwiftのSIMD ユースケース SIMDプログラミング（画像のボックスフィルタ） let input: [[UInt8]] = … var output: [[UInt8]]

    = … for j in 0..<height { for i in 0..<width { var sum = 0 for ry in -R...R { for rx in -R...R { sum += Int(input[j + ry][i + rx]) } } output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1))) } }
  3. SwiftのSIMD ユースケース SIMDプログラミング（画像のボックスフィルタ） let input: [[UInt8]] = … var output: [[UInt8]]

    = … for j in 0..<height { for i in 0..<width { var sum = 0 for ry in -R...R { for rx in -R...R { sum += Int(input[j + ry][i + rx]) } } output[j][i] = UInt8(sum / ((2 * R + 1) * (2 * R + 1))) } }
  4. SwiftのSIMD ユースケース SIMDプログラミング（画像のボックスフィルタ） for y in 0..<height { do { var

    x = 0 while x < width { var sum = SIMD8<UInt16>.zero for k in 0..<L { let startIndex = x + k sum &+= SIMD8<UInt16>(yresult[startIndex..<startIndex+8]) } sum /= weightSIMD for k in 0..<8 { output[y][x+k] = sum[k] }
  5. SwiftのSIMD •要素毎の演算をサポート ユースケース 座標表現 let a = SIMD2<Float>(x: 1.0, y: 2.0)

    let b = SIMD2<Float>(x: 1.0, y: 3.0) a + b // (2.0, 5.0) 要素毎の和 a .== b // (true, false) 要素毎の一致 pointwiseMax(a, b) // (1.0, 3.0) 要素毎のmax
  6. SwiftのSIMD •演算ができる let a = SIMD4<Float>(1.0, 2.0, 3.0, 4.0) let

    b = SIMD4<Float>(1.0, 2.0, 3.0, 4.0) a + b // SIMD4<Float>(2.0, 4.0, 6.0, 8.0) a - b // SIMD4<Float>(0.0, 0.0, 0.0, 0.0) a * b // SIMD4<Float>(1.0, 4.0, 9.0, 16.0) a / b // SIMD4<Float>(1.0, 1.0, 1.0, 1.0) 使用例 SIMDn
  7. SwiftのSIMD •struct SIMDn<Scalar: SIMDScalar>: SIMD •n: 要素数 let

    a = SIMD2<Float>(1.0, 2.0) let b = SIMD3<Float>(1.0, 2.0, 3.0) let c = SIMD4<Float>(1.0, 2.0, 3.0, 4.0) 型と関係 struct SIMDn
  8. SwiftのSIMD •struct SIMDn<Scalar: SIMDScalar>: SIMD •n: 要素数 public

    struct SIMD2<Scalar: SIMDScalar>: SIMD public struct SIMD3<Scalar: SIMDScalar>: SIMD public struct SIMD4<Scalar: SIMDScalar>: SIMD ... 型と関係 struct SIMDn
  9. SwiftのSIMD •struct SIMDn<Scalar: SIMDScalar>: SIMD •n: 要素数 •Scalar: 要素の型

    let a = SIMD2<Float>(1.0, 2.0) let b = SIMD2<Double>(1.0, 2.0) let c = SIMD4<Int32>(1, 2, 3, 4) let d = SIMD4<UInt32>(1, 2, 3, 4) 型と関係 struct SIMDn
  10. SwiftのSIMD // ラベルあり (SIMD2,3,4のみ) SIMD3<Float>(x: 1.0, y: 2.0, z: 3.0)

    // ラベルなし SIMD3<Float>(1.0, 2.0, 3.0) // arrayLiteral let a: SIMD3<Int> = [1, 2, 3] // Sequence SIMD3<Int>([1,2,3]) let array = [1,2,3,4,5,6] SIMD3<Int>(array[1..<4]) SIMD3<Int>(array.suffix(3)) 使用例 SIMDnの生成
  11. SwiftのSIMD // 拡張 let xy = SIMD2<Int>(10, 20) let xyz

    = SIMD3<Int>(xy, 30) // SIMD3<Int>(10, 20, 30) 使用例 SIMDnの生成
  12. SwiftのSIMD // 拡張 let xy = SIMD2<Int>(10, 20) let xyz

    = SIMD3<Int>(xy, 30) // SIMD3<Int>(10, 20, 30) // シャッフル let yxz = xyz[.init(1, 0, 2)] // SIMD3<Int>(20, 10, 30) 使用例 SIMDnの生成
  13. SwiftのSIMD // 拡張 let xy = SIMD2<Int>(10, 20) let xyz

    = SIMD3<Int>(xy, 30) // SIMD3<Int>(10, 20, 30) // シャッフル let yxz = xyz[.init(1, 0, 2)] // SIMD3<Int>(20, 10, 30) // 部分抽出 let xy = xyz[.init(0, 1)] // SIMD2<Int>(10, 20) 使用例 SIMDnの生成
  14. SwiftのSIMD •FixedWidthInteger向け操作 let a = SIMD3<UInt8>(x: 1, y: 2, z:

    3) let b = SIMD3<UInt8>(x: 1, y: 1, z: 1) ~a // SIMD3<UInt8>(254, 253, 252) a & b // SIMD3<UInt8>(1, 0, 1) 型と関係 protocol SIMDScalar
  15. SwiftのSIMD •FixedWidthInteger向け操作 •FloatingPoint向け操作 let a = SIMD3<Float>(x: 2.0, y: 3.0,

    z: 4.0) a.squareRoot() // SIMD3<Float>(1.4142135, 1.7320508, 2.0) let a = SIMD3<Float>(x: 1.0, y: 1.4, z: 1.8) a.rounded(.up) // SIMD3<Float>(1.0, 2.0, 2.0) let a = SIMD3<UInt8>(x: 1, y: 2, z: 3) let b = SIMD3<UInt8>(x: 1, y: 1, z: 1) ~a // SIMD3<UInt8>(254, 253, 252) a & b // SIMD3<UInt8>(1, 0, 1) 型と関係 protocol SIMDScalar
  16. •全てのSIMDnで使える関数 SwiftのSIMD func abs<Vector: SIMD>(_ simd: Vector) -> Vector where

    Vector.Scalar: FloatingPoint { simd.replacing(with: -simd, where: simd .< 0) } let simd2 = SIMD2<Float>(1.0, -2.0) abs(simd2) // SIMD2<Float>(1.0, 2.0) let simd3 = SIMD3<Float>(1.0, -2.0, 3.0) abs(simd3) // SIMD3<Float>(1.0, 2.0, 3.0) 使用例 protocol SIMD
  17. •全てのSIMDnで使える関数 SwiftのSIMD extension SIMD where Scalar: SIMDScalar { var xy:

    SIMD2<Scalar> { self[.init(0, 1)] } var xyz: SIMD3<Scalar> { self[.init(0, 1, 2)] } } let simd4 = SIMD4<Float>(0.0, 1.0, 2.0, 3.0) let xy = simd4.xy // SIMD2<Float>(0.0, 1.0) let xyz = simd4.xyz // SIMD3<Float>(0.0, 1.0, 2.0) 使用例 protocol SIMD
  18. SwiftのSIMD •struct SIMDMask<Storage> •要素毎の比較結果 型と関係 SIMDMask let a = SIMD3<UInt8>(1, 2, 3)

    let b = SIMD3<UInt8>(3, 2, 1) a == b // false a .== b // SIMDMask<SIMD3<Int8>>(false, true, false)
  19. SwiftのSIMD •struct SIMDMask 型と関係 SIMDMask let a = SIMD4<Int32>(1, -1, 2, -2)

    let mask = a.<0 //SIMDMask<SIMD4<Int32>>(false,true,false,true) // maskがtrueの要素だけ`-a`で置き換える a.replacing(with: -a, where: mask) // SIMD4<Int32>(1, 1, 2, 2) // trueなものが一つでもあるか any(mask) // true // すべてtrueか all(mask) // false
  20. extension SIMD2 where Scalar == UInt8 { public static func

    &+(a: Self, b: Self) -> Self { Self(Builtin.add_Vec2xInt8(a._storage._value, b._storage._value)) } public static func &-(a: Self, b: Self) -> Self { Self(Builtin.sub_Vec2xInt8(a._storage._value, b._storage._value)) } public static func &*(a: Self, b: Self) -> Self { Self(Builtin.mul_Vec2xInt8(a._storage._value, b._storage._value)) } } SwiftのSIMD どうやってSIMD命令を発行している？
  21. SwiftのSIMD •組み込み関数で実装していない処理が多い •FloatingPointはBuiltinの実装がない！ 問題点 // Implementations of floating-point operations. These

    should eventually all // be replaced with @_semantics to lower directly to vector IR nodes. extension SIMD where Scalar: FloatingPoint { public static func +(a: Self, b: Self) -> Self { var result = Self() for i in result.indices { result[i] = a[i] + b[i] } return result } }
  22. SwiftのSIMD •組み込み関数で実装していない処理が多い •FloatingPointはBuiltinの実装がない！ •（一応）自動ベクトル化は効きやすい 問題点 extension SIMD where Scalar:

    FloatingPoint { public static func +(a: Self, b: Self) -> Self { var result = Self() for i in result.indices { result[i] = a[i] + b[i] } return result } }