Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Swiftで書かれたプログラムを1000倍速くした話

 Swiftで書かれたプログラムを1000倍速くした話

Swift のプログラムを高速化する過程で、静的ディスパッチ、インライン展開、ポインタ、外部ライブラリとのシームレスな連携など、 Swift が Swift な(速い)プログラムを書けるように作られていることを実感してすごいと感じたという話です。

Yuta Koshizawa

May 25, 2016
Tweet

More Decks by Yuta Koshizawa

Other Decks in Programming

Transcript

  1. Tensor let a = Tensor(shape: [2, 3], elements: [1, 2,

    3, 4, 5, 6]) // [[1, 2, 3], [4, 5, 6]] let b = Tensor(shape: [2, 2, 2], elements: [1, 2, 3, 4, 5, 6, 7, 8]) // [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
  2. 要素へのアクセス let a = Tensor(shape: [2, 3], elements: [1, 2,

    3, 4, 5, 6]) // [[1, 2, 3], [4, 5, 6]] let b = Tensor(shape: [2, 2, 2], elements: [1, 2, 3, 4, 5, 6, 7, 8]) // [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] print(a[1, 2]) // 6 print(b[1, 1, 1]) // 8
  3. Tensor の実装 public struct Tensor { public let shape: [Int]

    public private(set) var elements: [Float] }
  4. subscript の実装 extension Tensor { internal func index(indices: [Int]) ->

    Int { return zip(shape, indices).reduce(0) { $0 * $1.0 + $1.1 } } public subscript(indices: Int...) -> Float { get { return elements[index(indices)] } set { elements[index(indices)] = newValue } } }
  5. matmul extension Tensor { // Matrix public func matmul(tensor: Tensor)

    -> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) let n = shape[1] precondition(n == tensor.shape[0]) let numRows = shape[0] let numCols = tensor.shape[1] var elements: [Float] = [] elements.reserveCapacity(numCols * numRows) for r in 0..<numRows { for c in 0..<numCols { var e: Float = 0.0 for i in 0..<n { e += self[r, i] * tensor[i, c] // ここで `subscript` を利用 } elements.append(e) } } return Tensor(shape: [numRows, numCols], elements: elements) } }
  6. 行列に限定 extension Tensor { // Matrix public func matmul(tensor: Tensor)

    -> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) let n = shape[1] precondition(n == tensor.shape[0]) let numRows = shape[0] let numCols = tensor.shape[1] var elements: [Float] = [] elements.reserveCapacity(numCols * numRows) for r in 0..<numRows { for c in 0..<numCols { var e: Float = 0.0 for i in 0..<n { e += self.elements[r * n + i] * tensor.elements[i * numCols + c] // 行列として計算 } elements.append(e) } } return Tensor(shape: [numRows, numCols], elements: elements) } }
  7. ループ順を入れ替えてキャッシュのヒット率を向上 extension Tensor { // Matrix public func matmul(tensor: Tensor)

    -> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) let n = shape[1] precondition(n == tensor.shape[0]) let numRows = shape[0] let numCols = tensor.shape[1] var elements = [Float](count: numCols * numRows, repeatedValue: 0.0) for r in 0..<numRows { for i in 0..<n { // ここと let e = self.elements[r * n + i] for c in 0..<numCols { // ここが入れ替わった elements[r * numCols + c] += e * tensor.elements[i * numCols + c] } } } return Tensor(shape: [numRows, numCols], elements: elements) } }
  8. ポインタに書き変え extension Tensor { // Matrix public func matmul(tensor: Tensor)

    -> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) let n = shape[1] precondition(n == tensor.shape[0]) let numRows = shape[0] let numCols = tensor.shape[1] // `Array` をポインタに変換 let leftHead = UnsafeMutablePointer<Float>(self.elements) let rightHead = UnsafeMutablePointer<Float>(tensor.elements) let elements = [Float](count: numCols * numRows, repeatedValue: 0.0) for r in 0..<numRows { for i in 0..<n { var pointer = UnsafeMutablePointer<Float>(elements) + r * numCols let left = leftHead[r * n + i] var rightPointer = rightHead + i * numCols for _ in 0..<numCols { // ここで `Array` のインデックスによるアドレス計算が減って高速化 pointer.memory += left * rightPointer.memory pointer += 1 rightPointer += 1 } } } return Tensor(shape: [numRows, numCols], elements: elements) } }
  9. BLAS の利用 import Accelerate extension Tensor { // Matrix public

    func matmul(tensor: Tensor) -> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) precondition(shape[1] == tensor.shape[0]) let result = Tensor(shape: [shape[0], tensor.shape[1]], elements: [Float](count: shape[0] * tensor.shape[1],repeatedValue: 0.0)) let n = Int32(tensor.shape[1]) let k = Int32(shape[1]) cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, Int32(shape[0]), n, k, 1.0, elements, k, tensor.elements, n, 1.0, UnsafeMutablePointer<Float>(result.elements), n) return result } }
  10. References 6 Accelerate Framework: https://developer.apple.com/library/tvos/documentation/Accelerate/Reference/AccelerateFWRef/index.html 5 BLAS: http://www.netlib.org/blas/

    4 "Deep MNIST for Experts": https://www.tensorflow.org/versions/r0.8/tutorials/mnist/pros/index.html 3 "TensorFlowで学習したモデルを使ってiOS/Swiftで実行する" http://qiita.com/koher/items/2c0bfca4d6e31cde674b 2 TensorSwift: https://github.com/qoncept/TensorSwift 1 Qoncept: http://qoncept.co.jp/