Yuta Koshizawa
May 25, 2016
3.1k

# Swiftで書かれたプログラムを1000倍速くした話

Swift のプログラムを高速化する過程で、静的ディスパッチ、インライン展開、ポインタ、外部ライブラリとのシームレスな連携など、 Swift が Swift な（速い）プログラムを書けるように作られていることを実感してすごいと感じたという話です。

May 25, 2016

## Transcript

4. ### Tensor let a = Tensor(shape: [2, 3], elements: [1, 2,

3, 4, 5, 6]) // [[1, 2, 3], [4, 5, 6]] let b = Tensor(shape: [2, 2, 2], elements: [1, 2, 3, 4, 5, 6, 7, 8]) // [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
5. ### 要素へのアクセス let a = Tensor(shape: [2, 3], elements: [1, 2,

3, 4, 5, 6]) // [[1, 2, 3], [4, 5, 6]] let b = Tensor(shape: [2, 2, 2], elements: [1, 2, 3, 4, 5, 6, 7, 8]) // [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] print(a[1, 2]) // 6 print(b[1, 1, 1]) // 8
6. ### Tensor の実装 public struct Tensor { public let shape: [Int]

public private(set) var elements: [Float] }
7. ### subscript の実装 extension Tensor { internal func index(indices: [Int]) ->

Int { return zip(shape, indices).reduce(0) { \$0 * \$1.0 + \$1.1 } } public subscript(indices: Int...) -> Float { get { return elements[index(indices)] } set { elements[index(indices)] = newValue } } }
8. ### matmul extension Tensor { // Matrix public func matmul(tensor: Tensor)

-> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) let n = shape[1] precondition(n == tensor.shape[0]) let numRows = shape[0] let numCols = tensor.shape[1] var elements: [Float] = [] elements.reserveCapacity(numCols * numRows) for r in 0..<numRows { for c in 0..<numCols { var e: Float = 0.0 for i in 0..<n { e += self[r, i] * tensor[i, c] // ここで `subscript` を利用 } elements.append(e) } } return Tensor(shape: [numRows, numCols], elements: elements) } }
9. ### 行列に限定 extension Tensor { // Matrix public func matmul(tensor: Tensor)

-> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) let n = shape[1] precondition(n == tensor.shape[0]) let numRows = shape[0] let numCols = tensor.shape[1] var elements: [Float] = [] elements.reserveCapacity(numCols * numRows) for r in 0..<numRows { for c in 0..<numCols { var e: Float = 0.0 for i in 0..<n { e += self.elements[r * n + i] * tensor.elements[i * numCols + c] // 行列として計算 } elements.append(e) } } return Tensor(shape: [numRows, numCols], elements: elements) } }

11. ### ループ順を入れ替えてキャッシュのヒット率を向上 extension Tensor { // Matrix public func matmul(tensor: Tensor)

-> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) let n = shape[1] precondition(n == tensor.shape[0]) let numRows = shape[0] let numCols = tensor.shape[1] var elements = [Float](count: numCols * numRows, repeatedValue: 0.0) for r in 0..<numRows { for i in 0..<n { // ここと let e = self.elements[r * n + i] for c in 0..<numCols { // ここが入れ替わった elements[r * numCols + c] += e * tensor.elements[i * numCols + c] } } } return Tensor(shape: [numRows, numCols], elements: elements) } }

13. ### ポインタに書き変え extension Tensor { // Matrix public func matmul(tensor: Tensor)

-> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) let n = shape[1] precondition(n == tensor.shape[0]) let numRows = shape[0] let numCols = tensor.shape[1] // `Array` をポインタに変換 let leftHead = UnsafeMutablePointer<Float>(self.elements) let rightHead = UnsafeMutablePointer<Float>(tensor.elements) let elements = [Float](count: numCols * numRows, repeatedValue: 0.0) for r in 0..<numRows { for i in 0..<n { var pointer = UnsafeMutablePointer<Float>(elements) + r * numCols let left = leftHead[r * n + i] var rightPointer = rightHead + i * numCols for _ in 0..<numCols { // ここで `Array` のインデックスによるアドレス計算が減って高速化 pointer.memory += left * rightPointer.memory pointer += 1 rightPointer += 1 } } } return Tensor(shape: [numRows, numCols], elements: elements) } }

15. ### BLAS の利用 import Accelerate extension Tensor { // Matrix public

func matmul(tensor: Tensor) -> Tensor { precondition(shape.count == 2) precondition(tensor.shape.count == 2) precondition(shape[1] == tensor.shape[0]) let result = Tensor(shape: [shape[0], tensor.shape[1]], elements: [Float](count: shape[0] * tensor.shape[1],repeatedValue: 0.0)) let n = Int32(tensor.shape[1]) let k = Int32(shape[1]) cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, Int32(shape[0]), n, k, 1.0, elements, k, tensor.elements, n, 1.0, UnsafeMutablePointer<Float>(result.elements), n) return result } }

19. ### References 6 Accelerate Framework: https://developer.apple.com/library/tvos/documentation/Accelerate/Reference/AccelerateFWRef/index.html 5 BLAS: http://www.netlib.org/blas/

4 "Deep MNIST for Experts": https://www.tensorflow.org/versions/r0.8/tutorials/mnist/pros/index.html 3 "TensorFlowで学習したモデルを使ってiOS/Swiftで実行する" http://qiita.com/koher/items/2c0bfca4d6e31cde674b 2 TensorSwift: https://github.com/qoncept/TensorSwift 1 Qoncept: http://qoncept.co.jp/