Swiftで書かれたプログラムを1000倍速くした話

Swiftで書かれたプログラムを 1000倍速くした話 Yuta Koshizawa @koher

がんばったら行列の積の計算が 1000 倍速くなった

そのときに Swift すごいと感じた話

// Slide: Tensor

// A tensor of shape [2, 3]: [[1, 2, 3], [4, 5, 6]]
let a = Tensor(
    shape: [2, 3],
    elements: [1, 2, 3, 4, 5, 6]
)

// A tensor of shape [2, 2, 2]: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
let b = Tensor(
    shape: [2, 2, 2],
    elements: [1, 2, 3, 4, 5, 6, 7, 8]
)

// Slide: "要素へのアクセス" (accessing elements)

// A tensor of shape [2, 3]: [[1, 2, 3], [4, 5, 6]]
let a = Tensor(
    shape: [2, 3],
    elements: [1, 2, 3, 4, 5, 6]
)

// A tensor of shape [2, 2, 2]: [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
let b = Tensor(
    shape: [2, 2, 2],
    elements: [1, 2, 3, 4, 5, 6, 7, 8]
)

// Elements are addressed with one index per dimension.
print(a[1, 2])    // the element 6
print(b[1, 1, 1]) // the element 8

// Slide: "Tensor の実装" (implementation of Tensor)

/// A multi-dimensional array of `Float` values backed by a single flat array.
public struct Tensor {
    /// Extent of each dimension, e.g. `[2, 3]`.
    public let shape: [Int]

    /// Flat element storage. Publicly readable; the setter is private.
    public private(set) var elements: [Float]
}

// Slide: "subscript の実装" (implementation of the subscript)
extension Tensor {
    /// Converts one index per dimension into an offset into `elements`.
    ///
    /// The offset is accumulated dimension by dimension as
    /// `offset * dimension + index`, so the last dimension varies fastest.
    ///
    /// - Parameter indices: Exactly one index per entry of `shape`.
    /// - Returns: The flat offset of the addressed element.
    internal func index(indices: [Int]) -> Int {
        // `zip` stops at the shorter sequence, so a wrong number of indices
        // would otherwise yield a plausible-looking but wrong offset.
        precondition(indices.count == shape.count, "expected one index per dimension")

        var offset = 0
        for (dimension, position) in zip(shape, indices) {
            // Debug-only check: an out-of-range index in one dimension can
            // still land inside `elements` and silently address another element.
            assert(0 <= position && position < dimension, "index out of range for its dimension")
            offset = offset * dimension + position
        }
        return offset
    }

    /// Reads or writes the element addressed by one index per dimension.
    public subscript(indices: Int...) -> Float {
        get {
            return elements[index(indices)]
        }
        set {
            elements[index(indices)] = newValue
        }
    }
}

// Slide: matmul
extension Tensor { // Matrix
    /// Returns the matrix product of this tensor and `tensor`.
    ///
    /// Both operands must have exactly two dimensions, and this tensor's
    /// column count must equal `tensor`'s row count. Every operand element is
    /// fetched through the variadic `subscript`.
    ///
    /// - Parameter tensor: The right-hand matrix.
    /// - Returns: A tensor of shape `[shape[0], tensor.shape[1]]`.
    public func matmul(tensor: Tensor) -> Tensor {
        precondition(shape.count == 2)
        precondition(tensor.shape.count == 2)
        let inner = shape[1]
        precondition(inner == tensor.shape[0])

        let rowCount = shape[0]
        let columnCount = tensor.shape[1]

        var product: [Float] = []
        product.reserveCapacity(columnCount * rowCount)

        for row in 0..<rowCount {
            for column in 0..<columnCount {
                var sum: Float = 0.0
                for k in 0..<inner {
                    // `subscript` is used here for both operands.
                    sum += self[row, k] * tensor[k, column]
                }
                product.append(sum)
            }
        }

        return Tensor(shape: [rowCount, columnCount], elements: product)
    }
}

// Slide: "行列に限定" (restricted to matrices)
extension Tensor { // Matrix
    /// Returns the matrix product of this tensor and `tensor`.
    ///
    /// Both operands must have exactly two dimensions, and this tensor's
    /// column count must equal `tensor`'s row count. The flat `elements`
    /// arrays are indexed directly instead of going through `subscript`.
    ///
    /// - Parameter tensor: The right-hand matrix.
    /// - Returns: A tensor of shape `[shape[0], tensor.shape[1]]`.
    public func matmul(tensor: Tensor) -> Tensor {
        precondition(shape.count == 2)
        precondition(tensor.shape.count == 2)
        let inner = shape[1]
        precondition(inner == tensor.shape[0])

        let rowCount = shape[0]
        let columnCount = tensor.shape[1]

        var product: [Float] = []
        product.reserveCapacity(columnCount * rowCount)

        for row in 0..<rowCount {
            let leftBase = row * inner
            for column in 0..<columnCount {
                var sum: Float = 0.0
                for k in 0..<inner {
                    // Computed as a matrix: flat offsets are formed inline.
                    sum += self.elements[leftBase + k] * tensor.elements[k * columnCount + column]
                }
                product.append(sum)
            }
        }

        return Tensor(shape: [rowCount, columnCount], elements: product)
    }
}

1.84 倍高速化

// Slide: "ループ順を入れ替えてキャッシュのヒット率を向上"
// (swap the loop order to improve the cache hit rate)
extension Tensor { // Matrix
    /// Returns the matrix product of this tensor and `tensor`.
    ///
    /// Both operands must have exactly two dimensions, and this tensor's
    /// column count must equal `tensor`'s row count. The loop over the shared
    /// dimension sits outside the loop over output columns, so the innermost
    /// loop walks consecutive elements of `tensor` and of the result.
    ///
    /// - Parameter tensor: The right-hand matrix.
    /// - Returns: A tensor of shape `[shape[0], tensor.shape[1]]`.
    public func matmul(tensor: Tensor) -> Tensor {
        precondition(shape.count == 2)
        precondition(tensor.shape.count == 2)
        let inner = shape[1]
        precondition(inner == tensor.shape[0])

        let rowCount = shape[0]
        let columnCount = tensor.shape[1]

        // Pre-filled with zeros because each entry is accumulated in place.
        var product = [Float](count: columnCount * rowCount, repeatedValue: 0.0)

        for row in 0..<rowCount {
            let outputBase = row * columnCount
            for k in 0..<inner { // this loop and ...
                let scale = self.elements[row * inner + k]
                let rightBase = k * columnCount
                for column in 0..<columnCount { // ... this loop were swapped
                    product[outputBase + column] += scale * tensor.elements[rightBase + column]
                }
            }
        }

        return Tensor(shape: [rowCount, columnCount], elements: product)
    }
}

さらに 4.69 倍高速化

// Slide: "ポインタに書き変え" (rewritten with pointers)
extension Tensor { // Matrix
    /// Returns the matrix product of this tensor and `tensor`.
    ///
    /// Both operands must have exactly two dimensions, and this tensor's
    /// column count must equal `tensor`'s row count. The inner loop walks raw
    /// pointers instead of indexing arrays.
    ///
    /// The pointers are obtained through `withUnsafeBufferPointer` /
    /// `withUnsafeMutableBufferPointer`, so they are only used while the
    /// arrays' storage is guaranteed to be alive, and the output is a `var`
    /// array that is legitimately mutable.
    ///
    /// - Parameter tensor: The right-hand matrix.
    /// - Returns: A tensor of shape `[shape[0], tensor.shape[1]]`.
    public func matmul(tensor: Tensor) -> Tensor {
        precondition(shape.count == 2)
        precondition(tensor.shape.count == 2)
        let n = shape[1]
        precondition(n == tensor.shape[0])

        let numRows = shape[0]
        let numCols = tensor.shape[1]

        // Pre-filled with zeros because each entry is accumulated in place.
        var product = [Float](count: numCols * numRows, repeatedValue: 0.0)

        // Convert the arrays to pointers for the duration of the computation.
        self.elements.withUnsafeBufferPointer { left in
            tensor.elements.withUnsafeBufferPointer { right in
                product.withUnsafeMutableBufferPointer { output in
                    for r in 0..<numRows {
                        for i in 0..<n {
                            var pointer = output.baseAddress + r * numCols
                            let leftValue = left[r * n + i]
                            var rightPointer = right.baseAddress + i * numCols
                            for _ in 0..<numCols {
                                // Stepping the pointers replaces the per-element
                                // address computation of `Array` indexing.
                                pointer.memory += leftValue * rightPointer.memory
                                pointer += 1
                                rightPointer += 1
                            }
                        }
                    }
                }
            }
        }

        return Tensor(shape: [numRows, numCols], elements: product)
    }
}

さらに 2.99 倍高速化

// Slide: "BLAS の利用" (using BLAS)
import Accelerate

extension Tensor { // Matrix
    /// Returns the matrix product of this tensor and `tensor`, computed by
    /// `cblas_sgemm` from the Accelerate framework.
    ///
    /// Both operands must have exactly two dimensions, and this tensor's
    /// column count must equal `tensor`'s row count.
    ///
    /// The result is written into a local mutable buffer passed as `&buffer`,
    /// rather than through a pointer into the storage of an immutable `Tensor`.
    ///
    /// - Parameter tensor: The right-hand matrix.
    /// - Returns: A tensor of shape `[shape[0], tensor.shape[1]]`.
    public func matmul(tensor: Tensor) -> Tensor {
        precondition(shape.count == 2)
        precondition(tensor.shape.count == 2)
        precondition(shape[1] == tensor.shape[0])

        let numRows = shape[0]
        let numCols = tensor.shape[1]

        // sgemm is called with beta = 1.0 (it adds into the output), so the
        // output buffer must start out zeroed.
        var product = [Float](count: numRows * numCols, repeatedValue: 0.0)

        let n = Int32(numCols)
        let k = Int32(shape[1])
        cblas_sgemm(
            CblasRowMajor, CblasNoTrans, CblasNoTrans,
            Int32(numRows), n, k,
            1.0, elements, k,
            tensor.elements, n,
            1.0, &product, n
        )

        return Tensor(shape: [numRows, numCols], elements: product)
    }
}

さらに 39.1 倍高速化

あわせて 1012 倍高速化

Swift は Swift な（速い） プログラムが書けるように 作られている！！

References 6 Accelerate Framework: https://developer.apple.com/library/tvos/documentation/Accelerate/Reference/AccelerateFWRef/index.html 5 BLAS: http://www.netlib.org/blas/
4 "Deep MNIST for Experts": https://www.tensorflow.org/versions/r0.8/tutorials/mnist/pros/index.html 3 "TensorFlowで学習したモデルを使ってiOS/Swiftで実行する" http://qiita.com/koher/items/2c0bfca4d6e31cde674b 2 TensorSwift: https://github.com/qoncept/TensorSwift 1 Qoncept: http://qoncept.co.jp/

Swiftで書かれたプログラムを1000倍速くした話

Swiftで書かれたプログラムを1000倍速くした話

Yuta Koshizawa

More Decks by Yuta Koshizawa

Other Decks in Programming

Featured

Transcript

Swiftで書かれたプログラムを 1000倍速くした話 Yuta Koshizawa @koher

がんばったら行列の積の計算が 1000 倍速くなった

そのときに Swift すごいと感じた話

Tensor let a = Tensor(shape: [2, 3], elements: [1, 2,

要素へのアクセス let a = Tensor(shape: [2, 3], elements: [1, 2,

Tensor の実装 public struct Tensor { public let shape: [Int]

subscript の実装 extension Tensor { internal func index(indices: [Int]) ->

matmul extension Tensor { // Matrix public func matmul(tensor: Tensor)

行列に限定 extension Tensor { // Matrix public func matmul(tensor: Tensor)

1.84 倍高速化

ループ順を入れ替えてキャッシュのヒット率を向上 extension Tensor { // Matrix public func matmul(tensor: Tensor)

さらに 4.69 倍高速化

ポインタに書き変え extension Tensor { // Matrix public func matmul(tensor: Tensor)

さらに 2.99 倍高速化

BLAS の利用 import Accelerate extension Tensor { // Matrix public

さらに 39.1 倍高速化

あわせて 1012 倍高速化

Swift は Swift な（速い） プログラムが書けるように 作られている！！

References 6 Accelerate Framework: https://developer.apple.com/library/tvos/documentation/Accelerate/Reference/AccelerateFWRef/index.html 5 BLAS: http://www.netlib.org/blas/