Slide 1

Slide 1 text

Emscripten & WebAssembly night 画像処理で比べる WebAssembly @bokuweb

Slide 2

Slide 2 text

@bokuweb http://blog.bokuweb.me • Webフロントエンドエンジニア • 組み込みハードウェアエンジニア • 山に生息 bokuweb 自己紹介

Slide 3

Slide 3 text

bokuweb/rustynes https://github.com/bokuweb/rustynes

Slide 4

Slide 4 text

reg-viz/reg-cli

Slide 5

Slide 5 text

mapbox/pixelmatch

Slide 6

Slide 6 text

wasm化してみて どうなるかみてみる

Slide 7

Slide 7 text

pixelmatchの動き 一致

Slide 8

Slide 8 text

pixelmatchの動き 不一致

Slide 9

Slide 9 text

pixelmatchの動き

Slide 10

Slide 10 text

for(let y = 0; y < height; y++) { for(let x = 0; x < width; x++) { if (色一致) { 赤く描画 } else { 薄く描画 } } } pixelmatchの動き

Slide 11

Slide 11 text

JS/wasmのデータやり取りどうするか リニアメモリ 番地 入力画像 入力画像２ 出力画像

Slide 12

Slide 12 text

JavaScript

Slide 13

Slide 13 text

Byte minify後 JavaScript

Slide 14

Slide 14 text

C

Slide 15

Slide 15 text

extern "C" uint32_t pixelmatch(const uint8_t *img1, const uint8_t *img2,uint32_t width, uint32_t height, uint8_t *output){ float maxDelta = 35215 * 0.1 * 0.1; uint32_t diff = 0; for (uint32_t y = 0; y < height; y++) { for (uint32_t x = 0; x < width; x++) { uint32_t pos = (y * width + x) * 4; float delta = colorDelta(img1, img2, pos, pos, false); if (delta > maxDelta) { drawPixel(output, pos, 255, 0, 0); diff++; } else if (output) { uint8_t val = grayPixel(img1, pos, 0.1); drawPixel(output, pos, val, val, val); } } } return diff; } C

Slide 16

Slide 16 text

emcc -O3 -s ALLOW_MEMORY_GROWTH=1 \ -s EXPORTED_FUNCTIONS=['_pixelmatch'] \ -o pixelmatch.js -s pixelmatch.cpp C

Slide 17

Slide 17 text

Byte とグルーコードのjsが KB 今回未使用 Oの場合は KB C

Slide 18

Slide 18 text

Rust

Slide 19

Slide 19 text

#[no_mangle] pub extern fn pixelmatch(img1: *mut u8, img2: *mut u8, width: u32, height: u32, output: *mut u8) -> u32 { let buf1: &mut [u8] = unsafe { core::slice::from_raw_parts_mut(img1, (width * height * 4) as usize) }; let buf2: &mut [u8] = unsafe { core::slice::from_raw_parts_mut(img2, (width * height * 4) as usize) }; let out: &mut [u8] = unsafe { core::slice::from_raw_parts_mut(output, (width * height * 4) as usize) }; let max_delta = 35215.0 * 0.1 * 0.1; let mut diff_count = 0; for y in 0..height { for x in 0..width { let pos = ((y * width + x) * 4) as u32; let delta = color_delta(buf1, buf2, pos, pos, false); if delta > max_delta { draw_pixel(out, pos, 255, 0, 0, 255); diff_count += 1; } else { let y = gray_pixel(buf1, pos, 0.1); draw_pixel(out, pos, y, y, y, 255); } } } diff_count } Rust

Slide 20

Slide 20 text

wasm-pack build Rust

Slide 21

Slide 21 text

KByte Rust

Slide 22

Slide 22 text

#![no_std] #[no_mangle] pub extern fn pixelmatch( img1: *mut u8, img2: *mut u8, width: u32, height: u32, output: *mut u8) -> u32 { … } Rust

Slide 23

Slide 23 text

Byte ↓ KByte Rust

Slide 24

Slide 24 text

wasm-opt -O3 -o \ pkg/pixelmatch_optimized.wasm \ pkg/pixelmatch_bg.wasm * macならbrew install binaryen で入る Rust

Slide 25

Slide 25 text

Byte ↓ Byte Rust

Slide 26

Slide 26 text

wasm-strip pkg/pixelmatch_optimized.wasm * https://github.com/WebAssembly/wabt に含まれている Rust

Slide 27

Slide 27 text

Byte ↓ Byte Rust

Slide 28

Slide 28 text

AssemblyScript

Slide 29

Slide 29 text

export function pixelmatch(img1: u32,img2: u32, width: u32, height: u32): u32 { let maxDelta = (35215 as f32) * 0.1 * 0.1; let diff = 0; for (let y: u32 = 0; y < height; y++) { for (let x: u32 = 0; x < width; x++) { let pos = (y * width + x) * 4; let delta = colorDelta(img1, img2, pos, pos, false); if (delta > maxDelta) { diff++; drawPixel(img1 + img2, pos, 255, 0, 0); } else { let val = grayPixel(pos, 0.1) as u32; drawPixel(img1 + img2, pos, val, val, val); } } } return diff; } AssemblyScript

Slide 30

Slide 30 text

function get(addr: u32, offset: u32): u8 { return load(addr + offset); } function set(addr: u32, offset: u32, value: u8): void { store(addr + offset, value); } AssemblyScript

Slide 31

Slide 31 text

asc assembly/index.ts -b build/optimized.wasm \ -t build/optimized.wat --validate --optimize \ --importMemory AssemblyScript

Slide 32

Slide 32 text

Byte AssemblyScript

Slide 33

Slide 33 text

Chrome Safari Firefox 1.6 GHz Intel Core i5 16 GB 2133 MHz LPDDR3 ops / sec Bigger is better ops/sec ops/sec ops/sec ops/sec C Rust AS JS ops/sec ops/sec ops/sec ops/sec C Rust AS JS ops/sec ops/sec ops/sec ops/sec C Rust AS JS Benchmark

Slide 34

Slide 34 text

(module (type $FUNCSIG$iiiii (func (param i32 i32 i32 i32) (result i32))) (type $FUNCSIG$iii (func (param i32 i32) (result i32))) (type $FUNCSIG$fff (func (param f32 f32) (result f32))) (type $FUNCSIG$ffff (func (param f32 f32 f32) (result f32))) (type $FUNCSIG$viiiii (func (param i32 i32 i32 i32 i32))) (type $FUNCSIG$viii (func (param i32 i32 i32))) (type $FUNCSIG$v (func)) (type $FUNCSIG$fiii (func (param i32 i32 i32) (result f32))) (type $FUNCSIG$fi (func (param i32) (result f32))) (import "env" "memory" (memory $0 0)) (table $0 1 funcref) (elem (i32.const 0) $null) (export "memory" (memory $0)) (export "table" (table $0)) (export "pixelmatch" (func $assembly/index/pixelmatch)) (func $assembly/index/get (; 0 ;) (type $FUNCSIG$iii) (param $0 i32) (param $1 i32) (result i32) local.get $0 local.get $1 i32.add i32.load8_u ) AssemblyScript

Slide 35

Slide 35 text

@inline AssemblyScript

Slide 36

Slide 36 text

@inline function get(addr: u32, offset: u32): u8 { return load(addr + offset); } @inline function set(addr: u32, offset: u32, value: u8): void { store(addr + offset, value); } AssemblyScript

Slide 37

Slide 37 text

Byte ↑ Byte AssemblyScript

Slide 38

Slide 38 text

Chrome Safari Firefox 1.6 GHz Intel Core i5 16 GB 2133 MHz LPDDR3 ops / sec Bigger is better ops/sec ops/sec ops/sec↑ ops/sec C Rust AS JS ops/sec ops/sec ops/sec↑ ops/sec C Rust AS JS ops/sec ops/sec ops/sec↑ ops/sec C Rust AS JS Benchmark 再

Slide 39

Slide 39 text

(module (type (;0;) (func (param i32 i32 i32 i32) (result f32))) (type (;1;) (func (param i32 i32 i32 i32 i32) (result i32))) (import "env" "memory" (memory (;0;) 256)) (func (;0;) (type 0) (param i32 i32 i32 i32) (result f32) (local i32 i32 i32 i32 i32 i32 f32 f32 f32 f32 f32 f32 f32 f32) local.get 0 local.get 2 i32.const 2 i32.add i32.add i32.load8_s local.tee 4 local.get 1 C colorDelta

Slide 40

Slide 40 text

サイズ gzip後 ops / sec Smaller is better C Rust AssemblyScript JavaScript Byte Byte Byte Byte

Slide 41

Slide 41 text

まとめ • 気にしないとJSより大きくなったり遅くなったりする場合があるよ • ブラウザによってはかなり速くなる（かも）し ブラウザ間での速度差が今回は大きくなかったよ

Slide 42

Slide 42 text

ありがとうございました bokuweb/wasm-pixelmatch