Upgrade to Pro — share decks privately, control downloads, hide ads and more …

pt&Goroutines

 pt&Goroutines

pt(the_platinum_searcher) を高速化するために Goroutines まわりで試したことを発表しました。
http://connpass.com/event/6370/

monochromegane

May 31, 2014
Tweet

More Decks by monochromegane

Other Decks in Technology

Transcript

  1. pt
    &Goroutine
    - GoCon 2014 spring -

    View full-size slide

  2. MIYAKE Yusuke
    (@monochromegane)

    View full-size slide

  3. GMO Pepabo, Inc.

    View full-size slide

  4. grep
    してますか？

    View full-size slide

  5. pt
    The Platinum Searcher

    View full-size slide

  6. Written in
    Golang

    View full-size slide

  7. Mac OSX
    Linux
    Windows

    View full-size slide

  8. UTF-8
    EUC-JP
    Shift-JIS

    View full-size slide

  9. fast !
    ack go 6.24s user 1.06s system 99% cpu 7.304 total # ack
    ag go 0.88s user 1.39s system 221% cpu 1.027 total # ag
    pt go 1.09s user 1.01s system 235% cpu 0.892 total # pt

    View full-size slide

  10. Goroutine & Channel

    View full-size slide

  11. いっしょに高速化してみましょう

    View full-size slide

  12. 1. ファイルを検索して(find)
    2. 文字列を検索して(grep)
    3. 結果を表示する(print)
    パターン検索とは

    View full-size slide

  13. Approach-0
    !
    順番に

    View full-size slide

  14. find
    grep
    print

    View full-size slide

  15. // find
    find := find.Find{Option: self.Option}
    find.Do(self.Root)
    !
    // grep
    grep := grep.Grep{
    Files: find.Files, // result
    Pattern: self.Pattern,
    Option: self.Option}
    grep.Do()
    !
    // print
    print := print.Print{
    Matches: grep.Matches, // result
    Pattern: self.Pattern,
    Option: self.Option}
    print.Do()

    View full-size slide

  16. > the_simple_searcher go $GOROOT > /dev/null

    View full-size slide

  17. Approach-1
    !
    並行に

    View full-size slide

  18. • Go言語で並行処理を実現する
    • スレッド、コルーチンとは違う
    • Concurrency(並行)とParallelism(並列)
    • 軽量
    • go f()

    View full-size slide

  19. find
    grep
    print
    go
    go
    go

    View full-size slide

  20. • Goroutine間のメッセージング
    • 値の送受信
    • バッファによるブロック

    View full-size slide

  21. find
    grep
    print
    go
    go
    go

    View full-size slide

  22. Channel
    find
    grep
    print
    Channel
    go
    go
    go

    View full-size slide

  23. Channel
    find
    grep
    print
    Channel
    go
    go
    go

    View full-size slide

  24. // channel
    files := make(chan *string, self.Option.Cap)
    matches := make(chan *grep.Match, self.Option.Cap)
    done := make(chan bool)
    !
    // find
    find := find.Find{Files: files, Option: self.Option}
    go find.Do(self.Root)
    !
    // grep
    grep := grep.Grep{
    Files: files,
    Matches: matches,
    Pattern: self.Pattern,
    Option: self.Option}
    go grep.Do()
    !
    // print
    print := print.Print{
    Done: done,
    Matches: matches,
    Pattern: self.Pattern,
    Option: self.Option}
    go print.Do()
    !
    <-done // block

    View full-size slide

  25. walkFunc := func(path string, info os.FileInfo,
    err error) error {
    if info.IsDir() {
    return nil
    }
    self.Files <- &path // send
    return nil
    }
    !
    filepath.Walk(root, walkFunc)
    close(self.Files) // close

    View full-size slide

  26. for file := range self.Files { // receive ( <-self.Files )
    fh, err := os.Open(*file)
    if err != nil {
    panic(err)
    }
    !
    f := bufio.NewReader(fh)
    !
    var buf []byte
    var lineNum = 1
    for {
    buf, _, err = f.ReadLine()
    if err != nil {
    break
    }
    line := string(buf)
    if strings.Contains(line, self.Pattern) {
    self.Matches <- &Match{*file, lineNum, line} // send
    }
    lineNum++
    }
    fh.Close()
    }
    close(self.Matches) // close

    View full-size slide

  27. for match := range self.Matches { // receive
    fmt.Printf("%s:%d:%s\n", match.Path,
    match.Num, match.Match)
    }
    self.Done <- true // send

    View full-size slide

  28. > the_simple_searcher go $GOROOT > /dev/null

    View full-size slide

  29. 0.79 -> 0.87
    seconds

    View full-size slide

  30. • Channelの受付容量
    • ch := make(chan 型, 容量)
    • 容量までは受付
    • 容量超えると送信側は受付待ち
    • 受信すると容量がひとつ空く
    • 容量が0の場合、常に待つ

    View full-size slide

  31. // channel with buffer
    files := make(chan *string, self.Option.Cap)
    matches := make(chan *grep.Match, self.Option.Cap)
    done := make(chan bool) // always wait

    View full-size slide

  32. > the_simple_searcher go $GOROOT > /dev/null

    View full-size slide

  33. 0.79 -> 0.8
    seconds

    View full-size slide

  34. Approach-2
    !
    もっと並行に

    View full-size slide

  35. Channel
    find
    grep
    print
    Channel
    go
    go
    go

    View full-size slide

  36. Channel
    find
    grep
    print
    Channel
    go
    go
    go
    grep
    grep
    grep

    View full-size slide

  37. var wg sync.WaitGroup
    for file := range self.Files {
    wg.Add(1) // goroutineの起動数をインクリメント
    (中略)
    go func(self *Grep, file *string) {
    defer wg.Done() // goroutineが完了したら起動数をデクリメント
    for {
    　　　　　　　　　　　　　　　　　　　(中略)
    }
    fh.Close()
    !
    }(self, file) // クロージャをgoroutineにするときは変数の共有に注意
    !
    }
    wg.Wait() // 不特定数のgoroutineが全て終了するのを待つ
    close(self.Matches)

    View full-size slide

  38. > the_simple_searcher go $GOROOT > /dev/null

    View full-size slide

  39. panic !
    too many open files

    View full-size slide

  40. var wg sync.WaitGroup
    sem := make(chan bool, self.Option.Cap) // 起動するgoroutineの数を制御するchannel
    for file := range self.Files {
    sem <- true // goroutineの起動数(channelのbuffer)がいっぱいなら待つ
    wg.Add(1)
    (中略)
    go func(self *Grep, file *string) {
    defer wg.Done()
    for {
    　　　　　　　　　　　　　　　　　　　(中略)
    }
    fh.Close()
    <-sem // 同時起動数channelのbufferに空きをつくる
    !
    }(self, file)
    !
    }
    wg.Wait()
    close(self.Matches)

    View full-size slide

  41. > the_simple_searcher go $GOROOT > /dev/null

    View full-size slide

  42. 0.79 -> 0.8
    seconds

    View full-size slide

  43. Approach-3
    !
    並列に

    View full-size slide

  44. • Goroutineの並列度
    • デフォルトは1
    • runtime.NumCPU()でコア数を取得
    • runtime.GOMAXPROCS()で並列度を設定

    View full-size slide

  45. > the_simple_searcher go $GOROOT > /dev/null

    View full-size slide

  46. 0.79 -> 0.55 !
    seconds

    View full-size slide

  47. benchmark
    !
    • Mac OSX(10.9.3)
    • CPU: 2.5GHz Core i5(2Core)
    • Memory: 8GB
    • Go: 1.2.2

    View full-size slide

  48. Buffer
    GOMAXPROCS
    Approach

    View full-size slide

  49. 並行化してないので
    並列化しても変わらず コア数以上の指定は
    効果なし
    測定して調整しないと
    場合によっては遅くなる
    Buffer
    GOMAXPROCS
    Approach

    View full-size slide

  50. –Rob Pike
    • Concurrency is powerful.
    • Concurrency is not parallelism.
    • Concurrency enables parallelism.
    • Concurrency makes parallelism (and
    scaling and everything else) easy.

    View full-size slide

  51. 宣伝
    ペパボではエンジニアを募集しています。
    共にサービスを生み出し育ててくれる新しい仲間
    を待っています。
    !
    http://pepabo.com/recruit/career/engineer/

    View full-size slide