Upgrade to Pro — share decks privately, control downloads, hide ads and more …

pt&Goroutines

 pt&Goroutines

pt(the_platinum_searcher) を高速化するために Goroutines まわりで試したことを発表しました。
http://connpass.com/event/6370/

monochromegane

May 31, 2014
Tweet

More Decks by monochromegane

Other Decks in Technology

Transcript

  1. pt
    &Goroutine
    - GoCon 2014 spring -

    View Slide

  2. MIYAKE Yusuke
    (@monochromegane)

    View Slide

  3. GMO Pepabo, Inc.

    View Slide

  4. grep
    してますか？

    View Slide

  5. grep?

    View Slide

  6. ack?

    View Slide

  7. ag?

    View Slide

  8. pt
    The Platinum Searcher

    View Slide

  9. Written in
    Golang

    View Slide

  10. Mac OSX
    Linux
    Windows

    View Slide

  11. UTF-8
    EUC-JP
    Shift-JIS

    View Slide

  12. AND

    View Slide

  13. fast !
    ack go 6.24s user 1.06s system 99% cpu 7.304 total # ack
    ag go 0.88s user 1.39s system 221% cpu 1.027 total # ag
    pt go 1.09s user 1.01s system 235% cpu 0.892 total # pt

    View Slide

  14. How?

    View Slide

  15. Goroutine & Channel

    View Slide

  16. いっしょに高速化してみましょう

    View Slide

  17. 1. ファイルを検索して(find)
    2. 文字列を検索して(grep)
    3. 結果を表示する(print)
    パターン検索とは

    View Slide

  18. Approach-0
    !
    順番に

    View Slide

  19. find

    View Slide

  20. find
    grep

    View Slide

  21. find
    grep
    print

    View Slide

  22. // find
    find := find.Find{Option: self.Option}
    find.Do(self.Root)
    !
    // grep
    grep := grep.Grep{
    Files: find.Files, // result
    Pattern: self.Pattern,
    Option: self.Option}
    grep.Do()
    !
    // print
    print := print.Print{
    Matches: grep.Matches, // result
    Pattern: self.Pattern,
    Option: self.Option}
    print.Do()

    View Slide

  23. > the_simple_searcher go $GOROOT > /dev/null

    View Slide

  24. 0.79
    seconds

    View Slide

  25. Approach-1
    !
    並行に

    View Slide

  26. Goroutine

    View Slide

  27. • Go言語で並行処理を実現する
    • スレッド、コルーチンとは違う
    • Concurrency(並行)とParallelism(並列)
    • 軽量
    • go f()

    View Slide

  28. find
    grep
    print
    go
    go
    go

    View Slide

  29. Channel

    View Slide

  30. • Goroutine間のメッセージング
    • 値の送受信
    • バッファによるブロック

    View Slide

  31. find
    grep
    print
    go
    go
    go

    View Slide

  32. Channel
    find
    grep
    print
    Channel
    go
    go
    go

    View Slide

  33. Channel
    find
    grep
    print
    Channel
    go
    go
    go

    View Slide

  34. // channel
    files := make(chan *string, self.Option.Cap)
    matches := make(chan *grep.Match, self.Option.Cap)
    done := make(chan bool)
    !
    // find
    find := find.Find{Files: files, Option: self.Option}
    go find.Do(self.Root)
    !
    // grep
    grep := grep.Grep{
    Files: files,
    Matches: matches,
    Pattern: self.Pattern,
    Option: self.Option}
    go grep.Do()
    !
    // print
    print := print.Print{
    Done: done,
    Matches: matches,
    Pattern: self.Pattern,
    Option: self.Option}
    go print.Do()
    !

    View Slide

  35. walkFunc := func(path string, info os.FileInfo,
    err error) error {
    if info.IsDir() {
    return nil
    }
    self.Files <- &path
    return nil
    }
    !
    filepath.Walk(root, walkFunc)
    close(self.Files) // close

    View Slide

  36. for file := range self.Files { // receive
    fh, err := os.Open(*file)
    if err != nil {
    panic(err)
    }
    !
    f := bufio.NewReader(fh)
    !
    var buf []byte
    var lineNum = 1
    for {
    buf, _, err = f.ReadLine()
    if err != nil {
    break
    }
    line := string(buf)
    if strings.Contains(line, self.Pattern) {
    self.Matches <- &Match{Path: *file, Num: lineNum, Match: line}
    }
    lineNum++
    }
    fh.Close()
    }
    close(self.Matches) // close

    View Slide

  37. for match := range self.Matches { // receive
    fmt.Printf("%s:%d:%s\n", match.Path,
    match.Num, match.Match)
    }
    self.Done <- true

    View Slide

  38. > the_simple_searcher go $GOROOT > /dev/null

    View Slide

  39. 0.79 -> 0.87
    seconds

    View Slide

  40. ?

    View Slide

  41. buffer

    View Slide

  42. • Channelの受付容量
    • ch := make(chan 型, 容量)
    • 容量までは受付
    • 容量超えると送信側は受付待ち
    • 受信すると容量がひとつ空く
    • 容量が0の場合、常に待つ

    View Slide

  43. // channel with buffer
    files := make(chan *string, self.Option.Cap)
    matches := make(chan *grep.Match, self.Option.Cap)
    done := make(chan bool) // always wait

    View Slide

  44. > the_simple_searcher go $GOROOT > /dev/null

    View Slide

  45. 0.79 -> 0.8
    seconds

    View Slide

  46. Approach-2
    !
    もっと並行に

    View Slide

  47. Channel
    find
    grep
    print
    Channel
    go
    go
    go

    View Slide

  48. Channel
    find
    grep
    print
    Channel
    go
    go
    go
    grep
    grep
    grep

    View Slide

  49. var wg sync.WaitGroup
    for file := range self.Files {
    wg.Add(1) // goroutineの起動数をインクリメント
    (中略)
    go func(self *Grep, file *string) {
    defer wg.Done() // goroutineが完了したら起動数をデクリメント
    for {
    (中略)
    }
    fh.Close()
    !
    }(self, file) // クロージャをgoroutineにするときは変数の共有に注意
    !
    }
    wg.Wait() // 不特定数のgoroutineが全て終了するのを待つ
    close(self.Matches)

    View Slide

  50. > the_simple_searcher go $GOROOT > /dev/null

    View Slide

  51. panic !
    too many open files

    View Slide

  52. var wg sync.WaitGroup
    sem := make(chan bool, self.Option.Cap) // 起動するgoroutineの数を制御するchannel
    for file := range self.Files {
    sem <- true
    wg.Add(1)
    (中略)
    go func(self *Grep, file *string) {
    defer wg.Done()
    for {
    (中略)
    }
    fh.Close()
    !
    }(self, file)
    !
    }
    wg.Wait()
    close(self.Matches)

    View Slide

  53. > the_simple_searcher go $GOROOT > /dev/null

    View Slide

  54. 0.79 -> 0.8
    seconds

    View Slide

  55. Approach-3
    !
    並列に

    View Slide

  56. GOMAXPROCS

    View Slide

  57. • Goroutineの並列度
    • デフォルトは1
    • runtime.NumCPU()でコア数を取得
    • runtime.GOMAXPROCS()で並列度を設定

    View Slide

  58. > the_simple_searcher go $GOROOT > /dev/null

    View Slide

  59. 0.79 -> 0.55 !
    seconds

    View Slide

  60. benchmark
    !
    • Mac OSX(10.9.3)
    • CPU: 2.5GHz Core i5(2Core)
    • Memory: 8GB
    • Go: 1.2.2

    View Slide

  61. Buffer
    GOMAXPROCS
    Approach

    View Slide

  62. 並行化してないので
    並列化しても変わらず コア数以上の指定は
    効果なし
    測定して調整しないと
    場合によっては遅くなる
    Buffer
    GOMAXPROCS
    Approach

    View Slide

  63. –Rob Pike
    • Concurrency is powerful.
    • Concurrency is not parallelism.
    • Concurrency enables parallelism.
    • Concurrency makes parallelism (and
    scaling and everything else) easy.

    View Slide

  64. 宣伝
    ペパボではエンジニアを募集しています。
    共にサービスを生み出し育ててくれる新しい仲間
    を待っています。
    !
    http://pepabo.com/recruit/career/engineer/

    View Slide

  65. おわり

    View Slide