Upgrade to Pro — share decks privately, control downloads, hide ads and more …

善用 Go 語言效能測試工具來提升執行效率

Bo-Yi Wu
October 24, 2020

善用 Go 語言效能測試工具來提升執行效率

在 AI 訓練模型前,都需要經過大量的資料處理,而資料處理的速度在整個流程內扮演很重要的角色,寫出高效能的 Parser 能降低整體處理時間,那麼該如何評估程式效能,以及如何快速找到效能瓶頸?本議程會帶大家了解 Go 語言內建的效能測試工具,透過 Benchmark 來找出程式效能瓶頸的地方,快速改善及優化,讓整個系統流程更順暢。也會順道分享 Go 在字串處理優化的一些小技巧。聽過此議程相信您對 Go 語言會有更深入的了解,如果您想寫出有效率的程式碼,本議程一定不能錯過。

Bo-Yi Wu

October 24, 2020

More Decks by Bo-Yi Wu

Other Decks in Technology


  1. About me • Software Engineer in Mediatek • Member of

    Drone CI/CD Platform • Member of Gitea Platform • Member of Gin Golang Framework • Maintain Some GitHub Actions Plugins.
  2. Data 資料量 • 1 ~ 2 萬個檔案 • 每個檔案 1000

    萬行以上 • 總共 8 億 ~ 1x 億個資料點 (單一製程)
  3. 效能結果 (原先使用 400 台機器) 1BSTFS ,# .# . (# .)8

    PME                     VOJUTFDPOE ఏঋYYഒ DPSF( লԼ)8㑌࣍୆(3BN7.
  4. 效能結果 (原先使用 400 台機器) 1BSTFS ,# .# . (# .)8

    PME                     VOJUTFDPOE ఏঋYYഒ DPSF( লԼ)8㑌࣍୆(3BN7.
  5. 自行研發 Parser 原因 • 提高部門產能 (太多單位有分析需求) • 減少 Computing Farm

    使用 (省錢) • 整合 AI Model 花費時間過久 (inference time 不到一秒啊) • 目前在市面上找不到任何一套開源 Parser?
  6. 效能結果 (原先使用 400 台機器) 1BSTFS ,# .# . (# .)8

    PME                     VOJUTFDPOE ఏঋYYഒ DPSF( লԼ)8㑌࣍୆(3BN7.
  7. 第一個版本 var group = regexp.MustCompile(`^(\s*)(\S+)\s*\((.*?)\)\s*{\s*$`) var groupDone = regexp.MustCompile(`^\s*}\s*$`) //

    Simple attribute compile. // key : value; var simpleAttribute = regexp.MustCompile(`^(\s*)(\S+)\s*:\s*(.+)\s*;.*$`) var specialSimpleAttribute = regexp.MustCompile(`^(\s*)(\S+)\s*:\s*(.+)\s*$`) // Complex attribute compile. // key (valueList); var complexAttribute = regexp.MustCompile(`^(\s*)(\S+)\s*(\(.+\))\s*;.*$`) var specialComplexAttribute = regexp.MustCompile(`^(\s*)(\S+)\s*(\(.+\))\s*$`) 效能瓶頸
  8. 埋入程式碼 func main() { start := time.Now() fmt.Println(foobar(1000, 10)) elapsed

    := time.Since(start) log.Printf("foobar took %s", elapsed) }
  9. var fixtures = []struct { path string }{ {"cells.lib"}, {"cells_timing.lib"},

    {"sample2_Late.lib"}, {"small.lib"}, } func withFixtures(b *testing.B, fn func(b *testing.B, input []byte)) { for _, fix := range fixtures { data, err := ioutil.ReadFile(filepath.Join("../testdata", fix.path)) if err != nil { b.Fatal(err) } b.Run(fix.path, func(b *testing.B) { b.ReportAllocs() b.SetBytes(int64(len(data))) b.ResetTimer() fn(b, data) }) } } RunParallel 平行處理 ReportAllocs 記憶體使用量 SetBytes 每次循環使用多少記憶體 ResetTimer 重置 Timer 計算
  10. func BenchmarkWhitespaceArrayInlined(b *testing.B) { withFixtures(b, benchmarkWhitespaceArrayInlined) } func BenchmarkWhitespaceArrayLoop(b *testing.B)

    { withFixtures(b, benchmarkWhitespaceArray) } func BenchmarkWhitespaceSwitch(b *testing.B) { withFixtures(b, benchmarkWhitespaceSwitch) } func BenchmarkWhitespaceIf(b *testing.B) { withFixtures(b, benchmarkWhitespaceIf) } func BenchmarkWhitespaceIfInlined(b *testing.B) { withFixtures(b, benchmarkWhitespaceIfInlined) } ଟ䈕Ҋෆಉํ๏Ꮘೳଌࢼ
  11. n := uint64(0) numProcs := b.parallelism * runtime.GOMAXPROCS(0) var wg

    sync.WaitGroup wg.Add(numProcs) for p := 0; p < numProcs; p++ { go func() { defer wg.Done() pb := &PB{ globalN: &n, grain: grain, bN: uint64(b.N), } body(pb) }() } wg.Wait() b.parallelism 預設為 1,預設為 CPU 個數 https://pkg.go.dev/testing#B.RunParallel
  12. func (m *Metrics) computeStats() { // Discard outliers. values :=

    stats.Sample{Xs: m.Values} q1, q3 := values.Percentile(0.25), values.Percentile(0.75) lo, hi := q1-1.5*(q3-q1), q3+1.5*(q3-q1) for _, value := range m.Values { if lo <= value && value <= hi { m.RValues = append(m.RValues, value) } } // Compute statistics of remaining data. m.Min, m.Max = stats.Bounds(m.RValues) m.Mean = stats.Mean(m.RValues) } 1.5 倍四分位距方式 自動移除有疑慮的數據 golang.org/x/perf/cmd/benchstat
  13. 效能結果 1BSTFS ,# .# . (# .)8 PME  

                      VOJUTFDPOE ఏঋ઀ۙഒ DPSF(
  14. func (l *Lexer) NextToken() { LOOP: switch l.char { case

    '{': l.token.Type = token.LeftBrace l.token.Literal = []byte{l.char} l.token.Line = l.line l.token.Start = l.position l.token.End = l.position + 1 case ' ', '\t', '\r', '\n': if l.char == '\n' { l.line++ } l.readChar() goto LOOP
  15. SkipWhitespace func (l *Lexer) NextToken() { l.skipWhitespace() switch l.char {

    case '{': l.token.Type = token.LeftBrace l.token.Literal = []byte{l.char} l.token.Line = l.line l.token.Start = l.position l.token.End = l.position + 1
  16. values ( \ "0, 0.00014625, 0.0003375, 0.00043875, 0.000545625", \ "0,

    0.065, 0.15, 0.195, 0.2425, 0.295, 0.35, 0.42" \ ); values ( "0.1, 0.2, 0.3", \ "0.11, 0.21, 0.31", \ "0.12, 0.22, 0.32" ); index_2 ("0.02239") index_2 (TVDD, 0.633) statetable ( " D CP SE SI " , "Q" ) 處理多種字串 相加
  17. func (p *Parser) parseMultipleValue() ast.Value { val := ast.Literal{Type: ast.LiteralType}

    values := []string{} for !p.currentTokenTypeIs(token.EOF) { if p.currentTokenTypeIs(token.Escape) { p.nextToken() } else if p.currentTokenTypeIs(token.String) { values = append(values, p.parseString()) p.nextToken() } else if p.currentTokenTypeIs(token.Comma) { values = append(values, p.parseString()) p.nextToken() } else if p.currentTokenTypeIs(token.RightParentheses) { break } } val.Value = strings.Join(values, "") return val }
  18. func (p *Parser) parseMultipleValue() ast.Value { val := ast.Literal{Type: ast.LiteralType}

    buf.Reset() for !p.currentTokenTypeIs(token.EOF) { if p.currentTokenTypeIs(token.Escape) { p.nextToken() } else if p.currentTokenTypeIs(token.String) { buf.WriteString(p.parseString()) p.nextToken() } else if p.currentTokenTypeIs(token.Comma) { buf.WriteString(p.parseString()) p.nextToken() } else if p.currentTokenTypeIs(token.RightParentheses) { break } } val.Value = buf.String() return val } 用 bytes.Buffer 取代
  19. values ( \ "0, 0.00014625, 0.0003375, 0.00043875, 0.000545625", \ "0,

    0.065, 0.15, 0.195, 0.2425, 0.295, 0.35, 0.42" \ ); values ( "0.1, 0.2, 0.3", \ "0.11, 0.21, 0.31", \ "0.12, 0.22, 0.32" ); index_2 ("0.02239") index_2 (TVDD, 0.633) statetable ( " D CP SE SI " , "Q" ) 處理多種字串 相加
  20. case ast.PropertyParentheses: var val ast.Value if !p.peekTokenTypeIs(token.RightParentheses) { val =

    p.parseMultipleValue() prop.Value = val } else { // single value // index_2 ("0.02239") val = p.parseValue() prop.Value = val } 字串處理邏輯調整
  21. case ',': l.token.Type = token.Comma l.token.Literal = []byte{l.char} l.token.Line =

    l.line l.token.Start = l.position l.token.End = l.position + 1 case '"': l.token.Type = token.String l.token.Literal = l.readString() l.token.Line = l.line l.token.Start = l.position l.token.End = l.position + 1 減少記憶體使用量
  22. case ',': l.token.Type = token.Comma l.token.Start = l.position l.token.End =

    l.position + 1 l.token.Literal = l.Data[l.token.Start:l.token.End] l.token.Line = l.line case '"': l.token.Type = token.String l.token.Literal = l.readString() l.token.Line = l.line l.token.Start = l.position l.token.End = l.position + 1 減少記憶體使用量
  23. func (l *Lexer) readString() []byte { l.position++ data := l.Data[l.position:]

    _, length := findStringLen(data) l.position += length l.readPosition = l.position + 1 return data[:length] } 優化 readString
  24. func findStringLen(data []byte) (isValid bool, length int) { for {

    idx := bytes.IndexByte(data, '"') if idx == -1 { return false, len(data) } if idx == 0 || (idx > 0 && data[idx-1] != '\\') { return true, length + idx } length += idx + 1 data = data[idx+1:] } } 找到第一個結束符號
  25. // check previous token // example: // voltage_map ( TVDD,

    0.633); // voltage_map (TVDD, 0.633); if len(l.token.Literal) > 0 && l.token.Literal[0] == '(' { l.token.Type = token.String l.token.Literal = l.readMapString() l.token.Line = l.line l.token.Start = l.position l.token.End = l.position + 1 return } 資料量很大 使用 bytes.IndexByte
  26. 修正 SkipWhiteSpace Func if whitespace[l.Data[l.position]] { if l.Data[l.position] == '\n' { l.line++

    } l.readChar() goto LOOP } var whitespace = [256]bool{ ' ': true, '\t': true, '\n': true, '\r': true, }
  27. 效能結果 (原先使用 400 台機器) 1BSTFS ,# .# . (# .)8

    PME                     VOJUTFDPOE ఏঋYYഒ DPSF( লԼ)8㑌࣍୆(3BN7.