Upgrade to Pro — share decks privately, control downloads, hide ads and more …

用 Go 語言打造多台機器 Scale 架構

Bo-Yi Wu
September 08, 2020

用 Go 語言打造多台機器 Scale 架構

由於公司內部有分多個網路環境架構,各自有不同的限制,以及背後都有各自的運算伺服器資源,那該如何設計同一份 Go 語言架構來進行部署,讓使用者可以將檔案上傳,並自動部署到後端任意運算伺服器處理,最後將結果傳回前端給使用者。

Bo-Yi Wu

September 08, 2020
Tweet

More Decks by Bo-Yi Wu

Other Decks in Technology

Transcript

  1. About me • Software Engineer in Mediatek • Member of

    Drone CI/CD Platform • Member of Gitea Platform • Member of Gin Golang Framework • Maintain Some GitHub Actions Plugins. • Teacher of Udemy Platform: Golang + Drone
  2. 專案需求 • 客戶單機版 (Docker 版本) • 內建簡易的 Queue 機制 •

    公司內部架構 (軟體 + 硬體) • 多台 Queue 機制 + 硬體模擬 每個 Job 吃 2core 8GB 記憶體
  3. Service 部分元件 • Database: SQLite (不需要 MySQL, Postgres) • Cache:

    Memory (不需要 Redis) • Queue: 自行開發
  4. found := make(chan int) limitCh := make(chan struct{}, concurrencyProcesses) for

    i := 0; i < jobCount; i++ { limitCh <- struct{}{} go func(val int) { defer func() { wg.Done() <-limitCh }() found <- val }(i) } jobCount = 100 concurrencyProcesses = 10
  5. found := make(chan int) limitCh := make(chan struct{}, concurrencyProcesses) for

    i := 0; i < jobCount; i++ { limitCh <- struct{}{} go func(val int) { defer func() { wg.Done() <-limitCh }() found <- val }(i) } jobCount = 100 concurrencyProcesses = 10
  6. found := make(chan int) limitCh := make(chan struct{}, concurrencyProcesses) for

    i := 0; i < jobCount; i++ { go func() { limitCh <- struct{}{} }() go func(val int) { defer func() { <-limitCh wg.Done() }() found <- val }(i) } jobCount = 100 concurrencyProcesses = 10
  7. found := make(chan int) limitCh := make(chan struct{}, concurrencyProcesses) for

    i := 0; i < jobCount; i++ { go func() { limitCh <- struct{}{} }() go func(val int) { defer func() { <-limitCh wg.Done() }() found <- val }(i) } 無法解決 Limit Concurrency jobCount = 100 concurrencyProcesses = 10
  8. found := make(chan int) queue := make(chan int) go func(queue

    chan<- int) { for i := 0; i < jobCount; i++ { queue <- i } close(queue) }(queue) for i := 0; i < concurrencyProcesses; i++ { go func(queue <-chan int, found chan<- int) { for val := range queue { defer wg.Done() found <- val } }(queue, found) } jobCount = 100 concurrencyProcesses = 10
  9. type Consumer struct { inputChan chan int jobsChan chan int

    } const PoolSize = 200 func main() { // create the consumer consumer := Consumer{ inputChan: make(chan int, 1), jobsChan: make(chan int, PoolSize), } }
  10. func (c *Consumer) queue(input int) { fmt.Println("send input value:", input)

    c.jobsChan <- input } func (c *Consumer) worker(num int) { for job := range c.jobsChan { fmt.Println("worker:", num, " job value:", job) } } for i := 0; i < WorkerSize; i++ { go consumer.worker(i) }
  11. rewrite queue func func (c *Consumer) queue(input int) bool {

    fmt.Println("send input value:", input) select { case c.jobsChan <- input: return true default: return false } } 避免使用者大量送資料進來
  12. func WithContextFunc(ctx context.Context, f func()) context.Context { ctx, cancel :=

    context.WithCancel(ctx) go func() { c := make(chan os.Signal) signal.Notify(c, syscall.SIGINT, syscall.SIGTERM) defer signal.Stop(c) select { case <-ctx.Done(): case <-c: f() cancel() } }() return ctx }
  13. func (c Consumer) startConsumer(ctx context.Context) { for { select {

    case job := <-c.inputChan: if ctx.Err() != nil { close(c.jobsChan) return } c.jobsChan <- job case <-ctx.Done(): close(c.jobsChan) return } } } select 不保證讀取 Channel 的順序性
  14. Cancel by ctx.Done() event func (c *Consumer) worker(num int) {

    for job := range c.jobsChan { fmt.Println("worker:", num, " job value:", job) } } Channel 關閉後,還是可以讀取資料到結束
  15. wg := &sync.WaitGroup{} wg.Add(WorkerSize) // Start [PoolSize] workers for i

    := 0; i < WorkerSize; i++ { go consumer.worker(i) }
  16. func (c Consumer) worker(wg *sync.WaitGroup) { defer wg.Done() for job

    := range c.jobsChan { // handle the job event } }
  17. func WithContextFunc(ctx context.Context, f func()) context.Context { ctx, cancel :=

    context.WithCancel(ctx) go func() { c := make(chan os.Signal) signal.Notify(c, syscall.SIGINT, syscall.SIGTERM) defer signal.Stop(c) select { case <-ctx.Done(): case <-c: cancel() f() } }() return ctx } Add WaitGroup after Cancel Function
  18. End of Program select { case <-finished: case err :=

    <-errChannel: if err != nil { return err } }
  19. r := e.Group("/rpc") r.Use(rpc.Check()) { r.POST("/v1/healthz", web.RPCHeartbeat) r.POST("/v1/request", web.RPCRquest) r.POST("/v1/accept",

    web.RPCAccept) r.POST("/v1/details", web.RPCDetails) r.POST("/v1/updateStatus", web.RPCUpdateStatus) r.POST("/v1/upload", web.RPCUploadBytes) r.POST("/v1/reset", web.RPCResetStatus) } Check RPC Secret
  20. /rpc/v1/accept Update jobs set version = (oldVersion + 1) where

    machine = "fooBar" and version = oldVersion
  21. if r.Capacity != 0 { var g errgroup.Group for i

    := 0; i < r.Capacity; i++ { g.Go(func() error { return r.start(ctx, 0) }) time.Sleep(1 * time.Second) } return g.Wait() } 單機版設定多個 Worker
  22. for { var ( id int64 err error ) if

    id, err = r.request(ctx); err != nil { time.Sleep(1 * time.Second) continue } go func() { if err := r.start(ctx, id); err != nil { log.Error().Err(err).Msg("runner: cannot start the job") } }() } 公司內部 + Submit Job
  23. Break for and select loop func (r *Runner) start(ctx context.Context,

    id int64) error { LOOP: for { select { case <-ctx.Done(): return ctx.Err() default: r.poll(ctx, id) if r.Capacity == 0 { break LOOP } } time.Sleep(1 * time.Second) } return nil }
  24. Context with Cancel or Timeout ctx, cancel := context.WithCancel(context.Background()) defer

    cancel() timeout, cancel := context.WithTimeout(ctx, 60*time.Minute) defer cancel() Job03 context
  25. Context with Cancel or Timeout ctx, cancel := context.WithCancel(context.Background()) defer

    cancel() timeout, cancel := context.WithTimeout(ctx, 60*time.Minute) defer cancel() Job03 context Job05 context
  26. Watch the Cancel event (Agent) go func() { done, _

    := r.Manager.Watch(ctx, id) if done { cancel() } }()
  27. User cancel running job c.Lock() c.cancelled[id] = time.Now().Add(time.Minute * 5)

    for subscriber, build := range c.subscribers { if id == build { close(subscriber) } } c.Unlock()
  28. Agent subscribe the cancel event for { select { case

    <-ctx.Done(): return false, ctx.Err() case <-time.After(time.Minute): c.Lock() _, ok := c.cancelled[id] c.Unlock() if ok { return true, nil } case <-subscriber: return true, nil } }