Channeling Failure

67f4a8f2a209a38d7242829947b26ba3?s=47 mattheath
February 03, 2015

Channeling Failure

Go’s concurrency primitives make it easy to build highly concurrent systems; however, any distributed system operating at scale will experience failures. Handling failure is especially important in microservice architectures, where requests may traverse many systems initiating remote calls, and the failure of a single component may cascade through several more.

In this talk we look at a number of common patterns, from simple usage of channels to control and throttle concurrency, to more complex patterns such as the CircuitBreaker, which can be used to prevent cascading failures and increase the reliability of our systems.

Images:
ATM - Thomas Hawk - https://www.flickr.com/photos/thomashawk/5650906605/
Network - Norlando Pobre - https://www.flickr.com/photos/npobre/8437956869/

67f4a8f2a209a38d7242829947b26ba3?s=128

mattheath

February 03, 2015
Tweet

Transcript

  1. CHANNELING FAILURE @mattheath

  2. STARLING

  3. None
  4. None
  5. None
  6. None
  7. if err != nil { // save me }

  8. err := pleaseWork() if err != nil { // save

    me }
  9. if err := pleaseWork(); err != nil { // save

    me }
  10. if err := pleaseWork(); err != nil { // pass

    this on? return err }
  11. if err := pleaseWork(); err != nil { // maybe

    this? return nil, err }
  12. if err := pleaseWork(); err != nil { // or

    this? return "", err }
  13. return fmt.Errorf("failed to load url %s", url)

  14. return fmt.Errorf("failed to load url %s: %s", url, err)

  15. url := "http://beesbeesbees.com" resp, err := http.Get(url) if err !=

    nil { return fmt.Errorf("failed to load url %s: %s", url, err) }
  16. return fmt.Errorf("failed to load url %s: %s", url, err)

  17. return fmt.Errorf("timeout")

  18. return errors.New("timeout")

  19. var ErrTimeout = errors.New("timeout") var ErrNotFound = errors.New("not found") var

    ErrTooManyCooks = errors.New("spoil the broth") return errors.New("timeout")
  20. var ErrTimeout = errors.New("timeout") var ErrNotFound = errors.New("not found") var

    ErrTooManyCooks = errors.New("spoil the broth") return ErrTimeout
  21. if err := loadBees(); err != nil { // maybe

    this is ok? return err }
  22. if err := loadBees(); err != nil { switch err

    { case ErrTimeout: // retry? case ErrNotFound: // abort! } }
  23. if err := loadBees(); err != nil { switch err

    { case ErrTimeout: // retry? case ErrNotFound: return err } }
  24. if err := loadBees(); err != nil { switch err

    { case ErrTimeout: // retry? case ErrNotFound: return err default: return err } }
  25. if err := loadBees(); err != nil { switch err

    { case ErrTimeout: // retry? default: return err } }
  26. for { if err := loadBees(); err != nil {

    switch err { case ErrTimeout: continue // retry...? default: return err } } return nil }
  27. for { if err := loadBees(); err != nil {

    switch err { case ErrTimeout: continue // retry...? default: return err } } return nil }
  28. for { if err := loadBees(); err != nil {

    switch err { case ErrTimeout: // retry...? // backoff // increasing timeouts // limits on this default: return err
  29. ErrTimeout?

  30. package httpclient import ( "net" "net/http" "time" ) func TimeoutDialer(cTimeout

    time.Duration, rwTimeout time.Duration) func(netw, addr string)
 (c net.Conn, err error) { return func(netw, addr string) (net.Conn, error) { conn, err := net.DialTimeout(netw, addr, cTimeout) if err != nil { return nil, err } conn.SetDeadline(time.Now().Add(rwTimeout)) return conn, nil } } func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client { return &http.Client{ Transport: &http.Transport{ Dial: TimeoutDialer(connectTimeout, readWriteTimeout), }, } }
  31. package httpclient import ( "net" "net/http" "time" ) func TimeoutDialer(cTimeout

    time.Duration, rwTimeout time.Duration) func(netw, addr string)
 (c net.Conn, err error) { return func(netw, addr string) (net.Conn, error) { conn, err := net.DialTimeout(netw, addr, cTimeout) if err != nil { return nil, err } conn.SetDeadline(time.Now().Add(rwTimeout)) return conn, nil } } func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client { return &http.Client{ Transport: &http.Transport{ Dial: TimeoutDialer(connectTimeout, readWriteTimeout), }, } }
  32. package httpclient import ( "net" "net/http" "time" ) func TimeoutDialer(cTimeout

    time.Duration, rwTimeout time.Duration) func(netw, addr string)
 (c net.Conn, err error) { return func(netw, addr string) (net.Conn, error) { conn, err := net.DialTimeout(netw, addr, cTimeout) if err != nil { return nil, err } conn.SetDeadline(time.Now().Add(rwTimeout)) return conn, nil } } func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client { return &http.Client{ Transport: &http.Transport{ Dial: TimeoutDialer(connectTimeout, readWriteTimeout), }, } }
  33. package httpclient import ( "net" "net/http" "time" ) func TimeoutDialer(cTimeout

    time.Duration, rwTimeout time.Duration) func(netw, addr string)
 (c net.Conn, err error) { return func(netw, addr string) (net.Conn, error) { conn, err := net.DialTimeout(netw, addr, cTimeout) if err != nil { return nil, err } conn.SetDeadline(time.Now().Add(rwTimeout)) return conn, nil } } func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client { return &http.Client{ Transport: &http.Transport{ Dial: TimeoutDialer(connectTimeout, readWriteTimeout), }, } }
  34. package httpclient import ( "net" "net/http" "time" ) func TimeoutDialer(cTimeout

    time.Duration, rwTimeout time.Duration) func(netw, addr string)
 (c net.Conn, err error) { return func(netw, addr string) (net.Conn, error) { conn, err := net.DialTimeout(netw, addr, cTimeout) if err != nil { return nil, err } conn.SetDeadline(time.Now().Add(rwTimeout)) return conn, nil } } func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client { return &http.Client{ Transport: &http.Transport{ Dial: TimeoutDialer(connectTimeout, readWriteTimeout), }, } }
  35. ErrTimeout?

  36. func loadBees() []bee {} // Blocking bees := loadBees()

  37. func loadBees(chan []bee) {} beeChan := make(chan []bee) go loadBees(beeChan)

    // Still blocking bees := <-beeChan
  38. func loadBees(chan []bee) {} beeChan := make(chan []bee) go loadBees(beeChan)

    // Blocks until timeout select { case bees := <-beeChan: case <-time.After(1 * time.Second): }
  39. func loadBees(chan []bee) {} beeChan := make(chan []bee) go loadBees(beeChan)

    // Blocks until timeout select { case bees := <-beeChan: case <-time.After(1 * time.Second): return ErrTimeout }
  40. func loadBees(chan []bee) {} beeChan := make(chan []bee) go loadBees(beeChan)

    // Blocks until timeout select { case bees := <-beeChan: case <-time.After(1 * time.Second): return ErrTimeout }
  41. func loadBees(chan []bee) {} beeChan := make(chan []bee) go loadBees(beeChan)

    // Blocks until timeout select { case bees := <-beeChan: // errors? case <-time.After(1 * time.Second): return ErrTimeout }
  42. func loadBees(chan []bee) {} beeChan := make(chan []bee) go loadBees(beeChan)

    // On timeout, writer is blocked select { case bees := <-beeChan: case <-time.After(1 * time.Second): return ErrTimeout }
  43. func loadBees(chan []bee) {} beeChan := make(chan []bee, 1) go

    loadBees(beeChan) // On timeout, writer not blocked select { case bees := <-beeChan: case <-time.After(1 * time.Second): return ErrTimeout }
  44. connect()

  45. if err := connect(); err != nil { // retry

    }
  46. for { if err := connect(); err != nil {

    // retry } }
  47. for { if err := connect(); err != nil {

    // retry, with backoff } }
  48. for { if err := connect(); err != nil {

    // retry, with backoff } err := <-Conn.NotifyClose() }
  49. for { if err := connect(); err != nil {

    // retry, with backoff } select { case err := <-Conn.NotifyClose(): } }
  50. for { if err := connect(); err != nil {

    // retry, with backoff } select { case err := <-Conn.NotifyClose(): case <-configChange: // disconnect } }
  51. for { if err := connect(); err != nil {

    // retry, with backoff } select { case err := <-Conn.NotifyClose(): case <-quit: return } }
  52. quit := make(chan struct{}) close(quit) select { case err :=

    <-Conn.NotifyClose(): case <-quit: return // channel closed }
  53. select { case <-configChange: // teardown case <-quit: return }

  54. select { case <-lockExpired: // oh crap, teardown! case <-configChange:

    // teardown case <-quit: return }
  55. select { case <-lockExpired: // oh crap, teardown! case <-configChange:

    // teardown case <-tomb.Dying(): return }
  56. notify := make(chan bool) ... // Blocking write into a

    channel notify <- true
  57. notify := make(chan struct{}) ... // Blocking write into a

    channel notify <- struct{}{}
  58. notify := make(chan struct{}, 1) ... // Non-blocking write? notify

    <- struct{}{}
  59. notify := make(chan struct{}) ... // Non-blocking write select {

    case notify <- struct{}{}: default: }
  60. var observers []chan struct{} ... // Non-blocking write select {

    case notify <- struct{}{}: default: }
  61. var observers []chan struct{} ... // Non-blocking writes for o

    := range observers { select { case o <- struct{}{}: default: } }
  62. var observers []chan struct{} ... // Notify all observers, if

    possible for o := range observers { select { case o <- struct{}{}: default: } }
  63. var traces chan []byte func init() { // Use a

    buffered channel traces = make(chan []byte, 200) // Fire off a background worker defaultClient = NewClient(traces) go defaultClient.Publisher() } // Send, drops trace if the backend is at capacity func Send(trace []byte) { select { case traces <- trace: // Success default: // Default case fired if channel is full // Ensures this is non blocking } }
  64. var traces chan []byte func init() { // Use a

    buffered channel traces = make(chan []byte, 200) // Fire off a background worker defaultClient = NewClient(traces) go defaultClient.Publisher() } // Send, drops trace if the backend is at capacity func Send(trace []byte) { select { case traces <- trace: // Success default: // Default case fired if channel is full // Ensures this is non blocking } }
  65. var traces chan []byte func init() { // Use a

    buffered channel traces = make(chan []byte, 200) // Fire off a background worker defaultClient = NewClient(traces) go defaultClient.Publisher() } // Send, drops trace if the backend is at capacity func Send(trace []byte) { select { case traces <- trace: // Success default: // Default case fired if channel is full // Ensures this is non blocking } }
  66. deliveries, err := rabbit.Connect() for d := range deliveries {

    go handleDelivery(d) } func handleDelivery(d *rabbit.Delivery) { // execute request, send response }
  67. func handleDelivery(d *rabbit.Delivery) { // what if we’re heavily loaded?

    }
  68. var tokens chan struct{} tokens = make(chan struct{}, 1000) //

    push 1000 structs into channel func handleDelivery(d *rabbit.Delivery) { // what if we’re heavily loaded? }
  69. var tokens chan struct{} tokens = make(chan struct{}, 1000) //

    push 1000 structs into channel func handleDelivery(d *rabbit.Delivery) { select { case <-tokens: // block executeRequest(d) tokens <- struct{}{} // replace } }
  70. var tokens chan struct{} tokens = make(chan struct{}, 1000) //

    push 1000 structs into channel func handleDelivery(d *rabbit.Delivery) { select { case <-tokens: executeRequest(d) tokens <- struct{}{} // replace default: return ErrBackendAtCapacity }
  71. var tokens chan struct{} tokens = make(chan struct{}, 1000) //

    push 1000 structs into channel func handleDelivery(d *rabbit.Delivery) { select { case <-tokens: executeRequest(d) tokens <- struct{}{} // replace case <-time.After(100 * time.Millisecond) return ErrBackendAtCapacity }
  72. func handleDelivery(d *rabbit.Delivery) { // server is still receiving requests

    }
  73. None
  74. Failures Timeouts Error Rates

  75. client := circuit.NewHTTPClient( time.Second * 5, // timeout 10, //

    threshold nil // http.Client ) resp, err := client.Get("http://beesbeesbees.com/")
  76. None