$30 off During Our Annual Pro Sale. View Details »

Channeling Failure

mattheath
February 03, 2015

Channeling Failure

Go’s concurrency primitives make it easy to build highly concurrent systems; however, any distributed system operating at scale will experience failures. Handling them is especially important in microservice architectures, where requests may traverse many systems initiating remote calls, and the failure of a single component may cascade through several more.

In this talk we look at a number of common patterns, from simple usage of channels to control and throttle concurrency to more complex patterns such as the CircuitBreaker, which can be used to prevent cascading failures and increase the reliability of our systems.

Images:
ATM - Thomas Hawk - https://www.flickr.com/photos/thomashawk/5650906605/
Network - Norlando Pobre - https://www.flickr.com/photos/npobre/8437956869/

mattheath

February 03, 2015
Tweet

More Decks by mattheath

Other Decks in Programming

Transcript

  1. CHANNELING FAILURE
    @mattheath

    View Slide

  2. STARLING

    View Slide

  3. View Slide

  4. View Slide

  5. View Slide

  6. View Slide

  7. if err != nil {
    // save me
    }

    View Slide

  8. err := pleaseWork()
    if err != nil {
    // save me
    }

    View Slide

  9. if err := pleaseWork(); err != nil {
    // save me
    }

    View Slide

  10. if err := pleaseWork(); err != nil {
    // pass this on?
    return err
    }

    View Slide

  11. if err := pleaseWork(); err != nil {
    // maybe this?
    return nil, err
    }

    View Slide

  12. if err := pleaseWork(); err != nil {
    // or this?
    return "", err
    }

    View Slide

  13. return fmt.Errorf("failed to
    load url %s", url)

    View Slide

  14. return fmt.Errorf("failed to
    load url %s: %s", url, err)

    View Slide

  15. url := "http://beesbeesbees.com"
    resp, err := http.Get(url)
    if err != nil {
    return fmt.Errorf("failed to
    load url %s: %s", url, err)
    }

    View Slide

  16. return fmt.Errorf("failed to
    load url %s: %s", url, err)

    View Slide

  17. return fmt.Errorf("timeout")

    View Slide

  18. return errors.New("timeout")

    View Slide

  19. var ErrTimeout = errors.New("timeout")
    var ErrNotFound = errors.New("not found")
    var ErrTooManyCooks = errors.New("spoil the broth")
    return errors.New("timeout")

    View Slide

  20. var ErrTimeout = errors.New("timeout")
    var ErrNotFound = errors.New("not found")
    var ErrTooManyCooks = errors.New("spoil the broth")
    return ErrTimeout

    View Slide

  21. if err := loadBees(); err != nil {
    // maybe this is ok?
    return err
    }

    View Slide

  22. if err := loadBees(); err != nil {
    switch err {
    case ErrTimeout:
    // retry?
    case ErrNotFound:
    // abort!
    }
    }

    View Slide

  23. if err := loadBees(); err != nil {
    switch err {
    case ErrTimeout:
    // retry?
    case ErrNotFound:
    return err
    }
    }

    View Slide

  24. if err := loadBees(); err != nil {
    switch err {
    case ErrTimeout:
    // retry?
    case ErrNotFound:
    return err
    default:
    return err
    }
    }

    View Slide

  25. if err := loadBees(); err != nil {
    switch err {
    case ErrTimeout:
    // retry?
    default:
    return err
    }
    }

    View Slide

  26. for {
    if err := loadBees(); err != nil {
    switch err {
    case ErrTimeout:
    continue // retry...?
    default:
    return err
    }
    }
    return nil
    }

    View Slide

  27. for {
    if err := loadBees(); err != nil {
    switch err {
    case ErrTimeout:
    continue // retry...?
    default:
    return err
    }
    }
    return nil
    }

    View Slide

  28. for {
    if err := loadBees(); err != nil {
    switch err {
    case ErrTimeout:
    // retry...?
    // backoff
    // increasing timeouts
    // limits on this
    default:
    return err

    View Slide

  29. ErrTimeout?

    View Slide

  30. package httpclient
    import (
    "net"
    "net/http"
    "time"
    )
    func TimeoutDialer(cTimeout time.Duration, rwTimeout time.Duration) func(netw, addr string)

    (c net.Conn, err error) {
    return func(netw, addr string) (net.Conn, error) {
    conn, err := net.DialTimeout(netw, addr, cTimeout)
    if err != nil {
    return nil, err
    }
    conn.SetDeadline(time.Now().Add(rwTimeout))
    return conn, nil
    }
    }
    func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client {
    return &http.Client{
    Transport: &http.Transport{
    Dial: TimeoutDialer(connectTimeout, readWriteTimeout),
    },
    }
    }

    View Slide

  31. package httpclient
    import (
    "net"
    "net/http"
    "time"
    )
    func TimeoutDialer(cTimeout time.Duration, rwTimeout time.Duration) func(netw, addr string)

    (c net.Conn, err error) {
    return func(netw, addr string) (net.Conn, error) {
    conn, err := net.DialTimeout(netw, addr, cTimeout)
    if err != nil {
    return nil, err
    }
    conn.SetDeadline(time.Now().Add(rwTimeout))
    return conn, nil
    }
    }
    func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client {
    return &http.Client{
    Transport: &http.Transport{
    Dial: TimeoutDialer(connectTimeout, readWriteTimeout),
    },
    }
    }

    View Slide

  32. package httpclient
    import (
    "net"
    "net/http"
    "time"
    )
    func TimeoutDialer(cTimeout time.Duration, rwTimeout time.Duration) func(netw, addr string)

    (c net.Conn, err error) {
    return func(netw, addr string) (net.Conn, error) {
    conn, err := net.DialTimeout(netw, addr, cTimeout)
    if err != nil {
    return nil, err
    }
    conn.SetDeadline(time.Now().Add(rwTimeout))
    return conn, nil
    }
    }
    func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client {
    return &http.Client{
    Transport: &http.Transport{
    Dial: TimeoutDialer(connectTimeout, readWriteTimeout),
    },
    }
    }

    View Slide

  33. package httpclient
    import (
    "net"
    "net/http"
    "time"
    )
    func TimeoutDialer(cTimeout time.Duration, rwTimeout time.Duration) func(netw, addr string)

    (c net.Conn, err error) {
    return func(netw, addr string) (net.Conn, error) {
    conn, err := net.DialTimeout(netw, addr, cTimeout)
    if err != nil {
    return nil, err
    }
    conn.SetDeadline(time.Now().Add(rwTimeout))
    return conn, nil
    }
    }
    func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client {
    return &http.Client{
    Transport: &http.Transport{
    Dial: TimeoutDialer(connectTimeout, readWriteTimeout),
    },
    }
    }

    View Slide

  34. package httpclient
    import (
    "net"
    "net/http"
    "time"
    )
    func TimeoutDialer(cTimeout time.Duration, rwTimeout time.Duration) func(netw, addr string)

    (c net.Conn, err error) {
    return func(netw, addr string) (net.Conn, error) {
    conn, err := net.DialTimeout(netw, addr, cTimeout)
    if err != nil {
    return nil, err
    }
    conn.SetDeadline(time.Now().Add(rwTimeout))
    return conn, nil
    }
    }
    func NewTimeoutClient(connectTimeout time.Duration, readWriteTimeout time.Duration) *http.Client {
    return &http.Client{
    Transport: &http.Transport{
    Dial: TimeoutDialer(connectTimeout, readWriteTimeout),
    },
    }
    }

    View Slide

  35. ErrTimeout?

    View Slide

  36. func loadBees() []bee {}
    // Blocking
    bees := loadBees()

    View Slide

  37. func loadBees(chan []bee) {}
    beeChan := make(chan []bee)
    go loadBees(beeChan)
    // Still blocking
    bees := <-beeChan

    View Slide

  38. func loadBees(chan []bee) {}
    beeChan := make(chan []bee)
    go loadBees(beeChan)
    // Blocks until timeout
    select {
    case bees := <-beeChan:
    case <-time.After(1 * time.Second):
    }

    View Slide

  39. func loadBees(chan []bee) {}
    beeChan := make(chan []bee)
    go loadBees(beeChan)
    // Blocks until timeout
    select {
    case bees := <-beeChan:
    case <-time.After(1 * time.Second):
    return ErrTimeout
    }

    View Slide

  40. func loadBees(chan []bee) {}
    beeChan := make(chan []bee)
    go loadBees(beeChan)
    // Blocks until timeout
    select {
    case bees := <-beeChan:
    case <-time.After(1 * time.Second):
    return ErrTimeout
    }

    View Slide

  41. func loadBees(chan []bee) {}
    beeChan := make(chan []bee)
    go loadBees(beeChan)
    // Blocks until timeout
    select {
    case bees := <-beeChan: // errors?
    case <-time.After(1 * time.Second):
    return ErrTimeout
    }

    View Slide

  42. func loadBees(chan []bee) {}
    beeChan := make(chan []bee)
    go loadBees(beeChan)
    // On timeout, writer is blocked
    select {
    case bees := <-beeChan:
    case <-time.After(1 * time.Second):
    return ErrTimeout
    }

    View Slide

  43. func loadBees(chan []bee) {}
    beeChan := make(chan []bee, 1)
    go loadBees(beeChan)
    // On timeout, writer not blocked
    select {
    case bees := <-beeChan:
    case <-time.After(1 * time.Second):
    return ErrTimeout
    }

    View Slide

  44. connect()

    View Slide

  45. if err := connect(); err != nil {
    // retry
    }

    View Slide

  46. for {
    if err := connect(); err != nil {
    // retry
    }
    }

    View Slide

  47. for {
    if err := connect(); err != nil {
    // retry, with backoff
    }
    }

    View Slide

  48. for {
    if err := connect(); err != nil {
    // retry, with backoff
    }
    err := <-Conn.NotifyClose()
    }

    View Slide

  49. for {
    if err := connect(); err != nil {
    // retry, with backoff
    }
    select {
    case err := <-Conn.NotifyClose():
    }
    }

    View Slide

  50. for {
    if err := connect(); err != nil {
    // retry, with backoff
    }
    select {
    case err := <-Conn.NotifyClose():
    case <-configChange:
    // disconnect
    }
    }

    View Slide

  51. for {
    if err := connect(); err != nil {
    // retry, with backoff
    }
    select {
    case err := <-Conn.NotifyClose():
    case <-quit:
    return
    }
    }

    View Slide

  52. quit := make(chan struct{})
    close(quit)
    select {
    case err := <-Conn.NotifyClose():
    case <-quit:
    return // channel closed
    }

    View Slide

  53. select {
    case <-configChange:
    // teardown
    case <-quit:
    return
    }

    View Slide

  54. select {
    case <-lockExpired:
    // oh crap, teardown!
    case <-configChange:
    // teardown
    case <-quit:
    return
    }

    View Slide

  55. select {
    case <-lockExpired:
    // oh crap, teardown!
    case <-configChange:
    // teardown
    case <-tomb.Dying():
    return
    }

    View Slide

  56. notify := make(chan bool)
    ...
    // Blocking write into a channel
    notify <- true

    View Slide

  57. notify := make(chan struct{})
    ...
    // Blocking write into a channel
    notify <- struct{}{}

    View Slide

  58. notify := make(chan struct{}, 1)
    ...
    // Non-blocking write?
    notify <- struct{}{}

    View Slide

  59. notify := make(chan struct{})
    ...
    // Non-blocking write
    select {
    case notify <- struct{}{}:
    default:
    }

    View Slide

  60. var observers []chan struct{}
    ...
    // Non-blocking write
    select {
    case notify <- struct{}{}:
    default:
    }

    View Slide

  61. var observers []chan struct{}
    ...
    // Non-blocking writes
    for o := range observers {
    select {
    case o <- struct{}{}:
    default:
    }
    }

    View Slide

  62. var observers []chan struct{}
    ...
    // Notify all observers, if possible
    for o := range observers {
    select {
    case o <- struct{}{}:
    default:
    }
    }

    View Slide

  63. var traces chan []byte
    func init() {
    // Use a buffered channel
    traces = make(chan []byte, 200)
    // Fire off a background worker
    defaultClient = NewClient(traces)
    go defaultClient.Publisher()
    }
    // Send, drops trace if the backend is at capacity
    func Send(trace []byte) {
    select {
    case traces <- trace:
    // Success
    default:
    // Default case fired if channel is full
    // Ensures this is non blocking
    }
    }

    View Slide

  64. var traces chan []byte
    func init() {
    // Use a buffered channel
    traces = make(chan []byte, 200)
    // Fire off a background worker
    defaultClient = NewClient(traces)
    go defaultClient.Publisher()
    }
    // Send, drops trace if the backend is at capacity
    func Send(trace []byte) {
    select {
    case traces <- trace:
    // Success
    default:
    // Default case fired if channel is full
    // Ensures this is non blocking
    }
    }

    View Slide

  65. var traces chan []byte
    func init() {
    // Use a buffered channel
    traces = make(chan []byte, 200)
    // Fire off a background worker
    defaultClient = NewClient(traces)
    go defaultClient.Publisher()
    }
    // Send, drops trace if the backend is at capacity
    func Send(trace []byte) {
    select {
    case traces <- trace:
    // Success
    default:
    // Default case fired if channel is full
    // Ensures this is non blocking
    }
    }

    View Slide

  66. deliveries, err := rabbit.Connect()
    for d := range deliveries {
    go handleDelivery(d)
    }
    func handleDelivery(d *rabbit.Delivery) {
    // execute request, send response
    }

    View Slide

  67. func handleDelivery(d *rabbit.Delivery) {
    // what if we’re heavily loaded?
    }

    View Slide

  68. var tokens chan struct{}
    tokens = make(chan struct{}, 1000)
    // push 1000 structs into channel
    func handleDelivery(d *rabbit.Delivery) {
    // what if we’re heavily loaded?
    }

    View Slide

  69. var tokens chan struct{}
    tokens = make(chan struct{}, 1000)
    // push 1000 structs into channel
    func handleDelivery(d *rabbit.Delivery) {
    select {
    case <-tokens: // block
    executeRequest(d)
    tokens <- struct{}{} // replace
    }
    }

    View Slide

  70. var tokens chan struct{}
    tokens = make(chan struct{}, 1000)
    // push 1000 structs into channel
    func handleDelivery(d *rabbit.Delivery) {
    select {
    case <-tokens:
    executeRequest(d)
    tokens <- struct{}{} // replace
    default:
    return ErrBackendAtCapacity
    }

    View Slide

  71. var tokens chan struct{}
    tokens = make(chan struct{}, 1000)
    // push 1000 structs into channel
    func handleDelivery(d *rabbit.Delivery) {
    select {
    case <-tokens:
    executeRequest(d)
    tokens <- struct{}{} // replace
    case <-time.After(100 * time.Millisecond)
    return ErrBackendAtCapacity
    }

    View Slide

  72. func handleDelivery(d *rabbit.Delivery) {
    // server is still receiving requests
    }

    View Slide

  73. View Slide

  74. Failures
    Timeouts
    Error Rates

    View Slide

  75. client := circuit.NewHTTPClient(
    time.Second * 5, // timeout
    10, // threshold
    nil // http.Client
    )
    resp, err := client.Get("http://beesbeesbees.com/")

    View Slide

  76. View Slide