Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Observability in micro-service architectures

mattheath
October 10, 2014

Observability in micro-service architectures

Lightning talk at dotGo 2014.
Video available here: https://www.youtube.com/watch?v=VoFfhEbcEG0

Introduces how we implement distributed request tracing at Hailo, within a platform composed of micro-services built almost entirely in Go.

Using Go's Channels and Selects allows us to forward traces when possible, but not at the expense of serving requests to our end users.

mattheath

October 10, 2014
Tweet

More Decks by mattheath

Other Decks in Programming

Transcript

  1. OBSERVABILITY IN MICROSERVICE
    ARCHITECTURES
    Matt Heath - Technical Lead, Platform
    @mattheath
    dotGo, Oct 2014

    View full-size slide

  2. Go

    Service
    Go

    Service
    Java
    Service
    Load Balancer
    C* C* C*
    API / Routing
    RabbitMQ Message Bus

    (federated clusters per AZ)
    us-east-1 eu-west-1
    Go

    Service
    Go

    Service
    Java
    Service
    Load Balancer
    C* C* C*
    API / Routing
    RabbitMQ Message Bus

    (federated clusters per AZ)

    View full-size slide

  3. hailo~2~api api.v1.customer service.customer
    hailo~2~api api.v1.customer service.customer

    View full-size slide

  4. hailo~2~api api.v1.customer service.customer
    hailo~2~api api.v1.customer service.customer
    REQ
    REP
    REQ
    REP
    IN
    OUT
    IN
    OUT

    View full-size slide

  5. // Req sends a request, and...
    func (c *Client) Req(req *Request, rsp proto.Message,
    options ...Options) errors.Error {
    !
    go c.traceReq(req)
    responseMsg, err := c.doReq(req, options...)
    go c.traceRsp(req, responseMsg, err)
    if err != nil {
    return err
    }
    !
    // Do other things...
    !
    return nil
    }

    View full-size slide

  6. // instrumentedHandler wraps the handler providing instrumentation
    func (ep *Endpoint) instrumentedHandler(req *Request)
    (proto.Message, errors.Error) {
    !
    start := time.Now()
    var err errors.Error
    var msg proto.Message
    !
    // Defer panic handling
    defer func() {
    stats.Record(ep, err, time.Since(start))
    // oh crap i hope this never happens
    }()
    !
    // Execute handler
    go traceIn(req)
    msg, err = ep.Handler(req)
    go traceOut(req, msg, err, time.Since(start))
    return msg, err
    }

    View full-size slide

  7. hailo~2~api api.v1.customer service.customer
    REQ
    RSP
    REQ
    RSP
    IN
    OUT
    IN
    OUT
    NSQ

    View full-size slide

  8. Phosphor
    Host Instances
    Publish
    Service A
    Trace Library
    goroutine
    chan
    UDP
    Service B
    Trace Library
    goroutine
    chan
    UDP
    Trace
    Service
    In-memory
    Aggregates
    Optional

    persistent
    storage
    Dashboards
    Monitoring

    View full-size slide

  9. Phosphor
    Host Instances
    Publish
    Service A
    Trace Library
    goroutine
    chan
    UDP
    Service B
    Trace Library
    goroutine
    chan
    UDP
    Trace
    Service
    In-memory
    Aggregates
    Optional

    persistent
    storage
    Dashboards
    Monitoring

    View full-size slide

  10. var traceChan chan []byte
    !
    // init allocates the package-level trace channel, hands it to a new client
    // stored in defaultClient, and starts that client's publisher method in a
    // background goroutine.
    func init() {
        // Use a buffered channel: up to 200 traces can be queued before
        // further sends would have to wait.
        traceChan = make(chan []byte, 200)

        // Fire off a background worker for this channel
        defaultClient = NewClient(traceChan)
        go defaultClient.publisher()
    }
    !
    // Send queues msg on traceChan without ever blocking the caller. When the
    // channel cannot accept the message immediately, the message is dropped.
    func Send(msg []byte) {
        select {
        default:
            // traceChan has no room right now: discard the trace rather
            // than make the caller wait.
        case traceChan <- msg:
            // Queued successfully.
        }
    }

    View full-size slide

  11. Phosphor
    Trace
    Service
    Host Instances
    Publish
    Service A
    Trace Library
    goroutine
    chan
    UDP
    Service B
    Trace Library
    goroutine
    chan
    UDP
    In-memory
    Aggregates
    Optional

    persistent
    storage
    Dashboards
    Monitoring

    View full-size slide

  12. func (w *worker) loop() {
    var b []byte
    timeout := time.NewTicker(bufferWindow)
    defer timeout.Stop() // Bonus points
    !
    // Spin and forward on traces every time our
    // buffer fills, or when our time window elapses
    for {
    select {
    case b = <-w.ch:
    w.buf = append(w.buf, b)
    if len(w.buf) >= bufferSize {
    w.send()
    }
    case <-timeout.C:
    w.send()
    }
    }
    }

    View full-size slide

  13. Tracing: 33eda743-f124-435c-71fc-3c872bbc98e6
    !
    2014-09-07 02:20:19.867 [/] [START] → -
    2014-09-07 02:20:19.867 [eu-west-1a/ip-10-11-3-51] [REQ] com.hailocab.hailo-2-api → com.hailocab.api.v1.customer.neardrivers -
    2014-09-07 02:20:19.867 [eu-west-1a/ip-10-11-2-203] [IN] com.hailocab.hailo-2-api → com.hailocab.api.v1.customer.neardrivers -
    2014-09-07 02:20:19.868 [eu-west-1a/ip-10-11-2-203] [REQ] com.hailocab.api.v1.customer → com.hailocab.service.feature-flags.features -
    2014-09-07 02:20:19.869 [eu-west-1a/ip-10-11-3-111] [IN] com.hailocab.api.v1.customer → com.hailocab.service.feature-flags.features -
    2014-09-07 02:20:19.876 [eu-west-1a/ip-10-11-3-111] [REQ] com.hailocab.service.feature-flags → com.hailocab.service.hob.list -
    2014-09-07 02:20:19.877 [eu-west-1a/ip-10-11-3-168] [IN] com.hailocab.service.hob → com.hailocab.service.config.compile -
    2014-09-07 02:20:19.877 [eu-west-1a/ip-10-11-3-111] [IN] com.hailocab.service.feature-flags → com.hailocab.service.hob.list -
    2014-09-07 02:20:19.877 [eu-west-1a/ip-10-11-3-111] [REQ] com.hailocab.service.hob → com.hailocab.service.config.compile -
    2014-09-07 02:20:19.883 [eu-west-1a/ip-10-11-3-168] [OUT] com.hailocab.service.hob → com.hailocab.service.config.compile - 5.59 ms
    2014-09-07 02:20:19.886 [eu-west-1a/ip-10-11-3-111] [REP] com.hailocab.service.hob → com.hailocab.service.config.compile - 8.40 ms
    2014-09-07 02:20:19.887 [eu-west-1a/ip-10-11-3-111] [OUT] com.hailocab.service.feature-flags → com.hailocab.service.hob.list - 9.72 ms
    2014-09-07 02:20:19.889 [eu-west-1a/ip-10-11-3-111] [REP] com.hailocab.service.feature-flags → com.hailocab.service.hob.list - 13.23 ms
    2014-09-07 02:20:19.889 [eu-west-1a/ip-10-11-3-111] [OUT] com.hailocab.api.v1.customer → com.hailocab.service.feature-flags.features - 20.58 ms
    2014-09-07 02:20:19.890 [eu-west-1a/ip-10-11-2-203] [REP] com.hailocab.api.v1.customer → com.hailocab.service.feature-flags.features - 22.59 ms
    2014-09-07 02:20:19.902 [eu-west-1a/ip-10-11-2-203] [REQ] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare -
    2014-09-07 02:20:19.903 [eu-west-1a/ip-10-11-2-203] [REQ] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare -
    2014-09-07 02:20:19.903 [eu-west-1a/ip-10-11-2-203] [REQ] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare -
    2014-09-07 02:20:19.904 [eu-west-1a/ip-10-11-3-111] [IN] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare -
    2014-09-07 02:20:19.904 [eu-west-1a/ip-10-11-3-111] [OUT] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare - 0.36 ms
    2014-09-07 02:20:19.905 [eu-west-1a/ip-10-11-2-203] [REP] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare - 1.97 ms
    2014-09-07 02:20:19.905 [eu-west-1a/ip-10-11-2-214] [IN] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare -
    2014-09-07 02:20:19.905 [eu-west-1a/ip-10-11-2-203] [REQ] com.hailocab.api.v1.customer → com.hailocab.service.nearest-driver.search -
    2014-09-07 02:20:19.905 [eu-west-1a/ip-10-11-2-214] [OUT] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare - 0.10 ms

    ERR - com.hailocab.service.fare.basefare: Missing config at xxx
    2014-09-07 02:20:19.906 [eu-west-1a/ip-10-11-2-214] [IN] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare -
    2014-09-07 02:20:19.906 [eu-west-1a/ip-10-11-2-214] [OUT] com.hailocab.api.v1.customer → com.hailocab.service.fare.basefare - 0.06 ms 

    ERR - com.hailocab.service.fare.basefare: Missing config at xxx
    2014-09-07 02:20:19.907 [eu-west-1a/ip-10-11-3-58] [IN] com.hailocab.api.v1.customer → com.hailocab.service.nearest-driver.search -
    2014-09-07 02:20:19.907 [eu-west-1a/ip-10-11-3-58] [REQ] com.hailocab.service.nearest-driver → com.hailocab.service.zoning.search -
    2014-09-07 02:20:19.908 [eu-west-1a/ip-10-11-3-58] [IN] com.hailocab.service.nearest-driver → com.hailocab.service.zoning.search -
    2014-09-07 02:20:19.908 [eu-west-1a/ip-10-11-3-58] [OUT] com.hailocab.service.nearest-driver → com.hailocab.service.zoning.search - 0.20 ms
    2014-09-07 02:20:19.909 [eu-west-1a/ip-10-11-3-58] [REP] com.hailocab.service.nearest-driver → com.hailocab.service.zoning.search - 2.25 ms
    2014-09-07 02:20:19.909 [eu-west-1a/ip-10-11-3-58] [REQ] com.hailocab.service.nearest-driver → com.hailocab.service.raziel.multisearch -
    2014-09-07 02:20:19.912 [eu-west-1a/ip-10-11-3-227] [IN] com.hailocab.service.nearest-driver → com.hailocab.service.raziel.multisearch -
    2014-09-07 02:20:19.919 [eu-west-1a/ip-10-11-3-58] [REP] com.hailocab.service.nearest-driver → com.hailocab.service.raziel.multisearch - 9.46 ms
    2014-09-07 02:20:19.919 [eu-west-1a/ip-10-11-3-58] [REQ] com.hailocab.service.nearest-driver → com.hailocab.service.eta.multitraveltime -
    2014-09-07 02:20:19.919 [eu-west-1a/ip-10-11-3-227] [OUT] com.hailocab.service.nearest-driver → com.hailocab.service.raziel.multisearch - 7.58 ms
    2014-09-07 02:20:19.920 [eu-west-1a/ip-10-11-3-58] [IN] com.hailocab.service.nearest-driver → com.hailocab.service.eta.multitraveltime -
    2014-09-07 02:20:19.920 [eu-west-1a/ip-10-11-3-58] [OUT] com.hailocab.service.nearest-driver → com.hailocab.service.eta.multitraveltime - 0.06 ms
    2014-09-07 02:20:19.921 [eu-west-1a/ip-10-11-3-58] [REP] com.hailocab.service.nearest-driver → com.hailocab.service.eta.multitraveltime - 1.77 ms
    2014-09-07 02:20:19.921 [eu-west-1a/ip-10-11-3-58] [OUT] com.hailocab.api.v1.customer → com.hailocab.service.nearest-driver.search - 14.02 ms
    2014-09-07 02:20:19.921 [eu-west-1a/ip-10-11-2-203] [REP] com.hailocab.api.v1.customer → com.hailocab.service.nearest-driver.search - 15.48 ms
    2014-09-07 02:20:19.941 [eu-west-1a/ip-10-11-2-203] [REQ] com.hailocab.api.v1.customer → com.hailocab.service.experiment.readlastupdated -
    2014-09-07 02:20:19.945 [eu-west-1a/ip-10-11-2-214] [IN] com.hailocab.api.v1.customer → com.hailocab.service.experiment.readlastupdated -
    2014-09-07 02:20:19.947 [eu-west-1a/ip-10-11-2-214] [OUT] com.hailocab.api.v1.customer → com.hailocab.service.experiment.readlastupdated - 1.82 ms
    2014-09-07 02:20:19.947 [eu-west-1a/ip-10-11-2-203] [REP] com.hailocab.api.v1.customer → com.hailocab.service.experiment.readlastupdated - 6.01 ms
    2014-09-07 02:20:19.948 [eu-west-1a/ip-10-11-2-203] [OUT] com.hailocab.hailo-2-api → com.hailocab.api.v1.customer.neardrivers - 80.46 ms
    2014-09-07 02:20:19.950 [eu-west-1a/ip-10-11-3-51] [REP] com.hailocab.hailo-2-api → com.hailocab.api.v1.customer.neardrivers - 82.71 ms

    View full-size slide

  14. THANKS
    PS. We’re hiring!
    dotGo, Oct 2014

    View full-size slide