Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Errors: to log, or not to log?

Errors: to log, or not to log?

https://konradreiche.com/blog/effective-error-monitoring-in-go

Most of the times errors are returned to the calling code. What happens at the end though? Log the error? Logging every error results in a monitoring system which is noisy and easy to ignore.

What if we let errors define whether they should be logged or not? This way errors can be categorized, tracked as metric and communicate to the calling code what to do with the error.

Lightning Talk, GopherCon 2022, Chicago

Konrad Reiche

October 08, 2022
Tweet

More Decks by Konrad Reiche

Other Decks in Programming

Transcript

  1. Errors
    to log, or not to log?
    Konrad Reiche
    By Renée French (CC)

    View Slide

  2. if err != nil {
    return nil, err
    }

    View Slide

  3. if err != nil {
    return nil, err
    }

    View Slide

  4. cmd, err := rdb.Get(ctx, "key")
    if err != nil {
    return nil, err
    }

    View Slide

  5. resp, err := handler(ctx, req) middleware.go

    View Slide

  6. resp, err := handler(ctx, req)
    if err != nil {
    log.Error(err)
    }
    middleware.go

    View Slide

  7. [ERROR] client update: read tcp: i/o timeout
    [ERROR] dial tcp: lookup redis: i/o timeout
    [ERROR] write tcp 10.101.244.225:46470->192.168.22.191: i/o timeout
    [ERROR] redis: read tcp 10.101.244.225:46470->192.168.22.191:6379: i/o timeout
    [ERROR] fetch data: invalid parameter type: int
    [ERROR] context canceled
    [ERROR] unexpected end of JSON input
    [ERROR] read tcp 10.108.131.55:46078->192.168.10.169:9090: read: connection reset by peer
    [ERROR] internal error
    [ERROR] rpc error: code = Unknown desc = no actions were found to be valid
    [ERROR] rpc error: code = Canceled desc = context canceled
    [ERROR] context canceled
    [ERROR] context canceled
    [ERROR] HTTP 500: write tcp 10.108.251.133:9090->10.111.167.92:33674: i/o timeout
    [ERROR] rpc error: code = Unknown desc = no actions were found to be valid

    View Slide

  8. Noisy Error Monitoring System
    - Time consuming to find critical errors
    - Error log is ignored altogether
    - Logging errors in the first place becomes pointless

    View Slide

  9. [ERROR] client update: read tcp: i/o timeout
    [ERROR] dial tcp: lookup redis: i/o timeout
    [ERROR] write tcp 10.101.244.225:46470->192.168.22.191: i/o timeout
    [ERROR] redis: read tcp 10.101.244.225:46470->192.168.22.191:6379: i/o timeout
    [ERROR] fetch data: invalid parameter type: int
    [ERROR] context canceled
    [ERROR] unexpected end of JSON input
    [ERROR] read tcp 10.108.131.55:46078->192.168.10.169:9090: read: connection reset by peer
    [ERROR] internal error
    [ERROR] rpc error: code = Unknown desc = no actions were found to be valid
    [ERROR] rpc error: code = Canceled desc = context canceled
    [ERROR] context canceled
    [ERROR] context canceled
    [ERROR] HTTP 500: write tcp 10.108.251.133:9090->10.111.167.92:33674: i/o timeout
    [ERROR] rpc error: code = Unknown desc = no actions were found to be valid

    View Slide

  10. [ERROR] client update: read tcp: i/o timeout
    [ERROR] dial tcp: lookup redis: i/o timeout
    [ERROR] write tcp 10.101.244.225:46470->192.168.22.191: i/o timeout
    [ERROR] redis: read tcp 10.101.244.225:46470->192.168.22.191:6379: i/o timeout
    [ERROR] context canceled
    [ERROR] rpc error: code = Canceled desc = context canceled
    [ERROR] context canceled
    [ERROR] context canceled
    [ERROR] HTTP 500: write tcp 10.108.251.133:9090->10.111.167.92:33674: i/o timeout

    View Slide

  11. resp, err := handler(ctx, req)
    if err != nil {
    log.Error(err)
    }
    middleware.go

    View Slide

  12. resp, err := handler(ctx, req)
    if err != nil {
    log.Error(err)
    }
    middleware.go

    View Slide

  13. resp, err := handler(ctx, req)
    if err != nil {
    var e net.Error
    if errors.As(err, &e) && e.Timeout() {
    timeouts.Inc()
    return
    }
    log.Error(err)
    }
    middleware.go

    View Slide

  14. resp, err := handler(ctx, req)
    if err != nil {
    var e net.Error
    if errors.As(err, &e) && e.Timeout() {
    timeouts.Inc()
    return
    }
    if errors.Is(err, context.Canceled) {
    contextsCanceled.Inc()
    return
    }
    log.Error(err)
    }
    middleware.go

    View Slide

  15. cmd, err := rdb.Get(ctx, "key")
    if err != nil {
    return nil, err
    }
    redis.go
    middleware.go
    ...
    ...

    View Slide

  16. cmd, err := rdb.Get(ctx, "key")
    if err != nil {
    return nil, err
    }
    redis.go
    middleware.go
    ...
    ...

    View Slide

  17. cmd, err := rdb.Get(ctx, "key")
    if err != nil {
    var e net.Error
    if errors.As(err, &e) && e.Timeout() {
    timeouts.Inc()
    // ???
    }
    return nil, err
    }
    redis.go
    middleware.go
    ...
    ...

    View Slide

  18. type loggable interface {
    error
    Log() bool
    }

    View Slide

  19. type ObservedError struct {
    cause error
    }
    func (e ObservedError) Log() bool {
    return false
    }
    func (e ObservedError) Unwrap() error {
    return e.cause
    }

    View Slide

  20. cmd, err := rdb.Get(ctx, "key")
    if err != nil {
    var e net.Error
    if errors.As(err, &e) && e.Timeout() {
    timeouts.Inc()
    // ???
    }
    return nil, err
    }
    redis.go
    middleware.go
    ...
    ...

    View Slide

  21. cmd, err := rdb.Get(ctx, "key")
    if err != nil {
    var e net.Error
    if errors.As(err, &e) && e.Timeout() {
    timeouts.Inc()
    err = metrics.NewObservedError(err)
    return nil, err
    }
    return nil, err
    }
    redis.go
    middleware.go
    ...
    ...

    View Slide

  22. resp, err := handler(ctx, req)
    if err != nil {
    var e loggable
    if errors.As(err, &e) && !e.Log() {
    return
    }
    log.Error(err)
    }
    middleware.go

    View Slide

  23. [ERROR] client update: read tcp: i/o timeout
    [ERROR] dial tcp: lookup redis: i/o timeout
    [ERROR] fatal error: concurrent map read and map write
    [ERROR] redis: read tcp 10.101.244.225:46470->192.168.22.191:6379: i/o timeout
    [ERROR] fetch data: invalid parameter type: int
    [ERROR] context canceled
    [ERROR] unexpected end of JSON input
    [ERROR] read tcp 10.108.131.55:46078->192.168.10.169:9090: read: connection reset by peer
    [ERROR] internal error
    [ERROR] rpc error: code = Unknown desc = no actions were found to be valid
    [ERROR] rpc error: code = Canceled desc = context canceled
    [ERROR] context canceled
    [ERROR] context canceled
    [ERROR] HTTP 500: write tcp 10.108.251.133:9090->10.111.167.92:33674: i/o timeout
    [ERROR] rpc error: code = Unknown desc = no actions were found to be valid

    View Slide

  24. [ERROR] fatal error: concurrent map read and map write
    [ERROR] fetch data: invalid parameter type: int

    View Slide

  25. Thank you
    [@|u/]konradreiche[.com]

    View Slide