
Performance Tuning of Kubernetes Controllers

Kubernetes Meetup Tokyo #56
2023/03/16
https://k8sjp.connpass.com/event/275280/

Akihiro Ikezoe


Transcript


  8. [Diagram: inside a controller: an Informer feeds events to the Reconciler, which runs on multiple concurrent Workers] (the same diagram is repeated on slide 9)
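In code, this structure maps directly onto controller-runtime. The sketch below is illustrative and not from the talk: the PodReconciler name and the worker count are made up, and controller-runtime wires up the Informer and work queue for you. MaxConcurrentReconciles corresponds to the number of Workers in the diagram.

    package controllers

    import (
        "context"

        corev1 "k8s.io/api/core/v1"
        ctrl "sigs.k8s.io/controller-runtime"
        "sigs.k8s.io/controller-runtime/pkg/client"
        "sigs.k8s.io/controller-runtime/pkg/controller"
    )

    // PodReconciler is a placeholder Reconciler for illustration.
    type PodReconciler struct {
        client.Client
    }

    func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
        // Reads are served from the Informer's cache, not the API server.
        var pod corev1.Pod
        if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
            return ctrl.Result{}, client.IgnoreNotFound(err)
        }
        return ctrl.Result{}, nil
    }

    func (r *PodReconciler) SetupWithManager(mgr ctrl.Manager) error {
        return ctrl.NewControllerManagedBy(mgr).
            For(&corev1.Pod{}).
            // The number of Workers in the diagram.
            WithOptions(controller.Options{MaxConcurrentReconciles: 4}).
            Complete(r)
    }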











  13. ◼ https://github.com/kubernetes/enhancements/issues/1602

    ◼ https://kubernetes.io/docs/reference/instrumentation/metrics/

    ◼ https://kubernetes.io/docs/concepts/cluster-administration/system-traces/

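As an aside on the last link: API server tracing is configured with a TracingConfiguration file passed to kube-apiserver via --tracing-config-file. A minimal sketch, assuming a tracing-capable Kubernetes version (the apiVersion and the endpoint below may need adjusting for your cluster):

    apiVersion: apiserver.config.k8s.io/v1beta1
    kind: TracingConfiguration
    # Sample 1 in 10,000 requests
    samplingRatePerMillion: 100
    # OTLP gRPC collector endpoint (illustrative address)
    endpoint: otel-collector.monitoring:4317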






  14. ⚫ https://cybozu-go.github.io/moco/metrics.html

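MOCO exposes controller-specific metrics of its own (see the URL above). As a sketch of the general pattern, a custom metric can be registered with controller-runtime's registry so it is served from the manager's /metrics endpoint; the metric name here is made up and is not one of MOCO's actual metrics:

    import (
        "github.com/prometheus/client_golang/prometheus"
        crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
    )

    // Illustrative custom metric.
    var checkCount = prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "myoperator_cluster_checks_total",
            Help: "Number of health checks performed per cluster.",
        },
        []string{"name", "namespace"},
    )

    func init() {
        // Anything registered here appears on the manager's /metrics endpoint.
        crmetrics.Registry.MustRegister(checkCount)
    }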
















  16. ◼ https://github.com/cybozu-go/moco/pull/500










  23. [Diagram: Argo CD on a Kubernetes cluster: the Application Controller reconciles Application resources, alongside the ArgoCD Server and Repo Server]

  24. [Diagram: inside the application-controller: an Informer watches Application resources and feeds events to two worker pools, the Status Processors and the Operation Processors]







  29. [Diagram: the application-controller internals again: Informer, Status Processors, and Operation Processors]









  33. # Depth of the "volumes" workqueue in kube-controller-manager
      workqueue_depth{job="kube-controller-manager",name="volumes"}

  34. # 99th percentile of client-side rate limiter delay in kube-controller-manager
      histogram_quantile(0.99, sum(rate(
        rest_client_rate_limiter_duration_seconds_bucket{
          job="kube-controller-manager"
        }[1m]
      )) by (le))

  35. [Diagram: the PersistentVolume controller is one of the controllers running inside kube-controller-manager]


  36. ◼ --kube-api-qps
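This flag caps the sustained request rate from kube-controller-manager to the API server. A minimal sketch for a kubeadm-managed control plane, where the flag is set in the static Pod manifest; the values are illustrative, and --kube-api-burst usually moves together with it:

    # /etc/kubernetes/manifests/kube-controller-manager.yaml (kubeadm layout)
    spec:
      containers:
      - command:
        - kube-controller-manager
        - --kube-api-qps=50     # default 20
        - --kube-api-burst=75   # default 30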



  40. ◼ https://github.com/zoetrope/kubbernecker (a tool for watching how frequently Kubernetes resources are being updated)


  42. # 99th percentile of Reconcile duration
      histogram_quantile(0.99,
        sum(
          rate(controller_runtime_reconcile_time_seconds_bucket[1m])
        ) by (job, controller, le)
      )

      # Reconcile rate, broken down by result
      sum(rate(controller_runtime_reconcile_total[1m])) by (job, controller, result)

  43. # 99th percentile of time spent waiting in the workqueue
      histogram_quantile(0.99,
        sum(rate(workqueue_queue_duration_seconds_bucket[1m])) by (job, name, le))

      # Current workqueue depth
      sum(workqueue_depth) by (job, name)



  44. import (
          "context"
          "net/url"
          "time"

          "github.com/prometheus/client_golang/prometheus"
          clmetrics "k8s.io/client-go/tools/metrics"
          crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
      )

      var (
          // Histogram of client-side rate limiter delay, labeled by verb and host.
          rateLimiterDelay = prometheus.NewHistogramVec(
              prometheus.HistogramOpts{
                  Name:    "rest_client_rate_limiter_duration_seconds",
                  Help:    "client-go rate limiter delay in seconds. Broken down by verb, and host.",
                  Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0},
              },
              []string{"verb", "host"},
          )

          // Compile-time check that latencyAdapter satisfies client-go's LatencyMetric.
          _ clmetrics.LatencyMetric = &latencyAdapter{}
      )

      func init() {
          // Register the histogram with controller-runtime's registry and plug
          // the adapter into client-go's metrics hooks.
          crmetrics.Registry.MustRegister(rateLimiterDelay)
          adapter := latencyAdapter{metric: rateLimiterDelay}
          clmetrics.RateLimiterLatency = &adapter
      }

      type latencyAdapter struct {
          metric *prometheus.HistogramVec
      }

      func (c *latencyAdapter) Observe(_ context.Context, verb string, u url.URL, latency time.Duration) {
          c.metric.WithLabelValues(verb, u.Host).Observe(latency.Seconds())
      }
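To activate the adapter it is enough to blank-import the package that contains it, so that init() runs; the module path here is illustrative:

    import _ "example.com/yourmodule/metrics"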

  45. # 99th percentile of client-side rate limiter delay
      histogram_quantile(0.99,
        sum(
          rate(rest_client_rate_limiter_duration_seconds_bucket[1m])
        ) by (job, verb, le)
      )

  46. # Reconciliation time per Application (Status Processors)
      {job=~"argocd/argocd-application-controller"}
        | logfmt | msg = "Reconciliation completed"
        | line_format "{{.application}}: {{.time_ms}}"

      # Sync time per Application (Operation Processors)
      {job=~"argocd/argocd-application-controller"}
        | logfmt | msg = "sync/terminate complete"
        | line_format "{{.application}}: {{.duration}}"

  47. # Count refresh events per Application (requires debug logging)
      {job=~"argocd/argocd-application-controller"}
        | logfmt | level = "debug" | msg =~ "Refreshing app .*"

      apiVersion: v1
      kind: ConfigMap
      metadata:
        name: argocd-cmd-params-cm
      data:
        # Application Controller log level: set to debug (default "info")
        controller.log.level: "debug"




  49. $ kubectl port-forward svc/argocd-application-controller-metrics -n argocd 8082:8082

      # Take a CPU profile (30 seconds by default)
      $ curl localhost:8082/debug/pprof/profile > cpu.pprof

      # Dump goroutine stacks
      $ curl 'localhost:8082/debug/pprof/goroutine?debug=1'
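The captured profile can then be examined with the standard Go tooling, for example:

    $ go tool pprof -http=:8080 cpu.pprof   # interactive view in the browser
    $ go tool pprof -top cpu.pprof          # plain-text list of the hottest functions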



  50. --otlp-address (sends traces from the application-controller to an OpenTelemetry collector)
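The equivalent setting should also be available through argocd-cmd-params-cm; a sketch, assuming the otlp.address key and an illustrative collector endpoint:

    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-cmd-params-cm
    data:
      # OTLP collector endpoint for traces (illustrative address)
      otlp.address: "opentelemetry-collector.monitoring:4317"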

  51. apiVersion: v1
      kind: ConfigMap
      metadata:
        name: argocd-cmd-params-cm
      data:
        # Number of application status processors (default 20)
        controller.status.processors: "20"
        # Number of application operation processors (default 10)
        controller.operation.processors: "10"

  52. import ctrl "sigs.k8s.io/controller-runtime"

      // ... (snip) ...

      cfg, err := ctrl.GetConfig()
      if err != nil {
          return err
      }
      // Raise the Kubernetes client's rate limits for this controller.
      cfg.QPS = 50
      cfg.Burst = int(cfg.QPS * 1.5)

      mgr, err := ctrl.NewManager(cfg, ctrl.Options{
          ...
      })