feat(helm): Adding KEDA autoscaling support #7282

Merged: 62 commits, Feb 13, 2024
Changes from 1 commit
Commits (62)
9ff0e92
feat(helm): Adding KEDA autoscaling support
beatkind Feb 3, 2024
f7d7afd
fix: update changelog
beatkind Feb 3, 2024
ce04869
feat(helm): Porting the changes from #6971 into helm chart
beatkind Feb 3, 2024
9ad9f67
feat: Adding better changelog & values documentation outlining the ex…
beatkind Feb 13, 2024
c0f29e7
fix: Remove duplicate query field, add base url in CHANGELOG.md
beatkind Feb 13, 2024
38702d2
helm: align grpc server connection lifetime settings with jsonnet (#7…
narqo Feb 5, 2024
c59d35e
querymiddleware: Fix race condition in shardActiveSeriesMiddleware (#…
narqo Feb 5, 2024
21bbd22
version: add UserAgent() (#7264)
narqo Feb 5, 2024
916c340
helm: remove -server.grpc.keepalive.max-connection-idle from common c…
narqo Feb 5, 2024
05b2dce
Compactor: export estimated number of compaction jobs based on bucket…
pstibrany Feb 5, 2024
16a59ad
Add KubePersistentVolumeFillingUp runbook (#7297)
pracucci Feb 5, 2024
3a778ae
Internal: remove unnecessary parameter to NoCompactionMarkFilter (#7301)
pstibrany Feb 5, 2024
04b8224
Name query metrics for easier discovery (#7302)
56quarters Feb 5, 2024
1b3708e
fix(deps): update module github.com/aws/aws-sdk-go to v1.50.11 (#7288)
renovate[bot] Feb 6, 2024
6798bed
fix(deps): update module github.com/klauspost/compress to v1.17.6 (#7…
renovate[bot] Feb 6, 2024
213a453
chore(deps): update anchore/sbom-action action to v0.15.8 (#7286)
renovate[bot] Feb 6, 2024
105d82f
chore(deps): update grafana/agent docker tag to v0.39.2 (#7287)
renovate[bot] Feb 6, 2024
7f1c9fe
chore(deps): update grafana/grafana docker tag to v10.3.1 (#7292)
renovate[bot] Feb 6, 2024
28be80c
fix(deps): update module github.com/failsafe-go/failsafe-go to v0.4.4…
renovate[bot] Feb 6, 2024
6e3ff83
Chore: removed unused parameter from GenerateBlockFromSpec() (#7303)
pracucci Feb 6, 2024
14d241a
Update mimir-prometheus (#7293)
pracucci Feb 6, 2024
55c978e
Release mimir-distributed Helm chart 5.3.0-weekly.276 (#7294)
grafanabot Feb 6, 2024
0c6d6db
Open circuit breakers on timeouts and per-instance limit errors only …
duricanikolic Feb 7, 2024
f1c8e71
Get rid of iterators.chunkIterator and iterators.chunkMergeIterator (…
duricanikolic Feb 7, 2024
185c2fe
Compactor: Language fixes (#7315)
aknuds1 Feb 7, 2024
1627df3
Do not register compat metrics in mimirtool (#7314)
grobinson-grafana Feb 7, 2024
6f57c5c
Compactor: Un-export symbols that don't need to be exported (#7317)
aknuds1 Feb 7, 2024
28e09c5
Circuit breakers: add client.ErrCircuitBreakerOpen type (#7324)
duricanikolic Feb 8, 2024
bbcb640
Add mimirpb.CIRCUIT_BREAKER_OPEN error cause (#7330)
duricanikolic Feb 8, 2024
1d2d2a7
store-gateway: remove cortex_bucket_store_blocks_loaded_by_duration (…
dimitarvdimitrov Feb 8, 2024
c9c074b
ruler: don't retry on non-retriable error (#7216)
narqo Feb 8, 2024
3624447
Update Alertmanager to f69a508 (#7332)
grobinson-grafana Feb 8, 2024
eaae699
Helm: add ruler specific service account (#7132)
QuantumEnigmaa Feb 8, 2024
84a2add
frontend/transport: log non-2xx replies from downstream as non-succes…
narqo Feb 8, 2024
dffd834
querymiddleware: Pool snappy writer in shard activity series (#7308)
narqo Feb 8, 2024
c1e523d
Helm: make PSP configurable (#7190)
QuantumEnigmaa Feb 8, 2024
b22fed6
Helm - Templatable host for gateway ingress/route (#7218)
Itaykal Feb 8, 2024
33b6a8a
[Docs] Update migrate-from-single-zone-with-helm.md (#7327)
eamonryan Feb 8, 2024
d3797d6
Always sort labels in distributors (#7326)
Logiraptor Feb 8, 2024
0c8a166
Do not check for ingester ring state before creating TSDB, or compact…
pracucci Feb 9, 2024
7952c2e
Compactor: String format compaction plan as comma separated blocks (#…
aknuds1 Feb 9, 2024
262ae64
Add a lifetime manager for Vault authentication tokens (#7337)
fayzal-g Feb 9, 2024
2dba521
fix(deps): update github.com/grafana/dskit digest to f245b48 (#7283)
renovate[bot] Feb 9, 2024
c8e62c8
Packaging: remove reload from systemd file as mimir does not take int…
wilfriedroset Feb 9, 2024
1745d88
Docs: No longer mark OTLP endpoint as experimental (#7348)
aknuds1 Feb 10, 2024
aa3813c
Update golang.org/x/exp digest to 2c58cdc (#7352)
renovate[bot] Feb 12, 2024
f7c3cb7
Update module github.com/aws/aws-sdk-go to v1.50.15 (#7353)
renovate[bot] Feb 12, 2024
831f9e2
Update module github.com/minio/minio-go/v7 to v7.0.67 (#7354)
renovate[bot] Feb 12, 2024
f720020
Update dependency puppeteer to v21.11.0 (#7355)
renovate[bot] Feb 12, 2024
c5e9dfe
Update helm/kind-action action to v1.9.0 (#7357)
renovate[bot] Feb 12, 2024
271a805
Update module cloud.google.com/go/storage to v1.37.0 (#7358)
renovate[bot] Feb 12, 2024
22b163e
Jsonnet / Helm: improve distributors graceful shutdown (#7361)
pracucci Feb 12, 2024
66a893a
Release mimir-distributed Helm chart 5.3.0-weekly.277 (#7362)
grafanabot Feb 12, 2024
8ba0cad
Distributor: Make `-distributor.enable-otlp-metadata-storage` flag de…
aknuds1 Feb 12, 2024
f95dc9d
Mark -ingester.limit-inflight-requests-using-grpc-method-limiter and …
pracucci Feb 12, 2024
f9e9d6f
Do not consider out-of-order blocks when filtering compactable jobs (…
jhalterman Feb 12, 2024
f10561c
mimir: Inject span profiler into tracer (#7363)
narqo Feb 13, 2024
3a7e509
Add experimental partitions ring lifecycler support (#7349)
pracucci Feb 13, 2024
188d181
feat(helm): Adding KEDA autoscaling support
beatkind Feb 13, 2024
87add14
chore: rebase branch with main
beatkind Feb 13, 2024
cf2e13e
Merge branch 'grafana:main' into add-helm-keda
beatkind Feb 13, 2024
c96d4c6
chore: make build-helm-tests
beatkind Feb 13, 2024
operations/helm/charts/mimir-distributed/CHANGELOG.md (1 addition, 1 deletion)
@@ -28,7 +28,7 @@ Entries should include a reference to the Pull Request that introduced the chang

## main / unreleased

* [FEATURE] Added experimental feature for deploying keda autoscaling objects as part of the helm chart for the components: distributor, querier, query-frontend and ruler. Requires metamonitoring, for more details on metamonitoring see the Helm chart documentation. #7282
* [FEATURE] Added an experimental feature for deploying [KEDA](https://keda.sh) ScaledObjects as part of the Helm chart for the distributor, querier, query-frontend and ruler components. Autoscaling can be enabled via `distributor.kedaAutoscaling`, `ruler.kedaAutoscaling`, `query_frontend.kedaAutoscaling`, and `querier.kedaAutoscaling`. Requires metamonitoring; for more details on metamonitoring, see [Monitor the health of your system](https://grafana.com/docs/helm-charts/mimir-distributed/latest/run-production-environment-with-helm/monitor-system-health/). See https://github.com/grafana/mimir/issues/7367 for a migration procedure. #7282
* [CHANGE] Rollout-operator: remove default CPU limit. #7125
* [CHANGE] Ring: relaxed the hash ring heartbeat period and timeout for distributor, ingester, store-gateway and compactor: #6860
* `-distributor.ring.heartbeat-period` set to `1m`
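To make the changelog entry above concrete, here is a minimal values sketch for enabling the new autoscaling on the distributor (illustrative only: the key names are taken from the values.yaml changes later in this diff, and the tenant header is only needed when metamonitoring writes to a multi-tenant Mimir):

```yaml
# Minimal sketch - assumes KEDA and the chart's metamonitoring are already set up.
distributor:
  # Leave replicas null so the KEDA-managed HPA owns the replica count.
  replicas: null
  kedaAutoscaling:
    enabled: true
    minReplicaCount: 2
    maxReplicaCount: 10
    targetCPUUtilizationPercentage: 100
    targetMemoryUtilizationPercentage: 100
    customHeaders:
      X-Scope-OrgID: "tenant-1"  # optional; sent by KEDA when querying the metamonitoring endpoint
```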
Distributor ScaledObject template:
@@ -4,7 +4,7 @@ kind: ScaledObject
metadata:
name: {{ include "mimir.resourceName" (dict "ctx" . "component" "distributor") }}
labels:
{{- include "mimir.labels" (dict "ctx" . "component" "distributor" "memberlist" true) | nindent 4 }}
{{- include "mimir.labels" (dict "ctx" . "component" "distributor") | nindent 4 }}
annotations:
{{- toYaml .Values.distributor.annotations | nindent 4 }}
namespace: {{ .Release.Namespace | quote }}
@@ -24,8 +24,7 @@ spec:
kind: Deployment
triggers:
- metadata:
metricName: cortex_distributor_cpu_hpa_default
query: max_over_time(sum(rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"}[5m]))[15m:]) * 1000
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
{{- $cpu_request := dig "requests" "cpu" nil .Values.distributor.resources }}
threshold: {{ mulf (include "mimir.parseCPU" (dict "value" $cpu_request)) (divf .Values.distributor.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }}
@@ -34,8 +33,7 @@ spec:
{{- end }}
type: prometheus
- metadata:
metricName: cortex_distributor_memory_hpa_default
query: max_over_time(sum(container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"})[15m:])
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="distributor",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
{{- $mem_request := dig "requests" "memory" nil .Values.distributor.resources }}
threshold: {{ mulf (include "mimir.siToBytes" (dict "value" $mem_request)) (divf .Values.distributor.kedaAutoscaling.targetMemoryUtilizationPercentage 100) | floor | int64 | quote }}
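The reworked distributor queries above are dense, so the memory query is repeated here reformatted purely for readability (whitespace only, with `{{ .Release.Namespace }}` abbreviated to `<ns>`). The first term sums working-set memory only over pods that report an `up` series, and the second term adds the memory request of pods that restarted within the last 15 minutes with an `OOMKilled` termination reason, so OOM-killed pods keep contributing to the scaling metric:

```yaml
# Readability-only restatement of the distributor memory trigger query.
query: |-
  max_over_time(
    sum(
      (
        sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="<ns>"})
        and
        max by (pod) (up{container="distributor",namespace="<ns>"}) > 0
      ) or vector(0)
    )[15m:]
  )
  +
  sum(
    sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="<ns>", resource="memory"}[15m]))
    and
    max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="<ns>"}[15m]) > 0)
    and
    max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="<ns>", reason="OOMKilled"})
    or vector(0)
  )
```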
Querier ScaledObject template:
@@ -7,7 +7,7 @@ kind: ScaledObject
metadata:
name: {{ include "mimir.resourceName" (dict "ctx" . "component" "querier") }}
labels:
{{- include "mimir.labels" (dict "ctx" . "component" "querier" "memberlist" true) | nindent 4 }}
{{- include "mimir.labels" (dict "ctx" . "component" "querier") | nindent 4 }}
annotations:
{{- toYaml .Values.querier.annotations | nindent 4 }}
namespace: {{ .Release.Namespace | quote }}
@@ -27,7 +27,6 @@ spec:
kind: Deployment
triggers:
- metadata:
metricName: cortex_querier_hpa_default
query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="{{ .Release.Namespace }}",quantile="0.5"}[1m]))
serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }}
@@ -37,7 +36,6 @@ spec:
name: cortex_querier_hpa_default
type: prometheus
- metadata:
metricName: cortex_querier_hpa_default_requests_duration
query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="{{ .Release.Namespace }}"}[1m]))
serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
threshold: {{ .Values.querier.kedaAutoscaling.querySchedulerInflightRequestsThreshold | quote }}
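Unlike the resource-based triggers above, the querier scales on query-scheduler inflight requests. As a rough illustration of how the threshold behaves (assuming KEDA's default AverageValue semantics for the prometheus trigger, which is an assumption rather than something stated in this diff):

```yaml
querier:
  kedaAutoscaling:
    enabled: true
    minReplicaCount: 2
    maxReplicaCount: 10
    # Illustration: with a threshold of 6 (the value rendered in the chart tests
    # further down) and roughly 30 inflight requests reported by the
    # query-scheduler, the resulting HPA aims for ceil(30 / 6) = 5 querier
    # replicas, clamped to the min/max above.
    querySchedulerInflightRequestsThreshold: 6
```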
Query-frontend ScaledObject template:
@@ -24,14 +24,17 @@ spec:
kind: Deployment
triggers:
- metadata:
metricName: query_frontend_cpu_hpa_default
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
query: max_over_time(sum(rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[5m]))[15m:]) * 1000
serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
{{- $cpu_request := dig "requests" "cpu" nil .Values.query_frontend.resources }}
threshold: {{ mulf (include "mimir.parseCPU" (dict "value" $cpu_request)) (divf .Values.query_frontend.kedaAutoscaling.targetCPUUtilizationPercentage 100) | floor | int64 | quote }}
{{- if .Values.query_frontend.kedaAutoscaling.customHeaders }}
customHeaders: {{ (include "mimir.lib.mapToCSVString" (dict "map" .Values.query_frontend.kedaAutoscaling.customHeaders)) | quote }}
{{- end }}
type: prometheus
- metadata:
metricName: query_frontend_memory_hpa_default
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="query-frontend",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
query: max_over_time(sum(container_memory_working_set_bytes{container="query-frontend",namespace="{{ .Release.Namespace }}"})[15m:])
serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
{{- $mem_request := dig "requests" "memory" nil .Values.query_frontend.resources }}
Ruler ScaledObject template:
@@ -4,7 +4,7 @@ kind: ScaledObject
metadata:
name: {{ include "mimir.resourceName" (dict "ctx" . "component" "ruler") }}
labels:
{{- include "mimir.labels" (dict "ctx" . "component" "ruler" "memberlist" true) | nindent 4 }}
{{- include "mimir.labels" (dict "ctx" . "component" "ruler") | nindent 4 }}
annotations:
{{- toYaml .Values.ruler.annotations | nindent 4 }}
namespace: {{ .Release.Namespace | quote }}
@@ -24,7 +24,7 @@ spec:
kind: Deployment
triggers:
- metadata:
metricName: ruler_cpu_hpa_default
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"}[5m])) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0)[15m:]) * 1000
query: max_over_time(sum(rate(container_cpu_usage_seconds_total{container="ruler",namespace="{{ .Release.Namespace }}"}[5m]))[15m:]) * 1000
serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
{{- $cpu_request := dig "requests" "cpu" nil .Values.ruler.resources }}
@@ -34,7 +34,7 @@ spec:
{{- end }}
type: prometheus
- metadata:
metricName: ruler_memory_hpa_default
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"}) and max by (pod) (up{container="ruler",namespace="{{ .Release.Namespace }}"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="ruler",namespace="{{ .Release.Namespace }}", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="ruler",namespace="{{ .Release.Namespace }}"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="ruler",namespace="{{ .Release.Namespace }}", reason="OOMKilled"}) or vector(0))
query: max_over_time(sum(container_memory_working_set_bytes{container="ruler",namespace="{{ .Release.Namespace }}"})[15m:])
serverAddress: {{ include "mimir.metaMonitoring.metrics.remoteReadUrl" (dict "ctx" $) }}
{{- $mem_request := dig "requests" "memory" nil .Values.ruler.resources }}
operations/helm/charts/mimir-distributed/values.yaml (40 additions, 16 deletions)
@@ -739,20 +739,26 @@ distributor:
# Setting it to null will produce a deployment without replicas set, allowing you to use autoscaling with the deployment
replicas: 1

# -- [Experimental] Configure autoscaling via KEDA (https://keda.sh). This requires having
# KEDA already installed in the Kubernetes cluster. The metrics for scaling are read
# from the metamonitoring setup (metamonitoring.grafanaAgent.metrics.remote).
# Basic auth and extra HTTP headers from metamonitoring are ignored; use customHeaders instead.
# The remote URL is used even if metamonitoring is disabled.
# See https://github.com/grafana/mimir/issues/7367 for more details on how to migrate to autoscaled resources without disruptions.
kedaAutoscaling:
enabled: false
minReplicaCount: 1
maxReplicaCount: 10
pollingInterval: 10
targetCPUUtilizationPercentage: 80
targetMemoryUtilizationPercentage: 80
targetCPUUtilizationPercentage: 100
targetMemoryUtilizationPercentage: 100
customHeaders:
{}
# X-Scope-OrgID: ""
behavior:
scaleDown:
policies:
- periodSeconds: 60
- periodSeconds: 600
type: Percent
value: 10
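The comment block above points at the metamonitoring remote as the source of the scaling metrics. A rough sketch of the corresponding metamonitoring values follows; the exact sub-keys under `metamonitoring.grafanaAgent.metrics.remote` are an assumption based on that comment, so check the chart's metamonitoring section for the authoritative schema:

```yaml
# Sketch only: provides the remote from which the ScaledObjects' Prometheus
# serverAddress is derived; the URL is a placeholder.
metamonitoring:
  grafanaAgent:
    enabled: true
    metrics:
      remote:
        url: https://mimir.example.com/api/v1/push
```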

@@ -1132,13 +1138,19 @@ ruler:
enabled: true
replicas: 1

# -- [Experimental] Configure autoscaling via KEDA (https://keda.sh). This requires having
# KEDA already installed in the Kubernetes cluster. The metrics for scaling are read
# from the metamonitoring setup (metamonitoring.grafanaAgent.metrics.remote).
# Basic auth and extra HTTP headers from metamonitoring are ignored; use customHeaders instead.
# The remote URL is used even if metamonitoring is disabled.
# See https://github.com/grafana/mimir/issues/7367 for more details on how to migrate to autoscaled resources without disruptions.
kedaAutoscaling:
enabled: false
minReplicaCount: 1
maxReplicaCount: 10
pollingInterval: 10
targetCPUUtilizationPercentage: 80
targetMemoryUtilizationPercentage: 80
targetCPUUtilizationPercentage: 100
targetMemoryUtilizationPercentage: 100
customHeaders:
{}
# X-Scope-OrgID: ""
@@ -1226,6 +1238,12 @@ ruler:
querier:
replicas: 2

# -- [Experimental] Configure autoscaling via KEDA (https://keda.sh). This requires having
# KEDA already installed in the Kubernetes cluster. The metrics for scaling are read
# from the metamonitoring setup (metamonitoring.grafanaAgent.metrics.remote).
# Basic auth and extra HTTP headers from metamonitoring are ignored; use customHeaders instead.
# The remote URL is used even if metamonitoring is disabled.
# See https://github.com/grafana/mimir/issues/7367 for more details on how to migrate to autoscaled resources without disruptions.
kedaAutoscaling:
enabled: false
minReplicaCount: 1
@@ -1242,15 +1260,15 @@ querier:
type: Percent
value: 10
stabilizationWindowSeconds: 600
scaleUp:
policies:
- periodSeconds: 120
type: Percent
value: 50
- periodSeconds: 120
type: Pods
value: 15
stabilizationWindowSeconds: 60
scaleUp:
policies:
- periodSeconds: 120
type: Percent
value: 50
- periodSeconds: 120
type: Pods
value: 15
stabilizationWindowSeconds: 60

service:
annotations: {}
@@ -1331,13 +1349,19 @@ query_frontend:
# Setting it to null will produce a deployment without replicas set, allowing you to use autoscaling with the deployment
replicas: 1

# -- [Experimental] Configure autoscaling via KEDA (https://keda.sh). This requires having
# KEDA already installed in the Kubernetes cluster. The metrics for scaling are read
# from the metamonitoring setup (metamonitoring.grafanaAgent.metrics.remote).
# Basic auth and extra HTTP headers from metamonitoring are ignored; use customHeaders instead.
# The remote URL is used even if metamonitoring is disabled.
# See https://github.com/grafana/mimir/issues/7367 for more details on how to migrate to autoscaled resources without disruptions.
kedaAutoscaling:
enabled: false
minReplicaCount: 1
maxReplicaCount: 10
pollingInterval: 10
targetCPUUtilizationPercentage: 80
targetMemoryUtilizationPercentage: 80
targetCPUUtilizationPercentage: 75
targetMemoryUtilizationPercentage: 100
customHeaders:
{}
# X-Scope-OrgID: ""
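The values.yaml hunks above also retune several autoscaling defaults: the scale-down policy period (60 versus 600 seconds) and the CPU/memory utilization targets (80 versus 100 percent for the distributor and ruler, with the query-frontend CPU target at 75). The same keys can be overridden per component; a rough sketch for the distributor, with illustrative numbers only:

```yaml
# Illustrative override - the numbers are examples, not recommendations.
distributor:
  kedaAutoscaling:
    enabled: true
    targetCPUUtilizationPercentage: 80
    behavior:
      scaleDown:
        policies:
          - periodSeconds: 300
            type: Percent
            value: 25
```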
Rendered output from the keda-autoscaling-metamonitoring-values Helm test: distributor ScaledObject:
@@ -8,7 +8,6 @@ metadata:
app.kubernetes.io/name: mimir
app.kubernetes.io/instance: keda-autoscaling-metamonitoring-values
app.kubernetes.io/component: distributor
app.kubernetes.io/part-of: memberlist
app.kubernetes.io/managed-by: Helm
annotations:
{}
@@ -19,7 +18,7 @@ spec:
behavior:
scaleDown:
policies:
- periodSeconds: 60
- periodSeconds: 600
type: Percent
value: 10
maxReplicaCount: 10
@@ -31,15 +30,13 @@ spec:
kind: Deployment
triggers:
- metadata:
metricName: cortex_distributor_cpu_hpa_default
query: max_over_time(sum(rate(container_cpu_usage_seconds_total{container="distributor",namespace="citestns"}[5m]))[15m:]) * 1000
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="distributor",namespace="citestns"}[5m])) and max by (pod) (up{container="distributor",namespace="citestns"}) > 0)[15m:]) * 1000
serverAddress: https://mimir.example.com/prometheus
threshold: "0"
customHeaders: "X-Scope-OrgID=tenant-1"
type: prometheus
- metadata:
metricName: cortex_distributor_memory_hpa_default
query: max_over_time(sum(container_memory_working_set_bytes{container="distributor",namespace="citestns"})[15m:])
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="distributor",namespace="citestns"}) and max by (pod) (up{container="distributor",namespace="citestns"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="distributor",namespace="citestns", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="distributor",namespace="citestns"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="distributor",namespace="citestns", reason="OOMKilled"}) or vector(0))
serverAddress: https://mimir.example.com/prometheus
threshold: "429496729"
customHeaders: "X-Scope-OrgID=tenant-1"
Rendered output from the keda-autoscaling-metamonitoring-values Helm test: querier ScaledObject:
@@ -8,7 +8,6 @@ metadata:
app.kubernetes.io/name: mimir
app.kubernetes.io/instance: keda-autoscaling-metamonitoring-values
app.kubernetes.io/component: querier
app.kubernetes.io/part-of: memberlist
app.kubernetes.io/managed-by: Helm
annotations:
{}
@@ -22,16 +21,16 @@ spec:
- periodSeconds: 120
type: Percent
value: 10
scaleUp:
policies:
- periodSeconds: 120
type: Percent
value: 50
- periodSeconds: 120
type: Pods
value: 15
stabilizationWindowSeconds: 60
stabilizationWindowSeconds: 600
scaleUp:
policies:
- periodSeconds: 120
type: Percent
value: 50
- periodSeconds: 120
type: Pods
value: 15
stabilizationWindowSeconds: 60
maxReplicaCount: 10
minReplicaCount: 2
pollingInterval: 10
@@ -41,15 +40,13 @@ spec:
kind: Deployment
triggers:
- metadata:
metricName: cortex_querier_hpa_default
query: sum(max_over_time(cortex_query_scheduler_inflight_requests{container="query-scheduler",namespace="citestns",quantile="0.5"}[1m]))
serverAddress: https://mimir.example.com/prometheus
threshold: "6"
customHeaders: "X-Scope-OrgID=tenant-1"
name: cortex_querier_hpa_default
type: prometheus
- metadata:
metricName: cortex_querier_hpa_default_requests_duration
query: sum(rate(cortex_querier_request_duration_seconds_sum{container="querier",namespace="citestns"}[1m]))
serverAddress: https://mimir.example.com/prometheus
threshold: "6"
Rendered output from the keda-autoscaling-metamonitoring-values Helm test: query-frontend ScaledObject:
@@ -30,13 +30,14 @@ spec:
kind: Deployment
triggers:
- metadata:
metricName: query_frontend_cpu_hpa_default
query: max_over_time(sum(sum by (pod) (rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="citestns"}[5m])) and max by (pod) (up{container="query-frontend",namespace="citestns"}) > 0)[15m:]) * 1000
query: max_over_time(sum(rate(container_cpu_usage_seconds_total{container="query-frontend",namespace="citestns"}[5m]))[15m:]) * 1000
serverAddress: https://mimir.example.com/prometheus
threshold: "0"
customHeaders: "X-Scope-OrgID=tenant-1"
type: prometheus
- metadata:
metricName: query_frontend_memory_hpa_default
query: max_over_time(sum((sum by (pod) (container_memory_working_set_bytes{container="query-frontend",namespace="citestns"}) and max by (pod) (up{container="query-frontend",namespace="citestns"}) > 0) or vector(0))[15m:]) + sum(sum by (pod) (max_over_time(kube_pod_container_resource_requests{container="query-frontend",namespace="citestns", resource="memory"}[15m])) and max by (pod) (changes(kube_pod_container_status_restarts_total{container="query-frontend",namespace="citestns"}[15m]) > 0) and max by (pod) (kube_pod_container_status_last_terminated_reason{container="query-frontend",namespace="citestns", reason="OOMKilled"}) or vector(0))
query: max_over_time(sum(container_memory_working_set_bytes{container="query-frontend",namespace="citestns"})[15m:])
serverAddress: https://mimir.example.com/prometheus
threshold: "107374182"