From 67059b2f9308bdd9a3f263d2978dc32c867173e4 Mon Sep 17 00:00:00 2001 From: Vernon Miller Date: Thu, 15 Aug 2024 10:29:08 -0600 Subject: [PATCH 1/4] add HA dedupe functionality to docker dev environment --- .../config/mimir.yaml | 9 ++++ .../config/prom-ha-pair-1.yaml | 35 +++++++++++++++ .../config/prom-ha-pair-2.yaml | 35 +++++++++++++++ .../config/prometheus.yaml | 2 - .../docker-compose.jsonnet | 44 +++++++++++++++++-- .../docker-compose.yml | 36 ++++++++++++++- 6 files changed, 154 insertions(+), 7 deletions(-) create mode 100644 development/mimir-microservices-mode/config/prom-ha-pair-1.yaml create mode 100644 development/mimir-microservices-mode/config/prom-ha-pair-2.yaml diff --git a/development/mimir-microservices-mode/config/mimir.yaml b/development/mimir-microservices-mode/config/mimir.yaml index 7f32d5920ce..96d7bd1477b 100644 --- a/development/mimir-microservices-mode/config/mimir.yaml +++ b/development/mimir-microservices-mode/config/mimir.yaml @@ -1,6 +1,10 @@ multitenancy_enabled: false distributor: + ha_tracker: + enable_ha_tracker: true + kvstore: + store: consul pool: health_check_ingesters: true ring: @@ -173,6 +177,11 @@ limits: native_histograms_ingestion_enabled: true cardinality_analysis_enabled: true query_ingesters_within: 20m + # HA tracker configuration + accept_ha_samples: true + ha_cluster_label: ha_cluster + ha_replica_label: ha_replica + ha_max_clusters: 10 runtime_config: file: ./config/runtime.yaml diff --git a/development/mimir-microservices-mode/config/prom-ha-pair-1.yaml b/development/mimir-microservices-mode/config/prom-ha-pair-1.yaml new file mode 100644 index 00000000000..66f9379702c --- /dev/null +++ b/development/mimir-microservices-mode/config/prom-ha-pair-1.yaml @@ -0,0 +1,35 @@ +global: + scrape_interval: 5s + external_labels: + scraped_by: prometheus + +scrape_configs: + - job_name: ancillary-services + static_configs: + - targets: + - 'memcached-exporter:9150' + - 'load-generator:9900' + labels: + ha_cluster: 
'prom-ha-pair' + ha_replica: 'prom-ha-pair-1' + namespace: 'mimir-microservices-mode' + relabel_configs: + - source_labels: ['__address__'] + target_label: 'pod' + regex: '([^:]+)(:[0-9]+)?' + replacement: '${1}' + - source_labels: ['namespace', 'pod'] + target_label: 'job' + separator: '/' + regex: '(.+?)(-\d+)?' + replacement: '${1}' + - source_labels: ['pod'] + target_label: 'container' + regex: '(.+?)(-\d+)?' + replacement: '${1}' + scrape_classic_histograms: true + +remote_write: + - url: http://distributor-2:8001/api/v1/push + send_native_histograms: true + send_exemplars: true diff --git a/development/mimir-microservices-mode/config/prom-ha-pair-2.yaml b/development/mimir-microservices-mode/config/prom-ha-pair-2.yaml new file mode 100644 index 00000000000..095728e6be7 --- /dev/null +++ b/development/mimir-microservices-mode/config/prom-ha-pair-2.yaml @@ -0,0 +1,35 @@ +global: + scrape_interval: 5s + external_labels: + scraped_by: prometheus + +scrape_configs: + - job_name: ancillary-services + static_configs: + - targets: + - 'memcached-exporter:9150' + - 'load-generator:9900' + labels: + ha_cluster: 'prom-ha-pair' + ha_replica: 'prom-ha-pair-2' + namespace: 'mimir-microservices-mode' + relabel_configs: + - source_labels: ['__address__'] + target_label: 'pod' + regex: '([^:]+)(:[0-9]+)?' + replacement: '${1}' + - source_labels: ['namespace', 'pod'] + target_label: 'job' + separator: '/' + regex: '(.+?)(-\d+)?' + replacement: '${1}' + - source_labels: ['pod'] + target_label: 'container' + regex: '(.+?)(-\d+)?' 
+ replacement: '${1}' + scrape_classic_histograms: true + +remote_write: + - url: http://distributor-2:8001/api/v1/push + send_native_histograms: true + send_exemplars: true diff --git a/development/mimir-microservices-mode/config/prometheus.yaml b/development/mimir-microservices-mode/config/prometheus.yaml index fb211fc6d0b..196c6a48e31 100644 --- a/development/mimir-microservices-mode/config/prometheus.yaml +++ b/development/mimir-microservices-mode/config/prometheus.yaml @@ -20,8 +20,6 @@ scrape_configs: - 'store-gateway-1:8008' - 'store-gateway-2:8009' - 'query-scheduler:8011' - - 'memcached-exporter:9150' - - 'load-generator:9900' labels: cluster: 'docker-compose' namespace: 'mimir-microservices-mode' diff --git a/development/mimir-microservices-mode/docker-compose.jsonnet b/development/mimir-microservices-mode/docker-compose.jsonnet index da94f9be6b8..ebb0338245d 100644 --- a/development/mimir-microservices-mode/docker-compose.jsonnet +++ b/development/mimir-microservices-mode/docker-compose.jsonnet @@ -33,6 +33,7 @@ std.manifestYamlDoc({ // If true, a query-tee instance with a single backend is started. enable_query_tee: false, + enable_ha_tracker: true, }, // We explicitely list all important services here, so that it's easy to disable them by commenting out. 
@@ -47,11 +48,12 @@ std.manifestYamlDoc({ self.nginx + self.minio + (if $._config.enable_prometheus then self.prometheus else {}) + + (if $._config.enable_ha_tracker then self.prompair1 + self.prompair2 else {}) + self.grafana + (if $._config.enable_grafana_agent then self.grafana_agent else {}) + (if $._config.enable_otel_collector then self.otel_collector else {}) + self.jaeger + - (if $._config.ring == 'consul' || $._config.ring == 'multi' then self.consul else {}) + + (if $._config.ring == 'consul' || $._config.ring == 'multi' || $._config.enable_ha_tracker then self.consul else {}) + (if $._config.cache_backend == 'redis' then self.redis else self.memcached + self.memcached_exporter) + (if $._config.enable_load_generator then self.load_generator else {}) + (if $._config.enable_query_tee then self.query_tee else {}) + @@ -62,12 +64,14 @@ std.manifestYamlDoc({ name: 'distributor-1', target: 'distributor', httpPort: 8000, + extraArguments: '-distributor.ha-tracker.consul.hostname=consul:8500', }), 'distributor-2': mimirService({ name: 'distributor-2', target: 'distributor', httpPort: 8001, + extraArguments: '-distributor.ha-tracker.consul.hostname=consul:8500', }), }, @@ -250,8 +254,9 @@ std.manifestYamlDoc({ // Other services used by Mimir. 
consul:: { consul: { - image: 'consul', - command: ['agent', '-dev', '-client=0.0.0.0', '-log-level=info'], + image: 'consul:1.15', + command: ['agent', '-dev', '-client=0.0.0.0', '-log-level=debug'], + hostname: 'consul', ports: ['8500:8500'], }, }, @@ -332,6 +337,39 @@ std.manifestYamlDoc({ }, }, + prompair1:: { + prompair1: { + image: 'prom/prometheus:v2.51.1', + hostname: 'prom-ha-pair-1', + command: [ + '--config.file=/etc/prometheus/prom-ha-pair-1.yaml', + '--enable-feature=exemplar-storage', + '--enable-feature=native-histograms', + ], + volumes: [ + './config:/etc/prometheus', + ], + ports: ['9092:9090'], + }, + }, + + prompair2:: { + prompair2: { + image: 'prom/prometheus:v2.51.1', + hostname: 'prom-ha-pair-2', + command: [ + '--config.file=/etc/prometheus/prom-ha-pair-2.yaml', + '--enable-feature=exemplar-storage', + '--enable-feature=native-histograms', + ], + volumes: [ + './config:/etc/prometheus', + ], + ports: ['9093:9090'], + }, + }, + + grafana:: { grafana: { image: 'grafana/grafana:10.4.3', diff --git a/development/mimir-microservices-mode/docker-compose.yml b/development/mimir-microservices-mode/docker-compose.yml index cbab655f8d9..ed96fba0bb1 100644 --- a/development/mimir-microservices-mode/docker-compose.yml +++ b/development/mimir-microservices-mode/docker-compose.yml @@ -103,6 +103,16 @@ "volumes": - "./config:/mimir/config" - "./activity:/activity" + "consul": + "command": + - "agent" + - "-dev" + - "-client=0.0.0.0" + - "-log-level=debug" + "hostname": "consul" + "image": "consul:1.15" + "ports": + - "8500:8500" "distributor-1": "build": "context": "." 
@@ -110,7 +120,7 @@ "command": - "sh" - "-c" - - "sleep 3 && exec ./mimir -config.file=./config/mimir.yaml -target=distributor -server.http-listen-port=8000 -server.grpc-listen-port=9000 -activity-tracker.filepath=/activity/distributor-8000 -memberlist.nodename=distributor -memberlist.bind-port=10000 -ingester.ring.store=memberlist -distributor.ring.store=memberlist -compactor.ring.store=memberlist -store-gateway.sharding-ring.store=memberlist -ruler.ring.store=memberlist -alertmanager.sharding-ring.store=memberlist -blocks-storage.bucket-store.index-cache.backend=memcached -blocks-storage.bucket-store.chunks-cache.backend=memcached -blocks-storage.bucket-store.metadata-cache.backend=memcached -query-frontend.results-cache.backend=memcached -ruler-storage.cache.backend=memcached -blocks-storage.bucket-store.index-cache.memcached.addresses=dns+memcached:11211 -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dns+memcached:11211 -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dns+memcached:11211 -query-frontend.results-cache.memcached.addresses=dns+memcached:11211 -ruler-storage.cache.memcached.addresses=dns+memcached:11211" + - "sleep 3 && exec ./mimir -config.file=./config/mimir.yaml -target=distributor -server.http-listen-port=8000 -server.grpc-listen-port=9000 -activity-tracker.filepath=/activity/distributor-8000 -distributor.ha-tracker.consul.hostname=consul:8500 -memberlist.nodename=distributor -memberlist.bind-port=10000 -ingester.ring.store=memberlist -distributor.ring.store=memberlist -compactor.ring.store=memberlist -store-gateway.sharding-ring.store=memberlist -ruler.ring.store=memberlist -alertmanager.sharding-ring.store=memberlist -blocks-storage.bucket-store.index-cache.backend=memcached -blocks-storage.bucket-store.chunks-cache.backend=memcached -blocks-storage.bucket-store.metadata-cache.backend=memcached -query-frontend.results-cache.backend=memcached -ruler-storage.cache.backend=memcached 
-blocks-storage.bucket-store.index-cache.memcached.addresses=dns+memcached:11211 -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dns+memcached:11211 -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dns+memcached:11211 -query-frontend.results-cache.memcached.addresses=dns+memcached:11211 -ruler-storage.cache.memcached.addresses=dns+memcached:11211" "depends_on": - "minio" "environment": @@ -135,7 +145,7 @@ "command": - "sh" - "-c" - - "sleep 3 && exec ./mimir -config.file=./config/mimir.yaml -target=distributor -server.http-listen-port=8001 -server.grpc-listen-port=9001 -activity-tracker.filepath=/activity/distributor-8001 -memberlist.nodename=distributor -memberlist.bind-port=10001 -ingester.ring.store=memberlist -distributor.ring.store=memberlist -compactor.ring.store=memberlist -store-gateway.sharding-ring.store=memberlist -ruler.ring.store=memberlist -alertmanager.sharding-ring.store=memberlist -blocks-storage.bucket-store.index-cache.backend=memcached -blocks-storage.bucket-store.chunks-cache.backend=memcached -blocks-storage.bucket-store.metadata-cache.backend=memcached -query-frontend.results-cache.backend=memcached -ruler-storage.cache.backend=memcached -blocks-storage.bucket-store.index-cache.memcached.addresses=dns+memcached:11211 -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dns+memcached:11211 -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dns+memcached:11211 -query-frontend.results-cache.memcached.addresses=dns+memcached:11211 -ruler-storage.cache.memcached.addresses=dns+memcached:11211" + - "sleep 3 && exec ./mimir -config.file=./config/mimir.yaml -target=distributor -server.http-listen-port=8001 -server.grpc-listen-port=9001 -activity-tracker.filepath=/activity/distributor-8001 -distributor.ha-tracker.consul.hostname=consul:8500 -memberlist.nodename=distributor -memberlist.bind-port=10001 -ingester.ring.store=memberlist -distributor.ring.store=memberlist -compactor.ring.store=memberlist 
-store-gateway.sharding-ring.store=memberlist -ruler.ring.store=memberlist -alertmanager.sharding-ring.store=memberlist -blocks-storage.bucket-store.index-cache.backend=memcached -blocks-storage.bucket-store.chunks-cache.backend=memcached -blocks-storage.bucket-store.metadata-cache.backend=memcached -query-frontend.results-cache.backend=memcached -ruler-storage.cache.backend=memcached -blocks-storage.bucket-store.index-cache.memcached.addresses=dns+memcached:11211 -blocks-storage.bucket-store.chunks-cache.memcached.addresses=dns+memcached:11211 -blocks-storage.bucket-store.metadata-cache.memcached.addresses=dns+memcached:11211 -query-frontend.results-cache.memcached.addresses=dns+memcached:11211 -ruler-storage.cache.memcached.addresses=dns+memcached:11211" "depends_on": - "minio" "environment": @@ -314,6 +324,28 @@ - "./config:/etc/prometheus" - "../../operations/mimir-mixin-compiled/alerts.yaml:/etc/mixin/mimir-alerts.yaml" - "../../operations/mimir-mixin-compiled/rules.yaml:/etc/mixin/mimir-rules.yaml" + "prompair1": + "command": + - "--config.file=/etc/prometheus/prom-ha-pair-1.yaml" + - "--enable-feature=exemplar-storage" + - "--enable-feature=native-histograms" + "hostname": "prom-ha-pair-1" + "image": "prom/prometheus:v2.51.1" + "ports": + - "9092:9090" + "volumes": + - "./config:/etc/prometheus" + "prompair2": + "command": + - "--config.file=/etc/prometheus/prom-ha-pair-2.yaml" + - "--enable-feature=exemplar-storage" + - "--enable-feature=native-histograms" + "hostname": "prom-ha-pair-2" + "image": "prom/prometheus:v2.51.1" + "ports": + - "9093:9090" + "volumes": + - "./config:/etc/prometheus" "querier": "build": "context": "." 
From 17ca622ca6d4b6841451e2814fdfd069110140df Mon Sep 17 00:00:00 2001 From: Vernon Miller Date: Thu, 15 Aug 2024 12:02:00 -0600 Subject: [PATCH 2/4] update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e96517171b..42d1fa95f51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,6 +60,7 @@ * `cortex_ruler_independent_rule_evaluation_concurrency_attempts_incomplete_total` * `cortex_ruler_independent_rule_evaluation_concurrency_attempts_completed_total` * [ENHANCEMENT] Expose a new `s3.session-token` configuration option to enable using temporary security credentials. #8952 +* [ENHANCEMENT] Development environment: enable the distributor HA tracker and add a pair of HA Prometheus replicas to `mimir-microservices-mode` to exercise HA deduplication. #9012 * [BUGFIX] Ruler: add support for draining any outstanding alert notifications before shutting down. This can be enabled with the `-ruler.drain-notification-queue-on-shutdown=true` CLI flag. #8346 * [BUGFIX] Query-frontend: fix `-querier.max-query-lookback` enforcement when `-compactor.blocks-retention-period` is not set, and viceversa. #8388 * [BUGFIX] Ingester: fix sporadic `not found` error causing an internal server error if label names are queried with matchers during head compaction. #8391 From df0fe82451b2b5b137bca866ffa9fcd195db2162 Mon Sep 17 00:00:00 2001 From: Vernon Miller Date: Thu, 15 Aug 2024 19:39:29 -0600 Subject: [PATCH 3/4] address feedback --- .../mimir-microservices-mode/docker-compose.jsonnet | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/development/mimir-microservices-mode/docker-compose.jsonnet b/development/mimir-microservices-mode/docker-compose.jsonnet index ebb0338245d..d1a53d2f1f7 100644 --- a/development/mimir-microservices-mode/docker-compose.jsonnet +++ b/development/mimir-microservices-mode/docker-compose.jsonnet @@ -28,12 +28,14 @@ std.manifestYamlDoc({ // If true, start and enable scraping by these components. 
// Note that if more than one component is enabled, the dashboards shown in Grafana may contain duplicate series or aggregates may be doubled or tripled. enable_grafana_agent: false, + // If true, start a base Prometheus that scrapes the Mimir component metrics and remote writes to distributor-1. + // Two additional Prometheus instances (prompair1 and prompair2) are also started as an HA pair: both scrape the same memcached-exporter and load-generator + // targets with the same ha_cluster label but different ha_replica labels, and remote write to distributor-2 so the HA tracker deduplicates their samples. enable_prometheus: true, // If Prometheus is disabled, recording rules will not be evaluated and so dashboards in Grafana that depend on these recorded series will display no data. enable_otel_collector: false, // If true, a query-tee instance with a single backend is started. enable_query_tee: false, - enable_ha_tracker: true, }, // We explicitely list all important services here, so that it's easy to disable them by commenting out. @@ -47,8 +49,7 @@ std.manifestYamlDoc({ self.alertmanagers(3) + self.nginx + self.minio + - (if $._config.enable_prometheus then self.prometheus else {}) + - (if $._config.enable_ha_tracker then self.prompair1 + self.prompair2 else {}) + + (if $._config.enable_prometheus then self.prometheus + self.prompair1 + self.prompair2 else {}) + self.grafana + (if $._config.enable_grafana_agent then self.grafana_agent else {}) + (if $._config.enable_otel_collector then self.otel_collector else {}) + From e2575ca3dba4b4892eb13df32968ff3a78215206 Mon Sep 17 00:00:00 2001 From: Vernon Miller Date: Thu, 15 Aug 2024 19:45:38 -0600 Subject: [PATCH 4/4] run consul by default for HA --- development/mimir-microservices-mode/docker-compose.jsonnet | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/development/mimir-microservices-mode/docker-compose.jsonnet b/development/mimir-microservices-mode/docker-compose.jsonnet index d1a53d2f1f7..5d4d695ac8a 100644 --- a/development/mimir-microservices-mode/docker-compose.jsonnet +++ b/development/mimir-microservices-mode/docker-compose.jsonnet @@ 
-54,7 +54,7 @@ std.manifestYamlDoc({ (if $._config.enable_grafana_agent then self.grafana_agent else {}) + (if $._config.enable_otel_collector then self.otel_collector else {}) + self.jaeger + - (if $._config.ring == 'consul' || $._config.ring == 'multi' || $._config.enable_ha_tracker then self.consul else {}) + + self.consul + (if $._config.cache_backend == 'redis' then self.redis else self.memcached + self.memcached_exporter) + (if $._config.enable_load_generator then self.load_generator else {}) + (if $._config.enable_query_tee then self.query_tee else {}) +