From 06290a097cb62a25ac5ef7240c5515559b17517c Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Thu, 12 Mar 2020 10:10:37 -0400 Subject: [PATCH 1/6] [development] Add HA Prometheus lab deployment for dev M3 docker compose lab --- Makefile | 14 ++- docker/grafana/Dockerfile | 2 + docker/m3aggregator/Dockerfile | 2 +- docker/m3aggregator/development.Dockerfile | 10 ++ docker/m3collector/development.Dockerfile | 10 ++ docker/m3query/development.Dockerfile | 10 ++ .../grafana/m3aggregator_dashboard.json | 78 +++++++++------ .../m3aggregator_end_to_end_details.json | 26 +++++ integrations/grafana/m3db_dashboard.json | 30 +++--- .../development/m3_stack/docker-compose.yml | 39 +++++--- scripts/development/m3_stack/m3aggregator.yml | 58 ++++++++++++ ...nator.yml => m3coordinator-aggregator.yml} | 38 +++++++- .../m3_stack/m3coordinator-standard.yml | 94 +++++++++++++++++++ scripts/development/m3_stack/prometheus01.yml | 57 +++++++++++ .../{prometheus.yml => prometheus02.yml} | 25 +++-- scripts/development/m3_stack/start_m3.sh | 87 +++++++++++++---- .../aggregator/m3aggregator.yml | 58 ++++++++++++ src/aggregator/config/m3aggregator.yml | 58 ++++++++++++ 18 files changed, 606 insertions(+), 90 deletions(-) create mode 100644 docker/m3aggregator/development.Dockerfile create mode 100644 docker/m3collector/development.Dockerfile create mode 100644 docker/m3query/development.Dockerfile rename scripts/development/m3_stack/{m3coordinator.yml => m3coordinator-aggregator.yml} (57%) create mode 100644 scripts/development/m3_stack/m3coordinator-standard.yml create mode 100644 scripts/development/m3_stack/prometheus01.yml rename scripts/development/m3_stack/{prometheus.yml => prometheus02.yml} (66%) diff --git a/Makefile b/Makefile index b3ed88dd1b..ff724bb074 100644 --- a/Makefile +++ b/Makefile @@ -123,6 +123,14 @@ install-vendor-m3: install-vendor-m3-remove-bad-dep: ([ -d $(VENDOR)/$(bad_trace_dep) ] && rm -rf $(VENDOR)/$(bad_trace_dep)) || (echo "No bad trace dep" > /dev/null) 
+.PHONY: docker-dev-prep +docker-dev-prep: + mkdir -p ./bin/config + + # Hacky way to find all configs and put into ./bin/config/ + find ./src | fgrep config | fgrep ".yml" | xargs -I{} cp {} ./bin/config/ + find ./src | fgrep config | fgrep ".yaml" | xargs -I{} cp {} ./bin/config/ + define SERVICE_RULES .PHONY: $(SERVICE) @@ -141,11 +149,7 @@ $(SERVICE)-linux-amd64: .PHONY: $(SERVICE)-docker-dev $(SERVICE)-docker-dev: clean-build $(SERVICE)-linux-amd64 - mkdir -p ./bin/config - - # Hacky way to find all configs and put into ./bin/config/ - find ./src | fgrep config | fgrep ".yml" | xargs -I{} cp {} ./bin/config/ - find ./src | fgrep config | fgrep ".yaml" | xargs -I{} cp {} ./bin/config/ + make docker-dev-prep # Build development docker image docker build -t $(SERVICE):dev -t quay.io/m3dbtest/$(SERVICE):dev-$(USER) -f ./docker/$(SERVICE)/development.Dockerfile ./bin diff --git a/docker/grafana/Dockerfile b/docker/grafana/Dockerfile index 1eeb6de1f4..8ec094c156 100644 --- a/docker/grafana/Dockerfile +++ b/docker/grafana/Dockerfile @@ -8,6 +8,8 @@ COPY ./integrations/grafana/m3query_dashboard.json /tmp/grafana_dashboards/m3que COPY ./integrations/grafana/m3coordinator_dashboard.json /tmp/grafana_dashboards/m3coordinator_dashboard.json COPY ./integrations/grafana/m3db_dashboard.json /tmp/grafana_dashboards/m3db_dashboard.json COPY ./integrations/grafana/temporal_function_comparison.json /tmp/grafana_dashboards/temporal_function_comparison.json +COPY ./integrations/grafana/m3aggregator_dashboard.json /tmp/grafana_dashboards/m3aggregator_dashboard.json +COPY ./integrations/grafana/m3aggregator_end_to_end_details.json /tmp/grafana_dashboards/m3aggregator_end_to_end_details.json # Need to replace datasource template variable with name of actual data source so auto-import # JustWorksTM. 
Use a temporary directory to host the dashboards since the default diff --git a/docker/m3aggregator/Dockerfile b/docker/m3aggregator/Dockerfile index cb2480b066..56cdc1e4a0 100644 --- a/docker/m3aggregator/Dockerfile +++ b/docker/m3aggregator/Dockerfile @@ -18,7 +18,7 @@ RUN cd /go/src/github.com/m3db/m3/ && \ FROM alpine:latest LABEL maintainer="The M3DB Authors " -EXPOSE 5000/tcp 6000/tcp 60001/tcp 7203/tcp 9000-9004/tcp +EXPOSE 5000/tcp 6000/tcp 6001/tcp RUN apk add --no-cache curl jq diff --git a/docker/m3aggregator/development.Dockerfile b/docker/m3aggregator/development.Dockerfile new file mode 100644 index 0000000000..eb4a40906a --- /dev/null +++ b/docker/m3aggregator/development.Dockerfile @@ -0,0 +1,10 @@ +FROM alpine:latest +LABEL maintainer="The M3DB Authors " + +EXPOSE 5000/tcp 6000/tcp 6001/tcp + +ADD ./m3aggregator /bin/m3aggregator +ADD ./config/m3aggregator.yml /etc/m3aggregator/m3aggregator.yml + +ENTRYPOINT [ "/bin/m3aggregator" ] +CMD [ "-f", "/etc/m3aggregator/m3aggregator.yml" ] diff --git a/docker/m3collector/development.Dockerfile b/docker/m3collector/development.Dockerfile new file mode 100644 index 0000000000..e391da0b65 --- /dev/null +++ b/docker/m3collector/development.Dockerfile @@ -0,0 +1,10 @@ +FROM alpine:latest +LABEL maintainer="The M3DB Authors " + +EXPOSE 7206/tcp 7207/tcp + +ADD ./m3collector /bin/m3collector +ADD ./config/m3collector.yml /etc/m3collector/m3collector.yml + +ENTRYPOINT [ "/bin/m3collector" ] +CMD [ "-f", "/etc/m3collector/m3collector.yml" ] diff --git a/docker/m3query/development.Dockerfile b/docker/m3query/development.Dockerfile new file mode 100644 index 0000000000..cd32657222 --- /dev/null +++ b/docker/m3query/development.Dockerfile @@ -0,0 +1,10 @@ +FROM alpine:latest +LABEL maintainer="The M3DB Authors " + +EXPOSE 7201/tcp 7203/tcp + +ADD ./m3query /bin/m3query +ADD ./config/m3query-local-etcd.yml /etc/m3query/m3query.yml + +ENTRYPOINT [ "/bin/m3query" ] +CMD [ "-f", "/etc/m3query/m3query.yml" ] diff --git 
a/integrations/grafana/m3aggregator_dashboard.json b/integrations/grafana/m3aggregator_dashboard.json index 2f31344775..0e3b67b73a 100644 --- a/integrations/grafana/m3aggregator_dashboard.json +++ b/integrations/grafana/m3aggregator_dashboard.json @@ -1,4 +1,30 @@ { + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.2.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], "annotations": { "list": [ { @@ -15,13 +41,12 @@ "editable": true, "gnetId": null, "graphTooltip": 1, - "iteration": 1575560854098, "iteration": 1582905705847, "links": [], "panels": [ { "collapsed": false, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -952,7 +977,7 @@ }, { "collapsed": false, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -1319,7 +1344,7 @@ }, { "collapsed": false, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -2077,7 +2102,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -2648,7 +2673,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -3210,7 +3235,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -3586,7 +3611,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -4142,7 +4167,7 @@ }, { "collapsed": false, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -5160,7 +5185,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -5736,7 
+5761,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -6386,7 +6411,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -7451,7 +7476,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -8771,7 +8796,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -9237,7 +9262,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -9433,7 +9458,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -9993,7 +10018,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -10543,7 +10568,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -10829,7 +10854,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -10952,7 +10977,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -11058,7 +11083,7 @@ }, { "collapsed": true, - "datasource": null, + "datasource": "$datasource", "gridPos": { "h": 1, "w": 24, @@ -11697,14 +11722,11 @@ "list": [ { "current": { - "tags": [], - "text": "M3DB", - "value": "M3DB" + "text": "M3Query - Prometheus", + "value": "M3Query - Prometheus" }, "hide": 0, - "includeAll": false, "label": null, - "multi": false, "name": "datasource", "options": [], "query": "prometheus", @@ -11895,7 +11917,7 @@ "$__all" ] }, - "datasource": "M3DB", + "datasource": "$datasource", "definition": "m3aggregator_aggregator_flush_handler_placement_update{backend=\"m3msg\"}", "hide": 0, "includeAll": true, @@ -11925,7 +11947,7 @@ "$__all" ] }, - "datasource": "M3DB", + 
"datasource": "$datasource", "definition": "m3aggregator_aggregator_flush_handler_placement_update{backend=\"m3msg\"}", "hide": 0, "includeAll": true, diff --git a/integrations/grafana/m3aggregator_end_to_end_details.json b/integrations/grafana/m3aggregator_end_to_end_details.json index 4c0df4f9ee..e82c05c290 100644 --- a/integrations/grafana/m3aggregator_end_to_end_details.json +++ b/integrations/grafana/m3aggregator_end_to_end_details.json @@ -1,4 +1,30 @@ { + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.2.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], "annotations": { "list": [ { diff --git a/integrations/grafana/m3db_dashboard.json b/integrations/grafana/m3db_dashboard.json index c9ee4a025d..8144fa1a8c 100644 --- a/integrations/grafana/m3db_dashboard.json +++ b/integrations/grafana/m3db_dashboard.json @@ -5820,6 +5820,21 @@ "tags": ["disable-sync"], "templating": { "list": [ + { + "current": { + "text": "M3Query - Prometheus", + "value": "M3Query - Prometheus" + }, + "hide": 0, + "label": null, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, { "allFormat": "glob", "allValue": null, @@ -5981,21 +5996,6 @@ "tagsQuery": "", "type": "query", "useTags": false - }, - { - "current": { - "text": "M3Query - Prometheus", - "value": "M3Query - Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "type": "datasource" } ] }, diff --git a/scripts/development/m3_stack/docker-compose.yml b/scripts/development/m3_stack/docker-compose.yml index d0f8de35f0..43d13f2330 100644 
--- a/scripts/development/m3_stack/docker-compose.yml +++ b/scripts/development/m3_stack/docker-compose.yml @@ -4,8 +4,8 @@ services: networks: - backend build: - context: ../../../ - dockerfile: ./docker/m3dbnode/Dockerfile + context: ../../../bin + dockerfile: ./docker/m3dbnode/development.Dockerfile image: m3dbnode01:latest volumes: - "./m3dbnode.yml:/etc/m3dbnode/m3dbnode.yml" @@ -24,8 +24,8 @@ services: networks: - backend build: - context: ../../../ - dockerfile: ./docker/m3dbnode/Dockerfile + context: ../../../bin + dockerfile: ./docker/m3dbnode/development.Dockerfile image: m3dbnode02:latest volumes: - "./m3dbnode.yml:/etc/m3dbnode/m3dbnode.yml" @@ -35,8 +35,8 @@ services: networks: - backend build: - context: ../../../ - dockerfile: ./docker/m3dbnode/Dockerfile + context: ../../../bin + dockerfile: ./docker/m3dbnode/development.Dockerfile image: m3dbnode03:latest volumes: - "./m3dbnode.yml:/etc/m3dbnode/m3dbnode.yml" @@ -50,8 +50,8 @@ services: networks: - backend build: - context: ../../../ - dockerfile: ./docker/m3aggregator/Dockerfile + context: ../../../bin + dockerfile: ./docker/m3aggregator/development.Dockerfile image: m3aggregator01:latest volumes: - "./m3aggregator.yml:/etc/m3aggregator/m3aggregator.yml" @@ -71,11 +71,12 @@ services: networks: - backend build: - context: ../../../ - dockerfile: ./docker/m3coordinator/Dockerfile + context: ../../../bin + dockerfile: ./docker/m3coordinator/development.Dockerfile image: m3coordinator01:latest volumes: - - "./m3coordinator.yml:/etc/m3coordinator/m3coordinator.yml" + # Use a path in the bin directory (gitignored) to easily change configs + - "../../../bin/m3coordinator.yml:/etc/m3coordinator/m3coordinator.yml" - "./schema.proto:/etc/m3coordinator/schema.proto" m3collector01: expose: @@ -87,8 +88,8 @@ services: networks: - backend build: - context: ../../../ - dockerfile: ./docker/m3collector/Dockerfile + context: ../../../bin + dockerfile: ./docker/m3collector/development.Dockerfile image: 
m3collector01:latest volumes: - "./m3collector.yml:/etc/m3collector/m3collector.yml" @@ -101,7 +102,17 @@ services: - backend image: prom/prometheus:latest volumes: - - "./:/etc/prometheus/" + - "./prometheus01.yml:/etc/prometheus/prometheus.yml" + prometheus02: + expose: + - "9091" + ports: + - "0.0.0.0:9091:9090" + networks: + - backend + image: prom/prometheus:latest + volumes: + - "./prometheus02.yml:/etc/prometheus/prometheus.yml" grafana: build: context: ../../../ diff --git a/scripts/development/m3_stack/m3aggregator.yml b/scripts/development/m3_stack/m3aggregator.yml index 92105e8db5..46ffd43f85 100644 --- a/scripts/development/m3_stack/m3aggregator.yml +++ b/scripts/development/m3_stack/m3aggregator.yml @@ -7,6 +7,64 @@ metrics: prometheus: onError: none handlerPath: /metrics + listenAddress: 0.0.0.0:6002 + timerType: histogram + defaultHistogramBuckets: + - upper: 0.002 + - upper: 0.004 + - upper: 0.006 + - upper: 0.008 + - upper: 0.01 + - upper: 0.02 + - upper: 0.04 + - upper: 0.06 + - upper: 0.08 + - upper: 0.1 + - upper: 0.2 + - upper: 0.4 + - upper: 0.6 + - upper: 0.8 + - upper: 1 + - upper: 1.5 + - upper: 2 + - upper: 2.5 + - upper: 3 + - upper: 3.5 + - upper: 4 + - upper: 4.5 + - upper: 5 + - upper: 5.5 + - upper: 6 + - upper: 6.5 + - upper: 7 + - upper: 7.5 + - upper: 8 + - upper: 8.5 + - upper: 9 + - upper: 9.5 + - upper: 10 + - upper: 15 + - upper: 20 + - upper: 25 + - upper: 30 + - upper: 35 + - upper: 40 + - upper: 45 + - upper: 50 + - upper: 55 + - upper: 60 + - upper: 300 + - upper: 600 + - upper: 900 + - upper: 1200 + - upper: 1500 + - upper: 1800 + - upper: 2100 + - upper: 2400 + - upper: 2700 + - upper: 3000 + - upper: 3300 + - upper: 3600 sanitization: prometheus samplingRate: 1.0 extended: none diff --git a/scripts/development/m3_stack/m3coordinator.yml b/scripts/development/m3_stack/m3coordinator-aggregator.yml similarity index 57% rename from scripts/development/m3_stack/m3coordinator.yml rename to 
scripts/development/m3_stack/m3coordinator-aggregator.yml index 4c7108c4e4..f71de22380 100644 --- a/scripts/development/m3_stack/m3coordinator.yml +++ b/scripts/development/m3_stack/m3coordinator-aggregator.yml @@ -19,10 +19,10 @@ clusters: - namespace: metrics_0_30m type: unaggregated retention: 30m - - namespace: metrics_10s_48h + - namespace: metrics_30s_24h type: aggregated - retention: 48h - resolution: 10s + retention: 24h + resolution: 30s client: config: service: @@ -34,8 +34,36 @@ clusters: - zone: embedded endpoints: - m3db_seed:2379 - # proto: - # schemaFilePath: /etc/m3coordinator/schema.proto + +downsample: + remoteAggregator: + client: + placementKV: + namespace: /placement + environment: default_env + placementWatcher: + key: m3aggregator + initWatchTimeout: 10s + hashType: murmur32 + shardCutoffLingerDuration: 1m + flushSize: 1440 + maxTimerBatchSize: 1120 + queueSize: 10000 + queueDropType: oldest + encoder: + initBufferSize: 2048 + maxMessageSize: 10485760 + bytesPool: + buckets: + - capacity: 2048 + count: 4096 + - capacity: 4096 + count: 4096 + watermark: + low: 0.7 + high: 1.0 + connection: + writeTimeout: 250ms ingest: ingester: diff --git a/scripts/development/m3_stack/m3coordinator-standard.yml b/scripts/development/m3_stack/m3coordinator-standard.yml new file mode 100644 index 0000000000..0e9a47394a --- /dev/null +++ b/scripts/development/m3_stack/m3coordinator-standard.yml @@ -0,0 +1,94 @@ +listenAddress: + value: "0.0.0.0:7201" + +logging: + level: info + +metrics: + scope: + prefix: "coordinator" + prometheus: + handlerPath: /metrics + listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved + sanitization: prometheus + samplingRate: 1.0 + extended: none + +clusters: + - namespaces: + - namespace: metrics_0_30m + type: unaggregated + retention: 30m + - namespace: metrics_30s_24h + type: aggregated + retention: 24h + resolution: 30s + client: + config: + service: + env: default_env + zone: embedded + service: 
m3db + cacheDir: /var/lib/m3kv + etcdClusters: + - zone: embedded + endpoints: + - m3db_seed:2379 + # Uncomment for proto use + # -- + # proto: + # schemaFilePath: /etc/m3coordinator/schema.proto + +# Uncomment for remote aggregator use +# -- +# downsample: +# remoteAggregator: +# client: +# placementKV: +# namespace: /placement +# environment: override_test_env +# placementWatcher: +# key: m3aggregator +# initWatchTimeout: 10s +# hashType: murmur32 +# shardCutoffLingerDuration: 1m +# flushSize: 1440 +# maxTimerBatchSize: 1120 +# queueSize: 10000 +# queueDropType: oldest +# encoder: +# initBufferSize: 2048 +# maxMessageSize: 10485760 +# bytesPool: +# buckets: +# - capacity: 2048 +# count: 4096 +# - capacity: 4096 +# count: 4096 +# watermark: +# low: 0.7 +# high: 1.0 +# connection: +# writeTimeout: 250ms +# ingest: +# ingester: +# workerPoolSize: 10000 +# opPool: +# size: 10000 +# retry: +# maxRetries: 3 +# jitter: true +# logSampleRate: 0.01 +# m3msg: +# server: +# listenAddress: "0.0.0.0:7507" +# retry: +# maxBackoff: 10s +# jitter: true + +carbon: + ingester: + listenAddress: "0.0.0.0:7204" + +tagOptions: + idScheme: quoted diff --git a/scripts/development/m3_stack/prometheus01.yml b/scripts/development/m3_stack/prometheus01.yml new file mode 100644 index 0000000000..35dcae03a1 --- /dev/null +++ b/scripts/development/m3_stack/prometheus01.yml @@ -0,0 +1,57 @@ +global: + # Don't apply external labels for + # external_labels: + # role: "remote" + scrape_interval: 10s + evaluation_interval: 10s + +# Alertmanager configuration +alerting: + alertmanagers: + - static_configs: + - targets: + # - alertmanager:9093 + +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. +rule_files: + # - "first_rules.yml" + # - "second_rules.yml" + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. 
+scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: 'prometheus01' + + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + static_configs: + - targets: ['prometheus01:9090'] + + - job_name: 'coordinator' + static_configs: + - targets: ['m3coordinator01:7203'] + + - job_name: 'dbnode' + static_configs: + - targets: ['m3db_seed:9004', 'm3db_data01:9004', 'm3db_data02:9004'] + + - job_name: 'aggregator' + static_configs: + - targets: ['m3aggregator01:6002'] + +remote_read: + - url: http://m3coordinator01:7201/api/v1/prom/remote/read + +remote_write: + - url: http://m3coordinator01:7201/api/v1/prom/remote/write + remote_timeout: 30s + queue_config: + capacity: 10000 + max_shards: 10 + min_shards: 3 + max_samples_per_send: 5000 + batch_send_deadline: 1m + min_backoff: 50ms + max_backoff: 1s diff --git a/scripts/development/m3_stack/prometheus.yml b/scripts/development/m3_stack/prometheus02.yml similarity index 66% rename from scripts/development/m3_stack/prometheus.yml rename to scripts/development/m3_stack/prometheus02.yml index fed59356ad..335a102510 100644 --- a/scripts/development/m3_stack/prometheus.yml +++ b/scripts/development/m3_stack/prometheus02.yml @@ -1,8 +1,8 @@ global: - external_labels: - role: "remote" - scrape_interval: 15s - evaluation_interval: 15s + # external_labels: + # role: "remote_replica2" + scrape_interval: 10s + evaluation_interval: 10s # Alertmanager configuration alerting: @@ -20,13 +20,13 @@ rule_files: # Here it's Prometheus itself. scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. - - job_name: 'prometheus' + - job_name: 'prometheus01' # metrics_path defaults to '/metrics' # scheme defaults to 'http'. 
static_configs: - - targets: ['localhost:9090'] + - targets: ['prometheus01:9090'] - job_name: 'coordinator' static_configs: @@ -36,8 +36,21 @@ scrape_configs: static_configs: - targets: ['m3db_seed:9004', 'm3db_data01:9004', 'm3db_data02:9004'] + - job_name: 'aggregator' + static_configs: + - targets: ['m3aggregator01:6002'] + remote_read: - url: http://m3coordinator01:7201/api/v1/prom/remote/read remote_write: - url: http://m3coordinator01:7201/api/v1/prom/remote/write + remote_timeout: 30s + queue_config: + capacity: 10000 + max_shards: 10 + min_shards: 3 + max_samples_per_send: 5000 + batch_send_deadline: 1m + min_backoff: 50ms + max_backoff: 1s diff --git a/scripts/development/m3_stack/start_m3.sh b/scripts/development/m3_stack/start_m3.sh index d2170ea956..cc529716d8 100755 --- a/scripts/development/m3_stack/start_m3.sh +++ b/scripts/development/m3_stack/start_m3.sh @@ -7,10 +7,11 @@ source "$(pwd)/../../docker-integration-tests/common.sh" # Locally don't care if we hot loop faster export MAX_TIMEOUT=4 +RELATIVE="./../../.." 
+prepare_build_cmd() { + build_cmd="cd $RELATIVE && make clean-build docker-dev-prep && cp -r ./docker ./bin/ && $1" +} DOCKER_ARGS="-d --renew-anon-volumes" -if [[ "$FORCE_BUILD" = true ]] ; then - DOCKER_ARGS="--build -d --renew-anon-volumes" -fi echo "Bringing up nodes in the background with docker compose, remember to run ./stop.sh when done" @@ -26,10 +27,28 @@ if [[ "$USE_JAEGER" = true ]] ; then fi fi -docker-compose -f docker-compose.yml up $DOCKER_ARGS m3coordinator01 -docker-compose -f docker-compose.yml up $DOCKER_ARGS m3db_seed -docker-compose -f docker-compose.yml up $DOCKER_ARGS prometheus01 -docker-compose -f docker-compose.yml up $DOCKER_ARGS grafana +# Use standard coordinator config +cp ./m3coordinator-standard.yml ${RELATIVE}/bin/m3coordinator.yml + +if [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3COORDINATOR" == true ]]; then + prepare_build_cmd "make m3coordinator-linux-amd64" + echo "Building m3coordinator binary first" + bash -c "$build_cmd" + + docker-compose -f docker-compose.yml up --build $DOCKER_ARGS m3coordinator01 +else + docker-compose -f docker-compose.yml up $DOCKER_ARGS m3coordinator01 +fi + +if [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3DBNODE" == true ]]; then + prepare_build_cmd "make m3dbnode-linux-amd64" + echo "Building m3dbnode binary first" + bash -c "$build_cmd" + + docker-compose -f docker-compose.yml up --build $DOCKER_ARGS m3db_seed +else + docker-compose -f docker-compose.yml up $DOCKER_ARGS m3db_seed +fi if [[ "$MULTI_DB_NODE" = true ]] ; then echo "Running multi node" @@ -43,7 +62,7 @@ echo "Wait for coordinator API to be up" ATTEMPTS=10 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \ 'curl -vvvsSf localhost:7201/health' -if [[ "$AGGREGATOR_PIPELINE" = true ]]; then +if [[ "$USE_AGGREGATOR" = true ]]; then echo "Running aggregator pipeline" curl -vvvsSf -X POST localhost:7201/api/v1/services/m3aggregator/placement/init -d '{ "num_shards": 64, @@ -66,8 +85,25 @@ if [[ "$AGGREGATOR_PIPELINE" = true ]]; then 
"numberOfShards": 64 }' - docker-compose -f docker-compose.yml up $DOCKER_ARGS m3aggregator01 - docker-compose -f docker-compose.yml up $DOCKER_ARGS m3collector01 + if [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3AGGREGATOR" == true ]]; then + prepare_build_cmd "make m3aggregator-linux-amd64" + echo "Building m3aggregator binary first" + bash -c "$build_cmd" + + docker-compose -f docker-compose.yml up --build $DOCKER_ARGS m3aggregator01 + else + docker-compose -f docker-compose.yml up $DOCKER_ARGS m3aggregator01 + fi + + if [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3COLLECTOR" == true ]]; then + prepare_build_cmd "make m3collector-linux-amd64" + echo "Building m3collector binary first" + bash -c "$build_cmd" + + docker-compose -f docker-compose.yml up --build $DOCKER_ARGS m3collector01 + else + docker-compose -f docker-compose.yml up $DOCKER_ARGS m3collector01 + fi else echo "Not running aggregator pipeline" fi @@ -97,7 +133,7 @@ curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{ } }' curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{ - "name": "metrics_10s_48h", + "name": "metrics_30s_24h", "options": { "bootstrapEnabled": true, "flushEnabled": true, @@ -106,8 +142,8 @@ curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{ "snapshotEnabled": true, "repairEnabled": false, "retentionOptions": { - "retentionPeriodDuration": "48h", - "blockSizeDuration": "4h", + "retentionPeriodDuration": "24h", + "blockSizeDuration": "2h", "bufferFutureDuration": "10m", "bufferPastDuration": "10m", "blockDataExpiry": true, @@ -115,7 +151,7 @@ curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{ }, "indexOptions": { "enabled": true, - "blockSizeDuration": "4h" + "blockSizeDuration": "2h" } } }' @@ -123,7 +159,7 @@ echo "Done initializing namespaces" echo "Validating namespace" [ "$(curl -sSf localhost:7201/api/v1/namespace | jq .registry.namespaces.metrics_0_30m.indexOptions.enabled)" == true ] -[ "$(curl -sSf localhost:7201/api/v1/namespace | jq 
.registry.namespaces.metrics_10s_48h.indexOptions.enabled)" == true ] +[ "$(curl -sSf localhost:7201/api/v1/namespace | jq .registry.namespaces.metrics_30s_24h.indexOptions.enabled)" == true ] echo "Done validating namespace" echo "Initializing topology" @@ -187,7 +223,7 @@ echo "Waiting until shards are marked as available" ATTEMPTS=100 TIMEOUT=2 retry_with_backoff \ '[ "$(curl -sSf 0.0.0.0:7201/api/v1/placement | grep -c INITIALIZING)" -eq 0 ]' -if [[ "$AGGREGATOR_PIPELINE" = true ]]; then +if [[ "$USE_AGGREGATOR" = true ]]; then echo "Initializing M3Coordinator topology" curl -vvvsSf -X POST localhost:7201/api/v1/services/m3coordinator/placement/init -d '{ "instances": [ @@ -220,14 +256,33 @@ if [[ "$AGGREGATOR_PIPELINE" = true ]]; then } }' # msgs will be discarded after 600000000000ns = 10mins + # Restart with aggregator coordinator config + docker-compose -f docker-compose.yml stop m3coordinator01 + cp ./m3coordinator-aggregator.yml ${RELATIVE}/bin/m3coordinator.yml + docker-compose -f docker-compose.yml up $DOCKER_ARGS m3coordinator01 + # May not necessarily flush echo "Sending unaggregated metric to m3collector" curl http://localhost:7206/api/v1/json/report -X POST -d '{"metrics":[{"type":"gauge","value":42,"tags":{"__name__":"foo_metric","foo":"bar"}}]}' fi +echo "Starting Prometheus" +docker-compose -f docker-compose.yml up $DOCKER_ARGS prometheus01 + +if [[ "$USE_PROMETHEUS_HA" = true ]] ; then + echo "Starting Prometheus HA replica" + docker-compose -f docker-compose.yml up $DOCKER_ARGS prometheus02 +fi + +echo "Starting Grafana" +docker-compose -f docker-compose.yml up $DOCKER_ARGS grafana + if [[ "$USE_JAEGER" = true ]] ; then echo "Jaeger UI available at localhost:16686" fi echo "Prometheus available at localhost:9090" +if [[ "$USE_PROMETHEUS_HA" = true ]] ; then + echo "Prometheus HA replica available at localhost:9091" +fi echo "Grafana available at localhost:3000" echo "Run ./stop.sh to shutdown nodes when done" diff --git 
a/scripts/docker-integration-tests/aggregator/m3aggregator.yml b/scripts/docker-integration-tests/aggregator/m3aggregator.yml index 58d0370d4e..41cc0e813a 100644 --- a/scripts/docker-integration-tests/aggregator/m3aggregator.yml +++ b/scripts/docker-integration-tests/aggregator/m3aggregator.yml @@ -7,6 +7,64 @@ metrics: prometheus: onError: none handlerPath: /metrics + listenAddress: 0.0.0.0:6002 + timerType: histogram + defaultHistogramBuckets: + - upper: 0.002 + - upper: 0.004 + - upper: 0.006 + - upper: 0.008 + - upper: 0.01 + - upper: 0.02 + - upper: 0.04 + - upper: 0.06 + - upper: 0.08 + - upper: 0.1 + - upper: 0.2 + - upper: 0.4 + - upper: 0.6 + - upper: 0.8 + - upper: 1 + - upper: 1.5 + - upper: 2 + - upper: 2.5 + - upper: 3 + - upper: 3.5 + - upper: 4 + - upper: 4.5 + - upper: 5 + - upper: 5.5 + - upper: 6 + - upper: 6.5 + - upper: 7 + - upper: 7.5 + - upper: 8 + - upper: 8.5 + - upper: 9 + - upper: 9.5 + - upper: 10 + - upper: 15 + - upper: 20 + - upper: 25 + - upper: 30 + - upper: 35 + - upper: 40 + - upper: 45 + - upper: 50 + - upper: 55 + - upper: 60 + - upper: 300 + - upper: 600 + - upper: 900 + - upper: 1200 + - upper: 1500 + - upper: 1800 + - upper: 2100 + - upper: 2400 + - upper: 2700 + - upper: 3000 + - upper: 3300 + - upper: 3600 sanitization: prometheus samplingRate: 1.0 extended: none diff --git a/src/aggregator/config/m3aggregator.yml b/src/aggregator/config/m3aggregator.yml index 92105e8db5..46ffd43f85 100644 --- a/src/aggregator/config/m3aggregator.yml +++ b/src/aggregator/config/m3aggregator.yml @@ -7,6 +7,64 @@ metrics: prometheus: onError: none handlerPath: /metrics + listenAddress: 0.0.0.0:6002 + timerType: histogram + defaultHistogramBuckets: + - upper: 0.002 + - upper: 0.004 + - upper: 0.006 + - upper: 0.008 + - upper: 0.01 + - upper: 0.02 + - upper: 0.04 + - upper: 0.06 + - upper: 0.08 + - upper: 0.1 + - upper: 0.2 + - upper: 0.4 + - upper: 0.6 + - upper: 0.8 + - upper: 1 + - upper: 1.5 + - upper: 2 + - upper: 2.5 + - upper: 3 + - 
upper: 3.5 + - upper: 4 + - upper: 4.5 + - upper: 5 + - upper: 5.5 + - upper: 6 + - upper: 6.5 + - upper: 7 + - upper: 7.5 + - upper: 8 + - upper: 8.5 + - upper: 9 + - upper: 9.5 + - upper: 10 + - upper: 15 + - upper: 20 + - upper: 25 + - upper: 30 + - upper: 35 + - upper: 40 + - upper: 45 + - upper: 50 + - upper: 55 + - upper: 60 + - upper: 300 + - upper: 600 + - upper: 900 + - upper: 1200 + - upper: 1500 + - upper: 1800 + - upper: 2100 + - upper: 2400 + - upper: 2700 + - upper: 3000 + - upper: 3300 + - upper: 3600 sanitization: prometheus samplingRate: 1.0 extended: none From 7bb64d43b1d90fcf8e14d84d5f413d04651657bb Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Thu, 12 Mar 2020 12:52:03 -0400 Subject: [PATCH 2/6] Address feedback --- .../development/m3_stack/docker-compose.yml | 27 ++++++++++--- scripts/development/m3_stack/prometheus01.yml | 6 +-- scripts/development/m3_stack/prometheus02.yml | 5 ++- scripts/development/m3_stack/start_m3.sh | 39 ++++++++++++------- 4 files changed, 51 insertions(+), 26 deletions(-) diff --git a/scripts/development/m3_stack/docker-compose.yml b/scripts/development/m3_stack/docker-compose.yml index 43d13f2330..323b2a06ad 100644 --- a/scripts/development/m3_stack/docker-compose.yml +++ b/scripts/development/m3_stack/docker-compose.yml @@ -6,7 +6,7 @@ services: build: context: ../../../bin dockerfile: ./docker/m3dbnode/development.Dockerfile - image: m3dbnode01:latest + image: m3dbnode:dev volumes: - "./m3dbnode.yml:/etc/m3dbnode/m3dbnode.yml" - "./schema.proto:/etc/m3dbnode/schema.proto" @@ -26,7 +26,7 @@ services: build: context: ../../../bin dockerfile: ./docker/m3dbnode/development.Dockerfile - image: m3dbnode02:latest + image: m3dbnode:dev volumes: - "./m3dbnode.yml:/etc/m3dbnode/m3dbnode.yml" environment: @@ -37,7 +37,7 @@ services: build: context: ../../../bin dockerfile: ./docker/m3dbnode/development.Dockerfile - image: m3dbnode03:latest + image: m3dbnode:dev volumes: - "./m3dbnode.yml:/etc/m3dbnode/m3dbnode.yml" 
environment: @@ -52,11 +52,26 @@ services: build: context: ../../../bin dockerfile: ./docker/m3aggregator/development.Dockerfile - image: m3aggregator01:latest + image: m3aggregator:dev volumes: - "./m3aggregator.yml:/etc/m3aggregator/m3aggregator.yml" environment: - M3AGGREGATOR_HOST_ID=m3aggregator01 + m3aggregator02: + expose: + - "6001" + ports: + - "0.0.0.0:6001:6001" + networks: + - backend + build: + context: ../../../bin + dockerfile: ./docker/m3aggregator/development.Dockerfile + image: m3aggregator:dev + volumes: + - "./m3aggregator.yml:/etc/m3aggregator/m3aggregator.yml" + environment: + - M3AGGREGATOR_HOST_ID=m3aggregator02 m3coordinator01: expose: - "7201" @@ -73,7 +88,7 @@ services: build: context: ../../../bin dockerfile: ./docker/m3coordinator/development.Dockerfile - image: m3coordinator01:latest + image: m3coordinator:dev volumes: # Use a path in the bin directory (gitignored) to easily change configs - "../../../bin/m3coordinator.yml:/etc/m3coordinator/m3coordinator.yml" @@ -90,7 +105,7 @@ services: build: context: ../../../bin dockerfile: ./docker/m3collector/development.Dockerfile - image: m3collector01:latest + image: m3collector:dev volumes: - "./m3collector.yml:/etc/m3collector/m3collector.yml" prometheus01: diff --git a/scripts/development/m3_stack/prometheus01.yml b/scripts/development/m3_stack/prometheus01.yml index 35dcae03a1..432c7f3a9b 100644 --- a/scripts/development/m3_stack/prometheus01.yml +++ b/scripts/development/m3_stack/prometheus01.yml @@ -1,7 +1,6 @@ global: - # Don't apply external labels for - # external_labels: - # role: "remote" + external_labels: + role: "remote" scrape_interval: 10s evaluation_interval: 10s @@ -43,6 +42,7 @@ scrape_configs: remote_read: - url: http://m3coordinator01:7201/api/v1/prom/remote/read + read_recent: true remote_write: - url: http://m3coordinator01:7201/api/v1/prom/remote/write diff --git a/scripts/development/m3_stack/prometheus02.yml b/scripts/development/m3_stack/prometheus02.yml index 
335a102510..432c7f3a9b 100644 --- a/scripts/development/m3_stack/prometheus02.yml +++ b/scripts/development/m3_stack/prometheus02.yml @@ -1,6 +1,6 @@ global: - # external_labels: - # role: "remote_replica2" + external_labels: + role: "remote" scrape_interval: 10s evaluation_interval: 10s @@ -42,6 +42,7 @@ scrape_configs: remote_read: - url: http://m3coordinator01:7201/api/v1/prom/remote/read + read_recent: true remote_write: - url: http://m3coordinator01:7201/api/v1/prom/remote/write diff --git a/scripts/development/m3_stack/start_m3.sh b/scripts/development/m3_stack/start_m3.sh index cc529716d8..4df9373d53 100755 --- a/scripts/development/m3_stack/start_m3.sh +++ b/scripts/development/m3_stack/start_m3.sh @@ -27,20 +27,12 @@ if [[ "$USE_JAEGER" = true ]] ; then fi fi -# Use standard coordinator config -cp ./m3coordinator-standard.yml ${RELATIVE}/bin/m3coordinator.yml - -if [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3COORDINATOR" == true ]]; then - prepare_build_cmd "make m3coordinator-linux-amd64" - echo "Building m3coordinator binary first" - bash -c "$build_cmd" +M3DBNODE_DEV_IMG=$(docker images m3dbnode:dev | fgrep -iv repository | wc -l | xargs) +M3COORDINATOR_DEV_IMG=$(docker images m3coordinator:dev | fgrep -iv repository | wc -l | xargs) +M3AGGREGATOR_DEV_IMG=$(docker images m3aggregator:dev | fgrep -iv repository | wc -l | xargs) +M3COLLECTOR_DEV_IMG=$(docker images m3collector:dev | fgrep -iv repository | wc -l | xargs) - docker-compose -f docker-compose.yml up --build $DOCKER_ARGS m3coordinator01 -else - docker-compose -f docker-compose.yml up $DOCKER_ARGS m3coordinator01 -fi - -if [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3DBNODE" == true ]]; then +if [[ "$M3DBNODE_DEV_IMG" == "0" ]] || [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3DBNODE" == true ]]; then prepare_build_cmd "make m3dbnode-linux-amd64" echo "Building m3dbnode binary first" bash -c "$build_cmd" @@ -50,6 +42,7 @@ else docker-compose -f docker-compose.yml up $DOCKER_ARGS m3db_seed fi +# 
Bring up any other replicas if [[ "$MULTI_DB_NODE" = true ]] ; then echo "Running multi node" docker-compose -f docker-compose.yml up $DOCKER_ARGS m3db_data01 @@ -58,6 +51,19 @@ else echo "Running single node" fi +# Use standard coordinator config when bringing up coordinator first time +cp ./m3coordinator-standard.yml ${RELATIVE}/bin/m3coordinator.yml + +if [[ "$M3COORDINATOR_DEV_IMG" == "0" ]] || [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3COORDINATOR" == true ]]; then + prepare_build_cmd "make m3coordinator-linux-amd64" + echo "Building m3coordinator binary first" + bash -c "$build_cmd" + + docker-compose -f docker-compose.yml up --build $DOCKER_ARGS m3coordinator01 +else + docker-compose -f docker-compose.yml up $DOCKER_ARGS m3coordinator01 +fi + echo "Wait for coordinator API to be up" ATTEMPTS=10 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \ 'curl -vvvsSf localhost:7201/health' @@ -85,7 +91,7 @@ if [[ "$USE_AGGREGATOR" = true ]]; then "numberOfShards": 64 }' - if [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3AGGREGATOR" == true ]]; then + if [[ "$M3AGGREGATOR_DEV_IMG" == "0" ]] || [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3AGGREGATOR" == true ]]; then prepare_build_cmd "make m3aggregator-linux-amd64" echo "Building m3aggregator binary first" bash -c "$build_cmd" @@ -95,7 +101,10 @@ if [[ "$USE_AGGREGATOR" = true ]]; then docker-compose -f docker-compose.yml up $DOCKER_ARGS m3aggregator01 fi - if [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3COLLECTOR" == true ]]; then + # Bring up the second replica + docker-compose -f docker-compose.yml up $DOCKER_ARGS m3aggregator02 + + if [[ "$M3COLLECTOR_DEV_IMG" == "0" ]] || [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3COLLECTOR" == true ]]; then prepare_build_cmd "make m3collector-linux-amd64" echo "Building m3collector binary first" bash -c "$build_cmd" From ca210fe62c618a8068315f2a0e3f234f2c136eca Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Thu, 12 Mar 2020 13:06:06 -0400 Subject: [PATCH 3/6] Update readme 
and use single prometheus config, add ability to add second aggregator --- scripts/development/m3_stack/README.md | 8 ++- .../development/m3_stack/docker-compose.yml | 8 +-- .../{prometheus01.yml => prometheus.yml} | 1 + scripts/development/m3_stack/prometheus02.yml | 57 ------------------- scripts/development/m3_stack/start_m3.sh | 4 +- 5 files changed, 14 insertions(+), 64 deletions(-) rename scripts/development/m3_stack/{prometheus01.yml => prometheus.yml} (97%) delete mode 100644 scripts/development/m3_stack/prometheus02.yml diff --git a/scripts/development/m3_stack/README.md b/scripts/development/m3_stack/README.md index 10b5dcfa02..1a074f65e0 100644 --- a/scripts/development/m3_stack/README.md +++ b/scripts/development/m3_stack/README.md @@ -2,11 +2,17 @@ This docker-compose file will setup the following environment: -1. 3 M3DB nodes with a single node acting as an ETCD seed +1. 1 M3DB node with a single node acting as an ETCD seed 2. 1 M3Coordinator node 3. 1 Grafana node (with a pre-configured Prometheus source) 4. 1 Prometheus node that scrapes the M3DB/M3Coordinator nodes and writes the metrics to M3Coordinator +The environment variables that let's you configure this setup are: +- `USE_MULTI_DB_NODES=true`: uses 3 database nodes instead of 1 for cluster. +- `USE_JAEGER=true`: look at traces emitted by M3 services. +- `USE_PROMETHEUS_HA=true`: send data to M3 from two HA Prometheus instances to replicate deployments of HA Prometheus sending data to M3. +- `USE_AGGREGATOR=true`: use dedicated aggregators to aggregate metrics. + ## Usage Use the `start_m3.sh` and `stop_m3.sh` scripts. 
diff --git a/scripts/development/m3_stack/docker-compose.yml b/scripts/development/m3_stack/docker-compose.yml index 323b2a06ad..2b8d46800e 100644 --- a/scripts/development/m3_stack/docker-compose.yml +++ b/scripts/development/m3_stack/docker-compose.yml @@ -59,9 +59,9 @@ services: - M3AGGREGATOR_HOST_ID=m3aggregator01 m3aggregator02: expose: - - "6001" + - "6002" ports: - - "0.0.0.0:6001:6001" + - "0.0.0.0:6002:6001" networks: - backend build: @@ -117,7 +117,7 @@ services: - backend image: prom/prometheus:latest volumes: - - "./prometheus01.yml:/etc/prometheus/prometheus.yml" + - "./prometheus.yml:/etc/prometheus/prometheus.yml" prometheus02: expose: - "9091" @@ -127,7 +127,7 @@ services: - backend image: prom/prometheus:latest volumes: - - "./prometheus02.yml:/etc/prometheus/prometheus.yml" + - "./prometheus.yml:/etc/prometheus/prometheus.yml" grafana: build: context: ../../../ diff --git a/scripts/development/m3_stack/prometheus01.yml b/scripts/development/m3_stack/prometheus.yml similarity index 97% rename from scripts/development/m3_stack/prometheus01.yml rename to scripts/development/m3_stack/prometheus.yml index 432c7f3a9b..f068406215 100644 --- a/scripts/development/m3_stack/prometheus01.yml +++ b/scripts/development/m3_stack/prometheus.yml @@ -39,6 +39,7 @@ scrape_configs: - job_name: 'aggregator' static_configs: - targets: ['m3aggregator01:6002'] + - targets: ['m3aggregator02:6002'] remote_read: - url: http://m3coordinator01:7201/api/v1/prom/remote/read diff --git a/scripts/development/m3_stack/prometheus02.yml b/scripts/development/m3_stack/prometheus02.yml deleted file mode 100644 index 432c7f3a9b..0000000000 --- a/scripts/development/m3_stack/prometheus02.yml +++ /dev/null @@ -1,57 +0,0 @@ -global: - external_labels: - role: "remote" - scrape_interval: 10s - evaluation_interval: 10s - -# Alertmanager configuration -alerting: - alertmanagers: - - static_configs: - - targets: - # - alertmanager:9093 - -# Load rules once and periodically evaluate them 
according to the global 'evaluation_interval'. -rule_files: - # - "first_rules.yml" - # - "second_rules.yml" - -# A scrape configuration containing exactly one endpoint to scrape: -# Here it's Prometheus itself. -scrape_configs: - # The job name is added as a label `job=` to any timeseries scraped from this config. - - job_name: 'prometheus01' - - # metrics_path defaults to '/metrics' - # scheme defaults to 'http'. - - static_configs: - - targets: ['prometheus01:9090'] - - - job_name: 'coordinator' - static_configs: - - targets: ['m3coordinator01:7203'] - - - job_name: 'dbnode' - static_configs: - - targets: ['m3db_seed:9004', 'm3db_data01:9004', 'm3db_data02:9004'] - - - job_name: 'aggregator' - static_configs: - - targets: ['m3aggregator01:6002'] - -remote_read: - - url: http://m3coordinator01:7201/api/v1/prom/remote/read - read_recent: true - -remote_write: - - url: http://m3coordinator01:7201/api/v1/prom/remote/write - remote_timeout: 30s - queue_config: - capacity: 10000 - max_shards: 10 - min_shards: 3 - max_samples_per_send: 5000 - batch_send_deadline: 1m - min_backoff: 50ms - max_backoff: 1s diff --git a/scripts/development/m3_stack/start_m3.sh b/scripts/development/m3_stack/start_m3.sh index 4df9373d53..0f33113480 100755 --- a/scripts/development/m3_stack/start_m3.sh +++ b/scripts/development/m3_stack/start_m3.sh @@ -43,7 +43,7 @@ else fi # Bring up any other replicas -if [[ "$MULTI_DB_NODE" = true ]] ; then +if [[ "$USE_MULTI_DB_NODES" = true ]] ; then echo "Running multi node" docker-compose -f docker-compose.yml up $DOCKER_ARGS m3db_data01 docker-compose -f docker-compose.yml up $DOCKER_ARGS m3db_data02 @@ -172,7 +172,7 @@ echo "Validating namespace" echo "Done validating namespace" echo "Initializing topology" -if [[ "$MULTI_DB_NODE" = true ]] ; then +if [[ "$USE_MULTI_DB_NODES" = true ]] ; then curl -vvvsSf -X POST localhost:7201/api/v1/placement/init -d '{ "num_shards": 64, "replication_factor": 3, From 3fd216eb575f526afed86bb49a0fe92c85027737 Mon 
Sep 17 00:00:00 2001 From: Rob Skillington Date: Thu, 12 Mar 2020 13:06:16 -0400 Subject: [PATCH 4/6] Update --- scripts/development/m3_stack/README.md | 3 ++- scripts/development/m3_stack/start_m3.sh | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/development/m3_stack/README.md b/scripts/development/m3_stack/README.md index 1a074f65e0..735de89417 100644 --- a/scripts/development/m3_stack/README.md +++ b/scripts/development/m3_stack/README.md @@ -11,7 +11,8 @@ The environment variables that let's you configure this setup are: - `USE_MULTI_DB_NODES=true`: uses 3 database nodes instead of 1 for cluster. - `USE_JAEGER=true`: look at traces emitted by M3 services. - `USE_PROMETHEUS_HA=true`: send data to M3 from two HA Prometheus instances to replicate deployments of HA Prometheus sending data to M3. -- `USE_AGGREGATOR=true`: use dedicated aggregators to aggregate metrics. +- `USE_AGGREGATOR=true`: use a dedicated aggregator to aggregate metrics. +- `USE_AGGREGATOR_HA=true`: use two dedicated aggregators for HA aggregated metrics. 
## Usage diff --git a/scripts/development/m3_stack/start_m3.sh b/scripts/development/m3_stack/start_m3.sh index 0f33113480..fec5bbfd3c 100755 --- a/scripts/development/m3_stack/start_m3.sh +++ b/scripts/development/m3_stack/start_m3.sh @@ -101,8 +101,10 @@ if [[ "$USE_AGGREGATOR" = true ]]; then docker-compose -f docker-compose.yml up $DOCKER_ARGS m3aggregator01 fi - # Bring up the second replica - docker-compose -f docker-compose.yml up $DOCKER_ARGS m3aggregator02 + if [[ "$USE_AGGREGATOR_HA" == true ]]; then + # Bring up the second replica + docker-compose -f docker-compose.yml up $DOCKER_ARGS m3aggregator02 + fi if [[ "$M3COLLECTOR_DEV_IMG" == "0" ]] || [[ "$FORCE_BUILD" == true ]] || [[ "$BUILD_M3COLLECTOR" == true ]]; then prepare_build_cmd "make m3collector-linux-amd64" From 4c67f88c0816d6cb6d771226f2c212ea72d8ab64 Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Thu, 12 Mar 2020 13:35:54 -0400 Subject: [PATCH 5/6] Setup placement for two aggregators if using HA aggregators --- scripts/development/m3_stack/start_m3.sh | 59 ++++++++++++++++++------ 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/scripts/development/m3_stack/start_m3.sh b/scripts/development/m3_stack/start_m3.sh index fec5bbfd3c..ba35261738 100755 --- a/scripts/development/m3_stack/start_m3.sh +++ b/scripts/development/m3_stack/start_m3.sh @@ -70,21 +70,50 @@ ATTEMPTS=10 MAX_TIMEOUT=4 TIMEOUT=1 retry_with_backoff \ if [[ "$USE_AGGREGATOR" = true ]]; then echo "Running aggregator pipeline" - curl -vvvsSf -X POST localhost:7201/api/v1/services/m3aggregator/placement/init -d '{ - "num_shards": 64, - "replication_factor": 1, - "instances": [ - { - "id": "m3aggregator01", - "isolation_group": "rack-a", - "zone": "embedded", - "weight": 1024, - "endpoint": "m3aggregator01:6000", - "hostname": "m3aggregator01", - "port": 6000 - } - ] - }' + if [[ "$USE_AGGREGATOR_HA" != true ]]; then + # Use single replica. 
+ curl -vvvsSf -X POST localhost:7201/api/v1/services/m3aggregator/placement/init -d '{ + "num_shards": 64, + "replication_factor": 1, + "instances": [ + { + "id": "m3aggregator01", + "isolation_group": "rack-a", + "zone": "embedded", + "weight": 1024, + "endpoint": "m3aggregator01:6000", + "hostname": "m3aggregator01", + "port": 6000 + } + ] + }' + else + # Use two replicas. + curl -vvvsSf -X POST localhost:7201/api/v1/services/m3aggregator/placement/init -d '{ + "num_shards": 64, + "replication_factor": 2, + "instances": [ + { + "id": "m3aggregator01", + "isolation_group": "rack-a", + "zone": "embedded", + "weight": 1024, + "endpoint": "m3aggregator01:6000", + "hostname": "m3aggregator01", + "port": 6000 + }, + { + "id": "m3aggregator02", + "isolation_group": "rack-b", + "zone": "embedded", + "weight": 1024, + "endpoint": "m3aggregator02:6000", + "hostname": "m3aggregator02", + "port": 6000 + } + ] + }' + fi echo "Initializing m3msg topic for ingestion" curl -vvvsSf -X POST localhost:7201/api/v1/topic/init -d '{ From 8faed8a401918f6a4f2002a253aa1f73a125a3ac Mon Sep 17 00:00:00 2001 From: Rob Skillington Date: Thu, 12 Mar 2020 13:42:44 -0400 Subject: [PATCH 6/6] Remove downsample stanza from non-aggregator config --- .../m3_stack/m3coordinator-standard.yml | 47 ------------------- 1 file changed, 47 deletions(-) diff --git a/scripts/development/m3_stack/m3coordinator-standard.yml b/scripts/development/m3_stack/m3coordinator-standard.yml index 0e9a47394a..63699bc1be 100644 --- a/scripts/development/m3_stack/m3coordinator-standard.yml +++ b/scripts/development/m3_stack/m3coordinator-standard.yml @@ -39,53 +39,6 @@ clusters: # proto: # schemaFilePath: /etc/m3coordinator/schema.proto -# Uncomment for remote aggregator use -# -- -# downsample: -# remoteAggregator: -# client: -# placementKV: -# namespace: /placement -# environment: override_test_env -# placementWatcher: -# key: m3aggregator -# initWatchTimeout: 10s -# hashType: murmur32 -# shardCutoffLingerDuration: 1m 
-# flushSize: 1440 -# maxTimerBatchSize: 1120 -# queueSize: 10000 -# queueDropType: oldest -# encoder: -# initBufferSize: 2048 -# maxMessageSize: 10485760 -# bytesPool: -# buckets: -# - capacity: 2048 -# count: 4096 -# - capacity: 4096 -# count: 4096 -# watermark: -# low: 0.7 -# high: 1.0 -# connection: -# writeTimeout: 250ms -# ingest: -# ingester: -# workerPoolSize: 10000 -# opPool: -# size: 10000 -# retry: -# maxRetries: 3 -# jitter: true -# logSampleRate: 0.01 -# m3msg: -# server: -# listenAddress: "0.0.0.0:7507" -# retry: -# maxBackoff: 10s -# jitter: true - carbon: ingester: listenAddress: "0.0.0.0:7204"