From f5e38f0572b4525143109e2d9e5bfc42d0c6426b Mon Sep 17 00:00:00 2001 From: Adrian Catangiu Date: Tue, 4 Apr 2023 11:27:30 +0300 Subject: [PATCH] enable metrics on all validator nodes (#2016) Signed-off-by: acatangiu --- .../dashboard/grafana/beefy-dashboard.json | 347 +++++++----------- .../dashboard/prometheus/millau-targets.yml | 4 + .../dashboard/prometheus/rialto-targets.yml | 4 + deployments/networks/millau.yml | 16 +- deployments/networks/rialto.yml | 16 +- 5 files changed, 167 insertions(+), 220 deletions(-) diff --git a/deployments/networks/dashboard/grafana/beefy-dashboard.json b/deployments/networks/dashboard/grafana/beefy-dashboard.json index a5df3449e678e..2e1e177641a26 100644 --- a/deployments/networks/dashboard/grafana/beefy-dashboard.json +++ b/deployments/networks/dashboard/grafana/beefy-dashboard.json @@ -8,14 +8,22 @@ "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "editable": true, + "fiscalYearStartMonth": 0, "gnetId": null, "graphTooltip": 0, "links": [], + "liveNow": false, "panels": [ { "alert": { @@ -81,28 +89,6 @@ "dashLength": 10, "dashes": false, "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "custom": { - "align": null - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -125,8 +111,11 @@ "lines": true, "linewidth": 1, "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "8.2.6", "pointradius": 2, "points": false, "renderer": "flot", @@ -136,24 +125,33 @@ "steppedLine": false, "targets": [ { + "exemplar": true, "expr": "substrate_beefy_best_block{chain=\"rialto_local\"}", - "legendFormat": "Rialto(Charlie)", + "interval": "", + "legendFormat": "", "refId": "A" }, { + "exemplar": true, "expr": "substrate_beefy_best_block{chain=\"millau_local\"}", - "legendFormat": "Millau(Charlie)", + "hide": false, + "interval": "", + "legendFormat": "", "refId": "B" }, { - "expr": "max_over_time(substrate_beefy_best_block{chain=\"millau_local\"}[5m]) - min_over_time(substrate_beefy_best_block{chain=\"millau_local\"}[5m])", + "exemplar": true, + "expr": "increase(substrate_beefy_best_block{chain=\"millau_local\"}[5m])", "hide": true, + "interval": "", "legendFormat": "Millau Best Beefy blocks count in last 5 minutes", "refId": "C" }, { - "expr": "max_over_time(substrate_beefy_best_block{chain=\"rialto_local\"}[5m]) - min_over_time(substrate_beefy_best_block{chain=\"rialto_local\"}[5m])", + "exemplar": true, + "expr": "increase(substrate_beefy_best_block{chain=\"rialto_local\"}[5m])", "hide": true, + "interval": "", "legendFormat": "Rialto Best Beefy blocks count in last 5 minutes", "refId": "D" } @@ -164,7 +162,8 @@ "fill": true, "line": true, "op": "lt", - "value": 1 + "value": 1, + "visible": true } ], "timeFrom": null, @@ -208,98 +207,79 @@ } }, { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "custom": {}, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "yellow", - "value": null - }, - { - "color": "yellow", - "value": null - } - ] + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + }, + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "or" + }, + "query": { + "params": [ + "B", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" } - }, - "overrides": [] - }, - "gridPos": { - "h": 14, - "w": 11, - "x": 12, - "y": 0 - }, - "id": 4, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "textMode": "auto" + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Beefy Lagging Sessions alert", + "noDataState": "no_data", + "notifications": [] }, - "pluginVersion": "7.1.3", - "targets": [ - { - "expr": "substrate_beefy_should_vote_on{chain=\"rialto_local\"}", - "legendFormat": "Rialto(Charlie) Should-Vote-On", - "refId": "C" - }, - { - "expr": "substrate_beefy_round_concluded{chain=\"rialto_local\"}", - "legendFormat": "Rialto(Charlie) Round-Concluded", - "refId": "A" - }, - { - "expr": "substrate_beefy_should_vote_on{chain=\"millau_local\"}", - "legendFormat": "Millau(Charlie) Should-Vote-On", - "refId": "D" - }, - { - "expr": "substrate_beefy_round_concluded{chain=\"millau_local\"}", - "legendFormat": "Millau(Charlie) Round-Concluded", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Beefy Voting Rounds", - "type": "stat" - }, - { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, - "w": 18, - "x": 0, - "y": 14 + "h": 14, + "w": 6, + "x": 12, + "y": 0 }, "hiddenSeries": false, - "id": 6, + "id": 8, "legend": { "avg": false, "current": false, @@ -312,8 +292,11 @@ "lines": true, "linewidth": 1, "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "8.2.6", "pointradius": 2, "points": false, "renderer": "flot", @@ -323,21 +306,34 @@ "steppedLine": false, "targets": [ { - "expr": "substrate_beefy_votes_sent{chain=\"rialto_local\"}", - "legendFormat": "Rialto (node Charlie)", + "exemplar": true, + "expr": "substrate_beefy_lagging_sessions{chain=\"rialto_local\"}", + "interval": "", + "legendFormat": "", "refId": "A" }, { - "expr": "substrate_beefy_votes_sent{chain=\"millau_local\"}", - "legendFormat": "Millau (node Charlie)", + "exemplar": true, + "expr": "substrate_beefy_lagging_sessions{chain=\"millau_local\"}", + "interval": "", + "legendFormat": "", "refId": "B" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Beefy Votes Sent", + "title": "Beefy Lagging Sessions", "tooltip": { "shared": true, "sort": 0, @@ -375,101 +371,21 @@ } }, { - "alert": { - "alertRuleTags": {}, - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - }, - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "or" - }, - "query": { - "params": [ - "B", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "for": "5m", - "frequency": "1m", - "handler": 1, - "name": "Beefy Lagging Sessions alert", - "noDataState": "no_data", - "notifications": [] - }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "custom": { - "align": null - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - }, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, - "w": 5, - "x": 18, + "h": 12, + "w": 18, + "x": 0, "y": 14 }, "hiddenSeries": false, - "id": 8, + "id": 6, "legend": { "avg": false, "current": false, @@ -482,8 +398,11 @@ "lines": true, "linewidth": 1, "nullPointMode": "null", + "options": { + "alertThreshold": true + }, "percentage": false, - "pluginVersion": "7.1.3", + "pluginVersion": "8.2.6", "pointradius": 2, "points": false, "renderer": "flot", @@ -493,29 +412,25 @@ "steppedLine": false, "targets": [ { - "expr": "substrate_beefy_lagging_sessions{chain=\"rialto_local\"}", - "legendFormat": "Rialto(Charlie)", + "exemplar": true, + "expr": "substrate_beefy_votes_sent{chain=\"rialto_local\"}", + "interval": "", + "legendFormat": "", "refId": "A" }, { - "expr": "substrate_beefy_lagging_sessions{chain=\"millau_local\"}", - "legendFormat": "Millau(Charlie)", + "exemplar": true, + "expr": "substrate_beefy_votes_sent{chain=\"millau_local\"}", + "interval": "", + "legendFormat": "", "refId": "B" } ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0 - } - ], + "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Beefy Lagging Sessions", + "title": "Beefy Votes Sent", "tooltip": { "shared": true, "sort": 0, @@ -554,14 +469,14 @@ } ], "refresh": "5s", - "schemaVersion": 26, + "schemaVersion": 32, "style": "dark", "tags": [], "templating": { "list": [] }, "time": { - "from": "now-5m", + "from": "now-30m", "to": "now" }, "timepicker": { diff --git a/deployments/networks/dashboard/prometheus/millau-targets.yml b/deployments/networks/dashboard/prometheus/millau-targets.yml index c7a06509276f9..5890c8fb3fdf2 100644 --- a/deployments/networks/dashboard/prometheus/millau-targets.yml +++ b/deployments/networks/dashboard/prometheus/millau-targets.yml @@ -1,2 +1,6 @@ - targets: + - millau-node-alice:9615 + - millau-node-bob:9615 - millau-node-charlie:9615 + - millau-node-dave:9615 + - millau-node-eve:9615 diff --git a/deployments/networks/dashboard/prometheus/rialto-targets.yml b/deployments/networks/dashboard/prometheus/rialto-targets.yml index 9de26b9a2d7f3..0c89926e8c35a 100644 --- a/deployments/networks/dashboard/prometheus/rialto-targets.yml +++ b/deployments/networks/dashboard/prometheus/rialto-targets.yml @@ -1,2 +1,6 @@ - targets: + - rialto-node-alice:9615 + - rialto-node-bob:9615 - rialto-node-charlie:9615 + - rialto-node-dave:9615 + - rialto-node-eve:9615 diff --git a/deployments/networks/millau.yml b/deployments/networks/millau.yml index 0f5846571aa22..d91c5d83286ef 100644 --- a/deployments/networks/millau.yml +++ b/deployments/networks/millau.yml @@ -20,11 +20,13 @@ services: - --enable-offchain-indexing=true - --unsafe-rpc-external - --unsafe-ws-external + - --prometheus-external environment: - RUST_LOG: runtime=trace,rpc=debug,txpool=trace,runtime::bridge=trace,sc_basic_authorship=trace,beefy=debug,xcm=trace + RUST_LOG: runtime=trace,rpc=debug,txpool=trace,runtime::bridge=trace,sc_basic_authorship=trace,beefy=trace,xcm=trace ports: - "19933:9933" - "19944:9944" + - "19615:9615" millau-node-bob: <<: *millau-bridge-node @@ -39,9 +41,11 @@ services: - --enable-offchain-indexing=true - --unsafe-rpc-external - --unsafe-ws-external + - --prometheus-external ports: - "20033:9933" - "20044:9944" + - "20015:9615" millau-node-charlie: <<: *millau-bridge-node @@ -59,7 +63,7 @@ services: ports: - "20133:9933" - "20144:9944" - - "20615:9615" + - "20115:9615" millau-node-dave: <<: *millau-bridge-node @@ -73,9 +77,11 @@ services: - --enable-offchain-indexing=true - --unsafe-rpc-external - --unsafe-ws-external + - --prometheus-external ports: - "20233:9933" - "20244:9944" + - "20215:9615" millau-node-eve: <<: *millau-bridge-node @@ -89,13 +95,19 @@ services: - --enable-offchain-indexing=true - --unsafe-rpc-external - --unsafe-ws-external + - --prometheus-external ports: - "20333:9933" - "20344:9944" + - "20315:9615" # Note: These are being overridden from the top level `monitoring` compose file. prometheus-metrics: volumes: - ./networks/dashboard/prometheus/millau-targets.yml:/etc/prometheus/targets-millau-nodes.yml depends_on: + - millau-node-alice + - millau-node-bob - millau-node-charlie + - millau-node-dave + - millau-node-eve diff --git a/deployments/networks/rialto.yml b/deployments/networks/rialto.yml index 5a8ed64e067a3..fab85b89c043f 100644 --- a/deployments/networks/rialto.yml +++ b/deployments/networks/rialto.yml @@ -20,11 +20,13 @@ services: - --enable-offchain-indexing=true - --unsafe-rpc-external - --unsafe-ws-external + - --prometheus-external environment: - RUST_LOG: runtime=trace,rpc=debug,txpool=trace,runtime::bridge=trace,beefy=debug,xcm=trace + RUST_LOG: runtime=trace,rpc=debug,txpool=trace,runtime::bridge=trace,beefy=trace,xcm=trace ports: - "9933:9933" - "9944:9944" + - "9915:9615" rialto-node-bob: <<: *rialto-bridge-node @@ -39,9 +41,11 @@ services: - --enable-offchain-indexing=true - --unsafe-rpc-external - --unsafe-ws-external + - --prometheus-external ports: - "10033:9933" - "10044:9944" + - "10015:9615" rialto-node-charlie: <<: *rialto-bridge-node @@ -59,7 +63,7 @@ services: ports: - "10133:9933" - "10144:9944" - - "10615:9615" + - "10115:9615" rialto-node-dave: <<: *rialto-bridge-node @@ -73,9 +77,11 @@ services: - --enable-offchain-indexing=true - --unsafe-rpc-external - --unsafe-ws-external + - --prometheus-external ports: - "10233:9933" - "10244:9944" + - "10215:9615" rialto-node-eve: <<: *rialto-bridge-node @@ -89,9 +95,11 @@ services: - --enable-offchain-indexing=true - --unsafe-rpc-external - --unsafe-ws-external + - --prometheus-external ports: - "10333:9933" - "10344:9944" + - "10315:9615" rialto-chainspec-exporter: image: ${RIALTO_BRIDGE_NODE_IMAGE:-paritytech/rialto-bridge-node} @@ -105,7 +113,11 @@ services: volumes: - ./networks/dashboard/prometheus/rialto-targets.yml:/etc/prometheus/targets-rialto-nodes.yml depends_on: + - rialto-node-alice + - rialto-node-bob - rialto-node-charlie + - rialto-node-dave + - rialto-node-eve # we're using `/rialto-share` to expose Rialto chain spec to those who are interested. Right # now it is Rialto Parachain collator nodes. Local + tmpfs combination allows sharing writable