Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Add support for wasm runtime metrics try #2 #4483

Merged
merged 67 commits into from
Dec 16, 2021
Merged
Show file tree
Hide file tree
Changes from 55 commits
Commits
Show all changes
67 commits
Select commit Hold shift + click to select a range
e5ac481
Add runtime metrics provider
sandreim Dec 7, 2021
f2fedf5
Runner changes
sandreim Dec 7, 2021
3932b89
Some sample metrics in paras_inherent
sandreim Dec 7, 2021
c9099e9
update cargo toml
sandreim Dec 7, 2021
21c967b
fmt
sandreim Dec 7, 2021
b6205b2
bug
sandreim Dec 7, 2021
e058dc1
more fmt after merge
sandreim Dec 7, 2021
0d9760a
Refactor metric prefix override
sandreim Dec 8, 2021
ae187f7
fmt
sandreim Dec 8, 2021
3189117
remove bug comment
sandreim Dec 8, 2021
561c414
Add runtime metric primitives
sandreim Dec 8, 2021
bf5e5eb
Impl trace event parsing
sandreim Dec 8, 2021
e3385de
Update metrics
sandreim Dec 8, 2021
267fc85
cargo lock
sandreim Dec 8, 2021
82356e5
fmt
sandreim Dec 8, 2021
41c4034
Fix target check
sandreim Dec 8, 2021
a6ed5d4
Merge branch 'master' of github.com:paritytech/polkadot into sandreim…
sandreim Dec 9, 2021
6d8cb15
Runtime metrics primitives
sandreim Dec 9, 2021
d2381ea
Review feedback
sandreim Dec 9, 2021
155c9b7
Runtime metrics crate
sandreim Dec 9, 2021
2f27805
Node side runtime metric changes
sandreim Dec 9, 2021
cfd62ab
use runtime CounterVec instead of macro
sandreim Dec 9, 2021
e6855e1
fmt nice
sandreim Dec 9, 2021
90b78c8
remove dead code
sandreim Dec 9, 2021
ff02191
base58 decoding
sandreim Dec 9, 2021
975e46b
base58 encoding
sandreim Dec 9, 2021
6be0e3b
fix warn
sandreim Dec 9, 2021
fd612f1
typo
sandreim Dec 9, 2021
609bb74
Review feedback
sandreim Dec 10, 2021
c07f5cc
Finish label support
sandreim Dec 10, 2021
06af03b
fmt
sandreim Dec 10, 2021
c446960
please compile
sandreim Dec 10, 2021
470b976
add feature gate
sandreim Dec 10, 2021
1342382
fmt
sandreim Dec 10, 2021
fe95d3d
Comment cargo toml
sandreim Dec 10, 2021
1c66365
Fix cargo toml description
sandreim Dec 10, 2021
b4aa11d
merge master
sandreim Dec 10, 2021
129aae2
merge master
sandreim Dec 13, 2021
cff2691
Update doc.
sandreim Dec 13, 2021
a97a742
switch to `runtime-metrics` feature
sandreim Dec 13, 2021
eab86b2
fmt
sandreim Dec 13, 2021
9d64fd0
cargo toml
sandreim Dec 13, 2021
2703729
fix tests
sandreim Dec 13, 2021
fdc99dc
fixes
sandreim Dec 13, 2021
2eb038d
better ux
sandreim Dec 13, 2021
ca4dd9f
from_utf8_unchecked is safe
sandreim Dec 13, 2021
977f84d
fmt
sandreim Dec 13, 2021
e7c5636
Add Counter and refactor
sandreim Dec 13, 2021
9ed04c6
Fixes
sandreim Dec 13, 2021
09e0978
review fixes
sandreim Dec 14, 2021
a6442af
more fixes
sandreim Dec 14, 2021
04f517c
add integration test
sandreim Dec 14, 2021
1c44ffe
dev deps
sandreim Dec 14, 2021
90dd6fb
gitlab script update
sandreim Dec 14, 2021
7303a4a
review fixes
sandreim Dec 15, 2021
796338f
fix merge damage
sandreim Dec 15, 2021
0b6f0f2
Run tests twice
sandreim Dec 15, 2021
bc01068
small fix
sandreim Dec 15, 2021
e818617
typo
sandreim Dec 15, 2021
5a1050d
cargo lock
sandreim Dec 15, 2021
2febc4c
tests
sandreim Dec 15, 2021
fecd12b
spellcheck happy ?
sandreim Dec 15, 2021
0b405e0
more fixes
sandreim Dec 16, 2021
78620f6
Guard tracing init
sandreim Dec 16, 2021
061efe2
missing copyright
sandreim Dec 16, 2021
6dae53e
Merge remote-tracking branch 'origin/master' into sandreim/runtime_me…
Dec 16, 2021
f1d515e
update lockfile for substrate
Dec 16, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ panic = "unwind"
runtime-benchmarks= [ "polkadot-cli/runtime-benchmarks" ]
try-runtime = [ "polkadot-cli/try-runtime" ]
disputes = [ "polkadot-cli/disputes" ]
runtime-metrics = [ "polkadot-cli/runtime-metrics" ]

# Configuration for building a .deb package - for use with `cargo-deb`
[package.metadata.deb]
Expand Down
2 changes: 2 additions & 0 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ frame-benchmarking-cli = { git = "https://github.com/paritytech/substrate", bran
try-runtime-cli = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
sc-cli = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
sc-service = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }
polkadot-node-metrics = { path = "../node/metrics" }
sc-tracing = { git = "https://github.com/paritytech/substrate", branch = "master", optional = true }

# this crate is used only to enable `trie-memory-tracker` feature
Expand Down Expand Up @@ -66,3 +67,4 @@ rococo-native = [ "service/rococo-native" ]

malus = [ "full-node", "service/malus" ]
disputes = [ "service/disputes" ]
runtime-metrics = ["service/runtime-metrics", "polkadot-node-metrics/runtime-metrics"]
19 changes: 14 additions & 5 deletions cli/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,11 +222,20 @@ fn host_perf_check() -> Result<()> {
/// for integration tests as needed.
#[cfg(feature = "malus")]
pub fn run_node(run: Cli, overseer_gen: impl service::OverseerGen) -> Result<()> {
run_node_inner(run, overseer_gen)
run_node_inner(run, overseer_gen, |_logger_builder, _config| {})
}

fn run_node_inner(cli: Cli, overseer_gen: impl service::OverseerGen) -> Result<()> {
let runner = cli.create_runner(&cli.run.base).map_err(Error::from)?;
fn run_node_inner<F>(
cli: Cli,
overseer_gen: impl service::OverseerGen,
logger_hook: F,
) -> Result<()>
where
F: FnOnce(&mut sc_cli::LoggerBuilder, &sc_service::Configuration),
{
let runner = cli
.create_runner_with_logger_hook::<sc_cli::RunCmd, F>(&cli.run.base, logger_hook)
.map_err(Error::from)?;
let chain_spec = &runner.config().chain_spec;

set_default_ss58_version(chain_spec);
Expand Down Expand Up @@ -269,10 +278,10 @@ fn run_node_inner(cli: Cli, overseer_gen: impl service::OverseerGen) -> Result<(

/// Parses polkadot specific CLI arguments and run the service.
pub fn run() -> Result<()> {
let cli = Cli::from_args();
let cli: Cli = Cli::from_args();

match &cli.subcommand {
None => run_node_inner(cli, service::RealOverseerGen),
None => run_node_inner(cli, service::RealOverseerGen, polkadot_node_metrics::logger_hook()),
Some(Subcommand::BuildSpec(cmd)) => {
let runner = cli.create_runner(cmd)?;
Ok(runner.sync_run(|config| cmd.run(config.chain_spec, config.network))?)
Expand Down
6 changes: 6 additions & 0 deletions node/client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,9 @@ polkadot = ["polkadot-runtime"]
kusama = ["kusama-runtime"]
rococo = ["rococo-runtime"]
westend = ["westend-runtime"]
runtime-metrics = [
"rococo-runtime/runtime-metrics",
"kusama-runtime/runtime-metrics",
"westend-runtime/runtime-metrics",
"polkadot-runtime/runtime-metrics",
]
20 changes: 19 additions & 1 deletion node/metrics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,33 @@ name = "polkadot-node-metrics"
version = "0.9.13"
authors = ["Parity Technologies <admin@parity.io>"]
edition = "2018"
description = "Subsystem traits and message definitions"
description = "Subsystem metric helpers"

[dependencies]
futures = "0.3.17"
futures-timer = "3.0.2"
tracing = "0.1.29"

metered-channel = { path = "../metered-channel" }

# Both `sc-service` and `sc-cli` are required by runtime metrics `logger_hook()`.
sc-service = { git = "https://github.com/paritytech/substrate", branch = "master" }
sc-cli = { git = "https://github.com/paritytech/substrate", branch = "master" }

substrate-prometheus-endpoint = { git = "https://github.com/paritytech/substrate", branch = "master" }
sc-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }
codec = { package = "parity-scale-codec", version = "2.2.0" }
primitives = { package = "polkadot-primitives", path = "../../primitives/" }
bs58 = { version = "0.4.0", features = ["alloc"] }
log = "0.4.13"

[dev-dependencies]
assert_cmd = "2.0.2"
nix = "0.23.0"
tempfile = "3.2.0"
hyper = { version = "0.14.14", default-features = false, features = ["http1", "tcp"] }
tokio = "1.13"

[features]
default = []
runtime-metrics = []
14 changes: 14 additions & 0 deletions node/metrics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ pub use metered_channel as metered;
pub mod metronome;
pub use self::metronome::Metronome;

#[cfg(feature = "runtime-metrics")]
pub mod runtime;
#[cfg(feature = "runtime-metrics")]
pub use self::runtime::logger_hook;

/// Export a no-op logger hook when the `runtime-metrics` feature is not enabled.
///
/// The returned closure has the same shape as the feature-gated `runtime::logger_hook`
/// and ignores both the logger builder and the service configuration.
#[cfg(not(feature = "runtime-metrics"))]
pub fn logger_hook() -> impl FnOnce(&mut sc_cli::LoggerBuilder, &sc_service::Configuration) {
	|_logger_builder, _config| {}
}

/// This module reexports Prometheus types and defines the [`Metrics`] trait.
pub mod metrics {
/// Reexport Substrate Prometheus types.
Expand Down Expand Up @@ -69,3 +80,6 @@ pub mod metrics {
}
}
}

#[cfg(test)]
mod tests;
210 changes: 210 additions & 0 deletions node/metrics/src/runtime.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

//! Runtime Metrics helpers.
//!
//! A runtime metric provider implementation that builds on top of Substrate wasm
//! tracing support. This requires the custom profiler (`TraceHandler`) to be
//! registered in Substrate via a `logger_hook()`. Events emitted from the runtime are
//! then captured/processed by the `TraceHandler` implementation.
#![cfg(feature = "runtime-metrics")]
sandreim marked this conversation as resolved.
Show resolved Hide resolved

use codec::Decode;
use primitives::v1::{
RuntimeMetricLabelValues, RuntimeMetricOp, RuntimeMetricRegisterParams, RuntimeMetricUpdate,
};
use std::{
collections::hash_map::HashMap,
sync::{Arc, Mutex, MutexGuard},
};
use substrate_prometheus_endpoint::{
register, Counter, CounterVec, Opts, PrometheusError, Registry, U64,
};

/// Log target used by every `tracing` call in this module.
const LOG_TARGET: &str = "metrics::runtime";
/// Prefix prepended (followed by `_`) to every metric name coming from the runtime.
const METRIC_PREFIX: &str = "polkadot";

/// Holds the registered Prometheus metric collections.
///
/// Each map is wrapped in `Arc<Mutex<..>>`, so clones of this struct refer to the
/// same underlying maps.
#[derive(Clone, Default)]
pub struct Metrics {
/// Labeled counters, keyed by the metric name they were registered under.
counter_vecs: Arc<Mutex<HashMap<String, CounterVec<U64>>>>,
/// Plain (unlabeled) counters, keyed by the metric name they were registered under.
counters: Arc<Mutex<HashMap<String, Counter<U64>>>>,
}

/// Runtime metrics wrapper.
///
/// Field `0` is the Prometheus `Registry` that metrics get registered with; field `1`
/// holds the handles of the metrics registered so far.
#[derive(Clone)]
pub struct RuntimeMetricsProvider(Registry, Metrics);

impl RuntimeMetricsProvider {
/// Builds a provider around `metrics_registry`, starting with no registered metrics.
pub fn new(metrics_registry: Registry) -> Self {
	let metrics = Metrics::default();
	Self(metrics_registry, metrics)
}

/// Register a counter vec metric.
///
/// Does nothing when a counter vec is already tracked under `metric_name`. Otherwise a
/// new `CounterVec` is created from the description and labels in `params`, registered
/// with the Prometheus registry and stored under `metric_name`. A failure to create or
/// register the metric is returned to `with_counter_vecs_lock_held`.
pub fn register_countervec(&self, metric_name: &str, params: &RuntimeMetricRegisterParams) {
	self.with_counter_vecs_lock_held(|mut hashmap| {
		// Check first: building and registering the metric eagerly would attempt a
		// duplicate registration every time the same metric is announced again.
		if hashmap.contains_key(metric_name) {
			return Ok(())
		}

		let counter_vec = register(
			CounterVec::new(
				Opts::new(metric_name, params.description()),
				&params.labels().unwrap_or_default(),
			)?,
			&self.0,
		)?;
		hashmap.insert(metric_name.to_owned(), counter_vec);
		Ok(())
	})
}

/// Register a counter metric.
///
/// Does nothing when a counter is already tracked under `metric_name`. Otherwise a new
/// `Counter` is created with the description from `params`, registered with the
/// Prometheus registry and stored under `metric_name`. A failure to create or register
/// the metric is returned to `with_counters_lock_held`.
pub fn register_counter(&self, metric_name: &str, params: &RuntimeMetricRegisterParams) {
	self.with_counters_lock_held(|mut hashmap| {
		// Check first: building and registering the metric eagerly would attempt a
		// duplicate registration every time the same metric is announced again.
		if hashmap.contains_key(metric_name) {
			return Ok(())
		}

		let counter = register(Counter::new(metric_name, params.description())?, &self.0)?;
		hashmap.insert(metric_name.to_owned(), counter);
		Ok(())
	})
}

/// Increment a counter with labels by a value.
pub fn inc_counter_vec_by(&self, name: &str, value: u64, labels: &RuntimeMetricLabelValues) {
self.with_counter_vecs_lock_held(|mut hashmap| {
hashmap.entry(name.to_owned()).and_modify(|counter_vec| {
counter_vec.with_label_values(&labels.as_str_vec()).inc_by(value)
});

Ok(())
});
}

/// Adds `value` to the counter stored under `name`.
///
/// Nothing happens when no counter is stored under that name.
pub fn inc_counter_by(&self, name: &str, value: u64) {
	self.with_counters_lock_held(|hashmap| {
		if let Some(counter) = hashmap.get(name) {
			counter.inc_by(value);
		}
		Ok(())
	})
}

/// Runs `do_something` with the `counters` hashmap lock held.
///
/// If `lock()` returns an error, the error is logged under `LOG_TARGET` and
/// `do_something` is not called.
// NOTE(review): the `Result` produced by `do_something` ends up inside the value bound
// to `_` and is never inspected, so errors returned by the closure are dropped silently.
fn with_counters_lock_held<F>(&self, do_something: F)
where
F: FnOnce(MutexGuard<'_, HashMap<String, Counter<U64>>>) -> Result<(), PrometheusError>,
{
let _ = self.1.counters.lock().map(do_something).or_else(|error| {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: map_err is a mild improvement

tracing::error!(
target: LOG_TARGET,
"Cannot acquire the counter hashmap lock: {:?}",
error
);
Err(error)
});
}

/// Runs `do_something` with the `counter_vecs` hashmap lock held.
///
/// When the lock cannot be acquired, the error is logged and `do_something` is not
/// called. The `Result` returned by `do_something` is discarded.
fn with_counter_vecs_lock_held<F>(&self, do_something: F)
where
	F: FnOnce(MutexGuard<'_, HashMap<String, CounterVec<U64>>>) -> Result<(), PrometheusError>,
{
	match self.1.counter_vecs.lock() {
		Ok(guard) => {
			let _ = do_something(guard);
		},
		Err(error) => {
			tracing::error!(
				target: LOG_TARGET,
				"Cannot acquire the countervec hashmap lock: {:?}",
				error
			);
		},
	}
}
}

impl sc_tracing::TraceHandler for RuntimeMetricsProvider {
	/// Spans are ignored by this handler.
	fn handle_span(&self, _span: &sc_tracing::SpanDatum) {}

	/// Handles a trace event whose `target` string value is `"metrics"`.
	///
	/// The event's `params` string value is parsed and decoded into a
	/// `RuntimeMetricUpdate`, which is then applied. Events with a different or missing
	/// `target`, or without `params`, are ignored; a decoding failure is logged.
	fn handle_event(&self, event: &sc_tracing::TraceEvent) {
		let string_values = &event.values.string_values;

		let is_metrics_event = string_values
			.get("target")
			.map_or(false, |target| target.as_str() == "metrics");
		if !is_metrics_event {
			return
		}

		let update_op_bs58 = match string_values.get("params") {
			Some(params) => params,
			None => return,
		};

		// A parsing failure yields an empty buffer, which then fails to decode below.
		let raw_update =
			RuntimeMetricsProvider::parse_event_params(update_op_bs58.as_str()).unwrap_or_default();
		let mut input = raw_update.as_slice();

		// Deserialize the metric update struct.
		match RuntimeMetricUpdate::decode(&mut input) {
			Ok(update_op) => self.parse_metric_update(update_op),
			Err(e) => {
				tracing::error!(target: LOG_TARGET, "TraceEvent decode failed: {:?}", e);
			},
		}
	}
}

impl RuntimeMetricsProvider {
	/// Parse and execute the update operation.
	///
	/// The metric name is prefixed with `METRIC_PREFIX` and `_`. A `Register` op creates a
	/// plain counter when no labels are given and a counter vec otherwise; the increment
	/// ops are forwarded to the matching `inc_*` method.
	fn parse_metric_update(&self, update: RuntimeMetricUpdate) {
		let metric_name = &format!("{}_{}", METRIC_PREFIX, update.metric_name());

		match update.op {
			RuntimeMetricOp::Register(ref params) =>
				if params.labels.is_none() {
					self.register_counter(metric_name, params);
				} else {
					self.register_countervec(metric_name, params);
				},
			RuntimeMetricOp::IncrementCounterVec(value, ref labels) =>
				self.inc_counter_vec_by(metric_name, value, labels),
			RuntimeMetricOp::IncrementCounter(value) => self.inc_counter_by(metric_name, value),
		}
	}

	/// Extracts and `bs58`-decodes the metric update operation from an event's params.
	///
	/// The last two bytes (the `" }"` suffix) are dropped, then the `" { update_op: "`
	/// prefix is matched ignoring ASCII case, and the remainder is `bs58`-decoded.
	/// Returns `None` when the prefix does not match, when nothing follows the prefix,
	/// when decoding fails, or when a cut would fall inside a multi-byte character.
	fn parse_event_params(event_params: &str) -> Option<Vec<u8>> {
		const SKIP_CHARS: &str = " { update_op: ";

		// Shave " }" suffix. `get` is used instead of indexing so that input that is not
		// cut on a character boundary yields `None` rather than a panic.
		let new_len = event_params.len().saturating_sub(2);
		let event_params = event_params.get(..new_len)?;

		// Shave " { update_op: " prefix.
		if SKIP_CHARS.len() < event_params.len() {
			let prefix = event_params.get(..SKIP_CHARS.len())?;
			let payload = event_params.get(SKIP_CHARS.len()..)?;
			if SKIP_CHARS.eq_ignore_ascii_case(prefix) {
				return bs58::decode(payload.as_bytes()).into_vec().ok()
			}
		}

		// No event was parsed
		None
	}
}

/// Returns the custom profiling closure that we'll apply to the `LoggerBuilder`.
///
/// When the configuration has a Prometheus registry, the closure installs a
/// `RuntimeMetricsProvider` built on a clone of that registry as the custom profiler.
/// Without a registry it only emits a debug log and leaves the builder untouched.
pub fn logger_hook() -> impl FnOnce(&mut sc_cli::LoggerBuilder, &sc_service::Configuration) {
	|logger_builder, config| match config.prometheus_registry() {
		Some(registry) => {
			let metrics_provider = RuntimeMetricsProvider::new(registry.clone());
			logger_builder.with_custom_profiling(Box::new(metrics_provider));
		},
		None => {
			tracing::debug!(target: LOG_TARGET, "Prometheus registry is not configured.",);
		},
	}
}
Loading