Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

collect better memory stats #3612

Merged
merged 6 commits into from
Aug 13, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 41 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ cli = [
"frame-benchmarking-cli",
"try-runtime-cli",
"polkadot-node-core-pvf",
# memory stats require jemalloc, which we know is enabled for linux
# but not present on wasm or windows
# https://github.com/paritytech/parity-common/blob/master/parity-util-mem/src/allocators.rs#L9-L34
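# TODO: once target-specific features are supported by cargo, see
# https://github.com/rust-lang/cargo/issues/1197
# enable this feature only for the targets where jemalloc is actually used.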
"service/memory-stats",
]
browser = [
"wasm-bindgen",
Expand Down
6 changes: 6 additions & 0 deletions node/metrics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,9 @@ sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-application-crypto = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-keystore = { git = "https://github.com/paritytech/substrate", branch = "master" }
substrate-prometheus-endpoint = { git = "https://github.com/paritytech/substrate", branch = "master" }

jemalloc-ctl = { version = "0.3.3", optional = true }

[features]
default = []
memory-stats = ["jemalloc-ctl"]
7 changes: 7 additions & 0 deletions node/metrics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ use std::{

pub use metered_channel as metered;

/// Memory allocation stats tracking.
#[cfg(feature = "memory-stats")]
pub mod memory_stats;

#[cfg(feature = "memory-stats")]
pub use self::memory_stats::{MemoryAllocationSnapshot, MemoryAllocationTracker};

/// This module reexports Prometheus types and defines the [`Metrics`] trait.
pub mod metrics {
/// Reexport Substrate Prometheus types.
Expand Down
66 changes: 66 additions & 0 deletions node/metrics/src/memory_stats.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

//! Memory tracking statistics.
//!
//! Provides [`MemoryAllocationTracker`], which queries the jemalloc allocator for its
//! statistics, and [`MemoryAllocationSnapshot`], which carries the allocated and resident
//! byte counts obtained from a single query.
drahnr marked this conversation as resolved.
Show resolved Hide resolved
//!
//! The enclosing crate also reexports the Prometheus metric types that subsystems are expected to implement.
drahnr marked this conversation as resolved.
Show resolved Hide resolved

// #[cfg(not(feature = "memory-stats"))]
// use std::convert::Infallible;

use jemalloc_ctl::{epoch, stats, Result};

/// Accessor to the allocator internals.
///
/// Holds the jemalloc control handles that are resolved once in
/// [`MemoryAllocationTracker::new`] and reused for every call to
/// [`MemoryAllocationTracker::snapshot`].
#[derive(Clone)]
pub struct MemoryAllocationTracker {
/// Handle used to advance the allocator's statistics epoch before reading.
epoch: jemalloc_ctl::epoch_mib,
/// Handle used to read the number of allocated bytes.
allocated: stats::allocated_mib,
/// Handle used to read the number of resident bytes.
resident: stats::resident_mib,
}

impl MemoryAllocationTracker {
    /// Build a tracker by resolving the jemalloc control handles it needs.
    ///
    /// # Errors
    ///
    /// Returns the `jemalloc_ctl` error if any of the `epoch`, `stats::allocated`
    /// or `stats::resident` handles cannot be obtained.
    pub fn new() -> Result<Self> {
        // Resolve the handles one by one, bailing out on the first failure.
        let epoch = epoch::mib()?;
        let allocated = stats::allocated::mib()?;
        let resident = stats::resident::mib()?;

        Ok(Self { epoch, allocated, resident })
    }

    /// Take a fresh reading of the allocator statistics.
    ///
    /// # Errors
    ///
    /// Returns the `jemalloc_ctl` error if advancing the epoch or reading either
    /// statistic fails.
    pub fn snapshot(&self) -> Result<MemoryAllocationSnapshot> {
        // The statistics are only updated when the allocation epoch is advanced,
        // so do that before reading anything.
        self.epoch.advance()?;

        let allocated_bytes = self.allocated.read()? as u64;
        let resident_bytes = self.resident.read()? as u64;

        Ok(MemoryAllocationSnapshot { resident: resident_bytes, allocated: allocated_bytes })
    }
}

/// Snapshot of collected memory metrics.
///
/// A plain value type made of two byte counters. It is `Copy` and comparable so
/// that it can be passed around by value and checked for equality without
/// explicit clones.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MemoryAllocationSnapshot {
    /// Total resident memory, in bytes.
    pub resident: u64,
    /// Total allocated memory, in bytes.
    pub allocated: u64,
}
4 changes: 4 additions & 0 deletions node/overseer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,7 @@ futures = { version = "0.3.15", features = ["thread-pool"] }
femme = "2.1.1"
kv-log-macro = "1.0.7"
assert_matches = "1.4.0"

[features]
default = []
memory-stats = ["polkadot-node-metrics/memory-stats"]
25 changes: 25 additions & 0 deletions node/overseer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ use polkadot_node_metrics::{
metrics::{prometheus, Metrics as MetricsTrait},
Metronome,
};

#[cfg(feature = "memory-stats")]
use polkadot_node_metrics::memory_stats::MemoryAllocationTracker;

pub use polkadot_overseer_gen as gen;
pub use polkadot_overseer_gen::{
overlord, FromOverseer, MapSubsystem, MessagePacket, SignalsReceived, SpawnNamed, Subsystem,
Expand Down Expand Up @@ -694,9 +698,30 @@ where
}
let subsystem_meters = overseer.map_subsystems(ExtractNameAndMeters);

#[cfg(feature = "memory-stats")]
let memory_stats = MemoryAllocationTracker::new().expect("Jemalloc is the default allocator. qed");

let metronome_metrics = metrics.clone();
let metronome =
Metronome::new(std::time::Duration::from_millis(950)).for_each(move |_| {
#[cfg(feature = "memory-stats")]
match memory_stats.snapshot() {
Ok(memory_stats_snapshot) => {
tracing::trace!(
target: LOG_TARGET,
"memory_stats: {:?}",
&memory_stats_snapshot
);
metronome_metrics.memory_stats_snapshot(memory_stats_snapshot);
},

Err(e) => tracing::debug!(
target: LOG_TARGET,
"Failed to obtain memory stats: {:?}",
e
),
}

// We combine the amount of messages from subsystems to the overseer
// as well as the amount of messages from external sources to the overseer
// into one `to_overseer` value.
Expand Down
37 changes: 37 additions & 0 deletions node/overseer/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
use super::*;
use polkadot_node_metrics::metrics::{self, prometheus};

#[cfg(feature = "memory-stats")]
use polkadot_node_metrics::MemoryAllocationSnapshot;

/// Overseer Prometheus metrics.
#[derive(Clone)]
struct MetricsInner {
Expand All @@ -31,6 +34,12 @@ struct MetricsInner {
to_subsystem_unbounded_received: prometheus::GaugeVec<prometheus::U64>,
signals_sent: prometheus::GaugeVec<prometheus::U64>,
signals_received: prometheus::GaugeVec<prometheus::U64>,

#[cfg(feature = "memory-stats")]
memory_stats_resident: prometheus::Gauge<prometheus::U64>,

#[cfg(feature = "memory-stats")]
memory_stats_allocated: prometheus::Gauge<prometheus::U64>,
}

/// A shareable metrics type for usage with the overseer.
Expand All @@ -56,6 +65,16 @@ impl Metrics {
}
}

/// Publish a memory snapshot to the allocated/resident gauges.
///
/// Does nothing when this `Metrics` instance holds no inner metrics.
#[cfg(feature = "memory-stats")]
pub(crate) fn memory_stats_snapshot(&self, memory_stats: MemoryAllocationSnapshot) {
    let inner = match &self.0 {
        Some(inner) => inner,
        // No inner metrics to update.
        None => return,
    };

    inner.memory_stats_allocated.set(memory_stats.allocated);
    inner.memory_stats_resident.set(memory_stats.resident);
}

pub(crate) fn channel_fill_level_snapshot(
&self,
collection: impl IntoIterator<Item = (&'static str, SubsystemMeterReadouts)>,
Expand Down Expand Up @@ -182,6 +201,24 @@ impl metrics::Metrics for Metrics {
)?,
registry,
)?,

#[cfg(feature = "memory-stats")]
memory_stats_allocated: prometheus::register(
prometheus::Gauge::<prometheus::U64>::new(
"parachain_overseer_memory_stats_allocated",
drahnr marked this conversation as resolved.
Show resolved Hide resolved
"Total bytes allocated by the node",
)?,
registry,
)?,

#[cfg(feature = "memory-stats")]
memory_stats_resident: prometheus::register(
prometheus::Gauge::<prometheus::U64>::new(
"parachain_overseer_memory_stats_resident",
drahnr marked this conversation as resolved.
Show resolved Hide resolved
"Bytes allocated by the node, and held in RAM",
)?,
registry,
)?,
};
Ok(Metrics(Some(metrics)))
}
Expand Down
1 change: 1 addition & 0 deletions node/service/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,4 @@ try-runtime = [
"rococo-runtime/try-runtime",
]
malus = ["full-node"]
memory-stats = ["polkadot-overseer/memory-stats"]