From 1a57b7c503e0fe9eac97c68d77f1c499b0f0208a Mon Sep 17 00:00:00 2001 From: Zeeshan Lakhani Date: Mon, 26 Aug 2024 02:47:15 +0000 Subject: [PATCH] [network metrics] Update how we track and initialize stats This currently brings in all non virtual_disk stats/initializer/kstat changes from @bnaecker's https://github.com/oxidecomputer/propolis/pull/746 since that abstracts where and how we do start tracking kstats. Includes: - Straighten out a bunch of the initialiazation logic for the pre-existing metrics, producer server, and registry. This grew pretty organically, and was getting unwieldy and confusing. - Use virtual_machine:reset definition from TOML - Move kstat sampler creation to machine initialization process, rather than buried in the producer server startup. This lets us use the sampler in-line when initializing the vCPUs, where it should be much easier to re-use when we want to add more kstat-based metrics, such as Viona link stats. - Simplify and cleanup the ServerStats types, and remove the kstat sampler from it, which never should have been there at all. - Bump kstat-rs dep, which includes the new version that is cross-platform. This gets rid of a bunch of cfg-soup in the stats modules. --- Cargo.toml | 2 +- bin/propolis-server/src/lib/initializer.rs | 84 +++++- bin/propolis-server/src/lib/stats.rs | 276 +++++++++--------- .../src/lib/stats/network_interface.rs | 4 +- .../src/lib/stats/virtual_machine.rs | 9 +- bin/propolis-server/src/lib/vm/ensure.rs | 13 +- bin/propolis-server/src/lib/vm/objects.rs | 13 +- bin/propolis-server/src/lib/vm/services.rs | 58 ++-- 8 files changed, 238 insertions(+), 221 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fcb058838..46fbd6cf7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,7 +120,7 @@ http = "0.2.9" hyper = "0.14" indicatif = "0.17.3" inventory = "0.3.0" -kstat-rs = "0.2.3" +kstat-rs = "0.2.4" lazy_static = "1.4" libc = "0.2" mockall = "0.12" diff --git a/bin/propolis-server/src/lib/initializer.rs b/bin/propolis-server/src/lib/initializer.rs index fa02b99a8..c1ce91dbd 100644 --- a/bin/propolis-server/src/lib/initializer.rs +++ b/bin/propolis-server/src/lib/initializer.rs @@ -10,14 +10,19 @@ use std::os::unix::fs::FileTypeExt; use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; - use crate::serial::Serial; -use crate::stats::VirtualMachine; -use crate::vm::{BlockBackendMap, CrucibleBackendMap, DeviceMap, InterfaceIdentifiers}; +use crate::stats::{ + create_kstat_sampler, track_network_interface_kstats, track_vcpu_kstats, + VirtualMachine, +}; +use crate::vm::{ + BlockBackendMap, CrucibleBackendMap, DeviceMap, InterfaceIdentifiers, +}; use anyhow::{Context, Result}; use crucible_client_types::VolumeConstructionRequest; pub use nexus_client::Client as NexusClient; use oximeter::types::ProducerRegistry; +use oximeter_instruments::kstat::KstatSampler; use propolis::block; use propolis::chardev::{self, BlockingSource, Source}; @@ -112,7 +117,6 @@ pub struct MachineInitializer<'a> { pub(crate) log: slog::Logger, pub(crate) machine: &'a Machine, pub(crate) devices: DeviceMap, - pub(crate) interface_ids: InterfaceIdentifiers, pub(crate) block_backends: BlockBackendMap, pub(crate) crucible_backends: CrucibleBackendMap, pub(crate) spec: &'a InstanceSpecV0, @@ -583,10 +587,20 @@ impl<'a> MachineInitializer<'a> { Ok(()) } - pub fn initialize_network_devices( + /// Initialize network devices, add them to the device map, and attach them + /// to the chipset. + /// + /// If a KstatSampler is provided, this function will also track network + /// interface statistics. + pub async fn initialize_network_devices( &mut self, chipset: &RegisteredChipset, + kstat_sampler: &Option, ) -> Result<(), Error> { + // Only create the vector if the kstat_sampler is exists + let mut interface_ids: Option = + kstat_sampler.as_ref().map(|_| Vec::new()); + for (name, vnic_spec) in &self.spec.devices.network_devices { info!(self.log, "Creating vNIC {}", name); let instance_spec::v0::NetworkDeviceV0::VirtioNic(vnic_spec) = @@ -606,7 +620,7 @@ impl<'a> MachineInitializer<'a> { ), ) })?; - let bdf: pci::Bdf = vnic_spec.pci_path.try_into().map_err(|e| { + let bdf = vnic_spec.pci_path.try_into().map_err(|e| { Error::new( ErrorKind::InvalidInput, format!("Couldn't get PCI BDF for vNIC {}: {}", name, e), @@ -635,10 +649,26 @@ impl<'a> MachineInitializer<'a> { )?; self.devices .insert(format!("pci-virtio-viona-{}", bdf), viona.clone()); - self.interface_ids - .push((vnic_spec.interface_id, viona.instance_id()?)); + + // Only push to interface_ids if kstat_sampler is Some + if let Some(ref mut ids) = interface_ids { + ids.push((vnic_spec.interface_id, viona.instance_id()?)); + } + chipset.pci_attach(bdf, viona); } + + kstat_sampler.as_ref().map(|sampler| { + interface_ids.map(|ids| { + track_network_interface_kstats( + &self.log, + sampler, + self.properties, + ids, + ) + }) + }); + Ok(()) } @@ -1045,13 +1075,49 @@ impl<'a> MachineInitializer<'a> { Ok(ramfb) } - pub fn initialize_cpus(&mut self) -> Result<(), Error> { + /// Initialize virtual CPUs by first setting their capabilities, inserting + /// them into the device map, and then, if a kstat sampler is provided, + /// tracking their kstats. + pub async fn initialize_cpus( + &mut self, + kstat_sampler: &Option, + ) -> Result<(), Error> { for vcpu in self.machine.vcpus.iter() { vcpu.set_default_capabs().unwrap(); // The vCPUs behave like devices, so add them to the list as well self.devices.insert(format!("vcpu-{}", vcpu.id), vcpu.clone()); } + if let Some(sampler) = kstat_sampler.as_ref() { + track_vcpu_kstats(&self.log, sampler, self.properties).await; + } Ok(()) } + + /// Create an object used to sample kstats. + /// + /// This object is currently used to generate vCPU metrics, though guest NIC + /// metrics will be included soon. + pub(crate) fn create_kstat_sampler(&self) -> Option { + let Some(registry) = &self.producer_registry else { + return None; + }; + let sampler = create_kstat_sampler( + &self.log, + u32::from(self.properties.vcpus), + self.spec.devices.network_devices.len() as u32, + )?; + match registry.register_producer(sampler.clone()) { + Ok(_) => Some(sampler), + Err(e) => { + slog::error!( + self.log, + "Failed to register kstat sampler in producer \ + registry, no kstat-based metrics will be produced"; + "error" => ?e, + ); + None + } + } + } } diff --git a/bin/propolis-server/src/lib/stats.rs b/bin/propolis-server/src/lib/stats.rs index da8d58e60..19eb1ac5b 100644 --- a/bin/propolis-server/src/lib/stats.rs +++ b/bin/propolis-server/src/lib/stats.rs @@ -4,39 +4,43 @@ //! Methods for starting an Oximeter endpoint and gathering server-level stats. +use std::net::SocketAddr; +use std::sync::{Arc, Mutex}; + use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::api::internal::nexus::ProducerKind; use oximeter::{ - types::{Cumulative, ProducerRegistry, Sample}, - Metric, MetricsError, Producer, + types::{ProducerRegistry, Sample}, + MetricsError, Producer, }; -// Propolis is built and some tests are run on non-illumos systems. The real -// `kstat` infrastructure cannot be built there, so some conditional compilation -// tricks are needed -#[cfg(all(not(test), target_os = "illumos"))] use oximeter_instruments::kstat::KstatSampler; -use oximeter_producer::{self, Config, Server}; +use oximeter_producer::{Config, Server}; +use propolis_api_types::InstanceProperties; use slog::Logger; -use std::{ - net::SocketAddr, - sync::{Arc, Mutex}, -}; use crate::server::MetricsEndpointConfig; +use crate::vm::InterfaceIdentifiers; mod network_interface; mod pvpanic; mod virtual_machine; -pub(crate) use self::network_interface::InstanceNetworkInterfaces; +#[cfg(all(not(test), target_os = "illumos"))] +use self::network_interface::InstanceNetworkInterfaces; pub(crate) use self::pvpanic::PvpanicProducer; pub(crate) use self::virtual_machine::VirtualMachine; /// Interval on which we ask `oximeter` to poll us for metric data. -/// -/// This is used when setting-up the producer endpoint. +// +// Note that some statistics, like those based on kstats, are sampled more +// densely than this proactively. Their sampling rate is decoupled from this +// poll interval. Others, like the virtual disk stats, are updated all the time, +// but we only generate _samples_ from that when `oximeter` comes polling. +// +// In short, set this to the minimum interval on which you'd like those +// statistics to be sampled. const OXIMETER_STAT_INTERVAL: tokio::time::Duration = - tokio::time::Duration::from_secs(30); + tokio::time::Duration::from_secs(10); /// Interval on which we sample instance/guest network interface metrics. /// @@ -51,78 +55,74 @@ const NETWORK_INTERFACE_SAMPLE_INTERVAL: std::time::Duration = const VCPU_KSTAT_INTERVAL: std::time::Duration = std::time::Duration::from_secs(5); -// The kstat sampler includes a limit to its internal buffers for each target, -// to avoid growing without bound. This defaults to 500 samples. Since we have 5 -// vCPU microstates for which we track occupancy and up to 64 vCPUs, we can -// easily run up against this default. -// -// This limit provides extra space for up to 64 samples per vCPU per microstate, -// to ensure we don't throw away too much data if oximeter cannot reach us. -#[cfg(all(not(test), target_os = "illumos"))] +/// The kstat sampler includes a limit to its internal buffers for each target, +/// to avoid growing without bound. We introduce this buffer as a multiplier +/// for extra space. +const SAMPLE_BUFFER: u32 = 64; + +/// The kstat sampler includes a limit to its internal buffers for each target, +/// to avoid growing without bound. This defaults to 500 samples. Since we have 5 +/// vCPU microstates for which we track occupancy and up to 64 vCPUs, we can +/// easily run up against this default. +/// +/// This limit provides extra space for up to 64 samples per vCPU per microstate, +/// to ensure we don't throw away too much data if oximeter cannot reach us. const KSTAT_LIMIT_PER_VCPU: u32 = - crate::stats::virtual_machine::N_VCPU_MICROSTATES * 64; - -/// An Oximeter `Metric` that specifies the number of times an instance was -/// reset via the server API. -#[derive(Debug, Default, Copy, Clone, Metric)] -struct Reset { - /// The number of times this instance was reset via the API. - #[datum] - pub count: Cumulative, -} + crate::stats::virtual_machine::N_VCPU_MICROSTATES * SAMPLE_BUFFER; -/// The full set of server-level metrics, collated by -/// [`ServerStatsOuter::produce`] into the types needed to relay these -/// statistics to Oximeter. +/// Shared type for tracking metrics about the Propolis API server itself. #[derive(Clone, Debug)] -struct ServerStats { +struct ServerStatsInner { /// The oximeter Target identifying this instance as the source of metric /// data. virtual_machine: VirtualMachine, /// The reset count for the relevant instance. - run_count: Reset, + run_count: virtual_machine::Reset, } -impl ServerStats { +impl ServerStatsInner { pub fn new(virtual_machine: VirtualMachine) -> Self { - ServerStats { virtual_machine, run_count: Default::default() } + ServerStatsInner { + virtual_machine, + run_count: virtual_machine::Reset { datum: Default::default() }, + } } } -/// The public wrapper for server-level metrics. +/// Type publishing metrics about the Propolis API server itself. +// +// NOTE: This type is shared with the server and the oximeter producer. The +// former updates stats as API requests are handled or other actions taken, and +// the latter collects the stats when oximeter requests them. #[derive(Clone, Debug)] -pub struct ServerStatsOuter { - server_stats_wrapped: Arc>, - #[cfg(all(not(test), target_os = "illumos"))] - kstat_sampler: Option, +pub struct ServerStats { + inner: Arc>, } -impl ServerStatsOuter { - /// Increments the number of times the instance was reset. +impl ServerStats { + /// Create new server stats, representing the provided instance. + pub fn new(vm: VirtualMachine) -> Self { + Self { inner: Arc::new(Mutex::new(ServerStatsInner::new(vm))) } + } + + /// Increments the number of times the managed instance was reset. pub fn count_reset(&self) { - let mut inner = self.server_stats_wrapped.lock().unwrap(); - let datum = inner.run_count.datum_mut(); - *datum += 1; + self.inner.lock().unwrap().run_count.datum.increment(); } } -impl Producer for ServerStatsOuter { +impl Producer for ServerStats { fn produce( &mut self, ) -> Result + 'static>, MetricsError> { let run_count = { - let inner = self.server_stats_wrapped.lock().unwrap(); + let inner = self.inner.lock().unwrap(); std::iter::once(Sample::new( &inner.virtual_machine, &inner.run_count, )?) }; - #[cfg(all(not(test), target_os = "illumos"))] - if let Some(sampler) = self.kstat_sampler.as_mut() { - let samples = sampler.produce()?; - return Ok(Box::new(run_count.chain(samples))); - } Ok(Box::new(run_count)) } } @@ -183,112 +183,98 @@ pub fn start_oximeter_server( Server::with_registry(registry.clone(), &config) } -/// Creates and registers a set of server-level metrics for an instance. -/// -/// This attempts to initialize kstat-based metrics for vCPU usage data. This -/// may fail, in which case those metrics will be unavailable. -// -// NOTE: The logger is unused if we don't pass it to `setup_kstat_tracking` -// internally, so ignore that clippy lint. -#[cfg_attr(not(all(not(test), target_os = "illumos")), allow(unused_variables))] -pub async fn register_server_metrics( - registry: &ProducerRegistry, - nics: InstanceNetworkInterfaces, - virtual_machine: VirtualMachine, +/// Create an object that can be used to sample kstat-based metrics. +pub(crate) fn create_kstat_sampler( log: &Logger, -) -> anyhow::Result { - let stats = ServerStats::new(virtual_machine.clone()); - let stats_outer = ServerStatsOuter { - server_stats_wrapped: Arc::new(Mutex::new(stats)), - // Setup the collection of kstats for this instance. - #[cfg(all(not(test), target_os = "illumos"))] - kstat_sampler: setup_kstat_tracking(log, nics, virtual_machine).await, - }; - - registry.register_producer(stats_outer.clone())?; - - Ok(stats_outer) -} - -#[cfg(all(not(test), target_os = "illumos"))] -async fn setup_kstat_tracking( - log: &Logger, - nics: InstanceNetworkInterfaces, - virtual_machine: VirtualMachine, + n_vcpus: u32, + n_interfaces: u32, ) -> Option { - let n_interfaces = nics.interface_ids.len() as u32; let kstat_limit = usize::try_from( - (virtual_machine.n_vcpus() * KSTAT_LIMIT_PER_VCPU) - + (n_interfaces * KSTAT_LIMIT_PER_VCPU), + (n_vcpus * KSTAT_LIMIT_PER_VCPU) + (n_interfaces * SAMPLE_BUFFER), ) .unwrap(); - let sampler = KstatSampler::with_sample_limit(log, kstat_limit) - .map_err(|err| { + match KstatSampler::with_sample_limit(log, kstat_limit) { + Ok(sampler) => Some(sampler), + Err(e) => { slog::error!( log, "failed to create KstatSampler, \ - vCPU and link stats will be unavailable"; - "error" => ?err, + kstat-based stats will be unavailable"; + "error" => ?e, ); - err - }) - .ok()?; + None + } + } +} + +/// Track kstats required to publish vCPU metrics for this instance. +#[cfg(any(test, not(target_os = "illumos")))] +pub(crate) async fn track_vcpu_kstats( + log: &Logger, + _: &KstatSampler, + _: &InstanceProperties, +) { + slog::error!(log, "vCPU stats are not supported on this platform"); +} - let vcpu_details = oximeter_instruments::kstat::CollectionDetails::never( +/// Track kstats required to publish vCPU metrics for this instance. +#[cfg(all(not(test), target_os = "illumos"))] +pub(crate) async fn track_vcpu_kstats( + log: &Logger, + sampler: &KstatSampler, + properties: &InstanceProperties, +) { + let virtual_machine = VirtualMachine::from(properties); + let details = oximeter_instruments::kstat::CollectionDetails::never( VCPU_KSTAT_INTERVAL, ); + if let Err(e) = sampler.add_target(virtual_machine, details).await { + slog::error!( + log, + "failed to add VirtualMachine target, \ + vCPU stats will be unavailable"; + "error" => ?e, + ); + } +} - // We don't need to check the result of this operation, as we'll keep the - // sampler around even if we can't add the target. - let _ = sampler - .add_target(virtual_machine, vcpu_details) - .await - .map_err(|err| { - slog::error!( - log, - "failed to add VirtualMachine target, \ - vCPU stats will be unavailable"; - "error" => ?err, - ); - err - }) - .map(|_| { - slog::debug!( - log, - "Added new VirtualMachine target to kstat sampler" - ) - }); +/// Track kstats required to publish network interface metrics for this instance. +#[cfg(any(test, not(target_os = "illumos")))] +pub(crate) async fn track_network_interface_kstats( + log: &Logger, + _: &KstatSampler, + _: &InstanceProperties, + _: InterfaceIdentifiers, +) { + slog::error!( + log, + "network interface stats are not supported on this platform" + ); +} - let nic_details = oximeter_instruments::kstat::CollectionDetails::never( +/// Track kstats required to publish network interface metrics for this instance. +#[cfg(all(not(test), target_os = "illumos"))] +pub(crate) async fn track_network_interface_kstats( + log: &Logger, + sampler: &KstatSampler, + properties: &InstanceProperties, + interface_ids: InterfaceIdentifiers, +) { + let nics = InstanceNetworkInterfaces::new(properties, interface_ids); + let details = oximeter_instruments::kstat::CollectionDetails::never( NETWORK_INTERFACE_SAMPLE_INTERVAL, ); let interface_id = nics.target.interface_id; - - // We don't need to check the result of this operation, as we'll keep the - // sampler around even if we can't add the target. - let _ = sampler - .add_target(nics, nic_details) - .await - .map_err(|err| { - slog::error!( - log, - "failed to add network interface target, \ - network interface stats will be unavailable"; - "network_interface_id" => %interface_id, - "error" => ?err - ); - err - }) - .map(|_| { - slog::debug!( - log, - "Added new network interface target to kstat sampler"; - "network_interface_id" => %interface_id, - ) - }); - - Some(sampler) + if let Err(e) = sampler.add_target(nics, details).await { + slog::error!( + log, + "failed to add network interface targets, \ + network interface stats will be unavailable"; + "network_interface_id" => %interface_id, + "error" => ?e, + ); + } } #[cfg(all(not(test), target_os = "illumos"))] diff --git a/bin/propolis-server/src/lib/stats/network_interface.rs b/bin/propolis-server/src/lib/stats/network_interface.rs index c073f8cf9..ce8c4cfa2 100644 --- a/bin/propolis-server/src/lib/stats/network_interface.rs +++ b/bin/propolis-server/src/lib/stats/network_interface.rs @@ -102,7 +102,7 @@ fn extract_nic_kstats( /// A wrapper around the `oximeter::Target` representing all instance network interfaces. #[derive(Clone, Debug)] -pub struct InstanceNetworkInterfaces { +pub(crate) struct InstanceNetworkInterfaces { /// The `oximeter::Target` itself, storing the target fields for the /// timeseries. /// @@ -123,7 +123,7 @@ impl InstanceNetworkInterfaces { /// metrics from. pub(crate) fn new( properties: &InstanceProperties, - interface_ids: &InterfaceIdentifiers, + interface_ids: InterfaceIdentifiers, ) -> Self { Self { target: InstanceNetworkInterface { diff --git a/bin/propolis-server/src/lib/stats/virtual_machine.rs b/bin/propolis-server/src/lib/stats/virtual_machine.rs index be9dd5722..32febf24a 100644 --- a/bin/propolis-server/src/lib/stats/virtual_machine.rs +++ b/bin/propolis-server/src/lib/stats/virtual_machine.rs @@ -27,6 +27,7 @@ use std::collections::BTreeMap; // lives in that repo, at // `./omicron/oximeter/oximeter/schema/virtual-machine.toml`. oximeter::use_timeseries!("virtual-machine.toml"); +pub(crate) use self::virtual_machine::Reset; use self::virtual_machine::{ VcpuUsage, VirtualMachine as VirtualMachineTarget, }; @@ -59,14 +60,6 @@ pub(crate) struct VirtualMachine { vm_name: String, } -impl VirtualMachine { - /// Return the number of vCPUs in this VM. - #[cfg_attr(test, allow(dead_code))] - pub(crate) fn n_vcpus(&self) -> u32 { - self.n_vcpus - } -} - impl From<&propolis_api_types::InstanceProperties> for VirtualMachine { fn from(properties: &propolis_api_types::InstanceProperties) -> Self { Self { diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 5bda1d387..8e3378ea0 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -171,7 +171,6 @@ impl<'a> VmEnsureNotStarted<'a> { log: self.log.clone(), machine: &machine, devices: Default::default(), - interface_ids: Default::default(), block_backends: Default::default(), crucible_backends: Default::default(), spec: v0_spec, @@ -181,6 +180,12 @@ impl<'a> VmEnsureNotStarted<'a> { state: MachineInitializerState::default(), }; + // If we are publishing metrics to oximeter, then create a kstat-sampler + // now. This is used to track the vCPUs today, but will soon be used to + // track instance datalink stats as well. It can be provided to + // `initialize_network_devices()` at that time. + let kstat_sampler = init.create_kstat_sampler(); + init.initialize_rom(options.toml_config.bootrom.as_path())?; let chipset = init.initialize_chipset( &(event_queue.clone() @@ -194,7 +199,7 @@ impl<'a> VmEnsureNotStarted<'a> { let ps2ctrl = init.initialize_ps2(&chipset)?; init.initialize_qemu_debug_port()?; init.initialize_qemu_pvpanic(properties.into())?; - init.initialize_network_devices(&chipset)?; + init.initialize_network_devices(&chipset, &kstat_sampler).await?; #[cfg(not(feature = "omicron-build"))] init.initialize_test_devices(&options.toml_config.devices)?; @@ -214,7 +219,7 @@ impl<'a> VmEnsureNotStarted<'a> { .await?; let ramfb = init.initialize_fwcfg(v0_spec.devices.board.cpus)?; - init.initialize_cpus()?; + init.initialize_cpus(&kstat_sampler).await?; let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new( &machine, event_queue.clone() @@ -226,7 +231,6 @@ impl<'a> VmEnsureNotStarted<'a> { devices, block_backends, crucible_backends, - interface_ids, .. } = init; @@ -235,7 +239,6 @@ impl<'a> VmEnsureNotStarted<'a> { vcpu_tasks, machine, devices, - interface_ids, block_backends, crucible_backends, com1, diff --git a/bin/propolis-server/src/lib/vm/objects.rs b/bin/propolis-server/src/lib/vm/objects.rs index 16930deb6..58adac77e 100644 --- a/bin/propolis-server/src/lib/vm/objects.rs +++ b/bin/propolis-server/src/lib/vm/objects.rs @@ -22,8 +22,7 @@ use slog::{error, info}; use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use super::{ - state_driver::VmStartReason, BlockBackendMap, CrucibleBackendMap, - DeviceMap, InterfaceIdentifiers, + state_driver::VmStartReason, BlockBackendMap, CrucibleBackendMap, DeviceMap, }; use crate::{serial::Serial, vcpu_tasks::VcpuTaskController}; @@ -48,7 +47,6 @@ pub(super) struct InputVmObjects { pub vcpu_tasks: Box, pub machine: Machine, pub devices: DeviceMap, - pub interface_ids: InterfaceIdentifiers, pub block_backends: BlockBackendMap, pub crucible_backends: CrucibleBackendMap, pub com1: Arc>, @@ -74,9 +72,6 @@ pub(crate) struct VmObjectsLocked { /// operations (e.g. pause and resume) for eligible components. devices: DeviceMap, - /// The set of NIC identifiers that are currently attached to this VM. - interface_ids: InterfaceIdentifiers, - /// Maps from component names to trait objects that implement the block /// storage backend trait. block_backends: BlockBackendMap, @@ -127,7 +122,6 @@ impl VmObjectsLocked { vcpu_tasks: input.vcpu_tasks, machine: input.machine, devices: input.devices, - interface_ids: input.interface_ids, block_backends: input.block_backends, crucible_backends: input.crucible_backends, com1: input.com1, @@ -173,11 +167,6 @@ impl VmObjectsLocked { self.devices.get(name).cloned() } - /// Yields the VM's current NIC identifiers. - pub(crate) fn interface_ids(&self) -> &InterfaceIdentifiers { - &self.interface_ids - } - /// Yields the VM's current Crucible backend map. pub(crate) fn crucible_backends(&self) -> &CrucibleBackendMap { &self.crucible_backends diff --git a/bin/propolis-server/src/lib/vm/services.rs b/bin/propolis-server/src/lib/vm/services.rs index 013bfacf3..f34fd7cfb 100644 --- a/bin/propolis-server/src/lib/vm/services.rs +++ b/bin/propolis-server/src/lib/vm/services.rs @@ -14,7 +14,7 @@ use super::objects::{VmObjects, VmObjectsShared}; use crate::{ serial::SerialTaskControlMessage, server::MetricsEndpointConfig, - stats::{InstanceNetworkInterfaces, VirtualMachine}, + stats::{ServerStats, VirtualMachine}, vnc::VncServer, }; @@ -26,7 +26,7 @@ pub(crate) struct OximeterState { server: Option, /// The statistics object used by the API layer to record its metrics. - pub stats: Option, + pub stats: Option, } /// A collection of services visible to consumers outside this Propolis that @@ -35,7 +35,10 @@ pub(crate) struct VmServices { /// A VM's serial console handler task. pub serial_task: tokio::sync::Mutex>, - /// A VM's Oximeter server. + /// A VM's Oximeter state. + /// + /// This mostly contains the actual producer server, though the + /// "server-level stats" are also included here. pub oximeter: tokio::sync::Mutex, /// A reference to the VM's host process's VNC server. @@ -57,14 +60,7 @@ impl VmServices { let registry = ensure_options.oximeter_registry.as_ref().expect( "should have a producer registry if metrics are configured", ); - register_oximeter_producer( - log, - cfg, - registry, - vm_properties, - &vm_objects, - ) - .await + register_oximeter_producer(log, cfg, registry, vm_properties).await } else { OximeterState::default() }; @@ -113,15 +109,10 @@ async fn register_oximeter_producer( log: &slog::Logger, cfg: &MetricsEndpointConfig, registry: &ProducerRegistry, - instance_properties: &InstanceProperties, - vm_objects: &VmObjectsShared<'_>, + vm_properties: &InstanceProperties, ) -> OximeterState { let mut oximeter_state = OximeterState::default(); - let network_interfaces = InstanceNetworkInterfaces::new( - instance_properties, - vm_objects.interface_ids(), - ); - let virtual_machine = VirtualMachine::from(instance_properties); + let virtual_machine = VirtualMachine::from(vm_properties); // Create the server itself. // @@ -151,27 +142,16 @@ async fn register_oximeter_producer( // Assign our own metrics production for this VM instance to the // registry, letting the server actually return them to oximeter when // polled. - oximeter_state.stats = match crate::stats::register_server_metrics( - registry, - network_interfaces, - virtual_machine, - log, - ) - .await - { - Ok(stats) => Some(stats), - Err(e) => { - error!( - log, - "failed to register our server metrics with \ - the ProducerRegistry, no server stats will \ - be produced"; - "error" => ?e, - ); - - None - } - }; + let stats = ServerStats::new(virtual_machine); + if let Err(e) = registry.register_producer(stats.clone()) { + error!( + log, + "failed to register our server metrics with \ + the ProducerRegistry, no server stats will \ + be produced"; + "error" => ?e, + ); + } oximeter_state }