Skip to content

Commit

Permalink
Publish richer virtual disk statistics to oximeter (#746)
Browse files Browse the repository at this point in the history
* Publish richer virtual disk statistics to oximeter

- Use the `virtual_disk:*` timeseries defined in Omicron to publish
  statistics about the block operations performed on virtual disks.
- Add concept of a completion callback to the existing block tracking
  object, and expose a way to set the callback on a `block::Device`.
- Create object for managing all the statistics for one virtual disk
  backed by Crucible. This is shared between the existing oximeter
  producer, and the block devices, via the above completion callback
  mechanism. As I/Os complete, the stats are updated, and oximeter
  collects them periodically.
- Straighten out a bunch of the initialiazation logic for the
  pre-existing metrics, producer server, and registry. This grew pretty
  organically, and was getting unwieldy and confusing.
- Use `virtual_machine:reset` definition from TOML
- Move kstat sampler creation to machine initialization process, rather
  than buried in the producer server startup. This lets us use the
  sampler in-line when initializing the vCPUs, where it should be much
  easier to re-use when we want to add more kstat-based metrics, such as
  Viona link stats.
- Simplify and cleanup the `ServerStats` types, and remove the kstat
  sampler from it, which never should have been there at all.
- Bump kstat-rs dep, which includes the new version that is
  cross-platform. This gets rid of a bunch of cfg-soup in the stats
  modules.

* Shorten collection interval, since disk stats are only polled

* Small fixes

- Use 512B as the lowest I/O size histogram bin
- Update Crucible dep so we can test in CI
- Fixups for some cfg directives around kstats

* Review feedback

- Clarify when we create kstat sampler
- Style nits and comment cleanup
- Don't return early from blockdev initialization if we fail to track
  stats, just continue instead

* Rename oximeter collection interval for clarity

* Review feedback

- Remove default impl of `set_completion_callback()`
- Cleanup internals of `Tracking` creation

* Point back to Omicron / Crucible main branches
  • Loading branch information
bnaecker authored Aug 27, 2024
1 parent 39d6150 commit 88fbde7
Show file tree
Hide file tree
Showing 13 changed files with 1,493 additions and 451 deletions.
1,165 changes: 855 additions & 310 deletions Cargo.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ oximeter = { git = "https://github.com/oxidecomputer/omicron", branch = "main" }
sled-agent-client = { git = "https://github.com/oxidecomputer/omicron", branch = "main" }

# Crucible
crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" }
crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "e58ca3693cb9ce0438947beba10e97ee38a0966b" }
crucible = { git = "https://github.com/oxidecomputer/crucible", rev = "d037eeb9a56a8a62ff17266f340c011224d15146" }
crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "d037eeb9a56a8a62ff17266f340c011224d15146" }

# External dependencies
anyhow = "1.0"
Expand Down Expand Up @@ -120,7 +120,7 @@ http = "0.2.9"
hyper = "0.14"
indicatif = "0.17.3"
inventory = "0.3.0"
kstat-rs = "0.2.3"
kstat-rs = "0.2.4"
lazy_static = "1.4"
libc = "0.2"
mockall = "0.12"
Expand Down
92 changes: 83 additions & 9 deletions bin/propolis-server/src/lib/initializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};

use crate::serial::Serial;
use crate::stats::virtual_machine::VirtualMachine;
use crate::stats::{
create_kstat_sampler, track_vcpu_kstats, virtual_disk::VirtualDiskProducer,
virtual_machine::VirtualMachine,
};
use crate::vm::{BlockBackendMap, CrucibleBackendMap, DeviceMap};
use anyhow::{Context, Result};
use crucible_client_types::VolumeConstructionRequest;
pub use nexus_client::Client as NexusClient;
use oximeter::types::ProducerRegistry;

use oximeter_instruments::kstat::KstatSampler;
use propolis::block;
use propolis::chardev::{self, BlockingSource, Source};
use propolis::common::{Lifecycle, GB, MB, PAGE_SIZE};
Expand Down Expand Up @@ -491,7 +495,8 @@ impl<'a> MachineInitializer<'a> {
Nvme,
}

for (name, device_spec) in &self.spec.devices.storage_devices {
'devloop: for (name, device_spec) in &self.spec.devices.storage_devices
{
info!(
self.log,
"Creating storage device {} with properties {:?}",
Expand Down Expand Up @@ -542,14 +547,15 @@ impl<'a> MachineInitializer<'a> {
.await?;

self.block_backends.insert(backend_name.clone(), backend.clone());
match device_interface {
let block_dev: Arc<dyn block::Device> = match device_interface {
DeviceInterface::Virtio => {
let vioblk = virtio::PciVirtioBlock::new(0x100);

self.devices
.insert(format!("pci-virtio-{}", bdf), vioblk.clone());
block::attach(vioblk.clone(), backend).unwrap();
chipset.pci_attach(bdf, vioblk);
chipset.pci_attach(bdf, vioblk.clone());
vioblk
}
DeviceInterface::Nvme => {
// Limit data transfers to 1MiB (2^8 * 4k) in size
Expand All @@ -564,18 +570,59 @@ impl<'a> MachineInitializer<'a> {
self.devices
.insert(format!("pci-nvme-{bdf}"), nvme.clone());
block::attach(nvme.clone(), backend).unwrap();
chipset.pci_attach(bdf, nvme);
chipset.pci_attach(bdf, nvme.clone());
nvme
}
};

if let Some((id, backend)) = crucible {
let prev = self.crucible_backends.insert(id, backend);
if let Some((disk_id, backend)) = crucible {
let block_size = backend.block_size().await;
let prev = self.crucible_backends.insert(disk_id, backend);
if prev.is_some() {
return Err(Error::new(
ErrorKind::InvalidInput,
format!("multiple disks with id {}", id),
format!("multiple disks with id {}", disk_id),
));
}

let Some(block_size) = block_size else {
slog::error!(
self.log,
"Could not get Crucible backend block size, \
virtual disk metrics can't be reported for it";
"disk_id" => %disk_id,
);
continue 'devloop;
};

// Register the block device as a metric producer, if we've been
// setup to do so. Note we currently only do this for the Crucible
// backend, in which case we have the disk ID.
if let Some(registry) = &self.producer_registry {
let stats = VirtualDiskProducer::new(
block_size,
self.properties.id,
disk_id,
&self.properties.metadata,
);
if let Err(e) = registry.register_producer(stats.clone()) {
slog::error!(
self.log,
"Could not register virtual disk producer, \
metrics will not be produced";
"disk_id" => %disk_id,
"error" => ?e,
);
continue 'devloop;
};

// Set the on-completion callback for the block device, to
// update stats.
let callback = move |op, result, duration| {
stats.on_completion(op, result, duration);
};
block_dev.on_completion(Box::new(callback));
};
}
}
Ok(())
Expand Down Expand Up @@ -1041,13 +1088,40 @@ impl<'a> MachineInitializer<'a> {
Ok(ramfb)
}

pub fn initialize_cpus(&mut self) -> Result<(), Error> {
pub async fn initialize_cpus(
&mut self,
kstat_sampler: &Option<KstatSampler>,
) -> Result<(), Error> {
for vcpu in self.machine.vcpus.iter() {
vcpu.set_default_capabs().unwrap();

// The vCPUs behave like devices, so add them to the list as well
self.devices.insert(format!("vcpu-{}", vcpu.id), vcpu.clone());
}
if let Some(sampler) = kstat_sampler.as_ref() {
track_vcpu_kstats(&self.log, sampler, self.properties).await;
}
Ok(())
}

/// Create an object used to sample kstats.
pub(crate) fn create_kstat_sampler(&self) -> Option<KstatSampler> {
let Some(registry) = &self.producer_registry else {
return None;
};
let sampler =
create_kstat_sampler(&self.log, u32::from(self.properties.vcpus))?;
match registry.register_producer(sampler.clone()) {
Ok(_) => Some(sampler),
Err(e) => {
slog::error!(
self.log,
"Failed to register kstat sampler in producer \
registry, no kstat-based metrics will be produced";
"error" => ?e,
);
None
}
}
}
}
Loading

0 comments on commit 88fbde7

Please sign in to comment.