diff --git a/Cargo.lock b/Cargo.lock
index 2630aa2a25..249b7c5cea 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4720,6 +4720,7 @@ dependencies = [
  "gateway-messages",
  "gateway-test-utils",
  "libc",
+ "omicron-gateway",
  "omicron-workspace-hack",
  "signal-hook-tokio",
  "tokio",
@@ -5962,6 +5963,7 @@ dependencies = [
  "anyhow",
  "base64 0.22.1",
  "camino",
+ "chrono",
  "clap",
  "dropshot",
  "expectorate",
@@ -5980,6 +5982,8 @@ dependencies = [
  "omicron-test-utils",
  "omicron-workspace-hack",
  "once_cell",
+ "oximeter",
+ "oximeter-producer",
  "schemars",
  "serde",
  "serde_json",
diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs
index 62366c45e1..a55c5d4013 100644
--- a/clients/nexus-client/src/lib.rs
+++ b/clients/nexus-client/src/lib.rs
@@ -213,6 +213,7 @@ impl From<omicron_common::api::internal::nexus::ProducerKind>
     fn from(kind: omicron_common::api::internal::nexus::ProducerKind) -> Self {
         use omicron_common::api::internal::nexus::ProducerKind;
         match kind {
+            ProducerKind::ManagementGateway => Self::ManagementGateway,
             ProducerKind::SledAgent => Self::SledAgent,
             ProducerKind::Service => Self::Service,
             ProducerKind::Instance => Self::Instance,
@@ -390,6 +391,9 @@ impl From<types::ProducerKind>
     fn from(kind: types::ProducerKind) -> Self {
         use omicron_common::api::internal::nexus::ProducerKind;
         match kind {
+            types::ProducerKind::ManagementGateway => {
+                ProducerKind::ManagementGateway
+            }
             types::ProducerKind::SledAgent => ProducerKind::SledAgent,
             types::ProducerKind::Instance => ProducerKind::Instance,
             types::ProducerKind::Service => ProducerKind::Service,
diff --git a/clients/oximeter-client/src/lib.rs b/clients/oximeter-client/src/lib.rs
index 74fc6968e8..c23e5177a0 100644
--- a/clients/oximeter-client/src/lib.rs
+++ b/clients/oximeter-client/src/lib.rs
@@ -26,6 +26,7 @@ impl From<omicron_common::api::internal::nexus::ProducerKind>
     fn from(kind: omicron_common::api::internal::nexus::ProducerKind) -> Self {
         use omicron_common::api::internal::nexus;
         match kind {
+            nexus::ProducerKind::ManagementGateway => Self::ManagementGateway,
             nexus::ProducerKind::Service => Self::Service,
             nexus::ProducerKind::SledAgent => Self::SledAgent,
             nexus::ProducerKind::Instance => Self::Instance,
diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs
index 7f4eb358a4..4daea6a198 100644
--- a/common/src/api/internal/nexus.rs
+++ b/common/src/api/internal/nexus.rs
@@ -223,6 +223,8 @@ pub enum ProducerKind {
     Service,
     /// The producer is a Propolis VMM managing a guest instance.
     Instance,
+    /// The producer is a management gateway service.
+    ManagementGateway,
 }
 
 /// Information announced by a metric server, used so that clients can contact it and collect
diff --git a/dev-tools/mgs-dev/Cargo.toml b/dev-tools/mgs-dev/Cargo.toml
index d5f61f4b96..70382c0469 100644
--- a/dev-tools/mgs-dev/Cargo.toml
+++ b/dev-tools/mgs-dev/Cargo.toml
@@ -14,6 +14,7 @@ futures.workspace = true
 gateway-messages.workspace = true
 gateway-test-utils.workspace = true
 libc.workspace = true
+omicron-gateway.workspace = true
 omicron-workspace-hack.workspace = true
 signal-hook-tokio.workspace = true
 tokio.workspace = true
diff --git a/dev-tools/mgs-dev/src/main.rs b/dev-tools/mgs-dev/src/main.rs
index 85b1313d68..77947999d9 100644
--- a/dev-tools/mgs-dev/src/main.rs
+++ b/dev-tools/mgs-dev/src/main.rs
@@ -8,6 +8,7 @@ use clap::{Args, Parser, Subcommand};
 use futures::StreamExt;
 use libc::SIGINT;
 use signal_hook_tokio::Signals;
+use std::net::SocketAddr;
 
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
@@ -36,7 +37,12 @@ enum MgsDevCmd {
 }
 
 #[derive(Clone, Debug, Args)]
-struct MgsRunArgs {}
+struct MgsRunArgs {
+    /// Override the address of the Nexus instance to use when registering the
+    /// Oximeter producer.
+    #[clap(long)]
+    nexus_address: Option<SocketAddr>,
+}
 
 impl MgsRunArgs {
     async fn exec(&self) -> Result<(), anyhow::Error> {
@@ -46,9 +52,23 @@ impl MgsRunArgs {
         let mut signal_stream = signals.fuse();
 
         println!("mgs-dev: setting up MGS ... ");
-        let gwtestctx = gateway_test_utils::setup::test_setup(
+        let (mut mgs_config, sp_sim_config) =
+            gateway_test_utils::setup::load_test_config();
+        if let Some(addr) = self.nexus_address {
+            mgs_config.metrics =
+                Some(gateway_test_utils::setup::MetricsConfig {
+                    disabled: false,
+                    dev_nexus_address: Some(addr),
+                    dev_bind_loopback: true,
+                });
+        }
+
+        let gwtestctx = gateway_test_utils::setup::test_setup_with_config(
             "mgs-dev",
             gateway_messages::SpPort::One,
+            mgs_config,
+            &sp_sim_config,
+            None,
         )
         .await;
         println!("mgs-dev: MGS is running.");
diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out
index 2a9c9c8051..e939bfa864 100644
--- a/dev-tools/omdb/tests/successes.out
+++ b/dev-tools/omdb/tests/successes.out
@@ -141,9 +141,16 @@ SP DETAILS: type "Sled" slot 0
 
     COMPONENTS
 
-        NAME         DESCRIPTION             DEVICE          PRESENCE SERIAL 
-        sp3-host-cpu FAKE host cpu           sp3-host-cpu    Present  None   
-        dev-0        FAKE temperature sensor fake-tmp-sensor Failed   None   
+        NAME         DESCRIPTION                              DEVICE          PRESENCE SERIAL 
+        sp3-host-cpu FAKE host cpu                            sp3-host-cpu    Present  None   
+        dev-0        FAKE temperature sensor                  fake-tmp-sensor Failed   None   
+        dev-1        FAKE temperature sensor                  tmp117          Present  None   
+        dev-2        FAKE Southeast temperature sensor        tmp117          Present  None   
+        dev-6        FAKE U.2 Sharkfin A VPD                  at24csw080      Present  None   
+        dev-7        FAKE U.2 Sharkfin A hot swap controller  max5970         Present  None   
+        dev-8        FAKE U.2 A NVMe Basic Management Command nvme_bmc        Present  None   
+        dev-39       FAKE T6 temperature sensor               tmp451          Present  None   
+        dev-53       FAKE Fan controller                      max31790        Present  None   
 
     CABOOSES: none found
 
@@ -167,8 +174,16 @@ SP DETAILS: type "Sled" slot 1
 
     COMPONENTS
 
-        NAME         DESCRIPTION   DEVICE       PRESENCE SERIAL 
-        sp3-host-cpu FAKE host cpu sp3-host-cpu Present  None   
+        NAME         DESCRIPTION                              DEVICE       PRESENCE SERIAL 
+        sp3-host-cpu FAKE host cpu                            sp3-host-cpu Present  None   
+        dev-0        FAKE temperature sensor                  tmp117       Present  None   
+        dev-1        FAKE temperature sensor                  tmp117       Present  None   
+        dev-2        FAKE Southeast temperature sensor        tmp117       Present  None   
+        dev-6        FAKE U.2 Sharkfin A VPD                  at24csw080   Present  None   
+        dev-7        FAKE U.2 Sharkfin A hot swap controller  max5970      Present  None   
+        dev-8        FAKE U.2 A NVMe Basic Management Command nvme_bmc     Present  None   
+        dev-39       FAKE T6 temperature sensor               tmp451       Present  None   
+        dev-53       FAKE Fan controller                      max31790     Present  None   
 
     CABOOSES: none found
 
diff --git a/gateway-test-utils/configs/config.test.toml b/gateway-test-utils/configs/config.test.toml
index 79975f4611..4e3e9c6e6e 100644
--- a/gateway-test-utils/configs/config.test.toml
+++ b/gateway-test-utils/configs/config.test.toml
@@ -88,6 +88,15 @@ addr = "[::1]:0"
 ignition-target = 3
 location = { switch0 = ["sled", 1], switch1 = ["sled", 1] }
 
+#
+# Configuration for SP sensor metrics polling
+#
+[metrics]
+# Allow the Oximeter metrics endpoint to bind on the loopback IP. This is
+# useful in local testing and development, when the gateway service is not
+# given a "real" underlay network IP.
+dev_bind_loopback = true
+
 #
 # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel
 # string "UNUSED".  The actual path will be generated by the test suite for each
diff --git a/gateway-test-utils/configs/sp_sim_config.test.toml b/gateway-test-utils/configs/sp_sim_config.test.toml
index cc08eec30b..4f370a167c 100644
--- a/gateway-test-utils/configs/sp_sim_config.test.toml
+++ b/gateway-test-utils/configs/sp_sim_config.test.toml
@@ -20,6 +20,9 @@ device = "fake-tmp-sensor"
 description = "FAKE temperature sensor 1"
 capabilities = 0x2
 presence = "Present"
+sensors = [
+    {name = "Southwest", kind = "Temperature", last_data.value = 41.7890625, last_data.timestamp = 1234 },
+]
 
 [[simulated_sps.sidecar.components]]
 id = "dev-1"
@@ -27,6 +30,9 @@ device = "fake-tmp-sensor"
 description = "FAKE temperature sensor 2"
 capabilities = 0x2
 presence = "Failed"
+sensors = [
+    { name = "South", kind = "Temperature", last_error.value = "DeviceError", last_error.timestamp = 1234 },
+]
 
 [[simulated_sps.sidecar]]
 multicast_addr = "::1"
@@ -56,6 +62,82 @@ device = "fake-tmp-sensor"
 description = "FAKE temperature sensor"
 capabilities = 0x2
 presence = "Failed"
+sensors = [
+    { name = "Southwest", kind = "Temperature", last_error.value = "DeviceError", last_error.timestamp = 1234 },
+]
+[[simulated_sps.gimlet.components]]
+id = "dev-1"
+device = "tmp117"
+description = "FAKE temperature sensor"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "South", kind = "Temperature", last_data.value = 42.5625, last_data.timestamp = 1234 },
+]
+
+[[simulated_sps.gimlet.components]]
+id = "dev-2"
+device = "tmp117"
+description = "FAKE Southeast temperature sensor"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "Southeast", kind = "Temperature", last_data.value = 41.570313, last_data.timestamp = 1234 },
+]
+
+[[simulated_sps.gimlet.components]]
+id = "dev-6"
+device = "at24csw080"
+description = "FAKE U.2 Sharkfin A VPD"
+capabilities = 0x0
+presence = "Present"
+
+[[simulated_sps.gimlet.components]]
+id = "dev-7"
+device = "max5970"
+description = "FAKE U.2 Sharkfin A hot swap controller"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "V12_U2A_A0", kind = "Current", last_data.value = 0.45898438, last_data.timestamp = 1234 },
+    { name = "V3P3_U2A_A0", kind = "Current", last_data.value = 0.024414063, last_data.timestamp = 1234 },
+    { name = "V12_U2A_A0", kind = "Voltage", last_data.value = 12.03125, last_data.timestamp = 1234 },
+    { name = "V3P3_U2A_A0", kind = "Voltage", last_data.value = 3.328125, last_data.timestamp = 1234 },
+]
+
+[[simulated_sps.gimlet.components]]
+id = "dev-8"
+device = "nvme_bmc"
+description = "FAKE U.2 A NVMe Basic Management Command"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "U2_N0", kind = "Temperature", last_data.value = 56.0, last_data.timestamp = 1234 },
+]
+[[simulated_sps.gimlet.components]]
+id = "dev-39"
+device = "tmp451"
+description = "FAKE T6 temperature sensor"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "t6", kind = "Temperature", last_data.value = 70.625, last_data.timestamp = 1234 },
+]
+[[simulated_sps.gimlet.components]]
+id = "dev-53"
+device = "max31790"
+description = "FAKE Fan controller"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "Southeast", kind = "Speed", last_data.value = 2607.0, last_data.timestamp = 1234 },
+    { name = "Northeast", kind = "Speed", last_data.value = 2476.0, last_data.timestamp = 1234 },
+    { name = "South", kind = "Speed", last_data.value = 2553.0, last_data.timestamp = 1234 },
+    { name = "North", kind = "Speed", last_data.value = 2265.0, last_data.timestamp = 1234 },
+    { name = "Southwest", kind = "Speed", last_data.value = 2649.0, last_data.timestamp = 1234 },
+    { name = "Northwest", kind = "Speed", last_data.value = 2275.0, last_data.timestamp = 1234 },
+]
+
 
 [[simulated_sps.gimlet]]
 multicast_addr = "::1"
@@ -72,6 +154,90 @@ capabilities = 0
 presence = "Present"
 serial_console = "[::1]:0"
 
+
+[[simulated_sps.gimlet.components]]
+id = "dev-0"
+device = "tmp117"
+description = "FAKE temperature sensor"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "Southwest", kind = "Temperature", last_data.value = 41.3629, last_data.timestamp = 1234 },
+]
+[[simulated_sps.gimlet.components]]
+id = "dev-1"
+device = "tmp117"
+description = "FAKE temperature sensor"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "South", kind = "Temperature", last_data.value = 42.5625, last_data.timestamp = 1234 },
+]
+
+[[simulated_sps.gimlet.components]]
+id = "dev-2"
+device = "tmp117"
+description = "FAKE Southeast temperature sensor"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "Southeast", kind = "Temperature", last_data.value = 41.570313, last_data.timestamp = 1234 },
+]
+
+[[simulated_sps.gimlet.components]]
+id = "dev-6"
+device = "at24csw080"
+description = "FAKE U.2 Sharkfin A VPD"
+capabilities = 0x0
+presence = "Present"
+
+[[simulated_sps.gimlet.components]]
+id = "dev-7"
+device = "max5970"
+description = "FAKE U.2 Sharkfin A hot swap controller"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "V12_U2A_A0", kind = "Current", last_data.value = 0.41893438, last_data.timestamp = 1234 },
+    { name = "V3P3_U2A_A0", kind = "Current", last_data.value = 0.025614603, last_data.timestamp = 1234 },
+    { name = "V12_U2A_A0", kind = "Voltage", last_data.value = 12.02914, last_data.timestamp = 1234 },
+    { name = "V3P3_U2A_A0", kind = "Voltage", last_data.value = 3.2618, last_data.timestamp = 1234 },
+]
+
+[[simulated_sps.gimlet.components]]
+id = "dev-8"
+device = "nvme_bmc"
+description = "FAKE U.2 A NVMe Basic Management Command"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "U2_N0", kind = "Temperature", last_data.value = 56.0, last_data.timestamp = 1234 },
+]
+[[simulated_sps.gimlet.components]]
+id = "dev-39"
+device = "tmp451"
+description = "FAKE T6 temperature sensor"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "t6", kind = "Temperature", last_data.value = 70.625, last_data.timestamp = 1234 },
+]
+[[simulated_sps.gimlet.components]]
+id = "dev-53"
+device = "max31790"
+description = "FAKE Fan controller"
+capabilities = 0x2
+presence = "Present"
+sensors = [
+    { name = "Southeast", kind = "Speed", last_data.value = 2510.0, last_data.timestamp = 1234 },
+    { name = "Northeast", kind = "Speed", last_data.value = 2390.0, last_data.timestamp = 1234 },
+    { name = "South", kind = "Speed", last_data.value = 2467.0, last_data.timestamp = 1234 },
+    { name = "North", kind = "Speed", last_data.value = 2195.0, last_data.timestamp = 1234 },
+    { name = "Southwest", kind = "Speed", last_data.value = 2680.0, last_data.timestamp = 1234 },
+    { name = "Northwest", kind = "Speed", last_data.value = 2212.0, last_data.timestamp = 1234 },
+]
+
+
 #
 # NOTE: for the test suite, the [log] section is ignored; sp-sim logs are rolled
 # into the gateway logfile.
diff --git a/gateway-test-utils/src/setup.rs b/gateway-test-utils/src/setup.rs
index 46bc55805a..056bb451f7 100644
--- a/gateway-test-utils/src/setup.rs
+++ b/gateway-test-utils/src/setup.rs
@@ -8,6 +8,7 @@ use camino::Utf8Path;
 use dropshot::test_util::ClientTestContext;
 use dropshot::test_util::LogContext;
 use gateway_messages::SpPort;
+pub use omicron_gateway::metrics::MetricsConfig;
 use omicron_gateway::MgsArguments;
 use omicron_gateway::SpType;
 use omicron_gateway::SwitchPortConfig;
@@ -33,6 +34,7 @@ pub struct GatewayTestContext {
     pub server: omicron_gateway::Server,
     pub simrack: SimRack,
     pub logctx: LogContext,
+    pub gateway_id: Uuid,
 }
 
 impl GatewayTestContext {
@@ -48,13 +50,18 @@ pub fn load_test_config() -> (omicron_gateway::Config, sp_sim::Config) {
     let manifest_dir = Utf8Path::new(env!("CARGO_MANIFEST_DIR"));
     let server_config_file_path = manifest_dir.join("configs/config.test.toml");
     let server_config =
-        omicron_gateway::Config::from_file(&server_config_file_path)
-            .expect("failed to load config.test.toml");
+        match omicron_gateway::Config::from_file(&server_config_file_path) {
+            Ok(config) => config,
+            Err(e) => panic!("failed to load MGS config: {e}"),
+        };
 
     let sp_sim_config_file_path =
         manifest_dir.join("configs/sp_sim_config.test.toml");
-    let sp_sim_config = sp_sim::Config::from_file(&sp_sim_config_file_path)
-        .expect("failed to load sp_sim_config.test.toml");
+    let sp_sim_config =
+        match sp_sim::Config::from_file(&sp_sim_config_file_path) {
+            Ok(config) => config,
+            Err(e) => panic!("failed to load SP simulator config: {e}"),
+        };
     (server_config, sp_sim_config)
 }
 
@@ -143,8 +150,8 @@ pub async fn test_setup_with_config(
 
     // Start gateway server
     let rack_id = Some(Uuid::parse_str(RACK_UUID).unwrap());
-
-    let args = MgsArguments { id: Uuid::new_v4(), addresses, rack_id };
+    let gateway_id = Uuid::new_v4();
+    let args = MgsArguments { id: gateway_id, addresses, rack_id };
     let server = omicron_gateway::Server::start(
         server_config.clone(),
         args,
@@ -206,5 +213,5 @@ pub async fn test_setup_with_config(
         log.new(o!("component" => "client test context")),
     );
 
-    GatewayTestContext { client, server, simrack, logctx }
+    GatewayTestContext { client, server, simrack, logctx, gateway_id }
 }
diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml
index 3cfd1d447b..2dce15892d 100644
--- a/gateway/Cargo.toml
+++ b/gateway/Cargo.toml
@@ -11,6 +11,7 @@ workspace = true
 anyhow.workspace = true
 base64.workspace = true
 camino.workspace = true
+chrono.workspace = true
 clap.workspace = true
 dropshot.workspace = true
 futures.workspace = true
@@ -39,6 +40,8 @@ tokio-tungstenite.workspace = true
 toml.workspace = true
 uuid.workspace = true
 omicron-workspace-hack.workspace = true
+oximeter.workspace = true
+oximeter-producer.workspace = true
 
 [dev-dependencies]
 expectorate.workspace = true
diff --git a/gateway/examples/config.toml b/gateway/examples/config.toml
index d29d9508b9..a76edcd7b5 100644
--- a/gateway/examples/config.toml
+++ b/gateway/examples/config.toml
@@ -71,6 +71,15 @@ addr = "[::1]:33320"
 ignition-target = 3
 location = { switch0 = ["sled", 1], switch1 = ["sled", 1] }
 
+#
+# Configuration for SP sensor metrics polling
+#
+[metrics]
+# Allow the Oximeter metrics endpoint to bind on the loopback IP. This is
+# useful in local testing and development, when the gateway service is not
+# given a "real" underlay network IP.
+dev_bind_loopback = true
+
 [log]
 # Show log messages of this level and more severe
 level = "debug"
diff --git a/gateway/src/config.rs b/gateway/src/config.rs
index afdb046881..edf895ef59 100644
--- a/gateway/src/config.rs
+++ b/gateway/src/config.rs
@@ -6,6 +6,7 @@
 //! configuration
 
 use crate::management_switch::SwitchConfig;
+use crate::metrics::MetricsConfig;
 use camino::Utf8Path;
 use camino::Utf8PathBuf;
 use dropshot::ConfigLogging;
@@ -25,6 +26,8 @@ pub struct Config {
     pub switch: SwitchConfig,
     /// Server-wide logging configuration.
     pub log: ConfigLogging,
+    /// Configuration for SP sensor metrics.
+    pub metrics: Option<MetricsConfig>,
 }
 
 impl Config {
@@ -47,13 +50,13 @@ pub struct PartialDropshotConfig {
 
 #[derive(Debug, Error, SlogInlineError)]
 pub enum LoadError {
-    #[error("error reading \"{path}\"")]
+    #[error("error reading \"{path}\": {err}")]
     Io {
         path: Utf8PathBuf,
         #[source]
         err: std::io::Error,
     },
-    #[error("error parsing \"{path}\"")]
+    #[error("error parsing \"{path}\": {err}")]
     Parse {
         path: Utf8PathBuf,
         #[source]
diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs
index e1eed05334..8e764dc63f 100644
--- a/gateway/src/lib.rs
+++ b/gateway/src/lib.rs
@@ -6,6 +6,7 @@ mod config;
 mod context;
 mod error;
 mod management_switch;
+pub mod metrics;
 mod serial_console;
 
 pub mod http_entrypoints; // TODO pub only for testing - is this right?
@@ -62,6 +63,8 @@ pub struct Server {
     /// `http_servers`
     all_servers_shutdown: FuturesUnordered<ShutdownWaitFuture>,
     request_body_max_bytes: usize,
+    /// handle to the SP sensor metrics subsystem
+    metrics: metrics::Metrics,
     log: Logger,
 }
 
@@ -151,6 +154,9 @@ impl Server {
         let mut http_servers = HashMap::with_capacity(args.addresses.len());
         let all_servers_shutdown = FuturesUnordered::new();
 
+        let metrics =
+            metrics::Metrics::new(&log, &args, config.metrics, apictx.clone());
+
         for addr in args.addresses {
             start_dropshot_server(
                 &apictx,
@@ -167,6 +173,7 @@ impl Server {
             http_servers,
             all_servers_shutdown,
             request_body_max_bytes: config.dropshot.request_body_max_bytes,
+            metrics,
             log,
         })
     }
@@ -275,12 +282,14 @@ impl Server {
             server.close().await?;
         }
 
+        self.metrics.update_server_addrs(addresses).await;
+
         Ok(())
     }
 
     /// The rack_id will be set on a refresh of the SMF property when the sled
     /// agent starts.
-    pub fn set_rack_id(&self, rack_id: Option<Uuid>) {
+    pub fn set_rack_id(&mut self, rack_id: Option<Uuid>) {
         if let Some(rack_id) = rack_id {
             let val = self.apictx.rack_id.get_or_init(|| rack_id);
             if *val != rack_id {
@@ -291,6 +300,7 @@ impl Server {
                     "ignored_new_rack_id" => %rack_id);
             } else {
                 info!(self.apictx.log, "Set rack_id"; "rack_id" => %rack_id);
+                self.metrics.set_rack_id(rack_id);
             }
         } else {
             warn!(self.apictx.log, "SMF refresh called without a rack id");
diff --git a/gateway/src/metrics.rs b/gateway/src/metrics.rs
new file mode 100644
index 0000000000..d4e0795ae0
--- /dev/null
+++ b/gateway/src/metrics.rs
@@ -0,0 +1,1159 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+use crate::error::CommunicationError;
+use crate::management_switch::SpIdentifier;
+use crate::management_switch::SpType;
+use crate::MgsArguments;
+use crate::ServerContext;
+use anyhow::Context;
+use gateway_messages::measurement::MeasurementError;
+use gateway_messages::measurement::MeasurementKind;
+use gateway_messages::ComponentDetails;
+use gateway_messages::DeviceCapabilities;
+use gateway_sp_comms::SingleSp;
+use gateway_sp_comms::SpComponent;
+use gateway_sp_comms::VersionedSpState;
+use omicron_common::api::internal::nexus::ProducerEndpoint;
+use omicron_common::api::internal::nexus::ProducerKind;
+use omicron_common::backoff;
+use oximeter::types::Cumulative;
+use oximeter::types::ProducerRegistry;
+use oximeter::types::Sample;
+use oximeter::MetricsError;
+use std::borrow::Cow;
+use std::collections::hash_map;
+use std::collections::hash_map::HashMap;
+use std::net::IpAddr;
+use std::net::SocketAddr;
+use std::net::SocketAddrV6;
+use std::sync::Arc;
+use std::time::Duration;
+use tokio::sync::broadcast;
+use tokio::sync::oneshot;
+use tokio::sync::watch;
+use tokio::task::JoinHandle;
+use uuid::Uuid;
+
+oximeter::use_timeseries!("hardware-component.toml");
+use hardware_component as metric;
+
+/// Handle to the metrics tasks.
+pub struct Metrics {
+    /// If the metrics subsystem is disabled, this is `None`.
+    inner: Option<Handles>,
+}
+
+struct Handles {
+    addrs_tx: watch::Sender<Vec<SocketAddrV6>>,
+    rack_id_tx: Option<oneshot::Sender<Uuid>>,
+    server: JoinHandle<anyhow::Result<()>>,
+}
+
+/// Configuration for metrics.
+///
+/// In order to reduce the risk of a bad config file taking down the whole
+/// management network, we try to keep the metrics-specific portion of the
+/// config file as minimal as possible. At present, it only includes development
+/// configurations that shouldn't be present in production configs.
+#[derive(
+    Clone, Debug, Default, PartialEq, Eq, serde::Deserialize, serde::Serialize,
+)]
+#[serde(deny_unknown_fields)]
+pub struct MetricsConfig {
+    /// Completely disable the metrics subsystem.
+    ///
+    /// If `disabled = true`, sensor data metrics will not be collected, and the
+    /// metrics polling tasks will not be started.
+    #[serde(default)]
+    pub disabled: bool,
+
+    /// Override the Nexus address used to register the SP metrics Oximeter
+    /// producer. This is intended for use in development and testing.
+    ///
+    /// If this argument is not present, Nexus is discovered through DNS.
+    #[serde(default)]
+    pub dev_nexus_address: Option<SocketAddr>,
+
+    /// Allow the metrics producer endpoint to bind on loopback.
+    ///
+    /// This should be disabled in production, as Nexus will not be able to
+    /// reach the loopback interface, but is necessary for local development and
+    /// test purposes.
+    #[serde(default)]
+    pub dev_bind_loopback: bool,
+}
+
+/// Polls sensor readings from an individual SP.
+struct SpPoller {
+    spid: SpIdentifier,
+    known_state: Option<SpUnderstanding>,
+    components: HashMap<SpComponent, ComponentMetrics>,
+    log: slog::Logger,
+    rack_id: Uuid,
+    mgs_id: Uuid,
+    sample_tx: broadcast::Sender<Vec<Sample>>,
+}
+
+struct ComponentMetrics {
+    target: metric::HardwareComponent,
+    /// Counts of errors reported by sensors on this component.
+    sensor_errors: HashMap<SensorErrorKey, Cumulative<u64>>,
+    /// Counts of errors that occurred whilst polling the SP for measurements
+    /// from this component.
+    poll_errors: HashMap<&'static str, Cumulative<u64>>,
+}
+
+#[derive(Eq, PartialEq, Hash)]
+struct SensorErrorKey {
+    name: Cow<'static, str>,
+    kind: &'static str,
+    error: &'static str,
+}
+
+/// Manages the Oximeter producer (metrics) server.
+struct ServerManager {
+    log: slog::Logger,
+    addrs: watch::Receiver<Vec<SocketAddrV6>>,
+    registry: ProducerRegistry,
+}
+
+#[derive(Debug)]
+struct Producer {
+    /// Receiver for samples produced by SP pollers.
+    sample_rx: broadcast::Receiver<Vec<Sample>>,
+    /// Logging context.
+    ///
+    /// We stick this on the producer because we would like to be able to log
+    /// when stale samples are dropped.
+    log: slog::Logger,
+}
+
+/// The maximum Dropshot request size for the metrics server.
+const METRIC_REQUEST_MAX_SIZE: usize = 10 * 1024 * 1024;
+
+/// Poll interval for requesting sensor readings from SPs.
+///
+/// Bryan wants to try polling at 1Hz, so let's do that for now.
+const SP_POLL_INTERVAL: Duration = Duration::from_secs(1);
+
+/// The interval at which we will ask Oximeter to collect our metric samples.
+///
+/// Every ten seconds seems good.
+const OXIMETER_COLLECTION_INTERVAL: Duration = Duration::from_secs(10);
+
+/// The expected number of SPs in a fully-loaded rack.
+///
+/// N.B. that there *might* be more than this; we shouldn't ever panic or
+/// otherwise misbehave if we see more than this number. This is just intended
+/// for sizing buffers/map allocations and so forth; we can always realloc if we
+/// see a bonus SP or two. That's why it's called "normal number of SPs" and not
+/// "MAX_SPS" or similar.
+///
+/// Additionally, note that we always determine the channel capacity based on
+/// the assumption that *someday*, the rack might be fully loaded with compute
+/// sleds, even if it isn't *right now*. A rack with 16 sleds could always grow
+/// another 16 later!
+const NORMAL_NUMBER_OF_SPS: usize =
+    32  // 32 compute sleds
+    + 2 // two switches
+    + 2 // two power shelves, someday.
+    ;
+
+/// Capacity of the broadcast channel buffering sample chunks from SP pollers.
+const MAX_BUFFERED_SAMPLE_CHUNKS: usize = {
+    // Roughly how many times will we poll SPs for each metrics collection
+    // interval?
+    let polls_per_metrics_interval = {
+        let collection_interval_secs: usize =
+            OXIMETER_COLLECTION_INTERVAL.as_secs() as usize;
+        let poll_interval_secs: usize = SP_POLL_INTERVAL.as_secs() as usize;
+
+        collection_interval_secs / poll_interval_secs
+    };
+
+    // How many sample collection intervals do we want to allow to elapse before
+    // we start putting stuff on the floor?
+    //
+    // Let's say 16. Chosen totally arbitrarily but seems reasonable-ish.
+    let sloppiness = 16;
+    let capacity =
+        NORMAL_NUMBER_OF_SPS * polls_per_metrics_interval * sloppiness;
+    // Finally, the buffer capacity will probably be allocated in a power of two
+    // anyway, so let's make sure our thing is a power of two so we don't waste
+    // the allocation we're gonna get anyway.
+    capacity.next_power_of_two()
+};
+
+impl Metrics {
+    pub fn new(
+        log: &slog::Logger,
+        args: &MgsArguments,
+        cfg: Option<MetricsConfig>,
+        apictx: Arc<ServerContext>,
+    ) -> Self {
+        let &MgsArguments { id, rack_id, ref addresses } = args;
+
+        if cfg.as_ref().map(|c| c.disabled).unwrap_or(false) {
+            slog::warn!(&log, "metrics subsystem disabled by config");
+            return Self { inner: None };
+        }
+
+        // Create a channel for the SP poller tasks to send samples to the
+        // Oximeter producer endpoint.
+        //
+        // A broadcast channel is used here, not because we are actually
+        // multi-consumer (`Producer::produce` is never called concurrently),
+        // but because the broadcast channel has properly ring-buffer-like
+        // behavior, where earlier messages are discarded, rather than exerting
+        // backpressure on senders (as Tokio's MPSC channel does). This
+        // is what we want, as we would prefer a full buffer to result in
+        // clobbering the oldest measurements, rather than leaving the newest
+        // ones on the floor.
+        let (sample_tx, sample_rx) =
+            broadcast::channel(MAX_BUFFERED_SAMPLE_CHUNKS);
+
+        // Using a channel for this is, admittedly, a bit of an end-run around
+        // the `OnceLock` on the `ServerContext` that *also* stores the rack ID,
+        // but it has the nice benefit of allowing the `PollerManager` task to _await_
+        // the rack ID being set...we might want to change other code to use a
+        // similar approach in the future.
+        let (rack_id_tx, rack_id_rx) = oneshot::channel();
+        let rack_id_tx = if let Some(rack_id) = rack_id {
+            rack_id_tx.send(rack_id).expect(
+                "we just created the channel; it therefore will not be \
+                     closed",
+            );
+            None
+        } else {
+            Some(rack_id_tx)
+        };
+
+        tokio::spawn(start_pollers(
+            log.new(slog::o!("component" => "sensor-poller")),
+            apictx.clone(),
+            rack_id_rx,
+            id,
+            sample_tx,
+        ));
+
+        let (addrs_tx, addrs_rx) =
+            tokio::sync::watch::channel(addresses.clone());
+        let server = {
+            let log = log.new(slog::o!("component" => "producer-server"));
+            let registry = ProducerRegistry::with_id(id);
+            registry
+                .register_producer(Producer { sample_rx, log: log.clone() })
+                // TODO(ben): when you change `register_producer` to not return
+                // a `Result`, delete this `expect`. thanks in advance! :)
+                .expect(
+                    "`ProducerRegistry::register_producer()` will never \
+                     actually return an `Err`, so this shouldn't ever \
+                     happen...",
+                );
+
+            tokio::spawn(
+                ServerManager { log, addrs: addrs_rx, registry }.run(cfg),
+            )
+        };
+        Self { inner: Some(Handles { addrs_tx, rack_id_tx, server }) }
+    }
+
+    /// Delivers the rack ID to the task that starts the sensor pollers. The
+    /// sender is consumed on first use, so any later call does nothing.
+    pub fn set_rack_id(&mut self, rack_id: Uuid) {
+        let Some(inner) = self.inner.as_mut() else { return };
+        if let Some(sender) = inner.rack_id_tx.take() {
+            // A failed send means the receiver is gone (shutdown): no panic.
+            let _ = sender.send(rack_id);
+        }
+    }
+
+    /// Publishes `new_addrs` to the producer server's address watch, waking
+    /// watchers only if the address set changed (order is ignored).
+    pub async fn update_server_addrs(&self, new_addrs: &[SocketAddrV6]) {
+        let Some(inner) = self.inner.as_ref() else { return };
+        inner.addrs_tx.send_if_modified(|current_addrs| {
+            // Compare as sets, in *both* directions: with equal lengths, a
+            // duplicated entry on one side could otherwise hide a genuinely
+            // new address on the other. The list is expected to hold only a
+            // few items, so linear scans beat building a `HashSet`.
+            let unchanged = current_addrs.len() == new_addrs.len()
+                && current_addrs.iter().all(|a| new_addrs.contains(a))
+                && new_addrs.iter().all(|a| current_addrs.contains(a));
+            if unchanged {
+                return false;
+            }
+            // Refill in place so the existing `Vec` capacity is reused.
+            current_addrs.clear();
+            current_addrs.extend_from_slice(new_addrs);
+            true
+        });
+    }
+}
+
+impl Drop for Metrics {
+    /// Aborts the producer-server task (if any) so it cannot outlive `self`.
+    fn drop(&mut self) {
+        if let Some(handles) = self.inner.as_mut() {
+            handles.server.abort();
+        }
+    }
+}
+
+impl oximeter::Producer for Producer {
+    /// Drains every sample chunk currently buffered in the broadcast channel
+    /// and returns the contained samples as a single flattened iterator.
+    ///
+    /// Chunks that were overwritten in the ring buffer before being read are
+    /// counted and reported in the log rather than treated as an error.
+    fn produce(
+        &mut self,
+    ) -> Result<Box<dyn Iterator<Item = Sample>>, MetricsError> {
+        use broadcast::error::TryRecvError;
+
+        // N.B. do NOT be tempted to implement `Iterator` for a
+        // `broadcast::Receiver<Vec<Sample>>` and hand back the result of
+        // `Receiver::resubscribe` instead of draining into a `Vec`: a
+        // resubscribed receiver starts at the current *tail* of the ring
+        // buffer, so it would miss everything produced before now --- the
+        // opposite of what we want.
+        let mut chunks = Vec::with_capacity(self.sample_rx.len());
+        // Number of chunks that fell off the end of the ring buffer.
+        let mut dropped_chunks = 0;
+        loop {
+            let chunk = match self.sample_rx.try_recv() {
+                Ok(chunk) => chunk,
+                // An old entry was overwritten; note it and keep reading.
+                Err(TryRecvError::Lagged(dropped)) => {
+                    dropped_chunks += dropped;
+                    continue;
+                }
+                // Everything currently available has been drained.
+                Err(TryRecvError::Empty) => break,
+                // This should only happen when shutting down.
+                Err(TryRecvError::Closed) => {
+                    slog::debug!(&self.log, "sample producer channel closed");
+                    break;
+                }
+            };
+            chunks.push(chunk);
+        }
+
+        // Each chunk is the `Vec` of samples produced by one poller; total
+        // up their lengths so the sample count can be logged.
+        let total_samples: usize = chunks.iter().map(Vec::len).sum();
+        // Losing chunks is noteworthy, so that case logs at `info` level.
+        if dropped_chunks == 0 {
+            slog::debug!(
+                &self.log,
+                "produced metric samples";
+                "samples" => total_samples,
+                "sample_chunks" => chunks.len(),
+            );
+        } else {
+            slog::info!(
+                &self.log,
+                "produced metric samples. some old sample chunks were dropped!";
+                "samples" => total_samples,
+                "sample_chunks" => chunks.len(),
+                "dropped_chunks" => dropped_chunks,
+            );
+        }
+
+        // Hand the caller one flat stream of samples, chunk by chunk.
+        Ok(Box::new(chunks.into_iter().flatten()))
+    }
+}
+
+/// Waits for the rack ID and for SP discovery to finish, then spawns one
+/// `SpPoller` task per SP reported by the management switch. Returns an
+/// error if the rack ID sender is dropped or the discovery retry fails.
+async fn start_pollers(
+    log: slog::Logger,
+    apictx: Arc<ServerContext>,
+    rack_id: oneshot::Receiver<Uuid>,
+    mgs_id: Uuid,
+    sample_tx: broadcast::Sender<Vec<Sample>>,
+) -> anyhow::Result<()> {
+    // Nothing can be polled until the rack ID is known, so wait for it first.
+    let rack_id = rack_id
+        .await
+        .context("rack ID sender has gone away...we must be shutting down")?;
+
+    // Wait for SP discovery to complete, if it hasn't already.
+    // TODO(eliza): presently, we busy-poll here. It would be nicer to
+    // replace the `OnceLock<Result<LocationMap, ...>>` in `ManagementSwitch`
+    // with a `tokio::sync::watch`
+    let switch = &apictx.mgmt_switch;
+    let discovered = backoff::retry_notify_ext(
+        backoff::retry_policy_local(),
+        || async { switch.all_sps().map_err(backoff::BackoffError::transient) },
+        // Log louder the longer discovery takes.
+        |err, _, elapsed| match elapsed.as_secs() {
+            0..=29 => slog::debug!(
+                &log,
+                "waiting for SP discovery to complete...";
+                "elapsed" => ?elapsed,
+                "error" => err,
+            ),
+            30..=179 => slog::info!(
+                &log,
+                "still waiting for SP discovery to complete...";
+                "elapsed" => ?elapsed,
+                "error" => err,
+            ),
+            _ => slog::warn!(
+                &log,
+                "we have been waiting for SP discovery to complete \
+                 for a pretty long time!";
+                "elapsed" => ?elapsed,
+                "error" => err,
+            ),
+        },
+    )
+    .await
+    .context("we should never return a fatal error here")?;
+
+    slog::info!(
+        &log,
+        "starting to poll SP sensor data every {SP_POLL_INTERVAL:?}"
+    );
+
+    for (spid, _) in discovered {
+        slog::info!(
+            &log,
+            "found a new little friend!";
+            "sp_slot" => ?spid.slot,
+            "chassis_type" => ?spid.typ,
+        );
+
+        // Give each poller a child logger tagged with its SP's identity.
+        let poller_log = log.new(slog::o!(
+            "sp_slot" => spid.slot,
+            "chassis_type" => format!("{:?}", spid.typ),
+        ));
+        let poller = SpPoller {
+            spid,
+            rack_id,
+            mgs_id,
+            log: poller_log,
+            components: HashMap::new(),
+            known_state: None,
+            sample_tx: sample_tx.clone(),
+        };
+        tokio::spawn(poller.run(apictx.clone()));
+    }
+
+    Ok(())
+}
+
+impl SpPoller {
+    /// Polls this SP once per `SP_POLL_INTERVAL`, sending each non-empty batch
+    /// of samples to `sample_tx`. Returns when the sample channel or the SP
+    /// address watch closes, or (in release builds) if the SP lookup fails.
+    async fn run(mut self, apictx: Arc<ServerContext>) {
+        use tokio::time::MissedTickBehavior;
+        let mut interval = tokio::time::interval(SP_POLL_INTERVAL);
+        // The default `Burst` behavior would replay every tick missed while
+        // waiting for an SP to appear as back-to-back polls; skip them.
+        interval.set_missed_tick_behavior(MissedTickBehavior::Skip);
+        let sp = match apictx.mgmt_switch.sp(self.spid) {
+            Ok(sp) => sp,
+            Err(e) => {
+                // This should never happen, but it's not worth taking down the
+                // entire management network over that...
+                const MSG: &str =
+                    "the `SpPoller::run` function is only called after \
+                     discovery completes successfully, and the `SpIdentifier` \
+                     used was returned by the management switch, \
+                     so it should be valid.";
+                if cfg!(debug_assertions) {
+                    unreachable!(
+                        "{MSG} nonetheless, we saw a {e:?} error when looking \
+                         up {:?}",
+                        self.spid
+                    );
+                } else {
+                    slog::error!(
+                        &self.log,
+                        "THIS SHOULDN'T HAPPEN: {MSG}";
+                        "error" => e,
+                        "sp" => ?self.spid,
+                    );
+                    return;
+                }
+            }
+        };
+        loop {
+            interval.tick().await;
+            slog::trace!(&self.log, "interval elapsed, polling SP...");
+            match self.poll(sp).await {
+                // No sense cluttering the ringbuffer with empty vecs...
+                Ok(samples) if samples.is_empty() => {
+                    slog::trace!(
+                        &self.log,
+                        "polled SP, no samples returned";
+                        "num_samples" => 0usize
+                    );
+                }
+                Ok(samples) => {
+                    slog::trace!(
+                        &self.log,
+                        "polled SP successfully";
+                        "num_samples" => samples.len(),
+                    );
+                    if self.sample_tx.send(samples).is_err() {
+                        slog::debug!(
+                            &self.log,
+                            "all sample receiver handles have been dropped! \
+                             presumably we are shutting down...";
+                        );
+                        return;
+                    }
+                }
+                // No SP is present for this ID right now. An empty cubby may
+                // be populated later, so wait for its address to show up.
+                Err(CommunicationError::NoSpDiscovered) => {
+                    slog::info!(
+                        &self.log,
+                        "no SP is present for this slot. waiting for a \
+                         little buddy to appear...";
+                    );
+                    let mut watch = sp.sp_addr_watch().clone();
+                    loop {
+                        if let Some((addr, port)) = *watch.borrow_and_update() {
+                            slog::info!(
+                                &self.log,
+                                "found a SP, resuming polling.";
+                                "sp_addr" => ?addr,
+                                "sp_port" => ?port,
+                            );
+                            break;
+                        }
+                        // Wait for an address to be discovered.
+                        slog::debug!(&self.log, "waiting for a SP to appear.");
+                        if watch.changed().await.is_err() {
+                            slog::debug!(
+                                &self.log,
+                                "SP address watch has been closed, presumably \
+                                 we are shutting down";
+                            );
+                            return;
+                        }
+                    }
+                }
+                Err(error) => {
+                    slog::warn!(
+                        &self.log,
+                        "failed to poll SP, will try again momentarily...";
+                        "error" => %error,
+                    );
+                    // TODO(eliza): we should probably have a metric for failed
+                    // SP polls.
+                }
+            }
+        }
+    }
+
+    /// Performs one poll of `sp`, returning the samples collected from all of
+    /// its components that have measurement channels. The component inventory
+    /// is rebuilt first whenever the SP's identity differs from `known_state`,
+    /// and the whole scrape is redone if that identity changes mid-poll.
+    async fn poll(
+        &mut self,
+        sp: &SingleSp,
+    ) -> Result<Vec<Sample>, CommunicationError> {
+        let mut current_state = SpUnderstanding::from(sp.state().await?);
+        let mut samples = Vec::new();
+        // If the SP's state changes dramatically *during* a poll, it may be
+        // necessary to re-do the metrics scrape, thus the loop. Normally, we
+        // will only loop a single time, but may retry if necessary.
+        loop {
+            // Check if the SP's state has changed. If it has, we need to make sure
+            // we still know what all of its sensors are.
+            if Some(&current_state) != self.known_state.as_ref() {
+                // The SP's state appears to have changed. Time to make sure our
+                // understanding of its devices and identity is up to date!
+
+                let chassis_kind = match self.spid.typ {
+                    SpType::Sled => "sled",
+                    SpType::Switch => "switch",
+                    SpType::Power => "power",
+                };
+                let model = stringify_byte_string(&current_state.model[..]);
+                let serial =
+                    stringify_byte_string(&current_state.serial_number[..]);
+                let hubris_archive_id =
+                    hex::encode(&current_state.hubris_archive_id);
+
+                slog::debug!(
+                    &self.log,
+                    "our little friend seems to have changed in some kind of way";
+                    "current_state" => ?current_state,
+                    "known_state" => ?self.known_state,
+                    "new_model" => %model,
+                    "new_serial" => %serial,
+                    "new_hubris_archive_id" => %hubris_archive_id,
+                );
+                let inv_devices = sp.inventory().await?.devices;
+
+                // Clear out any previously-known devices, and preallocate capacity
+                // for all the new ones.
+                self.components.clear();
+                self.components.reserve(inv_devices.len());
+
+                for dev in inv_devices {
+                    // Skip devices which have nothing interesting for us.
+                    if !dev
+                        .capabilities
+                        .contains(DeviceCapabilities::HAS_MEASUREMENT_CHANNELS)
+                    {
+                        continue;
+                    }
+                    let component_id = match dev.component.as_str() {
+                        Some(c) => Cow::Owned(c.to_string()),
+                        None => {
+                            // These are supposed to always be strings. But, if we
+                            // see one that's not a string, fall back to the hex
+                            // representation rather than panicking.
+                            let hex = hex::encode(dev.component.id);
+                            slog::warn!(
+                                &self.log,
+                                "a SP component ID was not a string! this isn't \
+                                 supposed to happen!";
+                                "component" => %hex,
+                                "device" => ?dev,
+                            );
+                            Cow::Owned(hex)
+                        }
+                    };
+
+                    // TODO(eliza): i hate having to clone all these strings for
+                    // every device on the SP...it would be cool if Oximeter let us
+                    // reference count them...
+                    let target = metric::HardwareComponent {
+                        rack_id: self.rack_id,
+                        gateway_id: self.mgs_id,
+                        chassis_model: Cow::Owned(model.clone()),
+                        chassis_revision: current_state.revision,
+                        chassis_kind: Cow::Borrowed(chassis_kind),
+                        chassis_serial: Cow::Owned(serial.clone()),
+                        hubris_archive_id: Cow::Owned(
+                            hubris_archive_id.clone(),
+                        ),
+                        slot: self.spid.slot as u32,
+                        component_kind: Cow::Owned(dev.device),
+                        component_id,
+                        description: Cow::Owned(dev.description),
+                    };
+                    match self.components.entry(dev.component) {
+                        // Found a new device!
+                        hash_map::Entry::Vacant(entry) => {
+                            slog::debug!(
+                                &self.log,
+                                "discovered a new component!";
+                                "component_id" => %target.component_id,
+                                "component_kind" => %target.component_kind,
+                                "description" => %target.description,
+                            );
+                            entry.insert(ComponentMetrics {
+                                target,
+                                sensor_errors: HashMap::new(),
+                                poll_errors: HashMap::new(),
+                            });
+                        }
+                        // We previously had a known device for this thing, but
+                        // the metrics target has changed, so we should reset
+                        // its cumulative metrics.
+                        hash_map::Entry::Occupied(mut entry)
+                            if entry.get().target != target =>
+                        {
+                            slog::trace!(
+                                &self.log,
+                                "target has changed, resetting cumulative metrics \
+                                 for component";
+                                "component" => ?dev.component,
+                            );
+                            entry.insert(ComponentMetrics {
+                                target,
+                                sensor_errors: HashMap::new(),
+                                poll_errors: HashMap::new(),
+                            });
+                        }
+                        // The target for this device hasn't changed, don't reset it.
+                        hash_map::Entry::Occupied(_) => {}
+                    }
+                }
+
+                self.known_state = Some(current_state);
+            }
+
+            // We will need capacity for *at least* the number of components on the
+            // SP --- it will probably be more, as several components have multiple
+            // measurement channels which will produce independent samples (e.g. a
+            // power rail will likely have both voltage and current measurements,
+            // and a device may have multiple rails...) but, this way, we can avoid
+            // *some* amount of reallocating...
+            samples.reserve(self.components.len());
+            for (c, metrics) in &mut self.components {
+                // Metrics samples *should* always be well-formed. If we ever emit a
+                // messed up one, this is a programmer error, and therefore should
+                // fail in test, but should probably *not* take down the whole
+                // management gateway in a real-life rack, especially because it's
+                // probably going to happen again if we were to get restarted.
+                const BAD_SAMPLE: &str =
+                    "we emitted a bad metrics sample! this should never happen";
+                macro_rules! try_sample {
+                    ($sample:expr) => {
+                        match $sample {
+                            Ok(sample) => samples.push(sample),
+                            Err(err) => {
+                                slog::error!(
+                                    &self.log,
+                                    "{BAD_SAMPLE}!";
+                                    "error" => %err,
+                                );
+                                #[cfg(debug_assertions)]
+                                unreachable!("{BAD_SAMPLE}: {err}");
+                            }
+                        }
+                    }
+                }
+                let details = match sp.component_details(*c).await {
+                    Ok(deets) => deets,
+                    // SP seems gone!
+                    Err(CommunicationError::NoSpDiscovered) => {
+                        return Err(CommunicationError::NoSpDiscovered)
+                    }
+                    Err(error) => {
+                        slog::warn!(
+                            &self.log,
+                            "failed to read details on SP component";
+                            "sp_component" => %c,
+                            "error" => %error,
+                        );
+                        try_sample!(metrics.poll_error(comms_error_str(error)));
+                        continue;
+                    }
+                };
+                if details.entries.is_empty() {
+                    slog::warn!(
+                        &self.log,
+                        "a component which claimed to have measurement channels \
+                         had empty details. this seems weird...";
+                        "sp_component" => %c,
+                    );
+                    try_sample!(metrics.poll_error("no_measurement_channels"));
+                    continue;
+                }
+
+                let ComponentMetrics { sensor_errors, target, .. } = metrics;
+                for d in details.entries {
+                    let ComponentDetails::Measurement(m) = d else {
+                        // If the component details are switch port details rather
+                        // than measurement channels, ignore it for now.
+                        continue;
+                    };
+                    let sensor: Cow<'static, str> = Cow::Owned(m.name);
+
+                    // First, if there's a measurement error, increment the
+                    // error count metric. We will synthesize a missing sample
+                    // for the sensor's metric as well, after we produce the
+                    // measurement error sample.
+                    //
+                    // We do this first so that we only have to clone the
+                    // sensor's name if there's an error, rather than always
+                    // cloning it in *case* there's an error.
+                    if let Err(error) = m.value {
+                        let kind = match m.kind {
+                            MeasurementKind::Temperature => "temperature",
+                            MeasurementKind::Current => "current",
+                            MeasurementKind::Voltage => "voltage",
+                            MeasurementKind::Power => "power",
+                            MeasurementKind::InputCurrent => "input_current",
+                            MeasurementKind::InputVoltage => "input_voltage",
+                            MeasurementKind::Speed => "fan_speed",
+                        };
+                        let error = match error {
+                            MeasurementError::InvalidSensor => "invalid_sensor",
+                            MeasurementError::NoReading => "no_reading",
+                            MeasurementError::NotPresent => "not_present",
+                            MeasurementError::DeviceError => "device_error",
+                            MeasurementError::DeviceUnavailable => {
+                                "device_unavailable"
+                            }
+                            MeasurementError::DeviceTimeout => "device_timeout",
+                            MeasurementError::DeviceOff => "device_off",
+                        };
+                        let datum = sensor_errors
+                            .entry(SensorErrorKey {
+                                name: sensor.clone(),
+                                kind,
+                                error,
+                            })
+                            .or_insert_with(|| Cumulative::new(0));
+                        // TODO(eliza): perhaps we should treat this as
+                        // "level-triggered" and only increment the counter
+                        // when the sensor has *changed* to an errored
+                        // state after we have seen at least one good
+                        // measurement from it since the last time the error
+                        // was observed?
+                        datum.increment();
+                        try_sample!(Sample::new(
+                            target,
+                            &metric::SensorErrorCount {
+                                error: Cow::Borrowed(error),
+                                sensor: sensor.clone(),
+                                datum: *datum,
+                                sensor_kind: Cow::Borrowed(kind),
+                            },
+                        ));
+                    }
+
+                    // I don't love this massive `match`, but because the
+                    // `Sample::new_missing` constructor is a different function
+                    // from `Sample::new`, we need separate branches for the
+                    // error and not-error cases, rather than just doing
+                    // something to produce a datum from both the `Ok` and
+                    // `Error` cases...
+                    let sample = match (m.value, m.kind) {
+                        (Ok(datum), MeasurementKind::Temperature) => {
+                            Sample::new(
+                                target,
+                                &metric::Temperature { sensor, datum },
+                            )
+                        }
+                        (Err(_), MeasurementKind::Temperature) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::Temperature { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::Current) => Sample::new(
+                            target,
+                            &metric::Current { sensor, datum },
+                        ),
+                        (Err(_), MeasurementKind::Current) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::Current { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::Voltage) => Sample::new(
+                            target,
+                            &metric::Voltage { sensor, datum },
+                        ),
+                        (Err(_), MeasurementKind::Voltage) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::Voltage { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::Power) => Sample::new(
+                            target,
+                            &metric::Power { sensor, datum },
+                        ),
+                        (Err(_), MeasurementKind::Power) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::Power { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::InputCurrent) => {
+                            Sample::new(
+                                target,
+                                &metric::InputCurrent { sensor, datum },
+                            )
+                        }
+                        (Err(_), MeasurementKind::InputCurrent) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::InputCurrent { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::InputVoltage) => {
+                            Sample::new(
+                                target,
+                                &metric::InputVoltage { sensor, datum },
+                            )
+                        }
+                        (Err(_), MeasurementKind::InputVoltage) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::InputVoltage { sensor, datum: 0.0 },
+                            )
+                        }
+                        (Ok(datum), MeasurementKind::Speed) => Sample::new(
+                            target,
+                            &metric::FanSpeed { sensor, datum },
+                        ),
+                        (Err(_), MeasurementKind::Speed) => {
+                            Sample::new_missing(
+                                target,
+                                &metric::FanSpeed { sensor, datum: 0.0 },
+                            )
+                        }
+                    };
+                    try_sample!(sample);
+                }
+            }
+
+            // Now, fetch the SP's state *again*. It is possible that, while we
+            // were scraping the SP's samples, the SP's identity changed in some
+            // way: perhaps its version was updated during the poll, or it
+            // was removed from the rack and replaced with an entirely different
+            // chassis! If that's the case, some of the samples we collected may
+            // have a metrics target describing the wrong thing (e.g. they could
+            // still have the previous firmware's `hubris_archive_id`, if the SP
+            // was updated). In that case, we need to throw away the samples we
+            // collected and try again, potentially rebuilding our understanding
+            // of the SP's inventory.
+            let state = SpUnderstanding::from(sp.state().await?);
+            if state == current_state {
+                // All good, the SP is still who we thought it was! We can
+                // "commit" this batch of samples
+                return Ok(samples);
+            }
+
+            slog::info!(
+                &self.log,
+                "SP's state changed mid-poll! discarding current samples and \
+                 starting over!";
+                "new_state" => ?state,
+                "current_state" => ?current_state,
+            );
+            // Let's reuse the buffer we already have for the next batch of
+            // samples.
+            samples.clear();
+            //...and try again with the new state.
+            current_state = state;
+        }
+    }
+}
+
+/// The fields of the `gateway_messages` `VersionedSpState` and
+/// `SpStateV1`/`SpStateV2`/`SpStateV3` that we actually care about for purposes
+/// of determining whether our understanding of the SP's components is still
+/// valid. Two values are compared with `==` to detect a change in the SP.
+///
+/// In particular, we throw out the RoT state and the SP's power state, because
+/// those changing won't actually invalidate our understanding of the SP's
+/// components.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+struct SpUnderstanding {
+    hubris_archive_id: [u8; 8],
+    serial_number: [u8; 32],
+    model: [u8; 32],
+    revision: u32,
+}
+
+/// Projection of any supported `VersionedSpState` down to `SpUnderstanding`.
+///
+/// Every version is treated the same way: `hubris_archive_id`,
+/// `serial_number`, `model` and `revision` are kept, and all other fields of
+/// the versioned state are discarded.
+impl From<VersionedSpState> for SpUnderstanding {
+    fn from(v: VersionedSpState) -> Self {
+        // Pull the four tracked fields out of whichever version we were
+        // handed. Each version has its own struct type, so the arms cannot
+        // be merged by binding the inner value under a single name.
+        //
+        // Note that there is no `_` arm: a state version added later will
+        // fail to compile here until it is handled explicitly.
+        let (hubris_archive_id, serial_number, model, revision) = match v {
+            VersionedSpState::V1(s) => {
+                (s.hubris_archive_id, s.serial_number, s.model, s.revision)
+            }
+            VersionedSpState::V2(s) => {
+                (s.hubris_archive_id, s.serial_number, s.model, s.revision)
+            }
+            VersionedSpState::V3(s) => {
+                (s.hubris_archive_id, s.serial_number, s.model, s.revision)
+            }
+        };
+        Self { hubris_archive_id, serial_number, model, revision }
+    }
+}
+
+// Stringify a 0-padded byte string. Reimplemented here because we don't
+// really care about reading the RoT state at present; this is unfortunately
+// copied from `gateway_messages`.
+fn stringify_byte_string(bytes: &[u8]) -> String {
+    // We expect serial and model numbers to be ASCII and 0-padded: convert the
+    // bytes before the first 0 to a string; if not UTF-8, hexlify the whole slice.
+    let first_zero = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
+
+    std::str::from_utf8(&bytes[..first_zero])
+        .map(|s| s.to_string())
+        .unwrap_or_else(|_err| hex::encode(bytes))
+}
+
+impl ServerManager {
+    async fn run(mut self, cfg: Option<MetricsConfig>) -> anyhow::Result<()> {
+        let (registration_address, bind_loopback) =
+            if let Some(MetricsConfig {
+                dev_bind_loopback,
+                dev_nexus_address,
+                ..
+            }) = cfg
+            {
+                if dev_bind_loopback || dev_nexus_address.is_some() {
+                    slog::warn!(
+                        &self.log,
+                        "using development metrics configuration overrides!";
+                        "nexus_address" => ?dev_nexus_address,
+                        "bind_loopback" => dev_bind_loopback,
+                    );
+                }
+                (dev_nexus_address, dev_bind_loopback)
+            } else {
+                (None, false)
+            };
+        let id = self.registry.producer_id();
+
+        let mut current_server: Option<oximeter_producer::Server> = None;
+        loop {
+            let current_ip = current_server.as_ref().map(|s| s.address().ip());
+            let mut new_ip = None;
+            for addr in self.addrs.borrow_and_update().iter() {
+                let &ip = addr.ip();
+                // Don't bind the metrics endpoint on ::1 unless `bind_loopback` is set
+                if ip.is_loopback() && !bind_loopback {
+                    continue;
+                }
+                // If our current address is contained in the new addresses,
+                // no need to rebind.
+                if current_ip == Some(IpAddr::V6(ip)) {
+                    new_ip = None;
+                    break;
+                } else {
+                    new_ip = Some(ip);
+                }
+            }
+
+            if let Some(ip) = new_ip {
+                slog::debug!(
+                    &self.log,
+                    "rebinding producer server on new IP";
+                    "new_ip" => ?ip,
+                    "current_ip" => ?current_ip,
+                    "collection_interval" => ?OXIMETER_COLLECTION_INTERVAL,
+                    "producer_id" => ?id,
+                );
+                let server = {
+                    // Listen on any available port, using the provided underlay IP.
+                    let address = SocketAddr::new(ip.into(), 0);
+
+                    let server_info = ProducerEndpoint {
+                        id,
+                        kind: ProducerKind::ManagementGateway,
+                        address,
+                        interval: OXIMETER_COLLECTION_INTERVAL,
+                    };
+                    let config = oximeter_producer::Config {
+                        server_info,
+                        registration_address,
+                        request_body_max_bytes: METRIC_REQUEST_MAX_SIZE,
+                        log: oximeter_producer::LogConfig::Logger(
+                            self.log.clone(),
+                        ),
+                    };
+                    oximeter_producer::Server::with_registry(
+                        self.registry.clone(),
+                        &config,
+                    )
+                    .context("failed to start producer server")?
+                };
+
+                slog::info!(
+                    &self.log,
+                    "bound metrics producer server";
+                    "collection_interval" => ?OXIMETER_COLLECTION_INTERVAL,
+                    "producer_id" => ?id,
+                    "address" => %server.address(),
+                );
+
+                if let Some(old_server) = current_server.replace(server) {
+                    let old_addr = old_server.address();
+                    if let Err(error) = old_server.close().await {
+                        slog::error!(
+                            &self.log,
+                            "failed to close old metrics producer server";
+                            "address" => %old_addr,
+                            "error" => %error,
+                        );
+                    } else {
+                        slog::debug!(
+                            &self.log,
+                            "old metrics producer server shut down";
+                            "address" => %old_addr,
+                        )
+                    }
+                }
+            }
+
+            // Wait for a subsequent address change.
+            self.addrs.changed().await?;
+        }
+    }
+}
+
+impl ComponentMetrics {
+    fn poll_error(
+        &mut self,
+        error_str: &'static str,
+    ) -> Result<Sample, MetricsError> {
+        let datum = self
+            .poll_errors
+            .entry(error_str)
+            .or_insert_with(|| Cumulative::new(0));
+        datum.increment();
+        Sample::new(
+            &self.target,
+            &metric::PollErrorCount {
+                error: Cow::Borrowed(error_str),
+                datum: *datum,
+            },
+        )
+    }
+}
+
+fn comms_error_str(error: CommunicationError) -> &'static str {
+    // TODO(eliza): a bunch of these probably can't be returned by the specific
+    // operations we try to do. It could be good to make the methods this code
+    // calls return a smaller enum of just the errors it might actually
+    // encounter? Figure this out later.
+    match error {
+        CommunicationError::NoSpDiscovered => "no_sp_discovered",
+        CommunicationError::InterfaceError(_) => "interface",
+        CommunicationError::ScopeIdChangingFrequently { .. } => {
+            "scope_id_changing_frequently"
+        }
+        CommunicationError::JoinMulticast { .. } => "join_multicast",
+        CommunicationError::UdpSendTo { .. } => "udp_send_to",
+        CommunicationError::UdpRecv(_) => "udp_recv",
+        CommunicationError::Deserialize { .. } => "deserialize",
+        CommunicationError::ExhaustedNumAttempts(_) => "exhausted_num_attempts",
+        CommunicationError::BadResponseType { .. } => "bad_response_type",
+        CommunicationError::SpError { .. } => "sp_error",
+        CommunicationError::BogusSerialConsoleState { .. } => {
+            "bogus_serial_console_state"
+        }
+        CommunicationError::VersionMismatch { .. } => {
+            "protocol_version_mismatch"
+        }
+        CommunicationError::TlvDeserialize { .. } => "tlv_deserialize",
+        CommunicationError::TlvDecode(_) => "tlv_decode",
+        CommunicationError::TlvPagination { .. } => "tlv_pagination",
+        CommunicationError::IpccKeyLookupValueTooLarge => {
+            "ipcc_key_lookup_value_too_large"
+        }
+        CommunicationError::UnexpectedTrailingData(_) => {
+            "unexpected_trailing_data"
+        }
+        CommunicationError::BadTrailingDataSize { .. } => {
+            "bad_trailing_data_size"
+        }
+    }
+}
diff --git a/gateway/tests/integration_tests/component_list.rs b/gateway/tests/integration_tests/component_list.rs
index ec876c0783..993dcc9e93 100644
--- a/gateway/tests/integration_tests/component_list.rs
+++ b/gateway/tests/integration_tests/component_list.rs
@@ -57,7 +57,71 @@ async fn component_list() {
                 capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
                     .bits(),
                 presence: SpComponentPresence::Failed,
-            }
+            },
+            SpComponentInfo {
+                component: "dev-1".to_string(),
+                device: "tmp117".to_string(),
+                serial_number: None,
+                description: "FAKE temperature sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-2".to_string(),
+                device: "tmp117".to_string(),
+                serial_number: None,
+                description: "FAKE Southeast temperature sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-6".to_string(),
+                device: "at24csw080".to_string(),
+                serial_number: None,
+                description: "FAKE U.2 Sharkfin A VPD".to_string(),
+                capabilities: 0,
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-7".to_string(),
+                device: "max5970".to_string(),
+                serial_number: None,
+                description: "FAKE U.2 Sharkfin A hot swap controller"
+                    .to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-8".to_string(),
+                device: "nvme_bmc".to_string(),
+                serial_number: None,
+                description: "FAKE U.2 A NVMe Basic Management Command"
+                    .to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-39".to_string(),
+                device: "tmp451".to_string(),
+                serial_number: None,
+                description: "FAKE T6 temperature sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-53".to_string(),
+                device: "max31790".to_string(),
+                serial_number: None,
+                description: "FAKE Fan controller".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
         ]
     );
 
@@ -67,14 +131,89 @@ async fn component_list() {
 
     assert_eq!(
         resp.components,
-        &[SpComponentInfo {
-            component: SpComponent::SP3_HOST_CPU.const_as_str().to_string(),
-            device: SpComponent::SP3_HOST_CPU.const_as_str().to_string(),
-            serial_number: None,
-            description: "FAKE host cpu".to_string(),
-            capabilities: 0,
-            presence: SpComponentPresence::Present,
-        },]
+        &[
+            SpComponentInfo {
+                component: SpComponent::SP3_HOST_CPU.const_as_str().to_string(),
+                device: SpComponent::SP3_HOST_CPU.const_as_str().to_string(),
+                serial_number: None,
+                description: "FAKE host cpu".to_string(),
+                capabilities: 0,
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-0".to_string(),
+                device: "tmp117".to_string(),
+                serial_number: None,
+                description: "FAKE temperature sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-1".to_string(),
+                device: "tmp117".to_string(),
+                serial_number: None,
+                description: "FAKE temperature sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-2".to_string(),
+                device: "tmp117".to_string(),
+                serial_number: None,
+                description: "FAKE Southeast temperature sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-6".to_string(),
+                device: "at24csw080".to_string(),
+                serial_number: None,
+                description: "FAKE U.2 Sharkfin A VPD".to_string(),
+                capabilities: 0,
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-7".to_string(),
+                device: "max5970".to_string(),
+                serial_number: None,
+                description: "FAKE U.2 Sharkfin A hot swap controller"
+                    .to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-8".to_string(),
+                device: "nvme_bmc".to_string(),
+                serial_number: None,
+                description: "FAKE U.2 A NVMe Basic Management Command"
+                    .to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-39".to_string(),
+                device: "tmp451".to_string(),
+                serial_number: None,
+                description: "FAKE T6 temperature sensor".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+            SpComponentInfo {
+                component: "dev-53".to_string(),
+                device: "max31790".to_string(),
+                serial_number: None,
+                description: "FAKE Fan controller".to_string(),
+                capabilities: DeviceCapabilities::HAS_MEASUREMENT_CHANNELS
+                    .bits(),
+                presence: SpComponentPresence::Present,
+            },
+        ]
     );
 
     // Get the component list for switch 0.
diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs
index 74a7356adb..c2fab2de5a 100644
--- a/nexus/db-model/src/producer_endpoint.rs
+++ b/nexus/db-model/src/producer_endpoint.rs
@@ -22,6 +22,7 @@ impl_enum_type!(
     #[diesel(sql_type = ProducerKindEnum)]
     pub enum ProducerKind;
 
+    ManagementGateway => b"management_gateway"
     SledAgent => b"sled_agent"
     Service => b"service"
     Instance => b"instance"
@@ -30,6 +31,9 @@ impl_enum_type!(
 impl From<internal::nexus::ProducerKind> for ProducerKind {
     fn from(kind: internal::nexus::ProducerKind) -> Self {
         match kind {
+            internal::nexus::ProducerKind::ManagementGateway => {
+                ProducerKind::ManagementGateway
+            }
             internal::nexus::ProducerKind::SledAgent => ProducerKind::SledAgent,
             internal::nexus::ProducerKind::Service => ProducerKind::Service,
             internal::nexus::ProducerKind::Instance => ProducerKind::Instance,
@@ -40,6 +44,9 @@ impl From<internal::nexus::ProducerKind> for ProducerKind {
 impl From<ProducerKind> for internal::nexus::ProducerKind {
     fn from(kind: ProducerKind) -> Self {
         match kind {
+            ProducerKind::ManagementGateway => {
+                internal::nexus::ProducerKind::ManagementGateway
+            }
             ProducerKind::SledAgent => internal::nexus::ProducerKind::SledAgent,
             ProducerKind::Service => internal::nexus::ProducerKind::Service,
             ProducerKind::Instance => internal::nexus::ProducerKind::Instance,
diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs
index d0542874fb..aef95e6d53 100644
--- a/nexus/db-model/src/schema_versions.rs
+++ b/nexus/db-model/src/schema_versions.rs
@@ -17,7 +17,7 @@ use std::collections::BTreeMap;
 ///
 /// This must be updated when you change the database schema.  Refer to
 /// schema/crdb/README.adoc in the root of this repository for details.
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(90, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(91, 0, 0);
 
 /// List of all past database schema versions, in *reverse* order
 ///
@@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
         // |  leaving the first copy as an example for the next person.
         // v
         // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
+        KnownVersion::new(91, "add-management-gateway-producer-kind"),
         KnownVersion::new(90, "lookup-bgp-config-by-asn"),
         KnownVersion::new(89, "collapse_lldp_settings"),
         KnownVersion::new(88, "route-local-pref"),
diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs
index 3b808984ae..9f4652c2da 100644
--- a/nexus/tests/integration_tests/metrics.rs
+++ b/nexus/tests/integration_tests/metrics.rs
@@ -23,8 +23,11 @@ use nexus_types::external_api::views::OxqlQueryResult;
 use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError};
 use omicron_uuid_kinds::{GenericUuid, InstanceUuid};
 use oximeter::types::Datum;
+use oximeter::types::FieldValue;
 use oximeter::types::Measurement;
 use oximeter::TimeseriesSchema;
+use std::borrow::Borrow;
+use std::collections::HashMap;
 use uuid::Uuid;
 
 pub async fn query_for_metrics(
@@ -344,7 +347,6 @@ async fn test_instance_watcher_metrics(
             );
         }};
     }
-    use oximeter::types::FieldValue;
     const INSTANCE_ID_FIELD: &str = "instance_id";
     const STATE_FIELD: &str = "state";
     const STATE_STARTING: &str = "starting";
@@ -589,6 +591,183 @@ async fn test_instance_watcher_metrics(
     assert_gte!(ts2_running, 2);
 }
 
+#[nexus_test]
+async fn test_mgs_metrics(
+    cptestctx: &ControlPlaneTestContext<omicron_nexus::Server>,
+) {
+    // Make an MGS
+    let (mut mgs_config, sp_sim_config) =
+        gateway_test_utils::setup::load_test_config();
+    let mgs = {
+        // munge the already-parsed MGS config file to point it at the test
+        // Nexus' address.
+        mgs_config.metrics = Some(gateway_test_utils::setup::MetricsConfig {
+            disabled: false,
+            dev_bind_loopback: true,
+            dev_nexus_address: Some(cptestctx.internal_client.bind_address),
+        });
+        gateway_test_utils::setup::test_setup_with_config(
+            "test_mgs_metrics",
+            gateway_messages::SpPort::One,
+            mgs_config,
+            &sp_sim_config,
+            None,
+        )
+        .await
+    };
+
+    // Let's look at all the simulated SP components in the config file which
+    // have sensor readings, so we can assert that there are timeseries for all
+    // of them.
+    let all_sp_configs = {
+        let gimlet_configs =
+            sp_sim_config.simulated_sps.gimlet.iter().map(|g| &g.common);
+        let sidecar_configs =
+            sp_sim_config.simulated_sps.sidecar.iter().map(|s| &s.common);
+        gimlet_configs.chain(sidecar_configs)
+    };
+    // XXX(eliza): yes, this code is repetitive. We could probably make it a
+    // little less ugly with nested hash maps, but like...I already wrote it, so
+    // you don't have to. :)
+    //
+    // TODO(eliza): presently, we just expect that the number of timeseries for
+    // each serial number and sensor type lines up. If we wanted to be *really*
+    // fancy, we could also assert that all the component IDs, component kinds,
+    // and measurement values line up with the config. But, honestly, it's
+    // pretty unlikely that a bug in MGS' sensor metrics subsystem would mess
+    // that up --- the most important thing is just to make sure that the sensor
+    // data is *present*, as that should catch most regressions.
+    let mut temp_sensors = HashMap::new();
+    let mut current_sensors = HashMap::new();
+    let mut voltage_sensors = HashMap::new();
+    let mut power_sensors = HashMap::new();
+    let mut input_voltage_sensors = HashMap::new();
+    let mut input_current_sensors = HashMap::new();
+    let mut fan_speed_sensors = HashMap::new();
+    for sp in all_sp_configs {
+        let mut temp = 0;
+        let mut current = 0;
+        let mut voltage = 0;
+        let mut input_voltage = 0;
+        let mut input_current = 0;
+        let mut power = 0;
+        let mut speed = 0;
+        for component in &sp.components {
+            for sensor in &component.sensors {
+                use gateway_messages::measurement::MeasurementKind as Kind;
+                match sensor.def.kind {
+                    Kind::Temperature => temp += 1,
+                    Kind::Current => current += 1,
+                    Kind::Voltage => voltage += 1,
+                    Kind::InputVoltage => input_voltage += 1,
+                    Kind::InputCurrent => input_current += 1,
+                    Kind::Speed => speed += 1,
+                    Kind::Power => power += 1,
+                }
+            }
+        }
+        temp_sensors.insert(sp.serial_number.clone(), temp);
+        current_sensors.insert(sp.serial_number.clone(), current);
+        voltage_sensors.insert(sp.serial_number.clone(), voltage);
+        input_voltage_sensors.insert(sp.serial_number.clone(), input_voltage);
+        input_current_sensors.insert(sp.serial_number.clone(), input_current);
+        fan_speed_sensors.insert(sp.serial_number.clone(), speed);
+        power_sensors.insert(sp.serial_number.clone(), power);
+    }
+
+    async fn check_all_timeseries_present(
+        cptestctx: &ControlPlaneTestContext<omicron_nexus::Server>,
+        name: &str,
+        expected: HashMap<String, usize>,
+    ) {
+        let metric_name = format!("hardware_component:{name}");
+        eprintln!("\n=== checking timeseries for {metric_name} ===\n");
+
+        if expected.values().all(|&v| v == 0) {
+            eprintln!(
+                "-> SP sim config contains no {name} sensors, skipping it"
+            );
+            return;
+        }
+
+        let table = timeseries_query(&cptestctx, &format!("get {metric_name}"))
+            .await
+            .into_iter()
+            .find(|t| t.name() == metric_name);
+        let table = match table {
+            Some(table) => table,
+            None => panic!("missing table for {metric_name}"),
+        };
+
+        let mut found = expected
+            .keys()
+            .map(|serial| (serial.clone(), 0))
+            .collect::<HashMap<_, usize>>();
+        for timeseries in table.timeseries() {
+            let fields = &timeseries.fields;
+            let n_points = timeseries.points.len();
+            assert!(
+                n_points > 0,
+                "{metric_name} timeseries {fields:?} should have points"
+            );
+            let serial_str: &str = match timeseries.fields.get("chassis_serial")
+            {
+                Some(FieldValue::String(s)) => s.borrow(),
+                Some(x) => panic!(
+                    "{metric_name} `chassis_serial` field should be a string, but got: {x:?}"
+                ),
+                None => {
+                    panic!("{metric_name} timeseries should have a `chassis_serial` field")
+                }
+            };
+            if let Some(count) = found.get_mut(serial_str) {
+                *count += 1;
+            } else {
+                panic!(
+                    "{metric_name} timeseries had an unexpected chassis serial \
+                     number {serial_str:?} (not in the config file)",
+                );
+            }
+        }
+
+        eprintln!("-> {metric_name}: found timeseries: {found:#?}");
+        assert_eq!(
+            found, expected,
+            "number of {metric_name} timeseries didn't match expected in {table:#?}",
+        );
+        eprintln!("-> okay, looks good!");
+    }
+
+    // Wait until the MGS registers as a producer with Oximeter.
+    wait_for_producer(&cptestctx.oximeter, &mgs.gateway_id).await;
+
+    // ...and collect its samples.
+    cptestctx.oximeter.force_collect().await;
+
+    check_all_timeseries_present(&cptestctx, "temperature", temp_sensors).await;
+    check_all_timeseries_present(&cptestctx, "voltage", voltage_sensors).await;
+    check_all_timeseries_present(&cptestctx, "current", current_sensors).await;
+    check_all_timeseries_present(&cptestctx, "power", power_sensors).await;
+    check_all_timeseries_present(
+        &cptestctx,
+        "input_voltage",
+        input_voltage_sensors,
+    )
+    .await;
+    check_all_timeseries_present(
+        &cptestctx,
+        "input_current",
+        input_current_sensors,
+    )
+    .await;
+    check_all_timeseries_present(&cptestctx, "fan_speed", fan_speed_sensors)
+        .await;
+
+    // Because the `ControlPlaneTestContext` isn't managing the MGS we made for
+    // this test, we are responsible for removing its logs.
+    mgs.logctx.cleanup_successful();
+}
+
 /// Wait until a producer is registered with Oximeter.
 ///
 /// This blocks until the producer is registered, for up to 60s. It panics if
diff --git a/nexus/tests/integration_tests/sp_updater.rs b/nexus/tests/integration_tests/sp_updater.rs
index 8314d22173..6e482bc1ad 100644
--- a/nexus/tests/integration_tests/sp_updater.rs
+++ b/nexus/tests/integration_tests/sp_updater.rs
@@ -434,9 +434,23 @@ async fn test_sp_updater_switches_mgs_instances_on_failure() {
 #[tokio::test]
 async fn test_sp_updater_delivers_progress() {
     // Start MGS + Sim SP.
-    let mgstestctx =
-        mgs_setup::test_setup("test_sp_updater_delivers_progress", SpPort::One)
-            .await;
+    let mgstestctx = {
+        let (mut mgs_config, sp_sim_config) = mgs_setup::load_test_config();
+        // Enabling SP metrics collection makes this already-flaky test even
+        // flakier, so let's just turn it off.
+        // TODO(eliza): it would be nice if we didn't have to disable metrics in
+        // this test, so that we can better catch regressions that could be
+        // introduced by the metrics subsystem...
+        mgs_config.metrics.get_or_insert_with(Default::default).disabled = true;
+        mgs_setup::test_setup_with_config(
+            "test_sp_updater_delivers_progress",
+            SpPort::One,
+            mgs_config,
+            &sp_sim_config,
+            None,
+        )
+        .await
+    };
 
     // Configure an MGS client.
     let mut mgs_clients =
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json
index 54b4822e51..111bd552d0 100644
--- a/openapi/nexus-internal.json
+++ b/openapi/nexus-internal.json
@@ -4443,6 +4443,13 @@
             "enum": [
               "instance"
             ]
+          },
+          {
+            "description": "The producer is a management gateway service.",
+            "type": "string",
+            "enum": [
+              "management_gateway"
+            ]
           }
         ]
       },
diff --git a/openapi/nexus.json b/openapi/nexus.json
index 2a8c227c64..f6d140ed05 100644
--- a/openapi/nexus.json
+++ b/openapi/nexus.json
@@ -19934,6 +19934,7 @@
               "nanoseconds",
               "volts",
               "amps",
+              "watts",
               "degrees_celsius"
             ]
           },
diff --git a/openapi/oximeter.json b/openapi/oximeter.json
index f596ac6ee6..327351d961 100644
--- a/openapi/oximeter.json
+++ b/openapi/oximeter.json
@@ -277,6 +277,13 @@
             "enum": [
               "instance"
             ]
+          },
+          {
+            "description": "The producer is a management gateway service.",
+            "type": "string",
+            "enum": [
+              "management_gateway"
+            ]
           }
         ]
       }
diff --git a/oximeter/oximeter/schema/hardware-component.toml b/oximeter/oximeter/schema/hardware-component.toml
new file mode 100644
index 0000000000..30a1d6510f
--- /dev/null
+++ b/oximeter/oximeter/schema/hardware-component.toml
@@ -0,0 +1,183 @@
+format_version = 1
+
+[target]
+name = "hardware_component"
+description = "A hardware component on a compute sled, switch, or power shelf"
+authz_scope = "fleet"
+versions = [
+    { version = 1, fields = [
+        "rack_id",
+        "slot",
+        "chassis_kind",
+        "chassis_serial",
+        "chassis_model",
+        "chassis_revision",
+        "hubris_archive_id",
+        "gateway_id",
+        "component_kind",
+        "component_id",
+        "description",
+    ]}
+]
+
+[fields.rack_id]
+type = "uuid"
+description = "ID of the rack on which this measurement was recorded."
+
+[fields.slot]
+type = "u32"
+description = """
+The cubby number or switch slot of the service processor reporting the \
+measurement"""
+
+[fields.chassis_model]
+type = "string"
+description = "Model number of the sled, switch, or power shelf"
+
+[fields.chassis_revision]
+type = "u32"
+description = "Revision number of the sled, switch, or power shelf"
+
+[fields.chassis_serial]
+type = "string"
+description = "Serial number of the sled, switch, or power shelf"
+
+[fields.hubris_archive_id]
+type = "string"
+description = """
+Hubris firmware archive ID of the service processor when the measurement \
+was recorded."""
+
+[fields.gateway_id]
+type = "uuid"
+description = """
+ID of the Management Gateway Service process which recorded the measurement."""
+
+[fields.chassis_kind]
+type = "string"
+description = """
+The kind of chassis on which the component resides.
+
+This will be one of 'sled', for components on compute sleds; 'switch', for \
+components on rack switches; or 'power', for components on power shelves."""
+
+[fields.component_id]
+type = "string"
+description = """
+The service processor component ID uniquely identifying the hardware \
+component on the sled, switch, or power shelf."""
+
+[fields.component_kind]
+type = "string"
+description = "The type of this hardware component."
+
+[fields.description]
+type = "string"
+description = """
+A human-readable description of the hardware component. This may include \
+its location or role in the system (e.g. a DIMM's number, or a temperature \
+sensor's location)."""
+
+[fields.sensor]
+type = "string"
+description = """The name of a sensor that recorded a sensor reading."""
+
+[fields.error]
+type = "string"
+description = "The kind of sensor or polling error that occurred"
+
+[fields.sensor_kind]
+type = "string"
+description = """
+Which kind of sensor could not be read due to a sensor error.
+
+This will be one of 'temperature', 'current', 'power', 'voltage', \
+'input_current', 'input_voltage', or 'fan_speed' (the same names as \
+the metrics emitted by these sensors when they are read successfully)."""
+
+[[metrics]]
+name = "temperature"
+description = "A temperature reading from a hardware component."
+units = "degrees_celsius"
+datum_type = "f32"
+versions = [
+    { added_in = 1, fields = ["sensor"]}
+]
+
+[[metrics]]
+name = "current"
+description = "Output current reading, in amperes"
+units = "amps"
+datum_type = "f32"
+versions = [
+    { added_in = 1, fields = ["sensor"]}
+]
+
+[[metrics]]
+name = "power"
+description = "Power reading, in watts"
+units = "watts"
+datum_type = "f32"
+versions = [
+    { added_in = 1, fields = ["sensor"]}
+]
+
+[[metrics]]
+name = "voltage"
+description = "Output voltage reading, in volts"
+units = "volts"
+datum_type = "f32"
+versions = [
+    { added_in = 1, fields = ["sensor"]}
+]
+
+[[metrics]]
+name = "input_current"
+description = "Input electric current reading, in amperes"
+units = "amps"
+datum_type = "f32"
+versions = [
+    { added_in = 1, fields = ["sensor"]}
+]
+
+[[metrics]]
+name = "input_voltage"
+description = "Input electric voltage reading, in volts"
+units = "volts"
+datum_type = "f32"
+versions = [
+    { added_in = 1, fields = ["sensor"]}
+]
+
+
+[[metrics]]
+name = "fan_speed"
+description = "A fan speed measurement, in rotations per minute"
+units = "rpm"
+datum_type = "f32"
+versions = [
+    { added_in = 1, fields = ["sensor"]}
+]
+
+[[metrics]]
+name = "sensor_error_count"
+description = "Cumulative count of errors reported by a sensor"
+units = "count"
+datum_type = "cumulative_u64"
+versions = [
+    { added_in = 1, fields = ["sensor", "error", "sensor_kind"]}
+]
+
+[[metrics]]
+name = "poll_error_count"
+description = """
+Cumulative count of errors encountered whilst polling a component's sensors.
+
+Unlike the `sensor_error_count` metric, this counts errors encountered by \
+the management gateway while polling the component, rather than errors \
+reported by the component itself."""
+units = "count"
+datum_type = "cumulative_u64"
+versions = [
+    { added_in = 1, fields = ["error"] }
+]
diff --git a/oximeter/schema/src/codegen.rs b/oximeter/schema/src/codegen.rs
index c46c25c97d..1e6e352c15 100644
--- a/oximeter/schema/src/codegen.rs
+++ b/oximeter/schema/src/codegen.rs
@@ -512,6 +512,7 @@ fn quote_units(units: Units) -> TokenStream {
         }
         Units::Amps => quote! { ::oximeter::schema::Units::Amps },
         Units::Volts => quote! { ::oximeter::schema::Units::Volts },
+        Units::Watts => quote! { ::oximeter::schema::Units::Watts },
         Units::DegreesCelsius => {
             quote! { ::oximeter::schema::Units::DegreesCelsius }
         }
diff --git a/oximeter/types/src/schema.rs b/oximeter/types/src/schema.rs
index e06e6e2b57..135c77462a 100644
--- a/oximeter/types/src/schema.rs
+++ b/oximeter/types/src/schema.rs
@@ -189,6 +189,7 @@ pub enum Units {
     Nanoseconds,
     Volts,
     Amps,
+    Watts,
     DegreesCelsius,
     /// Rotations per minute.
     Rpm,
diff --git a/schema/crdb/add-management-gateway-producer-kind/up.sql b/schema/crdb/add-management-gateway-producer-kind/up.sql
new file mode 100644
index 0000000000..e872278e2f
--- /dev/null
+++ b/schema/crdb/add-management-gateway-producer-kind/up.sql
@@ -0,0 +1,2 @@
+ALTER TYPE omicron.public.producer_kind
+    ADD VALUE IF NOT EXISTS 'management_gateway' AFTER 'instance';
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index baef38e44f..1457532c49 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -1334,7 +1334,9 @@ CREATE TYPE IF NOT EXISTS omicron.public.producer_kind AS ENUM (
     -- removed).
     'service',
     -- A Propolis VMM for an instance in the omicron.public.instance table
-    'instance'
+    'instance',
+    -- A management gateway service on a scrimlet.
+    'management_gateway'
 );
 
 /*
@@ -4212,7 +4214,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    (TRUE, NOW(), NOW(), '90.0.0', NULL)
+    (TRUE, NOW(), NOW(), '91.0.0', NULL)
 ON CONFLICT DO NOTHING;
 
 COMMIT;