diff --git a/src/internet_identity/src/anchor_management/registration.rs b/src/internet_identity/src/anchor_management/registration.rs index 04bb5d78c6..8a3aaa9585 100644 --- a/src/internet_identity/src/anchor_management/registration.rs +++ b/src/internet_identity/src/anchor_management/registration.rs @@ -106,6 +106,7 @@ pub fn register( anchor_number )) }); + storage.registration_rates.new_registration() }); // Save the 'temp_key' as a mean of authenticating for a short period of time, see diff --git a/src/internet_identity/src/anchor_management/registration/registration_flow_v2.rs b/src/internet_identity/src/anchor_management/registration/registration_flow_v2.rs index 5d56b623ca..304269b587 100644 --- a/src/internet_identity/src/anchor_management/registration/registration_flow_v2.rs +++ b/src/internet_identity/src/anchor_management/registration/registration_flow_v2.rs @@ -153,8 +153,11 @@ fn create_identity(arg: &IdRegFinishArg) -> Result>) -> std::io::Result<()> { storage.event_aggregations.len() as f64, "Number of entries in the event_aggregations map.", )?; + if let Some(registration_rates) = storage.registration_rates.registration_rates() { + w.counter_vec( + "internet_identity_registrations_per_second", + "Rate of new identity registrations on Internet Identity", + )? + .value( + &[("type", "reference_rate")], + registration_rates.reference_rate_per_second, + )? + .value( + &[("type", "current_rate")], + registration_rates.current_rate_per_second, + )? 
+ .value( + &[("type", "captcha_threshold_rate")], + registration_rates.captcha_threshold_rate, + )?; + } let mut virtual_memory_stats_builder = w.gauge_vec( "internet_identity_virtual_memory_size_pages", diff --git a/src/internet_identity/src/storage.rs b/src/internet_identity/src/storage.rs index fe123cfdca..a02a26ab6b 100644 --- a/src/internet_identity/src/storage.rs +++ b/src/internet_identity/src/storage.rs @@ -92,7 +92,9 @@ use ic_stable_structures::memory_manager::{MemoryId, MemoryManager, VirtualMemor use ic_stable_structures::reader::Reader; use ic_stable_structures::storable::Bound; use ic_stable_structures::writer::Writer; -use ic_stable_structures::{Memory, RestrictedMemory, StableBTreeMap, StableCell, Storable}; +use ic_stable_structures::{ + Memory, MinHeap, RestrictedMemory, StableBTreeMap, StableCell, Storable, +}; use internet_identity_interface::archive::types::BufferedEntry; use crate::stats::event_stats::AggregationKey; @@ -102,10 +104,12 @@ use internet_identity_interface::internet_identity::types::*; use crate::state::PersistentState; use crate::storage::anchor::Anchor; use crate::storage::memory_wrapper::MemoryWrapper; +use crate::storage::registration_rates::RegistrationRates; use crate::storage::storable_anchor::StorableAnchor; use crate::storage::storable_persistent_state::StorablePersistentState; pub mod anchor; +pub mod registration_rates; /// module for the internal serialization format of anchors mod storable_anchor; @@ -129,11 +133,17 @@ const ARCHIVE_BUFFER_MEMORY_INDEX: u8 = 1u8; const PERSISTENT_STATE_MEMORY_INDEX: u8 = 2u8; const EVENT_DATA_MEMORY_INDEX: u8 = 3u8; const STATS_AGGREGATIONS_MEMORY_INDEX: u8 = 4u8; +const REGISTRATION_REFERENCE_RATE_MEMORY_INDEX: u8 = 5u8; +const REGISTRATION_CURRENT_RATE_MEMORY_INDEX: u8 = 6u8; const ANCHOR_MEMORY_ID: MemoryId = MemoryId::new(ANCHOR_MEMORY_INDEX); const ARCHIVE_BUFFER_MEMORY_ID: MemoryId = MemoryId::new(ARCHIVE_BUFFER_MEMORY_INDEX); const PERSISTENT_STATE_MEMORY_ID: MemoryId = 
MemoryId::new(PERSISTENT_STATE_MEMORY_INDEX); const EVENT_DATA_MEMORY_ID: MemoryId = MemoryId::new(EVENT_DATA_MEMORY_INDEX); const STATS_AGGREGATIONS_MEMORY_ID: MemoryId = MemoryId::new(STATS_AGGREGATIONS_MEMORY_INDEX); +const REGISTRATION_REFERENCE_RATE_MEMORY_ID: MemoryId = + MemoryId::new(REGISTRATION_REFERENCE_RATE_MEMORY_INDEX); +const REGISTRATION_CURRENT_RATE_MEMORY_ID: MemoryId = + MemoryId::new(REGISTRATION_CURRENT_RATE_MEMORY_INDEX); // The bucket size 128 is relatively low, to avoid wasting memory when using // multiple virtual memories for smaller amounts of data. // This value results in 256 GB of total managed memory, which should be enough @@ -186,6 +196,11 @@ pub struct Storage { /// Memory wrapper used to report the size of the stats aggregation memory. event_aggregations_memory_wrapper: MemoryWrapper>, pub event_aggregations: StableBTreeMap>, + /// Registration rates tracked for the purpose of toggling the dynamic captcha (if configured) + /// This data is persisted as it potentially contains data collected over longer periods of time. 
+ pub registration_rates: RegistrationRates>, + current_registration_rate_memory_wrapper: MemoryWrapper>, + reference_registration_rate_memory_wrapper: MemoryWrapper>, } #[repr(packed)] @@ -243,10 +258,28 @@ impl Storage { let persistent_state_memory = memory_manager.get(PERSISTENT_STATE_MEMORY_ID); let event_data_memory = memory_manager.get(EVENT_DATA_MEMORY_ID); let stats_aggregations_memory = memory_manager.get(STATS_AGGREGATIONS_MEMORY_ID); + let registration_ref_rate_memory = + memory_manager.get(REGISTRATION_REFERENCE_RATE_MEMORY_ID); + let registration_current_rate_memory = + memory_manager.get(REGISTRATION_CURRENT_RATE_MEMORY_ID); + + let registration_rates = RegistrationRates::new( + MinHeap::init(registration_ref_rate_memory.clone()) + .expect("failed to initialize registration reference rate min heap"), + MinHeap::init(registration_current_rate_memory.clone()) + .expect("failed to initialize registration current rate min heap"), + ); Self { header, header_memory, anchor_memory, + registration_rates, + reference_registration_rate_memory_wrapper: MemoryWrapper::new( + registration_ref_rate_memory, + ), + current_registration_rate_memory_wrapper: MemoryWrapper::new( + registration_current_rate_memory, + ), archive_buffer_memory_wrapper: MemoryWrapper::new(archive_buffer_memory.clone()), archive_entries_buffer: StableBTreeMap::init(archive_buffer_memory), persistent_state_memory_wrapper: MemoryWrapper::new(persistent_state_memory.clone()), @@ -514,6 +547,14 @@ impl Storage { "event_aggregations".to_string(), self.event_aggregations_memory_wrapper.size(), ), + ( + "reference_registration_rate".to_string(), + self.reference_registration_rate_memory_wrapper.size(), + ), + ( + "current_registration_rate".to_string(), + self.current_registration_rate_memory_wrapper.size(), + ), ]) } } diff --git a/src/internet_identity/src/storage/registration_rates.rs b/src/internet_identity/src/storage/registration_rates.rs new file mode 100644 index 0000000000..b14ec13f4d --- 
/dev/null
+++ b/src/internet_identity/src/storage/registration_rates.rs
@@ -0,0 +1,181 @@
+//! Module to track the rate at which new registrations are started over both a longer reference
+//! period and a short period (to determine the current rate).
+//! These rates are then used to determine whether a captcha needs to be solved or not.
+
+use crate::state;
+use ic_cdk::api::time;
+use ic_stable_structures::{Memory, MinHeap};
+use internet_identity_interface::internet_identity::types::{CaptchaTrigger, Timestamp};
+use std::time::Duration;
+
+/// Registration bookkeeping backed by two min heaps of expiration timestamps, one per
+/// sampling window.
+pub struct RegistrationRates<M: Memory> {
+    /// Expiration timestamps of the registrations counted towards the reference rate.
+    reference_rate_data: MinHeap<Timestamp, M>,
+    /// Expiration timestamps of the registrations counted towards the current rate.
+    current_rate_data: MinHeap<Timestamp, M>,
+}
+
+/// Registration rates normalized to registrations per second.
+pub struct NormalizedRegistrationRates {
+    /// Rate observed over the reference sampling window.
+    pub reference_rate_per_second: f64,
+    /// Rate observed over the current sampling window.
+    pub current_rate_per_second: f64,
+    /// Reference rate scaled by the configured threshold multiplier.
+    pub captcha_threshold_rate: f64,
+}
+
+/// Dynamic captcha settings converted to the units used in this module.
+struct DynamicCaptchaConfig {
+    reference_rate_retention_ns: u64,
+    current_rate_retention_ns: u64,
+    threshold_multiplier: f64,
+}
+
+impl<M: Memory> RegistrationRates<M> {
+    /// Creates the tracker from the two heaps holding the reference and current rate data.
+    pub fn new(
+        reference_rate_data: MinHeap<Timestamp, M>,
+        current_rate_data: MinHeap<Timestamp, M>,
+    ) -> Self {
+        Self {
+            reference_rate_data,
+            current_rate_data,
+        }
+    }
+
+    /// Records a registration happening now.
+    ///
+    /// Expired entries are pruned first. Nothing is recorded unless the captcha trigger is
+    /// configured as dynamic.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "out of memory" if pushing onto either heap fails.
+    pub fn new_registration(&mut self) {
+        self.prune_expired();
+        let Some(data_retention) = dynamic_captcha_config() else {
+            return;
+        };
+
+        let now = time();
+        // Entries are stored as expiration timestamps: registration time + retention.
+        self.reference_rate_data
+            .push(&(now + data_retention.reference_rate_retention_ns))
+            .expect("out of memory");
+        self.current_rate_data
+            .push(&(now + data_retention.current_rate_retention_ns))
+            .expect("out of memory");
+    }
+
+    /// Returns the reference rate, current rate and captcha threshold rate, or `None` if the
+    /// captcha trigger is not configured as dynamic.
+    pub fn registration_rates(&self) -> Option<NormalizedRegistrationRates> {
+        let config = dynamic_captcha_config()?;
+        let now = time();
+
+        let reference_rate_per_second = calculate_rate(
+            now,
+            &self.reference_rate_data,
+            config.reference_rate_retention_ns,
+        );
+        let current_rate_per_second = calculate_rate(
+            now,
+            &self.current_rate_data,
+            config.current_rate_retention_ns,
+        );
+        let captcha_threshold_rate = reference_rate_per_second * config.threshold_multiplier;
+        Some(NormalizedRegistrationRates {
+            reference_rate_per_second,
+            current_rate_per_second,
+            captcha_threshold_rate,
+        })
+    }
+
+    /// Prunes expired entries from both heaps.
+    fn prune_expired(&mut self) {
+        prune_data(&mut self.reference_rate_data);
+        prune_data(&mut self.current_rate_data);
+    }
+}
+
+/// Calculates the rate per second of registrations taking into account for how long data has
+/// already been collected.
+///
+/// E.g. if `data_retention_ns` is 3 weeks, the rate cannot just be calculated over a 3-week time
+/// window because until 3 weeks of data has been collected the rate would be inaccurate.
+/// In particular, it would underestimate the actual rate leading to the captcha threshold being
+/// reached more easily thus potentially triggering the captcha prematurely.
+///
+/// Returns 0.0 if there is no data or if the derived time window is empty. The subtractions
+/// saturate so that entries stored under a different retention (e.g. after a configuration
+/// change) cannot cause an underflow.
+fn calculate_rate<M: Memory>(
+    now: u64,
+    data: &MinHeap<Timestamp, M>,
+    data_retention_ns: u64,
+) -> f64 {
+    data
+        // get the oldest expiration timestamp
+        .peek()
+        // calculate the registration timestamp from expiration
+        .map(|ts| ts.saturating_sub(data_retention_ns))
+        // calculate the time window length with respect to the current time
+        .map(|ts| now.saturating_sub(ts))
+        // the value _could_ be 0 if the oldest timestamp was added in the same execution round
+        // (or if the window start lies in the future) -> filter to avoid division by 0
+        .filter(|val| *val != 0)
+        // use the value to calculate the rate per second
+        .map(|val| rate_per_second(data.len(), val))
+        // if we don't have data, the rate is 0
+        .unwrap_or(0.0)
+}
+
+/// Converts a count over a window of `duration_ns` nanoseconds into a rate per second.
+///
+/// Fractional seconds are kept: truncating to whole seconds would turn any window shorter than
+/// one second into a division by zero and overestimate the rate of short windows.
+fn rate_per_second(count: u64, duration_ns: u64) -> f64 {
+    count as f64 / Duration::from_nanos(duration_ns).as_secs_f64()
+}
+
+/// Reads the captcha trigger from the persistent state and converts it into a
+/// [`DynamicCaptchaConfig`]; returns `None` for a static trigger.
+fn dynamic_captcha_config() -> Option<DynamicCaptchaConfig> {
+    let trigger = state::persistent_state(|ps| ps.captcha_config.captcha_trigger.clone());
+    match trigger {
+        CaptchaTrigger::Static(_) => None,
+        CaptchaTrigger::Dynamic {
+            current_rate_sampling_interval_s,
+            reference_rate_sampling_interval_s,
+            threshold_pct,
+        } => Some(DynamicCaptchaConfig {
+            reference_rate_retention_ns: Duration::from_secs(reference_rate_sampling_interval_s)
+                .as_nanos() as u64,
+            current_rate_retention_ns: Duration::from_secs(current_rate_sampling_interval_s)
+                .as_nanos() as u64,
+            threshold_multiplier: 1.0 + (threshold_pct as f64 / 100.0),
+        }),
+    }
+}
+
+/// Removes expired timestamps from `data`, at most `MAX_TO_PRUNE` per call.
+fn prune_data<M: Memory>(data: &mut MinHeap<Timestamp, M>) {
+    const MAX_TO_PRUNE: usize = 100;
+
+    let now = time();
+    for _ in 0..MAX_TO_PRUNE {
+        let Some(timestamp) = data.peek() else {
+            break;
+        };
+
+        // The timestamps are sorted because the expiration is constant and time() is monotonic
+        // -> we can stop pruning once we reach a not expired timestamp
+        if timestamp > now {
+            break;
+        }
+        data.pop();
+    }
+}