From 16e869a678312b16c2e97461f144ce63ef5309bc Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 9 Mar 2024 13:50:39 +0100 Subject: [PATCH 1/3] interpret: pass Size and Align to before_memory_deallocation --- compiler/rustc_const_eval/src/interpret/machine.rs | 3 ++- compiler/rustc_const_eval/src/interpret/memory.rs | 3 ++- src/tools/miri/src/borrow_tracker/mod.rs | 6 +++--- src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs | 6 +++--- src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs | 4 ++-- src/tools/miri/src/concurrency/data_race.rs | 4 ++-- src/tools/miri/src/machine.rs | 7 ++++--- 7 files changed, 18 insertions(+), 15 deletions(-) diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs index ee5de44a65144..1b9a96ec3219b 100644 --- a/compiler/rustc_const_eval/src/interpret/machine.rs +++ b/compiler/rustc_const_eval/src/interpret/machine.rs @@ -433,7 +433,8 @@ pub trait Machine<'mir, 'tcx: 'mir>: Sized { _machine: &mut Self, _alloc_extra: &mut Self::AllocExtra, _prov: (AllocId, Self::ProvenanceExtra), - _range: AllocRange, + _size: Size, + _align: Align, ) -> InterpResult<'tcx> { Ok(()) } diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs index 3b2208b8caa8a..0df724f29c401 100644 --- a/compiler/rustc_const_eval/src/interpret/memory.rs +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -345,7 +345,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { &mut self.machine, &mut alloc.extra, (alloc_id, prov), - alloc_range(Size::ZERO, size), + size, + alloc.align, )?; // Don't forget to remember size and align of this now-dead allocation diff --git a/src/tools/miri/src/borrow_tracker/mod.rs b/src/tools/miri/src/borrow_tracker/mod.rs index 262f7c449d275..8d76a488269fc 100644 --- a/src/tools/miri/src/borrow_tracker/mod.rs +++ b/src/tools/miri/src/borrow_tracker/mod.rs @@ -472,14 +472,14 @@ impl AllocState { &mut self, alloc_id: AllocId, prov_extra: ProvenanceExtra, - range: AllocRange, + size: Size, machine: &MiriMachine<'_, 'tcx>, ) -> InterpResult<'tcx> { match self { AllocState::StackedBorrows(sb) => - sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine), + sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine), AllocState::TreeBorrows(tb) => - tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, range, machine), + tb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine), } } diff --git a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs index 86d22229714d5..7ca2bc76f1e85 100644 --- a/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs @@ -574,13 +574,13 @@ impl Stacks { &mut self, alloc_id: AllocId, tag: ProvenanceExtra, - range: AllocRange, + size: Size, machine: &MiriMachine<'_, 'tcx>, ) -> InterpResult<'tcx> { - trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, range.size.bytes()); + trace!("deallocation with tag {:?}: {:?}, size {}", tag, alloc_id, size.bytes()); let dcx = DiagnosticCxBuilder::dealloc(machine, tag); let state = machine.borrow_tracker.as_ref().unwrap().borrow(); - self.for_each(range, dcx, |stack, dcx, exposed_tags| { + self.for_each(alloc_range(Size::ZERO, size), dcx, |stack, dcx, exposed_tags| { stack.dealloc(tag, &state, dcx, exposed_tags) })?; Ok(()) diff --git 
a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs index 4b944ea88f503..9dd147af42449 100644 --- a/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs +++ b/src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs @@ -80,7 +80,7 @@ impl<'tcx> Tree { &mut self, alloc_id: AllocId, prov: ProvenanceExtra, - range: AllocRange, + size: Size, machine: &MiriMachine<'_, 'tcx>, ) -> InterpResult<'tcx> { // TODO: for now we bail out on wildcard pointers. Eventually we should @@ -91,7 +91,7 @@ impl<'tcx> Tree { }; let global = machine.borrow_tracker.as_ref().unwrap(); let span = machine.current_span(); - self.dealloc(tag, range, global, alloc_id, span) + self.dealloc(tag, alloc_range(Size::ZERO, size), global, alloc_id, span) } pub fn expose_tag(&mut self, _tag: BorTag) { diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs index e304488408381..4a1c3ac868e14 100644 --- a/src/tools/miri/src/concurrency/data_race.rs +++ b/src/tools/miri/src/concurrency/data_race.rs @@ -1071,10 +1071,10 @@ impl VClockAlloc { pub fn deallocate<'tcx>( &mut self, alloc_id: AllocId, - range: AllocRange, + size: Size, machine: &mut MiriMachine<'_, '_>, ) -> InterpResult<'tcx> { - self.unique_access(alloc_id, range, NaWriteType::Deallocate, machine) + self.unique_access(alloc_id, alloc_range(Size::ZERO, size), NaWriteType::Deallocate, machine) } } diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index c3c3a81585614..fda7e6f4449ac 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1288,16 +1288,17 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> { machine: &mut Self, alloc_extra: &mut AllocExtra<'tcx>, (alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra), - range: AllocRange, + size: Size, + _align: Align, ) -> InterpResult<'tcx> { if machine.tracked_alloc_ids.contains(&alloc_id) { machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id)); } if let Some(data_race) = &mut alloc_extra.data_race { - data_race.deallocate(alloc_id, range, machine)?; + data_race.deallocate(alloc_id, size, machine)?; } if let Some(borrow_tracker) = &mut alloc_extra.borrow_tracker { - borrow_tracker.before_memory_deallocation(alloc_id, prove_extra, range, machine)?; + borrow_tracker.before_memory_deallocation(alloc_id, prove_extra, size, machine)?; } if let Some((_, deallocated_at)) = machine.allocation_spans.borrow_mut().get_mut(&alloc_id) { From 092bd53c1ef96ede03bb50ecefc15c6a365531f4 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sat, 9 Mar 2024 13:20:51 +0100 Subject: [PATCH 2/3] rename intptrcast -> alloc_addresses, and make a folder for it --- .../src/{intptrcast.rs => alloc_addresses/mod.rs} | 13 ++++++++----- src/tools/miri/src/lib.rs | 4 ++-- src/tools/miri/src/machine.rs | 10 +++++----- src/tools/miri/src/provenance_gc.rs | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) rename src/tools/miri/src/{intptrcast.rs => alloc_addresses/mod.rs} (96%) diff --git a/src/tools/miri/src/intptrcast.rs b/src/tools/miri/src/alloc_addresses/mod.rs similarity index 96% rename from src/tools/miri/src/intptrcast.rs rename to src/tools/miri/src/alloc_addresses/mod.rs index 3fe127f973269..3177a1297c827 100644 --- a/src/tools/miri/src/intptrcast.rs +++ b/src/tools/miri/src/alloc_addresses/mod.rs @@ -1,3 +1,6 @@ +//! This module is responsible for managing the absolute addresses that allocations are located at, +//! 
+//! and for casting between pointers and integers based on those addresses.
+
 use std::cell::RefCell;
 use std::cmp::max;
 use std::collections::hash_map::Entry;
@@ -96,7 +99,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     // or `None` if the addr is out of bounds
     fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
         let ecx = self.eval_context_ref();
-        let global_state = ecx.machine.intptrcast.borrow();
+        let global_state = ecx.machine.alloc_addresses.borrow();
         assert!(global_state.provenance_mode != ProvenanceMode::Strict);
 
         let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr);
@@ -133,7 +136,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
 
     fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
         let ecx = self.eval_context_ref();
-        let mut global_state = ecx.machine.intptrcast.borrow_mut();
+        let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
         let global_state = &mut *global_state;
 
         Ok(match global_state.base_addr.entry(alloc_id) {
@@ -196,7 +199,7 @@ impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir,
 pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
         let ecx = self.eval_context_mut();
-        let global_state = ecx.machine.intptrcast.get_mut();
+        let global_state = ecx.machine.alloc_addresses.get_mut();
         // In strict mode, we don't need this, so we can save some cycles by not tracking it.
         if global_state.provenance_mode == ProvenanceMode::Strict {
             return Ok(());
@@ -207,7 +210,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
             return Ok(());
         }
         trace!("Exposing allocation id {alloc_id:?}");
-        let global_state = ecx.machine.intptrcast.get_mut();
+        let global_state = ecx.machine.alloc_addresses.get_mut();
         global_state.exposed.insert(alloc_id);
         if ecx.machine.borrow_tracker.is_some() {
             ecx.expose_tag(alloc_id, tag)?;
@@ -219,7 +222,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         trace!("Casting {:#x} to a pointer", addr);
 
         let ecx = self.eval_context_ref();
-        let global_state = ecx.machine.intptrcast.borrow();
+        let global_state = ecx.machine.alloc_addresses.borrow();
 
         // Potentially emit a warning.
        match global_state.provenance_mode {
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index e1d0bc1c18386..819952d09e17e 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -71,13 +71,13 @@ extern crate tracing;
 #[allow(unused_extern_crates)]
 extern crate rustc_driver;
 
+mod alloc_addresses;
 mod borrow_tracker;
 mod clock;
 mod concurrency;
 mod diagnostics;
 mod eval;
 mod helpers;
-mod intptrcast;
 mod machine;
 mod mono_hash_map;
 mod operator;
@@ -100,6 +100,7 @@ pub use crate::shims::panic::{CatchUnwindData, EvalContextExt as _};
 pub use crate::shims::time::EvalContextExt as _;
 pub use crate::shims::tls::TlsData;
 
+pub use crate::alloc_addresses::{EvalContextExt as _, ProvenanceMode};
 pub use crate::borrow_tracker::stacked_borrows::{
     EvalContextExt as _, Item, Permission, Stack, Stacks,
 };
@@ -121,7 +122,6 @@ pub use crate::eval::{
     create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
 };
 pub use crate::helpers::{AccessKind, EvalContextExt as _};
-pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode};
 pub use crate::machine::{
     AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind,
     PrimitiveLayouts, Provenance, ProvenanceExtra,
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index fda7e6f4449ac..3a54629222d51 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -436,7 +436,7 @@ pub struct MiriMachine<'mir, 'tcx> {
     pub data_race: Option<data_race::GlobalState>,
 
     /// Ptr-int-cast module global data.
-    pub intptrcast: intptrcast::GlobalState,
+    pub alloc_addresses: alloc_addresses::GlobalState,
 
     /// Environment variables set by `setenv`.
     /// Miri does not expose env vars from the host to the emulated program.
@@ -634,7 +634,7 @@ impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
             tcx,
             borrow_tracker,
             data_race,
-            intptrcast: RefCell::new(intptrcast::GlobalStateInner::new(config, stack_addr)),
+            alloc_addresses: RefCell::new(alloc_addresses::GlobalStateInner::new(config, stack_addr)),
             // `env_vars` depends on a full interpreter so we cannot properly initialize it yet.
             env_vars: EnvVars::default(),
             main_fn_ret_place: None,
@@ -782,7 +782,7 @@ impl VisitProvenance for MiriMachine<'_, '_> {
             dir_handler,
             borrow_tracker,
             data_race,
-            intptrcast,
+            alloc_addresses,
             file_handler,
             tcx: _,
             isolated_op: _,
@@ -827,7 +827,7 @@ impl VisitProvenance for MiriMachine<'_, '_> {
         file_handler.visit_provenance(visit);
         data_race.visit_provenance(visit);
         borrow_tracker.visit_provenance(visit);
-        intptrcast.visit_provenance(visit);
+        alloc_addresses.visit_provenance(visit);
         main_fn_ret_place.visit_provenance(visit);
         argc.visit_provenance(visit);
         argv.visit_provenance(visit);
@@ -1304,7 +1304,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         {
             *deallocated_at = Some(machine.current_span());
         }
-        machine.intptrcast.get_mut().free_alloc_id(alloc_id);
+        machine.alloc_addresses.get_mut().free_alloc_id(alloc_id);
         Ok(())
     }
 
diff --git a/src/tools/miri/src/provenance_gc.rs b/src/tools/miri/src/provenance_gc.rs
index 347951ce37270..f23d7dfd52d54 100644
--- a/src/tools/miri/src/provenance_gc.rs
+++ b/src/tools/miri/src/provenance_gc.rs
@@ -197,7 +197,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         let allocs = LiveAllocs { ecx: this, collected: allocs };
         this.machine.allocation_spans.borrow_mut().retain(|id, _| allocs.is_live(*id));
         this.machine.symbolic_alignment.borrow_mut().retain(|id, _| allocs.is_live(*id));
-        this.machine.intptrcast.borrow_mut().remove_unreachable_allocs(&allocs);
+        this.machine.alloc_addresses.borrow_mut().remove_unreachable_allocs(&allocs);
         if let Some(borrow_tracker) = &this.machine.borrow_tracker {
             borrow_tracker.borrow_mut().remove_unreachable_allocs(&allocs);
         }

From c01676835125747b5fff3143ca2a71d2bf7b94c4 Mon Sep 17 00:00:00 2001
From: Ralf Jung
Date: Sat, 9 Mar 2024 16:05:37 +0100
Subject: [PATCH 3/3] miri: add some chance to reuse addresses of previously
 freed allocations

---
 src/tools/miri/src/alloc_addresses/mod.rs  | 101 ++++++++++++------
 .../miri/src/alloc_addresses/reuse_pool.rs |  87 +++++++++++++++
 src/tools/miri/src/helpers.rs              |   4 +-
 src/tools/miri/src/machine.rs              |   9 +-
 src/tools/miri/tests/pass/address-reuse.rs |  16 +++
 src/tools/miri/tests/pass/intptrcast.rs    |   4 +-
 6 files changed, 182 insertions(+), 39 deletions(-)
 create mode 100644 src/tools/miri/src/alloc_addresses/reuse_pool.rs
 create mode 100644 src/tools/miri/tests/pass/address-reuse.rs

diff --git a/src/tools/miri/src/alloc_addresses/mod.rs b/src/tools/miri/src/alloc_addresses/mod.rs
index 3177a1297c827..e1714aa9e46b9 100644
--- a/src/tools/miri/src/alloc_addresses/mod.rs
+++ b/src/tools/miri/src/alloc_addresses/mod.rs
@@ -1,6 +1,8 @@
 //! This module is responsible for managing the absolute addresses that allocations are located at,
 //! and for casting between pointers and integers based on those addresses.
 
+mod reuse_pool;
+
 use std::cell::RefCell;
 use std::cmp::max;
 use std::collections::hash_map::Entry;
@@ -9,9 +11,10 @@
 use rand::Rng;
 
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_span::Span;
-use rustc_target::abi::{HasDataLayout, Size};
+use rustc_target::abi::{Align, HasDataLayout, Size};
 
 use crate::*;
+use reuse_pool::ReusePool;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum ProvenanceMode {
@@ -26,7 +29,7 @@ pub enum ProvenanceMode {
 
 pub type GlobalState = RefCell<GlobalStateInner>;
 
-#[derive(Clone, Debug)]
+#[derive(Debug)]
 pub struct GlobalStateInner {
     /// This is used as a map between the address of each allocation and its `AllocId`. It is always
     /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
@@ -38,6 +41,8 @@ pub struct GlobalStateInner {
     /// they do not have an `AllocExtra`.
     /// This is the inverse of `int_to_ptr_map`.
     base_addr: FxHashMap<AllocId, u64>,
+    /// A pool of addresses we can reuse for future allocations.
+    reuse: ReusePool,
     /// Whether an allocation has been exposed or not. This cannot be put
     /// into `AllocExtra` for the same reason as `base_addr`.
     exposed: FxHashSet<AllocId>,
@@ -53,6 +58,7 @@ impl VisitProvenance for GlobalStateInner {
         let GlobalStateInner {
             int_to_ptr_map: _,
             base_addr: _,
+            reuse: _,
             exposed: _,
             next_base_addr: _,
             provenance_mode: _,
@@ -71,6 +77,7 @@ impl GlobalStateInner {
         GlobalStateInner {
             int_to_ptr_map: Vec::default(),
             base_addr: FxHashMap::default(),
+            reuse: ReusePool::new(),
             exposed: FxHashSet::default(),
             next_base_addr: stack_addr,
             provenance_mode: config.provenance_mode,
@@ -142,6 +149,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         Ok(match global_state.base_addr.entry(alloc_id) {
             Entry::Occupied(entry) => *entry.get(),
             Entry::Vacant(entry) => {
+                let mut rng = ecx.machine.rng.borrow_mut();
                 let (size, align, kind) = ecx.get_alloc_info(alloc_id);
                 // This is either called immediately after allocation (and then cached), or when
                 // adjusting `tcx` pointers (which never get freed). So assert that we are looking
@@ -150,44 +158,63 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                 // information was removed.
                 assert!(!matches!(kind, AllocKind::Dead));
 
-                // This allocation does not have a base address yet, pick one.
-                // Leave some space to the previous allocation, to give it some chance to be less aligned.
-                let slack = {
-                    let mut rng = ecx.machine.rng.borrow_mut();
-                    // This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
-                    rng.gen_range(0..16)
+                // This allocation does not have a base address yet, pick or reuse one.
+                let base_addr = if let Some(reuse_addr) =
+                    global_state.reuse.take_addr(&mut *rng, size, align)
+                {
+                    reuse_addr
+                } else {
+                    // We have to pick a fresh address.
+                    // Leave some space to the previous allocation, to give it some chance to be less aligned.
+                    // We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
+                    let slack = rng.gen_range(0..16);
+                    // From next_base_addr + slack, round up to adjust for alignment.
+                    let base_addr = global_state
+                        .next_base_addr
+                        .checked_add(slack)
+                        .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                    let base_addr = align_addr(base_addr, align.bytes());
+
+                    // Remember next base address. If this allocation is zero-sized, leave a gap
+                    // of at least 1 to avoid two allocations having the same base address.
+                    // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
+                    // function/vtable pointers need to be distinguishable!)
+                    global_state.next_base_addr = base_addr
+                        .checked_add(max(size.bytes(), 1))
+                        .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
+                    // Even if `Size` didn't overflow, we might still have filled up the address space.
+                    if global_state.next_base_addr > ecx.target_usize_max() {
+                        throw_exhaust!(AddressSpaceFull);
+                    }
+
+                    base_addr
                 };
-                // From next_base_addr + slack, round up to adjust for alignment.
- let base_addr = global_state - .next_base_addr - .checked_add(slack) - .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; - let base_addr = align_addr(base_addr, align.bytes()); - entry.insert(base_addr); trace!( - "Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})", + "Assigning base address {:#x} to allocation {:?} (size: {}, align: {})", base_addr, alloc_id, size.bytes(), align.bytes(), - slack, ); - // Remember next base address. If this allocation is zero-sized, leave a gap - // of at least 1 to avoid two allocations having the same base address. - // (The logic in `alloc_id_from_addr` assumes unique addresses, and different - // function/vtable pointers need to be distinguishable!) - global_state.next_base_addr = base_addr - .checked_add(max(size.bytes(), 1)) - .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; - // Even if `Size` didn't overflow, we might still have filled up the address space. - if global_state.next_base_addr > ecx.target_usize_max() { - throw_exhaust!(AddressSpaceFull); - } - // Also maintain the opposite mapping in `int_to_ptr_map`. - // Given that `next_base_addr` increases in each allocation, pushing the - // corresponding tuple keeps `int_to_ptr_map` sorted - global_state.int_to_ptr_map.push((base_addr, alloc_id)); + // Store address in cache. + entry.insert(base_addr); + + // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted. + // We have a fast-path for the common case that this address is bigger than all previous ones. + let pos = if global_state + .int_to_ptr_map + .last() + .is_some_and(|(last_addr, _)| *last_addr < base_addr) + { + global_state.int_to_ptr_map.len() + } else { + global_state + .int_to_ptr_map + .binary_search_by_key(&base_addr, |(addr, _)| *addr) + .unwrap_err() + }; + global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id)); base_addr } @@ -302,7 +329,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> { } impl GlobalStateInner { - pub fn free_alloc_id(&mut self, dead_id: AllocId) { + pub fn free_alloc_id( + &mut self, + rng: &mut impl Rng, + dead_id: AllocId, + size: Size, + align: Align, + ) { // We can *not* remove this from `base_addr`, since the interpreter design requires that we // be able to retrieve an AllocId + offset for any memory access *before* we check if the // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory @@ -322,6 +355,8 @@ impl GlobalStateInner { // We can also remove it from `exposed`, since this allocation can anyway not be returned by // `alloc_id_from_addr` any more. self.exposed.remove(&dead_id); + // Also remember this address for future reuse. + self.reuse.add_addr(rng, addr, size, align) } } diff --git a/src/tools/miri/src/alloc_addresses/reuse_pool.rs b/src/tools/miri/src/alloc_addresses/reuse_pool.rs new file mode 100644 index 0000000000000..8374d0ec605a8 --- /dev/null +++ b/src/tools/miri/src/alloc_addresses/reuse_pool.rs @@ -0,0 +1,87 @@ +//! Manages a pool of addresses that can be reused. + +use rand::Rng; + +use rustc_target::abi::{Align, Size}; + +const MAX_POOL_SIZE: usize = 64; + +// Just use fair coins, until we have evidence that other numbers are better. +const ADDR_REMEMBER_CHANCE: f64 = 0.5; +const ADDR_TAKE_CHANCE: f64 = 0.5; + +/// The pool strikes a balance between exploring more possible executions and making it more likely +/// to find bugs. 
+/// The hypothesis is that bugs are more likely to occur when reuse happens for
+/// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
+/// structure. Therefore we only reuse allocations when size and alignment match exactly.
+#[derive(Debug)]
+pub struct ReusePool {
+    /// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
+    /// allocations as address-size pairs; each list must be sorted by size.
+    ///
+    /// Each of these lists has at most MAX_POOL_SIZE elements, and since alignment is limited to
+    /// fewer than 64 different possible values, that bounds the overall size of the pool.
+    pool: Vec<Vec<(u64, Size)>>,
+}
+
+impl ReusePool {
+    pub fn new() -> Self {
+        ReusePool { pool: vec![] }
+    }
+
+    fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
+        let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
+        if self.pool.len() <= pool_idx {
+            self.pool.resize(pool_idx + 1, Vec::new());
+        }
+        &mut self.pool[pool_idx]
+    }
+
+    pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
+        // Let's see if we even want to remember this address.
+        if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
+            return;
+        }
+        // Determine the pool to add this to, and where in the pool to put it.
+        let subpool = self.subpool(align);
+        let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        // Make sure the pool does not grow too big.
+        if subpool.len() >= MAX_POOL_SIZE {
+            // Pool full. Replace existing element, or last one if this would be even bigger.
+            let clamped_pos = pos.min(subpool.len() - 1);
+            subpool[clamped_pos] = (addr, size);
+            return;
+        }
+        // Add address to pool, at the right position.
+        subpool.insert(pos, (addr, size));
+    }
+
+    pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
+        // Determine whether we'll even attempt a reuse.
+        if !rng.gen_bool(ADDR_TAKE_CHANCE) {
+            return None;
+        }
+        // Determine the pool to take this from.
+        let subpool = self.subpool(align);
+        // Let's see if we can find something of the right size. We want to find the full range of
+        // such items, beginning with the first, so we can't use `binary_search_by_key`.
+        let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        let mut end = begin;
+        while let Some((_addr, other_size)) = subpool.get(end) {
+            if *other_size != size {
+                break;
+            }
+            end += 1;
+        }
+        if end == begin {
+            // Could not find any item of the right size.
+            return None;
+        }
+        // Pick a random element with the desired size.
+        let idx = rng.gen_range(begin..end);
+        // Remove it from the pool and return.
+        let (chosen_addr, chosen_size) = subpool.remove(idx);
+        debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
+        Some(chosen_addr)
+    }
+}
diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs
index 9e4b5fe8ad780..371dc6edda31d 100644
--- a/src/tools/miri/src/helpers.rs
+++ b/src/tools/miri/src/helpers.rs
@@ -3,6 +3,8 @@ use std::iter;
 use std::num::NonZero;
 use std::time::Duration;
 
+use rand::RngCore;
+
 use rustc_apfloat::ieee::{Double, Single};
 use rustc_apfloat::Float;
 use rustc_hir::def::{DefKind, Namespace};
@@ -18,8 +20,6 @@ use rustc_span::{def_id::CrateNum, sym, Span, Symbol};
 use rustc_target::abi::{Align, FieldIdx, FieldsShape, Size, Variants};
 use rustc_target::spec::abi::Abi;
 
-use rand::RngCore;
-
 use crate::*;
 
 /// Indicates which kind of access is being performed.
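
Note: the core of the `ReusePool` above is easy to exercise on its own. The following sketch is editorial illustration, not part of the patch: plain `u64` values stand in for Miri's `Size` and `Align`, a `pick` parameter stands in for the `rng.gen_range(begin..end)` draw, and the `ADDR_REMEMBER_CHANCE`/`ADDR_TAKE_CHANCE` coin flips are omitted. It shows why each subpool is kept sorted by size: `partition_point` can then locate both the insertion point and the run of exact-size candidates by binary search.

    // One subpool (a single alignment class), kept sorted by size.
    // Mirrors the insert/take logic of `ReusePool::add_addr`/`take_addr`, simplified.
    fn add_addr(subpool: &mut Vec<(u64, u64)>, addr: u64, size: u64, max_len: usize) {
        // First index whose entry is not smaller; inserting here keeps the list sorted.
        let pos = subpool.partition_point(|&(_, s)| s < size);
        if subpool.len() >= max_len {
            // Pool full: overwrite an existing element instead of growing.
            subpool[pos.min(subpool.len() - 1)] = (addr, size);
        } else {
            subpool.insert(pos, (addr, size));
        }
    }

    fn take_addr(subpool: &mut Vec<(u64, u64)>, size: u64, pick: usize) -> Option<u64> {
        // The exact-size matches form a contiguous run starting at `begin`.
        let begin = subpool.partition_point(|&(_, s)| s < size);
        let mut end = begin;
        while subpool.get(end).is_some_and(|&(_, s)| s == size) {
            end += 1;
        }
        if begin == end {
            return None; // no entry of exactly this size
        }
        let idx = begin + pick % (end - begin); // `pick` replaces the RNG draw
        Some(subpool.remove(idx).0)
    }

    fn main() {
        let mut subpool = Vec::new();
        add_addr(&mut subpool, 0x1000, 16, 64);
        add_addr(&mut subpool, 0x2000, 32, 64);
        add_addr(&mut subpool, 0x3000, 16, 64);
        // Sorted by size; the two 16-byte entries are adjacent.
        assert_eq!(subpool, [(0x3000, 16), (0x1000, 16), (0x2000, 32)]);
        assert_eq!(take_addr(&mut subpool, 16, 1), Some(0x1000));
        assert_eq!(take_addr(&mut subpool, 64, 0), None);
    }

The overwrite-on-overflow branch in `add_addr` is what bounds each subpool at MAX_POOL_SIZE entries, trading an old candidate for a new one instead of growing without limit.
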
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index 3a54629222d51..53f30ec2ba448 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -1289,7 +1289,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         alloc_extra: &mut AllocExtra<'tcx>,
         (alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra),
         size: Size,
-        _align: Align,
+        align: Align,
     ) -> InterpResult<'tcx> {
         if machine.tracked_alloc_ids.contains(&alloc_id) {
             machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id));
@@ -1304,7 +1304,12 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         {
             *deallocated_at = Some(machine.current_span());
         }
-        machine.alloc_addresses.get_mut().free_alloc_id(alloc_id);
+        machine.alloc_addresses.get_mut().free_alloc_id(
+            machine.rng.get_mut(),
+            alloc_id,
+            size,
+            align,
+        );
         Ok(())
     }
 
diff --git a/src/tools/miri/tests/pass/address-reuse.rs b/src/tools/miri/tests/pass/address-reuse.rs
new file mode 100644
index 0000000000000..9b5c8c38b8fba
--- /dev/null
+++ b/src/tools/miri/tests/pass/address-reuse.rs
@@ -0,0 +1,16 @@
+//! Check that we do sometimes reuse addresses.
+use std::collections::HashSet;
+
+fn main() {
+    let count = 100;
+    let mut addrs = HashSet::<usize>::new();
+    for _ in 0..count {
+        // We make a `Box` with a layout that's hopefully not used by tons of things inside the
+        // allocator itself, so that we are more likely to get reuse. (With `i32` or `usize`, on
+        // Windows the reuse chances are very low.)
+        let b = Box::new([42usize; 4]);
+        addrs.insert(&*b as *const [usize; 4] as usize);
+    }
+    // dbg!(addrs.len());
+    assert!(addrs.len() > 1 && addrs.len() < count);
+}
diff --git a/src/tools/miri/tests/pass/intptrcast.rs b/src/tools/miri/tests/pass/intptrcast.rs
index 42b6f433420b6..370b09f512cbf 100644
--- a/src/tools/miri/tests/pass/intptrcast.rs
+++ b/src/tools/miri/tests/pass/intptrcast.rs
@@ -67,8 +67,8 @@ fn ptr_eq_dangling() {
     drop(b);
     let b = Box::new(0);
     let y = &*b as *const i32; // different allocation
-    // They *could* be equal if memory was reused, but probably are not.
-    assert!(x != y);
+    // They *could* be equal if memory is reused...
+    assert!(x != y || x == y);
 }
 
 fn ptr_eq_out_of_bounds() {