Skip to content

Commit

Permalink
Auto merge of #90491 - Mark-Simulacrum:push-pred-faster, r=matthewjasper
Browse files Browse the repository at this point in the history
Optimize live point computation

This refactors the live-point computation to lower per-MIR-instruction costs by operating on a largely per-block level. This doesn't fundamentally change the number of operations necessary, but it greatly improves the practical performance by aggregating bit manipulation into ranges rather than single-bit; this scales much better with larger blocks.

On the benchmark provided in #90445, with 100,000 array elements, walltime for a check build is improved from 143 seconds to 15.

I consider the tiny losses here acceptable given the many small wins on real world benchmarks and large wins on stress tests. The new code scales much better, but on some subset of inputs the slightly higher constant overheads decrease performance somewhat. Overall though, this is expected to be a big win for pathological cases (as illustrated by the test case motivating this work) and largely not material for non-pathological cases. I consider the new code somewhat easier to follow, too.
  • Loading branch information
bors committed Nov 24, 2021
2 parents c6a7ca1 + 03afb61 commit 8a48b37
Show file tree
Hide file tree
Showing 5 changed files with 278 additions and 29 deletions.
28 changes: 5 additions & 23 deletions compiler/rustc_borrowck/src/region_infer/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ impl RegionValueElements {
PointIndex::new(start_index)
}

/// Return the PointIndex for the block start of this index.
crate fn to_block_start(&self, index: PointIndex) -> PointIndex {
PointIndex::new(self.statements_before_block[self.basic_blocks[index]])
}

/// Converts a `PointIndex` back to a location. O(1).
crate fn to_location(&self, index: PointIndex) -> Location {
assert!(index.index() < self.num_points);
Expand All @@ -76,29 +81,6 @@ impl RegionValueElements {
crate fn point_in_range(&self, index: PointIndex) -> bool {
index.index() < self.num_points
}

/// Pushes all predecessors of `index` onto `stack`.
crate fn push_predecessors(
&self,
body: &Body<'_>,
index: PointIndex,
stack: &mut Vec<PointIndex>,
) {
let Location { block, statement_index } = self.to_location(index);
if statement_index == 0 {
// If this is a basic block head, then the predecessors are
// the terminators of other basic blocks
stack.extend(
body.predecessors()[block]
.iter()
.map(|&pred_bb| body.terminator_loc(pred_bb))
.map(|pred_loc| self.point_from_location(pred_loc)),
);
} else {
// Otherwise, the pred is just the previous statement
stack.push(PointIndex::new(index.index() - 1));
}
}
}

rustc_index::newtype_index! {
Expand Down
40 changes: 35 additions & 5 deletions compiler/rustc_borrowck/src/type_check/liveness/trace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,42 @@ impl LivenessResults<'me, 'typeck, 'flow, 'tcx> {

self.stack.extend(self.cx.local_use_map.uses(local));
while let Some(p) = self.stack.pop() {
if self.defs.contains(p) {
// We are live in this block from the closest to us of:
//
// * Inclusively, the block start
// * Exclusively, the previous definition (if it's in this block)
// * Exclusively, the previous live_at setting (an optimization)
let block_start = self.cx.elements.to_block_start(p);
let previous_defs = self.defs.last_set_in(block_start..=p);
let previous_live_at = self.use_live_at.last_set_in(block_start..=p);

let exclusive_start = match (previous_defs, previous_live_at) {
(Some(a), Some(b)) => Some(std::cmp::max(a, b)),
(Some(a), None) | (None, Some(a)) => Some(a),
(None, None) => None,
};

if let Some(exclusive) = exclusive_start {
self.use_live_at.insert_range(exclusive + 1..=p);

// If we have a bound after the start of the block, we should
// not add the predecessors for this block.
continue;
}

if self.use_live_at.insert(p) {
self.cx.elements.push_predecessors(self.cx.body, p, &mut self.stack)
} else {
// Add all the elements of this block.
self.use_live_at.insert_range(block_start..=p);

// Then add the predecessors for this block, which are the
// terminators of predecessor basic blocks. Push those onto the
// stack so that the next iteration(s) will process them.

let block = self.cx.elements.to_location(block_start).block;
self.stack.extend(
self.cx.body.predecessors()[block]
.iter()
.map(|&pred_bb| self.cx.body.terminator_loc(pred_bb))
.map(|pred_loc| self.cx.elements.point_from_location(pred_loc)),
);
}
}
}
Expand Down
142 changes: 141 additions & 1 deletion compiler/rustc_index/src/bit_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::fmt;
use std::iter;
use std::marker::PhantomData;
use std::mem;
use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Not, Range, Shl};
use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Bound, Not, Range, RangeBounds, Shl};
use std::slice;

use rustc_macros::{Decodable, Encodable};
Expand All @@ -22,6 +22,29 @@ pub trait BitRelations<Rhs> {
fn intersect(&mut self, other: &Rhs) -> bool;
}

#[inline]
fn inclusive_start_end<T: Idx>(
range: impl RangeBounds<T>,
domain: usize,
) -> Option<(usize, usize)> {
// Both start and end are inclusive.
let start = match range.start_bound().cloned() {
Bound::Included(start) => start.index(),
Bound::Excluded(start) => start.index() + 1,
Bound::Unbounded => 0,
};
let end = match range.end_bound().cloned() {
Bound::Included(end) => end.index(),
Bound::Excluded(end) => end.index().checked_sub(1)?,
Bound::Unbounded => domain - 1,
};
assert!(end < domain);
if start > end {
return None;
}
Some((start, end))
}

macro_rules! bit_relations_inherent_impls {
() => {
/// Sets `self = self | other` and returns `true` if `self` changed
Expand Down Expand Up @@ -151,6 +174,33 @@ impl<T: Idx> BitSet<T> {
new_word != word
}

#[inline]
pub fn insert_range(&mut self, elems: impl RangeBounds<T>) {
let Some((start, end)) = inclusive_start_end(elems, self.domain_size) else {
return;
};

let (start_word_index, start_mask) = word_index_and_mask(start);
let (end_word_index, end_mask) = word_index_and_mask(end);

// Set all words in between start and end (exclusively of both).
for word_index in (start_word_index + 1)..end_word_index {
self.words[word_index] = !0;
}

if start_word_index != end_word_index {
// Start and end are in different words, so we handle each in turn.
//
// We set all leading bits. This includes the start_mask bit.
self.words[start_word_index] |= !(start_mask - 1);
// And all trailing bits (i.e. from 0..=end) in the end word,
// including the end.
self.words[end_word_index] |= end_mask | end_mask - 1;
} else {
self.words[start_word_index] |= end_mask | (end_mask - start_mask);
}
}

/// Sets all bits to true.
pub fn insert_all(&mut self) {
for word in &mut self.words {
Expand Down Expand Up @@ -227,6 +277,36 @@ impl<T: Idx> BitSet<T> {
not_already
}

fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T> {
let (start, end) = inclusive_start_end(range, self.domain_size)?;
let (start_word_index, _) = word_index_and_mask(start);
let (end_word_index, end_mask) = word_index_and_mask(end);

let end_word = self.words[end_word_index] & (end_mask | (end_mask - 1));
if end_word != 0 {
let pos = max_bit(end_word) + WORD_BITS * end_word_index;
if start <= pos {
return Some(T::new(pos));
}
}

// We exclude end_word_index from the range here, because we don't want
// to limit ourselves to *just* the last word: the bits set it in may be
// after `end`, so it may not work out.
if let Some(offset) =
self.words[start_word_index..end_word_index].iter().rposition(|&w| w != 0)
{
let word_idx = start_word_index + offset;
let start_word = self.words[word_idx];
let pos = max_bit(start_word) + WORD_BITS * word_idx;
if start <= pos {
return Some(T::new(pos));
}
}

None
}

bit_relations_inherent_impls! {}
}

Expand Down Expand Up @@ -635,6 +715,16 @@ impl<T: Idx> SparseBitSet<T> {
self.elems.iter()
}

fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T> {
let mut last_leq = None;
for e in self.iter() {
if range.contains(e) {
last_leq = Some(*e);
}
}
last_leq
}

bit_relations_inherent_impls! {}
}

Expand Down Expand Up @@ -709,6 +799,16 @@ impl<T: Idx> HybridBitSet<T> {
}
}

/// Returns the previous element present in the bitset from `elem`,
/// inclusively of elem. That is, will return `Some(elem)` if elem is in the
/// bitset.
pub fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T> {
match self {
HybridBitSet::Sparse(sparse) => sparse.last_set_in(range),
HybridBitSet::Dense(dense) => dense.last_set_in(range),
}
}

pub fn insert(&mut self, elem: T) -> bool {
// No need to check `elem` against `self.domain_size` here because all
// the match cases check it, one way or another.
Expand All @@ -734,6 +834,41 @@ impl<T: Idx> HybridBitSet<T> {
}
}

pub fn insert_range(&mut self, elems: impl RangeBounds<T>) {
// No need to check `elem` against `self.domain_size` here because all
// the match cases check it, one way or another.
let start = match elems.start_bound().cloned() {
Bound::Included(start) => start.index(),
Bound::Excluded(start) => start.index() + 1,
Bound::Unbounded => 0,
};
let end = match elems.end_bound().cloned() {
Bound::Included(end) => end.index() + 1,
Bound::Excluded(end) => end.index(),
Bound::Unbounded => self.domain_size() - 1,
};
let len = if let Some(l) = end.checked_sub(start) {
l
} else {
return;
};
match self {
HybridBitSet::Sparse(sparse) if sparse.len() + len < SPARSE_MAX => {
// The set is sparse and has space for `elems`.
for elem in start..end {
sparse.insert(T::new(elem));
}
}
HybridBitSet::Sparse(sparse) => {
// The set is sparse and full. Convert to a dense set.
let mut dense = sparse.to_dense();
dense.insert_range(elems);
*self = HybridBitSet::Dense(dense);
}
HybridBitSet::Dense(dense) => dense.insert_range(elems),
}
}

pub fn insert_all(&mut self) {
let domain_size = self.domain_size();
match self {
Expand Down Expand Up @@ -1205,6 +1340,11 @@ fn word_index_and_mask<T: Idx>(elem: T) -> (usize, Word) {
(word_index, mask)
}

#[inline]
fn max_bit(word: Word) -> usize {
WORD_BITS - 1 - word.leading_zeros() as usize
}

/// Integral type used to represent the bit set.
pub trait FiniteBitSetTy:
BitAnd<Output = Self>
Expand Down
95 changes: 95 additions & 0 deletions compiler/rustc_index/src/bit_set/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,101 @@ fn sparse_matrix_operations() {
}
}

#[test]
fn dense_insert_range() {
#[track_caller]
fn check<R>(domain: usize, range: R)
where
R: RangeBounds<usize> + Clone + IntoIterator<Item = usize> + std::fmt::Debug,
{
let mut set = BitSet::new_empty(domain);
set.insert_range(range.clone());
for i in set.iter() {
assert!(range.contains(&i));
}
for i in range.clone() {
assert!(set.contains(i), "{} in {:?}, inserted {:?}", i, set, range);
}
}
check(300, 10..10);
check(300, WORD_BITS..WORD_BITS * 2);
check(300, WORD_BITS - 1..WORD_BITS * 2);
check(300, WORD_BITS - 1..WORD_BITS);
check(300, 10..100);
check(300, 10..30);
check(300, 0..5);
check(300, 0..250);
check(300, 200..250);

check(300, 10..=10);
check(300, WORD_BITS..=WORD_BITS * 2);
check(300, WORD_BITS - 1..=WORD_BITS * 2);
check(300, WORD_BITS - 1..=WORD_BITS);
check(300, 10..=100);
check(300, 10..=30);
check(300, 0..=5);
check(300, 0..=250);
check(300, 200..=250);

for i in 0..WORD_BITS * 2 {
for j in i..WORD_BITS * 2 {
check(WORD_BITS * 2, i..j);
check(WORD_BITS * 2, i..=j);
check(300, i..j);
check(300, i..=j);
}
}
}

#[test]
fn dense_last_set_before() {
fn easy(set: &BitSet<usize>, needle: impl RangeBounds<usize>) -> Option<usize> {
let mut last_leq = None;
for e in set.iter() {
if needle.contains(&e) {
last_leq = Some(e);
}
}
last_leq
}

#[track_caller]
fn cmp(set: &BitSet<usize>, needle: impl RangeBounds<usize> + Clone + std::fmt::Debug) {
assert_eq!(
set.last_set_in(needle.clone()),
easy(set, needle.clone()),
"{:?} in {:?}",
needle,
set
);
}
let mut set = BitSet::new_empty(300);
cmp(&set, 50..=50);
set.insert(WORD_BITS);
cmp(&set, WORD_BITS..=WORD_BITS);
set.insert(WORD_BITS - 1);
cmp(&set, 0..=WORD_BITS - 1);
cmp(&set, 0..=5);
cmp(&set, 10..100);
set.insert(100);
cmp(&set, 100..110);
cmp(&set, 99..100);
cmp(&set, 99..=100);

for i in 0..=WORD_BITS * 2 {
for j in i..=WORD_BITS * 2 {
for k in 0..WORD_BITS * 2 {
let mut set = BitSet::new_empty(300);
cmp(&set, i..j);
cmp(&set, i..=j);
set.insert(k);
cmp(&set, i..j);
cmp(&set, i..=j);
}
}
}
}

/// Merge dense hybrid set into empty sparse hybrid set.
#[bench]
fn union_hybrid_sparse_empty_to_dense(b: &mut Bencher) {
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#![feature(extend_one)]
#![feature(iter_zip)]
#![feature(min_specialization)]
#![feature(step_trait)]
#![feature(test)]
#![feature(let_else)]

pub mod bit_set;
pub mod vec;
Expand Down

0 comments on commit 8a48b37

Please sign in to comment.