Skip to content

Commit

Permalink
Auto merge of #110050 - saethlin:better-u32-encoding, r=nnethercote
Browse files Browse the repository at this point in the history
Use a specialized varint + bitpacking scheme for DepGraph encoding

The previous scheme here uses leb128 to encode the edge tables that represent the incremental compilation dependency graph. The problem with that scheme is that leb128 has overhead for larger values, and generally relies on the distribution of encoded values being heavily skewed towards smaller values. That is definitely not the case for dep node indices: since they are handed out sequentially and the whole range is covered, the distribution is actually biased in the opposite direction, and most dep node indices are large.

This PR implements a different varint encoding scheme. Instead of applying varint encoding to individual dep node indices (which is extremely branchy) we now apply it per node.

While being built, each node now stores its edges in a `SmallVec` with a bit of extra logic to track the maximum index among its edges. Then we varint encode the whole batch. This is a gamble: We save on space by only claiming 2 bits per node instead of ~3 bits per edge, which is a nice saving, but it has to be balanced against the space overhead incurred when a single large index in a node with a lot of edges forces unnecessary bytes into each of that node's edge indices.

Then, to keep the runtime overhead of this encoding scheme down, we deserialize our indices by loading 4 bytes for each and then masking off the bytes that aren't ours. This is much less code and far fewer branches than leb128, but relies on having some readable bytes past the end of each edge list. We explicitly add such padding to the in-memory data during decoding. We also do this decoding lazily, turning a dense on-disk encoding into a peak memory reduction.

Then we apply a bit-packing scheme; since in #115391 we now have unused bits on `DepKind`, we use those unused bits (currently there are 7!) to store the 2 bits that we need for the byte width of the edges in each node, then use the remaining bits to store the length of the edge list, if it fits.

r? `@nnethercote`
  • Loading branch information
bors committed Sep 7, 2023
2 parents 4e5b31c + 469dc8f commit f00c139
Show file tree
Hide file tree
Showing 8 changed files with 399 additions and 45 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_middle/src/dep_graph/dep_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ macro_rules! define_dep_nodes {
// discriminants of the variants have been assigned consecutively from 0
// so that just the one comparison suffices to check that the u16 can be
// transmuted to a DepKind.
const VARIANTS: u16 = {
pub const VARIANTS: u16 = {
let deps: &[DepKind] = &[$(DepKind::$variant,)*];
let mut i = 0;
while i < deps.len() {
Expand Down
16 changes: 16 additions & 0 deletions compiler/rustc_middle/src/dep_graph/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pub type DepKindStruct<'tcx> = rustc_query_system::dep_graph::DepKindStruct<TyCt
impl rustc_query_system::dep_graph::DepKind for DepKind {
const NULL: Self = DepKind::Null;
const RED: Self = DepKind::Red;
const MAX: u16 = DepKind::VARIANTS - 1;

fn debug_node(node: &DepNode, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}(", node.kind)?;
Expand Down Expand Up @@ -68,6 +69,21 @@ impl rustc_query_system::dep_graph::DepKind for DepKind {
op(icx.task_deps)
})
}

/// Converts a raw `u16` discriminant into a `DepKind`.
///
/// # Panics
///
/// Panics with the message `Invalid DepKind {u}` when `u` is greater than
/// `Self::MAX`. Thanks to `#[track_caller]` the panic is attributed to the
/// caller's location.
#[track_caller]
#[inline]
fn from_u16(u: u16) -> Self {
    // Reject anything past the last variant before reinterpreting the bits.
    assert!(u <= Self::MAX, "Invalid DepKind {u}");
    // SAFETY: See comment on DepKind::VARIANTS
    unsafe { std::mem::transmute::<u16, Self>(u) }
}

/// Returns this dep kind as a `u16`, obtained with a plain `as` cast.
#[inline]
fn to_u16(self) -> u16 {
self as u16
}
}

impl<'tcx> DepContext for TyCtxt<'tcx> {
Expand Down
73 changes: 73 additions & 0 deletions compiler/rustc_query_system/src/dep_graph/edges.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use crate::dep_graph::DepNodeIndex;
use smallvec::SmallVec;
use std::hash::{Hash, Hasher};
use std::iter::Extend;
use std::ops::Deref;

/// A list of dependency edges (`DepNodeIndex` values) that additionally
/// records the largest raw index that has been added through `push`.
#[derive(Default, Debug)]
pub struct EdgesVec {
// Largest `as_u32()` value seen by `push`; starts at 0 via `Default`.
max: u32,
// The edges themselves, stored in a `SmallVec` sized by `INLINE_CAPACITY`.
edges: SmallVec<[DepNodeIndex; EdgesVec::INLINE_CAPACITY]>,
}

/// Hashing covers only the stored edges; the `max` field is not fed to the
/// hasher.
impl Hash for EdgesVec {
    #[inline]
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        self.edges.hash(hasher);
    }
}

impl EdgesVec {
pub const INLINE_CAPACITY: usize = 8;

#[inline]
pub fn new() -> Self {
Self::default()
}

#[inline]
pub fn push(&mut self, edge: DepNodeIndex) {
self.max = self.max.max(edge.as_u32());
self.edges.push(edge);
}

#[inline]
pub fn max_index(&self) -> u32 {
self.max
}
}

/// Lets an `EdgesVec` be read as a plain slice of `DepNodeIndex`.
impl Deref for EdgesVec {
    type Target = [DepNodeIndex];

    #[inline]
    fn deref(&self) -> &[DepNodeIndex] {
        // `SmallVec` derefs to a slice, so a borrow coerces to the target type.
        &self.edges
    }
}

/// Builds an `EdgesVec` by pushing every yielded index in order, so the
/// recorded maximum stays in sync with the contents.
impl FromIterator<DepNodeIndex> for EdgesVec {
    #[inline]
    fn from_iter<T: IntoIterator<Item = DepNodeIndex>>(iter: T) -> Self {
        let mut collected = EdgesVec::new();
        iter.into_iter().for_each(|index| collected.push(index));
        collected
    }
}

/// Appends every yielded index through `push`, so the recorded maximum is
/// updated for each new edge.
impl Extend<DepNodeIndex> for EdgesVec {
    #[inline]
    fn extend<T: IntoIterator<Item = DepNodeIndex>>(&mut self, iter: T) {
        iter.into_iter().for_each(|edge| self.push(edge));
    }
}
25 changes: 10 additions & 15 deletions compiler/rustc_query_system/src/dep_graph/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, Lrc, Ordering};
use rustc_data_structures::unord::UnordMap;
use rustc_index::IndexVec;
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
use smallvec::{smallvec, SmallVec};
use std::assert_matches::assert_matches;
use std::collections::hash_map::Entry;
use std::fmt::Debug;
Expand All @@ -19,6 +18,7 @@ use std::sync::atomic::Ordering::Relaxed;
use super::query::DepGraphQuery;
use super::serialized::{GraphEncoder, SerializedDepGraph, SerializedDepNodeIndex};
use super::{DepContext, DepKind, DepNode, HasDepContext, WorkProductId};
use crate::dep_graph::EdgesVec;
use crate::ich::StableHashingContext;
use crate::query::{QueryContext, QuerySideEffects};

Expand Down Expand Up @@ -137,7 +137,7 @@ impl<K: DepKind> DepGraph<K> {
let _green_node_index = current.intern_new_node(
profiler,
DepNode { kind: DepKind::NULL, hash: current.anon_id_seed.into() },
smallvec![],
EdgesVec::new(),
Fingerprint::ZERO,
);
assert_eq!(_green_node_index, DepNodeIndex::SINGLETON_DEPENDENCYLESS_ANON_NODE);
Expand All @@ -147,7 +147,7 @@ impl<K: DepKind> DepGraph<K> {
profiler,
&prev_graph,
DepNode { kind: DepKind::RED, hash: Fingerprint::ZERO.into() },
smallvec![],
EdgesVec::new(),
None,
false,
);
Expand Down Expand Up @@ -356,12 +356,12 @@ impl<K: DepKind> DepGraphData<K> {

let with_deps = |task_deps| K::with_deps(task_deps, || task(cx, arg));
let (result, edges) = if cx.dep_context().is_eval_always(key.kind) {
(with_deps(TaskDepsRef::EvalAlways), smallvec![])
(with_deps(TaskDepsRef::EvalAlways), EdgesVec::new())
} else {
let task_deps = Lock::new(TaskDeps {
#[cfg(debug_assertions)]
node: Some(key),
reads: SmallVec::new(),
reads: EdgesVec::new(),
read_set: Default::default(),
phantom_data: PhantomData,
});
Expand Down Expand Up @@ -486,14 +486,14 @@ impl<K: DepKind> DepGraph<K> {

// As long as we only have a low number of reads we can avoid doing a hash
// insert and potentially allocating/reallocating the hashmap
let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP {
let new_read = if task_deps.reads.len() < EdgesVec::INLINE_CAPACITY {
task_deps.reads.iter().all(|other| *other != dep_node_index)
} else {
task_deps.read_set.insert(dep_node_index)
};
if new_read {
task_deps.reads.push(dep_node_index);
if task_deps.reads.len() == TASK_DEPS_READS_CAP {
if task_deps.reads.len() == EdgesVec::INLINE_CAPACITY {
// Fill `read_set` with what we have so far so we can use the hashset
// next time
task_deps.read_set.extend(task_deps.reads.iter().copied());
Expand Down Expand Up @@ -572,7 +572,7 @@ impl<K: DepKind> DepGraph<K> {
}
}

let mut edges = SmallVec::new();
let mut edges = EdgesVec::new();
K::read_deps(|task_deps| match task_deps {
TaskDepsRef::Allow(deps) => edges.extend(deps.lock().reads.iter().copied()),
TaskDepsRef::EvalAlways => {
Expand Down Expand Up @@ -872,7 +872,7 @@ impl<K: DepKind> DepGraphData<K> {

let prev_deps = self.previous.edge_targets_from(prev_dep_node_index);

for &dep_dep_node_index in prev_deps {
for dep_dep_node_index in prev_deps {
self.try_mark_parent_green(qcx, dep_dep_node_index, dep_node, Some(&frame))?;
}

Expand Down Expand Up @@ -1308,8 +1308,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
let key = prev_graph.index_to_node(prev_index);
let edges = prev_graph
.edge_targets_from(prev_index)
.iter()
.map(|i| prev_index_to_index[*i].unwrap())
.map(|i| prev_index_to_index[i].unwrap())
.collect();
let fingerprint = prev_graph.fingerprint_by_index(prev_index);
let dep_node_index = self.encoder.borrow().send(profiler, key, fingerprint, edges);
Expand All @@ -1335,10 +1334,6 @@ impl<K: DepKind> CurrentDepGraph<K> {
}
}

/// The capacity of the `reads` field `SmallVec`
const TASK_DEPS_READS_CAP: usize = 8;
type EdgesVec = SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>;

#[derive(Debug, Clone, Copy)]
pub enum TaskDepsRef<'a, K: DepKind> {
/// New dependencies can be added to the
Expand Down
8 changes: 8 additions & 0 deletions compiler/rustc_query_system/src/dep_graph/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
pub mod debug;
mod dep_node;
mod edges;
mod graph;
mod query;
mod serialized;

pub use dep_node::{DepKindStruct, DepNode, DepNodeParams, WorkProductId};
pub use edges::EdgesVec;
pub use graph::{
hash_result, DepGraph, DepGraphData, DepNodeColor, DepNodeIndex, TaskDeps, TaskDepsRef,
WorkProduct, WorkProductMap,
Expand Down Expand Up @@ -157,4 +159,10 @@ pub trait DepKind: Copy + fmt::Debug + Eq + Hash + Send + Encodable<FileEncoder>
fn read_deps<OP>(op: OP)
where
OP: for<'a> FnOnce(TaskDepsRef<'a, Self>);

/// Builds a dep kind from a `u16` value.
/// NOTE(review): presumably the inverse of `to_u16` — confirm against implementors.
fn from_u16(u: u16) -> Self;

/// Returns the `u16` representation of this dep kind.
fn to_u16(self) -> u16;

/// NOTE(review): looks like the largest `u16` value that `from_u16` accepts;
/// the rustc_middle implementation sets it to `DepKind::VARIANTS - 1` — confirm.
const MAX: u16;
}
Loading

0 comments on commit f00c139

Please sign in to comment.