Auto merge of #110050 - saethlin:better-u32-encoding, r=nnethercote

Use a specialized varint + bitpacking scheme for DepGraph encoding The previous scheme here uses leb128 to encode the edge tables that represent the incr comp dependency graph. The problem with that scheme is that leb128 has overhead for larger values, and generally relies on the distribution of encoded values being heavily skewed towards smaller values. That is definitely not the case for a dep node index, since they are handed out sequentially and the whole range is covered, the distribution is actually biased in the opposite direction: Most dep nodes are large. This PR implements a different varint encoding scheme. Instead of applying varint encoding to individual dep node indices (which is extremely branchy) we now apply it per node. While being built, each node now stores its edges in a `SmallVec` with a bit of extra logic to track the max value of each edge. Then we varint encode the whole batch. This is a gamble: We save on space by only claiming 2 bits per node instead of ~3 bits per edge which is a nice savings but needs to balance out with the space overhead that a single large index in a node with a lot of edges will encode unnecessary bytes in each of that node's edge indices. Then, to keep the runtime overhead of this encoding scheme down we deserialize our indices by loading 4 bytes for each then masking off the bytes that are't ours. This is much less code and branches than leb128, but relies on having some readable bytes past the end of each edge list. We explicitly add such padding to the in-memory data during decoding. And we also do this decoding lazily, turning a dense on-disk encoding into a peak memory reduction. Then we apply a bit-packing scheme; since in #115391 we now have unused bits on `DepKind`, we use those unused bits (currently there are 7!) to store the 2 bits that we need for the byte width of the edges in each node, then use the remaining bits to store the length of the edge list, if it fits. r? `@nnethercote`
rust-lang · Sep 7, 2023 · f00c139 · f00c139
2 parents 4e5b31c + 469dc8f
commit f00c139
Show file tree

Hide file tree

Showing 8 changed files with 399 additions and 45 deletions.
diff --git a/compiler/rustc_middle/src/dep_graph/dep_node.rs b/compiler/rustc_middle/src/dep_graph/dep_node.rs
@@ -97,7 +97,7 @@ macro_rules! define_dep_nodes {
             // discriminants of the variants have been assigned consecutively from 0
             // so that just the one comparison suffices to check that the u16 can be
             // transmuted to a DepKind.
-            const VARIANTS: u16 = {
+            pub const VARIANTS: u16 = {
                 let deps: &[DepKind] = &[$(DepKind::$variant,)*];
                 let mut i = 0;
                 while i < deps.len() {

diff --git a/compiler/rustc_middle/src/dep_graph/mod.rs b/compiler/rustc_middle/src/dep_graph/mod.rs
@@ -26,6 +26,7 @@ pub type DepKindStruct<'tcx> = rustc_query_system::dep_graph::DepKindStruct<TyCt
 impl rustc_query_system::dep_graph::DepKind for DepKind {
     const NULL: Self = DepKind::Null;
     const RED: Self = DepKind::Red;
+    const MAX: u16 = DepKind::VARIANTS - 1;
 
     fn debug_node(node: &DepNode, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "{:?}(", node.kind)?;
@@ -68,6 +69,21 @@ impl rustc_query_system::dep_graph::DepKind for DepKind {
             op(icx.task_deps)
         })
     }
+
+    #[track_caller]
+    #[inline]
+    fn from_u16(u: u16) -> Self {
+        if u > Self::MAX {
+            panic!("Invalid DepKind {u}");
+        }
+        // SAFETY: See comment on DepKind::VARIANTS
+        unsafe { std::mem::transmute(u) }
+    }
+
+    #[inline]
+    fn to_u16(self) -> u16 {
+        self as u16
+    }
 }
 
 impl<'tcx> DepContext for TyCtxt<'tcx> {

diff --git a/compiler/rustc_query_system/src/dep_graph/edges.rs b/compiler/rustc_query_system/src/dep_graph/edges.rs
@@ -0,0 +1,73 @@
+use crate::dep_graph::DepNodeIndex;
+use smallvec::SmallVec;
+use std::hash::{Hash, Hasher};
+use std::iter::Extend;
+use std::ops::Deref;
+
+#[derive(Default, Debug)]
+pub struct EdgesVec {
+    max: u32,
+    edges: SmallVec<[DepNodeIndex; EdgesVec::INLINE_CAPACITY]>,
+}
+
+impl Hash for EdgesVec {
+    #[inline]
+    fn hash<H: Hasher>(&self, hasher: &mut H) {
+        Hash::hash(&self.edges, hasher)
+    }
+}
+
+impl EdgesVec {
+    pub const INLINE_CAPACITY: usize = 8;
+
+    #[inline]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    #[inline]
+    pub fn push(&mut self, edge: DepNodeIndex) {
+        self.max = self.max.max(edge.as_u32());
+        self.edges.push(edge);
+    }
+
+    #[inline]
+    pub fn max_index(&self) -> u32 {
+        self.max
+    }
+}
+
+impl Deref for EdgesVec {
+    type Target = [DepNodeIndex];
+
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        self.edges.as_slice()
+    }
+}
+
+impl FromIterator<DepNodeIndex> for EdgesVec {
+    #[inline]
+    fn from_iter<T>(iter: T) -> Self
+    where
+        T: IntoIterator<Item = DepNodeIndex>,
+    {
+        let mut vec = EdgesVec::new();
+        for index in iter {
+            vec.push(index)
+        }
+        vec
+    }
+}
+
+impl Extend<DepNodeIndex> for EdgesVec {
+    #[inline]
+    fn extend<T>(&mut self, iter: T)
+    where
+        T: IntoIterator<Item = DepNodeIndex>,
+    {
+        for elem in iter {
+            self.push(elem);
+        }
+    }
+}
diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -8,7 +8,6 @@ use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, Lrc, Ordering};
 use rustc_data_structures::unord::UnordMap;
 use rustc_index::IndexVec;
 use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
-use smallvec::{smallvec, SmallVec};
 use std::assert_matches::assert_matches;
 use std::collections::hash_map::Entry;
 use std::fmt::Debug;
@@ -19,6 +18,7 @@ use std::sync::atomic::Ordering::Relaxed;
 use super::query::DepGraphQuery;
 use super::serialized::{GraphEncoder, SerializedDepGraph, SerializedDepNodeIndex};
 use super::{DepContext, DepKind, DepNode, HasDepContext, WorkProductId};
+use crate::dep_graph::EdgesVec;
 use crate::ich::StableHashingContext;
 use crate::query::{QueryContext, QuerySideEffects};
 
@@ -137,7 +137,7 @@ impl<K: DepKind> DepGraph<K> {
         let _green_node_index = current.intern_new_node(
             profiler,
             DepNode { kind: DepKind::NULL, hash: current.anon_id_seed.into() },
-            smallvec![],
+            EdgesVec::new(),
             Fingerprint::ZERO,
         );
         assert_eq!(_green_node_index, DepNodeIndex::SINGLETON_DEPENDENCYLESS_ANON_NODE);
@@ -147,7 +147,7 @@ impl<K: DepKind> DepGraph<K> {
             profiler,
             &prev_graph,
             DepNode { kind: DepKind::RED, hash: Fingerprint::ZERO.into() },
-            smallvec![],
+            EdgesVec::new(),
             None,
             false,
         );
@@ -356,12 +356,12 @@ impl<K: DepKind> DepGraphData<K> {
 
         let with_deps = |task_deps| K::with_deps(task_deps, || task(cx, arg));
         let (result, edges) = if cx.dep_context().is_eval_always(key.kind) {
-            (with_deps(TaskDepsRef::EvalAlways), smallvec![])
+            (with_deps(TaskDepsRef::EvalAlways), EdgesVec::new())
         } else {
             let task_deps = Lock::new(TaskDeps {
                 #[cfg(debug_assertions)]
                 node: Some(key),
-                reads: SmallVec::new(),
+                reads: EdgesVec::new(),
                 read_set: Default::default(),
                 phantom_data: PhantomData,
             });
@@ -486,14 +486,14 @@ impl<K: DepKind> DepGraph<K> {
 
                 // As long as we only have a low number of reads we can avoid doing a hash
                 // insert and potentially allocating/reallocating the hashmap
-                let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP {
+                let new_read = if task_deps.reads.len() < EdgesVec::INLINE_CAPACITY {
                     task_deps.reads.iter().all(|other| *other != dep_node_index)
                 } else {
                     task_deps.read_set.insert(dep_node_index)
                 };
                 if new_read {
                     task_deps.reads.push(dep_node_index);
-                    if task_deps.reads.len() == TASK_DEPS_READS_CAP {
+                    if task_deps.reads.len() == EdgesVec::INLINE_CAPACITY {
                         // Fill `read_set` with what we have so far so we can use the hashset
                         // next time
                         task_deps.read_set.extend(task_deps.reads.iter().copied());
@@ -572,7 +572,7 @@ impl<K: DepKind> DepGraph<K> {
                 }
             }
 
-            let mut edges = SmallVec::new();
+            let mut edges = EdgesVec::new();
             K::read_deps(|task_deps| match task_deps {
                 TaskDepsRef::Allow(deps) => edges.extend(deps.lock().reads.iter().copied()),
                 TaskDepsRef::EvalAlways => {
@@ -872,7 +872,7 @@ impl<K: DepKind> DepGraphData<K> {
 
         let prev_deps = self.previous.edge_targets_from(prev_dep_node_index);
 
-        for &dep_dep_node_index in prev_deps {
+        for dep_dep_node_index in prev_deps {
             self.try_mark_parent_green(qcx, dep_dep_node_index, dep_node, Some(&frame))?;
         }
 
@@ -1308,8 +1308,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
                 let key = prev_graph.index_to_node(prev_index);
                 let edges = prev_graph
                     .edge_targets_from(prev_index)
-                    .iter()
-                    .map(|i| prev_index_to_index[*i].unwrap())
+                    .map(|i| prev_index_to_index[i].unwrap())
                     .collect();
                 let fingerprint = prev_graph.fingerprint_by_index(prev_index);
                 let dep_node_index = self.encoder.borrow().send(profiler, key, fingerprint, edges);
@@ -1335,10 +1334,6 @@ impl<K: DepKind> CurrentDepGraph<K> {
     }
 }
 
-/// The capacity of the `reads` field `SmallVec`
-const TASK_DEPS_READS_CAP: usize = 8;
-type EdgesVec = SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>;
-
 #[derive(Debug, Clone, Copy)]
 pub enum TaskDepsRef<'a, K: DepKind> {
     /// New dependencies can be added to the

diff --git a/compiler/rustc_query_system/src/dep_graph/mod.rs b/compiler/rustc_query_system/src/dep_graph/mod.rs
@@ -1,10 +1,12 @@
 pub mod debug;
 mod dep_node;
+mod edges;
 mod graph;
 mod query;
 mod serialized;
 
 pub use dep_node::{DepKindStruct, DepNode, DepNodeParams, WorkProductId};
+pub use edges::EdgesVec;
 pub use graph::{
     hash_result, DepGraph, DepGraphData, DepNodeColor, DepNodeIndex, TaskDeps, TaskDepsRef,
     WorkProduct, WorkProductMap,
@@ -157,4 +159,10 @@ pub trait DepKind: Copy + fmt::Debug + Eq + Hash + Send + Encodable<FileEncoder>
     fn read_deps<OP>(op: OP)
     where
         OP: for<'a> FnOnce(TaskDepsRef<'a, Self>);
+
+    fn from_u16(u: u16) -> Self;
+
+    fn to_u16(self) -> u16;
+
+    const MAX: u16;
 }