Use a specialized varint + bitpacking scheme for DepGraph encoding

Ben Kimock 2023-04-29 21:51:32 -04:00
parent bf1e3f31f9
commit 94fe18f84b
8 changed files with 358 additions and 46 deletions

View File

@ -97,7 +97,7 @@ impl DepKind {
// discriminants of the variants have been assigned consecutively from 0
// so that just the one comparison suffices to check that the u16 can be
// transmuted to a DepKind.
const VARIANTS: u16 = {
pub const VARIANTS: u16 = {
let deps: &[DepKind] = &[$(DepKind::$variant,)*];
let mut i = 0;
while i < deps.len() {

View File

@ -26,6 +26,7 @@
impl rustc_query_system::dep_graph::DepKind for DepKind {
const NULL: Self = DepKind::Null;
const RED: Self = DepKind::Red;
const MAX: u16 = DepKind::VARIANTS - 1;
fn debug_node(node: &DepNode, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}(", node.kind)?;
@ -68,6 +69,21 @@ fn read_deps<OP>(op: OP)
op(icx.task_deps)
})
}
#[track_caller]
#[inline]
fn from_u16(u: u16) -> Self {
if u > Self::MAX {
panic!("Invalid DepKind {u}");
}
// SAFETY: See comment on DepKind::VARIANTS
unsafe { std::mem::transmute(u) }
}
#[inline]
fn to_u16(self) -> u16 {
self as u16
}
}
impl<'tcx> DepContext for TyCtxt<'tcx> {

View File

@ -0,0 +1,60 @@
use crate::dep_graph::DepNodeIndex;
use smallvec::SmallVec;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
#[derive(Default, Debug)]
pub struct EdgesVec {
max: u32,
edges: SmallVec<[DepNodeIndex; EdgesVec::INLINE_CAPACITY]>,
}
impl Hash for EdgesVec {
#[inline]
fn hash<H: Hasher>(&self, hasher: &mut H) {
Hash::hash(&self.edges, hasher)
}
}
impl EdgesVec {
pub const INLINE_CAPACITY: usize = 8;
#[inline]
pub fn new() -> Self {
Self::default()
}
#[inline]
pub fn push(&mut self, edge: DepNodeIndex) {
self.max = self.max.max(edge.as_u32());
self.edges.push(edge);
}
#[inline]
pub fn max_index(&self) -> u32 {
self.max
}
}
impl Deref for EdgesVec {
type Target = [DepNodeIndex];
#[inline]
fn deref(&self) -> &Self::Target {
self.edges.as_slice()
}
}
impl FromIterator<DepNodeIndex> for EdgesVec {
#[inline]
fn from_iter<T>(iter: T) -> Self
where
T: IntoIterator<Item = DepNodeIndex>,
{
let mut vec = EdgesVec::new();
for index in iter {
vec.push(index)
}
vec
}
}
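This type is the write-side half of the scheme: `max_index()` lets the encoder choose one byte width per edge list rather than per element. A minimal standalone sketch of that width selection and the truncated little-endian write, using illustrative helper names rather than the compiler's own:

fn bytes_per_index(max: u32) -> usize {
    // Illustrative sketch: drop whole leading zero bytes, but never go below one byte,
    // so the width-minus-1 field used by the encoder below fits in two bits.
    let free_bytes = max.leading_zeros() as usize / 8;
    (std::mem::size_of::<u32>() - free_bytes).max(1)
}

fn encode_edge_list(edges: &[u32]) -> (usize, Vec<u8>) {
    // Every index in one list is written with the same truncated little-endian width.
    let max = edges.iter().copied().max().unwrap_or(0);
    let width = bytes_per_index(max);
    let mut out = Vec::with_capacity(edges.len() * width);
    for edge in edges {
        out.extend_from_slice(&edge.to_le_bytes()[..width]);
    }
    (width, out)
}

For example, a list whose largest index is 300 is written with two bytes per edge, less than half the size of fixed four-byte indices.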

View File

@ -8,7 +8,6 @@
use rustc_data_structures::unord::UnordMap;
use rustc_index::IndexVec;
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
use smallvec::{smallvec, SmallVec};
use std::assert_matches::assert_matches;
use std::collections::hash_map::Entry;
use std::fmt::Debug;
@ -19,6 +18,7 @@
use super::query::DepGraphQuery;
use super::serialized::{GraphEncoder, SerializedDepGraph, SerializedDepNodeIndex};
use super::{DepContext, DepKind, DepNode, HasDepContext, WorkProductId};
use crate::dep_graph::EdgesVec;
use crate::ich::StableHashingContext;
use crate::query::{QueryContext, QuerySideEffects};
@ -137,7 +137,7 @@ pub fn new(
let _green_node_index = current.intern_new_node(
profiler,
DepNode { kind: DepKind::NULL, hash: current.anon_id_seed.into() },
smallvec![],
EdgesVec::new(),
Fingerprint::ZERO,
);
assert_eq!(_green_node_index, DepNodeIndex::SINGLETON_DEPENDENCYLESS_ANON_NODE);
@ -147,7 +147,7 @@ pub fn new(
profiler,
&prev_graph,
DepNode { kind: DepKind::RED, hash: Fingerprint::ZERO.into() },
smallvec![],
EdgesVec::new(),
None,
false,
);
@ -356,12 +356,12 @@ pub fn with_task<Ctxt: HasDepContext<DepKind = K>, A: Debug, R>(
let with_deps = |task_deps| K::with_deps(task_deps, || task(cx, arg));
let (result, edges) = if cx.dep_context().is_eval_always(key.kind) {
(with_deps(TaskDepsRef::EvalAlways), smallvec![])
(with_deps(TaskDepsRef::EvalAlways), EdgesVec::new())
} else {
let task_deps = Lock::new(TaskDeps {
#[cfg(debug_assertions)]
node: Some(key),
reads: SmallVec::new(),
reads: EdgesVec::new(),
read_set: Default::default(),
phantom_data: PhantomData,
});
@ -486,14 +486,14 @@ pub fn read_index(&self, dep_node_index: DepNodeIndex) {
// As long as we only have a low number of reads we can avoid doing a hash
// insert and potentially allocating/reallocating the hashmap
let new_read = if task_deps.reads.len() < TASK_DEPS_READS_CAP {
let new_read = if task_deps.reads.len() < EdgesVec::INLINE_CAPACITY {
task_deps.reads.iter().all(|other| *other != dep_node_index)
} else {
task_deps.read_set.insert(dep_node_index)
};
if new_read {
task_deps.reads.push(dep_node_index);
if task_deps.reads.len() == TASK_DEPS_READS_CAP {
if task_deps.reads.len() == EdgesVec::INLINE_CAPACITY {
// Fill `read_set` with what we have so far so we can use the hashset
// next time
task_deps.read_set.extend(task_deps.reads.iter().copied());
@ -572,9 +572,13 @@ pub fn with_feed_task<Ctxt: DepContext<DepKind = K>, A: Debug, R: Debug>(
}
}
let mut edges = SmallVec::new();
let mut edges = EdgesVec::new();
K::read_deps(|task_deps| match task_deps {
TaskDepsRef::Allow(deps) => edges.extend(deps.lock().reads.iter().copied()),
TaskDepsRef::Allow(deps) => {
for index in deps.lock().reads.iter().copied() {
edges.push(index);
}
}
TaskDepsRef::EvalAlways => {
edges.push(DepNodeIndex::FOREVER_RED_NODE);
}
@ -872,7 +876,7 @@ fn try_mark_previous_green<Qcx: QueryContext<DepKind = K>>(
let prev_deps = self.previous.edge_targets_from(prev_dep_node_index);
for &dep_dep_node_index in prev_deps {
for dep_dep_node_index in prev_deps {
self.try_mark_parent_green(qcx, dep_dep_node_index, dep_node, Some(&frame))?;
}
@ -1308,8 +1312,7 @@ fn promote_node_and_deps_to_current(
let key = prev_graph.index_to_node(prev_index);
let edges = prev_graph
.edge_targets_from(prev_index)
.iter()
.map(|i| prev_index_to_index[*i].unwrap())
.map(|i| prev_index_to_index[i].unwrap())
.collect();
let fingerprint = prev_graph.fingerprint_by_index(prev_index);
let dep_node_index = self.encoder.borrow().send(profiler, key, fingerprint, edges);
@ -1335,10 +1338,6 @@ fn debug_assert_not_in_new_nodes(
}
}
/// The capacity of the `reads` field `SmallVec`
const TASK_DEPS_READS_CAP: usize = 8;
type EdgesVec = SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>;
#[derive(Debug, Clone, Copy)]
pub enum TaskDepsRef<'a, K: DepKind> {
/// New dependencies can be added to the

View File

@ -1,10 +1,12 @@
pub mod debug;
mod dep_node;
mod edges;
mod graph;
mod query;
mod serialized;
pub use dep_node::{DepKindStruct, DepNode, DepNodeParams, WorkProductId};
pub use edges::EdgesVec;
pub use graph::{
hash_result, DepGraph, DepGraphData, DepNodeColor, DepNodeIndex, TaskDeps, TaskDepsRef,
WorkProduct, WorkProductMap,
@ -157,4 +159,10 @@ fn with_deps<OP, R>(deps: TaskDepsRef<'_, Self>, op: OP) -> R
fn read_deps<OP>(op: OP)
where
OP: for<'a> FnOnce(TaskDepsRef<'a, Self>);
fn from_u16(u: u16) -> Self;
fn to_u16(self) -> u16;
const MAX: u16;
}
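These new trait items only ask the implementor for a checked cast in each direction. A minimal sketch, assuming a hypothetical `#[repr(u16)]` enum whose discriminants are assigned consecutively from 0 (the invariant that `DepKind::VARIANTS` verifies above):

#[repr(u16)]
#[derive(Debug, Clone, Copy)]
enum Kind {
    Null = 0,
    Red = 1,
    TypeOf = 2,
}

impl Kind {
    const MAX: u16 = 2;

    fn to_u16(self) -> u16 {
        self as u16
    }

    fn from_u16(u: u16) -> Self {
        assert!(u <= Self::MAX, "invalid Kind {u}");
        // SAFETY: `Kind` is `#[repr(u16)]` with consecutive discriminants 0..=MAX,
        // and we just checked `u <= MAX`, so `u` is a valid discriminant.
        unsafe { std::mem::transmute(u) }
    }
}

The serializer relies on this round trip to pack the kind into the low bits of each node header.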

View File

@ -1,6 +1,6 @@
//! The data that we will serialize and deserialize.
//!
//! The dep-graph is serialized as a sequence of NodeInfo, with the dependencies
//! Notionally, the dep-graph is a sequence of NodeInfo with the dependencies
//! specified inline. The total number of nodes and edges are stored as the last
//! 16 bytes of the file, so we can find them easily at decoding time.
//!
@ -14,14 +14,16 @@
use super::query::DepGraphQuery;
use super::{DepKind, DepNode, DepNodeIndex};
use crate::dep_graph::EdgesVec;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fingerprint::PackedFingerprint;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::sync::Lock;
use rustc_index::{Idx, IndexVec};
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
use rustc_serialize::{Decodable, Decoder, Encodable};
use smallvec::SmallVec;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::marker::PhantomData;
// The maximum value of `SerializedDepNodeIndex` leaves the upper two bits
// unused so that we can store multiple index types in `CompressedHybridIndex`,
@ -31,6 +33,16 @@
pub struct SerializedDepNodeIndex {}
}
const DEP_NODE_SIZE: usize = std::mem::size_of::<SerializedDepNodeIndex>();
/// Amount of padding we need to add to the edge list data so that we can retrieve every
/// SerializedDepNodeIndex with a fixed-size read then mask.
const DEP_NODE_PAD: usize = DEP_NODE_SIZE - 1;
/// Number of bits needed to store the number of bytes used by a SerializedDepNodeIndex.
/// Note that wherever we encode a byte width like this we actually store the number of bytes used
/// minus 1; a 4-byte value technically has 5 possible widths (0 through 4 bytes), but spending a
/// whole byte on the rare all-zero indices is a decent tradeoff to save a bit in our bitfields.
const DEP_NODE_WIDTH_BITS: usize = DEP_NODE_SIZE / 2;
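A small worked example of the minus-1 convention, assuming 4-byte indices and hypothetical helper names: the 2-bit width field covers byte widths 1 through 4, so a stored value of 0b01 means two bytes per index.

fn pack_width(bytes_used: usize) -> usize {
    // Hypothetical helper: store `bytes_used - 1` so widths 1..=4 fit in 2 bits.
    debug_assert!((1..=4).contains(&bytes_used));
    bytes_used - 1 // 0b00 through 0b11
}

fn unpack_width(field: usize) -> usize {
    (field & 0b11) + 1
}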
/// Data for use when recompiling the **current crate**.
#[derive(Debug)]
pub struct SerializedDepGraph<K: DepKind> {
@ -42,10 +54,10 @@ pub struct SerializedDepGraph<K: DepKind> {
/// For each DepNode, stores the list of edges originating from that
/// DepNode. Encoded as a [start, end) pair indexing into edge_list_data,
/// which holds the actual DepNodeIndices of the target nodes.
edge_list_indices: IndexVec<SerializedDepNodeIndex, (u32, u32)>,
/// A flattened list of all edge targets in the graph. Edge sources are
/// implicit in edge_list_indices.
edge_list_data: Vec<SerializedDepNodeIndex>,
edge_list_indices: IndexVec<SerializedDepNodeIndex, EdgeHeader>,
/// A flattened list of all edge targets in the graph, stored in the same
/// varint encoding that we use on disk. Edge sources are implicit in edge_list_indices.
edge_list_data: Vec<u8>,
/// Reciprocal map to `nodes`.
index: FxHashMap<DepNode<K>, SerializedDepNodeIndex>,
}
@ -64,9 +76,35 @@ fn default() -> Self {
impl<K: DepKind> SerializedDepGraph<K> {
#[inline]
pub fn edge_targets_from(&self, source: SerializedDepNodeIndex) -> &[SerializedDepNodeIndex] {
let targets = self.edge_list_indices[source];
&self.edge_list_data[targets.0 as usize..targets.1 as usize]
pub fn edge_targets_from(
&self,
source: SerializedDepNodeIndex,
) -> impl Iterator<Item = SerializedDepNodeIndex> + '_ {
let header = self.edge_list_indices[source];
let mut raw = &self.edge_list_data[header.start()..];
// Figure out where the edge list for `source` ends by looking at the start of the next
// node's edge list, or the end of the data (excluding padding) if `source` is the last node.
let end = self
.edge_list_indices
.get(source + 1)
.map(|h| h.start())
.unwrap_or_else(|| self.edge_list_data.len() - DEP_NODE_PAD);
// The number of edges for this node is implicitly stored in the combination of the byte
// width and the length.
let bytes_per_index = header.bytes_per_index();
let len = (end - header.start()) / bytes_per_index;
// LLVM doesn't hoist EdgeHeader::mask so we do it ourselves.
let mask = header.mask();
(0..len).map(move |_| {
// Doing this slicing in this order ensures that the first bounds check suffices for
// all the others.
let index = &raw[..DEP_NODE_SIZE];
raw = &raw[bytes_per_index..];
let index = u32::from_le_bytes(index.try_into().unwrap()) & mask;
SerializedDepNodeIndex::from_u32(index)
})
}
#[inline]
@ -84,11 +122,42 @@ pub fn fingerprint_by_index(&self, dep_node_index: SerializedDepNodeIndex) -> Fi
self.fingerprints[dep_node_index]
}
#[inline]
pub fn node_count(&self) -> usize {
self.index.len()
}
}
/// A packed representation of the start of a node's edge list and the byte width of its indices.
///
/// This is packed by stealing 2 bits from the start index, which means we can only accommodate
/// edge data arrays up to a quarter of our address space. Which seems fine.
#[derive(Debug, Clone, Copy)]
struct EdgeHeader {
repr: usize,
}
impl EdgeHeader {
#[inline]
fn start(self) -> usize {
self.repr >> DEP_NODE_WIDTH_BITS
}
#[inline]
fn bytes_per_index(self) -> usize {
(self.repr & mask(DEP_NODE_WIDTH_BITS)) + 1
}
#[inline]
fn mask(self) -> u32 {
mask(self.bytes_per_index() * 8) as u32
}
}
fn mask(bits: usize) -> usize {
usize::MAX >> ((std::mem::size_of::<usize>() * 8) - bits)
}
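Taken together, `EdgeHeader` and `mask` mean the decoder never performs a variable-length read: it always loads `DEP_NODE_SIZE` bytes and masks off the tail, which is why the data buffer carries `DEP_NODE_PAD` trailing zero bytes. A hedged sketch of that decode step over a buffer produced by the write-side example earlier (again with illustrative names):

fn decode_edge_list(data: &[u8], start: usize, width: usize, len: usize) -> Vec<u32> {
    // Hypothetical helper: `data` must end with at least 3 padding bytes so the final
    // fixed-size read below cannot run off the end of the buffer.
    debug_assert!((1..=4).contains(&width));
    let mask = u32::MAX >> (32 - 8 * width as u32);
    let mut raw = &data[start..];
    (0..len)
        .map(|_| {
            // Always read 4 bytes, then mask off the bytes that belong to the next index.
            let index = &raw[..4];
            raw = &raw[width..];
            u32::from_le_bytes(index.try_into().unwrap()) & mask
        })
        .collect()
}

Because the slice taken is always four bytes, its bounds check also covers the narrower `width`-byte advance, matching the comment in `edge_targets_from` above.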
impl<'a, K: DepKind + Decodable<MemDecoder<'a>>> Decodable<MemDecoder<'a>>
for SerializedDepGraph<K>
{
@ -107,32 +176,51 @@ fn decode(d: &mut MemDecoder<'a>) -> SerializedDepGraph<K> {
debug!(?node_count, ?edge_count);
let graph_bytes = d.len() - (2 * IntEncodedWithFixedSize::ENCODED_SIZE) - d.position();
let mut nodes = IndexVec::with_capacity(node_count);
let mut fingerprints = IndexVec::with_capacity(node_count);
let mut edge_list_indices = IndexVec::with_capacity(node_count);
let mut edge_list_data = Vec::with_capacity(edge_count);
// This slightly over-estimates the number of bytes used for all the edge data, but never by
// more than ~6%, because over-estimation only occurs for large nodes.
let mut edge_list_data = Vec::with_capacity(
graph_bytes - node_count * std::mem::size_of::<SerializedNodeHeader<K>>(),
);
for _index in 0..node_count {
let dep_node: DepNode<K> = Decodable::decode(d);
let _i: SerializedDepNodeIndex = nodes.push(dep_node);
// Decode the header for this node; the header packs together as many of the fixed-size
// fields as possible to limit the number of times we update decoder state.
let node_header = SerializedNodeHeader { bytes: d.read_array(), _marker: PhantomData };
let _i: SerializedDepNodeIndex = nodes.push(node_header.node());
debug_assert_eq!(_i.index(), _index);
let fingerprint: Fingerprint = Decodable::decode(d);
let _i: SerializedDepNodeIndex = fingerprints.push(fingerprint);
let _i: SerializedDepNodeIndex = fingerprints.push(node_header.fingerprint());
debug_assert_eq!(_i.index(), _index);
// Deserialize edges -- sequence of DepNodeIndex
let len = d.read_usize();
let start = edge_list_data.len().try_into().unwrap();
for _ in 0..len {
let edge = Decodable::decode(d);
edge_list_data.push(edge);
}
let end = edge_list_data.len().try_into().unwrap();
let _i: SerializedDepNodeIndex = edge_list_indices.push((start, end));
// If the length of this node's edge list is small, the length is stored in the header.
// If it is not, we fall back to another decoder call.
let num_edges = node_header.len().unwrap_or_else(|| d.read_usize());
// The edge index list uses the same varint strategy as rmeta tables: the byte width is
// chosen once per edge list, not per element. This lets us read the whole edge list for
// a node with one decoder call and also use the on-disk format in memory.
let edges_len_bytes = node_header.bytes_per_index() * num_edges;
// The in-memory structure for the edges list stores the byte width of the edges on
// this node with the offset into the global edge data array.
let edges_header = node_header.edges_header(&edge_list_data);
edge_list_data.extend(d.read_raw_bytes(edges_len_bytes));
let _i: SerializedDepNodeIndex = edge_list_indices.push(edges_header);
debug_assert_eq!(_i.index(), _index);
}
// When we access the edge list data, we do a fixed-size read then mask off the bytes that
// aren't for that edge index, so the last read may dangle off the end of the array. This
// padding ensures it doesn't.
edge_list_data.extend(&[0u8; DEP_NODE_PAD]);
let index: FxHashMap<_, _> =
nodes.iter_enumerated().map(|(idx, &dep_node)| (dep_node, idx)).collect();
@ -140,11 +228,151 @@ fn decode(d: &mut MemDecoder<'a>) -> SerializedDepGraph<K> {
}
}
#[derive(Debug, Encodable, Decodable)]
pub struct NodeInfo<K: DepKind> {
/// A packed representation of all the fixed-size fields in a `NodeInfo`.
///
/// This stores in one byte array:
/// * The `Fingerprint` in the `NodeInfo`
/// * The `Fingerprint` in `DepNode` that is in this `NodeInfo`
/// * The `DepKind`'s discriminant (a u16, but not all bits are used...)
/// * The byte width of the encoded edges for this node
/// * In whatever bits remain, the length of the edge list for this node, if it fits
struct SerializedNodeHeader<K> {
// 2 bytes for the packed head (kind, byte width, and inline edge count)
// 16 for the Fingerprint in DepNode
// 16 for the Fingerprint in NodeInfo
bytes: [u8; 34],
_marker: PhantomData<K>,
}
// The unpacked fields of a `SerializedNodeHeader`. This struct is an implementation detail that
// exists only to make the implementation of `SerializedNodeHeader` simpler.
struct Unpacked<K> {
len: Option<usize>,
bytes_per_index: usize,
kind: K,
hash: PackedFingerprint,
fingerprint: Fingerprint,
}
// Bit fields in the u16 head are
// 0 .. KIND_BITS              kind
// KIND_BITS .. KIND_BITS+2    bytes per index (minus 1)
// KIND_BITS+2 .. 16           length of the edge list, if it fits
impl<K: DepKind> SerializedNodeHeader<K> {
const TOTAL_BITS: usize = std::mem::size_of::<K>() * 8;
const LEN_BITS: usize = Self::TOTAL_BITS - Self::KIND_BITS - Self::WIDTH_BITS;
const WIDTH_BITS: usize = DEP_NODE_WIDTH_BITS;
const KIND_BITS: usize = Self::TOTAL_BITS - K::MAX.leading_zeros() as usize;
const MAX_INLINE_LEN: usize = (u16::MAX as usize >> (Self::TOTAL_BITS - Self::LEN_BITS)) - 1;
#[inline]
fn new(node_info: &NodeInfo<K>) -> Self {
debug_assert_eq!(Self::TOTAL_BITS, Self::LEN_BITS + Self::WIDTH_BITS + Self::KIND_BITS);
let NodeInfo { node, fingerprint, edges } = node_info;
let mut head = node.kind.to_u16();
let free_bytes = edges.max_index().leading_zeros() as usize / 8;
let bytes_per_index = (DEP_NODE_SIZE - free_bytes).saturating_sub(1);
head |= (bytes_per_index as u16) << Self::KIND_BITS;
// Encode number of edges + 1 so that we can reserve 0 to indicate that the len doesn't fit
// in this bitfield.
if edges.len() <= Self::MAX_INLINE_LEN {
head |= (edges.len() as u16 + 1) << (Self::KIND_BITS + Self::WIDTH_BITS);
}
let hash: Fingerprint = node.hash.into();
// Using half-open ranges ensures an unconditional panic if we get the magic numbers wrong.
let mut bytes = [0u8; 34];
bytes[..2].copy_from_slice(&head.to_le_bytes());
bytes[2..18].copy_from_slice(&hash.to_le_bytes());
bytes[18..].copy_from_slice(&fingerprint.to_le_bytes());
#[cfg(debug_assertions)]
{
let res = Self { bytes, _marker: PhantomData };
assert_eq!(node_info.fingerprint, res.fingerprint());
assert_eq!(node_info.node, res.node());
if let Some(len) = res.len() {
assert_eq!(node_info.edges.len(), len);
}
}
Self { bytes, _marker: PhantomData }
}
#[inline]
fn unpack(&self) -> Unpacked<K> {
let head = u16::from_le_bytes(self.bytes[..2].try_into().unwrap());
let hash = self.bytes[2..18].try_into().unwrap();
let fingerprint = self.bytes[18..].try_into().unwrap();
let kind = head & mask(Self::KIND_BITS) as u16;
let bytes_per_index = (head >> Self::KIND_BITS) & mask(Self::WIDTH_BITS) as u16;
let len = (head as usize) >> (Self::WIDTH_BITS + Self::KIND_BITS);
Unpacked {
len: len.checked_sub(1),
bytes_per_index: bytes_per_index as usize + 1,
kind: DepKind::from_u16(kind),
hash: Fingerprint::from_le_bytes(hash).into(),
fingerprint: Fingerprint::from_le_bytes(fingerprint),
}
}
#[inline]
fn len(&self) -> Option<usize> {
self.unpack().len
}
#[inline]
fn bytes_per_index(&self) -> usize {
self.unpack().bytes_per_index
}
#[inline]
fn fingerprint(&self) -> Fingerprint {
self.unpack().fingerprint
}
#[inline]
fn node(&self) -> DepNode<K> {
let Unpacked { kind, hash, .. } = self.unpack();
DepNode { kind, hash }
}
#[inline]
fn edges_header(&self, edge_list_data: &[u8]) -> EdgeHeader {
EdgeHeader {
repr: (edge_list_data.len() << DEP_NODE_WIDTH_BITS) | (self.bytes_per_index() - 1),
}
}
}
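The u16 head at the front of the 34-byte array is the bitpacking half of the scheme: the kind sits in the low bits, then the width-minus-1 field, then the inline length stored plus 1 so that a zero field can mean "does not fit". A self-contained sketch of that layout with made-up field sizes (7 kind bits, 2 width bits, 7 length bits) rather than the compiler's computed constants:

const KIND_BITS: u16 = 7;
const WIDTH_BITS: u16 = 2;
const LEN_BITS: u16 = 16 - KIND_BITS - WIDTH_BITS;
const MAX_INLINE_LEN: usize = (1 << LEN_BITS) - 2;

fn pack_head(kind: u16, bytes_per_index: u16, num_edges: usize) -> u16 {
    // Illustrative sketch only; field sizes here are assumptions, not rustc's constants.
    debug_assert!((1..=4).contains(&bytes_per_index));
    let mut head = kind;
    head |= (bytes_per_index - 1) << KIND_BITS;
    if num_edges <= MAX_INLINE_LEN {
        // Store len + 1 so that a zero field reads back as "length stored out of line".
        head |= (num_edges as u16 + 1) << (KIND_BITS + WIDTH_BITS);
    }
    head
}

fn unpack_head(head: u16) -> (u16, usize, Option<usize>) {
    let kind = head & ((1 << KIND_BITS) - 1);
    let bytes_per_index = ((head >> KIND_BITS) & ((1 << WIDTH_BITS) - 1)) as usize + 1;
    let len = (head >> (KIND_BITS + WIDTH_BITS)) as usize;
    (kind, bytes_per_index, len.checked_sub(1))
}

`unpack_head(pack_head(k, w, n))` returns the original values for any `n` up to `MAX_INLINE_LEN`; longer edge lists fall back to an explicit `emit_usize`/`read_usize` pair, as the `Encodable` impl below does.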
#[derive(Debug)]
struct NodeInfo<K: DepKind> {
node: DepNode<K>,
fingerprint: Fingerprint,
edges: SmallVec<[DepNodeIndex; 8]>,
edges: EdgesVec,
}
impl<K: DepKind> Encodable<FileEncoder> for NodeInfo<K> {
fn encode(&self, e: &mut FileEncoder) {
let header = SerializedNodeHeader::new(self);
e.emit_raw_bytes(&header.bytes);
if header.len().is_none() {
e.emit_usize(self.edges.len());
}
let bytes_per_index = header.bytes_per_index();
for node_index in self.edges.iter() {
let bytes = node_index.as_u32().to_le_bytes();
e.emit_raw_bytes(&bytes[..bytes_per_index]);
}
}
}
struct Stat<K: DepKind> {
@ -303,7 +531,7 @@ pub(crate) fn send(
profiler: &SelfProfilerRef,
node: DepNode<K>,
fingerprint: Fingerprint,
edges: SmallVec<[DepNodeIndex; 8]>,
edges: EdgesVec,
) -> DepNodeIndex {
let _prof_timer = profiler.generic_activity("incr_comp_encode_dep_graph");
let node = NodeInfo { node, fingerprint, edges };

View File

@ -4,6 +4,7 @@
#![feature(min_specialization)]
#![feature(extern_types)]
#![feature(let_chains)]
#![feature(inline_const)]
#![allow(rustc::potential_query_instability)]
#![deny(rustc::untranslatable_diagnostic)]
#![deny(rustc::diagnostic_outside_of_impl)]

View File

@ -353,7 +353,7 @@ fn decoder_exhausted() -> ! {
}
#[inline]
fn read_array<const N: usize>(&mut self) -> [u8; N] {
pub fn read_array<const N: usize>(&mut self) -> [u8; N] {
self.read_raw_bytes(N).try_into().unwrap()
}