Shrink span encoding further
Spans are now stored in a more compact form that saves at least 1 byte per span (the indirect/direct encoding byte) and at most 3 bytes per span (the indirect/direct encoding byte, the context byte, and the length byte). As a result, libcore metadata shrinks by 1.5MB.
This commit is contained in:
parent
3cdd004e55
commit
09e619d62e
@ -508,21 +508,19 @@ fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> ExpnId {
|
||||
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
|
||||
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span {
|
||||
let start = decoder.position();
|
||||
let mode = SpanEncodingMode::decode(decoder);
|
||||
let data = match mode {
|
||||
SpanEncodingMode::Direct => SpanData::decode(decoder),
|
||||
SpanEncodingMode::RelativeOffset(offset) => {
|
||||
decoder.with_position(start - offset, |decoder| {
|
||||
let mode = SpanEncodingMode::decode(decoder);
|
||||
debug_assert!(matches!(mode, SpanEncodingMode::Direct));
|
||||
SpanData::decode(decoder)
|
||||
})
|
||||
}
|
||||
SpanEncodingMode::AbsoluteOffset(addr) => decoder.with_position(addr, |decoder| {
|
||||
let mode = SpanEncodingMode::decode(decoder);
|
||||
debug_assert!(matches!(mode, SpanEncodingMode::Direct));
|
||||
SpanData::decode(decoder)
|
||||
}),
|
||||
let tag = SpanTag(decoder.peek_byte());
|
||||
let data = if tag.kind() == SpanKind::Indirect {
|
||||
// Skip past the tag we just peek'd.
|
||||
decoder.read_u8();
|
||||
let offset_or_position = decoder.read_usize();
|
||||
let position = if tag.is_relative_offset() {
|
||||
start - offset_or_position
|
||||
} else {
|
||||
offset_or_position
|
||||
};
|
||||
decoder.with_position(position, SpanData::decode)
|
||||
} else {
|
||||
SpanData::decode(decoder)
|
||||
};
|
||||
Span::new(data.lo, data.hi, data.ctxt, data.parent)
|
||||
}
|
||||
@ -530,17 +528,17 @@ fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Span {
|
||||
|
||||
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for SpanData {
|
||||
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> SpanData {
|
||||
let ctxt = SyntaxContext::decode(decoder);
|
||||
let tag = u8::decode(decoder);
|
||||
let tag = SpanTag::decode(decoder);
|
||||
let ctxt = tag.context().unwrap_or_else(|| SyntaxContext::decode(decoder));
|
||||
|
||||
if tag == TAG_PARTIAL_SPAN {
|
||||
if tag.kind() == SpanKind::Partial {
|
||||
return DUMMY_SP.with_ctxt(ctxt).data();
|
||||
}
|
||||
|
||||
debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
|
||||
debug_assert!(tag.kind() == SpanKind::Local || tag.kind() == SpanKind::Foreign);
|
||||
|
||||
let lo = BytePos::decode(decoder);
|
||||
let len = BytePos::decode(decoder);
|
||||
let len = tag.length().unwrap_or_else(|| BytePos::decode(decoder));
|
||||
let hi = lo + len;
|
||||
|
||||
let Some(sess) = decoder.sess else {
|
||||
@ -581,7 +579,7 @@ fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> SpanData {
|
||||
// treat the 'local' and 'foreign' cases almost identically during deserialization:
|
||||
// we can call `imported_source_file` for the proper crate, and binary search
|
||||
// through the returned slice using our span.
|
||||
let source_file = if tag == TAG_VALID_SPAN_LOCAL {
|
||||
let source_file = if tag.kind() == SpanKind::Local {
|
||||
decoder.cdata().imported_source_file(metadata_index, sess)
|
||||
} else {
|
||||
// When we encode a proc-macro crate, all `Span`s should be encoded
|
||||
|
@ -177,15 +177,17 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
|
||||
// previously saved offset must be smaller than the current position.
|
||||
let offset = s.opaque.position() - last_location;
|
||||
if offset < last_location {
|
||||
SpanEncodingMode::RelativeOffset(offset).encode(s)
|
||||
SpanTag::indirect(true).encode(s);
|
||||
offset.encode(s);
|
||||
} else {
|
||||
SpanEncodingMode::AbsoluteOffset(last_location).encode(s)
|
||||
SpanTag::indirect(false).encode(s);
|
||||
last_location.encode(s);
|
||||
}
|
||||
}
|
||||
Entry::Vacant(v) => {
|
||||
let position = s.opaque.position();
|
||||
v.insert(position);
|
||||
SpanEncodingMode::Direct.encode(s);
|
||||
// Data is encoded with a SpanTag prefix (see below).
|
||||
self.data().encode(s);
|
||||
}
|
||||
}
|
||||
@ -225,14 +227,15 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
|
||||
// IMPORTANT: If this is ever changed, be sure to update
|
||||
// `rustc_span::hygiene::raw_encode_expn_id` to handle
|
||||
// encoding `ExpnData` for proc-macro crates.
|
||||
if s.is_proc_macro {
|
||||
SyntaxContext::root().encode(s);
|
||||
} else {
|
||||
self.ctxt.encode(s);
|
||||
}
|
||||
let ctxt = if s.is_proc_macro { SyntaxContext::root() } else { self.ctxt };
|
||||
|
||||
if self.is_dummy() {
|
||||
return TAG_PARTIAL_SPAN.encode(s);
|
||||
let tag = SpanTag::new(SpanKind::Partial, ctxt, 0);
|
||||
tag.encode(s);
|
||||
if tag.context().is_none() {
|
||||
ctxt.encode(s);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// The Span infrastructure should make sure that this invariant holds:
|
||||
@ -250,7 +253,12 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
|
||||
if !source_file.contains(self.hi) {
|
||||
// Unfortunately, macro expansion still sometimes generates Spans
|
||||
// that are malformed in this way.
|
||||
return TAG_PARTIAL_SPAN.encode(s);
|
||||
let tag = SpanTag::new(SpanKind::Partial, ctxt, 0);
|
||||
tag.encode(s);
|
||||
if tag.context().is_none() {
|
||||
ctxt.encode(s);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// There are two possible cases here:
|
||||
@ -269,7 +277,7 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
|
||||
// if we're a proc-macro crate.
|
||||
// This allows us to avoid loading the dependencies of proc-macro crates: all of
|
||||
// the information we need to decode `Span`s is stored in the proc-macro crate.
|
||||
let (tag, metadata_index) = if source_file.is_imported() && !s.is_proc_macro {
|
||||
let (kind, metadata_index) = if source_file.is_imported() && !s.is_proc_macro {
|
||||
// To simplify deserialization, we 'rebase' this span onto the crate it originally came
|
||||
// from (the crate that 'owns' the file it references). These rebased 'lo' and 'hi'
|
||||
// values are relative to the source map information for the 'foreign' crate whose
|
||||
@ -287,7 +295,7 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
|
||||
}
|
||||
};
|
||||
|
||||
(TAG_VALID_SPAN_FOREIGN, metadata_index)
|
||||
(SpanKind::Foreign, metadata_index)
|
||||
} else {
|
||||
// Record the fact that we need to encode the data for this `SourceFile`
|
||||
let source_files =
|
||||
@ -296,7 +304,7 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
|
||||
let metadata_index: u32 =
|
||||
metadata_index.try_into().expect("cannot export more than U32_MAX files");
|
||||
|
||||
(TAG_VALID_SPAN_LOCAL, metadata_index)
|
||||
(SpanKind::Local, metadata_index)
|
||||
};
|
||||
|
||||
// Encode the start position relative to the file start, so we profit more from the
|
||||
@ -307,14 +315,20 @@ fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
|
||||
// from the variable-length integer encoding that we use.
|
||||
let len = self.hi - self.lo;
|
||||
|
||||
let tag = SpanTag::new(kind, ctxt, len.0 as usize);
|
||||
tag.encode(s);
|
||||
if tag.context().is_none() {
|
||||
ctxt.encode(s);
|
||||
}
|
||||
lo.encode(s);
|
||||
len.encode(s);
|
||||
if tag.length().is_none() {
|
||||
len.encode(s);
|
||||
}
|
||||
|
||||
// Encode the index of the `SourceFile` for the span, in order to make decoding faster.
|
||||
metadata_index.encode(s);
|
||||
|
||||
if tag == TAG_VALID_SPAN_FOREIGN {
|
||||
if kind == SpanKind::Foreign {
|
||||
// This needs to be two lines to avoid holding the `s.source_file_cache`
|
||||
// while calling `cnum.encode(s)`
|
||||
let cnum = s.source_file_cache.0.cnum;
|
||||
|
@ -66,13 +66,6 @@ pub(crate) fn rustc_version(cfg_version: &'static str) -> String {
|
||||
/// unsigned integer, and further followed by the rustc version string.
|
||||
pub const METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, METADATA_VERSION];
|
||||
|
||||
/// Discriminant written in front of every encoded span, telling the decoder where the span
/// data is located.
#[derive(Encodable, Decodable)]
|
||||
enum SpanEncodingMode {
|
||||
/// A directly-encoded span (a `Direct` marker followed by the data) was written earlier; it
/// starts this many bytes before the position at which this mode was read.
RelativeOffset(usize),
|
||||
/// A directly-encoded span was written earlier; it starts at this absolute position.
AbsoluteOffset(usize),
|
||||
/// The span data follows immediately.
Direct,
|
||||
}
|
||||
|
||||
/// A value of type T referred to by its absolute position
|
||||
/// in the metadata, and which can be decoded lazily.
|
||||
///
|
||||
@ -488,10 +481,88 @@ pub struct AttrFlags: u8 {
|
||||
}
|
||||
}
|
||||
|
||||
// Tags used for encoding Spans:
|
||||
// Emitted for a valid span unless its source file is imported and the crate being encoded is
// not a proc-macro crate (that case uses TAG_VALID_SPAN_FOREIGN).
const TAG_VALID_SPAN_LOCAL: u8 = 0;
|
||||
// Emitted for a valid span whose source file is imported, when not encoding a proc-macro crate.
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
|
||||
// Emitted for dummy spans and for spans where `source_file.contains(self.hi)` is false; the
// decoder turns it into `DUMMY_SP` carrying the decoded syntax context.
const TAG_PARTIAL_SPAN: u8 = 2;
|
||||
/// A span tag byte encodes a bunch of data, so that we can cut out a few extra bytes from span
|
||||
/// encodings (which are very common, for example, libcore has ~650,000 unique spans and over 1.1
|
||||
/// million references to prior-written spans).
|
||||
///
|
||||
/// The byte format is split into several parts (most significant bit first):
|
||||
///
|
||||
/// [ a a a a a c d d ]
|
||||
///
|
||||
/// `a` bits represent the span length. We have 5 bits, so we can store lengths up to 30 inline, with
|
||||
/// an all-1s pattern (31) representing that the length is stored separately.
|
||||
///
|
||||
/// `c` has two meanings. For direct span encodings it is set when the span context is the root
|
||||
/// context (which is then not stored as a separate varint). For indirect encodings it is set when
|
||||
/// the stored value is a relative offset and clear when it is an absolute position.
|
||||
///
|
||||
/// `d` bits represent the kind of span we are storing (local, foreign, partial, indirect); see
/// `SpanKind` for the exact values.
|
||||
#[derive(Encodable, Decodable, Copy, Clone)]
|
||||
struct SpanTag(u8);
|
||||
|
||||
/// The kind of span encoding, stored in the low two bits of a `SpanTag`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
enum SpanKind {
|
||||
/// A valid span whose data follows the tag. Chosen by the encoder unless the source file is
/// imported and the crate being encoded is not a proc-macro crate.
Local = 0b00,
|
||||
/// A valid span whose data follows the tag. Chosen by the encoder when the source file is
/// imported and the crate being encoded is not a proc-macro crate.
Foreign = 0b01,
|
||||
/// Only the syntax context is recorded. Used for dummy spans and for spans where
/// `source_file.contains(self.hi)` is false; decodes to `DUMMY_SP` with that context.
Partial = 0b10,
|
||||
// Indicates the actual span contents are elsewhere.
|
||||
// If this is the kind, then the span context bit represents whether it is a relative or
|
||||
// absolute offset.
|
||||
Indirect = 0b11,
|
||||
}
|
||||
|
||||
// Packing and unpacking of the single-byte span tag: bits 0-1 hold the kind, bit 2 is the
// context/relative flag, and bits 3-7 hold the inline length.
impl SpanTag {
|
||||
/// Builds the tag for a directly-encoded span.
///
/// `kind` goes into the low two bits, bit 2 is set when `context` is the root context, and
/// `length` goes into the top five bits when it is below 31. Otherwise the top five bits are
/// all ones, and `length()` will return `None` so the caller knows to write the length
/// separately. For `SpanKind::Indirect` tags use `indirect` instead.
fn new(kind: SpanKind, context: rustc_span::SyntaxContext, length: usize) -> SpanTag {
|
||||
let mut data = 0u8;
|
||||
data |= kind as u8;
|
||||
if context.is_root() {
|
||||
data |= 0b100;
|
||||
}
|
||||
// 0b11111 (31): the largest value the five length bits can hold.
let all_1s_len = (0xffu8 << 3) >> 3;
|
||||
// strictly less than - all 1s pattern is a sentinel for storage being out of band.
|
||||
if length < all_1s_len as usize {
|
||||
// `length` is below 31 here, so the narrowing cast cannot lose bits.
data |= (length as u8) << 3;
|
||||
} else {
|
||||
data |= all_1s_len << 3;
|
||||
}
|
||||
|
||||
SpanTag(data)
|
||||
}
|
||||
|
||||
/// Builds the tag for a span whose data was written elsewhere.
///
/// Bit 2 is set when `relative` is true, meaning the value following the tag is an offset to
/// subtract from the tag's own position; when clear, that value is an absolute position.
/// The length bits are left at zero.
fn indirect(relative: bool) -> SpanTag {
|
||||
let mut tag = SpanTag(SpanKind::Indirect as u8);
|
||||
if relative {
|
||||
tag.0 |= 0b100;
|
||||
}
|
||||
tag
|
||||
}
|
||||
|
||||
/// Returns the span kind stored in the low two bits.
fn kind(self) -> SpanKind {
|
||||
let masked = self.0 & 0b11;
|
||||
match masked {
|
||||
0b00 => SpanKind::Local,
|
||||
0b01 => SpanKind::Foreign,
|
||||
0b10 => SpanKind::Partial,
|
||||
0b11 => SpanKind::Indirect,
|
||||
// The two-bit mask above leaves only the four values already matched.
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether an indirect tag refers to its target by relative offset (bit 2 set).
///
/// Only meaningful for `SpanKind::Indirect` tags; this is checked in debug builds.
fn is_relative_offset(self) -> bool {
|
||||
debug_assert_eq!(self.kind(), SpanKind::Indirect);
|
||||
self.0 & 0b100 != 0
|
||||
}
|
||||
|
||||
/// Returns the root syntax context when bit 2 is set, or `None` when the context is encoded
/// separately after the tag.
///
/// Bit 2 has a different meaning for indirect tags (see `is_relative_offset`), so this is
/// intended for direct tags only.
fn context(self) -> Option<rustc_span::SyntaxContext> {
|
||||
if self.0 & 0b100 != 0 { Some(rustc_span::SyntaxContext::root()) } else { None }
|
||||
}
|
||||
|
||||
/// Returns the span length stored in the top five bits, or `None` when those bits hold the
/// all-ones sentinel (31), meaning the length is encoded separately.
fn length(self) -> Option<rustc_span::BytePos> {
|
||||
// Must match the sentinel used in `new`.
let all_1s_len = (0xffu8 << 3) >> 3;
|
||||
let len = self.0 >> 3;
|
||||
if len != all_1s_len { Some(rustc_span::BytePos(u32::from(len))) } else { None }
|
||||
}
|
||||
}
|
||||
|
||||
// Tags for encoding Symbol's
|
||||
const SYMBOL_STR: u8 = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user