From 52251cd930dca697804b0c7c1582671683cc6a6c Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sat, 16 Sep 2017 21:43:05 +0300 Subject: [PATCH] Compress "small" spans to 32 bits and intern "large" spans --- src/librustc/ty/maps/plumbing.rs | 2 +- src/librustc/util/common.rs | 5 +- src/librustc_driver/profile/trace.rs | 4 +- src/libsyntax_pos/hygiene.rs | 2 +- src/libsyntax_pos/lib.rs | 64 +++++++----- src/libsyntax_pos/span_encoding.rs | 143 +++++++++++++++++++++++++++ 6 files changed, 188 insertions(+), 32 deletions(-) create mode 100644 src/libsyntax_pos/span_encoding.rs diff --git a/src/librustc/ty/maps/plumbing.rs b/src/librustc/ty/maps/plumbing.rs index 87a9eef0de5..581f47dc13c 100644 --- a/src/librustc/ty/maps/plumbing.rs +++ b/src/librustc/ty/maps/plumbing.rs @@ -221,7 +221,7 @@ macro_rules! define_maps { profq_msg!(tcx, ProfileQueriesMsg::QueryBegin( - span.clone(), + span.data(), QueryMsg::$name(profq_key!(tcx, key)) ) ); diff --git a/src/librustc/util/common.rs b/src/librustc/util/common.rs index 618a4ed331e..9e566d2b907 100644 --- a/src/librustc/util/common.rs +++ b/src/librustc/util/common.rs @@ -20,7 +20,7 @@ use std::path::Path; use std::time::{Duration, Instant}; use std::sync::mpsc::{Sender}; -use syntax_pos::{Span}; +use syntax_pos::{SpanData}; use ty::maps::{QueryMsg}; use dep_graph::{DepNode}; @@ -61,7 +61,8 @@ pub enum ProfileQueriesMsg { /// end a task TaskEnd, /// begin a new query - QueryBegin(Span, QueryMsg), + /// can't use `Span` because queries are sent to other thread + QueryBegin(SpanData, QueryMsg), /// query is satisfied by using an already-known value for the given key CacheHit, /// query requires running a provider; providers may nest, permitting queries to nest. diff --git a/src/librustc_driver/profile/trace.rs b/src/librustc_driver/profile/trace.rs index f5079836c3c..280f3c8c796 100644 --- a/src/librustc_driver/profile/trace.rs +++ b/src/librustc_driver/profile/trace.rs @@ -9,7 +9,7 @@ // except according to those terms. 
use super::*; -use syntax_pos::Span; +use syntax_pos::SpanData; use rustc::ty::maps::QueryMsg; use std::fs::File; use std::time::{Duration, Instant}; @@ -18,7 +18,7 @@ use rustc::dep_graph::{DepNode}; #[derive(Debug, Clone, Eq, PartialEq)] pub struct Query { - pub span: Span, + pub span: SpanData, pub msg: QueryMsg, } pub enum Effect { diff --git a/src/libsyntax_pos/hygiene.rs b/src/libsyntax_pos/hygiene.rs index 919804d7efd..4790fa0a7ed 100644 --- a/src/libsyntax_pos/hygiene.rs +++ b/src/libsyntax_pos/hygiene.rs @@ -25,7 +25,7 @@ use std::fmt; /// A SyntaxContext represents a chain of macro expansions (represented by marks). #[derive(Clone, Copy, PartialEq, Eq, Default, PartialOrd, Ord, Hash)] -pub struct SyntaxContext(u32); +pub struct SyntaxContext(pub(super) u32); #[derive(Copy, Clone, Default)] pub struct SyntaxContextData { diff --git a/src/libsyntax_pos/lib.rs b/src/libsyntax_pos/lib.rs index 27fbca19dcc..582f2798181 100644 --- a/src/libsyntax_pos/lib.rs +++ b/src/libsyntax_pos/lib.rs @@ -25,11 +25,10 @@ #![feature(optin_builtin_traits)] #![allow(unused_attributes)] #![feature(specialization)] -#![feature(staged_api)] use std::borrow::Cow; use std::cell::{Cell, RefCell}; -use std::cmp; +use std::cmp::{self, Ordering}; use std::fmt; use std::hash::Hasher; use std::ops::{Add, Sub}; @@ -47,6 +46,9 @@ extern crate serialize as rustc_serialize; // used by deriving pub mod hygiene; pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan, CompilerDesugaringKind}; +mod span_encoding; +pub use span_encoding::{Span, DUMMY_SP}; + pub mod symbol; pub type FileName = String; @@ -59,23 +61,33 @@ pub type FileName = String; /// able to use many of the functions on spans in codemap and you cannot assume /// that the length of the span = hi - lo; there may be space in the BytePos /// range between files. 
+/// +/// `SpanData` is public because `Span` uses a thread-local interner and can't be +/// sent to other threads, but some pieces of performance infra run in a separate thread. +/// Using `Span` is generally preferred. #[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd)] -pub struct Span { - #[unstable(feature = "rustc_private", issue = "27812")] - #[rustc_deprecated(since = "1.21", reason = "use getters/setters instead")] +pub struct SpanData { pub lo: BytePos, - #[unstable(feature = "rustc_private", issue = "27812")] - #[rustc_deprecated(since = "1.21", reason = "use getters/setters instead")] pub hi: BytePos, /// Information about where the macro came from, if this piece of /// code was created by a macro expansion. - #[unstable(feature = "rustc_private", issue = "27812")] - #[rustc_deprecated(since = "1.21", reason = "use getters/setters instead")] pub ctxt: SyntaxContext, } -#[allow(deprecated)] -pub const DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), ctxt: NO_EXPANSION }; +// The interner is thread-local, so `Span` shouldn't move between threads. +impl !Send for Span {} +impl !Sync for Span {} + +impl PartialOrd for Span { + fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { + PartialOrd::partial_cmp(&self.data(), &rhs.data()) + } +} +impl Ord for Span { + fn cmp(&self, rhs: &Self) -> Ordering { + Ord::cmp(&self.data(), &rhs.data()) + } +} /// A collection of spans. 
Spans have two orthogonal attributes: /// @@ -90,38 +102,32 @@ pub struct MultiSpan { } impl Span { - #[allow(deprecated)] - #[inline] - pub fn new(lo: BytePos, hi: BytePos, ctxt: SyntaxContext) -> Self { - if lo <= hi { Span { lo, hi, ctxt } } else { Span { lo: hi, hi: lo, ctxt } } - } - - #[allow(deprecated)] #[inline] pub fn lo(self) -> BytePos { - self.lo + self.data().lo } #[inline] pub fn with_lo(self, lo: BytePos) -> Span { - Span::new(lo, self.hi(), self.ctxt()) + let base = self.data(); + Span::new(lo, base.hi, base.ctxt) } - #[allow(deprecated)] #[inline] pub fn hi(self) -> BytePos { - self.hi + self.data().hi } #[inline] pub fn with_hi(self, hi: BytePos) -> Span { - Span::new(self.lo(), hi, self.ctxt()) + let base = self.data(); + Span::new(base.lo, hi, base.ctxt) } - #[allow(deprecated)] #[inline] pub fn ctxt(self) -> SyntaxContext { - self.ctxt + self.data().ctxt } #[inline] pub fn with_ctxt(self, ctxt: SyntaxContext) -> Span { - Span::new(self.lo(), self.hi(), ctxt) + let base = self.data(); + Span::new(base.lo, base.hi, ctxt) } /// Returns a new span representing just the end-point of this span @@ -342,6 +348,12 @@ impl fmt::Debug for Span { } } +impl fmt::Debug for SpanData { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + SPAN_DEBUG.with(|span_debug| span_debug.get()(Span::new(self.lo, self.hi, self.ctxt), f)) + } +} + impl MultiSpan { pub fn new() -> MultiSpan { MultiSpan { diff --git a/src/libsyntax_pos/span_encoding.rs b/src/libsyntax_pos/span_encoding.rs new file mode 100644 index 00000000000..c2b32171a9a --- /dev/null +++ b/src/libsyntax_pos/span_encoding.rs @@ -0,0 +1,143 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +// Spans are encoded using a 1-bit tag and 2 different encoding formats (one for each tag value). +// One format is used for keeping span data inline, +// another contains index into an out-of-line span interner. +// The encoding format for inline spans was obtained by optimizing over crates in rustc/libstd. +// See https://internals.rust-lang.org/t/rfc-compiler-refactoring-spans/1357/28 + +use {BytePos, SpanData}; +use hygiene::SyntaxContext; + +use rustc_data_structures::fx::FxHashMap; +use std::cell::RefCell; + +/// A compressed span. +/// Contains either fields of `SpanData` inline if they are small, or index into span interner. +/// The primary goal of `Span` is to be as small as possible and fit into other structures +/// (that's why it uses `packed` as well). Decoding speed is the second priority. +/// See `SpanData` for the info on span fields in decoded representation. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +#[repr(packed)] +pub struct Span(u32); + +/// Dummy span, both position and length are zero, syntax context is zero as well. +/// This span is kept inline and encoded with format 0. +pub const DUMMY_SP: Span = Span(0); + +impl Span { + #[inline] + pub fn new(lo: BytePos, hi: BytePos, ctxt: SyntaxContext) -> Self { + encode(&match lo <= hi { + true => SpanData { lo, hi, ctxt }, + false => SpanData { lo: hi, hi: lo, ctxt }, + }) + } + + #[inline] + pub fn data(self) -> SpanData { + decode(self) + } +} + +// Tags +const TAG_INLINE: u32 = 0; +const TAG_INTERNED: u32 = 1; +const TAG_MASK: u32 = 1; + +// Field indexes +const BASE_INDEX: usize = 0; +const LEN_INDEX: usize = 1; +const CTXT_INDEX: usize = 2; + +// Tag = 0, inline format. +// ----------------------------------- +// | base 31:8 | len 7:1 | tag 0:0 | +// ----------------------------------- +const INLINE_SIZES: [u32; 3] = [24, 7, 0]; +const INLINE_OFFSETS: [u32; 3] = [8, 1, 1]; + +// Tag = 1, interned format. 
+// ------------------------ +// | index 31:1 | tag 0:0 | +// ------------------------ +const INTERNED_INDEX_SIZE: u32 = 31; +const INTERNED_INDEX_OFFSET: u32 = 1; + +#[inline] +fn encode(sd: &SpanData) -> Span { + let (base, len, ctxt) = (sd.lo.0, sd.hi.0 - sd.lo.0, sd.ctxt.0); + + let val = if (base >> INLINE_SIZES[BASE_INDEX]) == 0 && + (len >> INLINE_SIZES[LEN_INDEX]) == 0 && + (ctxt >> INLINE_SIZES[CTXT_INDEX]) == 0 { + (base << INLINE_OFFSETS[BASE_INDEX]) | (len << INLINE_OFFSETS[LEN_INDEX]) | + (ctxt << INLINE_OFFSETS[CTXT_INDEX]) | TAG_INLINE + } else { + let index = with_span_interner(|interner| interner.intern(sd)); + (index << INTERNED_INDEX_OFFSET) | TAG_INTERNED + }; + Span(val) +} + +#[inline] +fn decode(span: Span) -> SpanData { + let val = span.0; + + // Extract a field at position `pos` having size `size`. + let extract = |pos: u32, size: u32| { + let mask = ((!0u32) as u64 >> (32 - size)) as u32; // Can't shift u32 by 32 + (val >> pos) & mask + }; + + let (base, len, ctxt) = if val & TAG_MASK == TAG_INLINE {( + extract(INLINE_OFFSETS[BASE_INDEX], INLINE_SIZES[BASE_INDEX]), + extract(INLINE_OFFSETS[LEN_INDEX], INLINE_SIZES[LEN_INDEX]), + extract(INLINE_OFFSETS[CTXT_INDEX], INLINE_SIZES[CTXT_INDEX]), + )} else { + let index = extract(INTERNED_INDEX_OFFSET, INTERNED_INDEX_SIZE); + return with_span_interner(|interner| *interner.get(index)); + }; + SpanData { lo: BytePos(base), hi: BytePos(base + len), ctxt: SyntaxContext(ctxt) } +} + +#[derive(Default)] +struct SpanInterner { + spans: FxHashMap<SpanData, u32>, + span_data: Vec<SpanData>, +} + +impl SpanInterner { + fn intern(&mut self, span_data: &SpanData) -> u32 { + if let Some(index) = self.spans.get(span_data) { + return *index; + } + + let index = self.spans.len() as u32; + self.span_data.push(*span_data); + self.spans.insert(*span_data, index); + index + } + + #[inline] + fn get(&self, index: u32) -> &SpanData { + &self.span_data[index as usize] + } +} + +// If an interner exists in TLS, return it. 
Otherwise, prepare a fresh one. +#[inline] +fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T { + thread_local!(static INTERNER: RefCell<SpanInterner> = { + RefCell::new(SpanInterner::default()) + }); + INTERNER.with(|interner| f(&mut *interner.borrow_mut())) +}