Rollup merge of #65392 - Centril:nt-to-tt, r=Mark-Simulacrum
Move `Nonterminal::to_tokenstream` to parser & don't rely directly on parser in lowering

Split out from https://github.com/rust-lang/rust/pull/65324. r? @petrochenkov
This commit is contained in: commit 28d08f3986
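Before the diff: the heart of the change is a small dependency inversion. Rather than lowering calling the parser directly via `nt.to_tokenstream(...)`, the parser-side conversion becomes a free function, and `librustc` receives it as a plain function pointer (`NtToTokenstream`). A minimal, self-contained sketch of that pattern follows; every type in it is a unit-struct stand-in for the real `libsyntax` types, not the actual API:

// Sketch only: `Nonterminal`, `ParseSess`, `Span`, and `TokenStream` are
// stand-ins for the real libsyntax types, which the consumer crate
// deliberately does not depend on.
struct Nonterminal;
struct ParseSess;
struct Span;
struct TokenStream;

// The consumer only knows the *shape* of the conversion, not its implementation.
type NtToTokenstream = fn(&Nonterminal, &ParseSess, Span) -> TokenStream;

struct LoweringContext {
    nt_to_tokenstream: NtToTokenstream,
}

impl LoweringContext {
    fn lower_interpolated(&self, nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
        // Calling through the pointer breaks the cyclic crate dependency:
        // the crate that owns the parser supplies the function at the call site.
        (self.nt_to_tokenstream)(nt, sess, span)
    }
}

fn main() {
    // Hypothetical implementation supplied by the parser-owning crate.
    fn fake_impl(_: &Nonterminal, _: &ParseSess, _: Span) -> TokenStream { TokenStream }
    let ctx = LoweringContext { nt_to_tokenstream: fake_impl };
    let _ = ctx.lower_interpolated(&Nonterminal, &ParseSess, Span);
}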
@@ -70,7 +70,8 @@
 use syntax::print::pprust;
 use syntax::source_map::{respan, ExpnData, ExpnKind, DesugaringKind, Spanned};
 use syntax::symbol::{kw, sym, Symbol};
 use syntax::tokenstream::{TokenStream, TokenTree};
-use syntax::parse::token::{self, Token};
+use syntax::parse::token::{self, Nonterminal, Token};
+use syntax::parse::ParseSess;
 use syntax::visit::{self, Visitor};
 use syntax_pos::Span;
@@ -86,6 +87,11 @@ pub struct LoweringContext<'a> {

     resolver: &'a mut dyn Resolver,

+    /// HACK(Centril): there is a cyclic dependency between the parser and lowering
+    /// if we don't have this function pointer. To avoid that dependency so that
+    /// librustc is independent of the parser, we use dynamic dispatch here.
+    nt_to_tokenstream: NtToTokenstream,
+
     /// The items being lowered are collected here.
     items: BTreeMap<hir::HirId, hir::Item>,

@@ -180,6 +186,8 @@ pub trait Resolver {
     fn has_derives(&self, node_id: NodeId, derives: SpecialDerives) -> bool;
 }

+type NtToTokenstream = fn(&Nonterminal, &ParseSess, Span) -> TokenStream;
+
 /// Context of `impl Trait` in code, which determines whether it is allowed in an HIR subtree,
 /// and if so, what meaning it has.
 #[derive(Debug)]
@@ -236,6 +244,7 @@ pub fn lower_crate(
     dep_graph: &DepGraph,
     krate: &Crate,
     resolver: &mut dyn Resolver,
+    nt_to_tokenstream: NtToTokenstream,
 ) -> hir::Crate {
     // We're constructing the HIR here; we don't care what we will
     // read, since we haven't even constructed the *input* to
@@ -249,6 +258,7 @@ pub fn lower_crate(
        sess,
        cstore,
        resolver,
+       nt_to_tokenstream,
        items: BTreeMap::new(),
        trait_items: BTreeMap::new(),
        impl_items: BTreeMap::new(),
@@ -1022,7 +1032,7 @@ impl<'a> LoweringContext<'a> {
    fn lower_token(&mut self, token: Token) -> TokenStream {
        match token.kind {
            token::Interpolated(nt) => {
-               let tts = nt.to_tokenstream(&self.sess.parse_sess, token.span);
+               let tts = (self.nt_to_tokenstream)(&nt, &self.sess.parse_sess, token.span);
                self.lower_token_stream(tts)
            }
            _ => TokenTree::Token(token).into(),
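Why lowering needs this conversion at all: a `token::Interpolated` token carries an already-parsed AST fragment (for example, an `$e:expr` captured by `macro_rules!`), and `lower_token` must flatten it back into ordinary tokens. A toy model of that flattening, with all types invented for illustration rather than taken from rustc:

// Toy model only; `Token`, `Ast`, and `flatten` are invented for illustration
// and do not correspond to the real rustc types.
#[derive(Debug, Clone)]
enum Ast {
    Lit(i64),
    Add(Box<Ast>, Box<Ast>),
}

#[derive(Debug, Clone)]
enum Token {
    Plain(String),
    // An "interpolated" token: a fully parsed AST fragment smuggled
    // inside a single token, as produced by macro_rules! captures.
    Interpolated(Ast),
}

// Flattening turns every interpolated fragment back into plain tokens,
// which is the job `lower_token` delegates to `nt_to_tokenstream`.
fn flatten(tokens: Vec<Token>) -> Vec<Token> {
    tokens.into_iter().flat_map(|t| match t {
        Token::Interpolated(ast) => ast_to_tokens(&ast),
        plain => vec![plain],
    }).collect()
}

fn ast_to_tokens(ast: &Ast) -> Vec<Token> {
    match ast {
        Ast::Lit(n) => vec![Token::Plain(n.to_string())],
        Ast::Add(l, r) => {
            let mut out = ast_to_tokens(l);
            out.push(Token::Plain("+".into()));
            out.extend(ast_to_tokens(r));
            out
        }
    }
}

fn main() {
    let e = Ast::Add(Box::new(Ast::Lit(1)), Box::new(Ast::Lit(2)));
    let stream = vec![Token::Plain("(".into()), Token::Interpolated(e), Token::Plain(")".into())];
    println!("{:?}", flatten(stream)); // ["(", "1", "+", "2", ")"]
}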
@@ -541,7 +541,8 @@ pub fn lower_to_hir(
 ) -> Result<hir::map::Forest> {
     // Lower AST to HIR.
     let hir_forest = time(sess, "lowering AST -> HIR", || {
-        let hir_crate = lower_crate(sess, cstore, &dep_graph, &krate, resolver);
+        let nt_to_tokenstream = syntax::parse::nt_to_tokenstream;
+        let hir_crate = lower_crate(sess, cstore, &dep_graph, &krate, resolver, nt_to_tokenstream);

         if sess.opts.debugging_opts.hir_stats {
             hir_stats::print_hir_stats(&hir_crate);
@@ -175,7 +175,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
            }

            Interpolated(nt) => {
-               let stream = nt.to_tokenstream(sess, span);
+               let stream = parse::nt_to_tokenstream(&nt, sess, span);
                TokenTree::Group(Group {
                    delimiter: Delimiter::None,
                    stream,
@@ -4,10 +4,9 @@ use crate::ast::{self, CrateConfig, NodeId};
 use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
 use crate::source_map::{SourceMap, FilePathMapping};
 use crate::feature_gate::UnstableFeatures;
-use crate::parse::parser::Parser;
-use crate::parse::parser::emit_unclosed_delims;
-use crate::parse::token::TokenKind;
-use crate::tokenstream::{TokenStream, TokenTree};
+use crate::parse::parser::{Parser, emit_unclosed_delims};
+use crate::parse::token::{Nonterminal, TokenKind};
+use crate::tokenstream::{self, TokenStream, TokenTree};
 use crate::print::pprust;
 use crate::symbol::Symbol;

@@ -24,6 +23,8 @@ use std::borrow::Cow;
 use std::path::{Path, PathBuf};
 use std::str;

+use log::info;
+
 #[cfg(test)]
 mod tests;

@@ -407,3 +408,132 @@ impl SeqSep {
         }
     }
 }
+
+// NOTE(Centril): The following probably shouldn't be here but it acknowledges the
+// fact that architecturally, we are using parsing (read on below to understand why).
+
+pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
+    // A `Nonterminal` is often a parsed AST item. At this point we now
+    // need to convert the parsed AST to an actual token stream, e.g.,
+    // un-parse it, basically.
+    //
+    // Unfortunately there's not really a great way to do that in a
+    // guaranteed lossless fashion right now. The fallback here is to just
+    // stringify the AST node and reparse it, but this loses all span
+    // information.
+    //
+    // As a result, some AST nodes are annotated with the token stream they
+    // came from. Here we attempt to extract these lossless token streams
+    // before we fall back to the stringification.
+    let tokens = match *nt {
+        Nonterminal::NtItem(ref item) => {
+            prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+        }
+        Nonterminal::NtTraitItem(ref item) => {
+            prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+        }
+        Nonterminal::NtImplItem(ref item) => {
+            prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+        }
+        Nonterminal::NtIdent(ident, is_raw) => {
+            Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
+        }
+        Nonterminal::NtLifetime(ident) => {
+            Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
+        }
+        Nonterminal::NtTT(ref tt) => {
+            Some(tt.clone().into())
+        }
+        _ => None,
+    };
+
+    // FIXME(#43081): Avoid this pretty-print + reparse hack.
+    let source = pprust::nonterminal_to_string(nt);
+    let filename = FileName::macro_expansion_source_code(&source);
+    let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
+
+    // During early phases of the compiler the AST could get modified
+    // directly (e.g., attributes added or removed) and the internal cache
+    // of tokens may not be invalidated or updated. Consequently, if the
+    // "lossless" token stream disagrees with our actual stringification
+    // (which has historically been much more battle-tested) then we go
+    // with the lossy stream anyway (losing span information).
+    //
+    // Note that the comparison isn't `==` here to avoid comparing spans,
+    // but it *also* is a "probable" equality, which is a pretty weird
+    // definition. We mostly want to catch actual changes to the AST,
+    // like a `#[cfg]` being processed or some weird `macro_rules!`
+    // expansion.
+    //
+    // What we *don't* want to catch is the fact that a user-defined
+    // literal like `0xf` is stringified as `15`, causing the cached token
+    // stream to not be literal `==` token-wise (ignoring spans) to the
+    // token stream we got from stringification.
+    //
+    // Instead, the "probably equal" check here is "does each token
+    // recursively have the same discriminant?" We basically don't look at
+    // the token values here and assume that such fine-grained token stream
+    // modifications, including adding/removing typically non-semantic
+    // tokens such as extra braces and commas, don't happen.
+    if let Some(tokens) = tokens {
+        if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
+            return tokens
+        }
+        info!("cached tokens found, but they're not \"probably equal\", \
+               going with stringified version");
+    }
+    return tokens_for_real
+}
+
+fn prepend_attrs(
+    sess: &ParseSess,
+    attrs: &[ast::Attribute],
+    tokens: Option<&tokenstream::TokenStream>,
+    span: syntax_pos::Span
+) -> Option<tokenstream::TokenStream> {
+    let tokens = tokens?;
+    if attrs.len() == 0 {
+        return Some(tokens.clone())
+    }
+    let mut builder = tokenstream::TokenStreamBuilder::new();
+    for attr in attrs {
+        assert_eq!(attr.style, ast::AttrStyle::Outer,
+                   "inner attributes should prevent cached tokens from existing");
+
+        let source = pprust::attribute_to_string(attr);
+        let macro_filename = FileName::macro_expansion_source_code(&source);
+        if attr.is_sugared_doc {
+            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+            builder.push(stream);
+            continue
+        }
+
+        // Synthesize `# [ $path $tokens ]` manually here.
+        let mut brackets = tokenstream::TokenStreamBuilder::new();
+
+        // For simple paths, push the identifier directly...
+        if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
+            let ident = attr.path.segments[0].ident;
+            let token = token::Ident(ident.name, ident.as_str().starts_with("r#"));
+            brackets.push(tokenstream::TokenTree::token(token, ident.span));
+
+        // ... and for more complicated paths, fall back to a reparse hack that
+        // should eventually be removed.
+        } else {
+            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+            brackets.push(stream);
+        }
+
+        brackets.push(attr.tokens.clone());
+
+        // The spans we list here for `#` and for `[ ... ]` are both wrong in
+        // that they encompass more than each token, but it hopefully is "good
+        // enough" for now at least.
+        builder.push(tokenstream::TokenTree::token(token::Pound, attr.span));
+        let delim_span = tokenstream::DelimSpan::from_single(attr.span);
+        builder.push(tokenstream::TokenTree::Delimited(
+            delim_span, token::DelimToken::Bracket, brackets.build().into()));
+    }
+    builder.push(tokens.clone());
+    Some(builder.build())
+}
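The "probably equal" check described in the comments above can be illustrated in isolation: compare token kinds (discriminants) pairwise while ignoring spans and exact literal spellings, so `0xf` vs. `15` still match. The types below are invented stand-ins; the real check is `TokenStream::probably_equal_for_proc_macro`:

use std::mem::discriminant;

// Invented stand-ins for illustration; the real types live in libsyntax.
#[derive(Debug)]
enum TokenKind {
    Ident(String),
    Literal(String), // "0xf" and "15" are both Literal: same discriminant
    Pound,
}

struct Token {
    kind: TokenKind,
    span: (u32, u32), // deliberately ignored by the comparison
}

// "Probably equal": same length and pairwise-equal discriminants.
// Literal("0xf") vs Literal("15") passes; Ident vs Pound fails.
fn probably_equal(a: &[Token], b: &[Token]) -> bool {
    a.len() == b.len()
        && a.iter().zip(b).all(|(x, y)| discriminant(&x.kind) == discriminant(&y.kind))
}

fn main() {
    let cached = [Token { kind: TokenKind::Literal("0xf".into()), span: (0, 3) }];
    let reparsed = [Token { kind: TokenKind::Literal("15".into()), span: (10, 12) }];
    assert!(probably_equal(&cached, &reparsed)); // spellings differ, kinds agree
}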
@@ -4,16 +4,13 @@ pub use DelimToken::*;
 pub use LitKind::*;
 pub use TokenKind::*;

-use crate::ast::{self};
-use crate::parse::{parse_stream_from_source_str, ParseSess};
-use crate::print::pprust;
+use crate::ast;
 use crate::ptr::P;
 use crate::symbol::kw;
-use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
+use crate::tokenstream::TokenTree;

 use syntax_pos::symbol::Symbol;
-use syntax_pos::{self, Span, FileName, DUMMY_SP};
-use log::info;
+use syntax_pos::{self, Span, DUMMY_SP};

 use std::fmt;
 use std::mem;
@@ -737,131 +734,3 @@ impl fmt::Debug for Nonterminal {
         }
     }
 }
-
-impl Nonterminal {
-    pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
-        // A `Nonterminal` is often a parsed AST item. At this point we now
-        // need to convert the parsed AST to an actual token stream, e.g.
-        // un-parse it basically.
-        //
-        // Unfortunately there's not really a great way to do that in a
-        // guaranteed lossless fashion right now. The fallback here is to just
-        // stringify the AST node and reparse it, but this loses all span
-        // information.
-        //
-        // As a result, some AST nodes are annotated with the token stream they
-        // came from. Here we attempt to extract these lossless token streams
-        // before we fall back to the stringification.
-        let tokens = match *self {
-            Nonterminal::NtItem(ref item) => {
-                prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
-            }
-            Nonterminal::NtTraitItem(ref item) => {
-                prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
-            }
-            Nonterminal::NtImplItem(ref item) => {
-                prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
-            }
-            Nonterminal::NtIdent(ident, is_raw) => {
-                Some(TokenTree::token(Ident(ident.name, is_raw), ident.span).into())
-            }
-            Nonterminal::NtLifetime(ident) => {
-                Some(TokenTree::token(Lifetime(ident.name), ident.span).into())
-            }
-            Nonterminal::NtTT(ref tt) => {
-                Some(tt.clone().into())
-            }
-            _ => None,
-        };
-
-        // FIXME(#43081): Avoid this pretty-print + reparse hack
-        let source = pprust::nonterminal_to_string(self);
-        let filename = FileName::macro_expansion_source_code(&source);
-        let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
-
-        // During early phases of the compiler the AST could get modified
-        // directly (e.g., attributes added or removed) and the internal cache
-        // of tokens my not be invalidated or updated. Consequently if the
-        // "lossless" token stream disagrees with our actual stringification
-        // (which has historically been much more battle-tested) then we go
-        // with the lossy stream anyway (losing span information).
-        //
-        // Note that the comparison isn't `==` here to avoid comparing spans,
-        // but it *also* is a "probable" equality which is a pretty weird
-        // definition. We mostly want to catch actual changes to the AST
-        // like a `#[cfg]` being processed or some weird `macro_rules!`
-        // expansion.
-        //
-        // What we *don't* want to catch is the fact that a user-defined
-        // literal like `0xf` is stringified as `15`, causing the cached token
-        // stream to not be literal `==` token-wise (ignoring spans) to the
-        // token stream we got from stringification.
-        //
-        // Instead the "probably equal" check here is "does each token
-        // recursively have the same discriminant?" We basically don't look at
-        // the token values here and assume that such fine grained token stream
-        // modifications, including adding/removing typically non-semantic
-        // tokens such as extra braces and commas, don't happen.
-        if let Some(tokens) = tokens {
-            if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
-                return tokens
-            }
-            info!("cached tokens found, but they're not \"probably equal\", \
-                   going with stringified version");
-        }
-        return tokens_for_real
-    }
-}
-
-fn prepend_attrs(sess: &ParseSess,
-                 attrs: &[ast::Attribute],
-                 tokens: Option<&tokenstream::TokenStream>,
-                 span: syntax_pos::Span)
-    -> Option<tokenstream::TokenStream>
-{
-    let tokens = tokens?;
-    if attrs.len() == 0 {
-        return Some(tokens.clone())
-    }
-    let mut builder = tokenstream::TokenStreamBuilder::new();
-    for attr in attrs {
-        assert_eq!(attr.style, ast::AttrStyle::Outer,
-                   "inner attributes should prevent cached tokens from existing");
-
-        let source = pprust::attribute_to_string(attr);
-        let macro_filename = FileName::macro_expansion_source_code(&source);
-        if attr.is_sugared_doc {
-            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
-            builder.push(stream);
-            continue
-        }
-
-        // synthesize # [ $path $tokens ] manually here
-        let mut brackets = tokenstream::TokenStreamBuilder::new();
-
-        // For simple paths, push the identifier directly
-        if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
-            let ident = attr.path.segments[0].ident;
-            let token = Ident(ident.name, ident.as_str().starts_with("r#"));
-            brackets.push(tokenstream::TokenTree::token(token, ident.span));
-
-        // ... and for more complicated paths, fall back to a reparse hack that
-        // should eventually be removed.
-        } else {
-            let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
-            brackets.push(stream);
-        }
-
-        brackets.push(attr.tokens.clone());
-
-        // The span we list here for `#` and for `[ ... ]` are both wrong in
-        // that it encompasses more than each token, but it hopefully is "good
-        // enough" for now at least.
-        builder.push(tokenstream::TokenTree::token(Pound, attr.span));
-        let delim_span = DelimSpan::from_single(attr.span);
-        builder.push(tokenstream::TokenTree::Delimited(
-            delim_span, DelimToken::Bracket, brackets.build().into()));
-    }
-    builder.push(tokens.clone());
-    Some(builder.build())
-}