proc_macro: stop using a remote object handle for Ident

Validating and normalizing all Unicode identifiers on the client would
require a dependency on `unicode-normalization` and `rustc_lexer`, which is
currently not possible for `proc_macro` because it is built concurrently
with `std` and `core`. Instead, ASCII identifiers are validated locally, and
an RPC message is used to validate Unicode identifiers when needed.

String values are interned on both the server and the client when
deserializing, to avoid unnecessary copies and keep Ident cheap to copy and
move. This appears to be important for performance.

The client-side interner is based roughly on the one from rustc_span, and uses
an arena inspired by rustc_arena.

RPC messages passing symbols always include the full value. This could
potentially be optimized in the future if it is revealed to be a
performance bottleneck.

Despite now having a relevant implementation of Display for Ident, ToString is
still specialized, as it is a hot path for this object.

The symbol infrastructure will also be used for literals in the next
part.
This commit is contained in:
Nika Layzell 2022-06-30 21:05:46 -04:00
parent e0dce6ec8d
commit 491fccfbe3
11 changed files with 441 additions and 114 deletions

View File

@ -11,13 +11,13 @@
use rustc_parse::parse_stream_from_source_str;
use rustc_session::parse::ParseSess;
use rustc_span::def_id::CrateNum;
use rustc_span::symbol::{self, kw, sym, Symbol};
use rustc_span::symbol::{self, sym, Symbol};
use rustc_span::{BytePos, FileName, Pos, SourceFile, Span};
use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Punct, TokenTree};
use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Ident, Punct, TokenTree};
use pm::{Delimiter, Level, LineColumn};
use std::ascii;
use std::ops::Bound;
use std::{ascii, panic};
trait FromInternal<T> {
fn from_internal(x: T) -> Self;
@ -50,7 +50,7 @@ fn to_internal(self) -> token::Delimiter {
}
impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
for Vec<TokenTree<TokenStream, Span, Ident, Literal>>
for Vec<TokenTree<TokenStream, Span, Symbol, Literal>>
{
fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self {
use rustc_ast::token::*;
@ -135,13 +135,12 @@ fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self {
Question => op("?"),
SingleQuote => op("'"),
Ident(name, false) if name == kw::DollarCrate => trees.push(TokenTree::Ident(Ident::dollar_crate(span))),
Ident(name, is_raw) => trees.push(TokenTree::Ident(Ident::new(rustc.sess(), name, is_raw, span))),
Ident(sym, is_raw) => trees.push(TokenTree::Ident(Ident { sym, is_raw, span })),
Lifetime(name) => {
let ident = symbol::Ident::new(name, span).without_first_quote();
trees.extend([
TokenTree::Punct(Punct { ch: b'\'', joint: true, span }),
TokenTree::Ident(Ident::new(rustc.sess(), ident.name, false, span)),
TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }),
]);
}
Literal(lit) => trees.push(TokenTree::Literal(self::Literal { lit, span })),
@ -170,7 +169,7 @@ fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self {
}
Interpolated(nt) if let NtIdent(ident, is_raw) = *nt => {
trees.push(TokenTree::Ident(Ident::new(rustc.sess(), ident.name, is_raw, ident.span)))
trees.push(TokenTree::Ident(Ident { sym: ident.name, is_raw, span: ident.span }))
}
Interpolated(nt) => {
@ -200,11 +199,14 @@ fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self {
}
}
impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Ident, Literal> {
impl ToInternal<TokenStream>
for (TokenTree<TokenStream, Span, Symbol, Literal>, &mut Rustc<'_, '_>)
{
fn to_internal(self) -> TokenStream {
use rustc_ast::token::*;
let (ch, joint, span) = match self {
let (tree, rustc) = self;
let (ch, joint, span) = match tree {
TokenTree::Punct(Punct { ch, joint, span }) => (ch, joint, span),
TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. } }) => {
return tokenstream::TokenTree::Delimited(
@ -215,6 +217,7 @@ fn to_internal(self) -> TokenStream {
.into();
}
TokenTree::Ident(self::Ident { sym, is_raw, span }) => {
rustc.sess().symbol_gallery.insert(sym, span);
return tokenstream::TokenTree::token(Ident(sym, is_raw), span).into();
}
TokenTree::Literal(self::Literal {
@ -289,33 +292,6 @@ fn to_internal(self) -> rustc_errors::Level {
pub struct FreeFunctions;
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Ident {
sym: Symbol,
is_raw: bool,
span: Span,
}
impl Ident {
fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident {
let sym = nfc_normalize(sym.as_str());
let string = sym.as_str();
if !rustc_lexer::is_ident(string) {
panic!("`{:?}` is not a valid identifier", string)
}
if is_raw && !sym.can_be_raw() {
panic!("`{}` cannot be a raw identifier", string);
}
sess.symbol_gallery.insert(sym, span);
Ident { sym, is_raw, span }
}
fn dollar_crate(span: Span) -> Ident {
// `$crate` is accepted as an ident only if it comes from the compiler.
Ident { sym: kw::DollarCrate, is_raw: false, span }
}
}
// FIXME(eddyb) `Literal` should not expose internal `Debug` impls.
#[derive(Clone, Debug)]
pub struct Literal {
@ -357,12 +333,12 @@ fn lit(&mut self, kind: token::LitKind, symbol: Symbol, suffix: Option<Symbol>)
impl server::Types for Rustc<'_, '_> {
type FreeFunctions = FreeFunctions;
type TokenStream = TokenStream;
type Ident = Ident;
type Literal = Literal;
type SourceFile = Lrc<SourceFile>;
type MultiSpan = Vec<Span>;
type Diagnostic = Diagnostic;
type Span = Span;
type Symbol = Symbol;
}
impl server::FreeFunctions for Rustc<'_, '_> {
@ -453,22 +429,22 @@ fn expand_expr(&mut self, stream: &Self::TokenStream) -> Result<Self::TokenStrea
fn from_token_tree(
&mut self,
tree: TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>,
tree: TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>,
) -> Self::TokenStream {
tree.to_internal()
(tree, &mut *self).to_internal()
}
fn concat_trees(
&mut self,
base: Option<Self::TokenStream>,
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>>,
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>>,
) -> Self::TokenStream {
let mut builder = tokenstream::TokenStreamBuilder::new();
if let Some(base) = base {
builder.push(base);
}
for tree in trees {
builder.push(tree.to_internal());
builder.push((tree, &mut *self).to_internal());
}
builder.build()
}
@ -491,25 +467,11 @@ fn concat_streams(
fn into_trees(
&mut self,
stream: Self::TokenStream,
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>> {
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>> {
FromInternal::from_internal((stream, self))
}
}
impl server::Ident for Rustc<'_, '_> {
fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident {
Ident::new(self.sess(), Symbol::intern(string), is_raw, span)
}
fn span(&mut self, ident: Self::Ident) -> Self::Span {
ident.span
}
fn with_span(&mut self, ident: Self::Ident, span: Self::Span) -> Self::Ident {
Ident { span, ..ident }
}
}
impl server::Literal for Rustc<'_, '_> {
fn from_str(&mut self, s: &str) -> Result<Self::Literal, ()> {
let name = FileName::proc_macro_source_code(s);
@ -812,6 +774,13 @@ fn recover_proc_macro_span(&mut self, id: usize) -> Self::Span {
}
}
impl server::Symbol for Rustc<'_, '_> {
    /// Runs `nfc_normalize` on `string` and hands back the resulting symbol
    /// when `rustc_lexer::is_ident` accepts the normalized text.
    ///
    /// Returns `Err(())` for anything that is not an identifier after
    /// normalization.
    fn normalize_and_validate_ident(&mut self, string: &str) -> Result<Self::Symbol, ()> {
        let normalized = nfc_normalize(string);
        if !rustc_lexer::is_ident(normalized.as_str()) {
            return Err(());
        }
        Ok(normalized)
    }
}
impl server::Server for Rustc<'_, '_> {
fn globals(&mut self) -> ExpnGlobals<Self::Span> {
ExpnGlobals {
@ -820,4 +789,12 @@ fn globals(&mut self) -> ExpnGlobals<Self::Span> {
mixed_site: self.mixed_site,
}
}
/// Turns a string received over RPC into this server's symbol type by
/// interning it with `Symbol::intern`.
fn intern_symbol(string: &str) -> Self::Symbol {
Symbol::intern(string)
}
/// Looks up the string value of `symbol` and invokes `f` with it.
///
/// `as_str` already yields a `&str`, so it is handed to the callback
/// directly rather than through an extra layer of borrowing.
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
    f(symbol.as_str())
}
}

View File

@ -0,0 +1,113 @@
//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
//!
//! This is unfortunately a minimal re-implementation rather than a dependency
//! as it is difficult to depend on crates from within `proc_macro`, due to it
//! being built at the same time as `std`.
use std::cell::{Cell, RefCell};
use std::cmp;
use std::mem::MaybeUninit;
use std::ops::Range;
use std::ptr;
use std::slice;
use std::str;
// The arenas start with PAGE-sized chunks, and then each new chunk is twice as
// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon
// we stop growing. This scales well, from arenas that are barely used up to
// arenas that are used for 100s of MiBs. Note also that the chosen sizes match
// the usual sizes of pages and huge pages on Linux.
//
// A single chunk may still be larger than HUGE_PAGE: `Arena::grow` always
// makes the new chunk big enough to hold the request that triggered it.

// Size in bytes of the first chunk an arena allocates.
const PAGE: usize = 4096;
// Size in bytes at which the chunk-doubling stops (2 MiB).
const HUGE_PAGE: usize = 2 * 1024 * 1024;
/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
///
/// This is unfortunately a complete re-implementation rather than a dependency
/// as it is difficult to depend on crates from within `proc_macro`, due to it
/// being built at the same time as `std`.
///
/// This arena doesn't have support for allocating anything other than byte
/// slices, as that is all that is necessary.
///
/// Allocations are carved off the *end* of the current chunk: `end` moves
/// downwards towards `start` until a request no longer fits, at which point a
/// new chunk is created.
pub(crate) struct Arena {
/// Lowest address of the free region in the current chunk (null until the
/// first chunk exists).
start: Cell<*mut MaybeUninit<u8>>,
/// One-past-the-end address of the free region in the current chunk; each
/// allocation lowers it by the allocated size.
end: Cell<*mut MaybeUninit<u8>>,
/// Every chunk created so far. Holding the boxes here keeps all handed-out
/// slices backed by live memory for as long as the arena exists.
chunks: RefCell<Vec<Box<[MaybeUninit<u8>]>>>,
}
impl Arena {
    /// Creates an arena that owns no chunks yet; the first non-empty
    /// allocation triggers the creation of the initial chunk.
    pub(crate) fn new() -> Self {
        let chunks = RefCell::new(Vec::new());
        Arena { start: Cell::new(ptr::null_mut()), end: Cell::new(ptr::null_mut()), chunks }
    }

    /// Pushes a fresh chunk offering at least `additional` free bytes and
    /// makes it the current allocation target.
    #[inline(never)]
    #[cold]
    fn grow(&self, additional: usize) {
        let mut chunks = self.chunks.borrow_mut();

        // The first chunk is PAGE bytes. Afterwards each chunk is at least
        // double the size of its predecessor, with the predecessor's size
        // clamped to HUGE_PAGE / 2 so that doubling tops out at HUGE_PAGE.
        let doubled = match chunks.last_mut() {
            Some(previous) => previous.len().min(HUGE_PAGE / 2) * 2,
            None => PAGE,
        };
        // The request that caused the growth must always fit.
        let capacity = cmp::max(additional, doubled);

        let mut chunk = Box::new_uninit_slice(capacity);
        let Range { start, end } = chunk.as_mut_ptr_range();
        self.start.set(start);
        self.end.set(end);
        chunks.push(chunk);
    }

    /// Tries to carve `bytes` bytes off the end of the current chunk.
    ///
    /// Yields `None` when the current chunk (if any) has too little free
    /// space left for the request.
    fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit<u8>]> {
        let chunk_end = self.end.get();
        let candidate = chunk_end.addr().checked_sub(bytes)?;
        if candidate < self.start.get().addr() {
            return None;
        }

        let slot = chunk_end.with_addr(candidate);
        self.end.set(slot);
        // SAFETY: `bytes` bytes starting at `slot` were just reserved.
        Some(unsafe { slice::from_raw_parts_mut(slot, bytes) })
    }

    /// Reserves `bytes` uninitialized bytes, adding chunks as necessary.
    /// A zero-sized request never touches the chunks.
    fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit<u8>] {
        if bytes == 0 {
            return &mut [];
        }

        loop {
            match self.alloc_raw_without_grow(bytes) {
                Some(slot) => return slot,
                // Out of room: add a chunk large enough for the request and
                // retry. If growing fails it panics or aborts.
                None => self.grow(bytes),
            }
        }
    }

    /// Copies `string` into the arena and returns the arena-owned copy, which
    /// borrows from the arena.
    pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str {
        let source = string.as_bytes();
        let slot = self.alloc_raw(source.len());
        let copied = MaybeUninit::write_slice(slot, source);
        // SAFETY: `copied` holds exactly the bytes of `string`, which is a
        // `&str` and therefore valid UTF-8.
        unsafe { str::from_utf8_unchecked_mut(copied) }
    }
}

View File

@ -181,7 +181,6 @@ fn decode(r: &mut Reader<'_>, s: &mut S) -> Self {
Diagnostic,
'interned:
Ident,
Span,
}
@ -242,6 +241,8 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
}
}
pub(crate) use super::symbol::Symbol;
macro_rules! define_client_side {
($($name:ident {
$(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)*
@ -405,6 +406,9 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
panic::catch_unwind(panic::AssertUnwindSafe(|| {
maybe_install_panic_hook(force_show_panics);
// Make sure the symbol store is empty before decoding inputs.
Symbol::invalidate_all();
let reader = &mut &buf[..];
let (globals, input) = <(ExpnGlobals<Span>, A)>::decode(reader, &mut ());
@ -438,6 +442,10 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
buf.clear();
Err::<(), _>(e).encode(&mut buf, &mut ());
});
// Now that a response has been serialized, invalidate all symbols
// registered with the interner.
Symbol::invalidate_all();
buf
}

View File

@ -65,11 +65,11 @@ macro_rules! with_api {
fn from_str(src: &str) -> $S::TokenStream;
fn to_string($self: &$S::TokenStream) -> String;
fn from_token_tree(
tree: TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>,
tree: TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>,
) -> $S::TokenStream;
fn concat_trees(
base: Option<$S::TokenStream>,
trees: Vec<TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>>,
trees: Vec<TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>>,
) -> $S::TokenStream;
fn concat_streams(
base: Option<$S::TokenStream>,
@ -77,12 +77,7 @@ fn concat_streams(
) -> $S::TokenStream;
fn into_trees(
$self: $S::TokenStream
) -> Vec<TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>>;
},
Ident {
fn new(string: &str, span: $S::Span, is_raw: bool) -> $S::Ident;
fn span($self: $S::Ident) -> $S::Span;
fn with_span($self: $S::Ident, span: $S::Span) -> $S::Ident;
) -> Vec<TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>>;
},
Literal {
fn drop($self: $S::Literal);
@ -146,6 +141,9 @@ fn sub(
fn save_span($self: $S::Span) -> usize;
fn recover_proc_macro_span(id: usize) -> $S::Span;
},
Symbol {
fn normalize_and_validate_ident(string: &str) -> Result<$S::Symbol, ()>;
},
}
};
}
@ -170,6 +168,8 @@ macro_rules! reverse_decode {
}
}
#[allow(unsafe_code)]
mod arena;
#[allow(unsafe_code)]
mod buffer;
#[forbid(unsafe_code)]
@ -189,6 +189,8 @@ macro_rules! reverse_decode {
mod selfless_reify;
#[forbid(unsafe_code)]
pub mod server;
#[allow(unsafe_code)]
mod symbol;
use buffer::Buffer;
pub use rpc::PanicMessage;
@ -466,16 +468,25 @@ pub struct Punct<Span> {
compound_traits!(struct Punct<Span> { ch, joint, span });
/// An identifier token as carried by the bridge, generic over the span and
/// symbol types in use.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Ident<Span, Symbol> {
/// The identifier's name; a raw identifier's `r#` prefix is not part of it
/// and is tracked by `is_raw` instead.
pub sym: Symbol,
/// Whether this is a raw identifier (`r#ident`).
pub is_raw: bool,
/// The span attached to this identifier.
pub span: Span,
}
// NOTE(review): presumably derives the bridge's encode/decode impls for the
// three fields, as for `Punct` above — confirm against `compound_traits!`.
compound_traits!(struct Ident<Span, Symbol> { sym, is_raw, span });
#[derive(Clone)]
pub enum TokenTree<TokenStream, Span, Ident, Literal> {
pub enum TokenTree<TokenStream, Span, Symbol, Literal> {
Group(Group<TokenStream, Span>),
Punct(Punct<Span>),
Ident(Ident),
Ident(Ident<Span, Symbol>),
Literal(Literal),
}
compound_traits!(
enum TokenTree<TokenStream, Span, Ident, Literal> {
enum TokenTree<TokenStream, Span, Symbol, Literal> {
Group(tt),
Punct(tt),
Ident(tt),

View File

@ -8,12 +8,12 @@
pub trait Types {
type FreeFunctions: 'static;
type TokenStream: 'static + Clone;
type Ident: 'static + Copy + Eq + Hash;
type Literal: 'static + Clone;
type SourceFile: 'static + Clone;
type MultiSpan: 'static;
type Diagnostic: 'static;
type Span: 'static + Copy + Eq + Hash;
type Symbol: 'static;
}
/// Declare an associated fn of one of the traits below, adding necessary
@ -38,6 +38,12 @@ macro_rules! declare_server_traits {
pub trait Server: Types $(+ $name)* {
fn globals(&mut self) -> ExpnGlobals<Self::Span>;
/// Intern a symbol received from RPC
fn intern_symbol(ident: &str) -> Self::Symbol;
/// Recover the string value of a symbol, and invoke a callback with it.
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str));
}
}
}
@ -49,6 +55,12 @@ impl<S: Server> Server for MarkedTypes<S> {
fn globals(&mut self) -> ExpnGlobals<Self::Span> {
<_>::mark(Server::globals(&mut self.0))
}
/// Forwards to the wrapped server's `intern_symbol` and marks the result.
fn intern_symbol(ident: &str) -> Self::Symbol {
<_>::mark(S::intern_symbol(ident))
}
/// Unmarks `symbol` and forwards to the wrapped server's
/// `with_symbol_string`, passing `f` through unchanged.
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
S::with_symbol_string(symbol.unmark(), f)
}
}
macro_rules! define_mark_types_impls {
@ -81,11 +93,13 @@ macro_rules! define_dispatcher_impl {
pub trait DispatcherTrait {
// HACK(eddyb) these are here to allow `Self::$name` to work below.
$(type $name;)*
fn dispatch(&mut self, buf: Buffer) -> Buffer;
}
impl<S: Server> DispatcherTrait for Dispatcher<MarkedTypes<S>> {
$(type $name = <MarkedTypes<S> as Types>::$name;)*
fn dispatch(&mut self, mut buf: Buffer) -> Buffer {
let Dispatcher { handle_store, server } = self;

View File

@ -0,0 +1,205 @@
//! Client-side interner used for symbols.
//!
//! This is roughly based on the symbol interner from `rustc_span` and the
//! DroplessArena from `rustc_arena`. It is unfortunately a complete
//! copy/re-implementation rather than a dependency as it is difficult to depend
//! on crates from within `proc_macro`, due to it being built at the same time
//! as `std`.
//!
//! If at some point in the future it becomes easier to add dependencies to
//! proc_macro, this module should probably be removed or simplified.
use std::cell::RefCell;
use std::num::NonZeroU32;
use std::str;
use super::*;
/// Handle for a symbol string stored within the Interner.
///
/// The wrapped value is the interner's `sym_base` at creation time plus the
/// string's position in the interner's string table, so it is only meaningful
/// for the interner that produced it and only until that interner is cleared.
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Symbol(NonZeroU32);
// The interner is thread-local, so a `Symbol` must never leave the thread it
// was created on: on another thread it would be resolved against a different
// interner.
impl !Send for Symbol {}
impl !Sync for Symbol {}
impl Symbol {
/// Intern a new `Symbol`
///
/// If `string` is already present in this thread's interner, the existing
/// symbol is returned. No validation is performed on `string`.
pub(crate) fn new(string: &str) -> Self {
INTERNER.with_borrow_mut(|i| i.intern(string))
}
/// Build the `Symbol` for an identifier, validating (and, for non-ASCII
/// input, normalizing) `string` first.
///
/// # Panics
///
/// Panics if `string` is not a valid identifier, or if `is_raw` is set and
/// `string` is one of the names that cannot be written as a raw identifier.
pub(crate) fn new_ident(string: &str, is_raw: bool) -> Self {
    if !Self::is_valid_ascii_ident(string.as_bytes()) {
        // Slow path. A string that is pure ASCII but failed the check above
        // can never be an identifier; anything else is sent to the server,
        // which normalizes and validates it over RPC.
        // Raw-ness needs no check here: every name that cannot be raw is
        // ASCII and is therefore handled by the fast path below.
        let validated = if string.is_ascii() {
            Err(())
        } else {
            client::Symbol::normalize_and_validate_ident(string)
        };
        return match validated {
            Ok(symbol) => symbol,
            Err(_) => panic!("`{:?}` is not a valid identifier", string),
        };
    }

    // Fast path: a valid ASCII identifier is interned locally, with no RPC.
    if is_raw && !Self::can_be_raw(string) {
        panic!("`{}` cannot be a raw identifier", string);
    }
    Self::new(string)
}
/// Run a callback with the symbol's string value.
///
/// Returns whatever `f` returns. The thread-local interner is borrowed
/// immutably for the duration of `f`, so `f` must not intern new symbols
/// (that needs a mutable borrow of the same `RefCell`).
///
/// Panics if the symbol was created before the interner was last cleared.
pub(crate) fn with<R>(self, f: impl FnOnce(&str) -> R) -> R {
INTERNER.with_borrow(|i| f(i.get(self)))
}
/// Clear out the thread-local symbol interner, making all previously
/// created symbols invalid such that `with` will panic when called on them.
///
/// Only the calling thread's interner is affected.
pub(crate) fn invalidate_all() {
INTERNER.with_borrow_mut(|i| i.clear());
}
/// Report whether `bytes` spell a valid identifier made of ASCII only.
///
/// The first byte must be `_` or an ASCII letter; every following byte must
/// be `_`, an ASCII letter or an ASCII digit. Empty input is rejected.
///
/// This cheap check lets the proc-macro client skip the RPC round-trip for
/// simple idents. A `false` result does not mean the input is invalid: an
/// identifier containing non-ASCII characters is always rejected here.
fn is_valid_ascii_ident(bytes: &[u8]) -> bool {
    match bytes.split_first() {
        Some((head, tail)) => {
            (*head == b'_' || head.is_ascii_alphabetic())
                && tail.iter().all(|&b| b == b'_' || b.is_ascii_alphanumeric())
        }
        None => false,
    }
}
// Counterpart of `Symbol::can_be_raw` from `rustc_span`: every name is allowed
// as a raw identifier except `_`, `super`, `self`, `Self` and `crate`.
fn can_be_raw(string: &str) -> bool {
    !matches!(string, "_" | "super" | "self" | "Self" | "crate")
}
}
// Debug-formats the interned string itself (i.e. as a quoted, escaped string)
// rather than the numeric handle.
impl fmt::Debug for Symbol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.with(|s| fmt::Debug::fmt(s, f))
}
}
// Dedicated `ToString` impl that copies the interned string straight into a
// new `String`, without going through the formatting machinery. It coexists
// with the `Display` impl for the same type.
// NOTE(review): relies on specialization being enabled for this crate —
// confirm `min_specialization` stays in the crate's feature list.
impl ToString for Symbol {
fn to_string(&self) -> String {
self.with(|s| s.to_owned())
}
}
// Displays the interned string by delegating to `str`'s `Display` impl, so
// formatter options such as width and padding apply to the text.
impl fmt::Display for Symbol {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.with(|s| fmt::Display::fmt(s, f))
}
}
// A client `Symbol` is written to the RPC buffer as its full string value;
// the numeric handle itself is never sent.
impl<S> Encode<S> for Symbol {
fn encode(self, w: &mut Writer, s: &mut S) {
self.with(|sym| sym.encode(w, s))
}
}
// Decoding for the marked server symbol type: reads a string from the buffer,
// interns it through the server's `intern_symbol`, and marks the result.
impl<S: server::Server> DecodeMut<'_, '_, client::HandleStore<server::MarkedTypes<S>>>
for Marked<S::Symbol, Symbol>
{
fn decode(r: &mut Reader<'_>, s: &mut client::HandleStore<server::MarkedTypes<S>>) -> Self {
Mark::mark(S::intern_symbol(<&str>::decode(r, s)))
}
}
// Encoding for the marked server symbol type: unmarks the symbol, asks the
// server for its string via `with_symbol_string`, and writes that string.
impl<S: server::Server> Encode<client::HandleStore<server::MarkedTypes<S>>>
for Marked<S::Symbol, Symbol>
{
fn encode(self, w: &mut Writer, s: &mut client::HandleStore<server::MarkedTypes<S>>) {
S::with_symbol_string(&self.unmark(), |sym| sym.encode(w, s))
}
}
// Decoding a client `Symbol`: reads a string from the buffer and interns it
// in this thread's interner. The string is interned as-is, with no
// identifier validation.
impl<S> DecodeMut<'_, '_, S> for Symbol {
fn decode(r: &mut Reader<'_>, s: &mut S) -> Self {
Symbol::new(<&str>::decode(r, s))
}
}
thread_local! {
// Per-thread interner backing every `Symbol` created on this thread. It
// starts out empty; `Symbol::invalidate_all` empties it again.
static INTERNER: RefCell<Interner> = RefCell::new(Interner {
arena: arena::Arena::new(),
names: fxhash::FxHashMap::default(),
strings: Vec::new(),
// Start with a base of 1 to make sure that `NonZeroU32` works.
sym_base: NonZeroU32::new(1).unwrap(),
});
}
/// Basic interner for a `Symbol`, inspired by the one in `rustc_span`.
struct Interner {
// Owns the bytes of every interned string.
arena: arena::Arena,
// SAFETY: These `'static` lifetimes are actually references to data owned
// by the Arena. This is safe, as we never return them as static references
// from `Interner`.
// `names` maps a string to the symbol it was assigned (used to deduplicate
// on intern).
names: fxhash::FxHashMap<&'static str, Symbol>,
// `strings` maps back from a symbol to its string: the entry for a symbol
// lives at index `symbol value - sym_base`.
strings: Vec<&'static str>,
// The offset to apply to symbol names stored in the interner. This is used
// to ensure that symbol names are not re-used after the interner is
// cleared.
sym_base: NonZeroU32,
}
impl Interner {
    /// Returns the symbol for `string`, allocating a new one (and copying the
    /// string into the arena) only if it has not been interned before.
    ///
    /// Panics if the symbol value would no longer fit in a `u32`.
    fn intern(&mut self, string: &str) -> Symbol {
        if let Some(existing) = self.names.get(string).copied() {
            return existing;
        }

        // The cast cannot truncate: every earlier push was preceded by a
        // successful `checked_add` of the then-current length onto a base of
        // at least 1, which bounds the length by `u32::MAX`.
        let index = self.strings.len() as u32;
        let fresh =
            Symbol(self.sym_base.checked_add(index).expect("`proc_macro` symbol name overflow"));

        let stored: &str = self.arena.alloc_str(string);
        // SAFETY: we can extend the arena allocation to `'static` because we
        // only access these while the arena is still alive.
        let stored: &'static str = unsafe { &*(stored as *const str) };
        self.strings.push(stored);
        self.names.insert(stored, fresh);
        fresh
    }

    /// Looks up the string belonging to `symbol`.
    ///
    /// Panics if `symbol` predates the most recent `clear`.
    fn get(&self, symbol: Symbol) -> &str {
        // NOTE: Undo the offset that keeps symbol values nonzero and unique
        // across clears. A value below the current base can only come from a
        // symbol created before the interner was cleared.
        let raw = symbol.0.get();
        let base = self.sym_base.get();
        let index = raw.checked_sub(base).expect("use-after-free of `proc_macro` symbol");
        self.strings[index as usize]
    }

    /// Drops every interned string and advances the base so that symbols
    /// handed out earlier are rejected by `get` from now on.
    fn clear(&mut self) {
        // NOTE: Be careful not to panic here, as we may be called on the client
        // when a `catch_unwind` isn't installed.
        let retired = self.strings.len() as u32;
        self.sym_base = self.sym_base.saturating_add(retired);
        self.strings.clear();
        self.names.clear();

        // SAFETY: Both tables were emptied above, so nothing refers into the
        // old arena any more by the time it is replaced (and dropped) here.
        self.arena = arena::Arena::new();
    }
}

View File

@ -24,10 +24,14 @@
#![feature(staged_api)]
#![feature(allow_internal_unstable)]
#![feature(decl_macro)]
#![feature(local_key_cell_methods)]
#![feature(maybe_uninit_write_slice)]
#![feature(negative_impls)]
#![feature(new_uninit)]
#![feature(restricted_std)]
#![feature(rustc_attrs)]
#![feature(min_specialization)]
#![feature(strict_provenance)]
#![recursion_limit = "256"]
#[unstable(feature = "proc_macro_internals", issue = "27812")]
@ -214,7 +218,7 @@ fn tree_to_bridge_tree(
) -> bridge::TokenTree<
bridge::client::TokenStream,
bridge::client::Span,
bridge::client::Ident,
bridge::client::Symbol,
bridge::client::Literal,
> {
match tree {
@ -240,7 +244,7 @@ struct ConcatTreesHelper {
bridge::TokenTree<
bridge::client::TokenStream,
bridge::client::Span,
bridge::client::Ident,
bridge::client::Symbol,
bridge::client::Literal,
>,
>,
@ -367,7 +371,7 @@ pub struct IntoIter(
bridge::TokenTree<
bridge::client::TokenStream,
bridge::client::Span,
bridge::client::Ident,
bridge::client::Symbol,
bridge::client::Literal,
>,
>,
@ -1048,7 +1052,7 @@ fn eq(&self, rhs: &Punct) -> bool {
/// An identifier (`ident`).
#[derive(Clone)]
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub struct Ident(bridge::client::Ident);
pub struct Ident(bridge::Ident<bridge::client::Span, bridge::client::Symbol>);
impl Ident {
/// Creates a new `Ident` with the given `string` as well as the specified
@ -1072,7 +1076,11 @@ impl Ident {
/// tokens, requires a `Span` to be specified at construction.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn new(string: &str, span: Span) -> Ident {
Ident(bridge::client::Ident::new(string, span.0, false))
Ident(bridge::Ident {
sym: bridge::client::Symbol::new_ident(string, false),
is_raw: false,
span: span.0,
})
}
/// Same as `Ident::new`, but creates a raw identifier (`r#ident`).
@ -1081,38 +1089,45 @@ pub fn new(string: &str, span: Span) -> Ident {
/// (e.g. `self`, `super`) are not supported, and will cause a panic.
#[stable(feature = "proc_macro_raw_ident", since = "1.47.0")]
pub fn new_raw(string: &str, span: Span) -> Ident {
Ident(bridge::client::Ident::new(string, span.0, true))
Ident(bridge::Ident {
sym: bridge::client::Symbol::new_ident(string, true),
is_raw: true,
span: span.0,
})
}
/// Returns the span of this `Ident`, encompassing the entire string returned
/// by [`to_string`](Self::to_string).
/// by [`to_string`](ToString::to_string).
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn span(&self) -> Span {
Span(self.0.span())
Span(self.0.span)
}
/// Configures the span of this `Ident`, possibly changing its hygiene context.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn set_span(&mut self, span: Span) {
self.0 = self.0.with_span(span.0);
self.0.span = span.0;
}
}
// N.B., the bridge only provides `to_string`, implement `fmt::Display`
// based on it (the reverse of the usual relationship between the two).
#[stable(feature = "proc_macro_lib", since = "1.15.0")]
/// Converts the identifier to a string that should be losslessly convertible
/// back into the same identifier.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
impl ToString for Ident {
fn to_string(&self) -> String {
TokenStream::from(TokenTree::from(self.clone())).to_string()
self.0.sym.with(|sym| if self.0.is_raw { ["r#", sym].concat() } else { sym.to_owned() })
}
}
/// Prints the identifier as a string that should be losslessly convertible
/// back into the same identifier.
/// Prints the identifier as a string that should be losslessly convertible back
/// into the same identifier.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
impl fmt::Display for Ident {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.to_string())
if self.0.is_raw {
f.write_str("r#")?;
}
fmt::Display::fmt(&self.0.sym, f)
}
}

View File

@ -1,17 +1,9 @@
// aux-build:invalid-punct-ident.rs
// rustc-env:RUST_BACKTRACE=0
// FIXME https://github.com/rust-lang/rust/issues/59998
// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> ""
// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> ""
// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> ""
// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> ""
// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> ""
// normalize-stderr-test "note: compiler flags.*\n\n" -> ""
// normalize-stderr-test "note: rustc.*running on.*\n\n" -> ""
// normalize-stderr-test "query stack during panic:\n" -> ""
// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> ""
// normalize-stderr-test "end of query stack\n" -> ""
// ignore-stage1
// only-linux
//
// FIXME: This should be a normal (stage1, all platforms) test in
// src/test/ui/proc-macro once issue #59998 is fixed.
#[macro_use]
extern crate invalid_punct_ident;

View File

@ -1,5 +1,5 @@
error: proc macro panicked
--> $DIR/invalid-punct-ident-2.rs:19:1
--> $DIR/invalid-punct-ident-2.rs:11:1
|
LL | invalid_ident!();
| ^^^^^^^^^^^^^^^^

View File

@ -1,17 +1,9 @@
// aux-build:invalid-punct-ident.rs
// rustc-env:RUST_BACKTRACE=0
// FIXME https://github.com/rust-lang/rust/issues/59998
// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> ""
// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> ""
// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> ""
// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> ""
// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> ""
// normalize-stderr-test "note: compiler flags.*\n\n" -> ""
// normalize-stderr-test "note: rustc.*running on.*\n\n" -> ""
// normalize-stderr-test "query stack during panic:\n" -> ""
// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> ""
// normalize-stderr-test "end of query stack\n" -> ""
// ignore-stage1
// only-linux
//
// FIXME: This should be a normal (stage1, all platforms) test in
// src/test/ui/proc-macro once issue #59998 is fixed.
#[macro_use]
extern crate invalid_punct_ident;

View File

@ -1,5 +1,5 @@
error: proc macro panicked
--> $DIR/invalid-punct-ident-3.rs:19:1
--> $DIR/invalid-punct-ident-3.rs:11:1
|
LL | invalid_raw_ident!();
| ^^^^^^^^^^^^^^^^^^^^