Reserve prefixed identifiers and string literals (RFC 3101)

This commit rejects any identifier that is immediately followed by
one of the three characters `"`, `'` or `#`. This is stricter than
what RFC 3101 requires, but may be necessary according to the
discussion on [Zulip].

[Zulip]: https://rust-lang.zulipchat.com/#narrow/stream/268952-edition-2021/topic/reserved.20prefixes/near/238470099
This commit is contained in:
lrh2000 2021-05-16 11:10:05 +08:00
parent 831ae3c136
commit 8dee9bc8fc
5 changed files with 172 additions and 7 deletions

View File

@ -66,6 +66,8 @@ pub enum TokenKind {
Ident,
/// "r#ident"
RawIdent,
/// `foo#`, `foo'`, `foo"`. Note that the trailing character is not included.
BadPrefix,
/// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
Literal { kind: LiteralKind, suffix_start: usize },
/// "'a"
@ -323,7 +325,7 @@ impl Cursor<'_> {
let kind = RawStr { n_hashes, err };
Literal { kind, suffix_start }
}
_ => self.ident(),
_ => self.ident_or_bad_prefix(),
},
// Byte literal, byte string literal, raw byte string literal or identifier.
@ -358,12 +360,12 @@ impl Cursor<'_> {
let kind = RawByteStr { n_hashes, err };
Literal { kind, suffix_start }
}
_ => self.ident(),
_ => self.ident_or_bad_prefix(),
},
// Identifier (this should be checked after other variant that can
// start as identifier).
c if is_id_start(c) => self.ident(),
c if is_id_start(c) => self.ident_or_bad_prefix(),
// Numeric literal.
c @ '0'..='9' => {
@ -487,11 +489,16 @@ impl Cursor<'_> {
RawIdent
}
fn ident(&mut self) -> TokenKind {
fn ident_or_bad_prefix(&mut self) -> TokenKind {
debug_assert!(is_id_start(self.prev()));
// Start is already eaten, eat the rest of identifier.
self.eat_while(is_id_continue);
Ident
// Good prefixes must have been handled earlier. So if
// we see a prefix here, it is definitely a bad prefix.
match self.first() {
'#' | '"' | '\'' => BadPrefix,
_ => Ident,
}
}
fn number(&mut self, first_digit: char) -> LiteralKind {

View File

@ -5,6 +5,7 @@ use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PRe
use rustc_lexer::unescape::{self, Mode};
use rustc_lexer::{Base, DocStyle, RawStrError};
use rustc_session::parse::ParseSess;
use rustc_span::edition::Edition;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::{BytePos, Pos, Span};
@ -166,12 +167,18 @@ impl<'a> StringReader<'a> {
self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
}
rustc_lexer::TokenKind::Whitespace => return None,
rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
rustc_lexer::TokenKind::Ident
| rustc_lexer::TokenKind::RawIdent
| rustc_lexer::TokenKind::BadPrefix => {
let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
let is_bad_prefix = token == rustc_lexer::TokenKind::BadPrefix;
let mut ident_start = start;
if is_raw_ident {
ident_start = ident_start + BytePos(2);
}
if is_bad_prefix {
self.report_reserved_prefix(start);
}
let sym = nfc_normalize(self.str_from(ident_start));
let span = self.mk_sp(start, self.pos);
self.sess.symbol_gallery.insert(sym, span);
@ -491,6 +498,29 @@ impl<'a> StringReader<'a> {
FatalError.raise()
}
/// Emits the "prefix `…` is unknown" error for the identifier that spans
/// `start..self.pos` and is immediately followed by `#`, `"` or `'`.
///
/// Nothing is reported on editions older than 2021 (see RFC 3101).
fn report_reserved_prefix(&self, start: BytePos) {
    if self.sess.edition < Edition::Edition2021 {
        return;
    }

    // The offending identifier itself.
    let prefix_span = self.mk_sp(start, self.pos);
    let prefix = self.str_from_to(start, self.pos);

    // The character right after the identifier. The lexer only produces
    // `BadPrefix` in front of `#`, `"` or `'`, so it is one byte wide.
    let next_char = self.str_from_to(self.pos, self.pos + BytePos(1));
    // Empty span sitting between the identifier and that character.
    let gap_span = self.mk_sp(self.pos, self.pos);

    let message = format!("prefix `{}` is unknown", prefix);
    let help = format!("help: consider inserting a whitespace before this `{}`", next_char);

    let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &message);
    err.span_label(prefix_span, "unknown prefix");
    err.span_label(gap_span, &help);
    err.note("prefixed identifiers and string literals are reserved since Rust 2021");
    err.emit();
}
/// Note: It was decided to not add a test case, because it would be too big.
/// <https://github.com/rust-lang/rust/pull/50296#issuecomment-392135180>
fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! {

View File

@ -413,7 +413,7 @@ impl<'a> Classifier<'a> {
},
c => c,
},
TokenKind::RawIdent => Class::Ident,
TokenKind::RawIdent | TokenKind::BadPrefix => Class::Ident,
TokenKind::Lifetime { .. } => Class::Lifetime,
};
// Anything that didn't return above is the simple case where we the

View File

@ -0,0 +1,36 @@
// compile-flags: -Z unstable-options --edition 2021
macro_rules! demo2 { // the single arm only matches an input of exactly two token trees
( $a:tt $b:tt ) => { println!("two tokens") };
}
macro_rules! demo3 { // the single arm only matches an input of exactly three token trees
( $a:tt $b:tt $c:tt ) => { println!("three tokens") };
}
macro_rules! demo4 { // the single arm only matches an input of exactly four token trees
( $a:tt $b:tt $c:tt $d:tt ) => { println!("four tokens") };
}
fn main() {
demo3!(foo#bar); //~ ERROR prefix `foo` is unknown
demo2!(foo"bar"); //~ ERROR prefix `foo` is unknown
demo2!(foo'b'); //~ ERROR prefix `foo` is unknown
demo2!(foo'b); //~ ERROR prefix `foo` is unknown
demo3!(foo# bar); //~ ERROR prefix `foo` is unknown
demo4!(foo#! bar); //~ ERROR prefix `foo` is unknown
demo4!(foo## bar); //~ ERROR prefix `foo` is unknown
demo4!(foo#bar#);
//~^ ERROR prefix `foo` is unknown
//~| ERROR prefix `bar` is unknown
// None of the invocations below carry an expected-error annotation: in each
// of them the plain identifier is separated from the following `#` by
// whitespace or another token, or the leading `r` starts a raw string /
// raw identifier rather than an unknown prefix.
demo3!(foo # bar);
demo3!(foo #bar);
demo4!(foo!#bar);
demo4!(foo ##bar);
demo3!(r"foo"#bar);
demo3!(r#foo#bar);
}

View File

@ -0,0 +1,92 @@
error: prefix `foo` is unknown
--> $DIR/reserved-prefixes.rs:16:12
|
LL | demo3!(foo#bar);
| ^^^- help: consider inserting a whitespace before this `#`
| |
| unknown prefix
|
= note: prefixed identifiers and string literals are reserved since Rust 2021
error: prefix `foo` is unknown
--> $DIR/reserved-prefixes.rs:17:12
|
LL | demo2!(foo"bar");
| ^^^- help: consider inserting a whitespace before this `"`
| |
| unknown prefix
|
= note: prefixed identifiers and string literals are reserved since Rust 2021
error: prefix `foo` is unknown
--> $DIR/reserved-prefixes.rs:18:12
|
LL | demo2!(foo'b');
| ^^^- help: consider inserting a whitespace before this `'`
| |
| unknown prefix
|
= note: prefixed identifiers and string literals are reserved since Rust 2021
error: prefix `foo` is unknown
--> $DIR/reserved-prefixes.rs:20:12
|
LL | demo2!(foo'b);
| ^^^- help: consider inserting a whitespace before this `'`
| |
| unknown prefix
|
= note: prefixed identifiers and string literals are reserved since Rust 2021
error: prefix `foo` is unknown
--> $DIR/reserved-prefixes.rs:21:12
|
LL | demo3!(foo# bar);
| ^^^- help: consider inserting a whitespace before this `#`
| |
| unknown prefix
|
= note: prefixed identifiers and string literals are reserved since Rust 2021
error: prefix `foo` is unknown
--> $DIR/reserved-prefixes.rs:22:12
|
LL | demo4!(foo#! bar);
| ^^^- help: consider inserting a whitespace before this `#`
| |
| unknown prefix
|
= note: prefixed identifiers and string literals are reserved since Rust 2021
error: prefix `foo` is unknown
--> $DIR/reserved-prefixes.rs:23:12
|
LL | demo4!(foo## bar);
| ^^^- help: consider inserting a whitespace before this `#`
| |
| unknown prefix
|
= note: prefixed identifiers and string literals are reserved since Rust 2021
error: prefix `foo` is unknown
--> $DIR/reserved-prefixes.rs:25:12
|
LL | demo4!(foo#bar#);
| ^^^- help: consider inserting a whitespace before this `#`
| |
| unknown prefix
|
= note: prefixed identifiers and string literals are reserved since Rust 2021
error: prefix `bar` is unknown
--> $DIR/reserved-prefixes.rs:25:16
|
LL | demo4!(foo#bar#);
| ^^^- help: consider inserting a whitespace before this `#`
| |
| unknown prefix
|
= note: prefixed identifiers and string literals are reserved since Rust 2021
error: aborting due to 9 previous errors