Revert "Don't recover lifetimes/labels containing emojis as character literals"

Reverts PR 
Fixes (doesn't close until beta backported)

This reverts commit e3f9db5fc319c6d8eee5d47d216ea6a426070c41.
This reverts commit 98b82aedba3f3f581e89df54352914b27f42c6f7.
This reverts commit 380fa264132ad481e73cbbf0f3a0feefd99a1d78.
This commit is contained in:
Michael Goulet 2023-04-10 06:52:18 +00:00
parent 3c2e2dd5c5
commit a047064d6b
8 changed files with 17 additions and 178 deletions

@ -473,8 +473,6 @@ pub enum StashKey {
/// When an invalid lifetime e.g. `'2` should be reinterpreted
/// as a char literal in the parser
LifetimeIsChar,
/// When an invalid lifetime e.g. `'🐱` contains emoji.
LifetimeContainsEmoji,
/// Maybe there was a typo where a comma was forgotten before
/// FRU syntax
MaybeFruTypo,

@ -95,7 +95,7 @@ pub enum TokenKind {
Literal { kind: LiteralKind, suffix_start: u32 },
/// "'a"
Lifetime { starts_with_number: bool, contains_emoji: bool },
Lifetime { starts_with_number: bool },
// One-char tokens:
/// ";"
@ -632,13 +632,7 @@ impl Cursor<'_> {
// If the first symbol is valid for identifier, it can be a lifetime.
// Also check if it's a number for a better error reporting (so '0 will
// be reported as invalid lifetime and not as unterminated char literal).
// We also have to account for potential `'🐱` emojis to avoid reporting
// it as an unterminated char literal.
is_id_start(self.first())
|| self.first().is_digit(10)
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
// 5.0, but Unicode is already newer than this.
|| unic_emoji_char::is_emoji(self.first())
is_id_start(self.first()) || self.first().is_digit(10)
};
if !can_be_a_lifetime {
@ -651,33 +645,16 @@ impl Cursor<'_> {
return Literal { kind, suffix_start };
}
// Either a lifetime or a character literal.
// Either a lifetime or a character literal with
// length greater than 1.
let starts_with_number = self.first().is_digit(10);
let mut contains_emoji = false;
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
// 5.0, but Unicode is already newer than this.
if unic_emoji_char::is_emoji(self.first()) {
contains_emoji = true;
} else {
// Skip the literal contents.
// First symbol can be a number (which isn't a valid identifier start),
// so skip it without any checks.
self.bump();
}
self.eat_while(|c| {
if is_id_continue(c) {
true
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
// 5.0, but Unicode is already newer than this.
} else if unic_emoji_char::is_emoji(c) {
contains_emoji = true;
true
} else {
false
}
});
// Skip the literal contents.
// First symbol can be a number (which isn't a valid identifier start),
// so skip it without any checks.
self.bump();
self.eat_while(is_id_continue);
// Check if after skipping literal contents we've met a closing
// single quote (which means that user attempted to create a
@ -687,7 +664,7 @@ impl Cursor<'_> {
let kind = Char { terminated: true };
Literal { kind, suffix_start: self.pos_within_token() }
} else {
Lifetime { starts_with_number, contains_emoji }
Lifetime { starts_with_number }
}
}

@ -235,7 +235,7 @@ fn lifetime() {
check_lexing(
"'abc",
expect![[r#"
Token { kind: Lifetime { starts_with_number: false, contains_emoji: false }, len: 4 }
Token { kind: Lifetime { starts_with_number: false }, len: 4 }
"#]],
);
}

@ -223,21 +223,16 @@ impl<'a> StringReader<'a> {
};
token::Literal(token::Lit { kind, symbol, suffix })
}
rustc_lexer::TokenKind::Lifetime { starts_with_number, contains_emoji } => {
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let lifetime_name = self.str_from(start);
if starts_with_number {
let span = self.mk_sp(start, self.pos);
let mut diag = self.sess.struct_err("lifetimes or labels cannot start with a number");
let mut diag = self.sess.struct_err("lifetimes cannot start with a number");
diag.set_span(span);
diag.stash(span, StashKey::LifetimeIsChar);
} else if contains_emoji {
let span = self.mk_sp(start, self.pos);
let mut diag = self.sess.struct_err("lifetimes or labels cannot contain emojis");
diag.set_span(span);
diag.stash(span, StashKey::LifetimeContainsEmoji);
}
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident)

@ -1,45 +0,0 @@
#![allow(unused_labels)]
// FIXME(#108019): outdated Unicode table
// fn foo() {
// '🥺 loop {
// break
// }
// }
fn bar() {
'🐱 loop {
//~^ ERROR labeled expression must be followed by `:`
//~| ERROR lifetimes or labels cannot contain emojis
break
}
}
fn qux() {
'a🐱 loop {
//~^ ERROR labeled expression must be followed by `:`
//~| ERROR lifetimes or labels cannot contain emojis
break
}
}
fn quux() {
'1🐱 loop {
//~^ ERROR labeled expression must be followed by `:`
//~| ERROR lifetimes or labels cannot start with a number
break
}
}
fn x<'🐱>() -> &'🐱 () {
//~^ ERROR lifetimes or labels cannot contain emojis
//~| ERROR lifetimes or labels cannot contain emojis
&()
}
fn y() {
'a🐱: loop {}
//~^ ERROR lifetimes or labels cannot contain emojis
}
fn main() {}

@ -1,86 +0,0 @@
error: labeled expression must be followed by `:`
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
|
LL | '🐱 loop {
| ^--- help: add `:` after the label
| |
| _____the label
| |
LL | |
LL | |
LL | | break
LL | | }
| |_____^
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
error: labeled expression must be followed by `:`
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
|
LL | 'a🐱 loop {
| ^---- help: add `:` after the label
| |
| _____the label
| |
LL | |
LL | |
LL | | break
LL | | }
| |_____^
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
error: labeled expression must be followed by `:`
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
|
LL | '1🐱 loop {
| ^---- help: add `:` after the label
| |
| _____the label
| |
LL | |
LL | |
LL | | break
LL | | }
| |_____^
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them
error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
|
LL | '🐱 loop {
| ^^^
error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
|
LL | 'a🐱 loop {
| ^^^^
error: lifetimes or labels cannot start with a number
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
|
LL | '1🐱 loop {
| ^^^^
error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:6
|
LL | fn x<'🐱>() -> &'🐱 () {
| ^^^
error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:16
|
LL | fn x<'🐱>() -> &'🐱 () {
| ^^^
error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:41:5
|
LL | 'a🐱: loop {}
| ^^^^
error: aborting due to 9 previous errors

@ -1,6 +1,6 @@
struct S<'1> { s: &'1 usize }
//~^ ERROR lifetimes or labels cannot start with a number
//~| ERROR lifetimes or labels cannot start with a number
//~^ ERROR lifetimes cannot start with a number
//~| ERROR lifetimes cannot start with a number
fn main() {
// verify that the parse error doesn't stop type checking
let x: usize = "";

@ -6,13 +6,13 @@ LL | let x: usize = "";
| |
| expected due to this
error: lifetimes or labels cannot start with a number
error: lifetimes cannot start with a number
--> $DIR/numeric-lifetime.rs:1:10
|
LL | struct S<'1> { s: &'1 usize }
| ^^
error: lifetimes or labels cannot start with a number
error: lifetimes cannot start with a number
--> $DIR/numeric-lifetime.rs:1:20
|
LL | struct S<'1> { s: &'1 usize }