diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index 7b781ba1e11..852d49fc5b6 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -48,6 +48,9 @@ impl LitKind { return Err(LitError::InvalidSuffix); } + // For byte/char/string literals, chars and escapes have already been + // checked in the lexer (in `cook_lexer_literal`). So we can assume all + // chars and escapes are valid here. Ok(match kind { token::Bool => { assert!(symbol.is_bool_lit()); @@ -56,12 +59,12 @@ impl LitKind { token::Byte => { return unescape_byte(symbol.as_str()) .map(LitKind::Byte) - .map_err(|_| LitError::LexerError); + .map_err(|_| panic!("failed to unescape byte literal")); } token::Char => { return unescape_char(symbol.as_str()) .map(LitKind::Char) - .map_err(|_| LitError::LexerError); + .map_err(|_| panic!("failed to unescape char literal")); } // There are some valid suffixes for integer and float literals, @@ -77,26 +80,22 @@ impl LitKind { let s = symbol.as_str(); // Vanilla strings are so common we optimize for the common case where no chars // requiring special behaviour are present. - let symbol = if s.contains(['\\', '\r']) { + let symbol = if s.contains('\\') { let mut buf = String::with_capacity(s.len()); - let mut error = Ok(()); // Force-inlining here is aggressive but the closure is - // called on every char in the string, so it can be - // hot in programs with many long strings. + // called on every char in the string, so it can be hot in + // programs with many long strings containing escapes. unescape_literal( s, Mode::Str, &mut #[inline(always)] - |_, unescaped_char| match unescaped_char { + |_, c| match c { Ok(c) => buf.push(c), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + assert!(!err.is_fatal(), "failed to unescape string literal") } }, ); - error?; Symbol::intern(&buf) } else { symbol @@ -104,86 +103,46 @@ impl LitKind { LitKind::Str(symbol, ast::StrStyle::Cooked) } token::StrRaw(n) => { - // Raw strings have no escapes, so we only need to check for invalid chars, and we - // can reuse the symbol on success. - let mut error = Ok(()); - unescape_literal(symbol.as_str(), Mode::RawStr, &mut |_, unescaped_char| { - match unescaped_char { - Ok(_) => {} - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } - } - } - }); - error?; + // Raw strings have no escapes so no work is needed here. LitKind::Str(symbol, ast::StrStyle::Raw(n)) } token::ByteStr => { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); unescape_literal(s, Mode::ByteStr, &mut |_, c| match c { Ok(c) => buf.push(byte_from_char(c)), Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + assert!(!err.is_fatal(), "failed to unescape string literal") } }); - error?; LitKind::ByteStr(buf.into(), StrStyle::Cooked) } token::ByteStrRaw(n) => { - // Raw strings have no escapes, so we only need to check for invalid chars, and we - // can convert the symbol directly to a `Lrc` on success. - let s = symbol.as_str(); - let mut error = Ok(()); - unescape_literal(s, Mode::RawByteStr, &mut |_, c| match c { - Ok(_) => {} - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } - } - }); - LitKind::ByteStr(s.to_owned().into_bytes().into(), StrStyle::Raw(n)) + // Raw strings have no escapes so we can convert the symbol + // directly to a `Lrc`. + let buf = symbol.as_str().to_owned().into_bytes(); + LitKind::ByteStr(buf.into(), StrStyle::Raw(n)) } token::CStr => { let s = symbol.as_str(); let mut buf = Vec::with_capacity(s.len()); - let mut error = Ok(()); unescape_c_string(s, Mode::CStr, &mut |_span, c| match c { Ok(CStrUnit::Byte(b)) => buf.push(b), Ok(CStrUnit::Char(c)) => { buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes()) } Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } + assert!(!err.is_fatal(), "failed to unescape C string literal") } }); - error?; buf.push(0); LitKind::CStr(buf.into(), StrStyle::Cooked) } token::CStrRaw(n) => { - // Raw strings have no escapes, so we only need to check for invalid chars, and we - // can convert the symbol directly to a `Lrc` on success. - let s = symbol.as_str(); - let mut error = Ok(()); - unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c { - Ok(_) => {} - Err(err) => { - if err.is_fatal() { - error = Err(LitError::LexerError); - } - } - }); - error?; - let mut buf = s.to_owned().into_bytes(); + // Raw strings have no escapes so we can convert the symbol + // directly to a `Lrc` after appending the terminating NUL + // char. + let mut buf = symbol.as_str().to_owned().into_bytes(); buf.push(0); LitKind::CStr(buf.into(), StrStyle::Raw(n)) } diff --git a/tests/ui/str/str-escape.rs b/tests/ui/str/str-escape.rs index 10a72421f24..89a82171063 100644 --- a/tests/ui/str/str-escape.rs +++ b/tests/ui/str/str-escape.rs @@ -1,5 +1,6 @@ // check-pass // ignore-tidy-tab +// edition: 2021 fn main() { let s = "\ @@ -8,11 +9,11 @@ fn main() { //~^^^ WARNING multiple lines skipped by escaped newline assert_eq!(s, ""); - let s = "foo\ + let s = c"foo\   bar "; //~^^^ WARNING whitespace symbol '\u{a0}' is not skipped - assert_eq!(s, "foo  bar\n "); + assert_eq!(s, c"foo  bar\n "); let s = "a\ b"; @@ -22,10 +23,10 @@ fn main() { b"; assert_eq!(s, "ab"); - let s = "a\ + let s = b"a\ b"; //~^^ WARNING whitespace symbol '\u{c}' is not skipped // '\x0c' is ASCII whitespace, but it may not need skipped // discussion: https://github.com/rust-lang/rust/pull/108403 - assert_eq!(s, "a\x0cb"); + assert_eq!(s, b"a\x0cb"); } diff --git a/tests/ui/str/str-escape.stderr b/tests/ui/str/str-escape.stderr index 43b4f7e36f6..00fe5444e1a 100644 --- a/tests/ui/str/str-escape.stderr +++ b/tests/ui/str/str-escape.stderr @@ -1,5 +1,5 @@ warning: multiple lines skipped by escaped newline - --> $DIR/str-escape.rs:5:14 + --> $DIR/str-escape.rs:6:14 | LL | let s = "\ | ______________^ @@ -8,20 +8,20 @@ LL | | "; | |_____________^ skipping everything up to and including this point warning: whitespace symbol '\u{a0}' is not skipped - --> $DIR/str-escape.rs:11:17 + --> $DIR/str-escape.rs:12:18 | -LL | let s = "foo\ - | _________________^ +LL | let s = c"foo\ + | __________________^ LL | |   bar | | ^ whitespace symbol '\u{a0}' is not skipped | |___| | warning: whitespace symbol '\u{c}' is not skipped - --> $DIR/str-escape.rs:25:15 + --> $DIR/str-escape.rs:26:16 | -LL | let s = "a\ - | _______________^ +LL | let s = b"a\ + | ________________^ LL | | b"; | | ^- whitespace symbol '\u{c}' is not skipped | |____|