Rollup merge of #108801 - fee1-dead-contrib:c-str, r=compiler-errors
Implement RFC 3348, `c"foo"` literals. RFC: https://github.com/rust-lang/rfcs/pull/3348 — Tracking issue: #105723
This commit is contained in:
commit
4891f02cff
@ -1821,6 +1821,8 @@ pub enum LitKind {
|
||||
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
|
||||
/// non-utf8, and symbols only allow utf8 strings.
|
||||
ByteStr(Lrc<[u8]>, StrStyle),
|
||||
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
|
||||
CStr(Lrc<[u8]>, StrStyle),
|
||||
/// A byte char (`b'f'`).
|
||||
Byte(u8),
|
||||
/// A character literal (`'a'`).
|
||||
@ -1875,6 +1877,7 @@ impl LitKind {
|
||||
// unsuffixed variants
|
||||
LitKind::Str(..)
|
||||
| LitKind::ByteStr(..)
|
||||
| LitKind::CStr(..)
|
||||
| LitKind::Byte(..)
|
||||
| LitKind::Char(..)
|
||||
| LitKind::Int(_, LitIntType::Unsuffixed)
|
||||
|
@ -74,6 +74,8 @@ pub enum LitKind {
|
||||
StrRaw(u8), // raw string delimited by `n` hash symbols
|
||||
ByteStr,
|
||||
ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols
|
||||
CStr,
|
||||
CStrRaw(u8),
|
||||
Err,
|
||||
}
|
||||
|
||||
@ -141,6 +143,10 @@ impl fmt::Display for Lit {
|
||||
delim = "#".repeat(n as usize),
|
||||
string = symbol
|
||||
)?,
|
||||
CStr => write!(f, "c\"{symbol}\"")?,
|
||||
CStrRaw(n) => {
|
||||
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))?
|
||||
}
|
||||
Integer | Float | Bool | Err => write!(f, "{symbol}")?,
|
||||
}
|
||||
|
||||
@ -170,6 +176,7 @@ impl LitKind {
|
||||
Float => "float",
|
||||
Str | StrRaw(..) => "string",
|
||||
ByteStr | ByteStrRaw(..) => "byte string",
|
||||
CStr | CStrRaw(..) => "C string",
|
||||
Err => "error",
|
||||
}
|
||||
}
|
||||
|
@ -2,9 +2,13 @@
|
||||
|
||||
use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
|
||||
use crate::token::{self, Token};
|
||||
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
|
||||
use rustc_lexer::unescape::{
|
||||
byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit,
|
||||
Mode,
|
||||
};
|
||||
use rustc_span::symbol::{kw, sym, Symbol};
|
||||
use rustc_span::Span;
|
||||
use std::ops::Range;
|
||||
use std::{ascii, fmt, str};
|
||||
|
||||
// Escapes a string, represented as a symbol. Reuses the original symbol,
|
||||
@ -35,6 +39,7 @@ pub enum LitError {
|
||||
InvalidFloatSuffix,
|
||||
NonDecimalFloat(u32),
|
||||
IntTooLarge(u32),
|
||||
NulInCStr(Range<usize>),
|
||||
}
|
||||
|
||||
impl LitKind {
|
||||
@ -158,6 +163,52 @@ impl LitKind {
|
||||
|
||||
LitKind::ByteStr(bytes.into(), StrStyle::Raw(n))
|
||||
}
|
||||
token::CStr => {
|
||||
let s = symbol.as_str();
|
||||
let mut buf = Vec::with_capacity(s.len());
|
||||
let mut error = Ok(());
|
||||
unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
|
||||
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
|
||||
error = Err(LitError::NulInCStr(span));
|
||||
}
|
||||
Ok(CStrUnit::Byte(b)) => buf.push(b),
|
||||
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
|
||||
Ok(CStrUnit::Char(c)) => {
|
||||
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
|
||||
}
|
||||
Err(err) => {
|
||||
if err.is_fatal() {
|
||||
error = Err(LitError::LexerError);
|
||||
}
|
||||
}
|
||||
});
|
||||
error?;
|
||||
buf.push(0);
|
||||
LitKind::CStr(buf.into(), StrStyle::Cooked)
|
||||
}
|
||||
token::CStrRaw(n) => {
|
||||
let s = symbol.as_str();
|
||||
let mut buf = Vec::with_capacity(s.len());
|
||||
let mut error = Ok(());
|
||||
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
|
||||
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
|
||||
error = Err(LitError::NulInCStr(span));
|
||||
}
|
||||
Ok(CStrUnit::Byte(b)) => buf.push(b),
|
||||
Ok(CStrUnit::Char(c)) if c.len_utf8() == 1 => buf.push(c as u8),
|
||||
Ok(CStrUnit::Char(c)) => {
|
||||
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
|
||||
}
|
||||
Err(err) => {
|
||||
if err.is_fatal() {
|
||||
error = Err(LitError::LexerError);
|
||||
}
|
||||
}
|
||||
});
|
||||
error?;
|
||||
buf.push(0);
|
||||
LitKind::CStr(buf.into(), StrStyle::Raw(n))
|
||||
}
|
||||
token::Err => LitKind::Err,
|
||||
})
|
||||
}
|
||||
@ -191,6 +242,14 @@ impl fmt::Display for LitKind {
|
||||
string = symbol
|
||||
)?;
|
||||
}
|
||||
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
|
||||
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
|
||||
}
|
||||
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
|
||||
// This can only be valid UTF-8.
|
||||
let symbol = str::from_utf8(bytes).unwrap();
|
||||
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
|
||||
}
|
||||
LitKind::Int(n, ty) => {
|
||||
write!(f, "{n}")?;
|
||||
match ty {
|
||||
@ -237,6 +296,8 @@ impl MetaItemLit {
|
||||
LitKind::Str(_, ast::StrStyle::Raw(n)) => token::StrRaw(n),
|
||||
LitKind::ByteStr(_, ast::StrStyle::Cooked) => token::ByteStr,
|
||||
LitKind::ByteStr(_, ast::StrStyle::Raw(n)) => token::ByteStrRaw(n),
|
||||
LitKind::CStr(_, ast::StrStyle::Cooked) => token::CStr,
|
||||
LitKind::CStr(_, ast::StrStyle::Raw(n)) => token::CStrRaw(n),
|
||||
LitKind::Byte(_) => token::Byte,
|
||||
LitKind::Char(_) => token::Char,
|
||||
LitKind::Int(..) => token::Integer,
|
||||
|
@ -572,6 +572,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session) {
|
||||
}
|
||||
};
|
||||
}
|
||||
gate_all!(c_str_literals, "`c\"..\"` literals are experimental");
|
||||
gate_all!(
|
||||
if_let_guard,
|
||||
"`if let` guards are experimental",
|
||||
|
@ -210,6 +210,10 @@ pub fn literal_to_string(lit: token::Lit) -> String {
|
||||
token::ByteStrRaw(n) => {
|
||||
format!("br{delim}\"{string}\"{delim}", delim = "#".repeat(n as usize), string = symbol)
|
||||
}
|
||||
token::CStr => format!("c\"{symbol}\""),
|
||||
token::CStrRaw(n) => {
|
||||
format!("cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize))
|
||||
}
|
||||
token::Integer | token::Float | token::Bool | token::Err => symbol.to_string(),
|
||||
};
|
||||
|
||||
|
@ -32,6 +32,10 @@ pub fn expand_concat(
|
||||
Ok(ast::LitKind::Bool(b)) => {
|
||||
accumulator.push_str(&b.to_string());
|
||||
}
|
||||
Ok(ast::LitKind::CStr(..)) => {
|
||||
cx.span_err(e.span, "cannot concatenate a C string literal");
|
||||
has_errors = true;
|
||||
}
|
||||
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
|
||||
cx.emit_err(errors::ConcatBytestr { span: e.span });
|
||||
has_errors = true;
|
||||
|
@ -18,6 +18,11 @@ fn invalid_type_err(
|
||||
};
|
||||
let snippet = cx.sess.source_map().span_to_snippet(span).ok();
|
||||
match ast::LitKind::from_token_lit(token_lit) {
|
||||
Ok(ast::LitKind::CStr(_, _)) => {
|
||||
// FIXME(c_str_literals): should concatenation of C string literals
|
||||
// include the null bytes in the end?
|
||||
cx.span_err(span, "cannot concatenate C string literals");
|
||||
}
|
||||
Ok(ast::LitKind::Char(_)) => {
|
||||
let sugg =
|
||||
snippet.map(|snippet| ConcatBytesInvalidSuggestion::CharLit { span, snippet });
|
||||
|
@ -61,6 +61,8 @@ impl FromInternal<token::LitKind> for LitKind {
|
||||
token::StrRaw(n) => LitKind::StrRaw(n),
|
||||
token::ByteStr => LitKind::ByteStr,
|
||||
token::ByteStrRaw(n) => LitKind::ByteStrRaw(n),
|
||||
token::CStr => LitKind::CStr,
|
||||
token::CStrRaw(n) => LitKind::CStrRaw(n),
|
||||
token::Err => LitKind::Err,
|
||||
token::Bool => unreachable!(),
|
||||
}
|
||||
@ -78,6 +80,8 @@ impl ToInternal<token::LitKind> for LitKind {
|
||||
LitKind::StrRaw(n) => token::StrRaw(n),
|
||||
LitKind::ByteStr => token::ByteStr,
|
||||
LitKind::ByteStrRaw(n) => token::ByteStrRaw(n),
|
||||
LitKind::CStr => token::CStr,
|
||||
LitKind::CStrRaw(n) => token::CStrRaw(n),
|
||||
LitKind::Err => token::Err,
|
||||
}
|
||||
}
|
||||
@ -436,6 +440,8 @@ impl server::FreeFunctions for Rustc<'_, '_> {
|
||||
| token::LitKind::StrRaw(_)
|
||||
| token::LitKind::ByteStr
|
||||
| token::LitKind::ByteStrRaw(_)
|
||||
| token::LitKind::CStr
|
||||
| token::LitKind::CStrRaw(_)
|
||||
| token::LitKind::Err => return Err(()),
|
||||
token::LitKind::Integer | token::LitKind::Float => {}
|
||||
}
|
||||
|
@ -313,6 +313,8 @@ declare_features! (
|
||||
(active, async_closure, "1.37.0", Some(62290), None),
|
||||
/// Allows async functions to be declared, implemented, and used in traits.
|
||||
(active, async_fn_in_trait, "1.66.0", Some(91611), None),
|
||||
/// Allows `c"foo"` literals.
|
||||
(active, c_str_literals, "CURRENT_RUSTC_VERSION", Some(105723), None),
|
||||
/// Treat `extern "C"` function as nounwind.
|
||||
(active, c_unwind, "1.52.0", Some(74990), None),
|
||||
/// Allows using C-variadics.
|
||||
|
@ -333,6 +333,7 @@ language_item_table! {
|
||||
RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None;
|
||||
|
||||
String, sym::String, string, Target::Struct, GenericRequirement::None;
|
||||
CStr, sym::CStr, c_str, Target::Struct, GenericRequirement::None;
|
||||
}
|
||||
|
||||
pub enum GenericRequirement {
|
||||
|
@ -1300,6 +1300,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
|
||||
opt_ty.unwrap_or_else(|| self.next_float_var())
|
||||
}
|
||||
ast::LitKind::Bool(_) => tcx.types.bool,
|
||||
ast::LitKind::CStr(_, _) => tcx.mk_imm_ref(
|
||||
tcx.lifetimes.re_static,
|
||||
tcx.type_of(tcx.require_lang_item(hir::LangItem::CStr, Some(lit.span)))
|
||||
.skip_binder(),
|
||||
),
|
||||
ast::LitKind::Err => tcx.ty_error_misc(),
|
||||
}
|
||||
}
|
||||
|
@ -186,12 +186,16 @@ pub enum LiteralKind {
|
||||
Str { terminated: bool },
|
||||
/// "b"abc"", "b"abc"
|
||||
ByteStr { terminated: bool },
|
||||
/// `c"abc"`, `c"abc`
|
||||
CStr { terminated: bool },
|
||||
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
|
||||
/// an invalid literal.
|
||||
RawStr { n_hashes: Option<u8> },
|
||||
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
|
||||
/// indicates an invalid literal.
|
||||
RawByteStr { n_hashes: Option<u8> },
|
||||
/// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
|
||||
RawCStr { n_hashes: Option<u8> },
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
@ -357,39 +361,18 @@ impl Cursor<'_> {
|
||||
},
|
||||
|
||||
// Byte literal, byte string literal, raw byte string literal or identifier.
|
||||
'b' => match (self.first(), self.second()) {
|
||||
('\'', _) => {
|
||||
self.bump();
|
||||
let terminated = self.single_quoted_string();
|
||||
let suffix_start = self.pos_within_token();
|
||||
if terminated {
|
||||
self.eat_literal_suffix();
|
||||
}
|
||||
let kind = Byte { terminated };
|
||||
Literal { kind, suffix_start }
|
||||
}
|
||||
('"', _) => {
|
||||
self.bump();
|
||||
let terminated = self.double_quoted_string();
|
||||
let suffix_start = self.pos_within_token();
|
||||
if terminated {
|
||||
self.eat_literal_suffix();
|
||||
}
|
||||
let kind = ByteStr { terminated };
|
||||
Literal { kind, suffix_start }
|
||||
}
|
||||
('r', '"') | ('r', '#') => {
|
||||
self.bump();
|
||||
let res = self.raw_double_quoted_string(2);
|
||||
let suffix_start = self.pos_within_token();
|
||||
if res.is_ok() {
|
||||
self.eat_literal_suffix();
|
||||
}
|
||||
let kind = RawByteStr { n_hashes: res.ok() };
|
||||
Literal { kind, suffix_start }
|
||||
}
|
||||
_ => self.ident_or_unknown_prefix(),
|
||||
},
|
||||
'b' => self.c_or_byte_string(
|
||||
|terminated| ByteStr { terminated },
|
||||
|n_hashes| RawByteStr { n_hashes },
|
||||
Some(|terminated| Byte { terminated }),
|
||||
),
|
||||
|
||||
// c-string literal, raw c-string literal or identifier.
|
||||
'c' => self.c_or_byte_string(
|
||||
|terminated| CStr { terminated },
|
||||
|n_hashes| RawCStr { n_hashes },
|
||||
None,
|
||||
),
|
||||
|
||||
// Identifier (this should be checked after other variant that can
|
||||
// start as identifier).
|
||||
@ -553,6 +536,47 @@ impl Cursor<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
fn c_or_byte_string(
|
||||
&mut self,
|
||||
mk_kind: impl FnOnce(bool) -> LiteralKind,
|
||||
mk_kind_raw: impl FnOnce(Option<u8>) -> LiteralKind,
|
||||
single_quoted: Option<fn(bool) -> LiteralKind>,
|
||||
) -> TokenKind {
|
||||
match (self.first(), self.second(), single_quoted) {
|
||||
('\'', _, Some(mk_kind)) => {
|
||||
self.bump();
|
||||
let terminated = self.single_quoted_string();
|
||||
let suffix_start = self.pos_within_token();
|
||||
if terminated {
|
||||
self.eat_literal_suffix();
|
||||
}
|
||||
let kind = mk_kind(terminated);
|
||||
Literal { kind, suffix_start }
|
||||
}
|
||||
('"', _, _) => {
|
||||
self.bump();
|
||||
let terminated = self.double_quoted_string();
|
||||
let suffix_start = self.pos_within_token();
|
||||
if terminated {
|
||||
self.eat_literal_suffix();
|
||||
}
|
||||
let kind = mk_kind(terminated);
|
||||
Literal { kind, suffix_start }
|
||||
}
|
||||
('r', '"', _) | ('r', '#', _) => {
|
||||
self.bump();
|
||||
let res = self.raw_double_quoted_string(2);
|
||||
let suffix_start = self.pos_within_token();
|
||||
if res.is_ok() {
|
||||
self.eat_literal_suffix();
|
||||
}
|
||||
let kind = mk_kind_raw(res.ok());
|
||||
Literal { kind, suffix_start }
|
||||
}
|
||||
_ => self.ident_or_unknown_prefix(),
|
||||
}
|
||||
}
|
||||
|
||||
fn number(&mut self, first_digit: char) -> LiteralKind {
|
||||
debug_assert!('0' <= self.prev() && self.prev() <= '9');
|
||||
let mut base = Base::Decimal;
|
||||
|
@ -86,10 +86,45 @@ where
|
||||
let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte);
|
||||
callback(0..(src.len() - chars.as_str().len()), res);
|
||||
}
|
||||
Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback),
|
||||
Mode::Str | Mode::ByteStr => unescape_str_common(src, mode, callback),
|
||||
|
||||
Mode::RawStr | Mode::RawByteStr => {
|
||||
unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
|
||||
}
|
||||
Mode::CStr | Mode::RawCStr => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// A unit within a C string literal: either a single raw byte or a full
/// Unicode scalar value.
///
/// A unit must not be a nul character. The type itself does not enforce this;
/// code consuming units is expected to reject `Byte(0)` and `Char('\0')`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CStrUnit {
    /// A single byte, stored as-is.
    Byte(u8),
    /// A Unicode scalar value; consumers decide how to encode it.
    Char(char),
}

impl From<u8> for CStrUnit {
    /// Wraps a raw byte as [`CStrUnit::Byte`].
    fn from(value: u8) -> Self {
        CStrUnit::Byte(value)
    }
}

impl From<char> for CStrUnit {
    /// Wraps a character as [`CStrUnit::Char`].
    fn from(value: char) -> Self {
        CStrUnit::Char(value)
    }
}
|
||||
|
||||
pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<CStrUnit, EscapeError>),
|
||||
{
|
||||
if mode == Mode::RawCStr {
|
||||
unescape_raw_str_or_raw_byte_str(
|
||||
src,
|
||||
mode.characters_should_be_ascii(),
|
||||
&mut |r, result| callback(r, result.map(CStrUnit::Char)),
|
||||
);
|
||||
} else {
|
||||
unescape_str_common(src, mode, callback);
|
||||
}
|
||||
}
|
||||
|
||||
@ -114,34 +149,69 @@ pub enum Mode {
|
||||
ByteStr,
|
||||
RawStr,
|
||||
RawByteStr,
|
||||
CStr,
|
||||
RawCStr,
|
||||
}
|
||||
|
||||
impl Mode {
|
||||
pub fn in_double_quotes(self) -> bool {
|
||||
match self {
|
||||
Mode::Str | Mode::ByteStr | Mode::RawStr | Mode::RawByteStr => true,
|
||||
Mode::Str
|
||||
| Mode::ByteStr
|
||||
| Mode::RawStr
|
||||
| Mode::RawByteStr
|
||||
| Mode::CStr
|
||||
| Mode::RawCStr => true,
|
||||
Mode::Char | Mode::Byte => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_byte(self) -> bool {
|
||||
/// Non-byte literals should have `\xXX` escapes that are within the ASCII range.
|
||||
pub fn ascii_escapes_should_be_ascii(self) -> bool {
|
||||
match self {
|
||||
Mode::Char | Mode::Str | Mode::RawStr => true,
|
||||
Mode::Byte | Mode::ByteStr | Mode::RawByteStr | Mode::CStr | Mode::RawCStr => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether characters within the literal must be within the ASCII range
|
||||
pub fn characters_should_be_ascii(self) -> bool {
|
||||
match self {
|
||||
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
|
||||
Mode::Char | Mode::Str | Mode::RawStr => false,
|
||||
Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Byte literals do not allow unicode escape.
|
||||
pub fn is_unicode_escape_disallowed(self) -> bool {
|
||||
match self {
|
||||
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
|
||||
Mode::Char | Mode::Str | Mode::RawStr | Mode::CStr | Mode::RawCStr => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn prefix_noraw(self) -> &'static str {
|
||||
match self {
|
||||
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => "b",
|
||||
Mode::CStr | Mode::RawCStr => "c",
|
||||
Mode::Char | Mode::Str | Mode::RawStr => "",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
|
||||
fn scan_escape<T: From<u8> + From<char>>(
|
||||
chars: &mut Chars<'_>,
|
||||
mode: Mode,
|
||||
) -> Result<T, EscapeError> {
|
||||
// Previous character was '\\', unescape what follows.
|
||||
let res = match chars.next().ok_or(EscapeError::LoneSlash)? {
|
||||
'"' => '"',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'\\' => '\\',
|
||||
'\'' => '\'',
|
||||
'0' => '\0',
|
||||
'"' => b'"',
|
||||
'n' => b'\n',
|
||||
'r' => b'\r',
|
||||
't' => b'\t',
|
||||
'\\' => b'\\',
|
||||
'\'' => b'\'',
|
||||
'0' => b'\0',
|
||||
|
||||
'x' => {
|
||||
// Parse hexadecimal character code.
|
||||
@ -154,76 +224,78 @@ fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError
|
||||
|
||||
let value = hi * 16 + lo;
|
||||
|
||||
// For a non-byte literal verify that it is within ASCII range.
|
||||
if !is_byte && !is_ascii(value) {
|
||||
if mode.ascii_escapes_should_be_ascii() && !is_ascii(value) {
|
||||
return Err(EscapeError::OutOfRangeHexEscape);
|
||||
}
|
||||
let value = value as u8;
|
||||
|
||||
value as char
|
||||
value as u8
|
||||
}
|
||||
|
||||
'u' => {
|
||||
// We've parsed '\u', now we have to parse '{..}'.
|
||||
|
||||
if chars.next() != Some('{') {
|
||||
return Err(EscapeError::NoBraceInUnicodeEscape);
|
||||
}
|
||||
|
||||
// First character must be a hexadecimal digit.
|
||||
let mut n_digits = 1;
|
||||
let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
|
||||
'_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
|
||||
'}' => return Err(EscapeError::EmptyUnicodeEscape),
|
||||
c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
|
||||
};
|
||||
|
||||
// First character is valid, now parse the rest of the number
|
||||
// and closing brace.
|
||||
loop {
|
||||
match chars.next() {
|
||||
None => return Err(EscapeError::UnclosedUnicodeEscape),
|
||||
Some('_') => continue,
|
||||
Some('}') => {
|
||||
if n_digits > 6 {
|
||||
return Err(EscapeError::OverlongUnicodeEscape);
|
||||
}
|
||||
|
||||
// Incorrect syntax has higher priority for error reporting
|
||||
// than unallowed value for a literal.
|
||||
if is_byte {
|
||||
return Err(EscapeError::UnicodeEscapeInByte);
|
||||
}
|
||||
|
||||
break std::char::from_u32(value).ok_or_else(|| {
|
||||
if value > 0x10FFFF {
|
||||
EscapeError::OutOfRangeUnicodeEscape
|
||||
} else {
|
||||
EscapeError::LoneSurrogateUnicodeEscape
|
||||
}
|
||||
})?;
|
||||
}
|
||||
Some(c) => {
|
||||
let digit: u32 =
|
||||
c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
|
||||
n_digits += 1;
|
||||
if n_digits > 6 {
|
||||
// Stop updating value since we're sure that it's incorrect already.
|
||||
continue;
|
||||
}
|
||||
value = value * 16 + digit;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
'u' => return scan_unicode(chars, mode.is_unicode_escape_disallowed()).map(Into::into),
|
||||
_ => return Err(EscapeError::InvalidEscape),
|
||||
};
|
||||
Ok(res)
|
||||
Ok(res.into())
|
||||
}
|
||||
|
||||
fn scan_unicode(
|
||||
chars: &mut Chars<'_>,
|
||||
is_unicode_escape_disallowed: bool,
|
||||
) -> Result<char, EscapeError> {
|
||||
// We've parsed '\u', now we have to parse '{..}'.
|
||||
|
||||
if chars.next() != Some('{') {
|
||||
return Err(EscapeError::NoBraceInUnicodeEscape);
|
||||
}
|
||||
|
||||
// First character must be a hexadecimal digit.
|
||||
let mut n_digits = 1;
|
||||
let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
|
||||
'_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
|
||||
'}' => return Err(EscapeError::EmptyUnicodeEscape),
|
||||
c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
|
||||
};
|
||||
|
||||
// First character is valid, now parse the rest of the number
|
||||
// and closing brace.
|
||||
loop {
|
||||
match chars.next() {
|
||||
None => return Err(EscapeError::UnclosedUnicodeEscape),
|
||||
Some('_') => continue,
|
||||
Some('}') => {
|
||||
if n_digits > 6 {
|
||||
return Err(EscapeError::OverlongUnicodeEscape);
|
||||
}
|
||||
|
||||
// Incorrect syntax has higher priority for error reporting
|
||||
// than unallowed value for a literal.
|
||||
if is_unicode_escape_disallowed {
|
||||
return Err(EscapeError::UnicodeEscapeInByte);
|
||||
}
|
||||
|
||||
break std::char::from_u32(value).ok_or_else(|| {
|
||||
if value > 0x10FFFF {
|
||||
EscapeError::OutOfRangeUnicodeEscape
|
||||
} else {
|
||||
EscapeError::LoneSurrogateUnicodeEscape
|
||||
}
|
||||
});
|
||||
}
|
||||
Some(c) => {
|
||||
let digit: u32 = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
|
||||
n_digits += 1;
|
||||
if n_digits > 6 {
|
||||
// Stop updating value since we're sure that it's incorrect already.
|
||||
continue;
|
||||
}
|
||||
value = value * 16 + digit;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
|
||||
if is_byte && !c.is_ascii() {
|
||||
fn ascii_check(c: char, characters_should_be_ascii: bool) -> Result<char, EscapeError> {
|
||||
if characters_should_be_ascii && !c.is_ascii() {
|
||||
// Byte literal can't be a non-ascii character.
|
||||
Err(EscapeError::NonAsciiCharInByte)
|
||||
} else {
|
||||
@ -234,7 +306,7 @@ fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
|
||||
fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
|
||||
let c = chars.next().ok_or(EscapeError::ZeroChars)?;
|
||||
let res = match c {
|
||||
'\\' => scan_escape(chars, is_byte),
|
||||
'\\' => scan_escape(chars, if is_byte { Mode::Byte } else { Mode::Char }),
|
||||
'\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
|
||||
'\r' => Err(EscapeError::BareCarriageReturn),
|
||||
_ => ascii_check(c, is_byte),
|
||||
@ -247,9 +319,9 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, E
|
||||
|
||||
/// Takes the contents of a string literal (without quotes) and produces a
|
||||
/// sequence of escaped characters or errors.
|
||||
fn unescape_str_or_byte_str<F>(src: &str, is_byte: bool, callback: &mut F)
|
||||
fn unescape_str_common<F, T: From<u8> + From<char>>(src: &str, mode: Mode, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||
F: FnMut(Range<usize>, Result<T, EscapeError>),
|
||||
{
|
||||
let mut chars = src.chars();
|
||||
|
||||
@ -266,47 +338,49 @@ where
|
||||
// if unescaped '\' character is followed by '\n'.
|
||||
// For details see [Rust language reference]
|
||||
// (https://doc.rust-lang.org/reference/tokens.html#string-literals).
|
||||
skip_ascii_whitespace(&mut chars, start, callback);
|
||||
skip_ascii_whitespace(&mut chars, start, &mut |range, err| {
|
||||
callback(range, Err(err))
|
||||
});
|
||||
continue;
|
||||
}
|
||||
_ => scan_escape(&mut chars, is_byte),
|
||||
_ => scan_escape::<T>(&mut chars, mode),
|
||||
}
|
||||
}
|
||||
'\n' => Ok('\n'),
|
||||
'\t' => Ok('\t'),
|
||||
'\n' => Ok(b'\n'.into()),
|
||||
'\t' => Ok(b'\t'.into()),
|
||||
'"' => Err(EscapeError::EscapeOnlyChar),
|
||||
'\r' => Err(EscapeError::BareCarriageReturn),
|
||||
_ => ascii_check(c, is_byte),
|
||||
_ => ascii_check(c, mode.characters_should_be_ascii()).map(Into::into),
|
||||
};
|
||||
let end = src.len() - chars.as_str().len();
|
||||
callback(start..end, res);
|
||||
callback(start..end, res.map(Into::into));
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, Result<char, EscapeError>),
|
||||
{
|
||||
let tail = chars.as_str();
|
||||
let first_non_space = tail
|
||||
.bytes()
|
||||
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
|
||||
.unwrap_or(tail.len());
|
||||
if tail[1..first_non_space].contains('\n') {
|
||||
// The +1 accounts for the escaping slash.
|
||||
let end = start + first_non_space + 1;
|
||||
callback(start..end, Err(EscapeError::MultipleSkippedLinesWarning));
|
||||
}
|
||||
let tail = &tail[first_non_space..];
|
||||
if let Some(c) = tail.chars().nth(0) {
|
||||
if c.is_whitespace() {
|
||||
// For error reporting, we would like the span to contain the character that was not
|
||||
// skipped. The +1 is necessary to account for the leading \ that started the escape.
|
||||
let end = start + first_non_space + c.len_utf8() + 1;
|
||||
callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
|
||||
}
|
||||
}
|
||||
*chars = tail.chars();
|
||||
fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
|
||||
where
|
||||
F: FnMut(Range<usize>, EscapeError),
|
||||
{
|
||||
let tail = chars.as_str();
|
||||
let first_non_space = tail
|
||||
.bytes()
|
||||
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
|
||||
.unwrap_or(tail.len());
|
||||
if tail[1..first_non_space].contains('\n') {
|
||||
// The +1 accounts for the escaping slash.
|
||||
let end = start + first_non_space + 1;
|
||||
callback(start..end, EscapeError::MultipleSkippedLinesWarning);
|
||||
}
|
||||
let tail = &tail[first_non_space..];
|
||||
if let Some(c) = tail.chars().nth(0) {
|
||||
if c.is_whitespace() {
|
||||
// For error reporting, we would like the span to contain the character that was not
|
||||
// skipped. The +1 is necessary to account for the leading \ that started the escape.
|
||||
let end = start + first_non_space + c.len_utf8() + 1;
|
||||
callback(start..end, EscapeError::UnskippedWhitespaceWarning);
|
||||
}
|
||||
}
|
||||
*chars = tail.chars();
|
||||
}
|
||||
|
||||
/// Takes the contents of a string literal (without quotes) and produces a
|
||||
|
@ -146,6 +146,12 @@ pub(crate) fn lit_to_mir_constant<'tcx>(
|
||||
let id = tcx.allocate_bytes(data);
|
||||
ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx))
|
||||
}
|
||||
(ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if Some(def.did()) == tcx.lang_items().c_str()) =>
|
||||
{
|
||||
let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8]);
|
||||
let allocation = tcx.mk_const_alloc(allocation);
|
||||
ConstValue::Slice { data: allocation, start: 0, end: data.len() }
|
||||
}
|
||||
(ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => {
|
||||
ConstValue::Scalar(Scalar::from_uint(*n, Size::from_bytes(1)))
|
||||
}
|
||||
|
@ -1,3 +1,5 @@
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::errors;
|
||||
use crate::lexer::unicode_chars::UNICODE_ARRAY;
|
||||
use crate::make_unclosed_delims_error;
|
||||
@ -6,7 +8,7 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
|
||||
use rustc_ast::tokenstream::TokenStream;
|
||||
use rustc_ast::util::unicode::contains_text_flow_control_chars;
|
||||
use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
|
||||
use rustc_lexer::unescape::{self, Mode};
|
||||
use rustc_lexer::unescape::{self, EscapeError, Mode};
|
||||
use rustc_lexer::Cursor;
|
||||
use rustc_lexer::{Base, DocStyle, RawStrError};
|
||||
use rustc_session::lint::builtin::{
|
||||
@ -204,6 +206,9 @@ impl<'a> StringReader<'a> {
|
||||
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
|
||||
let suffix_start = start + BytePos(suffix_start);
|
||||
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
|
||||
if let token::LitKind::CStr | token::LitKind::CStrRaw(_) = kind {
|
||||
self.sess.gated_spans.gate(sym::c_str_literals, self.mk_sp(start, self.pos));
|
||||
}
|
||||
let suffix = if suffix_start < self.pos {
|
||||
let string = self.str_from(suffix_start);
|
||||
if string == "_" {
|
||||
@ -415,6 +420,16 @@ impl<'a> StringReader<'a> {
|
||||
}
|
||||
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
|
||||
}
|
||||
rustc_lexer::LiteralKind::CStr { terminated } => {
|
||||
if !terminated {
|
||||
self.sess.span_diagnostic.span_fatal_with_code(
|
||||
self.mk_sp(start + BytePos(1), end),
|
||||
"unterminated C string",
|
||||
error_code!(E0767),
|
||||
)
|
||||
}
|
||||
self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
|
||||
}
|
||||
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
|
||||
if let Some(n_hashes) = n_hashes {
|
||||
let n = u32::from(n_hashes);
|
||||
@ -433,6 +448,15 @@ impl<'a> StringReader<'a> {
|
||||
self.report_raw_str_error(start, 2);
|
||||
}
|
||||
}
|
||||
rustc_lexer::LiteralKind::RawCStr { n_hashes } => {
|
||||
if let Some(n_hashes) = n_hashes {
|
||||
let n = u32::from(n_hashes);
|
||||
let kind = token::CStrRaw(n_hashes);
|
||||
self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
|
||||
} else {
|
||||
self.report_raw_str_error(start, 2);
|
||||
}
|
||||
}
|
||||
rustc_lexer::LiteralKind::Int { base, empty_int } => {
|
||||
if empty_int {
|
||||
let span = self.mk_sp(start, end);
|
||||
@ -648,7 +672,7 @@ impl<'a> StringReader<'a> {
|
||||
self.sess.emit_fatal(errors::TooManyHashes { span: self.mk_sp(start, self.pos), num });
|
||||
}
|
||||
|
||||
fn cook_quoted(
|
||||
fn cook_common(
|
||||
&self,
|
||||
kind: token::LitKind,
|
||||
mode: Mode,
|
||||
@ -656,12 +680,13 @@ impl<'a> StringReader<'a> {
|
||||
end: BytePos,
|
||||
prefix_len: u32,
|
||||
postfix_len: u32,
|
||||
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
|
||||
) -> (token::LitKind, Symbol) {
|
||||
let mut has_fatal_err = false;
|
||||
let content_start = start + BytePos(prefix_len);
|
||||
let content_end = end - BytePos(postfix_len);
|
||||
let lit_content = self.str_from_to(content_start, content_end);
|
||||
unescape::unescape_literal(lit_content, mode, &mut |range, result| {
|
||||
unescape(lit_content, mode, &mut |range, result| {
|
||||
// Here we only check for errors. The actual unescaping is done later.
|
||||
if let Err(err) = result {
|
||||
let span_with_quotes = self.mk_sp(start, end);
|
||||
@ -692,6 +717,38 @@ impl<'a> StringReader<'a> {
|
||||
(token::Err, self.symbol_from_to(start, end))
|
||||
}
|
||||
}
|
||||
|
||||
fn cook_quoted(
|
||||
&self,
|
||||
kind: token::LitKind,
|
||||
mode: Mode,
|
||||
start: BytePos,
|
||||
end: BytePos,
|
||||
prefix_len: u32,
|
||||
postfix_len: u32,
|
||||
) -> (token::LitKind, Symbol) {
|
||||
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
|
||||
unescape::unescape_literal(src, mode, &mut |span, result| {
|
||||
callback(span, result.map(drop))
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn cook_c_string(
|
||||
&self,
|
||||
kind: token::LitKind,
|
||||
mode: Mode,
|
||||
start: BytePos,
|
||||
end: BytePos,
|
||||
prefix_len: u32,
|
||||
postfix_len: u32,
|
||||
) -> (token::LitKind, Symbol) {
|
||||
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
|
||||
unescape::unescape_c_string(src, mode, &mut |span, result| {
|
||||
callback(span, result.map(drop))
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn nfc_normalize(string: &str) -> Symbol {
|
||||
|
@ -78,8 +78,7 @@ pub(crate) fn emit_unescape_error(
|
||||
}
|
||||
};
|
||||
let sugg = sugg.unwrap_or_else(|| {
|
||||
let is_byte = mode.is_byte();
|
||||
let prefix = if is_byte { "b" } else { "" };
|
||||
let prefix = mode.prefix_noraw();
|
||||
let mut escaped = String::with_capacity(lit.len());
|
||||
let mut chrs = lit.chars().peekable();
|
||||
while let Some(first) = chrs.next() {
|
||||
@ -97,7 +96,11 @@ pub(crate) fn emit_unescape_error(
|
||||
};
|
||||
}
|
||||
let sugg = format!("{prefix}\"{escaped}\"");
|
||||
MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
|
||||
MoreThanOneCharSugg::Quotes {
|
||||
span: span_with_quotes,
|
||||
is_byte: mode == Mode::Byte,
|
||||
sugg,
|
||||
}
|
||||
});
|
||||
handler.emit_err(UnescapeError::MoreThanOneChar {
|
||||
span: span_with_quotes,
|
||||
@ -112,7 +115,7 @@ pub(crate) fn emit_unescape_error(
|
||||
char_span,
|
||||
escaped_sugg: c.escape_default().to_string(),
|
||||
escaped_msg: escaped_char(c),
|
||||
byte: mode.is_byte(),
|
||||
byte: mode == Mode::Byte,
|
||||
});
|
||||
}
|
||||
EscapeError::BareCarriageReturn => {
|
||||
@ -126,12 +129,15 @@ pub(crate) fn emit_unescape_error(
|
||||
EscapeError::InvalidEscape => {
|
||||
let (c, span) = last_char();
|
||||
|
||||
let label =
|
||||
if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" };
|
||||
let label = if mode == Mode::Byte || mode == Mode::ByteStr {
|
||||
"unknown byte escape"
|
||||
} else {
|
||||
"unknown character escape"
|
||||
};
|
||||
let ec = escaped_char(c);
|
||||
let mut diag = handler.struct_span_err(span, format!("{}: `{}`", label, ec));
|
||||
diag.span_label(span, label);
|
||||
if c == '{' || c == '}' && !mode.is_byte() {
|
||||
if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) {
|
||||
diag.help(
|
||||
"if used in a formatting string, curly braces are escaped with `{{` and `}}`",
|
||||
);
|
||||
@ -141,7 +147,7 @@ pub(crate) fn emit_unescape_error(
|
||||
version control settings",
|
||||
);
|
||||
} else {
|
||||
if !mode.is_byte() {
|
||||
if mode == Mode::Str || mode == Mode::Char {
|
||||
diag.span_suggestion(
|
||||
span_with_quotes,
|
||||
"if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
|
||||
|
@ -1870,6 +1870,7 @@ impl<'a> Parser<'a> {
|
||||
let recovered = self.recover_after_dot();
|
||||
let token = recovered.as_ref().unwrap_or(&self.token);
|
||||
let span = token.span;
|
||||
|
||||
token::Lit::from_token(token).map(|token_lit| {
|
||||
self.bump();
|
||||
(token_lit, span)
|
||||
|
@ -101,3 +101,5 @@ session_invalid_int_literal_width = invalid width `{$width}` for integer literal
|
||||
.help = valid widths are 8, 16, 32, 64 and 128
|
||||
|
||||
session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg}
|
||||
|
||||
session_nul_in_c_str = null characters in C string literals are not supported
|
||||
|
@ -6,7 +6,7 @@ use rustc_ast::token;
|
||||
use rustc_ast::util::literal::LitError;
|
||||
use rustc_errors::{error_code, DiagnosticMessage, EmissionGuarantee, IntoDiagnostic, MultiSpan};
|
||||
use rustc_macros::Diagnostic;
|
||||
use rustc_span::{Span, Symbol};
|
||||
use rustc_span::{BytePos, Span, Symbol};
|
||||
use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple};
|
||||
|
||||
#[derive(Diagnostic)]
|
||||
@ -323,6 +323,13 @@ pub(crate) struct BinaryFloatLiteralNotSupported {
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
/// Error diagnostic rendered from the `session_nul_in_c_str` Fluent message
/// ("null characters in C string literals are not supported").
///
/// `report_lit_error` emits it for `LitError::NulInCStr`.
#[derive(Diagnostic)]
|
||||
#[diag(session_nul_in_c_str)]
|
||||
pub(crate) struct NulInCStr {
|
||||
// Span highlighted as the primary location of the error.
#[primary_span]
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) {
|
||||
// Checks if `s` looks like i32 or u1234 etc.
|
||||
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
|
||||
@ -401,6 +408,12 @@ pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span:
|
||||
};
|
||||
sess.emit_err(IntLiteralTooLarge { span, limit });
|
||||
}
|
||||
LitError::NulInCStr(range) => {
|
||||
let lo = BytePos(span.lo().0 + range.start as u32 + 2);
|
||||
let hi = BytePos(span.lo().0 + range.end as u32 + 2);
|
||||
let span = span.with_lo(lo).with_hi(hi);
|
||||
sess.emit_err(NulInCStr { span });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -441,6 +441,7 @@ symbols! {
|
||||
bridge,
|
||||
bswap,
|
||||
c_str,
|
||||
c_str_literals,
|
||||
c_unwind,
|
||||
c_variadic,
|
||||
c_void,
|
||||
|
@ -79,9 +79,9 @@ use crate::str;
|
||||
///
|
||||
/// [str]: prim@str "str"
|
||||
#[derive(Hash)]
|
||||
#[cfg_attr(not(test), rustc_diagnostic_item = "CStr")]
|
||||
#[stable(feature = "core_c_str", since = "1.64.0")]
|
||||
#[rustc_has_incoherent_inherent_impls]
|
||||
#[cfg_attr(not(bootstrap), lang = "CStr")]
|
||||
// FIXME:
|
||||
// `fn from` in `impl From<&CStr> for Box<CStr>` current implementation relies
|
||||
// on `CStr` being layout-compatible with `[u8]`.
|
||||
|
@ -337,6 +337,8 @@ pub enum LitKind {
|
||||
StrRaw(u8),
|
||||
ByteStr,
|
||||
ByteStrRaw(u8),
|
||||
CStr,
|
||||
CStrRaw(u8),
|
||||
Err,
|
||||
}
|
||||
|
||||
@ -350,6 +352,8 @@ rpc_encode_decode!(
|
||||
StrRaw(n),
|
||||
ByteStr,
|
||||
ByteStrRaw(n),
|
||||
CStr,
|
||||
CStrRaw(n),
|
||||
Err,
|
||||
}
|
||||
);
|
||||
|
@ -811,7 +811,9 @@ impl<'src> Classifier<'src> {
|
||||
| LiteralKind::Str { .. }
|
||||
| LiteralKind::ByteStr { .. }
|
||||
| LiteralKind::RawStr { .. }
|
||||
| LiteralKind::RawByteStr { .. } => Class::String,
|
||||
| LiteralKind::RawByteStr { .. }
|
||||
| LiteralKind::CStr { .. }
|
||||
| LiteralKind::RawCStr { .. } => Class::String,
|
||||
// Number literals.
|
||||
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
|
||||
},
|
||||
|
@ -284,6 +284,7 @@ impl<'a> NormalizedPat<'a> {
|
||||
LitKind::Str(sym, _) => Self::LitStr(sym),
|
||||
LitKind::ByteStr(ref bytes, _) => Self::LitBytes(bytes),
|
||||
LitKind::Byte(val) => Self::LitInt(val.into()),
|
||||
LitKind::CStr(ref bytes, _) => Self::LitBytes(bytes),
|
||||
LitKind::Char(val) => Self::LitInt(val.into()),
|
||||
LitKind::Int(val, _) => Self::LitInt(val),
|
||||
LitKind::Bool(val) => Self::LitBool(val),
|
||||
|
@ -1,11 +1,11 @@
|
||||
use clippy_utils::diagnostics::span_lint_and_sugg;
|
||||
use clippy_utils::source::snippet_with_context;
|
||||
use clippy_utils::ty::is_type_diagnostic_item;
|
||||
use clippy_utils::ty::{is_type_diagnostic_item, is_type_lang_item};
|
||||
use clippy_utils::visitors::is_expr_unsafe;
|
||||
use clippy_utils::{get_parent_node, match_libc_symbol};
|
||||
use if_chain::if_chain;
|
||||
use rustc_errors::Applicability;
|
||||
use rustc_hir::{Block, BlockCheckMode, Expr, ExprKind, Node, UnsafeSource};
|
||||
use rustc_hir::{Block, BlockCheckMode, Expr, ExprKind, LangItem, Node, UnsafeSource};
|
||||
use rustc_lint::{LateContext, LateLintPass};
|
||||
use rustc_session::{declare_lint_pass, declare_tool_lint};
|
||||
use rustc_span::symbol::sym;
|
||||
@ -67,7 +67,7 @@ impl<'tcx> LateLintPass<'tcx> for StrlenOnCStrings {
|
||||
let val_name = snippet_with_context(cx, self_arg.span, ctxt, "..", &mut app).0;
|
||||
let method_name = if is_type_diagnostic_item(cx, ty, sym::cstring_type) {
|
||||
"as_bytes"
|
||||
} else if is_type_diagnostic_item(cx, ty, sym::CStr) {
|
||||
} else if is_type_lang_item(cx, ty, LangItem::CStr) {
|
||||
"to_bytes"
|
||||
} else {
|
||||
return;
|
||||
|
@ -304,6 +304,11 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> {
|
||||
kind!("ByteStr(ref {vec})");
|
||||
chain!(self, "let [{:?}] = **{vec}", vec.value);
|
||||
},
|
||||
LitKind::CStr(ref vec, _) => {
|
||||
bind!(self, vec);
|
||||
kind!("CStr(ref {vec})");
|
||||
chain!(self, "let [{:?}] = **{vec}", vec.value);
|
||||
}
|
||||
LitKind::Str(s, _) => {
|
||||
bind!(self, s);
|
||||
kind!("Str({s}, _)");
|
||||
|
@ -211,6 +211,7 @@ pub fn lit_to_mir_constant(lit: &LitKind, ty: Option<Ty<'_>>) -> Constant {
|
||||
LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
|
||||
LitKind::Byte(b) => Constant::Int(u128::from(b)),
|
||||
LitKind::ByteStr(ref s, _) => Constant::Binary(Lrc::clone(s)),
|
||||
LitKind::CStr(ref s, _) => Constant::Binary(Lrc::clone(s)),
|
||||
LitKind::Char(c) => Constant::Char(c),
|
||||
LitKind::Int(n, _) => Constant::Int(n),
|
||||
LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {
|
||||
|
7
tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs
Normal file
7
tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs
Normal file
@ -0,0 +1,7 @@
|
||||
// run-pass
|
||||
|
||||
#![feature(c_str_literals)]
|
||||
|
||||
// Smoke test for `c"…"` literals: the literal's bytes, including the trailing
// NUL, must equal the hand-written byte string.
fn main() {
|
||||
assert_eq!(b"test\0", c"test".to_bytes_with_nul());
|
||||
}
|
13
tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs
Normal file
13
tests/ui/rfcs/rfc-3348-c-string-literals/gate.rs
Normal file
@ -0,0 +1,13 @@
|
||||
// gate-test-c_str_literals
|
||||
|
||||
macro_rules! m {
|
||||
($t:tt) => {} // accepts one token tree and expands to nothing
|
||||
}
|
||||
|
||||
fn main() {
|
||||
c"foo";
|
||||
//~^ ERROR: `c".."` literals are experimental
|
||||
|
||||
m!(c"test");
|
||||
//~^ ERROR: `c".."` literals are experimental
|
||||
// Without `#![feature(c_str_literals)]` both uses above must be rejected:
// the literal as an expression and the literal passed to a macro as a token.
// This note sits below the statements so the line numbers recorded in
// gate.stderr (8 and 11) stay valid.
}
|
21
tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr
Normal file
21
tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr
Normal file
@ -0,0 +1,21 @@
|
||||
error[E0658]: `c".."` literals are experimental
|
||||
--> $DIR/gate.rs:8:5
|
||||
|
|
||||
LL | c"foo";
|
||||
| ^^^^^^
|
||||
|
|
||||
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
|
||||
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable
|
||||
|
||||
error[E0658]: `c".."` literals are experimental
|
||||
--> $DIR/gate.rs:11:8
|
||||
|
|
||||
LL | m!(c"test");
|
||||
| ^^^^^^^
|
||||
|
|
||||
= note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
|
||||
= help: add `#![feature(c_str_literals)]` to the crate attributes to enable
|
||||
|
||||
error: aborting due to 2 previous errors
|
||||
|
||||
For more information about this error, try `rustc --explain E0658`.
|
BIN
tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
Normal file
BIN
tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
Normal file
Binary file not shown.
BIN
tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
Normal file
BIN
tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
Normal file
Binary file not shown.
10
tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs
Normal file
10
tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs
Normal file
@ -0,0 +1,10 @@
|
||||
// run-pass
|
||||
|
||||
#![feature(c_str_literals)]
|
||||
|
||||
// Checks the byte encoding of a C string literal that mixes `\x` escapes,
// a literal non-ASCII character and a `\u{…}` escape.
fn main() {
|
||||
assert_eq!(
|
||||
c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
|
||||
// Expected: the two `\x` bytes verbatim, then F0 9F A6 80 twice (once for
// the literal crab, once for `\u{1F980}`), then the terminating NUL.
&[0xEF, 0x80, 0xF0, 0x9F, 0xA6, 0x80, 0xF0, 0x9F, 0xA6, 0x80, 0x00],
|
||||
);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user