Rollup merge of #119172 - nnethercote:earlier-NulInCStr, r=petrochenkov

Detect `NulInCStr` error earlier.

By making it an `EscapeError` instead of a `LitError`. This makes it like the other errors produced when checking string literals contents, e.g. for invalid escape sequences or bare CR chars.

NOTE: this means these errors are issued earlier, before expansion, which changes behaviour. It will be possible to move the check back to the later point if desired. If that happens, it's likely that all the string literal contents checks will be delayed together.

One nice thing about this: the old approach had some code in `report_lit_error` to calculate the span of the nul char from a range. This code used a hardwired `+2` to account for the `c"` at the start of a C string literal, but this should have changed to a `+3` for raw C string literals to account for the `cr"`, which meant that the caret in `cr"` nul error messages was one short of where it should have been. The new approach doesn't need any of this and avoids the off-by-one error.

r? ```@fee1-dead```
This commit is contained in:
Matthias Krüger 2024-01-18 10:34:17 +01:00 committed by GitHub
commit ff8c7a7816
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 32 additions and 28 deletions

View File

@ -8,7 +8,6 @@
};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;
use std::ops::Range;
use std::{ascii, fmt, str};
// Escapes a string, represented as a symbol. Reuses the original symbol,
@ -39,7 +38,6 @@ pub enum LitError {
InvalidFloatSuffix,
NonDecimalFloat(u32),
IntTooLarge(u32),
NulInCStr(Range<usize>),
}
impl LitKind {
@ -156,10 +154,7 @@ pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_c_string(s, Mode::CStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
Ok(CStrUnit::Byte(b)) => buf.push(b),
Ok(CStrUnit::Char(c)) => {
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
@ -179,10 +174,7 @@ pub fn from_token_lit(lit: token::Lit) -> Result<LitKind, LitError> {
// can convert the symbol directly to a `Lrc<u8>` on success.
let s = symbol.as_str();
let mut error = Ok(());
unescape_c_string(s, Mode::RawCStr, &mut |span, c| match c {
Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) => {
error = Err(LitError::NulInCStr(span));
}
unescape_c_string(s, Mode::RawCStr, &mut |_, c| match c {
Ok(_) => {}
Err(err) => {
if err.is_fatal() {

View File

@ -59,6 +59,9 @@ pub enum EscapeError {
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
NonAsciiCharInByte,
// `\0` in a C string literal.
NulInCStr,
/// After a line ending with '\', the next line contains whitespace
/// characters that are not skipped.
UnskippedWhitespaceWarning,
@ -122,10 +125,20 @@ pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F)
{
match mode {
CStr => {
unescape_non_raw_common(src, mode, callback);
unescape_non_raw_common(src, mode, &mut |r, mut result| {
if let Ok(CStrUnit::Byte(0) | CStrUnit::Char('\0')) = result {
result = Err(EscapeError::NulInCStr);
}
callback(r, result)
});
}
RawCStr => {
check_raw_common(src, mode, &mut |r, result| callback(r, result.map(CStrUnit::Char)));
check_raw_common(src, mode, &mut |r, mut result| {
if let Ok('\0') = result {
result = Err(EscapeError::NulInCStr);
}
callback(r, result.map(CStrUnit::Char))
});
}
Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable!(),
}

View File

@ -616,6 +616,8 @@ parse_note_mut_pattern_usage = `mut` may be followed by `variable` and `variable
parse_note_pattern_alternatives_use_single_vert = alternatives in or-patterns are separated with `|`, not `||`
parse_nul_in_c_str = null characters in C string literals are not supported
parse_or_pattern_not_allowed_in_fn_parameters = top-level or-patterns are not allowed in function parameters
parse_or_pattern_not_allowed_in_let_binding = top-level or-patterns are not allowed in `let` bindings
parse_out_of_range_hex_escape = out of range hex escape

View File

@ -2163,6 +2163,11 @@ pub enum UnescapeError {
#[subdiagnostic]
suggestion: MoreThanOneCharSugg,
},
#[diag(parse_nul_in_c_str)]
NulInCStr {
#[primary_span]
span: Span,
},
}
#[derive(Subdiagnostic)]

View File

@ -262,6 +262,9 @@ pub(crate) fn emit_unescape_error(
EscapeError::LoneSlash => {
dcx.emit_err(UnescapeError::LoneSlash(err_span));
}
EscapeError::NulInCStr => {
dcx.emit_err(UnescapeError::NulInCStr { span: err_span });
}
EscapeError::UnskippedWhitespaceWarning => {
let (c, char_span) = last_char();
dcx.emit_warn(UnescapeError::UnskippedWhitespace {

View File

@ -75,8 +75,6 @@ session_not_circumvent_feature = `-Zunleash-the-miri-inside-of-you` may not be u
session_not_supported = not supported
session_nul_in_c_str = null characters in C string literals are not supported
session_octal_float_literal_not_supported = octal float literal is not supported
session_optimization_fuel_exhausted = optimization-fuel-exhausted: {$msg}

View File

@ -6,7 +6,7 @@
error_code, DiagCtxt, DiagnosticBuilder, DiagnosticMessage, IntoDiagnostic, Level, MultiSpan,
};
use rustc_macros::Diagnostic;
use rustc_span::{BytePos, Span, Symbol};
use rustc_span::{Span, Symbol};
use rustc_target::spec::{SplitDebuginfo, StackProtector, TargetTriple};
use crate::parse::ParseSess;
@ -346,13 +346,6 @@ pub(crate) struct BinaryFloatLiteralNotSupported {
pub span: Span,
}
#[derive(Diagnostic)]
#[diag(session_nul_in_c_str)]
pub(crate) struct NulInCStr {
#[primary_span]
pub span: Span,
}
pub fn report_lit_error(sess: &ParseSess, err: LitError, lit: token::Lit, span: Span) {
// Checks if `s` looks like i32 or u1234 etc.
fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
@ -432,12 +425,6 @@ fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option<String> {
};
dcx.emit_err(IntLiteralTooLarge { span, limit });
}
LitError::NulInCStr(range) => {
let lo = BytePos(span.lo().0 + range.start as u32 + 2);
let hi = BytePos(span.lo().0 + range.end as u32 + 2);
let span = span.with_lo(lo).with_hi(hi);
dcx.emit_err(NulInCStr { span });
}
}
}

View File

@ -369,6 +369,7 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
"non-ASCII character in byte string literal"
}
EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal",
EscapeError::NulInCStr => "null character in C string literal",
EscapeError::UnskippedWhitespaceWarning => "",
EscapeError::MultipleSkippedLinesWarning => "",
}

View File

@ -106,6 +106,9 @@ fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> (&'static str,
EE::NonAsciiCharInByte => {
"Byte literals must not contain non-ASCII characters"
}
EE::NulInCStr => {
"C strings literals must not contain null characters"
}
EE::UnskippedWhitespaceWarning => "Whitespace after this escape is not skipped",
EE::MultipleSkippedLinesWarning => "Multiple lines are skipped by this escape",