Remove superfluous escaping from byte, byte str, and c str literals

This commit is contained in:
David Tolnay 2024-04-10 20:55:59 -07:00
parent 2cc0284905
commit 7ddc89e893
No known key found for this signature in database
GPG Key ID: F9BA143B95FF6D82
3 changed files with 101 additions and 21 deletions

View File

@ -0,0 +1,57 @@
/// Switches that control which characters `escape_bytes` renders as escape
/// sequences when building the textual form of a literal.
#[derive(Clone, Copy)]
pub(crate) struct EscapeOptions {
    /// When set, a single quote is emitted as `\'`; when clear it is copied
    /// through unescaped.
    pub escape_single_quote: bool,
    /// When set, a double quote is emitted as `\"`; when clear it is copied
    /// through unescaped.
    pub escape_double_quote: bool,
    /// When set, the input is escaped one byte at a time, so non-ASCII bytes
    /// and ASCII control characters are written as `\x` escapes instead of
    /// `\u` escapes.
    pub escape_nonascii: bool,
}
/// Builds the escaped source representation of `bytes`, without any
/// surrounding quotes.
///
/// * With `opt.escape_nonascii` set, every byte is escaped on its own via
///   `escape_single_byte`.
/// * Otherwise the input is split into UTF-8 chunks: each valid run is escaped
///   one `char` at a time via `escape_single_char`, and the invalid bytes that
///   follow a run are escaped individually via `escape_single_byte`.
///
/// Quote handling in both modes follows `opt.escape_single_quote` and
/// `opt.escape_double_quote`.
pub(crate) fn escape_bytes(bytes: &[u8], opt: EscapeOptions) -> String {
    // Every input byte contributes at least one byte of output, so the input
    // length is a safe lower bound for the allocation.
    let mut repr = String::with_capacity(bytes.len());
    if opt.escape_nonascii {
        for &byte in bytes {
            escape_single_byte(byte, opt, &mut repr);
        }
    } else {
        for chunk in bytes.utf8_chunks() {
            for ch in chunk.valid().chars() {
                escape_single_char(ch, opt, &mut repr);
            }
            for &byte in chunk.invalid() {
                escape_single_byte(byte, opt, &mut repr);
            }
        }
    }
    repr
}
/// Appends the escaped form of a single `byte` to `repr`.
///
/// NUL is always written as `\0`. A quote byte is copied through unchanged
/// when the matching `opt` flag is clear. Everything else goes through
/// `u8::escape_ascii`.
fn escape_single_byte(byte: u8, opt: EscapeOptions, repr: &mut String) {
    match byte {
        b'\0' => repr.push_str("\\0"),
        b'\'' if !opt.escape_single_quote => repr.push('\''),
        b'"' if !opt.escape_double_quote => repr.push('"'),
        // `escape_ascii` yields \t, \r, \n, \\, \', \" and falls back to
        // \x## for non-ASCII bytes and ASCII control characters.
        _ => repr.extend(byte.escape_ascii().map(char::from)),
    }
}
/// Appends the escaped form of a single `ch` to `repr`.
///
/// A quote character is copied through unchanged when the matching `opt` flag
/// is clear; every other character goes through `char::escape_debug`.
fn escape_single_char(ch: char, opt: EscapeOptions, repr: &mut String) {
    let keep_verbatim = match ch {
        '\'' => !opt.escape_single_quote,
        '"' => !opt.escape_double_quote,
        _ => false,
    };
    if keep_verbatim {
        repr.push(ch);
    } else {
        // `escape_debug` yields \0, \t, \r, \n, \\, \', \" and falls back to
        // \u{...} for non-printable characters as well as Grapheme_Extend
        // characters such as U+0300 "Combining Grave Accent".
        repr.extend(ch.escape_debug());
    }
}

View File

@ -43,10 +43,12 @@
pub mod bridge;
mod diagnostic;
mod escape;
#[unstable(feature = "proc_macro_diagnostic", issue = "54140")]
pub use diagnostic::{Diagnostic, Level, MultiSpan};
use crate::escape::{escape_bytes, EscapeOptions};
use std::ffi::CStr;
use std::ops::{Range, RangeBounds};
use std::path::PathBuf;
@ -1356,40 +1358,61 @@ pub fn f64_suffixed(n: f64) -> Literal {
/// String literal.
///
/// The literal's symbol is the escaped form of `string`, without the
/// surrounding double quotes.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn string(string: &str) -> Literal {
// NOTE(review): this diff hunk is rendered without +/- markers. The next four
// lines appear to be the removed body, which took the `Debug` rendering of
// `string` and sliced off the surrounding quotes.
let quoted = format!("{:?}", string);
assert!(quoted.starts_with('"') && quoted.ends_with('"'));
let symbol = &quoted[1..quoted.len() - 1];
Literal::new(bridge::LitKind::Str, symbol, None)
// Replacement body: `"` is escaped, `'` is left bare, and with
// `escape_nonascii` off `escape_bytes` takes its UTF-8 chunking path.
let escape = EscapeOptions {
escape_single_quote: false,
escape_double_quote: true,
escape_nonascii: false,
};
let repr = escape_bytes(string.as_bytes(), escape);
Literal::new(bridge::LitKind::Str, &repr, None)
}
/// Character literal.
///
/// The literal's symbol is the escaped form of `ch`, without the surrounding
/// single quotes.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn character(ch: char) -> Literal {
// NOTE(review): this diff hunk is rendered without +/- markers. The next four
// lines appear to be the removed body, which took the `Debug` rendering of
// `ch` and sliced off the surrounding quotes.
let quoted = format!("{:?}", ch);
assert!(quoted.starts_with('\'') && quoted.ends_with('\''));
let symbol = &quoted[1..quoted.len() - 1];
Literal::new(bridge::LitKind::Char, symbol, None)
// Replacement body: `'` is escaped and `"` is left bare.
let escape = EscapeOptions {
escape_single_quote: true,
escape_double_quote: false,
escape_nonascii: false,
};
// `ch` is UTF-8 encoded into a 4-byte scratch buffer so it can be passed to
// `escape_bytes` as a byte slice.
let repr = escape_bytes(ch.encode_utf8(&mut [0u8; 4]).as_bytes(), escape);
Literal::new(bridge::LitKind::Char, &repr, None)
}
/// Byte character literal.
///
/// The literal's symbol is the escaped form of `byte`, without the `b` prefix
/// or the surrounding single quotes.
#[stable(feature = "proc_macro_byte_character", since = "1.79.0")]
pub fn byte_character(byte: u8) -> Literal {
// NOTE(review): this diff hunk is rendered without +/- markers. The next two
// lines appear to be the removed body, which applied `escape_ascii` to the
// byte unconditionally.
let string = [byte].escape_ascii().to_string();
Literal::new(bridge::LitKind::Byte, &string, None)
// Replacement body: `'` is escaped, `"` is left bare, and `escape_nonascii`
// makes `escape_bytes` escape the byte on its own rather than decode UTF-8.
let escape = EscapeOptions {
escape_single_quote: true,
escape_double_quote: false,
escape_nonascii: true,
};
let repr = escape_bytes(&[byte], escape);
Literal::new(bridge::LitKind::Byte, &repr, None)
}
/// Byte string literal.
///
/// The literal's symbol is the escaped form of `bytes`, without the `b` prefix
/// or the surrounding double quotes.
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
pub fn byte_string(bytes: &[u8]) -> Literal {
// NOTE(review): this diff hunk is rendered without +/- markers. The next two
// lines appear to be the removed body, which applied `escape_ascii` to the
// whole slice unconditionally.
let string = bytes.escape_ascii().to_string();
Literal::new(bridge::LitKind::ByteStr, &string, None)
// Replacement body: `"` is escaped, `'` is left bare, and `escape_nonascii`
// makes `escape_bytes` escape every byte on its own rather than decode UTF-8.
let escape = EscapeOptions {
escape_single_quote: false,
escape_double_quote: true,
escape_nonascii: true,
};
let repr = escape_bytes(bytes, escape);
Literal::new(bridge::LitKind::ByteStr, &repr, None)
}
/// C string literal.
///
/// The literal's symbol is the escaped form of the bytes returned by
/// `string.to_bytes()`, without the `c` prefix or the surrounding double
/// quotes.
#[stable(feature = "proc_macro_c_str_literals", since = "1.79.0")]
pub fn c_string(string: &CStr) -> Literal {
// NOTE(review): this diff hunk is rendered without +/- markers. The next two
// lines appear to be the removed body, which applied `escape_ascii` to the
// bytes unconditionally.
let string = string.to_bytes().escape_ascii().to_string();
Literal::new(bridge::LitKind::CStr, &string, None)
// Replacement body: `"` is escaped and `'` is left bare. With
// `escape_nonascii` off, `escape_bytes` escapes valid UTF-8 runs one `char`
// at a time and only the invalid bytes individually.
let escape = EscapeOptions {
escape_single_quote: false,
escape_double_quote: true,
escape_nonascii: false,
};
let repr = escape_bytes(string.to_bytes(), escape);
Literal::new(bridge::LitKind::CStr, &repr, None)
}
/// Returns the span encompassing this literal.

View File

@ -29,17 +29,17 @@ fn test_display_literal() {
assert_eq!(Literal::byte_string(b"aA").to_string(), r#" b"aA" "#.trim());
assert_eq!(Literal::byte_string(b"\t").to_string(), r#" b"\t" "#.trim());
assert_eq!(Literal::byte_string(b"'").to_string(), r#" b"\'" "#.trim());
assert_eq!(Literal::byte_string(b"'").to_string(), r#" b"'" "#.trim());
assert_eq!(Literal::byte_string(b"\"").to_string(), r#" b"\"" "#.trim());
assert_eq!(Literal::byte_string(b"\0").to_string(), r#" b"\x00" "#.trim());
assert_eq!(Literal::byte_string(b"\0").to_string(), r#" b"\0" "#.trim());
assert_eq!(Literal::byte_string(b"\x01").to_string(), r#" b"\x01" "#.trim());
assert_eq!(Literal::c_string(c"aA").to_string(), r#" c"aA" "#.trim());
assert_eq!(Literal::c_string(c"\t").to_string(), r#" c"\t" "#.trim());
assert_eq!(Literal::c_string(c"❤").to_string(), r#" c"\xe2\x9d\xa4" "#.trim());
assert_eq!(Literal::c_string(c"\'").to_string(), r#" c"\'" "#.trim());
assert_eq!(Literal::c_string(c"❤").to_string(), r#" c"❤" "#.trim());
assert_eq!(Literal::c_string(c"\'").to_string(), r#" c"'" "#.trim());
assert_eq!(Literal::c_string(c"\"").to_string(), r#" c"\"" "#.trim());
assert_eq!(Literal::c_string(c"\x7f\xff\xfe\u{333}").to_string(), r#" c"\x7f\xff\xfe\xcc\xb3" "#.trim());
assert_eq!(Literal::c_string(c"\x7f\xff\xfe\u{333}").to_string(), r#" c"\u{7f}\xff\xfe\u{333}" "#.trim());
assert_eq!(Literal::character('a').to_string(), r#" 'a' "#.trim());
assert_eq!(Literal::character('\t').to_string(), r#" '\t' "#.trim());
@ -52,8 +52,8 @@ fn test_display_literal() {
assert_eq!(Literal::byte_character(b'a').to_string(), r#" b'a' "#.trim());
assert_eq!(Literal::byte_character(b'\t').to_string(), r#" b'\t' "#.trim());
assert_eq!(Literal::byte_character(b'\'').to_string(), r#" b'\'' "#.trim());
assert_eq!(Literal::byte_character(b'"').to_string(), r#" b'\"' "#.trim());
assert_eq!(Literal::byte_character(0).to_string(), r#" b'\x00' "#.trim());
assert_eq!(Literal::byte_character(b'"').to_string(), r#" b'"' "#.trim());
assert_eq!(Literal::byte_character(0).to_string(), r#" b'\0' "#.trim());
assert_eq!(Literal::byte_character(1).to_string(), r#" b'\x01' "#.trim());
}