Replace ASCII control chars with Unicode Control Pictures

```
error: bare CR not allowed in doc-comment
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32
   |
LL | /// doc comment with bare CR: '␍'
   |                                ^
```
2024-07-09 16:46:09 +00:00 · 2024-07-09 16:46:09 +00:00 · 89f273f40d
commit 89f273f40d
parent 5753b30676
14 changed files with 81 additions and 31 deletions
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@ -677,10 +677,7 @@ fn draw_line(
            .skip(left)
            .take_while(|ch| {
                // Make sure that the trimming on the right will fall within the terminal width.
-                // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char`
+                let next = char_width(*ch);
                // is. For now, just accept that sometimes the code line will be longer than
                // desired.
                let next = unicode_width::UnicodeWidthChar::width(*ch).unwrap_or(1);
                if taken + next > right - left {
                    return false;
                }
@ -742,11 +739,7 @@ fn render_source_line(
        let left = margin.left(source_string.len());
        // Account for unicode characters of width !=0 that were removed.
-        let left = source_string
+        let left = source_string.chars().take(left).map(|ch| char_width(ch)).sum();
            .chars()
            .take(left)
            .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1))
            .sum();
        self.draw_line(
            buffer,
@ -2039,7 +2032,7 @@ fn emit_suggestion_default(
                    let sub_len: usize =
                        if is_whitespace_addition { &part.snippet } else { part.snippet.trim() }
                            .chars()
-                            .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1))
+                            .map(|ch| char_width(ch))
                            .sum();
                    let offset: isize = offsets
@ -2076,11 +2069,8 @@ fn emit_suggestion_default(
                    }
                    // length of the code after substitution
-                    let full_sub_len = part
+                    let full_sub_len =
-                        .snippet
+                        part.snippet.chars().map(|ch| char_width(ch)).sum::<usize>() as isize;
                        .chars()
                        .map(|ch| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1))
                        .sum::<usize>() as isize;
                    // length of the code to be substituted
                    let snippet_len = span_end_pos as isize - span_start_pos as isize;
@ -2580,6 +2570,40 @@ fn num_decimal_digits(num: usize) -> usize {
    ('\u{2068}', ""),
    ('\u{202C}', ""),
    ('\u{2069}', ""),
    // In terminals without Unicode support the following will be garbled, but in *all* terminals
    // the underlying codepoint will be as well. We could gate this replacement behind a "unicode
    // support" gate.
    ('\u{0000}', "␀"),
    ('\u{0001}', "␁"),
    ('\u{0002}', "␂"),
    ('\u{0003}', "␃"),
    ('\u{0004}', "␄"),
    ('\u{0005}', "␅"),
    ('\u{0006}', "␆"),
    ('\u{0007}', "␇"),
    ('\u{0008}', "␈"),
    ('\u{000B}', "␋"),
    ('\u{000C}', "␌"),
    ('\u{000D}', "␍"),
    ('\u{000E}', "␎"),
    ('\u{000F}', "␏"),
    ('\u{0010}', "␐"),
    ('\u{0011}', "␑"),
    ('\u{0012}', "␒"),
    ('\u{0013}', "␓"),
    ('\u{0014}', "␔"),
    ('\u{0015}', "␕"),
    ('\u{0016}', "␖"),
    ('\u{0017}', "␗"),
    ('\u{0018}', "␘"),
    ('\u{0019}', "␙"),
    ('\u{001A}', "␚"),
    ('\u{001B}', "␛"),
    ('\u{001C}', "␜"),
    ('\u{001D}', "␝"),
    ('\u{001E}', "␞"),
    ('\u{001F}', "␟"),
    ('\u{007F}', "␡"),
 ];
 fn normalize_whitespace(str: &str) -> String {
@ -2590,6 +2614,21 @@ fn normalize_whitespace(str: &str) -> String {
    s
 }
 /// Returns the number of terminal columns `ch` is assumed to occupy.
 ///
 /// A tab counts as four columns, an ASCII control character counts as one column, and
 /// every other character uses the width reported by `unicode_width`, falling back to `1`
 /// when that crate reports no width.
 fn char_width(ch: char) -> usize {
    // FIXME: `unicode_width` sometimes disagrees with terminals on how wide a `char` is. For now,
    // just accept that sometimes the code line will be longer than desired.
    match ch {
        '\t' => 4,
        // U+0000..=U+001F without `\t` (U+0009) and `\n` (U+000A), plus DEL (U+007F).
        '\u{0000}'..='\u{0008}' | '\u{000B}'..='\u{001F}' | '\u{007F}' => 1,
        _ => unicode_width::UnicodeWidthChar::width(ch).unwrap_or(1),
    }
 }
 /// Writes the `"| "` column separator into `buffer` at (`line`, `col`), styled as
 /// `Style::LineNumber`.
 fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) {
    let separator = "| ";
    buffer.puts(line, col, separator, Style::LineNumber);
 }
--- a/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr
+++ b/tests/ui/lexer/lex-bare-cr-string-literal-doc-comment.stderr
@ -1,31 +1,31 @@
 error: bare CR not allowed in doc-comment
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:3:32
   |
-LL | /// doc comment with bare CR: '
'
+LL | /// doc comment with bare CR: '␍'
   |                                ^
 error: bare CR not allowed in block doc-comment
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:7:38
   |
-LL | /** block doc comment with bare CR: '
' */
+LL | /** block doc comment with bare CR: '␍' */
   |                                      ^
 error: bare CR not allowed in doc-comment
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:12:36
   |
-LL |     //! doc comment with bare CR: '
'
+LL |     //! doc comment with bare CR: '␍'
   |                                    ^
 error: bare CR not allowed in block doc-comment
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:15:42
   |
-LL |     /*! block doc comment with bare CR: '
' */
+LL |     /*! block doc comment with bare CR: '␍' */
   |                                          ^
 error: bare CR not allowed in string, use `\r` instead
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:19:18
   |
-LL |     let _s = "foo
bar";
+LL |     let _s = "foo␍bar";
   |                  ^
   |
 help: escape the character
@ -36,13 +36,13 @@ LL |     let _s = "foo\rbar";
 error: bare CR not allowed in raw string
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:22:19
   |
-LL |     let _s = r"bar
foo";
+LL |     let _s = r"bar␍foo";
   |                   ^
 error: unknown character escape: `\r`
  --> $DIR/lex-bare-cr-string-literal-doc-comment.rs:25:19
   |
-LL |     let _s = "foo\
bar";
+LL |     let _s = "foo\␍bar";
   |                   ^ unknown character escape
   |
   = help: this is an isolated carriage return; consider checking your editor and version control settings
--- a/tests/ui/parser/bad-char-literals.rs
+++ b/tests/ui/parser/bad-char-literals.rs
--- a/tests/ui/parser/bad-char-literals.stderr
+++ b/tests/ui/parser/bad-char-literals.stderr
@ -25,7 +25,7 @@ LL |     '\n';
 error: character constant must be escaped: `\r`
  --> $DIR/bad-char-literals.rs:15:6
   |
-LL |     '
';
+LL |     '␍';
   |      ^
   |
 help: escape the character
@ -33,8 +33,19 @@ help: escape the character
 LL |     '\r';
   |      ++
 error: character literal may only contain one codepoint
  --> $DIR/bad-char-literals.rs:18:5
   |
 LL |     '-␀-';
   |     ^^^^
   |
 help: if you meant to write a string literal, use double quotes
   |
 LL |     "-␀-";
   |     ~  ~
 error: character constant must be escaped: `\t`
-  --> $DIR/bad-char-literals.rs:18:6
+  --> $DIR/bad-char-literals.rs:21:6
   |
 LL |     '    ';
   |      ^^^^
@ -44,5 +55,5 @@ help: escape the character
 LL |     '\t';
   |      ++
-error: aborting due to 4 previous errors
+error: aborting due to 5 previous errors
--- a/tests/ui/parser/issues/issue-66473.stderr
+++ b/tests/ui/parser/issues/issue-66473.stderr
--- a/tests/ui/parser/issues/issue-68629.stderr
+++ b/tests/ui/parser/issues/issue-68629.stderr
--- a/tests/ui/parser/issues/issue-68730.stderr
+++ b/tests/ui/parser/issues/issue-68730.stderr
--- a/tests/ui/parser/raw/raw-byte-string-literals.stderr
+++ b/tests/ui/parser/raw/raw-byte-string-literals.stderr
@ -1,7 +1,7 @@
 error: bare CR not allowed in raw string
  --> $DIR/raw-byte-string-literals.rs:4:9
   |
-LL |     br"a
";
+LL |     br"a␍";
   |         ^
 error: non-ASCII character in raw byte string literal
--- a/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr
+++ b/tests/ui/parser/several-carriage-returns-in-doc-comment.stderr
@ -1,19 +1,19 @@
 error: bare CR not allowed in doc-comment
  --> $DIR/several-carriage-returns-in-doc-comment.rs:6:12
   |
-LL | /// This do
c comment contains 
three isolated `\r`
 symbols
+LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols
   |            ^
 error: bare CR not allowed in doc-comment
  --> $DIR/several-carriage-returns-in-doc-comment.rs:6:32
   |
-LL | /// This do
c comment contains 
three isolated `\r`
 symbols
+LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols
   |                               ^
 error: bare CR not allowed in doc-comment
  --> $DIR/several-carriage-returns-in-doc-comment.rs:6:52
   |
-LL | /// This do
c comment contains 
three isolated `\r`
 symbols
+LL | /// This do␍c comment contains ␍three isolated `\r`␍ symbols
   |                                                  ^
 error: aborting due to 3 previous errors
--- a/tests/ui/parser/trailing-carriage-return-in-string.stderr
+++ b/tests/ui/parser/trailing-carriage-return-in-string.stderr
@ -1,7 +1,7 @@
 error: unknown character escape: `\r`
  --> $DIR/trailing-carriage-return-in-string.rs:10:25
   |
-LL |     let bad = "This is \
 a test";
+LL |     let bad = "This is \␍ a test";
   |                         ^ unknown character escape
   |
   = help: this is an isolated carriage return; consider checking your editor and version control settings
--- a/tests/ui/parser/utf16-be-without-bom.stderr
+++ b/tests/ui/parser/utf16-be-without-bom.stderr
--- a/tests/ui/parser/utf16-le-without-bom.stderr
+++ b/tests/ui/parser/utf16-le-without-bom.stderr
--- a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
--- a/tests/ui/str/str-escape.stderr
+++ b/tests/ui/str/str-escape.stderr
@ -22,7 +22,7 @@ warning: whitespace symbol '\u{c}' is not skipped
   |
 LL |       let s = b"a\
   |  ________________^
-LL | |     b";
+LL | |     ␌b";
   | |    ^- whitespace symbol '\u{c}' is not skipped
   | |____|
   |