From fcd850fc5db2501d14b2e0cbfac8aa890d700e55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Thu, 9 Jan 2020 22:10:18 -0800 Subject: [PATCH 1/5] Do not ICE on unicode next point Use `shrink_to_hi` instead of `next_point` Fix #68000. --- src/librustc_parse/parser/item.rs | 2 +- ...e-68000-unicode-ident-after-missing-comma.rs | 6 ++++++ ...000-unicode-ident-after-missing-comma.stderr | 17 +++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 src/test/ui/issues/issue-68000-unicode-ident-after-missing-comma.rs create mode 100644 src/test/ui/issues/issue-68000-unicode-ident-after-missing-comma.stderr diff --git a/src/librustc_parse/parser/item.rs b/src/librustc_parse/parser/item.rs index 918e826fc26..4cd3540ea68 100644 --- a/src/librustc_parse/parser/item.rs +++ b/src/librustc_parse/parser/item.rs @@ -1489,7 +1489,7 @@ impl<'a> Parser<'a> { } } _ => { - let sp = self.sess.source_map().next_point(self.prev_span); + let sp = self.prev_span.shrink_to_hi(); let mut err = self.struct_span_err( sp, &format!("expected `,`, or `}}`, found {}", super::token_descr(&self.token)), diff --git a/src/test/ui/issues/issue-68000-unicode-ident-after-missing-comma.rs b/src/test/ui/issues/issue-68000-unicode-ident-after-missing-comma.rs new file mode 100644 index 00000000000..3c49a5a9752 --- /dev/null +++ b/src/test/ui/issues/issue-68000-unicode-ident-after-missing-comma.rs @@ -0,0 +1,6 @@ +pub struct Foo { + pub bar: Vecö + //~^ ERROR expected `,`, or `}`, found `ö` +} //~ ERROR expected `:`, found `}` + +fn main() {} diff --git a/src/test/ui/issues/issue-68000-unicode-ident-after-missing-comma.stderr b/src/test/ui/issues/issue-68000-unicode-ident-after-missing-comma.stderr new file mode 100644 index 00000000000..ef365a61643 --- /dev/null +++ b/src/test/ui/issues/issue-68000-unicode-ident-after-missing-comma.stderr @@ -0,0 +1,17 @@ +error: expected `,`, or `}`, found `ö` + --> $DIR/issue-68000-unicode-ident-after-missing-comma.rs:2:22 + | +LL | pub bar: Vecö + | ^ help: try adding a comma: `,` + +error: expected `:`, found `}` + --> $DIR/issue-68000-unicode-ident-after-missing-comma.rs:4:1 + | +LL | pub bar: Vecö + | - expected `:` +LL | +LL | } + | ^ unexpected token + +error: aborting due to 2 previous errors + From 3250057da983fa4d5bfd0799adaa41cb038f0e25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Fri, 10 Jan 2020 11:02:47 -0800 Subject: [PATCH 2/5] Fix `next_point` to be unicode aware --- src/librustc_span/source_map.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustc_span/source_map.rs b/src/librustc_span/source_map.rs index ec1f9f3a7bd..fb5fcf4a830 100644 --- a/src/librustc_span/source_map.rs +++ b/src/librustc_span/source_map.rs @@ -710,7 +710,7 @@ impl SourceMap { pub fn next_point(&self, sp: Span) -> Span { let start_of_next_point = sp.hi().0; - let width = self.find_width_of_character_at_span(sp, true); + let width = self.find_width_of_character_at_span(sp.shrink_to_hi(), true); // If the width is 1, then the next span should point to the same `lo` and `hi`. However, // in the case of a multibyte character, where the width != 1, the next span should // span multiple bytes to include the whole character. From d558f6a570a782cd1c2e54de790f4f968b0de5f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Fri, 10 Jan 2020 11:03:26 -0800 Subject: [PATCH 3/5] Fix invalid bounding box --- src/librustc_errors/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustc_errors/lib.rs b/src/librustc_errors/lib.rs index 99a6d6f8ec2..e24e8719133 100644 --- a/src/librustc_errors/lib.rs +++ b/src/librustc_errors/lib.rs @@ -182,7 +182,7 @@ impl CodeSuggestion { // Find the bounding span. let lo = substitution.parts.iter().map(|part| part.span.lo()).min().unwrap(); - let hi = substitution.parts.iter().map(|part| part.span.hi()).min().unwrap(); + let hi = substitution.parts.iter().map(|part| part.span.hi()).max().unwrap(); let bounding_span = Span::with_root_ctxt(lo, hi); let lines = cm.span_to_lines(bounding_span).unwrap(); assert!(!lines.lines.is_empty()); From b93ef68245807bac97cd17ea9eaa13169380d815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Fri, 10 Jan 2020 11:22:33 -0800 Subject: [PATCH 4/5] Change `next_point` when `shrink_to_hi` is more appropriate --- src/librustc_builtin_macros/assert.rs | 2 +- src/librustc_expand/mbe/macro_parser.rs | 2 +- src/librustc_parse/parser/diagnostics.rs | 11 ++++------- src/librustc_parse/parser/expr.rs | 2 +- src/librustc_parse/parser/item.rs | 4 ++-- src/librustc_parse/parser/mod.rs | 2 +- src/librustc_typeck/check/callee.rs | 5 ++--- src/librustc_typeck/check/mod.rs | 3 +-- 8 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/librustc_builtin_macros/assert.rs b/src/librustc_builtin_macros/assert.rs index 9043db4742b..b76b496666e 100644 --- a/src/librustc_builtin_macros/assert.rs +++ b/src/librustc_builtin_macros/assert.rs @@ -106,7 +106,7 @@ fn parse_assert<'a>( let custom_message = if let token::Literal(token::Lit { kind: token::Str, .. }) = parser.token.kind { let mut err = cx.struct_span_warn(parser.token.span, "unexpected string literal"); - let comma_span = cx.source_map().next_point(parser.prev_span); + let comma_span = parser.prev_span.shrink_to_hi(); err.span_suggestion_short( comma_span, "try adding a comma", diff --git a/src/librustc_expand/mbe/macro_parser.rs b/src/librustc_expand/mbe/macro_parser.rs index c0e34a30c54..d0aae300b1c 100644 --- a/src/librustc_expand/mbe/macro_parser.rs +++ b/src/librustc_expand/mbe/macro_parser.rs @@ -696,7 +696,7 @@ pub(super) fn parse( if parser.token.span.is_dummy() { parser.token.span } else { - sess.source_map().next_point(parser.token.span) + parser.token.span.shrink_to_hi() }, ), "missing tokens in macro arguments", diff --git a/src/librustc_parse/parser/diagnostics.rs b/src/librustc_parse/parser/diagnostics.rs index 9abfbc698c5..b9658b33d0f 100644 --- a/src/librustc_parse/parser/diagnostics.rs +++ b/src/librustc_parse/parser/diagnostics.rs @@ -265,10 +265,7 @@ impl<'a> Parser<'a> { }; ( format!("expected one of {}, found {}", expect, actual), - ( - self.sess.source_map().next_point(self.prev_span), - format!("expected one of {}", short_expect), - ), + (self.prev_span.shrink_to_hi(), format!("expected one of {}", short_expect)), ) } else if expected.is_empty() { ( @@ -278,7 +275,7 @@ impl<'a> Parser<'a> { } else { ( format!("expected {}, found {}", expect, actual), - (self.sess.source_map().next_point(self.prev_span), format!("expected {}", expect)), + (self.prev_span.shrink_to_hi(), format!("expected {}", expect)), ) }; self.last_unexpected_token_span = Some(self.token.span); @@ -809,7 +806,7 @@ impl<'a> Parser<'a> { _ if self.prev_span == DUMMY_SP => (self.token.span, self.token.span), // EOF, don't want to point at the following char, but rather the last token. (token::Eof, None) => (self.prev_span, self.token.span), - _ => (self.sess.source_map().next_point(self.prev_span), self.token.span), + _ => (self.prev_span.shrink_to_hi(), self.token.span), }; let msg = format!( "expected `{}`, found {}", @@ -1132,7 +1129,7 @@ impl<'a> Parser<'a> { err.span_label(sp, "unclosed delimiter"); } err.span_suggestion_short( - self.sess.source_map().next_point(self.prev_span), + self.prev_span.shrink_to_hi(), &format!("{} may belong here", delim.to_string()), delim.to_string(), Applicability::MaybeIncorrect, diff --git a/src/librustc_parse/parser/expr.rs b/src/librustc_parse/parser/expr.rs index 90f15375aec..f02ea117c06 100644 --- a/src/librustc_parse/parser/expr.rs +++ b/src/librustc_parse/parser/expr.rs @@ -1646,7 +1646,7 @@ impl<'a> Parser<'a> { // | | // | parsed until here as `"y" & X` err.span_suggestion_short( - cm.next_point(arm_start_span), + arm_start_span.shrink_to_hi(), "missing a comma here to end this `match` arm", ",".to_owned(), Applicability::MachineApplicable, diff --git a/src/librustc_parse/parser/item.rs b/src/librustc_parse/parser/item.rs index 4cd3540ea68..fdc01dc930d 100644 --- a/src/librustc_parse/parser/item.rs +++ b/src/librustc_parse/parser/item.rs @@ -1628,7 +1628,7 @@ impl<'a> Parser<'a> { // it's safe to peel off one character only when it has the close delim self.prev_span.with_lo(self.prev_span.hi() - BytePos(1)) } else { - self.sess.source_map().next_point(self.prev_span) + self.prev_span.shrink_to_hi() }; self.struct_span_err( @@ -1644,7 +1644,7 @@ impl<'a> Parser<'a> { Applicability::MaybeIncorrect, ) .span_suggestion( - self.sess.source_map().next_point(self.prev_span), + self.prev_span.shrink_to_hi(), "add a semicolon", ';'.to_string(), Applicability::MaybeIncorrect, diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs index 8d695eda98d..a1035d320b3 100644 --- a/src/librustc_parse/parser/mod.rs +++ b/src/librustc_parse/parser/mod.rs @@ -765,7 +765,7 @@ impl<'a> Parser<'a> { break; } Err(mut expect_err) => { - let sp = self.sess.source_map().next_point(self.prev_span); + let sp = self.prev_span.shrink_to_hi(); let token_str = pprust::token_kind_to_string(t); // Attempt to keep parsing if it was a similar separator. diff --git a/src/librustc_typeck/check/callee.rs b/src/librustc_typeck/check/callee.rs index a1915bc025f..918542e7da5 100644 --- a/src/librustc_typeck/check/callee.rs +++ b/src/librustc_typeck/check/callee.rs @@ -242,7 +242,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { ) = (parent_node, callee_node) { let start = sp.shrink_to_lo(); - let end = self.tcx.sess.source_map().next_point(callee_span); + let end = callee_span.shrink_to_hi(); err.multipart_suggestion( "if you meant to create this closure and immediately call it, surround the \ closure with parenthesis", @@ -319,9 +319,8 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { let call_is_multiline = self.tcx.sess.source_map().is_multiline(call_expr.span); if call_is_multiline { - let span = self.tcx.sess.source_map().next_point(callee.span); err.span_suggestion( - span, + callee.span.shrink_to_hi(), "try adding a semicolon", ";".to_owned(), Applicability::MaybeIncorrect, diff --git a/src/librustc_typeck/check/mod.rs b/src/librustc_typeck/check/mod.rs index f10edc1a468..94c9a704c65 100644 --- a/src/librustc_typeck/check/mod.rs +++ b/src/librustc_typeck/check/mod.rs @@ -4953,9 +4953,8 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { | ExprKind::Loop(..) | ExprKind::Match(..) | ExprKind::Block(..) => { - let sp = self.tcx.sess.source_map().next_point(cause_span); err.span_suggestion( - sp, + cause_span.shrink_to_hi(), "try adding a semicolon", ";".to_string(), Applicability::MachineApplicable, From f6e9fd037a7b55f8f4fe78694b77d9788b18dfeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Fri, 10 Jan 2020 11:22:57 -0800 Subject: [PATCH 5/5] Add ICE regression tests --- .../issue-68091-unicode-ident-after-if.rs | 9 +++++++++ .../issue-68091-unicode-ident-after-if.stderr | 18 ++++++++++++++++++ ...8092-unicode-ident-after-incomplete-expr.rs | 9 +++++++++ ...-unicode-ident-after-incomplete-expr.stderr | 8 ++++++++ 4 files changed, 44 insertions(+) create mode 100644 src/test/ui/issues/issue-68091-unicode-ident-after-if.rs create mode 100644 src/test/ui/issues/issue-68091-unicode-ident-after-if.stderr create mode 100644 src/test/ui/issues/issue-68092-unicode-ident-after-incomplete-expr.rs create mode 100644 src/test/ui/issues/issue-68092-unicode-ident-after-incomplete-expr.stderr diff --git a/src/test/ui/issues/issue-68091-unicode-ident-after-if.rs b/src/test/ui/issues/issue-68091-unicode-ident-after-if.rs new file mode 100644 index 00000000000..00f90cc73b3 --- /dev/null +++ b/src/test/ui/issues/issue-68091-unicode-ident-after-if.rs @@ -0,0 +1,9 @@ +macro_rules! x { + ($($c:tt)*) => { + $($c)ö* {} //~ ERROR missing condition for `if` expression + }; //~| ERROR mismatched types +} + +fn main() { + x!(if); +} diff --git a/src/test/ui/issues/issue-68091-unicode-ident-after-if.stderr b/src/test/ui/issues/issue-68091-unicode-ident-after-if.stderr new file mode 100644 index 00000000000..8d1a03ac207 --- /dev/null +++ b/src/test/ui/issues/issue-68091-unicode-ident-after-if.stderr @@ -0,0 +1,18 @@ +error: missing condition for `if` expression + --> $DIR/issue-68091-unicode-ident-after-if.rs:3:14 + | +LL | $($c)ö* {} + | ^ expected if condition here + +error[E0308]: mismatched types + --> $DIR/issue-68091-unicode-ident-after-if.rs:3:17 + | +LL | $($c)ö* {} + | ^^ expected `bool`, found `()` +... +LL | x!(if); + | ------- in this macro invocation + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0308`. diff --git a/src/test/ui/issues/issue-68092-unicode-ident-after-incomplete-expr.rs b/src/test/ui/issues/issue-68092-unicode-ident-after-incomplete-expr.rs new file mode 100644 index 00000000000..1a90b4724d4 --- /dev/null +++ b/src/test/ui/issues/issue-68092-unicode-ident-after-incomplete-expr.rs @@ -0,0 +1,9 @@ +macro_rules! x { + ($($c:tt)*) => { + $($c)ö* //~ ERROR macro expansion ends with an incomplete expression: expected expression + }; +} + +fn main() { + x!(!); +} diff --git a/src/test/ui/issues/issue-68092-unicode-ident-after-incomplete-expr.stderr b/src/test/ui/issues/issue-68092-unicode-ident-after-incomplete-expr.stderr new file mode 100644 index 00000000000..0b9c364f1f1 --- /dev/null +++ b/src/test/ui/issues/issue-68092-unicode-ident-after-incomplete-expr.stderr @@ -0,0 +1,8 @@ +error: macro expansion ends with an incomplete expression: expected expression + --> $DIR/issue-68092-unicode-ident-after-incomplete-expr.rs:3:14 + | +LL | $($c)ö* + | ^ expected expression + +error: aborting due to previous error +