diff --git a/src/Cargo.lock b/src/Cargo.lock index 00f556bf0b2..9cce13d84cc 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -1790,6 +1790,7 @@ dependencies = [ "rustc_data_structures 0.0.0", "serialize 0.0.0", "syntax_pos 0.0.0", + "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/src/librustc_errors/Cargo.toml b/src/librustc_errors/Cargo.toml index c72e9dd0ea3..3e15af7558d 100644 --- a/src/librustc_errors/Cargo.toml +++ b/src/librustc_errors/Cargo.toml @@ -12,3 +12,4 @@ crate-type = ["dylib"] serialize = { path = "../libserialize" } syntax_pos = { path = "../libsyntax_pos" } rustc_data_structures = { path = "../librustc_data_structures" } +unicode-width = "0.1.4" diff --git a/src/librustc_errors/emitter.rs b/src/librustc_errors/emitter.rs index 6bba6fbc295..af556c576c0 100644 --- a/src/librustc_errors/emitter.rs +++ b/src/librustc_errors/emitter.rs @@ -23,6 +23,7 @@ use std::rc::Rc; use term; use std::collections::HashMap; use std::cmp::min; +use unicode_width; /// Emitter trait for emitting errors. 
pub trait Emitter { @@ -1182,7 +1183,10 @@ impl EmitterWriter { if show_underline { draw_col_separator(&mut buffer, row_num, max_line_num_len + 1); let start = parts[0].snippet.len() - parts[0].snippet.trim_left().len(); - let sub_len = parts[0].snippet.trim().len(); + // account for substitutions containing unicode characters + let sub_len = parts[0].snippet.trim().chars().fold(0, |acc, ch| { + acc + unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0) + }); let underline_start = span_start_pos.col.0 + start; let underline_end = span_start_pos.col.0 + start + sub_len; for p in underline_start..underline_end { diff --git a/src/librustc_errors/lib.rs b/src/librustc_errors/lib.rs index 605cfc5ed12..840346c447b 100644 --- a/src/librustc_errors/lib.rs +++ b/src/librustc_errors/lib.rs @@ -26,6 +26,7 @@ extern crate libc; extern crate rustc_data_structures; extern crate serialize as rustc_serialize; extern crate syntax_pos; +extern crate unicode_width; pub use emitter::ColorConfig; diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 798dfc6d209..1e84fb98a66 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1306,8 +1306,34 @@ impl<'a> StringReader<'a> { '\''); if !self.ch_is('\'') { + let pos = self.pos; + loop { + self.bump(); + if self.ch_is('\'') { + let start = self.byte_offset(start).to_usize(); + let end = self.byte_offset(self.pos).to_usize(); + self.bump(); + let span = self.mk_sp(start_with_quote, self.pos); + self.sess.span_diagnostic + .struct_span_err(span, + "character literal may only contain one codepoint") + .span_suggestion(span, + "if you meant to write a `str` literal, \ + use double quotes", + format!("\"{}\"", + &self.source_text[start..end])) + .emit(); + return Ok(token::Literal(token::Str_(Symbol::intern("??")), None)) + } + if self.ch_is('\n') || self.is_eof() || self.ch_is('/') { + // Only attempt to infer single line string literals. 
If we encounter + // a slash, bail out in order to avoid nonsensical suggestion when + // involving comments. + break; + } + } panic!(self.fatal_span_verbose( - start_with_quote, self.pos, + start_with_quote, pos, String::from("character literal may only contain one codepoint"))); } diff --git a/src/test/parse-fail/lex-bad-char-literals-3.rs b/src/test/parse-fail/lex-bad-char-literals-3.rs index 92432dc8b63..464e75ec582 100644 --- a/src/test/parse-fail/lex-bad-char-literals-3.rs +++ b/src/test/parse-fail/lex-bad-char-literals-3.rs @@ -12,5 +12,5 @@ // This test needs to the last one appearing in this file as it kills the parser static c: char = - '●●' //~ ERROR: character literal may only contain one codepoint: '● + '●●' //~ ERROR: character literal may only contain one codepoint ; diff --git a/src/test/parse-fail/lex-bad-char-literals-5.rs b/src/test/parse-fail/lex-bad-char-literals-5.rs index 5259175b186..aa166881d89 100644 --- a/src/test/parse-fail/lex-bad-char-literals-5.rs +++ b/src/test/parse-fail/lex-bad-char-literals-5.rs @@ -12,5 +12,5 @@ // // This test needs to the last one appearing in this file as it kills the parser static c: char = - '\x10\x10' //~ ERROR: character literal may only contain one codepoint: '\x10 + '\x10\x10' //~ ERROR: character literal may only contain one codepoint ; diff --git a/src/test/ui/suggestions/str-as-char.rs b/src/test/ui/suggestions/str-as-char.rs new file mode 100644 index 00000000000..09aca61147d --- /dev/null +++ b/src/test/ui/suggestions/str-as-char.rs @@ -0,0 +1,14 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +fn main() { + println!('●●'); + //~^ ERROR character literal may only contain one codepoint +} diff --git a/src/test/ui/suggestions/str-as-char.stderr b/src/test/ui/suggestions/str-as-char.stderr new file mode 100644 index 00000000000..bf975053ffa --- /dev/null +++ b/src/test/ui/suggestions/str-as-char.stderr @@ -0,0 +1,12 @@ +error: character literal may only contain one codepoint + --> $DIR/str-as-char.rs:12:14 + | +12 | println!('●●'); + | ^^^^ +help: if you meant to write a `str` literal, use double quotes + | +12 | println!("●●"); + | ^^^^ + +error: aborting due to previous error +