Auto merge of #49283 - varkor:combining-chars-escape_debug, r=SimonSapin
Escape combining characters in char::Debug Although combining characters are technically printable, they make little sense to print on their own with `Debug`: it'd be better to escape them like non-printable characters. This is a breaking change, but since `escape_debug` is rarely used, and almost certainly used primarily for debugging, I imagine this is an acceptable change. Resolves #41922. r? @alexcrichton cc @clarcharr
This commit is contained in:
commit
65a16c0007
14
.gitignore
vendored
14
.gitignore
vendored
@ -74,13 +74,13 @@ __pycache__/
|
||||
/obj/
|
||||
/rt/
|
||||
/rustllvm/
|
||||
/src/libstd_unicode/DerivedCoreProperties.txt
|
||||
/src/libstd_unicode/DerivedNormalizationProps.txt
|
||||
/src/libstd_unicode/PropList.txt
|
||||
/src/libstd_unicode/ReadMe.txt
|
||||
/src/libstd_unicode/Scripts.txt
|
||||
/src/libstd_unicode/SpecialCasing.txt
|
||||
/src/libstd_unicode/UnicodeData.txt
|
||||
/src/libcore/unicode/DerivedCoreProperties.txt
|
||||
/src/libcore/unicode/DerivedNormalizationProps.txt
|
||||
/src/libcore/unicode/PropList.txt
|
||||
/src/libcore/unicode/ReadMe.txt
|
||||
/src/libcore/unicode/Scripts.txt
|
||||
/src/libcore/unicode/SpecialCasing.txt
|
||||
/src/libcore/unicode/UnicodeData.txt
|
||||
/stage[0-9]+/
|
||||
/target
|
||||
target/
|
||||
|
@ -372,12 +372,21 @@ impl str {
|
||||
|
||||
/// Escapes each char in `s` with [`char::escape_debug`].
|
||||
///
|
||||
/// Note: code points with the `Grapheme_Extend` property (such as combining
|
||||
/// marks) are escaped on that basis only when they begin the string.
|
||||
///
|
||||
/// [`char::escape_debug`]: primitive.char.html#method.escape_debug
|
||||
#[unstable(feature = "str_escape",
|
||||
reason = "return type may change to be an iterator",
|
||||
issue = "27791")]
|
||||
pub fn escape_debug(&self) -> String {
|
||||
self.chars().flat_map(|c| c.escape_debug()).collect()
|
||||
let mut string = String::with_capacity(self.len());
|
||||
let mut chars = self.chars();
|
||||
if let Some(first) = chars.next() {
|
||||
string.extend(first.escape_debug_ext(true))
|
||||
}
|
||||
string.extend(chars.flat_map(|c| c.escape_debug_ext(false)));
|
||||
string
|
||||
}
|
||||
|
||||
/// Escapes each char in `s` with [`char::escape_default`].
|
||||
|
@ -989,6 +989,12 @@ fn test_escape_unicode() {
|
||||
|
||||
#[test]
|
||||
fn test_escape_debug() {
|
||||
// Note that there are subtleties with the number of backslashes
|
||||
// on the left- and right-hand sides. In particular, Unicode code points
|
||||
// are usually escaped with two backslashes on the right-hand side, as
|
||||
// they are escaped. However, when the character is unescaped (e.g. for
|
||||
// printable characters), only a single backslash appears (as the character
|
||||
// itself appears in the debug string).
|
||||
assert_eq!("abc".escape_debug(), "abc");
|
||||
assert_eq!("a c".escape_debug(), "a c");
|
||||
assert_eq!("éèê".escape_debug(), "éèê");
|
||||
@ -999,6 +1005,7 @@ fn test_escape_debug() {
|
||||
assert_eq!("\u{10000}\u{10ffff}".escape_debug(), "\u{10000}\\u{10ffff}");
|
||||
assert_eq!("ab\u{200b}".escape_debug(), "ab\\u{200b}");
|
||||
assert_eq!("\u{10d4ea}\r".escape_debug(), "\\u{10d4ea}\\r");
|
||||
assert_eq!("\u{301}a\u{301}bé\u{e000}".escape_debug(), "\\u{301}a\u{301}bé\\u{e000}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -187,6 +187,27 @@ impl char {
|
||||
}
|
||||
}
|
||||
|
||||
/// An extended version of `escape_debug` that optionally permits escaping
|
||||
/// Extended Grapheme codepoints. This allows us to format characters like
|
||||
/// nonspacing marks better when they're at the start of a string.
|
||||
#[doc(hidden)]
|
||||
#[unstable(feature = "str_internals", issue = "0")]
|
||||
#[inline]
|
||||
pub fn escape_debug_ext(self, escape_grapheme_extended: bool) -> EscapeDebug {
|
||||
let init_state = match self {
|
||||
'\t' => EscapeDefaultState::Backslash('t'),
|
||||
'\r' => EscapeDefaultState::Backslash('r'),
|
||||
'\n' => EscapeDefaultState::Backslash('n'),
|
||||
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
|
||||
_ if escape_grapheme_extended && self.is_grapheme_extended() => {
|
||||
EscapeDefaultState::Unicode(self.escape_unicode())
|
||||
}
|
||||
_ if is_printable(self) => EscapeDefaultState::Char(self),
|
||||
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
|
||||
};
|
||||
EscapeDebug(EscapeDefault { state: init_state })
|
||||
}
|
||||
|
||||
/// Returns an iterator that yields the literal escape code of a character
|
||||
/// as `char`s.
|
||||
///
|
||||
@ -224,15 +245,7 @@ impl char {
|
||||
#[stable(feature = "char_escape_debug", since = "1.20.0")]
|
||||
#[inline]
|
||||
pub fn escape_debug(self) -> EscapeDebug {
|
||||
let init_state = match self {
|
||||
'\t' => EscapeDefaultState::Backslash('t'),
|
||||
'\r' => EscapeDefaultState::Backslash('r'),
|
||||
'\n' => EscapeDefaultState::Backslash('n'),
|
||||
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
|
||||
c if is_printable(c) => EscapeDefaultState::Char(c),
|
||||
c => EscapeDefaultState::Unicode(c.escape_unicode()),
|
||||
};
|
||||
EscapeDebug(EscapeDefault { state: init_state })
|
||||
self.escape_debug_ext(true)
|
||||
}
|
||||
|
||||
/// Returns an iterator that yields the literal escape code of a character
|
||||
@ -692,6 +705,15 @@ impl char {
|
||||
general_category::Cc(self)
|
||||
}
|
||||
|
||||
/// Returns true if this `char` is an extended grapheme character, and false otherwise.
|
||||
///
|
||||
/// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering
|
||||
/// Category `Grapheme_Extend`.
|
||||
#[inline]
|
||||
pub(crate) fn is_grapheme_extended(self) -> bool {
|
||||
derived_property::Grapheme_Extend(self)
|
||||
}
|
||||
|
||||
/// Returns true if this `char` is numeric, and false otherwise.
|
||||
///
|
||||
/// 'Numeric'-ness is defined in terms of the Unicode General Categories
|
||||
|
@ -181,6 +181,7 @@ fn test_escape_debug() {
|
||||
assert_eq!(string('\u{ff}'), "\u{ff}");
|
||||
assert_eq!(string('\u{11b}'), "\u{11b}");
|
||||
assert_eq!(string('\u{1d4b6}'), "\u{1d4b6}");
|
||||
assert_eq!(string('\u{301}'), "\\u{301}"); // combining character
|
||||
assert_eq!(string('\u{200b}'),"\\u{200b}"); // zero width space
|
||||
assert_eq!(string('\u{e000}'), "\\u{e000}"); // private use 1
|
||||
assert_eq!(string('\u{100000}'), "\\u{100000}"); // private use 2
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
|
||||
// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
@ -549,6 +549,127 @@ pub mod derived_property {
|
||||
Cased_table.lookup(c)
|
||||
}
|
||||
|
||||
pub const Grapheme_Extend_table: &super::BoolTrie = &super::BoolTrie {
|
||||
r1: [
|
||||
0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
|
||||
0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
|
||||
0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
|
||||
0xffffffffffffffff, 0x0000ffffffffffff, 0x0000000000000000, 0x0000000000000000,
|
||||
0x0000000000000000, 0x0000000000000000, 0x00000000000003f8, 0x0000000000000000,
|
||||
0x0000000000000000, 0x0000000000000000, 0xbffffffffffe0000, 0x00000000000000b6,
|
||||
0x0000000007ff0000, 0x00010000fffff800, 0x0000000000000000, 0x00003d9f9fc00000,
|
||||
0xffff000000020000, 0x00000000000007ff, 0x0001ffc000000000, 0x000ff80000000000
|
||||
],
|
||||
r2: [
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 7, 2, 20, 21,
|
||||
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 32, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 33, 34, 35, 36, 37, 2, 38, 2, 39, 2, 2, 2, 40, 41, 42, 2, 43,
|
||||
44, 45, 46, 47, 2, 2, 48, 2, 2, 2, 49, 2, 2, 2, 2, 2, 2, 2, 2, 50, 2, 2, 51, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 52, 2, 53, 2, 54, 2, 2, 2, 2, 2, 2, 2, 2, 55,
|
||||
2, 56, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 57, 58, 59, 2, 2, 2, 2, 60, 2, 2, 61, 62, 63, 64, 65, 66, 67,
|
||||
68, 69, 2, 2, 2, 70, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 71, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 72, 2, 2, 2, 2, 2, 58, 2
|
||||
],
|
||||
r3: &[
|
||||
0x00003eeffbc00000, 0x000000000e000000, 0x0000000000000000, 0xfffffffbfff00000,
|
||||
0x1400000000000007, 0x0000000c00fe21fe, 0x5000000000000002, 0x0000000c0080201e,
|
||||
0x1000000000000006, 0x0023000000023986, 0xfc00000c000021be, 0xd000000000000002,
|
||||
0x0000000c00c0201e, 0x4000000000000004, 0x0000000000802001, 0xc000000000000001,
|
||||
0x0000000c00603dc1, 0x9000000000000002, 0x0000000c00603044, 0x5800000000000003,
|
||||
0x00000000805c8400, 0x07f2000000000000, 0x0000000000007f80, 0x1bf2000000000000,
|
||||
0x0000000000003f00, 0x02a0000003000000, 0x7ffe000000000000, 0x1ffffffffeffe0df,
|
||||
0x0000000000000040, 0x66fde00000000000, 0x001e0001c3000000, 0x0000000020002064,
|
||||
0x00000000e0000000, 0x001c0000001c0000, 0x000c0000000c0000, 0x3fb0000000000000,
|
||||
0x00000000200ffe40, 0x0000000000003800, 0x0000020000000060, 0x0e04018700000000,
|
||||
0x0000000009800000, 0x9ff81fe57f400000, 0x7fff000000000000, 0x17d000000000000f,
|
||||
0x000ff80000000004, 0x00003b3c00000003, 0x0003a34000000000, 0x00cff00000000000,
|
||||
0x031021fdfff70000, 0xfbffffffffffffff, 0x0000000000001000, 0x0001ffffffff0000,
|
||||
0x0003800000000000, 0x8000000000000000, 0xffffffff00000000, 0x0000fc0000000000,
|
||||
0x0000000006000000, 0x3ff7800000000000, 0x00000000c0000000, 0x0003000000000000,
|
||||
0x0000006000000844, 0x0003ffff00000030, 0x00003fc000000000, 0x000000000003ff80,
|
||||
0x13c8000000000007, 0x0000002000000000, 0x00667e0000000000, 0x1000000000001008,
|
||||
0xc19d000000000000, 0x0040300000000002, 0x0000212000000000, 0x0000000040000000,
|
||||
0x0000ffff0000ffff
|
||||
],
|
||||
r4: [
|
||||
0, 1, 2, 2, 2, 2, 3, 2, 2, 2, 2, 4, 2, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
],
|
||||
r5: &[
|
||||
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 6, 7, 8, 0, 9, 10, 11, 12, 13, 0, 0, 14, 15, 16, 0, 0, 17, 18, 19, 20,
|
||||
0, 0, 21, 22, 23, 24, 25, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 28, 29, 0, 0, 0,
|
||||
0, 0, 30, 0, 31, 0, 32, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 39, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 46, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 48, 0, 0, 48, 48,
|
||||
48, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
],
|
||||
r6: &[
|
||||
0x0000000000000000, 0x2000000000000000, 0x0000000100000000, 0x07c0000000000000,
|
||||
0x870000000000f06e, 0x0000006000000000, 0xff00000000000002, 0x800000000000007f,
|
||||
0x0678000000000003, 0x001fef8000000007, 0x0008000000000000, 0x7fc0000000000003,
|
||||
0x0000000000001c00, 0x40d3800000000000, 0x000007f880000000, 0x5000000000000003,
|
||||
0x001f1fc000800001, 0xff00000000000000, 0x000000000000005c, 0xa5f9000000000000,
|
||||
0x000000000000000d, 0xb03c800000000000, 0x0000000030000001, 0xa7f8000000000000,
|
||||
0x0000000000000001, 0x00bf280000000000, 0x00000fbce0000000, 0x79f800000000067e,
|
||||
0x000000000e7e0080, 0x00000000037ffc00, 0xbf7f000000000000, 0x006dfcfffffc0000,
|
||||
0xb47e000000000000, 0x00000000000000bf, 0x001f000000000000, 0x007f000000000000,
|
||||
0x0000000000078000, 0x0000000060000000, 0xf807c3a000000000, 0x00003c0000000fe7,
|
||||
0x000000000000001c, 0xf87fffffffffffff, 0x00201fffffffffff, 0x0000fffef8000010,
|
||||
0x000007dbf9ffff7f, 0x00000000007f0000, 0x00000000000007f0, 0xffffffff00000000,
|
||||
0xffffffffffffffff, 0x0000ffffffffffff
|
||||
],
|
||||
};
|
||||
|
||||
pub fn Grapheme_Extend(c: char) -> bool {
|
||||
Grapheme_Extend_table.lookup(c)
|
||||
}
|
||||
|
||||
pub const Lowercase_table: &super::BoolTrie = &super::BoolTrie {
|
||||
r1: [
|
||||
0x0000000000000000, 0x07fffffe00000000, 0x0420040000000000, 0xff7fffff80000000,
|
||||
|
@ -21,11 +21,14 @@
|
||||
# - UnicodeData.txt
|
||||
#
|
||||
# Since this should not require frequent updates, we just store this
|
||||
# out-of-line and check the unicode.rs file into git.
|
||||
# out-of-line and check the tables.rs file into git.
|
||||
|
||||
import fileinput, re, os, sys, operator, math
|
||||
import fileinput, re, os, sys, operator, math, datetime
|
||||
|
||||
preamble = '''// Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
|
||||
# The directory in which this file resides.
|
||||
fdir = os.path.dirname(os.path.realpath(__file__)) + "/"
|
||||
|
||||
preamble = '''// Copyright 2012-{year} The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
@ -40,8 +43,8 @@ preamble = '''// Copyright 2012-2016 The Rust Project Developers. See the COPYRI
|
||||
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
|
||||
|
||||
use unicode::version::UnicodeVersion;
|
||||
use unicode::bool_trie::{BoolTrie, SmallBoolTrie};
|
||||
'''
|
||||
use unicode::bool_trie::{{BoolTrie, SmallBoolTrie}};
|
||||
'''.format(year = datetime.datetime.now().year)
|
||||
|
||||
# Mapping taken from Table 12 from:
|
||||
# http://www.unicode.org/reports/tr44/#General_Category_Values
|
||||
@ -61,11 +64,11 @@ expanded_categories = {
|
||||
surrogate_codepoints = (0xd800, 0xdfff)
|
||||
|
||||
def fetch(f):
|
||||
if not os.path.exists(os.path.basename(f)):
|
||||
os.system("curl -O http://www.unicode.org/Public/UNIDATA/%s"
|
||||
% f)
|
||||
path = fdir + os.path.basename(f)
|
||||
if not os.path.exists(path):
|
||||
os.system("curl -o {0}{1} ftp://ftp.unicode.org/Public/UNIDATA/{1}".format(fdir, f))
|
||||
|
||||
if not os.path.exists(os.path.basename(f)):
|
||||
if not os.path.exists(path):
|
||||
sys.stderr.write("cannot load %s" % f)
|
||||
exit(1)
|
||||
|
||||
@ -84,7 +87,7 @@ def load_unicode_data(f):
|
||||
|
||||
udict = {}
|
||||
range_start = -1
|
||||
for line in fileinput.input(f):
|
||||
for line in fileinput.input(fdir + f):
|
||||
data = line.split(';')
|
||||
if len(data) != 15:
|
||||
continue
|
||||
@ -156,7 +159,7 @@ def load_unicode_data(f):
|
||||
|
||||
def load_special_casing(f, to_upper, to_lower, to_title):
|
||||
fetch(f)
|
||||
for line in fileinput.input(f):
|
||||
for line in fileinput.input(fdir + f):
|
||||
data = line.split('#')[0].split(';')
|
||||
if len(data) == 5:
|
||||
code, lower, title, upper, _comment = data
|
||||
@ -243,7 +246,7 @@ def load_properties(f, interestingprops):
|
||||
re1 = re.compile("^ *([0-9A-F]+) *; *(\w+)")
|
||||
re2 = re.compile("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")
|
||||
|
||||
for line in fileinput.input(os.path.basename(f)):
|
||||
for line in fileinput.input(fdir + os.path.basename(f)):
|
||||
prop = None
|
||||
d_lo = 0
|
||||
d_hi = 0
|
||||
@ -456,7 +459,7 @@ def emit_norm_module(f, canon, compat, combine, norm_props):
|
||||
canon_comp_keys = sorted(canon_comp.keys())
|
||||
|
||||
if __name__ == "__main__":
|
||||
r = "tables.rs"
|
||||
r = fdir + "tables.rs"
|
||||
if os.path.exists(r):
|
||||
os.remove(r)
|
||||
with open(r, "w") as rf:
|
||||
@ -465,7 +468,7 @@ if __name__ == "__main__":
|
||||
|
||||
# download and parse all the data
|
||||
fetch("ReadMe.txt")
|
||||
with open("ReadMe.txt") as readme:
|
||||
with open(fdir + "ReadMe.txt") as readme:
|
||||
pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
|
||||
unicode_version = re.search(pattern, readme.read()).groups()
|
||||
rf.write("""
|
||||
@ -483,7 +486,7 @@ pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
|
||||
to_upper, to_lower, to_title) = load_unicode_data("UnicodeData.txt")
|
||||
load_special_casing("SpecialCasing.txt", to_upper, to_lower, to_title)
|
||||
want_derived = ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase",
|
||||
"Cased", "Case_Ignorable"]
|
||||
"Cased", "Case_Ignorable", "Grapheme_Extend"]
|
||||
derived = load_properties("DerivedCoreProperties.txt", want_derived)
|
||||
scripts = load_properties("Scripts.txt", [])
|
||||
props = load_properties("PropList.txt",
|
||||
@ -500,3 +503,4 @@ pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
|
||||
# normalizations and conversions module
|
||||
emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props)
|
||||
emit_conversions_module(rf, to_upper, to_lower, to_title)
|
||||
print("Regenerated tables.rs.")
|
||||
|
Loading…
x
Reference in New Issue
Block a user