From 8a4eb9e3a87b1fb9a5078f6f45cf62e2f9f8bc2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Gammels=C3=A6ter?= Date: Wed, 15 Mar 2023 17:23:48 +0100 Subject: [PATCH] Skip serializing ascii chars in case LUTs Since ascii chars are already handled by a special case in the `to_lower` and `to_upper` functions, there's no need to waste space on them in the LUTs. --- library/core/src/unicode/unicode_data.rs | 26 ------------------- .../src/case_mapping.rs | 25 ++++++++---------- 2 files changed, 11 insertions(+), 40 deletions(-) diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs index bd69ca520c2..aac706f3173 100644 --- a/library/core/src/unicode/unicode_data.rs +++ b/library/core/src/unicode/unicode_data.rs @@ -606,19 +606,6 @@ pub mod conversions { table.binary_search_by(|&(key, _)| key.cmp(&c)).ok() } static LOWERCASE_TABLE: &[(char, [char; 3])] = &[ - ('A', ['a', '\u{0}', '\u{0}']), ('B', ['b', '\u{0}', '\u{0}']), - ('C', ['c', '\u{0}', '\u{0}']), ('D', ['d', '\u{0}', '\u{0}']), - ('E', ['e', '\u{0}', '\u{0}']), ('F', ['f', '\u{0}', '\u{0}']), - ('G', ['g', '\u{0}', '\u{0}']), ('H', ['h', '\u{0}', '\u{0}']), - ('I', ['i', '\u{0}', '\u{0}']), ('J', ['j', '\u{0}', '\u{0}']), - ('K', ['k', '\u{0}', '\u{0}']), ('L', ['l', '\u{0}', '\u{0}']), - ('M', ['m', '\u{0}', '\u{0}']), ('N', ['n', '\u{0}', '\u{0}']), - ('O', ['o', '\u{0}', '\u{0}']), ('P', ['p', '\u{0}', '\u{0}']), - ('Q', ['q', '\u{0}', '\u{0}']), ('R', ['r', '\u{0}', '\u{0}']), - ('S', ['s', '\u{0}', '\u{0}']), ('T', ['t', '\u{0}', '\u{0}']), - ('U', ['u', '\u{0}', '\u{0}']), ('V', ['v', '\u{0}', '\u{0}']), - ('W', ['w', '\u{0}', '\u{0}']), ('X', ['x', '\u{0}', '\u{0}']), - ('Y', ['y', '\u{0}', '\u{0}']), ('Z', ['z', '\u{0}', '\u{0}']), ('\u{c0}', ['\u{e0}', '\u{0}', '\u{0}']), ('\u{c1}', ['\u{e1}', '\u{0}', '\u{0}']), ('\u{c2}', ['\u{e2}', '\u{0}', '\u{0}']), ('\u{c3}', ['\u{e3}', '\u{0}', '\u{0}']), ('\u{c4}', ['\u{e4}', '\u{0}', '\u{0}']), ('\u{c5}', ['\u{e5}', '\u{0}', '\u{0}']), @@ -1456,19 +1443,6 @@ pub mod conversions { ]; static UPPERCASE_TABLE: &[(char, [char; 3])] = &[ - ('a', ['A', '\u{0}', '\u{0}']), ('b', ['B', '\u{0}', '\u{0}']), - ('c', ['C', '\u{0}', '\u{0}']), ('d', ['D', '\u{0}', '\u{0}']), - ('e', ['E', '\u{0}', '\u{0}']), ('f', ['F', '\u{0}', '\u{0}']), - ('g', ['G', '\u{0}', '\u{0}']), ('h', ['H', '\u{0}', '\u{0}']), - ('i', ['I', '\u{0}', '\u{0}']), ('j', ['J', '\u{0}', '\u{0}']), - ('k', ['K', '\u{0}', '\u{0}']), ('l', ['L', '\u{0}', '\u{0}']), - ('m', ['M', '\u{0}', '\u{0}']), ('n', ['N', '\u{0}', '\u{0}']), - ('o', ['O', '\u{0}', '\u{0}']), ('p', ['P', '\u{0}', '\u{0}']), - ('q', ['Q', '\u{0}', '\u{0}']), ('r', ['R', '\u{0}', '\u{0}']), - ('s', ['S', '\u{0}', '\u{0}']), ('t', ['T', '\u{0}', '\u{0}']), - ('u', ['U', '\u{0}', '\u{0}']), ('v', ['V', '\u{0}', '\u{0}']), - ('w', ['W', '\u{0}', '\u{0}']), ('x', ['X', '\u{0}', '\u{0}']), - ('y', ['Y', '\u{0}', '\u{0}']), ('z', ['Z', '\u{0}', '\u{0}']), ('\u{b5}', ['\u{39c}', '\u{0}', '\u{0}']), ('\u{df}', ['S', 'S', '\u{0}']), ('\u{e0}', ['\u{c0}', '\u{0}', '\u{0}']), ('\u{e1}', ['\u{c1}', '\u{0}', '\u{0}']), ('\u{e2}', ['\u{c2}', '\u{0}', '\u{0}']), ('\u{e3}', ['\u{c3}', '\u{0}', '\u{0}']), diff --git a/src/tools/unicode-table-generator/src/case_mapping.rs b/src/tools/unicode-table-generator/src/case_mapping.rs index 992aac1f857..b8153a71118 100644 --- a/src/tools/unicode-table-generator/src/case_mapping.rs +++ b/src/tools/unicode-table-generator/src/case_mapping.rs @@ -1,27 +1,24 @@ use crate::{fmt_list, UnicodeData}; -use std::fmt; +use std::{collections::BTreeMap, fmt}; pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String { let mut file = String::new(); file.push_str(HEADER.trim_start()); - - let decl_type = "&[(char, [char; 3])]"; - - file.push_str(&format!( - "static LOWERCASE_TABLE: {} = &[{}];", - decl_type, - fmt_list(data.to_lower.iter().map(to_mapping)) - )); + file.push_str(&generate_table("LOWER", &data.to_lower)); file.push_str("\n\n"); - file.push_str(&format!( - "static UPPERCASE_TABLE: {} = &[{}];", - decl_type, - fmt_list(data.to_upper.iter().map(to_mapping)) - )); + file.push_str(&generate_table("UPPER", &data.to_upper)); file } +fn generate_table(case: &str, data: &BTreeMap) -> String { + format!( + "static {}CASE_TABLE: &[(char, [char; 3])] = &[{}];", + case, + fmt_list(data.iter().map(to_mapping).filter(|(k, _)| !k.0.is_ascii())) + ) +} + fn to_mapping((key, (a, b, c)): (&u32, &(u32, u32, u32))) -> (CharEscape, [CharEscape; 3]) { ( CharEscape(std::char::from_u32(*key).unwrap()),