Skip serializing ascii chars in case LUTs

Since ascii chars are already handled by a special case in the `to_lower` and `to_upper` functions, there's no need to waste space on them in the LUTs.
2023-03-15 17:23:48 +01:00 · 2023-03-15 17:23:48 +01:00 · 8a4eb9e3a8
commit 8a4eb9e3a8
parent 992d154f3a
2 changed files with 11 additions and 40 deletions
--- a/library/core/src/unicode/unicode_data.rs
+++ b/library/core/src/unicode/unicode_data.rs
@ -606,19 +606,6 @@ pub mod conversions {
        table.binary_search_by(|&(key, _)| key.cmp(&c)).ok()
    }
    static LOWERCASE_TABLE: &[(char, [char; 3])] = &[
-        ('A', ['a', '\u{0}', '\u{0}']), ('B', ['b', '\u{0}', '\u{0}']),
-        ('C', ['c', '\u{0}', '\u{0}']), ('D', ['d', '\u{0}', '\u{0}']),
-        ('E', ['e', '\u{0}', '\u{0}']), ('F', ['f', '\u{0}', '\u{0}']),
-        ('G', ['g', '\u{0}', '\u{0}']), ('H', ['h', '\u{0}', '\u{0}']),
-        ('I', ['i', '\u{0}', '\u{0}']), ('J', ['j', '\u{0}', '\u{0}']),
-        ('K', ['k', '\u{0}', '\u{0}']), ('L', ['l', '\u{0}', '\u{0}']),
-        ('M', ['m', '\u{0}', '\u{0}']), ('N', ['n', '\u{0}', '\u{0}']),
-        ('O', ['o', '\u{0}', '\u{0}']), ('P', ['p', '\u{0}', '\u{0}']),
-        ('Q', ['q', '\u{0}', '\u{0}']), ('R', ['r', '\u{0}', '\u{0}']),
-        ('S', ['s', '\u{0}', '\u{0}']), ('T', ['t', '\u{0}', '\u{0}']),
-        ('U', ['u', '\u{0}', '\u{0}']), ('V', ['v', '\u{0}', '\u{0}']),
-        ('W', ['w', '\u{0}', '\u{0}']), ('X', ['x', '\u{0}', '\u{0}']),
-        ('Y', ['y', '\u{0}', '\u{0}']), ('Z', ['z', '\u{0}', '\u{0}']),
        ('\u{c0}', ['\u{e0}', '\u{0}', '\u{0}']), ('\u{c1}', ['\u{e1}', '\u{0}', '\u{0}']),
        ('\u{c2}', ['\u{e2}', '\u{0}', '\u{0}']), ('\u{c3}', ['\u{e3}', '\u{0}', '\u{0}']),
        ('\u{c4}', ['\u{e4}', '\u{0}', '\u{0}']), ('\u{c5}', ['\u{e5}', '\u{0}', '\u{0}']),
@ -1456,19 +1443,6 @@ pub mod conversions {
    ];

    static UPPERCASE_TABLE: &[(char, [char; 3])] = &[
-        ('a', ['A', '\u{0}', '\u{0}']), ('b', ['B', '\u{0}', '\u{0}']),
-        ('c', ['C', '\u{0}', '\u{0}']), ('d', ['D', '\u{0}', '\u{0}']),
-        ('e', ['E', '\u{0}', '\u{0}']), ('f', ['F', '\u{0}', '\u{0}']),
-        ('g', ['G', '\u{0}', '\u{0}']), ('h', ['H', '\u{0}', '\u{0}']),
-        ('i', ['I', '\u{0}', '\u{0}']), ('j', ['J', '\u{0}', '\u{0}']),
-        ('k', ['K', '\u{0}', '\u{0}']), ('l', ['L', '\u{0}', '\u{0}']),
-        ('m', ['M', '\u{0}', '\u{0}']), ('n', ['N', '\u{0}', '\u{0}']),
-        ('o', ['O', '\u{0}', '\u{0}']), ('p', ['P', '\u{0}', '\u{0}']),
-        ('q', ['Q', '\u{0}', '\u{0}']), ('r', ['R', '\u{0}', '\u{0}']),
-        ('s', ['S', '\u{0}', '\u{0}']), ('t', ['T', '\u{0}', '\u{0}']),
-        ('u', ['U', '\u{0}', '\u{0}']), ('v', ['V', '\u{0}', '\u{0}']),
-        ('w', ['W', '\u{0}', '\u{0}']), ('x', ['X', '\u{0}', '\u{0}']),
-        ('y', ['Y', '\u{0}', '\u{0}']), ('z', ['Z', '\u{0}', '\u{0}']),
        ('\u{b5}', ['\u{39c}', '\u{0}', '\u{0}']), ('\u{df}', ['S', 'S', '\u{0}']),
        ('\u{e0}', ['\u{c0}', '\u{0}', '\u{0}']), ('\u{e1}', ['\u{c1}', '\u{0}', '\u{0}']),
        ('\u{e2}', ['\u{c2}', '\u{0}', '\u{0}']), ('\u{e3}', ['\u{c3}', '\u{0}', '\u{0}']),
--- a/src/tools/unicode-table-generator/src/case_mapping.rs
+++ b/src/tools/unicode-table-generator/src/case_mapping.rs
@ -1,27 +1,24 @@
 use crate::{fmt_list, UnicodeData};
-use std::fmt;
+use std::{collections::BTreeMap, fmt};

 pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
    let mut file = String::new();

    file.push_str(HEADER.trim_start());
-
-    let decl_type = "&[(char, [char; 3])]";
-
-    file.push_str(&format!(
-        "static LOWERCASE_TABLE: {} = &[{}];",
-        decl_type,
-        fmt_list(data.to_lower.iter().map(to_mapping))
-    ));
+    file.push_str(&generate_table("LOWER", &data.to_lower));
    file.push_str("\n\n");
-    file.push_str(&format!(
-        "static UPPERCASE_TABLE: {} = &[{}];",
-        decl_type,
-        fmt_list(data.to_upper.iter().map(to_mapping))
-    ));
+    file.push_str(&generate_table("UPPER", &data.to_upper));
    file
 }

+fn generate_table(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> String {
+    format!(
+        "static {}CASE_TABLE: &[(char, [char; 3])] = &[{}];",
+        case,
+        fmt_list(data.iter().map(to_mapping).filter(|(k, _)| !k.0.is_ascii()))
+    )
+}
+
 fn to_mapping((key, (a, b, c)): (&u32, &(u32, u32, u32))) -> (CharEscape, [CharEscape; 3]) {
    (
        CharEscape(std::char::from_u32(*key).unwrap()),