Skip serializing ASCII chars in case LUTs

Since ASCII chars are already handled by a special case in the
`to_lower` and `to_upper` functions, there's no need to waste space on
them in the lookup tables (LUTs).
This commit is contained in:
Martin Gammelsæter 2023-03-15 17:23:48 +01:00
parent 992d154f3a
commit 8a4eb9e3a8
2 changed files with 11 additions and 40 deletions

View File

@ -606,19 +606,6 @@ pub mod conversions {
table.binary_search_by(|&(key, _)| key.cmp(&c)).ok()
}
static LOWERCASE_TABLE: &[(char, [char; 3])] = &[
('A', ['a', '\u{0}', '\u{0}']), ('B', ['b', '\u{0}', '\u{0}']),
('C', ['c', '\u{0}', '\u{0}']), ('D', ['d', '\u{0}', '\u{0}']),
('E', ['e', '\u{0}', '\u{0}']), ('F', ['f', '\u{0}', '\u{0}']),
('G', ['g', '\u{0}', '\u{0}']), ('H', ['h', '\u{0}', '\u{0}']),
('I', ['i', '\u{0}', '\u{0}']), ('J', ['j', '\u{0}', '\u{0}']),
('K', ['k', '\u{0}', '\u{0}']), ('L', ['l', '\u{0}', '\u{0}']),
('M', ['m', '\u{0}', '\u{0}']), ('N', ['n', '\u{0}', '\u{0}']),
('O', ['o', '\u{0}', '\u{0}']), ('P', ['p', '\u{0}', '\u{0}']),
('Q', ['q', '\u{0}', '\u{0}']), ('R', ['r', '\u{0}', '\u{0}']),
('S', ['s', '\u{0}', '\u{0}']), ('T', ['t', '\u{0}', '\u{0}']),
('U', ['u', '\u{0}', '\u{0}']), ('V', ['v', '\u{0}', '\u{0}']),
('W', ['w', '\u{0}', '\u{0}']), ('X', ['x', '\u{0}', '\u{0}']),
('Y', ['y', '\u{0}', '\u{0}']), ('Z', ['z', '\u{0}', '\u{0}']),
('\u{c0}', ['\u{e0}', '\u{0}', '\u{0}']), ('\u{c1}', ['\u{e1}', '\u{0}', '\u{0}']),
('\u{c2}', ['\u{e2}', '\u{0}', '\u{0}']), ('\u{c3}', ['\u{e3}', '\u{0}', '\u{0}']),
('\u{c4}', ['\u{e4}', '\u{0}', '\u{0}']), ('\u{c5}', ['\u{e5}', '\u{0}', '\u{0}']),
@ -1456,19 +1443,6 @@ pub mod conversions {
];
static UPPERCASE_TABLE: &[(char, [char; 3])] = &[
('a', ['A', '\u{0}', '\u{0}']), ('b', ['B', '\u{0}', '\u{0}']),
('c', ['C', '\u{0}', '\u{0}']), ('d', ['D', '\u{0}', '\u{0}']),
('e', ['E', '\u{0}', '\u{0}']), ('f', ['F', '\u{0}', '\u{0}']),
('g', ['G', '\u{0}', '\u{0}']), ('h', ['H', '\u{0}', '\u{0}']),
('i', ['I', '\u{0}', '\u{0}']), ('j', ['J', '\u{0}', '\u{0}']),
('k', ['K', '\u{0}', '\u{0}']), ('l', ['L', '\u{0}', '\u{0}']),
('m', ['M', '\u{0}', '\u{0}']), ('n', ['N', '\u{0}', '\u{0}']),
('o', ['O', '\u{0}', '\u{0}']), ('p', ['P', '\u{0}', '\u{0}']),
('q', ['Q', '\u{0}', '\u{0}']), ('r', ['R', '\u{0}', '\u{0}']),
('s', ['S', '\u{0}', '\u{0}']), ('t', ['T', '\u{0}', '\u{0}']),
('u', ['U', '\u{0}', '\u{0}']), ('v', ['V', '\u{0}', '\u{0}']),
('w', ['W', '\u{0}', '\u{0}']), ('x', ['X', '\u{0}', '\u{0}']),
('y', ['Y', '\u{0}', '\u{0}']), ('z', ['Z', '\u{0}', '\u{0}']),
('\u{b5}', ['\u{39c}', '\u{0}', '\u{0}']), ('\u{df}', ['S', 'S', '\u{0}']),
('\u{e0}', ['\u{c0}', '\u{0}', '\u{0}']), ('\u{e1}', ['\u{c1}', '\u{0}', '\u{0}']),
('\u{e2}', ['\u{c2}', '\u{0}', '\u{0}']), ('\u{e3}', ['\u{c3}', '\u{0}', '\u{0}']),

View File

@ -1,27 +1,24 @@
use crate::{fmt_list, UnicodeData};
use std::fmt;
use std::{collections::BTreeMap, fmt};
/// Builds the text of the generated case-mapping source and returns it as a `String`.
///
/// The output starts with `HEADER` (leading whitespace trimmed), followed by a
/// lowercase table built from `data.to_lower`, a blank-line separator (`"\n\n"`),
/// and an uppercase table built from `data.to_upper`.
///
/// NOTE(review): this listing appears to interleave the pre-change and
/// post-change lines of the commit. The `format!` calls that use `decl_type`
/// and the `generate_table` calls each produce a `LOWERCASE_TABLE` /
/// `UPPERCASE_TABLE` declaration; confirm against the actual file which pair
/// is live before relying on the statement order shown here.
pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
let mut file = String::new();
// Trim only the leading whitespace of the header constant.
file.push_str(HEADER.trim_start());
// Type annotation shared by the two inline `format!` table declarations below.
let decl_type = "&[(char, [char; 3])]";
// Inline variant: every `to_lower` entry is passed through `to_mapping`, unfiltered.
file.push_str(&format!(
"static LOWERCASE_TABLE: {} = &[{}];",
decl_type,
fmt_list(data.to_lower.iter().map(to_mapping))
));
// Helper variant: `generate_table` additionally drops entries with an ASCII key.
file.push_str(&generate_table("LOWER", &data.to_lower));
// Blank line between the lowercase and uppercase declarations.
file.push_str("\n\n");
// Inline variant for the uppercase table, unfiltered.
file.push_str(&format!(
"static UPPERCASE_TABLE: {} = &[{}];",
decl_type,
fmt_list(data.to_upper.iter().map(to_mapping))
));
// Helper variant for the uppercase table, ASCII keys dropped.
file.push_str(&generate_table("UPPER", &data.to_upper));
file
}
/// Renders the source text of one case-conversion lookup table.
///
/// `case` is spliced in front of `CASE_TABLE` to form the name of the emitted
/// static (the caller in this file passes `"LOWER"` and `"UPPER"`), and `data`
/// supplies the table entries.
///
/// Every entry is first converted with `to_mapping`; entries whose converted
/// key is an ASCII character are then discarded, so only non-ASCII keys end up
/// in the rendered table.
fn generate_table(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> String {
    // Convert first, filter second: the ASCII test runs on the converted key.
    let non_ascii_entries = data
        .iter()
        .map(to_mapping)
        .filter(|(escaped_key, _)| !escaped_key.0.is_ascii());
    let rendered_entries = fmt_list(non_ascii_entries);

    format!(
        "static {}CASE_TABLE: &[(char, [char; 3])] = &[{}];",
        case, rendered_entries
    )
}
fn to_mapping((key, (a, b, c)): (&u32, &(u32, u32, u32))) -> (CharEscape, [CharEscape; 3]) {
(
CharEscape(std::char::from_u32(*key).unwrap()),