Skip serializing ASCII chars in case-mapping LUTs
Since ASCII chars are already handled by a special case in the `to_lower` and `to_upper` functions, there's no need to waste space on them in the lookup tables (LUTs).
This commit is contained in:
parent
992d154f3a
commit
8a4eb9e3a8
@ -606,19 +606,6 @@ pub mod conversions {
|
||||
table.binary_search_by(|&(key, _)| key.cmp(&c)).ok()
|
||||
}
|
||||
static LOWERCASE_TABLE: &[(char, [char; 3])] = &[
|
||||
('A', ['a', '\u{0}', '\u{0}']), ('B', ['b', '\u{0}', '\u{0}']),
|
||||
('C', ['c', '\u{0}', '\u{0}']), ('D', ['d', '\u{0}', '\u{0}']),
|
||||
('E', ['e', '\u{0}', '\u{0}']), ('F', ['f', '\u{0}', '\u{0}']),
|
||||
('G', ['g', '\u{0}', '\u{0}']), ('H', ['h', '\u{0}', '\u{0}']),
|
||||
('I', ['i', '\u{0}', '\u{0}']), ('J', ['j', '\u{0}', '\u{0}']),
|
||||
('K', ['k', '\u{0}', '\u{0}']), ('L', ['l', '\u{0}', '\u{0}']),
|
||||
('M', ['m', '\u{0}', '\u{0}']), ('N', ['n', '\u{0}', '\u{0}']),
|
||||
('O', ['o', '\u{0}', '\u{0}']), ('P', ['p', '\u{0}', '\u{0}']),
|
||||
('Q', ['q', '\u{0}', '\u{0}']), ('R', ['r', '\u{0}', '\u{0}']),
|
||||
('S', ['s', '\u{0}', '\u{0}']), ('T', ['t', '\u{0}', '\u{0}']),
|
||||
('U', ['u', '\u{0}', '\u{0}']), ('V', ['v', '\u{0}', '\u{0}']),
|
||||
('W', ['w', '\u{0}', '\u{0}']), ('X', ['x', '\u{0}', '\u{0}']),
|
||||
('Y', ['y', '\u{0}', '\u{0}']), ('Z', ['z', '\u{0}', '\u{0}']),
|
||||
('\u{c0}', ['\u{e0}', '\u{0}', '\u{0}']), ('\u{c1}', ['\u{e1}', '\u{0}', '\u{0}']),
|
||||
('\u{c2}', ['\u{e2}', '\u{0}', '\u{0}']), ('\u{c3}', ['\u{e3}', '\u{0}', '\u{0}']),
|
||||
('\u{c4}', ['\u{e4}', '\u{0}', '\u{0}']), ('\u{c5}', ['\u{e5}', '\u{0}', '\u{0}']),
|
||||
@ -1456,19 +1443,6 @@ pub mod conversions {
|
||||
];
|
||||
|
||||
static UPPERCASE_TABLE: &[(char, [char; 3])] = &[
|
||||
('a', ['A', '\u{0}', '\u{0}']), ('b', ['B', '\u{0}', '\u{0}']),
|
||||
('c', ['C', '\u{0}', '\u{0}']), ('d', ['D', '\u{0}', '\u{0}']),
|
||||
('e', ['E', '\u{0}', '\u{0}']), ('f', ['F', '\u{0}', '\u{0}']),
|
||||
('g', ['G', '\u{0}', '\u{0}']), ('h', ['H', '\u{0}', '\u{0}']),
|
||||
('i', ['I', '\u{0}', '\u{0}']), ('j', ['J', '\u{0}', '\u{0}']),
|
||||
('k', ['K', '\u{0}', '\u{0}']), ('l', ['L', '\u{0}', '\u{0}']),
|
||||
('m', ['M', '\u{0}', '\u{0}']), ('n', ['N', '\u{0}', '\u{0}']),
|
||||
('o', ['O', '\u{0}', '\u{0}']), ('p', ['P', '\u{0}', '\u{0}']),
|
||||
('q', ['Q', '\u{0}', '\u{0}']), ('r', ['R', '\u{0}', '\u{0}']),
|
||||
('s', ['S', '\u{0}', '\u{0}']), ('t', ['T', '\u{0}', '\u{0}']),
|
||||
('u', ['U', '\u{0}', '\u{0}']), ('v', ['V', '\u{0}', '\u{0}']),
|
||||
('w', ['W', '\u{0}', '\u{0}']), ('x', ['X', '\u{0}', '\u{0}']),
|
||||
('y', ['Y', '\u{0}', '\u{0}']), ('z', ['Z', '\u{0}', '\u{0}']),
|
||||
('\u{b5}', ['\u{39c}', '\u{0}', '\u{0}']), ('\u{df}', ['S', 'S', '\u{0}']),
|
||||
('\u{e0}', ['\u{c0}', '\u{0}', '\u{0}']), ('\u{e1}', ['\u{c1}', '\u{0}', '\u{0}']),
|
||||
('\u{e2}', ['\u{c2}', '\u{0}', '\u{0}']), ('\u{e3}', ['\u{c3}', '\u{0}', '\u{0}']),
|
||||
|
@ -1,27 +1,24 @@
|
||||
use crate::{fmt_list, UnicodeData};
|
||||
use std::fmt;
|
||||
use std::{collections::BTreeMap, fmt};
|
||||
|
||||
pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
|
||||
let mut file = String::new();
|
||||
|
||||
file.push_str(HEADER.trim_start());
|
||||
|
||||
let decl_type = "&[(char, [char; 3])]";
|
||||
|
||||
file.push_str(&format!(
|
||||
"static LOWERCASE_TABLE: {} = &[{}];",
|
||||
decl_type,
|
||||
fmt_list(data.to_lower.iter().map(to_mapping))
|
||||
));
|
||||
file.push_str(&generate_table("LOWER", &data.to_lower));
|
||||
file.push_str("\n\n");
|
||||
file.push_str(&format!(
|
||||
"static UPPERCASE_TABLE: {} = &[{}];",
|
||||
decl_type,
|
||||
fmt_list(data.to_upper.iter().map(to_mapping))
|
||||
));
|
||||
file.push_str(&generate_table("UPPER", &data.to_upper));
|
||||
file
|
||||
}
|
||||
|
||||
/// Renders the source text of one case-conversion lookup table.
///
/// `case` is spliced in front of `CASE_TABLE` to form the static's name
/// (the caller passes `"LOWER"` or `"UPPER"`). Each entry of `data` is
/// converted with `to_mapping` and the surviving entries are rendered by
/// `fmt_list` into the body of a `&[(char, [char; 3])]` slice literal.
///
/// Entries whose key character is ASCII are left out of the table.
fn generate_table(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> String {
|
||||
format!(
|
||||
"static {}CASE_TABLE: &[(char, [char; 3])] = &[{}];",
|
||||
case,
|
||||
// Drop ASCII keys: per the commit description, `to_lower` / `to_upper`
// already special-case ASCII, so table entries for them would only
// waste space.
|
||||
fmt_list(data.iter().map(to_mapping).filter(|(k, _)| !k.0.is_ascii()))
|
||||
)
|
||||
}
|
||||
|
||||
fn to_mapping((key, (a, b, c)): (&u32, &(u32, u32, u32))) -> (CharEscape, [CharEscape; 3]) {
|
||||
(
|
||||
CharEscape(std::char::from_u32(*key).unwrap()),
|
||||
|
Loading…
Reference in New Issue
Block a user