Push the byte of LAST_CHUNK_MAP into the array
This optimizes slightly better.

Alphabetic     : 2536 bytes
Case_Ignorable : 1771 bytes
Cased          : 788 bytes
Cc             : 24 bytes
Grapheme_Extend: 1488 bytes
Lowercase      : 863 bytes
N              : 1038 bytes
Uppercase      : 776 bytes
White_Space    : 83 bytes
Total table sizes: 9367 bytes (-18 bytes; 2 bytes per set)
This commit is contained in:
parent
5f71d98f90
commit
233ab2f168
@ -10,7 +10,7 @@ fn range_search<
|
||||
>(
|
||||
needle: u32,
|
||||
chunk_idx_map: &[u8; N],
|
||||
(last_chunk_idx, last_chunk_mapping): (u16, u8),
|
||||
last_chunk_idx: u16,
|
||||
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
|
||||
bitset_canonical: &[u64; CANONICAL],
|
||||
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
|
||||
@ -18,14 +18,14 @@ fn range_search<
|
||||
let bucket_idx = (needle / 64) as usize;
|
||||
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
|
||||
let chunk_piece = bucket_idx % CHUNK_SIZE;
|
||||
let chunk_idx = if chunk_map_idx >= N {
|
||||
if chunk_map_idx == last_chunk_idx as usize {
|
||||
last_chunk_mapping
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// The last entry of `chunk_idx_map` actually should be at `last_chunk_idx`,
|
||||
// so we need to remap it
|
||||
let chunk_idx = if chunk_map_idx < (chunk_idx_map.len() - 1) {
|
||||
chunk_idx_map[chunk_map_idx]
|
||||
} else if chunk_map_idx == last_chunk_idx as usize {
|
||||
chunk_idx_map[chunk_idx_map.len() - 1]
|
||||
} else {
|
||||
return false;
|
||||
};
|
||||
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
|
||||
let word = if idx < CANONICAL {
|
||||
@ -54,8 +54,8 @@ pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0);
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod alphabetic {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (393, 13);
|
||||
static BITSET_CHUNKS_MAP: [u8; 393] = [
|
||||
const BITSET_LAST_CHUNK_MAP: u16 = 393;
|
||||
static BITSET_CHUNKS_MAP: [u8; 394] = [
|
||||
61, 18, 2, 35, 46, 39, 38, 74, 37, 25, 70, 34, 36, 73, 66, 5, 52, 58, 54, 58, 58, 58, 69,
|
||||
64, 43, 58, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 10, 6, 6, 23,
|
||||
@ -70,7 +70,7 @@ pub mod alphabetic {
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 21, 6, 6, 6, 6,
|
||||
6, 6, 6, 15, 72, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
|
||||
58, 58, 58, 58, 58, 58, 6, 62, 58, 58, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
58, 58, 58, 58, 58, 58, 6, 62, 58, 58, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 8]; 75] = [
|
||||
[0, 252, 121, 172, 14, 172, 172, 172], [13, 51, 125, 172, 79, 35, 166, 172],
|
||||
@ -312,8 +312,8 @@ pub mod alphabetic {
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod case_ignorable {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 2);
|
||||
static BITSET_CHUNKS_MAP: [u8; 250] = [
|
||||
const BITSET_LAST_CHUNK_MAP: u16 = 1792;
|
||||
static BITSET_CHUNKS_MAP: [u8; 251] = [
|
||||
14, 28, 47, 22, 19, 11, 4, 13, 9, 40, 39, 32, 49, 23, 15, 36, 18, 39, 39, 39, 39, 39, 27,
|
||||
26, 12, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
|
||||
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
|
||||
@ -324,7 +324,7 @@ pub mod case_ignorable {
|
||||
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 44, 39, 35, 39, 39,
|
||||
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39,
|
||||
39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 29, 39, 39, 39, 39, 39, 39, 39, 39, 39,
|
||||
34, 48, 39, 39, 39, 0, 39, 39, 21, 43, 39, 39, 45, 39, 39, 39, 39, 37,
|
||||
34, 48, 39, 39, 39, 0, 39, 39, 21, 43, 39, 39, 45, 39, 39, 39, 39, 37, 2,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 8]; 52] = [
|
||||
[3, 75, 88, 142, 142, 142, 142, 142], [5, 110, 38, 181, 142, 142, 12, 182],
|
||||
@ -503,14 +503,14 @@ pub mod case_ignorable {
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod cased {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (124, 12);
|
||||
static BITSET_CHUNKS_MAP: [u8; 123] = [
|
||||
const BITSET_LAST_CHUNK_MAP: u16 = 124;
|
||||
static BITSET_CHUNKS_MAP: [u8; 124] = [
|
||||
4, 0, 18, 18, 6, 18, 18, 9, 5, 8, 18, 3, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 14, 15, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, 16, 18, 1, 18, 10, 18, 18,
|
||||
7, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 13, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 2, 18, 18, 18, 18, 11,
|
||||
18, 2, 18, 18, 18, 18, 11, 12,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [
|
||||
[5, 5, 7, 5, 50, 10, 40, 58, 58, 58, 58, 58, 58, 58, 58, 58],
|
||||
@ -594,9 +594,9 @@ pub mod cased {
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod cc {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (2, 0);
|
||||
static BITSET_CHUNKS_MAP: [u8; 2] = [
|
||||
0, 1,
|
||||
const BITSET_LAST_CHUNK_MAP: u16 = 2;
|
||||
static BITSET_CHUNKS_MAP: [u8; 3] = [
|
||||
0, 1, 0,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 1]; 3] = [
|
||||
[0], [1], [2],
|
||||
@ -623,8 +623,8 @@ pub mod cc {
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod grapheme_extend {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 3);
|
||||
static BITSET_CHUNKS_MAP: [u8; 245] = [
|
||||
const BITSET_LAST_CHUNK_MAP: u16 = 1792;
|
||||
static BITSET_CHUNKS_MAP: [u8; 246] = [
|
||||
34, 30, 41, 44, 17, 11, 0, 12, 9, 36, 34, 29, 43, 20, 13, 34, 21, 34, 34, 34, 34, 34, 26,
|
||||
34, 16, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
|
||||
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
|
||||
@ -635,7 +635,7 @@ pub mod grapheme_extend {
|
||||
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 40, 34, 33, 34,
|
||||
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
|
||||
34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 24, 34, 34, 34, 34, 34, 34, 34, 34,
|
||||
34, 32, 42, 34, 34, 34, 1, 34, 34, 19, 38, 34, 34, 39,
|
||||
34, 32, 42, 34, 34, 34, 1, 34, 34, 19, 38, 34, 34, 39, 3,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 8]; 45] = [
|
||||
[1, 85, 27, 86, 34, 84, 100, 88], [4, 60, 71, 120, 120, 120, 120, 120],
|
||||
@ -785,12 +785,12 @@ pub mod grapheme_extend {
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod lowercase {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (122, 5);
|
||||
static BITSET_CHUNKS_MAP: [u8; 118] = [
|
||||
const BITSET_LAST_CHUNK_MAP: u16 = 122;
|
||||
static BITSET_CHUNKS_MAP: [u8; 119] = [
|
||||
16, 2, 9, 9, 4, 9, 9, 15, 3, 12, 9, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 8, 10, 9, 0, 9, 14, 9, 9, 13, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17,
|
||||
9, 9, 6, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 5,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 18] = [
|
||||
[10, 55, 52, 6, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52],
|
||||
@ -886,8 +886,8 @@ pub mod lowercase {
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod n {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (253, 21);
|
||||
static BITSET_CHUNKS_MAP: [u8; 249] = [
|
||||
const BITSET_LAST_CHUNK_MAP: u16 = 253;
|
||||
static BITSET_CHUNKS_MAP: [u8; 250] = [
|
||||
45, 19, 19, 39, 23, 40, 6, 37, 33, 17, 19, 12, 42, 32, 41, 19, 8, 19, 2, 16, 19, 19, 13,
|
||||
19, 1, 43, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
|
||||
@ -898,7 +898,7 @@ pub mod n {
|
||||
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 38, 19, 30, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
|
||||
19, 19, 27, 19, 18, 19, 19, 19, 19, 22, 26, 19, 19, 29, 19, 3, 19, 24,
|
||||
19, 19, 27, 19, 18, 19, 19, 19, 19, 22, 26, 19, 19, 29, 19, 3, 19, 24, 21,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 8]; 47] = [
|
||||
[12, 52, 44, 44, 44, 44, 44, 44], [27, 44, 44, 44, 44, 44, 67, 44],
|
||||
@ -993,13 +993,13 @@ pub mod n {
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod uppercase {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (124, 3);
|
||||
static BITSET_CHUNKS_MAP: [u8; 123] = [
|
||||
const BITSET_LAST_CHUNK_MAP: u16 = 124;
|
||||
static BITSET_CHUNKS_MAP: [u8; 124] = [
|
||||
12, 15, 5, 5, 0, 5, 5, 2, 4, 11, 5, 14, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 6, 5, 13, 5, 10, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 16, 5, 5,
|
||||
5, 5, 9,
|
||||
5, 5, 9, 3,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
|
||||
[41, 41, 5, 33, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 5, 0],
|
||||
@ -1083,9 +1083,9 @@ pub mod uppercase {
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub mod white_space {
|
||||
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (32, 3);
|
||||
static BITSET_CHUNKS_MAP: [u8; 22] = [
|
||||
0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1,
|
||||
const BITSET_LAST_CHUNK_MAP: u16 = 32;
|
||||
static BITSET_CHUNKS_MAP: [u8; 23] = [
|
||||
0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 3,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 6]; 4] = [
|
||||
[1, 4, 2, 4, 4, 4], [4, 4, 0, 3, 4, 4], [4, 4, 4, 4, 4, 4], [5, 4, 4, 4, 4, 4],
|
||||
|
@ -8,7 +8,7 @@ fn range_search<
|
||||
>(
|
||||
needle: u32,
|
||||
chunk_idx_map: &[u8; N],
|
||||
(last_chunk_idx, last_chunk_mapping): (u16, u8),
|
||||
last_chunk_idx: u16,
|
||||
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
|
||||
bitset_canonical: &[u64; CANONICAL],
|
||||
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
|
||||
@ -16,14 +16,14 @@ fn range_search<
|
||||
let bucket_idx = (needle / 64) as usize;
|
||||
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
|
||||
let chunk_piece = bucket_idx % CHUNK_SIZE;
|
||||
let chunk_idx = if chunk_map_idx >= N {
|
||||
if chunk_map_idx == last_chunk_idx as usize {
|
||||
last_chunk_mapping
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// The last entry of `chunk_idx_map` actually should be at `last_chunk_idx`,
|
||||
// so we need to remap it
|
||||
let chunk_idx = if chunk_map_idx < (chunk_idx_map.len() - 1) {
|
||||
chunk_idx_map[chunk_map_idx]
|
||||
} else if chunk_map_idx == last_chunk_idx as usize {
|
||||
chunk_idx_map[chunk_idx_map.len() - 1]
|
||||
} else {
|
||||
return false;
|
||||
};
|
||||
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
|
||||
let word = if idx < CANONICAL {
|
||||
|
@ -150,19 +150,22 @@ impl RawEmitter {
|
||||
while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
|
||||
chunk_indices.pop();
|
||||
}
|
||||
// We do not count the LAST_CHUNK_MAP as adding bytes because it's a
|
||||
// small constant whose values are inlined directly into the instruction
|
||||
// stream.
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static BITSET_LAST_CHUNK_MAP: (u16, u8) = ({}, {});",
|
||||
"const BITSET_LAST_CHUNK_MAP: u16 = {};",
|
||||
chunk_indices.len() - 1,
|
||||
chunk_indices.pop().unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
self.bytes_used += 3;
|
||||
let nonzero = chunk_indices.pop().unwrap();
|
||||
// Try to pop again, now that we've recorded a non-zero pointing index
|
||||
// into the LAST_CHUNK_MAP.
|
||||
while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
|
||||
chunk_indices.pop();
|
||||
}
|
||||
chunk_indices.push(nonzero);
|
||||
writeln!(
|
||||
&mut self.file,
|
||||
"static BITSET_CHUNKS_MAP: [u8; {}] = [{}];",
|
||||
|
Loading…
x
Reference in New Issue
Block a user