Let unicode-table-generator fail gracefully for bitsets
The "Alphabetic" property in Unicode 14 grew too big for the bitset representation, causing a panic with the message "cannot pack 264 into 8 bits". However, we were already choosing the skiplist for that property anyway, so this doesn't need to be a hard failure. That panic is now a returned `Err`, and `emit_codepoints` automatically falls back to the skiplist when the bitset cannot be generated.
This commit is contained in:
parent
e159d42a9a
commit
6b0b417299
@ -23,7 +23,7 @@ fn blank_line(&mut self) {
|
|||||||
writeln!(&mut self.file).unwrap();
|
writeln!(&mut self.file).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn emit_bitset(&mut self, ranges: &[Range<u32>]) {
|
fn emit_bitset(&mut self, ranges: &[Range<u32>]) -> Result<(), String> {
|
||||||
let last_code_point = ranges.last().unwrap().end;
|
let last_code_point = ranges.last().unwrap().end;
|
||||||
// bitset for every bit in the codepoint range
|
// bitset for every bit in the codepoint range
|
||||||
//
|
//
|
||||||
@ -44,7 +44,7 @@ fn emit_bitset(&mut self, ranges: &[Range<u32>]) {
|
|||||||
let unique_words =
|
let unique_words =
|
||||||
words.iter().cloned().collect::<BTreeSet<_>>().into_iter().collect::<Vec<_>>();
|
words.iter().cloned().collect::<BTreeSet<_>>().into_iter().collect::<Vec<_>>();
|
||||||
if unique_words.len() > u8::MAX as usize {
|
if unique_words.len() > u8::MAX as usize {
|
||||||
panic!("cannot pack {} into 8 bits", unique_words.len());
|
return Err(format!("cannot pack {} into 8 bits", unique_words.len()));
|
||||||
}
|
}
|
||||||
// needed for the chunk mapping to work
|
// needed for the chunk mapping to work
|
||||||
assert_eq!(unique_words[0], 0, "has a zero word");
|
assert_eq!(unique_words[0], 0, "has a zero word");
|
||||||
@ -105,6 +105,8 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|||||||
writeln!(&mut self.file, " &BITSET_MAPPING,").unwrap();
|
writeln!(&mut self.file, " &BITSET_MAPPING,").unwrap();
|
||||||
writeln!(&mut self.file, " )").unwrap();
|
writeln!(&mut self.file, " )").unwrap();
|
||||||
writeln!(&mut self.file, "}}").unwrap();
|
writeln!(&mut self.file, "}}").unwrap();
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) {
|
fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) {
|
||||||
@ -154,12 +156,12 @@ pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
|
|||||||
emitter.blank_line();
|
emitter.blank_line();
|
||||||
|
|
||||||
let mut bitset = emitter.clone();
|
let mut bitset = emitter.clone();
|
||||||
bitset.emit_bitset(&ranges);
|
let bitset_ok = bitset.emit_bitset(&ranges).is_ok();
|
||||||
|
|
||||||
let mut skiplist = emitter.clone();
|
let mut skiplist = emitter.clone();
|
||||||
skiplist.emit_skiplist(&ranges);
|
skiplist.emit_skiplist(&ranges);
|
||||||
|
|
||||||
if bitset.bytes_used <= skiplist.bytes_used {
|
if bitset_ok && bitset.bytes_used <= skiplist.bytes_used {
|
||||||
*emitter = bitset;
|
*emitter = bitset;
|
||||||
emitter.desc = String::from("bitset");
|
emitter.desc = String::from("bitset");
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
Reference in New Issue
Block a user