Let unicode-table-generator fail gracefully for bitsets
The "Alphabetic" property in Unicode 14 grew too big for the bitset representation, causing a panic with the message "cannot pack 264 into 8 bits". However, we were already choosing the skiplist for that property anyway, so this doesn't need to be a hard failure. That panic is now a returned `Err`, and `emit_codepoints` automatically falls back to the skiplist when the bitset cannot be generated.
This commit is contained in:
parent
e159d42a9a
commit
6b0b417299
@ -23,7 +23,7 @@ fn blank_line(&mut self) {
|
||||
writeln!(&mut self.file).unwrap();
|
||||
}
|
||||
|
||||
fn emit_bitset(&mut self, ranges: &[Range<u32>]) {
|
||||
fn emit_bitset(&mut self, ranges: &[Range<u32>]) -> Result<(), String> {
|
||||
let last_code_point = ranges.last().unwrap().end;
|
||||
// bitset for every bit in the codepoint range
|
||||
//
|
||||
@ -44,7 +44,7 @@ fn emit_bitset(&mut self, ranges: &[Range<u32>]) {
|
||||
let unique_words =
|
||||
words.iter().cloned().collect::<BTreeSet<_>>().into_iter().collect::<Vec<_>>();
|
||||
if unique_words.len() > u8::MAX as usize {
|
||||
panic!("cannot pack {} into 8 bits", unique_words.len());
|
||||
return Err(format!("cannot pack {} into 8 bits", unique_words.len()));
|
||||
}
|
||||
// needed for the chunk mapping to work
|
||||
assert_eq!(unique_words[0], 0, "has a zero word");
|
||||
@ -105,6 +105,8 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
writeln!(&mut self.file, " &BITSET_MAPPING,").unwrap();
|
||||
writeln!(&mut self.file, " )").unwrap();
|
||||
writeln!(&mut self.file, "}}").unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) {
|
||||
@ -154,12 +156,12 @@ pub fn emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) {
|
||||
emitter.blank_line();
|
||||
|
||||
let mut bitset = emitter.clone();
|
||||
bitset.emit_bitset(&ranges);
|
||||
let bitset_ok = bitset.emit_bitset(&ranges).is_ok();
|
||||
|
||||
let mut skiplist = emitter.clone();
|
||||
skiplist.emit_skiplist(&ranges);
|
||||
|
||||
if bitset.bytes_used <= skiplist.bytes_used {
|
||||
if bitset_ok && bitset.bytes_used <= skiplist.bytes_used {
|
||||
*emitter = bitset;
|
||||
emitter.desc = String::from("bitset");
|
||||
} else {
|
||||
|
Loading…
Reference in New Issue
Block a user