From 6b0b41729939c3f7520e9ed86b36fba2524c7970 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 6 Oct 2021 17:35:49 -0700 Subject: [PATCH] Let unicode-table-generator fail gracefully for bitsets The "Alphabetic" property in Unicode 14 grew too big for the bitset representation, panicking "cannot pack 264 into 8 bits". However, we were already choosing the skiplist for that anyway, so this doesn't need to be a hard failure. That panic is now a returned `Err`, and then in `emit_codepoints` we automatically defer to skiplist. --- src/tools/unicode-table-generator/src/raw_emitter.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs index 42e7e5fb406..ab8eaee9541 100644 --- a/src/tools/unicode-table-generator/src/raw_emitter.rs +++ b/src/tools/unicode-table-generator/src/raw_emitter.rs @@ -23,7 +23,7 @@ fn blank_line(&mut self) { writeln!(&mut self.file).unwrap(); } - fn emit_bitset(&mut self, ranges: &[Range<u32>]) { + fn emit_bitset(&mut self, ranges: &[Range<u32>]) -> Result<(), String> { let last_code_point = ranges.last().unwrap().end; // bitset for every bit in the codepoint range // @@ -44,7 +44,7 @@ fn emit_bitset(&mut self, ranges: &[Range<u32>]) { let unique_words = words.iter().cloned().collect::<BTreeSet<_>>().into_iter().collect::<Vec<_>>(); if unique_words.len() > u8::MAX as usize { - panic!("cannot pack {} into 8 bits", unique_words.len()); + return Err(format!("cannot pack {} into 8 bits", unique_words.len())); } // needed for the chunk mapping to work assert_eq!(unique_words[0], 0, "has a zero word"); @@ -105,6 +105,8 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(&mut self.file, " &BITSET_MAPPING,").unwrap(); writeln!(&mut self.file, " )").unwrap(); writeln!(&mut self.file, "}}").unwrap(); + + Ok(()) } fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) { @@ -154,12 +156,12 @@ pub fn 
emit_codepoints(emitter: &mut RawEmitter, ranges: &[Range<u32>]) { emitter.blank_line(); let mut bitset = emitter.clone(); - bitset.emit_bitset(&ranges); + let bitset_ok = bitset.emit_bitset(&ranges).is_ok(); let mut skiplist = emitter.clone(); skiplist.emit_skiplist(&ranges); - if bitset.bytes_used <= skiplist.bytes_used { + if bitset_ok && bitset.bytes_used <= skiplist.bytes_used { *emitter = bitset; emitter.desc = String::from("bitset"); } else {