From 4a3e169da746783fa7bf7cc61faa077e16989c5b Mon Sep 17 00:00:00 2001 From: Sage Mitchell Date: Fri, 2 Sep 2022 20:52:28 -0700 Subject: [PATCH] Make `char::is_lowercase` and `char::is_uppercase` const Implements #101400. --- library/core/src/char/methods.rs | 22 +++++++++++-- library/core/src/lib.rs | 1 + library/core/src/unicode/unicode_data.rs | 33 ++++++++++--------- .../src/range_search.rs | 11 ++++--- .../src/raw_emitter.rs | 15 ++++++--- 5 files changed, 55 insertions(+), 27 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 2433139a592..b7a63b7c675 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -746,10 +746,19 @@ pub fn is_alphabetic(self) -> bool { /// assert!(!'中'.is_lowercase()); /// assert!(!' '.is_lowercase()); /// ``` + /// + /// In a const context: + /// + /// ``` + /// #![feature(const_unicode_case_lookup)] + /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase(); + /// assert!(!CAPITAL_DELTA_IS_LOWERCASE); + /// ``` #[must_use] #[stable(feature = "rust1", since = "1.0.0")] + #[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")] #[inline] - pub fn is_lowercase(self) -> bool { + pub const fn is_lowercase(self) -> bool { match self { 'a'..='z' => true, c => c > '\x7f' && unicode::Lowercase(c), @@ -779,10 +788,19 @@ pub fn is_lowercase(self) -> bool { /// assert!(!'中'.is_uppercase()); /// assert!(!' '.is_uppercase()); /// ``` + /// + /// In a const context: + /// + /// ``` + /// #![feature(const_unicode_case_lookup)] + /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase(); + /// assert!(CAPITAL_DELTA_IS_UPPERCASE); + /// ``` #[must_use] #[stable(feature = "rust1", since = "1.0.0")] + #[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")] #[inline] - pub fn is_uppercase(self) -> bool { + pub const fn is_uppercase(self) -> bool { match self { 'A'..='Z' => true, c => c > '\x7f' && unicode::Uppercase(c), diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 9d857eb63da..5b1e2045fff 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -143,6 +143,7 @@ #![feature(const_type_id)] #![feature(const_type_name)] #![feature(const_default_impls)] +#![feature(const_unicode_case_lookup)] #![feature(const_unsafecell_get_mut)] #![feature(core_panic)] #![feature(duration_consts_float)] diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs index c1eff3a36e6..2bbedf0e2d2 100644 --- a/library/core/src/unicode/unicode_data.rs +++ b/library/core/src/unicode/unicode_data.rs @@ -1,7 +1,8 @@ ///! This file is generated by src/tools/unicode-table-generator; do not edit manually! +#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")] #[inline(always)] -fn bitset_search< +const fn bitset_search< const N: usize, const CHUNK_SIZE: usize, const N1: usize, @@ -17,14 +18,14 @@ fn bitset_search< let bucket_idx = (needle / 64) as usize; let chunk_map_idx = bucket_idx / CHUNK_SIZE; let chunk_piece = bucket_idx % CHUNK_SIZE; - let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) { - v + let chunk_idx = if chunk_map_idx < chunk_idx_map.len() { + chunk_idx_map[chunk_map_idx] } else { return false; }; let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize; - let word = if let Some(word) = bitset_canonical.get(idx) { - *word + let word = if idx < bitset_canonical.len() { + bitset_canonical[idx] } else { let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()]; let mut word = bitset_canonical[real_idx as usize]; @@ -318,14 +319,14 @@ pub fn lookup(c: char) -> bool { #[rustfmt::skip] pub mod lowercase { - static BITSET_CHUNKS_MAP: [u8; 123] = [ + const BITSET_CHUNKS_MAP: [u8; 123] = [ 14, 17, 0, 0, 9, 0, 0, 12, 13, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 3, 0, 0, 7, ]; - static BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [ + const BITSET_INDEX_CHUNKS: [[u8; 16]; 19] = [ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 14, 55, 0], @@ -346,7 +347,7 @@ pub mod lowercase { [16, 49, 2, 20, 66, 9, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0], [63, 39, 54, 12, 73, 61, 18, 1, 6, 62, 71, 19, 68, 69, 3, 44], ]; - static BITSET_CANONICAL: [u64; 55] = [ + const BITSET_CANONICAL: [u64; 55] = [ 0b0000000000000000000000000000000000000000000000000000000000000000, 0b1111111111111111110000000000000000000000000011111111111111111111, 0b1010101010101010101010101010101010101010101010101010100000000010, @@ -403,13 +404,14 @@ pub mod lowercase { 0b1110011111111111111111111111111111111111111111110000000000000000, 0b1110101111000000000000000000000000001111111111111111111111111100, ]; - static BITSET_MAPPING: [(u8, u8); 20] = [ + const BITSET_MAPPING: [(u8, u8); 20] = [ (0, 64), (1, 188), (1, 183), (1, 176), (1, 109), (1, 124), (1, 126), (1, 66), (1, 70), (1, 77), (2, 146), (2, 144), (2, 83), (3, 12), (3, 6), (4, 156), (4, 78), (5, 187), (6, 132), (7, 93), ]; - pub fn lookup(c: char) -> bool { + #[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")] + pub const fn lookup(c: char) -> bool { super::bitset_search( c as u32, &BITSET_CHUNKS_MAP, @@ -454,14 +456,14 @@ pub fn lookup(c: char) -> bool { #[rustfmt::skip] pub mod uppercase { - static BITSET_CHUNKS_MAP: [u8; 125] = [ + const BITSET_CHUNKS_MAP: [u8; 125] = [ 12, 15, 6, 6, 0, 6, 6, 2, 4, 11, 6, 16, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 14, 6, 10, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 6, 6, 6, 6, 9, 6, 3, ]; - static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [ + const BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [ [43, 43, 5, 34, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 5, 1], [43, 43, 5, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43], [43, 43, 39, 43, 43, 43, 43, 43, 17, 17, 62, 17, 42, 29, 24, 23], @@ -480,7 +482,7 @@ pub mod uppercase { [57, 19, 2, 18, 10, 47, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43], [57, 37, 17, 27, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43], ]; - static BITSET_CANONICAL: [u64; 43] = [ + const BITSET_CANONICAL: [u64; 43] = [ 0b0000011111111111111111111111111000000000000000000000000000000000, 0b0000000000111111111111111111111111111111111111111111111111111111, 0b0101010101010101010101010101010101010101010101010101010000000001, @@ -525,13 +527,14 @@ pub mod uppercase { 0b1111011111111111000000000000000000000000000000000000000000000000, 0b1111111100000000111111110000000000111111000000001111111100000000, ]; - static BITSET_MAPPING: [(u8, u8); 25] = [ + const BITSET_MAPPING: [(u8, u8); 25] = [ (0, 187), (0, 177), (0, 171), (0, 167), (0, 164), (0, 32), (0, 47), (0, 51), (0, 121), (0, 117), (0, 109), (1, 150), (1, 148), (1, 142), (1, 134), (1, 131), (1, 64), (2, 164), (2, 146), (2, 20), (3, 146), (3, 140), (3, 134), (4, 178), (4, 171), ]; - pub fn lookup(c: char) -> bool { + #[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")] + pub const fn lookup(c: char) -> bool { super::bitset_search( c as u32, &BITSET_CHUNKS_MAP, diff --git a/src/tools/unicode-table-generator/src/range_search.rs b/src/tools/unicode-table-generator/src/range_search.rs index 39b47ce703f..727ae3e9f78 100644 --- a/src/tools/unicode-table-generator/src/range_search.rs +++ b/src/tools/unicode-table-generator/src/range_search.rs @@ -1,5 +1,6 @@ +#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")] #[inline(always)] -fn bitset_search< +const fn bitset_search< const N: usize, const CHUNK_SIZE: usize, const N1: usize, @@ -15,14 +16,14 @@ fn bitset_search< let bucket_idx = (needle / 64) as usize; let chunk_map_idx = bucket_idx / CHUNK_SIZE; let chunk_piece = bucket_idx % CHUNK_SIZE; - let chunk_idx = if let Some(&v) = chunk_idx_map.get(chunk_map_idx) { - v + let chunk_idx = if chunk_map_idx < chunk_idx_map.len() { + chunk_idx_map[chunk_map_idx] } else { return false; }; let idx = bitset_chunk_idx[chunk_idx as usize][chunk_piece] as usize; - let word = if let Some(word) = bitset_canonical.get(idx) { - *word + let word = if idx < bitset_canonical.len() { + bitset_canonical[idx] } else { let (real_idx, mapping) = bitset_canonicalized[idx - bitset_canonical.len()]; let mut word = bitset_canonical[real_idx as usize]; diff --git a/src/tools/unicode-table-generator/src/raw_emitter.rs b/src/tools/unicode-table-generator/src/raw_emitter.rs index 13eed7b1099..5a0aabf6283 100644 --- a/src/tools/unicode-table-generator/src/raw_emitter.rs +++ b/src/tools/unicode-table-generator/src/raw_emitter.rs @@ -76,7 +76,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( &mut self.file, - "static BITSET_CANONICAL: [u64; {}] = [{}];", + "const BITSET_CANONICAL: [u64; {}] = [{}];", canonicalized.canonical_words.len(), fmt_list(canonicalized.canonical_words.iter().map(|v| Bits(*v))), ) @@ -84,7 +84,7 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.bytes_used += 8 * canonicalized.canonical_words.len(); writeln!( &mut self.file, - "static BITSET_MAPPING: [(u8, u8); {}] = [{}];", + "const BITSET_MAPPING: [(u8, u8); {}] = [{}];", canonicalized.canonicalized_words.len(), fmt_list(&canonicalized.canonicalized_words), ) @@ -96,7 +96,12 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.blank_line(); - writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap(); + writeln!( + &mut self.file, + r#"#[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]"# + ) + .unwrap(); + writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap(); writeln!(&mut self.file, " super::bitset_search(",).unwrap(); writeln!(&mut self.file, " c as u32,").unwrap(); writeln!(&mut self.file, " &BITSET_CHUNKS_MAP,").unwrap(); @@ -130,7 +135,7 @@ fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: writeln!( &mut self.file, - "static BITSET_CHUNKS_MAP: [u8; {}] = [{}];", + "const BITSET_CHUNKS_MAP: [u8; {}] = [{}];", chunk_indices.len(), fmt_list(&chunk_indices), ) @@ -138,7 +143,7 @@ fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: self.bytes_used += chunk_indices.len(); writeln!( &mut self.file, - "static BITSET_INDEX_CHUNKS: [[u8; {}]; {}] = [{}];", + "const BITSET_INDEX_CHUNKS: [[u8; {}]; {}] = [{}];", chunk_length, chunks.len(), fmt_list(chunks.iter()),