fix #124714 str.to_lowercase sigma handling
This commit is contained in:
parent
e3029d220f
commit
bbdf97254a
@ -375,14 +375,16 @@ pub fn to_lowercase(&self) -> String {
|
|||||||
// Safety: We have written only valid ASCII to our vec
|
// Safety: We have written only valid ASCII to our vec
|
||||||
let mut s = unsafe { String::from_utf8_unchecked(out) };
|
let mut s = unsafe { String::from_utf8_unchecked(out) };
|
||||||
|
|
||||||
for (i, c) in rest[..].char_indices() {
|
for (i, c) in rest.char_indices() {
|
||||||
if c == 'Σ' {
|
if c == 'Σ' {
|
||||||
// Σ maps to σ, except at the end of a word where it maps to ς.
|
// Σ maps to σ, except at the end of a word where it maps to ς.
|
||||||
// This is the only conditional (contextual) but language-independent mapping
|
// This is the only conditional (contextual) but language-independent mapping
|
||||||
// in `SpecialCasing.txt`,
|
// in `SpecialCasing.txt`,
|
||||||
// so hard-code it rather than have a generic "condition" mechanism.
|
// so hard-code it rather than have a generic "condition" mechanism.
|
||||||
// See https://github.com/rust-lang/rust/issues/26035
|
// See https://github.com/rust-lang/rust/issues/26035
|
||||||
map_uppercase_sigma(rest, i, &mut s)
|
let out_len = self.len() - rest.len();
|
||||||
|
let sigma_lowercase = map_uppercase_sigma(&self, i + out_len);
|
||||||
|
s.push(sigma_lowercase);
|
||||||
} else {
|
} else {
|
||||||
match conversions::to_lower(c) {
|
match conversions::to_lower(c) {
|
||||||
[a, '\0', _] => s.push(a),
|
[a, '\0', _] => s.push(a),
|
||||||
@ -400,13 +402,13 @@ pub fn to_lowercase(&self) -> String {
|
|||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
|
|
||||||
fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) {
|
fn map_uppercase_sigma(from: &str, i: usize) -> char {
|
||||||
// See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
|
// See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
|
||||||
// for the definition of `Final_Sigma`.
|
// for the definition of `Final_Sigma`.
|
||||||
debug_assert!('Σ'.len_utf8() == 2);
|
debug_assert!('Σ'.len_utf8() == 2);
|
||||||
let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
|
let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
|
||||||
&& !case_ignorable_then_cased(from[i + 2..].chars());
|
&& !case_ignorable_then_cased(from[i + 2..].chars());
|
||||||
to.push_str(if is_word_final { "ς" } else { "σ" });
|
if is_word_final { 'ς' } else { 'σ' }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
|
fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
|
||||||
|
@ -1848,6 +1848,9 @@ fn to_lowercase() {
|
|||||||
assert_eq!("ΑΣ'Α".to_lowercase(), "ασ'α");
|
assert_eq!("ΑΣ'Α".to_lowercase(), "ασ'α");
|
||||||
assert_eq!("ΑΣ''Α".to_lowercase(), "ασ''α");
|
assert_eq!("ΑΣ''Α".to_lowercase(), "ασ''α");
|
||||||
|
|
||||||
|
// https://github.com/rust-lang/rust/issues/124714
|
||||||
|
assert_eq!("abcdefghijklmnopΣ".to_lowercase(), "abcdefghijklmnopς");
|
||||||
|
|
||||||
// a really long string that has its lowercase form
|
// a really long string that has its lowercase form
|
||||||
// even longer. this tests that implementations don't assume
|
// even longer. this tests that implementations don't assume
|
||||||
// an incorrect upper bound on allocations
|
// an incorrect upper bound on allocations
|
||||||
|
Loading…
Reference in New Issue
Block a user