cfafc1b737
This commit renames a number of extension traits for slices and string slices, now that they have been refactored for DST. In many cases, multiple extension traits could now be consolidated. Further consolidation will be possible with generalized where clauses. The renamings are consistent with the [new `-Prelude` suffix](https://github.com/rust-lang/rfcs/pull/344). There are probably a few more candidates for being renamed this way, but that is left for API stabilization of the relevant modules. Because this renames traits, it is a: [breaking-change] However, I do not expect any code that currently uses the standard library to actually break. Closes #17917
152 lines
4.3 KiB
Rust
152 lines
4.3 KiB
Rust
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
|
|
// file at the top-level directory of this distribution and at
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
// option. This file may not be copied, modified, or distributed
|
|
// except according to those terms.
|
|
|
|
/*!
  Functions for computing canonical and compatible decompositions
  for Unicode characters, and for composing pairs of characters
  back into their canonical composites.
  */
|
|
|
|
use core::cmp::{Equal, Less, Greater};
|
|
use core::option::{Option, Some, None};
|
|
use core::slice;
|
|
use core::slice::SlicePrelude;
|
|
use tables::normalization::{canonical_table, compatibility_table, composition_table};
|
|
|
|
fn bsearch_table<T>(c: char, r: &'static [(char, &'static [T])]) -> Option<&'static [T]> {
|
|
match r.binary_search(|&(val, _)| {
|
|
if c == val { Equal }
|
|
else if val < c { Less }
|
|
else { Greater }
|
|
}) {
|
|
slice::Found(idx) => {
|
|
let (_, result) = r[idx];
|
|
Some(result)
|
|
}
|
|
slice::NotFound(_) => None
|
|
}
|
|
}
|
|
|
|
/// Compute canonical Unicode decomposition for character
|
|
pub fn decompose_canonical(c: char, i: |char|) { d(c, i, false); }
|
|
|
|
/// Compute canonical or compatible Unicode decomposition for character
|
|
pub fn decompose_compatible(c: char, i: |char|) { d(c, i, true); }
|
|
|
|
fn d(c: char, i: |char|, k: bool) {
|
|
// 7-bit ASCII never decomposes
|
|
if c <= '\x7f' { i(c); return; }
|
|
|
|
// Perform decomposition for Hangul
|
|
if (c as u32) >= S_BASE && (c as u32) < (S_BASE + S_COUNT) {
|
|
decompose_hangul(c, i);
|
|
return;
|
|
}
|
|
|
|
// First check the canonical decompositions
|
|
match bsearch_table(c, canonical_table) {
|
|
Some(canon) => {
|
|
for x in canon.iter() {
|
|
d(*x, |b| i(b), k);
|
|
}
|
|
return;
|
|
}
|
|
None => ()
|
|
}
|
|
|
|
// Bottom out if we're not doing compat.
|
|
if !k { i(c); return; }
|
|
|
|
// Then check the compatibility decompositions
|
|
match bsearch_table(c, compatibility_table) {
|
|
Some(compat) => {
|
|
for x in compat.iter() {
|
|
d(*x, |b| i(b), k);
|
|
}
|
|
return;
|
|
}
|
|
None => ()
|
|
}
|
|
|
|
// Finally bottom out.
|
|
i(c);
|
|
}
|
|
|
|
pub fn compose(a: char, b: char) -> Option<char> {
|
|
compose_hangul(a, b).or_else(|| {
|
|
match bsearch_table(a, composition_table) {
|
|
None => None,
|
|
Some(candidates) => {
|
|
match candidates.binary_search(|&(val, _)| {
|
|
if b == val { Equal }
|
|
else if val < b { Less }
|
|
else { Greater }
|
|
}) {
|
|
slice::Found(idx) => {
|
|
let (_, result) = candidates[idx];
|
|
Some(result)
|
|
}
|
|
slice::NotFound(_) => None
|
|
}
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
// Constants from Unicode 6.3.0 Section 3.12 Conjoining Jamo Behavior

// Base code points used by the Hangul arithmetic: precomposed syllables (S),
// leading consonants (L), vowels (V) and trailing consonants (T).
// T_BASE is one below the first real trailing consonant, so that a trailing
// index of 0 means "no trailing consonant".
const S_BASE: u32 = 0xAC00;
const L_BASE: u32 = 0x1100;
const V_BASE: u32 = 0x1161;
const T_BASE: u32 = 0x11A7;

// Number of leading consonants, vowels, and trailing slots (including the
// "no trailing consonant" slot at index 0).
const L_COUNT: u32 = 19;
const V_COUNT: u32 = 21;
const T_COUNT: u32 = 28;

// Syllables per leading consonant, and total precomposed syllables.
const N_COUNT: u32 = V_COUNT * T_COUNT;
const S_COUNT: u32 = L_COUNT * N_COUNT;
|
|
|
|
// Decompose a precomposed Hangul syllable
|
|
#[inline(always)]
|
|
fn decompose_hangul(s: char, f: |char|) {
|
|
use core::mem::transmute;
|
|
|
|
let si = s as u32 - S_BASE;
|
|
|
|
let li = si / N_COUNT;
|
|
unsafe {
|
|
f(transmute(L_BASE + li));
|
|
|
|
let vi = (si % N_COUNT) / T_COUNT;
|
|
f(transmute(V_BASE + vi));
|
|
|
|
let ti = si % T_COUNT;
|
|
if ti > 0 {
|
|
f(transmute(T_BASE + ti));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Compose a pair of Hangul Jamo
|
|
#[inline(always)]
|
|
fn compose_hangul(a: char, b: char) -> Option<char> {
|
|
use core::mem::transmute;
|
|
let l = a as u32;
|
|
let v = b as u32;
|
|
// Compose an LPart and a VPart
|
|
if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
|
|
let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
|
|
return unsafe { Some(transmute(r)) };
|
|
}
|
|
// Compose an LVPart and a TPart
|
|
if S_BASE <= l && l <= (S_BASE+S_COUNT-T_COUNT) && T_BASE <= v && v < (T_BASE+T_COUNT) {
|
|
let r = l + (v - T_BASE);
|
|
return unsafe { Some(transmute(r)) };
|
|
}
|
|
None
|
|
}
|