rust/src/libstd/unicode.rs
Florian Zeitz df802a2754 std: Rename str::Normalizations to str::Decompositions
The Normalizations iterator has been renamed to Decompositions.
It does not currently include all forms of Unicode normalization,
but only encompasses decompositions.
If implemented, recomposition would likely be a separate iterator
which works on the result of this one.

[breaking-change]
2014-05-13 17:24:07 -07:00

186 lines
10 KiB
Rust

// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// NOTE: The following code was generated by "src/etc/unicode.py", do not edit directly
#![allow(missing_doc, non_uppercase_statics)]
pub mod normalization {
use option::{Some, None};
use slice::ImmutableVector;
// Looks `c` up in a table of `(lo, hi, value)` entries, where `lo` and `hi`
// are the inclusive bounds of a character range.
//
// Returns the `value` of the entry whose range contains `c`, or 0 when no
// entry matches.
//
// NOTE(review): the lookup goes through `bsearch`, so `r` presumably has to be
// ordered by range with no overlaps -- confirm against the generator script.
fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
    use cmp::{Equal, Less, Greater};
    let found = r.bsearch(|&(lo, hi, _)| {
        // Report where this entry's range lies relative to `c`.
        if hi < c { Less }
        else if c < lo { Greater }
        else { Equal }
    });
    match found {
        None => 0,
        Some(idx) => {
            let (_, _, value) = r[idx];
            value
        }
    }
}
// Table backing `canonical_combining_class`.
//
// Each entry is `(lo, hi, value)`: `lo` and `hi` are the inclusive bounds of
// a range of characters, and `value` is the `u8` returned for every character
// in that range. Characters that fall in no range are reported as 0 by
// `bsearch_range_value_table`.
//
// Entries are listed in ascending order of character and the ranges do not
// overlap. This table is generated (see the NOTE at the top of the file), so
// changes belong in the generator rather than here.
static combining_class_table : &'static [(char, char, u8)] = &[
('\u0300', '\u0314', 230), ('\u0315', '\u0315', 232),
('\u0316', '\u0319', 220), ('\u031a', '\u031a', 232),
('\u031b', '\u031b', 216), ('\u031c', '\u0320', 220),
('\u0321', '\u0322', 202), ('\u0323', '\u0326', 220),
('\u0327', '\u0328', 202), ('\u0329', '\u0333', 220),
('\u0334', '\u0338', 1), ('\u0339', '\u033c', 220),
('\u033d', '\u0344', 230), ('\u0345', '\u0345', 240),
('\u0346', '\u0346', 230), ('\u0347', '\u0349', 220),
('\u034a', '\u034c', 230), ('\u034d', '\u034e', 220),
('\u0350', '\u0352', 230), ('\u0353', '\u0356', 220),
('\u0357', '\u0357', 230), ('\u0358', '\u0358', 232),
('\u0359', '\u035a', 220), ('\u035b', '\u035b', 230),
('\u035c', '\u035c', 233), ('\u035d', '\u035e', 234),
('\u035f', '\u035f', 233), ('\u0360', '\u0361', 234),
('\u0362', '\u0362', 233), ('\u0363', '\u036f', 230),
('\u0483', '\u0487', 230), ('\u0591', '\u0591', 220),
('\u0592', '\u0595', 230), ('\u0596', '\u0596', 220),
('\u0597', '\u0599', 230), ('\u059a', '\u059a', 222),
('\u059b', '\u059b', 220), ('\u059c', '\u05a1', 230),
('\u05a2', '\u05a7', 220), ('\u05a8', '\u05a9', 230),
('\u05aa', '\u05aa', 220), ('\u05ab', '\u05ac', 230),
('\u05ad', '\u05ad', 222), ('\u05ae', '\u05ae', 228),
('\u05af', '\u05af', 230), ('\u05b0', '\u05b0', 10),
('\u05b1', '\u05b1', 11), ('\u05b2', '\u05b2', 12),
('\u05b3', '\u05b3', 13), ('\u05b4', '\u05b4', 14),
('\u05b5', '\u05b5', 15), ('\u05b6', '\u05b6', 16),
('\u05b7', '\u05b7', 17), ('\u05b8', '\u05b8', 18),
('\u05b9', '\u05ba', 19), ('\u05bb', '\u05bb', 20),
('\u05bc', '\u05bc', 21), ('\u05bd', '\u05bd', 22),
('\u05bf', '\u05bf', 23), ('\u05c1', '\u05c1', 24),
('\u05c2', '\u05c2', 25), ('\u05c4', '\u05c4', 230),
('\u05c5', '\u05c5', 220), ('\u05c7', '\u05c7', 18),
('\u0610', '\u0617', 230), ('\u0618', '\u0618', 30),
('\u0619', '\u0619', 31), ('\u061a', '\u061a', 32),
('\u064b', '\u064b', 27), ('\u064c', '\u064c', 28),
('\u064d', '\u064d', 29), ('\u064e', '\u064e', 30),
('\u064f', '\u064f', 31), ('\u0650', '\u0650', 32),
('\u0651', '\u0651', 33), ('\u0652', '\u0652', 34),
('\u0653', '\u0654', 230), ('\u0655', '\u0656', 220),
('\u0657', '\u065b', 230), ('\u065c', '\u065c', 220),
('\u065d', '\u065e', 230), ('\u065f', '\u065f', 220),
('\u0670', '\u0670', 35), ('\u06d6', '\u06dc', 230),
('\u06df', '\u06e2', 230), ('\u06e3', '\u06e3', 220),
('\u06e4', '\u06e4', 230), ('\u06e7', '\u06e8', 230),
('\u06ea', '\u06ea', 220), ('\u06eb', '\u06ec', 230),
('\u06ed', '\u06ed', 220), ('\u0711', '\u0711', 36),
('\u0730', '\u0730', 230), ('\u0731', '\u0731', 220),
('\u0732', '\u0733', 230), ('\u0734', '\u0734', 220),
('\u0735', '\u0736', 230), ('\u0737', '\u0739', 220),
('\u073a', '\u073a', 230), ('\u073b', '\u073c', 220),
('\u073d', '\u073d', 230), ('\u073e', '\u073e', 220),
('\u073f', '\u0741', 230), ('\u0742', '\u0742', 220),
('\u0743', '\u0743', 230), ('\u0744', '\u0744', 220),
('\u0745', '\u0745', 230), ('\u0746', '\u0746', 220),
('\u0747', '\u0747', 230), ('\u0748', '\u0748', 220),
('\u0749', '\u074a', 230), ('\u07eb', '\u07f1', 230),
('\u07f2', '\u07f2', 220), ('\u07f3', '\u07f3', 230),
('\u0816', '\u0819', 230), ('\u081b', '\u0823', 230),
('\u0825', '\u0827', 230), ('\u0829', '\u082d', 230),
('\u0859', '\u085b', 220), ('\u08e4', '\u08e5', 230),
('\u08e6', '\u08e6', 220), ('\u08e7', '\u08e8', 230),
('\u08e9', '\u08e9', 220), ('\u08ea', '\u08ec', 230),
('\u08ed', '\u08ef', 220), ('\u08f0', '\u08f0', 27),
('\u08f1', '\u08f1', 28), ('\u08f2', '\u08f2', 29),
('\u08f3', '\u08f5', 230), ('\u08f6', '\u08f6', 220),
('\u08f7', '\u08f8', 230), ('\u08f9', '\u08fa', 220),
('\u08fb', '\u08fe', 230), ('\u093c', '\u093c', 7),
('\u094d', '\u094d', 9), ('\u0951', '\u0951', 230),
('\u0952', '\u0952', 220), ('\u0953', '\u0954', 230),
('\u09bc', '\u09bc', 7), ('\u09cd', '\u09cd', 9),
('\u0a3c', '\u0a3c', 7), ('\u0a4d', '\u0a4d', 9),
('\u0abc', '\u0abc', 7), ('\u0acd', '\u0acd', 9),
('\u0b3c', '\u0b3c', 7), ('\u0b4d', '\u0b4d', 9),
('\u0bcd', '\u0bcd', 9), ('\u0c4d', '\u0c4d', 9),
('\u0c55', '\u0c55', 84), ('\u0c56', '\u0c56', 91),
('\u0cbc', '\u0cbc', 7), ('\u0ccd', '\u0ccd', 9),
('\u0d4d', '\u0d4d', 9), ('\u0dca', '\u0dca', 9),
('\u0e38', '\u0e39', 103), ('\u0e3a', '\u0e3a', 9),
('\u0e48', '\u0e4b', 107), ('\u0eb8', '\u0eb9', 118),
('\u0ec8', '\u0ecb', 122), ('\u0f18', '\u0f19', 220),
('\u0f35', '\u0f35', 220), ('\u0f37', '\u0f37', 220),
('\u0f39', '\u0f39', 216), ('\u0f71', '\u0f71', 129),
('\u0f72', '\u0f72', 130), ('\u0f74', '\u0f74', 132),
('\u0f7a', '\u0f7d', 130), ('\u0f80', '\u0f80', 130),
('\u0f82', '\u0f83', 230), ('\u0f84', '\u0f84', 9),
('\u0f86', '\u0f87', 230), ('\u0fc6', '\u0fc6', 220),
('\u1037', '\u1037', 7), ('\u1039', '\u103a', 9),
('\u108d', '\u108d', 220), ('\u135d', '\u135f', 230),
('\u1714', '\u1714', 9), ('\u1734', '\u1734', 9),
('\u17d2', '\u17d2', 9), ('\u17dd', '\u17dd', 230),
('\u18a9', '\u18a9', 228), ('\u1939', '\u1939', 222),
('\u193a', '\u193a', 230), ('\u193b', '\u193b', 220),
('\u1a17', '\u1a17', 230), ('\u1a18', '\u1a18', 220),
('\u1a60', '\u1a60', 9), ('\u1a75', '\u1a7c', 230),
('\u1a7f', '\u1a7f', 220), ('\u1b34', '\u1b34', 7),
('\u1b44', '\u1b44', 9), ('\u1b6b', '\u1b6b', 230),
('\u1b6c', '\u1b6c', 220), ('\u1b6d', '\u1b73', 230),
('\u1baa', '\u1bab', 9), ('\u1be6', '\u1be6', 7),
('\u1bf2', '\u1bf3', 9), ('\u1c37', '\u1c37', 7),
('\u1cd0', '\u1cd2', 230), ('\u1cd4', '\u1cd4', 1),
('\u1cd5', '\u1cd9', 220), ('\u1cda', '\u1cdb', 230),
('\u1cdc', '\u1cdf', 220), ('\u1ce0', '\u1ce0', 230),
('\u1ce2', '\u1ce8', 1), ('\u1ced', '\u1ced', 220),
('\u1cf4', '\u1cf4', 230), ('\u1dc0', '\u1dc1', 230),
('\u1dc2', '\u1dc2', 220), ('\u1dc3', '\u1dc9', 230),
('\u1dca', '\u1dca', 220), ('\u1dcb', '\u1dcc', 230),
('\u1dcd', '\u1dcd', 234), ('\u1dce', '\u1dce', 214),
('\u1dcf', '\u1dcf', 220), ('\u1dd0', '\u1dd0', 202),
('\u1dd1', '\u1de6', 230), ('\u1dfc', '\u1dfc', 233),
('\u1dfd', '\u1dfd', 220), ('\u1dfe', '\u1dfe', 230),
('\u1dff', '\u1dff', 220), ('\u20d0', '\u20d1', 230),
('\u20d2', '\u20d3', 1), ('\u20d4', '\u20d7', 230),
('\u20d8', '\u20da', 1), ('\u20db', '\u20dc', 230),
('\u20e1', '\u20e1', 230), ('\u20e5', '\u20e6', 1),
('\u20e7', '\u20e7', 230), ('\u20e8', '\u20e8', 220),
('\u20e9', '\u20e9', 230), ('\u20ea', '\u20eb', 1),
('\u20ec', '\u20ef', 220), ('\u20f0', '\u20f0', 230),
('\u2cef', '\u2cf1', 230), ('\u2d7f', '\u2d7f', 9),
('\u2de0', '\u2dff', 230), ('\u302a', '\u302a', 218),
('\u302b', '\u302b', 228), ('\u302c', '\u302c', 232),
('\u302d', '\u302d', 222), ('\u302e', '\u302f', 224),
('\u3099', '\u309a', 8), ('\ua66f', '\ua66f', 230),
('\ua674', '\ua67d', 230), ('\ua69f', '\ua69f', 230),
('\ua6f0', '\ua6f1', 230), ('\ua806', '\ua806', 9),
('\ua8c4', '\ua8c4', 9), ('\ua8e0', '\ua8f1', 230),
('\ua92b', '\ua92d', 220), ('\ua953', '\ua953', 9),
('\ua9b3', '\ua9b3', 7), ('\ua9c0', '\ua9c0', 9),
('\uaab0', '\uaab0', 230), ('\uaab2', '\uaab3', 230),
('\uaab4', '\uaab4', 220), ('\uaab7', '\uaab8', 230),
('\uaabe', '\uaabf', 230), ('\uaac1', '\uaac1', 230),
('\uaaf6', '\uaaf6', 9), ('\uabed', '\uabed', 9),
('\ufb1e', '\ufb1e', 26), ('\ufe20', '\ufe26', 230),
('\U000101fd', '\U000101fd', 220), ('\U00010a0d', '\U00010a0d', 220),
('\U00010a0f', '\U00010a0f', 230), ('\U00010a38', '\U00010a38', 230),
('\U00010a39', '\U00010a39', 1), ('\U00010a3a', '\U00010a3a', 220),
('\U00010a3f', '\U00010a3f', 9), ('\U00011046', '\U00011046', 9),
('\U000110b9', '\U000110b9', 9), ('\U000110ba', '\U000110ba', 7),
('\U00011100', '\U00011102', 230), ('\U00011133', '\U00011134', 9),
('\U000111c0', '\U000111c0', 9), ('\U000116b6', '\U000116b6', 9),
('\U000116b7', '\U000116b7', 7), ('\U0001d165', '\U0001d166', 216),
('\U0001d167', '\U0001d169', 1), ('\U0001d16d', '\U0001d16d', 226),
('\U0001d16e', '\U0001d172', 216), ('\U0001d17b', '\U0001d182', 220),
('\U0001d185', '\U0001d189', 230), ('\U0001d18a', '\U0001d18b', 220),
('\U0001d1aa', '\U0001d1ad', 230), ('\U0001d242', '\U0001d244', 230)
];
/// Returns the value recorded for `c` in `combining_class_table`.
///
/// Characters that are not covered by any range in the table yield 0.
pub fn canonical_combining_class(c: char) -> u8 {
    let class = bsearch_range_value_table(c, combining_class_table);
    class
}
}