From dba5625cb8e8b0d7463f23c8ec9ba5f373707ef5 Mon Sep 17 00:00:00 2001 From: Piotr Zolnierek Date: Sat, 1 Mar 2014 07:40:38 +0100 Subject: [PATCH] Remove code duplication Remove whitespace Update documentation for to_uppercase, to_lowercase --- src/etc/unicode.py | 46 +++++++++++---------------- src/libstd/char.rs | 14 +++++--- src/libstd/unicode.rs | 74 ++++++++++++++----------------------------- 3 files changed, 53 insertions(+), 81 deletions(-) diff --git a/src/etc/unicode.py b/src/etc/unicode.py index 6333a51ce3a..e32954c75d4 100755 --- a/src/etc/unicode.py +++ b/src/etc/unicode.py @@ -160,23 +160,22 @@ def ch_prefix(ix): def emit_bsearch_range_table(f): f.write(""" - fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { - use cmp::{Equal, Less, Greater}; - use vec::ImmutableVector; - use option::None; - r.bsearch(|&(lo,hi)| { - if lo <= c && c <= hi { Equal } - else if hi < c { Less } - else { Greater } - }) != None - }\n\n +fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { + use cmp::{Equal, Less, Greater}; + use vec::ImmutableVector; + use option::None; + r.bsearch(|&(lo,hi)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) != None +}\n\n """); def emit_property_module(f, mod, tbl): f.write("pub mod %s {\n" % mod) keys = tbl.keys() keys.sort() - emit_bsearch_range_table(f); for cat in keys: if cat not in ["Nd", "Nl", "No", "Cc", @@ -192,7 +191,7 @@ def emit_property_module(f, mod, tbl): f.write("\n ];\n\n") f.write(" pub fn %s(c: char) -> bool {\n" % cat) - f.write(" bsearch_range_table(c, %s_table)\n" % cat) + f.write(" super::bsearch_range_table(c, %s_table)\n" % cat) f.write(" }\n\n") f.write("}\n") @@ -203,7 +202,7 @@ def emit_conversions_module(f, lowerupper, upperlower): use cmp::{Equal, Less, Greater}; use vec::ImmutableVector; use tuple::Tuple2; - use option::{ Option, Some, None }; + use option::{Option, Some, None}; pub fn to_lower(c: char) -> char { match bsearch_case_table(c, LuLl_table) { @@ -227,23 +226,15 @@ def emit_conversions_module(f, lowerupper, upperlower): }) } """); - emit_caseconversions(f, lowerupper, upperlower) + emit_caseconversion_table(f, "LuLl", upperlower) + emit_caseconversion_table(f, "LlLu", lowerupper) f.write("}\n") -def emit_caseconversions(f, lowerupper, upperlower): - f.write(" static LuLl_table : &'static [(char, char)] = &[\n") - sorted_by_lu = sorted(upperlower.iteritems(), key=operator.itemgetter(0)) +def emit_caseconversion_table(f, name, table): + f.write(" static %s_table : &'static [(char, char)] = &[\n" % name) + sorted_table = sorted(table.iteritems(), key=operator.itemgetter(0)) ix = 0 - for key, value in sorted_by_lu: - f.write(ch_prefix(ix)) - f.write("(%s, %s)" % (escape_char(key), escape_char(value))) - ix += 1 - f.write("\n ];\n\n") - - f.write(" static LlLu_table : &'static [(char, char)] = &[\n") - sorted_by_ll = sorted(lowerupper.iteritems(), key=operator.itemgetter(0)) - ix = 0 - for key, value in sorted_by_ll: + for key, value in sorted_table: f.write(ch_prefix(ix)) f.write("(%s, %s)" % (escape_char(key), escape_char(value))) ix += 1 @@ -425,6 +416,7 @@ rf.write('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGH ''') +emit_bsearch_range_table(rf); emit_property_module(rf, "general_category", gencats) emit_decomp_module(rf, canon_decomp, compat_decomp, combines) diff --git a/src/libstd/char.rs b/src/libstd/char.rs index 3fb9e47dbdc..52ca28c4ce8 100644 --- a/src/libstd/char.rs +++ b/src/libstd/char.rs @@ -228,11 +228,17 @@ pub fn to_digit(c: char, radix: uint) -> Option { /// Convert a char to its uppercase equivalent /// /// The case-folding performed is the common or simple mapping: -/// it only maps a codepoint to its equivalent if it is also a single codepoint +/// it maps one unicode codepoint (one char in Rust) to its uppercase equivalent according +/// to the Unicode database at ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt +/// The additional SpecialCasing.txt is not considered here, as it expands to multiple +/// codepoints in some cases. +/// +/// A full reference can be found here +/// http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992 /// /// # Return value /// -/// Returns the char itself if no conversion if possible +/// Returns the char itself if no conversion was made #[inline] pub fn to_uppercase(c: char) -> char { conversions::to_upper(c) @@ -240,8 +246,8 @@ pub fn to_uppercase(c: char) -> char { /// Convert a char to its lowercase equivalent /// -/// The case-folding performed is the common or simple mapping: -/// it only maps a codepoint to its equivalent if it is also a single codepoint +/// The case-folding performed is the common or simple mapping +/// see `to_uppercase` for references and more information /// /// # Return value /// diff --git a/src/libstd/unicode.rs b/src/libstd/unicode.rs index d55ddbda002..b43003f0de2 100644 --- a/src/libstd/unicode.rs +++ b/src/libstd/unicode.rs @@ -13,26 +13,26 @@ #[allow(missing_doc)]; #[allow(non_uppercase_statics)]; + +fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { + use cmp::{Equal, Less, Greater}; + use vec::ImmutableVector; + use option::None; + r.bsearch(|&(lo,hi)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) != None +} + + pub mod general_category { - - fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { - use cmp::{Equal, Less, Greater}; - use vec::ImmutableVector; - use option::None; - r.bsearch(|&(lo,hi)| { - if lo <= c && c <= hi { Equal } - else if hi < c { Less } - else { Greater } - }) != None - } - - static Cc_table : &'static [(char,char)] = &[ ('\x00', '\x1f'), ('\x7f', '\x9f') ]; pub fn Cc(c: char) -> bool { - bsearch_range_table(c, Cc_table) + super::bsearch_range_table(c, Cc_table) } static Nd_table : &'static [(char,char)] = &[ @@ -60,7 +60,7 @@ pub mod general_category { ]; pub fn Nd(c: char) -> bool { - bsearch_range_table(c, Nd_table) + super::bsearch_range_table(c, Nd_table) } static Nl_table : &'static [(char,char)] = &[ @@ -73,7 +73,7 @@ pub mod general_category { ]; pub fn Nl(c: char) -> bool { - bsearch_range_table(c, Nl_table) + super::bsearch_range_table(c, Nl_table) } static No_table : &'static [(char,char)] = &[ @@ -101,7 +101,7 @@ pub mod general_category { ]; pub fn No(c: char) -> bool { - bsearch_range_table(c, No_table) + super::bsearch_range_table(c, No_table) } } @@ -2323,19 +2323,6 @@ pub mod decompose { } pub mod derived_property { - - fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { - use cmp::{Equal, Less, Greater}; - use vec::ImmutableVector; - use option::None; - r.bsearch(|&(lo,hi)| { - if lo <= c && c <= hi { Equal } - else if hi < c { Less } - else { Greater } - }) != None - } - - static Alphabetic_table : &'static [(char,char)] = &[ ('\x41', '\x5a'), ('\x61', '\x7a'), ('\xaa', '\xaa'), ('\xb5', '\xb5'), @@ -2745,7 +2732,7 @@ pub mod derived_property { ]; pub fn Alphabetic(c: char) -> bool { - bsearch_range_table(c, Alphabetic_table) + super::bsearch_range_table(c, Alphabetic_table) } static Lowercase_table : &'static [(char,char)] = &[ @@ -3067,7 +3054,7 @@ pub mod derived_property { ]; pub fn Lowercase(c: char) -> bool { - bsearch_range_table(c, Lowercase_table) + super::bsearch_range_table(c, Lowercase_table) } static Uppercase_table : &'static [(char,char)] = &[ @@ -3379,7 +3366,7 @@ pub mod derived_property { ]; pub fn Uppercase(c: char) -> bool { - bsearch_range_table(c, Uppercase_table) + super::bsearch_range_table(c, Uppercase_table) } static XID_Continue_table : &'static [(char,char)] = &[ @@ -3863,7 +3850,7 @@ pub mod derived_property { ]; pub fn XID_Continue(c: char) -> bool { - bsearch_range_table(c, XID_Continue_table) + super::bsearch_range_table(c, XID_Continue_table) } static XID_Start_table : &'static [(char,char)] = &[ @@ -4147,24 +4134,11 @@ pub mod derived_property { ]; pub fn XID_Start(c: char) -> bool { - bsearch_range_table(c, XID_Start_table) + super::bsearch_range_table(c, XID_Start_table) } } pub mod property { - - fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { - use cmp::{Equal, Less, Greater}; - use vec::ImmutableVector; - use option::None; - r.bsearch(|&(lo,hi)| { - if lo <= c && c <= hi { Equal } - else if hi < c { Less } - else { Greater } - }) != None - } - - static White_Space_table : &'static [(char,char)] = &[ ('\x09', '\x0d'), ('\x20', '\x20'), ('\x85', '\x85'), ('\xa0', '\xa0'), @@ -4175,7 +4149,7 @@ pub mod property { ]; pub fn White_Space(c: char) -> bool { - bsearch_range_table(c, White_Space_table) + super::bsearch_range_table(c, White_Space_table) } } @@ -4184,7 +4158,7 @@ pub mod conversions { use cmp::{Equal, Less, Greater}; use vec::ImmutableVector; use tuple::Tuple2; - use option::{ Option, Some, None }; + use option::{Option, Some, None}; pub fn to_lower(c: char) -> char { match bsearch_case_table(c, LuLl_table) {