auto merge of #6029 : Kimundi/rust/ascii-encoding, r=thestinger

Replaced {str, char, u8}::is_ascii; replaced str::to_lower and str::to_upper
2013-04-24 13:33:29 -07:00 · 2013-04-24 13:33:29 -07:00 · ee3789b4e4
commit ee3789b4e4
parent e26f992d5e 3759b5711d
13 changed files with 60 additions and 95 deletions
--- a/doc/rust.md
+++ b/doc/rust.md
@ -802,7 +802,7 @@ An example of `use` declarations:

 ~~~~
 use core::float::sin;
-use core::str::{slice, to_upper};
+use core::str::{slice, contains};
 use core::option::Some;

 fn main() {
@ -813,8 +813,8 @@ fn main() {
    info!(Some(1.0));

    // Equivalent to
-    // 'info!(core::str::to_upper(core::str::slice("foo", 0, 1)));'
-    info!(to_upper(slice("foo", 0, 1)));
+    // 'info!(core::str::contains(core::str::slice("foo", 0, 1), "oo"));'
+    info!(contains(slice("foo", 0, 1), "oo"));
 }
 ~~~~

--- a/src/compiletest/errors.rs
+++ b/src/compiletest/errors.rs
@ -50,7 +50,11 @@ fn parse_expected(line_num: uint, line: ~str) -> ~[ExpectedError] {
    while idx < len && line[idx] == (' ' as u8) { idx += 1u; }
    let start_kind = idx;
    while idx < len && line[idx] != (' ' as u8) { idx += 1u; }
-    let kind = str::to_lower(str::slice(line, start_kind, idx).to_owned());
+
+    // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
+    // to_ascii_consume and to_str_consume to avoid an unnecessary copy.
+    let kind = str::slice(line, start_kind, idx);
+    let kind = kind.to_ascii().to_lower().to_str_ascii();

    // Extract msg:
    while idx < len && line[idx] == (' ' as u8) { idx += 1u; }
--- a/src/libcore/char.rs
+++ b/src/libcore/char.rs
@ -100,12 +100,6 @@ pub fn is_alphanumeric(c: char) -> bool {
        unicode::general_category::No(c);
 }

-/// Indicates whether the character is an ASCII character
-#[inline(always)]
-pub fn is_ascii(c: char) -> bool {
-   c - ('\x7F' & c) == '\x00'
-}
-
 /// Indicates whether the character is numeric (Nd, Nl, or No)
 #[inline(always)]
 pub fn is_digit(c: char) -> bool {
@ -116,7 +110,7 @@ pub fn is_digit(c: char) -> bool {

 /**
 * Checks if a character parses as a numeric digit in the given radix.
- * Compared to `is_digit()`, this function only recognizes the ascii
+ * Compared to `is_digit()`, this function only recognizes the
 * characters `0-9`, `a-z` and `A-Z`.
 *
 * Returns `true` if `c` is a valid digit under `radix`, and `false`
@ -163,7 +157,7 @@ pub fn to_digit(c: char, radix: uint) -> Option<uint> {
 }

 /**
- * Converts a number to the ascii character representing it.
+ * Converts a number to the character representing it.
 *
 * Returns `Some(char)` if `num` represents one digit under `radix`,
 * using one character of `0-9` or `a-z`, or `None` if it doesn't.
@ -316,12 +310,6 @@ fn test_to_digit() {
    assert!(to_digit('$', 36u).is_none());
 }

-#[test]
-fn test_is_ascii() {
-   assert!(str::all(~"banana", is_ascii));
-   assert!(! str::all(~"ประเทศไทย中华Việt Nam", is_ascii));
-}
-
 #[test]
 fn test_is_digit() {
   assert!(is_digit('2'));
--- a/src/libcore/num/uint-template/u8.rs
+++ b/src/libcore/num/uint-template/u8.rs
@ -10,16 +10,9 @@

 //! Operations and constants for `u8`

-pub use self::inst::is_ascii;
-
 mod inst {
    pub type T = u8;
    #[allow(non_camel_case_types)]
    pub type T_SIGNED = i8;
    pub static bits: uint = 8;
-
-    // Type-specific functions here. These must be reexported by the
-    // parent module so that they appear in core::u8 and not core::u8::u8;
-
-    pub fn is_ascii(x: T) -> bool { return 0 as T == x & 128 as T; }
 }
--- a/src/libcore/path.rs
+++ b/src/libcore/path.rs
@ -19,6 +19,7 @@ use libc;
 use option::{None, Option, Some};
 use str;
 use to_str::ToStr;
+use ascii::{AsciiCast, AsciiStr};

 #[deriving(Clone, Eq)]
 pub struct WindowsPath {
@ -753,7 +754,9 @@ impl GenericPath for WindowsPath {
    fn is_restricted(&self) -> bool {
        match self.filestem() {
            Some(stem) => {
-                match stem.to_lower() {
+                // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
+                // to_ascii_consume and to_str_consume to avoid an unnecessary copy.
+                match stem.to_ascii().to_lower().to_str_ascii() {
                    ~"con" | ~"aux" | ~"com1" | ~"com2" | ~"com3" | ~"com4" |
                    ~"lpt1" | ~"lpt2" | ~"lpt3" | ~"prn" | ~"nul" => true,
                    _ => false
@ -809,7 +812,10 @@ impl GenericPath for WindowsPath {
            host: copy self.host,
            device: match self.device {
                None => None,
-                Some(ref device) => Some(device.to_upper())
+
+                // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
+                // to_ascii_consume and to_str_consume to avoid an unnecessary copy.
+                Some(ref device) => Some(device.to_ascii().to_upper().to_str_ascii())
            },
            is_absolute: self.is_absolute,
            components: normalize(self.components)
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@ -27,7 +27,6 @@ use option::{None, Option, Some};
 use iterator::Iterator;
 use ptr;
 use str;
-use u8;
 use uint;
 use vec;
 use to_str::ToStr;
@ -787,22 +786,6 @@ pub fn each_split_within<'a>(ss: &'a str,
    }
 }

-/// Convert a string to lowercase. ASCII only
-pub fn to_lower(s: &str) -> ~str {
-    do map(s) |c| {
-        assert!(char::is_ascii(c));
-        (unsafe{libc::tolower(c as libc::c_char)}) as char
-    }
-}
-
-/// Convert a string to uppercase. ASCII only
-pub fn to_upper(s: &str) -> ~str {
-    do map(s) |c| {
-        assert!(char::is_ascii(c));
-        (unsafe{libc::toupper(c as libc::c_char)}) as char
-    }
-}
-
 /**
 * Replace all occurrences of one string with another
 *
@ -1610,13 +1593,6 @@ pub fn ends_with<'a,'b>(haystack: &'a str, needle: &'b str) -> bool {
 Section: String properties
 */

-/// Determines if a string contains only ASCII characters
-pub fn is_ascii(s: &str) -> bool {
-    let mut i: uint = len(s);
-    while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { return false; } }
-    return true;
-}
-
 /// Returns true if the string has length 0
 pub fn is_empty(s: &str) -> bool { len(s) == 0u }

@ -2403,8 +2379,6 @@ pub trait StrSlice<'self> {
    fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool);
    fn starts_with<'a>(&self, needle: &'a str) -> bool;
    fn substr(&self, begin: uint, n: uint) -> &'self str;
-    fn to_lower(&self) -> ~str;
-    fn to_upper(&self) -> ~str;
    fn escape_default(&self) -> ~str;
    fn escape_unicode(&self) -> ~str;
    fn trim(&self) -> &'self str;
@ -2565,12 +2539,6 @@ impl<'self> StrSlice<'self> for &'self str {
    fn substr(&self, begin: uint, n: uint) -> &'self str {
        substr(*self, begin, n)
    }
-    /// Convert a string to lowercase
-    #[inline]
-    fn to_lower(&self) -> ~str { to_lower(*self) }
-    /// Convert a string to uppercase
-    #[inline]
-    fn to_upper(&self) -> ~str { to_upper(*self) }
    /// Escape each char in `s` with char::escape_default.
    #[inline]
    fn escape_default(&self) -> ~str { escape_default(*self) }
@ -3084,27 +3052,6 @@ mod tests {
        assert!(repeat(~"hi", 0) == ~"");
    }

-    #[test]
-    fn test_to_upper() {
-        // libc::toupper, and hence str::to_upper
-        // are culturally insensitive: they only work for ASCII
-        // (see Issue #1347)
-        let unicode = ~""; //"\u65e5\u672c"; // uncomment once non-ASCII works
-        let input = ~"abcDEF" + unicode + ~"xyz:.;";
-        let expected = ~"ABCDEF" + unicode + ~"XYZ:.;";
-        let actual = to_upper(input);
-        assert!(expected == actual);
-    }
-
-    #[test]
-    fn test_to_lower() {
-        // libc::tolower, and hence str::to_lower
-        // are culturally insensitive: they only work for ASCII
-        // (see Issue #1347)
-        assert!(~"" == to_lower(""));
-        assert!(~"ymca" == to_lower("YMCA"));
-    }
-
    #[test]
    fn test_unsafe_slice() {
        assert!("ab" == unsafe {raw::slice_bytes("abc", 0, 2)});
@ -3337,13 +3284,6 @@ mod tests {
        assert!((!is_whitespace(~"   _   ")));
    }

-    #[test]
-    fn test_is_ascii() {
-        assert!((is_ascii(~"")));
-        assert!((is_ascii(~"a")));
-        assert!((!is_ascii(~"\u2009")));
-    }
-
    #[test]
    fn test_shift_byte() {
        let mut s = ~"ABC";
--- a/src/libcore/str/ascii.rs
+++ b/src/libcore/str/ascii.rs
@ -199,6 +199,7 @@ impl ToStrConsume for ~[Ascii] {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use str;

    macro_rules! v2ascii (
        ( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]);
@ -221,6 +222,9 @@ mod tests {
        assert_eq!('['.to_ascii().to_lower().to_char(), '[');
        assert_eq!('`'.to_ascii().to_upper().to_char(), '`');
        assert_eq!('{'.to_ascii().to_upper().to_char(), '{');
+
+        assert!(str::all(~"banana", |c| c.is_ascii()));
+        assert!(! str::all(~"ประเทศไทย中华Việt Nam", |c| c.is_ascii()));
    }

    #[test]
@ -234,6 +238,15 @@ mod tests {

        assert_eq!("abCDef&?#".to_ascii().to_lower().to_str_ascii(), ~"abcdef&?#");
        assert_eq!("abCDef&?#".to_ascii().to_upper().to_str_ascii(), ~"ABCDEF&?#");
+
+        assert_eq!("".to_ascii().to_lower().to_str_ascii(), ~"");
+        assert_eq!("YMCA".to_ascii().to_lower().to_str_ascii(), ~"ymca");
+        assert_eq!("abcDEFxyz:.;".to_ascii().to_upper().to_str_ascii(), ~"ABCDEFXYZ:.;");
+
+        assert!("".is_ascii());
+        assert!("a".is_ascii());
+        assert!(!"\u2009".is_ascii());
+
    }

    #[test]
--- a/src/libcore/unstable/extfmt.rs
+++ b/src/libcore/unstable/extfmt.rs
@ -520,7 +520,13 @@ pub mod rt {
            match cv.ty {
              TyDefault => uint_to_str_prec(u, 10, prec),
              TyHexLower => uint_to_str_prec(u, 16, prec),
-              TyHexUpper => str::to_upper(uint_to_str_prec(u, 16, prec)),
+
+              // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
+              // to_ascii_consume and to_str_consume to avoid an unnecessary copy.
+              TyHexUpper => {
+                let s = uint_to_str_prec(u, 16, prec);
+                s.to_ascii().to_upper().to_str_ascii()
+              }
              TyBits => uint_to_str_prec(u, 2, prec),
              TyOctal => uint_to_str_prec(u, 8, prec)
            };
--- a/src/librustc/driver/driver.rs
+++ b/src/librustc/driver/driver.rs
@ -546,7 +546,11 @@ pub fn build_session_options(binary: @~str,
    let lint_dict = lint::get_lint_dict();
    for lint_levels.each |level| {
        let level_name = lint::level_to_str(*level);
-        let level_short = level_name.substr(0,1).to_upper();
+
+        // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
+        // to_ascii_consume and to_str_consume to avoid an unnecessary copy.
+        let level_short = level_name.substr(0,1);
+        let level_short = level_short.to_ascii().to_upper().to_str_ascii();
        let flags = vec::append(getopts::opt_strs(matches, level_short),
                                getopts::opt_strs(matches, level_name));
        for flags.each |lint_name| {
--- a/src/librustdoc/markdown_index_pass.rs
+++ b/src/librustdoc/markdown_index_pass.rs
@ -157,7 +157,9 @@ pub fn pandoc_header_id(header: &str) -> ~str {
        let s = str::replace(s, ~" ", ~"-");
        return s;
    }
-    fn convert_to_lowercase(s: &str) -> ~str { str::to_lower(s) }
+    // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
+    // to_ascii_consume and to_str_consume to avoid an unnecessary copy.
+    fn convert_to_lowercase(s: &str) -> ~str { s.to_ascii().to_lower().to_str_ascii() }
    fn remove_up_to_first_letter(s: &str) -> ~str { s.to_str() }
    fn maybe_use_section_id(s: &str) -> ~str { s.to_str() }
 }
--- a/src/libstd/semver.rs
+++ b/src/libstd/semver.rs
@ -220,7 +220,7 @@ fn parse_reader(rdr: @io::Reader) -> Version {


 pub fn parse(s: &str) -> Option<Version> {
-    if ! str::is_ascii(s) {
+    if !s.is_ascii() {
        return None;
    }
    let s = s.trim();
--- a/src/libstd/sort.rs
+++ b/src/libstd/sort.rs
@ -885,8 +885,12 @@ mod tests {
        // tjc: funny that we have to use parens
        fn ile(x: &(&'static str), y: &(&'static str)) -> bool
        {
-            let x = x.to_lower();
-            let y = y.to_lower();
+            // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
+            // to_ascii_consume and to_str_consume to avoid an unnecessary copy.
+            // (Actually, could just remove the to_str_* call, but that needs a deriving(Ord) on
+            // Ascii)
+            let x = x.to_ascii().to_lower().to_str_ascii();
+            let y = y.to_ascii().to_lower().to_str_ascii();
            x <= y
        }

--- a/src/test/bench/shootout-k-nucleotide-pipes.rs
+++ b/src/test/bench/shootout-k-nucleotide-pipes.rs
@ -59,7 +59,10 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str {
   for pairs_sorted.each |kv| {
       let (k,v) = copy *kv;
       unsafe {
-           buffer += (fmt!("%s %0.3f\n", str::to_upper(str::raw::from_bytes(k)), v));
+           let b = str::raw::from_bytes(k);
+           // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
+           // to_ascii_consume and to_str_consume to avoid an unnecessary copy.
+           buffer += (fmt!("%s %0.3f\n", b.to_ascii().to_upper().to_str_ascii(), v));
       }
   }

@ -68,7 +71,9 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str {

 // given a map, search for the frequency of a pattern
 fn find(mm: &HashMap<~[u8], uint>, key: ~str) -> uint {
-   match mm.find(&str::to_bytes(str::to_lower(key))) {
+   // FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
+   // to_ascii_consume and to_str_consume to avoid an unnecessary copy.
+   match mm.find(&str::to_bytes(key.to_ascii().to_lower().to_str_ascii())) {
      option::None      => { return 0u; }
      option::Some(&num) => { return num; }
   }