auto merge of #6029 : Kimundi/rust/ascii-encoding, r=thestinger

Replaced {str, char, u8}::is_ascii
Replaced str::to_lower and str::to_upper
This commit is contained in:
bors 2013-04-24 13:33:29 -07:00
commit ee3789b4e4
13 changed files with 60 additions and 95 deletions

View File

@ -802,7 +802,7 @@ An example of `use` declarations:
~~~~
use core::float::sin;
use core::str::{slice, to_upper};
use core::str::{slice, contains};
use core::option::Some;
fn main() {
@ -813,8 +813,8 @@ fn main() {
info!(Some(1.0));
// Equivalent to
// 'info!(core::str::to_upper(core::str::slice("foo", 0, 1)));'
info!(to_upper(slice("foo", 0, 1)));
// 'info!(core::str::contains(core::str::slice("foo", 0, 1), "oo"));'
info!(contains(slice("foo", 0, 1), "oo"));
}
~~~~

View File

@ -50,7 +50,11 @@ fn parse_expected(line_num: uint, line: ~str) -> ~[ExpectedError] {
while idx < len && line[idx] == (' ' as u8) { idx += 1u; }
let start_kind = idx;
while idx < len && line[idx] != (' ' as u8) { idx += 1u; }
let kind = str::to_lower(str::slice(line, start_kind, idx).to_owned());
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
let kind = str::slice(line, start_kind, idx);
let kind = kind.to_ascii().to_lower().to_str_ascii();
// Extract msg:
while idx < len && line[idx] == (' ' as u8) { idx += 1u; }

View File

@ -100,12 +100,6 @@ pub fn is_alphanumeric(c: char) -> bool {
unicode::general_category::No(c);
}
/// Indicates whether the character is an ASCII character
#[inline(always)]
pub fn is_ascii(c: char) -> bool {
c - ('\x7F' & c) == '\x00'
}
/// Indicates whether the character is numeric (Nd, Nl, or No)
#[inline(always)]
pub fn is_digit(c: char) -> bool {
@ -116,7 +110,7 @@ pub fn is_digit(c: char) -> bool {
/**
* Checks if a character parses as a numeric digit in the given radix.
* Compared to `is_digit()`, this function only recognizes the ascii
* Compared to `is_digit()`, this function only recognizes the
* characters `0-9`, `a-z` and `A-Z`.
*
* Returns `true` if `c` is a valid digit under `radix`, and `false`
@ -163,7 +157,7 @@ pub fn to_digit(c: char, radix: uint) -> Option<uint> {
}
/**
* Converts a number to the ascii character representing it.
* Converts a number to the character representing it.
*
* Returns `Some(char)` if `num` represents one digit under `radix`,
* using one character of `0-9` or `a-z`, or `None` if it doesn't.
@ -316,12 +310,6 @@ fn test_to_digit() {
assert!(to_digit('$', 36u).is_none());
}
#[test]
fn test_is_ascii() {
assert!(str::all(~"banana", is_ascii));
assert!(! str::all(~"ประเทศไทย中华Việt Nam", is_ascii));
}
#[test]
fn test_is_digit() {
assert!(is_digit('2'));

View File

@ -10,16 +10,9 @@
//! Operations and constants for `u8`
pub use self::inst::is_ascii;
mod inst {
pub type T = u8;
#[allow(non_camel_case_types)]
pub type T_SIGNED = i8;
pub static bits: uint = 8;
// Type-specific functions here. These must be reexported by the
// parent module so that they appear in core::u8 and not core::u8::u8;
pub fn is_ascii(x: T) -> bool { return 0 as T == x & 128 as T; }
}

View File

@ -19,6 +19,7 @@ use libc;
use option::{None, Option, Some};
use str;
use to_str::ToStr;
use ascii::{AsciiCast, AsciiStr};
#[deriving(Clone, Eq)]
pub struct WindowsPath {
@ -753,7 +754,9 @@ impl GenericPath for WindowsPath {
fn is_restricted(&self) -> bool {
match self.filestem() {
Some(stem) => {
match stem.to_lower() {
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
match stem.to_ascii().to_lower().to_str_ascii() {
~"con" | ~"aux" | ~"com1" | ~"com2" | ~"com3" | ~"com4" |
~"lpt1" | ~"lpt2" | ~"lpt3" | ~"prn" | ~"nul" => true,
_ => false
@ -809,7 +812,10 @@ impl GenericPath for WindowsPath {
host: copy self.host,
device: match self.device {
None => None,
Some(ref device) => Some(device.to_upper())
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
Some(ref device) => Some(device.to_ascii().to_upper().to_str_ascii())
},
is_absolute: self.is_absolute,
components: normalize(self.components)

View File

@ -27,7 +27,6 @@ use option::{None, Option, Some};
use iterator::Iterator;
use ptr;
use str;
use u8;
use uint;
use vec;
use to_str::ToStr;
@ -787,22 +786,6 @@ pub fn each_split_within<'a>(ss: &'a str,
}
}
/// Convert a string to lowercase. ASCII only
pub fn to_lower(s: &str) -> ~str {
do map(s) |c| {
assert!(char::is_ascii(c));
(unsafe{libc::tolower(c as libc::c_char)}) as char
}
}
/// Convert a string to uppercase. ASCII only
pub fn to_upper(s: &str) -> ~str {
do map(s) |c| {
assert!(char::is_ascii(c));
(unsafe{libc::toupper(c as libc::c_char)}) as char
}
}
/**
* Replace all occurrences of one string with another
*
@ -1610,13 +1593,6 @@ pub fn ends_with<'a,'b>(haystack: &'a str, needle: &'b str) -> bool {
Section: String properties
*/
/// Determines if a string contains only ASCII characters
pub fn is_ascii(s: &str) -> bool {
let mut i: uint = len(s);
while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { return false; } }
return true;
}
/// Returns true if the string has length 0
pub fn is_empty(s: &str) -> bool { len(s) == 0u }
@ -2403,8 +2379,6 @@ pub trait StrSlice<'self> {
fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool);
fn starts_with<'a>(&self, needle: &'a str) -> bool;
fn substr(&self, begin: uint, n: uint) -> &'self str;
fn to_lower(&self) -> ~str;
fn to_upper(&self) -> ~str;
fn escape_default(&self) -> ~str;
fn escape_unicode(&self) -> ~str;
fn trim(&self) -> &'self str;
@ -2565,12 +2539,6 @@ impl<'self> StrSlice<'self> for &'self str {
fn substr(&self, begin: uint, n: uint) -> &'self str {
substr(*self, begin, n)
}
/// Convert a string to lowercase
#[inline]
fn to_lower(&self) -> ~str { to_lower(*self) }
/// Convert a string to uppercase
#[inline]
fn to_upper(&self) -> ~str { to_upper(*self) }
/// Escape each char in `s` with char::escape_default.
#[inline]
fn escape_default(&self) -> ~str { escape_default(*self) }
@ -3084,27 +3052,6 @@ mod tests {
assert!(repeat(~"hi", 0) == ~"");
}
#[test]
fn test_to_upper() {
// libc::toupper, and hence str::to_upper
// are culturally insensitive: they only work for ASCII
// (see Issue #1347)
let unicode = ~""; //"\u65e5\u672c"; // uncomment once non-ASCII works
let input = ~"abcDEF" + unicode + ~"xyz:.;";
let expected = ~"ABCDEF" + unicode + ~"XYZ:.;";
let actual = to_upper(input);
assert!(expected == actual);
}
#[test]
fn test_to_lower() {
// libc::tolower, and hence str::to_lower
// are culturally insensitive: they only work for ASCII
// (see Issue #1347)
assert!(~"" == to_lower(""));
assert!(~"ymca" == to_lower("YMCA"));
}
#[test]
fn test_unsafe_slice() {
assert!("ab" == unsafe {raw::slice_bytes("abc", 0, 2)});
@ -3337,13 +3284,6 @@ mod tests {
assert!((!is_whitespace(~" _ ")));
}
#[test]
fn test_is_ascii() {
assert!((is_ascii(~"")));
assert!((is_ascii(~"a")));
assert!((!is_ascii(~"\u2009")));
}
#[test]
fn test_shift_byte() {
let mut s = ~"ABC";

View File

@ -199,6 +199,7 @@ impl ToStrConsume for ~[Ascii] {
#[cfg(test)]
mod tests {
use super::*;
use str;
macro_rules! v2ascii (
( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]);
@ -221,6 +222,9 @@ mod tests {
assert_eq!('['.to_ascii().to_lower().to_char(), '[');
assert_eq!('`'.to_ascii().to_upper().to_char(), '`');
assert_eq!('{'.to_ascii().to_upper().to_char(), '{');
assert!(str::all(~"banana", |c| c.is_ascii()));
assert!(! str::all(~"ประเทศไทย中华Việt Nam", |c| c.is_ascii()));
}
#[test]
@ -234,6 +238,15 @@ mod tests {
assert_eq!("abCDef&?#".to_ascii().to_lower().to_str_ascii(), ~"abcdef&?#");
assert_eq!("abCDef&?#".to_ascii().to_upper().to_str_ascii(), ~"ABCDEF&?#");
assert_eq!("".to_ascii().to_lower().to_str_ascii(), ~"");
assert_eq!("YMCA".to_ascii().to_lower().to_str_ascii(), ~"ymca");
assert_eq!("abcDEFxyz:.;".to_ascii().to_upper().to_str_ascii(), ~"ABCDEFXYZ:.;");
assert!("".is_ascii());
assert!("a".is_ascii());
assert!(!"\u2009".is_ascii());
}
#[test]

View File

@ -520,7 +520,13 @@ pub mod rt {
match cv.ty {
TyDefault => uint_to_str_prec(u, 10, prec),
TyHexLower => uint_to_str_prec(u, 16, prec),
TyHexUpper => str::to_upper(uint_to_str_prec(u, 16, prec)),
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
TyHexUpper => {
let s = uint_to_str_prec(u, 16, prec);
s.to_ascii().to_upper().to_str_ascii()
}
TyBits => uint_to_str_prec(u, 2, prec),
TyOctal => uint_to_str_prec(u, 8, prec)
};

View File

@ -546,7 +546,11 @@ pub fn build_session_options(binary: @~str,
let lint_dict = lint::get_lint_dict();
for lint_levels.each |level| {
let level_name = lint::level_to_str(*level);
let level_short = level_name.substr(0,1).to_upper();
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
let level_short = level_name.substr(0,1);
let level_short = level_short.to_ascii().to_upper().to_str_ascii();
let flags = vec::append(getopts::opt_strs(matches, level_short),
getopts::opt_strs(matches, level_name));
for flags.each |lint_name| {

View File

@ -157,7 +157,9 @@ pub fn pandoc_header_id(header: &str) -> ~str {
let s = str::replace(s, ~" ", ~"-");
return s;
}
fn convert_to_lowercase(s: &str) -> ~str { str::to_lower(s) }
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
fn convert_to_lowercase(s: &str) -> ~str { s.to_ascii().to_lower().to_str_ascii() }
fn remove_up_to_first_letter(s: &str) -> ~str { s.to_str() }
fn maybe_use_section_id(s: &str) -> ~str { s.to_str() }
}

View File

@ -220,7 +220,7 @@ fn parse_reader(rdr: @io::Reader) -> Version {
pub fn parse(s: &str) -> Option<Version> {
if ! str::is_ascii(s) {
if !s.is_ascii() {
return None;
}
let s = s.trim();

View File

@ -885,8 +885,12 @@ mod tests {
// tjc: funny that we have to use parens
fn ile(x: &(&'static str), y: &(&'static str)) -> bool
{
let x = x.to_lower();
let y = y.to_lower();
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
// (Actually, could just remove the to_str_* call, but that needs a deriving(Ord) on
// Ascii)
let x = x.to_ascii().to_lower().to_str_ascii();
let y = y.to_ascii().to_lower().to_str_ascii();
x <= y
}

View File

@ -59,7 +59,10 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str {
for pairs_sorted.each |kv| {
let (k,v) = copy *kv;
unsafe {
buffer += (fmt!("%s %0.3f\n", str::to_upper(str::raw::from_bytes(k)), v));
let b = str::raw::from_bytes(k);
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
buffer += (fmt!("%s %0.3f\n", b.to_ascii().to_upper().to_str_ascii(), v));
}
}
@ -68,7 +71,9 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str {
// given a map, search for the frequency of a pattern
fn find(mm: &HashMap<~[u8], uint>, key: ~str) -> uint {
match mm.find(&str::to_bytes(str::to_lower(key))) {
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
match mm.find(&str::to_bytes(key.to_ascii().to_lower().to_str_ascii())) {
option::None => { return 0u; }
option::Some(&num) => { return num; }
}