auto merge of #6029 : Kimundi/rust/ascii-encoding, r=thestinger
Replaced {str, char, u8}::is_ascii Replaced str::to_lower and str::to_upper
This commit is contained in:
commit
ee3789b4e4
@ -802,7 +802,7 @@ An example of `use` declarations:
|
||||
|
||||
~~~~
|
||||
use core::float::sin;
|
||||
use core::str::{slice, to_upper};
|
||||
use core::str::{slice, contains};
|
||||
use core::option::Some;
|
||||
|
||||
fn main() {
|
||||
@ -813,8 +813,8 @@ fn main() {
|
||||
info!(Some(1.0));
|
||||
|
||||
// Equivalent to
|
||||
// 'info!(core::str::to_upper(core::str::slice("foo", 0, 1)));'
|
||||
info!(to_upper(slice("foo", 0, 1)));
|
||||
// 'info!(core::str::contains(core::str::slice("foo", 0, 1), "oo"));'
|
||||
info!(contains(slice("foo", 0, 1), "oo"));
|
||||
}
|
||||
~~~~
|
||||
|
||||
|
@ -50,7 +50,11 @@ fn parse_expected(line_num: uint, line: ~str) -> ~[ExpectedError] {
|
||||
while idx < len && line[idx] == (' ' as u8) { idx += 1u; }
|
||||
let start_kind = idx;
|
||||
while idx < len && line[idx] != (' ' as u8) { idx += 1u; }
|
||||
let kind = str::to_lower(str::slice(line, start_kind, idx).to_owned());
|
||||
|
||||
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
|
||||
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
|
||||
let kind = str::slice(line, start_kind, idx);
|
||||
let kind = kind.to_ascii().to_lower().to_str_ascii();
|
||||
|
||||
// Extract msg:
|
||||
while idx < len && line[idx] == (' ' as u8) { idx += 1u; }
|
||||
|
@ -100,12 +100,6 @@ pub fn is_alphanumeric(c: char) -> bool {
|
||||
unicode::general_category::No(c);
|
||||
}
|
||||
|
||||
/// Indicates whether the character is an ASCII character
|
||||
#[inline(always)]
|
||||
pub fn is_ascii(c: char) -> bool {
|
||||
c - ('\x7F' & c) == '\x00'
|
||||
}
|
||||
|
||||
/// Indicates whether the character is numeric (Nd, Nl, or No)
|
||||
#[inline(always)]
|
||||
pub fn is_digit(c: char) -> bool {
|
||||
@ -116,7 +110,7 @@ pub fn is_digit(c: char) -> bool {
|
||||
|
||||
/**
|
||||
* Checks if a character parses as a numeric digit in the given radix.
|
||||
* Compared to `is_digit()`, this function only recognizes the ascii
|
||||
* Compared to `is_digit()`, this function only recognizes the
|
||||
* characters `0-9`, `a-z` and `A-Z`.
|
||||
*
|
||||
* Returns `true` if `c` is a valid digit under `radix`, and `false`
|
||||
@ -163,7 +157,7 @@ pub fn to_digit(c: char, radix: uint) -> Option<uint> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a number to the ascii character representing it.
|
||||
* Converts a number to the character representing it.
|
||||
*
|
||||
* Returns `Some(char)` if `num` represents one digit under `radix`,
|
||||
* using one character of `0-9` or `a-z`, or `None` if it doesn't.
|
||||
@ -316,12 +310,6 @@ fn test_to_digit() {
|
||||
assert!(to_digit('$', 36u).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_ascii() {
|
||||
assert!(str::all(~"banana", is_ascii));
|
||||
assert!(! str::all(~"ประเทศไทย中华Việt Nam", is_ascii));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_digit() {
|
||||
assert!(is_digit('2'));
|
||||
|
@ -10,16 +10,9 @@
|
||||
|
||||
//! Operations and constants for `u8`
|
||||
|
||||
pub use self::inst::is_ascii;
|
||||
|
||||
mod inst {
|
||||
pub type T = u8;
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type T_SIGNED = i8;
|
||||
pub static bits: uint = 8;
|
||||
|
||||
// Type-specific functions here. These must be reexported by the
|
||||
// parent module so that they appear in core::u8 and not core::u8::u8;
|
||||
|
||||
pub fn is_ascii(x: T) -> bool { return 0 as T == x & 128 as T; }
|
||||
}
|
||||
|
@ -19,6 +19,7 @@ use libc;
|
||||
use option::{None, Option, Some};
|
||||
use str;
|
||||
use to_str::ToStr;
|
||||
use ascii::{AsciiCast, AsciiStr};
|
||||
|
||||
#[deriving(Clone, Eq)]
|
||||
pub struct WindowsPath {
|
||||
@ -753,7 +754,9 @@ impl GenericPath for WindowsPath {
|
||||
fn is_restricted(&self) -> bool {
|
||||
match self.filestem() {
|
||||
Some(stem) => {
|
||||
match stem.to_lower() {
|
||||
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
|
||||
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
|
||||
match stem.to_ascii().to_lower().to_str_ascii() {
|
||||
~"con" | ~"aux" | ~"com1" | ~"com2" | ~"com3" | ~"com4" |
|
||||
~"lpt1" | ~"lpt2" | ~"lpt3" | ~"prn" | ~"nul" => true,
|
||||
_ => false
|
||||
@ -809,7 +812,10 @@ impl GenericPath for WindowsPath {
|
||||
host: copy self.host,
|
||||
device: match self.device {
|
||||
None => None,
|
||||
Some(ref device) => Some(device.to_upper())
|
||||
|
||||
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
|
||||
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
|
||||
Some(ref device) => Some(device.to_ascii().to_upper().to_str_ascii())
|
||||
},
|
||||
is_absolute: self.is_absolute,
|
||||
components: normalize(self.components)
|
||||
|
@ -27,7 +27,6 @@ use option::{None, Option, Some};
|
||||
use iterator::Iterator;
|
||||
use ptr;
|
||||
use str;
|
||||
use u8;
|
||||
use uint;
|
||||
use vec;
|
||||
use to_str::ToStr;
|
||||
@ -787,22 +786,6 @@ pub fn each_split_within<'a>(ss: &'a str,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a string to lowercase. ASCII only
|
||||
pub fn to_lower(s: &str) -> ~str {
|
||||
do map(s) |c| {
|
||||
assert!(char::is_ascii(c));
|
||||
(unsafe{libc::tolower(c as libc::c_char)}) as char
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a string to uppercase. ASCII only
|
||||
pub fn to_upper(s: &str) -> ~str {
|
||||
do map(s) |c| {
|
||||
assert!(char::is_ascii(c));
|
||||
(unsafe{libc::toupper(c as libc::c_char)}) as char
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace all occurrences of one string with another
|
||||
*
|
||||
@ -1610,13 +1593,6 @@ pub fn ends_with<'a,'b>(haystack: &'a str, needle: &'b str) -> bool {
|
||||
Section: String properties
|
||||
*/
|
||||
|
||||
/// Determines if a string contains only ASCII characters
|
||||
pub fn is_ascii(s: &str) -> bool {
|
||||
let mut i: uint = len(s);
|
||||
while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { return false; } }
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Returns true if the string has length 0
|
||||
pub fn is_empty(s: &str) -> bool { len(s) == 0u }
|
||||
|
||||
@ -2403,8 +2379,6 @@ pub trait StrSlice<'self> {
|
||||
fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool);
|
||||
fn starts_with<'a>(&self, needle: &'a str) -> bool;
|
||||
fn substr(&self, begin: uint, n: uint) -> &'self str;
|
||||
fn to_lower(&self) -> ~str;
|
||||
fn to_upper(&self) -> ~str;
|
||||
fn escape_default(&self) -> ~str;
|
||||
fn escape_unicode(&self) -> ~str;
|
||||
fn trim(&self) -> &'self str;
|
||||
@ -2565,12 +2539,6 @@ impl<'self> StrSlice<'self> for &'self str {
|
||||
fn substr(&self, begin: uint, n: uint) -> &'self str {
|
||||
substr(*self, begin, n)
|
||||
}
|
||||
/// Convert a string to lowercase
|
||||
#[inline]
|
||||
fn to_lower(&self) -> ~str { to_lower(*self) }
|
||||
/// Convert a string to uppercase
|
||||
#[inline]
|
||||
fn to_upper(&self) -> ~str { to_upper(*self) }
|
||||
/// Escape each char in `s` with char::escape_default.
|
||||
#[inline]
|
||||
fn escape_default(&self) -> ~str { escape_default(*self) }
|
||||
@ -3084,27 +3052,6 @@ mod tests {
|
||||
assert!(repeat(~"hi", 0) == ~"");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_upper() {
|
||||
// libc::toupper, and hence str::to_upper
|
||||
// are culturally insensitive: they only work for ASCII
|
||||
// (see Issue #1347)
|
||||
let unicode = ~""; //"\u65e5\u672c"; // uncomment once non-ASCII works
|
||||
let input = ~"abcDEF" + unicode + ~"xyz:.;";
|
||||
let expected = ~"ABCDEF" + unicode + ~"XYZ:.;";
|
||||
let actual = to_upper(input);
|
||||
assert!(expected == actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_lower() {
|
||||
// libc::tolower, and hence str::to_lower
|
||||
// are culturally insensitive: they only work for ASCII
|
||||
// (see Issue #1347)
|
||||
assert!(~"" == to_lower(""));
|
||||
assert!(~"ymca" == to_lower("YMCA"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unsafe_slice() {
|
||||
assert!("ab" == unsafe {raw::slice_bytes("abc", 0, 2)});
|
||||
@ -3337,13 +3284,6 @@ mod tests {
|
||||
assert!((!is_whitespace(~" _ ")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_ascii() {
|
||||
assert!((is_ascii(~"")));
|
||||
assert!((is_ascii(~"a")));
|
||||
assert!((!is_ascii(~"\u2009")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_shift_byte() {
|
||||
let mut s = ~"ABC";
|
||||
|
@ -199,6 +199,7 @@ impl ToStrConsume for ~[Ascii] {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use str;
|
||||
|
||||
macro_rules! v2ascii (
|
||||
( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]);
|
||||
@ -221,6 +222,9 @@ mod tests {
|
||||
assert_eq!('['.to_ascii().to_lower().to_char(), '[');
|
||||
assert_eq!('`'.to_ascii().to_upper().to_char(), '`');
|
||||
assert_eq!('{'.to_ascii().to_upper().to_char(), '{');
|
||||
|
||||
assert!(str::all(~"banana", |c| c.is_ascii()));
|
||||
assert!(! str::all(~"ประเทศไทย中华Việt Nam", |c| c.is_ascii()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -234,6 +238,15 @@ mod tests {
|
||||
|
||||
assert_eq!("abCDef&?#".to_ascii().to_lower().to_str_ascii(), ~"abcdef&?#");
|
||||
assert_eq!("abCDef&?#".to_ascii().to_upper().to_str_ascii(), ~"ABCDEF&?#");
|
||||
|
||||
assert_eq!("".to_ascii().to_lower().to_str_ascii(), ~"");
|
||||
assert_eq!("YMCA".to_ascii().to_lower().to_str_ascii(), ~"ymca");
|
||||
assert_eq!("abcDEFxyz:.;".to_ascii().to_upper().to_str_ascii(), ~"ABCDEFXYZ:.;");
|
||||
|
||||
assert!("".is_ascii());
|
||||
assert!("a".is_ascii());
|
||||
assert!(!"\u2009".is_ascii());
|
||||
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -520,7 +520,13 @@ pub mod rt {
|
||||
match cv.ty {
|
||||
TyDefault => uint_to_str_prec(u, 10, prec),
|
||||
TyHexLower => uint_to_str_prec(u, 16, prec),
|
||||
TyHexUpper => str::to_upper(uint_to_str_prec(u, 16, prec)),
|
||||
|
||||
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
|
||||
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
|
||||
TyHexUpper => {
|
||||
let s = uint_to_str_prec(u, 16, prec);
|
||||
s.to_ascii().to_upper().to_str_ascii()
|
||||
}
|
||||
TyBits => uint_to_str_prec(u, 2, prec),
|
||||
TyOctal => uint_to_str_prec(u, 8, prec)
|
||||
};
|
||||
|
@ -546,7 +546,11 @@ pub fn build_session_options(binary: @~str,
|
||||
let lint_dict = lint::get_lint_dict();
|
||||
for lint_levels.each |level| {
|
||||
let level_name = lint::level_to_str(*level);
|
||||
let level_short = level_name.substr(0,1).to_upper();
|
||||
|
||||
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
|
||||
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
|
||||
let level_short = level_name.substr(0,1);
|
||||
let level_short = level_short.to_ascii().to_upper().to_str_ascii();
|
||||
let flags = vec::append(getopts::opt_strs(matches, level_short),
|
||||
getopts::opt_strs(matches, level_name));
|
||||
for flags.each |lint_name| {
|
||||
|
@ -157,7 +157,9 @@ pub fn pandoc_header_id(header: &str) -> ~str {
|
||||
let s = str::replace(s, ~" ", ~"-");
|
||||
return s;
|
||||
}
|
||||
fn convert_to_lowercase(s: &str) -> ~str { str::to_lower(s) }
|
||||
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
|
||||
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
|
||||
fn convert_to_lowercase(s: &str) -> ~str { s.to_ascii().to_lower().to_str_ascii() }
|
||||
fn remove_up_to_first_letter(s: &str) -> ~str { s.to_str() }
|
||||
fn maybe_use_section_id(s: &str) -> ~str { s.to_str() }
|
||||
}
|
||||
|
@ -220,7 +220,7 @@ fn parse_reader(rdr: @io::Reader) -> Version {
|
||||
|
||||
|
||||
pub fn parse(s: &str) -> Option<Version> {
|
||||
if ! str::is_ascii(s) {
|
||||
if !s.is_ascii() {
|
||||
return None;
|
||||
}
|
||||
let s = s.trim();
|
||||
|
@ -885,8 +885,12 @@ mod tests {
|
||||
// tjc: funny that we have to use parens
|
||||
fn ile(x: &(&'static str), y: &(&'static str)) -> bool
|
||||
{
|
||||
let x = x.to_lower();
|
||||
let y = y.to_lower();
|
||||
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
|
||||
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
|
||||
// (Actually, could just remove the to_str_* call, but needs a deriving(Ord) on
|
||||
// Ascii)
|
||||
let x = x.to_ascii().to_lower().to_str_ascii();
|
||||
let y = y.to_ascii().to_lower().to_str_ascii();
|
||||
x <= y
|
||||
}
|
||||
|
||||
|
@ -59,7 +59,10 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str {
|
||||
for pairs_sorted.each |kv| {
|
||||
let (k,v) = copy *kv;
|
||||
unsafe {
|
||||
buffer += (fmt!("%s %0.3f\n", str::to_upper(str::raw::from_bytes(k)), v));
|
||||
let b = str::raw::from_bytes(k);
|
||||
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
|
||||
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
|
||||
buffer += (fmt!("%s %0.3f\n", b.to_ascii().to_upper().to_str_ascii(), v));
|
||||
}
|
||||
}
|
||||
|
||||
@ -68,7 +71,9 @@ fn sort_and_fmt(mm: &HashMap<~[u8], uint>, total: uint) -> ~str {
|
||||
|
||||
// given a map, search for the frequency of a pattern
|
||||
fn find(mm: &HashMap<~[u8], uint>, key: ~str) -> uint {
|
||||
match mm.find(&str::to_bytes(str::to_lower(key))) {
|
||||
// FIXME: #4318 Instead of to_ascii and to_str_ascii, could use
|
||||
// to_ascii_consume and to_str_consume to avoid an unnecessary copy.
|
||||
match mm.find(&str::to_bytes(key.to_ascii().to_lower().to_str_ascii())) {
|
||||
option::None => { return 0u; }
|
||||
option::Some(&num) => { return num; }
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user