2013-04-20 22:54:13 +02:00
|
|
|
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
|
2013-04-20 19:39:15 +02:00
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2013-05-03 09:20:01 +03:00
|
|
|
//! Operations on ASCII strings and characters.
|
|
|
|
|
2013-04-20 19:39:15 +02:00
|
|
|
use to_str::{ToStr,ToStrConsume};
|
|
|
|
use str;
|
2013-05-12 20:34:15 -04:00
|
|
|
use str::StrSlice;
|
2013-04-20 19:39:15 +02:00
|
|
|
use cast;
|
2013-08-01 03:16:42 -04:00
|
|
|
use iterator::{Iterator, IteratorUtil};
|
2013-05-12 20:34:15 -04:00
|
|
|
use vec::{CopyableVector, ImmutableVector, OwnedVector};
|
2013-06-16 11:04:53 +02:00
|
|
|
use to_bytes::IterBytes;
|
2013-08-01 03:16:42 -04:00
|
|
|
use option::{Some, None};
|
2013-04-20 19:39:15 +02:00
|
|
|
|
2013-06-16 11:04:53 +02:00
|
|
|
/// Datatype to hold one ascii character. It wraps a `u8`, with the highest bit always zero.
|
2013-04-22 21:42:25 +02:00
|
|
|
#[deriving(Clone, Eq)]
|
2013-04-20 19:39:15 +02:00
|
|
|
pub struct Ascii { priv chr: u8 }
|
|
|
|
|
2013-05-31 15:17:22 -07:00
|
|
|
impl Ascii {
|
2013-04-20 19:39:15 +02:00
|
|
|
/// Converts a ascii character into a `u8`.
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-05-31 15:17:22 -07:00
|
|
|
pub fn to_byte(self) -> u8 {
|
2013-04-20 19:39:15 +02:00
|
|
|
self.chr
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Converts a ascii character into a `char`.
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-05-31 15:17:22 -07:00
|
|
|
pub fn to_char(self) -> char {
|
2013-04-20 19:39:15 +02:00
|
|
|
self.chr as char
|
|
|
|
}
|
2013-04-20 22:50:50 +02:00
|
|
|
|
|
|
|
/// Convert to lowercase.
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-05-31 15:17:22 -07:00
|
|
|
pub fn to_lower(self) -> Ascii {
|
2013-04-20 22:50:50 +02:00
|
|
|
if self.chr >= 65 && self.chr <= 90 {
|
|
|
|
Ascii{chr: self.chr | 0x20 }
|
|
|
|
} else {
|
|
|
|
self
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Convert to uppercase.
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-05-31 15:17:22 -07:00
|
|
|
pub fn to_upper(self) -> Ascii {
|
2013-04-20 22:50:50 +02:00
|
|
|
if self.chr >= 97 && self.chr <= 122 {
|
|
|
|
Ascii{chr: self.chr & !0x20 }
|
|
|
|
} else {
|
|
|
|
self
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-31 15:17:22 -07:00
|
|
|
/// Compares two ascii characters of equality, ignoring case.
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-05-31 15:17:22 -07:00
|
|
|
pub fn eq_ignore_case(self, other: Ascii) -> bool {
|
2013-04-20 22:50:50 +02:00
|
|
|
self.to_lower().chr == other.to_lower().chr
|
|
|
|
}
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
impl ToStr for Ascii {
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn to_str(&self) -> ~str { str::from_bytes(['\'' as u8, self.chr, '\'' as u8]) }
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Conversion of a borrowed value into an ASCII type `T`.
pub trait AsciiCast<T> {
    /// Converts `self` into the ASCII type.
    ///
    /// The implementations in this file assert `is_ascii()` before
    /// converting.
    fn to_ascii(&self) -> T;

    /// Converts `self` into the ASCII type without any range assertion.
    unsafe fn to_ascii_nocheck(&self) -> T;

    /// Reports whether `self` can be converted to ASCII.
    fn is_ascii(&self) -> bool;
}
|
|
|
|
|
|
|
|
impl<'self> AsciiCast<&'self[Ascii]> for &'self [u8] {
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn to_ascii(&self) -> &'self[Ascii] {
|
|
|
|
assert!(self.is_ascii());
|
2013-06-16 11:04:53 +02:00
|
|
|
unsafe {self.to_ascii_nocheck()}
|
|
|
|
}
|
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-06-16 11:04:53 +02:00
|
|
|
unsafe fn to_ascii_nocheck(&self) -> &'self[Ascii] {
|
|
|
|
cast::transmute(*self)
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn is_ascii(&self) -> bool {
|
2013-08-01 03:16:42 -04:00
|
|
|
foreach b in self.iter() {
|
2013-04-20 19:39:15 +02:00
|
|
|
if !b.is_ascii() { return false; }
|
|
|
|
}
|
|
|
|
true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'self> AsciiCast<&'self[Ascii]> for &'self str {
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn to_ascii(&self) -> &'self[Ascii] {
|
|
|
|
assert!(self.is_ascii());
|
2013-06-16 11:04:53 +02:00
|
|
|
unsafe {self.to_ascii_nocheck()}
|
|
|
|
}
|
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-06-16 11:04:53 +02:00
|
|
|
unsafe fn to_ascii_nocheck(&self) -> &'self[Ascii] {
|
|
|
|
let (p,len): (*u8, uint) = cast::transmute(*self);
|
|
|
|
cast::transmute((p, len - 1))
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn is_ascii(&self) -> bool {
|
2013-07-27 23:38:38 +02:00
|
|
|
self.byte_iter().all(|b| b.is_ascii())
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl AsciiCast<Ascii> for u8 {
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn to_ascii(&self) -> Ascii {
|
|
|
|
assert!(self.is_ascii());
|
2013-06-16 11:04:53 +02:00
|
|
|
unsafe {self.to_ascii_nocheck()}
|
|
|
|
}
|
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-06-16 11:04:53 +02:00
|
|
|
unsafe fn to_ascii_nocheck(&self) -> Ascii {
|
2013-04-20 19:39:15 +02:00
|
|
|
Ascii{ chr: *self }
|
|
|
|
}
|
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn is_ascii(&self) -> bool {
|
|
|
|
*self & 128 == 0u8
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl AsciiCast<Ascii> for char {
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn to_ascii(&self) -> Ascii {
|
|
|
|
assert!(self.is_ascii());
|
2013-06-16 11:04:53 +02:00
|
|
|
unsafe {self.to_ascii_nocheck()}
|
|
|
|
}
|
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-06-16 11:04:53 +02:00
|
|
|
unsafe fn to_ascii_nocheck(&self) -> Ascii {
|
2013-04-20 19:39:15 +02:00
|
|
|
Ascii{ chr: *self as u8 }
|
|
|
|
}
|
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn is_ascii(&self) -> bool {
|
|
|
|
*self - ('\x7F' & *self) == '\x00'
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Trait for copyless casting to an ascii vector.
|
|
|
|
pub trait OwnedAsciiCast {
|
|
|
|
/// Take ownership and cast to an ascii vector without trailing zero element.
|
2013-06-16 11:04:53 +02:00
|
|
|
fn into_ascii(self) -> ~[Ascii];
|
|
|
|
|
|
|
|
/// Take ownership and cast to an ascii vector without trailing zero element.
|
|
|
|
/// Does not perform validation checks.
|
|
|
|
unsafe fn into_ascii_nocheck(self) -> ~[Ascii];
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
impl OwnedAsciiCast for ~[u8] {
    /// Turns the owned byte vector into an owned `Ascii` vector, asserting
    /// first that it passes `is_ascii`.
    #[inline]
    fn into_ascii(self) -> ~[Ascii] {
        assert!(self.is_ascii());
        unsafe { self.into_ascii_nocheck() }
    }

    /// Turns the owned byte vector into an owned `Ascii` vector without
    /// checking its contents. The vector is transmuted, not copied.
    #[inline]
    unsafe fn into_ascii_nocheck(self) -> ~[Ascii] {
        let ascii: ~[Ascii] = cast::transmute(self);
        ascii
    }
}
|
|
|
|
|
|
|
|
impl OwnedAsciiCast for ~str {
    /// Turns the owned string into an owned `Ascii` vector, asserting first
    /// that it passes `is_ascii`.
    #[inline]
    fn into_ascii(self) -> ~[Ascii] {
        assert!(self.is_ascii());
        unsafe { self.into_ascii_nocheck() }
    }

    /// Turns the owned string into an owned `Ascii` vector without checking
    /// its contents.
    #[inline]
    unsafe fn into_ascii_nocheck(self) -> ~[Ascii] {
        let mut chars: ~[Ascii] = cast::transmute(self);
        // Discard the last element so the result carries no trailing zero
        // (the counterpart of the zero pushed by `into_str`).
        chars.pop();
        chars
    }
}
|
|
|
|
|
|
|
|
/// Trait for converting an ascii type to a string. Needed to convert `&[Ascii]` to `~str`
|
2013-04-22 21:42:25 +02:00
|
|
|
pub trait AsciiStr {
|
2013-04-20 19:39:15 +02:00
|
|
|
/// Convert to a string.
|
|
|
|
fn to_str_ascii(&self) -> ~str;
|
2013-04-22 21:42:25 +02:00
|
|
|
|
|
|
|
/// Convert to vector representing a lower cased ascii string.
|
|
|
|
fn to_lower(&self) -> ~[Ascii];
|
|
|
|
|
|
|
|
/// Convert to vector representing a upper cased ascii string.
|
|
|
|
fn to_upper(&self) -> ~[Ascii];
|
|
|
|
|
2013-06-16 11:04:53 +02:00
|
|
|
/// Compares two Ascii strings ignoring case
|
|
|
|
fn eq_ignore_case(self, other: &[Ascii]) -> bool;
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
2013-04-22 21:42:25 +02:00
|
|
|
impl<'self> AsciiStr for &'self [Ascii] {
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-20 19:39:15 +02:00
|
|
|
fn to_str_ascii(&self) -> ~str {
|
|
|
|
let mut cpy = self.to_owned();
|
|
|
|
cpy.push(0u8.to_ascii());
|
|
|
|
unsafe {cast::transmute(cpy)}
|
|
|
|
}
|
2013-04-22 21:42:25 +02:00
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-22 21:42:25 +02:00
|
|
|
fn to_lower(&self) -> ~[Ascii] {
|
|
|
|
self.map(|a| a.to_lower())
|
|
|
|
}
|
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-04-22 21:42:25 +02:00
|
|
|
fn to_upper(&self) -> ~[Ascii] {
|
|
|
|
self.map(|a| a.to_upper())
|
|
|
|
}
|
2013-06-16 11:04:53 +02:00
|
|
|
|
2013-06-18 14:45:18 -07:00
|
|
|
#[inline]
|
2013-06-16 11:04:53 +02:00
|
|
|
fn eq_ignore_case(self, other: &[Ascii]) -> bool {
|
|
|
|
do self.iter().zip(other.iter()).all |(&a, &b)| { a.eq_ignore_case(b) }
|
|
|
|
}
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
impl ToStrConsume for ~[Ascii] {
    /// Consumes the ASCII vector and returns it as an owned string.
    ///
    /// A zero element is pushed onto the vector before it is transmuted
    /// into a `~str`.
    #[inline]
    fn into_str(self) -> ~str {
        let mut chars = self;
        let terminator = 0u8.to_ascii();
        chars.push(terminator);
        unsafe { cast::transmute(chars) }
    }
}
|
|
|
|
|
2013-06-16 11:04:53 +02:00
|
|
|
impl IterBytes for Ascii {
    /// Calls `f` once with a one-element buffer holding this character's
    /// byte and returns whatever `f` returns. The `_lsb0` flag is unused.
    #[inline]
    fn iter_bytes(&self, _lsb0: bool, f: &fn(buf: &[u8]) -> bool) -> bool {
        let byte = self.to_byte();
        f([byte])
    }
}
|
|
|
|
|
|
|
|
/// Trait to convert to a owned byte array by consuming self
|
|
|
|
pub trait ToBytesConsume {
|
|
|
|
/// Converts to a owned byte array by consuming self
|
|
|
|
fn into_bytes(self) -> ~[u8];
|
|
|
|
}
|
|
|
|
|
|
|
|
impl ToBytesConsume for ~[Ascii] {
    /// Consumes the ASCII vector and returns it as a byte vector. The
    /// vector is transmuted, not copied.
    fn into_bytes(self) -> ~[u8] {
        let bytes: ~[u8] = unsafe { cast::transmute(self) };
        bytes
    }
}
|
|
|
|
|
2013-04-23 19:38:49 -04:00
|
|
|
#[cfg(test)]
|
2013-04-20 19:39:15 +02:00
|
|
|
mod tests {
|
|
|
|
use super::*;
|
2013-06-16 11:04:53 +02:00
|
|
|
use to_bytes::ToBytes;
|
2013-04-20 19:39:15 +02:00
|
|
|
|
|
|
|
macro_rules! v2ascii (
|
|
|
|
( [$($e:expr),*]) => ( [$(Ascii{chr:$e}),*]);
|
|
|
|
(~[$($e:expr),*]) => (~[$(Ascii{chr:$e}),*]);
|
|
|
|
)
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_ascii() {
|
|
|
|
assert_eq!(65u8.to_ascii().to_byte(), 65u8);
|
|
|
|
assert_eq!(65u8.to_ascii().to_char(), 'A');
|
|
|
|
assert_eq!('A'.to_ascii().to_char(), 'A');
|
|
|
|
assert_eq!('A'.to_ascii().to_byte(), 65u8);
|
2013-04-20 22:50:50 +02:00
|
|
|
|
2013-04-22 21:42:25 +02:00
|
|
|
assert_eq!('A'.to_ascii().to_lower().to_char(), 'a');
|
|
|
|
assert_eq!('Z'.to_ascii().to_lower().to_char(), 'z');
|
|
|
|
assert_eq!('a'.to_ascii().to_upper().to_char(), 'A');
|
|
|
|
assert_eq!('z'.to_ascii().to_upper().to_char(), 'Z');
|
2013-04-20 22:50:50 +02:00
|
|
|
|
2013-04-22 21:42:25 +02:00
|
|
|
assert_eq!('@'.to_ascii().to_lower().to_char(), '@');
|
|
|
|
assert_eq!('['.to_ascii().to_lower().to_char(), '[');
|
|
|
|
assert_eq!('`'.to_ascii().to_upper().to_char(), '`');
|
|
|
|
assert_eq!('{'.to_ascii().to_upper().to_char(), '{');
|
2013-04-23 11:08:13 +02:00
|
|
|
|
2013-06-10 00:34:23 +10:00
|
|
|
assert!("banana".iter().all(|c| c.is_ascii()));
|
|
|
|
assert!(!"ประเทศไทย中华Việt Nam".iter().all(|c| c.is_ascii()));
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_ascii_vec() {
|
|
|
|
assert_eq!((&[40u8, 32u8, 59u8]).to_ascii(), v2ascii!([40, 32, 59]));
|
|
|
|
assert_eq!("( ;".to_ascii(), v2ascii!([40, 32, 59]));
|
|
|
|
// FIXME: #5475 borrowchk error, owned vectors do not live long enough
|
|
|
|
// if chained-from directly
|
|
|
|
let v = ~[40u8, 32u8, 59u8]; assert_eq!(v.to_ascii(), v2ascii!([40, 32, 59]));
|
|
|
|
let v = ~"( ;"; assert_eq!(v.to_ascii(), v2ascii!([40, 32, 59]));
|
2013-04-22 21:42:25 +02:00
|
|
|
|
|
|
|
assert_eq!("abCDef&?#".to_ascii().to_lower().to_str_ascii(), ~"abcdef&?#");
|
|
|
|
assert_eq!("abCDef&?#".to_ascii().to_upper().to_str_ascii(), ~"ABCDEF&?#");
|
2013-04-23 11:08:13 +02:00
|
|
|
|
|
|
|
assert_eq!("".to_ascii().to_lower().to_str_ascii(), ~"");
|
|
|
|
assert_eq!("YMCA".to_ascii().to_lower().to_str_ascii(), ~"ymca");
|
|
|
|
assert_eq!("abcDEFxyz:.;".to_ascii().to_upper().to_str_ascii(), ~"ABCDEFXYZ:.;");
|
|
|
|
|
2013-06-16 11:04:53 +02:00
|
|
|
assert!("aBcDeF&?#".to_ascii().eq_ignore_case("AbCdEf&?#".to_ascii()));
|
|
|
|
|
2013-04-23 11:08:13 +02:00
|
|
|
assert!("".is_ascii());
|
|
|
|
assert!("a".is_ascii());
|
|
|
|
assert!(!"\u2009".is_ascii());
|
|
|
|
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_owned_ascii_vec() {
|
2013-06-16 11:04:53 +02:00
|
|
|
assert_eq!((~"( ;").into_ascii(), v2ascii!(~[40, 32, 59]));
|
|
|
|
assert_eq!((~[40u8, 32u8, 59u8]).into_ascii(), v2ascii!(~[40, 32, 59]));
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_ascii_to_str() { assert_eq!(v2ascii!([40, 32, 59]).to_str_ascii(), ~"( ;"); }
|
|
|
|
|
|
|
|
#[test]
|
2013-06-16 11:04:53 +02:00
|
|
|
fn test_ascii_into_str() {
|
|
|
|
assert_eq!(v2ascii!(~[40, 32, 59]).into_str(), ~"( ;");
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn test_ascii_to_bytes() {
|
|
|
|
assert_eq!(v2ascii!(~[40, 32, 59]).to_bytes(false), ~[40u8, 32u8, 59u8]);
|
|
|
|
assert_eq!(v2ascii!(~[40, 32, 59]).into_bytes(), ~[40u8, 32u8, 59u8]);
|
2013-04-20 19:39:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#[test] #[should_fail]
|
|
|
|
fn test_ascii_vec_fail_u8_slice() { (&[127u8, 128u8, 255u8]).to_ascii(); }
|
|
|
|
|
|
|
|
#[test] #[should_fail]
|
|
|
|
fn test_ascii_vec_fail_str_slice() { "zoä华".to_ascii(); }
|
|
|
|
|
|
|
|
#[test] #[should_fail]
|
|
|
|
fn test_ascii_fail_u8_slice() { 255u8.to_ascii(); }
|
|
|
|
|
|
|
|
#[test] #[should_fail]
|
|
|
|
fn test_ascii_fail_char_slice() { 'λ'.to_ascii(); }
|
|
|
|
}
|