rust/src/libcore/char.rs

// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Character manipulation (`char` type, Unicode Scalar Value)
//!
//! This module provides the `Char` trait, as well as its implementation
//! for the primitive `char` type, in order to allow basic character manipulation.
//!
//! A `char` actually represents a
//! *[Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)*,
//! as it can contain any Unicode code point except high-surrogate and
//! low-surrogate code points.
//!
//! As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
//! (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
//! however the converse is not always true due to the above range limits
//! and, as such, should be performed via the `from_u32` function.


use mem::transmute;
use option::{None, Option, Some};
use iter::{Iterator, range_step};
use unicode::{derived_property, property, general_category, decompose, conversions};

#[cfg(not(test))] use cmp::{Eq, Ord, TotalEq, TotalOrd, Ordering};
#[cfg(not(test))] use default::Default;

// UTF-8 encoding parameters, used by `encode_utf8`.
//
// Each `TAG_*` value is OR-ed into the payload bits of the byte it marks.
// Each `MAX_*` value is an exclusive upper bound: code points strictly below
// it fit in an encoding of the named length.
static TAG_CONT: uint = 0x80u;
static MAX_ONE_B: uint = 0x80u;
static TAG_TWO_B: uint = 0xC0u;
static MAX_TWO_B: uint = 0x800u;
static TAG_THREE_B: uint = 0xE0u;
static MAX_THREE_B: uint = 0x10000u;
static TAG_FOUR_B: uint = 0xF0u;

/*
    Lu  Uppercase_Letter        an uppercase letter
    Ll  Lowercase_Letter        a lowercase letter
    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
    Lm  Modifier_Letter         a modifier letter
    Lo  Other_Letter            other letters, including syllables and ideographs
    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
    Me  Enclosing_Mark          an enclosing combining mark
    Nd  Decimal_Number          a decimal digit
    Nl  Letter_Number           a letterlike numeric character
    No  Other_Number            a numeric character of other type
    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
    Pi  Initial_Punctuation     an initial quotation mark
    Pf  Final_Punctuation       a final quotation mark
    Po  Other_Punctuation       a punctuation mark of other type
    Sm  Math_Symbol             a symbol of primarily mathematical use
    Sc  Currency_Symbol         a currency sign
    Sk  Modifier_Symbol         a non-letterlike modifier symbol
    So  Other_Symbol            a symbol of other type
    Zs  Space_Separator         a space character (of various non-zero widths)
    Zl  Line_Separator          U+2028 LINE SEPARATOR only
    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
    Cc  Control                 a C0 or C1 control code
    Cf  Format                  a format control character
    Cs  Surrogate               a surrogate code point
    Co  Private_Use             a private-use character
    Cn  Unassigned              a reserved unassigned code point or a noncharacter
*/

/// The highest valid code point (U+10FFFF)
///
/// `from_u32` rejects any value greater than this.
pub static MAX: char = '\U0010ffff';

/// Converts a `u32` into a `char`, if it is a valid Unicode Scalar Value
///
/// # Return value
///
/// Returns `None` if `i` is greater than `MAX` or lies in the surrogate
/// range `[0xD800, 0xDFFF]`; otherwise `Some` of the `char` with that value.
#[inline]
pub fn from_u32(i: u32) -> Option<char> {
    let in_bounds = i <= MAX as u32;
    let is_surrogate = 0xD800 <= i && i <= 0xDFFF;

    if in_bounds && !is_surrogate {
        // Both range checks passed, so the bits of `i` can be reinterpreted
        // as a `char`.
        Some(unsafe { transmute(i) })
    } else {
        None
    }
}

/// Returns whether the specified `char` is considered a Unicode alphabetic
/// code point
///
/// This forwards directly to `derived_property::Alphabetic`.
pub fn is_alphabetic(c: char) -> bool   { derived_property::Alphabetic(c) }

/// Returns whether the specified `char` satisfies the 'XID_Start' Unicode property
///
/// 'XID_Start' is a Unicode Derived Property specified in
/// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
/// mostly similar to 'ID_Start' but modified for closure under NFKx.
///
/// This forwards directly to `derived_property::XID_Start`.
pub fn is_XID_start(c: char) -> bool    { derived_property::XID_Start(c) }

/// Returns whether the specified `char` satisfies the 'XID_Continue' Unicode property
///
/// 'XID_Continue' is a Unicode Derived Property specified in
/// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
/// mostly similar to 'ID_Continue' but modified for closure under NFKx.
///
/// This forwards directly to `derived_property::XID_Continue`.
pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) }

///
/// Indicates whether a `char` is in lower case
///
/// This is defined according to the terms of the Unicode Derived Core Property 'Lowercase';
/// the check forwards directly to `derived_property::Lowercase`.
///
#[inline]
pub fn is_lowercase(c: char) -> bool { derived_property::Lowercase(c) }

///
/// Indicates whether a `char` is in upper case
///
/// This is defined according to the terms of the Unicode Derived Core Property 'Uppercase';
/// the check forwards directly to `derived_property::Uppercase`.
///
#[inline]
pub fn is_uppercase(c: char) -> bool { derived_property::Uppercase(c) }

///
/// Indicates whether a `char` is whitespace
///
/// Whitespace is defined in terms of the Unicode Property 'White_Space'.
/// The space character and the range U+0009 through U+000D are recognised
/// directly, without consulting `property::White_Space`.
///
#[inline]
pub fn is_whitespace(c: char) -> bool {
    match c {
        // ASCII fast path, checked before the general property lookup
        ' ' | '\x09' .. '\x0d' => true,
        _ => property::White_Space(c),
    }
}

///
/// Indicates whether a `char` is alphanumeric
///
/// Alphanumericness is defined in terms of the Unicode General Categories
/// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
///
#[inline]
pub fn is_alphanumeric(c: char) -> bool {
    derived_property::Alphabetic(c)
        || general_category::Nd(c)
        || general_category::Nl(c)
        || general_category::No(c)
}

///
/// Indicates whether a `char` is a control code point
///
/// Control code points are defined in terms of the Unicode General Category
/// 'Cc'; the check forwards directly to `general_category::Cc`.
///
#[inline]
pub fn is_control(c: char) -> bool { general_category::Cc(c) }

/// Indicates whether the `char` is numeric
///
/// A `char` is numeric if it belongs to one of the Unicode General
/// Categories 'Nd', 'Nl' or 'No'. The categories are tried in that order.
#[inline]
pub fn is_digit(c: char) -> bool {
    if general_category::Nd(c) {
        return true;
    }
    if general_category::Nl(c) {
        return true;
    }
    general_category::No(c)
}

///
/// Checks if a `char` parses as a numeric digit in the given radix
///
/// Unlike `is_digit()`, only the characters `0-9`, `a-z` and `A-Z` are
/// recognized.
///
/// # Return value
///
/// Returns `true` if `c` is a valid digit under `radix`, and `false`
/// otherwise.
///
/// # Failure
///
/// Fails if given a `radix` > 36.
///
/// # Note
///
/// This is a thin wrapper that reports whether `to_digit()` produced a value.
///
#[inline]
pub fn is_digit_radix(c: char, radix: uint) -> bool {
    to_digit(c, radix).is_some()
}

///
/// Converts a `char` to the corresponding digit
///
/// # Return value
///
/// For `c` in '0' to '9', the value 0 to 9. For `c` in 'a' to 'z' or
/// 'A' to 'Z', the value 10 to 35 ('a'/'A' is 10, 'b'/'B' is 11, etc.).
/// Returns `None` if `c` is none of those characters, or if its value is
/// not smaller than `radix`.
///
/// # Failure
///
/// Fails if given a `radix` > 36.
///
#[inline]
pub fn to_digit(c: char, radix: uint) -> Option<uint> {
    if radix > 36 {
        fail!("to_digit: radix is too high (maximum 36)");
    }

    let code = c as uint;
    let value = if '0' <= c && c <= '9' {
        code - ('0' as uint)
    } else if 'a' <= c && c <= 'z' {
        code - ('a' as uint) + 10u
    } else if 'A' <= c && c <= 'Z' {
        code - ('A' as uint) + 10u
    } else {
        return None
    };

    // A digit is only valid if it is strictly below the radix.
    if value >= radix {
        None
    } else {
        Some(value)
    }
}

/// Convert a char to its uppercase equivalent
///
/// The case-folding performed is the common or simple mapping:
/// it maps one unicode codepoint (one char in Rust) to its uppercase equivalent according
/// to the Unicode database at ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
/// The additional SpecialCasing.txt is not considered here, as it expands to multiple
/// codepoints in some cases.
///
/// A full reference can be found here
/// http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
///
/// The conversion itself is performed by `conversions::to_upper`.
///
/// # Return value
///
/// Returns the char itself if no conversion was made
#[inline]
pub fn to_uppercase(c: char) -> char {
    conversions::to_upper(c)
}

/// Convert a char to its lowercase equivalent
///
/// The case-folding performed is the common or simple mapping;
/// see `to_uppercase` for references and more information.
///
/// The conversion itself is performed by `conversions::to_lower`.
///
/// # Return value
///
/// Returns the char itself if no conversion is possible
#[inline]
pub fn to_lowercase(c: char) -> char {
    conversions::to_lower(c)
}

///
/// Converts a number to the character representing it
///
/// # Return value
///
/// Returns `Some(char)` if `num` represents one digit under `radix`,
/// using one character of `0-9` or `a-z`, or `None` if it doesn't
/// (that is, if `num >= radix`).
///
/// # Failure
///
/// Fails if given a `radix` > 36.
///
#[inline]
pub fn from_digit(num: uint, radix: uint) -> Option<char> {
    if radix > 36 {
        fail!("from_digit: radix is too high (maximum 36)");
    }
    if num >= radix {
        return None;
    }

    let code = if num < 10 {
        '0' as uint + num
    } else {
        'a' as uint + num - 10u
    };

    // `num < radix <= 36`, so `code` lies in '0'..'9' or 'a'..'z' and is a
    // valid `char`; only the reinterpretation itself needs to be unsafe.
    Some(unsafe { transmute(code as u32) })
}

// Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
static S_BASE: uint = 0xAC00;
static L_BASE: uint = 0x1100;
static V_BASE: uint = 0x1161;
static T_BASE: uint = 0x11A7;
static L_COUNT: uint = 19;
static V_COUNT: uint = 21;
static T_COUNT: uint = 28;
static N_COUNT: uint = (V_COUNT * T_COUNT);
static S_COUNT: uint = (L_COUNT * N_COUNT);

// Decompose a precomposed Hangul syllable.
//
// Calls `f` with the leading-consonant code point, then the vowel code point,
// then (only when the trailing index is non-zero) the trailing-consonant code
// point. Callers in this file only pass `s` within
// `[S_BASE, S_BASE + S_COUNT)`.
fn decompose_hangul(s: char, f: |char|) {
    let syllable_index = s as uint - S_BASE;

    let lead = L_BASE + syllable_index / N_COUNT;
    let vowel = V_BASE + (syllable_index % N_COUNT) / T_COUNT;
    let trail_index = syllable_index % T_COUNT;

    unsafe {
        f(transmute(lead as u32));
        f(transmute(vowel as u32));
        // A trailing index of zero means the syllable has no trailing part.
        if trail_index != 0 {
            f(transmute((T_BASE + trail_index) as u32));
        }
    }
}

/// Computes the canonical decomposition of a character
///
/// The result is not returned; it is handed to `f`. Precomposed Hangul
/// syllables (`[S_BASE, S_BASE + S_COUNT)`) are decomposed arithmetically by
/// `decompose_hangul`; every other `char` is passed to `decompose::canonical`.
pub fn decompose_canonical(c: char, f: |char|) {
    let code = c as uint;
    let is_hangul_syllable = S_BASE <= code && code < (S_BASE + S_COUNT);

    if is_hangul_syllable {
        decompose_hangul(c, f);
    } else {
        decompose::canonical(c, f);
    }
}

/// Computes the compatibility decomposition of a character
///
/// The result is not returned; it is handed to `f`. Precomposed Hangul
/// syllables (`[S_BASE, S_BASE + S_COUNT)`) are decomposed arithmetically by
/// `decompose_hangul`; every other `char` is passed to
/// `decompose::compatibility`.
pub fn decompose_compatible(c: char, f: |char|) {
    let code = c as uint;
    let is_hangul_syllable = S_BASE <= code && code < (S_BASE + S_COUNT);

    if is_hangul_syllable {
        decompose_hangul(c, f);
    } else {
        decompose::compatibility(c, f);
    }
}

///
/// Emits the hexadecimal Unicode escape of a `char`, one `char` at a time
/// through `f`
///
/// The rules are as follows:
///
/// - chars in [0,0xff] get 2-digit escapes: `\\xNN`
/// - chars in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`
/// - chars at or above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`
///
/// Hexadecimal digits are emitted in lower case, most significant first.
///
pub fn escape_unicode(c: char, f: |char|) {
    // Digits are produced directly rather than through a string conversion,
    // so nothing is allocated here.
    f('\\');

    let (marker, width) = if c <= '\xff' {
        ('x', 2i32)
    } else if c <= '\uffff' {
        ('u', 4i32)
    } else {
        ('U', 8i32)
    };
    f(marker);

    let code = c as i32;
    // Walk the nibbles from the most significant requested one down to bit 0.
    for shift in range_step::<i32>(4 * (width - 1), -1, -4) {
        let nibble = (code >> shift) & 0xf;
        let digit = if nibble < 10 {
            '0' as i32 + nibble
        } else {
            'a' as i32 + (nibble - 10)
        };
        // `digit` is always the code of one of '0'..'9' or 'a'..'f'.
        unsafe { f(transmute(digit)); }
    }
}

///
/// Emits a 'default' ASCII and C++11-like literal escape of a `char`, one
/// `char` at a time through `f`
///
/// The default is chosen with a bias toward producing literals that are
/// legal in a variety of languages, including C++11 and similar C-family
/// languages. The exact rules are:
///
/// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
/// - Single-quote, double-quote and backslash chars are backslash-escaped.
/// - Any other chars in the range [0x20,0x7e] are not escaped.
/// - Any other chars are given hex unicode escapes; see `escape_unicode`.
///
pub fn escape_default(c: char, f: |char|) {
    // First decide whether `c` has a two-character backslash escape, and if
    // so which character follows the backslash.
    let short_escape = match c {
        '\t' => Some('t'),
        '\r' => Some('r'),
        '\n' => Some('n'),
        '\\' | '\'' | '"' => Some(c),
        _ => None,
    };

    match short_escape {
        Some(tail) => { f('\\'); f(tail); }
        // Remaining printable ASCII passes through unchanged.
        None if '\x20' <= c && c <= '\x7e' => { f(c); }
        None => c.escape_unicode(f),
    }
}

/// Returns the number of bytes this `char` would need if encoded in UTF-8
///
/// # Failure
///
/// Fails if the code point is 2097152 (0x200000) or greater.
pub fn len_utf8_bytes(c: char) -> uint {
    // Exclusive upper bounds on the code points that fit in 1, 2, 3 and 4 bytes.
    static MAX_ONE_B:   uint = 128u;
    static MAX_TWO_B:   uint = 2048u;
    static MAX_THREE_B: uint = 65536u;
    static MAX_FOUR_B:  uint = 2097152u;

    let code = c as uint;
    if code < MAX_ONE_B {
        1u
    } else if code < MAX_TWO_B {
        2u
    } else if code < MAX_THREE_B {
        3u
    } else if code < MAX_FOUR_B {
        4u
    } else {
        fail!("invalid character!")
    }
}

/// Useful functions for Unicode characters.
///
/// The implementation for `char` in this module forwards most methods to
/// the free function of the same name.
pub trait Char {
    /// Returns whether the specified character is considered a Unicode
    /// alphabetic code point.
    fn is_alphabetic(&self) -> bool;

    /// Returns whether the specified character satisfies the 'XID_Start'
    /// Unicode property.
    ///
    /// 'XID_Start' is a Unicode Derived Property specified in
    /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
    /// mostly similar to 'ID_Start' but modified for closure under NFKx.
    fn is_XID_start(&self) -> bool;

    /// Returns whether the specified `char` satisfies the 'XID_Continue'
    /// Unicode property.
    ///
    /// 'XID_Continue' is a Unicode Derived Property specified in
    /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
    /// mostly similar to 'ID_Continue' but modified for closure under NFKx.
    fn is_XID_continue(&self) -> bool;


    /// Indicates whether a character is in lowercase.
    ///
    /// This is defined according to the terms of the Unicode Derived Core
    /// Property `Lowercase`.
    fn is_lowercase(&self) -> bool;

    /// Indicates whether a character is in uppercase.
    ///
    /// This is defined according to the terms of the Unicode Derived Core
    /// Property `Uppercase`.
    fn is_uppercase(&self) -> bool;

    /// Indicates whether a character is whitespace.
    ///
    /// Whitespace is defined in terms of the Unicode Property `White_Space`.
    fn is_whitespace(&self) -> bool;

    /// Indicates whether a character is alphanumeric.
    ///
    /// Alphanumericness is defined in terms of the Unicode General Categories
    /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
    fn is_alphanumeric(&self) -> bool;

    /// Indicates whether a character is a control code point.
    ///
    /// Control code points are defined in terms of the Unicode General
    /// Category `Cc`.
    fn is_control(&self) -> bool;

    /// Indicates whether the character is numeric (Nd, Nl, or No).
    fn is_digit(&self) -> bool;

    /// Checks if a `char` parses as a numeric digit in the given radix.
    ///
    /// Compared to `is_digit()`, this function only recognizes the characters
    /// `0-9`, `a-z` and `A-Z`.
    ///
    /// # Return value
    ///
    /// Returns `true` if the character is a valid digit under `radix`, and
    /// `false` otherwise.
    ///
    /// # Failure
    ///
    /// Fails if given a radix > 36.
    fn is_digit_radix(&self, radix: uint) -> bool;

    /// Converts a character to the corresponding digit.
    ///
    /// # Return value
    ///
    /// If the character is between '0' and '9', the corresponding value
    /// between 0 and 9. If it is 'a' or 'A', 10. If it is 'b' or 'B', 11,
    /// etc. Returns none if the character does not refer to a digit in the
    /// given radix.
    ///
    /// # Failure
    ///
    /// Fails if given a radix > 36.
    fn to_digit(&self, radix: uint) -> Option<uint>;

    /// Converts a character to its lowercase equivalent.
    ///
    /// The case-folding performed is the common or simple mapping. See
    /// `to_uppercase()` for references and more information.
    ///
    /// # Return value
    ///
    /// Returns the lowercase equivalent of the character, or the character
    /// itself if no conversion is possible.
    fn to_lowercase(&self) -> char;

    /// Converts a character to its uppercase equivalent.
    ///
    /// The case-folding performed is the common or simple mapping: it maps
    /// one unicode codepoint (one character in Rust) to its uppercase
    /// equivalent according to the Unicode database [1]. The additional
    /// `SpecialCasing.txt` is not considered here, as it expands to multiple
    /// codepoints in some cases.
    ///
    /// A full reference can be found here [2].
    ///
    /// # Return value
    ///
    /// Returns the uppercase equivalent of the character, or the character
    /// itself if no conversion was made.
    ///
    /// [1]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
    ///
    /// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
    fn to_uppercase(&self) -> char;

    /// Converts a number to the character representing it.
    ///
    /// Note that this is a static method: it takes no `self`.
    ///
    /// # Return value
    ///
    /// Returns `Some(char)` if `num` represents one digit under `radix`,
    /// using one character of `0-9` or `a-z`, or `None` if it doesn't.
    ///
    /// # Failure
    ///
    /// Fails if given a radix > 36.
    fn from_digit(num: uint, radix: uint) -> Option<char>;

    /// Emits the hexadecimal Unicode escape of a character, passing each
    /// `char` of the escape to `f` in order.
    ///
    /// The rules are as follows:
    ///
    /// * Characters in [0,0xff] get 2-digit escapes: `\\xNN`
    /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`.
    /// * Characters at or above 0x10000 get 8-digit escapes: `\\UNNNNNNNN`.
    fn escape_unicode(&self, f: |char|);

    /// Emits a 'default' ASCII and C++11-like literal escape of a
    /// character, passing each `char` of the escape to `f` in order.
    ///
    /// The default is chosen with a bias toward producing literals that are
    /// legal in a variety of languages, including C++11 and similar C-family
    /// languages. The exact rules are:
    ///
    /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
    /// * Single-quote, double-quote and backslash chars are backslash-
    ///   escaped.
    /// * Any other chars in the range [0x20,0x7e] are not escaped.
    /// * Any other chars are given hex unicode escapes; see `escape_unicode`.
    fn escape_default(&self, f: |char|);

    /// Returns the amount of bytes this character would need if encoded in
    /// UTF-8.
    fn len_utf8_bytes(&self) -> uint;

    /// Encodes this character as UTF-8 into the provided byte buffer.
    ///
    /// The buffer must be at least 4 bytes long or a runtime failure may
    /// occur.
    ///
    /// This will then return the number of bytes written to the slice.
    fn encode_utf8(&self, dst: &mut [u8]) -> uint;

    /// Encodes this character as UTF-16 into the provided `u16` buffer.
    ///
    /// The buffer must be at least 2 elements long or a runtime failure may
    /// occur.
    ///
    /// This will then return the number of `u16`s written to the slice.
    fn encode_utf16(&self, dst: &mut [u16]) -> uint;
}

// `Char` for the primitive `char`. Everything except the two encoders simply
// forwards to the free function of the same name defined in this module.
impl Char for char {
    fn is_alphabetic(&self) -> bool {
        is_alphabetic(*self)
    }

    fn is_XID_start(&self) -> bool {
        is_XID_start(*self)
    }

    fn is_XID_continue(&self) -> bool {
        is_XID_continue(*self)
    }

    fn is_lowercase(&self) -> bool {
        is_lowercase(*self)
    }

    fn is_uppercase(&self) -> bool {
        is_uppercase(*self)
    }

    fn is_whitespace(&self) -> bool {
        is_whitespace(*self)
    }

    fn is_alphanumeric(&self) -> bool {
        is_alphanumeric(*self)
    }

    fn is_control(&self) -> bool {
        is_control(*self)
    }

    fn is_digit(&self) -> bool {
        is_digit(*self)
    }

    fn is_digit_radix(&self, radix: uint) -> bool {
        is_digit_radix(*self, radix)
    }

    fn to_digit(&self, radix: uint) -> Option<uint> {
        to_digit(*self, radix)
    }

    fn to_lowercase(&self) -> char {
        to_lowercase(*self)
    }

    fn to_uppercase(&self) -> char {
        to_uppercase(*self)
    }

    fn from_digit(num: uint, radix: uint) -> Option<char> {
        from_digit(num, radix)
    }

    fn escape_unicode(&self, f: |char|) {
        escape_unicode(*self, f)
    }

    fn escape_default(&self, f: |char|) {
        escape_default(*self, f)
    }

    fn len_utf8_bytes(&self) -> uint {
        len_utf8_bytes(*self)
    }

    // Writes 1 to 4 bytes into `dst`, starting at index 0, and yields how
    // many were written. The lead byte carries the length tag and the top
    // payload bits; each following byte carries `TAG_CONT` plus 6 payload bits.
    fn encode_utf8(&self, dst: &mut [u8]) -> uint {
        let code = *self as uint;
        if code < MAX_ONE_B {
            dst[0] = code as u8;
            1
        } else if code < MAX_TWO_B {
            dst[0] = (((code >> 6u) & 0x1Fu) | TAG_TWO_B) as u8;
            dst[1] = ((code & 0x3Fu) | TAG_CONT) as u8;
            2
        } else if code < MAX_THREE_B {
            dst[0] = (((code >> 12u) & 0x0Fu) | TAG_THREE_B) as u8;
            dst[1] = (((code >> 6u) & 0x3Fu) | TAG_CONT) as u8;
            dst[2] = ((code & 0x3Fu) | TAG_CONT) as u8;
            3
        } else {
            dst[0] = (((code >> 18u) & 0x07u) | TAG_FOUR_B) as u8;
            dst[1] = (((code >> 12u) & 0x3Fu) | TAG_CONT) as u8;
            dst[2] = (((code >> 6u) & 0x3Fu) | TAG_CONT) as u8;
            dst[3] = ((code & 0x3Fu) | TAG_CONT) as u8;
            4
        }
    }

    // Writes 1 or 2 `u16`s into `dst`, starting at index 0, and yields how
    // many were written.
    fn encode_utf16(&self, dst: &mut [u16]) -> uint {
        let ch = *self as uint;
        if ch <= 0xFFFF_u {
            // A code point that fits in 16 bits is stored as-is; it must not
            // be a surrogate.
            assert!(ch <= 0xD7FF_u || ch >= 0xE000_u);
            dst[0] = ch as u16;
            1
        } else {
            // Anything larger is split into a surrogate pair: the offset from
            // 0x10000 contributes its upper bits to the first unit and its
            // low 10 bits to the second.
            assert!(ch >= 0x1_0000_u && ch <= 0x10_FFFF_u);
            let offset = ch - 0x1_0000_u;
            dst[0] = 0xD800_u16 | ((offset >> 10) as u16);
            dst[1] = 0xDC00_u16 | ((offset & 0x3FF_u) as u16);
            2
        }
    }
}

// Equality on `char`, expressed through the built-in `==` on the
// dereferenced values. Compiled only when not building tests.
// NOTE(review): presumably excluded under `cfg(test)` because the test build
// uses the impls from `realcore` instead — confirm.
#[cfg(not(test))]
impl Eq for char {
    #[inline]
    fn eq(&self, other: &char) -> bool { (*self) == (*other) }
}

// `TotalEq` for `char`; the impl body is empty, so no methods are overridden
// here. Compiled only when not building tests.
#[cfg(not(test))]
impl TotalEq for char {}

// Ordering on `char`, expressed through the built-in `<` on the dereferenced
// values. Only `lt` is provided here. Compiled only when not building tests.
#[cfg(not(test))]
impl Ord for char {
    #[inline]
    fn lt(&self, other: &char) -> bool { *self < *other }
}

// Total ordering on `char`: both sides are cast to `u32` and the `u32`
// comparison decides the result. Compiled only when not building tests.
#[cfg(not(test))]
impl TotalOrd for char {
    fn cmp(&self, other: &char) -> Ordering {
        (*self as u32).cmp(&(*other as u32))
    }
}

// The default `char` is the NUL character (U+0000). Compiled only when not
// building tests.
#[cfg(not(test))]
impl Default for char {
    #[inline]
    fn default() -> char { '\x00' }
}

// Unit tests for this module. The `Char` trait is imported from `realcore`,
// while `escape_unicode` and `escape_default` are the free functions defined
// in this file.
#[cfg(test)]
mod test {
    use super::{escape_unicode, escape_default};

    use realcore::char::Char;
    use slice::ImmutableVector;
    use realstd::option::{Some, None};
    use realstd::strbuf::StrBuf;
    use realstd::str::StrAllocating;

    // ASCII and non-ASCII lowercase letters are lowercase; uppercase ones are not.
    #[test]
    fn test_is_lowercase() {
        assert!('a'.is_lowercase());
        assert!('ö'.is_lowercase());
        assert!('ß'.is_lowercase());
        assert!(!'Ü'.is_lowercase());
        assert!(!'P'.is_lowercase());
    }

    // Mirror image of `test_is_lowercase`.
    #[test]
    fn test_is_uppercase() {
        assert!(!'h'.is_uppercase());
        assert!(!'ä'.is_uppercase());
        assert!(!'ß'.is_uppercase());
        assert!('Ö'.is_uppercase());
        assert!('T'.is_uppercase());
    }

    // Covers the ASCII fast path (space, tab, newline), one non-ASCII space
    // (U+2007) and a few non-whitespace characters including NUL.
    #[test]
    fn test_is_whitespace() {
        assert!(' '.is_whitespace());
        assert!('\u2007'.is_whitespace());
        assert!('\t'.is_whitespace());
        assert!('\n'.is_whitespace());
        assert!(!'a'.is_whitespace());
        assert!(!'_'.is_whitespace());
        assert!(!'\u0000'.is_whitespace());
    }

    // Digits and letters in both cases, at the top of several radices, plus
    // characters that are not digits in any radix.
    #[test]
    fn test_to_digit() {
        assert_eq!('0'.to_digit(10u), Some(0u));
        assert_eq!('1'.to_digit(2u), Some(1u));
        assert_eq!('2'.to_digit(3u), Some(2u));
        assert_eq!('9'.to_digit(10u), Some(9u));
        assert_eq!('a'.to_digit(16u), Some(10u));
        assert_eq!('A'.to_digit(16u), Some(10u));
        assert_eq!('b'.to_digit(16u), Some(11u));
        assert_eq!('B'.to_digit(16u), Some(11u));
        assert_eq!('z'.to_digit(36u), Some(35u));
        assert_eq!('Z'.to_digit(36u), Some(35u));
        assert_eq!(' '.to_digit(10u), None);
        assert_eq!('$'.to_digit(36u), None);
    }

    // Latin and Greek capitals map to their lowercase forms; characters
    // without a lowercase mapping are returned unchanged.
    #[test]
    fn test_to_lowercase() {
        assert_eq!('A'.to_lowercase(), 'a');
        assert_eq!('Ö'.to_lowercase(), 'ö');
        assert_eq!('ß'.to_lowercase(), 'ß');
        assert_eq!('Ü'.to_lowercase(), 'ü');
        assert_eq!('💩'.to_lowercase(), '💩');
        assert_eq!('Σ'.to_lowercase(), 'σ');
        assert_eq!('Τ'.to_lowercase(), 'τ');
        assert_eq!('Ι'.to_lowercase(), 'ι');
        assert_eq!('Γ'.to_lowercase(), 'γ');
        assert_eq!('Μ'.to_lowercase(), 'μ');
        assert_eq!('Α'.to_lowercase(), 'α');
        // NOTE(review): this repeats the first Σ assertion above.
        assert_eq!('Σ'.to_lowercase(), 'σ');
    }

    // Latin and Greek lowercase letters map to their capitals; 'ß' and
    // characters without an uppercase mapping are returned unchanged.
    #[test]
    fn test_to_uppercase() {
        assert_eq!('a'.to_uppercase(), 'A');
        assert_eq!('ö'.to_uppercase(), 'Ö');
        assert_eq!('ß'.to_uppercase(), 'ß'); // not ẞ: Latin capital letter sharp s
        assert_eq!('ü'.to_uppercase(), 'Ü');
        assert_eq!('💩'.to_uppercase(), '💩');

        assert_eq!('σ'.to_uppercase(), 'Σ');
        assert_eq!('τ'.to_uppercase(), 'Τ');
        assert_eq!('ι'.to_uppercase(), 'Ι');
        assert_eq!('γ'.to_uppercase(), 'Γ');
        assert_eq!('μ'.to_uppercase(), 'Μ');
        assert_eq!('α'.to_uppercase(), 'Α');
        // Final sigma uppercases to the same capital as regular sigma.
        assert_eq!('ς'.to_uppercase(), 'Σ');
    }

    // Code points below U+0020, U+007F and U+0092 are control characters;
    // space and ASCII letters are not.
    #[test]
    fn test_is_control() {
        assert!('\u0000'.is_control());
        assert!('\u0003'.is_control());
        assert!('\u0006'.is_control());
        assert!('\u0009'.is_control());
        assert!('\u007f'.is_control());
        assert!('\u0092'.is_control());
        assert!(!'\u0020'.is_control());
        assert!(!'\u0055'.is_control());
        assert!(!'\u0068'.is_control());
    }

    // ASCII digits are numeric; ASCII letters are not.
    #[test]
    fn test_is_digit() {
       assert!('2'.is_digit());
       assert!('7'.is_digit());
       assert!(!'c'.is_digit());
       assert!(!'i'.is_digit());
       assert!(!'z'.is_digit());
       assert!(!'Q'.is_digit());
    }

    // Exercises every rule of `escape_default`: short escapes, unescaped
    // printable ASCII, and the three widths of hex escape.
    #[test]
    fn test_escape_default() {
        // Collects everything `escape_default` emits into an owned string.
        fn string(c: char) -> ~str {
            let mut result = StrBuf::new();
            escape_default(c, |c| { result.push_char(c); });
            return result.into_owned();
        }
        assert_eq!(string('\n'), "\\n".to_owned());
        assert_eq!(string('\r'), "\\r".to_owned());
        assert_eq!(string('\''), "\\'".to_owned());
        assert_eq!(string('"'), "\\\"".to_owned());
        assert_eq!(string(' '), " ".to_owned());
        assert_eq!(string('a'), "a".to_owned());
        assert_eq!(string('~'), "~".to_owned());
        assert_eq!(string('\x00'), "\\x00".to_owned());
        assert_eq!(string('\x1f'), "\\x1f".to_owned());
        assert_eq!(string('\x7f'), "\\x7f".to_owned());
        assert_eq!(string('\xff'), "\\xff".to_owned());
        assert_eq!(string('\u011b'), "\\u011b".to_owned());
        assert_eq!(string('\U0001d4b6'), "\\U0001d4b6".to_owned());
    }

    // `escape_unicode` always produces a hex escape, even for printable ASCII.
    #[test]
    fn test_escape_unicode() {
        // Collects everything `escape_unicode` emits into an owned string.
        fn string(c: char) -> ~str {
            let mut result = StrBuf::new();
            escape_unicode(c, |c| { result.push_char(c); });
            return result.into_owned();
        }
        assert_eq!(string('\x00'), "\\x00".to_owned());
        assert_eq!(string('\n'), "\\x0a".to_owned());
        assert_eq!(string(' '), "\\x20".to_owned());
        assert_eq!(string('a'), "\\x61".to_owned());
        assert_eq!(string('\u011b'), "\\u011b".to_owned());
        assert_eq!(string('\U0001d4b6'), "\\U0001d4b6".to_owned());
    }

    // A `char` converts to a one-character string via `ToStr`.
    #[test]
    fn test_to_str() {
        use realstd::to_str::ToStr;
        let s = 't'.to_str();
        assert_eq!(s, "t".to_owned());
    }

    // One sample for each UTF-8 encoded length, 1 through 4 bytes.
    #[test]
    fn test_encode_utf8() {
        // Encodes `input` into a 4-byte buffer and compares the written prefix.
        fn check(input: char, expect: &[u8]) {
            let mut buf = [0u8, ..4];
            let n = input.encode_utf8(buf /* as mut slice! */);
            assert_eq!(buf.slice_to(n), expect);
        }

        check('x', [0x78]);
        check('\u00e9', [0xc3, 0xa9]);
        check('\ua66e', [0xea, 0x99, 0xae]);
        check('\U0001f4a9', [0xf0, 0x9f, 0x92, 0xa9]);
    }

    // Three single-unit samples and one surrogate-pair sample.
    #[test]
    fn test_encode_utf16() {
        // Encodes `input` into a 2-element buffer and compares the written prefix.
        fn check(input: char, expect: &[u16]) {
            let mut buf = [0u16, ..2];
            let n = input.encode_utf16(buf /* as mut slice! */);
            assert_eq!(buf.slice_to(n), expect);
        }

        check('x', [0x0078]);
        check('\u00e9', [0x00e9]);
        check('\ua66e', [0xa66e]);
        check('\U0001f4a9', [0xd83d, 0xdca9]);
    }
}
-												Cleaned up case related functions a bit

											
										
										
											2013-04-18 10:01:23 +02:00
+								// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
-												Update license, add license boilerplate to most files. Remainder will follow.

											
										
										
											2012-12-03 16:48:01 -08:00
+								// file at the top-level directory of this distribution and at
 								// http://rust-lang.org/COPYRIGHT.
 								//
 								// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 								// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 								// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 								// option. This file may not be copied, modified, or distributed
 								// except according to those terms.
-												doc: don't refer to 'char' as characters

This seems to be causing some confusion among users. Rust's char are
not 8bit characters, but 32bit UCS-4 codepoint without surrogates
(Unicode Scalar Values as per Unicode glossary).
Make the doc more explicit about it.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-07 23:53:34 +01:00
+								//! Character manipulation (`char` type, Unicode Scalar Value)
 								//!
 								//! This module  provides the `Char` trait, as well as its implementation
 								//! for the primitive `char` type, in order to allow basic character manipulation.
 								//!
 								//! A `char` actually represents a
 								//! *[Unicode Scalar Value](http://www.unicode.org/glossary/#unicode_scalar_value)*,
 								//! as it can contain any Unicode code point except high-surrogate and
 								//! low-surrogate code points.
 								//!
 								//! As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
 								//! (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
 								//! however the converse is not always true due to the above range limits
 								//! and, as such, should be performed via the `from_u32` function..
-												Copy first batch of material from libstd to libcore.

											
										
										
											2011-12-13 16:25:51 -08:00
-												core: Remove the cast module

This commit revisits the `cast` module in libcore and libstd, and scrutinizes
all functions inside of it. The result was to remove the `cast` module entirely,
folding all functionality into the `mem` module. Specifically, this is the fate
of each function in the `cast` module.

* transmute - This function was moved to `mem`, but it is now marked as
              #[unstable]. This is due to planned changes to the `transmute`
              function and how it can be invoked (see the #[unstable] comment).
              For more information, see RFC 5 and #12898

* transmute_copy - This function was moved to `mem`, with clarification that is
                   is not an error to invoke it with T/U that are different
                   sizes, but rather that it is strongly discouraged. This
                   function is now #[stable]

* forget - This function was moved to `mem` and marked #[stable]

* bump_box_refcount - This function was removed due to the deprecation of
                      managed boxes as well as its questionable utility.

* transmute_mut - This function was previously deprecated, and removed as part
                  of this commit.

* transmute_mut_unsafe - This function doesn't serve much of a purpose when it
                         can be achieved with an `as` in safe code, so it was
                         removed.

* transmute_lifetime - This function was removed because it is likely a strong
                       indication that code is incorrect in the first place.

* transmute_mut_lifetime - This function was removed for the same reasons as
                           `transmute_lifetime`

* copy_lifetime - This function was moved to `mem`, but it is marked
                  `#[unstable]` now due to the likelihood of being removed in
                  the future if it is found to not be very useful.

* copy_mut_lifetime - This function was also moved to `mem`, but had the same
                      treatment as `copy_lifetime`.

* copy_lifetime_vec - This function was removed because it is not used today,
                      and its existence is not necessary with DST
                      (copy_lifetime will suffice).

In summary, the cast module was stripped down to these functions, and then the
functions were moved to the `mem` module.

    transmute - #[unstable]
    transmute_copy - #[stable]
    forget - #[stable]
    copy_lifetime - #[unstable]
    copy_mut_lifetime - #[unstable]

[breaking-change]

											
										
										
											2014-05-09 10:34:51 -07:00
+								use mem::transmute;
-												librustc: Make unqualified identifier searches terminate at the nearest module scope. r=tjc

											
										
										
											2013-01-08 19:37:25 -08:00
+								use option::{None, Option, Some};
-												Use std::iter::range_step

Use the iterator version instead of the old uint::/int::range_step
functions.

											
										
										
											2013-09-15 02:46:51 +02:00
+								use iter::{Iterator, range_step};
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
+								use unicode::{derived_property, property, general_category, decompose, conversions};
-												rustc: Make `<` and `=` into traits

											
										
										
											2012-08-27 16:26:35 -07:00
-												core: Inherit the char module

											
										
										
											2014-04-30 20:33:08 -07:00
+								#[cfg(not(test))] use cmp::{Eq, Ord, TotalEq, TotalOrd, Ordering};
-												std: Add a bunch of Default impls

											
										
										
											2013-09-11 21:49:25 -07:00
+								#[cfg(not(test))] use default::Default;
-												Adding missing imports for tests, and gate off others

											
										
										
											2013-02-28 11:57:33 -05:00
-												Implement formatting arguments for strings and integers

Closes #1651

											
										
										
											2013-08-10 00:28:47 -07:00
+								// UTF-8 ranges and tags for encoding characters
 								// The `TAG_*` values equal the standard UTF-8 continuation/lead byte prefixes and
 								// the `MAX_*_B` values equal the exclusive code-point limits of the 1-, 2- and
 								// 3-byte forms. NOTE(review): the encoder using them is outside this chunk — confirm.
 								static TAG_CONT: uint = 128u; // 0x80, bit pattern 10xxxxxx
 								static MAX_ONE_B: uint = 128u; // 0x80
 								static TAG_TWO_B: uint = 192u; // 0xC0, bit pattern 110xxxxx
 								static MAX_TWO_B: uint = 2048u; // 0x800
 								static TAG_THREE_B: uint = 224u; // 0xE0, bit pattern 1110xxxx
 								static MAX_THREE_B: uint = 65536u; // 0x10000
 								static TAG_FOUR_B: uint = 240u; // 0xF0, bit pattern 11110xxx
-												Add support to libcore for encoded-in-rust unicode character properties, at least. Add script to compute them from unicode.org.

											
										
										
											2011-12-23 18:48:08 -08:00
+								/*
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								    Lu  Uppercase_Letter        an uppercase letter
 								    Ll  Lowercase_Letter        a lowercase letter
 								    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
 								    Lm  Modifier_Letter         a modifier letter
 								    Lo  Other_Letter            other letters, including syllables and ideographs
 								    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
 								    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
 								    Me  Enclosing_Mark          an enclosing combining mark
 								    Nd  Decimal_Number          a decimal digit
 								    Nl  Letter_Number           a letterlike numeric character
 								    No  Other_Number            a numeric character of other type
-												Add support to libcore for encoded-in-rust unicode character properties, at least. Add script to compute them from unicode.org.

											
										
										
											2011-12-23 18:48:08 -08:00
+								    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
 								    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
 								    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
-												Add support to libcore for encoded-in-rust unicode character properties, at least. Add script to compute them from unicode.org.

											
										
										
											2011-12-23 18:48:08 -08:00
+								    Pi  Initial_Punctuation     an initial quotation mark
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								    Pf  Final_Punctuation       a final quotation mark
 								    Po  Other_Punctuation       a punctuation mark of other type
 								    Sm  Math_Symbol             a symbol of primarily mathematical use
 								    Sc  Currency_Symbol         a currency sign
 								    Sk  Modifier_Symbol         a non-letterlike modifier symbol
 								    So  Other_Symbol            a symbol of other type
 								    Zs  Space_Separator         a space character (of various non-zero widths)
 								    Zl  Line_Separator          U+2028 LINE SEPARATOR only
-												Add support to libcore for encoded-in-rust unicode character properties, at least. Add script to compute them from unicode.org.

											
										
										
											2011-12-23 18:48:08 -08:00
+								    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								    Cc  Control                 a C0 or C1 control code
 								    Cf  Format                  a format control character
 								    Cs  Surrogate               a surrogate code point
 								    Co  Private_Use             a private-use character
 								    Cn  Unassigned              a reserved unassigned code point or a noncharacter
-												Add support to libcore for encoded-in-rust unicode character properties, at least. Add script to compute them from unicode.org.

											
										
										
											2011-12-23 18:48:08 -08:00
+								*/
-												stop treating char as an integer type

Closes #7609

											
										
										
											2013-09-03 19:24:12 -04:00
+								/// The highest valid code point (U+10FFFF)
 								///
 								/// `from_u32` rejects every value greater than this.
 								pub static MAX: char = '\U0010ffff';
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Converts from `u32` to a `char`
-												std: mark two helper functions #[inline].

`str::utf8_char_width` and `char::from_u32` are tiny, which means it's a
big performance hit to call them in a tight loop outside libstd.

											
										
										
											2014-02-23 09:11:36 +11:00
+								#[inline]
-												stop treating char as an integer type

Closes #7609

											
										
										
											2013-09-03 19:24:12 -04:00
+								pub fn from_u32(i: u32) -> Option<char> {
 								    // catch out-of-bounds and surrogates
 								    if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
 								        None
 								    } else {
 								        Some(unsafe { transmute(i) })
 								    }
 								}
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Returns whether the specified `char` is considered a Unicode alphabetic
 								/// code point
-												Remove pub from core::{unicode,cmath,stackwalk,rt}

											
										
										
											2013-05-03 17:10:32 -04:00
+								pub fn is_alphabetic(c: char) -> bool   { derived_property::Alphabetic(c) }
-												doc: add two missing char methods doc-strings

XID_* property are defined in UAX #31, just reference it here.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:27:49 +01:00
 								/// Returns whether the specified `char` satisfies the 'XID_Start' Unicode property
 								///
 								/// 'XID_Start' is a Unicode Derived Property specified in
 								/// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
 								/// mostly similar to 'ID_Start' but modified for closure under NFKx.
-												Remove pub from core::{unicode,cmath,stackwalk,rt}

											
										
										
											2013-05-03 17:10:32 -04:00
+								pub fn is_XID_start(c: char) -> bool    { derived_property::XID_Start(c) }
-												doc: add two missing char methods doc-strings

XID_* property are defined in UAX #31, just reference it here.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:27:49 +01:00
 								/// Returns whether the specified `char` satisfies the 'XID_Continue' Unicode property
 								///
 								/// 'XID_Continue' is a Unicode Derived Property specified in
 								/// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
 								/// mostly similar to 'ID_Continue' but modified for closure under NFKx.
-												Remove pub from core::{unicode,cmath,stackwalk,rt}

											
										
										
											2013-05-03 17:10:32 -04:00
+								pub fn is_XID_continue(c: char) -> bool { derived_property::XID_Continue(c) }
-												char: add is_lowercase(), is_uppercase()

											
										
										
											2011-12-24 10:41:11 +01:00
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Indicates whether a `char` is in lower case
 								///
 								/// This is defined according to the terms of the Unicode Derived Core Property 'Lowercase'.
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												replace #[inline(always)] with #[inline]. r=burningtree.

											
										
										
											2013-06-18 14:45:18 -07:00
+								#[inline]
-												Fix handling of upper/lowercase, and whitespace

											
										
										
											2013-11-26 06:15:45 +01:00
+								pub fn is_lowercase(c: char) -> bool { derived_property::Lowercase(c) }
-												char: add is_lowercase(), is_uppercase()

											
										
										
											2011-12-24 10:41:11 +01:00
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Indicates whether a `char` is in upper case
 								///
 								/// This is defined according to the terms of the Unicode Derived Core Property 'Uppercase'.
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												replace #[inline(always)] with #[inline]. r=burningtree.

											
										
										
											2013-06-18 14:45:18 -07:00
+								#[inline]
-												Fix handling of upper/lowercase, and whitespace

											
										
										
											2013-11-26 06:15:45 +01:00
+								pub fn is_uppercase(c: char) -> bool { derived_property::Uppercase(c) }
-												char: add is_lowercase(), is_uppercase()

											
										
										
											2011-12-24 10:41:11 +01:00
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Indicates whether a `char` is whitespace
 								///
 								/// Whitespace is defined in terms of the Unicode Property 'White_Space'.
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												replace #[inline(always)] with #[inline]. r=burningtree.

											
										
										
											2013-06-18 14:45:18 -07:00
+								#[inline]
-												libcore: Remove `pure` from libcore. rs=depure

											
										
										
											2013-03-21 21:20:48 -07:00
+								pub fn is_whitespace(c: char) -> bool {
-												Fix handling of upper/lowercase, and whitespace

											
										
										
											2013-11-26 06:15:45 +01:00
+								    // As an optimization ASCII whitespace characters are checked separately
-												Faster check for ascii-space

Since ' ' is by far one of the most common characters, it is worthwhile
to put it first, and short-circuit the rest of the function.

On the same JSON benchmark, as the json_perf improvement, reading example.json
10 times from https://code.google.com/p/rapidjson/wiki/Performance.

Before: 0.16s
After: 0.11s

											
										
										
											2013-07-06 01:54:29 -04:00
+								    c == ' '
 								        || ('\x09' <= c && c <= '\x0d')
-												Fix handling of upper/lowercase, and whitespace

											
										
										
											2013-11-26 06:15:45 +01:00
+								        || property::White_Space(c)
-												Add support to libcore for encoded-in-rust unicode character properties, at least. Add script to compute them from unicode.org.

											
										
										
											2011-12-23 18:48:08 -08:00
+								}
-												Copy first batch of material from libstd to libcore.

											
										
										
											2011-12-13 16:25:51 -08:00
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Indicates whether a `char` is alphanumeric
 								///
 								/// Alphanumericness is defined in terms of the Unicode General Categories
 								/// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												replace #[inline(always)] with #[inline]. r=burningtree.

											
										
										
											2013-06-18 14:45:18 -07:00
+								#[inline]
-												libcore: Remove `pure` from libcore. rs=depure

											
										
										
											2013-03-21 21:20:48 -07:00
+								pub fn is_alphanumeric(c: char) -> bool {
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								    derived_property::Alphabetic(c)
 								        || general_category::Nd(c)
 								        || general_category::Nl(c)
 								        || general_category::No(c)
-												Copy first batch of material from libstd to libcore.

											
										
										
											2011-12-13 16:25:51 -08:00
+								}
-												Added is_control function, method, and tests.

											
										
										
											2013-09-23 17:10:48 -04:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Indicates whether a `char` is a control code point
 								///
 								/// Control code points are defined in terms of the Unicode General Category
-												Added is_control function, method, and tests.

											
										
										
											2013-09-23 17:10:48 -04:00
+								/// 'Cc'.
 								///
 								#[inline]
 								pub fn is_control(c: char) -> bool { general_category::Cc(c) }
-												doc: don't refer to 'char' as characters

This seems to be causing some confusion among users. Rust's char are
not 8bit characters, but 32bit UCS-4 codepoint without surrogates
(Unicode Scalar Values as per Unicode glossary).
Make the doc more explicit about it.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-07 23:53:34 +01:00
+								/// Indicates whether the `char` is numeric (Nd, Nl, or No)
-												replace #[inline(always)] with #[inline]. r=burningtree.

											
										
										
											2013-06-18 14:45:18 -07:00
+								#[inline]
-												libcore: Remove `pure` from libcore. rs=depure

											
										
										
											2013-03-21 21:20:48 -07:00
+								pub fn is_digit(c: char) -> bool {
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								    general_category::Nd(c)
 								        || general_category::Nl(c)
 								        || general_category::No(c)
-												core: added char::is_digit (matching Nd, Nl, No)

											
										
										
											2012-02-08 01:52:09 -08:00
+								}
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Checks if a `char` parses as a numeric digit in the given radix
 								///
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								/// Compared to `is_digit()`, this function only recognizes the
 								/// characters `0-9`, `a-z` and `A-Z`.
 								///
 								/// # Return value
 								///
 								/// Returns `true` if `c` is a valid digit under `radix`, and `false`
 								/// otherwise.
 								///
 								/// # Failure
 								///
 								/// Fails if given a `radix` > 36.
 								///
 								/// # Note
 								///
 								/// This just wraps `to_digit()`.
 								///
-												replace #[inline(always)] with #[inline]. r=burningtree.

											
										
										
											2013-06-18 14:45:18 -07:00
+								#[inline]
-												libcore: Remove `pure` from libcore. rs=depure

											
										
										
											2013-03-21 21:20:48 -07:00
+								pub fn is_digit_radix(c: char, radix: uint) -> bool {
-												Added char::from_digit(), char::is_digit_radix() and an argument check to char::to_digit().

											
										
										
											2013-01-20 21:28:12 +01:00
+								    match to_digit(c, radix) {
 								        Some(_) => true,
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								        None    => false,
-												Added char::from_digit(), char::is_digit_radix() and an argument check to char::to_digit().

											
										
										
											2013-01-20 21:28:12 +01:00
+								    }
 								}
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Converts a `char` to the corresponding digit
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
 								/// # Return value
 								///
 								/// If `c` is between '0' and '9', the corresponding value
 								/// between 0 and 9. If `c` is 'a' or 'A', 10. If `c` is
-												doc: don't refer to 'char' as characters

This seems to be causing some confusion among users. Rust's char are
not 8bit characters, but 32bit UCS-4 codepoint without surrogates
(Unicode Scalar Values as per Unicode glossary).
Make the doc more explicit about it.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-07 23:53:34 +01:00
+								/// 'b' or 'B', 11, etc. Returns none if the `char` does not
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								/// refer to a digit in the given radix.
 								///
 								/// # Failure
 								///
 								/// Fails if given a `radix` outside the range `[0..36]`.
 								///
-												Inlining methods/functions in core.

Also inlining some functions which take functions as arguments.

											
										
										
											2013-01-13 23:37:30 +09:00
+								#[inline]
-												libcore: Remove `pure` from libcore. rs=depure

											
										
										
											2013-03-21 21:20:48 -07:00
+								pub fn to_digit(c: char, radix: uint) -> Option<uint> {
-												Added char::from_digit(), char::is_digit_radix() and an argument check to char::to_digit().

											
										
										
											2013-01-20 21:28:12 +01:00
+								    if radix > 36 {
-												core: Add a limited implementation of failure

This adds an small of failure to libcore, hamstrung by the fact that std::fmt
hasn't been migrated yet. A few asserts were re-worked to not use std::fmt
features, but these asserts can go back to their original form once std::fmt has
migrated.

The current failure implementation is to just have some symbols exposed by
std::rt::unwind that are linked against by libcore. This is an explicit circular
dependency, unfortunately. This will be officially supported in the future
through compiler support with much nicer failure messages. Additionally, there
are two depended-upon symbols today, but in the future there will only be one
(once std::fmt has migrated).

											
										
										
											2014-05-01 10:47:18 -07:00
+								        fail!("to_digit: radix is too high (maximum 36)");
-												Added char::from_digit(), char::is_digit_radix() and an argument check to char::to_digit().

											
										
										
											2013-01-20 21:28:12 +01:00
+								    }
-												Convert alt to match. Stop parsing alt

											
										
										
											2012-08-06 12:34:08 -07:00
+								    let val = match c {
-												Remove the 'to' keyword

											
										
										
											2012-09-01 18:38:05 -07:00
+								      '0' .. '9' => c as uint - ('0' as uint),
 								      'a' .. 'z' => c as uint + 10u - ('a' as uint),
 								      'A' .. 'Z' => c as uint + 10u - ('A' as uint),
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								      _ => return None,
-												Make the various from_str functions return options

So that they can be used with user input without causing task
failures.

Closes #1335

											
										
										
											2012-02-22 13:18:15 +01:00
+								    };
-												Camel case the option type

											
										
										
											2012-08-20 12:23:37 -07:00
+								    if val < radix { Some(val) }
 								    else { None }
-												Copy first batch of material from libstd to libcore.

											
										
										
											2011-12-13 16:25:51 -08:00
+								}
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
+								/// Convert a char to its uppercase equivalent
 								///
 								/// The case-folding performed is the common or simple mapping:
-												Remove code duplication

Remove whitespace

Update documentation for to_uppercase, to_lowercase

											
										
										
											2014-03-01 07:40:38 +01:00
+								/// it maps one unicode codepoint (one char in Rust) to its uppercase equivalent according
 								/// to the Unicode database at ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
 								/// The additional SpecialCasing.txt is not considered here, as it expands to multiple
 								/// codepoints in some cases.
 								///
 								/// A full reference can be found here
 								/// http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
+								///
 								/// # Return value
 								///
-												Remove code duplication

Remove whitespace

Update documentation for to_uppercase, to_lowercase

											
										
										
											2014-03-01 07:40:38 +01:00
+								/// Returns the char itself if no conversion was made
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
+								#[inline]
 								pub fn to_uppercase(c: char) -> char {
 								    conversions::to_upper(c)
 								}
 								/// Convert a char to its lowercase equivalent
 								///
-												Remove code duplication

Remove whitespace

Update documentation for to_uppercase, to_lowercase

											
										
										
											2014-03-01 07:40:38 +01:00
+								/// The case-folding performed is the common or simple mapping
 								/// see `to_uppercase` for references and more information
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
+								///
 								/// # Return value
 								///
 								/// Returns the char itself if no conversion is possible
 								#[inline]
 								pub fn to_lowercase(c: char) -> char {
 								    // Hands the work to `conversions::to_lower`, which yields a single `char`.
 								    let lower = conversions::to_lower(c);
 								    lower
 								}
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Converts a number to the character representing it
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
 								/// # Return value
 								///
 								/// Returns `Some(char)` if `num` represents one digit under `radix`,
 								/// using one character of `0-9` or `a-z`, or `None` if it doesn't.
 								///
 								/// # Failure
 								///
 								/// Fails if given an `radix` > 36.
 								///
-												Added char::from_digit(), char::is_digit_radix() and an argument check to char::to_digit().

											
										
										
											2013-01-20 21:28:12 +01:00
+								#[inline]
-												libcore: Remove `pure` from libcore. rs=depure

											
										
										
											2013-03-21 21:20:48 -07:00
+								pub fn from_digit(num: uint, radix: uint) -> Option<char> {
-												Added char::from_digit(), char::is_digit_radix() and an argument check to char::to_digit().

											
										
										
											2013-01-20 21:28:12 +01:00
+								    if radix > 36 {
-												core: Add a limited implementation of failure

This adds an small of failure to libcore, hamstrung by the fact that std::fmt
hasn't been migrated yet. A few asserts were re-worked to not use std::fmt
features, but these asserts can go back to their original form once std::fmt has
migrated.

The current failure implementation is to just have some symbols exposed by
std::rt::unwind that are linked against by libcore. This is an explicit circular
dependency, unfortunately. This will be officially supported in the future
through compiler support with much nicer failure messages. Additionally, there
are two depended-upon symbols today, but in the future there will only be one
(once std::fmt has migrated).

											
										
										
											2014-05-01 10:47:18 -07:00
+								        fail!("from_digit: radix is to high (maximum 36)");
-												Added char::from_digit(), char::is_digit_radix() and an argument check to char::to_digit().

											
										
										
											2013-01-20 21:28:12 +01:00
+								    }
 								    if num < radix {
-												stop treating char as an integer type

Closes #7609

											
										
										
											2013-09-03 19:24:12 -04:00
+								        unsafe {
 								            if num < 10 {
 								                Some(transmute(('0' as uint + num) as u32))
 								            } else {
 								                Some(transmute(('a' as uint + num - 10u) as u32))
 								            }
-												Added char::from_digit(), char::is_digit_radix() and an argument check to char::to_digit().

											
										
										
											2013-01-20 21:28:12 +01:00
+								        }
 								    } else {
 								        None
 								    }
 								}
-												Add Unicode decomposition mappings to std::unicode

											
										
										
											2013-08-07 20:48:10 +02:00
+								// Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
 								// S_BASE: start of the range that `decompose_canonical` excludes from the
 								// table-driven decomposition (the range is S_BASE .. S_BASE + S_COUNT).
 								static S_BASE: uint = 0xAC00;
 								// L_BASE, V_BASE, T_BASE: bases to which `decompose_hangul` adds its three
 								// computed indices to produce the output `char`s.
 								static L_BASE: uint = 0x1100;
 								static V_BASE: uint = 0x1161;
 								static T_BASE: uint = 0x11A7;
 								static L_COUNT: uint = 19;
 								static V_COUNT: uint = 21;
 								static T_COUNT: uint = 28;
 								static N_COUNT: uint = (V_COUNT * T_COUNT); // 21 * 28 = 588
 								static S_COUNT: uint = (L_COUNT * N_COUNT); // 19 * 588 = 11172
 								// Decompose a precomposed Hangul syllable
-												libstd: Change all uses of `&fn(A)->B` over to `|A|->B` in libstd

											
										
										
											2013-11-18 21:15:42 -08:00
+								fn decompose_hangul(s: char, f: |char|) {
-												Add Unicode decomposition mappings to std::unicode

											
										
										
											2013-08-07 20:48:10 +02:00
+								    let si = s as uint - S_BASE;
 								    let li = si / N_COUNT;
-												stop treating char as an integer type

Closes #7609

											
										
										
											2013-09-03 19:24:12 -04:00
+								    unsafe {
 								        f(transmute((L_BASE + li) as u32));
-												Add Unicode decomposition mappings to std::unicode

											
										
										
											2013-08-07 20:48:10 +02:00
-												stop treating char as an integer type

Closes #7609

											
										
										
											2013-09-03 19:24:12 -04:00
+								        let vi = (si % N_COUNT) / T_COUNT;
 								        f(transmute((V_BASE + vi) as u32));
-												Add Unicode decomposition mappings to std::unicode

											
										
										
											2013-08-07 20:48:10 +02:00
-												stop treating char as an integer type

Closes #7609

											
										
										
											2013-09-03 19:24:12 -04:00
+								        let ti = si % T_COUNT;
 								        if ti > 0 {
 								            f(transmute((T_BASE + ti) as u32));
 								        }
-												Add Unicode decomposition mappings to std::unicode

											
										
										
											2013-08-07 20:48:10 +02:00
+								    }
 								}
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Returns the canonical decomposition of a character
-												libstd: Change all uses of `&fn(A)->B` over to `|A|->B` in libstd

											
										
										
											2013-11-18 21:15:42 -08:00
+								pub fn decompose_canonical(c: char, f: |char|) {
-												Add Unicode decomposition mappings to std::unicode

											
										
										
											2013-08-07 20:48:10 +02:00
+								    if (c as uint) < S_BASE || (c as uint) >= (S_BASE + S_COUNT) {
 								        decompose::canonical(c, f);
 								    } else {
 								        decompose_hangul(c, f);
 								    }
 								}
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Returns the compatibility decomposition of a character
-												libstd: Change all uses of `&fn(A)->B` over to `|A|->B` in libstd

											
										
										
											2013-11-18 21:15:42 -08:00
+								pub fn decompose_compatible(c: char, f: |char|) {
-												Add Unicode decomposition mappings to std::unicode

											
										
										
											2013-08-07 20:48:10 +02:00
+								    if (c as uint) < S_BASE || (c as uint) >= (S_BASE + S_COUNT) {
 								        decompose::compatibility(c, f);
 								    } else {
 								        decompose_hangul(c, f);
 								    }
 								}
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Returns the hexadecimal Unicode escape of a `char`
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
 								/// The rules are as follows:
 								///
 								/// - chars in [0,0xff] get 2-digit escapes: `\\xNN`
 								/// - chars in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`
 								/// - chars in [0x10000,0x10ffff] get 8-digit escapes: `\\UNNNNNNNN`
 								///
-												libstd: Change all uses of `&fn(A)->B` over to `|A|->B` in libstd

											
										
										
											2013-11-18 21:15:42 -08:00
+								pub fn escape_unicode(c: char, f: |char|) {
-												Change char::escape_{default,unicode} to take callbacks instead of allocating
strings

											
										
										
											2013-06-28 14:04:13 -07:00
+								    // avoid calling str::to_str_radix because we don't really need to allocate
 								    // here.
 								    f('\\');
-												Remove and replace cond! Closes #9282.

											
										
										
											2013-09-18 07:21:57 -04:00
+								    let pad = match () {
 								        _ if c <= '\xff'    => { f('x'); 2 }
 								        _ if c <= '\uffff'  => { f('u'); 4 }
 								        _                   => { f('U'); 8 }
 								    };
-												Use std::iter::range_step

Use the iterator version instead of the old uint::/int::range_step
functions.

											
										
										
											2013-09-15 02:46:51 +02:00
+								    for offset in range_step::<i32>(4 * (pad - 1), -1, -4) {
-												stop treating char as an integer type

Closes #7609

											
										
										
											2013-09-03 19:24:12 -04:00
+								        unsafe {
 								            match ((c as i32) >> offset) & 0xf {
 								                i @ 0 .. 9 => { f(transmute('0' as i32 + i)); }
 								                i => { f(transmute('a' as i32 + (i - 10))); }
 								            }
-												Change char::escape_{default,unicode} to take callbacks instead of allocating
strings

											
										
										
											2013-06-28 14:04:13 -07:00
+								        }
-												Use std::iter::range_step

Use the iterator version instead of the old uint::/int::range_step
functions.

											
										
										
											2013-09-15 02:46:51 +02:00
+								    }
-												Implement 2 kinds of char / str escaping. Use in rustc. Close #2306.

											
										
										
											2012-05-31 15:31:13 -07:00
+								}
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								/// Returns a 'default' ASCII and C++11-like literal escape of a `char`
-												Use `///` style doc-comments and add missing headings

											
										
										
											2013-05-19 06:03:40 +10:00
+								///
 								/// The default is chosen with a bias toward producing literals that are
 								/// legal in a variety of languages, including C++11 and similar C-family
 								/// languages. The exact rules are:
 								///
 								/// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
 								/// - Single-quote, double-quote and backslash chars are backslash-escaped.
 								/// - Any other chars in the range [0x20,0x7e] are not escaped.
 								/// - Any other chars are given hex unicode escapes; see `escape_unicode`.
 								///
-												libstd: Change all uses of `&fn(A)->B` over to `|A|->B` in libstd

											
										
										
											2013-11-18 21:15:42 -08:00
+								pub fn escape_default(c: char, f: |char|) {
-												Convert alt to match. Stop parsing alt

											
										
										
											2012-08-06 12:34:08 -07:00
+								    match c {
-												Change char::escape_{default,unicode} to take callbacks instead of allocating
strings

											
										
										
											2013-06-28 14:04:13 -07:00
+								        '\t' => { f('\\'); f('t'); }
 								        '\r' => { f('\\'); f('r'); }
 								        '\n' => { f('\\'); f('n'); }
 								        '\\' => { f('\\'); f('\\'); }
 								        '\'' => { f('\\'); f('\''); }
 								        '"'  => { f('\\'); f('"'); }
 								        '\x20' .. '\x7e' => { f(c); }
 								        _ => c.escape_unicode(f),
-												Implement 2 kinds of char / str escaping. Use in rustc. Close #2306.

											
										
										
											2012-05-31 15:31:13 -07:00
+								    }
 								}
-												doc: don't refer to 'char' as characters

This seems to be causing some confusion among users. Rust's char are
not 8bit characters, but 32bit UCS-4 codepoint without surrogates
(Unicode Scalar Values as per Unicode glossary).
Make the doc more explicit about it.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-07 23:53:34 +01:00
+								/// Returns the amount of bytes this `char` would need if encoded in UTF-8
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								pub fn len_utf8_bytes(c: char) -> uint {
 								    static MAX_ONE_B:   uint = 128u;
 								    static MAX_TWO_B:   uint = 2048u;
 								    static MAX_THREE_B: uint = 65536u;
 								    static MAX_FOUR_B:  uint = 2097152u;
 								    let code = c as uint;
-												Remove and replace cond! Closes #9282.

											
										
										
											2013-09-18 07:21:57 -04:00
+								    match () {
 								        _ if code < MAX_ONE_B   => 1u,
 								        _ if code < MAX_TWO_B   => 2u,
 								        _ if code < MAX_THREE_B => 3u,
 								        _ if code < MAX_FOUR_B  => 4u,
-												Drop the '2' suffix from logging macros

Who doesn't like a massive renaming?

											
										
										
											2013-10-21 13:08:31 -07:00
+								        _                       => fail!("invalid character!"),
-												Remove and replace cond! Closes #9282.

											
										
										
											2013-09-18 07:21:57 -04:00
+								    }
-												Code modernisation and cleanup

											
										
										
											2013-05-19 05:53:30 +10:00
+								}
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
+								/// Useful functions for Unicode characters.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								pub trait Char {
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
+								    /// Returns whether the specified character is considered a Unicode
 								    /// alphabetic code point.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_alphabetic(&self) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Returns whether the specified character satisfies the 'XID_Start'
 								    /// Unicode property.
 								    ///
 								    /// 'XID_Start' is a Unicode Derived Property specified in
 								    /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
 								    /// mostly similar to ID_Start but modified for closure under NFKx.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_XID_start(&self) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Returns whether the specified `char` satisfies the 'XID_Continue'
 								    /// Unicode property.
 								    ///
 								    /// 'XID_Continue' is a Unicode Derived Property specified in
 								    /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
 								    /// mostly similar to 'ID_Continue' but modified for closure under NFKx.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_XID_continue(&self) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Indicates whether a character is in lowercase.
 								    ///
 								    /// This is defined according to the terms of the Unicode Derived Core
 								    /// Property `Lowercase`.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_lowercase(&self) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Indicates whether a character is in uppercase.
 								    ///
 								    /// This is defined according to the terms of the Unicode Derived Core
 								    /// Property `Uppercase`.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_uppercase(&self) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Indicates whether a character is whitespace.
 								    ///
 								    /// Whitespace is defined in terms of the Unicode Property `White_Space`.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_whitespace(&self) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Indicates whether a character is alphanumeric.
 								    ///
 								    /// Alphanumericness is defined in terms of the Unicode General Categories
 								    /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_alphanumeric(&self) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Indicates whether a character is a control code point.
 								    ///
 								    /// Control code points are defined in terms of the Unicode General
 								    /// Category `Cc`.
-												Added is_control function, method, and tests.

											
										
										
											2013-09-23 17:10:48 -04:00
+								    fn is_control(&self) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Indicates whether the character is numeric (Nd, Nl, or No).
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_digit(&self) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Checks if a `char` parses as a numeric digit in the given radix.
 								    ///
 								    /// Compared to `is_digit()`, this function only recognizes the characters
 								    /// `0-9`, `a-z` and `A-Z`.
 								    ///
 								    /// # Return value
 								    ///
 								    /// Returns `true` if `c` is a valid digit under `radix`, and `false`
 								    /// otherwise.
 								    ///
 								    /// # Failure
 								    ///
 								    /// Fails if given a radix > 36.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_digit_radix(&self, radix: uint) -> bool;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Converts a character to the corresponding digit.
 								    ///
 								    /// # Return value
 								    ///
 								    /// If `c` is between '0' and '9', returns the corresponding value between
 								    /// 0 and 9. If `c` is 'a' or 'A', 10; if `c` is 'b' or 'B', 11; etc. Returns
 								    /// `None` if the character does not refer to a digit in the given radix.
 								    ///
 								    /// # Failure
 								    ///
 								    /// Fails if given a radix outside the range [0..36].
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn to_digit(&self, radix: uint) -> Option<uint>;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Converts a character to its lowercase equivalent.
 								    ///
 								    /// The case-folding performed is the common or simple mapping. See
 								    /// `to_uppercase()` for references and more information.
 								    ///
 								    /// # Return value
 								    ///
 								    /// Returns the lowercase equivalent of the character, or the character
 								    /// itself if no conversion is possible.
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
+								    fn to_lowercase(&self) -> char;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Converts a character to its uppercase equivalent.
 								    ///
 								    /// The case-folding performed is the common or simple mapping: it maps
 								    /// one unicode codepoint (one character in Rust) to its uppercase
 								    /// equivalent according to the Unicode database [1]. The additional
 								    /// `SpecialCasing.txt` is not considered here, as it expands to multiple
 								    /// codepoints in some cases.
 								    ///
 								    /// A full reference can be found here [2].
 								    ///
 								    /// # Return value
 								    ///
 								    /// Returns the uppercase equivalent of the character, or the character
 								    /// itself if no conversion was made.
 								    ///
 								    /// [1]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
 								    ///
 								    /// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
+								    fn to_uppercase(&self) -> char;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Converts a number to the character representing it.
 								    ///
 								    /// # Return value
 								    ///
 								    /// Returns `Some(char)` if `num` represents one digit under `radix`,
 								    /// using one character of `0-9` or `a-z`, or `None` if it doesn't.
 								    ///
 								    /// # Failure
 								    ///
 								    /// Fails if given a radix > 36.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn from_digit(num: uint, radix: uint) -> Option<char>;
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Returns the hexadecimal Unicode escape of a character.
 								    ///
 								    /// The rules are as follows:
 								    ///
 								    /// * Characters in [0,0xff] get 2-digit escapes: `\\xNN`
 								    /// * Characters in [0x100,0xffff] get 4-digit escapes: `\\uNNNN`.
 								    /// * Characters in [0x10000,0x10ffff] get 8-digit escapes: `\\UNNNNNNNN`.
-												libstd: Change all uses of `&fn(A)->B` over to `|A|->B` in libstd

											
										
										
											2013-11-18 21:15:42 -08:00
+								    fn escape_unicode(&self, f: |char|);
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Returns a 'default' ASCII and C++11-like literal escape of a
 								    /// character.
 								    ///
 								    /// The default is chosen with a bias toward producing literals that are
 								    /// legal in a variety of languages, including C++11 and similar C-family
 								    /// languages. The exact rules are:
 								    ///
 								    /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
 								    /// * Single-quote, double-quote and backslash chars are backslash-
 								    ///   escaped.
 								    /// * Any other chars in the range [0x20,0x7e] are not escaped.
 								    /// * Any other chars are given hex unicode escapes; see `escape_unicode`.
-												libstd: Change all uses of `&fn(A)->B` over to `|A|->B` in libstd

											
										
										
											2013-11-18 21:15:42 -08:00
+								    fn escape_default(&self, f: |char|);
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
 								    /// Returns the number of bytes this character would need if encoded in
 								    /// UTF-8.
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn len_utf8_bytes(&self) -> uint;
-												Implement formatting arguments for strings and integers

Closes #1651

											
										
										
											2013-08-10 00:28:47 -07:00
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
+								    /// Encodes this character as UTF-8 into the provided byte buffer.
-												doc: uniform std::char doc-strings

Uniform and beautify doc-string for current rustdoc output.

Signed-off-by: Luca Bruno <lucab@debian.org>

											
										
										
											2014-03-08 00:13:54 +01:00
+								    ///
-												Implement Char::encode_utf16

And clean up encode_utf8 a bit.

											
										
										
											2014-04-11 12:49:31 -07:00
+								    /// The buffer must be at least 4 bytes long or a runtime failure may
-												libstd: Document the following modules:

* native::io
* std::char
* std::fmt
* std::fmt::parse
* std::io
* std::io::extensions
* std::io::net::ip
* std::io::net::udp
* std::io::net::unix
* std::io::pipe
* std::num
* std::num::f32
* std::num::f64
* std::num::strconv
* std::os

											
										
										
											2014-03-16 15:59:04 -07:00
+								    /// occur.
-												Implement formatting arguments for strings and integers

Closes #1651

											
										
										
											2013-08-10 00:28:47 -07:00
+								    ///
-												Implement Char::encode_utf16

And clean up encode_utf8 a bit.

											
										
										
											2014-04-11 12:49:31 -07:00
+								    /// This will then return the number of bytes written to the slice.
-												Implement formatting arguments for strings and integers

Closes #1651

											
										
										
											2013-08-10 00:28:47 -07:00
+								    fn encode_utf8(&self, dst: &mut [u8]) -> uint;
-												Implement Char::encode_utf16

And clean up encode_utf8 a bit.

											
										
										
											2014-04-11 12:49:31 -07:00
 								    /// Encodes this character as UTF-16 into the provided `u16` buffer.
 								    ///
 								    /// The buffer must be at least 2 elements long or a runtime failure may
 								    /// occur.
 								    ///
 								    /// This will then return the number of `u16`s written to the slice.
 								    fn encode_utf16(&self, dst: &mut [u16]) -> uint;
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								}
 								impl Char for char {
 								    fn is_alphabetic(&self) -> bool { is_alphabetic(*self) }
 								    fn is_XID_start(&self) -> bool { is_XID_start(*self) }
 								    fn is_XID_continue(&self) -> bool { is_XID_continue(*self) }
 								    fn is_lowercase(&self) -> bool { is_lowercase(*self) }
 								    fn is_uppercase(&self) -> bool { is_uppercase(*self) }
 								    fn is_whitespace(&self) -> bool { is_whitespace(*self) }
 								    fn is_alphanumeric(&self) -> bool { is_alphanumeric(*self) }
-												Added is_control function, method, and tests.

											
										
										
											2013-09-23 17:10:48 -04:00
+								    fn is_control(&self) -> bool { is_control(*self) }
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn is_digit(&self) -> bool { is_digit(*self) }
 								    fn is_digit_radix(&self, radix: uint) -> bool { is_digit_radix(*self, radix) }
 								    fn to_digit(&self, radix: uint) -> Option<uint> { to_digit(*self, radix) }
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
+								    fn to_lowercase(&self) -> char { to_lowercase(*self) }
 								    fn to_uppercase(&self) -> char { to_uppercase(*self) }
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								    fn from_digit(num: uint, radix: uint) -> Option<char> { from_digit(num, radix) }
-												libstd: Change all uses of `&fn(A)->B` over to `|A|->B` in libstd

											
										
										
											2013-11-18 21:15:42 -08:00
+								    fn escape_unicode(&self, f: |char|) { escape_unicode(*self, f) }
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
-												libstd: Change all uses of `&fn(A)->B` over to `|A|->B` in libstd

											
										
										
											2013-11-18 21:15:42 -08:00
+								    fn escape_default(&self, f: |char|) { escape_default(*self, f) }
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
 								    fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
-												Implement formatting arguments for strings and integers

Closes #1651

											
										
										
											2013-08-10 00:28:47 -07:00
-												Implement Char::encode_utf16

And clean up encode_utf8 a bit.

											
										
										
											2014-04-11 12:49:31 -07:00
+								    fn encode_utf8(&self, dst: &mut [u8]) -> uint {
-												Implement formatting arguments for strings and integers

Closes #1651

											
										
										
											2013-08-10 00:28:47 -07:00
+								        let code = *self as uint;
 								        if code < MAX_ONE_B {
 								            dst[0] = code as u8;
 								            return 1;
 								        } else if code < MAX_TWO_B {
 								            dst[0] = (code >> 6u & 31u | TAG_TWO_B) as u8;
 								            dst[1] = (code & 63u | TAG_CONT) as u8;
 								            return 2;
 								        } else if code < MAX_THREE_B {
 								            dst[0] = (code >> 12u & 15u | TAG_THREE_B) as u8;
 								            dst[1] = (code >> 6u & 63u | TAG_CONT) as u8;
 								            dst[2] = (code & 63u | TAG_CONT) as u8;
 								            return 3;
 								        } else {
 								            dst[0] = (code >> 18u & 7u | TAG_FOUR_B) as u8;
 								            dst[1] = (code >> 12u & 63u | TAG_CONT) as u8;
 								            dst[2] = (code >> 6u & 63u | TAG_CONT) as u8;
 								            dst[3] = (code & 63u | TAG_CONT) as u8;
 								            return 4;
 								        }
 								    }
-												Implement Char::encode_utf16

And clean up encode_utf8 a bit.

											
										
										
											2014-04-11 12:49:31 -07:00
 								    // Writes the UTF-16 encoding of `*self` to the front of `dst` and returns
 								    // the number of `u16` units written: 1 for a code point that fits in 16
 								    // bits, 2 (a surrogate pair) for anything larger. Indexing `dst` fails at
 								    // runtime if the slice is shorter than the number of units required.
 								    fn encode_utf16(&self, dst: &mut [u16]) -> uint {
 								        let mut ch = *self as uint;
 								        if (ch & 0xFFFF_u) == ch {
 								            // The BMP falls through (assuming non-surrogate, as it
 								            // should)
 								            assert!(ch <= 0xD7FF_u || ch >= 0xE000_u);
 								            dst[0] = ch as u16;
 								            // One unit written.
 								            1
 								        } else {
 								            // Supplementary planes break into surrogates.
 								            assert!(ch >= 0x1_0000_u && ch <= 0x10_FFFF_u);
 								            // Rebase to a 20-bit value: the high 10 bits go into the
 								            // first (0xD800-based) unit, the low 10 bits into the
 								            // second (0xDC00-based) unit.
 								            ch -= 0x1_0000_u;
 								            dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
 								            dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
 								            // Two units written.
 								            2
 								        }
 								    }
-												Create Char trait

											
										
										
											2013-05-19 05:50:02 +10:00
+								}
-												Remove #[cfg(notest)] and use #[cfg(not(test))] to coincide with #[cfg(debug)]

											
										
										
											2013-05-08 21:11:23 +10:00
 								// `Eq` for `char`: `eq` compares the two dereferenced characters with `==`.
 								// The impl is compiled only outside test builds (`cfg(not(test))`).
+								#[cfg(not(test))]
-												librustc: Replace `impl Type : Trait` with `impl Trait for Type`. rs=implflipping

											
										
										
											2013-02-14 11:47:00 -08:00
+								impl Eq for char {
-												replace #[inline(always)] with #[inline]. r=burningtree.

											
										
										
											2013-06-18 14:45:18 -07:00
+								    #[inline]
-												libcore: Remove `pure` from libcore. rs=depure

											
										
										
											2013-03-21 21:20:48 -07:00
+								    fn eq(&self, other: &char) -> bool { (*self) == (*other) }
-												rustc: De-mode all overloaded operators

											
										
										
											2012-09-19 18:00:26 -07:00
+								}
-												rustc: Make `<` and `=` into traits

											
										
										
											2012-08-27 16:26:35 -07:00
-												core: Inherit the char module

											
										
										
											2014-04-30 20:33:08 -07:00
 								// `TotalEq` for `char` is an empty impl: no methods are defined here.
 								// Compiled only outside test builds (`cfg(not(test))`).
+								#[cfg(not(test))]
 								impl TotalEq for char {}
-												Remove #[cfg(notest)] and use #[cfg(not(test))] to coincide with #[cfg(debug)]

											
										
										
											2013-05-08 21:11:23 +10:00
 								// `Ord` for `char`: `lt` compares the two dereferenced characters with `<`.
 								// The impl is compiled only outside test builds (`cfg(not(test))`).
+								#[cfg(not(test))]
-												implement Ord, TotalEq and TotalOrd for char

Closes #6063

											
										
										
											2013-04-25 13:55:39 -04:00
+								impl Ord for char {
-												replace #[inline(always)] with #[inline]. r=burningtree.

											
										
										
											2013-06-18 14:45:18 -07:00
+								    #[inline]
-												implement Ord, TotalEq and TotalOrd for char

Closes #6063

											
										
										
											2013-04-25 13:55:39 -04:00
+								    fn lt(&self, other: &char) -> bool { *self < *other }
 								}
-												core: Inherit the char module

											
										
										
											2014-04-30 20:33:08 -07:00
+								#[cfg(not(test))]
 								impl TotalOrd for char {
 								    fn cmp(&self, other: &char) -> Ordering {
 								        (*self as u32).cmp(&(*other as u32))
 								    }
 								}
-												std: Add a bunch of Default impls

											
										
										
											2013-09-11 21:49:25 -07:00
+								#[cfg(not(test))]
 								impl Default for char {
 								    #[inline]
 								    fn default() -> char { '\x00' }
 								}
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								#[cfg(test)]
 								mod test {
-												core: Get coretest working

This mostly involved frobbing imports between realstd, realcore, and the core
being test. Some of the imports are a little counterintuitive, but it mainly
focuses around libcore's types not implementing Show while libstd's types
implement Show.

											
										
										
											2014-05-01 18:06:59 -07:00
+								    use super::{escape_unicode, escape_default};
 								    use realcore::char::Char;
 								    use slice::ImmutableVector;
 								    use realstd::option::{Some, None};
 								    use realstd::strbuf::StrBuf;
 								    use realstd::str::StrAllocating;
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								    #[test]
 								    fn test_is_lowercase() {
 								        assert!('a'.is_lowercase());
 								        assert!('ö'.is_lowercase());
 								        assert!('ß'.is_lowercase());
 								        assert!(!'Ü'.is_lowercase());
 								        assert!(!'P'.is_lowercase());
 								    }
-												libcore: Move core tests into libcore

											
										
										
											2012-01-17 17:28:21 -08:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								    #[test]
 								    fn test_is_uppercase() {
 								        assert!(!'h'.is_uppercase());
 								        assert!(!'ä'.is_uppercase());
 								        assert!(!'ß'.is_uppercase());
 								        assert!('Ö'.is_uppercase());
 								        assert!('T'.is_uppercase());
 								    }
-												libcore: Move core tests into libcore

											
										
										
											2012-01-17 17:28:21 -08:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								    #[test]
 								    fn test_is_whitespace() {
 								        assert!(' '.is_whitespace());
 								        assert!('\u2007'.is_whitespace());
 								        assert!('\t'.is_whitespace());
 								        assert!('\n'.is_whitespace());
 								        assert!(!'a'.is_whitespace());
 								        assert!(!'_'.is_whitespace());
 								        assert!(!'\u0000'.is_whitespace());
 								    }
-												libcore: Move core tests into libcore

											
										
										
											2012-01-17 17:28:21 -08:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								    #[test]
 								    fn test_to_digit() {
 								        assert_eq!('0'.to_digit(10u), Some(0u));
 								        assert_eq!('1'.to_digit(2u), Some(1u));
 								        assert_eq!('2'.to_digit(3u), Some(2u));
 								        assert_eq!('9'.to_digit(10u), Some(9u));
 								        assert_eq!('a'.to_digit(16u), Some(10u));
 								        assert_eq!('A'.to_digit(16u), Some(10u));
 								        assert_eq!('b'.to_digit(16u), Some(11u));
 								        assert_eq!('B'.to_digit(16u), Some(11u));
 								        assert_eq!('z'.to_digit(36u), Some(35u));
 								        assert_eq!('Z'.to_digit(36u), Some(35u));
 								        assert_eq!(' '.to_digit(10u), None);
 								        assert_eq!('$'.to_digit(36u), None);
 								    }
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								    #[test]
 								    fn test_to_lowercase() {
 								        assert_eq!('A'.to_lowercase(), 'a');
 								        assert_eq!('Ö'.to_lowercase(), 'ö');
 								        assert_eq!('ß'.to_lowercase(), 'ß');
 								        assert_eq!('Ü'.to_lowercase(), 'ü');
 								        assert_eq!('💩'.to_lowercase(), '💩');
 								        assert_eq!('Σ'.to_lowercase(), 'σ');
 								        assert_eq!('Τ'.to_lowercase(), 'τ');
 								        assert_eq!('Ι'.to_lowercase(), 'ι');
 								        assert_eq!('Γ'.to_lowercase(), 'γ');
 								        assert_eq!('Μ'.to_lowercase(), 'μ');
 								        assert_eq!('Α'.to_lowercase(), 'α');
 								        assert_eq!('Σ'.to_lowercase(), 'σ');
 								    }
-												Implement lower, upper case conversion for char

											
										
										
											2014-02-26 13:49:56 +01:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								    #[test]
 								    fn test_to_uppercase() {
 								        assert_eq!('a'.to_uppercase(), 'A');
 								        assert_eq!('ö'.to_uppercase(), 'Ö');
 								        assert_eq!('ß'.to_uppercase(), 'ß'); // not ẞ: Latin capital letter sharp s
 								        assert_eq!('ü'.to_uppercase(), 'Ü');
 								        assert_eq!('💩'.to_uppercase(), '💩');
 								        assert_eq!('σ'.to_uppercase(), 'Σ');
 								        assert_eq!('τ'.to_uppercase(), 'Τ');
 								        assert_eq!('ι'.to_uppercase(), 'Ι');
 								        assert_eq!('γ'.to_uppercase(), 'Γ');
 								        assert_eq!('μ'.to_uppercase(), 'Μ');
 								        assert_eq!('α'.to_uppercase(), 'Α');
 								        assert_eq!('ς'.to_uppercase(), 'Σ');
 								    }
-												Added is_control function, method, and tests.

											
										
										
											2013-09-23 17:10:48 -04:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								    #[test]
 								    fn test_is_control() {
 								        assert!('\u0000'.is_control());
 								        assert!('\u0003'.is_control());
 								        assert!('\u0006'.is_control());
 								        assert!('\u0009'.is_control());
 								        assert!('\u007f'.is_control());
 								        assert!('\u0092'.is_control());
 								        assert!(!'\u0020'.is_control());
 								        assert!(!'\u0055'.is_control());
 								        assert!(!'\u0068'.is_control());
 								    }
-												core: added char::is_digit (matching Nd, Nl, No)

											
										
										
											2012-02-08 01:52:09 -08:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
 								    // `is_digit` must accept '2' and '7' and reject the letters 'c', 'i',
 								    // 'z' and 'Q'.
+								    #[test]
 								    fn test_is_digit() {
 								       assert!('2'.is_digit());
 								       assert!('7'.is_digit());
 								       assert!(!'c'.is_digit());
 								       assert!(!'i'.is_digit());
 								       assert!(!'z'.is_digit());
 								       assert!(!'Q'.is_digit());
-												Change char::escape_{default,unicode} to take callbacks instead of allocating
strings

											
										
										
											2013-06-28 14:04:13 -07:00
+								    }
-												Implement 2 kinds of char / str escaping. Use in rustc. Close #2306.

											
										
										
											2012-05-31 15:31:13 -07:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
 								    // Checks the text produced by `escape_default` for a range of characters.
+								    #[test]
 								    fn test_escape_default() {
 								        // Collects every char that `escape_default` hands to the callback
 								        // into an owned string.
 								        fn string(c: char) -> ~str {
 								            let mut result = StrBuf::new();
 								            escape_default(c, |c| { result.push_char(c); });
 								            return result.into_owned();
 								        }
 								        // Newline, carriage return and both quote characters get backslash escapes.
 								        assert_eq!(string('\n'), "\\n".to_owned());
 								        assert_eq!(string('\r'), "\\r".to_owned());
 								        assert_eq!(string('\''), "\\'".to_owned());
 								        assert_eq!(string('"'), "\\\"".to_owned());
 								        // Space, 'a' and '~' come through unchanged.
 								        assert_eq!(string(' '), " ".to_owned());
 								        assert_eq!(string('a'), "a".to_owned());
 								        assert_eq!(string('~'), "~".to_owned());
 								        // The remaining characters are expected in \xNN, \uNNNN or \UNNNNNNNN form.
 								        assert_eq!(string('\x00'), "\\x00".to_owned());
 								        assert_eq!(string('\x1f'), "\\x1f".to_owned());
 								        assert_eq!(string('\x7f'), "\\x7f".to_owned());
 								        assert_eq!(string('\xff'), "\\xff".to_owned());
 								        assert_eq!(string('\u011b'), "\\u011b".to_owned());
 								        assert_eq!(string('\U0001d4b6'), "\\U0001d4b6".to_owned());
-												Change char::escape_{default,unicode} to take callbacks instead of allocating
strings

											
										
										
											2013-06-28 14:04:13 -07:00
+								    }
-												Added ToStr impl for char
Changed ToStr impl for Ascii

											
										
										
											2013-09-04 03:04:36 +02:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								    #[test]
 								    fn test_escape_unicode() {
 								        fn string(c: char) -> ~str {
 								            let mut result = StrBuf::new();
 								            escape_unicode(c, |c| { result.push_char(c); });
 								            return result.into_owned();
 								        }
 								        assert_eq!(string('\x00'), "\\x00".to_owned());
 								        assert_eq!(string('\n'), "\\x0a".to_owned());
 								        assert_eq!(string(' '), "\\x20".to_owned());
 								        assert_eq!(string('a'), "\\x61".to_owned());
 								        assert_eq!(string('\u011b'), "\\u011b".to_owned());
 								        assert_eq!(string('\U0001d4b6'), "\\U0001d4b6".to_owned());
 								    }
-												Add tests for Char::encode_utf{8,16}

											
										
										
											2014-04-11 13:44:54 -07:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
 								    // `to_str` on the character 't' must yield the owned string "t".
+								    #[test]
 								    fn test_to_str() {
-												core: Get coretest working

This mostly involved frobbing imports between realstd, realcore, and the core
being test. Some of the imports are a little counterintuitive, but it mainly
focuses around libcore's types not implementing Show while libstd's types
implement Show.

											
										
										
											2014-05-01 18:06:59 -07:00
 								        // `ToStr` is brought in from `realstd` for this test only.
+								        use realstd::to_str::ToStr;
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								        let s = 't'.to_str();
 								        assert_eq!(s, "t".to_owned());
-												Add tests for Char::encode_utf{8,16}

											
										
										
											2014-04-11 13:44:54 -07:00
+								    }
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
 								    // Each character must encode to exactly the expected UTF-8 bytes.
+								    #[test]
 								    fn test_encode_utf8() {
 								        // Encodes `input` into a four-byte buffer and compares the first `n`
 								        // bytes, where `n` is the value returned by `encode_utf8`, with `expect`.
 								        fn check(input: char, expect: &[u8]) {
 								            let mut buf = [0u8, ..4];
 								            let n = input.encode_utf8(buf /* as mut slice! */);
 								            assert_eq!(buf.slice_to(n), expect);
 								        }
-												Add tests for Char::encode_utf{8,16}

											
										
										
											2014-04-11 13:44:54 -07:00
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
 								        // One-, two-, three- and four-byte encodings, in that order.
+								        check('x', [0x78]);
 								        check('\u00e9', [0xc3, 0xa9]);
 								        check('\ua66e', [0xea, 0x99, 0xae]);
 								        check('\U0001f4a9', [0xf0, 0x9f, 0x92, 0xa9]);
-												Add tests for Char::encode_utf{8,16}

											
										
										
											2014-04-11 13:44:54 -07:00
+								    }
-												core: Bring char/finally test style up to date

											
										
										
											2014-05-01 10:24:21 -07:00
+								    #[test]
 								    fn test_encode_utf16() {
 								        fn check(input: char, expect: &[u16]) {
 								            let mut buf = [0u16, ..2];
 								            let n = input.encode_utf16(buf /* as mut slice! */);
 								            assert_eq!(buf.slice_to(n), expect);
 								        }
 								        check('x', [0x0078]);
 								        check('\u00e9', [0x00e9]);
 								        check('\ua66e', [0xa66e]);
 								        check('\U0001f4a9', [0xd83d, 0xdca9]);
 								    }
-												Add tests for Char::encode_utf{8,16}

											
										
										
											2014-04-11 13:44:54 -07:00
+								}