diff --git a/src/liballoc/arc.rs b/src/liballoc/arc.rs index 8befb0579c3..8528be2860c 100644 --- a/src/liballoc/arc.rs +++ b/src/liballoc/arc.rs @@ -210,6 +210,21 @@ impl Arc { // contents. unsafe { &**self._ptr } } + + // Non-inlined part of `drop`. + #[inline(never)] + unsafe fn drop_slow(&mut self) { + let ptr = *self._ptr; + + // Destroy the data at this time, even though we may not free the box allocation itself + // (there may still be weak pointers lying around). + drop(ptr::read(&self.inner().data)); + + if self.inner().weak.fetch_sub(1, Release) == 1 { + atomic::fence(Acquire); + deallocate(ptr as *mut u8, size_of::>(), min_align_of::>()) + } + } } /// Get the number of weak references to this value. @@ -325,6 +340,7 @@ impl Drop for Arc { /// /// } // implicit drop /// ``` + #[inline] fn drop(&mut self) { // This structure has #[unsafe_no_drop_flag], so this drop glue may run more than once (but // it is guaranteed to be zeroed after the first if it's run more than once) @@ -353,14 +369,8 @@ impl Drop for Arc { // [1]: (www.boost.org/doc/libs/1_55_0/doc/html/atomic/usage_examples.html) atomic::fence(Acquire); - // Destroy the data at this time, even though we may not free the box allocation itself - // (there may still be weak pointers lying around). - unsafe { drop(ptr::read(&self.inner().data)); } - - if self.inner().weak.fetch_sub(1, Release) == 1 { - atomic::fence(Acquire); - unsafe { deallocate(ptr as *mut u8, size_of::>(), - min_align_of::>()) } + unsafe { + self.drop_slow() } } } diff --git a/src/libunicode/char.rs b/src/libunicode/char.rs index 5850fed980a..e24ade58a52 100644 --- a/src/libunicode/char.rs +++ b/src/libunicode/char.rs @@ -41,6 +41,426 @@ pub use normalize::{decompose_canonical, decompose_compatible, compose}; pub use tables::normalization::canonical_combining_class; pub use tables::UNICODE_VERSION; +#[cfg(stage0)] +/// Functionality for manipulating `char`. 
+#[stable(feature = "rust1", since = "1.0.0")] +pub trait CharExt { + /// Checks if a `char` parses as a numeric digit in the given radix. + /// + /// Compared to `is_numeric()`, this function only recognizes the characters + /// `0-9`, `a-z` and `A-Z`. + /// + /// # Return value + /// + /// Returns `true` if `c` is a valid digit under `radix`, and `false` + /// otherwise. + /// + /// # Panics + /// + /// Panics if given a radix > 36. + /// + /// # Examples + /// + /// ``` + /// let c = '1'; + /// + /// assert!(c.is_digit(10)); + /// + /// assert!('f'.is_digit(16)); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn is_digit(self, radix: u32) -> bool; + + /// Converts a character to the corresponding digit. + /// + /// # Return value + /// + /// If `c` is between '0' and '9', the corresponding value between 0 and + /// 9. If `c` is 'a' or 'A', 10. If `c` is 'b' or 'B', 11, etc. Returns + /// none if the character does not refer to a digit in the given radix. + /// + /// # Panics + /// + /// Panics if given a radix outside the range [0..36]. + /// + /// # Examples + /// + /// ``` + /// let c = '1'; + /// + /// assert_eq!(c.to_digit(10), Some(1)); + /// + /// assert_eq!('f'.to_digit(16), Some(15)); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn to_digit(self, radix: u32) -> Option; + + /// Returns an iterator that yields the hexadecimal Unicode escape of a + /// character, as `char`s. + /// + /// All characters are escaped with Rust syntax of the form `\u{NNNN}` + /// where `NNNN` is the shortest hexadecimal representation of the code + /// point. 
+ /// + /// # Examples + /// + /// ``` + /// for i in '❤'.escape_unicode() { + /// println!("{}", i); + /// } + /// ``` + /// + /// This prints: + /// + /// ```text + /// \ + /// u + /// { + /// 2 + /// 7 + /// 6 + /// 4 + /// } + /// ``` + /// + /// Collecting into a `String`: + /// + /// ``` + /// let heart: String = '❤'.escape_unicode().collect(); + /// + /// assert_eq!(heart, r"\u{2764}"); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn escape_unicode(self) -> EscapeUnicode; + + /// Returns an iterator that yields the 'default' ASCII and + /// C++11-like literal escape of a character, as `char`s. + /// + /// The default is chosen with a bias toward producing literals that are + /// legal in a variety of languages, including C++11 and similar C-family + /// languages. The exact rules are: + /// + /// * Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively. + /// * Single-quote, double-quote and backslash chars are backslash- + /// escaped. + /// * Any other chars in the range [0x20,0x7e] are not escaped. + /// * Any other chars are given hex Unicode escapes; see `escape_unicode`. + /// + /// # Examples + /// + /// ``` + /// for i in '"'.escape_default() { + /// println!("{}", i); + /// } + /// ``` + /// + /// This prints: + /// + /// ```text + /// \ + /// " + /// ``` + /// + /// Collecting into a `String`: + /// + /// ``` + /// let quote: String = '"'.escape_default().collect(); + /// + /// assert_eq!(quote, "\\\""); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn escape_default(self) -> EscapeDefault; + + /// Returns the number of bytes this character would need if encoded in + /// UTF-8. + /// + /// # Examples + /// + /// ``` + /// let n = 'ß'.len_utf8(); + /// + /// assert_eq!(n, 2); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn len_utf8(self) -> usize; + + /// Returns the number of 16-bit code units this character would need if + /// encoded in UTF-16. 
+ /// + /// # Examples + /// + /// ``` + /// let n = 'ß'.len_utf16(); + /// + /// assert_eq!(n, 1); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn len_utf16(self) -> usize; + + /// Encodes this character as UTF-8 into the provided byte buffer, and then + /// returns the number of bytes written. + /// + /// If the buffer is not large enough, nothing will be written into it and a + /// `None` will be returned. A buffer of length four is large enough to + /// encode any `char`. + /// + /// # Examples + /// + /// In both of these examples, 'ß' takes two bytes to encode. + /// + /// ``` + /// let mut b = [0; 2]; + /// + /// let result = 'ß'.encode_utf8(&mut b); + /// + /// assert_eq!(result, Some(2)); + /// ``` + /// + /// A buffer that's too small: + /// + /// ``` + /// let mut b = [0; 1]; + /// + /// let result = 'ß'.encode_utf8(&mut b); + /// + /// assert_eq!(result, None); + /// ``` + #[unstable(feature = "unicode", + reason = "pending decision about Iterator/Writer/Reader")] + fn encode_utf8(self, dst: &mut [u8]) -> Option; + + /// Encodes this character as UTF-16 into the provided `u16` buffer, and + /// then returns the number of `u16`s written. + /// + /// If the buffer is not large enough, nothing will be written into it and a + /// `None` will be returned. A buffer of length 2 is large enough to encode + /// any `char`. + /// + /// # Examples + /// + /// In both of these examples, 'ß' takes one `u16` to encode. 
+ /// + /// ``` + /// let mut b = [0; 1]; + /// + /// let result = 'ß'.encode_utf16(&mut b); + /// + /// assert_eq!(result, Some(1)); + /// ``` + /// + /// A buffer that's too small: + /// + /// ``` + /// let mut b = [0; 0]; + /// + /// let result = 'ß'.encode_utf16(&mut b); + /// + /// assert_eq!(result, None); + /// ``` + #[unstable(feature = "unicode", + reason = "pending decision about Iterator/Writer/Reader")] + fn encode_utf16(self, dst: &mut [u16]) -> Option; + + /// Returns whether the specified character is considered a Unicode + /// alphabetic code point. + #[stable(feature = "rust1", since = "1.0.0")] + fn is_alphabetic(self) -> bool; + + /// Returns whether the specified character satisfies the 'XID_Start' + /// Unicode property. + /// + /// 'XID_Start' is a Unicode Derived Property specified in + /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), + /// mostly similar to ID_Start but modified for closure under NFKx. + #[unstable(feature = "unicode", + reason = "mainly needed for compiler internals")] + fn is_xid_start(self) -> bool; + + /// Returns whether the specified `char` satisfies the 'XID_Continue' + /// Unicode property. + /// + /// 'XID_Continue' is a Unicode Derived Property specified in + /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), + /// mostly similar to 'ID_Continue' but modified for closure under NFKx. + #[unstable(feature = "unicode", + reason = "mainly needed for compiler internals")] + fn is_xid_continue(self) -> bool; + + /// Indicates whether a character is in lowercase. + /// + /// This is defined according to the terms of the Unicode Derived Core + /// Property `Lowercase`. + #[stable(feature = "rust1", since = "1.0.0")] + fn is_lowercase(self) -> bool; + + /// Indicates whether a character is in uppercase. + /// + /// This is defined according to the terms of the Unicode Derived Core + /// Property `Uppercase`. 
+ #[stable(feature = "rust1", since = "1.0.0")] + fn is_uppercase(self) -> bool; + + /// Indicates whether a character is whitespace. + /// + /// Whitespace is defined in terms of the Unicode Property `White_Space`. + #[stable(feature = "rust1", since = "1.0.0")] + fn is_whitespace(self) -> bool; + + /// Indicates whether a character is alphanumeric. + /// + /// Alphanumericness is defined in terms of the Unicode General Categories + /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'. + #[stable(feature = "rust1", since = "1.0.0")] + fn is_alphanumeric(self) -> bool; + + /// Indicates whether a character is a control code point. + /// + /// Control code points are defined in terms of the Unicode General + /// Category `Cc`. + #[stable(feature = "rust1", since = "1.0.0")] + fn is_control(self) -> bool; + + /// Indicates whether the character is numeric (Nd, Nl, or No). + #[stable(feature = "rust1", since = "1.0.0")] + fn is_numeric(self) -> bool; + + /// Converts a character to its lowercase equivalent. + /// + /// The case-folding performed is the common or simple mapping. See + /// `to_uppercase()` for references and more information. + /// + /// # Return value + /// + /// Returns an iterator which yields the characters corresponding to the + /// lowercase equivalent of the character. If no conversion is possible then + /// the input character is returned. + #[stable(feature = "rust1", since = "1.0.0")] + fn to_lowercase(self) -> ToLowercase; + + /// Converts a character to its uppercase equivalent. + /// + /// The case-folding performed is the common or simple mapping: it maps + /// one Unicode codepoint to its uppercase equivalent according to the + /// Unicode database [1]. The additional [`SpecialCasing.txt`] is not yet + /// considered here, but the iterator returned will soon support this form + /// of case folding. + /// + /// A full reference can be found here [2]. 
+ /// + /// # Return value + /// + /// Returns an iterator which yields the characters corresponding to the + /// uppercase equivalent of the character. If no conversion is possible then + /// the input character is returned. + /// + /// [1]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt + /// + /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt + /// + /// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992 + #[stable(feature = "rust1", since = "1.0.0")] + fn to_uppercase(self) -> ToUppercase; + + /// Returns this character's displayed width in columns, or `None` if it is a + /// control character other than `'\x00'`. + /// + /// `is_cjk` determines behavior for characters in the Ambiguous category: + /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1. + /// In CJK contexts, `is_cjk` should be `true`, else it should be `false`. + /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/) + /// recommends that these characters be treated as 1 column (i.e., + /// `is_cjk` = `false`) if the context cannot be reliably determined. + #[unstable(feature = "unicode", + reason = "needs expert opinion. 
is_cjk flag stands out as ugly")] + fn width(self, is_cjk: bool) -> Option; +} + +#[cfg(stage0)] +#[stable(feature = "rust1", since = "1.0.0")] +impl CharExt for char { + #[inline] + fn is_digit(self, radix: u32) -> bool { C::is_digit(self, radix) } + fn to_digit(self, radix: u32) -> Option { C::to_digit(self, radix) } + fn escape_unicode(self) -> EscapeUnicode { C::escape_unicode(self) } + fn escape_default(self) -> EscapeDefault { C::escape_default(self) } + fn len_utf8(self) -> usize { C::len_utf8(self) } + fn len_utf16(self) -> usize { C::len_utf16(self) } + fn encode_utf8(self, dst: &mut [u8]) -> Option { C::encode_utf8(self, dst) } + fn encode_utf16(self, dst: &mut [u16]) -> Option { C::encode_utf16(self, dst) } + + #[inline] + fn is_alphabetic(self) -> bool { + match self { + 'a' ... 'z' | 'A' ... 'Z' => true, + c if c > '\x7f' => derived_property::Alphabetic(c), + _ => false + } + } + + #[inline] + fn is_xid_start(self) -> bool { derived_property::XID_Start(self) } + + #[inline] + fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) } + + #[inline] + fn is_lowercase(self) -> bool { + match self { + 'a' ... 'z' => true, + c if c > '\x7f' => derived_property::Lowercase(c), + _ => false + } + } + + #[inline] + fn is_uppercase(self) -> bool { + match self { + 'A' ... 'Z' => true, + c if c > '\x7f' => derived_property::Uppercase(c), + _ => false + } + } + + #[inline] + fn is_whitespace(self) -> bool { + match self { + ' ' | '\x09' ... '\x0d' => true, + c if c > '\x7f' => property::White_Space(c), + _ => false + } + } + + #[inline] + fn is_alphanumeric(self) -> bool { + self.is_alphabetic() || self.is_numeric() + } + + #[inline] + fn is_control(self) -> bool { general_category::Cc(self) } + + #[inline] + fn is_numeric(self) -> bool { + match self { + '0' ... 
'9' => true, + c if c > '\x7f' => general_category::N(c), + _ => false + } + } + + #[inline] + fn to_lowercase(self) -> ToLowercase { + ToLowercase(Some(conversions::to_lower(self))) + } + + #[inline] + fn to_uppercase(self) -> ToUppercase { + ToUppercase(Some(conversions::to_upper(self))) + } + + #[inline] + fn width(self, is_cjk: bool) -> Option { charwidth::width(self, is_cjk) } +} + /// An iterator over the lowercase mapping of a given character, returned from /// the `lowercase` method on characters. #[stable(feature = "rust1", since = "1.0.0")] @@ -90,6 +510,7 @@ impl char { /// assert!('f'.is_digit(16)); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn is_digit(self, radix: u32) -> bool { C::is_digit(self, radix) } /// Converts a character to the corresponding digit. @@ -285,6 +706,7 @@ impl char { /// Returns whether the specified character is considered a Unicode /// alphabetic code point. #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn is_alphabetic(self) -> bool { match self { 'a' ... 'z' | 'A' ... 'Z' => true, @@ -301,6 +723,7 @@ impl char { /// mostly similar to ID_Start but modified for closure under NFKx. #[unstable(feature = "unicode", reason = "mainly needed for compiler internals")] + #[inline] pub fn is_xid_start(self) -> bool { derived_property::XID_Start(self) } /// Returns whether the specified `char` satisfies the 'XID_Continue' @@ -311,6 +734,7 @@ impl char { /// mostly similar to 'ID_Continue' but modified for closure under NFKx. #[unstable(feature = "unicode", reason = "mainly needed for compiler internals")] + #[inline] pub fn is_xid_continue(self) -> bool { derived_property::XID_Continue(self) } /// Indicates whether a character is in lowercase. @@ -318,6 +742,7 @@ impl char { /// This is defined according to the terms of the Unicode Derived Core /// Property `Lowercase`. #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn is_lowercase(self) -> bool { match self { 'a' ... 
'z' => true, @@ -331,6 +756,7 @@ impl char { /// This is defined according to the terms of the Unicode Derived Core /// Property `Uppercase`. #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn is_uppercase(self) -> bool { match self { 'A' ... 'Z' => true, @@ -343,6 +769,7 @@ impl char { /// /// Whitespace is defined in terms of the Unicode Property `White_Space`. #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn is_whitespace(self) -> bool { match self { ' ' | '\x09' ... '\x0d' => true, @@ -356,6 +783,7 @@ impl char { /// Alphanumericness is defined in terms of the Unicode General Categories /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'. #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn is_alphanumeric(self) -> bool { self.is_alphabetic() || self.is_numeric() } @@ -365,10 +793,12 @@ impl char { /// Control code points are defined in terms of the Unicode General /// Category `Cc`. #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn is_control(self) -> bool { general_category::Cc(self) } /// Indicates whether the character is numeric (Nd, Nl, or No). #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn is_numeric(self) -> bool { match self { '0' ... '9' => true, @@ -388,6 +818,7 @@ impl char { /// lowercase equivalent of the character. If no conversion is possible then /// the input character is returned. #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn to_lowercase(self) -> ToLowercase { ToLowercase(Some(conversions::to_lower(self))) } @@ -414,6 +845,7 @@ impl char { /// /// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992 #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn to_uppercase(self) -> ToUppercase { ToUppercase(Some(conversions::to_upper(self))) }