auto merge of #19042 : SimonSapin/rust/generic-utf16-encoder, r=alexcrichton

This allows encoding to UTF-16 from input that is not a UTF-8 string, e.g. a `[char]` UTF-32 string.

This might help with servo/servo#4023
This commit is contained in:
bors 2014-11-21 14:21:48 +00:00
commit 9efa23e9c0
3 changed files with 34 additions and 4 deletions

View File

@ -74,7 +74,8 @@ use vec::Vec;
pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
pub use core::str::{Bytes, CharSplits};
pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
pub use core::str::{Utf16CodeUnits, eq_slice, is_utf8, is_utf16, Utf16Items};
pub use core::str::{Utf16Encoder, Utf16CodeUnits};
pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
pub use core::str::{FromStr, from_str};

View File

@ -762,11 +762,33 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
/// Use with the `std::iter` module.
#[deriving(Clone)]
pub struct Utf16CodeUnits<'a> {
chars: Chars<'a>,
extra: u16
encoder: Utf16Encoder<Chars<'a>>
}
// `Utf16CodeUnits` does no encoding itself: both iterator methods simply
// forward to the `encoder` field.
impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
// Returns whatever the wrapped encoder yields next (`None` once it is done).
#[inline]
fn next(&mut self) -> Option<u16> { self.encoder.next() }
// Reports the wrapped encoder's size hint unchanged.
#[inline]
fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
}
/// Iterator adaptor for encoding `char`s to UTF-16.
///
/// Wraps an iterator of `char`s and is itself an iterator of `u16` code
/// units. Construct it with `Utf16Encoder::new`.
#[deriving(Clone)]
pub struct Utf16Encoder<I> {
// Source iterator supplying the `char`s to encode.
chars: I,
// Pending code unit held between calls to `next`; 0 means "none".
// NOTE(review): presumably the second half of a surrogate pair -- the
// `next` body is not fully visible here, confirm against it.
extra: u16
}
impl<I> Utf16Encoder<I> {
/// Create a UTF-16 encoder from any `char` iterator.
///
/// The returned encoder takes ownership of `chars` and starts with no
/// pending code unit (`extra` is 0).
pub fn new(chars: I) -> Utf16Encoder<I> where I: Iterator<char> {
Utf16Encoder { chars: chars, extra: 0 }
}
}
impl<I> Iterator<u16> for Utf16Encoder<I> where I: Iterator<char> {
#[inline]
fn next(&mut self) -> Option<u16> {
if self.extra != 0 {
@ -2225,7 +2247,7 @@ impl StrPrelude for str {
#[inline]
fn utf16_units(&self) -> Utf16CodeUnits {
Utf16CodeUnits{ chars: self.chars(), extra: 0}
Utf16CodeUnits { encoder: Utf16Encoder::new(self.chars()) }
}
#[inline]

View File

@ -114,3 +114,10 @@ fn test_rev_split_char_iterator_no_trailing() {
split.reverse();
assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
// Exercises `Utf16Encoder` directly on a `char` iterator built from a `Vec`
// rather than on a `str`.
// U+00E9 fits in a single code unit (0xE9); U+1F4A9 is outside the Basic
// Multilingual Plane and must be emitted as the surrogate pair
// 0xD83D, 0xDCA9.
#[test]
fn test_utf16_code_units() {
use core::str::Utf16Encoder;
assert_eq!(Utf16Encoder::new(vec!['é', '\U0001F4A9'].into_iter()).collect::<Vec<u16>>(),
vec![0xE9, 0xD83D, 0xDCA9])
}