std: convert str::to_utf16 to a method.

This commit is contained in:
Huon Wilson 2013-06-14 01:44:15 +10:00
parent 4b18fff2be
commit f1886680e0
2 changed files with 29 additions and 28 deletions

View File

@ -144,7 +144,7 @@ pub mod win32 {
}
pub fn as_utf16_p<T>(s: &str, f: &fn(*u16) -> T) -> T {
let mut t = str::to_utf16(s);
let mut t = s.to_utf16();
// Null terminate before passing on.
t += [0u16];
vec::as_imm_buf(t, |buf, _len| f(buf))

View File

@ -759,30 +759,6 @@ pub fn is_utf16(v: &[u16]) -> bool {
return true;
}
/// Converts the string slice `s` to a vector of `u16` code units encoded
/// as UTF-16.
///
/// A character whose value fits in 16 bits is emitted as a single unit;
/// any other character is emitted as a surrogate pair, high unit first.
/// The `assert!`s reject a 16-bit value inside 0xD800-0xDFFF and any value
/// above 0x10FFFF.
pub fn to_utf16(s: &str) -> ~[u16] {
let mut u = ~[];
for s.iter().advance |ch| {
// Arithmetic with u32 literals is easier on the eyes than chars.
let mut ch = ch as u32;
// True exactly when `ch` has no bits set above the low 16.
if (ch & 0xFFFF_u32) == ch {
// The BMP falls through (assuming non-surrogate, as it
// should)
assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
u.push(ch as u16)
} else {
// Supplementary planes break into surrogates.
assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
// Rebase to an offset from 0x10000; the assert above bounds it to
// 0xFFFFF, i.e. 20 bits.
ch -= 0x1_0000_u32;
// High surrogate: upper 10 bits of the offset, tagged with 0xD800.
let w1 = 0xD800_u16 | ((ch >> 10) as u16);
// Low surrogate: lower 10 bits of the offset, tagged with 0xDC00.
let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
u.push_all([w1, w2])
}
}
u
}
/// Iterates over the utf-16 characters in the specified slice, yielding each
/// decoded unicode character to the function provided.
///
@ -1188,6 +1164,7 @@ pub trait StrSlice<'self> {
fn replace(&self, from: &str, to: &str) -> ~str;
fn to_owned(&self) -> ~str;
fn to_managed(&self) -> @str;
fn to_utf16(&self) -> ~[u16];
fn is_char_boundary(&self, index: uint) -> bool;
fn char_range_at(&self, start: uint) -> CharRange;
fn char_at(&self, i: uint) -> char;
@ -1602,6 +1579,30 @@ impl<'self> StrSlice<'self> for &'self str {
unsafe { ::cast::transmute(v) }
}
/// Converts this string slice to a vector of `u16` code units encoded as
/// UTF-16.
///
/// A character whose value fits in 16 bits is emitted as a single unit;
/// any other character is emitted as a surrogate pair, high unit first.
/// The `assert!`s reject a 16-bit value inside 0xD800-0xDFFF and any value
/// above 0x10FFFF.
fn to_utf16(&self) -> ~[u16] {
let mut u = ~[];
for self.iter().advance |ch| {
// Arithmetic with u32 literals is easier on the eyes than chars.
let mut ch = ch as u32;
// True exactly when `ch` has no bits set above the low 16.
if (ch & 0xFFFF_u32) == ch {
// The BMP falls through (assuming non-surrogate, as it
// should)
assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
u.push(ch as u16)
} else {
// Supplementary planes break into surrogates.
assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
// Rebase to an offset from 0x10000; the assert above bounds it to
// 0xFFFFF, i.e. 20 bits.
ch -= 0x1_0000_u32;
// High surrogate: upper 10 bits of the offset, tagged with 0xD800.
let w1 = 0xD800_u16 | ((ch >> 10) as u16);
// Low surrogate: lower 10 bits of the offset, tagged with 0xDC00.
let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
u.push_all([w1, w2])
}
}
u
}
/**
* Returns false if the index points into the middle of a multi-byte
* character sequence.
@ -3116,10 +3117,10 @@ mod tests {
for pairs.each |p| {
let (s, u) = copy *p;
assert!(to_utf16(s) == u);
assert!(s.to_utf16() == u);
assert!(from_utf16(u) == s);
assert!(from_utf16(to_utf16(s)) == s);
assert!(to_utf16(from_utf16(u)) == u);
assert!(from_utf16(s.to_utf16()) == s);
assert!(from_utf16(u).to_utf16() == u);
}
}