Implement Char::encode_utf16
And clean up encode_utf8 a bit.
This commit is contained in:
parent
8b6091e8f1
commit
e011939b1a
@ -560,11 +560,19 @@ pub trait Char {
|
||||
|
||||
/// Encodes this character as UTF-8 into the provided byte buffer.
|
||||
///
|
||||
/// The buffer must be at least 4 bytes long or a runtime failure will
|
||||
/// The buffer must be at least 4 bytes long or a runtime failure may
|
||||
/// occur.
|
||||
///
|
||||
/// This will then return the number of characters written to the slice.
|
||||
/// This will then return the number of bytes written to the slice.
|
||||
fn encode_utf8(&self, dst: &mut [u8]) -> uint;
|
||||
|
||||
/// Encodes this character as UTF-16 into the provided `u16` buffer.
|
||||
///
|
||||
/// The buffer must be at least 2 elements long or a runtime failure may
|
||||
/// occur.
|
||||
///
|
||||
/// This will then return the number of `u16`s written to the slice.
|
||||
fn encode_utf16(&self, dst: &mut [u16]) -> uint;
|
||||
}
|
||||
|
||||
impl Char for char {
|
||||
@ -602,7 +610,7 @@ impl Char for char {
|
||||
|
||||
fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
|
||||
|
||||
fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
|
||||
fn encode_utf8(&self, dst: &mut [u8]) -> uint {
|
||||
let code = *self as uint;
|
||||
if code < MAX_ONE_B {
|
||||
dst[0] = code as u8;
|
||||
@ -624,6 +632,24 @@ impl Char for char {
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_utf16(&self, dst: &mut [u16]) -> uint {
|
||||
let mut ch = *self as uint;
|
||||
if (ch & 0xFFFF_u) == ch {
|
||||
// The BMP falls through (assuming non-surrogate, as it
|
||||
// should)
|
||||
assert!(ch <= 0xD7FF_u || ch >= 0xE000_u);
|
||||
dst[0] = ch as u16;
|
||||
1
|
||||
} else {
|
||||
// Supplementary planes break into surrogates.
|
||||
assert!(ch >= 0x1_0000_u && ch <= 0x10_FFFF_u);
|
||||
ch -= 0x1_0000_u;
|
||||
dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
|
||||
dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
|
||||
2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(test))]
|
||||
|
@ -2555,22 +2555,9 @@ impl<'a> StrSlice<'a> for &'a str {
|
||||
fn to_utf16(&self) -> ~[u16] {
|
||||
let mut u = ~[];
|
||||
for ch in self.chars() {
|
||||
// Arithmetic with u32 literals is easier on the eyes than chars.
|
||||
let mut ch = ch as u32;
|
||||
|
||||
if (ch & 0xFFFF_u32) == ch {
|
||||
// The BMP falls through (assuming non-surrogate, as it
|
||||
// should)
|
||||
assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
|
||||
u.push(ch as u16)
|
||||
} else {
|
||||
// Supplementary planes break into surrogates.
|
||||
assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
|
||||
ch -= 0x1_0000_u32;
|
||||
let w1 = 0xD800_u16 | ((ch >> 10) as u16);
|
||||
let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
|
||||
u.push_all([w1, w2])
|
||||
}
|
||||
let mut buf = [0u16, ..2];
|
||||
let n = ch.encode_utf16(buf /* as mut slice! */);
|
||||
u.push_all(buf.slice_to(n));
|
||||
}
|
||||
u
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user