std: convert str::char_at* to methods.
This commit is contained in:
parent
f632f46614
commit
ec5a028ada
@ -429,7 +429,7 @@ fn scan_char(haystack: &str, needle: char, idx: &mut uint) -> bool {
|
||||
if *idx >= haystack.len() {
|
||||
return false;
|
||||
}
|
||||
let range = str::char_range_at(haystack, *idx);
|
||||
let range = haystack.char_range_at(*idx);
|
||||
if range.ch != needle {
|
||||
return false;
|
||||
}
|
||||
@ -440,7 +440,7 @@ fn scan_char(haystack: &str, needle: char, idx: &mut uint) -> bool {
|
||||
fn scan_integer(haystack: &str, idx: &mut uint) -> bool {
|
||||
let mut i = *idx;
|
||||
while i < haystack.len() {
|
||||
let range = str::char_range_at(haystack, i);
|
||||
let range = haystack.char_range_at(i);
|
||||
if range.ch < '0' || '9' < range.ch {
|
||||
break;
|
||||
}
|
||||
@ -460,7 +460,7 @@ fn scan_string(haystack: &str, needle: &str, idx: &mut uint) -> bool {
|
||||
if haystack_i >= haystack.len() {
|
||||
return false;
|
||||
}
|
||||
let range = str::char_range_at(haystack, haystack_i);
|
||||
let range = haystack.char_range_at(haystack_i);
|
||||
haystack_i = range.next;
|
||||
if !scan_char(needle, range.ch, &mut needle_i) {
|
||||
return false;
|
||||
|
@ -112,7 +112,7 @@ pub struct Opt {
|
||||
|
||||
fn mkname(nm: &str) -> Name {
|
||||
if nm.len() == 1u {
|
||||
Short(str::char_at(nm, 0u))
|
||||
Short(nm.char_at(0u))
|
||||
} else {
|
||||
Long(nm.to_owned())
|
||||
}
|
||||
@ -261,7 +261,7 @@ pub fn getopts(args: &[~str], opts: &[Opt]) -> Result {
|
||||
let mut last_valid_opt_id = None;
|
||||
names = ~[];
|
||||
while j < curlen {
|
||||
let range = str::char_range_at(cur, j);
|
||||
let range = cur.char_range_at(j);
|
||||
let opt = Short(range.ch);
|
||||
|
||||
/* In a series of potential options (eg. -aheJ), if we
|
||||
@ -565,11 +565,11 @@ pub mod groups {
|
||||
hasarg: hasarg,
|
||||
occur: occur}],
|
||||
|
||||
(1,0) => ~[Opt {name: Short(str::char_at(short_name, 0)),
|
||||
(1,0) => ~[Opt {name: Short(short_name.char_at(0)),
|
||||
hasarg: hasarg,
|
||||
occur: occur}],
|
||||
|
||||
(1,_) => ~[Opt {name: Short(str::char_at(short_name, 0)),
|
||||
(1,_) => ~[Opt {name: Short(short_name.char_at(0)),
|
||||
hasarg: hasarg,
|
||||
occur: occur},
|
||||
Opt {name: Long((long_name)),
|
||||
|
@ -1132,7 +1132,7 @@ pub mod node {
|
||||
pub fn char_at(mut node: @Node, mut pos: uint) -> char {
|
||||
loop {
|
||||
match *node {
|
||||
Leaf(x) => return str::char_at(*x.content, pos),
|
||||
Leaf(x) => return x.content.char_at(pos),
|
||||
Concat(Concat {left, right, _}) => {
|
||||
let left_len = char_len(left);
|
||||
node = if left_len > pos { left }
|
||||
@ -1257,8 +1257,7 @@ pub mod node {
|
||||
return None
|
||||
} else {
|
||||
let range =
|
||||
str::char_range_at(*aleaf.content,
|
||||
(*it).leaf_byte_pos + aleaf.byte_offset);
|
||||
aleaf.content.char_range_at((*it).leaf_byte_pos + aleaf.byte_offset);
|
||||
let ch = range.ch;
|
||||
let next = range.next;
|
||||
(*it).leaf_byte_pos = next - aleaf.byte_offset;
|
||||
@ -1345,7 +1344,7 @@ mod tests {
|
||||
equal = false;
|
||||
} break; }
|
||||
Some(c) => {
|
||||
let range = str::char_range_at(*sample, string_iter);
|
||||
let range = sample.char_range_at(string_iter);
|
||||
string_iter = range.next;
|
||||
if range.ch != c { equal = false; break; }
|
||||
}
|
||||
|
@ -296,7 +296,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
|
||||
|
||||
let mut i = 0u;
|
||||
while i < digits {
|
||||
let range = str::char_range_at(ss, pos);
|
||||
let range = ss.char_range_at(pos);
|
||||
pos = range.next;
|
||||
|
||||
match range.ch {
|
||||
@ -323,7 +323,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
|
||||
}
|
||||
|
||||
fn parse_char(s: &str, pos: uint, c: char) -> Result<uint, ~str> {
|
||||
let range = str::char_range_at(s, pos);
|
||||
let range = s.char_range_at(pos);
|
||||
|
||||
if c == range.ch {
|
||||
Ok(range.next)
|
||||
@ -600,7 +600,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
|
||||
let mut pos = pos;
|
||||
let len = s.len();
|
||||
while pos < len {
|
||||
let range = str::char_range_at(s, pos);
|
||||
let range = s.char_range_at(pos);
|
||||
pos = range.next;
|
||||
if range.ch == ' ' { break; }
|
||||
}
|
||||
@ -609,7 +609,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
|
||||
}
|
||||
}
|
||||
'z' => {
|
||||
let range = str::char_range_at(s, pos);
|
||||
let range = s.char_range_at(pos);
|
||||
|
||||
if range.ch == '+' || range.ch == '-' {
|
||||
match match_digits(s, range.next, 4u, false) {
|
||||
@ -655,7 +655,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
|
||||
let mut result = Err(~"Invalid time");
|
||||
|
||||
while !rdr.eof() && pos < len {
|
||||
let range = str::char_range_at(s, pos);
|
||||
let range = s.char_range_at(pos);
|
||||
let ch = range.ch;
|
||||
let next = range.next;
|
||||
|
||||
|
@ -842,7 +842,7 @@ fn check_item_non_camel_case_types(cx: &Context, it: @ast::item) {
|
||||
let ident = cx.sess.str_of(ident);
|
||||
assert!(!ident.is_empty());
|
||||
let ident = ident.trim_chars(&['_']);
|
||||
char::is_uppercase(str::char_at(ident, 0)) &&
|
||||
char::is_uppercase(ident.char_at(0)) &&
|
||||
!ident.contains_char('_')
|
||||
}
|
||||
|
||||
|
@ -672,7 +672,7 @@ impl<T:Reader> ReaderUtil for T {
|
||||
val <<= 6;
|
||||
val += (next & 63) as uint;
|
||||
}
|
||||
// See str::char_at
|
||||
// See str::StrSlice::char_at
|
||||
val += ((b0 << ((w + 1) as u8)) as uint)
|
||||
<< (w - 1) * 6 - w - 1u;
|
||||
chars.push(val as char);
|
||||
|
@ -370,7 +370,7 @@ Section: Adding to and removing from a string
|
||||
// Removes the last character of `s` and returns it. The assert below fails
// when the string is empty.
pub fn pop_char(s: &mut ~str) -> char {
|
||||
let end = s.len();
|
||||
assert!(end > 0u);
|
||||
// NOTE(review): this `let` and the next bind the same names from the
// free-function and method spellings of one call; they look like the
// before/after sides of the diff hunk -- confirm only one is in the file.
let CharRange {ch, next} = char_range_at_reverse(*s, end);
|
||||
let CharRange {ch, next} = s.char_range_at_reverse(end);
|
||||
// `next` is the byte index where the popped char starts, so it is used as
// the new length (raw::set_len is defined elsewhere -- presumably truncates).
unsafe { raw::set_len(s, next); }
|
||||
return ch;
|
||||
}
|
||||
@ -383,7 +383,7 @@ pub fn pop_char(s: &mut ~str) -> char {
|
||||
* If the string does not contain any characters
|
||||
*/
|
||||
// Removes the first character of `s` and returns it.
pub fn shift_char(s: &mut ~str) -> char {
|
||||
// Decode the char at byte 0; `next` is the byte index where the rest begins.
// NOTE(review): the two `let` lines are the free-function and method
// spellings of the same call -- likely the before/after sides of the diff.
let CharRange {ch, next} = char_range_at(*s, 0u);
|
||||
let CharRange {ch, next} = s.char_range_at(0u);
|
||||
// Rebind `s` to the bytes from `next` to the end (raw::slice_bytes_owned is
// defined elsewhere -- presumably it returns a fresh owned string).
*s = unsafe { raw::slice_bytes_owned(*s, next, s.len()) };
|
||||
return ch;
|
||||
}
|
||||
@ -399,7 +399,7 @@ pub fn shift_char(s: &mut ~str) -> char {
|
||||
*/
|
||||
#[inline]
|
||||
// Splits `s` into its first character and the remainder, without modifying
// `s`; the remainder is returned as a slice with the same lifetime.
pub fn slice_shift_char<'a>(s: &'a str) -> (char, &'a str) {
|
||||
// NOTE(review): the two `let` lines are the free-function and method
// spellings of the same call -- likely the before/after sides of the diff.
let CharRange {ch, next} = char_range_at(s, 0u);
|
||||
let CharRange {ch, next} = s.char_range_at(0u);
|
||||
// Everything from the end of the first char up to the end of the string.
let next_s = unsafe { raw::slice_bytes(s, next, s.len()) };
|
||||
return (ch, next_s);
|
||||
}
|
||||
@ -532,7 +532,7 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for StrCharSplitIterator<'self, Se
|
||||
}
|
||||
} else {
|
||||
while self.position < l && self.count > 0 {
|
||||
let CharRange {ch, next} = char_range_at(self.string, self.position);
|
||||
let CharRange {ch, next} = self.string.char_range_at(self.position);
|
||||
|
||||
if self.sep.matches(ch) {
|
||||
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
|
||||
@ -1198,7 +1198,7 @@ pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
|
||||
assert!(is_char_boundary(s, end));
|
||||
let mut (i, len) = (start, 0u);
|
||||
while i < end {
|
||||
let next = char_range_at(s, i).next;
|
||||
let next = s.char_range_at(i).next;
|
||||
len += 1u;
|
||||
i = next;
|
||||
}
|
||||
@ -1213,7 +1213,7 @@ pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
|
||||
let l = s.len();
|
||||
while cnt > 0u {
|
||||
assert!(end < l);
|
||||
let next = char_range_at(s, end).next;
|
||||
let next = s.char_range_at(end).next;
|
||||
cnt -= 1u;
|
||||
end = next;
|
||||
}
|
||||
@ -1233,130 +1233,12 @@ pub fn utf8_char_width(b: u8) -> uint {
|
||||
return 6u;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns false if the index points into the middle of a multi-byte
|
||||
* character sequence.
|
||||
*/
|
||||
pub fn is_char_boundary(s: &str, index: uint) -> bool {
|
||||
// The position one past the last byte counts as a boundary; checking it
// first also avoids indexing `s[s.len()]`.
if index == s.len() { return true; }
|
||||
let b = s[index];
|
||||
// Bytes in 128..191 have the top bits `10` (continuation bytes); anything
// below 128 or at/above 192 starts a character.
return b < 128u8 || b >= 192u8;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pluck a character out of a string and return the index of the next
|
||||
* character.
|
||||
*
|
||||
* This function can be used to iterate over the unicode characters of a
|
||||
* string.
|
||||
*
|
||||
* # Example
|
||||
*
|
||||
* ~~~ {.rust}
|
||||
* let s = "中华Việt Nam";
|
||||
* let i = 0u;
|
||||
* while i < s.len() {
|
||||
* let CharRange {ch, next} = str::char_range_at(s, i);
|
||||
* std::io::println(fmt!("%u: %c",i,ch));
|
||||
* i = next;
|
||||
* }
|
||||
* ~~~
|
||||
*
|
||||
* # Example output
|
||||
*
|
||||
* ~~~
|
||||
* 0: 中
|
||||
* 3: 华
|
||||
* 6: V
|
||||
* 7: i
|
||||
* 8: ệ
|
||||
* 11: t
|
||||
* 12:
|
||||
* 13: N
|
||||
* 14: a
|
||||
* 15: m
|
||||
* ~~~
|
||||
*
|
||||
* # Arguments
|
||||
*
|
||||
* * s - The string
|
||||
* * i - The byte offset of the char to extract
|
||||
*
|
||||
* # Return value
|
||||
*
|
||||
* A record {ch: char, next: uint} containing the char value and the byte
|
||||
* index of the next unicode character.
|
||||
*
|
||||
* # Failure
|
||||
*
|
||||
* If `i` is greater than or equal to the length of the string.
|
||||
* If `i` is not the index of the beginning of a valid UTF-8 character.
|
||||
*/
|
||||
pub fn char_range_at(s: &str, i: uint) -> CharRange {
|
||||
// First byte of the sequence that starts at `i`.
let b0 = s[i];
|
||||
// utf8_char_width is defined elsewhere -- presumably the total byte length
// implied by the first byte, with 0 meaning "not a valid first byte".
let w = utf8_char_width(b0);
|
||||
assert!((w != 0u));
|
||||
// A one-byte sequence is returned directly.
if w == 1u { return CharRange {ch: b0 as char, next: i + 1u}; }
|
||||
let mut val = 0u;
|
||||
let end = i + w;
|
||||
// Shadows the parameter: from here on `i` walks the trailing bytes.
let mut i = i + 1u;
|
||||
while i < end {
|
||||
let byte = s[i];
|
||||
// Each trailing byte must carry the continuation tag in its top two bits.
assert_eq!(byte & 192u8, tag_cont_u8);
|
||||
// Append the low six bits of the trailing byte to the accumulator.
val <<= 6u;
|
||||
val += (byte & 63u8) as uint;
|
||||
i += 1u;
|
||||
}
|
||||
// Clunky way to get the right bits from the first byte. Uses two shifts,
|
||||
// the first to clip off the marker bits at the left of the byte, and then
|
||||
// a second (as uint) to get it to the right position.
|
||||
val += ((b0 << ((w + 1u) as u8)) as uint) << ((w - 1u) * 6u - w - 1u);
|
||||
// The loop leaves `i == end`, the index just past this character.
return CharRange {ch: val as char, next: i};
|
||||
}
|
||||
|
||||
/// Plucks the character starting at the `i`th byte of a string
|
||||
pub fn char_at(s: &str, i: uint) -> char {
|
||||
// Decode with char_range_at and drop the `next` index.
return char_range_at(s, i).ch;
|
||||
}
|
||||
|
||||
#[allow(missing_doc)]
|
||||
// Result of decoding one character at a byte position in a string.
pub struct CharRange {
|
||||
// The decoded character.
ch: char,
|
||||
// Byte index to continue from: char_range_at stores the index just past the
// character, char_range_at_reverse stores the index where it starts.
next: uint
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a byte position and a str, return the previous char and its position.
|
||||
*
|
||||
* This function can be used to iterate over a unicode string in reverse.
|
||||
*
|
||||
* Returns 0 for next index if called on start index 0.
|
||||
*/
|
||||
pub fn char_range_at_reverse(ss: &str, start: uint) -> CharRange {
|
||||
let mut prev = start;
|
||||
|
||||
// while there is a previous byte == 10......
|
||||
// (i.e. step left over continuation bytes; `prev > 0u` is tested first so
// `prev - 1u` cannot wrap)
while prev > 0u && ss[prev - 1u] & 192u8 == tag_cont_u8 {
|
||||
prev -= 1u;
|
||||
}
|
||||
|
||||
// now refer to the initial byte of previous char
|
||||
if prev > 0u {
|
||||
prev -= 1u;
|
||||
} else {
|
||||
// Already at the start of the string: stay at index 0 instead of wrapping.
prev = 0u;
|
||||
}
|
||||
|
||||
|
||||
// Decode the character that begins at `prev`; `next` reports where it starts.
let ch = char_at(ss, prev);
|
||||
return CharRange {ch:ch, next:prev};
|
||||
}
|
||||
|
||||
/// Plucks the character ending at the `i`th byte of a string
|
||||
pub fn char_at_reverse(s: &str, i: uint) -> char {
|
||||
// Decode backwards with char_range_at_reverse and drop the `next` index.
char_range_at_reverse(s, i).ch
|
||||
}
|
||||
|
||||
// UTF-8 tags and ranges
|
||||
static tag_cont_u8: u8 = 128u8;
|
||||
static tag_cont: uint = 128u;
|
||||
@ -1776,7 +1658,10 @@ pub trait StrSlice<'self> {
|
||||
fn trim_right_chars(&self, chars_to_trim: &[char]) -> &'self str;
|
||||
fn to_owned(&self) -> ~str;
|
||||
fn to_managed(&self) -> @str;
|
||||
fn is_char_boundary(s: &str, index: uint) -> bool;
|
||||
fn char_range_at(&self, start: uint) -> CharRange;
|
||||
fn char_at(&self, i: uint) -> char;
|
||||
fn char_range_at_reverse(&self, start: uint) -> CharRange;
|
||||
fn char_at_reverse(&self, i: uint) -> char;
|
||||
fn to_bytes(&self) -> ~[u8];
|
||||
|
||||
@ -1967,7 +1852,7 @@ impl<'self> StrSlice<'self> for &'self str {
|
||||
match self.rfind(|c| !char::is_whitespace(c)) {
|
||||
None => "",
|
||||
Some(last) => {
|
||||
let next = char_range_at(*self, last).next;
|
||||
let next = self.char_range_at(last).next;
|
||||
unsafe { raw::slice_bytes(*self, 0u, next) }
|
||||
}
|
||||
}
|
||||
@ -2019,8 +1904,8 @@ impl<'self> StrSlice<'self> for &'self str {
|
||||
match self.rfind(|c| !chars_to_trim.contains(&c)) {
|
||||
None => "",
|
||||
Some(last) => {
|
||||
let next = char_range_at(self, last).next;
|
||||
unsafe { raw::slice_bytes(self, 0u, next) }
|
||||
let next = self.char_range_at(last).next;
|
||||
unsafe { raw::slice_bytes(*self, 0u, next) }
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2037,12 +1922,122 @@ impl<'self> StrSlice<'self> for &'self str {
|
||||
unsafe { ::cast::transmute(v) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
// Method wrapper that forwards to the free function `char_at`.
// NOTE(review): a second `fn char_at(&self, ...)` that calls
// `self.char_range_at(i)` appears further down this impl; this wrapper looks
// like the removed side of the diff -- confirm only one remains in the file.
fn char_at(&self, i: uint) -> char { char_at(*self, i) }
|
||||
/**
|
||||
* Returns false if the index points into the middle of a multi-byte
|
||||
* character sequence.
|
||||
*/
|
||||
fn is_char_boundary(&self, index: uint) -> bool {
|
||||
// The position one past the last byte counts as a boundary; checking it
// first also avoids indexing `self[self.len()]`.
if index == self.len() { return true; }
|
||||
let b = self[index];
|
||||
// Bytes in 128..191 have the top bits `10` (continuation bytes); anything
// below 128 or at/above 192 starts a character.
return b < 128u8 || b >= 192u8;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pluck a character out of a string and return the index of the next
|
||||
* character.
|
||||
*
|
||||
* This function can be used to iterate over the unicode characters of a
|
||||
* string.
|
||||
*
|
||||
* # Example
|
||||
*
|
||||
* ~~~ {.rust}
|
||||
* let s = "中华Việt Nam";
|
||||
* let i = 0u;
|
||||
* while i < s.len() {
|
||||
* let CharRange {ch, next} = s.char_range_at(i);
|
||||
* std::io::println(fmt!("%u: %c",i,ch));
|
||||
* i = next;
|
||||
* }
|
||||
* ~~~
|
||||
*
|
||||
* # Example output
|
||||
*
|
||||
* ~~~
|
||||
* 0: 中
|
||||
* 3: 华
|
||||
* 6: V
|
||||
* 7: i
|
||||
* 8: ệ
|
||||
* 11: t
|
||||
* 12:
|
||||
* 13: N
|
||||
* 14: a
|
||||
* 15: m
|
||||
* ~~~
|
||||
*
|
||||
* # Arguments
|
||||
*
|
||||
* * s - The string
|
||||
* * i - The byte offset of the char to extract
|
||||
*
|
||||
* # Return value
|
||||
*
|
||||
* A record {ch: char, next: uint} containing the char value and the byte
|
||||
* index of the next unicode character.
|
||||
*
|
||||
* # Failure
|
||||
*
|
||||
* If `i` is greater than or equal to the length of the string.
|
||||
* If `i` is not the index of the beginning of a valid UTF-8 character.
|
||||
*/
|
||||
fn char_range_at(&self, i: uint) -> CharRange {
|
||||
// First byte of the sequence that starts at `i`.
let b0 = self[i];
|
||||
// utf8_char_width is defined elsewhere -- presumably the total byte length
// implied by the first byte, with 0 meaning "not a valid first byte".
let w = utf8_char_width(b0);
|
||||
assert!((w != 0u));
|
||||
// A one-byte sequence is returned directly.
if w == 1u { return CharRange {ch: b0 as char, next: i + 1u}; }
|
||||
let mut val = 0u;
|
||||
let end = i + w;
|
||||
// Shadows the parameter: from here on `i` walks the trailing bytes.
let mut i = i + 1u;
|
||||
while i < end {
|
||||
let byte = self[i];
|
||||
// Each trailing byte must carry the continuation tag in its top two bits.
assert_eq!(byte & 192u8, tag_cont_u8);
|
||||
// Append the low six bits of the trailing byte to the accumulator.
val <<= 6u;
|
||||
val += (byte & 63u8) as uint;
|
||||
i += 1u;
|
||||
}
|
||||
// Clunky way to get the right bits from the first byte. Uses two shifts,
|
||||
// the first to clip off the marker bits at the left of the byte, and then
|
||||
// a second (as uint) to get it to the right position.
|
||||
val += ((b0 << ((w + 1u) as u8)) as uint) << ((w - 1u) * 6u - w - 1u);
|
||||
// The loop leaves `i == end`, the index just past this character.
return CharRange {ch: val as char, next: i};
|
||||
}
|
||||
|
||||
/// Plucks the character starting at the `i`th byte of a string
///
/// Decodes with `char_range_at` and discards the `next` index.
|
||||
#[inline]
|
||||
fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
|
||||
|
||||
/**
|
||||
* Given a byte position and a str, return the previous char and its position.
|
||||
*
|
||||
* This function can be used to iterate over a unicode string in reverse.
|
||||
*
|
||||
* Returns 0 for next index if called on start index 0.
|
||||
*/
|
||||
fn char_range_at_reverse(&self, start: uint) -> CharRange {
|
||||
let mut prev = start;
|
||||
|
||||
// while there is a previous byte == 10......
|
||||
// (i.e. step left over continuation bytes; `prev > 0u` is tested first so
// `prev - 1u` cannot wrap)
while prev > 0u && self[prev - 1u] & 192u8 == tag_cont_u8 {
|
||||
prev -= 1u;
|
||||
}
|
||||
|
||||
// now refer to the initial byte of previous char
|
||||
if prev > 0u {
|
||||
prev -= 1u;
|
||||
} else {
|
||||
// Already at the start of the string: stay at index 0 instead of wrapping.
prev = 0u;
|
||||
}
|
||||
|
||||
|
||||
// Decode the character that begins at `prev`; `next` reports where it starts.
let ch = self.char_at(prev);
|
||||
return CharRange {ch:ch, next:prev};
|
||||
}
|
||||
|
||||
/// Plucks the character ending at the `i`th byte of a string
|
||||
#[inline]
|
||||
fn char_at_reverse(&self, i: uint) -> char {
|
||||
// NOTE(review): the next two expressions are the free-function and method
// spellings of the same lookup -- likely the before/after sides of the diff;
// confirm only the `self.char_range_at_reverse(i).ch` form is in the file.
char_at_reverse(*self, i)
|
||||
self.char_range_at_reverse(i).ch
|
||||
}
|
||||
|
||||
// Method wrapper: forwards to the free function `to_bytes` and returns its
// owned `~[u8]` result.
fn to_bytes(&self) -> ~[u8] { to_bytes(*self) }
|
||||
@ -3182,7 +3177,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
// Regression check: asking for the char before byte index 0 must report
// `next == 0` rather than wrapping below zero.
fn test_char_range_at_reverse_underflow() {
|
||||
// NOTE(review): the two asserts are the free-function and method spellings
// of the same check -- likely the before/after sides of the diff.
assert_eq!(char_range_at_reverse("abc", 0).next, 0);
|
||||
assert_eq!("abc".char_range_at_reverse(0).next, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -180,7 +180,7 @@ pub fn bump(rdr: &mut StringReader) {
|
||||
if current_byte_offset < (*rdr.src).len() {
|
||||
assert!(rdr.curr != -1 as char);
|
||||
let last_char = rdr.curr;
|
||||
let next = str::char_range_at(*rdr.src, current_byte_offset);
|
||||
let next = rdr.src.char_range_at(current_byte_offset);
|
||||
let byte_offset_diff = next.next - current_byte_offset;
|
||||
rdr.pos = rdr.pos + BytePos(byte_offset_diff);
|
||||
rdr.curr = next.ch;
|
||||
@ -204,7 +204,7 @@ pub fn is_eof(rdr: @mut StringReader) -> bool {
|
||||
// Looks up the character at the byte offset computed from `rdr.pos`; yields
// `-1 as char` when that offset is not inside `rdr.src`.
pub fn nextch(rdr: @mut StringReader) -> char {
|
||||
let offset = byte_offset(rdr, rdr.pos).to_uint();
|
||||
if offset < (*rdr.src).len() {
|
||||
// NOTE(review): the two `return` lines are the free-function and method
// spellings of the same lookup -- likely the before/after sides of the diff.
return str::char_at(*rdr.src, offset);
|
||||
return rdr.src.char_at(offset);
|
||||
// `-1 as char` is the end-of-input sentinel.
|
||||
} else { return -1 as char; }
|
||||
}
|
||||
|
||||
|
@ -22,8 +22,8 @@ pub fn main() {
|
||||
assert!(str::char_len(s) == 4u);
|
||||
assert!(str::to_chars(s).len() == 4u);
|
||||
assert!(str::from_chars(str::to_chars(s)) == s);
|
||||
assert!(str::char_at(s, 0u) == 'e');
|
||||
assert!(str::char_at(s, 1u) == 'é');
|
||||
assert!(s.char_at(0u) == 'e');
|
||||
assert!(s.char_at(1u) == 'é');
|
||||
|
||||
assert!((str::is_utf8(str::to_bytes(s))));
|
||||
assert!((!str::is_utf8(~[0x80_u8])));
|
||||
|
Loading…
x
Reference in New Issue
Block a user