From ec5a028adac360537c8f37a669eda522bd8c9b6b Mon Sep 17 00:00:00 2001
From: Huon Wilson <dbau.pp+github@gmail.com>
Date: Mon, 10 Jun 2013 21:46:36 +1000
Subject: [PATCH] std: convert str::char_at* to methods.

---
 src/compiletest/runtest.rs      |   6 +-
 src/libextra/getopts.rs         |   8 +-
 src/libextra/rope.rs            |   7 +-
 src/libextra/time.rs            |  10 +-
 src/librustc/middle/lint.rs     |   2 +-
 src/libstd/io.rs                |   2 +-
 src/libstd/str.rs               | 257 ++++++++++++++++----------------
 src/libsyntax/parse/lexer.rs    |   4 +-
 src/test/run-pass/utf8_chars.rs |   4 +-
 9 files changed, 147 insertions(+), 153 deletions(-)

diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs
index d87101ffb85..c9e44a79160 100644
--- a/src/compiletest/runtest.rs
+++ b/src/compiletest/runtest.rs
@@ -429,7 +429,7 @@ fn scan_char(haystack: &str, needle: char, idx: &mut uint) -> bool {
     if *idx >= haystack.len() {
         return false;
     }
-    let range = str::char_range_at(haystack, *idx);
+    let range = haystack.char_range_at(*idx);
     if range.ch != needle {
         return false;
     }
@@ -440,7 +440,7 @@ fn scan_char(haystack: &str, needle: char, idx: &mut uint) -> bool {
 fn scan_integer(haystack: &str, idx: &mut uint) -> bool {
     let mut i = *idx;
     while i < haystack.len() {
-        let range = str::char_range_at(haystack, i);
+        let range = haystack.char_range_at(i);
         if range.ch < '0' || '9' < range.ch {
             break;
         }
@@ -460,7 +460,7 @@ fn scan_string(haystack: &str, needle: &str, idx: &mut uint) -> bool {
         if haystack_i >= haystack.len() {
             return false;
         }
-        let range = str::char_range_at(haystack, haystack_i);
+        let range = haystack.char_range_at(haystack_i);
         haystack_i = range.next;
         if !scan_char(needle, range.ch, &mut needle_i) {
             return false;
diff --git a/src/libextra/getopts.rs b/src/libextra/getopts.rs
index 111de53052c..76e921f02f9 100644
--- a/src/libextra/getopts.rs
+++ b/src/libextra/getopts.rs
@@ -112,7 +112,7 @@ pub struct Opt {
 
 fn mkname(nm: &str) -> Name {
   if nm.len() == 1u {
-      Short(str::char_at(nm, 0u))
+      Short(nm.char_at(0u))
   } else {
       Long(nm.to_owned())
   }
@@ -261,7 +261,7 @@ pub fn getopts(args: &[~str], opts: &[Opt]) -> Result {
                 let mut last_valid_opt_id = None;
                 names = ~[];
                 while j < curlen {
-                    let range = str::char_range_at(cur, j);
+                    let range = cur.char_range_at(j);
                     let opt = Short(range.ch);
 
                     /* In a series of potential options (eg. -aheJ), if we
@@ -565,11 +565,11 @@ pub mod groups {
                            hasarg: hasarg,
                            occur: occur}],
 
-           (1,0) => ~[Opt {name: Short(str::char_at(short_name, 0)),
+           (1,0) => ~[Opt {name: Short(short_name.char_at(0)),
                            hasarg: hasarg,
                            occur: occur}],
 
-           (1,_) => ~[Opt {name: Short(str::char_at(short_name, 0)),
+           (1,_) => ~[Opt {name: Short(short_name.char_at(0)),
                            hasarg: hasarg,
                            occur:  occur},
                       Opt {name:   Long((long_name)),
diff --git a/src/libextra/rope.rs b/src/libextra/rope.rs
index de78e0a6eeb..80d80fa0ade 100644
--- a/src/libextra/rope.rs
+++ b/src/libextra/rope.rs
@@ -1132,7 +1132,7 @@ pub mod node {
     pub fn char_at(mut node: @Node, mut pos: uint) -> char {
         loop {
             match *node {
-              Leaf(x) => return str::char_at(*x.content, pos),
+              Leaf(x) => return x.content.char_at(pos),
               Concat(Concat {left, right, _}) => {
                 let left_len = char_len(left);
                 node = if left_len > pos { left }
@@ -1257,8 +1257,7 @@ pub mod node {
                     return None
                 } else {
                     let range =
-                        str::char_range_at(*aleaf.content,
-                                     (*it).leaf_byte_pos + aleaf.byte_offset);
+                        aleaf.content.char_range_at((*it).leaf_byte_pos + aleaf.byte_offset);
                     let ch = range.ch;
                     let next = range.next;
                     (*it).leaf_byte_pos = next - aleaf.byte_offset;
@@ -1345,7 +1344,7 @@ mod tests {
                     equal = false;
                 } break; }
               Some(c) => {
-                let range = str::char_range_at(*sample, string_iter);
+                let range = sample.char_range_at(string_iter);
                 string_iter = range.next;
                 if range.ch != c { equal = false; break; }
               }
diff --git a/src/libextra/time.rs b/src/libextra/time.rs
index fea5cb560ac..caaa2994405 100644
--- a/src/libextra/time.rs
+++ b/src/libextra/time.rs
@@ -296,7 +296,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
 
         let mut i = 0u;
         while i < digits {
-            let range = str::char_range_at(ss, pos);
+            let range = ss.char_range_at(pos);
             pos = range.next;
 
             match range.ch {
@@ -323,7 +323,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
     }
 
     fn parse_char(s: &str, pos: uint, c: char) -> Result<uint, ~str> {
-        let range = str::char_range_at(s, pos);
+        let range = s.char_range_at(pos);
 
         if c == range.ch {
             Ok(range.next)
@@ -600,7 +600,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
                 let mut pos = pos;
                 let len = s.len();
                 while pos < len {
-                    let range = str::char_range_at(s, pos);
+                    let range = s.char_range_at(pos);
                     pos = range.next;
                     if range.ch == ' ' { break; }
                 }
@@ -609,7 +609,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
             }
           }
           'z' => {
-            let range = str::char_range_at(s, pos);
+            let range = s.char_range_at(pos);
 
             if range.ch == '+' || range.ch == '-' {
                 match match_digits(s, range.next, 4u, false) {
@@ -655,7 +655,7 @@ priv fn do_strptime(s: &str, format: &str) -> Result<Tm, ~str> {
         let mut result = Err(~"Invalid time");
 
         while !rdr.eof() && pos < len {
-            let range = str::char_range_at(s, pos);
+            let range = s.char_range_at(pos);
             let ch = range.ch;
             let next = range.next;
 
diff --git a/src/librustc/middle/lint.rs b/src/librustc/middle/lint.rs
index 7462067162d..92147bf4e0f 100644
--- a/src/librustc/middle/lint.rs
+++ b/src/librustc/middle/lint.rs
@@ -842,7 +842,7 @@ fn check_item_non_camel_case_types(cx: &Context, it: @ast::item) {
         let ident = cx.sess.str_of(ident);
         assert!(!ident.is_empty());
         let ident = ident.trim_chars(&['_']);
-        char::is_uppercase(str::char_at(ident, 0)) &&
+        char::is_uppercase(ident.char_at(0)) &&
             !ident.contains_char('_')
     }
 
diff --git a/src/libstd/io.rs b/src/libstd/io.rs
index 8ec3a4cdd81..7f3af79e27c 100644
--- a/src/libstd/io.rs
+++ b/src/libstd/io.rs
@@ -672,7 +672,7 @@ impl<T:Reader> ReaderUtil for T {
                     val <<= 6;
                     val += (next & 63) as uint;
                 }
-                // See str::char_at
+                // See str::StrSlice::char_range_at
                 val += ((b0 << ((w + 1) as u8)) as uint)
                     << (w - 1) * 6 - w - 1u;
                 chars.push(val as char);
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index 605a11032a1..25d9a63b479 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -370,7 +370,7 @@ Section: Adding to and removing from a string
 pub fn pop_char(s: &mut ~str) -> char {
     let end = s.len();
     assert!(end > 0u);
-    let CharRange {ch, next} = char_range_at_reverse(*s, end);
+    let CharRange {ch, next} = s.char_range_at_reverse(end);
     unsafe { raw::set_len(s, next); }
     return ch;
 }
@@ -383,7 +383,7 @@ pub fn pop_char(s: &mut ~str) -> char {
  * If the string does not contain any characters
  */
 pub fn shift_char(s: &mut ~str) -> char {
-    let CharRange {ch, next} = char_range_at(*s, 0u);
+    let CharRange {ch, next} = s.char_range_at(0u);
     *s = unsafe { raw::slice_bytes_owned(*s, next, s.len()) };
     return ch;
 }
@@ -399,7 +399,7 @@ pub fn shift_char(s: &mut ~str) -> char {
  */
 #[inline]
 pub fn slice_shift_char<'a>(s: &'a str) -> (char, &'a str) {
-    let CharRange {ch, next} = char_range_at(s, 0u);
+    let CharRange {ch, next} = s.char_range_at(0u);
     let next_s = unsafe { raw::slice_bytes(s, next, s.len()) };
     return (ch, next_s);
 }
@@ -532,7 +532,7 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for StrCharSplitIterator<'self, Se
             }
         } else {
             while self.position < l && self.count > 0 {
-                let CharRange {ch, next} = char_range_at(self.string, self.position);
+                let CharRange {ch, next} = self.string.char_range_at(self.position);
 
                 if self.sep.matches(ch) {
                     let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
@@ -1198,7 +1198,7 @@ pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
     assert!(is_char_boundary(s, end));
     let mut (i, len) = (start, 0u);
     while i < end {
-        let next = char_range_at(s, i).next;
+        let next = s.char_range_at(i).next;
         len += 1u;
         i = next;
     }
@@ -1213,7 +1213,7 @@ pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
     let l = s.len();
     while cnt > 0u {
         assert!(end < l);
-        let next = char_range_at(s, end).next;
+        let next = s.char_range_at(end).next;
         cnt -= 1u;
         end = next;
     }
@@ -1233,130 +1233,12 @@ pub fn utf8_char_width(b: u8) -> uint {
     return 6u;
 }
 
-/**
- * Returns false if the index points into the middle of a multi-byte
- * character sequence.
- */
-pub fn is_char_boundary(s: &str, index: uint) -> bool {
-    if index == s.len() { return true; }
-    let b = s[index];
-    return b < 128u8 || b >= 192u8;
-}
-
-/**
- * Pluck a character out of a string and return the index of the next
- * character.
- *
- * This function can be used to iterate over the unicode characters of a
- * string.
- *
- * # Example
- *
- * ~~~ {.rust}
- * let s = "中华Việt Nam";
- * let i = 0u;
- * while i < s.len() {
- *     let CharRange {ch, next} = str::char_range_at(s, i);
- *     std::io::println(fmt!("%u: %c",i,ch));
- *     i = next;
- * }
- * ~~~
- *
- * # Example output
- *
- * ~~~
- * 0: 中
- * 3: 华
- * 6: V
- * 7: i
- * 8: ệ
- * 11: t
- * 12:
- * 13: N
- * 14: a
- * 15: m
- * ~~~
- *
- * # Arguments
- *
- * * s - The string
- * * i - The byte offset of the char to extract
- *
- * # Return value
- *
- * A record {ch: char, next: uint} containing the char value and the byte
- * index of the next unicode character.
- *
- * # Failure
- *
- * If `i` is greater than or equal to the length of the string.
- * If `i` is not the index of the beginning of a valid UTF-8 character.
- */
-pub fn char_range_at(s: &str, i: uint) -> CharRange {
-    let b0 = s[i];
-    let w = utf8_char_width(b0);
-    assert!((w != 0u));
-    if w == 1u { return CharRange {ch: b0 as char, next: i + 1u}; }
-    let mut val = 0u;
-    let end = i + w;
-    let mut i = i + 1u;
-    while i < end {
-        let byte = s[i];
-        assert_eq!(byte & 192u8, tag_cont_u8);
-        val <<= 6u;
-        val += (byte & 63u8) as uint;
-        i += 1u;
-    }
-    // Clunky way to get the right bits from the first byte. Uses two shifts,
-    // the first to clip off the marker bits at the left of the byte, and then
-    // a second (as uint) to get it to the right position.
-    val += ((b0 << ((w + 1u) as u8)) as uint) << ((w - 1u) * 6u - w - 1u);
-    return CharRange {ch: val as char, next: i};
-}
-
-/// Plucks the character starting at the `i`th byte of a string
-pub fn char_at(s: &str, i: uint) -> char {
-    return char_range_at(s, i).ch;
-}
-
 #[allow(missing_doc)]
 pub struct CharRange {
     ch: char,
     next: uint
 }
 
-/**
- * Given a byte position and a str, return the previous char and its position.
- *
- * This function can be used to iterate over a unicode string in reverse.
- *
- * Returns 0 for next index if called on start index 0.
- */
-pub fn char_range_at_reverse(ss: &str, start: uint) -> CharRange {
-    let mut prev = start;
-
-    // while there is a previous byte == 10......
-    while prev > 0u && ss[prev - 1u] & 192u8 == tag_cont_u8 {
-        prev -= 1u;
-    }
-
-    // now refer to the initial byte of previous char
-    if prev > 0u {
-        prev -= 1u;
-    } else {
-        prev = 0u;
-    }
-
-
-    let ch = char_at(ss, prev);
-    return CharRange {ch:ch, next:prev};
-}
-
-/// Plucks the character ending at the `i`th byte of a string
-pub fn char_at_reverse(s: &str, i: uint) -> char {
-    char_range_at_reverse(s, i).ch
-}
-
 // UTF-8 tags and ranges
 static tag_cont_u8: u8 = 128u8;
 static tag_cont: uint = 128u;
@@ -1776,7 +1658,10 @@ pub trait StrSlice<'self> {
     fn trim_right_chars(&self, chars_to_trim: &[char]) -> &'self str;
     fn to_owned(&self) -> ~str;
     fn to_managed(&self) -> @str;
+    fn is_char_boundary(&self, index: uint) -> bool;
+    fn char_range_at(&self, start: uint) -> CharRange;
     fn char_at(&self, i: uint) -> char;
+    fn char_range_at_reverse(&self, start: uint) -> CharRange;
     fn char_at_reverse(&self, i: uint) -> char;
     fn to_bytes(&self) -> ~[u8];
 
@@ -1967,7 +1852,7 @@ impl<'self> StrSlice<'self> for &'self str {
         match self.rfind(|c| !char::is_whitespace(c)) {
             None => "",
             Some(last) => {
-                let next = char_range_at(*self, last).next;
+                let next = self.char_range_at(last).next;
                 unsafe { raw::slice_bytes(*self, 0u, next) }
             }
         }
@@ -2019,8 +1904,8 @@ impl<'self> StrSlice<'self> for &'self str {
         match self.rfind(|c| !chars_to_trim.contains(&c)) {
             None => "",
             Some(last) => {
-                let next = char_range_at(self, last).next;
-                unsafe { raw::slice_bytes(self, 0u, next) }
+                let next = self.char_range_at(last).next;
+                unsafe { raw::slice_bytes(*self, 0u, next) }
             }
         }
     }
@@ -2037,12 +1922,122 @@ impl<'self> StrSlice<'self> for &'self str {
         unsafe { ::cast::transmute(v) }
     }
 
-    #[inline]
-    fn char_at(&self, i: uint) -> char { char_at(*self, i) }
+    /**
+     * Returns false if `index` points into the middle of a multi-byte
+     * character sequence; an index equal to the string length is a boundary.
+     */
+    fn is_char_boundary(&self, index: uint) -> bool {
+        if index == self.len() { return true; }
+        let b = self[index];
+        return b < 128u8 || b >= 192u8; // i.e. not a 10xxxxxx continuation byte
+    }
 
+    /**
+     * Pluck a character out of a string and return the index of the next
+     * character.
+     *
+     * This function can be used to iterate over the unicode characters of a
+     * string.
+     *
+     * # Example
+     *
+     * ~~~ {.rust}
+     * let s = "中华Việt Nam";
+     * let mut i = 0u;
+     * while i < s.len() {
+     *     let CharRange {ch, next} = s.char_range_at(i);
+     *     std::io::println(fmt!("%u: %c",i,ch));
+     *     i = next;
+     * }
+     * ~~~
+     *
+     * # Example output
+     *
+     * ~~~
+     * 0: 中
+     * 3: 华
+     * 6: V
+     * 7: i
+     * 8: ệ
+     * 11: t
+     * 12:
+     * 13: N
+     * 14: a
+     * 15: m
+     * ~~~
+     *
+     * # Arguments
+     *
+     * * self - The string
+     * * i - The byte offset of the char to extract
+     *
+     * # Return value
+     *
+     * A record {ch: char, next: uint} containing the char value and the byte
+     * index of the next unicode character.
+     *
+     * # Failure
+     *
+     * If `i` is greater than or equal to the length of the string.
+     * If `i` is not the index of the beginning of a valid UTF-8 character.
+     */
+    fn char_range_at(&self, i: uint) -> CharRange {
+        let b0 = self[i];
+        let w = utf8_char_width(b0);
+        assert!((w != 0u));
+        if w == 1u { return CharRange {ch: b0 as char, next: i + 1u}; }
+        let mut val = 0u;
+        let end = i + w;
+        let mut i = i + 1u;
+        while i < end {
+            let byte = self[i];
+            assert_eq!(byte & 192u8, tag_cont_u8);
+            val <<= 6u;
+            val += (byte & 63u8) as uint;
+            i += 1u;
+        }
+        // Clunky way to get the right bits from the first byte. Uses two shifts,
+        // the first to clip off the marker bits at the left of the byte, and then
+        // a second (as uint) to get it to the right position.
+        val += ((b0 << ((w + 1u) as u8)) as uint) << ((w - 1u) * 6u - w - 1u);
+        return CharRange {ch: val as char, next: i};
+    }
+
+    /// Plucks the character starting at the `i`th byte of a string (the `ch` of `char_range_at(i)`)
+    #[inline]
+    fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
+
+    /**
+     * Given a byte position, return the char that ends just before it; the
+     * returned `next` field holds the byte index at which that char starts.
+     *
+     * This function can be used to iterate over a unicode string in reverse.
+     * Returns 0 for next index if called on start index 0.
+     */
+    fn char_range_at_reverse(&self, start: uint) -> CharRange {
+        let mut prev = start;
+
+        // step back over trailing continuation bytes (bit pattern 10xxxxxx)
+        while prev > 0u && self[prev - 1u] & 192u8 == tag_cont_u8 {
+            prev -= 1u;
+        }
+
+        // now refer to the initial byte of previous char
+        if prev > 0u {
+            prev -= 1u;
+        } else {
+            prev = 0u;
+        }
+
+
+        let ch = self.char_at(prev);
+        return CharRange {ch:ch, next:prev};
+    }
+
+    /// Plucks the character ending just before the `i`th byte of a string
     #[inline]
     fn char_at_reverse(&self, i: uint) -> char {
-        char_at_reverse(*self, i)
+        self.char_range_at_reverse(i).ch
     }
 
     fn to_bytes(&self) -> ~[u8] { to_bytes(*self) }
@@ -3182,7 +3177,7 @@ mod tests {
 
     #[test]
     fn test_char_range_at_reverse_underflow() {
-        assert_eq!(char_range_at_reverse("abc", 0).next, 0);
+        assert_eq!("abc".char_range_at_reverse(0).next, 0);
     }
 
     #[test]
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index 809a222352f..5f9bc4ca6f6 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -180,7 +180,7 @@ pub fn bump(rdr: &mut StringReader) {
     if current_byte_offset < (*rdr.src).len() {
         assert!(rdr.curr != -1 as char);
         let last_char = rdr.curr;
-        let next = str::char_range_at(*rdr.src, current_byte_offset);
+        let next = rdr.src.char_range_at(current_byte_offset);
         let byte_offset_diff = next.next - current_byte_offset;
         rdr.pos = rdr.pos + BytePos(byte_offset_diff);
         rdr.curr = next.ch;
@@ -204,7 +204,7 @@ pub fn is_eof(rdr: @mut StringReader) -> bool {
 pub fn nextch(rdr: @mut StringReader) -> char {
     let offset = byte_offset(rdr, rdr.pos).to_uint();
     if offset < (*rdr.src).len() {
-        return str::char_at(*rdr.src, offset);
+        return rdr.src.char_at(offset);
     } else { return -1 as char; }
 }
 
diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs
index 94990d649d8..b7ce617fe50 100644
--- a/src/test/run-pass/utf8_chars.rs
+++ b/src/test/run-pass/utf8_chars.rs
@@ -22,8 +22,8 @@ pub fn main() {
     assert!(str::char_len(s) == 4u);
     assert!(str::to_chars(s).len() == 4u);
     assert!(str::from_chars(str::to_chars(s)) == s);
-    assert!(str::char_at(s, 0u) == 'e');
-    assert!(str::char_at(s, 1u) == 'é');
+    assert!(s.char_at(0u) == 'e');
+    assert!(s.char_at(1u) == 'é');
 
     assert!((str::is_utf8(str::to_bytes(s))));
     assert!((!str::is_utf8(~[0x80_u8])));