From 2ba44e24d5fccfc6fe7c28a742d18e03b6acafae Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Sun, 12 Feb 2012 01:32:09 -0800
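Subject: [PATCH] (core::str) rename byte_len_range -> substr_len_bytes and
 rename char_len_range -> substr_len_chars

Also replace calls to byte_len with len_bytes throughout
src/libcore/str.rs, and turn test_bytes_len into test_len, which
additionally checks the character-counting len.
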
---
 src/comp/syntax/codemap.rs |  2 +-
 src/libcore/str.rs         | 92 +++++++++++++++++++++-----------------
 src/libstd/rope.rs         | 10 ++---
 3 files changed, 56 insertions(+), 48 deletions(-)

diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs
index cb2590e1951..d1b60916e19 100644
--- a/src/comp/syntax/codemap.rs
+++ b/src/comp/syntax/codemap.rs
@@ -135,7 +135,7 @@ fn lookup_byte_offset(cm: codemap::codemap, chpos: uint)
     let {fm,line} = lookup_line(cm,chpos,lookup);
     let line_offset = fm.lines[line].byte - fm.start_pos.byte;
     let col = chpos - fm.lines[line].ch;
-    let col_offset = str::byte_len_range(*fm.src, line_offset, col);
+    let col_offset = str::substr_len_bytes(*fm.src, line_offset, col);
     ret {fm: fm, pos: line_offset + col_offset};
 }
 
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index c9f42914032..522ae89d395 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -85,8 +85,8 @@ export
    // Misc
    // FIXME: perhaps some more of this section shouldn't be exported?
    is_utf8,
-   char_len_range,
-   byte_len_range,
+   substr_len_bytes,
+   substr_len_chars,
    utf8_char_width,
    char_range_at,
    char_at,
@@ -271,7 +271,7 @@ Failure:
 If the string does not contain any characters.
 */
 fn pop_char(&s: str) -> char unsafe {
-    let end = byte_len(s);
+    let end = len_bytes(s);
     let {ch:ch, prev:end} = char_range_at_reverse(s, end);
     s = unsafe::slice_bytes(s, 0u, end);
     ret ch;
@@ -288,7 +288,7 @@ If the string does not contain any characters.
 */
 fn shift_char(&s: str) -> char unsafe {
     let r = char_range_at(s, 0u);
-    s = unsafe::slice_bytes(s, r.next, byte_len(s));
+    s = unsafe::slice_bytes(s, r.next, len_bytes(s));
     ret r.ch;
 }
 
@@ -367,7 +367,7 @@ Convert a string to a vector of characters
 fn chars(s: str) -> [char] {
     let buf: [char] = [];
     let i = 0u;
-    let len = byte_len(s);
+    let len = len_bytes(s);
     while i < len {
         let cur = char_range_at(s, i);
         buf += [cur.ch];
@@ -443,7 +443,7 @@ fn splitn_byte(ss: str, sep: u8, count: uint) -> [str] unsafe {
     assert u8::is_ascii(sep);
 
     let vv = [];
-    let start = 0u, current = 0u, len = byte_len(ss);
+    let start = 0u, current = 0u, len = len_bytes(ss);
     let splits_done = 0u;
 
     while splits_done < count && current < len {
@@ -550,7 +550,7 @@ up to `count` times
 fn splitn_char(ss: str, sep: char, count: uint) -> [str] unsafe {
 
    let vv = [];
-   let start = 0u, current = 0u, len = byte_len(ss);
+   let start = 0u, current = 0u, len = len_bytes(ss);
    let splits_done = 0u;
 
    while splits_done < count && current < len {
@@ -656,10 +656,11 @@ Returns:
 The original string with all occurances of `from` replaced with `to`
 */
 fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str unsafe {
-    if byte_len(s) == 0u {
+    if len_bytes(s) == 0u {
         ret "";
     } else if starts_with(s, from) {
-        ret to + replace(unsafe::slice_bytes(s, byte_len(from), byte_len(s)),
+        ret to + replace(
+                     unsafe::slice_bytes(s, len_bytes(from), len_bytes(s)),
                                        from, to);
     } else {
         let idx = find(s, from);
@@ -726,7 +727,7 @@ Return true if a predicate matches all characters or
 if the string contains no characters
 */
 fn all(s: str, it: fn(char) -> bool) -> bool{
-    ret substr_all(s, 0u, byte_len(s), it);
+    ret substr_all(s, 0u, len_bytes(s), it);
 }
 
 /*
@@ -746,7 +747,7 @@ Apply a function to each character
 */
 fn map(ss: str, ff: fn(char) -> char) -> str {
     let result = "";
-    reserve(result, byte_len(ss));
+    reserve(result, len_bytes(ss));
 
     chars_iter(ss, {|cc|
         str::push_char(result, ff(cc));
@@ -762,7 +763,7 @@ Iterate over the bytes in a string
 */
 fn bytes_iter(ss: str, it: fn(u8)) {
     let pos = 0u;
-    let len = byte_len(ss);
+    let len = len_bytes(ss);
 
     while (pos < len) {
         it(ss[pos]);
@@ -776,7 +777,7 @@ Function: chars_iter
 Iterate over the characters in a string
 */
 fn chars_iter(s: str, it: fn(char)) {
-    let pos = 0u, len = byte_len(s);
+    let pos = 0u, len = len_bytes(s);
     while (pos < len) {
         let {ch, next} = char_range_at(s, pos);
         pos = next;
@@ -833,7 +834,7 @@ Section: Searching
 fn index(ss: str, cc: char) -> option<uint> {
     let bii = 0u;
     let cii = 0u;
-    let len = byte_len(ss);
+    let len = len_bytes(ss);
     while bii < len {
         let {ch, next} = char_range_at(ss, bii);
 
@@ -855,7 +856,7 @@ fn index(ss: str, cc: char) -> option<uint> {
 // Returns the index of the first matching char
 // (as option some/none)
 fn rindex(ss: str, cc: char) -> option<uint> {
-    let bii = byte_len(ss);
+    let bii = len_bytes(ss);
     let cii = len(ss);
     while bii > 0u {
         let {ch, prev} = char_range_at_reverse(ss, bii);
@@ -890,8 +891,8 @@ The index of the first occurance of `needle`, or -1 if not found.
 FIXME: UTF-8
 */
 fn find(haystack: str, needle: str) -> int {
-    let haystack_len: int = byte_len(haystack) as int;
-    let needle_len: int = byte_len(needle) as int;
+    let haystack_len: int = len_bytes(haystack) as int;
+    let needle_len: int = len_bytes(needle) as int;
     if needle_len == 0 { ret 0; }
     fn match_at(haystack: str, needle: str, i: int) -> bool {
         let j: int = i;
@@ -931,8 +932,8 @@ haystack - The string to look in
 needle - The string to look for
 */
 fn starts_with(haystack: str, needle: str) -> bool {
-    let haystack_len: uint = byte_len(haystack);
-    let needle_len: uint = byte_len(needle);
+    let haystack_len: uint = len_bytes(haystack);
+    let needle_len: uint = len_bytes(needle);
     if needle_len == 0u { ret true; }
     if needle_len > haystack_len { ret false; }
     ret eq(substr(haystack, 0u, needle_len), needle);
@@ -969,7 +970,7 @@ Function: is_ascii
 Determines if a string contains only ASCII characters
 */
 fn is_ascii(s: str) -> bool {
-    let i: uint = byte_len(s);
+    let i: uint = len_bytes(s);
     while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { ret false; } }
     ret true;
 }
@@ -1019,7 +1020,7 @@ pure fn byte_len(s: str) -> uint unsafe { len_bytes(s) }
 // String length or size in characters.
 // (Synonyms: len_chars, char_len)
 fn len(s: str) -> uint {
-    char_len_range(s, 0u, byte_len(s))
+    substr_len_chars(s, 0u, len_bytes(s))
 }
 
 fn len_chars(s: str) -> uint { len(s) }
@@ -1054,7 +1055,7 @@ fn is_utf8(v: [u8]) -> bool {
 }
 
 /*
-Function: char_len_range
+Function: substr_len_chars
 
 As char_len but for a slice of a string
 
@@ -1071,10 +1072,8 @@ Safety note:
 - This function does not check whether the substring is valid.
 - This function fails if `byte_offset` or `byte_len` do not
  represent valid positions inside `s`
-
-FIXME: rename to 'substr_len_chars'
 */
-fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
+fn substr_len_chars(s: str, byte_start: uint, byte_len: uint) -> uint {
     let i         = byte_start;
     let byte_stop = i + byte_len;
     let len   = 0u;
@@ -1088,7 +1087,7 @@ fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
 }
 
 /*
-Function: byte_len_range
+Function: substr_len_bytes
 
 As byte_len but for a substring
 
@@ -1108,7 +1107,6 @@ valid positions in `s`
 
-FIXME: rename to 'substr_len_bytes'
 */
-fn byte_len_range(s: str, byte_offset: uint, char_len: uint) -> uint {
+fn substr_len_bytes(s: str, byte_offset: uint, char_len: uint) -> uint {
     let i = byte_offset;
     let chars = 0u;
     while chars < char_len {
@@ -1149,7 +1148,7 @@ This function can be used to iterate over the unicode characters of a string.
 Example:
 > let s = "中华Việt Nam";
 > let i = 0u;
-> while i < str::byte_len(s) {
+> while i < str::len_bytes(s) {
 >    let {ch, next} = str::char_range_at(s, i);
 >    std::io::println(#fmt("%u: %c",i,ch));
 >    i = next;
@@ -1391,7 +1390,7 @@ mod unsafe {
    unsafe fn slice_bytes(s: str, begin: uint, end: uint) -> str unsafe {
        // FIXME: Typestate precondition
        assert (begin <= end);
-       assert (end <= byte_len(s));
+       assert (end <= len_bytes(s));
 
        let v = as_bytes(s) { |v| vec::slice(v, begin, end) };
        v += [0u8];
@@ -1408,7 +1407,7 @@ mod unsafe {
    unsafe fn slice_bytes_safe_range(s: str, begin: uint, end: uint)
        : uint::le(begin, end) -> str {
        // would need some magic to make this a precondition
-       assert (end <= byte_len(s));
+       assert (end <= len_bytes(s));
        ret slice_bytes(s, begin, end);
    }
 
@@ -1430,7 +1429,7 @@ mod unsafe {
    //
    // Removes the last byte from a string and returns it.  (Not UTF-8 safe).
    unsafe fn pop_byte(&s: str) -> u8 unsafe {
-       let len = byte_len(s);
+       let len = len_bytes(s);
        assert (len > 0u);
        let b = s[len - 1u];
        s = unsafe::slice_bytes(s, 0u, len - 1u);
@@ -1441,7 +1440,7 @@ mod unsafe {
    //
    // Removes the first byte from a string and returns it. (Not UTF-8 safe).
    unsafe fn shift_byte(&s: str) -> u8 unsafe {
-       let len = byte_len(s);
+       let len = len_bytes(s);
        assert (len > 0u);
        let b = s[0];
        s = unsafe::slice_bytes(s, 1u, len);
@@ -1470,14 +1469,23 @@ mod tests {
     }
 
     #[test]
-    fn test_bytes_len() {
-        assert (byte_len("") == 0u);
-        assert (byte_len("hello world") == 11u);
-        assert (byte_len("\x63") == 1u);
-        assert (byte_len("\xa2") == 2u);
-        assert (byte_len("\u03c0") == 2u);
-        assert (byte_len("\u2620") == 3u);
-        assert (byte_len("\U0001d11e") == 4u);
+    fn test_len() {
+        assert (len_bytes("") == 0u);
+        assert (len_bytes("hello world") == 11u);
+        assert (len_bytes("\x63") == 1u);
+        assert (len_bytes("\xa2") == 2u);
+        assert (len_bytes("\u03c0") == 2u);
+        assert (len_bytes("\u2620") == 3u);
+        assert (len_bytes("\U0001d11e") == 4u);
+
+        assert (len("") == 0u);
+        assert (len("hello world") == 11u);
+        assert (len("\x63") == 1u);
+        assert (len("\xa2") == 1u);
+        assert (len("\u03c0") == 1u);
+        assert (len("\u2620") == 1u);
+        assert (len("\U0001d11e") == 1u);
+        assert (len("ประเทศไทย中华Việt Nam") == 19u);
     }
 
     #[test]
@@ -1712,7 +1720,7 @@ mod tests {
     #[test]
     fn test_substr() {
         fn t(a: str, b: str, start: int) {
-            assert (eq(substr(a, start as uint, byte_len(b)), b));
+            assert (eq(substr(a, start as uint, len_bytes(b)), b));
         }
         t("hello", "llo", 2);
         t("hello", "el", 1);
@@ -2002,7 +2010,7 @@ mod tests {
         let v: [u8] = bytes(s1);
         let s2: str = from_bytes(v);
         let i: uint = 0u;
-        let n1: uint = byte_len(s1);
+        let n1: uint = len_bytes(s1);
         let n2: uint = vec::len::<u8>(v);
         assert (n1 == n2);
         while i < n1 {
diff --git a/src/libstd/rope.rs b/src/libstd/rope.rs
index c329cdd3494..07ee837a9b9 100644
--- a/src/libstd/rope.rs
+++ b/src/libstd/rope.rs
@@ -743,7 +743,7 @@ mod node {
      */
     fn of_substr(str: @str, byte_start: uint, byte_len: uint) -> @node {
         ret of_substr_unsafer(str, byte_start, byte_len,
-                  str::char_len_range(*str, byte_start, byte_len));
+                  str::substr_len_chars(*str, byte_start, byte_len));
     }
 
     /*
@@ -795,7 +795,7 @@ mod node {
                     if i == 0u  { first_leaf_char_len }
                     else { hint_max_leaf_char_len };
                 let chunk_byte_len =
-                    str::byte_len_range(*str, offset, chunk_char_len);
+                    str::substr_len_bytes(*str, offset, chunk_char_len);
                 nodes[i] = @leaf({
                     byte_offset: offset,
                     byte_len:    chunk_byte_len,
@@ -998,7 +998,7 @@ mod node {
             alt(*node) {
               node::leaf(x) {
                 let char_len =
-                    str::char_len_range(*x.content, byte_offset, byte_len);
+                    str::substr_len_chars(*x.content, byte_offset, byte_len);
                 ret @leaf({byte_offset: byte_offset,
                                 byte_len:    byte_len,
                                 char_len:    char_len,
@@ -1059,9 +1059,9 @@ mod node {
                     ret node;
                 }
                 let byte_offset =
-                    str::byte_len_range(*x.content, 0u, char_offset);
+                    str::substr_len_bytes(*x.content, 0u, char_offset);
                 let byte_len    =
-                    str::byte_len_range(*x.content, byte_offset, char_len);
+                    str::substr_len_bytes(*x.content, byte_offset, char_len);
                 ret @leaf({byte_offset: byte_offset,
                            byte_len:    byte_len,
                            char_len:    char_len,