From 2b0396c34adc95efc0451536554a6f7c928c1e61 Mon Sep 17 00:00:00 2001
From: Kevin Cantu <me@kevincantu.org>
Date: Mon, 6 Feb 2012 23:06:21 -0800
Subject: [PATCH] core: make str::substr use char positions (and replace other
 uses)

---
 src/comp/back/link.rs  |  9 +++++----
 src/comp/util/ppaux.rs |  4 ++--
 src/libcore/extfmt.rs  | 18 +++++++++---------
 src/libcore/str.rs     | 38 ++++++++++++++++++++------------------
 src/libstd/fs.rs       |  4 ++--
 src/libstd/rope.rs     |  7 ++++---
 src/libstd/sha1.rs     |  5 +++--
 7 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs
index 68810fcd81b..714ed3c865a 100644
--- a/src/comp/back/link.rs
+++ b/src/comp/back/link.rs
@@ -113,12 +113,13 @@ mod write {
 
     // Decides what to call an intermediate file, given the name of the output
     // and the extension to use.
-    fn mk_intermediate_name(output_path: str, extension: str) -> str {
+    fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe {
         let dot_pos = str::index(output_path, '.' as u8);
         let stem;
         if dot_pos < 0 {
             stem = output_path;
-        } else { stem = str::substr(output_path, 0u, dot_pos as uint); }
+        } else { stem = str::unsafe::slice_bytes(output_path, 0u,
+                                                 dot_pos as uint); }
         ret stem + "." + extension;
     }
     fn run_passes(sess: session, llmod: ModuleRef, output: str) {
@@ -480,8 +481,8 @@ fn build_link_meta(sess: session, c: ast::crate, output: str,
     ret {name: name, vers: vers, extras_hash: extras_hash};
 }
 
-fn truncated_sha1_result(sha: sha1) -> str {
-    ret str::substr(sha.result_str(), 0u, 16u);
+fn truncated_sha1_result(sha: sha1) -> str unsafe {
+    ret str::unsafe::slice_bytes(sha.result_str(), 0u, 16u);
 }
 
 
diff --git a/src/comp/util/ppaux.rs b/src/comp/util/ppaux.rs
index 3b2cf157e32..db7d6de5f8b 100644
--- a/src/comp/util/ppaux.rs
+++ b/src/comp/util/ppaux.rs
@@ -116,9 +116,9 @@ fn ty_to_str(cx: ctxt, typ: t) -> str {
     }
 }
 
-fn ty_to_short_str(cx: ctxt, typ: t) -> str {
+fn ty_to_short_str(cx: ctxt, typ: t) -> str unsafe {
     let s = encoder::encoded_ty(cx, typ);
-    if str::byte_len(s) >= 32u { s = str::substr(s, 0u, 32u); }
+    if str::byte_len(s) >= 32u { s = str::unsafe::slice_bytes(s, 0u, 32u); }
     ret s;
 }
 
diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs
index 534f2a492d2..f792c0682fd 100644
--- a/src/libcore/extfmt.rs
+++ b/src/libcore/extfmt.rs
@@ -80,7 +80,7 @@ mod ct {
     enum piece { piece_string(str), piece_conv(conv), }
     type error_fn = fn@(str) -> ! ;
 
-    fn parse_fmt_string(s: str, error: error_fn) -> [piece] {
+    fn parse_fmt_string(s: str, error: error_fn) -> [piece] unsafe {
         let pieces: [piece] = [];
         let lim = str::byte_len(s);
         let buf = "";
@@ -93,13 +93,13 @@ mod ct {
         }
         let i = 0u;
         while i < lim {
-            let curr = str::substr(s, i, 1u);
+            let curr = str::unsafe::slice_bytes(s, i, i+1u);
             if str::eq(curr, "%") {
                 i += 1u;
                 if i >= lim {
                     error("unterminated conversion at end of string");
                 }
-                let curr2 = str::substr(s, i, 1u);
+                let curr2 = str::unsafe::slice_bytes(s, i, i+1u);
                 if str::eq(curr2, "%") {
                     buf += curr2;
                     i += 1u;
@@ -223,9 +223,9 @@ mod ct {
             } else { {count: count_implied, next: i} };
     }
     fn parse_type(s: str, i: uint, lim: uint, error: error_fn) ->
-       {ty: ty, next: uint} {
+       {ty: ty, next: uint} unsafe {
         if i >= lim { error("missing type in conversion"); }
-        let tstr = str::substr(s, i, 1u);
+        let tstr = str::unsafe::slice_bytes(s, i, i+1u);
         // TODO: Do we really want two signed types here?
         // How important is it to be printf compatible?
         let t =
@@ -317,7 +317,7 @@ mod rt {
     fn conv_char(cv: conv, c: char) -> str {
         ret pad(cv, str::from_char(c), pad_nozero);
     }
-    fn conv_str(cv: conv, s: str) -> str {
+    fn conv_str(cv: conv, s: str) -> str unsafe {
         // For strings, precision is the maximum characters
         // displayed
 
@@ -327,7 +327,7 @@ mod rt {
               count_implied { s }
               count_is(max) {
                 if max as uint < str::char_len(s) {
-                    str::substr(s, 0u, max as uint)
+                    str::substr(s, 0u, max as uint) // precision is in chars
                 } else { s }
               }
             };
@@ -391,7 +391,7 @@ mod rt {
         ret str::from_bytes(svec);
     }
     enum pad_mode { pad_signed, pad_unsigned, pad_nozero, }
-    fn pad(cv: conv, s: str, mode: pad_mode) -> str {
+    fn pad(cv: conv, s: str, mode: pad_mode) -> str unsafe {
         let uwidth;
         alt cv.width {
           count_implied { ret s; }
@@ -440,7 +440,7 @@ mod rt {
                 let headstr = str::from_bytes([head]);
                 // FIXME: not UTF-8 safe
                 let bytelen = str::byte_len(s);
-                let numpart = str::substr(s, 1u, bytelen - 1u);
+                let numpart = str::unsafe::slice_bytes(s, 1u, bytelen);
                 ret headstr + padstr + numpart;
             }
         }
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 16cc0fddf2b..009fcec0388 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -249,12 +249,12 @@ Failure:
 
 If the string does not contain any characters.
 */
-fn pop_char(&s: str) -> char {
+fn pop_char(&s: str) -> char unsafe {
     let end = byte_len(s);
     while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; }
     assert (end > 0u);
     let ch = char_at(s, end - 1u);
-    s = substr(s, 0u, end - 1u);
+    s = unsafe::slice_bytes(s, 0u, end - 1u);
     ret ch;
 }
 
@@ -267,9 +267,9 @@ Failure:
 
 If the string does not contain any characters.
 */
-fn shift_char(&s: str) -> char {
+fn shift_char(&s: str) -> char unsafe {
     let r = char_range_at(s, 0u);
-    s = substr(s, r.next, byte_len(s) - r.next);
+    s = unsafe::slice_bytes(s, r.next, byte_len(s));
     ret r.ch;
 }
 
@@ -306,12 +306,13 @@ Function: pop_byte
 Removes the last byte from a string and returns it.
 
 This function is not unicode-safe.
+FIXME: move to unsafe?
 */
-fn pop_byte(&s: str) -> u8 {
+fn pop_byte(&s: str) -> u8 unsafe {
     let len = byte_len(s);
     assert (len > 0u);
     let b = s[len - 1u];
-    s = substr(s, 0u, len - 1u);
+    s = unsafe::slice_bytes(s, 0u, len - 1u);
     ret b;
 }
 
@@ -321,12 +322,13 @@ Function: shift_byte
 Removes the first byte from a string and returns it.
 
 This function is not unicode-safe.
+FIXME: move to unsafe?
 */
-fn shift_byte(&s: str) -> u8 {
+fn shift_byte(&s: str) -> u8 unsafe {
     let len = byte_len(s);
     assert (len > 0u);
     let b = s[0];
-    s = substr(s, 1u, len - 1u);
+    s = unsafe::slice_bytes(s, 1u, len);
     ret b;
 }
 
@@ -413,17 +415,15 @@ fn chars(s: str) -> [char] {
 /*
 Function: substr
 
-Take a substring of another. Returns a string containing `len` bytes
-starting at byte offset `begin`.
-
-FIXME: This function is not unicode-safe.
+Take a substring of another. Returns a string containing `len` chars
+starting at char offset `begin`.
 
 Failure:
 
-If `begin` + `len` is is greater than the byte length of the string
+If `begin` + `len` is greater than the char length of the string
 */
-fn substr(s: str, begin: uint, len: uint) -> str unsafe {
-    ret unsafe::slice_bytes(s, begin, begin + len);
+fn substr(s: str, begin: uint, len: uint) -> str {
+    ret slice(s, begin, begin + len);
 }
 
 /*
@@ -941,8 +941,8 @@ haystack - The string to look in
 needle - The string to look for
 */
 fn ends_with(haystack: str, needle: str) -> bool {
-    let haystack_len: uint = byte_len(haystack);
-    let needle_len: uint = byte_len(needle);
+    let haystack_len: uint = char_len(haystack);
+    let needle_len: uint = char_len(needle);
     ret if needle_len == 0u {
             true
         } else if needle_len > haystack_len {
@@ -1598,7 +1598,9 @@ mod tests {
         }
         t("hello", "llo", 2);
         t("hello", "el", 1);
-        t("substr should not be a challenge", "not", 14);
+
+        assert "ะเทศไท"
+            == substr("ประเทศไทย中华Việt Nam", 2u, 6u);
     }
 
     #[test]
diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs
index ef0ff7e6b57..239b8768858 100644
--- a/src/libstd/fs.rs
+++ b/src/libstd/fs.rs
@@ -43,13 +43,13 @@ The dirname of "/usr/share" will be "/usr", but the dirname of
 
 If the path is not prefixed with a directory, then "." is returned.
 */
-fn dirname(p: path) -> path {
+fn dirname(p: path) -> path unsafe {
     let i: int = str::rindex(p, os_fs::path_sep as u8);
     if i == -1 {
         i = str::rindex(p, os_fs::alt_path_sep as u8);
         if i == -1 { ret "."; }
     }
-    ret str::substr(p, 0u, i as uint);
+    ret str::unsafe::slice_bytes(p, 0u, i as uint);
 }
 
 /*
diff --git a/src/libstd/rope.rs b/src/libstd/rope.rs
index 28cdbfa3d40..b586f114bce 100644
--- a/src/libstd/rope.rs
+++ b/src/libstd/rope.rs
@@ -1341,11 +1341,12 @@ mod tests {
           node::empty { ret "" }
           node::content(x) {
             let str = @mutable "";
-            fn aux(str: @mutable str, node: @node::node) {
+            fn aux(str: @mutable str, node: @node::node) unsafe {
                 alt(*node) {
                   node::leaf(x) {
-                    *str += str::substr(
-                        *x.content, x.byte_offset, x.byte_len);
+                    *str += str::unsafe::slice_bytes(
+                        *x.content, x.byte_offset,
+                        x.byte_offset + x.byte_len);
                   }
                   node::concat(x) {
                     aux(str, x.left);
diff --git a/src/libstd/sha1.rs b/src/libstd/sha1.rs
index e23737a6c3f..a9911d01f03 100644
--- a/src/libstd/sha1.rs
+++ b/src/libstd/sha1.rs
@@ -291,7 +291,7 @@ fn mk_sha1() -> sha1 {
 mod tests {
 
     #[test]
-    fn test() {
+    fn test() unsafe {
         type test = {input: str, output: [u8]};
 
         fn a_million_letter_a() -> str {
@@ -372,7 +372,8 @@ mod tests {
             let left = len;
             while left > 0u {
                 let take = (left + 1u) / 2u;
-                sh.input_str(str::substr(t.input, len - left, take));
+                sh.input_str(str::unsafe::slice_bytes(t.input, len - left,
+                             take + len - left));
                 left = left - take;
             }
             let out = sh.result();