From 2b0396c34adc95efc0451536554a6f7c928c1e61 Mon Sep 17 00:00:00 2001 From: Kevin Cantu <me@kevincantu.org> Date: Mon, 6 Feb 2012 23:06:21 -0800 Subject: [PATCH] core: make str::substr use char positions (and replace other uses) --- src/comp/back/link.rs | 9 +++++---- src/comp/util/ppaux.rs | 4 ++-- src/libcore/extfmt.rs | 18 +++++++++--------- src/libcore/str.rs | 38 ++++++++++++++++++++------------------ src/libstd/fs.rs | 4 ++-- src/libstd/rope.rs | 7 ++++--- src/libstd/sha1.rs | 5 +++-- 7 files changed, 45 insertions(+), 40 deletions(-) diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs index 68810fcd81b..714ed3c865a 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -113,12 +113,13 @@ mod write { // Decides what to call an intermediate file, given the name of the output // and the extension to use. - fn mk_intermediate_name(output_path: str, extension: str) -> str { + fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe { let dot_pos = str::index(output_path, '.' as u8); let stem; if dot_pos < 0 { stem = output_path; - } else { stem = str::substr(output_path, 0u, dot_pos as uint); } + } else { stem = str::unsafe::slice_bytes(output_path, 0u, + dot_pos as uint); } ret stem + "." + extension; } fn run_passes(sess: session, llmod: ModuleRef, output: str) { @@ -480,8 +481,8 @@ fn build_link_meta(sess: session, c: ast::crate, output: str, ret {name: name, vers: vers, extras_hash: extras_hash}; } -fn truncated_sha1_result(sha: sha1) -> str { - ret str::substr(sha.result_str(), 0u, 16u); +fn truncated_sha1_result(sha: sha1) -> str unsafe { + ret str::unsafe::slice_bytes(sha.result_str(), 0u, 16u); } diff --git a/src/comp/util/ppaux.rs b/src/comp/util/ppaux.rs index 3b2cf157e32..db7d6de5f8b 100644 --- a/src/comp/util/ppaux.rs +++ b/src/comp/util/ppaux.rs @@ -116,9 +116,9 @@ fn ty_to_str(cx: ctxt, typ: t) -> str { } } -fn ty_to_short_str(cx: ctxt, typ: t) -> str { +fn ty_to_short_str(cx: ctxt, typ: t) -> str unsafe { let s = encoder::encoded_ty(cx, typ); - if str::byte_len(s) >= 32u { s = str::substr(s, 0u, 32u); } + if str::byte_len(s) >= 32u { s = str::unsafe::slice_bytes(s, 0u, 32u); } ret s; } diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs index 534f2a492d2..f792c0682fd 100644 --- a/src/libcore/extfmt.rs +++ b/src/libcore/extfmt.rs @@ -80,7 +80,7 @@ mod ct { enum piece { piece_string(str), piece_conv(conv), } type error_fn = fn@(str) -> ! ; - fn parse_fmt_string(s: str, error: error_fn) -> [piece] { + fn parse_fmt_string(s: str, error: error_fn) -> [piece] unsafe { let pieces: [piece] = []; let lim = str::byte_len(s); let buf = ""; @@ -93,13 +93,13 @@ mod ct { } let i = 0u; while i < lim { - let curr = str::substr(s, i, 1u); + let curr = str::unsafe::slice_bytes(s, i, i+1u); if str::eq(curr, "%") { i += 1u; if i >= lim { error("unterminated conversion at end of string"); } - let curr2 = str::substr(s, i, 1u); + let curr2 = str::unsafe::slice_bytes(s, i, i+1u); if str::eq(curr2, "%") { buf += curr2; i += 1u; @@ -223,9 +223,9 @@ mod ct { } else { {count: count_implied, next: i} }; } fn parse_type(s: str, i: uint, lim: uint, error: error_fn) -> - {ty: ty, next: uint} { + {ty: ty, next: uint} unsafe { if i >= lim { error("missing type in conversion"); } - let tstr = str::substr(s, i, 1u); + let tstr = str::unsafe::slice_bytes(s, i, i+1u); // TODO: Do we really want two signed types here? // How important is it to be printf compatible? let t = @@ -317,7 +317,7 @@ mod rt { fn conv_char(cv: conv, c: char) -> str { ret pad(cv, str::from_char(c), pad_nozero); } - fn conv_str(cv: conv, s: str) -> str { + fn conv_str(cv: conv, s: str) -> str unsafe { // For strings, precision is the maximum characters // displayed @@ -327,7 +327,7 @@ mod rt { count_implied { s } count_is(max) { if max as uint < str::char_len(s) { - str::substr(s, 0u, max as uint) + str::unsafe::slice_bytes(s, 0u, max as uint) } else { s } } }; @@ -391,7 +391,7 @@ mod rt { ret str::from_bytes(svec); } enum pad_mode { pad_signed, pad_unsigned, pad_nozero, } - fn pad(cv: conv, s: str, mode: pad_mode) -> str { + fn pad(cv: conv, s: str, mode: pad_mode) -> str unsafe { let uwidth; alt cv.width { count_implied { ret s; } @@ -440,7 +440,7 @@ mod rt { let headstr = str::from_bytes([head]); // FIXME: not UTF-8 safe let bytelen = str::byte_len(s); - let numpart = str::substr(s, 1u, bytelen - 1u); + let numpart = str::unsafe::slice_bytes(s, 1u, bytelen); ret headstr + padstr + numpart; } } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 16cc0fddf2b..009fcec0388 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -249,12 +249,12 @@ Failure: If the string does not contain any characters. */ -fn pop_char(&s: str) -> char { +fn pop_char(&s: str) -> char unsafe { let end = byte_len(s); while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; } assert (end > 0u); let ch = char_at(s, end - 1u); - s = substr(s, 0u, end - 1u); + s = unsafe::slice_bytes(s, 0u, end - 1u); ret ch; } @@ -267,9 +267,9 @@ Failure: If the string does not contain any characters. */ -fn shift_char(&s: str) -> char { +fn shift_char(&s: str) -> char unsafe { let r = char_range_at(s, 0u); - s = substr(s, r.next, byte_len(s) - r.next); + s = unsafe::slice_bytes(s, r.next, byte_len(s)); ret r.ch; } @@ -306,12 +306,13 @@ Function: pop_byte Removes the last byte from a string and returns it. This function is not unicode-safe. +FIXME: move to unsafe? */ -fn pop_byte(&s: str) -> u8 { +fn pop_byte(&s: str) -> u8 unsafe { let len = byte_len(s); assert (len > 0u); let b = s[len - 1u]; - s = substr(s, 0u, len - 1u); + s = unsafe::slice_bytes(s, 0u, len - 1u); ret b; } @@ -321,12 +322,13 @@ Function: shift_byte Removes the first byte from a string and returns it. This function is not unicode-safe. +FIXME: move to unsafe? */ -fn shift_byte(&s: str) -> u8 { +fn shift_byte(&s: str) -> u8 unsafe { let len = byte_len(s); assert (len > 0u); let b = s[0]; - s = substr(s, 1u, len - 1u); + s = unsafe::slice_bytes(s, 1u, len); ret b; } @@ -413,17 +415,15 @@ fn chars(s: str) -> [char] { /* Function: substr -Take a substring of another. Returns a string containing `len` bytes -starting at byte offset `begin`. - -FIXME: This function is not unicode-safe. +Take a substring of another. Returns a string containing `len` chars +starting at char offset `begin`. Failure: -If `begin` + `len` is is greater than the byte length of the string +If `begin` + `len` is is greater than the char length of the string */ -fn substr(s: str, begin: uint, len: uint) -> str unsafe { - ret unsafe::slice_bytes(s, begin, begin + len); +fn substr(s: str, begin: uint, len: uint) -> str { + ret slice(s, begin, begin + len); } /* @@ -941,8 +941,8 @@ haystack - The string to look in needle - The string to look for */ fn ends_with(haystack: str, needle: str) -> bool { - let haystack_len: uint = byte_len(haystack); - let needle_len: uint = byte_len(needle); + let haystack_len: uint = char_len(haystack); + let needle_len: uint = char_len(needle); ret if needle_len == 0u { true } else if needle_len > haystack_len { @@ -1598,7 +1598,9 @@ mod tests { } t("hello", "llo", 2); t("hello", "el", 1); - t("substr should not be a challenge", "not", 14); + + assert "ะเทศไท" + == substr("ประเทศไทย中华Việt Nam", 2u, 6u); } #[test] diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index ef0ff7e6b57..239b8768858 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -43,13 +43,13 @@ The dirname of "/usr/share" will be "/usr", but the dirname of If the path is not prefixed with a directory, then "." is returned. */ -fn dirname(p: path) -> path { +fn dirname(p: path) -> path unsafe { let i: int = str::rindex(p, os_fs::path_sep as u8); if i == -1 { i = str::rindex(p, os_fs::alt_path_sep as u8); if i == -1 { ret "."; } } - ret str::substr(p, 0u, i as uint); + ret str::unsafe::slice_bytes(p, 0u, i as uint); } /* diff --git a/src/libstd/rope.rs b/src/libstd/rope.rs index 28cdbfa3d40..b586f114bce 100644 --- a/src/libstd/rope.rs +++ b/src/libstd/rope.rs @@ -1341,11 +1341,12 @@ mod tests { node::empty { ret "" } node::content(x) { let str = @mutable ""; - fn aux(str: @mutable str, node: @node::node) { + fn aux(str: @mutable str, node: @node::node) unsafe { alt(*node) { node::leaf(x) { - *str += str::substr( - *x.content, x.byte_offset, x.byte_len); + *str += str::unsafe::slice_bytes( + *x.content, x.byte_offset, + x.byte_offset + x.byte_len); } node::concat(x) { aux(str, x.left); diff --git a/src/libstd/sha1.rs b/src/libstd/sha1.rs index e23737a6c3f..a9911d01f03 100644 --- a/src/libstd/sha1.rs +++ b/src/libstd/sha1.rs @@ -291,7 +291,7 @@ fn mk_sha1() -> sha1 { mod tests { #[test] - fn test() { + fn test() unsafe { type test = {input: str, output: [u8]}; fn a_million_letter_a() -> str { @@ -372,7 +372,8 @@ mod tests { let left = len; while left > 0u { let take = (left + 1u) / 2u; - sh.input_str(str::substr(t.input, len - left, take)); + sh.input_str(str::unsafe::slice_bytes(t.input, len - left, + take + len - left)); left = left - take; } let out = sh.result();