diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs index fd966b40884..c2cc106b3e2 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -109,13 +109,10 @@ mod write { // Decides what to call an intermediate file, given the name of the output // and the extension to use. fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe { - let stem = alt str::index(output_path, '.') { - option::some(dot_pos) { - str::slice(output_path, 0u, dot_pos) - } - option::none { output_path } - }; - + let stem = alt str::find_char(output_path, '.') { + some(dot_pos) { str::slice(output_path, 0u, dot_pos) } + none { output_path } + }; ret stem + "." + extension; } @@ -566,7 +563,7 @@ fn link_binary(sess: session, // Converts a library file name into a cc -l argument fn unlib(config: @session::config, filename: str) -> str unsafe { let rmlib = fn@(filename: str) -> str { - let found = str::find(filename, "lib"); + let found = str::find_str(filename, "lib"); if config.os == session::os_macos || (config.os == session::os_linux || config.os == session::os_freebsd) && diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index 9520d9fd341..a7dbe574717 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -157,7 +157,7 @@ fn span_to_lines(sp: span, cm: codemap::codemap) -> @file_lines { fn get_line(fm: filemap, line: int) -> str unsafe { let begin: uint = fm.lines[line].byte - fm.start_pos.byte; - let end = alt str::index_from(*fm.src, '\n', begin, str::len(*fm.src)) { + let end = alt str::find_char_from(*fm.src, '\n', begin) { some(e) { e } none { str::len(*fm.src) } }; @@ -165,14 +165,12 @@ fn get_line(fm: filemap, line: int) -> str unsafe { } fn lookup_byte_offset(cm: codemap::codemap, chpos: uint) - -> {fm: filemap, pos: uint} -{ - fn lookup(pos: file_pos) -> uint { ret pos.ch; } - let {fm,line} = lookup_line(cm,chpos,lookup); + -> {fm: filemap, pos: uint} { + let {fm, line} = lookup_line(cm, chpos, {|pos| pos.ch}); let line_offset = fm.lines[line].byte - fm.start_pos.byte; let col = chpos - fm.lines[line].ch; - let col_offset = str::substr_len(*fm.src, line_offset, col); - ret {fm: fm, pos: line_offset + col_offset}; + let col_offset = str::count_bytes(*fm.src, line_offset, col); + {fm: fm, pos: line_offset + col_offset} } fn span_to_snippet(sp: span, cm: codemap::codemap) -> str { diff --git a/src/comp/syntax/ext/qquote.rs b/src/comp/syntax/ext/qquote.rs index e53307b474a..7f7d5a387f0 100644 --- a/src/comp/syntax/ext/qquote.rs +++ b/src/comp/syntax/ext/qquote.rs @@ -214,7 +214,7 @@ fn finish if (j < g_len && i == cx.gather[j].lo) { assert ch == '$'; let repl = #fmt("$%u ", j); - state = skip(str::len_chars(repl)); + state = skip(str::char_len(repl)); str2 += repl; } alt state { diff --git a/src/compiletest/errors.rs b/src/compiletest/errors.rs index 90b0c7d34e5..1fedd785750 100644 --- a/src/compiletest/errors.rs +++ b/src/compiletest/errors.rs @@ -25,7 +25,7 @@ fn load_errors(testfile: str) -> [expected_error] { fn parse_expected(line_num: uint, line: str) -> [expected_error] unsafe { let error_tag = "//!"; let idx; - alt str::find(line, error_tag) { + alt str::find_str(line, error_tag) { option::none { ret []; } option::some(nn) { idx = (nn as uint) + str::len(error_tag); } } diff --git a/src/compiletest/header.rs b/src/compiletest/header.rs index 099598d7fdb..636cf0746a6 100644 --- a/src/compiletest/header.rs +++ b/src/compiletest/header.rs @@ -106,7 +106,7 @@ fn parse_name_directive(line: str, directive: str) -> bool { fn parse_name_value_directive(line: str, directive: str) -> option unsafe { let keycolon = directive + ":"; - alt str::find(line, keycolon) { + alt str::find_str(line, keycolon) { option::some(colon) { let value = str::slice(line, colon + str::len(keycolon), str::len(line)); diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs index 5fb0bca25fa..74fcbf841f4 100644 --- a/src/compiletest/runtest.rs +++ b/src/compiletest/runtest.rs @@ -198,7 +198,7 @@ fn check_error_patterns(props: test_props, let next_err_idx = 0u; let next_err_pat = props.error_patterns[next_err_idx]; - for line: str in str::split_byte(procres.stderr, '\n' as u8) { + for line: str in str::split_char(procres.stderr, '\n') { if str::contains(line, next_err_pat) { #debug("found error pattern %s", next_err_pat); next_err_idx += 1u; @@ -245,7 +245,7 @@ fn check_expected_errors(expected_errors: [errors::expected_error], // filename:line1:col1: line2:col2: *warning:* msg // where line1:col1: is the starting point, line2:col2: // is the ending point, and * represents ANSI color codes. - for line: str in str::split_byte(procres.stderr, '\n' as u8) { + for line: str in str::split_char(procres.stderr, '\n') { let was_expected = false; vec::iteri(expected_errors) {|i, ee| if !found_flags[i] { @@ -350,7 +350,7 @@ fn split_maybe_args(argstr: option) -> [str] { } alt argstr { - option::some(s) { rm_whitespace(str::split_byte(s, ' ' as u8)) } + option::some(s) { rm_whitespace(str::split_char(s, ' ')) } option::none { [] } } } @@ -410,12 +410,10 @@ fn make_out_name(config: config, testfile: str, extension: str) -> str { fn output_base_name(config: config, testfile: str) -> str { let base = config.build_base; - let filename = - { - let parts = str::split_byte(fs::basename(testfile), '.' as u8); - parts = vec::slice(parts, 0u, vec::len(parts) - 1u); - str::connect(parts, ".") - }; + let filename = { + let parts = str::split_char(fs::basename(testfile), '.'); + str::connect(vec::slice(parts, 0u, vec::len(parts) - 1u), ".") + }; #fmt["%s%s.%s", base, filename, config.stage_id] } diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs index b5e2eade244..288ae779a2a 100644 --- a/src/libcore/extfmt.rs +++ b/src/libcore/extfmt.rs @@ -320,16 +320,14 @@ mod rt { fn conv_str(cv: conv, s: str) -> str unsafe { // For strings, precision is the maximum characters // displayed - - let unpadded = - alt cv.precision { - count_implied { s } - count_is(max) { - if max as uint < str::len_chars(s) { - str::substr(s, 0u, max as uint) - } else { s } - } - }; + let unpadded = alt cv.precision { + count_implied { s } + count_is(max) { + if max as uint < str::char_len(s) { + str::substr(s, 0u, max as uint) + } else { s } + } + }; ret pad(cv, unpadded, pad_nozero); } fn conv_float(cv: conv, f: float) -> str { @@ -368,7 +366,7 @@ mod rt { "" } else { let s = uint::to_str(num, radix); - let len = str::len_chars(s); + let len = str::char_len(s); if len < prec { let diff = prec - len; let pad = str_init_elt(diff, '0'); @@ -400,7 +398,7 @@ mod rt { uwidth = width as uint; } } - let strlen = str::len_chars(s); + let strlen = str::char_len(s); if uwidth <= strlen { ret s; } let padchar = ' '; let diff = uwidth - strlen; diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 2b8a8956c83..915329292a6 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -44,7 +44,6 @@ export to_lower, to_upper, replace, - escape, // Comparing strings eq, @@ -52,8 +51,8 @@ export hash, // Iterating through strings - all, - any, + all, any, + all_between, any_between, map, bytes_iter, chars_iter, @@ -63,14 +62,11 @@ export lines_iter, // Searching - //index_chars, - index, - index_from, - rindex, - //rindex_chars, - find, - find_from, - find_chars, + find, find_from, find_between, + rfind, rfind_from, rfind_between, + find_char, find_char_from, find_char_between, + rfind_char, rfind_char_from, rfind_char_between, + find_str, find_str_from, find_str_between, contains, starts_with, ends_with, @@ -81,29 +77,23 @@ export is_not_empty, is_whitespace, len, - len_chars, + char_len, // Misc - // FIXME: perhaps some more of this section shouldn't be exported? is_utf8, - substr_len, - substr_len_chars, + count_chars, count_bytes, utf8_char_width, char_range_at, is_char_boundary, char_at, - substr_all, - escape_char, as_bytes, as_buf, - //buf, sbuf, reserve, unsafe; - #[abi = "cdecl"] native mod rustrt { fn rust_str_push(&s: str, ch: u8); @@ -303,18 +293,14 @@ Function: trim_left Returns a string with leading whitespace removed. */ -fn trim_left(s: str) -> str { - fn count_whities(s: [char]) -> uint { - let i = 0u; - while i < vec::len(s) { - if !char::is_whitespace(s[i]) { break; } - i += 1u; - } - ret i; +fn trim_left(+s: str) -> str { + alt find(s, {|c| !char::is_whitespace(c)}) { + none { "" } + some(first) { + if first == 0u { s } + else unsafe { unsafe::slice_bytes(s, first, len(s)) } + } } - let chars = chars(s); - let whities = count_whities(chars); - ret from_chars(vec::slice(chars, whities, vec::len(chars))); } /* @@ -322,18 +308,15 @@ Function: trim_right Returns a string with trailing whitespace removed. */ -fn trim_right(s: str) -> str { - fn count_whities(s: [char]) -> uint { - let i = vec::len(s); - while 0u < i { - if !char::is_whitespace(s[i - 1u]) { break; } - i -= 1u; - } - ret i; +fn trim_right(+s: str) -> str { + alt rfind(s, {|c| !char::is_whitespace(c)}) { + none { "" } + some(last) { + let {next, _} = char_range_at(s, last); + if next == len(s) { s } + else unsafe { unsafe::slice_bytes(s, 0u, next) } + } } - let chars = chars(s); - let whities = count_whities(chars); - ret from_chars(vec::slice(chars, 0u, whities)); } /* @@ -341,8 +324,7 @@ Function: trim Returns a string with leading and trailing whitespace removed */ -fn trim(s: str) -> str { trim_left(trim_right(s)) } - +fn trim(+s: str) -> str { trim_left(trim_right(s)) } /* Section: Transforming strings @@ -376,15 +358,11 @@ fn chars(s: str) -> [char] { /* Function: substr -Take a substring of another. Returns a string containing `len` bytes -starting at char offset `begin`. - -Failure: - -If `begin` + `len` is is greater than the char length of the string +Take a substring of another. Returns a string containing `n` +characters starting at byte offset `begin`. */ -fn substr(s: str, begin: uint, len: uint) -> str { - ret slice(s, begin, begin + len); +fn substr(s: str, begin: uint, n: uint) -> str { + slice(s, begin, begin + count_bytes(s, begin, n)) } // Function: slice @@ -633,17 +611,6 @@ fn replace(s: str, from: str, to: str) -> str unsafe { result } -/* -Function: escape - -Escapes special characters inside the string, making it safe for transfer. -*/ -fn escape(s: str) -> str { - let r = ""; - chars_iter(s) { |c| r += escape_char(c) }; - r -} - /* Section: Comparing strings */ @@ -670,7 +637,6 @@ String hash function fn hash(&&s: str) -> uint { // djb hash. // FIXME: replace with murmur. - let u: uint = 5381u; for c: u8 in s { u *= 33u; u += c as uint; } ret u; @@ -686,8 +652,8 @@ Function: all Return true if a predicate matches all characters or if the string contains no characters */ -fn all(s: str, it: fn(char) -> bool) -> bool{ - ret substr_all(s, 0u, len(s), it); +fn all(s: str, it: fn(char) -> bool) -> bool { + all_between(s, 0u, len(s), it) } /* @@ -697,7 +663,7 @@ Return true if a predicate matches any character (and false if it matches none or there are no characters) */ fn any(ss: str, pred: fn(char) -> bool) -> bool { - !all(ss, {|cc| !pred(cc)}) + !all(ss, {|cc| !pred(cc)}) } /* @@ -708,12 +674,8 @@ Apply a function to each character fn map(ss: str, ff: fn(char) -> char) -> str { let result = ""; reserve(result, len(ss)); - - chars_iter(ss, {|cc| - str::push_char(result, ff(cc)); - }); - - ret result; + chars_iter(ss) {|cc| str::push_char(result, ff(cc));} + result } /* @@ -787,172 +749,192 @@ fn lines_iter(ss: str, ff: fn(&&str)) { Section: Searching */ -// Function: index +// Function: find_char // // Returns the byte index of the first matching char // (as option some/none) -fn index(ss: str, cc: char) -> option { - index_from(ss, cc, 0u, len(ss)) +fn find_char(s: str, c: char) -> option { + find_char_between(s, c, 0u, len(s)) } -// Function: index_from +// Function: find_char_from // // Returns the byte index of the first matching char -// (as option some/none), starting at `nn` -fn index_from(ss: str, cc: char, start: uint, end: uint) -> option { - let bii = start; - while bii < end { - let {ch, next} = char_range_at(ss, bii); - - // found here? - if ch == cc { - ret some(bii); - } - - bii = next; - } - - // wasn't found - ret none; +// (as option some/none), starting from `start` +fn find_char_from(s: str, c: char, from: uint) -> option { + find_char_between(s, c, from, len(s)) } -// Function: index_chars -// -// Returns the char index of the first matching char -// (as option some/none) -// FIXME: delete? -fn index_chars(ss: str, cc: char) -> option { - let bii = 0u; - let cii = 0u; - let len = len(ss); - while bii < len { - let {ch, next} = char_range_at(ss, bii); - - // found here? - if ch == cc { - ret some(cii); - } - - cii += 1u; - bii = next; - } - - // wasn't found - ret none; -} - -// Function: rindex +// Function: find_char_between // // Returns the byte index of the first matching char -// (as option some/none) -fn rindex(ss: str, cc: char) -> option { - let bii = len(ss); - while bii > 0u { - let {ch, prev} = char_range_at_reverse(ss, bii); - bii = prev; - - // found here? - if ch == cc { - ret some(bii); +// (as option some/none), between `start` and `end` +fn find_char_between(s: str, c: char, start: uint, end: uint) + -> option { + if c < 128u as char { + assert start <= end; + assert end <= len(s); + let i = start, b = c as u8; + while i < end { + if s[i] == b { ret some(i); } + i += 1u; } + ret none; + } else { + find_between(s, start, end, {|x| x == c}) } - - // wasn't found - ret none; } -// Function: rindex_chars +// Function: rfind_char // -// Returns the char index of the first matching char +// Returns the byte index of the last matching char // (as option some/none) -// FIXME: delete? -fn rindex_chars(ss: str, cc: char) -> option { - let bii = len(ss); - let cii = len_chars(ss); - while bii > 0u { - let {ch, prev} = char_range_at_reverse(ss, bii); - cii -= 1u; - bii = prev; +fn rfind_char(s: str, c: char) -> option { + rfind_char_between(s, c, len(s), 0u) +} - // found here? - if ch == cc { - ret some(cii); +// Function: rfind_char_from +// +// Returns the byte index of the last matching char +// (as option some/none), starting from `start` +fn rfind_char_from(s: str, c: char, start: uint) -> option { + rfind_char_between(s, c, start, 0u) +} + +// Function: rfind_char_between +// +// Returns the byte index of the last matching char (as option +// some/none), between from `start` and `end` (start must be greater +// than or equal to end) +fn rfind_char_between(s: str, c: char, start: uint, end: uint) + -> option { + if c < 128u as char { + assert start >= end; + assert start <= len(s); + let i = start, b = c as u8; + while i > end { + i -= 1u; + if s[i] == b { ret some(i); } } + ret none; + } else { + rfind_between(s, start, end, {|x| x == c}) } +} - // wasn't found +// Function: find +// +// Returns, as an option, the first character that passes the given +// predicate +fn find(s: str, f: fn(char) -> bool) -> option { + find_between(s, 0u, len(s), f) +} + +// Function: find_from +// +// Returns, as an option, the first character that passes the given +// predicate, starting at byte offset `start` +fn find_from(s: str, start: uint, f: fn(char) -> bool) -> option { + find_between(s, start, len(s), f) +} + +// Function: find_between +// +// Returns, as an option, the first character that passes the given +// predicate, between byte offsets `start` and `end` +fn find_between(s: str, start: uint, end: uint, f: fn(char) -> bool) + -> option { + assert start <= end; + assert end <= len(s); + assert is_char_boundary(s, start); + let i = start; + while i < end { + let {ch, next} = char_range_at(s, i); + if f(ch) { ret some(i); } + i = next; + } ret none; } -//Function: find +// Function: rfind +// +// Returns, as an option, the last character in the string that passes +// the given predicate +fn rfind(s: str, f: fn(char) -> bool) -> option { + rfind_between(s, len(s), 0u, f) +} + +// Function: rfind_from +// +// Returns, as an option, the last character that passes the given +// predicate, up until byte offset `start` +fn rfind_from(s: str, start: uint, f: fn(char) -> bool) -> option { + rfind_between(s, start, 0u, f) +} + +// Function: rfind_between +// +// Returns, as an option, the last character that passes the given +// predicate, between byte offsets `start` and `end` (`start` must be +// greater than or equal to `end`) +fn rfind_between(s: str, start: uint, end: uint, f: fn(char) -> bool) + -> option { + assert start >= end; + assert start <= len(s); + assert is_char_boundary(s, start); + let i = start; + while i > end { + let {ch, prev} = char_range_at_reverse(s, i); + if f(ch) { ret some(prev); } + i = prev; + } + ret none; +} + +// Utility used by various searching functions +fn match_at(haystack: str, needle: str, at: uint) -> bool { + let i = at; + for c in needle { if haystack[i] != c { ret false; } i += 1u; } + ret true; +} + +//Function: find_str // // Find the byte position of the first instance of one string // within another, or return option::none -fn find(haystack: str, needle: str) -> option { - find_from(haystack, needle, 0u, len(haystack)) +fn find_str(haystack: str, needle: str) -> option { + find_str_between(haystack, needle, 0u, len(haystack)) } -//Function: find_from +//Function: find_str_from // // Find the byte position of the first instance of one string // within another, or return option::none -// -// FIXME: Boyer-Moore should be significantly faster -fn find_from(haystack: str, needle: str, start: uint, end:uint) +fn find_str_from(haystack: str, needle: str, start: uint) -> option { + find_str_between(haystack, needle, start, len(haystack)) +} + +//Function: find_str_between +// +// Find the byte position of the first instance of one string +// within another, or return option::none +fn find_str_between(haystack: str, needle: str, start: uint, end:uint) + -> option { + // FIXME: Boyer-Moore should be significantly faster assert end <= len(haystack); - let needle_len = len(needle); - if needle_len == 0u { ret some(start); } if needle_len > end { ret none; } - fn match_at(haystack: str, needle: str, ii: uint) -> bool { - let jj = ii; - for c: u8 in needle { if haystack[jj] != c { ret false; } jj += 1u; } - ret true; + let i = start, e = end - needle_len; + while i <= e { + if match_at(haystack, needle, i) { ret some(i); } + i += 1u; } - - let ii = start; - while ii <= end - needle_len { - if match_at(haystack, needle, ii) { ret some(ii); } - ii += 1u; - } - ret none; } -// Function: find_chars -// -// Find the char position of the first instance of one string -// within another, or return option::none -// FIXME: delete? -fn find_chars(haystack: str, needle: str) -> option { - alt find(haystack, needle) { - none { ret none; } - some(nn) { ret some(b2c_pos(haystack, nn)); } - } -} - -// Function: b2c_pos -// -// Convert a byte position into a char position -// within a given string -fn b2c_pos(ss: str, bpos: uint) -> uint { - assert bpos == 0u || bpos < len(ss); - - let ii = 0u; - let cpos = 0u; - - while ii < bpos { - let sz = utf8_char_width(ss[ii]); - ii += sz; - cpos += 1u; - } - - ret cpos; -} - /* Function: contains @@ -964,7 +946,7 @@ haystack - The string to look in needle - The string to look for */ fn contains(haystack: str, needle: str) -> bool { - option::is_some(find(haystack, needle)) + option::is_some(find_str(haystack, needle)) } /* @@ -978,11 +960,10 @@ haystack - The string to look in needle - The string to look for */ fn starts_with(haystack: str, needle: str) -> bool unsafe { - let haystack_len: uint = len(haystack); - let needle_len: uint = len(needle); - if needle_len == 0u { ret true; } - if needle_len > haystack_len { ret false; } - ret eq(unsafe::slice_bytes(haystack, 0u, needle_len), needle); + let haystack_len = len(haystack), needle_len = len(needle); + if needle_len == 0u { true } + else if needle_len > haystack_len { false } + else { match_at(haystack, needle, 0u) } } /* @@ -994,16 +975,10 @@ haystack - The string to look in needle - The string to look for */ fn ends_with(haystack: str, needle: str) -> bool { - let haystack_len: uint = len(haystack); - let needle_len: uint = len(needle); - ret if needle_len == 0u { - true - } else if needle_len > haystack_len { - false - } else { - eq(substr(haystack, haystack_len - needle_len, needle_len), - needle) - }; + let haystack_len = len(haystack), needle_len = len(needle); + if needle_len == 0u { true } + else if needle_len > haystack_len { false } + else { match_at(haystack, needle, haystack_len - needle_len) } } /* @@ -1054,10 +1029,10 @@ pure fn len(s: str) -> uint unsafe { (*repr).fill - 1u } -// FIXME: delete? -fn len_chars(s: str) -> uint { - substr_len_chars(s, 0u, len(s)) -} +// Function: char_len +// +// Returns the number of characters that a string holds +fn char_len(s: str) -> uint { count_chars(s, 0u, len(s)) } /* Section: Misc @@ -1086,68 +1061,44 @@ fn is_utf8(v: [u8]) -> bool { } /* -Function: substr_len_chars +Function: count_chars As char_len but for a slice of a string Parameters: s - A valid string - byte_start - The position inside `s` where to start counting in bytes. - byte_len - The number of bytes of `s` to take into account. + start - The position inside `s` where to start counting in bytes. + end - The position where to stop counting Returns: - The number of Unicode characters in `s` in -segment [byte_start, byte_start+len( . - -Safety note: -- This function does not check whether the substring is valid. -- This function fails if `byte_offset` or `byte_len` do not - represent valid positions inside `s` - -FIXME: delete? + The number of Unicode characters in `s` between the given indices. */ -fn substr_len_chars(s: str, byte_start: uint, byte_len: uint) -> uint { - let i = byte_start; - let byte_stop = i + byte_len; - let len = 0u; - while i < byte_stop { - let chsize = utf8_char_width(s[i]); - assert (chsize > 0u); +fn count_chars(s: str, start: uint, end: uint) -> uint { + assert is_char_boundary(s, start); + assert is_char_boundary(s, end); + let i = start, len = 0u; + while i < end { + let {next, _} = char_range_at(s, i); len += 1u; - i += chsize; + i = next; } ret len; } -/* -Function: substr_len - -As byte_len but for a substring - -Parameters: -s - A string -byte_offset - The byte offset at which to start in the string -char_len - The number of chars (not bytes!) in the range - -Returns: -The number of bytes in the substring starting at `byte_offset` and -containing `char_len` chars. - -Safety note: - -This function fails if `byte_offset` or `char_len` do not represent -valid positions in `s` -*/ -fn substr_len(s: str, byte_offset: uint, char_len: uint) -> uint { - let i = byte_offset; - let chars = 0u; - while chars < char_len { - let chsize = utf8_char_width(s[i]); - assert (chsize > 0u); - i += chsize; - chars += 1u; +// Function count_bytes +// +// Counts the number of bytes taken by the `n` in `s` starting from +// `start`. +fn count_bytes(s: str, start: uint, n: uint) -> uint { + assert is_char_boundary(s, start); + let end = start, cnt = n, l = len(s); + while cnt > 0u { + assert end < l; + let {next, _} = char_range_at(s, end); + cnt -= 1u; + end = next; } - ret i - byte_offset; + end - start } /* @@ -1159,10 +1110,8 @@ Given a first byte, determine how many bytes are in this UTF-8 character pure fn utf8_char_width(b: u8) -> uint { let byte: uint = b as uint; if byte < 128u { ret 1u; } - if byte < 192u { - ret 0u; // Not a valid start byte - - } + // Not a valid start byte + if byte < 192u { ret 0u; } if byte < 224u { ret 2u; } if byte < 240u { ret 3u; } if byte < 248u { ret 4u; } @@ -1170,6 +1119,18 @@ pure fn utf8_char_width(b: u8) -> uint { ret 6u; } +/* +Function is_char_boundary + +Returns false if the index points into the middle of a multi-byte +character sequence. +*/ +pure fn is_char_boundary(s: str, index: uint) -> bool { + if index == len(s) { ret true; } + let b = s[index]; + ret b < 128u8 || b >= 192u8; +} + /* Function: char_range_at @@ -1235,18 +1196,6 @@ fn char_range_at(s: str, i: uint) -> {ch: char, next: uint} { ret {ch: val as char, next: i}; } -/* -Function is_char_boundary - -Returns false if the index points into the middle of a multi-byte -character sequence. -*/ -pure fn is_char_boundary(s: str, index: uint) -> bool { - if index == len(s) { ret true; } - let b = s[index]; - ret b < 128u8 || b >= 192u8; -} - /* Function: char_at @@ -1274,14 +1223,14 @@ fn char_range_at_reverse(ss: str, start: uint) -> {ch: char, prev: uint} { } /* -Function: substr_all +Function: all_between Loop through a substring, char by char Parameters: s - A string to traverse. It may be empty. -byte_offset - The byte offset at which to start in the string. -byte_len - The number of bytes to traverse in the string +start - The byte offset at which to start in the string. +end - The end of the range to traverse it - A block to execute with each consecutive character of `s`. Return `true` to continue, `false` to stop. @@ -1295,34 +1244,19 @@ Safety note: - This function fails if `byte_offset` or `byte_len` do not represent valid positions inside `s` */ -fn substr_all(s: str, byte_offset: uint, byte_len: uint, - it: fn(char) -> bool) -> bool { - let i = byte_offset; - let result = true; - while i < byte_len { - let {ch, next} = char_range_at(s, i); - if !it(ch) {result = false; break;} - i = next; - } - ret result; +fn all_between(s: str, start: uint, end: uint, it: fn(char) -> bool) -> bool { + assert is_char_boundary(s, start); + let i = start; + while i < end { + let {ch, next} = char_range_at(s, i); + if !it(ch) { ret false; } + i = next; + } + ret true; } - -/* -Function: escape_char - -Escapes a single character. -*/ -fn escape_char(c: char) -> str { - alt c { - '"' { "\\\"" } - '\\' { "\\\\" } - '\n' { "\\n" } - '\t' { "\\t" } - '\r' { "\\r" } - '\x00' to '\x1f' { #fmt["\\x%02x", c as uint] } - v { from_char(c) } - } +fn any_between(s: str, start: uint, end: uint, it: fn(char) -> bool) -> bool { + !all_between(s, start, end, {|c| !it(c)}) } // UTF-8 tags and ranges @@ -1395,7 +1329,6 @@ mod unsafe { from_bytes, from_byte, slice_bytes, - slice_bytes_safe_range, push_byte, push_bytes, pop_byte, @@ -1441,18 +1374,6 @@ mod unsafe { ret s; } - /* - Function: slice_bytes_safe_range - - Like slice_bytes, with a precondition - */ - unsafe fn slice_bytes_safe_range(s: str, begin: uint, end: uint) - : uint::le(begin, end) -> str { - // would need some magic to make this a precondition - assert (end <= len(s)); - ret slice_bytes(s, begin, end); - } - // Function: push_byte // // Appends a byte to a string. (Not UTF-8 safe). @@ -1488,7 +1409,7 @@ mod unsafe { s = unsafe::slice_bytes(s, 1u, len); ret b; } - + unsafe fn set_len(&v: str, new_len: uint) { let repr: *vec::unsafe::vec_repr = ::unsafe::reinterpret_cast(v); (*repr).fill = new_len + 1u; @@ -1526,39 +1447,23 @@ mod tests { assert (len("\u2620") == 3u); assert (len("\U0001d11e") == 4u); - assert (len_chars("") == 0u); - assert (len_chars("hello world") == 11u); - assert (len_chars("\x63") == 1u); - assert (len_chars("\xa2") == 1u); - assert (len_chars("\u03c0") == 1u); - assert (len_chars("\u2620") == 1u); - assert (len_chars("\U0001d11e") == 1u); - assert (len_chars("ประเทศไทย中华Việt Nam") == 19u); + assert (char_len("") == 0u); + assert (char_len("hello world") == 11u); + assert (char_len("\x63") == 1u); + assert (char_len("\xa2") == 1u); + assert (char_len("\u03c0") == 1u); + assert (char_len("\u2620") == 1u); + assert (char_len("\U0001d11e") == 1u); + assert (char_len("ประเทศไทย中华Việt Nam") == 19u); } #[test] - fn test_index_chars() { - assert ( index_chars("hello", 'h') == some(0u)); - assert ( index_chars("hello", 'e') == some(1u)); - assert ( index_chars("hello", 'o') == some(4u)); - assert ( index_chars("hello", 'z') == none); - } - - #[test] - fn test_rindex() { - assert rindex("hello", 'l') == some(3u); - assert rindex("hello", 'o') == some(4u); - assert rindex("hello", 'h') == some(0u); - assert rindex("hello", 'z') == none; - assert rindex("ประเทศไทย中华Việt Nam", '华') == some(30u); - } - - #[test] - fn test_rindex_chars() { - assert (rindex_chars("hello", 'l') == some(3u)); - assert (rindex_chars("hello", 'o') == some(4u)); - assert (rindex_chars("hello", 'h') == some(0u)); - assert (rindex_chars("hello", 'z') == none); + fn test_rfind_char() { + assert rfind_char("hello", 'l') == some(3u); + assert rfind_char("hello", 'o') == some(4u); + assert rfind_char("hello", 'h') == some(0u); + assert rfind_char("hello", 'z') == none; + assert rfind_char("ประเทศไทย中华Việt Nam", '华') == some(30u); } #[test] @@ -1752,67 +1657,45 @@ mod tests { } #[test] - fn test_find() { + fn test_find_str() { // byte positions - assert (find("banana", "apple pie") == none); - assert (find("", "") == some(0u)); + assert find_str("banana", "apple pie") == none; + assert find_str("", "") == some(0u); let data = "ประเทศไทย中华Việt Nam"; - assert (find(data, "") == some(0u)); - assert (find(data, "ประเ") == some( 0u)); - assert (find(data, "ะเ") == some( 6u)); - assert (find(data, "中华") == some(27u)); - assert (find(data, "ไท华") == none); + assert find_str(data, "") == some(0u); + assert find_str(data, "ประเ") == some( 0u); + assert find_str(data, "ะเ") == some( 6u); + assert find_str(data, "中华") == some(27u); + assert find_str(data, "ไท华") == none; } #[test] - fn test_find_from() { + fn test_find_str_between() { // byte positions - assert (find_from("", "", 0u, 0u) == some(0u)); + assert find_str_between("", "", 0u, 0u) == some(0u); let data = "abcabc"; - assert find_from(data, "ab", 0u, 6u) == some(0u); - assert find_from(data, "ab", 2u, 6u) == some(3u); - assert find_from(data, "ab", 2u, 4u) == none; + assert find_str_between(data, "ab", 0u, 6u) == some(0u); + assert find_str_between(data, "ab", 2u, 6u) == some(3u); + assert find_str_between(data, "ab", 2u, 4u) == none; let data = "ประเทศไทย中华Việt Nam"; data += data; - assert find_from(data, "", 0u, 43u) == some(0u); - assert find_from(data, "", 6u, 43u) == some(6u); + assert find_str_between(data, "", 0u, 43u) == some(0u); + assert find_str_between(data, "", 6u, 43u) == some(6u); - assert find_from(data, "ประ", 0u, 43u) == some( 0u); - assert find_from(data, "ทศไ", 0u, 43u) == some(12u); - assert find_from(data, "ย中", 0u, 43u) == some(24u); - assert find_from(data, "iệt", 0u, 43u) == some(34u); - assert find_from(data, "Nam", 0u, 43u) == some(40u); + assert find_str_between(data, "ประ", 0u, 43u) == some( 0u); + assert find_str_between(data, "ทศไ", 0u, 43u) == some(12u); + assert find_str_between(data, "ย中", 0u, 43u) == some(24u); + assert find_str_between(data, "iệt", 0u, 43u) == some(34u); + assert find_str_between(data, "Nam", 0u, 43u) == some(40u); - assert find_from(data, "ประ", 43u, 86u) == some(43u); - assert find_from(data, "ทศไ", 43u, 86u) == some(55u); - assert find_from(data, "ย中", 43u, 86u) == some(67u); - assert find_from(data, "iệt", 43u, 86u) == some(77u); - assert find_from(data, "Nam", 43u, 86u) == some(83u); - } - - #[test] - fn test_find_chars() { - // char positions - assert (find_chars("banana", "apple pie") == none); - assert (find_chars("", "") == some(0u)); - - let data = "ประเทศไทย中华Việt Nam"; - assert (find_chars(data, "") == some(0u)); - assert (find_chars(data, "ประเ") == some(0u)); - assert (find_chars(data, "ะเ") == some(2u)); - assert (find_chars(data, "中华") == some(9u)); - assert (find_chars(data, "ไท华") == none); - } - - #[test] - fn test_b2c_pos() { - let data = "ประเทศไทย中华Việt Nam"; - assert 0u == b2c_pos(data, 0u); - assert 2u == b2c_pos(data, 6u); - assert 9u == b2c_pos(data, 27u); + assert find_str_between(data, "ประ", 43u, 86u) == some(43u); + assert find_str_between(data, "ทศไ", 43u, 86u) == some(55u); + assert find_str_between(data, "ย中", 43u, 86u) == some(67u); + assert find_str_between(data, "iệt", 43u, 86u) == some(77u); + assert find_str_between(data, "Nam", 43u, 86u) == some(83u); } #[test] @@ -1822,9 +1705,7 @@ mod tests { } t("hello", "llo", 2); t("hello", "el", 1); - - assert "ะเทศไท" - == substr("ประเทศไทย中华Việt Nam", 6u, 18u); + assert "ะเทศไท" == substr("ประเทศไทย中华Việt Nam", 6u, 6u); } #[test] @@ -2312,19 +2193,6 @@ mod tests { } } - #[test] - fn test_escape() { - assert(escape("abcdef") == "abcdef"); - assert(escape("abc\\def") == "abc\\\\def"); - assert(escape("abc\ndef") == "abc\\ndef"); - assert(escape("abc\"def") == "abc\\\"def"); - } - - #[test] - fn test_escape_char() { - assert escape_char('\x1f') == "\\x1f"; - } - #[test] fn test_map() { assert "" == map("", char::to_upper); diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index 8e835bad7f6..dab88080e55 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -33,19 +33,15 @@ A path or fragment of a filesystem path type path = str; fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} { - let ii; - alt str::rindex(pp, os_fs::path_sep) { - option::some(xx) { ii = xx; } - option::none { - alt str::rindex(pp, os_fs::alt_path_sep) { - option::some(xx) { ii = xx; } - option::none { ret {dirname: ".", basename: pp}; } - } - } + alt str::rfind(pp, {|ch| + ch == os_fs::path_sep || ch == os_fs::alt_path_sep + }) { + some(i) { + {dirname: str::slice(pp, 0u, i), + basename: str::slice(pp, i + 1u, str::len(pp))} + } + none { {dirname: ".", basename: pp} } } - - ret {dirname: str::slice(pp, 0u, ii), - basename: str::slice(pp, ii + 1u, str::len(pp))}; } /* diff --git a/src/libstd/json.rs b/src/libstd/json.rs index 7a888f250ed..8de15a60c87 100644 --- a/src/libstd/json.rs +++ b/src/libstd/json.rs @@ -3,10 +3,6 @@ import float; import map; -import core::option; -import option::{some, none}; -import str; -import vec; export json; export to_str; diff --git a/src/libstd/rope.rs b/src/libstd/rope.rs index 10144cc9731..6b9f7a17f74 100644 --- a/src/libstd/rope.rs +++ b/src/libstd/rope.rs @@ -743,7 +743,7 @@ mod node { */ fn of_substr(str: @str, byte_start: uint, byte_len: uint) -> @node { ret of_substr_unsafer(str, byte_start, byte_len, - str::substr_len_chars(*str, byte_start, byte_len)); + str::count_chars(*str, byte_start, byte_len)); } /* @@ -795,7 +795,7 @@ mod node { if i == 0u { first_leaf_char_len } else { hint_max_leaf_char_len }; let chunk_byte_len = - str::substr_len(*str, offset, chunk_char_len); + str::count_bytes(*str, offset, chunk_char_len); nodes[i] = @leaf({ byte_offset: offset, byte_len: chunk_byte_len, @@ -998,7 +998,7 @@ mod node { alt(*node) { node::leaf(x) { let char_len = - str::substr_len_chars(*x.content, byte_offset, byte_len); + str::count_chars(*x.content, byte_offset, byte_len); ret @leaf({byte_offset: byte_offset, byte_len: byte_len, char_len: char_len, @@ -1059,9 +1059,9 @@ mod node { ret node; } let byte_offset = - str::substr_len(*x.content, 0u, char_offset); + str::count_bytes(*x.content, 0u, char_offset); let byte_len = - str::substr_len(*x.content, byte_offset, char_len); + str::count_bytes(*x.content, byte_offset, char_len); ret @leaf({byte_offset: byte_offset, byte_len: byte_len, char_len: char_len, @@ -1138,9 +1138,9 @@ mod node { fn loop_chars(node: @node, it: fn(char) -> bool) -> bool { ret loop_leaves(node, {|leaf| - ret str::substr_all(*leaf.content, - leaf.byte_offset, - leaf.byte_len, it) + ret str::all_between(*leaf.content, + leaf.byte_offset, + leaf.byte_len, it) }) } @@ -1373,7 +1373,7 @@ mod tests { let sample = @"0123456789ABCDE"; let r = of_str(sample); - assert char_len(r) == str::len_chars(*sample); + assert char_len(r) == str::char_len(*sample); assert rope_to_string(r) == *sample; } @@ -1384,7 +1384,7 @@ mod tests { while i < 10 { *buf = *buf + *buf; i+=1;} let sample = @*buf; let r = of_str(sample); - assert char_len(r) == str::len_chars(*sample); + assert char_len(r) == str::char_len(*sample); assert rope_to_string(r) == *sample; let string_iter = 0u; @@ -1427,7 +1427,7 @@ mod tests { } } - assert len == str::len_chars(*sample); + assert len == str::char_len(*sample); } #[test] diff --git a/src/rustdoc/desc_pass.rs b/src/rustdoc/desc_pass.rs index 9dbda071ba4..f5c8d95c65a 100644 --- a/src/rustdoc/desc_pass.rs +++ b/src/rustdoc/desc_pass.rs @@ -282,7 +282,7 @@ mod test { astsrv::from_str(source) {|srv| let doc = extract::from_srv(srv, ""); let doc = attr_pass::mk_pass()(srv, doc); - mk_pass(str::trim)(srv, doc) + mk_pass({|s| str::trim(s)})(srv, doc) } } } \ No newline at end of file diff --git a/src/rustdoc/markdown_pass.rs b/src/rustdoc/markdown_pass.rs index d473f25d090..c81bba6fda5 100644 --- a/src/rustdoc/markdown_pass.rs +++ b/src/rustdoc/markdown_pass.rs @@ -56,10 +56,10 @@ fn should_write_modules_last() { fn d() { }" ); - let idx_a = option::get(str::find(markdown, "# Module `a`")); - let idx_b = option::get(str::find(markdown, "## Function `b`")); - let idx_c = option::get(str::find(markdown, "# Module `c`")); - let idx_d = option::get(str::find(markdown, "## Function `d`")); + let idx_a = option::get(str::find_str(markdown, "# Module `a`")); + let idx_b = option::get(str::find_str(markdown, "## Function `b`")); + let idx_c = option::get(str::find_str(markdown, "# Module `c`")); + let idx_d = option::get(str::find_str(markdown, "## Function `d`")); assert idx_b < idx_d; assert idx_d < idx_a; diff --git a/src/rustdoc/trim_pass.rs b/src/rustdoc/trim_pass.rs index 221ac86a4ed..aabb60c396e 100644 --- a/src/rustdoc/trim_pass.rs +++ b/src/rustdoc/trim_pass.rs @@ -10,7 +10,7 @@ is interpreted as the brief description. export mk_pass; fn mk_pass() -> pass { - desc_pass::mk_pass(str::trim) + desc_pass::mk_pass({|s| str::trim(s)}) } #[test] diff --git a/src/test/bench/sudoku.rs b/src/test/bench/sudoku.rs index 9b02ee0a49f..7cf23192dee 100644 --- a/src/test/bench/sudoku.rs +++ b/src/test/bench/sudoku.rs @@ -33,7 +33,7 @@ fn read_grid(f: io::reader) -> grid_t { let g = vec::init_fn(10u, {|_i| vec::init_elt_mut(10u, 0 as u8) }); while !f.eof() { - let comps = str::split_byte(str::trim(f.read_line()), ',' as u8); + let comps = str::split_char(str::trim(f.read_line()), ','); if vec::len(comps) >= 3u { let row = option::get(uint::from_str(comps[0])) as u8; let col = option::get(uint::from_str(comps[1])) as u8; diff --git a/src/test/compile-fail/fn-constraint.rs b/src/test/compile-fail/fn-constraint.rs index b8a4a164c7e..2843ef93ea6 100644 --- a/src/test/compile-fail/fn-constraint.rs +++ b/src/test/compile-fail/fn-constraint.rs @@ -3,7 +3,8 @@ use std; import str::*; fn main() unsafe { + fn foo(_a: uint, _b: uint) : uint::le(_a, _b) {} let a: uint = 4u; let b: uint = 1u; - log(error, str::unsafe::slice_bytes_safe_range("kitties", a, b)); + log(error, foo(a, b)); } diff --git a/src/test/compile-fail/no-constraint-prop.rs b/src/test/compile-fail/no-constraint-prop.rs index 75e4d67028f..27bccac911e 100644 --- a/src/test/compile-fail/no-constraint-prop.rs +++ b/src/test/compile-fail/no-constraint-prop.rs @@ -1,6 +1,7 @@ // error-pattern:Unsatisfied precondition constraint (for example, uint::le fn main() unsafe { + fn foo(_a: uint, _b: uint) : uint::le(_a, _b) {} let a: uint = 1u; let b: uint = 4u; let c: uint = 5u; @@ -13,5 +14,5 @@ fn main() unsafe { // the next statement, since it's not true in the // prestate. let d <- a; - log(debug, str::unsafe::slice_bytes_safe_range("kitties", b, d)); + log(debug, foo(b, d)); } diff --git a/src/test/run-fail/fn-constraint.rs b/src/test/run-fail/fn-constraint.rs index 2c7934fdffc..f24d96e13cc 100644 --- a/src/test/run-fail/fn-constraint.rs +++ b/src/test/run-fail/fn-constraint.rs @@ -1,8 +1,9 @@ // error-pattern:Predicate uint::le(a, b) failed fn main() unsafe { + fn foo(_a: uint, _b: uint) : uint::le(_a, _b) {} let a: uint = 4u; let b: uint = 1u; check (uint::le(a, b)); - log(error, str::unsafe::slice_bytes_safe_range("kitties", a, b)); + log(error, foo(a, b)); } diff --git a/src/test/run-pass/constraint-prop-expr-move.rs b/src/test/run-pass/constraint-prop-expr-move.rs index e4a2badb9ec..c737628b0c4 100644 --- a/src/test/run-pass/constraint-prop-expr-move.rs +++ b/src/test/run-pass/constraint-prop-expr-move.rs @@ -1,9 +1,10 @@ fn main() unsafe { + fn foo(_a: uint, _b: uint) : uint::le(_a, _b) {} let a: uint = 1u; let b: uint = 4u; let c: uint = 17u; check (uint::le(a, b)); c <- a; - log(debug, str::unsafe::slice_bytes_safe_range("kitties", c, b)); + log(debug, foo(c, b)); } diff --git a/src/test/run-pass/constraint-prop-move.rs b/src/test/run-pass/constraint-prop-move.rs index a12ba6b04d5..a07b60799e8 100644 --- a/src/test/run-pass/constraint-prop-move.rs +++ b/src/test/run-pass/constraint-prop-move.rs @@ -1,7 +1,8 @@ fn main() unsafe { + fn foo(_a: uint, _b: uint) : uint::le(_a, _b) {} let a: uint = 1u; let b: uint = 4u; check (uint::le(a, b)); let c <- a; - log(debug, str::unsafe::slice_bytes_safe_range("kitties", c, b)); + log(debug, foo(c, b)); } diff --git a/src/test/run-pass/constraint-prop-swap.rs b/src/test/run-pass/constraint-prop-swap.rs index a2070295487..59202911a9a 100644 --- a/src/test/run-pass/constraint-prop-swap.rs +++ b/src/test/run-pass/constraint-prop-swap.rs @@ -1,7 +1,8 @@ fn main() unsafe { + fn foo(_a: uint, _b: uint) : uint::le(_a, _b) {} let a: uint = 4u; let b: uint = 1u; check (uint::le(b, a)); b <-> a; - log(debug, str::unsafe::slice_bytes_safe_range("kitties", a, b)); + log(debug, foo(a, b)); } diff --git a/src/test/run-pass/constraint-prop.rs b/src/test/run-pass/constraint-prop.rs index 2bbff1342d2..6d5808be3d1 100644 --- a/src/test/run-pass/constraint-prop.rs +++ b/src/test/run-pass/constraint-prop.rs @@ -1,7 +1,8 @@ fn main() unsafe { + fn foo(_a: uint, _b: uint) : uint::le(_a, _b) {} let a: uint = 1u; let b: uint = 4u; - check (uint::le(a, b)); + check uint::le(a, b); let c = b; - log(debug, str::unsafe::slice_bytes_safe_range("kitties", a, c)); + log(debug, foo(a, c)); } diff --git a/src/test/run-pass/fn-constraint.rs b/src/test/run-pass/fn-constraint.rs index 20cca31e2a1..476a6b74a3d 100644 --- a/src/test/run-pass/fn-constraint.rs +++ b/src/test/run-pass/fn-constraint.rs @@ -1,6 +1,7 @@ fn main() unsafe { + fn foo(_a: uint, _b: uint) : uint::le(_a, _b) {} let a: uint = 1u; let b: uint = 4u; check (uint::le(a, b)); - log(debug, str::unsafe::slice_bytes_safe_range("kitties", a, b)); + log(debug, foo(a, b)); } diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index 2d38b9dd245..ed3da9bf53b 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -8,8 +8,8 @@ fn main() { let s: str = str::from_chars(chs); assert (str::len(s) == 10u); - assert (str::len_chars(s) == 4u); - assert (vec::len::(str::chars(s)) == 4u); + assert (str::char_len(s) == 4u); + assert (vec::len(str::chars(s)) == 4u); assert (str::eq(str::from_chars(str::chars(s)), s)); assert (str::char_at(s, 0u) == 'e'); assert (str::char_at(s, 1u) == 'é');