2010-06-23 21:03:09 -07:00
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
import rustrt::sbuf;
|
2011-05-17 20:41:41 +02:00
|
|
|
import vec::rustrt::vbuf;
|
2011-06-20 17:29:54 -07:00
|
|
|
import uint::le;
|
2011-05-31 18:24:06 -07:00
|
|
|
export sbuf;
|
|
|
|
export rustrt;
|
|
|
|
export eq;
|
|
|
|
export lteq;
|
|
|
|
export hash;
|
|
|
|
export is_utf8;
|
|
|
|
export is_ascii;
|
|
|
|
export alloc;
|
|
|
|
export byte_len;
|
|
|
|
export buf;
|
|
|
|
export bytes;
|
|
|
|
export from_bytes;
|
|
|
|
export unsafe_from_bytes;
|
|
|
|
export unsafe_from_byte;
|
|
|
|
export str_from_cstr;
|
|
|
|
export str_from_buf;
|
|
|
|
export push_utf8_bytes;
|
|
|
|
export from_char;
|
|
|
|
export from_chars;
|
|
|
|
export utf8_char_width;
|
|
|
|
export char_range_at;
|
|
|
|
export char_at;
|
|
|
|
export char_len;
|
|
|
|
export to_chars;
|
|
|
|
export push_char;
|
|
|
|
export pop_char;
|
|
|
|
export shift_char;
|
|
|
|
export unshift_char;
|
|
|
|
export refcount;
|
|
|
|
export index;
|
|
|
|
export rindex;
|
|
|
|
export find;
|
|
|
|
export starts_with;
|
|
|
|
export ends_with;
|
|
|
|
export substr;
|
|
|
|
export slice;
|
|
|
|
export shift_byte;
|
|
|
|
export pop_byte;
|
|
|
|
export push_byte;
|
|
|
|
export unshift_byte;
|
|
|
|
export split;
|
|
|
|
export concat;
|
|
|
|
export connect;
|
2011-07-05 16:02:02 -07:00
|
|
|
export connect_ivec;
|
2011-05-31 18:24:06 -07:00
|
|
|
export to_upper;
|
2011-06-20 17:29:54 -07:00
|
|
|
export safe_slice;
|
2011-07-10 02:05:52 -07:00
|
|
|
export bytes_ivec;
|
2011-07-10 12:47:51 -07:00
|
|
|
export unsafe_from_bytes_ivec;
|
2011-05-31 18:24:06 -07:00
|
|
|
|
2010-06-23 21:03:09 -07:00
|
|
|
// Declarations of the string primitives provided by the native "rust"
// runtime. Several public functions in this module forward directly to
// these.
native "rust" mod rustrt {
    // Opaque buffer type handed back by `str_buf`.
    type sbuf;

    fn str_buf(str s) -> sbuf;
    fn str_vec(str s) -> vec[u8];
    fn str_byte_len(str s) -> uint;
    fn str_alloc(uint n_bytes) -> str;
    fn str_from_ivec(&u8[mutable?] b) -> str;
    fn str_from_vec(vec[mutable? u8] b) -> str;
    fn str_from_cstr(sbuf cstr) -> str;
    fn str_from_buf(sbuf buf, uint len) -> str;
    fn str_push_byte(str s, uint byte) -> str;
    fn str_slice(str s, uint begin, uint end) -> str;
    fn refcount[T](str s) -> uint;
}
|
|
|
|
|
2010-08-24 09:59:02 -07:00
|
|
|
fn eq(&str a, &str b) -> bool {
|
2010-09-22 15:44:13 -07:00
|
|
|
let uint i = byte_len(a);
|
2011-06-15 11:19:50 -07:00
|
|
|
if (byte_len(b) != i) { ret false; }
|
2010-09-22 15:44:13 -07:00
|
|
|
while (i > 0u) {
|
|
|
|
i -= 1u;
|
|
|
|
auto cha = a.(i);
|
|
|
|
auto chb = b.(i);
|
2011-06-15 11:19:50 -07:00
|
|
|
if (cha != chb) { ret false; }
|
2010-08-20 12:57:38 -07:00
|
|
|
}
|
2010-09-22 15:44:13 -07:00
|
|
|
ret true;
|
2010-08-20 12:57:38 -07:00
|
|
|
}
|
|
|
|
|
2010-12-21 16:43:28 -08:00
|
|
|
fn lteq(&str a, &str b) -> bool {
|
|
|
|
let uint i = byte_len(a);
|
|
|
|
let uint j = byte_len(b);
|
|
|
|
let uint n = i;
|
2011-06-15 11:19:50 -07:00
|
|
|
if (j < n) { n = j; }
|
2010-12-21 16:43:28 -08:00
|
|
|
let uint x = 0u;
|
|
|
|
while (x < n) {
|
|
|
|
auto cha = a.(x);
|
|
|
|
auto chb = b.(x);
|
2011-06-15 11:19:50 -07:00
|
|
|
if (cha < chb) { ret true; } else if (cha > chb) { ret false; }
|
2010-12-21 16:43:28 -08:00
|
|
|
x += 1u;
|
|
|
|
}
|
|
|
|
ret i <= j;
|
|
|
|
}
|
|
|
|
|
2010-08-24 09:59:02 -07:00
|
|
|
fn hash(&str s) -> uint {
|
2010-09-22 15:44:13 -07:00
|
|
|
// djb hash.
|
|
|
|
// FIXME: replace with murmur.
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2010-09-22 15:44:13 -07:00
|
|
|
let uint u = 5381u;
|
2011-06-15 11:19:50 -07:00
|
|
|
for (u8 c in s) { u *= 33u; u += c as uint; }
|
2010-09-22 15:44:13 -07:00
|
|
|
ret u;
|
2010-08-24 09:59:02 -07:00
|
|
|
}
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2011-03-24 12:11:32 +01:00
|
|
|
// UTF-8 tags and ranges
|
2011-06-15 11:19:50 -07:00
|
|
|
const u8 tag_cont_u8 = 128u8;
|
|
|
|
|
|
|
|
const uint tag_cont = 128u;
|
|
|
|
|
|
|
|
const uint max_one_b = 128u;
|
|
|
|
|
|
|
|
const uint tag_two_b = 192u;
|
|
|
|
|
|
|
|
const uint max_two_b = 2048u;
|
|
|
|
|
|
|
|
const uint tag_three_b = 224u;
|
|
|
|
|
|
|
|
const uint max_three_b = 65536u;
|
|
|
|
|
|
|
|
const uint tag_four_b = 240u;
|
|
|
|
|
|
|
|
const uint max_four_b = 2097152u;
|
|
|
|
|
|
|
|
const uint tag_five_b = 248u;
|
|
|
|
|
|
|
|
const uint max_five_b = 67108864u;
|
|
|
|
|
|
|
|
const uint tag_six_b = 252u;
|
2011-03-24 12:11:32 +01:00
|
|
|
|
2010-06-23 21:03:09 -07:00
|
|
|
fn is_utf8(vec[u8] v) -> bool {
|
2011-03-24 12:11:32 +01:00
|
|
|
auto i = 0u;
|
2011-05-17 20:41:41 +02:00
|
|
|
auto total = vec::len[u8](v);
|
2011-03-24 12:11:32 +01:00
|
|
|
while (i < total) {
|
|
|
|
auto chsize = utf8_char_width(v.(i));
|
2011-06-15 11:19:50 -07:00
|
|
|
if (chsize == 0u) { ret false; }
|
|
|
|
if (i + chsize > total) { ret false; }
|
2011-03-24 12:11:32 +01:00
|
|
|
i += 1u;
|
|
|
|
while (chsize > 1u) {
|
2011-06-15 11:19:50 -07:00
|
|
|
if (v.(i) & 192u8 != tag_cont_u8) { ret false; }
|
2011-03-24 12:11:32 +01:00
|
|
|
i += 1u;
|
|
|
|
chsize -= 1u;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ret true;
|
2010-06-23 21:03:09 -07:00
|
|
|
}
|
|
|
|
|
2010-08-04 23:09:25 -07:00
|
|
|
fn is_ascii(str s) -> bool {
|
2010-09-22 15:44:13 -07:00
|
|
|
let uint i = byte_len(s);
|
2011-06-15 11:19:50 -07:00
|
|
|
while (i > 0u) { i -= 1u; if (s.(i) & 128u8 != 0u8) { ret false; } }
|
2010-09-22 15:44:13 -07:00
|
|
|
ret true;
|
2010-08-04 23:09:25 -07:00
|
|
|
}
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
// Obtains a string from the runtime's `str_alloc`, passing `n_bytes`
// through unchanged.
fn alloc(uint n_bytes) -> str {
    ret rustrt::str_alloc(n_bytes);
}
|
|
|
|
|
2010-06-23 21:03:09 -07:00
|
|
|
|
2010-07-25 00:36:03 -07:00
|
|
|
// Returns the number of bytes (a.k.a. UTF-8 code units) in s.
|
|
|
|
// Contrast with a function that would return the number of code
|
|
|
|
// points (char's), combining character sequences, words, etc. See
|
|
|
|
// http://icu-project.org/apiref/icu4c/classBreakIterator.html for a
|
|
|
|
// way to implement those.
|
2011-06-15 11:19:50 -07:00
|
|
|
fn byte_len(str s) -> uint { ret rustrt::str_byte_len(s); }
|
2010-06-23 21:03:09 -07:00
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
// Returns the runtime buffer handle (`sbuf`) for `s`.
fn buf(str s) -> sbuf {
    ret rustrt::str_buf(s);
}
|
2010-08-04 23:09:25 -07:00
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
// Returns the bytes of `s` as a vector, via the runtime's `str_vec`.
fn bytes(str s) -> vec[u8] {
    ret rustrt::str_vec(s);
}
|
|
|
|
|
2011-07-10 02:05:52 -07:00
|
|
|
// Returns the bytes of `s` as an interior vector.
//
// The string's buffer is reinterpreted as a raw pointer and handed to
// ivec::unsafe::from_buf together with the byte length.
fn bytes_ivec(str s) -> u8[] {
    auto n_bytes = byte_len(s);
    auto raw = unsafe::reinterpret_cast(buf(s));
    ret ivec::unsafe::from_buf(raw, n_bytes);
}
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
// Builds a string from the bytes in `v`, via the runtime's `str_from_vec`.
// This wrapper performs no validation of its own.
fn from_bytes(vec[u8] v) -> str {
    ret rustrt::str_from_vec(v);
}
|
2010-08-11 16:06:45 -07:00
|
|
|
|
|
|
|
|
2011-03-10 15:56:51 +01:00
|
|
|
// FIXME temp thing
|
2011-03-22 16:38:47 -07:00
|
|
|
fn unsafe_from_bytes(vec[mutable? u8] v) -> str {
|
2011-05-12 17:24:54 +02:00
|
|
|
ret rustrt::str_from_vec(v);
|
2011-03-22 16:38:47 -07:00
|
|
|
}
|
|
|
|
|
2011-07-08 22:23:11 -07:00
|
|
|
// Interior-vector counterpart of unsafe_from_bytes: forwards `v` to the
// runtime's `str_from_ivec` without any validation in this wrapper.
fn unsafe_from_bytes_ivec(&u8[mutable?] v) -> str { ret rustrt::str_from_ivec(v); }
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
// Builds a one-byte string holding `u`, by passing the single-element
// vector `[u]` to the runtime's `str_from_vec`. No validation is done here.
fn unsafe_from_byte(u8 u) -> str {
    ret rustrt::str_from_vec([u]);
}
|
2011-03-16 14:58:02 -07:00
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
// Builds a string from the buffer `cstr`, via the runtime function of the
// same name.
fn str_from_cstr(sbuf cstr) -> str {
    ret rustrt::str_from_cstr(cstr);
}
|
2011-03-16 18:40:51 -07:00
|
|
|
|
2011-04-19 13:35:49 -07:00
|
|
|
fn str_from_buf(sbuf buf, uint len) -> str {
|
2011-05-12 17:24:54 +02:00
|
|
|
ret rustrt::str_from_buf(buf, len);
|
2011-03-16 18:40:51 -07:00
|
|
|
}
|
|
|
|
|
2011-03-24 12:11:32 +01:00
|
|
|
fn push_utf8_bytes(&mutable str s, char ch) {
|
|
|
|
auto code = ch as uint;
|
|
|
|
if (code < max_one_b) {
|
2011-05-12 17:24:54 +02:00
|
|
|
s = rustrt::str_push_byte(s, code);
|
2011-03-24 12:11:32 +01:00
|
|
|
} else if (code < max_two_b) {
|
2011-06-15 11:19:50 -07:00
|
|
|
s = rustrt::str_push_byte(s, code >> 6u & 31u | tag_two_b);
|
|
|
|
s = rustrt::str_push_byte(s, code & 63u | tag_cont);
|
2011-03-24 12:11:32 +01:00
|
|
|
} else if (code < max_three_b) {
|
2011-06-15 11:19:50 -07:00
|
|
|
s = rustrt::str_push_byte(s, code >> 12u & 15u | tag_three_b);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 6u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code & 63u | tag_cont);
|
2011-03-24 12:11:32 +01:00
|
|
|
} else if (code < max_four_b) {
|
2011-06-15 11:19:50 -07:00
|
|
|
s = rustrt::str_push_byte(s, code >> 18u & 7u | tag_four_b);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 12u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 6u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code & 63u | tag_cont);
|
2011-03-24 12:11:32 +01:00
|
|
|
} else if (code < max_five_b) {
|
2011-06-15 11:19:50 -07:00
|
|
|
s = rustrt::str_push_byte(s, code >> 24u & 3u | tag_five_b);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 18u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 12u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 6u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code & 63u | tag_cont);
|
2011-03-24 12:11:32 +01:00
|
|
|
} else {
|
2011-06-15 11:19:50 -07:00
|
|
|
s = rustrt::str_push_byte(s, code >> 30u & 1u | tag_six_b);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 24u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 18u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 12u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code >> 6u & 63u | tag_cont);
|
|
|
|
s = rustrt::str_push_byte(s, code & 63u | tag_cont);
|
2011-03-24 12:11:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn from_char(char ch) -> str {
|
|
|
|
auto buf = "";
|
|
|
|
push_utf8_bytes(buf, ch);
|
|
|
|
ret buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn from_chars(vec[char] chs) -> str {
|
|
|
|
auto buf = "";
|
2011-06-15 11:19:50 -07:00
|
|
|
for (char ch in chs) { push_utf8_bytes(buf, ch); }
|
2011-03-24 12:11:32 +01:00
|
|
|
ret buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn utf8_char_width(u8 b) -> uint {
|
|
|
|
let uint byte = b as uint;
|
2011-06-15 11:19:50 -07:00
|
|
|
if (byte < 128u) { ret 1u; }
|
|
|
|
if (byte < 192u) {
|
|
|
|
ret 0u; // Not a valid start byte
|
|
|
|
|
|
|
|
}
|
|
|
|
if (byte < 224u) { ret 2u; }
|
|
|
|
if (byte < 240u) { ret 3u; }
|
|
|
|
if (byte < 248u) { ret 4u; }
|
|
|
|
if (byte < 252u) { ret 5u; }
|
2011-03-24 12:11:32 +01:00
|
|
|
ret 6u;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn char_range_at(str s, uint i) -> tup(char, uint) {
|
|
|
|
auto b0 = s.(i);
|
|
|
|
auto w = utf8_char_width(b0);
|
2011-05-02 17:47:24 -07:00
|
|
|
assert (w != 0u);
|
2011-06-15 11:19:50 -07:00
|
|
|
if (w == 1u) { ret tup(b0 as char, i + 1u); }
|
2011-03-24 12:11:32 +01:00
|
|
|
auto val = 0u;
|
|
|
|
auto end = i + w;
|
|
|
|
i += 1u;
|
|
|
|
while (i < end) {
|
|
|
|
auto byte = s.(i);
|
2011-06-15 11:19:50 -07:00
|
|
|
assert (byte & 192u8 == tag_cont_u8);
|
2011-03-24 12:11:32 +01:00
|
|
|
val <<= 6u;
|
2011-06-15 11:19:50 -07:00
|
|
|
val += byte & 63u8 as uint;
|
2011-03-24 12:11:32 +01:00
|
|
|
i += 1u;
|
|
|
|
}
|
|
|
|
// Clunky way to get the right bits from the first byte. Uses two shifts,
|
|
|
|
// the first to clip off the marker bits at the left of the byte, and then
|
|
|
|
// a second (as uint) to get it to the right position.
|
2011-06-15 11:19:50 -07:00
|
|
|
|
|
|
|
val += (b0 << (w + 1u as u8) as uint) << (w - 1u) * 6u - w - 1u;
|
2011-03-24 12:11:32 +01:00
|
|
|
ret tup(val as char, i);
|
|
|
|
}
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
fn char_at(str s, uint i) -> char { ret char_range_at(s, i)._0; }
|
2011-03-24 12:11:32 +01:00
|
|
|
|
|
|
|
fn char_len(str s) -> uint {
|
|
|
|
auto i = 0u;
|
|
|
|
auto len = 0u;
|
|
|
|
auto total = byte_len(s);
|
|
|
|
while (i < total) {
|
|
|
|
auto chsize = utf8_char_width(s.(i));
|
2011-05-02 17:47:24 -07:00
|
|
|
assert (chsize > 0u);
|
2011-03-24 12:11:32 +01:00
|
|
|
len += 1u;
|
|
|
|
i += chsize;
|
|
|
|
}
|
2011-05-02 17:47:24 -07:00
|
|
|
assert (i == total);
|
2011-03-24 12:11:32 +01:00
|
|
|
ret len;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Decodes `s` into a vector of its chars, in order.
fn to_chars(str s) -> vec[char] {
    let vec[char] decoded = [];
    auto total = byte_len(s);
    auto pos = 0u;
    while (pos < total) {
        auto next = char_range_at(s, pos);
        vec::push[char](decoded, next._0);
        // Continue right after the char just decoded.
        pos = next._1;
    }
    ret decoded;
}
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
// Appends the UTF-8 encoding of `ch` to `s`.
fn push_char(&mutable str s, char ch) {
    s += from_char(ch);
}
|
2011-03-24 12:11:32 +01:00
|
|
|
|
|
|
|
fn pop_char(&mutable str s) -> char {
|
|
|
|
auto end = byte_len(s);
|
2011-06-15 11:19:50 -07:00
|
|
|
while (end > 0u && s.(end - 1u) & 192u8 == tag_cont_u8) { end -= 1u; }
|
2011-05-02 17:47:24 -07:00
|
|
|
assert (end > 0u);
|
2011-03-24 12:11:32 +01:00
|
|
|
auto ch = char_at(s, end - 1u);
|
|
|
|
s = substr(s, 0u, end - 1u);
|
|
|
|
ret ch;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn shift_char(&mutable str s) -> char {
|
|
|
|
auto r = char_range_at(s, 0u);
|
|
|
|
s = substr(s, r._1, byte_len(s) - r._1);
|
|
|
|
ret r._0;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn unshift_char(&mutable str s, char ch) {
|
2011-06-19 20:31:53 -07:00
|
|
|
s = from_char(ch) + s;
|
2011-03-24 12:11:32 +01:00
|
|
|
}
|
2011-03-16 14:58:02 -07:00
|
|
|
|
2010-08-11 16:06:45 -07:00
|
|
|
fn refcount(str s) -> uint {
|
2011-05-12 17:24:54 +02:00
|
|
|
auto r = rustrt::refcount[u8](s);
|
|
|
|
if (r == dbg::const_refcount) {
|
2010-11-09 14:15:07 -08:00
|
|
|
ret r;
|
|
|
|
} else {
|
Make moving of temporaries do the right thing, use it to optimize
This adds support for dropping cleanups for temporary values when they
are moved somewhere else. It then adds wraps most copy operations
(return, put in data structure, box, etc) in a way that will fall back
to a move when it is safe.
This saves a lot of taking/dropping, shaving over a megabyte off the
stage2/rustc binary size.
In some cases, most notably function returns, we could detect that the
returned value is a local variable, and can thus be safely moved even
though it is not a temporary. This will require putting some more
information in lvals.
I did not yet handle function arguments, since the logic for passing
them looked too convoluted to touch. I'll probably try that in the
near future, since it's bound to be a big win.
2011-07-07 13:36:12 +02:00
|
|
|
ret r - 1u;
|
2010-11-09 14:15:07 -08:00
|
|
|
}
|
2010-08-04 23:09:25 -07:00
|
|
|
}
|
2010-09-22 15:20:19 -07:00
|
|
|
|
|
|
|
|
|
|
|
// Standard bits from the world of string libraries.
|
|
|
|
fn index(str s, u8 c) -> int {
|
2010-09-22 15:44:13 -07:00
|
|
|
let int i = 0;
|
2011-06-15 11:19:50 -07:00
|
|
|
for (u8 k in s) { if (k == c) { ret i; } i += 1; }
|
2010-09-22 15:44:13 -07:00
|
|
|
ret -1;
|
2010-09-22 15:20:19 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn rindex(str s, u8 c) -> int {
|
2011-05-17 20:41:41 +02:00
|
|
|
let int n = str::byte_len(s) as int;
|
2011-06-15 11:19:50 -07:00
|
|
|
while (n >= 0) { if (s.(n) == c) { ret n; } n -= 1; }
|
2010-09-22 15:44:13 -07:00
|
|
|
ret n;
|
2010-09-22 15:20:19 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn find(str haystack, str needle) -> int {
|
2010-09-22 15:44:13 -07:00
|
|
|
let int haystack_len = byte_len(haystack) as int;
|
|
|
|
let int needle_len = byte_len(needle) as int;
|
2011-06-15 11:19:50 -07:00
|
|
|
if (needle_len == 0) { ret 0; }
|
|
|
|
fn match_at(&str haystack, &str needle, int i) -> bool {
|
2010-09-22 15:44:13 -07:00
|
|
|
let int j = i;
|
2011-06-15 11:19:50 -07:00
|
|
|
for (u8 c in needle) { if (haystack.(j) != c) { ret false; } j += 1; }
|
2010-09-22 15:44:13 -07:00
|
|
|
ret true;
|
2010-09-22 15:20:19 -07:00
|
|
|
}
|
2010-09-22 15:44:13 -07:00
|
|
|
let int i = 0;
|
|
|
|
while (i <= haystack_len - needle_len) {
|
2011-06-15 11:19:50 -07:00
|
|
|
if (match_at(haystack, needle, i)) { ret i; }
|
2010-09-22 15:44:13 -07:00
|
|
|
i += 1;
|
2010-09-22 15:20:19 -07:00
|
|
|
}
|
2011-06-15 11:19:50 -07:00
|
|
|
ret -1;
|
2010-09-22 15:20:19 -07:00
|
|
|
}
|
|
|
|
|
2011-01-03 20:39:33 -08:00
|
|
|
fn starts_with(str haystack, str needle) -> bool {
|
|
|
|
let uint haystack_len = byte_len(haystack);
|
|
|
|
let uint needle_len = byte_len(needle);
|
2011-06-15 11:19:50 -07:00
|
|
|
if (needle_len == 0u) { ret true; }
|
|
|
|
if (needle_len > haystack_len) { ret false; }
|
2011-01-03 20:39:33 -08:00
|
|
|
ret eq(substr(haystack, 0u, needle_len), needle);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn ends_with(str haystack, str needle) -> bool {
|
|
|
|
let uint haystack_len = byte_len(haystack);
|
|
|
|
let uint needle_len = byte_len(needle);
|
2011-05-22 01:36:01 -04:00
|
|
|
ret if (needle_len == 0u) {
|
2011-06-15 11:19:50 -07:00
|
|
|
true
|
|
|
|
} else if (needle_len > haystack_len) {
|
|
|
|
false
|
|
|
|
} else {
|
|
|
|
eq(substr(haystack, haystack_len - needle_len, needle_len),
|
|
|
|
needle)
|
|
|
|
};
|
2011-01-03 20:39:33 -08:00
|
|
|
}
|
|
|
|
|
2010-09-22 15:20:19 -07:00
|
|
|
fn substr(str s, uint begin, uint len) -> str {
|
2011-04-26 17:38:14 +02:00
|
|
|
ret slice(s, begin, begin + len);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn slice(str s, uint begin, uint end) -> str {
|
2011-05-11 00:05:03 -04:00
|
|
|
// FIXME: Typestate precondition
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2011-05-11 00:05:03 -04:00
|
|
|
assert (begin <= end);
|
2011-05-17 20:41:41 +02:00
|
|
|
assert (end <= str::byte_len(s));
|
2011-05-12 17:24:54 +02:00
|
|
|
ret rustrt::str_slice(s, begin, end);
|
2010-09-22 15:20:19 -07:00
|
|
|
}
|
|
|
|
|
2011-06-20 17:29:54 -07:00
|
|
|
// Variant of slice whose `begin <= end` requirement is expressed as the
// precondition le(begin, end) instead of a runtime assert.
fn safe_slice(str s, uint begin, uint end) : le(begin, end) -> str {
    // The upper bound is still checked at run time; it would need some
    // magic to make this a precondition.
    assert (end <= str::byte_len(s));
    ret rustrt::str_slice(s, begin, end);
}
|
|
|
|
|
2011-03-16 14:58:02 -07:00
|
|
|
fn shift_byte(&mutable str s) -> u8 {
|
|
|
|
auto len = byte_len(s);
|
2011-05-02 17:47:24 -07:00
|
|
|
assert (len > 0u);
|
2011-03-16 14:58:02 -07:00
|
|
|
auto b = s.(0);
|
|
|
|
s = substr(s, 1u, len - 1u);
|
|
|
|
ret b;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn pop_byte(&mutable str s) -> u8 {
|
|
|
|
auto len = byte_len(s);
|
2011-05-02 17:47:24 -07:00
|
|
|
assert (len > 0u);
|
2011-03-16 14:58:02 -07:00
|
|
|
auto b = s.(len - 1u);
|
|
|
|
s = substr(s, 0u, len - 1u);
|
|
|
|
ret b;
|
|
|
|
}
|
|
|
|
|
|
|
|
fn push_byte(&mutable str s, u8 b) {
|
2011-05-12 17:24:54 +02:00
|
|
|
s = rustrt::str_push_byte(s, b as uint);
|
2011-03-16 14:58:02 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn unshift_byte(&mutable str s, u8 b) {
|
2011-06-24 17:55:02 +02:00
|
|
|
auto rs = alloc(byte_len(s) + 1u);
|
|
|
|
rs += unsafe_from_byte(b);
|
|
|
|
rs += s;
|
|
|
|
s = rs;
|
2011-03-16 14:58:02 -07:00
|
|
|
}
|
|
|
|
|
2010-09-22 15:20:19 -07:00
|
|
|
// Splits `s` at every occurrence of the byte `sep`.
//
// The separators themselves are dropped. A trailing separator produces a
// final empty piece; an empty input produces an empty vector.
fn split(str s, u8 sep) -> vec[str] {
    let vec[str] pieces = [];
    let str current = "";
    let bool trailing_sep = false;
    for (u8 byte in s) {
        trailing_sep = (byte == sep);
        if (trailing_sep) {
            pieces += [current];
            current = "";
        } else {
            current += unsafe_from_byte(byte);
        }
    }
    // Flush the unfinished piece, or the empty piece after a final separator.
    if (trailing_sep || str::byte_len(current) != 0u) { pieces += [current]; }
    ret pieces;
}
|
|
|
|
|
|
|
|
fn concat(vec[str] v) -> str {
|
2010-09-22 15:44:13 -07:00
|
|
|
let str s = "";
|
2011-06-15 11:19:50 -07:00
|
|
|
for (str ss in v) { s += ss; }
|
2010-09-22 15:44:13 -07:00
|
|
|
ret s;
|
2010-09-22 15:20:19 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn connect(vec[str] v, str sep) -> str {
|
2010-09-22 15:44:13 -07:00
|
|
|
let str s = "";
|
|
|
|
let bool first = true;
|
|
|
|
for (str ss in v) {
|
2011-06-15 11:19:50 -07:00
|
|
|
if (first) { first = false; } else { s += sep; }
|
2010-09-22 15:44:13 -07:00
|
|
|
s += ss;
|
2010-09-22 15:20:19 -07:00
|
|
|
}
|
2010-09-22 15:44:13 -07:00
|
|
|
ret s;
|
2010-09-22 15:20:19 -07:00
|
|
|
}
|
|
|
|
|
2011-07-05 16:02:02 -07:00
|
|
|
fn connect_ivec(&str[] v, str sep) -> str {
|
|
|
|
let str s = "";
|
|
|
|
let bool first = true;
|
|
|
|
for (str ss in v) {
|
|
|
|
if (first) { first = false; } else { s += sep; }
|
|
|
|
s += ss;
|
|
|
|
}
|
|
|
|
ret s;
|
|
|
|
}
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
|
2011-04-13 21:36:32 -04:00
|
|
|
// FIXME: This only handles ASCII
|
|
|
|
fn to_upper(str s) -> str {
|
|
|
|
auto outstr = "";
|
|
|
|
auto ascii_a = 'a' as u8;
|
|
|
|
auto ascii_z = 'z' as u8;
|
|
|
|
auto diff = 32u8;
|
|
|
|
for (u8 byte in s) {
|
|
|
|
auto next;
|
|
|
|
if (ascii_a <= byte && byte <= ascii_z) {
|
|
|
|
next = byte - diff;
|
2011-06-15 11:19:50 -07:00
|
|
|
} else { next = byte; }
|
2011-04-13 21:36:32 -04:00
|
|
|
push_byte(outstr, next);
|
|
|
|
}
|
|
|
|
ret outstr;
|
|
|
|
}
|
2010-09-22 15:20:19 -07:00
|
|
|
// Local Variables:
|
|
|
|
// mode: rust;
|
|
|
|
// fill-column: 78;
|
|
|
|
// indent-tabs-mode: nil
|
2010-09-22 15:44:13 -07:00
|
|
|
// c-basic-offset: 4
|
2010-09-22 15:20:19 -07:00
|
|
|
// buffer-file-coding-system: utf-8-unix
|
2011-05-16 18:21:22 -07:00
|
|
|
// compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
|
2010-09-22 15:20:19 -07:00
|
|
|
// End:
|