2010-06-23 23:03:09 -05:00
|
|
|
import rustrt.sbuf;
|
|
|
|
|
2010-08-11 18:06:45 -05:00
|
|
|
import std._vec.rustrt.vbuf;
|
|
|
|
|
2010-06-23 23:03:09 -05:00
|
|
|
// Declarations of items provided by the native "rust" runtime. Only the
// signatures appear here; the implementations live outside this file.
native "rust" mod rustrt {
|
2010-09-22 17:44:13 -05:00
|
|
|
// Opaque type: declared without a definition, so its layout is not
// visible from this module. Returned by str_buf.
type sbuf;
|
|
|
|
// Backs the buf() wrapper in this module.
fn str_buf(str s) -> sbuf;
|
|
|
|
// Backs the byte_len() wrapper in this module.
fn str_byte_len(str s) -> uint;
|
|
|
|
// Backs the alloc() wrapper in this module.
fn str_alloc(uint n_bytes) -> str;
|
|
|
|
// Backs the from_bytes() wrapper in this module.
fn str_from_vec(vec[u8] b) -> str;
|
|
|
|
// Generic over T even though the argument is a str; the refcount()
// wrapper in this module instantiates it with u8.
fn refcount[T](str s) -> uint;
|
2010-06-23 23:03:09 -05:00
|
|
|
}
|
|
|
|
|
2010-08-24 11:59:02 -05:00
|
|
|
fn eq(&str a, &str b) -> bool {
|
2010-09-22 17:44:13 -05:00
|
|
|
let uint i = byte_len(a);
|
|
|
|
if (byte_len(b) != i) {
|
|
|
|
ret false;
|
|
|
|
}
|
|
|
|
while (i > 0u) {
|
|
|
|
i -= 1u;
|
|
|
|
auto cha = a.(i);
|
|
|
|
auto chb = b.(i);
|
|
|
|
if (cha != chb) {
|
|
|
|
ret false;
|
|
|
|
}
|
2010-08-20 14:57:38 -05:00
|
|
|
}
|
2010-09-22 17:44:13 -05:00
|
|
|
ret true;
|
2010-08-20 14:57:38 -05:00
|
|
|
}
|
|
|
|
|
2010-08-24 11:59:02 -05:00
|
|
|
fn hash(&str s) -> uint {
|
2010-09-22 17:44:13 -05:00
|
|
|
// djb hash.
|
|
|
|
// FIXME: replace with murmur.
|
|
|
|
let uint u = 5381u;
|
|
|
|
for (u8 c in s) {
|
|
|
|
u *= 33u;
|
|
|
|
u += (c as uint);
|
|
|
|
}
|
|
|
|
ret u;
|
2010-08-24 11:59:02 -05:00
|
|
|
}
|
|
|
|
|
2010-06-23 23:03:09 -05:00
|
|
|
fn is_utf8(vec[u8] v) -> bool {
|
2010-09-22 17:44:13 -05:00
|
|
|
fail; // FIXME
|
2010-06-23 23:03:09 -05:00
|
|
|
}
|
|
|
|
|
2010-08-05 01:09:25 -05:00
|
|
|
fn is_ascii(str s) -> bool {
|
2010-09-22 17:44:13 -05:00
|
|
|
let uint i = byte_len(s);
|
|
|
|
while (i > 0u) {
|
|
|
|
i -= 1u;
|
|
|
|
if ((s.(i) & 0x80u8) != 0u8) {
|
|
|
|
ret false;
|
|
|
|
}
|
2010-08-05 01:09:25 -05:00
|
|
|
}
|
2010-09-22 17:44:13 -05:00
|
|
|
ret true;
|
2010-08-05 01:09:25 -05:00
|
|
|
}
|
|
|
|
|
2010-07-05 16:42:12 -05:00
|
|
|
// Returns the string produced by the runtime's str_alloc for n_bytes.
// NOTE(review): the initial contents and length of the result are decided
// by the native side and are not visible from this file.
fn alloc(uint n_bytes) -> str {
|
2010-09-22 17:44:13 -05:00
|
|
|
ret rustrt.str_alloc(n_bytes);
|
2010-06-23 23:03:09 -05:00
|
|
|
}
|
|
|
|
|
2010-07-25 02:36:03 -05:00
|
|
|
// Returns the number of bytes (a.k.a. UTF-8 code units) in s.
|
|
|
|
// Contrast with a function that would return the number of code
|
|
|
|
// points (char's), combining character sequences, words, etc. See
|
|
|
|
// http://icu-project.org/apiref/icu4c/classBreakIterator.html for a
|
|
|
|
// way to implement those.
|
|
|
|
fn byte_len(str s) -> uint {
|
2010-09-22 17:44:13 -05:00
|
|
|
// Thin wrapper: the count itself is computed by the native runtime.
ret rustrt.str_byte_len(s);
|
2010-06-23 23:03:09 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the runtime's sbuf for s, as produced by the native str_buf.
// NOTE(review): sbuf is opaque here; presumably a handle to the string's
// underlying byte buffer — confirm against the runtime.
fn buf(str s) -> sbuf {
|
2010-09-22 17:44:13 -05:00
|
|
|
ret rustrt.str_buf(s);
|
2010-06-23 23:03:09 -05:00
|
|
|
}
|
2010-08-05 01:09:25 -05:00
|
|
|
|
2010-08-11 18:06:45 -05:00
|
|
|
// Copies the bytes of s, in order, into a freshly built vec[u8].
fn bytes(str s) -> vec[u8] {
    // FIXME (issue #58): this should really be written as
    //
    //     fn ith(str s, uint i) -> u8 {
    //         ret s.(i);
    //     }
    //     ret _vec.init_fn[u8](bind ith(s, _), byte_len(s));
    //
    // but the refcount of s is not correctly decremented when the
    // binding dies, so the copy is spelled out by hand instead.
    let uint total = _str.byte_len(s);
    let vec[u8] out = _vec.alloc[u8](total);
    let uint pos = 0u;
    while (pos < total) {
        out += vec(s.(pos));
        pos += 1u;
    }
    ret out;
}
|
|
|
|
|
|
|
|
// Builds a str from the bytes in v by handing them to the runtime's
// str_from_vec. The signature attaches an is_utf8(v) constraint to the
// argument.
// NOTE(review): where and when that constraint is checked is not visible
// from this file.
fn from_bytes(vec[u8] v) : is_utf8(v) -> str {
|
2010-09-22 17:44:13 -05:00
|
|
|
ret rustrt.str_from_vec(v);
|
2010-08-11 18:06:45 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn refcount(str s) -> uint {
|
2010-11-09 16:15:07 -06:00
|
|
|
auto r = rustrt.refcount[u8](s);
|
|
|
|
if (r == dbg.const_refcount) {
|
|
|
|
ret r;
|
|
|
|
} else {
|
|
|
|
// -1 because calling this function incremented the refcount.
|
|
|
|
ret r - 1u;
|
|
|
|
}
|
2010-08-05 01:09:25 -05:00
|
|
|
}
|
2010-09-22 17:20:19 -05:00
|
|
|
|
|
|
|
|
|
|
|
// Standard bits from the world of string libraries.
|
|
|
|
|
|
|
|
fn index(str s, u8 c) -> int {
|
2010-09-22 17:44:13 -05:00
|
|
|
let int i = 0;
|
|
|
|
for (u8 k in s) {
|
|
|
|
if (k == c) {
|
|
|
|
ret i;
|
|
|
|
}
|
|
|
|
i += 1;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
2010-09-22 17:44:13 -05:00
|
|
|
ret -1;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn rindex(str s, u8 c) -> int {
|
2010-09-22 17:44:13 -05:00
|
|
|
let int n = _str.byte_len(s) as int;
|
|
|
|
while (n >= 0) {
|
|
|
|
if (s.(n) == c) {
|
|
|
|
ret n;
|
|
|
|
}
|
|
|
|
n -= 1;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
2010-09-22 17:44:13 -05:00
|
|
|
ret n;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn find(str haystack, str needle) -> int {
|
|
|
|
|
2010-09-22 17:44:13 -05:00
|
|
|
let int haystack_len = byte_len(haystack) as int;
|
|
|
|
let int needle_len = byte_len(needle) as int;
|
2010-09-22 17:20:19 -05:00
|
|
|
|
2010-09-22 17:44:13 -05:00
|
|
|
if (needle_len == 0) {
|
|
|
|
ret 0;
|
|
|
|
}
|
2010-09-22 17:20:19 -05:00
|
|
|
|
2010-09-22 17:44:13 -05:00
|
|
|
fn match_at(&str haystack,
|
|
|
|
&str needle,
|
|
|
|
int i) -> bool {
|
|
|
|
let int j = i;
|
|
|
|
for (u8 c in needle) {
|
|
|
|
if (haystack.(j) != c) {
|
|
|
|
ret false;
|
|
|
|
}
|
|
|
|
j += 1;
|
|
|
|
}
|
|
|
|
ret true;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
|
|
|
|
2010-09-22 17:44:13 -05:00
|
|
|
let int i = 0;
|
|
|
|
while (i <= haystack_len - needle_len) {
|
|
|
|
if (match_at(haystack, needle, i)) {
|
|
|
|
ret i;
|
|
|
|
}
|
|
|
|
i += 1;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
2010-09-22 17:44:13 -05:00
|
|
|
ret -1;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn substr(str s, uint begin, uint len) -> str {
|
2010-09-22 17:44:13 -05:00
|
|
|
let str accum = "";
|
|
|
|
let uint i = begin;
|
|
|
|
while (i < begin+len) {
|
|
|
|
accum += s.(i);
|
|
|
|
i += 1u;
|
|
|
|
}
|
|
|
|
ret accum;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Splits s at every byte equal to sep and returns the pieces in order;
// the separator bytes themselves are dropped. A separator at the very
// end of s yields a trailing empty piece, and an empty s yields an empty
// vector.
fn split(str s, u8 sep) -> vec[str] {
    let vec[str] pieces = vec();
    let str current = "";
    // Whether the byte examined most recently was a separator.
    let bool trailing_sep = false;

    for (u8 b in s) {
        let bool at_sep = (b == sep);
        if (at_sep) {
            // Close off the piece collected so far and start a new one.
            pieces += current;
            current = "";
        } else {
            current += b;
        }
        trailing_sep = at_sep;
    }

    // Emit the final piece when it has content, or when s ended on a
    // separator (which contributes an empty last piece).
    if (trailing_sep ||
        _str.byte_len(current) != 0u) {
        pieces += current;
    }
    ret pieces;
}
|
|
|
|
|
|
|
|
fn concat(vec[str] v) -> str {
|
2010-09-22 17:44:13 -05:00
|
|
|
let str s = "";
|
|
|
|
for (str ss in v) {
|
|
|
|
s += ss;
|
|
|
|
}
|
|
|
|
ret s;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn connect(vec[str] v, str sep) -> str {
|
2010-09-22 17:44:13 -05:00
|
|
|
let str s = "";
|
|
|
|
let bool first = true;
|
|
|
|
for (str ss in v) {
|
|
|
|
if (first) {
|
|
|
|
first = false;
|
|
|
|
} else {
|
|
|
|
s += sep;
|
|
|
|
}
|
|
|
|
s += ss;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
2010-09-22 17:44:13 -05:00
|
|
|
ret s;
|
2010-09-22 17:20:19 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Local Variables:
|
|
|
|
// mode: rust;
|
|
|
|
// fill-column: 78;
|
|
|
|
// indent-tabs-mode: nil
|
2010-09-22 17:44:13 -05:00
|
|
|
// c-basic-offset: 4
|
2010-09-22 17:20:19 -05:00
|
|
|
// buffer-file-coding-system: utf-8-unix
|
|
|
|
// compile-command: "make -k -C .. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
|
|
|
|
// End:
|