From 3f6e8ffe64b57b0eaba6812208e94500422ca40c Mon Sep 17 00:00:00 2001 From: Jeffrey Yasskin Date: Sun, 25 Jul 2010 00:36:03 -0700 Subject: [PATCH] Implement _str.len() to return the number of bytes, rename it to byte_len(), and add a test. --- src/Makefile | 1 + src/lib/_str.rs | 15 ++++++++++----- src/rt/rust_builtin.cpp | 6 ++++++ src/test/run-pass/str-lib.rs | 16 ++++++++++++++++ 4 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 src/test/run-pass/str-lib.rs diff --git a/src/Makefile b/src/Makefile index 792eeec0a3b..1d79a46700a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -506,6 +506,7 @@ TEST_XFAILS_LLVM := $(TASK_XFAILS) \ str-append.rs \ str-concat.rs \ str-idx.rs \ + str-lib.rs \ tag.rs \ tail-cps.rs \ tail-direct.rs \ diff --git a/src/lib/_str.rs b/src/lib/_str.rs index 7d1a2dbdcd7..a607c7d5df5 100644 --- a/src/lib/_str.rs +++ b/src/lib/_str.rs @@ -3,7 +3,7 @@ import rustrt.sbuf; native "rust" mod rustrt { type sbuf; fn str_buf(str s) -> sbuf; - fn str_len(str s) -> uint; + fn str_byte_len(str s) -> uint; fn str_alloc(uint n_bytes) -> str; fn refcount[T](str s) -> uint; } @@ -13,7 +13,7 @@ fn is_utf8(vec[u8] v) -> bool { } fn is_ascii(str s) -> bool { - let uint i = len(s); + let uint i = byte_len(s); while (i > 0u) { i -= 1u; if ((s.(i) & 0x80u8) != 0u8) { @@ -27,8 +27,13 @@ fn alloc(uint n_bytes) -> str { ret rustrt.str_alloc(n_bytes); } -fn len(str s) -> uint { - ret rustrt.str_len(s); +// Returns the number of bytes (a.k.a. UTF-8 code units) in s. +// Contrast with a function that would return the number of code +// points (char's), combining character sequences, words, etc. See +// http://icu-project.org/apiref/icu4c/classBreakIterator.html for a +// way to implement those. +fn byte_len(str s) -> uint { + ret rustrt.str_byte_len(s); } fn buf(str s) -> sbuf { @@ -39,5 +44,5 @@ fn bytes(&str s) -> vec[u8] { fn ith(str s, uint i) -> u8 { ret s.(i); } - ret _vec.init_fn[u8](bind ith(s, _), _str.len(s)); + ret _vec.init_fn[u8](bind ith(s, _), _str.byte_len(s)); } diff --git a/src/rt/rust_builtin.cpp b/src/rt/rust_builtin.cpp index 657109c6df2..d8d9b8d6eb4 100644 --- a/src/rt/rust_builtin.cpp +++ b/src/rt/rust_builtin.cpp @@ -115,6 +115,12 @@ str_buf(rust_task *task, rust_str *s) return (char const *)&s->data[0]; } +extern "C" CDECL size_t +str_byte_len(rust_task *task, rust_str *s) +{ + return s->fill - 1; // -1 for the '\0' terminator. +} + extern "C" CDECL void * vec_buf(rust_task *task, type_desc *ty, rust_vec *v, size_t offset) { diff --git a/src/test/run-pass/str-lib.rs b/src/test/run-pass/str-lib.rs new file mode 100644 index 00000000000..585f9b8de5f --- /dev/null +++ b/src/test/run-pass/str-lib.rs @@ -0,0 +1,16 @@ +use std; +import std._str; + +fn test_bytes_len() { + check (_str.byte_len("") == 0u); + check (_str.byte_len("hello world") == 11u); + check (_str.byte_len("\x63") == 1u); + check (_str.byte_len("\xa2") == 2u); + check (_str.byte_len("\u03c0") == 2u); + check (_str.byte_len("\u2620") == 3u); + check (_str.byte_len("\U0001d11e") == 4u); +} + +fn main() { + test_bytes_len(); +}