rust/src/libstd/rope.rs

1381 lines
39 KiB
Rust
Raw Normal View History

/*!
* High-level text containers.
*
* Ropes are a high-level representation of text that offers
* much better performance than strings for common operations,
* and generally reduces memory allocations and copies, while only
* entailing a small degradation of less common operations.
*
* More precisely, where a string is represented as a memory buffer,
* a rope is a tree structure whose leaves are slices of immutable
* strings. Therefore, concatenation, appending, prepending, substrings,
* etc. are operations that require only trivial tree manipulation,
* generally without having to copy memory. In addition, the tree
* structure of ropes makes them suitable as a form of index to speed-up
* access to Unicode characters by index in long chunks of text.
*
* The following operations are algorithmically faster in ropes:
*
* * extracting a subrope is logarithmic (linear in strings);
* * appending/prepending is near-constant time (linear in strings);
* * concatenation is near-constant time (linear in strings);
* * char length is constant-time (linear in strings);
* * access to a character by index is logarithmic (linear in strings);
*/
2011-11-04 12:38:42 -05:00
/// The type of ropes.
2011-11-04 12:38:42 -05:00
// A rope is simply the `root` enum declared in the `node` module below:
// either `node::empty` or `node::content(@node)`.
type rope = node::root;
/*
Section: Creating a rope
*/
/// Create an empty rope
2011-11-04 12:38:42 -05:00
fn empty() -> rope {
2012-08-01 19:30:05 -05:00
return node::empty;
2011-11-04 12:38:42 -05:00
}
/**
* Adopt a string as a rope.
*
* # Arguments
*
* * str - A valid string.
*
* # Return value
*
* A rope representing the same string as `str`. Depending of the length
* of `str`, this rope may be empty, flat or complex.
*
* # Performance notes
*
* * this operation does not copy the string;
* * the function runs in linear time.
*/
fn of_str(str: @~str) -> rope {
2012-08-01 19:30:05 -05:00
return of_substr(str, 0u, str::len(*str));
2011-11-04 12:38:42 -05:00
}
/**
* As `of_str` but for a substring.
*
* # Arguments
* * byte_offset - The offset of `str` at which the rope starts.
* * byte_len - The number of bytes of `str` to use.
*
* # Return value
*
* A rope representing the same string as `str::substr(str, byte_offset,
* byte_len)`. Depending on `byte_len`, this rope may be empty, flat or
* complex.
*
* # Performance note
*
* This operation does not copy the substring.
*
* # Safety notes
*
* * this function does _not_ check the validity of the substring;
* * this function fails if `byte_offset` or `byte_len` do not match `str`.
*/
fn of_substr(str: @~str, byte_offset: uint, byte_len: uint) -> rope {
2012-08-01 19:30:05 -05:00
if byte_len == 0u { return node::empty; }
2012-02-23 03:44:04 -06:00
if byte_offset + byte_len > str::len(*str) { fail; }
2012-08-01 19:30:05 -05:00
return node::content(node::of_substr(str, byte_offset, byte_len));
2011-11-04 12:38:42 -05:00
}
/*
Section: Adding things to a rope
*/
/**
* Add one char to the end of the rope
*
* # Performance note
*
* * this function executes in near-constant time
*/
2011-11-04 12:38:42 -05:00
fn append_char(rope: rope, char: char) -> rope {
2012-08-01 19:30:05 -05:00
return append_str(rope, @str::from_chars(~[char]));
2011-11-04 12:38:42 -05:00
}
/**
* Add one string to the end of the rope
*
* # Performance note
*
* * this function executes in near-linear time
*/
fn append_str(rope: rope, str: @~str) -> rope {
2012-08-01 19:30:05 -05:00
return append_rope(rope, of_str(str))
2011-11-04 12:38:42 -05:00
}
/**
* Add one char to the beginning of the rope
*
* # Performance note
* * this function executes in near-constant time
*/
2011-11-04 12:38:42 -05:00
fn prepend_char(rope: rope, char: char) -> rope {
2012-08-01 19:30:05 -05:00
return prepend_str(rope, @str::from_chars(~[char]));
2011-11-04 12:38:42 -05:00
}
/**
* Add one string to the beginning of the rope
*
* # Performance note
* * this function executes in near-linear time
*/
fn prepend_str(rope: rope, str: @~str) -> rope {
2012-08-01 19:30:05 -05:00
return append_rope(of_str(str), rope)
2011-11-04 12:38:42 -05:00
}
/// Concatenate two ropes
2011-11-04 12:38:42 -05:00
fn append_rope(left: rope, right: rope) -> rope {
alt(left) {
2012-08-03 21:59:04 -05:00
node::empty => return right,
node::content(left_content) => {
2011-11-04 12:38:42 -05:00
alt(right) {
2012-08-03 21:59:04 -05:00
node::empty => return left,
node::content(right_content) => {
2012-08-01 19:30:05 -05:00
return node::content(node::concat2(left_content, right_content));
2012-08-03 21:59:04 -05:00
}
2011-11-04 12:38:42 -05:00
}
}
}
}
/**
* Concatenate many ropes.
*
* If the ropes are balanced initially and have the same height, the resulting
* rope remains balanced. However, this function does not take any further
* measure to ensure that the result is balanced.
*/
fn concat(v: ~[rope]) -> rope {
2012-03-26 20:35:18 -05:00
//Copy `v` into a mut vector
let mut len = vec::len(v);
2012-08-01 19:30:05 -05:00
if len == 0u { return node::empty; }
2012-03-12 17:52:30 -05:00
let ropes = vec::to_mut(vec::from_elem(len, v[0]));
2012-06-30 18:19:07 -05:00
for uint::range(1u, len) |i| {
ropes[i] = v[i];
}
//Merge progresively
while len > 1u {
2012-06-30 18:19:07 -05:00
for uint::range(0u, len/2u) |i| {
ropes[i] = append_rope(ropes[2u*i], ropes[2u*i+1u]);
}
if len%2u != 0u {
ropes[len/2u] = ropes[len - 1u];
len = len/2u + 1u;
} else {
len = len/2u;
}
}
//Return final rope
2012-08-01 19:30:05 -05:00
return ropes[0];
2011-11-04 12:38:42 -05:00
}
/*
Section: Keeping ropes healthy
*/
/**
* Balance a rope.
*
* # Return value
*
* A copy of the rope in which small nodes have been grouped in memory,
* and with a reduced height.
*
* If you perform numerous rope concatenations, it is generally a good idea
* to rebalance your rope at some point, before using it for other purposes.
*/
2011-11-04 12:38:42 -05:00
fn bal(rope:rope) -> rope {
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => return rope,
node::content(x) => alt(node::bal(x)) {
option::none => rope,
option::some(y) => node::content(y)
2011-11-04 12:38:42 -05:00
}
}
}
/*
Section: Transforming ropes
*/
/**
* Extract a subrope from a rope.
*
* # Performance note
*
* * on a balanced rope, this operation takes algorithmic time;
* * this operation does not involve any copying
*
* # Safety note
*
* * this function fails if char_offset/char_len do not represent
* valid positions in rope
*/
2011-11-04 12:38:42 -05:00
fn sub_chars(rope: rope, char_offset: uint, char_len: uint) -> rope {
2012-08-01 19:30:05 -05:00
if char_len == 0u { return node::empty; }
2011-11-04 12:38:42 -05:00
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => fail,
node::content(node) => if char_len > node::char_len(node) {
fail
} else {
return node::content(node::sub_chars(node, char_offset, char_len))
2011-11-04 12:38:42 -05:00
}
}
}
/**
* Extract a subrope from a rope.
*
* # Performance note
*
* * on a balanced rope, this operation takes algorithmic time;
* * this operation does not involve any copying
*
* # Safety note
*
* * this function fails if byte_offset/byte_len do not represent
* valid positions in rope
*/
2011-11-04 12:38:42 -05:00
fn sub_bytes(rope: rope, byte_offset: uint, byte_len: uint) -> rope {
2012-08-01 19:30:05 -05:00
if byte_len == 0u { return node::empty; }
2011-11-04 12:38:42 -05:00
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => fail,
node::content(node) =>if byte_len > node::byte_len(node) {
fail
} else {
return node::content(node::sub_bytes(node, byte_offset, byte_len))
2011-11-04 12:38:42 -05:00
}
}
}
/*
Section: Comparing ropes
*/
/**
* Compare two ropes by Unicode lexicographical order.
*
* This function compares only the contents of the rope, not their structure.
*
* # Return value
*
* A negative value if `left < right`, 0 if eq(left, right) or a positive
* value if `left > right`
*/
2011-11-04 12:38:42 -05:00
fn cmp(left: rope, right: rope) -> int {
alt((left, right)) {
2012-08-03 21:59:04 -05:00
(node::empty, node::empty) => return 0,
(node::empty, _) => return -1,
(_, node::empty) => return 1,
(node::content(a), node::content(b)) => {
2012-08-01 19:30:05 -05:00
return node::cmp(a, b);
2011-11-04 12:38:42 -05:00
}
}
}
/**
* Returns `true` if both ropes have the same content (regardless of
* their structure), `false` otherwise
*/
2011-11-04 12:38:42 -05:00
fn eq(left: rope, right: rope) -> bool {
2012-08-01 19:30:05 -05:00
return cmp(left, right) == 0;
2011-11-04 12:38:42 -05:00
}
/**
* # Arguments
*
* * left - an arbitrary rope
* * right - an arbitrary rope
*
* # Return value
*
* `true` if `left <= right` in lexicographical order (regardless of their
* structure), `false` otherwise
*/
fn le(left: rope, right: rope) -> bool {
2012-08-01 19:30:05 -05:00
return cmp(left, right) <= 0;
2011-11-04 12:38:42 -05:00
}
/**
* # Arguments
*
* * left - an arbitrary rope
* * right - an arbitrary rope
*
* # Return value
*
* `true` if `left < right` in lexicographical order (regardless of their
* structure), `false` otherwise
*/
2011-11-04 12:38:42 -05:00
fn lt(left: rope, right: rope) -> bool {
2012-08-01 19:30:05 -05:00
return cmp(left, right) < 0;
2011-11-04 12:38:42 -05:00
}
/**
* # Arguments
*
* * left - an arbitrary rope
* * right - an arbitrary rope
*
* # Return value
*
* `true` if `left >= right` in lexicographical order (regardless of their
* structure), `false` otherwise
*/
fn ge(left: rope, right: rope) -> bool {
2012-08-01 19:30:05 -05:00
return cmp(left, right) >= 0;
2011-11-04 12:38:42 -05:00
}
/**
* # Arguments
*
* * left - an arbitrary rope
* * right - an arbitrary rope
*
* # Return value
*
* `true` if `left > right` in lexicographical order (regardless of their
* structure), `false` otherwise
*/
2011-11-04 12:38:42 -05:00
fn gt(left: rope, right: rope) -> bool {
2012-08-01 19:30:05 -05:00
return cmp(left, right) > 0;
2011-11-04 12:38:42 -05:00
}
/*
Section: Iterating
*/
/**
* Loop through a rope, char by char
*
* While other mechanisms are available, this is generally the best manner
* of looping through the contents of a rope char by char. If you prefer a
* loop that iterates through the contents string by string (e.g. to print
* the contents of the rope or output it to the system), however,
* you should rather use `traverse_components`.
*
* # Arguments
*
* * rope - A rope to traverse. It may be empty.
* * it - A block to execute with each consecutive character of the rope.
* Return `true` to continue, `false` to stop.
*
* # Return value
*
* `true` If execution proceeded correctly, `false` if it was interrupted,
* that is if `it` returned `false` at any point.
*/
2012-01-23 16:59:00 -06:00
fn loop_chars(rope: rope, it: fn(char) -> bool) -> bool {
2011-11-04 12:38:42 -05:00
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => return true,
node::content(x) => return node::loop_chars(x, it)
2011-11-04 12:38:42 -05:00
}
}
/**
* Loop through a rope, char by char, until the end.
*
* # Arguments
* * rope - A rope to traverse. It may be empty
* * it - A block to execute with each consecutive character of the rope.
*/
2012-01-23 16:59:00 -06:00
fn iter_chars(rope: rope, it: fn(char)) {
2012-06-30 18:19:07 -05:00
do loop_chars(rope) |x| {
2011-11-04 12:38:42 -05:00
it(x);
true
};
2011-11-04 12:38:42 -05:00
}
/**
* Loop through a rope, string by string
*
* While other mechanisms are available, this is generally the best manner of
* looping through the contents of a rope string by string, which may be
* useful e.g. to print strings as you see them (without having to copy their
* contents into a new string), to send them to then network, to write them to
* a file, etc.. If you prefer a loop that iterates through the contents
* char by char (e.g. to search for a char), however, you should rather
* use `traverse`.
*
* # Arguments
*
* * rope - A rope to traverse. It may be empty
* * it - A block to execute with each consecutive string component of the
* rope. Return `true` to continue, `false` to stop
*
* # Return value
*
* `true` If execution proceeded correctly, `false` if it was interrupted,
* that is if `it` returned `false` at any point.
*/
2012-01-23 16:59:00 -06:00
fn loop_leaves(rope: rope, it: fn(node::leaf) -> bool) -> bool{
2011-11-04 12:38:42 -05:00
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => return true,
node::content(x) => return node::loop_leaves(x, it)
2011-11-04 12:38:42 -05:00
}
}
mod iterator {
mod leaf {
fn start(rope: rope) -> node::leaf_iterator::t {
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => return node::leaf_iterator::empty(),
node::content(x) => return node::leaf_iterator::start(x)
2011-11-04 12:38:42 -05:00
}
}
fn next(it: node::leaf_iterator::t) -> option<node::leaf> {
2012-08-01 19:30:05 -05:00
return node::leaf_iterator::next(it);
2011-11-04 12:38:42 -05:00
}
}
mod char {
fn start(rope: rope) -> node::char_iterator::t {
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => return node::char_iterator::empty(),
node::content(x) => return node::char_iterator::start(x)
2011-11-04 12:38:42 -05:00
}
}
fn next(it: node::char_iterator::t) -> option<char> {
2012-08-01 19:30:05 -05:00
return node::char_iterator::next(it)
2011-11-04 12:38:42 -05:00
}
}
}
/*
Section: Rope properties
*/
/**
* Returns the height of the rope.
*
* The height of the rope is a bound on the number of operations which
* must be performed during a character access before finding the leaf in
* which a character is contained.
*
* # Performance note
*
* Constant time.
*/
2011-11-04 12:38:42 -05:00
fn height(rope: rope) -> uint {
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => return 0u,
node::content(x) => return node::height(x)
2011-11-04 12:38:42 -05:00
}
}
/**
* The number of character in the rope
*
* # Performance note
*
* Constant time.
*/
2011-11-04 12:38:42 -05:00
pure fn char_len(rope: rope) -> uint {
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => return 0u,
node::content(x) => return node::char_len(x)
2011-11-04 12:38:42 -05:00
}
}
/**
* The number of bytes in the rope
*
* # Performance note
*
* Constant time.
*/
2011-11-04 12:38:42 -05:00
pure fn byte_len(rope: rope) -> uint {
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => return 0u,
node::content(x) => return node::byte_len(x)
2011-11-04 12:38:42 -05:00
}
}
/**
* The character at position `pos`
*
* # Arguments
*
* * pos - A position in the rope
*
* # Safety notes
*
* The function will fail if `pos` is not a valid position in the rope.
*
* # Performance note
*
* This function executes in a time proportional to the height of the
* rope + the (bounded) length of the largest leaf.
*/
2011-11-04 12:38:42 -05:00
fn char_at(rope: rope, pos: uint) -> char {
alt(rope) {
2012-08-03 21:59:04 -05:00
node::empty => fail,
node::content(x) => return node::char_at(x, pos)
2011-11-04 12:38:42 -05:00
}
}
/*
Section: Implementation
*/
mod node {
/// Implementation of type `rope`
2012-01-19 17:20:57 -06:00
enum root {
/// An empty rope
2012-01-19 19:55:34 -06:00
empty,
/// A non-empty rope
2012-01-19 19:55:34 -06:00
content(@node),
2011-11-04 12:38:42 -05:00
}
/**
* A text component in a rope.
*
* This is actually a slice in a rope, so as to ensure maximal sharing.
*
* # Fields
*
* * byte_offset = The number of bytes skippen in `content`
* * byte_len - The number of bytes of `content` to use
* * char_len - The number of chars in the leaf.
* * content - Contents of the leaf.
*
* Note that we can have `char_len < str::char_len(content)`, if
* this leaf is only a subset of the string. Also note that the
* string can be shared between several ropes, e.g. for indexing
* purposes.
*/
2012-03-07 20:17:30 -06:00
type leaf = {
byte_offset: uint,
byte_len: uint,
char_len: uint,
content: @~str
2012-03-07 20:17:30 -06:00
};
2011-11-04 12:38:42 -05:00
/**
* A node obtained from the concatenation of two other nodes
*
* # Fields
*
* * left - The node containing the beginning of the text.
* * right - The node containing the end of the text.
* * char_len - The number of chars contained in all leaves of this node.
* * byte_len - The number of bytes in the subrope.
*
* Used to pre-allocate the correct amount of storage for
* serialization.
*
* * height - Height of the subrope.
*
* Used for rebalancing and to allocate stacks for traversals.
*/
2011-11-04 12:38:42 -05:00
type concat = {
//FIXME (#2744): Perhaps a `vec` instead of `left`/`right`
left: @node,
2011-11-04 12:38:42 -05:00
right: @node,
char_len: uint,
byte_len: uint,
height: uint
};
2012-01-19 17:20:57 -06:00
enum node {
/// A leaf consisting in a `str`
2012-01-19 19:55:34 -06:00
leaf(leaf),
/// The concatenation of two ropes
2012-01-19 19:55:34 -06:00
concat(concat),
2011-11-04 12:38:42 -05:00
}
/**
* The maximal number of chars that _should_ be permitted in a single node
*
* This is not a strict value
*/
2011-11-04 12:38:42 -05:00
// Soft threshold, in chars: `of_substr_unsafer` splits longer text into
// several leaves, and `tree_from_forest_destructive` flattens subtrees
// that are no longer than this.
const hint_max_leaf_char_len: uint = 256u;
/**
* The maximal height that _should_ be permitted in a tree.
*
* This is not a strict value
*/
2011-11-04 12:38:42 -05:00
// Soft threshold: `bal` leaves shallower trees alone, and
// `tree_from_forest_destructive` re-serializes subtrees at or above it.
const hint_max_node_height: uint = 16u;
/**
 * Adopt a string as a node.
 *
 * If the string is longer than `hint_max_leaf_char_len`, it is
 * logically split between as many leaves as necessary. Regardless,
 * the string itself is not copied.
 *
 * Performance note: The complexity of this function is linear in
 * the length of `str`.
 */
fn of_str(str: @~str) -> @node {
    let total_bytes = str::len(*str);
    return of_substr(str, 0u, total_bytes);
}
/**
 * Adopt a slice of a string as a node.
 *
 * The chars of the slice are counted, then the work is handed over to
 * `of_substr_unsafer`. If the slice is longer than
 * `hint_max_leaf_char_len`, it is logically split between as many leaves
 * as necessary. Regardless, the string itself is not copied
 *
 * # Arguments
 *
 * * byte_start - The byte offset where the slice of `str` starts.
 * * byte_len - The number of bytes from `str` to use.
 *
 * # Safety note
 *
 * Behavior is undefined if `byte_start` or `byte_len` do not represent
 * valid positions in `str`
 */
fn of_substr(str: @~str, byte_start: uint, byte_len: uint) -> @node {
    //NOTE(review): assumes the last argument of `str::count_chars` is a
    //byte count rather than an end position - confirm against `str`
    let n_chars = str::count_chars(*str, byte_start, byte_len);
    return of_substr_unsafer(str, byte_start, byte_len, n_chars);
}
/**
 * Adopt a slice of a string as a node, trusting the caller's char count.
 *
 * If the slice holds at most `hint_max_leaf_char_len` chars, the result
 * is a single leaf. Otherwise the slice is logically cut into leaves of
 * `hint_max_leaf_char_len` chars (the first leaf takes the remainder, if
 * any) and the leaves are joined pairwise into a tree. Regardless, the
 * string itself is not copied
 *
 * # Arguments
 *
 * * byte_start - The byte offset where the slice of `str` starts.
 * * byte_len - The number of bytes from `str` to use.
 * * char_len - The number of chars in `str` in the interval
 *   [byte_start, byte_start+byte_len)
 *
 * # Safety notes
 *
 * * The function asserts that `byte_start + byte_len` does not exceed
 *   the length of `str`; no other validity check is performed
 * * Behavior is undefined if `char_len` does not accurately represent the
 *   number of chars between byte_start and byte_start+byte_len
 */
fn of_substr_unsafer(str: @~str, byte_start: uint, byte_len: uint,
                     char_len: uint) -> @node {
    assert(byte_start + byte_len <= str::len(*str));
    let whole = @leaf({
        byte_offset: byte_start,
        byte_len: byte_len,
        char_len: char_len,
        content: str});

    //Short enough: one leaf is all we need
    if char_len <= hint_max_leaf_char_len { return whole; }

    //1. Cut the slice into leaves (`whole` only serves as a filler)
    let mut count = uint::div_ceil(char_len, hint_max_leaf_char_len);
    let slots = vec::to_mut(vec::from_elem(count, whole));
    let remainder = char_len % hint_max_leaf_char_len;
    let head_chars =
        if remainder == 0u { hint_max_leaf_char_len } else { remainder };
    let mut cursor = byte_start;
    let mut k = 0u;
    while k < count {
        //Only the first leaf may be shorter than the others
        let n_chars =
            if k == 0u { head_chars } else { hint_max_leaf_char_len };
        let n_bytes = str::count_bytes(*str, cursor, n_chars);
        slots[k] = @leaf({
            byte_offset: cursor,
            byte_len: n_bytes,
            char_len: n_chars,
            content: str
        });
        cursor += n_bytes;
        k += 1u;
    }

    //2. Collapse the leaves into a tree, halving the count each round
    while count > 1u {
        let mut j = 0u;
        while j + 1u < count {//Join slots 0 with 1, 2 with 3 etc.
            slots[j/2u] = concat2(slots[j], slots[j + 1u]);
            j += 2u;
        }
        if j + 1u == count {
            //Odd count: carry the unpaired last slot to the next round
            slots[j/2u] = slots[j];
        }
        count = uint::div_ceil(count, 2u);
    }
    return slots[0u];
}
/// The number of bytes of text under `node`, read from the cached
/// `byte_len` field of the leaf or concatenation.
pure fn byte_len(node: @node) -> uint {
    //FIXME (#2744): Could we do this without the pattern-matching?
    alt *node {
      concat(c) => c.byte_len,
      leaf(l) => l.byte_len
    }
}
/// The number of chars of text under `node`, read from the cached
/// `char_len` field of the leaf or concatenation.
pure fn char_len(node: @node) -> uint {
    alt *node {
      concat(c) => c.char_len,
      leaf(l) => l.char_len
    }
}
/**
 * Concatenate a forest of nodes into one tree.
 *
 * Neighbouring nodes are joined pairwise, round after round. Before a
 * pair is joined:
 *
 * * if both sides together exceed `hint_max_leaf_char_len` chars, each
 *   side that fits in one leaf on its own is flattened into a leaf;
 * * each side whose height has reached `hint_max_node_height` is
 *   serialized and rebuilt from scratch.
 *
 * # Arguments
 *
 * * forest - The forest. This vector is progressively rewritten during
 *            execution and should be discarded as meaningless afterwards.
 *            It must not be empty: the result is read from its first slot.
 */
fn tree_from_forest_destructive(forest: ~[mut @node]) -> @node {
    let mut remaining = vec::len(forest);
    while remaining > 1u {
        let mut k = 0u;
        while k + 1u < remaining {//Join slots 0 with 1, 2 with 3 etc.
            let mut l = forest[k];
            let mut r = forest[k + 1u];
            let l_chars = char_len(l);
            let r_chars = char_len(r);
            let mut l_height = height(l);
            let mut r_height = height(r);

            if l_chars + r_chars > hint_max_leaf_char_len {
                if l_chars <= hint_max_leaf_char_len {
                    l = flatten(l);
                    l_height = height(l);
                }
                if r_chars <= hint_max_leaf_char_len {
                    r = flatten(r);
                    r_height = height(r);
                }
            }

            //Too deep: copy the text out and rebuild the subtree
            if l_height >= hint_max_node_height {
                l = of_substr_unsafer(@serialize_node(l),
                                      0u, byte_len(l),
                                      l_chars);
            }
            if r_height >= hint_max_node_height {
                r = of_substr_unsafer(@serialize_node(r),
                                      0u, byte_len(r),
                                      r_chars);
            }

            forest[k/2u] = concat2(l, r);
            k += 2u;
        }
        if k + 1u == remaining {
            //Odd count: carry the unpaired last slot to the next round
            forest[k/2u] = forest[k];
        }
        remaining = uint::div_ceil(remaining, 2u);
    }
    return forest[0];
}
/**
 * Copy the text of a node into one freshly allocated string.
 *
 * The leaves are visited from left to right and the bytes of each leaf
 * slice, i.e. the interval
 * `[byte_offset, byte_offset + byte_len)` of its `content`, are copied
 * one after the other into a buffer of `byte_len(node)` bytes.
 *
 * # Return value
 *
 * The buffer, reinterpreted as a string.
 */
fn serialize_node(node: @node) -> ~str unsafe {
    let mut buf = vec::to_mut(vec::from_elem(byte_len(node), 0u8));
    let mut offset = 0u;//Current position in the buffer
    let it = leaf_iterator::start(node);
    loop {
        alt(leaf_iterator::next(it)) {
          option::none => break,
          option::some(x) => {
            //FIXME (#2744): Replace with memcpy or something similar
            //View the string as bytes without copying it; the matching
            //`forget` below gives up this second handle
            let mut local_buf: ~[u8] =
                unsafe::reinterpret_cast(*x.content);
            let mut i = x.byte_offset;
            //The slice ends `byte_len` bytes after its start, not at
            //absolute position `byte_len`
            let end = x.byte_offset + x.byte_len;
            while i < end {
                buf[offset] = local_buf[i];
                offset += 1u;
                i += 1u;
            }
            unsafe::forget(local_buf);
          }
        }
    }
    return unsafe::transmute(buf);
}
/**
* Replace a subtree by a single leaf with the same contents.
*
* * Performance note
*
* This function executes in linear time.
*/
2011-11-04 12:38:42 -05:00
fn flatten(node: @node) -> @node unsafe {
alt(*node) {
2012-08-03 21:59:04 -05:00
leaf(_) => return node,
concat(x) => {
2012-08-01 19:30:05 -05:00
return @leaf({
2011-11-04 12:38:42 -05:00
byte_offset: 0u,
byte_len: x.byte_len,
char_len: x.char_len,
content: @serialize_node(node)
})
}
}
}
/**
 * Balance a node.
 *
 * # Algorithm
 *
 * * if the node height is smaller than `hint_max_node_height`, do nothing
 * * otherwise, gather all leaves as a forest and rebuild a tree from it
 *   with `tree_from_forest_destructive`
 *
 * # Return value
 *
 * * `option::none` if no transformation happened
 * * `option::some(x)` otherwise, in which case `x` has the same contents
 *   as `node` but lower height and/or fragmentation.
 */
fn bal(node: @node) -> option<@node> {
    if height(node) < hint_max_node_height { return option::none; }

    //1. Gather all leaves as a forest
    let mut forest = ~[mut];
    let it = leaf_iterator::start(node);
    loop {
        alt leaf_iterator::next(it) {
          option::some(l) => vec::push(forest, @leaf(l)),
          option::none => break
        }
    }

    //2. Rebuild tree from forest; the result is copied into a fresh box
    let rebuilt = tree_from_forest_destructive(forest);
    return option::some(@*rebuilt);
}
/**
* Compute the subnode of a node.
*
* # Arguments
*
* * node - A node
* * byte_offset - A byte offset in `node`
* * byte_len - The number of bytes to return
*
* # Performance notes
*
* * this function performs no copying;
* * this function executes in a time proportional to the height of `node`
*
* # Safety notes
*
* This function fails if `byte_offset` or `byte_len` do not represent
* valid positions in `node`.
*/
2011-11-04 12:38:42 -05:00
fn sub_bytes(node: @node, byte_offset: uint, byte_len: uint) -> @node {
let mut node = node;
let mut byte_offset = byte_offset;
loop {
2011-11-04 12:38:42 -05:00
if byte_offset == 0u && byte_len == node::byte_len(node) {
2012-08-01 19:30:05 -05:00
return node;
2011-11-04 12:38:42 -05:00
}
alt(*node) {
2012-08-03 21:59:04 -05:00
node::leaf(x) => {
2011-11-04 12:38:42 -05:00
let char_len =
str::count_chars(*x.content, byte_offset, byte_len);
2012-08-01 19:30:05 -05:00
return @leaf({byte_offset: byte_offset,
2011-11-04 12:38:42 -05:00
byte_len: byte_len,
char_len: char_len,
content: x.content});
}
2012-08-03 21:59:04 -05:00
node::concat(x) => {
2011-11-04 12:38:42 -05:00
let left_len: uint = node::byte_len(x.left);
if byte_offset <= left_len {
if byte_offset + byte_len <= left_len {
//Case 1: Everything fits in x.left, tail-call
2011-11-04 12:38:42 -05:00
node = x.left;
} else {
//Case 2: A (non-empty, possibly full) suffix
//of x.left and a (non-empty, possibly full) prefix
//of x.right
let left_result =
sub_bytes(x.left, byte_offset, left_len);
let right_result =
sub_bytes(x.right, 0u, left_len - byte_offset);
2012-08-01 19:30:05 -05:00
return concat2(left_result, right_result);
2011-11-04 12:38:42 -05:00
}
} else {
//Case 3: Everything fits in x.right
byte_offset -= left_len;
node = x.right;
}
}
}
};
2011-11-04 12:38:42 -05:00
}
/**
* Compute the subnode of a node.
*
* # Arguments
*
* * node - A node
* * char_offset - A char offset in `node`
* * char_len - The number of chars to return
*
* # Performance notes
*
* * this function performs no copying;
* * this function executes in a time proportional to the height of `node`
*
* # Safety notes
*
* This function fails if `char_offset` or `char_len` do not represent
* valid positions in `node`.
*/
2011-11-04 12:38:42 -05:00
fn sub_chars(node: @node, char_offset: uint, char_len: uint) -> @node {
let mut node = node;
let mut char_offset = char_offset;
loop {
alt(*node) {
2012-08-03 21:59:04 -05:00
node::leaf(x) => {
if char_offset == 0u && char_len == x.char_len {
2012-08-01 19:30:05 -05:00
return node;
}
let byte_offset =
str::count_bytes(*x.content, 0u, char_offset);
let byte_len =
str::count_bytes(*x.content, byte_offset, char_len);
2012-08-01 19:30:05 -05:00
return @leaf({byte_offset: byte_offset,
byte_len: byte_len,
char_len: char_len,
content: x.content});
}
2012-08-03 21:59:04 -05:00
node::concat(x) => {
2012-08-01 19:30:05 -05:00
if char_offset == 0u && char_len == x.char_len {return node;}
let left_len : uint = node::char_len(x.left);
if char_offset <= left_len {
if char_offset + char_len <= left_len {
//Case 1: Everything fits in x.left, tail call
node = x.left;
} else {
//Case 2: A (non-empty, possibly full) suffix
//of x.left and a (non-empty, possibly full) prefix
//of x.right
let left_result =
sub_chars(x.left, char_offset, left_len);
let right_result =
sub_chars(x.right, 0u, left_len - char_offset);
2012-08-01 19:30:05 -05:00
return concat2(left_result, right_result);
}
2011-11-04 12:38:42 -05:00
} else {
//Case 3: Everything fits in x.right, tail call
node = x.right;
char_offset -= left_len;
2011-11-04 12:38:42 -05:00
}
}
2011-11-04 12:38:42 -05:00
}
};
2011-11-04 12:38:42 -05:00
}
/// Join two nodes under a new concatenation node. The cached char and
/// byte counts are the sums of the children's, and the height is one
/// more than that of the taller child.
fn concat2(left: @node, right: @node) -> @node {
    let total_chars = char_len(left) + char_len(right);
    let total_bytes = byte_len(left) + byte_len(right);
    let new_height = uint::max(height(left), height(right)) + 1u;
    return @concat({left:     left,
                    right:    right,
                    char_len: total_chars,
                    byte_len: total_bytes,
                    height:   new_height});
}
fn height(node: @node) -> uint {
alt(*node) {
2012-08-03 21:59:04 -05:00
leaf(_) => return 0u,
concat(x) => return x.height
2011-11-04 12:38:42 -05:00
}
}
fn cmp(a: @node, b: @node) -> int {
let ita = char_iterator::start(a);
let itb = char_iterator::start(b);
let mut result = 0;
2011-11-04 12:38:42 -05:00
while result == 0 {
alt((char_iterator::next(ita), char_iterator::next(itb))) {
2012-08-03 21:59:04 -05:00
(option::none, option::none) => break,
(option::some(chara), option::some(charb)) => {
2011-11-04 12:38:42 -05:00
result = char::cmp(chara, charb);
}
2012-08-03 21:59:04 -05:00
(option::some(_), _) => {
2011-11-04 12:38:42 -05:00
result = 1;
}
2012-08-03 21:59:04 -05:00
(_, option::some(_)) => {
2011-11-04 12:38:42 -05:00
result = -1;
}
}
}
2012-08-01 19:30:05 -05:00
return result;
2011-11-04 12:38:42 -05:00
}
2012-01-23 16:59:00 -06:00
fn loop_chars(node: @node, it: fn(char) -> bool) -> bool {
2012-08-01 19:30:05 -05:00
return loop_leaves(node,|leaf| {
str::all_between(*leaf.content,
leaf.byte_offset,
leaf.byte_len, it)
2012-06-30 18:19:07 -05:00
});
2011-11-04 12:38:42 -05:00
}
/**
* Loop through a node, leaf by leaf
*
* # Arguments
*
* * rope - A node to traverse.
* * it - A block to execute with each consecutive leaf of the node.
* Return `true` to continue, `false` to stop
*
* # Arguments
*
* `true` If execution proceeded correctly, `false` if it was interrupted,
* that is if `it` returned `false` at any point.
*/
2012-01-23 16:59:00 -06:00
fn loop_leaves(node: @node, it: fn(leaf) -> bool) -> bool{
let mut current = node;
loop {
2011-11-04 12:38:42 -05:00
alt(*current) {
2012-08-03 21:59:04 -05:00
leaf(x) => return it(x),
concat(x) => if loop_leaves(x.left, it) { //non tail call
current = x.right; //tail call
} else {
return false;
2011-11-04 12:38:42 -05:00
}
}
};
2011-11-04 12:38:42 -05:00
}
/**
 * # Arguments
 *
 * * pos - A position in the rope, counted in chars
 *
 * # Return value
 *
 * The character at position `pos`
 *
 * # Safety notes
 *
 * The function will fail if `pos` is not a valid position in the leaf
 * it ends up in.
 *
 * Performance note: This function executes in a time
 * proportional to the height of the rope + the (bounded)
 * length of the largest leaf.
 */
fn char_at(node: @node, pos: uint) -> char {
    let mut node = node;
    let mut pos = pos;
    loop {
        alt *node {
          leaf(x) => {
            assert(pos < x.char_len);
            //`pos` counts chars from the start of the leaf slice:
            //convert it to a byte position in the underlying string
            let byte_pos = x.byte_offset +
                str::count_bytes(*x.content, x.byte_offset, pos);
            let {ch, _} = str::char_range_at(*x.content, byte_pos);
            return ch;
          }
          concat({left, right, _}) => {
            //Descend into the child that holds `pos`, rebasing `pos`
            //when moving to the right
            let left_len = char_len(left);
            node = if left_len > pos { left }
                   else { pos -= left_len; right };
          }
        }
    };
}
2011-11-04 12:38:42 -05:00
/// Pull-style iteration over the leaves of a node, from left to right,
/// driven by an explicit stack of the nodes still to be visited.
mod leaf_iterator {
    /// Iterator state: `stack[0..stackpos]` (inclusive) holds the pending
    /// nodes, the next one to visit on top; a negative `stackpos` means
    /// that the iteration is over.
    type t = {
        stack: ~[mut @node],
        mut stackpos: int
    };

    /// An iterator that yields nothing.
    fn empty() -> t {
        let no_nodes : ~[mut @node] = ~[mut];
        return {stack: no_nodes, mut stackpos: -1};
    }

    /// An iterator over the leaves of `node`. The stack gets
    /// `height(node) + 1` slots and initially holds `node` alone.
    fn start(node: @node) -> t {
        let depth = height(node) + 1u;
        let stack = vec::to_mut(vec::from_elem(depth, node));
        return {stack: stack, mut stackpos: 0};
    }

    /// Yield the next leaf, or `option::none` once all leaves have been
    /// seen.
    fn next(it: t) -> option<leaf> {
        if it.stackpos < 0 { return option::none; }
        loop {
            let top = it.stack[it.stackpos];
            alt *top {
              leaf(l) => {
                //Pop the leaf and hand it out
                it.stackpos -= 1;
                return option::some(l);
              }
              concat(c) => {
                //Replace the node by its right child, then push its left
                //child on top so that it is visited first
                it.stack[it.stackpos] = c.right;
                it.stackpos += 1;
                it.stack[it.stackpos] = c.left;
              }
            }
        };
    }
}
/// Iteration over the characters of a (sub)rope, from left to right.
mod char_iterator {
    // State of a traversal:
    // * `leaf_iterator` provides the leaves, one after the other;
    // * `leaf` is the leaf being read, `none` if a new one must be fetched;
    // * `leaf_byte_pos` is the position of the next character to read,
    //   in bytes, counted from the start of `leaf`.
    type t = {
        leaf_iterator:     leaf_iterator::t,
        mut leaf:          option<leaf>,
        mut leaf_byte_pos: uint
    };

    /// An iterator over the characters found below `node`.
    fn start(node: @node) -> t {
        return {
            leaf_iterator:     leaf_iterator::start(node),
            mut leaf:          option::none,
            mut leaf_byte_pos: 0u
        };
    }

    /// An iterator that yields no character at all.
    fn empty() -> t {
        return {
            leaf_iterator:     leaf_iterator::empty(),
            mut leaf:          option::none,
            mut leaf_byte_pos: 0u
        };
    }

    /// Yield the next character, or `none` once the rope is exhausted.
    fn next(it: t) -> option<char> {
        loop {
            // Without any leaf left to read, the iteration is over.
            alt get_current_or_next_leaf(it) {
              option::none    => return option::none,
              option::some(_) => { }
            }
            // A leaf that has nothing more to offer is dropped by
            // `get_next_char_in_leaf`; loop to move on to the next one.
            alt get_next_char_in_leaf(it) {
              option::some(ch) => return option::some(ch),
              option::none     => { }
            }
        };
    }

    /// The leaf being read, fetching the next one if necessary.
    fn get_current_or_next_leaf(it: t) -> option<leaf> {
        alt it.leaf {
          option::some(_) => return it.leaf,
          option::none    => { }
        }
        let fetched = leaf_iterator::next(it.leaf_iterator);
        alt fetched {
          option::some(_) => {
            // Start reading the new leaf from its first byte.
            it.leaf = fetched;
            it.leaf_byte_pos = 0u;
          }
          option::none => { }
        }
        return fetched;
    }

    /// The next character of the current leaf, `none` if it has none left.
    fn get_next_char_in_leaf(it: t) -> option<char> {
        alt copy it.leaf {
          option::none => return option::none,
          option::some(aleaf) => {
            if it.leaf_byte_pos < aleaf.byte_len {
                // Positions in the string are absolute, positions in the
                // iterator are relative to the start of the leaf.
                let abs_pos = aleaf.byte_offset + it.leaf_byte_pos;
                let {ch, next} = str::char_range_at(*aleaf.content, abs_pos);
                it.leaf_byte_pos = next - aleaf.byte_offset;
                return option::some(ch);
            }
            //We are actually past the end of the leaf
            it.leaf = option::none;
            return option::none;
          }
        }
    }
}
}
2012-01-17 21:05:07 -06:00
#[cfg(test)]
mod tests {
//Utility function, used for sanity check: flatten a rope into a plain
//string by concatenating the slices of all its leaves, left to right.
fn rope_to_string(r: rope) -> ~str {
    //Append the text found below `n` to `acc`
    fn collect(acc: @mut ~str, n: @node::node) unsafe {
        alt *n {
          node::concat(pair) => {
            collect(acc, pair.left);
            collect(acc, pair.right);
          }
          node::leaf(l) => {
            //A leaf only covers a slice of its backing string
            let stop = l.byte_offset + l.byte_len;
            *acc += str::slice(*l.content, l.byte_offset, stop);
          }
        }
    }
    alt r {
      node::empty => return ~"",
      node::content(root) => {
        let acc = @mut ~"";
        collect(acc, root);
        return *acc;
      }
    }
}
#[test]
fn trivial() {
    //The empty rope has no length, whatever the unit
    let nothing = empty();
    assert char_len(nothing) == 0u;
    assert byte_len(nothing) == 0u;
}
#[test]
fn of_string1() {
    //A short string must survive the round-trip through a rope
    let text = @~"0123456789ABCDE";
    let as_rope = of_str(text);
    let expected_len = str::char_len(*text);
    assert char_len(as_rope) == expected_len;
    assert rope_to_string(as_rope) == *text;
}
#[test]
fn of_string2() {
    //Build a long string by doubling a short one 10 times
    let buf = @mut ~"1234567890";
    for uint::range(0u, 10u) |_i| {
        *buf = *buf + *buf;
    }
    let sample = @*buf;
    let r = of_str(sample);

    assert char_len(r) == str::char_len(*sample);
    assert rope_to_string(r) == *sample;

    //Walk the rope and the string side by side, char by char
    let string_len = str::len(*sample);
    let rope_iter = iterator::char::start(r);
    let mut string_iter = 0u;
    let mut equal = true;
    loop {
        alt node::char_iterator::next(rope_iter) {
          option::some(c) => {
            let {ch, next} = str::char_range_at(*sample, string_iter);
            string_iter = next;
            if ch != c { equal = false; break; }
          }
          option::none => {
            //The rope is exhausted, the string must be exhausted too
            equal = string_iter >= string_len;
            break;
          }
        }
    }

    assert equal;
}
#[test]
fn iter1() {
    //Build a long string by doubling a short one 10 times
    let buf = @mut ~"1234567890";
    for uint::range(0u, 10u) |_i| {
        *buf = *buf + *buf;
    }
    let sample = @*buf;
    let r = of_str(sample);

    //Count the chars yielded by the iterator
    let it = iterator::char::start(r);
    let mut seen = 0u;
    loop {
        alt node::char_iterator::next(it) {
          option::some(_) => { seen += 1u; }
          option::none    => { break; }
        }
    }

    assert seen == str::char_len(*sample);
}
#[test]
fn bal1() {
    let init = @~"1234567890";

    //Reference rope: double the string 8 times, then adopt it
    let buf = @mut *init;
    for uint::range(0u, 8u) |_i| {
        *buf = *buf + *buf;
    }
    let sample = @*buf;
    let r1 = of_str(sample);

    //Same text, obtained by doubling the rope itself 8 times
    let mut r2 = of_str(init);
    for uint::range(0u, 8u) |_i| {
        r2 = append_rope(r2, r2);
    }
    assert eq(r1, r2);

    //Balancing must not change the contents
    let r3 = bal(r2);
    assert char_len(r1) == char_len(r3);
    assert eq(r1, r3);
}
#[test]
#[ignore]
2012-01-17 21:05:07 -06:00
fn char_at1() {
//Generate a large rope
let mut r = of_str(@~"123456789");
2012-06-30 18:19:07 -05:00
for uint::range(0u, 10u) |_i| {
2012-01-17 21:05:07 -06:00
r = append_rope(r, r);
}
//Copy it in the slowest possible way
let mut r2 = empty();
2012-06-30 18:19:07 -05:00
for uint::range(0u, char_len(r)) |i| {
2012-01-17 21:05:07 -06:00
r2 = append_char(r2, char_at(r, i));
}
assert eq(r, r2);
let mut r3 = empty();
2012-06-30 18:19:07 -05:00
for uint::range(0u, char_len(r)) |i| {
2012-01-17 21:05:07 -06:00
r3 = prepend_char(r3, char_at(r, char_len(r) - i - 1u));
}
assert eq(r, r3);
//Additional sanity checks
let balr = bal(r);
let bal2 = bal(r2);
let bal3 = bal(r3);
assert eq(r, balr);
assert eq(r, bal2);
assert eq(r, bal3);
assert eq(r2, r3);
assert eq(bal2, bal3);
}
#[test]
fn concat1() {
//Generate a reasonable rope
let chunk = of_str(@~"123456789");
let mut r = empty();
2012-06-30 18:19:07 -05:00
for uint::range(0u, 10u) |_i| {
2012-01-17 21:05:07 -06:00
r = append_rope(r, chunk);
}
//Same rope, obtained with rope::concat
2012-03-12 17:52:30 -05:00
let r2 = concat(vec::from_elem(10u, chunk));
2012-01-17 21:05:07 -06:00
assert eq(r, r2);
}
}