Working prototype

Implements a few formatting options using an approach where we modify the source using a rope. Uses very ad-hoc rules for formatting.
2015-03-08 11:46:35 +13:00 · 2015-03-08 11:46:35 +13:00 · 7417ab5aed
commit 7417ab5aed
parent f1e698c838
4 changed files with 751 additions and 126 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,17 @@
+[package]
+
+name = "rustfmt"
+version = "0.0.1"
+authors = ["Nicholas Cameron <nrc@ncameron.org>"]
+description = "tool to find and fix Rust formatting issues"
+repository = "https://github.com/nick29581/rustfmt"
+readme = "README.md"
+license = "Apache-2.0/MIT"
+
+#[dependencies.reprint]
+#reprint = "0.0.1"
+#path = "/home/ncameron/reprint"
+
+[[bin]]
+name = "rustfmt"
+path = "src/mod.rs"
--- a/src/changes.rs
+++ b/src/changes.rs
@ -8,33 +8,26 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

+
 // TODO
-// composable changes
 // print to files (maybe that shouldn't be here, but in mod)
 // tests
-// docs

 use rope::{Rope, RopeSlice};
 use std::collections::HashMap;
-use syntax::codemap::{CodeMap, Span, Pos, BytePos};
+use syntax::codemap::{CodeMap, Span, BytePos};
 use std::fmt;

+// This is basically a wrapper around a bunch of Ropes which makes it convenient
+// to work with libsyntax. It is badly named.
 pub struct ChangeSet<'a> {
    file_map: HashMap<String, Rope>,
-    // FIXME, we only keep a codemap around so we can have convenience methods
-    // taking Spans, it would be more resuable to factor this (and the methods)
-    // out into an adaptor.
    codemap: &'a CodeMap,
    pub count: u64,
 }

-pub struct FileIterator<'c, 'a: 'c> {
-    change_set: &'c ChangeSet<'a>,
-    keys: Vec<&'c String>,
-    cur_key: usize,
-}
-
 impl<'a> ChangeSet<'a> {
+    // Create a new ChangeSet for a given libsyntax CodeMap.
    pub fn from_codemap(codemap: &'a CodeMap) -> ChangeSet<'a> {
        let mut result = ChangeSet {
            file_map: HashMap::new(),
@ -43,14 +36,16 @@ impl<'a> ChangeSet<'a> {
        };

        for f in codemap.files.borrow().iter() {
-            let contents = Rope::from_string(f.src.clone());
+            let contents = Rope::from_string((&**f.src.as_ref().unwrap()).clone());
            result.file_map.insert(f.name.clone(), contents);
        }

        result
    }

-    // start and end are unadjusted.
+    // Change a span of text in our stored text into the new text (`text`).
+    // The span of text to change is given in the coordinates of the original
+    // source text, not the current text.
    pub fn change(&mut self, file_name: &str, start: usize, end: usize, text: String) {
        println!("change: {}:{}-{} \"{}\"", file_name, start, end, text);

@ -59,9 +54,10 @@ impl<'a> ChangeSet<'a> {
        let file = &mut self.file_map[*file_name];

        if end - start == text.len() {
-            // TODO
-            panic!();
-            file.replace_str(start, &text[]);
+            // TODO src_replace_str would be much more efficient
+            //file.src_replace_str(start, &text);
+            file.src_remove(start, end);
+            file.src_insert(start, text);
        } else {
            // TODO if we do this in one op, could we get better change info?
            file.src_remove(start, end);
@ -69,28 +65,34 @@ impl<'a> ChangeSet<'a> {
        }
    }

+    // As for `change()`, but use a Span to indicate the text to change.
    pub fn change_span(&mut self, span: Span, text: String) {
        let l_loc = self.codemap.lookup_char_pos(span.lo);
        let file_offset = l_loc.file.start_pos.0;
-        self.change(&l_loc.file.name[],
+        self.change(&l_loc.file.name,
                    (span.lo.0 - file_offset) as usize,
                    (span.hi.0 - file_offset) as usize,
                    text)
    }

+    // Get a slice of the current text. Coordinates are relative to the source
+    // text. I.e., this method returns the text which has been changed from the
+    // indicated span.
    pub fn slice(&self, file_name: &str, start: usize, end: usize) -> RopeSlice {
        let file = &self.file_map[*file_name];
        file.src_slice(start..end)
    }

+    // As for `slice()`, but use a Span to indicate the text to return.
    pub fn slice_span(&self, span:Span) -> RopeSlice {
        let l_loc = self.codemap.lookup_char_pos(span.lo);
        let file_offset = l_loc.file.start_pos.0;
-        self.slice(&l_loc.file.name[],
+        self.slice(&l_loc.file.name,
                   (span.lo.0 - file_offset) as usize,
                   (span.hi.0 - file_offset) as usize)
    }

+    // Return an iterator over the entire changed text.
    pub fn text<'c>(&'c self) -> FileIterator<'c, 'a> {
        FileIterator {
            change_set: self,
@ -99,16 +101,26 @@ impl<'a> ChangeSet<'a> {
        }
    }

+    // Get the current line-relative position of a position in the source text.
    pub fn col(&self, loc: BytePos) -> usize {
        let l_loc = self.codemap.lookup_char_pos(loc);
        let file_offset = l_loc.file.start_pos.0;
-        let file = &self.file_map[l_loc.file.name[]];
+        let file = &self.file_map[l_loc.file.name[..]];
        file.col_for_src_loc(loc.0 as usize - file_offset as usize)
    }
 }

+// Iterates over each file in the ChangeSet. Yields the filename and the changed
+// text for that file.
+pub struct FileIterator<'c, 'a: 'c> {
+    change_set: &'c ChangeSet<'a>,
+    keys: Vec<&'c String>,
+    cur_key: usize,
+}
+
 impl<'c, 'a> Iterator for FileIterator<'c, 'a> {
    type Item = (&'c str, &'c Rope);
+
    fn next(&mut self) -> Option<(&'c str, &'c Rope)> {
        if self.cur_key >= self.keys.len() {
            return None;
@ -116,15 +128,16 @@ impl<'c, 'a> Iterator for FileIterator<'c, 'a> {

        let key = self.keys[self.cur_key];
        self.cur_key += 1;
-        return Some((&key[], &self.change_set.file_map[*key]))
+        return Some((&key, &self.change_set.file_map[*key]))
    }
 }

 impl<'a> fmt::Display for ChangeSet<'a> {
+    // Prints the entire changed text.
    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        for (f, r) in self.text() {
            try!(write!(fmt, "{}:\n", f));
-            try!(write!(fmt, "{}", r));
+            try!(write!(fmt, "{}\n\n", r));
        }
        Ok(())
    }    
--- a/src/mod.rs
+++ b/src/mod.rs
@ -0,0 +1,468 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![feature(box_syntax)]
+#![feature(box_patterns)]
+#![feature(rustc_private)]
+#![feature(collections)]
+#![feature(os)]
+#![feature(core)]
+#![feature(unicode)]
+#![feature(old_path)]
+#![feature(exit_status)]
+
+#[macro_use]
+extern crate log;
+
+extern crate getopts;
+extern crate rustc;
+extern crate rustc_driver;
+extern crate syntax;
+
+use rustc::session::Session;
+use rustc::session::config::{self, Input};
+use rustc_driver::{driver, CompilerCalls, Compilation};
+
+use syntax::ast;
+use syntax::codemap::{self, CodeMap, Span, Pos};
+use syntax::diagnostics;
+use syntax::parse::token;
+use syntax::print::pprust;
+use syntax::visit;
+
+use std::mem;
+
+use changes::ChangeSet;
+
+pub mod rope;
+mod changes;
+
+const IDEAL_WIDTH: usize = 80;
+const MAX_WIDTH: usize = 100;
+const MIN_STRING: usize = 10;
+
+// Formatting which depends on the AST.
+fn fmt_ast<'a>(krate: &ast::Crate, codemap: &'a CodeMap) -> ChangeSet<'a> {
+    let mut visitor = FmtVisitor { codemap: codemap,
+                                   changes: ChangeSet::from_codemap(codemap) };
+    visit::walk_crate(&mut visitor, krate);
+
+    visitor.changes
+}
+
+// Formatting done on a char by char basis.
+fn fmt_lines(changes: &mut ChangeSet) {
+    // Iterate over the chars in the change set.
+    for (f, text) in changes.text() {
+        let mut trims = vec![];
+        let mut last_wspace = None;
+        let mut line_len = 0;
+        let mut cur_line = 1;
+        for (c, b) in text.chars() {
+            if c == '\n' { // TODO test for \r too
+                // Check for (and record) trailing whitespace.
+                if let Some(lw) = last_wspace {
+                    trims.push((lw, b));
+                    line_len -= b - lw;
+                }
+                // Check for any line width errors we couldn't correct.
+                if line_len > MAX_WIDTH {
+                    // FIXME store the error rather than reporting immediately.
+                    println!("Rustfmt couldn't fix (sorry). {}:{}: line longer than {} characters",
+                             f, cur_line, MAX_WIDTH);
+                }
+                line_len = 0;
+                cur_line += 1;
+                last_wspace = None;
+            } else {
+                line_len += 1;
+                if c.is_whitespace() {
+                    if last_wspace.is_none() {
+                        last_wspace = Some(b);
+                    }
+                } else {
+                    last_wspace = None;
+                }
+            }
+        }
+
+        unsafe {
+            // Invariant: we only mutate a rope after we have searched it, then
+            // we will not search it again.
+            let mut_text: &mut rope::Rope = mem::transmute(text);
+            let mut_count: &mut u64 = mem::transmute(&changes.count);
+            let mut offset = 0;
+            // Get rid of any trailing whitespace we recorded earlier.
+            for &(s, e) in trims.iter() {
+                // Note that we change the underlying ropes directly, we don't
+                // go through the changeset because our change positions are
+                // relative to the newest text, not the original.
+                debug!("Stripping trailing whitespace {}:{}-{} \"{}\"",
+                       f, s, e, text.slice(s-offset..e-offset));
+                mut_text.remove(s-offset, e-offset);
+                *mut_count += 1;
+                offset += e - s;
+            }
+        }
+    }
+}
+
+struct FmtVisitor<'a> {
+    codemap: &'a CodeMap,
+    changes: ChangeSet<'a>,
+}
+
+impl<'a, 'v> visit::Visitor<'v> for FmtVisitor<'a> {
+    fn visit_expr(&mut self, ex: &'v ast::Expr) {
+        match ex.node {
+            ast::Expr_::ExprLit(ref l) => match l.node {
+                ast::Lit_::LitStr(ref is, _) => {
+                    self.rewrite_string(&is, l.span);
+                }
+                _ => {}
+            },
+            _ => {}
+        }
+
+        visit::walk_expr(self, ex)
+    }
+
+    fn visit_fn(&mut self,
+                fk: visit::FnKind<'v>,
+                fd: &'v ast::FnDecl,
+                b: &'v ast::Block,
+                s: Span,
+                _: ast::NodeId) {
+        self.fix_formal_args(fd);
+        visit::walk_fn(self, fk, fd, b, s);
+    }
+
+    fn visit_item(&mut self, item: &'v ast::Item) {
+        // TODO check each item is on a new line and is correctly indented.
+        match item.node {
+            ast::Item_::ItemUse(ref vp) => {
+                match vp.node {
+                    ast::ViewPath_::ViewPathList(ref path, ref path_list) => {
+                        let new_str = self.fix_use_list(path, path_list, vp.span);
+
+                        // TODO move these optimisations to ChangeSet
+                        if new_str != self.codemap.span_to_snippet(item.span).unwrap() {
+                            self.changes.change_span(item.span, new_str);
+                        }
+                    }
+                    ast::ViewPath_::ViewPathGlob(_) => {
+                        // FIXME convert to list?
+                    }
+                    _ => {}
+                }
+            }
+            _ => {}
+        }
+        visit::walk_item(self, item);
+    }
+}
+
+fn make_indent(width: usize) -> String {
+    let mut indent = String::with_capacity(width);
+    for _ in 0..width {
+        indent.push(' ')
+    }
+    indent
+}
+
+impl<'a> FmtVisitor<'a> {
+    // TODO NEEDS TESTS
+    fn rewrite_string(&mut self, s: &str, span: Span) {
+        // FIXME I bet this stomps unicode escapes in the source string
+
+        // Check if there is anything to fix: we always try to fixup multi-line
+        // strings, or if the string is too long for the line.
+        let l_loc = self.codemap.lookup_char_pos(span.lo);
+        let r_loc = self.codemap.lookup_char_pos(span.hi);
+        if l_loc.line == r_loc.line && r_loc.col.to_usize() <= MAX_WIDTH {
+            return;
+        }
+
+        // TODO if lo.col > IDEAL - 10, start a new line (need cur indent for that)
+
+        let s = s.escape_default();
+
+        // TODO use fixed value.
+        let l_loc = self.codemap.lookup_char_pos(span.lo);
+        let l_col = l_loc.col.to_usize();
+        
+        let indent = make_indent(l_col + 1);
+        let indent = &indent;
+
+        let max_chars = MAX_WIDTH - (l_col + 1);
+
+        let mut cur_start = 0;
+        let mut result = String::new();
+        result.push('"');
+        loop {
+            let mut cur_end = cur_start + max_chars;
+
+            if cur_end >= s.len() {
+                result.push_str(&s[cur_start..]);
+                break;
+            }
+
+            // Make sure we're on a char boundary.
+            cur_end = next_char(&s, cur_end);
+
+            // Push cur_end left until we reach whitespace
+            while !s.char_at(cur_end-1).is_whitespace() {
+                cur_end = prev_char(&s, cur_end);
+
+                if cur_end - cur_start < MIN_STRING {
+                    // We can't break at whitespace, fall back to splitting
+                    // anywhere that doesn't break an escape sequence
+                    cur_end = next_char(&s, cur_start + max_chars);
+                    while s.char_at(cur_end) == '\\' {
+                        cur_end = prev_char(&s, cur_end);
+                    }
+                }
+            }
+            // Make sure there is no whitespace to the right of the break.
+            while cur_end < s.len() && s.char_at(cur_end).is_whitespace() {
+                cur_end = next_char(&s, cur_end+1);
+            }
+            result.push_str(&s[cur_start..cur_end]);
+            result.push_str("\\\n");
+            result.push_str(indent);
+
+            cur_start = cur_end;
+        }
+        result.push('"');
+
+        // Check that we actually changed something.
+        if result == self.codemap.span_to_snippet(span).unwrap() {
+            return;
+        }
+
+        self.changes.change_span(span, result);
+    }
+
+    // Basically just pretty prints a multi-item import.
+    fn fix_use_list(&mut self,
+                    path: &ast::Path,
+                    path_list: &[ast::PathListItem],
+                    vp_span: Span) -> String {
+        // FIXME remove unused imports
+
+        // FIXME check indentation
+        let l_loc = self.codemap.lookup_char_pos(vp_span.lo);
+        let path_str = pprust::path_to_string(&path);
+        let indent = l_loc.col.0;
+        // After accounting for the overhead, how much space left for
+        // the item list? ( 5 = :: + { + } + ; )
+        let space = IDEAL_WIDTH - (indent + path_str.len() + 5);
+        // 4 = `use` + one space
+        // TODO might be pub use
+        let indent = make_indent(indent-4);
+
+        let mut cur_str = String::new();
+        let mut first = true;
+        // If `self` is in the list, put it first.
+        if path_list.iter().any(|vpi|
+            if let ast::PathListItem_::PathListMod{ .. } = vpi.node {
+                true
+            } else {
+                false
+            }
+        ) {
+            cur_str = "self".to_string();
+            first = false;
+        }
+
+        let mut new_str = String::new();
+        for vpi in path_list.iter() {
+            match vpi.node {
+                ast::PathListItem_::PathListIdent{ name, .. } => {
+                    let next_item = &token::get_ident(name);
+                    if cur_str.len() + next_item.len() > space {
+                        let cur_line = format!("{}use {}::{{{}}};\n", indent, path_str, cur_str);
+                        new_str.push_str(&cur_line);
+
+                        cur_str = String::new();
+                        first = true;
+                    }
+
+                    if first {
+                        first = false;
+                    } else {
+                        cur_str.push_str(", ");
+                    }
+
+                    cur_str.push_str(next_item);
+                }
+                ast::PathListItem_::PathListMod{ .. } => {}
+            }
+        }
+
+        assert!(!first);
+        let cur_line = format!("{}use {}::{{{}}};", indent, path_str, cur_str);
+        new_str.push_str(&cur_line);
+
+        new_str
+    }
+
+    fn fix_formal_args<'v>(&mut self, fd: &'v ast::FnDecl) {
+        // For now, just check the arguments line up and make them per-row if the line is too long.
+        let args = &fd.inputs;
+        if args.len() <= 1 {
+            return;
+        }
+        // TODO not really using the hi positions
+        let spans: Vec<_> = args.iter().map(|a| (a.pat.span.lo, a.ty.span.hi)).collect();
+        let locs: Vec<_> = spans.iter().map(|&(a, b)| (self.codemap.lookup_char_pos(a), self.codemap.lookup_char_pos(b))).collect();
+        let first_loc = &locs[0].0;
+        // TODO need to adjust for previous changes here.
+        let same_row = locs.iter().all(|&(ref l, _)| l.line == first_loc.line);
+        let same_col = locs.iter().all(|&(ref l, _)| l.col == first_loc.col);
+
+        if same_col {
+            // TODO Check one arg per line and no lines in between (except comments)
+            return;
+        }        
+
+        if same_row { // TODO check line is < 100 && first_loc.line {
+            // TODO could also fix whitespace around punctuation here
+            // TODO and could check that we're on the same line as the function call, if possible
+            return;
+        }
+
+        let col = self.changes.col(spans[0].0);
+        let mut indent = String::with_capacity(col);
+        indent.push('\n');
+        for _ in 0..col { indent.push(' '); }
+        let last_idx = spans.len() - 1;
+        for (i, s) in spans.iter().enumerate() {
+            // Take the span from lo to lo (or the last hi for the last arg), 
+            // trim, push on top of indent, then replace the old lo-lo span with it.
+            let mut new_text = if i == 0 {
+                "".to_string()
+            } else {
+                indent.clone()
+            };
+            let hi = if i == last_idx {
+                s.1
+            } else {
+                spans[i+1].0
+            };
+            // TODO need a version of slice taking locs, not a span
+            let snippet = self.changes.slice_span(Span{ lo: s.0, hi: hi, expn_id: codemap::NO_EXPANSION }).to_string();
+            let snippet = snippet.trim();
+            new_text.push_str(snippet);
+            self.changes.change(&first_loc.file.name, (s.0).0 as usize, hi.0 as usize, new_text);
+        }
+    }
+}
+
+#[inline]
+fn prev_char(s: &str, mut i: usize) -> usize {
+    if i == 0 { return 0; }
+
+    i -= 1;
+    while !s.is_char_boundary(i) {
+        i -= 1;
+    }
+    i
+}
+
+#[inline]
+fn next_char(s: &str, mut i: usize) -> usize {
+    if i >= s.len() { return s.len(); }
+
+    while !s.is_char_boundary(i) {
+        i += 1;
+    }
+    i
+}
+
+struct RustFmtCalls {
+    input_path: Option<Path>,
+}
+
+impl<'a> CompilerCalls<'a> for RustFmtCalls {
+    fn early_callback(&mut self,
+                      _: &getopts::Matches,
+                      _: &diagnostics::registry::Registry)
+                      -> Compilation {
+        Compilation::Continue
+    }
+
+    fn some_input(&mut self, input: Input, input_path: Option<Path>) -> (Input, Option<Path>) {
+        match input_path {
+            Some(ref ip) => self.input_path = Some(ip.clone()),
+            _ => {
+                // FIXME should handle string input and write to stdout or something
+                panic!("No input path");
+            }
+        }
+        (input, input_path)
+    }
+
+    fn no_input(&mut self,
+                _: &getopts::Matches,
+                _: &config::Options,
+                _: &Option<Path>,
+                _: &Option<Path>,
+                _: &diagnostics::registry::Registry)
+                -> Option<(Input, Option<Path>)> {
+        panic!("No input supplied to RustFmt");
+    }
+
+    fn late_callback(&mut self,
+                     _: &getopts::Matches,
+                     _: &Session,
+                     _: &Input,
+                     _: &Option<Path>,
+                     _: &Option<Path>)
+                     -> Compilation {
+        Compilation::Continue
+    }
+
+    fn build_controller(&mut self, _: &Session) -> driver::CompileController<'a> {
+        let mut control = driver::CompileController::basic();
+        control.after_parse.stop = Compilation::Stop;
+        control.after_parse.callback = box |state| {
+            let krate = state.krate.unwrap();
+            let codemap = state.session.codemap();
+            let mut changes = fmt_ast(krate, codemap);
+            fmt_lines(&mut changes);
+
+            println!("Making {} changes", changes.count);
+            println!("{}", changes);
+            // FIXME(#5) Should be user specified whether to show or replace.
+        };
+
+        control
+    }
+}
+
+fn main() {
+    let args = std::os::args();
+    let mut call_ctxt = RustFmtCalls { input_path: None };
+    rustc_driver::run_compiler(&args, &mut call_ctxt);
+    std::env::set_exit_status(0);
+}
+
+// FIXME comments
+// comments aren't in the AST, which makes processing them difficult, but then
+// comments are complicated anyway. I think I am happy putting off tackling them
+// for now. Long term the solution is for comments to be in the AST, but that means
+// only the libsyntax AST, not the rustc one, which means waiting for the ASTs
+// to diverge one day....
+
+// Once we do have comments, we just have to implement a simple word wrapping
+// algorithm to keep the width under IDEAL_WIDTH. We should also convert multiline
+// /* ... */ comments to // and check doc comments are in the right place and of
+// the right kind.
--- a/src/rope.rs
+++ b/src/rope.rs
@ -14,25 +14,31 @@
 // tests
 // pull out into its own crate
 // impl Default, Extend
-// impl DOubleEndedIter and ExactSizeIter for RopeChars
+// impl DoubleEndedIter and ExactSizeIter for RopeChars
 // better allocation
-// balancing
-// thread safety/parallisation
+// balancing?

 extern crate unicode;
 use std::fmt;
 use std::ops::Range;
+use std::num::{SignedInt, Int};

-// A Rope, based on an unbalanced binary tree.
-
+// A Rope, based on an unbalanced binary tree. The rope is somewhat special in
+// that it tracks positions in the source text. So when locating a position in
+// the rope, the user can use either a current position in the text or a
+// position in the source text, which the Rope will adjust to a current position
+// whilst searching.
 pub struct Rope {
    root: Node,
    len: usize,
    src_len: usize,
-    // FIXME: Allocation is very dumb at the moment, we always add another buffer for every inserted string and we never resuse or collect old memory
+    // FIXME: Allocation is very dumb at the moment, we always add another
+    // buffer for every inserted string and we never reuse or collect old
+    // memory
    storage: Vec<Vec<u8>>
 }

+// A view over a portion of a Rope. Analogous to string slices (`str`).
 pub struct RopeSlice<'rope> {
    // All nodes which make up the slice, in order.
    nodes: Vec<&'rope Lnode>,
@ -42,6 +48,7 @@ pub struct RopeSlice<'rope> {
    len: usize,
 }

+// An iterator over the chars in a rope.
 pub struct RopeChars<'rope> {
    data: RopeSlice<'rope>,
    cur_node: usize,
@ -49,8 +56,8 @@ pub struct RopeChars<'rope> {
    abs_byte: usize,
 }

-
 impl Rope {
+    // Create an empty rope.
    pub fn new() -> Rope {
        Rope {
            root: Node::empty_inner(),
@ -62,7 +69,7 @@ impl Rope {

    // Uses text as initial storage.
    pub fn from_string(text: String) -> Rope {
-        // TODO should split large texts into segments as we insert
+        // TODO should split very large texts into segments as we insert

        let mut result = Rope::new();
        result.insert(0, text);
@ -70,47 +77,41 @@ impl Rope {
        result
    }

+    // When initialising a rope, indicates that the rope is complete wrt the
+    // source text.
    fn fix_src(&mut self) {
        self.root.fix_src();
        self.src_len = self.len;
    }

+    // Length of the rope.
    pub fn len(&self) -> usize {
        self.len
    }

-    pub fn insert(&mut self, start: usize, text: String) {
-        if text.len() == 0 {
-            return;
-        }
-
-        debug_assert!(start <= self.len(), "insertion out of bounds of rope");
-
-        let len = text.len();
-        let storage = text.into_bytes();
-        let new_node = box Node::new_leaf(&storage[][0] as *const u8, len, 0);
-        self.storage.push(storage);
-
-        match self.root.insert(new_node, start, start) {
-            NodeAction::Change(n, adj) => {
-                assert!(adj as usize == len);
-                self.root = *n;
-            }
-            NodeAction::Adjust(adj) => {
-                assert!(adj as usize == len);
-            }
-            _ => panic!("Unexpected action")
-        }
-        self.len += len;
-    }
-
    pub fn insert_copy(&mut self, start: usize, text: &str) {
-        // If we did clever things with allocation, we could do better here
+        // FIXME If we did clever things with allocation, we could do better here.
        self.insert(start, text.to_string());
    }

+    pub fn insert(&mut self, start: usize, text: String) {
+        self.insert_inner(start,
+                          text,
+                          |this, node| this.root.insert(node, start, start))
+    }
+
    pub fn src_insert(&mut self, start: usize, text: String) {
-        // TODO refactor with insert
+        self.insert_inner(start,
+                          text,
+                          |this, node| this.root.src_insert(node, start, start))
+    }
+
+    fn insert_inner<F>(&mut self,
+                       start: usize,
+                       text: String,
+                       do_insert: F)
+        where F: Fn(&mut Rope, Box<Node>) -> NodeAction
+    {
        if text.len() == 0 {
            return;
        }
@ -119,10 +120,10 @@ impl Rope {

        let len = text.len();
        let storage = text.into_bytes();
-        let new_node = box Node::new_leaf(&storage[][0] as *const u8, len, 0);
+        let new_node = box Node::new_leaf(&storage[..][0] as *const u8, len, 0);
        self.storage.push(storage);

-        match self.root.src_insert(new_node, start, start) {
+        match do_insert(self, new_node) {
            NodeAction::Change(n, adj) => {
                assert!(adj as usize == len);
                self.root = *n;
@ -147,35 +148,25 @@ impl Rope {
    }

    pub fn remove(&mut self, start: usize, end: usize) {
-        assert!(end >= start);
-        if start == end {
-            return;
-        }
-
-        let action = self.root.remove(start, end, start);
-        match action {
-            NodeAction::None => {}
-            NodeAction::Remove => {
-                self.root = Node::empty_inner();
-                self.len = 0;
-            }
-            NodeAction::Adjust(adj) => self.len = (self.len as isize + adj) as usize,
-            NodeAction::Change(node, adj) => {
-                self.root = *node;
-                self.len = (self.len as isize + adj) as usize;
-            }
-        }
+        self.remove_inner(start, end, |this| this.root.remove(start, end, start))
    }

    pub fn src_remove(&mut self, start: usize, end: usize) {
-        // TODO refactor with remove
+        self.remove_inner(start, end, |this| this.root.src_remove(start, end, start))
+    }
+
+    fn remove_inner<F>(&mut self,
+                       start: usize,
+                       end: usize,
+                       do_remove: F)
+        where F: Fn(&mut Rope) -> NodeAction
+    {
        assert!(end >= start);
        if start == end {
            return;
        }

-        let action = self.root.src_remove(start, end, start);
-        match action {
+        match do_remove(self) {
            NodeAction::None => {}
            NodeAction::Remove => {
                self.root = Node::empty_inner();
@ -190,6 +181,7 @@ impl Rope {
    }

    // TODO src_replace
+    // TODO src_replace_str

    // This can go horribly wrong if you overwrite a grapheme of different size.
    // It is the callers responsibility to ensure that the grapheme at point start
@ -200,7 +192,7 @@ impl Rope {
        // I think that is better than duplicating a bunch of code.
        // It should be possible to view a &char as a &[u8] somehow, and then
        // we can optimise this (FIXME).
-        self.replace_str(start, &new_char.to_string()[]);
+        self.replace_str(start, &new_char.to_string()[..]);
    }

    pub fn replace_str(&mut self, start: usize, new_str: &str) {
@ -332,6 +324,10 @@ impl ::std::str::FromStr for Rope {

 impl<'a> fmt::Display for RopeSlice<'a> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+        if self.nodes.len() == 0 {
+            return Ok(());
+        }
+
        let last_idx = self.nodes.len() - 1;
        for (i, n) in self.nodes.iter().enumerate() {
            let mut ptr = n.text;
@ -346,7 +342,7 @@ impl<'a> fmt::Display for RopeSlice<'a> {
            unsafe {
                try!(write!(fmt,
                            "{}",
-                            ::std::str::from_utf8(::std::slice::from_raw_buf(&ptr, len)).unwrap()));
+                            ::std::str::from_utf8(::std::slice::from_raw_parts(ptr, len)).unwrap()));
            }
        }
        Ok(())
@ -371,7 +367,7 @@ impl<'a> fmt::Debug for RopeSlice<'a> {
            unsafe {
                try!(write!(fmt,
                            "\"{}\"",
-                            ::std::str::from_utf8(::std::slice::from_raw_buf(&ptr, len)).unwrap()));
+                            ::std::str::from_utf8(::std::slice::from_raw_parts(ptr, len)).unwrap()));
            }
        }
        Ok(())
@ -408,7 +404,7 @@ impl fmt::Display for Node {
                unsafe {
                    write!(fmt,
                           "{}",
-                           ::std::str::from_utf8(::std::slice::from_raw_buf(text, len)).unwrap())
+                           ::std::str::from_utf8(::std::slice::from_raw_parts(*text, len)).unwrap())
                }
            }
        }
@ -437,7 +433,7 @@ impl fmt::Debug for Node {
                unsafe {
                    write!(fmt,
                           "(\"{}\"; {})",
-                           ::std::str::from_utf8(::std::slice::from_raw_buf(text, len)).unwrap(),
+                           ::std::str::from_utf8(::std::slice::from_raw_parts(*text, len)).unwrap(),
                           len)
                }
            }
@ -519,7 +515,7 @@ impl Node {
        }
    }

-    // All these methods are just doing dynamic dispatch, TODO use a macro
+    // Most of these methods are just doing dynamic dispatch, TODO use a macro

    // precond: start < end
    fn remove(&mut self, start: usize, end: usize, src_start: usize) -> NodeAction {
@ -534,19 +530,9 @@ impl Node {
            Node::InnerNode(ref mut i) => i.src_remove(start, end, src_start),
            Node::LeafNode(ref mut l) => {
                debug!("src_remove: pre-adjust {}-{}; {}", start, end, l.src_offset);
-                let mut start = start as isize + l.src_offset;
-                if start < 0 {
-                    start = 0;
-                }
-                let mut end = end as isize + l.src_offset;
-                if end < 0 {
-                    end = 0;
-                }
-                // TODO src_start?
-                let mut src_start = src_start as isize + l.src_offset;
-                if src_start < 0 {
-                    src_start = 0;
-                }
+                let start = minz(start as isize + l.src_offset);
+                let end = minz(end as isize + l.src_offset);
+                let src_start = minz(src_start as isize + l.src_offset);
                debug!("src_remove: post-adjust {}-{}, {}", start, end, src_start);
                if end > start {
                    l.remove(start as usize, end as usize, src_start as usize)
@ -569,15 +555,8 @@ impl Node {
            Node::InnerNode(ref mut i) => i.src_insert(node, start, src_start),
            Node::LeafNode(ref mut l) => {
                debug!("src_insert: pre-adjust {}, {}; {}", start, src_start, l.src_offset);
-                let mut start = start as isize + l.src_offset;
-                if start < 0 {
-                    start = 0;
-                }
-                // TODO src_start?
-                let mut src_start = src_start as isize + l.src_offset;
-                if src_start < 0 {
-                    src_start = 0;
-                }
+                let start = minz(start as isize + l.src_offset);
+                let src_start = minz(src_start as isize + l.src_offset);
                debug!("src_insert: post-adjust {}, {}", start, src_start);
                l.insert(node, start as usize, src_start as usize)
            }
@ -596,14 +575,8 @@ impl Node {
            Node::InnerNode(ref i) => i.find_src_slice(start, end, slice),
            Node::LeafNode(ref l) => {
                debug!("find_src_slice: pre-adjust {}-{}; {}", start, end, l.src_offset);
-                let mut start = start as isize + l.src_offset;
-                if start < 0 {
-                    start = 0;
-                }
-                let mut end = end as isize + l.src_offset;
-                if end < 0 {
-                    end = 0;
-                }
+                let start = minz(start as isize + l.src_offset);
+                let end = minz(end as isize + l.src_offset);
                debug!("find_src_slice: post-adjust {}-{}", start, end);
                if end > start {
                    l.find_slice(start as usize, end as usize, slice);
@ -1117,21 +1090,14 @@ impl Lnode {
            i -= 1;
        }

-        let loc = if loc < 0 {
-            0
-        } else {
-            loc as usize
-        };
+        let loc = minz(loc) as usize;
        debug!("Lnode::col_for_src_loc, return Continue({})", loc);
        Search::Continue(loc)
    }

    fn find_last_char(&self, needle: char) -> Option<usize> {
        // FIXME due to multi-byte chars, this will give false positives
-        // I think we must search forwards from the start :-( Perhaps we could
-        // track unicode vs ascii or something (I don't think there is an efficient
-        // way to read unicode backwards, I might be wrong).
-        // std::str::GraphemeIndices can do this!
+        // FIXME use std::str::GraphemeIndices to do this!
        let mut loc = self.len as isize - 1;
        while loc >= 0 {
            unsafe {
@ -1147,8 +1113,169 @@ impl Lnode {
    }
 }

-//TODO comment etc.
+// The state of searching through a rope.
 enum Search {
+    // Presumably: not finished; carries the position so far (see col_for_src_loc) - TODO confirm
    Continue(usize),
+    // Presumably: finished; carries the final result - TODO confirm (no use visible here)
    Done(usize)
 }
+
+// Clamps a signed integer from below at zero: a negative `x` yields zero and
+// any other value is handed back unchanged.
+fn minz<I: SignedInt>(x: I) -> I {
+    if x.is_negative() { I::zero() } else { x }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    // `minz` is private and `use super::*` does not bring it into scope here, so name it.
+    use super::minz;
+
+    // An empty rope has length 0 and renders as ""; a rope built with
+    // `from_string` reports the string's length and renders the same text.
+    #[test]
+    fn test_new() {
+        let r = Rope::new();
+        assert_eq!(r.len(), 0);
+        assert_eq!(r.to_string(), "");
+
+        let r = Rope::from_string("Hello world!".to_string());
+        assert_eq!(r.len(), 12);
+        assert_eq!(r.to_string(), "Hello world!");
+    }
+
+    // `minz` turns negative values into zero and leaves zero and positive
+    // values untouched, checked for both `i32` and `isize`.
+    #[test]
+    fn test_minz() {
+        let x: i32 = 0;
+        assert_eq!(minz(x), 0);
+        let x: i32 = 42;
+        assert_eq!(minz(x), 42);
+        let x: i32 = -42;
+        assert_eq!(minz(x), 0);
+        let x: isize = 0;
+        assert_eq!(minz(x), 0);
+        let x: isize = 42;
+        assert_eq!(minz(x), 42);
+        let x: isize = -42;
+        assert_eq!(minz(x), 0);
+    }
+
+    // Parsing a string into a rope and printing it back gives the same text.
+    #[test]
+    fn test_from_string() {
+        let r: Rope = "Hello world!".parse().unwrap();
+        assert_eq!(r.to_string(), "Hello world!");
+    }
+
+    // Removes a prefix, a suffix and a middle range, checking the remaining
+    // text and (where enabled) lookups by original source range.
+    #[test]
+    fn test_remove() {
+        let mut r: Rope = "Hello world!".parse().unwrap();
+        r.remove(0, 10);
+        assert_eq!(r.to_string(), "d!");
+        assert_eq!(r.src_slice(0..5).to_string(), "");
+        assert_eq!(r.src_slice(10..12).to_string(), "d!");
+
+        let mut r: Rope = "Hello world!".parse().unwrap();
+        r.remove(4, 12);
+        assert_eq!(r.to_string(), "Hell");
+        // TODO
+        //assert!(r.src_slice(0..4).to_string() == "Hell");
+        //assert!(r.src_slice(10..12).to_string() == "");
+
+        let mut r: Rope = "Hello world!".parse().unwrap();
+        r.remove(4, 10);
+        assert_eq!(r.to_string(), "Helld!");
+        // TODO
+        //assert!(r.src_slice(1..5).to_string() == "ell");
+        assert_eq!(r.src_slice(9..12).to_string(), "d!");
+    }
+
+    // Inserts copied text at the start, at the end and in the middle, then
+    // checks the whole text and a slice of it.
+    #[test]
+    fn test_insert_copy() {
+        let mut r: Rope = "Hello world!".parse().unwrap();
+        r.insert_copy(0, "foo");
+        assert_eq!(r.to_string(), "fooHello world!");
+        assert_eq!(r.slice(2..8).to_string(), "oHello");
+
+        let mut r: Rope = "Hello world!".parse().unwrap();
+        r.insert_copy(12, "foo");
+        assert_eq!(r.to_string(), "Hello world!foo");
+        assert_eq!(r.slice(2..8).to_string(), "llo wo");
+
+        let mut r: Rope = "Hello world!".parse().unwrap();
+        r.insert_copy(5, "foo");
+        assert_eq!(r.to_string(), "Hellofoo world!");
+        assert_eq!(r.slice(2..8).to_string(), "llofoo");
+    }
+
+    // Appending copied text extends the rope and leaves earlier slices as they were.
+    #[test]
+    fn test_push_copy() {
+        let mut r: Rope = "Hello world!".parse().unwrap();
+        r.push_copy("foo");
+        assert_eq!(r.to_string(), "Hello world!foo");
+        assert_eq!(r.slice(2..8).to_string(), "llo wo");
+    }
+
+    // Runs insert/replace on text containing multi-byte characters, then checks
+    // that `chars()` yields every character paired with its byte offset.
+    #[test]
+    fn test_insert_replace() {
+        let mut r: Rope = "hello worl\u{00bb0}!".parse().unwrap();
+        r.insert_copy(5, "bb");
+        assert_eq!(r.to_string(), "hellobb worlர!");
+        r.replace(0, 'H');
+        r.replace(15, '~');
+        r.replace_str(5, "fo\u{00cb0}");
+        assert_eq!(r.to_string(), "Hellofoರrlர~");
+        assert_eq!(r.slice(0..10).to_string(), "Hellofoರ");
+        assert_eq!(r.slice(5..10).to_string(), "foರ");
+        assert_eq!(r.slice(10..15).to_string(), "rlர");
+
+        let expected = "Hellofoರrlர~";
+        let mut byte_pos = 0;
+        for ((c, b), e) in r.chars().zip(expected.chars()) {
+            assert_eq!(c, e);
+            assert_eq!(b, byte_pos);
+            byte_pos += e.len_utf8();
+        }
+    }
+
+    // Mixes `src_insert` and `src_remove` (the expected strings show their
+    // positions refer to the original text), then checks the characters, their
+    // byte offsets and the column reported for each original source location.
+    #[test]
+    fn test_src_insert_remove_col_for_src_loc() {
+        let mut r: Rope = "hello\n world!".parse().unwrap();
+        r.src_insert(4, "foo".to_string());
+        r.src_insert(5, "bar".to_string());
+        assert_eq!(r.to_string(), "hellfooobar\n world!");
+
+        r.src_remove(2, 4);
+        r.src_remove(10, 12);
+        assert_eq!(r.to_string(), "hefooobar\n wor!");
+
+        let expected = "hefooobar\n wor!";
+        let mut byte_pos = 0;
+        for ((c, b), e) in r.chars().zip(expected.chars()) {
+            assert_eq!(c, e);
+            assert_eq!(b, byte_pos);
+            byte_pos += e.len_utf8();
+        }
+
+        let expected = [0, 1, 2, 2, 5, 9, 0, 1, 2, 3, 4, 4, 4];
+        for i in 0..13 {
+            assert_eq!(r.col_for_src_loc(i), expected[i]);
+        }
+    }
+
+    // Repeated `src_insert` calls, including several at the same source
+    // position, produce the expected interleaving of inserted text.
+    #[test]
+    fn test_src_insert() {
+        let mut r: Rope = "Hello world!".parse().unwrap();
+        r.src_insert(4, "foo".to_string());
+        r.src_insert(0, "foo".to_string());
+        r.src_insert(12, "foo".to_string());
+        assert_eq!(r.to_string(), "fooHellfooo world!foo");
+        r.src_insert(4, "bar".to_string());
+        r.src_insert(5, "bar".to_string());
+        r.src_insert(3, "bar".to_string());
+        r.src_insert(0, "bar".to_string());
+        r.src_insert(12, "bar".to_string());
+        assert_eq!(r.to_string(), "barfooHelbarlbarfooobar world!barfoo");
+    }
+}