diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000000..31edfbbdc06 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,17 @@ +[package] + +name = "rustfmt" +version = "0.0.1" +authors = ["Nicholas Cameron "] +description = "tool to find and fix Rust formatting issues" +repository = "https://github.com/nick29581/rustfmt" +readme = "README.md" +license = "Apache-2.0/MIT" + +#[dependencies.reprint] +#reprint = "0.0.1" +#path = "/home/ncameron/reprint" + +[[bin]] +name = "rustfmt" +path = "src/mod.rs" diff --git a/src/changes.rs b/src/changes.rs index 22fdc0ff51d..bcfbed17f5b 100644 --- a/src/changes.rs +++ b/src/changes.rs @@ -8,33 +8,26 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. + // TODO -// composable changes // print to files (maybe that shouldn't be here, but in mod) // tests -// docs use rope::{Rope, RopeSlice}; use std::collections::HashMap; -use syntax::codemap::{CodeMap, Span, Pos, BytePos}; +use syntax::codemap::{CodeMap, Span, BytePos}; use std::fmt; +// This is basically a wrapper around a bunch of Ropes which makes it convenient +// to work with libsyntax. It is badly named. pub struct ChangeSet<'a> { file_map: HashMap, - // FIXME, we only keep a codemap around so we can have convenience methods - // taking Spans, it would be more resuable to factor this (and the methods) - // out into an adaptor. codemap: &'a CodeMap, pub count: u64, } -pub struct FileIterator<'c, 'a: 'c> { - change_set: &'c ChangeSet<'a>, - keys: Vec<&'c String>, - cur_key: usize, -} - impl<'a> ChangeSet<'a> { + // Create a new ChangeSet for a given libsyntax CodeMap. 
pub fn from_codemap(codemap: &'a CodeMap) -> ChangeSet<'a> { let mut result = ChangeSet { file_map: HashMap::new(), @@ -43,14 +36,16 @@ impl<'a> ChangeSet<'a> { }; for f in codemap.files.borrow().iter() { - let contents = Rope::from_string(f.src.clone()); + let contents = Rope::from_string((&**f.src.as_ref().unwrap()).clone()); result.file_map.insert(f.name.clone(), contents); } result } - // start and end are unadjusted. + // Change a span of text in our stored text into the new text (`text`). + // The span of text to change is given in the coordinates of the original + // source text, not the current text, pub fn change(&mut self, file_name: &str, start: usize, end: usize, text: String) { println!("change: {}:{}-{} \"{}\"", file_name, start, end, text); @@ -59,9 +54,10 @@ impl<'a> ChangeSet<'a> { let file = &mut self.file_map[*file_name]; if end - start == text.len() { - // TODO - panic!(); - file.replace_str(start, &text[]); + // TODO src_replace_str would be much more efficient + //file.src_replace_str(start, &text); + file.src_remove(start, end); + file.src_insert(start, text); } else { // TODO if we do this in one op, could we get better change info? file.src_remove(start, end); @@ -69,28 +65,34 @@ impl<'a> ChangeSet<'a> { } } + // As for `change()`, but use a Span to indicate the text to change. pub fn change_span(&mut self, span: Span, text: String) { let l_loc = self.codemap.lookup_char_pos(span.lo); let file_offset = l_loc.file.start_pos.0; - self.change(&l_loc.file.name[], + self.change(&l_loc.file.name, (span.lo.0 - file_offset) as usize, (span.hi.0 - file_offset) as usize, text) } + // Get a slice of the current text. Coordinates are relative to the source + // text. I.e., this method returns the text which has been changed from the + // indicated span. 
pub fn slice(&self, file_name: &str, start: usize, end: usize) -> RopeSlice { let file = &self.file_map[*file_name]; file.src_slice(start..end) } + // As for `slice()`, but use a Span to indicate the text to return. pub fn slice_span(&self, span:Span) -> RopeSlice { let l_loc = self.codemap.lookup_char_pos(span.lo); let file_offset = l_loc.file.start_pos.0; - self.slice(&l_loc.file.name[], + self.slice(&l_loc.file.name, (span.lo.0 - file_offset) as usize, (span.hi.0 - file_offset) as usize) } + // Return an iterator over the entire changed text. pub fn text<'c>(&'c self) -> FileIterator<'c, 'a> { FileIterator { change_set: self, @@ -99,16 +101,26 @@ impl<'a> ChangeSet<'a> { } } + // Get the current line-relative position of a position in the source text. pub fn col(&self, loc: BytePos) -> usize { let l_loc = self.codemap.lookup_char_pos(loc); let file_offset = l_loc.file.start_pos.0; - let file = &self.file_map[l_loc.file.name[]]; + let file = &self.file_map[l_loc.file.name[..]]; file.col_for_src_loc(loc.0 as usize - file_offset as usize) } } +// Iterates over each file in the ChangSet. Yields the filename and the changed +// text for that file. +pub struct FileIterator<'c, 'a: 'c> { + change_set: &'c ChangeSet<'a>, + keys: Vec<&'c String>, + cur_key: usize, +} + impl<'c, 'a> Iterator for FileIterator<'c, 'a> { type Item = (&'c str, &'c Rope); + fn next(&mut self) -> Option<(&'c str, &'c Rope)> { if self.cur_key >= self.keys.len() { return None; @@ -116,15 +128,16 @@ impl<'c, 'a> Iterator for FileIterator<'c, 'a> { let key = self.keys[self.cur_key]; self.cur_key += 1; - return Some((&key[], &self.change_set.file_map[*key])) + return Some((&key, &self.change_set.file_map[*key])) } } impl<'a> fmt::Display for ChangeSet<'a> { + // Prints the entire changed text. 
fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { for (f, r) in self.text() { try!(write!(fmt, "{}:\n", f)); - try!(write!(fmt, "{}", r)); + try!(write!(fmt, "{}\n\n", r)); } Ok(()) } diff --git a/src/mod.rs b/src/mod.rs new file mode 100644 index 00000000000..c0bad7a0e31 --- /dev/null +++ b/src/mod.rs @@ -0,0 +1,468 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(box_syntax)] +#![feature(box_patterns)] +#![feature(rustc_private)] +#![feature(collections)] +#![feature(os)] +#![feature(core)] +#![feature(unicode)] +#![feature(old_path)] +#![feature(exit_status)] + +#[macro_use] +extern crate log; + +extern crate getopts; +extern crate rustc; +extern crate rustc_driver; +extern crate syntax; + +use rustc::session::Session; +use rustc::session::config::{self, Input}; +use rustc_driver::{driver, CompilerCalls, Compilation}; + +use syntax::ast; +use syntax::codemap::{self, CodeMap, Span, Pos}; +use syntax::diagnostics; +use syntax::parse::token; +use syntax::print::pprust; +use syntax::visit; + +use std::mem; + +use changes::ChangeSet; + +pub mod rope; +mod changes; + +const IDEAL_WIDTH: usize = 80; +const MAX_WIDTH: usize = 100; +const MIN_STRING: usize = 10; + +// Formatting which depends on the AST. +fn fmt_ast<'a>(krate: &ast::Crate, codemap: &'a CodeMap) -> ChangeSet<'a> { + let mut visitor = FmtVisitor { codemap: codemap, + changes: ChangeSet::from_codemap(codemap) }; + visit::walk_crate(&mut visitor, krate); + + visitor.changes +} + +// Formatting done on a char by char basis. +fn fmt_lines(changes: &mut ChangeSet) { + // Iterate over the chars in the change set. 
+ for (f, text) in changes.text() { + let mut trims = vec![]; + let mut last_wspace = None; + let mut line_len = 0; + let mut cur_line = 1; + for (c, b) in text.chars() { + if c == '\n' { // TOOD test for \r too + // Check for (and record) trailing whitespace. + if let Some(lw) = last_wspace { + trims.push((lw, b)); + line_len -= b - lw; + } + // Check for any line width errors we couldn't correct. + if line_len > MAX_WIDTH { + // FIXME store the error rather than reporting immediately. + println!("Rustfmt couldn't fix (sorry). {}:{}: line longer than {} characters", + f, cur_line, MAX_WIDTH); + } + line_len = 0; + cur_line += 1; + last_wspace = None; + } else { + line_len += 1; + if c.is_whitespace() { + if last_wspace.is_none() { + last_wspace = Some(b); + } + } else { + last_wspace = None; + } + } + } + + unsafe { + // Invariant: we only mutate a rope after we have searched it, then + // we will not search it again. + let mut_text: &mut rope::Rope = mem::transmute(text); + let mut_count: &mut u64 = mem::transmute(&changes.count); + let mut offset = 0; + // Get rid of any trailing whitespace we recorded earlier. + for &(s, e) in trims.iter() { + // Note that we change the underlying ropes directly, we don't + // go through the changeset because our change positions are + // relative to the newest text, not the original. 
+ debug!("Stripping trailing whitespace {}:{}-{} \"{}\"", + f, s, e, text.slice(s-offset..e-offset)); + mut_text.remove(s-offset, e-offset); + *mut_count += 1; + offset += e - s; + } + } + } +} + +struct FmtVisitor<'a> { + codemap: &'a CodeMap, + changes: ChangeSet<'a>, +} + +impl<'a, 'v> visit::Visitor<'v> for FmtVisitor<'a> { + fn visit_expr(&mut self, ex: &'v ast::Expr) { + match ex.node { + ast::Expr_::ExprLit(ref l) => match l.node { + ast::Lit_::LitStr(ref is, _) => { + self.rewrite_string(&is, l.span); + } + _ => {} + }, + _ => {} + } + + visit::walk_expr(self, ex) + } + + fn visit_fn(&mut self, + fk: visit::FnKind<'v>, + fd: &'v ast::FnDecl, + b: &'v ast::Block, + s: Span, + _: ast::NodeId) { + self.fix_formal_args(fd); + visit::walk_fn(self, fk, fd, b, s); + } + + fn visit_item(&mut self, item: &'v ast::Item) { + // TODO check each item is on a new line and is correctly indented. + match item.node { + ast::Item_::ItemUse(ref vp) => { + match vp.node { + ast::ViewPath_::ViewPathList(ref path, ref path_list) => { + let new_str = self.fix_use_list(path, path_list, vp.span); + + // TODO move these optimisations to ChangeSet + if new_str != self.codemap.span_to_snippet(item.span).unwrap() { + self.changes.change_span(item.span, new_str); + } + } + ast::ViewPath_::ViewPathGlob(_) => { + // FIXME convert to list? + } + _ => {} + } + } + _ => {} + } + visit::walk_item(self, item); + } +} + +fn make_indent(width: usize) -> String { + let mut indent = String::with_capacity(width); + for _ in 0..width { + indent.push(' ') + } + indent +} + +impl<'a> FmtVisitor<'a> { + // TODO NEEDS TESTS + fn rewrite_string(&mut self, s: &str, span: Span) { + // FIXME I bet this stomps unicode escapes in the source string + + // Check if there is anything to fix: we always try to fixup multi-line + // strings, or if the string is too long for the line. 
+ let l_loc = self.codemap.lookup_char_pos(span.lo); + let r_loc = self.codemap.lookup_char_pos(span.hi); + if l_loc.line == r_loc.line && r_loc.col.to_usize() <= MAX_WIDTH { + return; + } + + // TODO if lo.col > IDEAL - 10, start a new line (need cur indent for that) + + let s = s.escape_default(); + + // TODO use fixed value. + let l_loc = self.codemap.lookup_char_pos(span.lo); + let l_col = l_loc.col.to_usize(); + + let indent = make_indent(l_col + 1); + let indent = &indent; + + let max_chars = MAX_WIDTH - (l_col + 1); + + let mut cur_start = 0; + let mut result = String::new(); + result.push('"'); + loop { + let mut cur_end = cur_start + max_chars; + + if cur_end >= s.len() { + result.push_str(&s[cur_start..]); + break; + } + + // Make sure we're on a char boundary. + cur_end = next_char(&s, cur_end); + + // Push cur_end left until we reach whitespace + while !s.char_at(cur_end-1).is_whitespace() { + cur_end = prev_char(&s, cur_end); + + if cur_end - cur_start < MIN_STRING { + // We can't break at whitespace, fall back to splitting + // anywhere that doesn't break an escape sequence + cur_end = next_char(&s, cur_start + max_chars); + while s.char_at(cur_end) == '\\' { + cur_end = prev_char(&s, cur_end); + } + } + } + // Make sure there is no whitespace to the right of the break. + while cur_end < s.len() && s.char_at(cur_end).is_whitespace() { + cur_end = next_char(&s, cur_end+1); + } + result.push_str(&s[cur_start..cur_end]); + result.push_str("\\\n"); + result.push_str(indent); + + cur_start = cur_end; + } + result.push('"'); + + // Check that we actually changed something. + if result == self.codemap.span_to_snippet(span).unwrap() { + return; + } + + self.changes.change_span(span, result); + } + + // Basically just pretty prints a multi-item import. 
+ fn fix_use_list(&mut self, + path: &ast::Path, + path_list: &[ast::PathListItem], + vp_span: Span) -> String { + // FIXME remove unused imports + + // FIXME check indentation + let l_loc = self.codemap.lookup_char_pos(vp_span.lo); + let path_str = pprust::path_to_string(&path); + let indent = l_loc.col.0; + // After accounting for the overhead, how much space left for + // the item list? ( 5 = :: + { + } + ; ) + let space = IDEAL_WIDTH - (indent + path_str.len() + 5); + // 4 = `use` + one space + // TODO might be pub use + let indent = make_indent(indent-4); + + let mut cur_str = String::new(); + let mut first = true; + // If `self` is in the list, put it first. + if path_list.iter().any(|vpi| + if let ast::PathListItem_::PathListMod{ .. } = vpi.node { + true + } else { + false + } + ) { + cur_str = "self".to_string(); + first = false; + } + + let mut new_str = String::new(); + for vpi in path_list.iter() { + match vpi.node { + ast::PathListItem_::PathListIdent{ name, .. } => { + let next_item = &token::get_ident(name); + if cur_str.len() + next_item.len() > space { + let cur_line = format!("{}use {}::{{{}}};\n", indent, path_str, cur_str); + new_str.push_str(&cur_line); + + cur_str = String::new(); + first = true; + } + + if first { + first = false; + } else { + cur_str.push_str(", "); + } + + cur_str.push_str(next_item); + } + ast::PathListItem_::PathListMod{ .. } => {} + } + } + + assert!(!first); + let cur_line = format!("{}use {}::{{{}}};", indent, path_str, cur_str); + new_str.push_str(&cur_line); + + new_str + } + + fn fix_formal_args<'v>(&mut self, fd: &'v ast::FnDecl) { + // For now, just check the arguments line up and make them per-row if the line is too long. 
+ let args = &fd.inputs; + if args.len() <= 1 { + return; + } + // TODO not really using the hi positions + let spans: Vec<_> = args.iter().map(|a| (a.pat.span.lo, a.ty.span.hi)).collect(); + let locs: Vec<_> = spans.iter().map(|&(a, b)| (self.codemap.lookup_char_pos(a), self.codemap.lookup_char_pos(b))).collect(); + let first_loc = &locs[0].0; + // TODO need to adjust for previous changes here. + let same_row = locs.iter().all(|&(ref l, _)| l.line == first_loc.line); + let same_col = locs.iter().all(|&(ref l, _)| l.col == first_loc.col); + + if same_col { + // TODO Check one arg per line and no lines in between (except comments) + return; + } + + if same_row { // TODO check line is < 100 && first_loc.line { + // TODO could also fix whitespace around punctuaton here + // TODO and could check that we're on the same line as the function call, if possible + return; + } + + let col = self.changes.col(spans[0].0); + let mut indent = String::with_capacity(col); + indent.push('\n'); + for _ in 0..col { indent.push(' '); } + let last_idx = spans.len() - 1; + for (i, s) in spans.iter().enumerate() { + // Take the span from lo to lo (or the last hi for the last arg), + // trim, push on top of indent, then replace the old lo-lo span with it. 
// Returns the byte index of the nearest char boundary strictly before `i`,
// or 0 when `i` is 0.
#[inline]
fn prev_char(s: &str, i: usize) -> usize {
    // Scan i-1, i-2, ..., 0. Index 0 is always a boundary, so the search can
    // only come up empty when the range itself is empty (i == 0).
    (0..i).rev().find(|&pos| s.is_char_boundary(pos)).unwrap_or(0)
}

// Returns the byte index of the nearest char boundary at or after `i`,
// saturating at `s.len()`.
#[inline]
fn next_char(s: &str, i: usize) -> usize {
    // Scan i, i+1, ..., len-1. If none of those is a boundary (or `i` is
    // already past the end) the answer is the end of the string.
    (i..s.len()).find(|&pos| s.is_char_boundary(pos)).unwrap_or(s.len())
}
state.session.codemap(); + let mut changes = fmt_ast(krate, codemap); + fmt_lines(&mut changes); + + println!("Making {} changes", changes.count); + println!("{}", changes); + // FIXME(#5) Should be user specified whether to show or replace. + }; + + control + } +} + +fn main() { + let args = std::os::args(); + let mut call_ctxt = RustFmtCalls { input_path: None }; + rustc_driver::run_compiler(&args, &mut call_ctxt); + std::env::set_exit_status(0); +} + +// FIXME comments +// comments aren't in the AST, which makes processing them difficult, but then +// comments are complicated anyway. I think I am happy putting off tackling them +// for now. Long term the soluton is for comments to be in the AST, but that means +// only the libsyntax AST, not the rustc one, which means waiting for the ASTs +// to diverge one day.... + +// Once we do have comments, we just have to implement a simple word wrapping +// algorithm to keep the width under IDEAL_WIDTH. We should also convert multiline +// /* ... */ comments to // and check doc comments are in the right place and of +// the right kind. diff --git a/src/rope.rs b/src/rope.rs index 150e51b4bd3..5a96370308a 100644 --- a/src/rope.rs +++ b/src/rope.rs @@ -14,25 +14,31 @@ // tests // pull out into its own crate // impl Default, Extend -// impl DOubleEndedIter and ExactSizeIter for RopeChars +// impl DoubleEndedIter and ExactSizeIter for RopeChars // better allocation -// balancing -// thread safety/parallisation +// balancing? extern crate unicode; use std::fmt; use std::ops::Range; +use std::num::{SignedInt, Int}; -// A Rope, based on an unbalanced binary tree. - +// A Rope, based on an unbalanced binary tree. The rope is somewhat special in +// that it tracks positions in the source text. So when locating a position in +// the rope, the user can use either a current position in the text or a +// position in the source text, which the Rope will adjust to a current position +// whilst searching. 
pub struct Rope { root: Node, len: usize, src_len: usize, - // FIXME: Allocation is very dumb at the moment, we always add another buffer for every inserted string and we never resuse or collect old memory + // FIXME: Allocation is very dumb at the moment, we always add another + // buffer for every inserted string and we never resuse or collect old + // memory storage: Vec> } +// A view over a portion of a Rope. Analagous to string slices (`str`); pub struct RopeSlice<'rope> { // All nodes which make up the slice, in order. nodes: Vec<&'rope Lnode>, @@ -42,6 +48,7 @@ pub struct RopeSlice<'rope> { len: usize, } +// An iterator over the chars in a rope. pub struct RopeChars<'rope> { data: RopeSlice<'rope>, cur_node: usize, @@ -49,8 +56,8 @@ pub struct RopeChars<'rope> { abs_byte: usize, } - impl Rope { + // Create an empty rope. pub fn new() -> Rope { Rope { root: Node::empty_inner(), @@ -62,7 +69,7 @@ impl Rope { // Uses text as initial storage. pub fn from_string(text: String) -> Rope { - // TODO should split large texts into segments as we insert + // TODO should split very large texts into segments as we insert let mut result = Rope::new(); result.insert(0, text); @@ -70,47 +77,41 @@ impl Rope { result } + // When initialising a rope, indicates that the rope is complete wrt the + // source text. fn fix_src(&mut self) { self.root.fix_src(); self.src_len = self.len; } + // Length of the rope. 
pub fn len(&self) -> usize { self.len } - pub fn insert(&mut self, start: usize, text: String) { - if text.len() == 0 { - return; - } - - debug_assert!(start <= self.len(), "insertion out of bounds of rope"); - - let len = text.len(); - let storage = text.into_bytes(); - let new_node = box Node::new_leaf(&storage[][0] as *const u8, len, 0); - self.storage.push(storage); - - match self.root.insert(new_node, start, start) { - NodeAction::Change(n, adj) => { - assert!(adj as usize == len); - self.root = *n; - } - NodeAction::Adjust(adj) => { - assert!(adj as usize == len); - } - _ => panic!("Unexpected action") - } - self.len += len; - } - pub fn insert_copy(&mut self, start: usize, text: &str) { - // If we did clever things with allocation, we could do better here + // FIXME If we did clever things with allocation, we could do better here. self.insert(start, text.to_string()); } + pub fn insert(&mut self, start: usize, text: String) { + self.insert_inner(start, + text, + |this, node| this.root.insert(node, start, start)) + } + pub fn src_insert(&mut self, start: usize, text: String) { - // TODO refactor with insert + self.insert_inner(start, + text, + |this, node| this.root.src_insert(node, start, start)) + } + + fn insert_inner(&mut self, + start: usize, + text: String, + do_insert: F) + where F: Fn(&mut Rope, Box) -> NodeAction + { if text.len() == 0 { return; } @@ -119,10 +120,10 @@ impl Rope { let len = text.len(); let storage = text.into_bytes(); - let new_node = box Node::new_leaf(&storage[][0] as *const u8, len, 0); + let new_node = box Node::new_leaf(&storage[..][0] as *const u8, len, 0); self.storage.push(storage); - match self.root.src_insert(new_node, start, start) { + match do_insert(self, new_node) { NodeAction::Change(n, adj) => { assert!(adj as usize == len); self.root = *n; @@ -147,35 +148,25 @@ impl Rope { } pub fn remove(&mut self, start: usize, end: usize) { - assert!(end >= start); - if start == end { - return; - } - - let action = 
self.root.remove(start, end, start); - match action { - NodeAction::None => {} - NodeAction::Remove => { - self.root = Node::empty_inner(); - self.len = 0; - } - NodeAction::Adjust(adj) => self.len = (self.len as isize + adj) as usize, - NodeAction::Change(node, adj) => { - self.root = *node; - self.len = (self.len as isize + adj) as usize; - } - } + self.remove_inner(start, end, |this| this.root.remove(start, end, start)) } pub fn src_remove(&mut self, start: usize, end: usize) { - // TODO refactor with remove + self.remove_inner(start, end, |this| this.root.src_remove(start, end, start)) + } + + fn remove_inner(&mut self, + start: usize, + end: usize, + do_remove: F) + where F: Fn(&mut Rope) -> NodeAction + { assert!(end >= start); if start == end { return; } - let action = self.root.src_remove(start, end, start); - match action { + match do_remove(self) { NodeAction::None => {} NodeAction::Remove => { self.root = Node::empty_inner(); @@ -190,6 +181,7 @@ impl Rope { } // TODO src_replace + // TODO src_replace_str // This can go horribly wrong if you overwrite a grapheme of different size. // It is the callers responsibility to ensure that the grapheme at point start @@ -200,7 +192,7 @@ impl Rope { // I think that is better than duplicating a bunch of code. // It should be possible to view a &char as a &[u8] somehow, and then // we can optimise this (FIXME). 
- self.replace_str(start, &new_char.to_string()[]); + self.replace_str(start, &new_char.to_string()[..]); } pub fn replace_str(&mut self, start: usize, new_str: &str) { @@ -332,6 +324,10 @@ impl ::std::str::FromStr for Rope { impl<'a> fmt::Display for RopeSlice<'a> { fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + if self.nodes.len() == 0 { + return Ok(()); + } + let last_idx = self.nodes.len() - 1; for (i, n) in self.nodes.iter().enumerate() { let mut ptr = n.text; @@ -346,7 +342,7 @@ impl<'a> fmt::Display for RopeSlice<'a> { unsafe { try!(write!(fmt, "{}", - ::std::str::from_utf8(::std::slice::from_raw_buf(&ptr, len)).unwrap())); + ::std::str::from_utf8(::std::slice::from_raw_parts(ptr, len)).unwrap())); } } Ok(()) @@ -371,7 +367,7 @@ impl<'a> fmt::Debug for RopeSlice<'a> { unsafe { try!(write!(fmt, "\"{}\"", - ::std::str::from_utf8(::std::slice::from_raw_buf(&ptr, len)).unwrap())); + ::std::str::from_utf8(::std::slice::from_raw_parts(ptr, len)).unwrap())); } } Ok(()) @@ -408,7 +404,7 @@ impl fmt::Display for Node { unsafe { write!(fmt, "{}", - ::std::str::from_utf8(::std::slice::from_raw_buf(text, len)).unwrap()) + ::std::str::from_utf8(::std::slice::from_raw_parts(*text, len)).unwrap()) } } } @@ -437,7 +433,7 @@ impl fmt::Debug for Node { unsafe { write!(fmt, "(\"{}\"; {})", - ::std::str::from_utf8(::std::slice::from_raw_buf(text, len)).unwrap(), + ::std::str::from_utf8(::std::slice::from_raw_parts(*text, len)).unwrap(), len) } } @@ -519,7 +515,7 @@ impl Node { } } - // All these methods are just doing dynamic dispatch, TODO use a macro + // Most of these methods are just doing dynamic dispatch, TODO use a macro // precond: start < end fn remove(&mut self, start: usize, end: usize, src_start: usize) -> NodeAction { @@ -534,19 +530,9 @@ impl Node { Node::InnerNode(ref mut i) => i.src_remove(start, end, src_start), Node::LeafNode(ref mut l) => { debug!("src_remove: pre-adjust {}-{}; {}", start, end, l.src_offset); - let mut start = start as 
isize + l.src_offset; - if start < 0 { - start = 0; - } - let mut end = end as isize + l.src_offset; - if end < 0 { - end = 0; - } - // TODO src_start? - let mut src_start = src_start as isize + l.src_offset; - if src_start < 0 { - src_start = 0; - } + let start = minz(start as isize + l.src_offset); + let end = minz(end as isize + l.src_offset); + let src_start = minz(src_start as isize + l.src_offset); debug!("src_remove: post-adjust {}-{}, {}", start, end, src_start); if end > start { l.remove(start as usize, end as usize, src_start as usize) @@ -569,15 +555,8 @@ impl Node { Node::InnerNode(ref mut i) => i.src_insert(node, start, src_start), Node::LeafNode(ref mut l) => { debug!("src_insert: pre-adjust {}, {}; {}", start, src_start, l.src_offset); - let mut start = start as isize + l.src_offset; - if start < 0 { - start = 0; - } - // TODO src_start? - let mut src_start = src_start as isize + l.src_offset; - if src_start < 0 { - src_start = 0; - } + let start = minz(start as isize + l.src_offset); + let src_start = minz(src_start as isize + l.src_offset); debug!("src_insert: post-adjust {}, {}", start, src_start); l.insert(node, start as usize, src_start as usize) } @@ -596,14 +575,8 @@ impl Node { Node::InnerNode(ref i) => i.find_src_slice(start, end, slice), Node::LeafNode(ref l) => { debug!("find_src_slice: pre-adjust {}-{}; {}", start, end, l.src_offset); - let mut start = start as isize + l.src_offset; - if start < 0 { - start = 0; - } - let mut end = end as isize + l.src_offset; - if end < 0 { - end = 0; - } + let start = minz(start as isize + l.src_offset); + let end = minz(end as isize + l.src_offset); debug!("find_src_slice: post-adjust {}-{}", start, end); if end > start { l.find_slice(start as usize, end as usize, slice); @@ -1117,21 +1090,14 @@ impl Lnode { i -= 1; } - let loc = if loc < 0 { - 0 - } else { - loc as usize - }; + let loc = minz(loc) as usize; debug!("Lnode::col_for_src_loc, return Continue({})", loc); Search::Continue(loc) } fn 
// Clamps `x` from below at zero: negative values become zero, everything
// else is returned unchanged. (Despite the name this is `max(x, 0)`.)
//
// The zero value is taken from `Default`, which is 0 for all of the primitive
// integer types this is called with, so no numeric trait is needed.
fn minz<I: Default + PartialOrd>(x: I) -> I {
    let zero = I::default();
    if x < zero {
        return zero;
    }

    x
}
//assert!(r.src_slice(10..12).to_string() == ""); + + let mut r: Rope = "Hello world!".parse().unwrap(); + r.remove(4, 10); + assert!(r.to_string() == "Helld!"); + // TODO + //assert!(r.src_slice(1..5).to_string() == "ell"); + assert!(r.src_slice(9..12).to_string() == "d!"); + } + + #[test] + fn test_insert_copy() { + let mut r: Rope = "Hello world!".parse().unwrap(); + r.insert_copy(0, "foo"); + assert!(r.to_string() == "fooHello world!"); + assert!(r.slice(2..8).to_string() == "oHello"); + + let mut r: Rope = "Hello world!".parse().unwrap(); + r.insert_copy(12, "foo"); + assert!(r.to_string() == "Hello world!foo"); + assert!(r.slice(2..8).to_string() == "llo wo"); + + let mut r: Rope = "Hello world!".parse().unwrap(); + r.insert_copy(5, "foo"); + assert!(r.to_string() == "Hellofoo world!"); + assert!(r.slice(2..8).to_string() == "llofoo"); + } + + #[test] + fn test_push_copy() { + let mut r: Rope = "Hello world!".parse().unwrap(); + r.push_copy("foo"); + assert!(r.to_string() == "Hello world!foo"); + assert!(r.slice(2..8).to_string() == "llo wo"); + } + + #[test] + fn test_insert_replace() { + let mut r: Rope = "hello worl\u{00bb0}!".parse().unwrap(); + r.insert_copy(5, "bb"); + assert!(r.to_string() == "hellobb worlர!"); + r.replace(0, 'H'); + r.replace(15, '~'); + r.replace_str(5, "fo\u{00cb0}"); + assert!(r.to_string() == "Hellofoರrlர~"); + assert!(r.slice(0..10).to_string() == "Hellofoರ"); + assert!(r.slice(5..10).to_string() == "foರ"); + assert!(r.slice(10..15).to_string() == "rlர"); + + let expected = "Hellofoರrlர~"; + let mut byte_pos = 0; + for ((c, b), e) in r.chars().zip(expected.chars()) { + assert!(c == e); + assert!(b == byte_pos); + byte_pos += e.len_utf8(); + } + } + + #[test] + fn test_src_insert_remove_col_for_src_loc() { + let mut r: Rope = "hello\n world!".parse().unwrap(); + r.src_insert(4, "foo".to_string()); + r.src_insert(5, "bar".to_string()); + assert!(r.to_string() == "hellfooobar\n world!"); + + r.src_remove(2, 4); + r.src_remove(10, 
12); + assert!(r.to_string() == "hefooobar\n wor!"); + + let expected = "hefooobar\n wor!"; + let mut byte_pos = 0; + for ((c, b), e) in r.chars().zip(expected.chars()) { + assert!(c == e); + assert!(b == byte_pos); + byte_pos += e.len_utf8(); + } + + let expected = [0, 1, 2, 2, 5, 9, 0, 1, 2, 3, 4, 4, 4]; + for i in 0..13 { + assert!(r.col_for_src_loc(i) == expected[i]); + } + } + + #[test] + fn test_src_insert() { + let mut r: Rope = "Hello world!".parse().unwrap(); + r.src_insert(4, "foo".to_string()); + r.src_insert(0, "foo".to_string()); + r.src_insert(12, "foo".to_string()); + assert!(r.to_string() == "fooHellfooo world!foo"); + r.src_insert(4, "bar".to_string()); + r.src_insert(5, "bar".to_string()); + r.src_insert(3, "bar".to_string()); + r.src_insert(0, "bar".to_string()); + r.src_insert(12, "bar".to_string()); + assert!(r.to_string() == "barfooHelbarlbarfooobar world!barfoo"); + } +}