Working prototype

Implements a few formatting options using an approach where we modify the source using a rope. Uses very ad-hoc rules for formatting.
This commit is contained in:
Nick Cameron 2015-03-08 11:46:35 +13:00
parent f1e698c838
commit 7417ab5aed
4 changed files with 751 additions and 126 deletions

17
Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
[package]
name = "rustfmt"
version = "0.0.1"
authors = ["Nicholas Cameron <nrc@ncameron.org>"]
description = "tool to find and fix Rust formatting issues"
repository = "https://github.com/nick29581/rustfmt"
readme = "README.md"
license = "Apache-2.0/MIT"
#[dependencies.reprint]
#reprint = "0.0.1"
#path = "/home/ncameron/reprint"
[[bin]]
name = "rustfmt"
path = "src/mod.rs"

View File

@ -8,33 +8,26 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// TODO
// composable changes
// print to files (maybe that shouldn't be here, but in mod)
// tests
// docs
use rope::{Rope, RopeSlice};
use std::collections::HashMap;
use syntax::codemap::{CodeMap, Span, Pos, BytePos};
use syntax::codemap::{CodeMap, Span, BytePos};
use std::fmt;
// This is basically a wrapper around a bunch of Ropes which makes it convenient
// to work with libsyntax. It is badly named.
pub struct ChangeSet<'a> {
// One Rope per source file, keyed by file name. `from_codemap` fills this
// from the files of the CodeMap.
file_map: HashMap<String, Rope>,
// FIXME, we only keep a codemap around so we can have convenience methods
// taking Spans, it would be more reusable to factor this (and the methods)
// out into an adaptor.
codemap: &'a CodeMap,
// Counter of changes; the driver prints it as "Making {} changes".
// NOTE(review): exactly which operations bump it is not visible from the
// struct alone -- confirm against the methods and callers.
pub count: u64,
}
pub struct FileIterator<'c, 'a: 'c> {
change_set: &'c ChangeSet<'a>,
keys: Vec<&'c String>,
cur_key: usize,
}
impl<'a> ChangeSet<'a> {
// Create a new ChangeSet for a given libsyntax CodeMap.
pub fn from_codemap(codemap: &'a CodeMap) -> ChangeSet<'a> {
let mut result = ChangeSet {
file_map: HashMap::new(),
@ -43,14 +36,16 @@ impl<'a> ChangeSet<'a> {
};
for f in codemap.files.borrow().iter() {
let contents = Rope::from_string(f.src.clone());
let contents = Rope::from_string((&**f.src.as_ref().unwrap()).clone());
result.file_map.insert(f.name.clone(), contents);
}
result
}
// start and end are unadjusted.
// Change a span of text in our stored text into the new text (`text`).
// The span of text to change is given in the coordinates of the original
// source text, not the current text.
pub fn change(&mut self, file_name: &str, start: usize, end: usize, text: String) {
println!("change: {}:{}-{} \"{}\"", file_name, start, end, text);
@ -59,9 +54,10 @@ impl<'a> ChangeSet<'a> {
let file = &mut self.file_map[*file_name];
if end - start == text.len() {
// TODO
panic!();
file.replace_str(start, &text[]);
// TODO src_replace_str would be much more efficient
//file.src_replace_str(start, &text);
file.src_remove(start, end);
file.src_insert(start, text);
} else {
// TODO if we do this in one op, could we get better change info?
file.src_remove(start, end);
@ -69,28 +65,34 @@ impl<'a> ChangeSet<'a> {
}
}
// As for `change()`, but use a Span to indicate the text to change.
pub fn change_span(&mut self, span: Span, text: String) {
let l_loc = self.codemap.lookup_char_pos(span.lo);
let file_offset = l_loc.file.start_pos.0;
self.change(&l_loc.file.name[],
self.change(&l_loc.file.name,
(span.lo.0 - file_offset) as usize,
(span.hi.0 - file_offset) as usize,
text)
}
// Get a slice of the current text. Coordinates are relative to the source
// text. I.e., this method returns the text which has been changed from the
// indicated span.
pub fn slice(&self, file_name: &str, start: usize, end: usize) -> RopeSlice {
let file = &self.file_map[*file_name];
file.src_slice(start..end)
}
// As for `slice()`, but use a Span to indicate the text to return.
pub fn slice_span(&self, span:Span) -> RopeSlice {
let l_loc = self.codemap.lookup_char_pos(span.lo);
let file_offset = l_loc.file.start_pos.0;
self.slice(&l_loc.file.name[],
self.slice(&l_loc.file.name,
(span.lo.0 - file_offset) as usize,
(span.hi.0 - file_offset) as usize)
}
// Return an iterator over the entire changed text.
pub fn text<'c>(&'c self) -> FileIterator<'c, 'a> {
FileIterator {
change_set: self,
@ -99,16 +101,26 @@ impl<'a> ChangeSet<'a> {
}
}
// Get the current line-relative position of a position in the source text.
pub fn col(&self, loc: BytePos) -> usize {
let l_loc = self.codemap.lookup_char_pos(loc);
let file_offset = l_loc.file.start_pos.0;
let file = &self.file_map[l_loc.file.name[]];
let file = &self.file_map[l_loc.file.name[..]];
file.col_for_src_loc(loc.0 as usize - file_offset as usize)
}
}
// Iterates over each file in the ChangeSet. Yields the filename and the changed
// text for that file.
pub struct FileIterator<'c, 'a: 'c> {
// The ChangeSet whose files are being iterated.
change_set: &'c ChangeSet<'a>,
// File names used to look up each Rope in the change set's file map.
keys: Vec<&'c String>,
// Index into `keys` of the next file to yield; iteration stops once it
// reaches `keys.len()`.
cur_key: usize,
}
impl<'c, 'a> Iterator for FileIterator<'c, 'a> {
type Item = (&'c str, &'c Rope);
fn next(&mut self) -> Option<(&'c str, &'c Rope)> {
if self.cur_key >= self.keys.len() {
return None;
@ -116,15 +128,16 @@ impl<'c, 'a> Iterator for FileIterator<'c, 'a> {
let key = self.keys[self.cur_key];
self.cur_key += 1;
return Some((&key[], &self.change_set.file_map[*key]))
return Some((&key, &self.change_set.file_map[*key]))
}
}
impl<'a> fmt::Display for ChangeSet<'a> {
// Prints the entire changed text.
fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
for (f, r) in self.text() {
try!(write!(fmt, "{}:\n", f));
try!(write!(fmt, "{}", r));
try!(write!(fmt, "{}\n\n", r));
}
Ok(())
}

468
src/mod.rs Normal file
View File

@ -0,0 +1,468 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(box_syntax)]
#![feature(box_patterns)]
#![feature(rustc_private)]
#![feature(collections)]
#![feature(os)]
#![feature(core)]
#![feature(unicode)]
#![feature(old_path)]
#![feature(exit_status)]
#[macro_use]
extern crate log;
extern crate getopts;
extern crate rustc;
extern crate rustc_driver;
extern crate syntax;
use rustc::session::Session;
use rustc::session::config::{self, Input};
use rustc_driver::{driver, CompilerCalls, Compilation};
use syntax::ast;
use syntax::codemap::{self, CodeMap, Span, Pos};
use syntax::diagnostics;
use syntax::parse::token;
use syntax::print::pprust;
use syntax::visit;
use std::mem;
use changes::ChangeSet;
pub mod rope;
mod changes;
const IDEAL_WIDTH: usize = 80;
const MAX_WIDTH: usize = 100;
const MIN_STRING: usize = 10;
// Formatting which depends on the AST.
//
// Creates a fresh ChangeSet for `codemap`, walks `krate` with a FmtVisitor
// that records its edits into that ChangeSet, and hands the ChangeSet back.
fn fmt_ast<'a>(krate: &ast::Crate, codemap: &'a CodeMap) -> ChangeSet<'a> {
    let changes = ChangeSet::from_codemap(codemap);
    let mut fmt_visitor = FmtVisitor { codemap: codemap, changes: changes };
    visit::walk_crate(&mut fmt_visitor, krate);
    fmt_visitor.changes
}
// Formatting done on a char by char basis.
//
// For every file in `changes` this scans the current text once, recording each
// run of trailing whitespace and reporting any line that is still longer than
// MAX_WIDTH, and then strips the recorded whitespace from the underlying rope.
//
// NOTE(review): both checks only fire when a '\n' is seen, so a final line
// without a terminating newline is neither trimmed nor width-checked.
fn fmt_lines(changes: &mut ChangeSet) {
// Iterate over the chars in the change set.
for (f, text) in changes.text() {
// (start, end) positions of each run of trailing whitespace in this file.
let mut trims = vec![];
// Position at which the current run of whitespace started, if we are in one.
let mut last_wspace = None;
// Length of the current line so far, counted in chars.
let mut line_len = 0;
// 1-based line number, used only for the diagnostic below.
let mut cur_line = 1;
// `b` is the position of `c` in the current text (the rope tests compare it
// against a running byte offset).
for (c, b) in text.chars() {
if c == '\n' { // TODO test for \r too
// Check for (and record) trailing whitespace.
if let Some(lw) = last_wspace {
trims.push((lw, b));
// Trailing whitespace will be removed, so don't count it against the width.
// NOTE(review): line_len counts chars while `b - lw` is a position
// difference; the two only agree if the whitespace is single-byte.
line_len -= b - lw;
}
// Check for any line width errors we couldn't correct.
if line_len > MAX_WIDTH {
// FIXME store the error rather than reporting immediately.
println!("Rustfmt couldn't fix (sorry). {}:{}: line longer than {} characters",
f, cur_line, MAX_WIDTH);
}
line_len = 0;
cur_line += 1;
last_wspace = None;
} else {
line_len += 1;
if c.is_whitespace() {
// Only remember where the run starts; later whitespace extends it.
if last_wspace.is_none() {
last_wspace = Some(b);
}
} else {
last_wspace = None;
}
}
}
unsafe {
// Invariant: we only mutate a rope after we have searched it, then
// we will not search it again.
// NOTE(review): turning a shared reference into a mutable one with
// transmute is undefined behaviour in Rust whatever invariant we keep;
// ChangeSet should grow a mutable accessor so this block can go away.
let mut_text: &mut rope::Rope = mem::transmute(text);
let mut_count: &mut u64 = mem::transmute(&changes.count);
// Number of bytes removed so far; the recorded ranges refer to the text
// as it was before any removal, so they are shifted left by this much.
let mut offset = 0;
// Get rid of any trailing whitespace we recorded earlier.
for &(s, e) in trims.iter() {
// Note that we change the underlying ropes directly, we don't
// go through the changeset because our change positions are
// relative to the newest text, not the original.
debug!("Stripping trailing whitespace {}:{}-{} \"{}\"",
f, s, e, text.slice(s-offset..e-offset));
mut_text.remove(s-offset, e-offset);
*mut_count += 1;
offset += e - s;
}
}
}
}
// AST visitor which records formatting changes while walking a crate.
struct FmtVisitor<'a> {
// Used to map spans to line/column positions and to fetch source snippets.
codemap: &'a CodeMap,
// The edits accumulated so far; `fmt_ast` returns this once the walk is done.
changes: ChangeSet<'a>,
}
// Hooks the formatting passes into the libsyntax AST walk. Each hook does its
// own work first and then delegates to the matching `visit::walk_*` function
// so that nested nodes are still visited.
impl<'a, 'v> visit::Visitor<'v> for FmtVisitor<'a> {
    // String literals are handed to `rewrite_string`; every other expression
    // is only walked.
    fn visit_expr(&mut self, ex: &'v ast::Expr) {
        if let ast::Expr_::ExprLit(ref l) = ex.node {
            if let ast::Lit_::LitStr(ref is, _) = l.node {
                self.rewrite_string(&is, l.span);
            }
        }

        visit::walk_expr(self, ex)
    }

    // Lines up the formal arguments of every function before walking into it.
    fn visit_fn(&mut self,
                fk: visit::FnKind<'v>,
                fd: &'v ast::FnDecl,
                b: &'v ast::Block,
                s: Span,
                _: ast::NodeId) {
        self.fix_formal_args(fd);
        visit::walk_fn(self, fk, fd, b, s);
    }

    // Re-prints `use path::{a, b, ...};` items. Glob imports and all other
    // items are left alone.
    fn visit_item(&mut self, item: &'v ast::Item) {
        // TODO check each item is on a new line and is correctly indented.
        if let ast::Item_::ItemUse(ref vp) = item.node {
            // FIXME convert glob imports (ViewPathGlob) to a list?
            if let ast::ViewPath_::ViewPathList(ref path, ref path_list) = vp.node {
                let new_str = self.fix_use_list(path, path_list, vp.span);

                // TODO move these optimisations to ChangeSet
                if new_str != self.codemap.span_to_snippet(item.span).unwrap() {
                    self.changes.change_span(item.span, new_str);
                }
            }
        }

        visit::walk_item(self, item);
    }
}
// Builds an indentation string made of exactly `width` space characters.
fn make_indent(width: usize) -> String {
    ::std::iter::repeat(' ').take(width).collect()
}
impl<'a> FmtVisitor<'a> {
    // TODO NEEDS TESTS
    //
    // Re-breaks a string literal which is multi-line or runs past MAX_WIDTH.
    //
    // `s` is the value of the literal and `span` its location in the source.
    // The value is escaped again, cut into fragments that fit between the
    // literal's column and MAX_WIDTH (preferably after whitespace), and the
    // fragments are joined with a `\` line continuation followed by an indent
    // that lines them up under the first character of the string. The result
    // replaces `span` in the change set unless it equals the existing text.
    //
    // Literals that start so far right that not even MIN_STRING characters
    // fit on the line are left untouched.
    fn rewrite_string(&mut self, s: &str, span: Span) {
        // FIXME I bet this stomps unicode escapes in the source string

        // Check if there is anything to fix: we always try to fixup multi-line
        // strings, or if the string is too long for the line.
        let l_loc = self.codemap.lookup_char_pos(span.lo);
        let r_loc = self.codemap.lookup_char_pos(span.hi);
        if l_loc.line == r_loc.line && r_loc.col.to_usize() <= MAX_WIDTH {
            return;
        }

        // TODO if lo.col > IDEAL - 10, start a new line (need cur indent for that)

        let s = s.escape_default();

        // TODO use fixed value.
        let l_col = l_loc.col.to_usize();

        // Without this guard `MAX_WIDTH - (l_col + 1)` underflows for literals
        // starting beyond MAX_WIDTH, and a tiny (or zero) budget makes the
        // splitting loop below produce empty fragments.
        if l_col + 1 + MIN_STRING > MAX_WIDTH {
            return;
        }
        let max_chars = MAX_WIDTH - (l_col + 1);

        let indent = make_indent(l_col + 1);
        let indent = &indent;

        let mut cur_start = 0;
        let mut result = String::new();
        result.push('"');
        loop {
            let mut cur_end = cur_start + max_chars;

            if cur_end >= s.len() {
                result.push_str(&s[cur_start..]);
                break;
            }

            // Make sure we're on a char boundary.
            cur_end = next_char(&s, cur_end);

            // Push cur_end left until we reach whitespace
            while !s.char_at(cur_end-1).is_whitespace() {
                cur_end = prev_char(&s, cur_end);

                if cur_end - cur_start < MIN_STRING {
                    // We can't break at whitespace, fall back to splitting
                    // anywhere that doesn't break an escape sequence
                    // NOTE(review): this only steps back over a backslash
                    // sitting exactly at the split point; a split between a
                    // backslash and the character it escapes is not detected.
                    cur_end = next_char(&s, cur_start + max_chars);
                    // Never back up to (or past) the start of the fragment,
                    // otherwise a long run of backslashes spins forever.
                    while cur_end > cur_start + 1 && s.char_at(cur_end) == '\\' {
                        cur_end = prev_char(&s, cur_end);
                    }
                    // We have settled on a split point. Without leaving the
                    // whitespace search here it would start over from the
                    // reset `cur_end` and never terminate.
                    break;
                }
            }
            // Make sure there is no whitespace to the right of the break.
            while cur_end < s.len() && s.char_at(cur_end).is_whitespace() {
                cur_end = next_char(&s, cur_end+1);
            }
            result.push_str(&s[cur_start..cur_end]);
            result.push_str("\\\n");
            result.push_str(indent);

            cur_start = cur_end;
        }
        result.push('"');

        // Check that we actually changed something.
        if result == self.codemap.span_to_snippet(span).unwrap() {
            return;
        }

        self.changes.change_span(span, result);
    }

    // Basically just pretty prints a multi-item import.
    //
    // `path` and `path_list` are the two halves of `use path::{list};` and
    // `vp_span` is the span of the view path (used to find the column the path
    // starts at). Returns the replacement text for the whole `use` item: one
    // or more `use path::{...};` lines, each holding as many names as fit in
    // IDEAL_WIDTH, with `self` (if present) listed first.
    //
    // The first line carries no leading indent because the caller replaces the
    // item's own span, which starts at the item and therefore keeps whatever
    // whitespace already precedes it; continuation lines are indented to the
    // column of the original `use`.
    //
    // Panics if the list contains no names at all.
    fn fix_use_list(&mut self,
                    path: &ast::Path,
                    path_list: &[ast::PathListItem],
                    vp_span: Span) -> String {
        // FIXME remove unused imports
        // FIXME check indentation

        let l_loc = self.codemap.lookup_char_pos(vp_span.lo);
        let path_str = pprust::path_to_string(&path);

        let indent = l_loc.col.0;
        // After accounting for the overhead, how much space left for
        // the item list? ( 5 = :: + { + } + ; )
        // A very long path leaves no space at all rather than underflowing.
        let overhead = indent + path_str.len() + 5;
        let space = if overhead < IDEAL_WIDTH { IDEAL_WIDTH - overhead } else { 0 };
        // 4 = `use` + one space
        // TODO might be pub use
        // The path may sit on its own line left of column 4; clamp instead of
        // underflowing.
        let indent = make_indent(if indent > 4 { indent - 4 } else { 0 });

        let mut cur_str = String::new();
        let mut first = true;
        // If `self` is in the list, put it first.
        let has_self = path_list.iter().any(|vpi|
            if let ast::PathListItem_::PathListMod{ .. } = vpi.node {
                true
            } else {
                false
            }
        );
        if has_self {
            cur_str = "self".to_string();
            first = false;
        }

        let mut new_str = String::new();
        for vpi in path_list.iter() {
            match vpi.node {
                ast::PathListItem_::PathListIdent{ name, .. } => {
                    let next_item = &token::get_ident(name);
                    // Only wrap when the current line already holds something;
                    // wrapping an empty line would emit `use path::{};`.
                    if !first && cur_str.len() + next_item.len() > space {
                        if !new_str.is_empty() {
                            new_str.push_str(&indent);
                        }
                        let cur_line = format!("use {}::{{{}}};\n", path_str, cur_str);
                        new_str.push_str(&cur_line);

                        cur_str = String::new();
                        first = true;
                    }

                    if first {
                        first = false;
                    } else {
                        cur_str.push_str(", ");
                    }

                    cur_str.push_str(next_item);
                }
                // `self` was already emitted at the front of the list.
                ast::PathListItem_::PathListMod{ .. } => {}
            }
        }

        assert!(!first);
        if !new_str.is_empty() {
            new_str.push_str(&indent);
        }
        let cur_line = format!("use {}::{{{}}};", path_str, cur_str);
        new_str.push_str(&cur_line);

        new_str
    }

    // Lines up the formal arguments of a function declaration.
    //
    // Nothing is done when there are fewer than two arguments, when all of
    // them already start in the same column, or when they all sit on one row.
    // Otherwise every argument after the first is moved onto its own line,
    // indented to the (current) column of the first argument.
    fn fix_formal_args<'v>(&mut self, fd: &'v ast::FnDecl) {
        // For now, just check the arguments line up and make them per-row if the line is too long.
        let args = &fd.inputs;
        if args.len() <= 1 {
            return;
        }
        // TODO not really using the hi positions
        let spans: Vec<_> = args.iter().map(|a| (a.pat.span.lo, a.ty.span.hi)).collect();
        let locs: Vec<_> = spans.iter().map(|&(a, b)| (self.codemap.lookup_char_pos(a), self.codemap.lookup_char_pos(b))).collect();
        let first_loc = &locs[0].0;
        // TODO need to adjust for previous changes here.
        let same_row = locs.iter().all(|&(ref l, _)| l.line == first_loc.line);
        let same_col = locs.iter().all(|&(ref l, _)| l.col == first_loc.col);

        if same_col {
            // TODO Check one arg per line and no lines in between (except comments)
            return;
        }

        if same_row { // TODO check line is < 100 && first_loc.line {
            // TODO could also fix whitespace around punctuation here
            // TODO and could check that we're on the same line as the function call, if possible
            return;
        }

        let col = self.changes.col(spans[0].0);
        // One extra byte for the leading newline.
        let mut indent = String::with_capacity(col + 1);
        indent.push('\n');
        for _ in 0..col { indent.push(' '); }
        let last_idx = spans.len() - 1;
        for (i, s) in spans.iter().enumerate() {
            // Take the span from lo to lo (or the last hi for the last arg),
            // trim, push on top of indent, then replace the old lo-lo span with it.
            let mut new_text = if i == 0 {
                "".to_string()
            } else {
                indent.clone()
            };
            let hi = if i == last_idx {
                s.1
            } else {
                spans[i+1].0
            };

            // TODO need a version of slice taking locs, not a span
            let span = Span{ lo: s.0, hi: hi, expn_id: codemap::NO_EXPANSION };
            let snippet = self.changes.slice_span(span).to_string();
            let snippet = snippet.trim();
            new_text.push_str(snippet);
            // Go through `change_span` so the positions are made relative to
            // the start of the file, exactly as `slice_span` does above; raw
            // BytePos values are only valid offsets when the file starts at 0.
            self.changes.change_span(span, new_text);
        }
    }
}
// Returns the closest char boundary of `s` strictly before byte index `i`,
// or 0 when `i` is already 0.
#[inline]
fn prev_char(s: &str, i: usize) -> usize {
    // Index 0 is always a boundary, so the search only comes up empty when
    // the range itself is empty (i == 0).
    (0..i).rev().find(|&idx| s.is_char_boundary(idx)).unwrap_or(0)
}
// Returns `i` if it is a char boundary of `s`, otherwise the first boundary
// after it. Indices at or past the end of the string map to `s.len()`.
#[inline]
fn next_char(s: &str, i: usize) -> usize {
    let end = s.len();
    (i..end).find(|&idx| s.is_char_boundary(idx)).unwrap_or(end)
}
// Compiler-driver callbacks used to run rustfmt after parsing.
struct RustFmtCalls {
// Path of the input file; set by `some_input`, None until then.
input_path: Option<Path>,
}
// Drives the compiler just far enough to get a parsed crate, runs the
// formatting passes on it and prints the result.
impl<'a> CompilerCalls<'a> for RustFmtCalls {
    // Nothing to do before option parsing finishes; keep going.
    fn early_callback(&mut self,
                      _: &getopts::Matches,
                      _: &diagnostics::registry::Registry)
                      -> Compilation {
        Compilation::Continue
    }

    // Remembers the input path and passes the input through unchanged.
    // Panics when the input did not come from a file.
    fn some_input(&mut self, input: Input, input_path: Option<Path>) -> (Input, Option<Path>) {
        if let Some(ref ip) = input_path {
            self.input_path = Some(ip.clone());
        } else {
            // FIXME should handle string input and write to stdout or something
            panic!("No input path");
        }
        (input, input_path)
    }

    // Running without any input is not supported; always panics.
    fn no_input(&mut self,
                _: &getopts::Matches,
                _: &config::Options,
                _: &Option<Path>,
                _: &Option<Path>,
                _: &diagnostics::registry::Registry)
                -> Option<(Input, Option<Path>)> {
        panic!("No input supplied to RustFmt");
    }

    // Nothing to do once the session exists; keep going.
    fn late_callback(&mut self,
                     _: &getopts::Matches,
                     _: &Session,
                     _: &Input,
                     _: &Option<Path>,
                     _: &Option<Path>)
                     -> Compilation {
        Compilation::Continue
    }

    // Stops compilation after parsing and installs the callback which formats
    // the parsed crate and prints the change count followed by the new text.
    fn build_controller(&mut self, _: &Session) -> driver::CompileController<'a> {
        let mut controller = driver::CompileController::basic();
        controller.after_parse.stop = Compilation::Stop;
        controller.after_parse.callback = box |state| {
            let mut changes = fmt_ast(state.krate.unwrap(), state.session.codemap());
            fmt_lines(&mut changes);

            println!("Making {} changes", changes.count);
            println!("{}", changes);
            // FIXME(#5) Should be user specified whether to show or replace.
        };

        controller
    }
}
// Entry point: runs the compiler driver on the command-line arguments with the
// rustfmt callbacks installed, then sets the process exit status to 0.
fn main() {
    let mut callbacks = RustFmtCalls { input_path: None };
    let cli_args = std::os::args();
    rustc_driver::run_compiler(&cli_args, &mut callbacks);
    std::env::set_exit_status(0);
}
// FIXME comments
// Comments aren't in the AST, which makes processing them difficult, but then
// comments are complicated anyway. I think I am happy putting off tackling them
// for now. Long term the solution is for comments to be in the AST, but that means
// only the libsyntax AST, not the rustc one, which means waiting for the ASTs
// to diverge one day....
// Once we do have comments, we just have to implement a simple word wrapping
// algorithm to keep the width under IDEAL_WIDTH. We should also convert multiline
// /* ... */ comments to // and check doc comments are in the right place and of
// the right kind.

View File

@ -14,25 +14,31 @@
// tests
// pull out into its own crate
// impl Default, Extend
// impl DOubleEndedIter and ExactSizeIter for RopeChars
// impl DoubleEndedIter and ExactSizeIter for RopeChars
// better allocation
// balancing
// thread safety/parallisation
// balancing?
extern crate unicode;
use std::fmt;
use std::ops::Range;
use std::num::{SignedInt, Int};
// A Rope, based on an unbalanced binary tree.
// A Rope, based on an unbalanced binary tree. The rope is somewhat special in
// that it tracks positions in the source text. So when locating a position in
// the rope, the user can use either a current position in the text or a
// position in the source text, which the Rope will adjust to a current position
// whilst searching.
pub struct Rope {
root: Node,
len: usize,
src_len: usize,
// FIXME: Allocation is very dumb at the moment, we always add another buffer for every inserted string and we never resuse or collect old memory
// FIXME: Allocation is very dumb at the moment, we always add another
// buffer for every inserted string and we never reuse or collect old
// memory
storage: Vec<Vec<u8>>
}
// A view over a portion of a Rope. Analogous to string slices (`str`).
pub struct RopeSlice<'rope> {
// All nodes which make up the slice, in order.
nodes: Vec<&'rope Lnode>,
@ -42,6 +48,7 @@ pub struct RopeSlice<'rope> {
len: usize,
}
// An iterator over the chars in a rope.
pub struct RopeChars<'rope> {
data: RopeSlice<'rope>,
cur_node: usize,
@ -49,8 +56,8 @@ pub struct RopeChars<'rope> {
abs_byte: usize,
}
impl Rope {
// Create an empty rope.
pub fn new() -> Rope {
Rope {
root: Node::empty_inner(),
@ -62,7 +69,7 @@ impl Rope {
// Uses text as initial storage.
pub fn from_string(text: String) -> Rope {
// TODO should split large texts into segments as we insert
// TODO should split very large texts into segments as we insert
let mut result = Rope::new();
result.insert(0, text);
@ -70,47 +77,41 @@ impl Rope {
result
}
// When initialising a rope, indicates that the rope is complete wrt the
// source text.
fn fix_src(&mut self) {
self.root.fix_src();
self.src_len = self.len;
}
// Length of the rope.
pub fn len(&self) -> usize {
self.len
}
pub fn insert(&mut self, start: usize, text: String) {
if text.len() == 0 {
return;
}
debug_assert!(start <= self.len(), "insertion out of bounds of rope");
let len = text.len();
let storage = text.into_bytes();
let new_node = box Node::new_leaf(&storage[][0] as *const u8, len, 0);
self.storage.push(storage);
match self.root.insert(new_node, start, start) {
NodeAction::Change(n, adj) => {
assert!(adj as usize == len);
self.root = *n;
}
NodeAction::Adjust(adj) => {
assert!(adj as usize == len);
}
_ => panic!("Unexpected action")
}
self.len += len;
}
pub fn insert_copy(&mut self, start: usize, text: &str) {
// If we did clever things with allocation, we could do better here
// FIXME If we did clever things with allocation, we could do better here.
self.insert(start, text.to_string());
}
pub fn insert(&mut self, start: usize, text: String) {
self.insert_inner(start,
text,
|this, node| this.root.insert(node, start, start))
}
pub fn src_insert(&mut self, start: usize, text: String) {
// TODO refactor with insert
self.insert_inner(start,
text,
|this, node| this.root.src_insert(node, start, start))
}
fn insert_inner<F>(&mut self,
start: usize,
text: String,
do_insert: F)
where F: Fn(&mut Rope, Box<Node>) -> NodeAction
{
if text.len() == 0 {
return;
}
@ -119,10 +120,10 @@ impl Rope {
let len = text.len();
let storage = text.into_bytes();
let new_node = box Node::new_leaf(&storage[][0] as *const u8, len, 0);
let new_node = box Node::new_leaf(&storage[..][0] as *const u8, len, 0);
self.storage.push(storage);
match self.root.src_insert(new_node, start, start) {
match do_insert(self, new_node) {
NodeAction::Change(n, adj) => {
assert!(adj as usize == len);
self.root = *n;
@ -147,35 +148,25 @@ impl Rope {
}
pub fn remove(&mut self, start: usize, end: usize) {
assert!(end >= start);
if start == end {
return;
}
let action = self.root.remove(start, end, start);
match action {
NodeAction::None => {}
NodeAction::Remove => {
self.root = Node::empty_inner();
self.len = 0;
}
NodeAction::Adjust(adj) => self.len = (self.len as isize + adj) as usize,
NodeAction::Change(node, adj) => {
self.root = *node;
self.len = (self.len as isize + adj) as usize;
}
}
self.remove_inner(start, end, |this| this.root.remove(start, end, start))
}
pub fn src_remove(&mut self, start: usize, end: usize) {
// TODO refactor with remove
self.remove_inner(start, end, |this| this.root.src_remove(start, end, start))
}
fn remove_inner<F>(&mut self,
start: usize,
end: usize,
do_remove: F)
where F: Fn(&mut Rope) -> NodeAction
{
assert!(end >= start);
if start == end {
return;
}
let action = self.root.src_remove(start, end, start);
match action {
match do_remove(self) {
NodeAction::None => {}
NodeAction::Remove => {
self.root = Node::empty_inner();
@ -190,6 +181,7 @@ impl Rope {
}
// TODO src_replace
// TODO src_replace_str
// This can go horribly wrong if you overwrite a grapheme of different size.
// It is the callers responsibility to ensure that the grapheme at point start
@ -200,7 +192,7 @@ impl Rope {
// I think that is better than duplicating a bunch of code.
// It should be possible to view a &char as a &[u8] somehow, and then
// we can optimise this (FIXME).
self.replace_str(start, &new_char.to_string()[]);
self.replace_str(start, &new_char.to_string()[..]);
}
pub fn replace_str(&mut self, start: usize, new_str: &str) {
@ -332,6 +324,10 @@ impl ::std::str::FromStr for Rope {
impl<'a> fmt::Display for RopeSlice<'a> {
fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
if self.nodes.len() == 0 {
return Ok(());
}
let last_idx = self.nodes.len() - 1;
for (i, n) in self.nodes.iter().enumerate() {
let mut ptr = n.text;
@ -346,7 +342,7 @@ impl<'a> fmt::Display for RopeSlice<'a> {
unsafe {
try!(write!(fmt,
"{}",
::std::str::from_utf8(::std::slice::from_raw_buf(&ptr, len)).unwrap()));
::std::str::from_utf8(::std::slice::from_raw_parts(ptr, len)).unwrap()));
}
}
Ok(())
@ -371,7 +367,7 @@ impl<'a> fmt::Debug for RopeSlice<'a> {
unsafe {
try!(write!(fmt,
"\"{}\"",
::std::str::from_utf8(::std::slice::from_raw_buf(&ptr, len)).unwrap()));
::std::str::from_utf8(::std::slice::from_raw_parts(ptr, len)).unwrap()));
}
}
Ok(())
@ -408,7 +404,7 @@ impl fmt::Display for Node {
unsafe {
write!(fmt,
"{}",
::std::str::from_utf8(::std::slice::from_raw_buf(text, len)).unwrap())
::std::str::from_utf8(::std::slice::from_raw_parts(*text, len)).unwrap())
}
}
}
@ -437,7 +433,7 @@ impl fmt::Debug for Node {
unsafe {
write!(fmt,
"(\"{}\"; {})",
::std::str::from_utf8(::std::slice::from_raw_buf(text, len)).unwrap(),
::std::str::from_utf8(::std::slice::from_raw_parts(*text, len)).unwrap(),
len)
}
}
@ -519,7 +515,7 @@ impl Node {
}
}
// All these methods are just doing dynamic dispatch, TODO use a macro
// Most of these methods are just doing dynamic dispatch, TODO use a macro
// precond: start < end
fn remove(&mut self, start: usize, end: usize, src_start: usize) -> NodeAction {
@ -534,19 +530,9 @@ impl Node {
Node::InnerNode(ref mut i) => i.src_remove(start, end, src_start),
Node::LeafNode(ref mut l) => {
debug!("src_remove: pre-adjust {}-{}; {}", start, end, l.src_offset);
let mut start = start as isize + l.src_offset;
if start < 0 {
start = 0;
}
let mut end = end as isize + l.src_offset;
if end < 0 {
end = 0;
}
// TODO src_start?
let mut src_start = src_start as isize + l.src_offset;
if src_start < 0 {
src_start = 0;
}
let start = minz(start as isize + l.src_offset);
let end = minz(end as isize + l.src_offset);
let src_start = minz(src_start as isize + l.src_offset);
debug!("src_remove: post-adjust {}-{}, {}", start, end, src_start);
if end > start {
l.remove(start as usize, end as usize, src_start as usize)
@ -569,15 +555,8 @@ impl Node {
Node::InnerNode(ref mut i) => i.src_insert(node, start, src_start),
Node::LeafNode(ref mut l) => {
debug!("src_insert: pre-adjust {}, {}; {}", start, src_start, l.src_offset);
let mut start = start as isize + l.src_offset;
if start < 0 {
start = 0;
}
// TODO src_start?
let mut src_start = src_start as isize + l.src_offset;
if src_start < 0 {
src_start = 0;
}
let start = minz(start as isize + l.src_offset);
let src_start = minz(src_start as isize + l.src_offset);
debug!("src_insert: post-adjust {}, {}", start, src_start);
l.insert(node, start as usize, src_start as usize)
}
@ -596,14 +575,8 @@ impl Node {
Node::InnerNode(ref i) => i.find_src_slice(start, end, slice),
Node::LeafNode(ref l) => {
debug!("find_src_slice: pre-adjust {}-{}; {}", start, end, l.src_offset);
let mut start = start as isize + l.src_offset;
if start < 0 {
start = 0;
}
let mut end = end as isize + l.src_offset;
if end < 0 {
end = 0;
}
let start = minz(start as isize + l.src_offset);
let end = minz(end as isize + l.src_offset);
debug!("find_src_slice: post-adjust {}-{}", start, end);
if end > start {
l.find_slice(start as usize, end as usize, slice);
@ -1117,21 +1090,14 @@ impl Lnode {
i -= 1;
}
let loc = if loc < 0 {
0
} else {
loc as usize
};
let loc = minz(loc) as usize;
debug!("Lnode::col_for_src_loc, return Continue({})", loc);
Search::Continue(loc)
}
fn find_last_char(&self, needle: char) -> Option<usize> {
// FIXME due to multi-byte chars, this will give false positives
// I think we must search forwards from the start :-( Perhaps we could
// track unicode vs ascii or something (I don't think there is an efficient
// way to read unicode backwards, I might be wrong).
// std::str::GraphemeIndices can do this!
// FIXME use std::str::GraphemeIndices to do this!
let mut loc = self.len as isize - 1;
while loc >= 0 {
unsafe {
@ -1147,8 +1113,169 @@ impl Lnode {
}
}
//TODO comment etc.
// The state of searching through a rope.
enum Search {
// TODO comment
// The search has not finished yet; carries a usize on to the next step.
// `Lnode::col_for_src_loc` returns this with its adjusted location.
// NOTE(review): exact meaning of the payload -- confirm.
Continue(usize),
// TODO comment
// Presumably the search has finished and the payload is the result --
// TODO confirm against the places which construct it.
Done(usize)
}
// Clamps a signed integer at zero from below: negative values become zero and
// everything else is returned unchanged.
fn minz<I: SignedInt>(x: I) -> I {
    if !x.is_negative() { x } else { I::zero() }
}
// Unit tests for the rope: construction, plain and source-relative insertion
// and removal, in-place replacement, slicing, char iteration and column lookup.
#[cfg(test)]
mod test {
use super::*;
// FIXME is this a Rust bug? Why is minz not imported by the glob import?
// NOTE(review): minz is declared without `pub`; presumably the glob import
// only brings in public items, hence the explicit import -- confirm.
use super::minz;
// An empty rope and a rope built from a String report the right length and text.
#[test]
fn test_new() {
let r = Rope::new();
assert!(r.len() == 0);
assert!(r.to_string() == "");
let r = Rope::from_string("Hello world!".to_string());
assert!(r.len() == 12);
assert!(r.to_string() == "Hello world!");
}
// minz maps negative values to zero and leaves the rest alone, for two
// different signed integer types.
#[test]
fn test_minz() {
let x: i32 = 0;
assert!(super::minz(x) == 0);
let x: i32 = 42;
assert!(minz(x) == 42);
let x: i32 = -42;
assert!(minz(x) == 0);
let x: isize = 0;
assert!(minz(x) == 0);
let x: isize = 42;
assert!(minz(x) == 42);
let x: isize = -42;
assert!(minz(x) == 0);
}
// A rope can be parsed from a string slice (FromStr).
#[test]
fn test_from_string() {
let mut r: Rope = "Hello world!".parse().unwrap();
assert!(r.to_string() == "Hello world!");
}
// Removing a prefix, a suffix and a middle section; src_slice is addressed
// in the coordinates of the original text.
#[test]
fn test_remove() {
let mut r: Rope = "Hello world!".parse().unwrap();
r.remove(0, 10);
assert!(r.to_string() == "d!");
assert!(r.src_slice(0..5).to_string() == "");
assert!(r.src_slice(10..12).to_string() == "d!");
let mut r: Rope = "Hello world!".parse().unwrap();
r.remove(4, 12);
assert!(r.to_string() == "Hell");
// TODO
//assert!(r.src_slice(0..4).to_string() == "Hell");
//assert!(r.src_slice(10..12).to_string() == "");
let mut r: Rope = "Hello world!".parse().unwrap();
r.remove(4, 10);
assert!(r.to_string() == "Helld!");
// TODO
//assert!(r.src_slice(1..5).to_string() == "ell");
assert!(r.src_slice(9..12).to_string() == "d!");
}
// Inserting a copied string at the start, at the end and in the middle.
#[test]
fn test_insert_copy() {
let mut r: Rope = "Hello world!".parse().unwrap();
r.insert_copy(0, "foo");
assert!(r.to_string() == "fooHello world!");
assert!(r.slice(2..8).to_string() == "oHello");
let mut r: Rope = "Hello world!".parse().unwrap();
r.insert_copy(12, "foo");
assert!(r.to_string() == "Hello world!foo");
assert!(r.slice(2..8).to_string() == "llo wo");
let mut r: Rope = "Hello world!".parse().unwrap();
r.insert_copy(5, "foo");
assert!(r.to_string() == "Hellofoo world!");
assert!(r.slice(2..8).to_string() == "llofoo");
}
// Appending a copied string to the end of the rope.
#[test]
fn test_push_copy() {
let mut r: Rope = "Hello world!".parse().unwrap();
r.push_copy("foo");
assert!(r.to_string() == "Hello world!foo");
assert!(r.slice(2..8).to_string() == "llo wo");
}
// Replacement of single chars and of a string containing a multi-byte char,
// followed by a check that chars() yields each char with its byte offset.
#[test]
fn test_insert_replace() {
let mut r: Rope = "hello worl\u{00bb0}!".parse().unwrap();
r.insert_copy(5, "bb");
assert!(r.to_string() == "hellobb worlர!");
r.replace(0, 'H');
r.replace(15, '~');
r.replace_str(5, "fo\u{00cb0}");
assert!(r.to_string() == "Hellofoರrlர~");
assert!(r.slice(0..10).to_string() == "Hellofoರ");
assert!(r.slice(5..10).to_string() == "foರ");
assert!(r.slice(10..15).to_string() == "rlர");
let expected = "Hellofoರrlர~";
let mut byte_pos = 0;
for ((c, b), e) in r.chars().zip(expected.chars()) {
assert!(c == e);
assert!(b == byte_pos);
byte_pos += e.len_utf8();
}
}
// Source-relative inserts and removes, then the column reported for every
// position of the original 13-byte text.
#[test]
fn test_src_insert_remove_col_for_src_loc() {
let mut r: Rope = "hello\n world!".parse().unwrap();
r.src_insert(4, "foo".to_string());
r.src_insert(5, "bar".to_string());
assert!(r.to_string() == "hellfooobar\n world!");
r.src_remove(2, 4);
r.src_remove(10, 12);
assert!(r.to_string() == "hefooobar\n wor!");
let expected = "hefooobar\n wor!";
let mut byte_pos = 0;
for ((c, b), e) in r.chars().zip(expected.chars()) {
assert!(c == e);
assert!(b == byte_pos);
byte_pos += e.len_utf8();
}
let expected = [0, 1, 2, 2, 5, 9, 0, 1, 2, 3, 4, 4, 4];
for i in 0..13 {
assert!(r.col_for_src_loc(i) == expected[i]);
}
}
// Repeated source-relative inserts, including several at the same position.
#[test]
fn test_src_insert() {
let mut r: Rope = "Hello world!".parse().unwrap();
r.src_insert(4, "foo".to_string());
r.src_insert(0, "foo".to_string());
r.src_insert(12, "foo".to_string());
assert!(r.to_string() == "fooHellfooo world!foo");
r.src_insert(4, "bar".to_string());
r.src_insert(5, "bar".to_string());
r.src_insert(3, "bar".to_string());
r.src_insert(0, "bar".to_string());
r.src_insert(12, "bar".to_string());
assert!(r.to_string() == "barfooHelbarlbarfooobar world!barfoo");
}
}