Detect when comments disappear

When the reformatted code doesn't contain the same quantity of comments
as the original code, use the original code instead of the reformatted
code.
This is done for all expressions and `let` statements.

This should be used at the finest grained level possible, to avoid that
a small disappearing comment prevents a big chunk of code to be
reformatted.

Kind of fixes (avoid disappearing comments, but prevents a good
formatting is such case) #285 #225 #563 #743
This commit is contained in:
Gaëtan Cassiers 2016-01-10 15:12:15 +01:00
parent 66abad9445
commit 9f98f725cb
5 changed files with 277 additions and 43 deletions

View File

@ -8,13 +8,17 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Format comments.
// Formatting and tools for comments.
use std::iter;
use std::{self, iter};
use syntax::codemap::Span;
use Indent;
use config::Config;
use rewrite::RewriteContext;
use string::{StringFormat, rewrite_string};
use utils::wrap_str;
pub fn rewrite_comment(orig: &str,
block_style: bool,
@ -150,7 +154,7 @@ impl FindUncommented for str {
}
Some(c) => {
match kind {
CodeCharKind::Normal if b == c => {}
FullCodeCharKind::Normal if b == c => {}
_ => {
needle_iter = pat.chars();
}
@ -174,7 +178,7 @@ impl FindUncommented for str {
pub fn find_comment_end(s: &str) -> Option<usize> {
let mut iter = CharClasses::new(s.char_indices());
for (kind, (i, _c)) in &mut iter {
if kind == CodeCharKind::Normal {
if kind == FullCodeCharKind::Normal {
return Some(i);
}
}
@ -189,7 +193,7 @@ pub fn find_comment_end(s: &str) -> Option<usize> {
/// Returns true if text contains any comment.
pub fn contains_comment(text: &str) -> bool {
CharClasses::new(text.chars()).any(|(kind, _)| kind == CodeCharKind::Comment)
CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
}
struct CharClasses<T>
@ -240,6 +244,33 @@ pub enum CodeCharKind {
Comment,
}
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum FullCodeCharKind {
Normal,
StartComment,
InComment,
EndComment,
}
impl FullCodeCharKind {
fn is_comment(&self) -> bool {
match *self {
FullCodeCharKind::Normal => false,
FullCodeCharKind::StartComment |
FullCodeCharKind::InComment |
FullCodeCharKind::EndComment => true,
}
}
fn to_codecharkind(&self) -> CodeCharKind {
if self.is_comment() {
CodeCharKind::Comment
} else {
CodeCharKind::Normal
}
}
}
impl<T> CharClasses<T>
where T: Iterator,
T::Item: RichChar
@ -256,9 +287,9 @@ impl<T> Iterator for CharClasses<T>
where T: Iterator,
T::Item: RichChar
{
type Item = (CodeCharKind, T::Item);
type Item = (FullCodeCharKind, T::Item);
fn next(&mut self) -> Option<(CodeCharKind, T::Item)> {
fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
let item = try_opt!(self.base.next());
let chr = item.get_char();
self.status = match self.status {
@ -286,11 +317,11 @@ impl<T> Iterator for CharClasses<T>
match self.base.peek() {
Some(next) if next.get_char() == '*' => {
self.status = CharClassesStatus::BlockCommentOpening(1);
return Some((CodeCharKind::Comment, item));
return Some((FullCodeCharKind::StartComment, item));
}
Some(next) if next.get_char() == '/' => {
self.status = CharClassesStatus::LineComment;
return Some((CodeCharKind::Comment, item));
return Some((FullCodeCharKind::StartComment, item));
}
_ => CharClassesStatus::Normal,
}
@ -299,12 +330,7 @@ impl<T> Iterator for CharClasses<T>
}
}
CharClassesStatus::BlockComment(deepness) => {
if deepness == 0 {
// This is the closing '/'
assert_eq!(chr, '/');
self.status = CharClassesStatus::Normal;
return Some((CodeCharKind::Comment, item));
}
assert!(deepness != 0);
self.status = match self.base.peek() {
Some(next) if next.get_char() == '/' && chr == '*' => {
CharClassesStatus::BlockCommentClosing(deepness - 1)
@ -314,34 +340,92 @@ impl<T> Iterator for CharClasses<T>
}
_ => CharClassesStatus::BlockComment(deepness),
};
return Some((CodeCharKind::Comment, item));
return Some((FullCodeCharKind::InComment, item));
}
CharClassesStatus::BlockCommentOpening(deepness) => {
assert_eq!(chr, '*');
self.status = CharClassesStatus::BlockComment(deepness);
return Some((CodeCharKind::Comment, item));
return Some((FullCodeCharKind::InComment, item));
}
CharClassesStatus::BlockCommentClosing(deepness) => {
assert_eq!(chr, '/');
self.status = if deepness == 0 {
CharClassesStatus::Normal
if deepness == 0 {
self.status = CharClassesStatus::Normal;
return Some((FullCodeCharKind::EndComment, item));
} else {
CharClassesStatus::BlockComment(deepness)
};
return Some((CodeCharKind::Comment, item));
self.status = CharClassesStatus::BlockComment(deepness);
return Some((FullCodeCharKind::InComment, item));
}
}
CharClassesStatus::LineComment => {
self.status = match chr {
'\n' => CharClassesStatus::Normal,
_ => CharClassesStatus::LineComment,
};
return Some((CodeCharKind::Comment, item));
match chr {
'\n' => {
self.status = CharClassesStatus::Normal;
return Some((FullCodeCharKind::EndComment, item));
}
_ => {
self.status = CharClassesStatus::LineComment;
return Some((FullCodeCharKind::InComment, item));
}
}
}
};
Some((CodeCharKind::Normal, item))
Some((FullCodeCharKind::Normal, item))
}
}
/// Iterator over functional and commented parts of a string. Any part of a string is either
/// functional code, either *one* block comment, either *one* line comment. Whitespace between
/// comments is functional code. Line comments contain their ending newlines.
struct UngroupedCommentCodeSlices<'a> {
slice: &'a str,
iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
}
impl<'a> UngroupedCommentCodeSlices<'a> {
fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
UngroupedCommentCodeSlices {
slice: code,
iter: CharClasses::new(code.char_indices()).peekable(),
}
}
}
impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
type Item = (CodeCharKind, usize, &'a str);
fn next(&mut self) -> Option<Self::Item> {
let (kind, (start_idx, _)) = try_opt!(self.iter.next());
match kind {
FullCodeCharKind::Normal => {
// Consume all the Normal code
while let Some(&(FullCodeCharKind::Normal, (_, _))) = self.iter.peek() {
let _ = self.iter.next();
}
}
FullCodeCharKind::StartComment => {
// Consume the whole comment
while let Some((FullCodeCharKind::InComment, (_, _))) = self.iter.next() {}
}
_ => panic!(),
}
let slice = match self.iter.peek() {
Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
None => &self.slice[start_idx..],
};
Some((if kind.is_comment() {
CodeCharKind::Comment
} else {
CodeCharKind::Normal
},
start_idx,
slice))
}
}
/// Iterator over an alternating sequence of functional and commented parts of
/// a string. The first item is always a, possibly zero length, subslice of
/// functional text. Line style comments contain their ending newlines.
@ -383,7 +467,7 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
first_whitespace = Some(i);
}
if kind == self.last_slice_kind && !is_comment_connector {
if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
let last_index = match first_whitespace {
Some(j) => j,
None => i,
@ -419,20 +503,124 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
}
}
/// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
/// (if it fits in the width/offset, else return None), else return `new`
pub fn recover_comment_removed(new: String,
span: Span,
context: &RewriteContext,
width: usize,
offset: Indent)
-> Option<String> {
let snippet = context.snippet(span);
if changed_comment_content(&snippet, &new) {
// We missed some comments
// Keep previous formatting if it satisfies the constrains
return wrap_str(snippet, context.config.max_width, width, offset);
} else {
Some(new)
}
}
/// Return true if the two strings of code have the same payload of comments.
/// The payload of comments is everything in the string except:
/// - actual code (not comments)
/// - comment start/end marks
/// - whitespace
/// - '*' at the beginning of lines in block comments
fn changed_comment_content(orig: &str, new: &str) -> bool {
// Cannot write this as a fn since we cannot return types containing closures
let code_comment_content = |code| {
let slices = UngroupedCommentCodeSlices::new(code);
slices.filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
.flat_map(|(_, _, s)| CommentReducer::new(s))
};
let res = code_comment_content(orig).ne(code_comment_content(new));
debug!("comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
res,
orig,
new,
code_comment_content(orig).collect::<String>(),
code_comment_content(new).collect::<String>());
res
}
/// Iterator over the 'payload' characters of a comment.
/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
is_block: bool,
at_start_line: bool,
iter: std::str::Chars<'a>,
}
impl<'a> CommentReducer<'a> {
fn new(comment: &'a str) -> CommentReducer<'a> {
let is_block = comment.starts_with("/*");
let comment = remove_comment_header(comment);
CommentReducer {
is_block: is_block,
at_start_line: false, // There are no supplementary '*' on the first line
iter: comment.chars(),
}
}
}
impl<'a> Iterator for CommentReducer<'a> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
loop {
let mut c = try_opt!(self.iter.next());
if self.is_block && self.at_start_line {
while c.is_whitespace() {
c = try_opt!(self.iter.next());
}
// Ignore leading '*'
if c == '*' {
c = try_opt!(self.iter.next());
}
} else {
if c == '\n' {
self.at_start_line = true;
}
}
if !c.is_whitespace() {
return Some(c);
}
}
}
}
fn remove_comment_header(comment: &str) -> &str {
if comment.starts_with("///") || comment.starts_with("//!") {
&comment[3..]
} else if comment.starts_with("//") {
&comment[2..]
} else if comment.starts_with("/**") || comment.starts_with("/*!") {
&comment[3..comment.len() - 2]
} else {
assert!(comment.starts_with("/*"),
format!("string '{}' is not a comment", comment));
&comment[2..comment.len() - 2]
}
}
#[cfg(test)]
mod test {
use super::{CharClasses, CodeCharKind, contains_comment, rewrite_comment, FindUncommented,
CommentCodeSlices};
use super::{CharClasses, CodeCharKind, FullCodeCharKind, contains_comment, rewrite_comment,
FindUncommented, CommentCodeSlices};
use Indent;
#[test]
fn char_classes() {
let mut iter = CharClasses::new("//\n\n".chars());
assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
assert_eq!((CodeCharKind::Comment, '\n'), iter.next().unwrap());
assert_eq!((CodeCharKind::Normal, '\n'), iter.next().unwrap());
assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
assert_eq!(None, iter.next());
}
@ -507,8 +695,8 @@ mod test {
CharClasses::new(text.chars())
.filter_map(|(s, c)| {
match s {
CodeCharKind::Normal => Some(c),
CodeCharKind::Comment => None,
FullCodeCharKind::Normal => Some(c),
_ => None,
}
})
.collect()

View File

@ -23,7 +23,7 @@ use utils::{span_after, extra_offset, last_line_width, wrap_str, binary_search,
semicolon_for_stmt};
use visitor::FmtVisitor;
use config::{Config, StructLitStyle, MultilineStyle};
use comment::{FindUncommented, rewrite_comment, contains_comment};
use comment::{FindUncommented, rewrite_comment, contains_comment, recover_comment_removed};
use types::rewrite_path;
use items::{span_lo_for_arg, span_hi_for_arg};
use chains::rewrite_chain;
@ -35,7 +35,7 @@ use syntax::visit::Visitor;
impl Rewrite for ast::Expr {
fn rewrite(&self, context: &RewriteContext, width: usize, offset: Indent) -> Option<String> {
match self.node {
let result = match self.node {
ast::Expr_::ExprVec(ref expr_vec) => {
rewrite_array(expr_vec.iter().map(|e| &**e),
mk_sp(span_after(self.span, "[", context.codemap), self.span.hi),
@ -207,7 +207,8 @@ impl Rewrite for ast::Expr {
width,
offset)
}
}
};
result.and_then(|res| recover_comment_removed(res, self.span, context, width, offset))
}
}
@ -478,7 +479,7 @@ impl Rewrite for ast::Block {
impl Rewrite for ast::Stmt {
fn rewrite(&self, context: &RewriteContext, _width: usize, offset: Indent) -> Option<String> {
match self.node {
let result = match self.node {
ast::Stmt_::StmtDecl(ref decl, _) => {
if let ast::Decl_::DeclLocal(ref local) = decl.node {
local.rewrite(context, context.config.max_width, offset)
@ -499,7 +500,8 @@ impl Rewrite for ast::Stmt {
.map(|s| s + suffix)
}
ast::Stmt_::StmtMac(..) => None,
}
};
result.and_then(|res| recover_comment_removed(res, self.span, context, _width, offset))
}
}

View File

@ -17,7 +17,8 @@ fn main() {
< *mut JSObject >:: relocate(entry);
let x: Foo/*::*/<A >;
let x: Foo<A >;
let x: Foo/*::*/<A>;
}
fn op(foo: Bar, key : &[u8], upd : Fn(Option<&memcache::Item> , Baz ) -> Result) -> MapResult {}

View File

@ -0,0 +1,42 @@
// All the comments here should not disappear.
fn a() {
match x {
X |
// A comment
Y => {}
};
}
fn b() {
match x {
X =>
// A comment
y
}
}
fn c() {
a() /* ... */;
}
fn foo() -> Vec<i32> {
(0..11)
.map(|x|
// This comment disappears.
if x % 2 == 0 { x } else { x * 2 })
.collect()
}
fn d() {
if true /* and ... */ {
a();
}
}
fn calc_page_len(prefix_len: usize, sofar: usize) -> usize {
2 // page type and flags
+ 1 // stored depth
+ 2 // stored count
+ prefix_len + sofar // sum of size of all the actual items
}

View File

@ -17,6 +17,7 @@ fn main() {
<*mut JSObject>::relocate(entry);
let x: Foo<A>;
let x: Foo/*::*/<A>;
}
fn op(foo: Bar, key: &[u8], upd: Fn(Option<&memcache::Item>, Baz) -> Result) -> MapResult {}