Detect when comments disappear

When the reformatted code doesn't contain the same quantity of comments as the original code, use the original code instead of the reformatted code. This is done for all expressions and `let` statements. This should be used at the finest grained level possible, to avoid that a small disappearing comment prevents a big chunk of code to be reformatted. Kind of fixes (avoid disappearing comments, but prevents a good formatting is such case) #285 #225 #563 #743
2016-01-10 15:12:15 +01:00 · 2016-01-10 15:12:15 +01:00 · 9f98f725cb
commit 9f98f725cb
parent 66abad9445
5 changed files with 277 additions and 43 deletions
--- a/src/comment.rs
+++ b/src/comment.rs
@ -8,13 +8,17 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-// Format comments.
+// Formatting and tools for comments.

-use std::iter;
+use std::{self, iter};
+
+use syntax::codemap::Span;

 use Indent;
 use config::Config;
+use rewrite::RewriteContext;
 use string::{StringFormat, rewrite_string};
+use utils::wrap_str;

 pub fn rewrite_comment(orig: &str,
                       block_style: bool,
@ -150,7 +154,7 @@ impl FindUncommented for str {
                }
                Some(c) => {
                    match kind {
-                        CodeCharKind::Normal if b == c => {}
+                        FullCodeCharKind::Normal if b == c => {}
                        _ => {
                            needle_iter = pat.chars();
                        }
@ -174,7 +178,7 @@ impl FindUncommented for str {
 pub fn find_comment_end(s: &str) -> Option<usize> {
    let mut iter = CharClasses::new(s.char_indices());
    for (kind, (i, _c)) in &mut iter {
-        if kind == CodeCharKind::Normal {
+        if kind == FullCodeCharKind::Normal {
            return Some(i);
        }
    }
@ -189,7 +193,7 @@ pub fn find_comment_end(s: &str) -> Option<usize> {

 /// Returns true if text contains any comment.
 pub fn contains_comment(text: &str) -> bool {
-    CharClasses::new(text.chars()).any(|(kind, _)| kind == CodeCharKind::Comment)
+    CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
 }

 struct CharClasses<T>
@ -240,6 +244,33 @@ pub enum CodeCharKind {
    Comment,
 }

+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+enum FullCodeCharKind {
+    Normal,
+    StartComment,
+    InComment,
+    EndComment,
+}
+
+impl FullCodeCharKind {
+    fn is_comment(&self) -> bool {
+        match *self {
+            FullCodeCharKind::Normal => false,
+            FullCodeCharKind::StartComment |
+            FullCodeCharKind::InComment |
+            FullCodeCharKind::EndComment => true,
+        }
+    }
+
+    fn to_codecharkind(&self) -> CodeCharKind {
+        if self.is_comment() {
+            CodeCharKind::Comment
+        } else {
+            CodeCharKind::Normal
+        }
+    }
+}
+
 impl<T> CharClasses<T>
    where T: Iterator,
          T::Item: RichChar
@ -256,9 +287,9 @@ impl<T> Iterator for CharClasses<T>
    where T: Iterator,
          T::Item: RichChar
 {
-    type Item = (CodeCharKind, T::Item);
+    type Item = (FullCodeCharKind, T::Item);

-    fn next(&mut self) -> Option<(CodeCharKind, T::Item)> {
+    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = try_opt!(self.base.next());
        let chr = item.get_char();
        self.status = match self.status {
@ -286,11 +317,11 @@ impl<T> Iterator for CharClasses<T>
                        match self.base.peek() {
                            Some(next) if next.get_char() == '*' => {
                                self.status = CharClassesStatus::BlockCommentOpening(1);
-                                return Some((CodeCharKind::Comment, item));
+                                return Some((FullCodeCharKind::StartComment, item));
                            }
                            Some(next) if next.get_char() == '/' => {
                                self.status = CharClassesStatus::LineComment;
-                                return Some((CodeCharKind::Comment, item));
+                                return Some((FullCodeCharKind::StartComment, item));
                            }
                            _ => CharClassesStatus::Normal,
                        }
@ -299,12 +330,7 @@ impl<T> Iterator for CharClasses<T>
                }
            }
            CharClassesStatus::BlockComment(deepness) => {
-                if deepness == 0 {
-                    // This is the closing '/'
-                    assert_eq!(chr, '/');
-                    self.status = CharClassesStatus::Normal;
-                    return Some((CodeCharKind::Comment, item));
-                }
+                assert!(deepness != 0);
                self.status = match self.base.peek() {
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
@ -314,34 +340,92 @@ impl<T> Iterator for CharClasses<T>
                    }
                    _ => CharClassesStatus::BlockComment(deepness),
                };
-                return Some((CodeCharKind::Comment, item));
+                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentOpening(deepness) => {
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
-                return Some((CodeCharKind::Comment, item));
+                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentClosing(deepness) => {
                assert_eq!(chr, '/');
-                self.status = if deepness == 0 {
-                    CharClassesStatus::Normal
+                if deepness == 0 {
+                    self.status = CharClassesStatus::Normal;
+                    return Some((FullCodeCharKind::EndComment, item));
                } else {
-                    CharClassesStatus::BlockComment(deepness)
-                };
-                return Some((CodeCharKind::Comment, item));
+                    self.status = CharClassesStatus::BlockComment(deepness);
+                    return Some((FullCodeCharKind::InComment, item));
+                }
            }
            CharClassesStatus::LineComment => {
-                self.status = match chr {
-                    '\n' => CharClassesStatus::Normal,
-                    _ => CharClassesStatus::LineComment,
-                };
-                return Some((CodeCharKind::Comment, item));
+                match chr {
+                    '\n' => {
+                        self.status = CharClassesStatus::Normal;
+                        return Some((FullCodeCharKind::EndComment, item));
+                    }
+                    _ => {
+                        self.status = CharClassesStatus::LineComment;
+                        return Some((FullCodeCharKind::InComment, item));
+                    }
+                }
            }
        };
-        Some((CodeCharKind::Normal, item))
+        Some((FullCodeCharKind::Normal, item))
    }
 }

+/// Iterator over functional and commented parts of a string. Any part of a string is either
+/// functional code, either *one* block comment, either *one* line comment. Whitespace between
+/// comments is functional code. Line comments contain their ending newlines.
+struct UngroupedCommentCodeSlices<'a> {
+    slice: &'a str,
+    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
+}
+
+impl<'a> UngroupedCommentCodeSlices<'a> {
+    fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
+        UngroupedCommentCodeSlices {
+            slice: code,
+            iter: CharClasses::new(code.char_indices()).peekable(),
+        }
+    }
+}
+
+impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
+    type Item = (CodeCharKind, usize, &'a str);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let (kind, (start_idx, _)) = try_opt!(self.iter.next());
+        match kind {
+            FullCodeCharKind::Normal => {
+                // Consume all the Normal code
+                while let Some(&(FullCodeCharKind::Normal, (_, _))) = self.iter.peek() {
+                    let _ = self.iter.next();
+                }
+            }
+            FullCodeCharKind::StartComment => {
+                // Consume the whole comment
+                while let Some((FullCodeCharKind::InComment, (_, _))) = self.iter.next() {}
+            }
+            _ => panic!(),
+        }
+        let slice = match self.iter.peek() {
+            Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
+            None => &self.slice[start_idx..],
+        };
+        Some((if kind.is_comment() {
+            CodeCharKind::Comment
+        } else {
+            CodeCharKind::Normal
+        },
+              start_idx,
+              slice))
+    }
+}
+
+
+
+
 /// Iterator over an alternating sequence of functional and commented parts of
 /// a string. The first item is always a, possibly zero length, subslice of
 /// functional text. Line style comments contain their ending newlines.
@ -383,7 +467,7 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
                first_whitespace = Some(i);
            }

-            if kind == self.last_slice_kind && !is_comment_connector {
+            if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
                let last_index = match first_whitespace {
                    Some(j) => j,
                    None => i,
@ -419,20 +503,124 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
    }
 }

+/// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
+/// (if it fits in the width/offset, else return None), else return `new`
+pub fn recover_comment_removed(new: String,
+                               span: Span,
+                               context: &RewriteContext,
+                               width: usize,
+                               offset: Indent)
+                               -> Option<String> {
+    let snippet = context.snippet(span);
+    if changed_comment_content(&snippet, &new) {
+        // We missed some comments
+        // Keep previous formatting if it satisfies the constrains
+        return wrap_str(snippet, context.config.max_width, width, offset);
+    } else {
+        Some(new)
+    }
+}
+
+/// Return true if the two strings of code have the same payload of comments.
+/// The payload of comments is everything in the string except:
+/// 	- actual code (not comments)
+/// 	- comment start/end marks
+///		- whitespace
+///		- '*' at the beginning of lines in block comments
+fn changed_comment_content(orig: &str, new: &str) -> bool {
+    // Cannot write this as a fn since we cannot return types containing closures
+    let code_comment_content = |code| {
+        let slices = UngroupedCommentCodeSlices::new(code);
+        slices.filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
+              .flat_map(|(_, _, s)| CommentReducer::new(s))
+    };
+    let res = code_comment_content(orig).ne(code_comment_content(new));
+    debug!("comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
+           res,
+           orig,
+           new,
+           code_comment_content(orig).collect::<String>(),
+           code_comment_content(new).collect::<String>());
+    res
+}
+
+
+/// Iterator over the 'payload' characters of a comment.
+/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
+/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
+/// for example).
+struct CommentReducer<'a> {
+    is_block: bool,
+    at_start_line: bool,
+    iter: std::str::Chars<'a>,
+}
+
+impl<'a> CommentReducer<'a> {
+    fn new(comment: &'a str) -> CommentReducer<'a> {
+        let is_block = comment.starts_with("/*");
+        let comment = remove_comment_header(comment);
+        CommentReducer {
+            is_block: is_block,
+            at_start_line: false, // There are no supplementary '*' on the first line
+            iter: comment.chars(),
+        }
+    }
+}
+
+impl<'a> Iterator for CommentReducer<'a> {
+    type Item = char;
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let mut c = try_opt!(self.iter.next());
+            if self.is_block && self.at_start_line {
+                while c.is_whitespace() {
+                    c = try_opt!(self.iter.next());
+                }
+                // Ignore leading '*'
+                if c == '*' {
+                    c = try_opt!(self.iter.next());
+                }
+            } else {
+                if c == '\n' {
+                    self.at_start_line = true;
+                }
+            }
+            if !c.is_whitespace() {
+                return Some(c);
+            }
+        }
+    }
+}
+
+
+fn remove_comment_header(comment: &str) -> &str {
+    if comment.starts_with("///") || comment.starts_with("//!") {
+        &comment[3..]
+    } else if comment.starts_with("//") {
+        &comment[2..]
+    } else if comment.starts_with("/**") || comment.starts_with("/*!") {
+        &comment[3..comment.len() - 2]
+    } else {
+        assert!(comment.starts_with("/*"),
+                format!("string '{}' is not a comment", comment));
+        &comment[2..comment.len() - 2]
+    }
+}
+
 #[cfg(test)]
 mod test {
-    use super::{CharClasses, CodeCharKind, contains_comment, rewrite_comment, FindUncommented,
-                CommentCodeSlices};
+    use super::{CharClasses, CodeCharKind, FullCodeCharKind, contains_comment, rewrite_comment,
+                FindUncommented, CommentCodeSlices};
    use Indent;

    #[test]
    fn char_classes() {
        let mut iter = CharClasses::new("//\n\n".chars());

-        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Comment, '\n'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Normal, '\n'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

@ -507,8 +695,8 @@ mod test {
        CharClasses::new(text.chars())
            .filter_map(|(s, c)| {
                match s {
-                    CodeCharKind::Normal => Some(c),
-                    CodeCharKind::Comment => None,
+                    FullCodeCharKind::Normal => Some(c),
+                    _ => None,
                }
            })
            .collect()
--- a/src/expr.rs
+++ b/src/expr.rs
@ -23,7 +23,7 @@ use utils::{span_after, extra_offset, last_line_width, wrap_str, binary_search,
            semicolon_for_stmt};
 use visitor::FmtVisitor;
 use config::{Config, StructLitStyle, MultilineStyle};
-use comment::{FindUncommented, rewrite_comment, contains_comment};
+use comment::{FindUncommented, rewrite_comment, contains_comment, recover_comment_removed};
 use types::rewrite_path;
 use items::{span_lo_for_arg, span_hi_for_arg};
 use chains::rewrite_chain;
@ -35,7 +35,7 @@ use syntax::visit::Visitor;

 impl Rewrite for ast::Expr {
    fn rewrite(&self, context: &RewriteContext, width: usize, offset: Indent) -> Option<String> {
-        match self.node {
+        let result = match self.node {
            ast::Expr_::ExprVec(ref expr_vec) => {
                rewrite_array(expr_vec.iter().map(|e| &**e),
                              mk_sp(span_after(self.span, "[", context.codemap), self.span.hi),
@ -207,7 +207,8 @@ impl Rewrite for ast::Expr {
                         width,
                         offset)
            }
-        }
+        };
+        result.and_then(|res| recover_comment_removed(res, self.span, context, width, offset))
    }
 }

@ -478,7 +479,7 @@ impl Rewrite for ast::Block {

 impl Rewrite for ast::Stmt {
    fn rewrite(&self, context: &RewriteContext, _width: usize, offset: Indent) -> Option<String> {
-        match self.node {
+        let result = match self.node {
            ast::Stmt_::StmtDecl(ref decl, _) => {
                if let ast::Decl_::DeclLocal(ref local) = decl.node {
                    local.rewrite(context, context.config.max_width, offset)
@ -499,7 +500,8 @@ impl Rewrite for ast::Stmt {
                  .map(|s| s + suffix)
            }
            ast::Stmt_::StmtMac(..) => None,
-        }
+        };
+        result.and_then(|res| recover_comment_removed(res, self.span, context, _width, offset))
    }
 }

--- a/tests/source/paths.rs
+++ b/tests/source/paths.rs
@ -17,7 +17,8 @@ fn main() {

    < *mut JSObject >:: relocate(entry);

-    let x: Foo/*::*/<A   >;
+    let x: Foo<A   >;
+    let x: Foo/*::*/<A>;
 }

 fn op(foo: Bar, key : &[u8], upd : Fn(Option<&memcache::Item> , Baz  ) -> Result) -> MapResult {}
--- a/tests/target/comment-not-disappear.rs
+++ b/tests/target/comment-not-disappear.rs
@ -0,0 +1,42 @@
+// All the comments here should not disappear.
+
+fn a() {
+    match x {
+        X |
+        // A comment
+        Y => {}
+    };
+}
+
+fn b() {
+    match x {
+        X =>
+            // A comment
+            y
+    }
+}
+
+fn c() {
+    a() /* ... */;
+}
+
+fn foo() -> Vec<i32> {
+    (0..11)
+        .map(|x|
+        // This comment disappears.
+        if x % 2 == 0 { x } else { x * 2 })
+        .collect()
+}
+
+fn d() {
+    if true /* and ... */ {
+        a();
+    }
+}
+
+fn calc_page_len(prefix_len: usize, sofar: usize) -> usize {
+    2 // page type and flags
+    + 1 // stored depth
+    + 2 // stored count
+    + prefix_len + sofar // sum of size of all the actual items
+}
--- a/tests/target/paths.rs
+++ b/tests/target/paths.rs
@ -17,6 +17,7 @@ fn main() {
    <*mut JSObject>::relocate(entry);

    let x: Foo<A>;
+    let x: Foo/*::*/<A>;
 }

 fn op(foo: Bar, key: &[u8], upd: Fn(Option<&memcache::Item>, Baz) -> Result) -> MapResult {}