Merge pull request #756 from cassiersg/missed-comments

Detect (and recover) when comments disappear
2016-01-11 01:16:52 +01:00 · 2016-01-11 01:16:52 +01:00 · b236819f72
commit b236819f72
parent 0badc128f7 b117d7b2b8
5 changed files with 285 additions and 43 deletions
--- a/src/comment.rs
+++ b/src/comment.rs
@ -8,13 +8,17 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-// Format comments.
+// Formatting and tools for comments.

-use std::iter;
+use std::{self, iter};
+
+use syntax::codemap::Span;

 use Indent;
 use config::Config;
+use rewrite::RewriteContext;
 use string::{StringFormat, rewrite_string};
+use utils::wrap_str;

 pub fn rewrite_comment(orig: &str,
                       block_style: bool,
@ -150,7 +154,7 @@ impl FindUncommented for str {
                }
                Some(c) => {
                    match kind {
-                        CodeCharKind::Normal if b == c => {}
+                        FullCodeCharKind::Normal if b == c => {}
                        _ => {
                            needle_iter = pat.chars();
                        }
@ -174,7 +178,7 @@ impl FindUncommented for str {
 pub fn find_comment_end(s: &str) -> Option<usize> {
    let mut iter = CharClasses::new(s.char_indices());
    for (kind, (i, _c)) in &mut iter {
-        if kind == CodeCharKind::Normal {
+        if kind == FullCodeCharKind::Normal {
            return Some(i);
        }
    }
@ -189,7 +193,7 @@ pub fn find_comment_end(s: &str) -> Option<usize> {

 /// Returns true if text contains any comment.
 pub fn contains_comment(text: &str) -> bool {
-    CharClasses::new(text.chars()).any(|(kind, _)| kind == CodeCharKind::Comment)
+    CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
 }

 struct CharClasses<T>
@ -234,12 +238,47 @@ enum CharClassesStatus {
    LineComment,
 }

+/// Distinguish between functional parts of code and comments
 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 pub enum CodeCharKind {
    Normal,
    Comment,
 }

+/// Distinguish between functional parts of code and comments,
+/// describing opening and closing of comments for ease when chunking
+/// code from tagged characters
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+enum FullCodeCharKind {
+    Normal,
+    /// The first character of a comment, there is only one for a comment (always '/')
+    StartComment,
+    /// Any character inside a comment including the second character of comment
+    /// marks ("//", "/*")
+    InComment,
+    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
+    EndComment,
+}
+
+impl FullCodeCharKind {
+    fn is_comment(&self) -> bool {
+        match *self {
+            FullCodeCharKind::Normal => false,
+            FullCodeCharKind::StartComment |
+            FullCodeCharKind::InComment |
+            FullCodeCharKind::EndComment => true,
+        }
+    }
+
+    fn to_codecharkind(&self) -> CodeCharKind {
+        if self.is_comment() {
+            CodeCharKind::Comment
+        } else {
+            CodeCharKind::Normal
+        }
+    }
+}
+
 impl<T> CharClasses<T>
    where T: Iterator,
          T::Item: RichChar
@ -256,9 +295,9 @@ impl<T> Iterator for CharClasses<T>
    where T: Iterator,
          T::Item: RichChar
 {
-    type Item = (CodeCharKind, T::Item);
+    type Item = (FullCodeCharKind, T::Item);

-    fn next(&mut self) -> Option<(CodeCharKind, T::Item)> {
+    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = try_opt!(self.base.next());
        let chr = item.get_char();
        self.status = match self.status {
@ -286,11 +325,11 @@ impl<T> Iterator for CharClasses<T>
                        match self.base.peek() {
                            Some(next) if next.get_char() == '*' => {
                                self.status = CharClassesStatus::BlockCommentOpening(1);
-                                return Some((CodeCharKind::Comment, item));
+                                return Some((FullCodeCharKind::StartComment, item));
                            }
                            Some(next) if next.get_char() == '/' => {
                                self.status = CharClassesStatus::LineComment;
-                                return Some((CodeCharKind::Comment, item));
+                                return Some((FullCodeCharKind::StartComment, item));
                            }
                            _ => CharClassesStatus::Normal,
                        }
@ -299,12 +338,7 @@ impl<T> Iterator for CharClasses<T>
                }
            }
            CharClassesStatus::BlockComment(deepness) => {
-                if deepness == 0 {
-                    // This is the closing '/'
-                    assert_eq!(chr, '/');
-                    self.status = CharClassesStatus::Normal;
-                    return Some((CodeCharKind::Comment, item));
-                }
+                assert!(deepness != 0);
                self.status = match self.base.peek() {
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
@ -314,34 +348,92 @@ impl<T> Iterator for CharClasses<T>
                    }
                    _ => CharClassesStatus::BlockComment(deepness),
                };
-                return Some((CodeCharKind::Comment, item));
+                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentOpening(deepness) => {
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
-                return Some((CodeCharKind::Comment, item));
+                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentClosing(deepness) => {
                assert_eq!(chr, '/');
-                self.status = if deepness == 0 {
-                    CharClassesStatus::Normal
+                if deepness == 0 {
+                    self.status = CharClassesStatus::Normal;
+                    return Some((FullCodeCharKind::EndComment, item));
                } else {
-                    CharClassesStatus::BlockComment(deepness)
-                };
-                return Some((CodeCharKind::Comment, item));
+                    self.status = CharClassesStatus::BlockComment(deepness);
+                    return Some((FullCodeCharKind::InComment, item));
+                }
            }
            CharClassesStatus::LineComment => {
-                self.status = match chr {
-                    '\n' => CharClassesStatus::Normal,
-                    _ => CharClassesStatus::LineComment,
-                };
-                return Some((CodeCharKind::Comment, item));
+                match chr {
+                    '\n' => {
+                        self.status = CharClassesStatus::Normal;
+                        return Some((FullCodeCharKind::EndComment, item));
+                    }
+                    _ => {
+                        self.status = CharClassesStatus::LineComment;
+                        return Some((FullCodeCharKind::InComment, item));
+                    }
+                }
            }
        };
-        Some((CodeCharKind::Normal, item))
+        Some((FullCodeCharKind::Normal, item))
    }
 }

+/// Iterator over functional and commented parts of a string. Any part of a string is either
+/// functional code, or *one* block comment, or *one* line comment. Whitespace between
+/// comments is functional code. Line comments contain their ending newlines.
+struct UngroupedCommentCodeSlices<'a> {
+    slice: &'a str,
+    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
+}
+
+impl<'a> UngroupedCommentCodeSlices<'a> {
+    fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
+        UngroupedCommentCodeSlices {
+            slice: code,
+            iter: CharClasses::new(code.char_indices()).peekable(),
+        }
+    }
+}
+
+impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
+    type Item = (CodeCharKind, usize, &'a str);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let (kind, (start_idx, _)) = try_opt!(self.iter.next());
+        match kind {
+            FullCodeCharKind::Normal => {
+                // Consume all the Normal code
+                while let Some(&(FullCodeCharKind::Normal, (_, _))) = self.iter.peek() {
+                    let _ = self.iter.next();
+                }
+            }
+            FullCodeCharKind::StartComment => {
+                // Consume the whole comment
+                while let Some((FullCodeCharKind::InComment, (_, _))) = self.iter.next() {}
+            }
+            _ => panic!(),
+        }
+        let slice = match self.iter.peek() {
+            Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
+            None => &self.slice[start_idx..],
+        };
+        Some((if kind.is_comment() {
+            CodeCharKind::Comment
+        } else {
+            CodeCharKind::Normal
+        },
+              start_idx,
+              slice))
+    }
+}
+
+
+
+
 /// Iterator over an alternating sequence of functional and commented parts of
 /// a string. The first item is always a, possibly zero length, subslice of
 /// functional text. Line style comments contain their ending newlines.
@ -383,7 +475,7 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
                first_whitespace = Some(i);
            }

-            if kind == self.last_slice_kind && !is_comment_connector {
+            if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
                let last_index = match first_whitespace {
                    Some(j) => j,
                    None => i,
@ -419,20 +511,124 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
    }
 }

+/// Checks whether `new` dropped any comment from `span`. If it did, return the previous text
+/// (if it fits in the width/offset, else return None); otherwise return `new`.
+pub fn recover_comment_removed(new: String,
+                               span: Span,
+                               context: &RewriteContext,
+                               width: usize,
+                               offset: Indent)
+                               -> Option<String> {
+    let snippet = context.snippet(span);
+    if changed_comment_content(&snippet, &new) {
+        // We missed some comments
+        // Keep previous formatting if it satisfies the constraints
+        return wrap_str(snippet, context.config.max_width, width, offset);
+    } else {
+        Some(new)
+    }
+}
+
+/// Return true if the two strings of code have different payloads of comments.
+/// The payload of comments is everything in the string except:
+/// 	- actual code (not comments)
+/// 	- comment start/end marks
+///		- whitespace
+///		- '*' at the beginning of lines in block comments
+fn changed_comment_content(orig: &str, new: &str) -> bool {
+    // Cannot write this as a fn since we cannot return types containing closures
+    let code_comment_content = |code| {
+        let slices = UngroupedCommentCodeSlices::new(code);
+        slices.filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
+              .flat_map(|(_, _, s)| CommentReducer::new(s))
+    };
+    let res = code_comment_content(orig).ne(code_comment_content(new));
+    debug!("comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
+           res,
+           orig,
+           new,
+           code_comment_content(orig).collect::<String>(),
+           code_comment_content(new).collect::<String>());
+    res
+}
+
+
+/// Iterator over the 'payload' characters of a comment.
+/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
+/// The comment must be one comment, i.e. not more than one start mark (no multiple line comments,
+/// for example).
+struct CommentReducer<'a> {
+    is_block: bool,
+    at_start_line: bool,
+    iter: std::str::Chars<'a>,
+}
+
+impl<'a> CommentReducer<'a> {
+    fn new(comment: &'a str) -> CommentReducer<'a> {
+        let is_block = comment.starts_with("/*");
+        let comment = remove_comment_header(comment);
+        CommentReducer {
+            is_block: is_block,
+            at_start_line: false, // There are no supplementary '*' on the first line
+            iter: comment.chars(),
+        }
+    }
+}
+
+impl<'a> Iterator for CommentReducer<'a> {
+    type Item = char;
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let mut c = try_opt!(self.iter.next());
+            if self.is_block && self.at_start_line {
+                while c.is_whitespace() {
+                    c = try_opt!(self.iter.next());
+                }
+                // Ignore leading '*'
+                if c == '*' {
+                    c = try_opt!(self.iter.next());
+                }
+            } else {
+                if c == '\n' {
+                    self.at_start_line = true;
+                }
+            }
+            if !c.is_whitespace() {
+                return Some(c);
+            }
+        }
+    }
+}
+
+
+fn remove_comment_header(comment: &str) -> &str {
+    if comment.starts_with("///") || comment.starts_with("//!") {
+        &comment[3..]
+    } else if comment.starts_with("//") {
+        &comment[2..]
+    } else if comment.starts_with("/**") || comment.starts_with("/*!") {
+        &comment[3..comment.len() - 2]
+    } else {
+        assert!(comment.starts_with("/*"),
+                format!("string '{}' is not a comment", comment));
+        &comment[2..comment.len() - 2]
+    }
+}
+
 #[cfg(test)]
 mod test {
-    use super::{CharClasses, CodeCharKind, contains_comment, rewrite_comment, FindUncommented,
-                CommentCodeSlices};
+    use super::{CharClasses, CodeCharKind, FullCodeCharKind, contains_comment, rewrite_comment,
+                FindUncommented, CommentCodeSlices};
    use Indent;

    #[test]
    fn char_classes() {
        let mut iter = CharClasses::new("//\n\n".chars());

-        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Comment, '\n'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Normal, '\n'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

@ -507,8 +703,8 @@ mod test {
        CharClasses::new(text.chars())
            .filter_map(|(s, c)| {
                match s {
-                    CodeCharKind::Normal => Some(c),
-                    CodeCharKind::Comment => None,
+                    FullCodeCharKind::Normal => Some(c),
+                    _ => None,
                }
            })
            .collect()
--- a/src/expr.rs
+++ b/src/expr.rs
@ -23,7 +23,7 @@ use utils::{span_after, extra_offset, last_line_width, wrap_str, binary_search,
            semicolon_for_stmt};
 use visitor::FmtVisitor;
 use config::{Config, StructLitStyle, MultilineStyle};
-use comment::{FindUncommented, rewrite_comment, contains_comment};
+use comment::{FindUncommented, rewrite_comment, contains_comment, recover_comment_removed};
 use types::rewrite_path;
 use items::{span_lo_for_arg, span_hi_for_arg};
 use chains::rewrite_chain;
@ -35,7 +35,7 @@ use syntax::visit::Visitor;

 impl Rewrite for ast::Expr {
    fn rewrite(&self, context: &RewriteContext, width: usize, offset: Indent) -> Option<String> {
-        match self.node {
+        let result = match self.node {
            ast::Expr_::ExprVec(ref expr_vec) => {
                rewrite_array(expr_vec.iter().map(|e| &**e),
                              mk_sp(span_after(self.span, "[", context.codemap), self.span.hi),
@ -207,7 +207,8 @@ impl Rewrite for ast::Expr {
                         width,
                         offset)
            }
-        }
+        };
+        result.and_then(|res| recover_comment_removed(res, self.span, context, width, offset))
    }
 }

@ -478,7 +479,7 @@ impl Rewrite for ast::Block {

 impl Rewrite for ast::Stmt {
    fn rewrite(&self, context: &RewriteContext, _width: usize, offset: Indent) -> Option<String> {
-        match self.node {
+        let result = match self.node {
            ast::Stmt_::StmtDecl(ref decl, _) => {
                if let ast::Decl_::DeclLocal(ref local) = decl.node {
                    local.rewrite(context, context.config.max_width, offset)
@ -499,7 +500,8 @@ impl Rewrite for ast::Stmt {
                  .map(|s| s + suffix)
            }
            ast::Stmt_::StmtMac(..) => None,
-        }
+        };
+        result.and_then(|res| recover_comment_removed(res, self.span, context, _width, offset))
    }
 }

--- a/tests/source/paths.rs
+++ b/tests/source/paths.rs
@ -17,7 +17,8 @@ fn main() {

    < *mut JSObject >:: relocate(entry);

-    let x: Foo/*::*/<A   >;
+    let x: Foo<A   >;
+    let x: Foo/*::*/<A>;
 }

 fn op(foo: Bar, key : &[u8], upd : Fn(Option<&memcache::Item> , Baz  ) -> Result) -> MapResult {}
--- a/tests/target/comment-not-disappear.rs
+++ b/tests/target/comment-not-disappear.rs
@ -0,0 +1,42 @@
+// All the comments here should not disappear.
+
+fn a() {
+    match x {
+        X |
+        // A comment
+        Y => {}
+    };
+}
+
+fn b() {
+    match x {
+        X =>
+            // A comment
+            y
+    }
+}
+
+fn c() {
+    a() /* ... */;
+}
+
+fn foo() -> Vec<i32> {
+    (0..11)
+        .map(|x|
+        // This comment disappears.
+        if x % 2 == 0 { x } else { x * 2 })
+        .collect()
+}
+
+fn d() {
+    if true /* and ... */ {
+        a();
+    }
+}
+
+fn calc_page_len(prefix_len: usize, sofar: usize) -> usize {
+    2 // page type and flags
+    + 1 // stored depth
+    + 2 // stored count
+    + prefix_len + sofar // sum of size of all the actual items
+}
--- a/tests/target/paths.rs
+++ b/tests/target/paths.rs
@ -17,6 +17,7 @@ fn main() {
    <*mut JSObject>::relocate(entry);

    let x: Foo<A>;
+    let x: Foo/*::*/<A>;
 }

 fn op(foo: Bar, key: &[u8], upd: Fn(Option<&memcache::Item>, Baz) -> Result) -> MapResult {}