use rustc::lint::*;
use syntax::ast;
use syntax::codemap::{Span, BytePos};
use utils::span_lint;

/// **What it does:** Checks for the presence of `_`, `::` or camel-case words
/// outside ticks in documentation.
///
/// **Why is this bad?** *Rustdoc* supports markdown formatting, `_`, `::` and
/// camel-case probably indicates some code which should be included between
/// ticks. `_` can also be used for emphasis in markdown, this lint tries to
/// consider that.
///
/// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks
/// for is limited, and there are still false positives.
///
/// **Examples:**
/// ```rust
/// /// Do something with the foo_bar parameter. See also that::other::module::foo.
/// // ^ `foo_bar` and `that::other::module::foo` should be ticked.
/// fn doit(foo_bar) { .. }
/// ```
declare_lint! {
    pub DOC_MARKDOWN,
    Warn,
    "presence of `_`, `::` or camel-case outside backticks in documentation"
}

/// Lint pass state for `DOC_MARKDOWN`.
#[derive(Clone)]
pub struct Doc {
    /// Words that are never reported, even when they look like code.
    valid_idents: Vec<String>,
}

impl Doc {
    /// Creates the pass with the list of words that must not be linted.
    pub fn new(valid_idents: Vec<String>) -> Self {
        Doc { valid_idents: valid_idents }
    }
}

impl LintPass for Doc {
    /// Returns the lints declared by this pass (only `DOC_MARKDOWN`).
    fn get_lints(&self) -> LintArray {
        lint_array![DOC_MARKDOWN]
    }
}

impl EarlyLintPass for Doc {
    /// Checks the documentation attributes attached to the crate itself.
    fn check_crate(&mut self, cx: &EarlyContext, krate: &ast::Crate) {
        check_attrs(cx, &self.valid_idents, &krate.attrs);
    }

    /// Checks the documentation attributes attached to an item.
    fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) {
        check_attrs(cx, &self.valid_idents, &item.attrs);
    }
}

/// Cleanup documentation decoration (`///` and such).
///
/// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or
/// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of
/// the span but this function is inspired from the latter.
#[allow(cast_possible_truncation)] pub fn strip_doc_comment_decoration((comment, span): (String, Span)) -> Vec<(String, Span)> { // one-line comments lose their prefix const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"]; for prefix in ONELINERS { if comment.starts_with(*prefix) { return vec![(comment[prefix.len()..].to_owned(), Span { lo: span.lo + BytePos(prefix.len() as u32), ..span })]; } } if comment.starts_with("/*") { return comment[3..comment.len() - 2] .lines() .map(|line| { let offset = line.as_ptr() as usize - comment.as_ptr() as usize; debug_assert_eq!(offset as u32 as usize, offset); (line.to_owned(), Span { lo: span.lo + BytePos(offset as u32), ..span }) }) .collect(); } panic!("not a doc-comment: {}", comment); } pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) { let mut docs = vec![]; for attr in attrs { if attr.is_sugared_doc { if let Some(ref doc) = attr.value_str() { let doc = (*doc.as_str()).to_owned(); docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span))); } } } if !docs.is_empty() { let _ = check_doc(cx, valid_idents, &docs); } } #[allow(while_let_loop)] // #362 fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(String, Span)]) -> Result<(), ()> { // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context. // There really is no markdown specification that would disambiguate this properly. 
This is // what GitHub and Rustdoc do: // // foo_bar test_quz → foo_bar test_quz // foo_bar_baz → foo_bar_baz (note that the “official” spec says this should be emphasized) // _foo bar_ test_quz_ → foo bar test_quz_ // \_foo bar\_ → _foo bar_ // (_baz_) → (baz) // foo _ bar _ baz → foo _ bar _ baz /// Character that can appear in a path fn is_path_char(c: char) -> bool { match c { t if t.is_alphanumeric() => true, ':' | '_' => true, _ => false, } } #[derive(Clone, Debug)] /// This type is used to iterate through the documentation characters, keeping the span at the /// same time. struct Parser<'a> { /// First byte of the current potential match current_word_begin: usize, /// List of lines and their associated span docs: &'a [(String, Span)], /// Index of the current line we are parsing line: usize, /// Whether we are in a link link: bool, /// Whether we are at the beginning of a line new_line: bool, /// Whether we were to the end of a line last time `next` was called reset: bool, /// The position of the current character within the current line pos: usize, } impl<'a> Parser<'a> { fn advance_begin(&mut self) { self.current_word_begin = self.pos; } fn line(&self) -> (&'a str, Span) { let (ref doc, span) = self.docs[self.line]; (doc, span) } fn peek(&self) -> Option { self.line().0[self.pos..].chars().next() } #[allow(while_let_on_iterator)] // borrowck complains about for fn jump_to(&mut self, n: char) -> Result { while let Some((new_line, c)) = self.next() { if c == n { self.advance_begin(); return Ok(new_line); } } Err(()) } fn next_line(&mut self) { self.pos = 0; self.current_word_begin = 0; self.line += 1; self.new_line = true; } fn put_back(&mut self, c: char) { self.pos -= c.len_utf8(); } #[allow(cast_possible_truncation)] fn word(&self) -> (&'a str, Span) { let begin = self.current_word_begin; let end = self.pos; debug_assert_eq!(end as u32 as usize, end); debug_assert_eq!(begin as u32 as usize, begin); let (doc, mut span) = self.line(); span.hi = span.lo + 
BytePos(end as u32); span.lo = span.lo + BytePos(begin as u32); (&doc[begin..end], span) } } impl<'a> Iterator for Parser<'a> { type Item = (bool, char); fn next(&mut self) -> Option<(bool, char)> { while self.line < self.docs.len() { if self.reset { self.line += 1; self.reset = false; self.pos = 0; self.current_word_begin = 0; } let mut chars = self.line().0[self.pos..].chars(); let c = chars.next(); if let Some(c) = c { self.pos += c.len_utf8(); let new_line = self.new_line; self.new_line = c == '\n' || (self.new_line && c.is_whitespace()); return Some((new_line, c)); } else if self.line == self.docs.len() - 1 { return None; } else { self.new_line = true; self.reset = true; self.pos += 1; return Some((true, '\n')); } } None } } let mut parser = Parser { current_word_begin: 0, docs: docs, line: 0, link: false, new_line: true, reset: false, pos: 0, }; /// Check for fanced code block. macro_rules! check_block { ($parser:expr, $c:tt, $new_line:expr) => {{ check_block!($parser, $c, $c, $new_line) }}; ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{ fn check_block(parser: &mut Parser, new_line: bool) -> Result { if new_line { let mut lookup_parser = parser.clone(); if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) { *parser = lookup_parser; // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~ let mut open_count = 3; while let Some((false, $c)) = parser.next() { open_count += 1; } loop { loop { if try!(parser.jump_to($c_expr)) { break; } } lookup_parser = parser.clone(); let a = lookup_parser.next(); let b = lookup_parser.next(); if let (Some((false, $c)), Some((false, $c))) = (a, b) { let mut close_count = 3; while let Some((false, $c)) = lookup_parser.next() { close_count += 1; } if close_count == open_count { *parser = lookup_parser; return Ok(true); } } } } } Ok(false) } check_block(&mut $parser, $new_line) }}; } loop { match parser.next() { Some((new_line, c)) => { match c { '#' if 
new_line => { // don’t warn on titles parser.next_line(); }, '`' => { if try!(check_block!(parser, '`', new_line)) { continue; } // not a code block, just inline code try!(parser.jump_to('`')); }, '~' => { if try!(check_block!(parser, '~', new_line)) { continue; } // ~ does not introduce inline code, but two of them introduce // strikethrough. Too bad for the consistency but we don't care about // strikethrough. }, '[' => { // Check for a reference definition `[foo]:` at the beginning of a line let mut link = true; if new_line { let mut lookup_parser = parser.clone(); if lookup_parser.any(|(_, c)| c == ']') { if let Some((_, ':')) = lookup_parser.next() { lookup_parser.next_line(); parser = lookup_parser; link = false; } } } parser.advance_begin(); parser.link = link; }, ']' if parser.link => { parser.link = false; match parser.peek() { Some('(') => { try!(parser.jump_to(')')); }, Some('[') => { try!(parser.jump_to(']')); }, Some(_) => continue, None => return Err(()), } }, c if !is_path_char(c) => { parser.advance_begin(); }, _ => { if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) { parser.put_back(c); } let (word, span) = parser.word(); check_word(cx, valid_idents, word, span); parser.advance_begin(); }, } }, None => break, } } Ok(()) } fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) { /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok). 
fn is_camel_case(s: &str) -> bool { if s.starts_with(|c: char| c.is_digit(10)) { return false; } let s = if s.ends_with('s') { &s[..s.len() - 1] } else { s }; s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 && s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0 } fn has_underscore(s: &str) -> bool { s != "_" && !s.contains("\\_") && s.contains('_') } // Trim punctuation as in `some comment (see foo::bar).` // ^^ // Or even as in `_foo bar_` which is emphasized. let word = word.trim_matches(|c: char| !c.is_alphanumeric()); if valid_idents.iter().any(|i| i == word) { return; } if has_underscore(word) || word.contains("::") || is_camel_case(word) { span_lint(cx, DOC_MARKDOWN, span, &format!("you should put `{}` between ticks in the documentation", word)); } }