// rust/clippy_lints/src/tabs_in_doc_comments.rs

use clippy_utils::diagnostics::span_lint_and_sugg;
use rustc_ast::ast;
use rustc_errors::Applicability;
use rustc_lint::{EarlyContext, EarlyLintPass};
use rustc_session::declare_lint_pass;
use rustc_span::{BytePos, Span};

declare_clippy_lint! {
    /// ### What it does
    /// Checks doc comments for usage of tab characters.
    ///
    /// ### Why is this bad?
    /// The rust style-guide promotes spaces instead of tabs for indentation.
    /// To keep a consistent view on the source, also doc comments should not have tabs.
    /// Also, explaining ascii-diagrams containing tabs can get displayed incorrectly when the
    /// display settings of the author and reader differ.
    ///
    /// ### Example
    /// ```no_run
    /// ///
    /// /// Struct to hold two strings:
    /// /// 	- first		one
    /// /// 	- second	one
    /// pub struct DoubleString {
    ///    ///
    ///    /// 	- First String:
    ///    /// 		- needs to be inside here
    ///    first_string: String,
    ///    ///
    ///    /// 	- Second String:
    ///    /// 		- needs to be inside here
    ///    second_string: String,
    ///}
    /// ```
    ///
    /// Will be converted to:
    /// ```no_run
    /// ///
    /// /// Struct to hold two strings:
    /// ///     - first        one
    /// ///     - second    one
    /// pub struct DoubleString {
    ///    ///
    ///    ///     - First String:
    ///    ///         - needs to be inside here
    ///    first_string: String,
    ///    ///
    ///    ///     - Second String:
    ///    ///         - needs to be inside here
    ///    second_string: String,
    ///}
    /// ```
    #[clippy::version = "1.41.0"]
    pub TABS_IN_DOC_COMMENTS,
    style,
    "using tabs in doc comments is not recommended"
}

// Declares the `TabsInDocComments` lint pass and associates it with `TABS_IN_DOC_COMMENTS`.
declare_lint_pass!(TabsInDocComments => [TABS_IN_DOC_COMMENTS]);
impl TabsInDocComments {
fn warn_if_tabs_in_doc(cx: &EarlyContext<'_>, attr: &ast::Attribute) {
2020-07-22 17:59:17 +03:00
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
2019-11-15 16:18:08 +01:00
let comment = comment.as_str();
for (lo, hi) in get_chunks_of_tabs(comment) {
// +3 skips the opening delimiter
2019-11-15 16:18:08 +01:00
let new_span = Span::new(
2020-07-22 17:59:17 +03:00
attr.span.lo() + BytePos(3 + lo),
attr.span.lo() + BytePos(3 + hi),
2019-11-15 16:18:08 +01:00
attr.span.ctxt(),
2021-04-18 14:27:04 +02:00
attr.span.parent(),
2019-11-15 16:18:08 +01:00
);
span_lint_and_sugg(
cx,
TABS_IN_DOC_COMMENTS,
new_span,
"using tabs in doc comments is not recommended",
"consider using four spaces per tab",
" ".repeat((hi - lo) as usize),
Applicability::MaybeIncorrect,
);
}
}
}
}
impl EarlyLintPass for TabsInDocComments {
    /// Runs the tab check on each attribute passed to this hook; attributes that are not
    /// doc comments are ignored by `warn_if_tabs_in_doc`.
    fn check_attribute(&mut self, cx: &EarlyContext<'_>, attribute: &ast::Attribute) {
        Self::warn_if_tabs_in_doc(cx, attribute);
    }
}

/// Scans `the_str` for runs of consecutive tab characters.
///
/// Returns one `(start, end)` pair per run, in order of appearance, where `start` is the
/// byte offset of the first tab (inclusive) and `end` is the byte offset just past the last
/// tab (exclusive).
///
/// e.g. `"sd\tasd\t\taa"` is converted to `[(2, 3), (6, 8)]`:
///
/// ```text
/// byte offset: 0  1  2  3  4  5  6  7  8  9
/// character:   s  d  \t a  s  d  \t \t a  a
///                    ^--^        ^-----^
/// ```
///
/// # Panics
///
/// Panics if a run boundary lies at a byte offset that does not fit into a `u32`.
fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> {
    let line_length_way_too_long = "doc comment longer than 2^32 chars";
    let to_u32 = |offset: usize| u32::try_from(offset).expect(line_length_way_too_long);

    let mut spans: Vec<(u32, u32)> = Vec::new();
    // Start offset of the tab run currently being scanned, if any.
    let mut open_run: Option<u32> = None;

    // We need _byte_ offsets, not char positions. A tab is the single ASCII byte 0x09, and
    // that byte never occurs inside a multi-byte UTF-8 sequence, so scanning the raw bytes
    // finds exactly the tab characters at their byte offsets.
    for (offset, byte) in the_str.bytes().enumerate() {
        match (byte == b'\t', open_run) {
            // first tab of a new run
            (true, None) => open_run = Some(to_u32(offset)),
            // first non-tab byte after a run closes it
            (false, Some(start)) => {
                spans.push((start, to_u32(offset)));
                open_run = None;
            },
            // inside a run, or inside ordinary text: nothing to record
            _ => {},
        }
    }

    // only possible when tabs are at the end, insert last group
    if let Some(start) = open_run {
        spans.push((start, to_u32(the_str.len())));
    }

    spans
}


#[cfg(test)]
mod tests_for_get_chunks_of_tabs {
    use super::get_chunks_of_tabs;

    // Offsets are byte offsets: the Han character occupies bytes 1..4, so the tab is at 4.
    #[test]
    fn test_unicode_han_string() {
        let res = get_chunks_of_tabs(" \u{4f4d}\t");

        assert_eq!(res, vec![(4, 5)]);
    }

    // A multi-byte character directly after a tab must close the run at the tab's end.
    #[test]
    fn test_unicode_after_tab() {
        let res = get_chunks_of_tabs("\t\u{4f4d}\t\tx");

        assert_eq!(res, vec![(0, 1), (4, 6)]);
    }

    #[test]
    fn test_empty_string() {
        let res = get_chunks_of_tabs("");

        assert_eq!(res, vec![]);
    }

    #[test]
    fn test_simple() {
        let res = get_chunks_of_tabs("sd\t\t\taa");

        assert_eq!(res, vec![(2, 5)]);
    }

    #[test]
    fn test_only_t() {
        let res = get_chunks_of_tabs("\t\t");

        assert_eq!(res, vec![(0, 2)]);
    }

    #[test]
    fn test_only_one_t() {
        let res = get_chunks_of_tabs("\t");

        assert_eq!(res, vec![(0, 1)]);
    }

    #[test]
    fn test_double() {
        let res = get_chunks_of_tabs("sd\tasd\t\taa");

        assert_eq!(res, vec![(2, 3), (6, 8)]);
    }

    #[test]
    fn test_start() {
        let res = get_chunks_of_tabs("\t\taa");

        assert_eq!(res, vec![(0, 2)]);
    }

    #[test]
    fn test_end() {
        let res = get_chunks_of_tabs("aa\t\t");

        assert_eq!(res, vec![(2, 4)]);
    }

    #[test]
    fn test_start_single() {
        let res = get_chunks_of_tabs("\taa");

        assert_eq!(res, vec![(0, 1)]);
    }

    #[test]
    fn test_end_single() {
        let res = get_chunks_of_tabs("aa\t");

        assert_eq!(res, vec![(2, 3)]);
    }

    // Single tabs at both ends form two separate runs.
    #[test]
    fn test_start_and_end_single() {
        let res = get_chunks_of_tabs("\ta\t");

        assert_eq!(res, vec![(0, 1), (2, 3)]);
    }

    #[test]
    fn test_no_tabs() {
        let res = get_chunks_of_tabs("dsfs");

        assert_eq!(res, vec![]);
    }
}