rustdoc: Unindent doc fragments on Attributes construction

This commit is contained in:
Vadim Petrochenkov 2022-04-21 15:01:44 +03:00
parent 3d3dafb771
commit 7803a41151
7 changed files with 89 additions and 133 deletions

View File

@ -1,13 +1,11 @@
use std::cell::RefCell;
use std::default::Default;
use std::fmt;
use std::hash::Hash;
use std::iter;
use std::lazy::SyncOnceCell as OnceCell;
use std::path::PathBuf;
use std::rc::Rc;
use std::sync::Arc;
use std::vec;
use std::{cmp, fmt, iter};
use arrayvec::ArrayVec;
@ -55,6 +53,9 @@ crate use self::Type::{
};
crate use self::Visibility::{Inherited, Public};
#[cfg(test)]
mod tests;
crate type ItemIdSet = FxHashSet<ItemId>;
#[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
@ -1028,6 +1029,86 @@ crate fn collapse_doc_fragments(doc_strings: &[DocFragment]) -> String {
acc
}
/// Removes excess indentation on comments in order for the Markdown
/// to be parsed correctly. This is necessary because the convention for
/// writing documentation is to provide a space between the /// or //! marker
/// and the doc text, but Markdown is whitespace-sensitive. For example,
/// a block of text with four-space indentation is parsed as a code block,
/// so if we didn't unindent comments, these list items
///
/// /// A list:
/// ///
/// /// - Foo
/// /// - Bar
///
/// would be parsed as if they were in a code block, which is likely not what the user intended.
fn unindent_doc_fragments(docs: &mut Vec<DocFragment>) {
// `add` is used in case the most common sugared doc syntax is used ("/// "). The other
// fragments kind's lines are never starting with a whitespace unless they are using some
// markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
// we need to take into account the fact that the minimum indent minus one (to take this
// whitespace into account).
//
// For example:
//
// /// hello!
// #[doc = "another"]
//
// In this case, you want "hello! another" and not "hello! another".
let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
&& docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
{
// In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
// "decide" how much the minimum indent will be.
1
} else {
0
};
// `min_indent` is used to know how much whitespaces from the start of each lines must be
// removed. Example:
//
// /// hello!
// #[doc = "another"]
//
// In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
// 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
// (5 - 1) whitespaces.
let Some(min_indent) = docs
.iter()
.map(|fragment| {
fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
if line.chars().all(|c| c.is_whitespace()) {
min_indent
} else {
// Compare against either space or tab, ignoring whether they are
// mixed or not.
let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
cmp::min(min_indent, whitespace)
+ if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
}
})
})
.min()
else {
return;
};
for fragment in docs {
if fragment.doc == kw::Empty {
continue;
}
let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
min_indent - add
} else {
min_indent
};
fragment.indent = min_indent;
}
}
/// A link that has not yet been rendered.
///
/// This link will be turned into a rendered link by [`Item::links`].
@ -1119,6 +1200,8 @@ impl Attributes {
}
}
unindent_doc_fragments(&mut doc_strings);
Attributes { doc_strings, other_attrs }
}

View File

@ -20,7 +20,7 @@ fn create_doc_fragment(s: &str) -> Vec<DocFragment> {
fn run_test(input: &str, expected: &str) {
create_default_session_globals_then(|| {
let mut s = create_doc_fragment(input);
unindent_fragments(&mut s);
unindent_doc_fragments(&mut s);
assert_eq!(collapse_doc_fragments(&s), expected);
});
}

View File

@ -1174,8 +1174,6 @@ impl<'a, 'hir, 'tcx> HirCollector<'a, 'hir, 'tcx> {
nested: F,
) {
let ast_attrs = self.tcx.hir().attrs(hir_id);
let mut attrs = Attributes::from_ast(ast_attrs, None);
if let Some(ref cfg) = ast_attrs.cfg(self.tcx, &FxHashSet::default()) {
if !cfg.matches(&self.sess.parse_sess, Some(self.sess.features_untracked())) {
return;
@ -1187,9 +1185,9 @@ impl<'a, 'hir, 'tcx> HirCollector<'a, 'hir, 'tcx> {
self.collector.names.push(name);
}
attrs.unindent_doc_comments();
// The collapse-docs pass won't combine sugared/raw doc attributes, or included files with
// anything else, this will combine them for us.
let attrs = Attributes::from_ast(ast_attrs, None);
if let Some(doc) = attrs.collapsed_doc_value() {
// Use the outermost invocation, so that doctest names come from where the docs were written.
let span = ast_attrs

View File

@ -63,9 +63,7 @@ crate fn early_resolve_intra_doc_links(
}
fn doc_attrs<'a>(attrs: impl Iterator<Item = &'a ast::Attribute>) -> Attributes {
let mut attrs = Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true);
attrs.unindent_doc_comments();
attrs
Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true)
}
struct EarlyDocLinkResolver<'r, 'ra> {

View File

@ -24,9 +24,6 @@ crate use self::strip_private::STRIP_PRIVATE;
mod strip_priv_imports;
crate use self::strip_priv_imports::STRIP_PRIV_IMPORTS;
mod unindent_comments;
crate use self::unindent_comments::UNINDENT_COMMENTS;
mod propagate_doc_cfg;
crate use self::propagate_doc_cfg::PROPAGATE_DOC_CFG;
@ -81,7 +78,6 @@ crate enum Condition {
crate const PASSES: &[Pass] = &[
CHECK_DOC_TEST_VISIBILITY,
STRIP_HIDDEN,
UNINDENT_COMMENTS,
STRIP_PRIVATE,
STRIP_PRIV_IMPORTS,
PROPAGATE_DOC_CFG,
@ -96,7 +92,6 @@ crate const PASSES: &[Pass] = &[
/// The list of passes run by default.
crate const DEFAULT_PASSES: &[ConditionalPass] = &[
ConditionalPass::always(COLLECT_TRAIT_IMPLS),
ConditionalPass::always(UNINDENT_COMMENTS),
ConditionalPass::always(CHECK_DOC_TEST_VISIBILITY),
ConditionalPass::new(STRIP_HIDDEN, WhenNotDocumentHidden),
ConditionalPass::new(STRIP_PRIVATE, WhenNotDocumentPrivate),

View File

@ -1,116 +0,0 @@
//! Removes excess indentation on comments in order for the Markdown
//! to be parsed correctly. This is necessary because the convention for
//! writing documentation is to provide a space between the /// or //! marker
//! and the doc text, but Markdown is whitespace-sensitive. For example,
//! a block of text with four-space indentation is parsed as a code block,
//! so if we didn't unindent comments, these list items
//!
//! /// A list:
//! ///
//! /// - Foo
//! /// - Bar
//!
//! would be parsed as if they were in a code block, which is likely not what the user intended.
use std::cmp;
use rustc_span::symbol::kw;
use crate::clean::{self, DocFragment, DocFragmentKind, Item};
use crate::core::DocContext;
use crate::fold::{self, DocFolder};
use crate::passes::Pass;
#[cfg(test)]
mod tests;
crate const UNINDENT_COMMENTS: Pass = Pass {
name: "unindent-comments",
run: unindent_comments,
description: "removes excess indentation on comments in order for markdown to like it",
};
crate fn unindent_comments(krate: clean::Crate, _: &mut DocContext<'_>) -> clean::Crate {
CommentCleaner.fold_crate(krate)
}
struct CommentCleaner;
impl fold::DocFolder for CommentCleaner {
fn fold_item(&mut self, mut i: Item) -> Option<Item> {
i.attrs.unindent_doc_comments();
Some(self.fold_item_recur(i))
}
}
impl clean::Attributes {
crate fn unindent_doc_comments(&mut self) {
unindent_fragments(&mut self.doc_strings);
}
}
fn unindent_fragments(docs: &mut Vec<DocFragment>) {
// `add` is used in case the most common sugared doc syntax is used ("/// "). The other
// fragments kind's lines are never starting with a whitespace unless they are using some
// markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
// we need to take into account the fact that the minimum indent minus one (to take this
// whitespace into account).
//
// For example:
//
// /// hello!
// #[doc = "another"]
//
// In this case, you want "hello! another" and not "hello! another".
let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
&& docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
{
// In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
// "decide" how much the minimum indent will be.
1
} else {
0
};
// `min_indent` is used to know how much whitespaces from the start of each lines must be
// removed. Example:
//
// /// hello!
// #[doc = "another"]
//
// In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
// 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
// (5 - 1) whitespaces.
let Some(min_indent) = docs
.iter()
.map(|fragment| {
fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
if line.chars().all(|c| c.is_whitespace()) {
min_indent
} else {
// Compare against either space or tab, ignoring whether they are
// mixed or not.
let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
cmp::min(min_indent, whitespace)
+ if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
}
})
})
.min()
else {
return;
};
for fragment in docs {
if fragment.doc == kw::Empty {
continue;
}
let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
min_indent - add
} else {
min_indent
};
fragment.indent = min_indent;
}
}

View File

@ -1,7 +1,6 @@
Available passes for running rustdoc:
check_doc_test_visibility - run various visibility-related lints on doctests
strip-hidden - strips all `#[doc(hidden)]` items from the output
unindent-comments - removes excess indentation on comments in order for markdown to like it
strip-private - strips all private items from a crate which cannot be seen externally, implies strip-priv-imports
strip-priv-imports - strips all private import statements (`use`, `extern crate`) from a crate
propagate-doc-cfg - propagates `#[doc(cfg(...))]` to child items
@ -14,7 +13,6 @@ check-invalid-html-tags - detects invalid HTML tags in doc comments
Default passes for rustdoc:
collect-trait-impls
unindent-comments
check_doc_test_visibility
strip-hidden (when not --document-hidden-items)
strip-private (when not --document-private-items)