rustdoc: Unindent doc fragments on Attributes construction

2022-04-21 15:01:44 +03:00 · 2022-04-21 15:01:44 +03:00 · 7803a41151
commit 7803a41151
parent 3d3dafb771
7 changed files with 89 additions and 133 deletions
--- a/src/librustdoc/clean/types.rs
+++ b/src/librustdoc/clean/types.rs
@ -1,13 +1,11 @@
 use std::cell::RefCell;
 use std::default::Default;
-use std::fmt;
 use std::hash::Hash;
-use std::iter;
 use std::lazy::SyncOnceCell as OnceCell;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::sync::Arc;
-use std::vec;
+use std::{cmp, fmt, iter};

 use arrayvec::ArrayVec;

@ -55,6 +53,9 @@ crate use self::Type::{
 };
 crate use self::Visibility::{Inherited, Public};

+#[cfg(test)]
+mod tests;
+
 crate type ItemIdSet = FxHashSet<ItemId>;

 #[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
@ -1028,6 +1029,86 @@ crate fn collapse_doc_fragments(doc_strings: &[DocFragment]) -> String {
    acc
 }

+/// Removes excess indentation on comments in order for the Markdown
+/// to be parsed correctly. This is necessary because the convention for
+/// writing documentation is to provide a space between the /// or //! marker
+/// and the doc text, but Markdown is whitespace-sensitive. For example,
+/// a block of text with four-space indentation is parsed as a code block,
+/// so if we didn't unindent comments, these list items
+///
+/// /// A list:
+/// ///
+/// ///    - Foo
+/// ///    - Bar
+///
+/// would be parsed as if they were in a code block, which is likely not what the user intended.
+///
+/// The text is not modified; the amount to strip is stored in each non-empty fragment's `indent`.
+fn unindent_doc_fragments(docs: &mut Vec<DocFragment>) {
+    // `add` accounts for the most common sugared doc syntax ("/// "). Lines of the other
+    // fragment kinds never start with whitespace unless some Markdown formatting requires it.
+    // Therefore, if the doc block mixes the two, lines of non-sugared fragments are counted
+    // as if they had one extra leading whitespace.
+    //
+    // For example:
+    //
+    // /// hello!
+    // #[doc = "another"]
+    //
+    // In this case, you want "hello! another" and not "hello!  another".
+    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
+        && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
+    {
+        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
+        // "decide" how much the minimum indent will be.
+        1
+    } else {
+        0
+    };
+
+    // `min_indent` is how many leading whitespaces must be removed from each line. Example:
+    //
+    // ///     hello!
+    // #[doc = "another"]
+    //
+    // Here, `min_indent` is 1 (because non-sugared fragments are always counted with a minimum
+    // of 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts
+    // with 4 (5 - 1) whitespaces.
+    let Some(min_indent) = docs
+        .iter()
+        .map(|fragment| {
+            let offset = if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add };
+            fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
+                if line.chars().all(|c| c.is_whitespace()) {
+                    min_indent
+                } else {
+                    // Compare against either space or tab, ignoring whether they are mixed or not.
+                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
+                    // Offset the line's own indent, not the running minimum: otherwise `add`
+                    // would pile up across the lines of a multi-line raw fragment.
+                    cmp::min(min_indent, whitespace + offset)
+                }
+            })
+        })
+        .min()
+    else {
+        return;
+    };
+
+    for fragment in docs {
+        if fragment.doc == kw::Empty {
+            continue;
+        }
+
+        // Non-sugared lines were counted with `add` extra; undo that for their own indent.
+        fragment.indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
+            min_indent - add
+        } else {
+            min_indent
+        };
+    }
+}
+
 /// A link that has not yet been rendered.
 ///
 /// This link will be turned into a rendered link by [`Item::links`].
@ -1119,6 +1200,8 @@ impl Attributes {
            }
        }

+        unindent_doc_fragments(&mut doc_strings);
+
        Attributes { doc_strings, other_attrs }
    }

--- a/src/librustdoc/passes/unindent_comments/tests.rs
+++ b/src/librustdoc/passes/unindent_comments/tests.rs
@ -20,7 +20,7 @@ fn create_doc_fragment(s: &str) -> Vec<DocFragment> {
 fn run_test(input: &str, expected: &str) {
    create_default_session_globals_then(|| {
        let mut s = create_doc_fragment(input);
-        unindent_fragments(&mut s);
+        unindent_doc_fragments(&mut s);
        assert_eq!(collapse_doc_fragments(&s), expected);
    });
 }
--- a/src/librustdoc/doctest.rs
+++ b/src/librustdoc/doctest.rs
@ -1174,8 +1174,6 @@ impl<'a, 'hir, 'tcx> HirCollector<'a, 'hir, 'tcx> {
        nested: F,
    ) {
        let ast_attrs = self.tcx.hir().attrs(hir_id);
-        let mut attrs = Attributes::from_ast(ast_attrs, None);
-
        if let Some(ref cfg) = ast_attrs.cfg(self.tcx, &FxHashSet::default()) {
            if !cfg.matches(&self.sess.parse_sess, Some(self.sess.features_untracked())) {
                return;
@ -1187,9 +1185,9 @@ impl<'a, 'hir, 'tcx> HirCollector<'a, 'hir, 'tcx> {
            self.collector.names.push(name);
        }

-        attrs.unindent_doc_comments();
        // The collapse-docs pass won't combine sugared/raw doc attributes, or included files with
        // anything else, this will combine them for us.
+        let attrs = Attributes::from_ast(ast_attrs, None);
        if let Some(doc) = attrs.collapsed_doc_value() {
            // Use the outermost invocation, so that doctest names come from where the docs were written.
            let span = ast_attrs
--- a/src/librustdoc/passes/collect_intra_doc_links/early.rs
+++ b/src/librustdoc/passes/collect_intra_doc_links/early.rs
@ -63,9 +63,7 @@ crate fn early_resolve_intra_doc_links(
 }

 fn doc_attrs<'a>(attrs: impl Iterator<Item = &'a ast::Attribute>) -> Attributes {
-    let mut attrs = Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true);
-    attrs.unindent_doc_comments();
-    attrs
+    Attributes::from_ast_iter(attrs.map(|attr| (attr, None)), true)
 }

 struct EarlyDocLinkResolver<'r, 'ra> {
--- a/src/librustdoc/passes/mod.rs
+++ b/src/librustdoc/passes/mod.rs
@ -24,9 +24,6 @@ crate use self::strip_private::STRIP_PRIVATE;
 mod strip_priv_imports;
 crate use self::strip_priv_imports::STRIP_PRIV_IMPORTS;

-mod unindent_comments;
-crate use self::unindent_comments::UNINDENT_COMMENTS;
-
 mod propagate_doc_cfg;
 crate use self::propagate_doc_cfg::PROPAGATE_DOC_CFG;

@ -81,7 +78,6 @@ crate enum Condition {
 crate const PASSES: &[Pass] = &[
    CHECK_DOC_TEST_VISIBILITY,
    STRIP_HIDDEN,
-    UNINDENT_COMMENTS,
    STRIP_PRIVATE,
    STRIP_PRIV_IMPORTS,
    PROPAGATE_DOC_CFG,
@ -96,7 +92,6 @@ crate const PASSES: &[Pass] = &[
 /// The list of passes run by default.
 crate const DEFAULT_PASSES: &[ConditionalPass] = &[
    ConditionalPass::always(COLLECT_TRAIT_IMPLS),
-    ConditionalPass::always(UNINDENT_COMMENTS),
    ConditionalPass::always(CHECK_DOC_TEST_VISIBILITY),
    ConditionalPass::new(STRIP_HIDDEN, WhenNotDocumentHidden),
    ConditionalPass::new(STRIP_PRIVATE, WhenNotDocumentPrivate),
--- a/src/librustdoc/passes/unindent_comments.rs
+++ b/src/librustdoc/passes/unindent_comments.rs
@ -1,116 +0,0 @@
-//! Removes excess indentation on comments in order for the Markdown
-//! to be parsed correctly. This is necessary because the convention for
-//! writing documentation is to provide a space between the /// or //! marker
-//! and the doc text, but Markdown is whitespace-sensitive. For example,
-//! a block of text with four-space indentation is parsed as a code block,
-//! so if we didn't unindent comments, these list items
-//!
-//! /// A list:
-//! ///
-//! ///    - Foo
-//! ///    - Bar
-//!
-//! would be parsed as if they were in a code block, which is likely not what the user intended.
-use std::cmp;
-
-use rustc_span::symbol::kw;
-
-use crate::clean::{self, DocFragment, DocFragmentKind, Item};
-use crate::core::DocContext;
-use crate::fold::{self, DocFolder};
-use crate::passes::Pass;
-
-#[cfg(test)]
-mod tests;
-
-crate const UNINDENT_COMMENTS: Pass = Pass {
-    name: "unindent-comments",
-    run: unindent_comments,
-    description: "removes excess indentation on comments in order for markdown to like it",
-};
-
-crate fn unindent_comments(krate: clean::Crate, _: &mut DocContext<'_>) -> clean::Crate {
-    CommentCleaner.fold_crate(krate)
-}
-
-struct CommentCleaner;
-
-impl fold::DocFolder for CommentCleaner {
-    fn fold_item(&mut self, mut i: Item) -> Option<Item> {
-        i.attrs.unindent_doc_comments();
-        Some(self.fold_item_recur(i))
-    }
-}
-
-impl clean::Attributes {
-    crate fn unindent_doc_comments(&mut self) {
-        unindent_fragments(&mut self.doc_strings);
-    }
-}
-
-fn unindent_fragments(docs: &mut Vec<DocFragment>) {
-    // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
-    // fragments kind's lines are never starting with a whitespace unless they are using some
-    // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
-    // we need to take into account the fact that the minimum indent minus one (to take this
-    // whitespace into account).
-    //
-    // For example:
-    //
-    // /// hello!
-    // #[doc = "another"]
-    //
-    // In this case, you want "hello! another" and not "hello!  another".
-    let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
-        && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
-    {
-        // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
-        // "decide" how much the minimum indent will be.
-        1
-    } else {
-        0
-    };
-
-    // `min_indent` is used to know how much whitespaces from the start of each lines must be
-    // removed. Example:
-    //
-    // ///     hello!
-    // #[doc = "another"]
-    //
-    // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
-    // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
-    // (5 - 1) whitespaces.
-    let Some(min_indent) = docs
-        .iter()
-        .map(|fragment| {
-            fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
-                if line.chars().all(|c| c.is_whitespace()) {
-                    min_indent
-                } else {
-                    // Compare against either space or tab, ignoring whether they are
-                    // mixed or not.
-                    let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
-                    cmp::min(min_indent, whitespace)
-                        + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
-                }
-            })
-        })
-        .min()
-    else {
-        return;
-    };
-
-    for fragment in docs {
-        if fragment.doc == kw::Empty {
-            continue;
-        }
-
-        let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
-            min_indent - add
-        } else {
-            min_indent
-        };
-
-        fragment.indent = min_indent;
-    }
-}
--- a/src/test/rustdoc-ui/issue-91713.stdout
+++ b/src/test/rustdoc-ui/issue-91713.stdout
@ -1,7 +1,6 @@
 Available passes for running rustdoc:
 check_doc_test_visibility - run various visibility-related lints on doctests
        strip-hidden - strips all `#[doc(hidden)]` items from the output
-   unindent-comments - removes excess indentation on comments in order for markdown to like it
       strip-private - strips all private items from a crate which cannot be seen externally, implies strip-priv-imports
  strip-priv-imports - strips all private import statements (`use`, `extern crate`) from a crate
   propagate-doc-cfg - propagates `#[doc(cfg(...))]` to child items
@ -14,7 +13,6 @@ check-invalid-html-tags - detects invalid HTML tags in doc comments

 Default passes for rustdoc:
 collect-trait-impls
-   unindent-comments
 check_doc_test_visibility
        strip-hidden  (when not --document-hidden-items)
       strip-private  (when not --document-private-items)