rustdoc: add usable lint for pulldown-cmark-0.11 parsing changes

This commit is contained in:
Michael Howell 2024-06-29 11:24:36 -07:00
parent 15fbe618a1
commit 294c3dda88
7 changed files with 278 additions and 0 deletions

View File

@ -3121,6 +3121,17 @@ dependencies = [
"cc", "cc",
] ]
[[package]]
name = "pulldown-cmark"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b"
dependencies = [
"bitflags 2.5.0",
"memchr",
"unicase",
]
[[package]] [[package]]
name = "pulldown-cmark" name = "pulldown-cmark"
version = "0.10.3" version = "0.10.3"
@ -4890,6 +4901,7 @@ dependencies = [
"indexmap", "indexmap",
"itertools", "itertools",
"minifier", "minifier",
"pulldown-cmark 0.9.6",
"regex", "regex",
"rustdoc-json-types", "rustdoc-json-types",
"serde", "serde",

View File

@ -13,6 +13,7 @@ base64 = "0.21.7"
itertools = "0.12" itertools = "0.12"
indexmap = "2" indexmap = "2"
minifier = "0.3.0" minifier = "0.3.0"
pulldown-cmark-old = { version = "0.9.6", package = "pulldown-cmark", default-features = false }
regex = "1" regex = "1"
rustdoc-json-types = { path = "../rustdoc-json-types" } rustdoc-json-types = { path = "../rustdoc-json-types" }
serde_json = "1.0" serde_json = "1.0"

View File

@ -196,6 +196,14 @@ macro_rules! declare_rustdoc_lint {
"detects redundant explicit links in doc comments" "detects redundant explicit links in doc comments"
} }
declare_rustdoc_lint! {
/// This compatibility lint checks for Markdown syntax that works in the old engine but not
/// the new one.
UNPORTABLE_MARKDOWN,
Warn,
"detects markdown that is interpreted differently in different parser"
}
pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| { pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
vec![ vec![
BROKEN_INTRA_DOC_LINKS, BROKEN_INTRA_DOC_LINKS,
@ -209,6 +217,7 @@ macro_rules! declare_rustdoc_lint {
MISSING_CRATE_LEVEL_DOCS, MISSING_CRATE_LEVEL_DOCS,
UNESCAPED_BACKTICKS, UNESCAPED_BACKTICKS,
REDUNDANT_EXPLICIT_LINKS, REDUNDANT_EXPLICIT_LINKS,
UNPORTABLE_MARKDOWN,
] ]
}); });

View File

@ -6,6 +6,7 @@
mod html_tags; mod html_tags;
mod redundant_explicit_links; mod redundant_explicit_links;
mod unescaped_backticks; mod unescaped_backticks;
mod unportable_markdown;
use super::Pass; use super::Pass;
use crate::clean::*; use crate::clean::*;
@ -31,6 +32,7 @@ fn visit_item(&mut self, item: &Item) {
html_tags::visit_item(self.cx, item); html_tags::visit_item(self.cx, item);
unescaped_backticks::visit_item(self.cx, item); unescaped_backticks::visit_item(self.cx, item);
redundant_explicit_links::visit_item(self.cx, item); redundant_explicit_links::visit_item(self.cx, item);
unportable_markdown::visit_item(self.cx, item);
self.visit_item_recur(item) self.visit_item_recur(item)
} }

View File

@ -0,0 +1,152 @@
//! Detects specific markdown syntax that's different between pulldown-cmark
//! 0.9 and 0.11.
//!
//! This is a mitigation for old parser bugs that affected some
//! real crates' docs. The old parser claimed to comply with CommonMark,
//! but it did not. These warnings will eventually be removed,
//! though some of them may become Clippy lints.
//!
//! <https://github.com/rust-lang/rust/pull/121659#issuecomment-1992752820>
//!
//! <https://rustc-dev-guide.rust-lang.org/bug-fix-procedure.html#add-the-lint-to-the-list-of-removed-lists>
use crate::clean::Item;
use crate::core::DocContext;
use pulldown_cmark as cmarkn;
use pulldown_cmark_old as cmarko;
use rustc_lint_defs::Applicability;
use rustc_resolve::rustdoc::source_span_for_markdown_range;
use std::collections::{BTreeMap, BTreeSet};
pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
let tcx = cx.tcx;
let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else {
// If non-local, no need to check anything.
return;
};
let dox = item.doc_value();
if dox.is_empty() {
return;
}
// P1: unintended strikethrough was fixed by requiring single-tildes to flank
// the same way underscores do, so nothing is done here
// P2: block quotes without following space parsed wrong
//
// This is the set of starting points for block quotes with no space after
// the `>`. It is populated by the new parser, and if the old parser fails to
// clear it out, it'll produce a warning.
let mut spaceless_block_quotes = BTreeSet::new();
// P3: missing footnote references
//
// This is populated by listening for FootnoteReference from
// the new parser and old parser.
let mut missing_footnote_references = BTreeMap::new();
let mut found_footnote_references = BTreeSet::new();
// populate problem cases from new parser
{
pub fn main_body_opts_new() -> cmarkn::Options {
cmarkn::Options::ENABLE_TABLES
| cmarkn::Options::ENABLE_FOOTNOTES
| cmarkn::Options::ENABLE_STRIKETHROUGH
| cmarkn::Options::ENABLE_TASKLISTS
| cmarkn::Options::ENABLE_SMART_PUNCTUATION
}
let mut parser_new = cmarkn::Parser::new_ext(&dox, main_body_opts_new()).into_offset_iter();
while let Some((event, span)) = parser_new.next() {
if let cmarkn::Event::Start(cmarkn::Tag::BlockQuote(_)) = event {
if !dox[span.clone()].starts_with("> ") {
spaceless_block_quotes.insert(span.start);
}
}
if let cmarkn::Event::FootnoteReference(_) = event {
found_footnote_references.insert(span.start + 1);
}
}
}
// remove cases where they don't actually differ
{
pub fn main_body_opts_old() -> cmarko::Options {
cmarko::Options::ENABLE_TABLES
| cmarko::Options::ENABLE_FOOTNOTES
| cmarko::Options::ENABLE_STRIKETHROUGH
| cmarko::Options::ENABLE_TASKLISTS
| cmarko::Options::ENABLE_SMART_PUNCTUATION
}
let mut parser_old = cmarko::Parser::new_ext(&dox, main_body_opts_old()).into_offset_iter();
while let Some((event, span)) = parser_old.next() {
if let cmarko::Event::Start(cmarko::Tag::BlockQuote) = event {
if !dox[span.clone()].starts_with("> ") {
spaceless_block_quotes.remove(&span.start);
}
}
if let cmarko::Event::FootnoteReference(_) = event {
if !found_footnote_references.contains(&(span.start + 1)) {
missing_footnote_references.insert(span.start + 1, span);
}
}
}
}
for start in spaceless_block_quotes {
let (span, precise) =
source_span_for_markdown_range(tcx, &dox, &(start..start + 1), &item.attrs.doc_strings)
.map(|span| (span, true))
.unwrap_or_else(|| (item.attr_span(tcx), false));
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, |lint| {
lint.primary_message("unportable markdown");
lint.help(format!("confusing block quote with no space after the `>` marker"));
if precise {
lint.span_suggestion(
span.shrink_to_hi(),
"if the quote is intended, add a space",
" ",
Applicability::MaybeIncorrect,
);
lint.span_suggestion(
span.shrink_to_lo(),
"if it should not be a quote, escape it",
"\\",
Applicability::MaybeIncorrect,
);
}
});
}
for (_caret, span) in missing_footnote_references {
let (ref_span, precise) =
source_span_for_markdown_range(tcx, &dox, &span, &item.attrs.doc_strings)
.map(|span| (span, true))
.unwrap_or_else(|| (item.attr_span(tcx), false));
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, ref_span, |lint| {
lint.primary_message("unportable markdown");
if precise {
lint.span_suggestion(
ref_span.shrink_to_lo(),
"if it should not be a footnote, escape it",
"\\",
Applicability::MaybeIncorrect,
);
}
if dox.as_bytes().get(span.end) == Some(&b'[') {
lint.help("confusing footnote reference and link");
if precise {
lint.span_suggestion(
ref_span.shrink_to_hi(),
"if the footnote is intended, add a space",
" ",
Applicability::MaybeIncorrect,
);
} else {
lint.help("there should be a space between the link and the footnote");
}
}
});
}
}

View File

@ -0,0 +1,63 @@
// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929
//
// A series of test cases for CommonMark corner cases that pulldown-cmark 0.11 fixes.
//
// This version of the lint is targeted at two especially-common cases where docs got broken.
// Other differences in parsing should not warn.
#![allow(rustdoc::broken_intra_doc_links)]
#![deny(rustdoc::unportable_markdown)]
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/654>
///
/// Test footnote [^foot].
///
/// [^foot]: This is nested within the footnote now, but didn't used to be.
///
/// This is a multi-paragraph footnote.
pub struct GfmFootnotes;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/773>
///
/// test [^foo][^bar]
//~^ ERROR unportable markdown
///
/// [^foo]: test
/// [^bar]: test2
pub struct FootnoteSmashedName;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/829>
///
/// - _t
/// # test
/// t_
pub struct NestingCornerCase;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/650>
///
/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
pub struct Emphasis1;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/732>
///
/// |
/// |
pub struct NotEnoughTable;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/675>
///
/// foo
/// >bar
//~^ ERROR unportable markdown
pub struct BlockQuoteNoSpace;
/// Negative test.
///
/// foo
/// > bar
pub struct BlockQuoteSpace;
/// Negative test.
///
/// >bar
/// baz
pub struct BlockQuoteNoSpaceStart;

View File

@ -0,0 +1,39 @@
error: unportable markdown
--> $DIR/unportable-markdown.rs:21:10
|
LL | /// test [^foo][^bar]
| ^^^^^^
|
= help: confusing footnote reference and link
note: the lint level is defined here
--> $DIR/unportable-markdown.rs:8:9
|
LL | #![deny(rustdoc::unportable_markdown)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
help: if it should not be a footnote, escape it
|
LL | /// test \[^foo][^bar]
| +
help: if the footnote is intended, add a space
|
LL | /// test [^foo] [^bar]
| +
error: unportable markdown
--> $DIR/unportable-markdown.rs:49:5
|
LL | /// >bar
| ^
|
= help: confusing block quote with no space after the `>` marker
help: if the quote is intended, add a space
|
LL | /// > bar
| +
help: if it should not be a quote, escape it
|
LL | /// \>bar
| +
error: aborting due to 2 previous errors