Auto merge of #127127 - notriddle:notriddle/pulldown-cmark-0.11, r=GuillaumeGomez

rustdoc: update to pulldown-cmark 0.11

r? rustdoc

This pull request updates rustdoc to the latest version of pulldown-cmark. Along with adding new markdown extensions (which this PR doesn't enable), the new pulldown-cmark version also fixes a large number of bugs. Because all text files successfully parse as markdown, these bugfixes change the output, which can break people's existing docs.

A crater run, https://github.com/rust-lang/rust/pull/121659, has already been run for this change.

The first commit upgrades and fixes rustdoc. The second commit adds a lint for the footnote and block quote parser changes, which break the largest number of docs in the Crater run. The strikethrough change was mitigated in pulldown-cmark itself.

Unblocks https://github.com/rust-lang/rust-clippy/pull/12876
This commit is contained in:
bors 2024-07-04 01:50:31 +00:00
commit 66b4f0021b
16 changed files with 385 additions and 94 deletions

View File

@ -3141,7 +3141,19 @@ dependencies = [
"bitflags 2.5.0", "bitflags 2.5.0",
"getopts", "getopts",
"memchr", "memchr",
"pulldown-cmark-escape", "pulldown-cmark-escape 0.10.1",
"unicase",
]
[[package]]
name = "pulldown-cmark"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0"
dependencies = [
"bitflags 2.5.0",
"memchr",
"pulldown-cmark-escape 0.11.0",
"unicase", "unicase",
] ]
@ -3151,6 +3163,12 @@ version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3" checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3"
[[package]]
name = "pulldown-cmark-escape"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
[[package]] [[package]]
name = "pulldown-cmark-to-cmark" name = "pulldown-cmark-to-cmark"
version = "13.0.0" version = "13.0.0"
@ -4604,7 +4622,7 @@ name = "rustc_resolve"
version = "0.0.0" version = "0.0.0"
dependencies = [ dependencies = [
"bitflags 2.5.0", "bitflags 2.5.0",
"pulldown-cmark 0.9.6", "pulldown-cmark 0.11.0",
"rustc_arena", "rustc_arena",
"rustc_ast", "rustc_ast",
"rustc_ast_pretty", "rustc_ast_pretty",
@ -4883,6 +4901,7 @@ dependencies = [
"indexmap", "indexmap",
"itertools", "itertools",
"minifier", "minifier",
"pulldown-cmark 0.9.6",
"regex", "regex",
"rustdoc-json-types", "rustdoc-json-types",
"serde", "serde",

View File

@ -35,11 +35,11 @@
//! | | | | //! | | | |
//! | `ParallelIterator` | `Iterator` | `rayon::iter::ParallelIterator` | //! | `ParallelIterator` | `Iterator` | `rayon::iter::ParallelIterator` |
//! //!
//! [^1] `MTLock` is similar to `Lock`, but the serial version avoids the cost //! [^1]: `MTLock` is similar to `Lock`, but the serial version avoids the cost
//! of a `RefCell`. This is appropriate when interior mutability is not //! of a `RefCell`. This is appropriate when interior mutability is not
//! required. //! required.
//! //!
//! [^2] `MTRef`, `MTLockRef` are type aliases. //! [^2]: `MTRef`, `MTLockRef` are type aliases.
pub use crate::marker::*; pub use crate::marker::*;
use std::collections::HashMap; use std::collections::HashMap;

View File

@ -6,7 +6,7 @@ edition = "2021"
[dependencies] [dependencies]
# tidy-alphabetical-start # tidy-alphabetical-start
bitflags = "2.4.1" bitflags = "2.4.1"
pulldown-cmark = { version = "0.9.6", default-features = false } pulldown-cmark = { version = "0.11", features = ["html"], default-features = false }
rustc_arena = { path = "../rustc_arena" } rustc_arena = { path = "../rustc_arena" }
rustc_ast = { path = "../rustc_ast" } rustc_ast = { path = "../rustc_ast" }
rustc_ast_pretty = { path = "../rustc_ast_pretty" } rustc_ast_pretty = { path = "../rustc_ast_pretty" }

View File

@ -1,4 +1,6 @@
use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag}; use pulldown_cmark::{
BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
};
use rustc_ast as ast; use rustc_ast as ast;
use rustc_ast::util::comments::beautify_doc_string; use rustc_ast::util::comments::beautify_doc_string;
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashMap;
@ -427,7 +429,9 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
while let Some(event) = event_iter.next() { while let Some(event) = event_iter.next() {
match event { match event {
Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ })
if may_be_doc_link(link_type) =>
{
if matches!( if matches!(
link_type, link_type,
LinkType::Inline LinkType::Inline
@ -441,7 +445,7 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
} }
} }
links.push(preprocess_link(&dest)); links.push(preprocess_link(&dest_url));
} }
_ => {} _ => {}
} }
@ -451,8 +455,8 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
} }
/// Collects additional data of link. /// Collects additional data of link.
fn collect_link_data<'input, 'callback>( fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
event_iter: &mut Parser<'input, 'callback>, event_iter: &mut Parser<'input, F>,
) -> Option<Box<str>> { ) -> Option<Box<str>> {
let mut display_text: Option<String> = None; let mut display_text: Option<String> = None;
let mut append_text = |text: CowStr<'_>| { let mut append_text = |text: CowStr<'_>| {

View File

@ -13,6 +13,7 @@ base64 = "0.21.7"
itertools = "0.12" itertools = "0.12"
indexmap = "2" indexmap = "2"
minifier = "0.3.0" minifier = "0.3.0"
pulldown-cmark-old = { version = "0.9.6", package = "pulldown-cmark", default-features = false }
regex = "1" regex = "1"
rustdoc-json-types = { path = "../rustdoc-json-types" } rustdoc-json-types = { path = "../rustdoc-json-types" }
serde_json = "1.0" serde_json = "1.0"

View File

@ -54,7 +54,8 @@
use crate::html::toc::TocBuilder; use crate::html::toc::TocBuilder;
use pulldown_cmark::{ use pulldown_cmark::{
html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, Options, Parser, Tag, html, BrokenLink, BrokenLinkCallback, CodeBlockKind, CowStr, Event, LinkType, OffsetIter,
Options, Parser, Tag, TagEnd,
}; };
#[cfg(test)] #[cfg(test)]
@ -230,7 +231,7 @@ fn next(&mut self) -> Option<Self::Item> {
let mut original_text = String::new(); let mut original_text = String::new();
for event in &mut self.inner { for event in &mut self.inner {
match event { match event {
Event::End(Tag::CodeBlock(..)) => break, Event::End(TagEnd::CodeBlock) => break,
Event::Text(ref s) => { Event::Text(ref s) => {
original_text.push_str(s); original_text.push_str(s);
} }
@ -359,16 +360,17 @@ fn next(&mut self) -> Option<Self::Item> {
match &mut event { match &mut event {
// This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]` // This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]`
// Remove any disambiguator. // Remove any disambiguator.
Some(Event::Start(Tag::Link( Some(Event::Start(Tag::Link {
// [fn@f] or [fn@f][] // [fn@f] or [fn@f][]
LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
dest, dest_url,
title, title,
))) => { ..
debug!("saw start of shortcut link to {dest} with title {title}"); })) => {
debug!("saw start of shortcut link to {dest_url} with title {title}");
// If this is a shortcut link, it was resolved by the broken_link_callback. // If this is a shortcut link, it was resolved by the broken_link_callback.
// So the URL will already be updated properly. // So the URL will already be updated properly.
let link = self.links.iter().find(|&link| *link.href == **dest); let link = self.links.iter().find(|&link| *link.href == **dest_url);
// Since this is an external iterator, we can't replace the inner text just yet. // Since this is an external iterator, we can't replace the inner text just yet.
// Store that we saw a link so we know to replace it later. // Store that we saw a link so we know to replace it later.
if let Some(link) = link { if let Some(link) = link {
@ -381,17 +383,10 @@ fn next(&mut self) -> Option<Self::Item> {
} }
} }
// Now that we're done with the shortcut link, don't replace any more text. // Now that we're done with the shortcut link, don't replace any more text.
Some(Event::End(Tag::Link( Some(Event::End(TagEnd::Link)) if self.shortcut_link.is_some() => {
LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, debug!("saw end of shortcut link");
dest,
_,
))) => {
debug!("saw end of shortcut link to {dest}");
if self.links.iter().any(|link| *link.href == **dest) {
assert!(self.shortcut_link.is_some(), "saw closing link without opening tag");
self.shortcut_link = None; self.shortcut_link = None;
} }
}
// Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link. // Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link.
// [`fn@f`] // [`fn@f`]
Some(Event::Code(text)) => { Some(Event::Code(text)) => {
@ -433,9 +428,11 @@ fn next(&mut self) -> Option<Self::Item> {
} }
// If this is a link, but not a shortcut link, // If this is a link, but not a shortcut link,
// replace the URL, since the broken_link_callback was not called. // replace the URL, since the broken_link_callback was not called.
Some(Event::Start(Tag::Link(_, dest, title))) => { Some(Event::Start(Tag::Link { dest_url, title, .. })) => {
if let Some(link) = self.links.iter().find(|&link| *link.original_text == **dest) { if let Some(link) =
*dest = CowStr::Borrowed(link.href.as_ref()); self.links.iter().find(|&link| *link.original_text == **dest_url)
{
*dest_url = CowStr::Borrowed(link.href.as_ref());
if title.is_empty() && !link.tooltip.is_empty() { if title.is_empty() && !link.tooltip.is_empty() {
*title = CowStr::Borrowed(link.tooltip.as_ref()); *title = CowStr::Borrowed(link.tooltip.as_ref());
} }
@ -477,9 +474,9 @@ fn next(&mut self) -> Option<Self::Item> {
self.stored_events.push_back(Event::Start(Tag::Table(t))); self.stored_events.push_back(Event::Start(Tag::Table(t)));
Event::Html(CowStr::Borrowed("<div>")) Event::Html(CowStr::Borrowed("<div>"))
} }
Event::End(Tag::Table(t)) => { Event::End(TagEnd::Table) => {
self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>"))); self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
Event::End(Tag::Table(t)) Event::End(TagEnd::Table)
} }
e => e, e => e,
}) })
@ -519,11 +516,11 @@ fn next(&mut self) -> Option<Self::Item> {
} }
let event = self.inner.next(); let event = self.inner.next();
if let Some((Event::Start(Tag::Heading(level, _, _)), _)) = event { if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
let mut id = String::new(); let mut id = String::new();
for event in &mut self.inner { for event in &mut self.inner {
match &event.0 { match &event.0 {
Event::End(Tag::Heading(..)) => break, Event::End(TagEnd::Heading(_)) => break,
Event::Text(text) | Event::Code(text) => { Event::Text(text) | Event::Code(text) => {
id.extend(text.chars().filter_map(slugify)); id.extend(text.chars().filter_map(slugify));
self.buf.push_back(event); self.buf.push_back(event);
@ -566,27 +563,27 @@ fn new(iter: I) -> Self {
} }
} }
fn check_if_allowed_tag(t: &Tag<'_>) -> bool { fn check_if_allowed_tag(t: &TagEnd) -> bool {
matches!( matches!(
t, t,
Tag::Paragraph TagEnd::Paragraph
| Tag::Emphasis | TagEnd::Emphasis
| Tag::Strong | TagEnd::Strong
| Tag::Strikethrough | TagEnd::Strikethrough
| Tag::Link(..) | TagEnd::Link
| Tag::BlockQuote | TagEnd::BlockQuote
) )
} }
fn is_forbidden_tag(t: &Tag<'_>) -> bool { fn is_forbidden_tag(t: &TagEnd) -> bool {
matches!( matches!(
t, t,
Tag::CodeBlock(_) TagEnd::CodeBlock
| Tag::Table(_) | TagEnd::Table
| Tag::TableHead | TagEnd::TableHead
| Tag::TableRow | TagEnd::TableRow
| Tag::TableCell | TagEnd::TableCell
| Tag::FootnoteDefinition(_) | TagEnd::FootnoteDefinition
) )
} }
@ -604,12 +601,12 @@ fn next(&mut self) -> Option<Self::Item> {
let mut is_start = true; let mut is_start = true;
let is_allowed_tag = match event { let is_allowed_tag = match event {
Event::Start(ref c) => { Event::Start(ref c) => {
if is_forbidden_tag(c) { if is_forbidden_tag(&c.to_end()) {
self.skipped_tags += 1; self.skipped_tags += 1;
return None; return None;
} }
self.depth += 1; self.depth += 1;
check_if_allowed_tag(c) check_if_allowed_tag(&c.to_end())
} }
Event::End(ref c) => { Event::End(ref c) => {
if is_forbidden_tag(c) { if is_forbidden_tag(c) {
@ -633,7 +630,7 @@ fn next(&mut self) -> Option<Self::Item> {
if is_start { if is_start {
Some(Event::Start(Tag::Paragraph)) Some(Event::Start(Tag::Paragraph))
} else { } else {
Some(Event::End(Tag::Paragraph)) Some(Event::End(TagEnd::Paragraph))
} }
} else { } else {
Some(event) Some(event)
@ -679,7 +676,7 @@ fn next(&mut self) -> Option<Self::Item> {
Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => { Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => {
let mut content = Vec::new(); let mut content = Vec::new();
for (event, _) in &mut self.inner { for (event, _) in &mut self.inner {
if let Event::End(Tag::FootnoteDefinition(..)) = event { if let Event::End(TagEnd::FootnoteDefinition) = event {
break; break;
} }
content.push(event); content.push(event);
@ -696,7 +693,7 @@ fn next(&mut self) -> Option<Self::Item> {
for (mut content, id) in v { for (mut content, id) in v {
write!(ret, "<li id=\"fn{id}\">").unwrap(); write!(ret, "<li id=\"fn{id}\">").unwrap();
let mut is_paragraph = false; let mut is_paragraph = false;
if let Some(&Event::End(Tag::Paragraph)) = content.last() { if let Some(&Event::End(TagEnd::Paragraph)) = content.last() {
content.pop(); content.pop();
is_paragraph = true; is_paragraph = true;
} }
@ -806,7 +803,7 @@ pub(crate) fn find_codes<T: doctest::DoctestVisitor>(
tests.visit_test(text, block_info, line); tests.visit_test(text, block_info, line);
prev_offset = offset.start; prev_offset = offset.start;
} }
Event::Start(Tag::Heading(level, _, _)) => { Event::Start(Tag::Heading { level, .. }) => {
register_header = Some(level as u32); register_header = Some(level as u32);
} }
Event::Text(ref s) if register_header.is_some() => { Event::Text(ref s) if register_header.is_some() => {
@ -1432,7 +1429,7 @@ pub(crate) fn into_string(self) -> String {
// Treat inline HTML as plain text. // Treat inline HTML as plain text.
let p = p.map(|event| match event.0 { let p = p.map(|event| match event.0 {
Event::Html(text) => (Event::Text(text), event.1), Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
_ => event, _ => event,
}); });
@ -1442,7 +1439,7 @@ pub(crate) fn into_string(self) -> String {
let p = Footnotes::new(p); let p = Footnotes::new(p);
let p = TableWrapper::new(p.map(|(ev, _)| ev)); let p = TableWrapper::new(p.map(|(ev, _)| ev));
let p = p.filter(|event| { let p = p.filter(|event| {
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph)) !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
}); });
html::push_html(&mut s, p); html::push_html(&mut s, p);
@ -1472,7 +1469,7 @@ pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) {
let mut s = String::new(); let mut s = String::new();
let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| { let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph)) !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
}); });
html::push_html(&mut s, without_paragraphs); html::push_html(&mut s, without_paragraphs);
@ -1544,8 +1541,8 @@ fn markdown_summary_with_limit(
_ => {} _ => {}
}, },
Event::End(tag) => match tag { Event::End(tag) => match tag {
Tag::Emphasis | Tag::Strong => buf.close_tag(), TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
Tag::Paragraph | Tag::Heading(..) => return ControlFlow::Break(()), TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
_ => {} _ => {}
}, },
Event::HardBreak | Event::SoftBreak => buf.push(" ")?, Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
@ -1605,8 +1602,8 @@ pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> Strin
} }
Event::HardBreak | Event::SoftBreak => s.push(' '), Event::HardBreak | Event::SoftBreak => s.push(' '),
Event::Start(Tag::CodeBlock(..)) => break, Event::Start(Tag::CodeBlock(..)) => break,
Event::End(Tag::Paragraph) => break, Event::End(TagEnd::Paragraph) => break,
Event::End(Tag::Heading(..)) => break, Event::End(TagEnd::Heading(..)) => break,
_ => (), _ => (),
} }
} }
@ -1765,7 +1762,7 @@ pub(crate) fn markdown_links<'md, R>(
while let Some((event, span)) = event_iter.next() { while let Some((event, span)) = event_iter.next() {
match event { match event {
Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { Event::Start(Tag::Link { link_type, dest_url, .. }) if may_be_doc_link(link_type) => {
let range = match link_type { let range = match link_type {
// Link is pulled from the link itself. // Link is pulled from the link itself.
LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => { LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
@ -1775,7 +1772,7 @@ pub(crate) fn markdown_links<'md, R>(
LinkType::Inline => span_for_offset_backward(span, b'(', b')'), LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
// Link is pulled from elsewhere in the document. // Link is pulled from elsewhere in the document.
LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => { LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
span_for_link(&dest, span) span_for_link(&dest_url, span)
} }
LinkType::Autolink | LinkType::Email => unreachable!(), LinkType::Autolink | LinkType::Email => unreachable!(),
}; };
@ -1795,7 +1792,7 @@ pub(crate) fn markdown_links<'md, R>(
if let Some(link) = preprocess_link(MarkdownLink { if let Some(link) = preprocess_link(MarkdownLink {
kind: link_type, kind: link_type,
link: dest.into_string(), link: dest_url.into_string(),
display_text, display_text,
range, range,
}) { }) {
@ -1810,8 +1807,8 @@ pub(crate) fn markdown_links<'md, R>(
} }
/// Collects additional data of link. /// Collects additional data of link.
fn collect_link_data<'input, 'callback>( fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
event_iter: &mut OffsetIter<'input, 'callback>, event_iter: &mut OffsetIter<'input, F>,
) -> Option<String> { ) -> Option<String> {
let mut display_text: Option<String> = None; let mut display_text: Option<String> = None;
let mut append_text = |text: CowStr<'_>| { let mut append_text = |text: CowStr<'_>| {

View File

@ -196,6 +196,14 @@ macro_rules! declare_rustdoc_lint {
"detects redundant explicit links in doc comments" "detects redundant explicit links in doc comments"
} }
declare_rustdoc_lint! {
/// This compatibility lint checks for Markdown syntax that works in the old engine but not
/// the new one.
///
/// It is warn-by-default. The lint pass that emits it compares the output of the old and
/// new Markdown parsers and reports block quotes that have no space after the `>` marker,
/// as well as footnote references that only the old parser recognizes.
UNPORTABLE_MARKDOWN,
Warn,
"detects markdown that is interpreted differently in different parser"
}
pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| { pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
vec![ vec![
BROKEN_INTRA_DOC_LINKS, BROKEN_INTRA_DOC_LINKS,
@ -209,6 +217,7 @@ macro_rules! declare_rustdoc_lint {
MISSING_CRATE_LEVEL_DOCS, MISSING_CRATE_LEVEL_DOCS,
UNESCAPED_BACKTICKS, UNESCAPED_BACKTICKS,
REDUNDANT_EXPLICIT_LINKS, REDUNDANT_EXPLICIT_LINKS,
UNPORTABLE_MARKDOWN,
] ]
}); });

View File

@ -6,6 +6,7 @@
mod html_tags; mod html_tags;
mod redundant_explicit_links; mod redundant_explicit_links;
mod unescaped_backticks; mod unescaped_backticks;
mod unportable_markdown;
use super::Pass; use super::Pass;
use crate::clean::*; use crate::clean::*;
@ -31,6 +32,7 @@ fn visit_item(&mut self, item: &Item) {
html_tags::visit_item(self.cx, item); html_tags::visit_item(self.cx, item);
unescaped_backticks::visit_item(self.cx, item); unescaped_backticks::visit_item(self.cx, item);
redundant_explicit_links::visit_item(self.cx, item); redundant_explicit_links::visit_item(self.cx, item);
unportable_markdown::visit_item(self.cx, item);
self.visit_item_recur(item) self.visit_item_recur(item)
} }

View File

@ -42,11 +42,11 @@ pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item) {
match event { match event {
Event::Text(s) => find_raw_urls(cx, &s, range, &report_diag), Event::Text(s) => find_raw_urls(cx, &s, range, &report_diag),
// We don't want to check the text inside code blocks or links. // We don't want to check the text inside code blocks or links.
Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => { Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link { .. })) => {
while let Some((event, _)) = p.next() { while let Some((event, _)) = p.next() {
match event { match event {
Event::End(end) Event::End(end)
if mem::discriminant(&end) == mem::discriminant(&tag) => if mem::discriminant(&end) == mem::discriminant(&tag.to_end()) =>
{ {
break; break;
} }

View File

@ -4,7 +4,7 @@
use crate::core::DocContext; use crate::core::DocContext;
use crate::html::markdown::main_body_opts; use crate::html::markdown::main_body_opts;
use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag}; use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag, TagEnd};
use rustc_resolve::rustdoc::source_span_for_markdown_range; use rustc_resolve::rustdoc::source_span_for_markdown_range;
use std::iter::Peekable; use std::iter::Peekable;
@ -140,10 +140,10 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
for (event, range) in p { for (event, range) in p {
match event { match event {
Event::Start(Tag::CodeBlock(_)) => in_code_block = true, Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
Event::Html(text) if !in_code_block => { Event::Html(text) | Event::InlineHtml(text) if !in_code_block => {
extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag) extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag)
} }
Event::End(Tag::CodeBlock(_)) => in_code_block = false, Event::End(TagEnd::CodeBlock) => in_code_block = false,
_ => {} _ => {}
} }
} }

View File

@ -1,6 +1,8 @@
use std::ops::Range; use std::ops::Range;
use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, OffsetIter, Parser, Tag}; use pulldown_cmark::{
BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, OffsetIter, Parser, Tag,
};
use rustc_ast::NodeId; use rustc_ast::NodeId;
use rustc_errors::SuggestionStyle; use rustc_errors::SuggestionStyle;
use rustc_hir::def::{DefKind, DocLinkResMap, Namespace, Res}; use rustc_hir::def::{DefKind, DocLinkResMap, Namespace, Res};
@ -95,7 +97,7 @@ fn check_redundant_explicit_link<'md>(
while let Some((event, link_range)) = offset_iter.next() { while let Some((event, link_range)) = offset_iter.next() {
match event { match event {
Event::Start(Tag::Link(link_type, dest, _)) => { Event::Start(Tag::Link { link_type, dest_url, .. }) => {
let link_data = collect_link_data(&mut offset_iter); let link_data = collect_link_data(&mut offset_iter);
if let Some(resolvable_link) = link_data.resolvable_link.as_ref() { if let Some(resolvable_link) = link_data.resolvable_link.as_ref() {
@ -108,7 +110,7 @@ fn check_redundant_explicit_link<'md>(
} }
} }
let explicit_link = dest.to_string(); let explicit_link = dest_url.to_string();
let display_link = link_data.resolvable_link.clone()?; let display_link = link_data.resolvable_link.clone()?;
if explicit_link.ends_with(&display_link) || display_link.ends_with(&explicit_link) if explicit_link.ends_with(&display_link) || display_link.ends_with(&explicit_link)
@ -122,7 +124,7 @@ fn check_redundant_explicit_link<'md>(
doc, doc,
resolutions, resolutions,
link_range, link_range,
dest.to_string(), dest_url.to_string(),
link_data, link_data,
if link_type == LinkType::Inline { if link_type == LinkType::Inline {
(b'(', b')') (b'(', b')')
@ -139,7 +141,7 @@ fn check_redundant_explicit_link<'md>(
doc, doc,
resolutions, resolutions,
link_range, link_range,
&dest, &dest_url,
link_data, link_data,
); );
} }
@ -259,7 +261,9 @@ fn find_resolution(resolutions: &DocLinkResMap, path: &str) -> Option<Res<NodeId
} }
/// Collects all necessary data of link. /// Collects all necessary data of link.
fn collect_link_data(offset_iter: &mut OffsetIter<'_, '_>) -> LinkData { fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
offset_iter: &mut OffsetIter<'input, F>,
) -> LinkData {
let mut resolvable_link = None; let mut resolvable_link = None;
let mut resolvable_link_range = None; let mut resolvable_link_range = None;
let mut display_link = String::new(); let mut display_link = String::new();

View File

@ -0,0 +1,152 @@
//! Detects specific markdown syntax that's different between pulldown-cmark
//! 0.9 and 0.11.
//!
//! This is a mitigation for old parser bugs that affected some
//! real crates' docs. The old parser claimed to comply with CommonMark,
//! but it did not. These warnings will eventually be removed,
//! though some of them may become Clippy lints.
//!
//! <https://github.com/rust-lang/rust/pull/121659#issuecomment-1992752820>
//!
//! <https://rustc-dev-guide.rust-lang.org/bug-fix-procedure.html#add-the-lint-to-the-list-of-removed-lists>
use crate::clean::Item;
use crate::core::DocContext;
use pulldown_cmark as cmarkn;
use pulldown_cmark_old as cmarko;
use rustc_lint_defs::Applicability;
use rustc_resolve::rustdoc::source_span_for_markdown_range;
use std::collections::{BTreeMap, BTreeSet};
pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
let tcx = cx.tcx;
let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else {
// If non-local, no need to check anything.
return;
};
let dox = item.doc_value();
if dox.is_empty() {
return;
}
// P1: unintended strikethrough was fixed by requiring single-tildes to flank
// the same way underscores do, so nothing is done here
// P2: block quotes without following space parsed wrong
//
// This is the set of starting points for block quotes with no space after
// the `>`. It is populated by the new parser, and if the old parser fails to
// clear it out, it'll produce a warning.
let mut spaceless_block_quotes = BTreeSet::new();
// P3: missing footnote references
//
// This is populated by listening for FootnoteReference from
// the new parser and old parser.
let mut missing_footnote_references = BTreeMap::new();
let mut found_footnote_references = BTreeSet::new();
// populate problem cases from new parser
{
pub fn main_body_opts_new() -> cmarkn::Options {
cmarkn::Options::ENABLE_TABLES
| cmarkn::Options::ENABLE_FOOTNOTES
| cmarkn::Options::ENABLE_STRIKETHROUGH
| cmarkn::Options::ENABLE_TASKLISTS
| cmarkn::Options::ENABLE_SMART_PUNCTUATION
}
let mut parser_new = cmarkn::Parser::new_ext(&dox, main_body_opts_new()).into_offset_iter();
while let Some((event, span)) = parser_new.next() {
if let cmarkn::Event::Start(cmarkn::Tag::BlockQuote(_)) = event {
if !dox[span.clone()].starts_with("> ") {
spaceless_block_quotes.insert(span.start);
}
}
if let cmarkn::Event::FootnoteReference(_) = event {
found_footnote_references.insert(span.start + 1);
}
}
}
// remove cases where they don't actually differ
{
pub fn main_body_opts_old() -> cmarko::Options {
cmarko::Options::ENABLE_TABLES
| cmarko::Options::ENABLE_FOOTNOTES
| cmarko::Options::ENABLE_STRIKETHROUGH
| cmarko::Options::ENABLE_TASKLISTS
| cmarko::Options::ENABLE_SMART_PUNCTUATION
}
let mut parser_old = cmarko::Parser::new_ext(&dox, main_body_opts_old()).into_offset_iter();
while let Some((event, span)) = parser_old.next() {
if let cmarko::Event::Start(cmarko::Tag::BlockQuote) = event {
if !dox[span.clone()].starts_with("> ") {
spaceless_block_quotes.remove(&span.start);
}
}
if let cmarko::Event::FootnoteReference(_) = event {
if !found_footnote_references.contains(&(span.start + 1)) {
missing_footnote_references.insert(span.start + 1, span);
}
}
}
}
for start in spaceless_block_quotes {
let (span, precise) =
source_span_for_markdown_range(tcx, &dox, &(start..start + 1), &item.attrs.doc_strings)
.map(|span| (span, true))
.unwrap_or_else(|| (item.attr_span(tcx), false));
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, |lint| {
lint.primary_message("unportable markdown");
lint.help(format!("confusing block quote with no space after the `>` marker"));
if precise {
lint.span_suggestion(
span.shrink_to_hi(),
"if the quote is intended, add a space",
" ",
Applicability::MaybeIncorrect,
);
lint.span_suggestion(
span.shrink_to_lo(),
"if it should not be a quote, escape it",
"\\",
Applicability::MaybeIncorrect,
);
}
});
}
for (_caret, span) in missing_footnote_references {
let (ref_span, precise) =
source_span_for_markdown_range(tcx, &dox, &span, &item.attrs.doc_strings)
.map(|span| (span, true))
.unwrap_or_else(|| (item.attr_span(tcx), false));
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, ref_span, |lint| {
lint.primary_message("unportable markdown");
if precise {
lint.span_suggestion(
ref_span.shrink_to_lo(),
"if it should not be a footnote, escape it",
"\\",
Applicability::MaybeIncorrect,
);
}
if dox.as_bytes().get(span.end) == Some(&b'[') {
lint.help("confusing footnote reference and link");
if precise {
lint.span_suggestion(
ref_span.shrink_to_hi(),
"if the footnote is intended, add a space",
" ",
Applicability::MaybeIncorrect,
);
} else {
lint.help("there should be a space between the link and the footnote");
}
}
});
}
}

View File

@ -6,10 +6,10 @@
use clippy_utils::visitors::Visitable; use clippy_utils::visitors::Visitable;
use clippy_utils::{in_constant, is_entrypoint_fn, is_trait_impl_item, method_chain_args}; use clippy_utils::{in_constant, is_entrypoint_fn, is_trait_impl_item, method_chain_args};
use pulldown_cmark::Event::{ use pulldown_cmark::Event::{
Code, End, FootnoteReference, HardBreak, Html, Rule, SoftBreak, Start, TaskListMarker, Text, Code, DisplayMath, End, FootnoteReference, HardBreak, Html, InlineHtml, InlineMath, Rule, SoftBreak, Start, TaskListMarker, Text,
}; };
use pulldown_cmark::Tag::{BlockQuote, CodeBlock, FootnoteDefinition, Heading, Item, Link, Paragraph}; use pulldown_cmark::Tag::{BlockQuote, CodeBlock, FootnoteDefinition, Heading, Item, Link, Paragraph};
use pulldown_cmark::{BrokenLink, CodeBlockKind, CowStr, Options}; use pulldown_cmark::{BrokenLink, CodeBlockKind, CowStr, Options, TagEnd};
use rustc_ast::ast::Attribute; use rustc_ast::ast::Attribute;
use rustc_data_structures::fx::FxHashSet; use rustc_data_structures::fx::FxHashSet;
use rustc_hir::intravisit::{self, Visitor}; use rustc_hir::intravisit::{self, Visitor};
@ -659,7 +659,7 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
while let Some((event, range)) = events.next() { while let Some((event, range)) = events.next() {
match event { match event {
Html(tag) => { Html(tag) | InlineHtml(tag) => {
if tag.starts_with("<code") { if tag.starts_with("<code") {
code_level += 1; code_level += 1;
} else if tag.starts_with("</code") { } else if tag.starts_with("</code") {
@ -670,11 +670,11 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
blockquote_level -= 1; blockquote_level -= 1;
} }
}, },
Start(BlockQuote) => { Start(BlockQuote(_)) => {
blockquote_level += 1; blockquote_level += 1;
containers.push(Container::Blockquote); containers.push(Container::Blockquote);
}, },
End(BlockQuote) => { End(TagEnd::BlockQuote) => {
blockquote_level -= 1; blockquote_level -= 1;
containers.pop(); containers.pop();
}, },
@ -699,15 +699,15 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
} }
} }
}, },
End(CodeBlock(_)) => { End(TagEnd::CodeBlock) => {
in_code = false; in_code = false;
is_rust = false; is_rust = false;
ignore = false; ignore = false;
}, },
Start(Link(_, url, _)) => in_link = Some(url), Start(Link { dest_url, .. }) => in_link = Some(dest_url),
End(Link(..)) => in_link = None, End(TagEnd::Link) => in_link = None,
Start(Heading(_, _, _) | Paragraph | Item) => { Start(Heading { .. } | Paragraph | Item) => {
if let Start(Heading(_, _, _)) = event { if let Start(Heading { .. }) = event {
in_heading = true; in_heading = true;
} }
if let Start(Item) = event { if let Start(Item) = event {
@ -720,11 +720,11 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
ticks_unbalanced = false; ticks_unbalanced = false;
paragraph_range = range; paragraph_range = range;
}, },
End(Heading(_, _, _) | Paragraph | Item) => { End(TagEnd::Heading(_) | TagEnd::Paragraph | TagEnd::Item) => {
if let End(Heading(_, _, _)) = event { if let End(TagEnd::Heading(_)) = event {
in_heading = false; in_heading = false;
} }
if let End(Item) = event { if let End(TagEnd::Item) = event {
containers.pop(); containers.pop();
} }
if ticks_unbalanced && let Some(span) = fragments.span(cx, paragraph_range.clone()) { if ticks_unbalanced && let Some(span) = fragments.span(cx, paragraph_range.clone()) {
@ -746,8 +746,8 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
text_to_check = Vec::new(); text_to_check = Vec::new();
}, },
Start(FootnoteDefinition(..)) => in_footnote_definition = true, Start(FootnoteDefinition(..)) => in_footnote_definition = true,
End(FootnoteDefinition(..)) => in_footnote_definition = false, End(TagEnd::FootnoteDefinition) => in_footnote_definition = false,
Start(_tag) | End(_tag) => (), // We don't care about other tags Start(_) | End(_) => (), // We don't care about other tags
SoftBreak | HardBreak => { SoftBreak | HardBreak => {
if !containers.is_empty() if !containers.is_empty()
&& let Some((next_event, next_range)) = events.peek() && let Some((next_event, next_range)) = events.peek()
@ -765,7 +765,7 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
); );
} }
}, },
TaskListMarker(_) | Code(_) | Rule => (), TaskListMarker(_) | Code(_) | Rule | InlineMath(..) | DisplayMath(..) => (),
FootnoteReference(text) | Text(text) => { FootnoteReference(text) | Text(text) => {
paragraph_range.end = range.end; paragraph_range.end = range.end;
ticks_unbalanced |= text.contains('`') && !in_code; ticks_unbalanced |= text.contains('`') && !in_code;

View File

@ -335,6 +335,7 @@
"proc-macro2", "proc-macro2",
"psm", "psm",
"pulldown-cmark", "pulldown-cmark",
"pulldown-cmark-escape",
"punycode", "punycode",
"quote", "quote",
"r-efi", "r-efi",

View File

@ -0,0 +1,63 @@
// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929
//
// A series of test cases for CommonMark corner cases that pulldown-cmark 0.11 fixes.
//
// This version of the lint is targeted at two especially-common cases where docs got broken.
// Other differences in parsing should not warn.
#![allow(rustdoc::broken_intra_doc_links)]
#![deny(rustdoc::unportable_markdown)]
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/654>
///
/// Test footnote [^foot].
///
/// [^foot]: This is nested within the footnote now, but didn't used to be.
///
/// This is a multi-paragraph footnote.
pub struct GfmFootnotes;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/773>
///
/// test [^foo][^bar]
//~^ ERROR unportable markdown
///
/// [^foo]: test
/// [^bar]: test2
pub struct FootnoteSmashedName;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/829>
///
/// - _t
/// # test
/// t_
pub struct NestingCornerCase;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/650>
///
/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
pub struct Emphasis1;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/732>
///
/// |
/// |
pub struct NotEnoughTable;
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/675>
///
/// foo
/// >bar
//~^ ERROR unportable markdown
pub struct BlockQuoteNoSpace;
/// Negative test.
///
/// foo
/// > bar
pub struct BlockQuoteSpace;
/// Negative test.
///
/// >bar
/// baz
pub struct BlockQuoteNoSpaceStart;

View File

@ -0,0 +1,39 @@
error: unportable markdown
--> $DIR/unportable-markdown.rs:21:10
|
LL | /// test [^foo][^bar]
| ^^^^^^
|
= help: confusing footnote reference and link
note: the lint level is defined here
--> $DIR/unportable-markdown.rs:8:9
|
LL | #![deny(rustdoc::unportable_markdown)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
help: if it should not be a footnote, escape it
|
LL | /// test \[^foo][^bar]
| +
help: if the footnote is intended, add a space
|
LL | /// test [^foo] [^bar]
| +
error: unportable markdown
--> $DIR/unportable-markdown.rs:49:5
|
LL | /// >bar
| ^
|
= help: confusing block quote with no space after the `>` marker
help: if the quote is intended, add a space
|
LL | /// > bar
| +
help: if it should not be a quote, escape it
|
LL | /// \>bar
| +
error: aborting due to 2 previous errors