Auto merge of #127127 - notriddle:notriddle/pulldown-cmark-0.11, r=GuillaumeGomez
rustdoc: update to pulldown-cmark 0.11. (r? rustdoc) This pull request updates rustdoc to the latest version of pulldown-cmark. Along with adding new markdown extensions (which this PR doesn't enable), the new pulldown-cmark version also fixes a large number of bugs. Because all text files successfully parse as markdown, these bugfixes change the output, which can break people's existing docs. A crater run, https://github.com/rust-lang/rust/pull/121659, has already been completed for this change. The first commit upgrades and fixes rustdoc. The second commit adds a lint for the footnote and block quote parser changes, which break the largest number of docs in the Crater run. The strikethrough change was mitigated in pulldown-cmark itself. Unblocks https://github.com/rust-lang/rust-clippy/pull/12876
This commit is contained in:
commit
66b4f0021b
23
Cargo.lock
23
Cargo.lock
@ -3141,7 +3141,19 @@ dependencies = [
|
||||
"bitflags 2.5.0",
|
||||
"getopts",
|
||||
"memchr",
|
||||
"pulldown-cmark-escape",
|
||||
"pulldown-cmark-escape 0.10.1",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulldown-cmark"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0"
|
||||
dependencies = [
|
||||
"bitflags 2.5.0",
|
||||
"memchr",
|
||||
"pulldown-cmark-escape 0.11.0",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
@ -3151,6 +3163,12 @@ version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3"
|
||||
|
||||
[[package]]
|
||||
name = "pulldown-cmark-escape"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
|
||||
|
||||
[[package]]
|
||||
name = "pulldown-cmark-to-cmark"
|
||||
version = "13.0.0"
|
||||
@ -4604,7 +4622,7 @@ name = "rustc_resolve"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"bitflags 2.5.0",
|
||||
"pulldown-cmark 0.9.6",
|
||||
"pulldown-cmark 0.11.0",
|
||||
"rustc_arena",
|
||||
"rustc_ast",
|
||||
"rustc_ast_pretty",
|
||||
@ -4883,6 +4901,7 @@ dependencies = [
|
||||
"indexmap",
|
||||
"itertools",
|
||||
"minifier",
|
||||
"pulldown-cmark 0.9.6",
|
||||
"regex",
|
||||
"rustdoc-json-types",
|
||||
"serde",
|
||||
|
@ -35,11 +35,11 @@
|
||||
//! | | | |
|
||||
//! | `ParallelIterator` | `Iterator` | `rayon::iter::ParallelIterator` |
|
||||
//!
|
||||
//! [^1] `MTLock` is similar to `Lock`, but the serial version avoids the cost
|
||||
//! [^1]: `MTLock` is similar to `Lock`, but the serial version avoids the cost
|
||||
//! of a `RefCell`. This is appropriate when interior mutability is not
|
||||
//! required.
|
||||
//!
|
||||
//! [^2] `MTRef`, `MTLockRef` are type aliases.
|
||||
//! [^2]: `MTRef`, `MTLockRef` are type aliases.
|
||||
|
||||
pub use crate::marker::*;
|
||||
use std::collections::HashMap;
|
||||
|
@ -6,7 +6,7 @@ edition = "2021"
|
||||
[dependencies]
|
||||
# tidy-alphabetical-start
|
||||
bitflags = "2.4.1"
|
||||
pulldown-cmark = { version = "0.9.6", default-features = false }
|
||||
pulldown-cmark = { version = "0.11", features = ["html"], default-features = false }
|
||||
rustc_arena = { path = "../rustc_arena" }
|
||||
rustc_ast = { path = "../rustc_ast" }
|
||||
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
|
||||
|
@ -1,4 +1,6 @@
|
||||
use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag};
|
||||
use pulldown_cmark::{
|
||||
BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
|
||||
};
|
||||
use rustc_ast as ast;
|
||||
use rustc_ast::util::comments::beautify_doc_string;
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
@ -427,7 +429,9 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
|
||||
|
||||
while let Some(event) = event_iter.next() {
|
||||
match event {
|
||||
Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
|
||||
Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ })
|
||||
if may_be_doc_link(link_type) =>
|
||||
{
|
||||
if matches!(
|
||||
link_type,
|
||||
LinkType::Inline
|
||||
@ -441,7 +445,7 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
|
||||
}
|
||||
}
|
||||
|
||||
links.push(preprocess_link(&dest));
|
||||
links.push(preprocess_link(&dest_url));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@ -451,8 +455,8 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
|
||||
}
|
||||
|
||||
/// Collects additional data of link.
|
||||
fn collect_link_data<'input, 'callback>(
|
||||
event_iter: &mut Parser<'input, 'callback>,
|
||||
fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
|
||||
event_iter: &mut Parser<'input, F>,
|
||||
) -> Option<Box<str>> {
|
||||
let mut display_text: Option<String> = None;
|
||||
let mut append_text = |text: CowStr<'_>| {
|
||||
|
@ -13,6 +13,7 @@ base64 = "0.21.7"
|
||||
itertools = "0.12"
|
||||
indexmap = "2"
|
||||
minifier = "0.3.0"
|
||||
pulldown-cmark-old = { version = "0.9.6", package = "pulldown-cmark", default-features = false }
|
||||
regex = "1"
|
||||
rustdoc-json-types = { path = "../rustdoc-json-types" }
|
||||
serde_json = "1.0"
|
||||
|
@ -54,7 +54,8 @@
|
||||
use crate::html::toc::TocBuilder;
|
||||
|
||||
use pulldown_cmark::{
|
||||
html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, Options, Parser, Tag,
|
||||
html, BrokenLink, BrokenLinkCallback, CodeBlockKind, CowStr, Event, LinkType, OffsetIter,
|
||||
Options, Parser, Tag, TagEnd,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
@ -230,7 +231,7 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut original_text = String::new();
|
||||
for event in &mut self.inner {
|
||||
match event {
|
||||
Event::End(Tag::CodeBlock(..)) => break,
|
||||
Event::End(TagEnd::CodeBlock) => break,
|
||||
Event::Text(ref s) => {
|
||||
original_text.push_str(s);
|
||||
}
|
||||
@ -359,16 +360,17 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
match &mut event {
|
||||
// This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]`
|
||||
// Remove any disambiguator.
|
||||
Some(Event::Start(Tag::Link(
|
||||
Some(Event::Start(Tag::Link {
|
||||
// [fn@f] or [fn@f][]
|
||||
LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
|
||||
dest,
|
||||
link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
|
||||
dest_url,
|
||||
title,
|
||||
))) => {
|
||||
debug!("saw start of shortcut link to {dest} with title {title}");
|
||||
..
|
||||
})) => {
|
||||
debug!("saw start of shortcut link to {dest_url} with title {title}");
|
||||
// If this is a shortcut link, it was resolved by the broken_link_callback.
|
||||
// So the URL will already be updated properly.
|
||||
let link = self.links.iter().find(|&link| *link.href == **dest);
|
||||
let link = self.links.iter().find(|&link| *link.href == **dest_url);
|
||||
// Since this is an external iterator, we can't replace the inner text just yet.
|
||||
// Store that we saw a link so we know to replace it later.
|
||||
if let Some(link) = link {
|
||||
@ -381,16 +383,9 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
}
|
||||
}
|
||||
// Now that we're done with the shortcut link, don't replace any more text.
|
||||
Some(Event::End(Tag::Link(
|
||||
LinkType::ShortcutUnknown | LinkType::CollapsedUnknown,
|
||||
dest,
|
||||
_,
|
||||
))) => {
|
||||
debug!("saw end of shortcut link to {dest}");
|
||||
if self.links.iter().any(|link| *link.href == **dest) {
|
||||
assert!(self.shortcut_link.is_some(), "saw closing link without opening tag");
|
||||
self.shortcut_link = None;
|
||||
}
|
||||
Some(Event::End(TagEnd::Link)) if self.shortcut_link.is_some() => {
|
||||
debug!("saw end of shortcut link");
|
||||
self.shortcut_link = None;
|
||||
}
|
||||
// Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link.
|
||||
// [`fn@f`]
|
||||
@ -433,9 +428,11 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
}
|
||||
// If this is a link, but not a shortcut link,
|
||||
// replace the URL, since the broken_link_callback was not called.
|
||||
Some(Event::Start(Tag::Link(_, dest, title))) => {
|
||||
if let Some(link) = self.links.iter().find(|&link| *link.original_text == **dest) {
|
||||
*dest = CowStr::Borrowed(link.href.as_ref());
|
||||
Some(Event::Start(Tag::Link { dest_url, title, .. })) => {
|
||||
if let Some(link) =
|
||||
self.links.iter().find(|&link| *link.original_text == **dest_url)
|
||||
{
|
||||
*dest_url = CowStr::Borrowed(link.href.as_ref());
|
||||
if title.is_empty() && !link.tooltip.is_empty() {
|
||||
*title = CowStr::Borrowed(link.tooltip.as_ref());
|
||||
}
|
||||
@ -477,9 +474,9 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
self.stored_events.push_back(Event::Start(Tag::Table(t)));
|
||||
Event::Html(CowStr::Borrowed("<div>"))
|
||||
}
|
||||
Event::End(Tag::Table(t)) => {
|
||||
Event::End(TagEnd::Table) => {
|
||||
self.stored_events.push_back(Event::Html(CowStr::Borrowed("</div>")));
|
||||
Event::End(Tag::Table(t))
|
||||
Event::End(TagEnd::Table)
|
||||
}
|
||||
e => e,
|
||||
})
|
||||
@ -519,11 +516,11 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
}
|
||||
|
||||
let event = self.inner.next();
|
||||
if let Some((Event::Start(Tag::Heading(level, _, _)), _)) = event {
|
||||
if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event {
|
||||
let mut id = String::new();
|
||||
for event in &mut self.inner {
|
||||
match &event.0 {
|
||||
Event::End(Tag::Heading(..)) => break,
|
||||
Event::End(TagEnd::Heading(_)) => break,
|
||||
Event::Text(text) | Event::Code(text) => {
|
||||
id.extend(text.chars().filter_map(slugify));
|
||||
self.buf.push_back(event);
|
||||
@ -566,27 +563,27 @@ fn new(iter: I) -> Self {
|
||||
}
|
||||
}
|
||||
|
||||
fn check_if_allowed_tag(t: &Tag<'_>) -> bool {
|
||||
fn check_if_allowed_tag(t: &TagEnd) -> bool {
|
||||
matches!(
|
||||
t,
|
||||
Tag::Paragraph
|
||||
| Tag::Emphasis
|
||||
| Tag::Strong
|
||||
| Tag::Strikethrough
|
||||
| Tag::Link(..)
|
||||
| Tag::BlockQuote
|
||||
TagEnd::Paragraph
|
||||
| TagEnd::Emphasis
|
||||
| TagEnd::Strong
|
||||
| TagEnd::Strikethrough
|
||||
| TagEnd::Link
|
||||
| TagEnd::BlockQuote
|
||||
)
|
||||
}
|
||||
|
||||
fn is_forbidden_tag(t: &Tag<'_>) -> bool {
|
||||
fn is_forbidden_tag(t: &TagEnd) -> bool {
|
||||
matches!(
|
||||
t,
|
||||
Tag::CodeBlock(_)
|
||||
| Tag::Table(_)
|
||||
| Tag::TableHead
|
||||
| Tag::TableRow
|
||||
| Tag::TableCell
|
||||
| Tag::FootnoteDefinition(_)
|
||||
TagEnd::CodeBlock
|
||||
| TagEnd::Table
|
||||
| TagEnd::TableHead
|
||||
| TagEnd::TableRow
|
||||
| TagEnd::TableCell
|
||||
| TagEnd::FootnoteDefinition
|
||||
)
|
||||
}
|
||||
|
||||
@ -604,12 +601,12 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut is_start = true;
|
||||
let is_allowed_tag = match event {
|
||||
Event::Start(ref c) => {
|
||||
if is_forbidden_tag(c) {
|
||||
if is_forbidden_tag(&c.to_end()) {
|
||||
self.skipped_tags += 1;
|
||||
return None;
|
||||
}
|
||||
self.depth += 1;
|
||||
check_if_allowed_tag(c)
|
||||
check_if_allowed_tag(&c.to_end())
|
||||
}
|
||||
Event::End(ref c) => {
|
||||
if is_forbidden_tag(c) {
|
||||
@ -633,7 +630,7 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
if is_start {
|
||||
Some(Event::Start(Tag::Paragraph))
|
||||
} else {
|
||||
Some(Event::End(Tag::Paragraph))
|
||||
Some(Event::End(TagEnd::Paragraph))
|
||||
}
|
||||
} else {
|
||||
Some(event)
|
||||
@ -679,7 +676,7 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => {
|
||||
let mut content = Vec::new();
|
||||
for (event, _) in &mut self.inner {
|
||||
if let Event::End(Tag::FootnoteDefinition(..)) = event {
|
||||
if let Event::End(TagEnd::FootnoteDefinition) = event {
|
||||
break;
|
||||
}
|
||||
content.push(event);
|
||||
@ -696,7 +693,7 @@ fn next(&mut self) -> Option<Self::Item> {
|
||||
for (mut content, id) in v {
|
||||
write!(ret, "<li id=\"fn{id}\">").unwrap();
|
||||
let mut is_paragraph = false;
|
||||
if let Some(&Event::End(Tag::Paragraph)) = content.last() {
|
||||
if let Some(&Event::End(TagEnd::Paragraph)) = content.last() {
|
||||
content.pop();
|
||||
is_paragraph = true;
|
||||
}
|
||||
@ -806,7 +803,7 @@ pub(crate) fn find_codes<T: doctest::DoctestVisitor>(
|
||||
tests.visit_test(text, block_info, line);
|
||||
prev_offset = offset.start;
|
||||
}
|
||||
Event::Start(Tag::Heading(level, _, _)) => {
|
||||
Event::Start(Tag::Heading { level, .. }) => {
|
||||
register_header = Some(level as u32);
|
||||
}
|
||||
Event::Text(ref s) if register_header.is_some() => {
|
||||
@ -1432,7 +1429,7 @@ pub(crate) fn into_string(self) -> String {
|
||||
|
||||
// Treat inline HTML as plain text.
|
||||
let p = p.map(|event| match event.0 {
|
||||
Event::Html(text) => (Event::Text(text), event.1),
|
||||
Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1),
|
||||
_ => event,
|
||||
});
|
||||
|
||||
@ -1442,7 +1439,7 @@ pub(crate) fn into_string(self) -> String {
|
||||
let p = Footnotes::new(p);
|
||||
let p = TableWrapper::new(p.map(|(ev, _)| ev));
|
||||
let p = p.filter(|event| {
|
||||
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph))
|
||||
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
|
||||
});
|
||||
html::push_html(&mut s, p);
|
||||
|
||||
@ -1472,7 +1469,7 @@ pub(crate) fn into_string_with_has_more_content(self) -> (String, bool) {
|
||||
let mut s = String::new();
|
||||
|
||||
let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| {
|
||||
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph))
|
||||
!matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph))
|
||||
});
|
||||
|
||||
html::push_html(&mut s, without_paragraphs);
|
||||
@ -1544,8 +1541,8 @@ fn markdown_summary_with_limit(
|
||||
_ => {}
|
||||
},
|
||||
Event::End(tag) => match tag {
|
||||
Tag::Emphasis | Tag::Strong => buf.close_tag(),
|
||||
Tag::Paragraph | Tag::Heading(..) => return ControlFlow::Break(()),
|
||||
TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(),
|
||||
TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()),
|
||||
_ => {}
|
||||
},
|
||||
Event::HardBreak | Event::SoftBreak => buf.push(" ")?,
|
||||
@ -1605,8 +1602,8 @@ pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> Strin
|
||||
}
|
||||
Event::HardBreak | Event::SoftBreak => s.push(' '),
|
||||
Event::Start(Tag::CodeBlock(..)) => break,
|
||||
Event::End(Tag::Paragraph) => break,
|
||||
Event::End(Tag::Heading(..)) => break,
|
||||
Event::End(TagEnd::Paragraph) => break,
|
||||
Event::End(TagEnd::Heading(..)) => break,
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
@ -1765,7 +1762,7 @@ pub(crate) fn markdown_links<'md, R>(
|
||||
|
||||
while let Some((event, span)) = event_iter.next() {
|
||||
match event {
|
||||
Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
|
||||
Event::Start(Tag::Link { link_type, dest_url, .. }) if may_be_doc_link(link_type) => {
|
||||
let range = match link_type {
|
||||
// Link is pulled from the link itself.
|
||||
LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
|
||||
@ -1775,7 +1772,7 @@ pub(crate) fn markdown_links<'md, R>(
|
||||
LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
|
||||
// Link is pulled from elsewhere in the document.
|
||||
LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
|
||||
span_for_link(&dest, span)
|
||||
span_for_link(&dest_url, span)
|
||||
}
|
||||
LinkType::Autolink | LinkType::Email => unreachable!(),
|
||||
};
|
||||
@ -1795,7 +1792,7 @@ pub(crate) fn markdown_links<'md, R>(
|
||||
|
||||
if let Some(link) = preprocess_link(MarkdownLink {
|
||||
kind: link_type,
|
||||
link: dest.into_string(),
|
||||
link: dest_url.into_string(),
|
||||
display_text,
|
||||
range,
|
||||
}) {
|
||||
@ -1810,8 +1807,8 @@ pub(crate) fn markdown_links<'md, R>(
|
||||
}
|
||||
|
||||
/// Collects additional data of link.
|
||||
fn collect_link_data<'input, 'callback>(
|
||||
event_iter: &mut OffsetIter<'input, 'callback>,
|
||||
fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
|
||||
event_iter: &mut OffsetIter<'input, F>,
|
||||
) -> Option<String> {
|
||||
let mut display_text: Option<String> = None;
|
||||
let mut append_text = |text: CowStr<'_>| {
|
||||
|
@ -196,6 +196,14 @@ macro_rules! declare_rustdoc_lint {
|
||||
"detects redundant explicit links in doc comments"
|
||||
}
|
||||
|
||||
declare_rustdoc_lint! {
|
||||
/// This compatibility lint checks for Markdown syntax that works in the old engine but not
|
||||
/// the new one.
|
||||
UNPORTABLE_MARKDOWN,
|
||||
Warn,
|
||||
"detects markdown that is interpreted differently in different parser"
|
||||
}
|
||||
|
||||
pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
|
||||
vec![
|
||||
BROKEN_INTRA_DOC_LINKS,
|
||||
@ -209,6 +217,7 @@ macro_rules! declare_rustdoc_lint {
|
||||
MISSING_CRATE_LEVEL_DOCS,
|
||||
UNESCAPED_BACKTICKS,
|
||||
REDUNDANT_EXPLICIT_LINKS,
|
||||
UNPORTABLE_MARKDOWN,
|
||||
]
|
||||
});
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
mod html_tags;
|
||||
mod redundant_explicit_links;
|
||||
mod unescaped_backticks;
|
||||
mod unportable_markdown;
|
||||
|
||||
use super::Pass;
|
||||
use crate::clean::*;
|
||||
@ -31,6 +32,7 @@ fn visit_item(&mut self, item: &Item) {
|
||||
html_tags::visit_item(self.cx, item);
|
||||
unescaped_backticks::visit_item(self.cx, item);
|
||||
redundant_explicit_links::visit_item(self.cx, item);
|
||||
unportable_markdown::visit_item(self.cx, item);
|
||||
|
||||
self.visit_item_recur(item)
|
||||
}
|
||||
|
@ -42,11 +42,11 @@ pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item) {
|
||||
match event {
|
||||
Event::Text(s) => find_raw_urls(cx, &s, range, &report_diag),
|
||||
// We don't want to check the text inside code blocks or links.
|
||||
Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => {
|
||||
Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link { .. })) => {
|
||||
while let Some((event, _)) = p.next() {
|
||||
match event {
|
||||
Event::End(end)
|
||||
if mem::discriminant(&end) == mem::discriminant(&tag) =>
|
||||
if mem::discriminant(&end) == mem::discriminant(&tag.to_end()) =>
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
use crate::core::DocContext;
|
||||
use crate::html::markdown::main_body_opts;
|
||||
|
||||
use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag};
|
||||
use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag, TagEnd};
|
||||
use rustc_resolve::rustdoc::source_span_for_markdown_range;
|
||||
|
||||
use std::iter::Peekable;
|
||||
@ -140,10 +140,10 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
|
||||
for (event, range) in p {
|
||||
match event {
|
||||
Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
|
||||
Event::Html(text) if !in_code_block => {
|
||||
Event::Html(text) | Event::InlineHtml(text) if !in_code_block => {
|
||||
extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag)
|
||||
}
|
||||
Event::End(Tag::CodeBlock(_)) => in_code_block = false,
|
||||
Event::End(TagEnd::CodeBlock) => in_code_block = false,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
use std::ops::Range;
|
||||
|
||||
use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, OffsetIter, Parser, Tag};
|
||||
use pulldown_cmark::{
|
||||
BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, OffsetIter, Parser, Tag,
|
||||
};
|
||||
use rustc_ast::NodeId;
|
||||
use rustc_errors::SuggestionStyle;
|
||||
use rustc_hir::def::{DefKind, DocLinkResMap, Namespace, Res};
|
||||
@ -95,7 +97,7 @@ fn check_redundant_explicit_link<'md>(
|
||||
|
||||
while let Some((event, link_range)) = offset_iter.next() {
|
||||
match event {
|
||||
Event::Start(Tag::Link(link_type, dest, _)) => {
|
||||
Event::Start(Tag::Link { link_type, dest_url, .. }) => {
|
||||
let link_data = collect_link_data(&mut offset_iter);
|
||||
|
||||
if let Some(resolvable_link) = link_data.resolvable_link.as_ref() {
|
||||
@ -108,7 +110,7 @@ fn check_redundant_explicit_link<'md>(
|
||||
}
|
||||
}
|
||||
|
||||
let explicit_link = dest.to_string();
|
||||
let explicit_link = dest_url.to_string();
|
||||
let display_link = link_data.resolvable_link.clone()?;
|
||||
|
||||
if explicit_link.ends_with(&display_link) || display_link.ends_with(&explicit_link)
|
||||
@ -122,7 +124,7 @@ fn check_redundant_explicit_link<'md>(
|
||||
doc,
|
||||
resolutions,
|
||||
link_range,
|
||||
dest.to_string(),
|
||||
dest_url.to_string(),
|
||||
link_data,
|
||||
if link_type == LinkType::Inline {
|
||||
(b'(', b')')
|
||||
@ -139,7 +141,7 @@ fn check_redundant_explicit_link<'md>(
|
||||
doc,
|
||||
resolutions,
|
||||
link_range,
|
||||
&dest,
|
||||
&dest_url,
|
||||
link_data,
|
||||
);
|
||||
}
|
||||
@ -259,7 +261,9 @@ fn find_resolution(resolutions: &DocLinkResMap, path: &str) -> Option<Res<NodeId
|
||||
}
|
||||
|
||||
/// Collects all necessary data of link.
|
||||
fn collect_link_data(offset_iter: &mut OffsetIter<'_, '_>) -> LinkData {
|
||||
fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
|
||||
offset_iter: &mut OffsetIter<'input, F>,
|
||||
) -> LinkData {
|
||||
let mut resolvable_link = None;
|
||||
let mut resolvable_link_range = None;
|
||||
let mut display_link = String::new();
|
||||
|
152
src/librustdoc/passes/lint/unportable_markdown.rs
Normal file
152
src/librustdoc/passes/lint/unportable_markdown.rs
Normal file
@ -0,0 +1,152 @@
|
||||
//! Detects specific markdown syntax that's different between pulldown-cmark
|
||||
//! 0.9 and 0.11.
|
||||
//!
|
||||
//! This is a mitigation for old parser bugs that affected some
|
||||
//! real crates' docs. The old parser claimed to comply with CommonMark,
|
||||
//! but it did not. These warnings will eventually be removed,
|
||||
//! though some of them may become Clippy lints.
|
||||
//!
|
||||
//! <https://github.com/rust-lang/rust/pull/121659#issuecomment-1992752820>
|
||||
//!
|
||||
//! <https://rustc-dev-guide.rust-lang.org/bug-fix-procedure.html#add-the-lint-to-the-list-of-removed-lists>
|
||||
|
||||
use crate::clean::Item;
|
||||
use crate::core::DocContext;
|
||||
use pulldown_cmark as cmarkn;
|
||||
use pulldown_cmark_old as cmarko;
|
||||
use rustc_lint_defs::Applicability;
|
||||
use rustc_resolve::rustdoc::source_span_for_markdown_range;
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
|
||||
let tcx = cx.tcx;
|
||||
let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else {
|
||||
// If non-local, no need to check anything.
|
||||
return;
|
||||
};
|
||||
|
||||
let dox = item.doc_value();
|
||||
if dox.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
// P1: unintended strikethrough was fixed by requiring single-tildes to flank
|
||||
// the same way underscores do, so nothing is done here
|
||||
|
||||
// P2: block quotes without following space parsed wrong
|
||||
//
|
||||
// This is the set of starting points for block quotes with no space after
|
||||
// the `>`. It is populated by the new parser, and if the old parser fails to
|
||||
// clear it out, it'll produce a warning.
|
||||
let mut spaceless_block_quotes = BTreeSet::new();
|
||||
|
||||
// P3: missing footnote references
|
||||
//
|
||||
// This is populated by listening for FootnoteReference from
|
||||
// the new parser and old parser.
|
||||
let mut missing_footnote_references = BTreeMap::new();
|
||||
let mut found_footnote_references = BTreeSet::new();
|
||||
|
||||
// populate problem cases from new parser
|
||||
{
|
||||
pub fn main_body_opts_new() -> cmarkn::Options {
|
||||
cmarkn::Options::ENABLE_TABLES
|
||||
| cmarkn::Options::ENABLE_FOOTNOTES
|
||||
| cmarkn::Options::ENABLE_STRIKETHROUGH
|
||||
| cmarkn::Options::ENABLE_TASKLISTS
|
||||
| cmarkn::Options::ENABLE_SMART_PUNCTUATION
|
||||
}
|
||||
let mut parser_new = cmarkn::Parser::new_ext(&dox, main_body_opts_new()).into_offset_iter();
|
||||
while let Some((event, span)) = parser_new.next() {
|
||||
if let cmarkn::Event::Start(cmarkn::Tag::BlockQuote(_)) = event {
|
||||
if !dox[span.clone()].starts_with("> ") {
|
||||
spaceless_block_quotes.insert(span.start);
|
||||
}
|
||||
}
|
||||
if let cmarkn::Event::FootnoteReference(_) = event {
|
||||
found_footnote_references.insert(span.start + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// remove cases where they don't actually differ
|
||||
{
|
||||
pub fn main_body_opts_old() -> cmarko::Options {
|
||||
cmarko::Options::ENABLE_TABLES
|
||||
| cmarko::Options::ENABLE_FOOTNOTES
|
||||
| cmarko::Options::ENABLE_STRIKETHROUGH
|
||||
| cmarko::Options::ENABLE_TASKLISTS
|
||||
| cmarko::Options::ENABLE_SMART_PUNCTUATION
|
||||
}
|
||||
let mut parser_old = cmarko::Parser::new_ext(&dox, main_body_opts_old()).into_offset_iter();
|
||||
while let Some((event, span)) = parser_old.next() {
|
||||
if let cmarko::Event::Start(cmarko::Tag::BlockQuote) = event {
|
||||
if !dox[span.clone()].starts_with("> ") {
|
||||
spaceless_block_quotes.remove(&span.start);
|
||||
}
|
||||
}
|
||||
if let cmarko::Event::FootnoteReference(_) = event {
|
||||
if !found_footnote_references.contains(&(span.start + 1)) {
|
||||
missing_footnote_references.insert(span.start + 1, span);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for start in spaceless_block_quotes {
|
||||
let (span, precise) =
|
||||
source_span_for_markdown_range(tcx, &dox, &(start..start + 1), &item.attrs.doc_strings)
|
||||
.map(|span| (span, true))
|
||||
.unwrap_or_else(|| (item.attr_span(tcx), false));
|
||||
|
||||
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, |lint| {
|
||||
lint.primary_message("unportable markdown");
|
||||
lint.help(format!("confusing block quote with no space after the `>` marker"));
|
||||
if precise {
|
||||
lint.span_suggestion(
|
||||
span.shrink_to_hi(),
|
||||
"if the quote is intended, add a space",
|
||||
" ",
|
||||
Applicability::MaybeIncorrect,
|
||||
);
|
||||
lint.span_suggestion(
|
||||
span.shrink_to_lo(),
|
||||
"if it should not be a quote, escape it",
|
||||
"\\",
|
||||
Applicability::MaybeIncorrect,
|
||||
);
|
||||
}
|
||||
});
|
||||
}
|
||||
for (_caret, span) in missing_footnote_references {
|
||||
let (ref_span, precise) =
|
||||
source_span_for_markdown_range(tcx, &dox, &span, &item.attrs.doc_strings)
|
||||
.map(|span| (span, true))
|
||||
.unwrap_or_else(|| (item.attr_span(tcx), false));
|
||||
|
||||
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, ref_span, |lint| {
|
||||
lint.primary_message("unportable markdown");
|
||||
if precise {
|
||||
lint.span_suggestion(
|
||||
ref_span.shrink_to_lo(),
|
||||
"if it should not be a footnote, escape it",
|
||||
"\\",
|
||||
Applicability::MaybeIncorrect,
|
||||
);
|
||||
}
|
||||
if dox.as_bytes().get(span.end) == Some(&b'[') {
|
||||
lint.help("confusing footnote reference and link");
|
||||
if precise {
|
||||
lint.span_suggestion(
|
||||
ref_span.shrink_to_hi(),
|
||||
"if the footnote is intended, add a space",
|
||||
" ",
|
||||
Applicability::MaybeIncorrect,
|
||||
);
|
||||
} else {
|
||||
lint.help("there should be a space between the link and the footnote");
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
@ -6,10 +6,10 @@
|
||||
use clippy_utils::visitors::Visitable;
|
||||
use clippy_utils::{in_constant, is_entrypoint_fn, is_trait_impl_item, method_chain_args};
|
||||
use pulldown_cmark::Event::{
|
||||
Code, End, FootnoteReference, HardBreak, Html, Rule, SoftBreak, Start, TaskListMarker, Text,
|
||||
Code, DisplayMath, End, FootnoteReference, HardBreak, Html, InlineHtml, InlineMath, Rule, SoftBreak, Start, TaskListMarker, Text,
|
||||
};
|
||||
use pulldown_cmark::Tag::{BlockQuote, CodeBlock, FootnoteDefinition, Heading, Item, Link, Paragraph};
|
||||
use pulldown_cmark::{BrokenLink, CodeBlockKind, CowStr, Options};
|
||||
use pulldown_cmark::{BrokenLink, CodeBlockKind, CowStr, Options, TagEnd};
|
||||
use rustc_ast::ast::Attribute;
|
||||
use rustc_data_structures::fx::FxHashSet;
|
||||
use rustc_hir::intravisit::{self, Visitor};
|
||||
@ -659,7 +659,7 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
|
||||
|
||||
while let Some((event, range)) = events.next() {
|
||||
match event {
|
||||
Html(tag) => {
|
||||
Html(tag) | InlineHtml(tag) => {
|
||||
if tag.starts_with("<code") {
|
||||
code_level += 1;
|
||||
} else if tag.starts_with("</code") {
|
||||
@ -670,11 +670,11 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
|
||||
blockquote_level -= 1;
|
||||
}
|
||||
},
|
||||
Start(BlockQuote) => {
|
||||
Start(BlockQuote(_)) => {
|
||||
blockquote_level += 1;
|
||||
containers.push(Container::Blockquote);
|
||||
},
|
||||
End(BlockQuote) => {
|
||||
End(TagEnd::BlockQuote) => {
|
||||
blockquote_level -= 1;
|
||||
containers.pop();
|
||||
},
|
||||
@ -699,15 +699,15 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
|
||||
}
|
||||
}
|
||||
},
|
||||
End(CodeBlock(_)) => {
|
||||
End(TagEnd::CodeBlock) => {
|
||||
in_code = false;
|
||||
is_rust = false;
|
||||
ignore = false;
|
||||
},
|
||||
Start(Link(_, url, _)) => in_link = Some(url),
|
||||
End(Link(..)) => in_link = None,
|
||||
Start(Heading(_, _, _) | Paragraph | Item) => {
|
||||
if let Start(Heading(_, _, _)) = event {
|
||||
Start(Link { dest_url, .. }) => in_link = Some(dest_url),
|
||||
End(TagEnd::Link) => in_link = None,
|
||||
Start(Heading { .. } | Paragraph | Item) => {
|
||||
if let Start(Heading { .. }) = event {
|
||||
in_heading = true;
|
||||
}
|
||||
if let Start(Item) = event {
|
||||
@ -720,11 +720,11 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
|
||||
ticks_unbalanced = false;
|
||||
paragraph_range = range;
|
||||
},
|
||||
End(Heading(_, _, _) | Paragraph | Item) => {
|
||||
if let End(Heading(_, _, _)) = event {
|
||||
End(TagEnd::Heading(_) | TagEnd::Paragraph | TagEnd::Item) => {
|
||||
if let End(TagEnd::Heading(_)) = event {
|
||||
in_heading = false;
|
||||
}
|
||||
if let End(Item) = event {
|
||||
if let End(TagEnd::Item) = event {
|
||||
containers.pop();
|
||||
}
|
||||
if ticks_unbalanced && let Some(span) = fragments.span(cx, paragraph_range.clone()) {
|
||||
@ -746,8 +746,8 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
|
||||
text_to_check = Vec::new();
|
||||
},
|
||||
Start(FootnoteDefinition(..)) => in_footnote_definition = true,
|
||||
End(FootnoteDefinition(..)) => in_footnote_definition = false,
|
||||
Start(_tag) | End(_tag) => (), // We don't care about other tags
|
||||
End(TagEnd::FootnoteDefinition) => in_footnote_definition = false,
|
||||
Start(_) | End(_) => (), // We don't care about other tags
|
||||
SoftBreak | HardBreak => {
|
||||
if !containers.is_empty()
|
||||
&& let Some((next_event, next_range)) = events.peek()
|
||||
@ -765,7 +765,7 @@ fn check_doc<'a, Events: Iterator<Item = (pulldown_cmark::Event<'a>, Range<usize
|
||||
);
|
||||
}
|
||||
},
|
||||
TaskListMarker(_) | Code(_) | Rule => (),
|
||||
TaskListMarker(_) | Code(_) | Rule | InlineMath(..) | DisplayMath(..) => (),
|
||||
FootnoteReference(text) | Text(text) => {
|
||||
paragraph_range.end = range.end;
|
||||
ticks_unbalanced |= text.contains('`') && !in_code;
|
||||
|
@ -335,6 +335,7 @@
|
||||
"proc-macro2",
|
||||
"psm",
|
||||
"pulldown-cmark",
|
||||
"pulldown-cmark-escape",
|
||||
"punycode",
|
||||
"quote",
|
||||
"r-efi",
|
||||
|
63
tests/rustdoc-ui/unportable-markdown.rs
Normal file
63
tests/rustdoc-ui/unportable-markdown.rs
Normal file
@ -0,0 +1,63 @@
|
||||
// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929
|
||||
//
|
||||
// A series of test cases for CommonMark corner cases that pulldown-cmark 0.11 fixes.
|
||||
//
|
||||
// This version of the lint is targeted at two especially-common cases where docs got broken.
|
||||
// Other differences in parsing should not warn.
|
||||
#![allow(rustdoc::broken_intra_doc_links)]
|
||||
#![deny(rustdoc::unportable_markdown)]
|
||||
|
||||
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/654>
|
||||
///
|
||||
/// Test footnote [^foot].
|
||||
///
|
||||
/// [^foot]: This is nested within the footnote now, but didn't used to be.
|
||||
///
|
||||
/// This is a multi-paragraph footnote.
|
||||
pub struct GfmFootnotes;
|
||||
|
||||
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/773>
|
||||
///
|
||||
/// test [^foo][^bar]
|
||||
//~^ ERROR unportable markdown
|
||||
///
|
||||
/// [^foo]: test
|
||||
/// [^bar]: test2
|
||||
pub struct FootnoteSmashedName;
|
||||
|
||||
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/829>
|
||||
///
|
||||
/// - _t
|
||||
/// # test
|
||||
/// t_
|
||||
pub struct NestingCornerCase;
|
||||
|
||||
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/650>
|
||||
///
|
||||
/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
|
||||
pub struct Emphasis1;
|
||||
|
||||
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/732>
|
||||
///
|
||||
/// |
|
||||
/// |
|
||||
pub struct NotEnoughTable;
|
||||
|
||||
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/675>
|
||||
///
|
||||
/// foo
|
||||
/// >bar
|
||||
//~^ ERROR unportable markdown
|
||||
pub struct BlockQuoteNoSpace;
|
||||
|
||||
/// Negative test.
|
||||
///
|
||||
/// foo
|
||||
/// > bar
|
||||
pub struct BlockQuoteSpace;
|
||||
|
||||
/// Negative test.
|
||||
///
|
||||
/// >bar
|
||||
/// baz
|
||||
pub struct BlockQuoteNoSpaceStart;
|
39
tests/rustdoc-ui/unportable-markdown.stderr
Normal file
39
tests/rustdoc-ui/unportable-markdown.stderr
Normal file
@ -0,0 +1,39 @@
|
||||
error: unportable markdown
|
||||
--> $DIR/unportable-markdown.rs:21:10
|
||||
|
|
||||
LL | /// test [^foo][^bar]
|
||||
| ^^^^^^
|
||||
|
|
||||
= help: confusing footnote reference and link
|
||||
note: the lint level is defined here
|
||||
--> $DIR/unportable-markdown.rs:8:9
|
||||
|
|
||||
LL | #![deny(rustdoc::unportable_markdown)]
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
help: if it should not be a footnote, escape it
|
||||
|
|
||||
LL | /// test \[^foo][^bar]
|
||||
| +
|
||||
help: if the footnote is intended, add a space
|
||||
|
|
||||
LL | /// test [^foo] [^bar]
|
||||
| +
|
||||
|
||||
error: unportable markdown
|
||||
--> $DIR/unportable-markdown.rs:49:5
|
||||
|
|
||||
LL | /// >bar
|
||||
| ^
|
||||
|
|
||||
= help: confusing block quote with no space after the `>` marker
|
||||
help: if the quote is intended, add a space
|
||||
|
|
||||
LL | /// > bar
|
||||
| +
|
||||
help: if it should not be a quote, escape it
|
||||
|
|
||||
LL | /// \>bar
|
||||
| +
|
||||
|
||||
error: aborting due to 2 previous errors
|
||||
|
Loading…
Reference in New Issue
Block a user