From b1d232a6da800b26f18d79a0340436fa4aec7327 Mon Sep 17 00:00:00 2001 From: Kyle Lin Date: Thu, 29 Jun 2023 12:07:24 +0800 Subject: [PATCH] rework link parsing loop --- src/librustdoc/html/markdown.rs | 89 ++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 24 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 0ba2d992392..b5ab8a7b12a 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -50,7 +50,7 @@ use crate::html::render::small_url_encode; use crate::html::toc::TocBuilder; use pulldown_cmark::{ - html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, + html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, Options, Parser, Tag, }; #[cfg(test)] @@ -1240,6 +1240,7 @@ pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> Strin pub(crate) struct MarkdownLink { pub kind: LinkType, pub link: String, + pub display_text: String, pub range: MarkdownLinkRange, } @@ -1263,8 +1264,8 @@ impl MarkdownLinkRange { } } -pub(crate) fn markdown_links( - md: &str, +pub(crate) fn markdown_links<'md, R>( + md: &'md str, preprocess_link: impl Fn(MarkdownLink) -> Option, ) -> Vec { if md.is_empty() { @@ -1375,32 +1376,72 @@ pub(crate) fn markdown_links( MarkdownLinkRange::Destination(range.clone()) }; - Parser::new_with_broken_link_callback( + let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into())); + let mut event_iter = Parser::new_with_broken_link_callback( md, main_body_opts(), - Some(&mut |link: BrokenLink<'_>| Some((link.reference, "".into()))), + Some(&mut broken_link_callback), ) - .into_offset_iter() - .filter_map(|(event, span)| match event { - Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { - let range = match link_type { - // Link is pulled from the link itself. - LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => { - span_for_offset_backward(span, b'[', b']') + .into_offset_iter(); + let mut links = Vec::new(); + + while let Some((event, span)) = event_iter.next() { + match event { + Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { + let range = match link_type { + // Link is pulled from the link itself. + LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => { + span_for_offset_backward(span, b'[', b']') + } + LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'), + LinkType::Inline => span_for_offset_backward(span, b'(', b')'), + // Link is pulled from elsewhere in the document. + LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => { + span_for_link(&dest, span) + } + LinkType::Autolink | LinkType::Email => unreachable!(), + }; + + let display_text = + collect_link_data(&mut event_iter).map_or(String::new(), CowStr::into_string); + + if let Some(link) = preprocess_link(MarkdownLink { + kind: link_type, + display_text, + link: dest.into_string(), + range, + }) { + links.push(link); } - LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'), - LinkType::Inline => span_for_offset_backward(span, b'(', b')'), - // Link is pulled from elsewhere in the document. - LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => { - span_for_link(&dest, span) - } - LinkType::Autolink | LinkType::Email => unreachable!(), - }; - preprocess_link(MarkdownLink { kind: link_type, range, link: dest.into_string() }) + } + _ => {} } - _ => None, - }) - .collect() + } + + links +} + +fn collect_link_data<'input, 'callback>( + event_iter: &mut OffsetIter<'input, 'callback>, +) -> Option> { + let mut display_text = None; + + while let Some((event, _span)) = event_iter.next() { + match event { + Event::Text(code) => { + display_text = Some(code); + } + Event::Code(code) => { + display_text = Some(code); + } + Event::End(_) => { + break; + } + _ => {} + } + } + + display_text } #[derive(Debug)]