rework link parsing loop

This commit is contained in:
Kyle Lin 2023-06-29 12:07:24 +08:00
parent fe3eae3f31
commit b1d232a6da

View File

@ -50,7 +50,7 @@ use crate::html::render::small_url_encode;
use crate::html::toc::TocBuilder; use crate::html::toc::TocBuilder;
use pulldown_cmark::{ use pulldown_cmark::{
html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, Options, Parser, Tag, html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, Options, Parser, Tag,
}; };
#[cfg(test)] #[cfg(test)]
@ -1240,6 +1240,7 @@ pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> Strin
pub(crate) struct MarkdownLink { pub(crate) struct MarkdownLink {
pub kind: LinkType, pub kind: LinkType,
pub link: String, pub link: String,
pub display_text: String,
pub range: MarkdownLinkRange, pub range: MarkdownLinkRange,
} }
@ -1263,8 +1264,8 @@ impl MarkdownLinkRange {
} }
} }
pub(crate) fn markdown_links<R>( pub(crate) fn markdown_links<'md, R>(
md: &str, md: &'md str,
preprocess_link: impl Fn(MarkdownLink) -> Option<R>, preprocess_link: impl Fn(MarkdownLink) -> Option<R>,
) -> Vec<R> { ) -> Vec<R> {
if md.is_empty() { if md.is_empty() {
@ -1375,32 +1376,72 @@ pub(crate) fn markdown_links<R>(
MarkdownLinkRange::Destination(range.clone()) MarkdownLinkRange::Destination(range.clone())
}; };
Parser::new_with_broken_link_callback( let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
let mut event_iter = Parser::new_with_broken_link_callback(
md, md,
main_body_opts(), main_body_opts(),
Some(&mut |link: BrokenLink<'_>| Some((link.reference, "".into()))), Some(&mut broken_link_callback),
) )
.into_offset_iter() .into_offset_iter();
.filter_map(|(event, span)| match event { let mut links = Vec::new();
Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
let range = match link_type { while let Some((event, span)) = event_iter.next() {
// Link is pulled from the link itself. match event {
LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => { Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
span_for_offset_backward(span, b'[', b']') let range = match link_type {
// Link is pulled from the link itself.
LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => {
span_for_offset_backward(span, b'[', b']')
}
LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'),
LinkType::Inline => span_for_offset_backward(span, b'(', b')'),
// Link is pulled from elsewhere in the document.
LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
span_for_link(&dest, span)
}
LinkType::Autolink | LinkType::Email => unreachable!(),
};
let display_text =
collect_link_data(&mut event_iter).map_or(String::new(), CowStr::into_string);
if let Some(link) = preprocess_link(MarkdownLink {
kind: link_type,
display_text,
link: dest.into_string(),
range,
}) {
links.push(link);
} }
LinkType::CollapsedUnknown => span_for_offset_forward(span, b'[', b']'), }
LinkType::Inline => span_for_offset_backward(span, b'(', b')'), _ => {}
// Link is pulled from elsewhere in the document.
LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => {
span_for_link(&dest, span)
}
LinkType::Autolink | LinkType::Email => unreachable!(),
};
preprocess_link(MarkdownLink { kind: link_type, range, link: dest.into_string() })
} }
_ => None, }
})
.collect() links
}
/// Pulls events from `event_iter` to find the display text of a link.
///
/// Events are taken from the iterator until the first `Event::End(_)` is
/// reached or the iterator runs out. Every event read here, including that
/// terminating `End` event, is consumed and will not be seen again by the
/// caller's own loop over the same iterator.
///
/// Returns the payload of the last `Event::Text` or `Event::Code` seen before
/// stopping, or `None` when no such event was encountered.
///
/// NOTE(review): the `Event::End(_)` arm matches any end tag, not only the end
/// of the link. Presumably markup nested inside the link text (e.g. emphasis)
/// would end collection early; confirm against pulldown-cmark's event stream.
/// NOTE(review): each text/code event overwrites the previous one, so only the
/// final fragment is returned rather than a concatenation; verify this is the
/// intended display text for multi-fragment link labels.
fn collect_link_data<'input, 'callback>(
event_iter: &mut OffsetIter<'input, 'callback>,
) -> Option<CowStr<'input>> {
// Most recent text/code payload seen so far; stays `None` if there is none.
let mut display_text = None;
// The span half of each item is not used here.
while let Some((event, _span)) = event_iter.next() {
match event {
// Plain text inside the link: remember it, replacing any earlier value.
Event::Text(code) => {
display_text = Some(code);
}
// Inline code inside the link is treated the same way as plain text.
Event::Code(code) => {
display_text = Some(code);
}
// Stop at the first end tag of any kind.
Event::End(_) => {
break;
}
// All other events are skipped (but still consumed).
_ => {}
}
}
display_text
} }
#[derive(Debug)] #[derive(Debug)]