rustdoc: Optimize and refactor doc link resolution

- Cache doc link resolutions obtained early
- Cache markdown links retrieved from doc strings early
- Rename and restructure the code in early doc link resolution to be closer to #94857
This commit is contained in:
Vadim Petrochenkov 2022-04-16 23:53:13 +03:00
parent e2d3a4f631
commit 72ed101428
4 changed files with 112 additions and 40 deletions

View File

@ -1088,6 +1088,13 @@ impl Attributes {
crate fn from_ast( crate fn from_ast(
attrs: &[ast::Attribute], attrs: &[ast::Attribute],
additional_attrs: Option<(&[ast::Attribute], DefId)>, additional_attrs: Option<(&[ast::Attribute], DefId)>,
) -> Attributes {
Attributes::from_ast_iter(attrs.iter(), additional_attrs)
}
crate fn from_ast_iter<'a>(
attrs: impl Iterator<Item = &'a ast::Attribute>,
additional_attrs: Option<(&[ast::Attribute], DefId)>,
) -> Attributes { ) -> Attributes {
let mut doc_strings: Vec<DocFragment> = vec![]; let mut doc_strings: Vec<DocFragment> = vec![];
let clean_attr = |(attr, parent_module): (&ast::Attribute, Option<DefId>)| { let clean_attr = |(attr, parent_module): (&ast::Attribute, Option<DefId>)| {
@ -1115,7 +1122,7 @@ impl Attributes {
let other_attrs = additional_attrs let other_attrs = additional_attrs
.into_iter() .into_iter()
.flat_map(|(attrs, id)| attrs.iter().map(move |attr| (attr, Some(id)))) .flat_map(|(attrs, id)| attrs.iter().map(move |attr| (attr, Some(id))))
.chain(attrs.iter().map(|attr| (attr, None))) .chain(attrs.map(|attr| (attr, None)))
.filter_map(clean_attr) .filter_map(clean_attr)
.collect(); .collect();

View File

@ -4,7 +4,7 @@ use rustc_data_structures::sync::{self, Lrc};
use rustc_errors::emitter::{Emitter, EmitterWriter}; use rustc_errors::emitter::{Emitter, EmitterWriter};
use rustc_errors::json::JsonEmitter; use rustc_errors::json::JsonEmitter;
use rustc_feature::UnstableFeatures; use rustc_feature::UnstableFeatures;
use rustc_hir::def::Res; use rustc_hir::def::{Namespace, Res};
use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId}; use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId};
use rustc_hir::intravisit::{self, Visitor}; use rustc_hir::intravisit::{self, Visitor};
use rustc_hir::{HirId, Path, TraitCandidate}; use rustc_hir::{HirId, Path, TraitCandidate};
@ -29,11 +29,14 @@ use crate::clean::inline::build_external_trait;
use crate::clean::{self, ItemId, TraitWithExtraInfo}; use crate::clean::{self, ItemId, TraitWithExtraInfo};
use crate::config::{Options as RustdocOptions, OutputFormat, RenderOptions}; use crate::config::{Options as RustdocOptions, OutputFormat, RenderOptions};
use crate::formats::cache::Cache; use crate::formats::cache::Cache;
use crate::html::markdown::MarkdownLink;
use crate::passes::{self, Condition::*}; use crate::passes::{self, Condition::*};
crate use rustc_session::config::{DebuggingOptions, Input, Options}; crate use rustc_session::config::{DebuggingOptions, Input, Options};
crate struct ResolverCaches { crate struct ResolverCaches {
crate markdown_links: Option<FxHashMap<String, Vec<MarkdownLink>>>,
crate doc_link_resolutions: FxHashMap<(Symbol, Namespace, DefId), Option<Res<NodeId>>>,
/// Traits in scope for a given module. /// Traits in scope for a given module.
/// See `collect_intra_doc_links::traits_implemented_by` for more details. /// See `collect_intra_doc_links::traits_implemented_by` for more details.
crate traits_in_scope: DefIdMap<Vec<TraitCandidate>>, crate traits_in_scope: DefIdMap<Vec<TraitCandidate>>,

View File

@ -3,6 +3,7 @@
//! [RFC 1946]: https://github.com/rust-lang/rfcs/blob/master/text/1946-intra-rustdoc-links.md //! [RFC 1946]: https://github.com/rust-lang/rfcs/blob/master/text/1946-intra-rustdoc-links.md
use pulldown_cmark::LinkType; use pulldown_cmark::LinkType;
use rustc_ast::util::comments::may_have_doc_links;
use rustc_data_structures::{fx::FxHashMap, intern::Interned, stable_set::FxHashSet}; use rustc_data_structures::{fx::FxHashMap, intern::Interned, stable_set::FxHashSet};
use rustc_errors::{Applicability, Diagnostic}; use rustc_errors::{Applicability, Diagnostic};
use rustc_hir::def::Namespace::*; use rustc_hir::def::Namespace::*;
@ -556,7 +557,15 @@ impl<'a, 'tcx> LinkCollector<'a, 'tcx> {
// Resolver doesn't know about true, false, and types that aren't paths (e.g. `()`). // Resolver doesn't know about true, false, and types that aren't paths (e.g. `()`).
let result = self let result = self
.cx .cx
.enter_resolver(|resolver| resolver.resolve_rustdoc_path(path_str, ns, module_id)) .resolver_caches
.doc_link_resolutions
.get(&(Symbol::intern(path_str), ns, module_id))
.copied()
.unwrap_or_else(|| {
self.cx.enter_resolver(|resolver| {
resolver.resolve_rustdoc_path(path_str, ns, module_id)
})
})
.and_then(|res| res.try_into().ok()) .and_then(|res| res.try_into().ok())
.or_else(|| resolve_primitive(path_str, ns)) .or_else(|| resolve_primitive(path_str, ns))
.or_else(|| self.resolve_macro_rules(path_str, ns)); .or_else(|| self.resolve_macro_rules(path_str, ns));
@ -1041,16 +1050,29 @@ impl<'a, 'tcx> DocVisitor for LinkCollector<'a, 'tcx> {
// Rather than merging all documentation into one, resolve it one attribute at a time // Rather than merging all documentation into one, resolve it one attribute at a time
// so we know which module it came from. // so we know which module it came from.
for (parent_module, doc) in item.attrs.collapsed_doc_value_by_module_level() { for (parent_module, doc) in item.attrs.collapsed_doc_value_by_module_level() {
if !may_have_doc_links(&doc) {
continue;
}
debug!("combined_docs={}", doc); debug!("combined_docs={}", doc);
// NOTE: if there are links that start in one crate and end in another, this will not resolve them. // NOTE: if there are links that start in one crate and end in another, this will not resolve them.
// This is a degenerate case and it's not supported by rustdoc. // This is a degenerate case and it's not supported by rustdoc.
let parent_node = parent_module.or(parent_node); let parent_node = parent_module.or(parent_node);
for md_link in markdown_links(&doc) { let mut tmp_links = self
.cx
.resolver_caches
.markdown_links
.take()
.expect("`markdown_links` are already borrowed");
if !tmp_links.contains_key(&doc) {
tmp_links.insert(doc.clone(), markdown_links(&doc));
}
for md_link in &tmp_links[&doc] {
let link = self.resolve_link(&item, &doc, parent_node, md_link); let link = self.resolve_link(&item, &doc, parent_node, md_link);
if let Some(link) = link { if let Some(link) = link {
self.cx.cache.intra_doc_links.entry(item.item_id).or_default().push(link); self.cx.cache.intra_doc_links.entry(item.item_id).or_default().push(link);
} }
} }
self.cx.resolver_caches.markdown_links = Some(tmp_links);
} }
if item.is_mod() { if item.is_mod() {
@ -1181,7 +1203,7 @@ impl LinkCollector<'_, '_> {
item: &Item, item: &Item,
dox: &str, dox: &str,
parent_node: Option<DefId>, parent_node: Option<DefId>,
ori_link: MarkdownLink, ori_link: &MarkdownLink,
) -> Option<ItemLink> { ) -> Option<ItemLink> {
trace!("considering link '{}'", ori_link.link); trace!("considering link '{}'", ori_link.link);
@ -1320,7 +1342,7 @@ impl LinkCollector<'_, '_> {
} }
Some(ItemLink { Some(ItemLink {
link: ori_link.link, link: ori_link.link.clone(),
link_text, link_text,
did: res.def_id(self.cx.tcx), did: res.def_id(self.cx.tcx),
fragment, fragment,
@ -1343,7 +1365,7 @@ impl LinkCollector<'_, '_> {
&diag_info, &diag_info,
)?; )?;
let id = clean::register_res(self.cx, rustc_hir::def::Res::Def(kind, id)); let id = clean::register_res(self.cx, rustc_hir::def::Res::Def(kind, id));
Some(ItemLink { link: ori_link.link, link_text, did: id, fragment }) Some(ItemLink { link: ori_link.link.clone(), link_text, did: id, fragment })
} }
} }
} }

View File

@ -1,19 +1,20 @@
use crate::clean::Attributes; use crate::clean::Attributes;
use crate::core::ResolverCaches; use crate::core::ResolverCaches;
use crate::html::markdown::markdown_links; use crate::html::markdown::{markdown_links, MarkdownLink};
use crate::passes::collect_intra_doc_links::preprocess_link; use crate::passes::collect_intra_doc_links::preprocess_link;
use rustc_ast::visit::{self, AssocCtxt, Visitor}; use rustc_ast::visit::{self, AssocCtxt, Visitor};
use rustc_ast::{self as ast, ItemKind}; use rustc_ast::{self as ast, ItemKind};
use rustc_ast_lowering::ResolverAstLowering; use rustc_ast_lowering::ResolverAstLowering;
use rustc_hir::def::Namespace::TypeNS; use rustc_data_structures::fx::FxHashMap;
use rustc_hir::def::{DefKind, Res}; use rustc_hir::def::Namespace::*;
use rustc_hir::def::{DefKind, Namespace, Res};
use rustc_hir::def_id::{DefId, DefIdMap, DefIdSet, LocalDefId, CRATE_DEF_ID}; use rustc_hir::def_id::{DefId, DefIdMap, DefIdSet, LocalDefId, CRATE_DEF_ID};
use rustc_hir::TraitCandidate; use rustc_hir::TraitCandidate;
use rustc_middle::ty::{DefIdTree, Visibility}; use rustc_middle::ty::{DefIdTree, Visibility};
use rustc_resolve::{ParentScope, Resolver}; use rustc_resolve::{ParentScope, Resolver};
use rustc_session::config::Externs; use rustc_session::config::Externs;
use rustc_span::SyntaxContext; use rustc_span::{Symbol, SyntaxContext};
use std::collections::hash_map::Entry; use std::collections::hash_map::Entry;
use std::mem; use std::mem;
@ -28,6 +29,8 @@ crate fn early_resolve_intra_doc_links(
resolver, resolver,
current_mod: CRATE_DEF_ID, current_mod: CRATE_DEF_ID,
visited_mods: Default::default(), visited_mods: Default::default(),
markdown_links: Default::default(),
doc_link_resolutions: Default::default(),
traits_in_scope: Default::default(), traits_in_scope: Default::default(),
all_traits: Default::default(), all_traits: Default::default(),
all_trait_impls: Default::default(), all_trait_impls: Default::default(),
@ -36,7 +39,7 @@ crate fn early_resolve_intra_doc_links(
// Overridden `visit_item` below doesn't apply to the crate root, // Overridden `visit_item` below doesn't apply to the crate root,
// so we have to visit its attributes and reexports separately. // so we have to visit its attributes and reexports separately.
link_resolver.load_links_in_attrs(&krate.attrs); link_resolver.resolve_doc_links_local(&krate.attrs);
link_resolver.process_module_children_or_reexports(CRATE_DEF_ID.to_def_id()); link_resolver.process_module_children_or_reexports(CRATE_DEF_ID.to_def_id());
visit::walk_crate(&mut link_resolver, krate); visit::walk_crate(&mut link_resolver, krate);
link_resolver.process_extern_impls(); link_resolver.process_extern_impls();
@ -50,6 +53,8 @@ crate fn early_resolve_intra_doc_links(
} }
ResolverCaches { ResolverCaches {
markdown_links: Some(link_resolver.markdown_links),
doc_link_resolutions: link_resolver.doc_link_resolutions,
traits_in_scope: link_resolver.traits_in_scope, traits_in_scope: link_resolver.traits_in_scope,
all_traits: Some(link_resolver.all_traits), all_traits: Some(link_resolver.all_traits),
all_trait_impls: Some(link_resolver.all_trait_impls), all_trait_impls: Some(link_resolver.all_trait_impls),
@ -57,10 +62,18 @@ crate fn early_resolve_intra_doc_links(
} }
} }
fn doc_attrs<'a>(attrs: impl Iterator<Item = &'a ast::Attribute>) -> Attributes {
let mut attrs = Attributes::from_ast_iter(attrs.filter(|attr| attr.doc_str().is_some()), None);
attrs.unindent_doc_comments();
attrs
}
struct EarlyDocLinkResolver<'r, 'ra> { struct EarlyDocLinkResolver<'r, 'ra> {
resolver: &'r mut Resolver<'ra>, resolver: &'r mut Resolver<'ra>,
current_mod: LocalDefId, current_mod: LocalDefId,
visited_mods: DefIdSet, visited_mods: DefIdSet,
markdown_links: FxHashMap<String, Vec<MarkdownLink>>,
doc_link_resolutions: FxHashMap<(Symbol, Namespace, DefId), Option<Res<ast::NodeId>>>,
traits_in_scope: DefIdMap<Vec<TraitCandidate>>, traits_in_scope: DefIdMap<Vec<TraitCandidate>>,
all_traits: Vec<DefId>, all_traits: Vec<DefId>,
all_trait_impls: Vec<DefId>, all_trait_impls: Vec<DefId>,
@ -92,18 +105,11 @@ impl EarlyDocLinkResolver<'_, '_> {
} }
} }
fn add_traits_in_parent_scope(&mut self, def_id: DefId) {
if let Some(module_id) = self.resolver.parent(def_id) {
self.add_traits_in_scope(module_id);
}
}
/// Add traits in scope for links in impls collected by the `collect-intra-doc-links` pass. /// Add traits in scope for links in impls collected by the `collect-intra-doc-links` pass.
/// That pass filters impls using type-based information, but we don't yet have such /// That pass filters impls using type-based information, but we don't yet have such
/// information here, so we just conservatively calculate traits in scope for *all* modules /// information here, so we just conservatively calculate traits in scope for *all* modules
/// having impls in them. /// having impls in them.
fn process_extern_impls(&mut self) { fn process_extern_impls(&mut self) {
// FIXME: Need to resolve doc links on all these impl and trait items below.
// Resolving links in already existing crates may trigger loading of new crates. // Resolving links in already existing crates may trigger loading of new crates.
let mut start_cnum = 0; let mut start_cnum = 0;
loop { loop {
@ -124,7 +130,7 @@ impl EarlyDocLinkResolver<'_, '_> {
// the current crate, and links in their doc comments are not resolved. // the current crate, and links in their doc comments are not resolved.
for &def_id in &all_traits { for &def_id in &all_traits {
if self.resolver.cstore().visibility_untracked(def_id) == Visibility::Public { if self.resolver.cstore().visibility_untracked(def_id) == Visibility::Public {
self.add_traits_in_parent_scope(def_id); self.resolve_doc_links_extern_impl(def_id, false);
} }
} }
for &(trait_def_id, impl_def_id, simplified_self_ty) in &all_trait_impls { for &(trait_def_id, impl_def_id, simplified_self_ty) in &all_trait_impls {
@ -135,17 +141,17 @@ impl EarlyDocLinkResolver<'_, '_> {
== Visibility::Public == Visibility::Public
}) })
{ {
self.add_traits_in_parent_scope(impl_def_id); self.resolve_doc_links_extern_impl(impl_def_id, false);
} }
} }
for (ty_def_id, impl_def_id) in all_inherent_impls { for (ty_def_id, impl_def_id) in all_inherent_impls {
if self.resolver.cstore().visibility_untracked(ty_def_id) == Visibility::Public if self.resolver.cstore().visibility_untracked(ty_def_id) == Visibility::Public
{ {
self.add_traits_in_parent_scope(impl_def_id); self.resolve_doc_links_extern_impl(impl_def_id, true);
} }
} }
for def_id in all_incoherent_impls { for impl_def_id in all_incoherent_impls {
self.add_traits_in_parent_scope(def_id); self.resolve_doc_links_extern_impl(impl_def_id, true);
} }
self.all_traits.extend(all_traits); self.all_traits.extend(all_traits);
@ -161,16 +167,52 @@ impl EarlyDocLinkResolver<'_, '_> {
} }
} }
fn load_links_in_attrs(&mut self, attrs: &[ast::Attribute]) { fn resolve_doc_links_extern_impl(&mut self, def_id: DefId, _is_inherent: bool) {
// FIXME: Resolve links in associated items in addition to traits themselves,
// `force` is used to provide traits in scope for the associated items.
self.resolve_doc_links_extern_outer(def_id, def_id, true);
}
fn resolve_doc_links_extern_outer(&mut self, def_id: DefId, scope_id: DefId, force: bool) {
if !force && !self.resolver.cstore().may_have_doc_links_untracked(def_id) {
return;
}
// FIXME: actually resolve links, not just add traits in scope.
if let Some(parent_id) = self.resolver.parent(scope_id) {
self.add_traits_in_scope(parent_id);
}
}
fn resolve_doc_links_extern_inner(&mut self, def_id: DefId) {
if !self.resolver.cstore().may_have_doc_links_untracked(def_id) {
return;
}
// FIXME: actually resolve links, not just add traits in scope.
self.add_traits_in_scope(def_id);
}
fn resolve_doc_links_local(&mut self, attrs: &[ast::Attribute]) {
if !attrs.iter().any(|attr| attr.may_have_doc_links()) {
return;
}
let module_id = self.current_mod.to_def_id(); let module_id = self.current_mod.to_def_id();
self.resolve_doc_links(doc_attrs(attrs.iter()), module_id);
}
fn resolve_doc_links(&mut self, attrs: Attributes, module_id: DefId) {
let mut need_traits_in_scope = false; let mut need_traits_in_scope = false;
for (doc_module, doc) in for (doc_module, doc) in attrs.collapsed_doc_value_by_module_level() {
Attributes::from_ast(attrs, None).collapsed_doc_value_by_module_level()
{
assert_eq!(doc_module, None); assert_eq!(doc_module, None);
for link in markdown_links(&doc.as_str()) { for link in self.markdown_links.entry(doc).or_insert_with_key(|doc| markdown_links(doc))
{
if let Some(Ok(pinfo)) = preprocess_link(&link) { if let Some(Ok(pinfo)) = preprocess_link(&link) {
self.resolver.resolve_rustdoc_path(&pinfo.path_str, TypeNS, module_id); // FIXME: Resolve the path in all namespaces and resolve its prefixes too.
let ns = TypeNS;
self.doc_link_resolutions
.entry((Symbol::intern(&pinfo.path_str), ns, module_id))
.or_insert_with_key(|(path, ns, module_id)| {
self.resolver.resolve_rustdoc_path(path.as_str(), *ns, *module_id)
});
need_traits_in_scope = true; need_traits_in_scope = true;
} }
} }
@ -197,15 +239,13 @@ impl EarlyDocLinkResolver<'_, '_> {
&& module_id.is_local() && module_id.is_local()
{ {
if let Some(def_id) = child.res.opt_def_id() && !def_id.is_local() { if let Some(def_id) = child.res.opt_def_id() && !def_id.is_local() {
// FIXME: Need to resolve doc links on all these extern items
// reached through reexports.
let scope_id = match child.res { let scope_id = match child.res {
Res::Def(DefKind::Variant, ..) => self.resolver.parent(def_id).unwrap(), Res::Def(DefKind::Variant, ..) => self.resolver.parent(def_id).unwrap(),
_ => def_id, _ => def_id,
}; };
self.add_traits_in_parent_scope(scope_id); // Outer attribute scope self.resolve_doc_links_extern_outer(def_id, scope_id, false); // Outer attribute scope
if let Res::Def(DefKind::Mod, ..) = child.res { if let Res::Def(DefKind::Mod, ..) = child.res {
self.add_traits_in_scope(def_id); // Inner attribute scope self.resolve_doc_links_extern_inner(def_id); // Inner attribute scope
} }
// Traits are processed in `add_extern_traits_in_scope`. // Traits are processed in `add_extern_traits_in_scope`.
if let Res::Def(DefKind::Mod | DefKind::Enum, ..) = child.res { if let Res::Def(DefKind::Mod | DefKind::Enum, ..) = child.res {
@ -219,10 +259,10 @@ impl EarlyDocLinkResolver<'_, '_> {
impl Visitor<'_> for EarlyDocLinkResolver<'_, '_> { impl Visitor<'_> for EarlyDocLinkResolver<'_, '_> {
fn visit_item(&mut self, item: &ast::Item) { fn visit_item(&mut self, item: &ast::Item) {
self.load_links_in_attrs(&item.attrs); // Outer attribute scope self.resolve_doc_links_local(&item.attrs); // Outer attribute scope
if let ItemKind::Mod(..) = item.kind { if let ItemKind::Mod(..) = item.kind {
let old_mod = mem::replace(&mut self.current_mod, self.resolver.local_def_id(item.id)); let old_mod = mem::replace(&mut self.current_mod, self.resolver.local_def_id(item.id));
self.load_links_in_attrs(&item.attrs); // Inner attribute scope self.resolve_doc_links_local(&item.attrs); // Inner attribute scope
self.process_module_children_or_reexports(self.current_mod.to_def_id()); self.process_module_children_or_reexports(self.current_mod.to_def_id());
visit::walk_item(self, item); visit::walk_item(self, item);
self.current_mod = old_mod; self.current_mod = old_mod;
@ -241,22 +281,22 @@ impl Visitor<'_> for EarlyDocLinkResolver<'_, '_> {
} }
fn visit_assoc_item(&mut self, item: &ast::AssocItem, ctxt: AssocCtxt) { fn visit_assoc_item(&mut self, item: &ast::AssocItem, ctxt: AssocCtxt) {
self.load_links_in_attrs(&item.attrs); self.resolve_doc_links_local(&item.attrs);
visit::walk_assoc_item(self, item, ctxt) visit::walk_assoc_item(self, item, ctxt)
} }
fn visit_foreign_item(&mut self, item: &ast::ForeignItem) { fn visit_foreign_item(&mut self, item: &ast::ForeignItem) {
self.load_links_in_attrs(&item.attrs); self.resolve_doc_links_local(&item.attrs);
visit::walk_foreign_item(self, item) visit::walk_foreign_item(self, item)
} }
fn visit_variant(&mut self, v: &ast::Variant) { fn visit_variant(&mut self, v: &ast::Variant) {
self.load_links_in_attrs(&v.attrs); self.resolve_doc_links_local(&v.attrs);
visit::walk_variant(self, v) visit::walk_variant(self, v)
} }
fn visit_field_def(&mut self, field: &ast::FieldDef) { fn visit_field_def(&mut self, field: &ast::FieldDef) {
self.load_links_in_attrs(&field.attrs); self.resolve_doc_links_local(&field.attrs);
visit::walk_field_def(self, field) visit::walk_field_def(self, field)
} }