Rollup merge of #84101 - jyn514:early-pass, r=Manishearth

rustdoc: Move crate loader to collect_intra_doc_links::early

This groups the similar code together, and also allows making most of collect_intra_doc_links private again.

This builds on https://github.com/rust-lang/rust/pull/84066, but it wouldn't be too hard to base it off master if you want this to land first.
Helps with https://github.com/rust-lang/rust/issues/83761.

r? manishearth

Fixes https://github.com/rust-lang/rust/issues/84046
This commit is contained in:
Dylan DPC 2021-04-12 01:04:10 +02:00 committed by GitHub
commit 1ff117e987
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 220 additions and 133 deletions

View File

@ -5,8 +5,8 @@ use rustc_driver::abort_on_err;
use rustc_errors::emitter::{Emitter, EmitterWriter};
use rustc_errors::json::JsonEmitter;
use rustc_feature::UnstableFeatures;
use rustc_hir::def::{Namespace::TypeNS, Res};
use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, CRATE_DEF_INDEX, LOCAL_CRATE};
use rustc_hir::def::Res;
use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, LOCAL_CRATE};
use rustc_hir::HirId;
use rustc_hir::{
intravisit::{self, NestedVisitorMap, Visitor},
@ -356,55 +356,7 @@ crate fn create_resolver<'a>(
let (krate, resolver, _) = &*parts;
let resolver = resolver.borrow().clone();
// Letting the resolver escape at the end of the function leads to inconsistencies between the
// crates the TyCtxt sees and the resolver sees (because the resolver could load more crates
// after escaping). Hopefully `IntraLinkCrateLoader` gets all the crates we need ...
struct IntraLinkCrateLoader {
current_mod: DefId,
resolver: Rc<RefCell<interface::BoxedResolver>>,
}
impl ast::visit::Visitor<'_> for IntraLinkCrateLoader {
fn visit_attribute(&mut self, attr: &ast::Attribute) {
use crate::html::markdown::{markdown_links, MarkdownLink};
use crate::passes::collect_intra_doc_links::Disambiguator;
if let Some(doc) = attr.doc_str() {
for MarkdownLink { link, .. } in markdown_links(&doc.as_str()) {
// FIXME: this misses a *lot* of the preprocessing done in collect_intra_doc_links
// I think most of it shouldn't be necessary since we only need the crate prefix?
let path_str = match Disambiguator::from_str(&link) {
Ok(x) => x.map_or(link.as_str(), |(_, p)| p),
Err(_) => continue,
};
self.resolver.borrow_mut().access(|resolver| {
let _ = resolver.resolve_str_path_error(
attr.span,
path_str,
TypeNS,
self.current_mod,
);
});
}
}
ast::visit::walk_attribute(self, attr);
}
fn visit_item(&mut self, item: &ast::Item) {
use rustc_ast_lowering::ResolverAstLowering;
if let ast::ItemKind::Mod(..) = item.kind {
let new_mod =
self.resolver.borrow_mut().access(|resolver| resolver.local_def_id(item.id));
let old_mod = mem::replace(&mut self.current_mod, new_mod.to_def_id());
ast::visit::walk_item(self, item);
self.current_mod = old_mod;
} else {
ast::visit::walk_item(self, item);
}
}
}
let crate_id = LocalDefId { local_def_index: CRATE_DEF_INDEX }.to_def_id();
let mut loader = IntraLinkCrateLoader { current_mod: crate_id, resolver };
let mut loader = crate::passes::collect_intra_doc_links::IntraLinkCrateLoader::new(resolver);
ast::visit::walk_crate(&mut loader, krate);
loader.resolver

View File

@ -39,13 +39,16 @@ use crate::passes::Pass;
use super::span_of_attrs;
mod early;
crate use early::IntraLinkCrateLoader;
crate const COLLECT_INTRA_DOC_LINKS: Pass = Pass {
name: "collect-intra-doc-links",
run: collect_intra_doc_links,
description: "resolves intra-doc links",
};
crate fn collect_intra_doc_links(krate: Crate, cx: &mut DocContext<'_>) -> Crate {
fn collect_intra_doc_links(krate: Crate, cx: &mut DocContext<'_>) -> Crate {
LinkCollector {
cx,
mod_ids: Vec::new(),
@ -892,6 +895,117 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> {
}
}
enum PreprocessingError<'a> {
Anchor(AnchorFailure),
Disambiguator(Range<usize>, String),
Resolution(ResolutionFailure<'a>, String, Option<Disambiguator>),
}
impl From<AnchorFailure> for PreprocessingError<'_> {
fn from(err: AnchorFailure) -> Self {
Self::Anchor(err)
}
}
struct PreprocessingInfo {
path_str: String,
disambiguator: Option<Disambiguator>,
extra_fragment: Option<String>,
link_text: String,
}
/// Returns:
/// - `None` if the link should be ignored.
/// - `Some(Err)` if the link should emit an error
/// - `Some(Ok)` if the link is valid
///
/// `link_buffer` is needed for lifetime reasons; it will always be overwritten and the contents ignored.
fn preprocess_link<'a>(
ori_link: &'a MarkdownLink,
) -> Option<Result<PreprocessingInfo, PreprocessingError<'a>>> {
// [] is mostly likely not supposed to be a link
if ori_link.link.is_empty() {
return None;
}
// Bail early for real links.
if ori_link.link.contains('/') {
return None;
}
let stripped = ori_link.link.replace("`", "");
let mut parts = stripped.split('#');
let link = parts.next().unwrap();
if link.trim().is_empty() {
// This is an anchor to an element of the current page, nothing to do in here!
return None;
}
let extra_fragment = parts.next();
if parts.next().is_some() {
// A valid link can't have multiple #'s
return Some(Err(AnchorFailure::MultipleAnchors.into()));
}
// Parse and strip the disambiguator from the link, if present.
let (path_str, disambiguator) = match Disambiguator::from_str(&link) {
Ok(Some((d, path))) => (path.trim(), Some(d)),
Ok(None) => (link.trim(), None),
Err((err_msg, relative_range)) => {
// Only report error if we would not have ignored this link. See issue #83859.
if !should_ignore_link_with_disambiguators(link) {
let no_backticks_range = range_between_backticks(&ori_link);
let disambiguator_range = (no_backticks_range.start + relative_range.start)
..(no_backticks_range.start + relative_range.end);
return Some(Err(PreprocessingError::Disambiguator(disambiguator_range, err_msg)));
} else {
return None;
}
}
};
if should_ignore_link(path_str) {
return None;
}
// We stripped `()` and `!` when parsing the disambiguator.
// Add them back to be displayed, but not prefix disambiguators.
let link_text =
disambiguator.map(|d| d.display_for(path_str)).unwrap_or_else(|| path_str.to_owned());
// Strip generics from the path.
let path_str = if path_str.contains(['<', '>'].as_slice()) {
match strip_generics_from_path(&path_str) {
Ok(path) => path,
Err(err_kind) => {
debug!("link has malformed generics: {}", path_str);
return Some(Err(PreprocessingError::Resolution(
err_kind,
path_str.to_owned(),
disambiguator,
)));
}
}
} else {
path_str.to_owned()
};
// Sanity check to make sure we don't have any angle brackets after stripping generics.
assert!(!path_str.contains(['<', '>'].as_slice()));
// The link is not an intra-doc link if it still contains spaces after stripping generics.
if path_str.contains(' ') {
return None;
}
Some(Ok(PreprocessingInfo {
path_str,
disambiguator,
extra_fragment: extra_fragment.map(String::from),
link_text,
}))
}
impl LinkCollector<'_, '_> {
/// This is the entry point for resolving an intra-doc link.
///
@ -907,16 +1021,6 @@ impl LinkCollector<'_, '_> {
) -> Option<ItemLink> {
trace!("considering link '{}'", ori_link.link);
// Bail early for real links.
if ori_link.link.contains('/') {
return None;
}
// [] is mostly likely not supposed to be a link
if ori_link.link.is_empty() {
return None;
}
let diag_info = DiagnosticInfo {
item,
dox,
@ -924,47 +1028,29 @@ impl LinkCollector<'_, '_> {
link_range: ori_link.range.clone(),
};
let link = ori_link.link.replace("`", "");
let no_backticks_range = range_between_backticks(&ori_link);
let parts = link.split('#').collect::<Vec<_>>();
let (link, extra_fragment) = if parts.len() > 2 {
// A valid link can't have multiple #'s
anchor_failure(self.cx, diag_info, AnchorFailure::MultipleAnchors);
return None;
} else if parts.len() == 2 {
if parts[0].trim().is_empty() {
// This is an anchor to an element of the current page, nothing to do in here!
return None;
}
(parts[0], Some(parts[1].to_owned()))
} else {
(parts[0], None)
};
// Parse and strip the disambiguator from the link, if present.
let (mut path_str, disambiguator) = match Disambiguator::from_str(&link) {
Ok(Some((d, path))) => (path.trim(), Some(d)),
Ok(None) => (link.trim(), None),
Err((err_msg, relative_range)) => {
if !should_ignore_link_with_disambiguators(link) {
// Only report error if we would not have ignored this link.
// See issue #83859.
let disambiguator_range = (no_backticks_range.start + relative_range.start)
..(no_backticks_range.start + relative_range.end);
disambiguator_error(self.cx, diag_info, disambiguator_range, &err_msg);
let PreprocessingInfo { path_str, disambiguator, extra_fragment, link_text } =
match preprocess_link(&ori_link)? {
Ok(x) => x,
Err(err) => {
match err {
PreprocessingError::Anchor(err) => anchor_failure(self.cx, diag_info, err),
PreprocessingError::Disambiguator(range, msg) => {
disambiguator_error(self.cx, diag_info, range, &msg)
}
PreprocessingError::Resolution(err, path_str, disambiguator) => {
resolution_failure(
self,
diag_info,
&path_str,
disambiguator,
smallvec![err],
);
}
}
return None;
}
return None;
}
};
if should_ignore_link(path_str) {
return None;
}
// We stripped `()` and `!` when parsing the disambiguator.
// Add them back to be displayed, but not prefix disambiguators.
let link_text =
disambiguator.map(|d| d.display_for(path_str)).unwrap_or_else(|| path_str.to_owned());
};
let mut path_str = &*path_str;
// In order to correctly resolve intra-doc links we need to
// pick a base AST node to work from. If the documentation for
@ -1029,39 +1115,12 @@ impl LinkCollector<'_, '_> {
module_id = DefId { krate, index: CRATE_DEF_INDEX };
}
// Strip generics from the path.
let stripped_path_string;
if path_str.contains(['<', '>'].as_slice()) {
stripped_path_string = match strip_generics_from_path(path_str) {
Ok(path) => path,
Err(err_kind) => {
debug!("link has malformed generics: {}", path_str);
resolution_failure(
self,
diag_info,
path_str,
disambiguator,
smallvec![err_kind],
);
return None;
}
};
path_str = &stripped_path_string;
}
// Sanity check to make sure we don't have any angle brackets after stripping generics.
assert!(!path_str.contains(['<', '>'].as_slice()));
// The link is not an intra-doc link if it still contains spaces after stripping generics.
if path_str.contains(' ') {
return None;
}
let (mut res, mut fragment) = self.resolve_with_disambiguator_cached(
ResolutionInfo {
module_id,
dis: disambiguator,
path_str: path_str.to_owned(),
extra_fragment,
extra_fragment: extra_fragment.map(String::from),
},
diag_info.clone(), // this struct should really be Copy, but Range is not :(
matches!(ori_link.kind, LinkType::Reference | LinkType::Shortcut),
@ -1438,7 +1497,7 @@ fn should_ignore_link(path_str: &str) -> bool {
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
/// Disambiguators for a link.
crate enum Disambiguator {
enum Disambiguator {
/// `prim@`
///
/// This is buggy, see <https://github.com/rust-lang/rust/pull/77875#discussion_r503583103>
@ -1467,7 +1526,7 @@ impl Disambiguator {
/// This returns `Ok(Some(...))` if a disambiguator was found,
/// `Ok(None)` if no disambiguator was found, or `Err(...)`
/// if there was a problem with the disambiguator.
crate fn from_str(link: &str) -> Result<Option<(Self, &str)>, (String, Range<usize>)> {
fn from_str(link: &str) -> Result<Option<(Self, &str)>, (String, Range<usize>)> {
use Disambiguator::{Kind, Namespace as NS, Primitive};
if let Some(idx) = link.find('@') {

View File

@ -0,0 +1,63 @@
use rustc_ast as ast;
use rustc_hir::def::Namespace::TypeNS;
use rustc_hir::def_id::{DefId, LocalDefId, CRATE_DEF_INDEX};
use rustc_interface::interface;
use std::cell::RefCell;
use std::mem;
use std::rc::Rc;
// Letting the resolver escape at the end of the function leads to inconsistencies between the
// crates the TyCtxt sees and the resolver sees (because the resolver could load more crates
// after escaping). Hopefully `IntraLinkCrateLoader` gets all the crates we need ...
crate struct IntraLinkCrateLoader {
current_mod: DefId,
crate resolver: Rc<RefCell<interface::BoxedResolver>>,
}
impl IntraLinkCrateLoader {
crate fn new(resolver: Rc<RefCell<interface::BoxedResolver>>) -> Self {
let crate_id = LocalDefId { local_def_index: CRATE_DEF_INDEX }.to_def_id();
Self { current_mod: crate_id, resolver }
}
}
impl ast::visit::Visitor<'_> for IntraLinkCrateLoader {
fn visit_attribute(&mut self, attr: &ast::Attribute) {
use crate::html::markdown::markdown_links;
use crate::passes::collect_intra_doc_links::preprocess_link;
if let Some(doc) = attr.doc_str() {
for link in markdown_links(&doc.as_str()) {
let path_str = if let Some(Ok(x)) = preprocess_link(&link) {
x.path_str
} else {
continue;
};
self.resolver.borrow_mut().access(|resolver| {
let _ = resolver.resolve_str_path_error(
attr.span,
&path_str,
TypeNS,
self.current_mod,
);
});
}
}
ast::visit::walk_attribute(self, attr);
}
fn visit_item(&mut self, item: &ast::Item) {
use rustc_ast_lowering::ResolverAstLowering;
if let ast::ItemKind::Mod(..) = item.kind {
let new_mod =
self.resolver.borrow_mut().access(|resolver| resolver.local_def_id(item.id));
let old_mod = mem::replace(&mut self.current_mod, new_mod.to_def_id());
ast::visit::walk_item(self, item);
self.current_mod = old_mod;
} else {
ast::visit::walk_item(self, item);
}
}
}

View File

@ -0,0 +1 @@
// intentionally empty

View File

@ -0,0 +1 @@
// intentionally empty

View File

@ -1,8 +1,19 @@
// This test is just a little cursed.
// aux-build:issue-66159-1.rs
// aux-crate:priv:issue_66159_1=issue-66159-1.rs
// aux-build:empty.rs
// aux-crate:priv:empty=empty.rs
// aux-build:empty2.rs
// aux-crate:priv:empty2=empty2.rs
// build-aux-docs
// compile-flags:-Z unstable-options
// compile-flags:-Z unstable-options --edition 2018
// @has extern_crate_only_used_in_link/index.html
// @has - '//a[@href="../issue_66159_1/struct.Something.html"]' 'issue_66159_1::Something'
//! [issue_66159_1::Something]
// @has - '//a[@href="../empty/index.html"]' 'empty'
//! [`empty`]
// @has - '//a[@href="../empty2/index.html"]' 'empty2'
//! [empty2<x>]