rustdoc-search: single result for items with multiple paths

This change uses the same "exact" paths as trait implementors
and type alias inlining to track items with multiple
reachable paths. This way, if you search for `vec`, you get
only the `std` exports of it, and not the one from `alloc`.

It still includes all the items in the search index so that
you can search for them by all available paths. For example,
try `core::option` and `std::option`, and notice that the
results page doesn't show duplicates, but still shows all
the items in their respective crates.
This commit is contained in:
Michael Howell 2024-01-12 15:32:08 -07:00
parent ab5bda1aa7
commit f36c5af359
13 changed files with 313 additions and 25 deletions

View File

@ -348,16 +348,28 @@ fn is_from_private_dep(tcx: TyCtxt<'_>, cache: &Cache, def_id: DefId) -> bool {
{
let desc =
short_markdown_summary(&item.doc_value(), &item.link_names(self.cache));
// For searching purposes, a re-export is a duplicate if:
//
// - It's either an inline, or a true re-export
// - It's got the same name
// - Both of them have the same exact path
let defid = (match &*item.kind {
&clean::ItemKind::ImportItem(ref import) => import.source.did,
_ => None,
})
.or_else(|| item.item_id.as_def_id());
// In case this is a field from a tuple struct, we don't add it into
// the search index because its name is something like "0", which is
// not useful for rustdoc search.
self.cache.search_index.push(IndexItem {
ty,
defid,
name: s,
path: join_with_double_colon(path),
desc,
parent,
parent_idx: None,
exact_path: None,
impl_id: if let Some(ParentStackItem::Impl { item_id, .. }) =
self.cache.parent_stack.last()
{

View File

@ -111,11 +111,13 @@ pub(crate) enum RenderMode {
#[derive(Debug)]
pub(crate) struct IndexItem {
pub(crate) ty: ItemType,
pub(crate) defid: Option<DefId>,
pub(crate) name: Symbol,
pub(crate) path: String,
pub(crate) desc: String,
pub(crate) parent: Option<DefId>,
pub(crate) parent_idx: Option<isize>,
pub(crate) exact_path: Option<String>,
pub(crate) impl_id: Option<DefId>,
pub(crate) search_type: Option<IndexItemFunctionType>,
pub(crate) aliases: Box<[Symbol]>,

View File

@ -59,10 +59,13 @@ pub(crate) fn build_index<'tcx>(
cache: &mut Cache,
tcx: TyCtxt<'tcx>,
) -> SerializedSearchIndex {
// Maps from ID to position in the `crate_paths` array.
let mut itemid_to_pathid = FxHashMap::default();
let mut primitives = FxHashMap::default();
let mut associated_types = FxHashMap::default();
let mut crate_paths = vec![];
// item type, display path, re-exported internal path
let mut crate_paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)> = vec![];
// Attach all orphan items to the type's definition if the type
// has since been learned.
@ -72,11 +75,13 @@ pub(crate) fn build_index<'tcx>(
let desc = short_markdown_summary(&item.doc_value(), &item.link_names(cache));
cache.search_index.push(IndexItem {
ty: item.type_(),
defid: item.item_id.as_def_id(),
name: item.name.unwrap(),
path: join_with_double_colon(&fqp[..fqp.len() - 1]),
desc,
parent: Some(parent),
parent_idx: None,
exact_path: None,
impl_id,
search_type: get_function_type_for_search(
item,
@ -126,9 +131,10 @@ fn insert_into_map<F: std::hash::Hash + Eq>(
map: &mut FxHashMap<F, isize>,
itemid: F,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
item_type: ItemType,
path: &[Symbol],
exact_path: Option<&[Symbol]>,
) -> RenderTypeId {
match map.entry(itemid) {
Entry::Occupied(entry) => RenderTypeId::Index(*entry.get()),
@ -136,7 +142,11 @@ fn insert_into_map<F: std::hash::Hash + Eq>(
let pathid = *lastpathid;
entry.insert(pathid);
*lastpathid += 1;
crate_paths.push((item_type, path.to_vec()));
crate_paths.push((
item_type,
path.to_vec(),
exact_path.map(|path| path.to_vec()),
));
RenderTypeId::Index(pathid)
}
}
@ -149,14 +159,22 @@ fn convert_render_type_id(
primitives: &mut FxHashMap<Symbol, isize>,
associated_types: &mut FxHashMap<Symbol, isize>,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
) -> Option<RenderTypeId> {
let Cache { ref paths, ref external_paths, .. } = *cache;
let Cache { ref paths, ref external_paths, ref exact_paths, .. } = *cache;
match id {
RenderTypeId::DefId(defid) => {
if let Some(&(ref fqp, item_type)) =
paths.get(&defid).or_else(|| external_paths.get(&defid))
{
// Look up the item's "exact" (original definition) path, falling back to the
// external path when no exact path was recorded for this `DefId`.
let exact_fqp = exact_paths
    .get(&defid)
    .or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
    // re-exports only count if the name is exactly the same
    // this is a size optimization, as well as a DWIM attempt
    // since if the names are not the same, the intent probably
    // isn't, either
    //
    // The closure parameter is deliberately NOT named `fqp`: shadowing the
    // outer display path would turn this into `x.last() == x.last()`, which is
    // always true and silently disables the name check.
    .filter(|this_fqp| this_fqp.last() == fqp.last());
Some(insert_into_map(
itemid_to_pathid,
ItemId::DefId(defid),
@ -164,6 +182,7 @@ fn convert_render_type_id(
crate_paths,
item_type,
fqp,
exact_fqp.map(|x| &x[..]).filter(|exact_fqp| exact_fqp != fqp),
))
} else {
None
@ -178,6 +197,7 @@ fn convert_render_type_id(
crate_paths,
ItemType::Primitive,
&[sym],
None,
))
}
RenderTypeId::Index(_) => Some(id),
@ -188,6 +208,7 @@ fn convert_render_type_id(
crate_paths,
ItemType::AssocType,
&[sym],
None,
)),
}
}
@ -199,7 +220,7 @@ fn convert_render_type(
primitives: &mut FxHashMap<Symbol, isize>,
associated_types: &mut FxHashMap<Symbol, isize>,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
) {
if let Some(generics) = &mut ty.generics {
for item in generics {
@ -296,7 +317,7 @@ fn convert_render_type(
}
}
let Cache { ref paths, .. } = *cache;
let Cache { ref paths, ref exact_paths, ref external_paths, .. } = *cache;
// Then, on parent modules
let crate_items: Vec<&IndexItem> = search_index
@ -311,7 +332,13 @@ fn convert_render_type(
lastpathid += 1;
if let Some(&(ref fqp, short)) = paths.get(&defid) {
crate_paths.push((short, fqp.clone()));
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
.filter(|exact_fqp| {
exact_fqp.last() == Some(&item.name) && *exact_fqp != fqp
});
crate_paths.push((short, fqp.clone(), exact_fqp.cloned()));
Some(pathid)
} else {
None
@ -319,6 +346,40 @@ fn convert_render_type(
}
});
if let Some(defid) = item.defid
&& item.parent_idx.is_none()
{
// If this is a re-export, retain the original path.
// Associated items don't use this.
// Their parent carries the exact fqp instead.
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp));
item.exact_path = exact_fqp.and_then(|fqp| {
// re-exports only count if the name is exactly the same
// this is a size optimization, as well as a DWIM attempt
// since if the names are not the same, the intent probably
// isn't, either
if fqp.last() != Some(&item.name) {
return None;
}
let path =
if item.ty == ItemType::Macro && tcx.has_attr(defid, sym::macro_export) {
// `#[macro_export]` always exports to the crate root.
tcx.crate_name(defid.krate).to_string()
} else {
if fqp.len() < 2 {
return None;
}
join_with_double_colon(&fqp[..fqp.len() - 1])
};
if path == item.path {
return None;
}
Some(path)
});
}
// Omit the parent path if it is same to that of the prior item.
if lastpath == &item.path {
item.path.clear();
@ -356,7 +417,7 @@ fn convert_render_type(
struct CrateData<'a> {
items: Vec<&'a IndexItem>,
paths: Vec<(ItemType, Vec<Symbol>)>,
paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
// The String is alias name and the vec is the list of the elements with this alias.
//
// To be noted: the `usize` elements are indexes to `items`.
@ -374,6 +435,7 @@ struct Paths {
ty: ItemType,
name: Symbol,
path: Option<usize>,
exact_path: Option<usize>,
}
impl Serialize for Paths {
@ -387,6 +449,10 @@ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
if let Some(ref path) = self.path {
seq.serialize_element(path)?;
}
if let Some(ref path) = self.exact_path {
assert!(self.path.is_some());
seq.serialize_element(path)?;
}
seq.end()
}
}
@ -409,14 +475,39 @@ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
mod_paths.insert(&item.path, index);
}
let mut paths = Vec::with_capacity(self.paths.len());
for (ty, path) in &self.paths {
for (ty, path, exact) in &self.paths {
if path.len() < 2 {
paths.push(Paths { ty: *ty, name: path[0], path: None });
paths.push(Paths { ty: *ty, name: path[0], path: None, exact_path: None });
continue;
}
let full_path = join_with_double_colon(&path[..path.len() - 1]);
let full_exact_path = exact
.as_ref()
.filter(|exact| exact.last() == path.last() && exact.len() >= 2)
.map(|exact| join_with_double_colon(&exact[..exact.len() - 1]));
let exact_path = extra_paths.len() + self.items.len();
let exact_path = full_exact_path.as_ref().map(|full_exact_path| match extra_paths
.entry(full_exact_path.clone())
{
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
if let Some(index) = mod_paths.get(&full_exact_path) {
return *index;
}
entry.insert(exact_path);
if !revert_extra_paths.contains_key(&exact_path) {
revert_extra_paths.insert(exact_path, full_exact_path.clone());
}
exact_path
}
});
if let Some(index) = mod_paths.get(&full_path) {
paths.push(Paths { ty: *ty, name: *path.last().unwrap(), path: Some(*index) });
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(*index),
exact_path,
});
continue;
}
// It means it comes from an external crate so the item and its path will be
@ -424,28 +515,54 @@ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
//
// `index` is put after the last `mod_paths`
let index = extra_paths.len() + self.items.len();
if !revert_extra_paths.contains_key(&index) {
revert_extra_paths.insert(index, full_path.clone());
}
match extra_paths.entry(full_path) {
match extra_paths.entry(full_path.clone()) {
Entry::Occupied(entry) => {
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(*entry.get()),
exact_path,
});
}
Entry::Vacant(entry) => {
entry.insert(index);
if !revert_extra_paths.contains_key(&index) {
revert_extra_paths.insert(index, full_path);
}
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(index),
exact_path,
});
}
}
}
// Direct exports use adjacent arrays for the current crate's items,
// but re-exported exact paths don't.
let mut re_exports = Vec::new();
for (item_index, item) in self.items.iter().enumerate() {
if let Some(exact_path) = item.exact_path.as_ref() {
if let Some(path_index) = mod_paths.get(&exact_path) {
re_exports.push((item_index, *path_index));
} else {
let path_index = extra_paths.len() + self.items.len();
let path_index = match extra_paths.entry(exact_path.clone()) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
entry.insert(path_index);
if !revert_extra_paths.contains_key(&path_index) {
revert_extra_paths.insert(path_index, exact_path.clone());
}
path_index
}
};
re_exports.push((item_index, path_index));
}
}
}
let mut names = Vec::with_capacity(self.items.len());
let mut types = String::with_capacity(self.items.len());
let mut full_paths = Vec::with_capacity(self.items.len());
@ -501,6 +618,7 @@ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
crate_data.serialize_field("f", &functions)?;
crate_data.serialize_field("D", &self.desc_index)?;
crate_data.serialize_field("p", &paths)?;
crate_data.serialize_field("r", &re_exports)?;
crate_data.serialize_field("b", &self.associated_item_disambiguators)?;
crate_data.serialize_field("c", &bitmap_to_string(&deprecated))?;
crate_data.serialize_field("e", &bitmap_to_string(&self.empty_desc))?;

View File

@ -79,6 +79,7 @@ const longItemTypes = [
// used for special search precedence
const TY_GENERIC = itemTypes.indexOf("generic");
const TY_IMPORT = itemTypes.indexOf("import");
const ROOT_PATH = typeof window !== "undefined" ? window.rootPath : "../";
// Hard limit on how deep to recurse into generics when doing type-driven search.
@ -1324,14 +1325,23 @@ function initSearch(rawSearchIndex) {
obj.dist = result.dist;
const res = buildHrefAndPath(obj);
obj.displayPath = pathSplitter(res[0]);
obj.fullPath = obj.displayPath + obj.name;
// To be sure than it some items aren't considered as duplicate.
obj.fullPath += "|" + obj.ty;
// To be sure that some items aren't considered as duplicates.
obj.fullPath = res[2] + "|" + obj.ty;
if (duplicates.has(obj.fullPath)) {
continue;
}
// Exports are specifically not shown if the items they point at
// are already in the results.
if (obj.ty === TY_IMPORT && duplicates.has(res[2])) {
continue;
}
if (duplicates.has(res[2] + "|" + TY_IMPORT)) {
continue;
}
duplicates.add(obj.fullPath);
duplicates.add(res[2]);
obj.href = res[1];
out.push(obj);
@ -2085,6 +2095,7 @@ function initSearch(rawSearchIndex) {
path: item.path,
descShard: item.descShard,
descIndex: item.descIndex,
exactPath: item.exactPath,
ty: item.ty,
parent: item.parent,
type: item.type,
@ -2538,6 +2549,7 @@ function initSearch(rawSearchIndex) {
const type = itemTypes[item.ty];
const name = item.name;
let path = item.path;
let exactPath = item.exactPath;
if (type === "mod") {
displayPath = path + "::";
@ -2559,6 +2571,7 @@ function initSearch(rawSearchIndex) {
const parentType = itemTypes[myparent.ty];
let pageType = parentType;
let pageName = myparent.name;
exactPath = `${myparent.exactPath}::${myparent.name}`;
if (parentType === "primitive") {
displayPath = myparent.name + "::";
@ -2587,7 +2600,7 @@ function initSearch(rawSearchIndex) {
href = ROOT_PATH + item.path.replace(/::/g, "/") +
"/" + type + "." + name + ".html";
}
return [displayPath, href];
return [displayPath, href, `${exactPath}::${name}`];
}
function pathSplitter(path) {
@ -2980,6 +2993,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
id: pathIndex,
ty: TY_GENERIC,
path: null,
exactPath: null,
generics,
bindings,
};
@ -2989,6 +3003,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
id: null,
ty: null,
path: null,
exactPath: null,
generics,
bindings,
};
@ -2998,6 +3013,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
id: buildTypeMapIndex(item.name, isAssocType),
ty: item.ty,
path: item.path,
exactPath: item.exactPath,
generics,
bindings,
};
@ -3453,6 +3469,8 @@ ${item.displayPath}<span class="${type}">${name}</span>\
path: "",
descShard,
descIndex,
exactPath: "",
desc: crateCorpus.doc,
parent: undefined,
type: null,
id,
@ -3478,6 +3496,9 @@ ${item.displayPath}<span class="${type}">${name}</span>\
// i.e. if indices 4 and 11 are present, but 5-10 and 12-13 are not present,
// 5-10 will fall back to the path for 4 and 12-13 will fall back to the path for 11
const itemPaths = new Map(crateCorpus.q);
// An array of [(Number) item index, (Number) path index]
// Used to de-duplicate inlined and re-exported stuff
const itemReexports = new Map(crateCorpus.r);
// an array of (Number) the parent path index + 1 to `paths`, or 0 if none
const itemParentIdxs = crateCorpus.i;
// a map Number, string for impl disambiguators
@ -3511,9 +3532,10 @@ ${item.displayPath}<span class="${type}">${name}</span>\
path = itemPaths.has(elem[2]) ? itemPaths.get(elem[2]) : lastPath;
lastPath = path;
}
const exactPath = elem.length > 3 ? itemPaths.get(elem[3]) : path;
lowercasePaths.push({ty: ty, name: name.toLowerCase(), path: path});
paths[i] = {ty: ty, name: name, path: path};
lowercasePaths.push({ty, name: name.toLowerCase(), path, exactPath});
paths[i] = {ty, name, path, exactPath};
}
// convert `item*` into an object form, and construct word indices.
@ -3572,6 +3594,7 @@ ${item.displayPath}<span class="${type}">${name}</span>\
path,
descShard,
descIndex,
exactPath: itemReexports.has(i) ? itemPaths.get(itemReexports.get(i)) : path,
parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined,
type,
id,

View File

@ -3,17 +3,47 @@ const EXPECTED = [
'query': 'Vec::new',
'others': [
{ 'path': 'std::vec::Vec', 'name': 'new' },
{ 'path': 'alloc::vec::Vec', 'name': 'new' },
{ 'path': 'std::vec::Vec', 'name': 'new_in' },
{ 'path': 'alloc::vec::Vec', 'name': 'new_in' },
],
},
{
'query': 'prelude::vec',
'others': [
{ 'path': 'std::prelude::rust_2024', 'name': 'Vec' },
],
},
{
'query': 'Vec new',
'others': [
{ 'path': 'std::vec::Vec', 'name': 'new' },
{ 'path': 'alloc::vec::Vec', 'name': 'new' },
{ 'path': 'std::vec::Vec', 'name': 'new_in' },
],
},
{
'query': 'std::Vec::new',
'others': [
{ 'path': 'std::vec::Vec', 'name': 'new' },
{ 'path': 'std::vec::Vec', 'name': 'new_in' },
],
},
{
'query': 'std Vec new',
'others': [
{ 'path': 'std::vec::Vec', 'name': 'new' },
{ 'path': 'std::vec::Vec', 'name': 'new_in' },
],
},
{
'query': 'alloc::Vec::new',
'others': [
{ 'path': 'alloc::vec::Vec', 'name': 'new' },
{ 'path': 'alloc::vec::Vec', 'name': 'new_in' },
],
},
{
'query': 'alloc Vec new',
'others': [
{ 'path': 'alloc::vec::Vec', 'name': 'new' },
{ 'path': 'alloc::vec::Vec', 'name': 'new_in' },
],
},

View File

@ -0,0 +1,7 @@
// Defines a `vec!` macro inside a private (non-`pub`) module.
// NOTE(review): presumably this is the `macro_in_module` auxiliary crate used
// by the macro re-export search test — confirm against the test directives.
#[macro_use]
mod hidden_macro_module {
// `#[macro_export]` makes the macro available at the crate root even though
// the enclosing module is private.
#[macro_export]
macro_rules! vec {
() => {};
}
}

View File

@ -0,0 +1,11 @@
// exact-check
// Expected results for the query `vec`: both macros are listed under `foo`,
// but `vec` carries the `exactPath` `macro_in_module`, while `myspecialvec`
// carries the `exactPath` `foo`.
const EXPECTED = [
{
'query': 'vec',
'others': [
{ 'path': 'foo', 'name': 'vec', 'exactPath': 'macro_in_module' },
{ 'path': 'foo', 'name': 'myspecialvec', 'exactPath': 'foo' },
],
},
];

View File

@ -0,0 +1,15 @@
//@ aux-crate: macro_in_module=macro-in-module.rs
#![crate_name="foo"]
extern crate macro_in_module;
// Test case based on the relationship between alloc and std.
// `vec` is defined in the `macro_in_module` crate and re-exported here under
// the same name, with its documentation inlined.
#[doc(inline)]
pub use macro_in_module::vec;
// A second macro defined directly in this crate, inside a private module and
// exported with `#[macro_export]`.
#[macro_use]
mod hidden_macro_module {
#[macro_export]
macro_rules! myspecialvec {
() => {};
}
}

View File

@ -0,0 +1,16 @@
// exact-check
// Each query names a type and its method `dostuff`; exactly one result is
// expected per query (no duplicate entry for the re-exported path).
const EXPECTED = [
{
'query': 'Subscriber dostuff',
'others': [
{ 'path': 'foo::fmt::Subscriber', 'name': 'dostuff' },
],
},
{
'query': 'AnotherOne dostuff',
'others': [
{ 'path': 'foo::AnotherOne', 'name': 'dostuff' },
],
},
];

View File

@ -0,0 +1,18 @@
// This test covers methods on types that are re-exported at the crate root
// under their original names (the re-exports below are NOT renamed).
// NOTE(review): presumably the intent is that each method appears only once in
// the search results — confirm against the accompanying expectations file.
#![crate_name="foo"]
// Public module: `Subscriber` is reachable both as `fmt::Subscriber` and via
// the root re-export at the bottom of the file.
pub mod fmt {
pub struct Subscriber;
impl Subscriber {
pub fn dostuff(&self) {}
}
}
// Private module: `AnotherOne` is only reachable through the root re-export.
mod foo {
pub struct AnotherOne;
impl AnotherOne {
pub fn dostuff(&self) {}
}
}
pub use foo::AnotherOne;
pub use fmt::Subscriber;

View File

@ -0,0 +1,22 @@
// exact-check
// A bare type-name query expects a single result at the crate root (`foo`);
// qualifying the query with `fmt` expects the `foo::fmt` path instead.
const EXPECTED = [
{
'query': 'Subscriber',
'others': [
{ 'path': 'foo', 'name': 'Subscriber' },
],
},
{
'query': 'fmt Subscriber',
'others': [
{ 'path': 'foo::fmt', 'name': 'Subscriber' },
],
},
{
'query': 'AnotherOne',
'others': [
{ 'path': 'foo', 'name': 'AnotherOne' },
],
},
];

View File

@ -0,0 +1,12 @@
// This test covers types that are re-exported at the crate root under their
// original names (the re-exports below are NOT renamed).
// NOTE(review): presumably the intent is that such same-name re-exports are
// de-duplicated in the search results — confirm against the expectations file.
#![crate_name="foo"]
// Public module: `Subscriber` is reachable both as `fmt::Subscriber` and via
// the root re-export below.
pub mod fmt {
pub struct Subscriber;
}
// Private module: `AnotherOne` is only reachable through the root re-export.
mod foo {
pub struct AnotherOne;
}
pub use foo::AnotherOne;
pub use fmt::Subscriber;

View File

@ -1,4 +1,6 @@
// This test enforces that the (renamed) reexports are present in the search results.
// This is a DWIM case, since renaming the export probably means the intent is also different.
// For the de-duplication case of exactly the same name, see reexport-dedup
pub mod fmt {
pub struct Subscriber;