Rollup merge of #83835 - notriddle:sort-index, r=ollie27

rustdoc: sort search index items for compression

This should not affect the appearance of the docs pages themselves.

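This makes the uncompressed search index smaller: the index format
writes an empty string for an item's path when it repeats the path of
the preceding item, and sorting places items that share a path next to
each other. It also makes the gzipped version smaller, by giving the
compression algorithm more structure to work with.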

    rust$ wc -c search-index-old.js search-index-new.js
    2628334 search-index-old.js
    2586181 search-index-new.js
    5214515 total
    rust$ gzip search-index-*
    rust$ wc -c search-index-old.js.gz search-index-new.js.gz
    239486 search-index-old.js.gz
    237386 search-index-new.js.gz
    476872 total
This commit is contained in:
Yuki Okushi 2021-04-06 06:24:13 +09:00 committed by GitHub
commit 12d007da0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 26 additions and 23 deletions

View File

@ -914,7 +914,7 @@ impl Attributes {
.collect()
}
crate fn get_doc_aliases(&self) -> FxHashSet<String> {
crate fn get_doc_aliases(&self) -> Box<[String]> {
let mut aliases = FxHashSet::default();
for attr in self.other_attrs.lists(sym::doc).filter(|a| a.has_name(sym::alias)) {
@ -931,7 +931,7 @@ impl Attributes {
aliases.insert(attr.value_str().map(|s| s.to_string()).unwrap());
}
}
aliases
aliases.into_iter().collect::<Vec<String>>().into()
}
}

View File

@ -120,10 +120,6 @@ crate struct Cache {
// when gathering trait documentation on a type, hold impls here while
// folding and add them to the cache later on if we find the trait.
orphan_trait_impls: Vec<(DefId, FxHashSet<DefId>, Impl)>,
/// Aliases added through `#[doc(alias = "...")]`. Since a few items can have the same alias,
/// we need the alias element to have an array of items.
crate aliases: BTreeMap<String, Vec<usize>>,
}
/// This struct is used to wrap the `cache` and `tcx` in order to run `DocFolder`.
@ -309,15 +305,8 @@ impl<'a, 'tcx> DocFolder for CacheBuilder<'a, 'tcx> {
parent,
parent_idx: None,
search_type: get_index_search_type(&item, &self.empty_cache, self.tcx),
aliases: item.attrs.get_doc_aliases(),
});
for alias in item.attrs.get_doc_aliases() {
self.cache
.aliases
.entry(alias.to_lowercase())
.or_insert(Vec::new())
.push(self.cache.search_index.len() - 1);
}
}
}
(Some(parent), None) if is_inherent_impl_item => {

View File

@ -82,18 +82,31 @@ crate fn build_index<'tcx>(krate: &clean::Crate, cache: &mut Cache, tcx: TyCtxt<
parent: Some(did),
parent_idx: None,
search_type: get_index_search_type(&item, cache, tcx),
aliases: item.attrs.get_doc_aliases(),
});
for alias in item.attrs.get_doc_aliases() {
cache
.aliases
.entry(alias.to_lowercase())
.or_insert(Vec::new())
.push(cache.search_index.len() - 1);
}
}
}
let Cache { ref mut search_index, ref paths, ref mut aliases, .. } = *cache;
let Cache { ref mut search_index, ref paths, .. } = *cache;
// Aliases added through `#[doc(alias = "...")]`. Since a few items can have the same alias,
// we need the alias element to have an array of items.
let mut aliases: BTreeMap<String, Vec<usize>> = BTreeMap::new();
// Sort search index items. This improves the compressibility of the search index.
search_index.sort_unstable_by(|k1, k2| {
// `sort_unstable_by_key` produces lifetime errors
let k1 = (&k1.path, &k1.name, &k1.ty, &k1.parent);
let k2 = (&k2.path, &k2.name, &k2.ty, &k2.parent);
std::cmp::Ord::cmp(&k1, &k2)
});
// Set up alias indexes.
for (i, item) in search_index.iter().enumerate() {
for alias in &item.aliases[..] {
aliases.entry(alias.to_lowercase()).or_insert(Vec::new()).push(i);
}
}
// Reduce `DefId` in paths into smaller sequential numbers,
// and prune the paths that do not appear in the index.
@ -201,7 +214,7 @@ crate fn build_index<'tcx>(krate: &clean::Crate, cache: &mut Cache, tcx: TyCtxt<
doc: crate_doc,
items: crate_items,
paths: crate_paths,
aliases,
aliases: &aliases,
})
.expect("failed serde conversion")
// All these `replace` calls are because we have to go through JS string for JSON content.

View File

@ -164,6 +164,7 @@ crate struct IndexItem {
crate parent: Option<DefId>,
crate parent_idx: Option<usize>,
crate search_type: Option<IndexItemFunctionType>,
crate aliases: Box<[String]>,
}
/// A type used for the search index.