From 513cf8664ab074d0fa540f1e1661193aeff0d358 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Thu, 4 Aug 2022 12:13:16 -0700 Subject: [PATCH] rustdoc: use a more compact encoding for implementors/trait.*.js The exact amount that this reduces the size of an implementors file depends on whether most of the impls are synthetic or not. For `Send`, it reduces the file from 128K to 116K, while for `Clone` it went from 64K to 52K. --- src/librustdoc/html/render/write_shared.rs | 90 ++++++++++++++++++---- src/librustdoc/html/static/js/main.js | 14 +++- 2 files changed, 83 insertions(+), 21 deletions(-) diff --git a/src/librustdoc/html/render/write_shared.rs b/src/librustdoc/html/render/write_shared.rs index 6fb41ff3279..246121bd7e3 100644 --- a/src/librustdoc/html/render/write_shared.rs +++ b/src/librustdoc/html/render/write_shared.rs @@ -1,5 +1,4 @@ use std::ffi::OsStr; -use std::fmt::Write; use std::fs::{self, File}; use std::io::prelude::*; use std::io::{self, BufReader}; @@ -10,7 +9,6 @@ use std::sync::LazyLock as Lazy; use itertools::Itertools; use rustc_data_structures::flock; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; -use serde::Serialize; use super::{collect_paths_for_type, ensure_trailing_slash, Context, BASIC_KEYWORDS}; use crate::clean::Crate; @@ -284,25 +282,43 @@ pub(super) fn write_shared( cx.write_shared(SharedResource::Unversioned { name }, contents, &options.emit)?; } - fn collect(path: &Path, krate: &str, key: &str) -> io::Result<(Vec, Vec)> { + /// Read a file and return all lines that match the `"{crate}":{data},` format, + /// and return a tuple `(Vec, Vec)`. + /// + /// This forms the payload of files that look like this: + /// + /// ```javascript + /// var data = { + /// "{crate1}":{data}, + /// "{crate2}":{data} + /// }; + /// use_data(data); + /// ``` + /// + /// The file needs to be formatted so that *only crate data lines start with `"`*. + fn collect(path: &Path, krate: &str) -> io::Result<(Vec, Vec)> { let mut ret = Vec::new(); let mut krates = Vec::new(); if path.exists() { - let prefix = format!(r#"{}["{}"]"#, key, krate); + let prefix = format!("\"{}\"", krate); for line in BufReader::new(File::open(path)?).lines() { let line = line?; - if !line.starts_with(key) { + if !line.starts_with('"') { continue; } if line.starts_with(&prefix) { continue; } - ret.push(line.to_string()); + if line.ends_with(",") { + ret.push(line[..line.len() - 1].to_string()); + } else { + // No comma (it's the case for the last added crate line) + ret.push(line.to_string()); + } krates.push( - line[key.len() + 2..] - .split('"') - .next() + line.split('"') + .find(|s| !s.is_empty()) .map(|s| s.to_owned()) .unwrap_or_else(String::new), ); @@ -311,6 +327,20 @@ pub(super) fn write_shared( Ok((ret, krates)) } + /// Read a file and return all lines that match the "{crate}":{data},\ format, + /// and return a tuple `(Vec, Vec)`. + /// + /// This forms the payload of files that look like this: + /// + /// ```javascript + /// var data = JSON.parse('{\ + /// "{crate1}":{data},\ + /// "{crate2}":{data}\ + /// }'); + /// use_data(data); + /// ``` + /// + /// The file needs to be formatted so that *only crate data lines start with `"`*. fn collect_json(path: &Path, krate: &str) -> io::Result<(Vec, Vec)> { let mut ret = Vec::new(); let mut krates = Vec::new(); @@ -526,13 +556,40 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; }, }; - #[derive(Serialize)] struct Implementor { text: String, synthetic: bool, types: Vec, } + impl Implementor { + fn to_js_string(&self) -> String { + fn single_quote_string(s: &str) -> String { + let mut result = String::with_capacity(s.len() + 2); + result.push_str("'"); + for c in s.chars() { + if c == '"' { + result.push_str("\""); + } else { + result.extend(c.escape_default()); + } + } + result.push_str("'"); + result + } + let text_esc = single_quote_string(&self.text); + if self.synthetic { + let types = self.types.iter().map(|type_| single_quote_string(type_)).join(","); + // use `1` to represent a synthetic, because it's fewer bytes than `true` + format!("[{text_esc},1,[{types}]]") + } else { + // The types list is only used for synthetic impls. + // If this changes, `main.js` and `write_shared.rs` both need changed. + format!("[{text_esc}]") + } + } + } + let implementors = imps .iter() .filter_map(|imp| { @@ -563,9 +620,9 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; } let implementors = format!( - r#"implementors["{}"] = {};"#, + r#""{}":[{}]"#, krate.name(cx.tcx()), - serde_json::to_string(&implementors).unwrap() + implementors.iter().map(Implementor::to_js_string).join(",") ); let mut mydst = dst.clone(); @@ -576,16 +633,15 @@ if (typeof exports !== 'undefined') {exports.searchIndex = searchIndex}; mydst.push(&format!("{}.{}.js", remote_item_type, remote_path[remote_path.len() - 1])); let (mut all_implementors, _) = - try_err!(collect(&mydst, krate.name(cx.tcx()).as_str(), "implementors"), &mydst); + try_err!(collect(&mydst, krate.name(cx.tcx()).as_str()), &mydst); all_implementors.push(implementors); // Sort the implementors by crate so the file will be generated // identically even with rustdoc running in parallel. all_implementors.sort(); - let mut v = String::from("(function() {var implementors = {};\n"); - for implementor in &all_implementors { - writeln!(v, "{}", *implementor).unwrap(); - } + let mut v = String::from("(function() {var implementors = {\n"); + v.push_str(&all_implementors.join(",\n")); + v.push_str("\n};"); v.push_str( "if (window.register_implementors) {\ window.register_implementors(implementors);\ diff --git a/src/librustdoc/html/static/js/main.js b/src/librustdoc/html/static/js/main.js index 0702b2b0b7c..2e05c4be2f3 100644 --- a/src/librustdoc/html/static/js/main.js +++ b/src/librustdoc/html/static/js/main.js @@ -501,6 +501,10 @@ function loadCss(cssFileName) { const synthetic_implementors = document.getElementById("synthetic-implementors-list"); const inlined_types = new Set(); + const TEXT_IDX = 0; + const SYNTHETIC_IDX = 1; + const TYPES_IDX = 2; + if (synthetic_implementors) { // This `inlined_types` variable is used to avoid having the same implementation // showing up twice. For example "String" in the "Sync" doc page. @@ -536,10 +540,12 @@ function loadCss(cssFileName) { struct_loop: for (const struct of structs) { - const list = struct.synthetic ? synthetic_implementors : implementors; + const list = struct[SYNTHETIC_IDX] ? synthetic_implementors : implementors; - if (struct.synthetic) { - for (const struct_type of struct.types) { + // The types list is only used for synthetic impls. + // If this changes, `main.js` and `write_shared.rs` both need changed. + if (struct[SYNTHETIC_IDX]) { + for (const struct_type of struct[TYPES_IDX]) { if (inlined_types.has(struct_type)) { continue struct_loop; } @@ -548,7 +554,7 @@ function loadCss(cssFileName) { } const code = document.createElement("h3"); - code.innerHTML = struct.text; + code.innerHTML = struct[TEXT_IDX]; addClass(code, "code-header"); addClass(code, "in-band");