Auto merge of #78409 - pietroalbini:build-manifest-checksum-cache, r=Mark-Simulacrum
Add checksums cache to build-manifest

During the release process we currently calculate the SHA256 of each file three times:

1. In `build-manifest`, to fill the `hash = "f00"` keys of the manifests.
2. In `promote-release`, to generate the `.sha256` files.
3. In `promote-release`, to generate the `.asc` GPG signatures.

Calculations 1 and 2 could be merged into a single one if `build-manifest` had a way to pass the checksums it generates over to `promote-release`. Unfortunately calculation 3 can't be merged, as GPG requires extra metadata to be hashed.

This PR adds support for merging 1 and 2 by introducing the `BUILD_MANIFEST_CHECKSUM_CACHE` environment variable, which points to a JSON file storing a cache of all the calculated checksums. `build-manifest` loads the cache at startup and skips generating checksums that are already present, and it dumps its internal checksum cache back into the file when it exits successfully. This also makes it possible to run `build-manifest` multiple times without waiting for the checksums to be recalculated on each following invocation. The speedup will allow working towards a fix for https://github.com/rust-lang/promote-release/issues/15 without impacting the release process duration or our storage costs.

This PR can be reviewed commit-by-commit. r? `@Mark-Simulacrum`
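To make the cache format concrete: the new `checksum.rs` below keeps the checksums in a `HashMap<PathBuf, String>` and (de)serializes it with `serde_json`, so the file pointed at by `BUILD_MANIFEST_CHECKSUM_CACHE` is a flat JSON object mapping tarball paths to SHA256 digests. The following is a minimal sketch of that round-trip, assuming the same `serde_json` crate the tool already depends on; the cache path and the sample entry are hypothetical:

```rust
use std::collections::HashMap;
use std::error::Error;
use std::path::PathBuf;

fn main() -> Result<(), Box<dyn Error>> {
    // The real tool reads this path from the BUILD_MANIFEST_CHECKSUM_CACHE
    // environment variable; a fixed path is used here for illustration.
    let cache_path = PathBuf::from("checksum-cache.json");

    // Load the cache left behind by a previous successful run, if any.
    let mut cache: HashMap<PathBuf, String> = if cache_path.is_file() {
        serde_json::from_slice(&std::fs::read(&cache_path)?)?
    } else {
        HashMap::new()
    };

    // Checksums computed during this run are added to the map (hypothetical entry)...
    cache.insert(
        PathBuf::from("dist/rustc-nightly-x86_64-unknown-linux-gnu.tar.gz"),
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".to_string(),
    );

    // ...and the whole map is written back on successful exit, so the file is a
    // JSON object like {"dist/rustc-nightly-...tar.gz": "e3b0c4..."}.
    std::fs::write(&cache_path, serde_json::to_vec(&cache)?)?;
    Ok(())
}
```

In this scheme `promote-release` could read the same JSON file to emit the `.sha256` companion files without hashing the tarballs a second time, which is the merge of calculations 1 and 2 described above.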
Commit 717eb6ccea

src/tools/build-manifest/src/checksum.rs (new file, 97 lines)
```rust
use crate::manifest::{FileHash, Manifest};
use rayon::prelude::*;
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use std::time::Instant;

pub(crate) struct Checksums {
    cache_path: Option<PathBuf>,
    collected: Mutex<HashMap<PathBuf, String>>,
}

impl Checksums {
    pub(crate) fn new() -> Result<Self, Box<dyn Error>> {
        let cache_path = std::env::var_os("BUILD_MANIFEST_CHECKSUM_CACHE").map(PathBuf::from);

        let mut collected = HashMap::new();
        if let Some(path) = &cache_path {
            if path.is_file() {
                collected = serde_json::from_slice(&std::fs::read(path)?)?;
            }
        }

        Ok(Checksums { cache_path, collected: Mutex::new(collected) })
    }

    pub(crate) fn store_cache(&self) -> Result<(), Box<dyn Error>> {
        if let Some(path) = &self.cache_path {
            std::fs::write(path, &serde_json::to_vec(&self.collected)?)?;
        }
        Ok(())
    }

    pub(crate) fn fill_missing_checksums(&mut self, manifest: &mut Manifest) {
        let need_checksums = self.find_missing_checksums(manifest);
        if !need_checksums.is_empty() {
            self.collect_checksums(&need_checksums);
        }
        self.replace_checksums(manifest);
    }

    fn find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet<PathBuf> {
        let collected = self.collected.lock().unwrap();
        let mut need_checksums = HashSet::new();
        crate::manifest::visit_file_hashes(manifest, |file_hash| {
            if let FileHash::Missing(path) = file_hash {
                let path = std::fs::canonicalize(path).unwrap();
                if !collected.contains_key(&path) {
                    need_checksums.insert(path);
                }
            }
        });
        need_checksums
    }

    fn replace_checksums(&mut self, manifest: &mut Manifest) {
        let collected = self.collected.lock().unwrap();
        crate::manifest::visit_file_hashes(manifest, |file_hash| {
            if let FileHash::Missing(path) = file_hash {
                let path = std::fs::canonicalize(path).unwrap();
                match collected.get(&path) {
                    Some(hash) => *file_hash = FileHash::Present(hash.clone()),
                    None => panic!("missing hash for file {}", path.display()),
                }
            }
        });
    }

    fn collect_checksums(&mut self, files: &HashSet<PathBuf>) {
        let collection_start = Instant::now();
        println!(
            "collecting hashes for {} tarballs across {} threads",
            files.len(),
            rayon::current_num_threads().min(files.len()),
        );

        files.par_iter().for_each(|path| match hash(path) {
            Ok(hash) => {
                self.collected.lock().unwrap().insert(path.clone(), hash);
            }
            Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
        });

        println!("collected {} hashes in {:.2?}", files.len(), collection_start.elapsed());
    }
}

fn hash(path: &Path) -> Result<String, Box<dyn Error>> {
    let mut file = BufReader::new(File::open(path)?);
    let mut sha256 = Sha256::default();
    std::io::copy(&mut file, &mut sha256)?;
    Ok(hex::encode(sha256.finalize()))
}
```
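The cache pays off through the `fill_missing_checksums` flow above: checksums already present in `collected` are skipped, and only the remaining files are hashed, in parallel, by `collect_checksums`. Here is a self-contained sketch of that skip-then-hash step, assuming the same `rayon`, `sha2`, and `hex` crates used above; the file names and the pre-populated cache entry are hypothetical:

```rust
use rayon::prelude::*;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::sync::Mutex;

// Same streaming approach as `hash()` above: Sha256 implements io::Write, so the
// file contents are copied straight into the hasher instead of being read into memory.
fn sha256_of(path: &Path) -> Result<String, Box<dyn Error>> {
    let mut file = BufReader::new(File::open(path)?);
    let mut sha256 = Sha256::default();
    std::io::copy(&mut file, &mut sha256)?;
    Ok(hex::encode(sha256.finalize()))
}

fn main() {
    // Hypothetical tarball list; build-manifest derives the real one from the manifest.
    let files = vec![PathBuf::from("a.tar.gz"), PathBuf::from("b.tar.gz")];

    // Pretend a previous run already hashed a.tar.gz (this is what the JSON cache provides).
    let mut cached: HashMap<PathBuf, String> = HashMap::new();
    cached.insert(PathBuf::from("a.tar.gz"), "f00".to_string());

    // find_missing_checksums equivalent: only files absent from the cache need work.
    let missing: Vec<PathBuf> =
        files.iter().filter(|path| !cached.contains_key(*path)).cloned().collect();

    // collect_checksums equivalent: hash the missing files in parallel, inserting into
    // the shared map through a Mutex so every rayon worker thread can write to it.
    let collected = Mutex::new(cached);
    missing.par_iter().for_each(|path| match sha256_of(path) {
        Ok(hash) => {
            collected.lock().unwrap().insert(path.clone(), hash);
        }
        Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
    });

    println!("{} checksums known", collected.lock().unwrap().len());
}
```

Keeping the shared map behind a `Mutex` is what lets every rayon worker insert its result directly, without collecting and merging per-thread maps.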
src/tools/build-manifest/src/main.rs

```diff
@@ -4,22 +4,19 @@
 //! via `x.py dist hash-and-sign`; the cmdline arguments are set up
 //! by rustbuild (in `src/bootstrap/dist.rs`).
 
+mod checksum;
 mod manifest;
 mod versions;
 
-use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target};
+use crate::checksum::Checksums;
+use crate::manifest::{Component, Manifest, Package, Rename, Target};
 use crate::versions::{PkgType, Versions};
-use rayon::prelude::*;
-use sha2::Digest;
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::env;
 use std::error::Error;
 use std::fs::{self, File};
-use std::io::{self, BufReader, Read, Write};
+use std::io::{self, Read, Write};
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
-use std::sync::Mutex;
-use std::time::Instant;
 
 static HOSTS: &[&str] = &[
     "aarch64-apple-darwin",
@@ -186,6 +183,7 @@ macro_rules! t {
 
 struct Builder {
     versions: Versions,
+    checksums: Checksums,
     shipped_files: HashSet<String>,
 
     input: PathBuf,
@@ -240,6 +238,7 @@ fn main() {
 
     Builder {
         versions: Versions::new(&channel, &input).unwrap(),
+        checksums: t!(Checksums::new()),
         shipped_files: HashSet::new(),
 
         input,
@@ -276,6 +275,8 @@ impl Builder {
         if let Some(path) = std::env::var_os("BUILD_MANIFEST_SHIPPED_FILES_PATH") {
            self.write_shipped_files(&Path::new(&path));
         }
+
+        t!(self.checksums.store_cache());
     }
@@ -321,7 +322,7 @@ impl Builder {
         self.add_renames_to(&mut manifest);
         manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest));
 
-        self.fill_missing_hashes(&mut manifest);
+        self.checksums.fill_missing_checksums(&mut manifest);
 
         manifest
     }
@@ -595,41 +596,6 @@ impl Builder {
         assert!(t!(child.wait()).success());
     }
 
-    fn fill_missing_hashes(&self, manifest: &mut Manifest) {
-        // First collect all files that need hashes
-        let mut need_hashes = HashSet::new();
-        crate::manifest::visit_file_hashes(manifest, |file_hash| {
-            if let FileHash::Missing(path) = file_hash {
-                need_hashes.insert(path.clone());
-            }
-        });
-
-        let collected = Mutex::new(HashMap::new());
-        let collection_start = Instant::now();
-        println!(
-            "collecting hashes for {} tarballs across {} threads",
-            need_hashes.len(),
-            rayon::current_num_threads().min(need_hashes.len()),
-        );
-        need_hashes.par_iter().for_each(|path| match fetch_hash(path) {
-            Ok(hash) => {
-                collected.lock().unwrap().insert(path, hash);
-            }
-            Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
-        });
-        let collected = collected.into_inner().unwrap();
-        println!("collected {} hashes in {:.2?}", collected.len(), collection_start.elapsed());
-
-        crate::manifest::visit_file_hashes(manifest, |file_hash| {
-            if let FileHash::Missing(path) = file_hash {
-                match collected.get(path) {
-                    Some(hash) => *file_hash = FileHash::Present(hash.clone()),
-                    None => panic!("missing hash for file {}", path.display()),
-                }
-            }
-        })
-    }
-
     fn write_channel_files(&mut self, channel_name: &str, manifest: &Manifest) {
         self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml");
         self.write(&manifest.date, channel_name, "-date.txt");
@@ -660,10 +626,3 @@ impl Builder {
         t!(std::fs::write(path, content.as_bytes()));
     }
 }
-
-fn fetch_hash(path: &Path) -> Result<String, Box<dyn Error>> {
-    let mut file = BufReader::new(File::open(path)?);
-    let mut sha256 = sha2::Sha256::default();
-    std::io::copy(&mut file, &mut sha256)?;
-    Ok(hex::encode(sha256.finalize()))
-}
```
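Both the removed `fill_missing_hashes` and the new `Checksums` methods are built on `manifest::visit_file_hashes`, which hands the callback a mutable `FileHash` for every file referenced by the manifest so that `Missing(path)` entries can be swapped for `Present(hash)`. The sketch below re-creates that replacement step with stand-in types, since the real `Manifest` and `FileHash` live in `manifest.rs` and are not part of this diff:

```rust
use std::collections::HashMap;
use std::path::PathBuf;

// Stand-in for build-manifest's manifest::FileHash; the real enum lives in manifest.rs.
#[derive(Debug)]
enum FileHash {
    Missing(PathBuf),
    Present(String),
}

// Stand-in visitor: the real visit_file_hashes recurses through the Manifest
// rather than taking a flat slice.
fn visit_file_hashes(hashes: &mut [FileHash], mut f: impl FnMut(&mut FileHash)) {
    for hash in hashes {
        f(hash);
    }
}

fn main() {
    // Pretend the checksum cache already knows this tarball (hypothetical digest).
    let mut collected = HashMap::new();
    collected.insert(PathBuf::from("a.tar.gz"), "f00".to_string());

    let mut hashes = vec![FileHash::Missing(PathBuf::from("a.tar.gz"))];

    // Same rewrite as replace_checksums: swap Missing(path) for Present(hash).
    visit_file_hashes(&mut hashes, |file_hash| {
        if let FileHash::Missing(path) = file_hash {
            match collected.get(path) {
                Some(hash) => *file_hash = FileHash::Present(hash.clone()),
                None => panic!("missing hash for file {}", path.display()),
            }
        }
    });

    println!("{:?}", hashes);
}
```

The Missing-to-Present rewrite is the same in the real tool; only the traversal of the manifest structure is more involved.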