Auto merge of #123246 - Kobzol:tarball-reproducible, r=Mark-Simulacrum
Make source tarball generation more reproducible

This PR makes several changes to source tarball generation (`x dist rustc-src`) in order to make it more reproducible (in light of the recent "xz backdoor"...). I want to follow up on it by adding a separate CI workflow for generating the tarball.

After this PR, running the following locally produces identical checksums:

```bash
$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz
$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz
```

r? `@Mark-Simulacrum`
This commit is contained in:
commit
a8cfc83801
@ -995,9 +995,9 @@ fn run(self, builder: &Builder<'_>) -> GeneratedTarball {
|
||||
if builder.rust_info().is_managed_git_subrepository()
|
||||
|| builder.rust_info().is_from_tarball()
|
||||
{
|
||||
if builder.rust_info().is_managed_git_subrepository() {
|
||||
// Ensure we have the submodules checked out.
|
||||
builder.update_submodule(Path::new("src/tools/cargo"));
|
||||
// Ensure we have all submodules from src and other directories checked out.
|
||||
for submodule in builder.get_all_submodules() {
|
||||
builder.update_submodule(Path::new(submodule));
|
||||
}
|
||||
|
||||
// Vendor all Cargo dependencies
|
||||
@ -1028,6 +1028,20 @@ fn run(self, builder: &Builder<'_>) -> GeneratedTarball {
|
||||
builder.create(&cargo_config_dir.join("config.toml"), &config);
|
||||
}
|
||||
|
||||
// Delete extraneous directories
|
||||
// FIXME: if we're managed by git, we should probably instead ask git if the given path
|
||||
// is managed by it?
|
||||
for entry in walkdir::WalkDir::new(tarball.image_dir())
|
||||
.follow_links(true)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
{
|
||||
if entry.path().is_dir() && entry.path().file_name() == Some(OsStr::new("__pycache__"))
|
||||
{
|
||||
t!(fs::remove_dir_all(entry.path()));
|
||||
}
|
||||
}
|
||||
|
||||
tarball.bare()
|
||||
}
|
||||
}
|
||||
|
@ -554,29 +554,7 @@ pub fn path(self, path: &str) -> Self {
|
||||
///
|
||||
/// [`path`]: ShouldRun::path
|
||||
pub fn paths(mut self, paths: &[&str]) -> Self {
|
||||
static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();
|
||||
|
||||
let init_submodules_paths = |src: &PathBuf| {
|
||||
let file = File::open(src.join(".gitmodules")).unwrap();
|
||||
|
||||
let mut submodules_paths = vec![];
|
||||
for line in BufReader::new(file).lines() {
|
||||
if let Ok(line) = line {
|
||||
let line = line.trim();
|
||||
|
||||
if line.starts_with("path") {
|
||||
let actual_path =
|
||||
line.split(' ').last().expect("Couldn't get value of path");
|
||||
submodules_paths.push(actual_path.to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
submodules_paths
|
||||
};
|
||||
|
||||
let submodules_paths =
|
||||
SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.builder.src));
|
||||
let submodules_paths = self.builder.get_all_submodules();
|
||||
|
||||
self.paths.insert(PathSet::Set(
|
||||
paths
|
||||
@ -2151,6 +2129,37 @@ pub fn ensure<S: Step>(&'a self, step: S) -> S::Output {
|
||||
out
|
||||
}
|
||||
|
||||
/// Return paths of all submodules managed by git.
|
||||
/// If the current checkout is not managed by git, returns an empty slice.
|
||||
pub fn get_all_submodules(&self) -> &[String] {
|
||||
if !self.rust_info().is_managed_git_subrepository() {
|
||||
return &[];
|
||||
}
|
||||
|
||||
static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();
|
||||
|
||||
let init_submodules_paths = |src: &PathBuf| {
|
||||
let file = File::open(src.join(".gitmodules")).unwrap();
|
||||
|
||||
let mut submodules_paths = vec![];
|
||||
for line in BufReader::new(file).lines() {
|
||||
if let Ok(line) = line {
|
||||
let line = line.trim();
|
||||
|
||||
if line.starts_with("path") {
|
||||
let actual_path =
|
||||
line.split(' ').last().expect("Couldn't get value of path");
|
||||
submodules_paths.push(actual_path.to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
submodules_paths
|
||||
};
|
||||
|
||||
&SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.src))
|
||||
}
|
||||
|
||||
/// Ensure that a given step is built *only if it's supposed to be built by default*, returning
|
||||
/// its output. This will cache the step, so it's safe (and good!) to call this as often as
|
||||
/// needed to ensure that all dependencies are built.
|
||||
|
@ -2,7 +2,7 @@
|
||||
use std::fs::{read_link, symlink_metadata};
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::path::Path;
|
||||
use tar::{Builder, Header};
|
||||
use tar::{Builder, Header, HeaderMode};
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::{
|
||||
@ -53,14 +53,19 @@ pub fn run(self) -> Result<()> {
|
||||
// Sort files by their suffix, to group files with the same name from
|
||||
// different locations (likely identical) and files with the same
|
||||
// extension (likely containing similar data).
|
||||
let (dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
|
||||
// Sorting of file and directory paths also helps with the reproducibility
|
||||
// of the resulting archive.
|
||||
let (mut dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
|
||||
.context("failed to collect file paths")?;
|
||||
dirs.sort();
|
||||
files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev()));
|
||||
|
||||
// Write the tar into both encoded files. We write all directories
|
||||
// first, so files may be directly created. (See rust-lang/rustup.rs#1092.)
|
||||
let buf = BufWriter::with_capacity(1024 * 1024, encoder);
|
||||
let mut builder = Builder::new(buf);
|
||||
// Make uid, gid and mtime deterministic to improve reproducibility
|
||||
builder.mode(HeaderMode::Deterministic);
|
||||
|
||||
let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap();
|
||||
pool.install(move || {
|
||||
@ -91,7 +96,8 @@ pub fn run(self) -> Result<()> {
|
||||
fn append_path<W: Write>(builder: &mut Builder<W>, src: &Path, path: &String) -> Result<()> {
|
||||
let stat = symlink_metadata(src)?;
|
||||
let mut header = Header::new_gnu();
|
||||
header.set_metadata(&stat);
|
||||
header.set_metadata_in_mode(&stat, HeaderMode::Deterministic);
|
||||
|
||||
if stat.file_type().is_symlink() {
|
||||
let link = read_link(src)?;
|
||||
builder.append_link(&mut header, path, &link)?;
|
||||
|
Loading…
Reference in New Issue
Block a user