add unstable support for outputting file checksums for use in cargo

Jacob Kiesel 2024-06-22 01:27:59 -06:00
parent bfe5e8cef6
commit bb5a8276be
16 changed files with 321 additions and 28 deletions

View File

@@ -199,6 +199,12 @@ dependencies = [
 "object 0.36.4",
]

+[[package]]
+name = "arrayref"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545"
+
[[package]]
name = "arrayvec"
version = "0.7.6"

@@ -262,6 +268,19 @@ version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"

+[[package]]
+name = "blake3"
+version = "1.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d08263faac5cde2a4d52b513dadb80846023aade56fcd8fc99ba73ba8050e92"
+dependencies = [
+ "arrayref",
+ "arrayvec",
+ "cc",
+ "cfg-if",
+ "constant_time_eq",
+]
+
[[package]]
name = "block-buffer"
version = "0.10.4"

@@ -719,6 +738,12 @@ dependencies = [
 "windows-sys 0.52.0",
]

+[[package]]
+name = "constant_time_eq"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
+
[[package]]
name = "core-foundation-sys"
version = "0.8.7"

@@ -4374,6 +4399,7 @@ dependencies = [
name = "rustc_span"
version = "0.0.0"
dependencies = [
+ "blake3",
 "derive-where",
 "indexmap",
 "itoa",

View File

@@ -630,6 +630,7 @@ fn alloc_new_file_metadata<'ll>(
        rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
        rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
        rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
+       rustc_span::SourceFileHashAlgorithm::Blake3 => llvm::ChecksumKind::None,
    };
    let hash_value = hex_encode(source_file.src_hash.hash_bytes());

View File

@@ -389,12 +389,13 @@ pub fn run_compiler<R: Send>(config: Config, f: impl FnOnce(&Compiler) -> R + Se
    let file_loader = config.file_loader.unwrap_or_else(|| Box::new(RealFileLoader));
    let path_mapping = config.opts.file_path_mapping();
    let hash_kind = config.opts.unstable_opts.src_hash_algorithm(&target);
+   let checksum_hash_kind = config.opts.unstable_opts.checksum_hash_algorithm();

    util::run_in_thread_pool_with_globals(
        &early_dcx,
        config.opts.edition,
        config.opts.unstable_opts.threads,
-       SourceMapInputs { file_loader, path_mapping, hash_kind },
+       SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind },
        |current_gcx| {
            // The previous `early_dcx` can't be reused here because it doesn't
            // impl `Send`. Creating a new one is fine.

View File

@@ -1,6 +1,7 @@
// tidy-alphabetical-start
#![feature(decl_macro)]
#![feature(file_buffered)]
+#![feature(iter_intersperse)]
#![feature(let_chains)]
#![feature(try_blocks)]
#![warn(unreachable_pub)]
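The `iter_intersperse` feature gate is presumably pulled in for the `.intersperse(" ")` call used by the dep-info writer later in this commit. As a standalone illustration of that call on a nightly toolchain (a minimal sketch, not compiler code):

    #![feature(iter_intersperse)]

    fn main() {
        let files = ["src/main.rs", "src/lib.rs"];
        // Join the names with single spaces, as the dep-info writer below does,
        // instead of allocating an intermediate Vec and calling join.
        let joined: String = files.iter().copied().intersperse(" ").collect();
        assert_eq!(joined, "src/main.rs src/lib.rs");
    }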

View File

@@ -32,8 +32,8 @@
use rustc_session::output::{collect_crate_types, filename_for_input, find_crate_name};
use rustc_session::search_paths::PathKind;
use rustc_session::{Limit, Session};
-use rustc_span::FileName;
use rustc_span::symbol::{Symbol, sym};
+use rustc_span::{FileName, SourceFileHash, SourceFileHashAlgorithm};
use rustc_target::spec::PanicStrategy;
use rustc_trait_selection::traits;
use tracing::{info, instrument};
@@ -417,15 +417,23 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
    let result: io::Result<()> = try {
        // Build a list of files used to compile the output and
        // write Makefile-compatible dependency rules
-       let mut files: Vec<String> = sess
+       let mut files: Vec<(String, u64, Option<SourceFileHash>)> = sess
            .source_map()
            .files()
            .iter()
            .filter(|fmap| fmap.is_real_file())
            .filter(|fmap| !fmap.is_imported())
-           .map(|fmap| escape_dep_filename(&fmap.name.prefer_local().to_string()))
+           .map(|fmap| {
+               (
+                   escape_dep_filename(&fmap.name.prefer_local().to_string()),
+                   fmap.source_len.0 as u64,
+                   fmap.checksum_hash,
+               )
+           })
            .collect();

+       let checksum_hash_algo = sess.opts.unstable_opts.checksum_hash_algorithm;
+
        // Account for explicitly marked-to-track files
        // (e.g. accessed in proc macros).
        let file_depinfo = sess.psess.file_depinfo.borrow();
@@ -437,22 +445,58 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
        // The entries will be used to declare dependencies between files in a
        // Makefile-like output, so the iteration order does not matter.
+       fn hash_iter_files<P: AsRef<Path>>(
+           it: impl Iterator<Item = P>,
+           checksum_hash_algo: Option<SourceFileHashAlgorithm>,
+       ) -> impl Iterator<Item = (P, u64, Option<SourceFileHash>)> {
+           it.map(move |path| {
+               match checksum_hash_algo.and_then(|algo| {
+                   fs::File::open(path.as_ref())
+                       .and_then(|mut file| {
+                           SourceFileHash::new(algo, &mut file).map(|h| (file, h))
+                       })
+                       .and_then(|(file, h)| file.metadata().map(|m| (m.len(), h)))
+                       .map_err(|e| {
+                           tracing::error!(
+                               "failed to compute checksum, omitting it from dep-info {} {e}",
+                               path.as_ref().display()
+                           )
+                       })
+                       .ok()
+               }) {
+                   Some((file_len, checksum)) => (path, file_len, Some(checksum)),
+                   None => (path, 0, None),
+               }
+           })
+       }
+
        #[allow(rustc::potential_query_instability)]
-       let extra_tracked_files =
-           file_depinfo.iter().map(|path_sym| normalize_path(PathBuf::from(path_sym.as_str())));
+       let extra_tracked_files = hash_iter_files(
+           file_depinfo.iter().map(|path_sym| normalize_path(PathBuf::from(path_sym.as_str()))),
+           checksum_hash_algo,
+       );
        files.extend(extra_tracked_files);

        // We also need to track used PGO profile files
        if let Some(ref profile_instr) = sess.opts.cg.profile_use {
-           files.push(normalize_path(profile_instr.as_path().to_path_buf()));
+           files.extend(hash_iter_files(
+               iter::once(normalize_path(profile_instr.as_path().to_path_buf())),
+               checksum_hash_algo,
+           ));
        }
        if let Some(ref profile_sample) = sess.opts.unstable_opts.profile_sample_use {
-           files.push(normalize_path(profile_sample.as_path().to_path_buf()));
+           files.extend(hash_iter_files(
+               iter::once(normalize_path(profile_sample.as_path().to_path_buf())),
+               checksum_hash_algo,
+           ));
        }

        // Debugger visualizer files
        for debugger_visualizer in tcx.debugger_visualizers(LOCAL_CRATE) {
-           files.push(normalize_path(debugger_visualizer.path.clone().unwrap()));
+           files.extend(hash_iter_files(
+               iter::once(normalize_path(debugger_visualizer.path.clone().unwrap())),
+               checksum_hash_algo,
+           ));
        }

        if sess.binary_dep_depinfo() {
@@ -460,33 +504,54 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
                if backend.contains('.') {
                    // If the backend name contain a `.`, it is the path to an external dynamic
                    // library. If not, it is not a path.
-                   files.push(backend.to_string());
+                   files.extend(hash_iter_files(
+                       iter::once(backend.to_string()),
+                       checksum_hash_algo,
+                   ));
                }
            }

            for &cnum in tcx.crates(()) {
                let source = tcx.used_crate_source(cnum);
                if let Some((path, _)) = &source.dylib {
-                   files.push(escape_dep_filename(&path.display().to_string()));
+                   files.extend(hash_iter_files(
+                       iter::once(escape_dep_filename(&path.display().to_string())),
+                       checksum_hash_algo,
+                   ));
                }
                if let Some((path, _)) = &source.rlib {
-                   files.push(escape_dep_filename(&path.display().to_string()));
+                   files.extend(hash_iter_files(
+                       iter::once(escape_dep_filename(&path.display().to_string())),
+                       checksum_hash_algo,
+                   ));
                }
                if let Some((path, _)) = &source.rmeta {
-                   files.push(escape_dep_filename(&path.display().to_string()));
+                   files.extend(hash_iter_files(
+                       iter::once(escape_dep_filename(&path.display().to_string())),
+                       checksum_hash_algo,
+                   ));
                }
            }
        }

        let write_deps_to_file = |file: &mut dyn Write| -> io::Result<()> {
            for path in out_filenames {
-               writeln!(file, "{}: {}\n", path.display(), files.join(" "))?;
+               writeln!(
+                   file,
+                   "{}: {}\n",
+                   path.display(),
+                   files
+                       .iter()
+                       .map(|(path, _file_len, _checksum_hash_algo)| path.as_str())
+                       .intersperse(" ")
+                       .collect::<String>()
+               )?;
            }

            // Emit a fake target for each input file to the compilation. This
            // prevents `make` from spitting out an error if a file is later
            // deleted. For more info see #28735
-           for path in files {
+           for (path, _file_len, _checksum_hash_algo) in &files {
                writeln!(file, "{path}:")?;
            }
@@ -510,6 +575,18 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
                }
            }

+           // If caller requested this information, add special comments about source file checksums.
+           // These are not necessarily the same checksums as was used in the debug files.
+           if sess.opts.unstable_opts.checksum_hash_algorithm().is_some() {
+               for (path, file_len, checksum_hash) in
+                   files.iter().filter_map(|(path, file_len, hash_algo)| {
+                       hash_algo.map(|hash_algo| (path, file_len, hash_algo))
+                   })
+               {
+                   writeln!(file, "# checksum:{checksum_hash} file_len:{file_len} {path}")?;
+               }
+           }
+
            Ok(())
        };
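For orientation (not part of the diff itself): the unstable option declared later in options.rs as `checksum_hash_algorithm` would normally be spelled `-Zchecksum-hash-algorithm=blake3` on the command line, following rustc's usual `-Z` naming; treat that exact spelling as an assumption. With it enabled, a dep-info file emitted by the code above would carry extra comment lines after the usual Makefile rules. A hypothetical excerpt, with made-up paths and lengths, where `<hex digest>` stands for the full lowercase hash rendered by the new `Display` impl:

    main: src/main.rs src/lib.rs

    src/main.rs:
    src/lib.rs:

    # checksum:blake3=<hex digest> file_len:412 src/main.rs
    # checksum:blake3=<hex digest> file_len:1187 src/lib.rs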

View File

@@ -44,10 +44,12 @@ fn sess_and_cfg<F>(args: &[&'static str], f: F)
    let sysroot = filesearch::materialize_sysroot(sessopts.maybe_sysroot.clone());
    let target = rustc_session::config::build_target_config(&early_dcx, &sessopts, &sysroot);
    let hash_kind = sessopts.unstable_opts.src_hash_algorithm(&target);
+   let checksum_hash_kind = sessopts.unstable_opts.checksum_hash_algorithm();
    let sm_inputs = Some(SourceMapInputs {
        file_loader: Box::new(RealFileLoader) as _,
        path_mapping: sessopts.file_path_mapping(),
        hash_kind,
+       checksum_hash_kind,
    });

    rustc_span::create_session_globals_then(DEFAULT_EDITION, sm_inputs, || {

View File

@@ -1702,6 +1702,7 @@ fn filter<'a>(sess: &Session, path: Option<&'a Path>) -> Option<&'a Path> {
        let rustc_span::SourceFile {
            mut name,
            src_hash,
+           checksum_hash,
            start_pos: original_start_pos,
            source_len,
            lines,

@@ -1752,6 +1753,7 @@ fn filter<'a>(sess: &Session, path: Option<&'a Path>) -> Option<&'a Path> {
        let local_version = sess.source_map().new_imported_source_file(
            name,
            src_hash,
+           checksum_hash,
            stable_id,
            source_len.to_u32(),
            self.cnum,

View File

@@ -68,6 +68,8 @@ fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHas
            // Do not hash the source as it is not encoded
            src: _,
            ref src_hash,
+           // Already includes src_hash, this is redundant
+           checksum_hash: _,
            external_src: _,
            start_pos: _,
            source_len: _,

View File

@@ -1242,6 +1242,10 @@ pub fn src_hash_algorithm(&self, target: &Target) -> SourceFileHashAlgorithm {
            }
        })
    }
+
+   pub fn checksum_hash_algorithm(&self) -> Option<SourceFileHashAlgorithm> {
+       self.checksum_hash_algorithm
+   }
}
// The type of entry function, so users can have their own entry functions // The type of entry function, so users can have their own entry functions

View File

@@ -418,7 +418,8 @@ mod desc {
        "one of: `legacy`, `v0` (RFC 2603), or `hashed`";
    pub(crate) const parse_opt_symbol_visibility: &str =
        "one of: `hidden`, `protected`, or `interposable`";
-   pub(crate) const parse_src_file_hash: &str = "either `md5` or `sha1`";
+   pub(crate) const parse_cargo_src_file_hash: &str = "one of `md5`, `sha1`, or `sha256`";
+   pub(crate) const parse_src_file_hash: &str = "one of `md5`, `sha1`, or `sha256`";
    pub(crate) const parse_relocation_model: &str =
        "one of supported relocation models (`rustc --print relocation-models`)";
    pub(crate) const parse_code_model: &str =

@@ -1288,6 +1289,23 @@ pub(crate) fn parse_src_file_hash(
        true
    }

+   pub(crate) fn parse_cargo_src_file_hash(
+       slot: &mut Option<SourceFileHashAlgorithm>,
+       v: Option<&str>,
+   ) -> bool {
+       match v.and_then(|s| SourceFileHashAlgorithm::from_str(s).ok()) {
+           Some(hash_kind) => {
+               if hash_kind.supported_in_cargo() {
+                   *slot = Some(hash_kind);
+               } else {
+                   return false;
+               }
+           }
+           _ => return false,
+       }
+       true
+   }
+
    pub(crate) fn parse_target_feature(slot: &mut String, v: Option<&str>) -> bool {
        match v {
            Some(s) => {

@@ -1688,6 +1706,8 @@ pub(crate) fn parse_mir_include_spans(slot: &mut MirIncludeSpans, v: Option<&str
        "instrument control-flow architecture protection"),
    check_cfg_all_expected: bool = (false, parse_bool, [UNTRACKED],
        "show all expected values in check-cfg diagnostics (default: no)"),
+   checksum_hash_algorithm: Option<SourceFileHashAlgorithm> = (None, parse_cargo_src_file_hash, [TRACKED],
+       "hash algorithm of source files used to check freshness in cargo (`sha256`)"),
    codegen_backend: Option<String> = (None, parse_opt_string, [TRACKED],
        "the backend to use"),
    combine_cgu: bool = (false, parse_bool, [TRACKED],
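The new parser above accepts only values whose `supported_in_cargo` (added to rustc_span further down in this commit) returns true, so `md5` and `sha1` remain usable for debuginfo hashing but are rejected for the cargo checksum option. A small self-contained mirror of that split, for illustration only (not rustc_span's actual type):

    // Hypothetical stand-in for rustc_span::SourceFileHashAlgorithm.
    enum Algo { Md5, Sha1, Sha256, Blake3 }

    impl Algo {
        // Mirrors supported_in_cargo from the diff: only sha256 and blake3 qualify.
        fn supported_in_cargo(&self) -> bool {
            matches!(self, Algo::Sha256 | Algo::Blake3)
        }
    }

    fn main() {
        assert!(!Algo::Md5.supported_in_cargo());
        assert!(!Algo::Sha1.supported_in_cargo());
        assert!(Algo::Sha256.supported_in_cargo());
        assert!(Algo::Blake3.supported_in_cargo());
    }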

View File

@@ -5,6 +5,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
+blake3 = "1.5.2"
derive-where = "1.2.7"
indexmap = { version = "2.0.0" }
itoa = "1.0"

View File

@@ -75,7 +75,9 @@
use std::borrow::Cow;
use std::cmp::{self, Ordering};
+use std::fmt::Display;
use std::hash::Hash;
+use std::io::{self, Read};
use std::ops::{Add, Range, Sub};
use std::path::{Path, PathBuf};
use std::str::FromStr;
@@ -1395,6 +1397,27 @@ pub enum SourceFileHashAlgorithm {
    Md5,
    Sha1,
    Sha256,
+   Blake3,
}

+impl SourceFileHashAlgorithm {
+    pub fn supported_in_cargo(&self) -> bool {
+        match self {
+            Self::Md5 | Self::Sha1 => false,
+            Self::Sha256 | Self::Blake3 => true,
+        }
+    }
+}
+
+impl Display for SourceFileHashAlgorithm {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            Self::Md5 => "md5",
+            Self::Sha1 => "sha1",
+            Self::Sha256 => "sha256",
+            Self::Blake3 => "blake3",
+        })
+    }
+}
+
impl FromStr for SourceFileHashAlgorithm {
@@ -1405,12 +1428,13 @@ fn from_str(s: &str) -> Result<SourceFileHashAlgorithm, ()> {
            "md5" => Ok(SourceFileHashAlgorithm::Md5),
            "sha1" => Ok(SourceFileHashAlgorithm::Sha1),
            "sha256" => Ok(SourceFileHashAlgorithm::Sha256),
+           "blake3" => Ok(SourceFileHashAlgorithm::Blake3),
            _ => Err(()),
        }
    }
}

-/// The hash of the on-disk source file used for debug info.
+/// The hash of the on-disk source file used for debug info and cargo freshness checks.
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
#[derive(HashStable_Generic, Encodable, Decodable)]
pub struct SourceFileHash {
@@ -1418,12 +1442,22 @@ pub struct SourceFileHash {
    value: [u8; 32],
}

+impl Display for SourceFileHash {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}=", self.kind)?;
+        for byte in self.value[0..self.hash_len()].into_iter() {
+            write!(f, "{byte:02x}")?;
+        }
+        Ok(())
+    }
+}
+
impl SourceFileHash {
-   pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash {
+   pub fn new_in_memory(kind: SourceFileHashAlgorithm, src: impl AsRef<[u8]>) -> SourceFileHash {
        let mut hash = SourceFileHash { kind, value: Default::default() };
        let len = hash.hash_len();
        let value = &mut hash.value[..len];
-       let data = src.as_bytes();
+       let data = src.as_ref();
        match kind {
            SourceFileHashAlgorithm::Md5 => {
                value.copy_from_slice(&Md5::digest(data));
@@ -1434,13 +1468,94 @@ pub fn new(kind: SourceFileHashAlgorithm, src: &str) -> SourceFileHash {
            SourceFileHashAlgorithm::Sha256 => {
                value.copy_from_slice(&Sha256::digest(data));
            }
-       }
+           SourceFileHashAlgorithm::Blake3 => value.copy_from_slice(blake3::hash(data).as_bytes()),
+       };
        hash
    }

+   pub fn new(kind: SourceFileHashAlgorithm, src: impl Read) -> Result<SourceFileHash, io::Error> {
+       let mut hash = SourceFileHash { kind, value: Default::default() };
+       let len = hash.hash_len();
+       let value = &mut hash.value[..len];
+       // Buffer size is the recommended amount to fully leverage SIMD instructions on AVX-512 as per
+       // blake3 documentation.
+       let mut buf = vec![0; 16 * 1024];
+
+       fn digest<T>(
+           mut hasher: T,
+           mut update: impl FnMut(&mut T, &[u8]),
+           finish: impl FnOnce(T, &mut [u8]),
+           mut src: impl Read,
+           buf: &mut [u8],
+           value: &mut [u8],
+       ) -> Result<(), io::Error> {
+           loop {
+               let bytes_read = src.read(buf)?;
+               if bytes_read == 0 {
+                   break;
+               }
+               update(&mut hasher, &buf[0..bytes_read]);
+           }
+           finish(hasher, value);
+           Ok(())
+       }
+
+       match kind {
+           SourceFileHashAlgorithm::Sha256 => {
+               digest(
+                   Sha256::new(),
+                   |h, b| {
+                       h.update(b);
+                   },
+                   |h, out| out.copy_from_slice(&h.finalize()),
+                   src,
+                   &mut buf,
+                   value,
+               )?;
+           }
+           SourceFileHashAlgorithm::Sha1 => {
+               digest(
+                   Sha1::new(),
+                   |h, b| {
+                       h.update(b);
+                   },
+                   |h, out| out.copy_from_slice(&h.finalize()),
+                   src,
+                   &mut buf,
+                   value,
+               )?;
+           }
+           SourceFileHashAlgorithm::Md5 => {
+               digest(
+                   Md5::new(),
+                   |h, b| {
+                       h.update(b);
+                   },
+                   |h, out| out.copy_from_slice(&h.finalize()),
+                   src,
+                   &mut buf,
+                   value,
+               )?;
+           }
+           SourceFileHashAlgorithm::Blake3 => {
+               digest(
+                   blake3::Hasher::new(),
+                   |h, b| {
+                       h.update(b);
+                   },
+                   |h, out| out.copy_from_slice(h.finalize().as_bytes()),
+                   src,
+                   &mut buf,
+                   value,
+               )?;
+           }
+       }
+
+       Ok(hash)
+   }
+
    /// Check if the stored hash matches the hash of the string.
    pub fn matches(&self, src: &str) -> bool {
-       Self::new(self.kind, src) == *self
+       Self::new_in_memory(self.kind, src.as_bytes()) == *self
    }

    /// The bytes of the hash.
@@ -1453,7 +1568,7 @@ fn hash_len(&self) -> usize {
        match self.kind {
            SourceFileHashAlgorithm::Md5 => 16,
            SourceFileHashAlgorithm::Sha1 => 20,
-           SourceFileHashAlgorithm::Sha256 => 32,
+           SourceFileHashAlgorithm::Sha256 | SourceFileHashAlgorithm::Blake3 => 32,
        }
    }
}
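The chunked-read loop above can be reproduced outside the compiler. A minimal sketch, assuming the `sha2` and `blake3` crates as dependencies (a hypothetical standalone program, not rustc code), which also formats its output like the new `Display` impl, `<kind>=<lowercase hex>`:

    use std::fs::File;
    use std::io::{self, Read};

    use sha2::{Digest, Sha256};

    // Read the file in 16 KiB chunks, as SourceFileHash::new does above,
    // feeding every chunk to both hashers before finalizing.
    fn hash_file(path: &str) -> io::Result<(String, String)> {
        let mut file = File::open(path)?;
        let mut sha256 = Sha256::new();
        let mut blake3 = blake3::Hasher::new();
        let mut buf = vec![0u8; 16 * 1024];
        loop {
            let n = file.read(&mut buf)?;
            if n == 0 {
                break;
            }
            sha256.update(&buf[..n]);
            blake3.update(&buf[..n]);
        }
        let sha_hex: String = sha256.finalize().iter().map(|b| format!("{b:02x}")).collect();
        let b3_hex = blake3.finalize().to_hex();
        Ok((format!("sha256={sha_hex}"), format!("blake3={b3_hex}")))
    }

    fn main() -> io::Result<()> {
        let (sha, b3) = hash_file("src/main.rs")?;
        println!("{sha}");
        println!("{b3}");
        Ok(())
    }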
@@ -1509,6 +1624,10 @@ pub struct SourceFile {
    pub src: Option<Lrc<String>>,
    /// The source code's hash.
    pub src_hash: SourceFileHash,
+   /// Used to enable cargo to use checksums to check if a crate is fresh rather
+   /// than mtimes. This might be the same as `src_hash`, and if the requested algorithm
+   /// is identical we won't compute it twice.
+   pub checksum_hash: Option<SourceFileHash>,
    /// The external source code (used for external crates, which will have a `None`
    /// value as `self.src`.
    pub external_src: FreezeLock<ExternalSource>,

@@ -1536,6 +1655,7 @@ fn clone(&self) -> Self {
            name: self.name.clone(),
            src: self.src.clone(),
            src_hash: self.src_hash,
+           checksum_hash: self.checksum_hash,
            external_src: self.external_src.clone(),
            start_pos: self.start_pos,
            source_len: self.source_len,

@@ -1552,6 +1672,7 @@ impl<S: SpanEncoder> Encodable<S> for SourceFile {
    fn encode(&self, s: &mut S) {
        self.name.encode(s);
        self.src_hash.encode(s);
+       self.checksum_hash.encode(s);
        // Do not encode `start_pos` as it's global state for this session.
        self.source_len.encode(s);

@@ -1625,6 +1746,7 @@ impl<D: SpanDecoder> Decodable<D> for SourceFile {
    fn decode(d: &mut D) -> SourceFile {
        let name: FileName = Decodable::decode(d);
        let src_hash: SourceFileHash = Decodable::decode(d);
+       let checksum_hash: Option<SourceFileHash> = Decodable::decode(d);
        let source_len: RelativeBytePos = Decodable::decode(d);
        let lines = {
            let num_lines: u32 = Decodable::decode(d);

@@ -1650,6 +1772,7 @@ fn decode(d: &mut D) -> SourceFile {
            source_len,
            src: None,
            src_hash,
+           checksum_hash,
            // Unused - the metadata decoder will construct
            // a new SourceFile, filling in `external_src` properly
            external_src: FreezeLock::frozen(ExternalSource::Unneeded),
@@ -1733,9 +1856,17 @@ pub fn new(
        name: FileName,
        mut src: String,
        hash_kind: SourceFileHashAlgorithm,
+       checksum_hash_kind: Option<SourceFileHashAlgorithm>,
    ) -> Result<Self, OffsetOverflowError> {
        // Compute the file hash before any normalization.
-       let src_hash = SourceFileHash::new(hash_kind, &src);
+       let src_hash = SourceFileHash::new_in_memory(hash_kind, src.as_bytes());
+       let checksum_hash = checksum_hash_kind.map(|checksum_hash_kind| {
+           if checksum_hash_kind == hash_kind {
+               src_hash
+           } else {
+               SourceFileHash::new_in_memory(checksum_hash_kind, src.as_bytes())
+           }
+       });
        let normalized_pos = normalize_src(&mut src);

        let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);

@@ -1748,6 +1879,7 @@ pub fn new(
            name,
            src: Some(Lrc::new(src)),
            src_hash,
+           checksum_hash,
            external_src: FreezeLock::frozen(ExternalSource::Unneeded),
            start_pos: BytePos::from_u32(0),
            source_len: RelativeBytePos::from_u32(source_len),

View File

@@ -175,6 +175,7 @@ pub struct SourceMapInputs {
    pub file_loader: Box<dyn FileLoader + Send + Sync>,
    pub path_mapping: FilePathMapping,
    pub hash_kind: SourceFileHashAlgorithm,
+   pub checksum_hash_kind: Option<SourceFileHashAlgorithm>,
}

pub struct SourceMap {

@@ -187,6 +188,12 @@ pub struct SourceMap {
    /// The algorithm used for hashing the contents of each source file.
    hash_kind: SourceFileHashAlgorithm,

+   /// Similar to `hash_kind`, however this algorithm is used for checksums to determine if a crate is fresh.
+   /// `cargo` is the primary user of these.
+   ///
+   /// If this is equal to `hash_kind` then the checksum won't be computed twice.
+   checksum_hash_kind: Option<SourceFileHashAlgorithm>,
}

impl SourceMap {

@@ -195,17 +202,19 @@ pub fn new(path_mapping: FilePathMapping) -> SourceMap {
            file_loader: Box::new(RealFileLoader),
            path_mapping,
            hash_kind: SourceFileHashAlgorithm::Md5,
+           checksum_hash_kind: None,
        })
    }

    pub fn with_inputs(
-       SourceMapInputs { file_loader, path_mapping, hash_kind }: SourceMapInputs,
+       SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind }: SourceMapInputs,
    ) -> SourceMap {
        SourceMap {
            files: Default::default(),
            file_loader: IntoDynSyncSend(file_loader),
            path_mapping,
            hash_kind,
+           checksum_hash_kind,
        }
    }

@@ -307,7 +316,8 @@ fn try_new_source_file(
        match self.source_file_by_stable_id(stable_id) {
            Some(lrc_sf) => Ok(lrc_sf),
            None => {
-               let source_file = SourceFile::new(filename, src, self.hash_kind)?;
+               let source_file =
+                   SourceFile::new(filename, src, self.hash_kind, self.checksum_hash_kind)?;

                // Let's make sure the file_id we generated above actually matches
                // the ID we generate for the SourceFile we just created.

@@ -326,6 +336,7 @@ pub fn new_imported_source_file(
        &self,
        filename: FileName,
        src_hash: SourceFileHash,
+       checksum_hash: Option<SourceFileHash>,
        stable_id: StableSourceFileId,
        source_len: u32,
        cnum: CrateNum,

@@ -340,6 +351,7 @@ pub fn new_imported_source_file(
            name: filename,
            src: None,
            src_hash,
+           checksum_hash,
            external_src: FreezeLock::new(ExternalSource::Foreign {
                kind: ExternalSourceKind::AbsentOk,
                metadata_index,

View File

@@ -229,6 +229,7 @@ fn t10() {
        let SourceFile {
            name,
            src_hash,
+           checksum_hash,
            source_len,
            lines,
            multibyte_chars,

@@ -240,6 +241,7 @@ fn t10() {
        let imported_src_file = sm.new_imported_source_file(
            name,
            src_hash,
+           checksum_hash,
            stable_id,
            source_len.to_u32(),
            CrateNum::ZERO,

View File

@@ -3,8 +3,12 @@
#[test]
fn test_lookup_line() {
    let source = "abcdefghijklm\nabcdefghij\n...".to_owned();
-   let mut sf =
-       SourceFile::new(FileName::Anon(Hash64::ZERO), source, SourceFileHashAlgorithm::Sha256)
+   let mut sf = SourceFile::new(
+       FileName::Anon(Hash64::ZERO),
+       source,
+       SourceFileHashAlgorithm::Sha256,
+       Some(SourceFileHashAlgorithm::Sha256),
+   )
        .unwrap();
    sf.start_pos = BytePos(3);
    assert_eq!(sf.lines(), &[RelativeBytePos(0), RelativeBytePos(14), RelativeBytePos(25)]);
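Since the test now passes the same algorithm for both the source hash and the checksum hash, the reuse branch in `SourceFile::new` above applies. A hypothetical extra assertion, not part of this commit, that would hold under that logic:

    // Both kinds are Sha256, so SourceFile::new reuses src_hash for checksum_hash.
    assert_eq!(sf.checksum_hash, Some(sf.src_hash));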

View File

@@ -88,7 +88,10 @@
const EXCEPTIONS: ExceptionList = &[
    // tidy-alphabetical-start
    ("ar_archive_writer", "Apache-2.0 WITH LLVM-exception"), // rustc
+   ("arrayref", "BSD-2-Clause"), // rustc
+   ("blake3", "CC0-1.0 OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception"), // rustc
    ("colored", "MPL-2.0"), // rustfmt
+   ("constant_time_eq", "CC0-1.0 OR MIT-0 OR Apache-2.0"), // rustc
    ("dissimilar", "Apache-2.0"), // rustdoc, rustc_lexer (few tests) via expect-test, (dev deps)
    ("fluent-langneg", "Apache-2.0"), // rustc (fluent translations)
    ("instant", "BSD-3-Clause"), // rustc_driver/tracing-subscriber/parking_lot

@@ -249,14 +252,17 @@
    "annotate-snippets",
    "anstyle",
    "ar_archive_writer",
+   "arrayref",
    "arrayvec",
    "autocfg",
    "bitflags",
+   "blake3",
    "block-buffer",
    "byteorder", // via ruzstd in object in thorin-dwp
    "cc",
    "cfg-if",
    "cfg_aliases",
+   "constant_time_eq",
    "cpufeatures",
    "crc32fast",
    "crossbeam-channel",