Fix download hash check on big-endian systems

Ensure the hash_file and hash_dir routines give identical results
on big- and little-endian systems.  The default hash routines for
integer types are endian-dependent, so all such hash inputs need
to be byte-swapped.

This applies in particular to the file hashes used as input when
computing directory hashes.

In addition, the default hash routines for composite types use
a length prefix, which is itself an integer type (usize).  In
order to be able to byte-swap that prefix, we have to re-implement
those bits of the standard library ourselves.
This commit is contained in:
Ulrich Weigand 2024-02-19 13:58:04 +01:00
parent e4584e84d9
commit bc1bca798f

View File

@ -1,5 +1,6 @@
use std::ffi::OsStr;
use std::fs;
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use std::process::Command;
@ -71,7 +72,11 @@ fn hash_file(file: &std::path::Path) -> u64 {
let contents = std::fs::read(file).unwrap();
#[allow(deprecated)]
let mut hasher = std::hash::SipHasher::new();
std::hash::Hash::hash(&contents, &mut hasher);
// The following is equivalent to
// std::hash::Hash::hash(&contents, &mut hasher);
// but gives the same result independent of host byte order.
hasher.write_usize(contents.len().to_le());
Hash::hash_slice(&contents, &mut hasher);
std::hash::Hasher::finish(&hasher)
}
@ -80,16 +85,26 @@ fn hash_dir(dir: &std::path::Path) -> u64 {
for entry in std::fs::read_dir(dir).unwrap() {
let entry = entry.unwrap();
if entry.file_type().unwrap().is_dir() {
sub_hashes
.insert(entry.file_name().to_str().unwrap().to_owned(), hash_dir(&entry.path()));
sub_hashes.insert(
entry.file_name().to_str().unwrap().to_owned(),
hash_dir(&entry.path()).to_le(),
);
} else {
sub_hashes
.insert(entry.file_name().to_str().unwrap().to_owned(), hash_file(&entry.path()));
sub_hashes.insert(
entry.file_name().to_str().unwrap().to_owned(),
hash_file(&entry.path()).to_le(),
);
}
}
#[allow(deprecated)]
let mut hasher = std::hash::SipHasher::new();
std::hash::Hash::hash(&sub_hashes, &mut hasher);
// The following is equivalent to
// std::hash::Hash::hash(&sub_hashes, &mut hasher);
// but gives the same result independent of host byte order.
hasher.write_usize(sub_hashes.len().to_le());
for elt in sub_hashes {
elt.hash(&mut hasher);
}
std::hash::Hasher::finish(&hasher)
}