From 6ee3713b08a3612de836a2dab527e5a644517aa1 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Wed, 3 Apr 2024 14:09:21 -0400 Subject: [PATCH] Stabilize the size of incr comp object file names --- compiler/rustc_codegen_gcc/src/context.rs | 5 +- compiler/rustc_codegen_llvm/src/context.rs | 5 +- compiler/rustc_data_structures/src/base_n.rs | 116 ++++++++++++++---- .../rustc_data_structures/src/base_n/tests.rs | 12 +- compiler/rustc_data_structures/src/lib.rs | 2 + compiler/rustc_incremental/src/persist/fs.rs | 54 ++++---- compiler/rustc_middle/src/mir/mono.rs | 11 +- .../src/cfi/typeid/itanium_cxx_abi/encode.rs | 8 +- compiler/rustc_symbol_mangling/src/v0.rs | 4 +- 9 files changed, 146 insertions(+), 71 deletions(-) diff --git a/compiler/rustc_codegen_gcc/src/context.rs b/compiler/rustc_codegen_gcc/src/context.rs index 16a85b4e8fa..4a1f5188a80 100644 --- a/compiler/rustc_codegen_gcc/src/context.rs +++ b/compiler/rustc_codegen_gcc/src/context.rs @@ -6,7 +6,8 @@ use rustc_codegen_ssa::base::wants_msvc_seh; use rustc_codegen_ssa::errors as ssa_errors; use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, MiscMethods}; -use rustc_data_structures::base_n; +use rustc_data_structures::base_n::ToBaseN; +use rustc_data_structures::base_n::ALPHANUMERIC_ONLY; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_middle::mir::mono::CodegenUnit; use rustc_middle::span_bug; @@ -621,7 +622,7 @@ pub fn generate_local_symbol_name(&self, prefix: &str) -> String { let mut name = String::with_capacity(prefix.len() + 6); name.push_str(prefix); name.push_str("."); - base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name); + name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY)); name } } diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index d32baa6dc02..653abb4e41b 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -11,7 +11,8 @@ use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh}; use rustc_codegen_ssa::errors as ssa_errors; use rustc_codegen_ssa::traits::*; -use rustc_data_structures::base_n; +use rustc_data_structures::base_n::ToBaseN; +use rustc_data_structures::base_n::ALPHANUMERIC_ONLY; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::small_c_str::SmallCStr; use rustc_hir::def_id::DefId; @@ -1015,7 +1016,7 @@ pub fn generate_local_symbol_name(&self, prefix: &str) -> String { let mut name = String::with_capacity(prefix.len() + 6); name.push_str(prefix); name.push('.'); - base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name); + name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY)); name } } diff --git a/compiler/rustc_data_structures/src/base_n.rs b/compiler/rustc_data_structures/src/base_n.rs index a3eb2b9c416..aed89fadc4c 100644 --- a/compiler/rustc_data_structures/src/base_n.rs +++ b/compiler/rustc_data_structures/src/base_n.rs @@ -1,6 +1,7 @@ /// Converts unsigned integers into a string representation with some base. /// Bases up to and including 36 can be used for case-insensitive things. -use std::str; +use std::ascii; +use std::fmt; #[cfg(test)] mod tests; @@ -9,36 +10,101 @@ pub const ALPHANUMERIC_ONLY: usize = 62; pub const CASE_INSENSITIVE: usize = 36; -const BASE_64: &[u8; MAX_BASE] = - b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$"; +const BASE_64: [ascii::Char; MAX_BASE] = { + let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$"; + let Some(ascii) = bytes.as_ascii() else { panic!() }; + *ascii +}; -#[inline] -pub fn push_str(mut n: u128, base: usize, output: &mut String) { - debug_assert!(base >= 2 && base <= MAX_BASE); - let mut s = [0u8; 128]; - let mut index = s.len(); +pub struct BaseNString { + start: usize, + buf: [ascii::Char; 128], +} - let base = base as u128; +impl std::ops::Deref for BaseNString { + type Target = str; - loop { - index -= 1; - s[index] = BASE_64[(n % base) as usize]; - n /= base; + fn deref(&self) -> &str { + self.buf[self.start..].as_str() + } +} - if n == 0 { - break; - } +impl AsRef for BaseNString { + fn as_ref(&self) -> &str { + self.buf[self.start..].as_str() + } +} + +impl fmt::Display for BaseNString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self) + } +} + +// This trait just lets us reserve the exact right amount of space when doing fixed-length +// case-insensitve encoding. Add any impls you need. +pub trait ToBaseN: Into { + fn encoded_len(base: usize) -> usize; + + fn to_base_fixed_len(self, base: usize) -> BaseNString { + let mut encoded = self.to_base(base); + encoded.start = encoded.buf.len() - Self::encoded_len(base); + encoded } - output.push_str(unsafe { - // SAFETY: `s` is populated using only valid utf8 characters from `BASE_64` - str::from_utf8_unchecked(&s[index..]) - }); + fn to_base(self, base: usize) -> BaseNString { + let mut output = [ascii::Char::Digit0; 128]; + + let mut n: u128 = self.into(); + + let mut index = output.len(); + loop { + index -= 1; + output[index] = BASE_64[(n % base as u128) as usize]; + n /= base as u128; + + if n == 0 { + break; + } + } + assert_eq!(n, 0); + + BaseNString { start: index, buf: output } + } } -#[inline] -pub fn encode(n: u128, base: usize) -> String { - let mut s = String::new(); - push_str(n, base, &mut s); - s +impl ToBaseN for u128 { + fn encoded_len(base: usize) -> usize { + let mut max = u128::MAX; + let mut len = 0; + while max > 0 { + len += 1; + max /= base as u128; + } + len + } +} + +impl ToBaseN for u64 { + fn encoded_len(base: usize) -> usize { + let mut max = u64::MAX; + let mut len = 0; + while max > 0 { + len += 1; + max /= base as u64; + } + len + } +} + +impl ToBaseN for u32 { + fn encoded_len(base: usize) -> usize { + let mut max = u32::MAX; + let mut len = 0; + while max > 0 { + len += 1; + max /= base as u32; + } + len + } } diff --git a/compiler/rustc_data_structures/src/base_n/tests.rs b/compiler/rustc_data_structures/src/base_n/tests.rs index 2be2f0532c9..148d8dde02a 100644 --- a/compiler/rustc_data_structures/src/base_n/tests.rs +++ b/compiler/rustc_data_structures/src/base_n/tests.rs @@ -1,9 +1,17 @@ use super::*; #[test] -fn test_encode() { +fn limits() { + assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36)); + assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36)); + assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36)); +} + +#[test] +fn test_to_base() { fn test(n: u128, base: usize) { - assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32)); + assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32)); + assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32)); } for base in 2..37 { diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs index b82a9a909e6..b360a1b7bb8 100644 --- a/compiler/rustc_data_structures/src/lib.rs +++ b/compiler/rustc_data_structures/src/lib.rs @@ -16,6 +16,8 @@ #![doc(rust_logo)] #![feature(allocator_api)] #![feature(array_windows)] +#![feature(ascii_char)] +#![feature(ascii_char_variants)] #![feature(auto_traits)] #![feature(cfg_match)] #![feature(core_intrinsics)] diff --git a/compiler/rustc_incremental/src/persist/fs.rs b/compiler/rustc_incremental/src/persist/fs.rs index bf6ee044a0b..45f4d0a7e12 100644 --- a/compiler/rustc_incremental/src/persist/fs.rs +++ b/compiler/rustc_incremental/src/persist/fs.rs @@ -104,10 +104,14 @@ //! implemented. use crate::errors; +use rustc_data_structures::base_n; +use rustc_data_structures::base_n::BaseNString; +use rustc_data_structures::base_n::ToBaseN; +use rustc_data_structures::base_n::CASE_INSENSITIVE; +use rustc_data_structures::flock; use rustc_data_structures::fx::{FxHashSet, FxIndexSet}; use rustc_data_structures::svh::Svh; use rustc_data_structures::unord::{UnordMap, UnordSet}; -use rustc_data_structures::{base_n, flock}; use rustc_errors::ErrorGuaranteed; use rustc_fs_util::{link_or_copy, try_canonicalize, LinkOrCopy}; use rustc_session::config::CrateType; @@ -332,31 +336,24 @@ pub fn finalize_session_directory(sess: &Session, svh: Option) { debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display()); - let old_sub_dir_name = incr_comp_session_dir + let mut sub_dir_name = incr_comp_session_dir .file_name() .unwrap() .to_str() - .expect("malformed session dir name: contains non-Unicode characters"); + .expect("malformed session dir name: contains non-Unicode characters") + .to_string(); - // Keep the 's-{timestamp}-{random-number}' prefix, but replace the - // '-working' part with the SVH of the crate - let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect(); - if dash_indices.len() != 3 { - bug!( - "Encountered incremental compilation session directory with \ - malformed name: {}", - incr_comp_session_dir.display() - ) - } + // Keep the 's-{timestamp}-{random-number}' prefix, but replace "working" with the SVH of the crate + sub_dir_name.truncate(sub_dir_name.len() - "working".len()); + // Double-check that we kept this: "s-{timestamp}-{random-number}-" + assert!(sub_dir_name.ends_with('-'), "{:?}", sub_dir_name); + assert!(sub_dir_name.as_bytes().iter().filter(|b| **b == b'-').count() == 3); - // State: "s-{timestamp}-{random-number}-" - let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]); - - // Append the svh - base_n::push_str(svh.as_u128(), INT_ENCODE_BASE, &mut new_sub_dir_name); + // Append the SVH + sub_dir_name.push_str(&svh.as_u128().to_base_fixed_len(CASE_INSENSITIVE)); // Create the full path - let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name); + let new_path = incr_comp_session_dir.parent().unwrap().join(&*sub_dir_name); debug!("finalize_session_directory() - new path: {}", new_path.display()); match rename_path_with_retry(&*incr_comp_session_dir, &new_path, 3) { @@ -452,11 +449,11 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf { let random_number = thread_rng().next_u32(); debug!("generate_session_dir_path: random_number = {}", random_number); - let directory_name = format!( - "s-{}-{}-working", - timestamp, - base_n::encode(random_number as u128, INT_ENCODE_BASE) - ); + // Chop the first 3 characters off the timestamp. Those 3 bytes will be zero for a while. + let (zeroes, timestamp) = timestamp.split_at(3); + assert_eq!(zeroes, "000"); + let directory_name = + format!("s-{}-{}-working", timestamp, random_number.to_base_fixed_len(CASE_INSENSITIVE)); debug!("generate_session_dir_path: directory_name = {}", directory_name); let directory_path = crate_dir.join(directory_name); debug!("generate_session_dir_path: directory_path = {}", directory_path.display()); @@ -587,10 +584,10 @@ fn extract_timestamp_from_session_dir(directory_name: &str) -> Result String { +fn timestamp_to_string(timestamp: SystemTime) -> BaseNString { let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000; - base_n::encode(micros as u128, INT_ENCODE_BASE) + micros.to_base_fixed_len(CASE_INSENSITIVE) } fn string_to_timestamp(s: &str) -> Result { @@ -621,9 +618,8 @@ fn crate_path(sess: &Session) -> PathBuf { sess.cfg_version, ); - let stable_crate_id = base_n::encode(stable_crate_id.as_u64() as u128, INT_ENCODE_BASE); - - let crate_name = format!("{crate_name}-{stable_crate_id}"); + let crate_name = + format!("{crate_name}-{}", stable_crate_id.as_u64().to_base_fixed_len(CASE_INSENSITIVE)); incr_dir.join(crate_name) } diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs index 43e1318a75a..a3fbc36a7a4 100644 --- a/compiler/rustc_middle/src/mir/mono.rs +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -1,7 +1,9 @@ use crate::dep_graph::{DepNode, WorkProduct, WorkProductId}; use crate::ty::{GenericArgs, Instance, InstanceDef, SymbolName, TyCtxt}; use rustc_attr::InlineAttr; -use rustc_data_structures::base_n; +use rustc_data_structures::base_n::BaseNString; +use rustc_data_structures::base_n::ToBaseN; +use rustc_data_structures::base_n::CASE_INSENSITIVE; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxIndexMap; @@ -334,14 +336,11 @@ pub fn make_code_coverage_dead_code_cgu(&mut self) { self.is_code_coverage_dead_code_cgu = true; } - pub fn mangle_name(human_readable_name: &str) -> String { - // We generate a 80 bit hash from the name. This should be enough to - // avoid collisions and is still reasonably short for filenames. + pub fn mangle_name(human_readable_name: &str) -> BaseNString { let mut hasher = StableHasher::new(); human_readable_name.hash(&mut hasher); let hash: Hash128 = hasher.finish(); - let hash = hash.as_u128() & ((1u128 << 80) - 1); - base_n::encode(hash, base_n::CASE_INSENSITIVE) + hash.as_u128().to_base_fixed_len(CASE_INSENSITIVE) } pub fn compute_size_estimate(&mut self) { diff --git a/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/encode.rs b/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/encode.rs index 40cd0c14b05..d67132d2dd4 100644 --- a/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/encode.rs +++ b/compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/encode.rs @@ -4,7 +4,9 @@ //! //! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler, //! see design document in the tracking issue #89653. -use rustc_data_structures::base_n; +use rustc_data_structures::base_n::ToBaseN; +use rustc_data_structures::base_n::ALPHANUMERIC_ONLY; +use rustc_data_structures::base_n::CASE_INSENSITIVE; use rustc_data_structures::fx::FxHashMap; use rustc_hir as hir; use rustc_middle::bug; @@ -736,7 +738,7 @@ fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String { /// ). fn to_disambiguator(num: u64) -> String { if let Some(num) = num.checked_sub(1) { - format!("s{}_", base_n::encode(num as u128, base_n::ALPHANUMERIC_ONLY)) + format!("s{}_", num.to_base(ALPHANUMERIC_ONLY)) } else { "s_".to_string() } @@ -746,7 +748,7 @@ fn to_disambiguator(num: u64) -> String { /// ). fn to_seq_id(num: usize) -> String { if let Some(num) = num.checked_sub(1) { - base_n::encode(num as u128, base_n::CASE_INSENSITIVE).to_uppercase() + (num as u64).to_base(CASE_INSENSITIVE).to_uppercase() } else { "".to_string() } diff --git a/compiler/rustc_symbol_mangling/src/v0.rs b/compiler/rustc_symbol_mangling/src/v0.rs index 58b67c77a61..513107332db 100644 --- a/compiler/rustc_symbol_mangling/src/v0.rs +++ b/compiler/rustc_symbol_mangling/src/v0.rs @@ -1,4 +1,4 @@ -use rustc_data_structures::base_n; +use rustc_data_structures::base_n::ToBaseN; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::intern::Interned; use rustc_hir as hir; @@ -831,7 +831,7 @@ fn path_generic_args( /// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc. pub(crate) fn push_integer_62(x: u64, output: &mut String) { if let Some(x) = x.checked_sub(1) { - base_n::push_str(x as u128, base_n::ALPHANUMERIC_ONLY, output); + output.push_str(&x.to_base(62)); } output.push('_'); }