diff --git a/compiler/rustc_data_structures/src/intern.rs b/compiler/rustc_data_structures/src/intern.rs new file mode 100644 index 00000000000..e5d43db327b --- /dev/null +++ b/compiler/rustc_data_structures/src/intern.rs @@ -0,0 +1,102 @@ +use std::cmp::Ordering; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; +use std::ptr; + +mod private { + #[derive(Clone, Copy, Debug)] + pub struct PrivateZst; +} + +/// A reference to a value that is interned, and is known to be unique. +/// +/// Note that it is possible to have a `T` and a `Interned` that are (or +/// refer to) equal but different values. But if you have two different +/// `Interned`s, they both refer to the same value, at a single location in +/// memory. This means that equality and hashing can be done on the value's +/// address rather than the value's contents, which can improve performance. +/// +/// The `PrivateZst` field means you can pattern match with `Interned(v, _)` +/// but you can only construct a `Interned` with `new_unchecked`, and not +/// directly. +#[derive(Debug)] +#[cfg_attr(not(bootstrap), rustc_pass_by_value)] +pub struct Interned<'a, T>(pub &'a T, pub private::PrivateZst); + +impl<'a, T> Interned<'a, T> { + /// Create a new `Interned` value. The value referred to *must* be interned + /// and thus be unique, and it *must* remain unique in the future. This + /// function has `_unchecked` in the name but is not `unsafe`, because if + /// the uniqueness condition is violated condition it will cause incorrect + /// behaviour but will not affect memory safety. + #[inline] + pub const fn new_unchecked(t: &'a T) -> Self { + Interned(t, private::PrivateZst) + } +} + +impl<'a, T> Clone for Interned<'a, T> { + fn clone(&self) -> Self { + *self + } +} + +impl<'a, T> Copy for Interned<'a, T> {} + +impl<'a, T> Deref for Interned<'a, T> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + self.0 + } +} + +impl<'a, T> PartialEq for Interned<'a, T> { + #[inline] + fn eq(&self, other: &Self) -> bool { + // Pointer equality implies equality, due to the uniqueness constraint. + ptr::eq(self.0, other.0) + } +} + +impl<'a, T> Eq for Interned<'a, T> {} + +impl<'a, T: PartialOrd> PartialOrd for Interned<'a, T> { + fn partial_cmp(&self, other: &Interned<'a, T>) -> Option { + // Pointer equality implies equality, due to the uniqueness constraint, + // but the contents must be compared otherwise. + if ptr::eq(self.0, other.0) { + Some(Ordering::Equal) + } else { + let res = self.0.partial_cmp(&other.0); + debug_assert!(res != Some(Ordering::Equal)); + res + } + } +} + +impl<'a, T: Ord> Ord for Interned<'a, T> { + fn cmp(&self, other: &Interned<'a, T>) -> Ordering { + // Pointer equality implies equality, due to the uniqueness constraint, + // but the contents must be compared otherwise. + if ptr::eq(self.0, other.0) { + Ordering::Equal + } else { + let res = self.0.cmp(&other.0); + debug_assert!(res != Ordering::Equal); + res + } + } +} + +impl<'a, T> Hash for Interned<'a, T> { + #[inline] + fn hash(&self, s: &mut H) { + // Pointer hashing is sufficient, due to the uniqueness constraint. + ptr::hash(self.0, s) + } +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/intern/tests.rs b/compiler/rustc_data_structures/src/intern/tests.rs new file mode 100644 index 00000000000..09810a0850e --- /dev/null +++ b/compiler/rustc_data_structures/src/intern/tests.rs @@ -0,0 +1,59 @@ +use super::*; +use std::cmp::Ordering; + +#[derive(Debug)] +struct S(u32); + +impl PartialEq for S { + fn eq(&self, _other: &Self) -> bool { + panic!("shouldn't be called"); + } +} + +impl Eq for S {} + +impl PartialOrd for S { + fn partial_cmp(&self, other: &S) -> Option { + // The `==` case should be handled by `Interned`. + assert_ne!(self.0, other.0); + self.0.partial_cmp(&other.0) + } +} + +impl Ord for S { + fn cmp(&self, other: &S) -> Ordering { + // The `==` case should be handled by `Interned`. + assert_ne!(self.0, other.0); + self.0.cmp(&other.0) + } +} + +#[test] +fn test_uniq() { + let s1 = S(1); + let s2 = S(2); + let s3 = S(3); + let s4 = S(1); // violates uniqueness + + let v1 = Interned::new_unchecked(&s1); + let v2 = Interned::new_unchecked(&s2); + let v3a = Interned::new_unchecked(&s3); + let v3b = Interned::new_unchecked(&s3); + let v4 = Interned::new_unchecked(&s4); // violates uniqueness + + assert_ne!(v1, v2); + assert_ne!(v2, v3a); + assert_eq!(v1, v1); + assert_eq!(v3a, v3b); + assert_ne!(v1, v4); // same content but different addresses: not equal + + assert_eq!(v1.cmp(&v2), Ordering::Less); + assert_eq!(v3a.cmp(&v2), Ordering::Greater); + assert_eq!(v1.cmp(&v1), Ordering::Equal); // only uses Interned::eq, not S::cmp + assert_eq!(v3a.cmp(&v3b), Ordering::Equal); // only uses Interned::eq, not S::cmp + + assert_eq!(v1.partial_cmp(&v2), Some(Ordering::Less)); + assert_eq!(v3a.partial_cmp(&v2), Some(Ordering::Greater)); + assert_eq!(v1.partial_cmp(&v1), Some(Ordering::Equal)); // only uses Interned::eq, not S::cmp + assert_eq!(v3a.partial_cmp(&v3b), Some(Ordering::Equal)); // only uses Interned::eq, not S::cmp +} diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs index 205f1cd77c0..80f83140f4b 100644 --- a/compiler/rustc_data_structures/src/lib.rs +++ b/compiler/rustc_data_structures/src/lib.rs @@ -21,6 +21,7 @@ #![feature(type_alias_impl_trait)] #![feature(new_uninit)] #![feature(once_cell)] +#![feature(rustc_attrs)] #![feature(test)] #![feature(thread_id_value)] #![feature(vec_into_raw_parts)] @@ -68,12 +69,12 @@ macro_rules! unlikely { pub mod functor; pub mod fx; pub mod graph; +pub mod intern; pub mod jobserver; pub mod macros; pub mod map_in_place; pub mod obligation_forest; pub mod owning_ref; -pub mod ptr_key; pub mod sip128; pub mod small_c_str; pub mod snapshot_map; diff --git a/compiler/rustc_data_structures/src/ptr_key.rs b/compiler/rustc_data_structures/src/ptr_key.rs deleted file mode 100644 index 440ccb05d86..00000000000 --- a/compiler/rustc_data_structures/src/ptr_key.rs +++ /dev/null @@ -1,37 +0,0 @@ -use std::ops::Deref; -use std::{hash, ptr}; - -/// A wrapper around reference that compares and hashes like a pointer. -/// Can be used as a key in sets/maps indexed by pointers to avoid `unsafe`. -#[derive(Debug)] -pub struct PtrKey<'a, T>(pub &'a T); - -impl<'a, T> Clone for PtrKey<'a, T> { - fn clone(&self) -> Self { - *self - } -} - -impl<'a, T> Copy for PtrKey<'a, T> {} - -impl<'a, T> PartialEq for PtrKey<'a, T> { - fn eq(&self, rhs: &Self) -> bool { - ptr::eq(self.0, rhs.0) - } -} - -impl<'a, T> Eq for PtrKey<'a, T> {} - -impl<'a, T> hash::Hash for PtrKey<'a, T> { - fn hash(&self, hasher: &mut H) { - (self.0 as *const T).hash(hasher) - } -} - -impl<'a, T> Deref for PtrKey<'a, T> { - type Target = T; - - fn deref(&self) -> &Self::Target { - self.0 - } -} diff --git a/compiler/rustc_resolve/src/imports.rs b/compiler/rustc_resolve/src/imports.rs index e7f76a18ad3..a8c2a5e1424 100644 --- a/compiler/rustc_resolve/src/imports.rs +++ b/compiler/rustc_resolve/src/imports.rs @@ -11,7 +11,7 @@ use rustc_ast::NodeId; use rustc_data_structures::fx::FxHashSet; -use rustc_data_structures::ptr_key::PtrKey; +use rustc_data_structures::intern::Interned; use rustc_errors::{pluralize, struct_span_err, Applicability}; use rustc_hir::def::{self, PartialRes}; use rustc_hir::def_id::DefId; @@ -134,7 +134,7 @@ pub fn is_nested(&self) -> bool { pub struct NameResolution<'a> { /// Single imports that may define the name in the namespace. /// Imports are arena-allocated, so it's ok to use pointers as keys. - single_imports: FxHashSet>>, + single_imports: FxHashSet>>, /// The least shadowable known binding for this name, or None if there are no known bindings. pub binding: Option<&'a NameBinding<'a>>, shadowed_glob: Option<&'a NameBinding<'a>>, @@ -153,7 +153,7 @@ pub(crate) fn binding(&self) -> Option<&'a NameBinding<'a>> { } crate fn add_single_import(&mut self, import: &'a Import<'a>) { - self.single_imports.insert(PtrKey(import)); + self.single_imports.insert(Interned::new_unchecked(import)); } } @@ -850,7 +850,7 @@ fn resolve_import(&mut self, import: &'b Import<'b>) -> bool { Err(Determined) => { let key = this.new_key(target, ns); this.update_resolution(parent, key, |_, resolution| { - resolution.single_imports.remove(&PtrKey(import)); + resolution.single_imports.remove(&Interned::new_unchecked(import)); }); } Ok(binding) if !binding.is_importable() => { diff --git a/compiler/rustc_resolve/src/lib.rs b/compiler/rustc_resolve/src/lib.rs index dbda59e8884..28d8d9247ac 100644 --- a/compiler/rustc_resolve/src/lib.rs +++ b/compiler/rustc_resolve/src/lib.rs @@ -38,7 +38,7 @@ use rustc_ast_lowering::ResolverAstLowering; use rustc_ast_pretty::pprust; use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexMap}; -use rustc_data_structures::ptr_key::PtrKey; +use rustc_data_structures::intern::Interned; use rustc_data_structures::sync::Lrc; use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder}; use rustc_expand::base::{DeriveResolutions, SyntaxExtension, SyntaxExtensionKind}; @@ -964,7 +964,7 @@ pub struct Resolver<'a> { /// language items. empty_module: Module<'a>, module_map: FxHashMap>, - binding_parent_modules: FxHashMap>, Module<'a>>, + binding_parent_modules: FxHashMap>, Module<'a>>, underscore_disambiguator: u32, /// Maps glob imports to the names of items actually imported. @@ -1115,7 +1115,7 @@ fn alloc_name_resolution(&'a self) -> &'a RefCell> { self.name_resolutions.alloc(Default::default()) } fn alloc_macro_rules_scope(&'a self, scope: MacroRulesScope<'a>) -> MacroRulesScopeRef<'a> { - PtrKey(self.dropless.alloc(Cell::new(scope))) + Interned::new_unchecked(self.dropless.alloc(Cell::new(scope))) } fn alloc_macro_rules_binding( &'a self, @@ -2938,7 +2938,9 @@ fn is_accessible_from(&self, vis: ty::Visibility, module: Module<'a>) -> bool { } fn set_binding_parent_module(&mut self, binding: &'a NameBinding<'a>, module: Module<'a>) { - if let Some(old_module) = self.binding_parent_modules.insert(PtrKey(binding), module) { + if let Some(old_module) = + self.binding_parent_modules.insert(Interned::new_unchecked(binding), module) + { if !ptr::eq(module, old_module) { span_bug!(binding.span, "parent module is reset for binding"); } @@ -2954,8 +2956,8 @@ fn disambiguate_macro_rules_vs_modularized( // is disambiguated to mitigate regressions from macro modularization. // Scoping for `macro_rules` behaves like scoping for `let` at module level, in general. match ( - self.binding_parent_modules.get(&PtrKey(macro_rules)), - self.binding_parent_modules.get(&PtrKey(modularized)), + self.binding_parent_modules.get(&Interned::new_unchecked(macro_rules)), + self.binding_parent_modules.get(&Interned::new_unchecked(modularized)), ) { (Some(macro_rules), Some(modularized)) => { macro_rules.nearest_parent_mod() == modularized.nearest_parent_mod() diff --git a/compiler/rustc_resolve/src/macros.rs b/compiler/rustc_resolve/src/macros.rs index 82807e2d0a2..89c2a0c74bd 100644 --- a/compiler/rustc_resolve/src/macros.rs +++ b/compiler/rustc_resolve/src/macros.rs @@ -11,7 +11,7 @@ use rustc_ast_pretty::pprust; use rustc_attr::StabilityLevel; use rustc_data_structures::fx::FxHashSet; -use rustc_data_structures::ptr_key::PtrKey; +use rustc_data_structures::intern::Interned; use rustc_data_structures::sync::Lrc; use rustc_errors::struct_span_err; use rustc_expand::base::{Annotatable, DeriveResolutions, Indeterminate, ResolverExpand}; @@ -71,7 +71,7 @@ pub enum MacroRulesScope<'a> { /// This helps to avoid uncontrollable growth of `macro_rules!` scope chains, /// which usually grow lineraly with the number of macro invocations /// in a module (including derives) and hurt performance. -pub(crate) type MacroRulesScopeRef<'a> = PtrKey<'a, Cell>>; +pub(crate) type MacroRulesScopeRef<'a> = Interned<'a, Cell>>; // Macro namespace is separated into two sub-namespaces, one for bang macros and // one for attribute-like macros (attributes, derives).