Auto merge of #117508 - nnethercote:symbols-FxIndexSet, r=cuviper
Use `FxIndexSet` in the symbol interner. It makes the code a little nicer. r? `@ghost`
This commit is contained in:
commit
2429818b20
@ -3,7 +3,7 @@
|
||||
//! type, and vice versa.
|
||||
|
||||
use rustc_arena::DroplessArena;
|
||||
use rustc_data_structures::fx::FxHashMap;
|
||||
use rustc_data_structures::fx::FxIndexSet;
|
||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
|
||||
use rustc_data_structures::sync::Lock;
|
||||
use rustc_macros::HashStable_Generic;
|
||||
@ -2076,43 +2076,33 @@ fn to_stable_hash_key(&self, _: &CTX) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct Interner(Lock<InternerInner>);
|
||||
|
||||
// The `&'static str`s in this type actually point into the arena.
|
||||
//
|
||||
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
|
||||
// found that to regress performance up to 2% in some cases. This might be
|
||||
// revisited after further improvements to `indexmap`.
|
||||
//
|
||||
// This type is private to prevent accidentally constructing more than one
|
||||
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
|
||||
// between `Interner`s.
|
||||
#[derive(Default)]
|
||||
struct InternerInner {
|
||||
arena: DroplessArena,
|
||||
names: FxHashMap<&'static str, Symbol>,
|
||||
strings: Vec<&'static str>,
|
||||
strings: FxIndexSet<&'static str>,
|
||||
}
|
||||
|
||||
impl Interner {
|
||||
fn prefill(init: &[&'static str]) -> Self {
|
||||
Interner(Lock::new(InternerInner {
|
||||
strings: init.into(),
|
||||
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
|
||||
..Default::default()
|
||||
arena: Default::default(),
|
||||
strings: init.iter().copied().collect(),
|
||||
}))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn intern(&self, string: &str) -> Symbol {
|
||||
let mut inner = self.0.lock();
|
||||
if let Some(&name) = inner.names.get(string) {
|
||||
return name;
|
||||
if let Some(idx) = inner.strings.get_index_of(string) {
|
||||
return Symbol::new(idx as u32);
|
||||
}
|
||||
|
||||
let name = Symbol::new(inner.strings.len() as u32);
|
||||
|
||||
// SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
|
||||
// and immediately convert the clone back to `&[u8]`, all because there
|
||||
// is no `inner.arena.alloc_str()` method. This is clearly safe.
|
||||
@ -2122,20 +2112,21 @@ fn intern(&self, string: &str) -> Symbol {
|
||||
// SAFETY: we can extend the arena allocation to `'static` because we
|
||||
// only access these while the arena is still alive.
|
||||
let string: &'static str = unsafe { &*(string as *const str) };
|
||||
inner.strings.push(string);
|
||||
|
||||
// This second hash table lookup can be avoided by using `RawEntryMut`,
|
||||
// but this code path isn't hot enough for it to be worth it. See
|
||||
// #91445 for details.
|
||||
inner.names.insert(string, name);
|
||||
name
|
||||
let (idx, is_new) = inner.strings.insert_full(string);
|
||||
debug_assert!(is_new); // due to the get_index_of check above
|
||||
|
||||
Symbol::new(idx as u32)
|
||||
}
|
||||
|
||||
/// Get the symbol as a string.
|
||||
///
|
||||
/// [`Symbol::as_str()`] should be used in preference to this function.
|
||||
fn get(&self, symbol: Symbol) -> &str {
|
||||
self.0.lock().strings[symbol.0.as_usize()]
|
||||
self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#[test]
|
||||
fn interner_tests() {
|
||||
let i = Interner::default();
|
||||
let i = Interner::prefill(&[]);
|
||||
// first one is zero:
|
||||
assert_eq!(i.intern("dog"), Symbol::new(0));
|
||||
// re-use gets the same entry:
|
||||
|
Loading…
Reference in New Issue
Block a user