Cache lookups into the borrow stack

This adds a very simple LRU-like cache which stores the locations of
often-used tags. While the implementation is very simple, the cache hit
rate is incredibly high, ~99.9% on most programs, and the element at
position 0 in the cache alone often has a hit rate of ~90%. So the
sub-optimality of this cache basically vanishes into the noise in a profile.

Additionally, we keep a range which denotes where there might be an item
granting Unique permission in the stack, so that when we invalidate
Uniques we do not need to scan much of the stack, and often scan nothing
at all.
Ben Kimock 2021-12-07 22:05:13 -05:00
parent 70b97a5d7b
commit 3bbcafe3b5
6 changed files with 398 additions and 108 deletions
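To illustrate the scheme described in the message above, here is a minimal, self-contained sketch of such a cache. This is not the code from this commit; `Tag` and `SmallLruCache` are invented names for the sketch, and the real implementation is the new stack.rs in the diff below. Two parallel fixed-size arrays map recently used tags to their positions in the borrow stack; a lookup is a linear scan of the tag array, and every hit is promoted to slot 0, which is why slot 0 alone can see a ~90% hit rate.

// Sketch only: a simplified model of the move-to-front cache, not Miri's actual code.
const CACHE_LEN: usize = 32;

/// Stand-in for Miri's `SbTag`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
struct Tag(u64);

/// Parallel arrays mapping recently used tags to their borrow-stack positions.
struct SmallLruCache {
    tags: [Tag; CACHE_LEN],
    idx: [usize; CACHE_LEN],
}

impl SmallLruCache {
    /// Fill the cache with a harmless default so lookups never hit uninitialized slots.
    fn new(fill: Tag) -> Self {
        SmallLruCache { tags: [fill; CACHE_LEN], idx: [0; CACHE_LEN] }
    }

    /// Insert an entry at the most-recently-used position, shifting the rest down
    /// by one and dropping the oldest entry.
    fn add(&mut self, idx: usize, tag: Tag) {
        self.tags.copy_within(0..CACHE_LEN - 1, 1);
        self.tags[0] = tag;
        self.idx.copy_within(0..CACHE_LEN - 1, 1);
        self.idx[0] = idx;
    }

    /// Look up a tag's stack position; on a hit, promote the entry to slot 0.
    fn lookup(&mut self, tag: Tag) -> Option<usize> {
        let cache_idx = self.tags.iter().position(|t| *t == tag)?;
        let stack_idx = self.idx[cache_idx];
        if cache_idx != 0 {
            self.add(stack_idx, tag);
        }
        Some(stack_idx)
    }
}

fn main() {
    let mut cache = SmallLruCache::new(Tag(0));
    cache.add(5, Tag(42));
    cache.add(9, Tag(7));
    assert_eq!(cache.lookup(Tag(42)), Some(5)); // hit; promoted back to slot 0
    assert_eq!(cache.lookup(Tag(99)), None); // miss; the caller falls back to a stack scan
}

The real code below additionally skips the cache when the borrow stack has at most CACHE_LEN entries and re-checks the `Permission` of a hit against the actual stack (see `find_granting_cache`). The `unique_range` mentioned in the second paragraph is an independent optimization: a `Range<usize>` bounding where `Unique` items can occur, maintained in `insert_cache`, `disable_uniques_starting_at`, and `pop_items_after`.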


@@ -50,3 +50,8 @@ rustc_private = true
[[test]]
name = "compiletest"
harness = false
[features]
default = ["stack-cache"]
expensive-debug-assertions = []
stack-cache = []


@@ -90,8 +90,8 @@ pub use crate::mono_hash_map::MonoHashMap;
pub use crate::operator::EvalContextExt as OperatorEvalContextExt;
pub use crate::range_map::RangeMap;
pub use crate::stacked_borrows::{
CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, SbTag, SbTagExtra, Stack,
Stacks,
stack::Stack, CallId, EvalContextExt as StackedBorEvalContextExt, Item, Permission, PtrId,
SbTag, SbTagExtra, Stacks,
};
pub use crate::sync::{CondvarId, EvalContextExt as SyncEvalContextExt, MutexId, RwLockId};
pub use crate::thread::{


@ -23,6 +23,10 @@ use crate::*;
pub mod diagnostics;
use diagnostics::{AllocHistory, TagHistory};
pub mod stack;
use stack::Stack;
pub type PtrId = NonZeroU64;
pub type CallId = NonZeroU64;
// Even reading memory can have effects on the stack, so we need a `RefCell` here.
@@ -111,23 +115,6 @@ impl fmt::Debug for Item {
}
}
/// Extra per-location state.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Stack {
/// Used *mostly* as a stack; never empty.
/// Invariants:
/// * Above a `SharedReadOnly` there can only be more `SharedReadOnly`.
/// * No tag occurs in the stack more than once.
borrows: Vec<Item>,
/// If this is `Some(id)`, then the actual current stack is unknown. This can happen when
/// wildcard pointers are used to access this location. What we do know is that `borrows` are at
/// the top of the stack, and below it are arbitrarily many items whose `tag` is strictly less
/// than `id`.
/// When the bottom is unknown, `borrows` always has a `SharedReadOnly` or `Unique` at the bottom;
/// we never have the unknown-to-known boundary in an SRW group.
unknown_bottom: Option<SbTag>,
}
/// Extra per-allocation state.
#[derive(Clone, Debug)]
pub struct Stacks {
@@ -297,65 +284,10 @@ impl Permission {
/// Core per-location operations: access, dealloc, reborrow.
impl<'tcx> Stack {
/// Find the item granting the given kind of access to the given tag, and return where
/// it is on the stack. For wildcard tags, the given index is approximate, but if *no*
/// index is given it means the match was *not* in the known part of the stack.
/// `Ok(None)` indicates it matched the "unknown" part of the stack.
/// `Err` indicates it was not found.
fn find_granting(
&self,
access: AccessKind,
tag: SbTagExtra,
exposed_tags: &FxHashSet<SbTag>,
) -> Result<Option<usize>, ()> {
let SbTagExtra::Concrete(tag) = tag else {
// Handle the wildcard case.
// Go search the stack for an exposed tag.
if let Some(idx) =
self.borrows
.iter()
.enumerate() // we also need to know *where* in the stack
.rev() // search top-to-bottom
.find_map(|(idx, item)| {
// If the item fits and *might* be this wildcard, use it.
if item.perm.grants(access) && exposed_tags.contains(&item.tag) {
Some(idx)
} else {
None
}
})
{
return Ok(Some(idx));
}
// If we couldn't find it in the stack, check the unknown bottom.
return if self.unknown_bottom.is_some() { Ok(None) } else { Err(()) };
};
if let Some(idx) =
self.borrows
.iter()
.enumerate() // we also need to know *where* in the stack
.rev() // search top-to-bottom
// Return permission of first item that grants access.
// We require a permission with the right tag, ensuring U3 and F3.
.find_map(|(idx, item)| {
if tag == item.tag && item.perm.grants(access) { Some(idx) } else { None }
})
{
return Ok(Some(idx));
}
// Couldn't find it in the stack; but if there is an unknown bottom it might be there.
let found = self.unknown_bottom.is_some_and(|&unknown_limit| {
tag.0 < unknown_limit.0 // unknown_limit is an upper bound for what can be in the unknown bottom.
});
if found { Ok(None) } else { Err(()) }
}
/// Find the first write-incompatible item above the given one --
/// i.e, find the height to which the stack will be truncated when writing to `granting`.
fn find_first_write_incompatible(&self, granting: usize) -> usize {
let perm = self.borrows[granting].perm;
let perm = self.get(granting).unwrap().perm;
match perm {
Permission::SharedReadOnly => bug!("Cannot use SharedReadOnly for writing"),
Permission::Disabled => bug!("Cannot use Disabled for anything"),
@@ -366,7 +298,7 @@ impl<'tcx> Stack {
Permission::SharedReadWrite => {
// The SharedReadWrite *just* above us are compatible, to skip those.
let mut idx = granting + 1;
while let Some(item) = self.borrows.get(idx) {
while let Some(item) = self.get(idx) {
if item.perm == Permission::SharedReadWrite {
// Go on.
idx += 1;
@@ -461,8 +393,7 @@ impl<'tcx> Stack {
// There is a SRW group boundary between the unknown and the known, so everything is incompatible.
0
};
for item in self.borrows.drain(first_incompatible_idx..).rev() {
trace!("access: popping item {:?}", item);
self.pop_items_after(first_incompatible_idx, |item| {
Stack::item_popped(
&item,
Some((tag, alloc_range, offset, access)),
@@ -470,7 +401,8 @@ impl<'tcx> Stack {
alloc_history,
)?;
alloc_history.log_invalidation(item.tag, alloc_range, current_span);
}
Ok(())
})?;
} else {
// On a read, *disable* all `Unique` above the granting item. This ensures U2 for read accesses.
// The reason this is not following the stack discipline (by removing the first Unique and
@@ -487,21 +419,16 @@ impl<'tcx> Stack {
// We are reading from something in the unknown part. That means *all* `Unique` we know about are dead now.
0
};
for idx in (first_incompatible_idx..self.borrows.len()).rev() {
let item = &mut self.borrows[idx];
if item.perm == Permission::Unique {
trace!("access: disabling item {:?}", item);
Stack::item_popped(
item,
Some((tag, alloc_range, offset, access)),
global,
alloc_history,
)?;
item.perm = Permission::Disabled;
alloc_history.log_invalidation(item.tag, alloc_range, current_span);
}
}
self.disable_uniques_starting_at(first_incompatible_idx, |item| {
Stack::item_popped(
&item,
Some((tag, alloc_range, offset, access)),
global,
alloc_history,
)?;
alloc_history.log_invalidation(item.tag, alloc_range, current_span);
Ok(())
})?;
}
// If this was an approximate action, we now collapse everything into an unknown.
@@ -509,22 +436,22 @@ impl<'tcx> Stack {
// Compute the upper bound of the items that remain.
// (This is why we did all the work above: to reduce the items we have to consider here.)
let mut max = NonZeroU64::new(1).unwrap();
for item in &self.borrows {
for i in 0..self.len() {
let item = self.get(i).unwrap();
// Skip disabled items, they cannot be matched anyway.
if !matches!(item.perm, Permission::Disabled) {
// We are looking for a strict upper bound, so add 1 to this tag.
max = cmp::max(item.tag.0.checked_add(1).unwrap(), max);
}
}
if let Some(unk) = self.unknown_bottom {
if let Some(unk) = self.unknown_bottom() {
max = cmp::max(unk.0, max);
}
// Use `max` as new strict upper bound for everything.
trace!(
"access: forgetting stack to upper bound {max} due to wildcard or unknown access"
);
self.borrows.clear();
self.unknown_bottom = Some(SbTag(max));
self.set_unknown_bottom(SbTag(max));
}
// Done.
@@ -553,7 +480,8 @@ impl<'tcx> Stack {
})?;
// Step 2: Remove all items. Also checks for protectors.
for item in self.borrows.drain(..).rev() {
for idx in (0..self.len()).rev() {
let item = self.get(idx).unwrap();
Stack::item_popped(&item, None, global, alloc_history)?;
}
Ok(())
@@ -601,8 +529,7 @@ impl<'tcx> Stack {
// The new thing is SRW anyway, so we cannot push it "on top of the unknown part"
// (for all we know, it might join an SRW group inside the unknown).
trace!("reborrow: forgetting stack entirely due to SharedReadWrite reborrow from wildcard or unknown");
self.borrows.clear();
self.unknown_bottom = Some(global.next_ptr_tag);
self.set_unknown_bottom(global.next_ptr_tag);
return Ok(());
};
@@ -629,19 +556,18 @@ impl<'tcx> Stack {
// on top of `derived_from`, and we want the new item at the top so that we
// get the strongest possible guarantees.
// This ensures U1 and F1.
self.borrows.len()
self.len()
};
// Put the new item there. As an optimization, deduplicate if it is equal to one of its new neighbors.
// `new_idx` might be 0 if we just cleared the entire stack.
if self.borrows.get(new_idx) == Some(&new)
|| (new_idx > 0 && self.borrows[new_idx - 1] == new)
if self.get(new_idx) == Some(new) || (new_idx > 0 && self.get(new_idx - 1).unwrap() == new)
{
// Optimization applies, done.
trace!("reborrow: avoiding adding redundant item {:?}", new);
} else {
trace!("reborrow: adding item {:?}", new);
self.borrows.insert(new_idx, new);
self.insert(new_idx, new);
}
Ok(())
}
@@ -653,8 +579,8 @@ impl<'tcx> Stacks {
/// Creates new stack with initial tag.
fn new(size: Size, perm: Permission, tag: SbTag) -> Self {
let item = Item { perm, tag, protector: None };
let stack = Stack { borrows: vec![item], unknown_bottom: None };
let stack = Stack::new(item);
Stacks {
stacks: RangeMap::new(size, stack),
history: AllocHistory::new(),
@@ -900,11 +826,14 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
// We have to use shared references to alloc/memory_extra here since
// `visit_freeze_sensitive` needs to access the global state.
let extra = this.get_alloc_extra(alloc_id)?;
let mut stacked_borrows = extra
.stacked_borrows
.as_ref()
.expect("we should have Stacked Borrows data")
.borrow_mut();
let mut current_span = this.machine.current_span();
this.visit_freeze_sensitive(place, size, |mut range, frozen| {
// Adjust range.
range.start += base_offset;
@@ -929,7 +858,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
item,
(alloc_id, range, offset),
&mut global,
current_span,
&mut current_span,
history,
exposed_tags,
)


@@ -185,7 +185,10 @@ fn operation_summary(
fn error_cause(stack: &Stack, tag: SbTagExtra) -> &'static str {
if let SbTagExtra::Concrete(tag) = tag {
if stack.borrows.iter().any(|item| item.tag == tag && item.perm != Permission::Disabled) {
if (0..stack.len())
.map(|i| stack.get(i).unwrap())
.any(|item| item.tag == tag && item.perm != Permission::Disabled)
{
", but that tag only grants SharedReadOnly permission for this location"
} else {
", but that tag does not exist in the borrow stack for this location"


@@ -0,0 +1,349 @@
use crate::stacked_borrows::{AccessKind, Item, Permission, SbTag, SbTagExtra};
use rustc_data_structures::fx::FxHashSet;
#[cfg(feature = "stack-cache")]
use std::ops::Range;
/// Exactly what cache size we should use is a difficult tradeoff. There will always be some
/// workload which has a `SbTag` working set which exceeds the size of the cache, and ends up
/// falling back to linear searches of the borrow stack very often.
/// The cost of making this value too large is that the loop in `Stack::insert`, which keeps the
/// cache entries correct after an insert, becomes expensive.
#[cfg(feature = "stack-cache")]
const CACHE_LEN: usize = 32;
/// Extra per-location state.
#[derive(Clone, Debug)]
pub struct Stack {
/// Used *mostly* as a stack; never empty.
/// Invariants:
/// * Above a `SharedReadOnly` there can only be more `SharedReadOnly`.
/// * Except for `Untagged`, no tag occurs in the stack more than once.
borrows: Vec<Item>,
/// If this is `Some(id)`, then the actual current stack is unknown. This can happen when
/// wildcard pointers are used to access this location. What we do know is that `borrows` are at
/// the top of the stack, and below it are arbitrarily many items whose `tag` is strictly less
/// than `id`.
/// When the bottom is unknown, `borrows` always has a `SharedReadOnly` or `Unique` at the bottom;
/// we never have the unknown-to-known boundary in an SRW group.
unknown_bottom: Option<SbTag>,
/// A small LRU cache of searches of the borrow stack. This only caches accesses of `Tagged`,
/// because those are unique in the stack.
#[cfg(feature = "stack-cache")]
cache: StackCache,
/// On a read, we need to disable all `Unique` above the granting item. We can avoid most of
/// this scan by keeping track of the region of the borrow stack that may contain `Unique`s.
#[cfg(feature = "stack-cache")]
unique_range: Range<usize>,
}
/// A very small cache of searches of the borrow stack
/// This maps tags to locations in the borrow stack. Any use of this still needs to do a
/// probably-cold random access into the borrow stack to figure out what `Permission` an
/// `SbTag` grants. We could avoid this by also storing the `Permission` in the cache, but
/// most lookups into the cache are immediately followed by access of the full borrow stack anyway.
#[cfg(feature = "stack-cache")]
#[derive(Clone, Debug)]
struct StackCache {
tags: [SbTag; CACHE_LEN], // Hot in find_granting
idx: [usize; CACHE_LEN], // Hot in grant
}
#[cfg(feature = "stack-cache")]
impl StackCache {
fn add(&mut self, idx: usize, tag: SbTag) {
self.tags.copy_within(0..CACHE_LEN - 1, 1);
self.tags[0] = tag;
self.idx.copy_within(0..CACHE_LEN - 1, 1);
self.idx[0] = idx;
}
}
impl PartialEq for Stack {
fn eq(&self, other: &Self) -> bool {
// All the semantics of Stack are in self.borrows, everything else is caching
self.borrows == other.borrows
}
}
impl Eq for Stack {}
impl<'tcx> Stack {
/// Panics if any of the caching mechanisms have broken:
/// - The StackCache indices don't refer to the parallel tags,
/// - There are `Unique` tags outside of `unique_range`
#[cfg(feature = "expensive-debug-assertions")]
fn verify_cache_consistency(&self) {
if self.borrows.len() > CACHE_LEN {
for (tag, stack_idx) in self.cache.tags.iter().zip(self.cache.idx.iter()) {
assert_eq!(self.borrows[*stack_idx].tag, *tag);
}
}
for (idx, item) in self.borrows.iter().enumerate() {
if item.perm == Permission::Unique {
assert!(
self.unique_range.contains(&idx),
"{:?} {:?}",
self.unique_range,
self.borrows
);
}
}
}
/// Find the item granting the given kind of access to the given tag, and return where
/// it is on the stack. For wildcard tags, the given index is approximate, but if *no*
/// index is given it means the match was *not* in the known part of the stack.
/// `Ok(None)` indicates it matched the "unknown" part of the stack.
/// `Err` indicates it was not found.
pub fn find_granting(
&mut self,
access: AccessKind,
tag: SbTagExtra,
exposed_tags: &FxHashSet<SbTag>,
) -> Result<Option<usize>, ()> {
#[cfg(feature = "expensive-debug-assertions")]
self.verify_cache_consistency();
let SbTagExtra::Concrete(tag) = tag else {
// Handle the wildcard case.
// Go search the stack for an exposed tag.
if let Some(idx) =
self.borrows
.iter()
.enumerate() // we also need to know *where* in the stack
.rev() // search top-to-bottom
.find_map(|(idx, item)| {
// If the item fits and *might* be this wildcard, use it.
if item.perm.grants(access) && exposed_tags.contains(&item.tag) {
Some(idx)
} else {
None
}
})
{
return Ok(Some(idx));
}
// If we couldn't find it in the stack, check the unknown bottom.
return if self.unknown_bottom.is_some() { Ok(None) } else { Err(()) };
};
if let Some(idx) = self.find_granting_tagged(access, tag) {
return Ok(Some(idx));
}
// Couldn't find it in the stack; but if there is an unknown bottom it might be there.
let found = self.unknown_bottom.is_some_and(|&unknown_limit| {
tag.0 < unknown_limit.0 // unknown_limit is an upper bound for what can be in the unknown bottom.
});
if found { Ok(None) } else { Err(()) }
}
fn find_granting_tagged(&mut self, access: AccessKind, tag: SbTag) -> Option<usize> {
#[cfg(feature = "stack-cache")]
if let Some(idx) = self.find_granting_cache(access, tag) {
return Some(idx);
}
// If we didn't find the tag in the cache, fall back to a linear search of the
// whole stack, and add the tag to the cache.
for (stack_idx, item) in self.borrows.iter().enumerate().rev() {
if tag == item.tag && item.perm.grants(access) {
#[cfg(feature = "stack-cache")]
self.cache.add(stack_idx, tag);
return Some(stack_idx);
}
}
None
}
#[cfg(feature = "stack-cache")]
fn find_granting_cache(&mut self, access: AccessKind, tag: SbTag) -> Option<usize> {
// When the borrow stack is empty, there are no tags we could put into the cache that would
// be valid. Additionally, since lookups into the cache are a linear search it doesn't make
// sense to use the cache when it is no smaller than a search of the borrow stack itself.
if self.borrows.len() <= CACHE_LEN {
return None;
}
// Search the cache for the tag we're looking up
let cache_idx = self.cache.tags.iter().position(|t| *t == tag)?;
let stack_idx = self.cache.idx[cache_idx];
// If we found the tag, look up its position in the stack to see if it grants
// the required permission
if self.borrows[stack_idx].perm.grants(access) {
// If it does, and it's not already in the most-recently-used position, move it
// there.
if cache_idx != 0 {
self.cache.add(stack_idx, tag);
}
Some(stack_idx)
} else {
// Tag is in the cache, but it doesn't grant the required permission
None
}
}
pub fn insert(&mut self, new_idx: usize, new: Item) {
self.borrows.insert(new_idx, new);
#[cfg(feature = "stack-cache")]
self.insert_cache(new_idx, new);
}
#[cfg(feature = "stack-cache")]
fn insert_cache(&mut self, new_idx: usize, new: Item) {
// Adjust the possibly-unique range if an insert occurs before or within it
if self.unique_range.start >= new_idx {
self.unique_range.start += 1;
}
if self.unique_range.end >= new_idx {
self.unique_range.end += 1;
}
if new.perm == Permission::Unique {
// Make sure the possibly-unique range contains the new borrow
self.unique_range.start = self.unique_range.start.min(new_idx);
self.unique_range.end = self.unique_range.end.max(new_idx + 1);
}
// The above insert changes the meaning of every index in the cache >= new_idx, so now
// we need to find every one of those indexes and increment it.
for idx in &mut self.cache.idx {
if *idx >= new_idx {
*idx += 1;
}
}
// This primes the cache for the next access, which is almost always the just-added tag.
self.cache.add(new_idx, new.tag);
#[cfg(feature = "expensive-debug-assertions")]
self.verify_cache_consistency();
}
/// Construct a new `Stack` using the passed `Item` as the base tag.
pub fn new(item: Item) -> Self {
Stack {
borrows: vec![item],
unknown_bottom: None,
#[cfg(feature = "stack-cache")]
cache: StackCache { idx: [0; CACHE_LEN], tags: [item.tag; CACHE_LEN] },
#[cfg(feature = "stack-cache")]
unique_range: if item.perm == Permission::Unique { 0..1 } else { 0..0 },
}
}
pub fn get(&self, idx: usize) -> Option<Item> {
self.borrows.get(idx).cloned()
}
#[allow(clippy::len_without_is_empty)] // Stacks are never empty
pub fn len(&self) -> usize {
self.borrows.len()
}
pub fn unknown_bottom(&self) -> Option<SbTag> {
self.unknown_bottom
}
pub fn set_unknown_bottom(&mut self, tag: SbTag) {
self.borrows.clear();
self.unknown_bottom = Some(tag);
}
/// Find all `Unique` elements in this borrow stack at or above `disable_start`, pass a copy of them
/// to the `visitor`, then set their `Permission` to `Disabled`.
pub fn disable_uniques_starting_at<V: FnMut(Item) -> crate::InterpResult<'tcx>>(
&mut self,
disable_start: usize,
mut visitor: V,
) -> crate::InterpResult<'tcx> {
#[cfg(feature = "stack-cache")]
let unique_range = self.unique_range.clone();
#[cfg(not(feature = "stack-cache"))]
let unique_range = 0..self.len();
if disable_start <= unique_range.end {
// add 1 so we don't disable the granting item
let lower = unique_range.start.max(disable_start);
let upper = (unique_range.end + 1).min(self.borrows.len());
for item in &mut self.borrows[lower..upper] {
if item.perm == Permission::Unique {
log::trace!("access: disabling item {:?}", item);
visitor(*item)?;
item.perm = Permission::Disabled;
}
}
}
#[cfg(feature = "stack-cache")]
if disable_start < self.unique_range.start {
// We disabled all Unique items
self.unique_range.start = 0;
self.unique_range.end = 0;
} else {
// Truncate the range to disable_start. This is + 2 because we are only removing
// elements after disable_start, and this range does not include the end.
self.unique_range.end = self.unique_range.end.min(disable_start + 1);
}
#[cfg(feature = "expensive-debug-assertions")]
self.verify_cache_consistency();
Ok(())
}
/// Pop all items at or above `start` off the stack, from the top downward, passing each
/// removed `Item` to `visitor` before it is discarded.
pub fn pop_items_after<V: FnMut(Item) -> crate::InterpResult<'tcx>>(
&mut self,
start: usize,
mut visitor: V,
) -> crate::InterpResult<'tcx> {
while self.borrows.len() > start {
let item = self.borrows.pop().unwrap();
visitor(item)?;
}
#[cfg(feature = "stack-cache")]
if !self.borrows.is_empty() {
// After we remove from the borrow stack, every aspect of our caching may be invalid, but it is
// also possible that the whole cache is still valid. So we call this method to repair what
// aspects of the cache are now invalid, instead of resetting the whole thing to a trivially
// valid default state.
let base_tag = self.borrows[0].tag;
let mut removed = 0;
let mut cursor = 0;
// Remove invalid entries from the cache by rotating them to the end of the cache, then
// keep track of how many invalid elements there are and overwrite them with the base tag.
// The base tag here serves as a harmless default value.
for _ in 0..CACHE_LEN - 1 {
if self.cache.idx[cursor] >= start {
self.cache.idx[cursor..CACHE_LEN - removed].rotate_left(1);
self.cache.tags[cursor..CACHE_LEN - removed].rotate_left(1);
removed += 1;
} else {
cursor += 1;
}
}
for i in CACHE_LEN - removed - 1..CACHE_LEN {
self.cache.idx[i] = 0;
self.cache.tags[i] = base_tag;
}
if start < self.unique_range.start.saturating_sub(1) {
// We removed all the Unique items
self.unique_range = 0..0;
} else {
// Ensure the range doesn't extend past the new top of the stack
self.unique_range.end = self.unique_range.end.min(start + 1);
}
} else {
self.unique_range = 0..0;
}
#[cfg(feature = "expensive-debug-assertions")]
self.verify_cache_consistency();
Ok(())
}
}


@@ -14,3 +14,7 @@ crossbeam = "0.8.1"
lazy_static = "1.4.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
[features]
# Doesn't do anything, but the miri script wants to pass the same flags to ui_test and miri itself
expensive-debug-assertions = []