load/save hashes of metadata

This commit reorganizes how the persist code treats hashing. The idea is
that each crate saves a file containing hashes representing the metadata
for each item X. When we see a read from `MetaData(X)`, we can load this
hash up (if we don't find a file for that crate, we just use the SVH for
the entire crate).

To compute the hash for `MetaData(Y)`, where Y is some local item, we
examine all the predecessors of the `MetaData(Y)` node and hash their
hashes together.
This commit is contained in:
Niko Matsakis 2016-05-06 15:09:31 -04:00
parent b01919a144
commit 3a2edd7e61
7 changed files with 255 additions and 98 deletions

@ -11,6 +11,7 @@
//! The data that we will serialize and deserialize.
use rustc::dep_graph::DepNode;
use rustc::hir::def_id::DefIndex;
use super::directory::DefPathIndex;
@ -34,20 +35,11 @@ pub struct SerializedDepGraph {
/// compare them against the hashes we see at that time, which
/// will tell us what has changed, either in this crate or in some
/// crate that we depend on.
pub hashes: Vec<SerializedHash>,
}
/// Data for use when downstream crates get recompiled.
#[derive(Debug, RustcEncodable, RustcDecodable)]
// NOTE(review): the derived encoding presumably serializes fields in
// declaration order — keep field layout stable across versions.
pub struct SerializedMetadataHashes {
    /// For each def-id defined in this crate that appears in the
    /// metadata, we hash all the inputs that were used when producing
    /// the metadata. We save this after compilation is done. Then,
    /// when some downstream crate is being recompiled, it can compare
    /// the hashes we saved against the hashes that it saw from
    /// before; this will tell it which of the items in this crate
    /// changed, which in turn implies what items in the downstream
    /// crate need to be recompiled.
    ///
    /// Because they will be reloaded, we don't store the DefId (which
    /// will be different when we next compile) related to each node,
    /// but rather the `DefPathIndex`. This can then be retraced
    /// to find the current def-id.
    pub hashes: Vec<SerializedHash>,
}
@ -62,3 +54,38 @@ pub struct SerializedHash {
/// the hash itself, computed by `calculate_item_hash`
pub hash: u64,
}
/// Data for use when downstream crates get recompiled.
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct SerializedMetadataHashes {
    /// For each def-id defined in this crate that appears in the
    /// metadata, we hash all the inputs that were used when producing
    /// the metadata. We save this after compilation is done. Then,
    /// when some downstream crate is being recompiled, it can compare
    /// the hashes we saved against the hashes that it saw from
    /// before; this will tell it which of the items in this crate
    /// changed, which in turn implies what items in the downstream
    /// crate need to be recompiled.
    ///
    /// Note that we store the def-ids here. This is because we don't
    /// reload this file when we recompile this crate, we will just
    /// regenerate it completely with the current hashes and new def-ids.
    ///
    /// Then downstream crates will load up their
    /// `SerializedDepGraph`, which may contain `MetaData(X)` nodes
    /// where `X` refers to some item in this crate. That `X` will be
    /// a `DefPathIndex` that gets retraced to the current `DefId`
    /// (matching the one found in this structure).
    pub hashes: Vec<SerializedMetadataHash>,
}
/// The hash for some metadata that (when saving) will be exported
/// from this crate, or which (when importing) was exported by an
/// upstream crate.
#[derive(Debug, RustcEncodable, RustcDecodable)]
pub struct SerializedMetadataHash {
    /// def-index of the item this hash covers; always relative to the
    /// exporting crate, so the importer must pair it with its own
    /// crate number for that crate to form a full `DefId`
    pub def_index: DefIndex,

    /// the hash itself, computed by `calculate_item_hash`
    pub hash: u64,
}

@ -64,7 +64,7 @@ impl RetracedDefIdDirectory {
pub struct DefIdDirectoryBuilder<'a,'tcx:'a> {
tcx: TyCtxt<'a, 'tcx, 'tcx>,
hash: DefIdMap<Option<DefPathIndex>>,
hash: DefIdMap<DefPathIndex>,
directory: DefIdDirectory,
}
@ -77,29 +77,22 @@ impl<'a,'tcx> DefIdDirectoryBuilder<'a,'tcx> {
}
}
pub fn add(&mut self, def_id: DefId) -> Option<DefPathIndex> {
if !def_id.is_local() {
// FIXME(#32015) clarify story about cross-crate dep tracking
return None;
}
pub fn add(&mut self, def_id: DefId) -> DefPathIndex {
debug!("DefIdDirectoryBuilder: def_id={:?}", def_id);
let tcx = self.tcx;
let paths = &mut self.directory.paths;
self.hash.entry(def_id)
.or_insert_with(|| {
let def_path = tcx.def_path(def_id);
if !def_path.is_local() {
return None;
}
let index = paths.len() as u32;
paths.push(def_path);
Some(DefPathIndex { index: index })
DefPathIndex { index: index }
})
.clone()
}
pub fn map(&mut self, node: DepNode<DefId>) -> Option<DepNode<DefPathIndex>> {
node.map_def(|&def_id| self.add(def_id))
pub fn map(&mut self, node: DepNode<DefId>) -> DepNode<DefPathIndex> {
node.map_def(|&def_id| Some(self.add(def_id))).unwrap()
}
pub fn into_directory(self) -> DefIdDirectory {

@ -0,0 +1,158 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use calculate_svh::SvhCalculate;
use rbml::Error;
use rbml::opaque::Decoder;
use rustc::dep_graph::DepNode;
use rustc::hir::def_id::DefId;
use rustc::hir::svh::Svh;
use rustc::ty::TyCtxt;
use rustc_data_structures::fnv::FnvHashMap;
use rustc_serialize::Decodable;
use std::io::{ErrorKind, Read};
use std::fs::File;
use syntax::ast;
use super::data::*;
use super::util::*;
/// Computes and caches the hashes used to decide whether dep-graph
/// *input* nodes (local HIR, upstream metadata) have changed.
pub struct HashContext<'a, 'tcx: 'a> {
    pub tcx: TyCtxt<'a, 'tcx, 'tcx>,
    // Per-item metadata hashes loaded from upstream crates' saved hash
    // files, keyed by the def-id as numbered in *this* compilation.
    item_metadata_hashes: FnvHashMap<DefId, u64>,
    // Overall SVH of each upstream crate we have consulted so far;
    // used as a fallback when that crate saved no per-item hash file.
    crate_hashes: FnvHashMap<ast::CrateNum, Svh>,
}
impl<'a, 'tcx> HashContext<'a, 'tcx> {
    /// Creates a fresh context with empty hash caches.
    pub fn new(tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Self {
        HashContext {
            tcx: tcx,
            item_metadata_hashes: FnvHashMap(),
            crate_hashes: FnvHashMap(),
        }
    }

    /// Returns the hash for `dep_node` if it is an *input* node (local
    /// HIR, or metadata from another crate); returns `None` for all
    /// other node kinds, whose state is captured transitively through
    /// their input predecessors.
    pub fn hash(&mut self, dep_node: DepNode<DefId>) -> Option<u64> {
        match dep_node {
            // HIR nodes (which always come from our crate) are an input:
            DepNode::Hir(def_id) => {
                assert!(def_id.is_local());
                Some(self.hir_hash(def_id))
            }

            // MetaData from other crates is an *input* to us.
            // MetaData nodes from *our* crates are an *output*; we
            // don't hash them, but we do compute a hash for them and
            // save it for others to use.
            DepNode::MetaData(def_id) if !def_id.is_local() => {
                Some(self.metadata_hash(def_id))
            }

            _ => {
                // Other kinds of nodes represent computed by-products
                // that we don't hash directly; instead, they should
                // have some transitive dependency on a Hir or
                // MetaData node, so we'll just hash that
                None
            }
        }
    }

    /// Hash of a local item's HIR; computed on demand (not cached in
    /// this context).
    fn hir_hash(&mut self, def_id: DefId) -> u64 {
        assert!(def_id.is_local());
        // FIXME(#32753) -- should we use a distinct hash here
        self.tcx.calculate_item_hash(def_id)
    }

    /// Hash for an upstream item's metadata. Loads the exporting
    /// crate's saved hash data on first use; if no per-item hash was
    /// recorded, falls back to that crate's overall SVH.
    fn metadata_hash(&mut self, def_id: DefId) -> u64 {
        debug!("metadata_hash(def_id={:?})", def_id);

        assert!(!def_id.is_local());
        loop {
            // check whether we have a result cached for this def-id
            if let Some(&hash) = self.item_metadata_hashes.get(&def_id) {
                debug!("metadata_hash: def_id={:?} hash={:?}", def_id, hash);
                return hash;
            }

            // check whether we did not find detailed metadata for this
            // krate; in that case, we just use the krate's overall hash
            if let Some(&hash) = self.crate_hashes.get(&def_id.krate) {
                debug!("metadata_hash: def_id={:?} crate_hash={:?}", def_id, hash);
                return hash.as_u64();
            }

            // otherwise, load the data and repeat.
            // `load_data` unconditionally records the crate SVH, so the
            // loop is guaranteed to terminate on its next iteration.
            self.load_data(def_id.krate);
            assert!(self.crate_hashes.contains_key(&def_id.krate));
        }
    }

    /// Loads hash data for crate `cnum`: always records the crate's
    /// SVH, and additionally decodes per-item hashes when a saved
    /// metadata-hash file exists on disk. A missing file is not an
    /// error (callers then fall back to the SVH); any other I/O
    /// failure is reported and decoding errors are treated as bugs.
    fn load_data(&mut self, cnum: ast::CrateNum) {
        debug!("load_data(cnum={})", cnum);

        let svh = self.tcx.sess.cstore.crate_hash(cnum);
        let old = self.crate_hashes.insert(cnum, svh);
        debug!("load_data: svh={}", svh);
        assert!(old.is_none(), "loaded data for crate {:?} twice", cnum);

        if let Some(path) = metadata_hash_path(self.tcx, cnum) {
            debug!("load_data: path={:?}", path);
            let mut data = vec![];
            match
                File::open(&path)
                .and_then(|mut file| file.read_to_end(&mut data))
            {
                Ok(_) => {
                    match self.load_from_data(cnum, &data) {
                        Ok(()) => { }
                        Err(err) => {
                            // file existed but could not be decoded:
                            // treat as an internal compiler error
                            bug!("decoding error in dep-graph from `{}`: {}",
                                 path.display(), err);
                        }
                    }
                }
                Err(err) => {
                    match err.kind() {
                        ErrorKind::NotFound => {
                            // If the file is not found, that's ok.
                        }
                        _ => {
                            self.tcx.sess.err(
                                &format!("could not load dep information from `{}`: {}",
                                         path.display(), err));
                            return;
                        }
                    }
                }
            }
        }
    }

    /// Decodes a `SerializedMetadataHashes` blob that crate `cnum`
    /// saved, recording each per-item hash under a def-id expressed in
    /// our own crate numbering.
    fn load_from_data(&mut self, cnum: ast::CrateNum, data: &[u8]) -> Result<(), Error> {
        debug!("load_from_data(cnum={})", cnum);

        // Load up the hashes for the def-ids from this crate.
        let mut decoder = Decoder::new(data, 0);
        let serialized_hashes = try!(SerializedMetadataHashes::decode(&mut decoder));
        for serialized_hash in serialized_hashes.hashes {
            // the hashes are stored with just a def-index, which is
            // always relative to the old crate; convert that to use
            // our internal crate number
            let def_id = DefId { krate: cnum, index: serialized_hash.def_index };

            // record the hash for this dep-node
            let old = self.item_metadata_hashes.insert(def_id, serialized_hash.hash);
            debug!("load_from_data: def_id={:?} hash={}", def_id, serialized_hash.hash);
            assert!(old.is_none(), "already have hash for {:?}", def_id);
        }
        Ok(())
    }
}

@ -24,6 +24,7 @@ use std::path::Path;
use super::data::*;
use super::directory::*;
use super::dirty_clean;
use super::hash::*;
use super::util::*;
type DirtyNodes = FnvHashSet<DepNode<DefId>>;
@ -133,13 +134,13 @@ fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
hashes: &[SerializedHash],
retraced: &RetracedDefIdDirectory)
-> DirtyNodes {
let mut hcx = HashContext::new(tcx);
let mut items_removed = false;
let mut dirty_nodes = FnvHashSet();
for hash in hashes {
match hash.node.map_def(|&i| retraced.def_id(i)) {
Some(dep_node) => {
// FIXME(#32753) -- should we use a distinct hash here
let current_hash = dep_node.hash(tcx).unwrap();
let current_hash = hcx.hash(dep_node).unwrap();
debug!("initial_dirty_nodes: hash of {:?} is {:?}, was {:?}",
dep_node, current_hash, hash.hash);
if current_hash != hash.hash {

@ -15,6 +15,7 @@
mod data;
mod directory;
mod dirty_clean;
mod hash;
mod load;
mod save;
mod util;

@ -20,18 +20,23 @@ use std::path::PathBuf;
use super::data::*;
use super::directory::*;
use super::hash::*;
use super::util::*;
pub fn save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) {
let _ignore = tcx.dep_graph.in_ignore();
save_in(tcx, dep_graph_path(tcx), encode_dep_graph);
save_in(tcx, metadata_hash_path(tcx, LOCAL_CRATE), encode_metadata_hashes);
let mut hcx = HashContext::new(tcx);
save_in(&mut hcx, dep_graph_path(tcx), encode_dep_graph);
save_in(&mut hcx, metadata_hash_path(tcx, LOCAL_CRATE), encode_metadata_hashes);
}
fn save_in<'a,'tcx,F>(tcx: TyCtxt<'a, 'tcx, 'tcx>, opt_path_buf: Option<PathBuf>, encode: F)
where F: FnOnce(TyCtxt<'a, 'tcx, 'tcx>, &mut Encoder) -> io::Result<()>
fn save_in<'a, 'tcx, F>(hcx: &mut HashContext<'a, 'tcx>,
opt_path_buf: Option<PathBuf>,
encode: F)
where F: FnOnce(&mut HashContext<'a, 'tcx>, &mut Encoder) -> io::Result<()>
{
let tcx = hcx.tcx;
let path_buf = match opt_path_buf {
Some(p) => p,
None => return
@ -54,7 +59,7 @@ fn save_in<'a,'tcx,F>(tcx: TyCtxt<'a, 'tcx, 'tcx>, opt_path_buf: Option<PathBuf>
// generate the data in a memory buffer
let mut wr = Cursor::new(Vec::new());
match encode(tcx, &mut Encoder::new(&mut wr)) {
match encode(hcx, &mut Encoder::new(&mut wr)) {
Ok(()) => { }
Err(err) => {
tcx.sess.err(
@ -80,9 +85,11 @@ fn save_in<'a,'tcx,F>(tcx: TyCtxt<'a, 'tcx, 'tcx>, opt_path_buf: Option<PathBuf>
}
}
pub fn encode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
pub fn encode_dep_graph<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
encoder: &mut Encoder)
-> io::Result<()> {
-> io::Result<()>
{
let tcx = hcx.tcx;
let query = tcx.dep_graph.query();
let mut builder = DefIdDirectoryBuilder::new(tcx);
@ -92,29 +99,24 @@ pub fn encode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
query.nodes()
.into_iter()
.filter_map(|dep_node| {
dep_node.hash(tcx)
.map(|hash| {
let node = builder.map(dep_node).unwrap();
SerializedHash { node: node, hash: hash }
})
hcx.hash(dep_node)
.map(|hash| {
let node = builder.map(dep_node);
SerializedHash { node: node, hash: hash }
})
})
.collect();
// Create the serialized dep-graph, dropping nodes that are
// from other crates or from inlined items.
//
// FIXME(#32015) fix handling of other crates
// Create the serialized dep-graph.
let graph = SerializedDepGraph {
nodes: query.nodes().into_iter()
.flat_map(|node| builder.map(node))
.map(|node| builder.map(node))
.collect(),
edges: query.edges().into_iter()
.flat_map(|(source_node, target_node)| {
builder.map(source_node)
.and_then(|source| {
builder.map(target_node)
.map(|target| (source, target))
})
.map(|(source_node, target_node)| {
let source = builder.map(source_node);
let target = builder.map(target_node);
(source, target)
})
.collect(),
hashes: hashes,
@ -130,14 +132,13 @@ pub fn encode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
Ok(())
}
pub fn encode_metadata_hashes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
pub fn encode_metadata_hashes<'a, 'tcx>(hcx: &mut HashContext<'a, 'tcx>,
encoder: &mut Encoder)
-> io::Result<()>
{
let tcx = hcx.tcx;
let query = tcx.dep_graph.query();
let mut builder = DefIdDirectoryBuilder::new(tcx);
let serialized_hashes = {
// Identify the `MetaData(X)` nodes where `X` is local. These are
// the metadata items we export. Downstream crates will want to
@ -152,32 +153,31 @@ pub fn encode_metadata_hashes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
});
// To create the hash for each item `X`, we don't hash the raw
// bytes of the metadata (though in principle we could). Instead,
// we walk the predecessors of `MetaData(X)` from the
// dep-graph. This corresponds to all the inputs that were read to
// construct the metadata. To create the hash for the metadata, we
// hash (the hash of) all of those inputs.
// bytes of the metadata (though in principle we
// could). Instead, we walk the predecessors of `MetaData(X)`
// from the dep-graph. This corresponds to all the inputs that
// were read to construct the metadata. To create the hash for
// the metadata, we hash (the hash of) all of those inputs.
let hashes =
meta_data_def_ids
.map(|def_id| {
assert!(def_id.is_local());
let dep_node = DepNode::MetaData(def_id);
let mut state = SipHasher::new();
for node in query.transitive_predecessors(DepNode::MetaData(def_id)) {
if let Some(hash) = node.hash(tcx) {
debug!("save: computing metadata hash for {:?}", dep_node);
for node in query.transitive_predecessors(dep_node) {
if let Some(hash) = hcx.hash(node) {
debug!("save: predecessor {:?} has hash {}", node, hash);
state.write_u64(hash.to_le());
} else {
debug!("save: predecessor {:?} cannot be hashed", node);
}
}
(def_id, state.finish())
});
// Now create the `SerializedHash` data structures that others
// will load later.
let hashes =
hashes
.map(|(def_id, hash)| {
let index = builder.add(def_id).unwrap();
SerializedHash {
node: DepNode::MetaData(index),
hash: hash
let hash = state.finish();
debug!("save: metadata hash for {:?} is {}", dep_node, hash);
SerializedMetadataHash {
def_index: def_id.index,
hash: hash,
}
});
@ -188,8 +188,6 @@ pub fn encode_metadata_hashes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
};
// Encode everything.
let directory = builder.into_directory();
try!(directory.encode(encoder));
try!(serialized_hashes.encode(encoder));
Ok(())

@ -8,9 +8,6 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use calculate_svh::SvhCalculate;
use rustc::dep_graph::DepNode;
use rustc::hir::def_id::DefId;
use rustc::middle::cstore::LOCAL_CRATE;
use rustc::ty::TyCtxt;
@ -72,21 +69,3 @@ fn create_dir_racy(path: &Path) -> io::Result<()> {
}
}
/// Extension trait for computing the hash of a dep-graph node.
pub trait DepNodeHash {
    /// Hash this dep-node, if it is of the kind that we know how to
    /// hash; returns `None` otherwise.
    fn hash<'a, 'tcx>(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Option<u64>;
}
impl DepNodeHash for DepNode<DefId> {
    /// Only local HIR nodes have a directly computable hash here; all
    /// other node kinds return `None`.
    fn hash<'a, 'tcx>(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Option<u64> {
        match *self {
            DepNode::Hir(def_id) => {
                // FIXME(#32753) -- should we use a distinct hash here
                assert!(def_id.is_local());
                Some(tcx.calculate_item_hash(def_id))
            }
            _ => None
        }
    }
}