Rollup merge of #37336 - michaelwoerister:debuginfo-type-ids, r=eddyb

debuginfo: Use TypeIdHasher for generating global debuginfo type IDs.

The only requirement for debuginfo type IDs is that they are globally unique. The `TypeIdHasher` (which is used for `std::intrinsics::type_id()`) provides that, so we can get rid of some redundancy by re-using it for debuginfo. Values produced by the `TypeIdHasher` are also more stable than those produced by the current `UniqueTypeId` generation algorithm, which incorporates `NodeId`s -- not good for incremental compilation.
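For context, the intrinsic in question is the one behind the stable `std::any::TypeId` API; here is a tiny illustration of the "globally unique" requirement (the assertions are mine, not from the PR):

```rust
use std::any::TypeId;

fn main() {
    // `TypeId::of` is the stable wrapper around the `type_id` intrinsic, whose
    // values come from the same `TypeIdHasher` this PR reuses for debuginfo.
    assert_ne!(TypeId::of::<u32>(), TypeId::of::<i32>());
    assert_eq!(TypeId::of::<Vec<u8>>(), TypeId::of::<Vec<u8>>());
    println!("distinct types get distinct IDs");
}
```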

@alexcrichton @eddyb : Could you take a look at the endianness adaptations that I made to the `TypeIdHasher`?

Also, are we sure that a 64-bit hash is wide enough for something that is supposed to be globally unique? For debuginfo I'm using 160 bits to make sure that we don't run into conflicts there.
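As a rough sanity check on that width question, here is a back-of-the-envelope birthday-bound estimate (the 10-million figure for distinct types is an assumption, not a measurement):

```rust
fn main() {
    // Birthday bound: for n values drawn uniformly at random from a space of
    // 2^bits possibilities, the chance of at least one collision is roughly
    // n^2 / 2^(bits + 1).
    let n: f64 = 1.0e7; // assumed number of distinct types in a large build graph
    for &bits in &[64i32, 160] {
        let p = n * n / 2f64.powi(bits + 1);
        println!("{:>3}-bit hash: collision probability ~ {:e}", bits, p);
    }
}
```

Under that assumption a 64-bit hash already leaves per-build collision odds around one in a few hundred thousand, while 160 bits pushes them far below anything observable.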
Jonathan Turner 2016-10-24 15:41:29 -07:00 committed by GitHub
commit 691ab948ce
2 changed files with 81 additions and 228 deletions


@@ -392,27 +392,30 @@ pub fn is_adt_dtorck(self, adt: ty::AdtDef) -> bool {
}
}
// When hashing a type this ends up affecting properties like symbol names. We
// want these symbol names to be calculated independent of other factors like
// what architecture you're compiling *from*.
//
// The hashing just uses the standard `Hash` trait, but the implementations of
// `Hash` for the `usize` and `isize` types are *not* architecture independent
// (e.g. they has 4 or 8 bytes). As a result we want to avoid `usize` and
// `isize` completely when hashing. To ensure that these don't leak in we use a
// custom hasher implementation here which inflates the size of these to a `u64`
// and `i64`.
struct WidenUsizeHasher<H> {
/// When hashing a type this ends up affecting properties like symbol names. We
/// want these symbol names to be calculated independent of other factors like
/// what architecture you're compiling *from*.
///
/// The hashing just uses the standard `Hash` trait, but the implementations of
/// `Hash` for the `usize` and `isize` types are *not* architecture independent
/// (e.g. they hash 4 or 8 bytes). As a result we want to avoid `usize` and
/// `isize` completely when hashing. To ensure that these don't leak in we use a
/// custom hasher implementation here which inflates the size of these to a `u64`
/// and `i64`.
///
/// The same goes for endianness: We always convert multi-byte integers to little
/// endian before hashing.
pub struct ArchIndependentHasher<H> {
inner: H,
}
impl<H> WidenUsizeHasher<H> {
fn new(inner: H) -> WidenUsizeHasher<H> {
WidenUsizeHasher { inner: inner }
impl<H> ArchIndependentHasher<H> {
pub fn new(inner: H) -> ArchIndependentHasher<H> {
ArchIndependentHasher { inner: inner }
}
}
impl<H: Hasher> Hasher for WidenUsizeHasher<H> {
impl<H: Hasher> Hasher for ArchIndependentHasher<H> {
fn write(&mut self, bytes: &[u8]) {
self.inner.write(bytes)
}
@@ -425,44 +428,44 @@ fn write_u8(&mut self, i: u8) {
self.inner.write_u8(i)
}
fn write_u16(&mut self, i: u16) {
self.inner.write_u16(i)
self.inner.write_u16(i.to_le())
}
fn write_u32(&mut self, i: u32) {
self.inner.write_u32(i)
self.inner.write_u32(i.to_le())
}
fn write_u64(&mut self, i: u64) {
self.inner.write_u64(i)
self.inner.write_u64(i.to_le())
}
fn write_usize(&mut self, i: usize) {
self.inner.write_u64(i as u64)
self.inner.write_u64((i as u64).to_le())
}
fn write_i8(&mut self, i: i8) {
self.inner.write_i8(i)
}
fn write_i16(&mut self, i: i16) {
self.inner.write_i16(i)
self.inner.write_i16(i.to_le())
}
fn write_i32(&mut self, i: i32) {
self.inner.write_i32(i)
self.inner.write_i32(i.to_le())
}
fn write_i64(&mut self, i: i64) {
self.inner.write_i64(i)
self.inner.write_i64(i.to_le())
}
fn write_isize(&mut self, i: isize) {
self.inner.write_i64(i as i64)
self.inner.write_i64((i as i64).to_le())
}
}
pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, H> {
tcx: TyCtxt<'a, 'gcx, 'tcx>,
state: WidenUsizeHasher<H>,
state: ArchIndependentHasher<H>,
}
impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>, state: H) -> Self {
TypeIdHasher {
tcx: tcx,
state: WidenUsizeHasher::new(state),
state: ArchIndependentHasher::new(state),
}
}
@@ -493,6 +496,10 @@ fn def_id(&mut self, did: DefId) {
pub fn def_path(&mut self, def_path: &ast_map::DefPath) {
def_path.deterministic_hash_to(self.tcx, &mut self.state);
}
pub fn into_inner(self) -> H {
self.state.inner
}
}
impl<'a, 'gcx, 'tcx, H: Hasher> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, H> {

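For reference, the wrapper pattern above also works outside the compiler. The sketch below is a minimal standalone analogue, assuming std's `DefaultHasher` as the inner hasher; the wrapper name is made up for the example and only a few `write_*` methods are overridden:

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// A trimmed-down, standalone analogue of `ArchIndependentHasher` above:
// `usize` is widened to 64 bits and multi-byte integers are normalized to
// little endian before reaching the inner hasher, so the bytes being hashed
// do not depend on the host's pointer width or endianness.
struct ArchIndependent<H> {
    inner: H,
}

impl<H: Hasher> Hasher for ArchIndependent<H> {
    fn finish(&self) -> u64 {
        self.inner.finish()
    }
    fn write(&mut self, bytes: &[u8]) {
        self.inner.write(bytes)
    }
    fn write_u32(&mut self, i: u32) {
        self.inner.write_u32(i.to_le())
    }
    fn write_u64(&mut self, i: u64) {
        self.inner.write_u64(i.to_le())
    }
    fn write_usize(&mut self, i: usize) {
        // Widen first, then normalize endianness, as in the diff above.
        self.inner.write_u64((i as u64).to_le())
    }
}

fn main() {
    let mut hasher = ArchIndependent { inner: DefaultHasher::new() };
    (1usize, 2u32, "abc").hash(&mut hasher);
    println!("{:016x}", hasher.finish());
}
```

Because every overridden `write_*` call is reduced to a fixed-width, little-endian value, the inner hasher sees identical input on 32-bit and 64-bit, little- and big-endian hosts.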

@@ -16,7 +16,7 @@
use super::utils::{debug_context, DIB, span_start, bytes_to_bits, size_and_align_of,
get_namespace_and_span_for_item, create_DIArray, is_node_local_to_unit};
use super::namespace::mangled_name_of_item;
use super::type_names::{compute_debuginfo_type_name, push_debuginfo_type_name};
use super::type_names::compute_debuginfo_type_name;
use super::{CrateDebugContext};
use context::SharedCrateContext;
use session::Session;
@@ -26,8 +26,11 @@
use rustc::hir::def::CtorKind;
use rustc::hir::def_id::DefId;
use rustc::ty::fold::TypeVisitor;
use rustc::ty::subst::Substs;
use rustc::ty::util::TypeIdHasher;
use rustc::hir;
use rustc_data_structures::blake2b;
use {type_of, machine, monomorphize};
use common::CrateContext;
use type_::Type;
@@ -38,6 +41,7 @@
use libc::{c_uint, c_longlong};
use std::ffi::CString;
use std::fmt::Write;
use std::path::Path;
use std::ptr;
use std::rc::Rc;
@@ -46,6 +50,7 @@
use syntax::parse::token;
use syntax_pos::{self, Span};
// From DWARF 5.
// See http://www.dwarfstd.org/ShowIssue.php?issue=140129.1
const DW_LANG_RUST: c_uint = 0x1c;
@@ -138,219 +143,58 @@ fn get_unique_type_id_as_string(&self, unique_type_id: UniqueTypeId) -> Rc<str>
// ID will be generated and stored for later lookup.
fn get_unique_type_id_of_type<'a>(&mut self, cx: &CrateContext<'a, 'tcx>,
type_: Ty<'tcx>) -> UniqueTypeId {
// basic type -> {:name of the type:}
// tuple -> {tuple_(:param-uid:)*}
// struct -> {struct_:svh: / :node-id:_<(:param-uid:),*> }
// enum -> {enum_:svh: / :node-id:_<(:param-uid:),*> }
// enum variant -> {variant_:variant-name:_:enum-uid:}
// reference (&) -> {& :pointee-uid:}
// mut reference (&mut) -> {&mut :pointee-uid:}
// ptr (*) -> {* :pointee-uid:}
// mut ptr (*mut) -> {*mut :pointee-uid:}
// unique ptr (box) -> {box :pointee-uid:}
// @-ptr (@) -> {@ :pointee-uid:}
// sized vec ([T; x]) -> {[:size:] :element-uid:}
// unsized vec ([T]) -> {[] :element-uid:}
// trait (T) -> {trait_:svh: / :node-id:_<(:param-uid:),*> }
// closure -> {<unsafe_> <once_> :store-sigil: |(:param-uid:),* <,_...>| -> \
// :return-type-uid: : (:bounds:)*}
// function -> {<unsafe_> <abi_> fn( (:param-uid:)* <,_...> ) -> \
// :return-type-uid:}
// Let's see if we already have something in the cache
match self.type_to_unique_id.get(&type_).cloned() {
Some(unique_type_id) => return unique_type_id,
None => { /* generate one */}
};
let mut unique_type_id = String::with_capacity(256);
unique_type_id.push('{');
let mut type_id_hasher = TypeIdHasher::new(cx.tcx(),
DebugInfoTypeIdHasher::new());
type_id_hasher.visit_ty(type_);
let hash = type_id_hasher.into_inner().into_hash();
match type_.sty {
ty::TyNever |
ty::TyBool |
ty::TyChar |
ty::TyStr |
ty::TyInt(_) |
ty::TyUint(_) |
ty::TyFloat(_) => {
push_debuginfo_type_name(cx, type_, false, &mut unique_type_id);
},
ty::TyAdt(def, substs) => {
unique_type_id.push_str(&(String::from(def.descr()) + " "));
from_def_id_and_substs(self, cx, def.did, substs, &mut unique_type_id);
}
ty::TyTuple(component_types) if component_types.is_empty() => {
push_debuginfo_type_name(cx, type_, false, &mut unique_type_id);
},
ty::TyTuple(component_types) => {
unique_type_id.push_str("tuple ");
for &component_type in component_types {
let component_type_id =
self.get_unique_type_id_of_type(cx, component_type);
let component_type_id =
self.get_unique_type_id_as_string(component_type_id);
unique_type_id.push_str(&component_type_id[..]);
}
},
ty::TyBox(inner_type) => {
unique_type_id.push_str("box ");
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type);
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id);
unique_type_id.push_str(&inner_type_id[..]);
},
ty::TyRawPtr(ty::TypeAndMut { ty: inner_type, mutbl } ) => {
unique_type_id.push('*');
if mutbl == hir::MutMutable {
unique_type_id.push_str("mut");
}
let mut unique_type_id = String::with_capacity(TYPE_ID_HASH_LENGTH * 2);
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type);
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id);
unique_type_id.push_str(&inner_type_id[..]);
},
ty::TyRef(_, ty::TypeAndMut { ty: inner_type, mutbl }) => {
unique_type_id.push('&');
if mutbl == hir::MutMutable {
unique_type_id.push_str("mut");
}
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type);
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id);
unique_type_id.push_str(&inner_type_id[..]);
},
ty::TyArray(inner_type, len) => {
unique_type_id.push_str(&format!("[{}]", len));
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type);
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id);
unique_type_id.push_str(&inner_type_id[..]);
},
ty::TySlice(inner_type) => {
unique_type_id.push_str("[]");
let inner_type_id = self.get_unique_type_id_of_type(cx, inner_type);
let inner_type_id = self.get_unique_type_id_as_string(inner_type_id);
unique_type_id.push_str(&inner_type_id[..]);
},
ty::TyTrait(ref trait_data) => {
unique_type_id.push_str("trait ");
let principal = cx.tcx().erase_late_bound_regions_and_normalize(
&trait_data.principal);
from_def_id_and_substs(self,
cx,
principal.def_id,
principal.substs,
&mut unique_type_id);
},
ty::TyFnDef(.., &ty::BareFnTy{ unsafety, abi, ref sig } ) |
ty::TyFnPtr(&ty::BareFnTy{ unsafety, abi, ref sig } ) => {
if unsafety == hir::Unsafety::Unsafe {
unique_type_id.push_str("unsafe ");
}
unique_type_id.push_str(abi.name());
unique_type_id.push_str(" fn(");
let sig = cx.tcx().erase_late_bound_regions_and_normalize(sig);
for &parameter_type in &sig.inputs {
let parameter_type_id =
self.get_unique_type_id_of_type(cx, parameter_type);
let parameter_type_id =
self.get_unique_type_id_as_string(parameter_type_id);
unique_type_id.push_str(&parameter_type_id[..]);
unique_type_id.push(',');
}
if sig.variadic {
unique_type_id.push_str("...");
}
unique_type_id.push_str(")->");
let return_type_id = self.get_unique_type_id_of_type(cx, sig.output);
let return_type_id = self.get_unique_type_id_as_string(return_type_id);
unique_type_id.push_str(&return_type_id[..]);
},
ty::TyClosure(_, substs) if substs.upvar_tys.is_empty() => {
push_debuginfo_type_name(cx, type_, false, &mut unique_type_id);
},
ty::TyClosure(_, substs) => {
unique_type_id.push_str("closure ");
for upvar_type in substs.upvar_tys {
let upvar_type_id =
self.get_unique_type_id_of_type(cx, upvar_type);
let upvar_type_id =
self.get_unique_type_id_as_string(upvar_type_id);
unique_type_id.push_str(&upvar_type_id[..]);
}
},
_ => {
bug!("get_unique_type_id_of_type() - unexpected type: {:?}",
type_)
}
};
unique_type_id.push('}');
// Trim to size before storing permanently
unique_type_id.shrink_to_fit();
for byte in hash.into_iter() {
write!(&mut unique_type_id, "{:x}", byte).unwrap();
}
let key = self.unique_id_interner.intern(&unique_type_id);
self.type_to_unique_id.insert(type_, UniqueTypeId(key));
return UniqueTypeId(key);
fn from_def_id_and_substs<'a, 'tcx>(type_map: &mut TypeMap<'tcx>,
cx: &CrateContext<'a, 'tcx>,
def_id: DefId,
substs: &Substs<'tcx>,
output: &mut String) {
// First, find out the 'real' def_id of the type. Items inlined from
// other crates have to be mapped back to their source.
let def_id = if let Some(node_id) = cx.tcx().map.as_local_node_id(def_id) {
if cx.tcx().map.is_inlined_node_id(node_id) {
// The given def_id identifies the inlined copy of a
// type definition, let's take the source of the copy.
cx.defid_for_inlined_node(node_id).unwrap()
} else {
def_id
// The hasher we are using to generate the UniqueTypeId. We want
// something that provides more than the 64 bits of the DefaultHasher.
const TYPE_ID_HASH_LENGTH: usize = 20;
struct DebugInfoTypeIdHasher {
state: blake2b::Blake2bCtx
}
impl ::std::hash::Hasher for DebugInfoTypeIdHasher {
fn finish(&self) -> u64 {
unimplemented!()
}
#[inline]
fn write(&mut self, bytes: &[u8]) {
blake2b::blake2b_update(&mut self.state, bytes);
}
}
impl DebugInfoTypeIdHasher {
fn new() -> DebugInfoTypeIdHasher {
DebugInfoTypeIdHasher {
state: blake2b::blake2b_new(TYPE_ID_HASH_LENGTH, &[])
}
} else {
def_id
};
}
// Get the crate name/disambiguator as first part of the identifier.
let crate_name = if def_id.is_local() {
cx.tcx().crate_name.clone()
} else {
cx.sess().cstore.original_crate_name(def_id.krate)
};
let crate_disambiguator = cx.tcx().crate_disambiguator(def_id.krate);
output.push_str(&crate_name[..]);
output.push_str("/");
output.push_str(&crate_disambiguator[..]);
output.push_str("/");
// Add the def-index as the second part
output.push_str(&format!("{:x}", def_id.index.as_usize()));
if substs.types().next().is_some() {
output.push('<');
for type_parameter in substs.types() {
let param_type_id =
type_map.get_unique_type_id_of_type(cx, type_parameter);
let param_type_id =
type_map.get_unique_type_id_as_string(param_type_id);
output.push_str(&param_type_id[..]);
output.push(',');
}
output.push('>');
fn into_hash(self) -> [u8; TYPE_ID_HASH_LENGTH] {
let mut hash = [0u8; TYPE_ID_HASH_LENGTH];
blake2b::blake2b_final(self.state, &mut hash);
hash
}
}
}
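The byte-sink-with-out-of-band-digest pattern above can be sketched without the internal `rustc_data_structures::blake2b` module; everything in the toy example below is invented for illustration (it just collects the raw bytes instead of computing a real 160-bit Blake2b digest):

```rust
use std::fmt::Write as _;
use std::hash::{Hash, Hasher};

// Illustrative stand-in for the Blake2b-backed `DebugInfoTypeIdHasher` above:
// it records the bytes it is fed, showing the pattern of using `Hasher` as a
// byte sink and pulling the digest out of band rather than through `finish()`.
struct ByteSinkHasher {
    bytes: Vec<u8>,
}

impl Hasher for ByteSinkHasher {
    // A wide digest cannot be squeezed into the 64-bit return value, so this
    // is never called -- the real implementation leaves it unimplemented too.
    fn finish(&self) -> u64 {
        unimplemented!()
    }
    fn write(&mut self, bytes: &[u8]) {
        self.bytes.extend_from_slice(bytes);
    }
}

fn main() {
    let mut hasher = ByteSinkHasher { bytes: Vec::new() };
    ("some type", 42u32).hash(&mut hasher);

    // Hex-format the collected bytes, the same way the unique type ID string
    // is assembled from the Blake2b digest in the code above.
    let mut unique_type_id = String::new();
    for byte in &hasher.bytes {
        write!(&mut unique_type_id, "{:x}", byte).unwrap();
    }
    println!("{}", unique_type_id);
}
```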
@@ -1927,15 +1771,17 @@ pub fn create_global_var_metadata(cx: &CrateContext,
return;
}
let tcx = cx.tcx();
// Don't create debuginfo for globals inlined from other crates. The other
// crate should already contain debuginfo for it. More importantly, the
// global might not even exist in un-inlined form anywhere, which would lead
// to linker errors.
if cx.tcx().map.is_inlined_node_id(node_id) {
if tcx.map.is_inlined_node_id(node_id) {
return;
}
let node_def_id = cx.tcx().map.local_def_id(node_id);
let node_def_id = tcx.map.local_def_id(node_id);
let (var_scope, span) = get_namespace_and_span_for_item(cx, node_def_id);
let (file_metadata, line_number) = if span != syntax_pos::DUMMY_SP {
@@ -1946,9 +1792,9 @@ pub fn create_global_var_metadata(cx: &CrateContext,
};
let is_local_to_unit = is_node_local_to_unit(cx, node_id);
let variable_type = cx.tcx().node_id_to_type(node_id);
let variable_type = tcx.erase_regions(&tcx.node_id_to_type(node_id));
let type_metadata = type_metadata(cx, variable_type, span);
let var_name = cx.tcx().item_name(node_def_id).to_string();
let var_name = tcx.item_name(node_def_id).to_string();
let linkage_name = mangled_name_of_item(cx, node_def_id, "");
let var_name = CString::new(var_name).unwrap();