From b74219964c1e86129cffc952a5e2ed3c03f052c1 Mon Sep 17 00:00:00 2001 From: Ariel Ben-Yehuda Date: Thu, 17 Sep 2015 22:12:39 +0300 Subject: [PATCH] improve the tyencode abbrev format 3% win on libcore 528828 liballoc-bb943c5a.rlib 1425126 liballoc_jemalloc-bb943c5a.rlib 10090 liballoc_system-bb943c5a.rlib 144904 libarena-bb943c5a.rlib 3773896 libcollections-bb943c5a.rlib 17075242 libcore-bb943c5a.rlib 195770 libflate-bb943c5a.rlib 234702 libfmt_macros-bb943c5a.rlib 536342 libgetopts-bb943c5a.rlib 212028 libgraphviz-bb943c5a.rlib 397068 liblibc-bb943c5a.rlib 185038 liblog-bb943c5a.rlib 680782 librand-bb943c5a.rlib 577574 librbml-bb943c5a.rlib 1381992 librustc_back-bb943c5a.rlib 37554736 librustc-bb943c5a.rlib 12826 librustc_bitflags-bb943c5a.rlib 2257392 librustc_borrowck-bb943c5a.rlib 533858 librustc_data_structures-bb943c5a.rlib 9338878 librustc_driver-bb943c5a.rlib 8960016 librustc_front-bb943c5a.rlib 1594212 librustc_lint-bb943c5a.rlib 79159342 librustc_llvm-bb943c5a.rlib 4590656 librustc_mir-bb943c5a.rlib 3529292 librustc_platform_intrinsics-bb943c5a.rlib 590688 librustc_privacy-bb943c5a.rlib 3084134 librustc_resolve-bb943c5a.rlib 14032890 librustc_trans-bb943c5a.rlib 11833852 librustc_typeck-bb943c5a.rlib 1641496 librustc_unicode-bb943c5a.rlib 15611582 librustdoc-bb943c5a.rlib 2693764 libserialize-bb943c5a.rlib 8266920 libstd-bb943c5a.rlib 29573790 libsyntax-bb943c5a.rlib 895484 libterm-bb943c5a.rlib --- src/librbml/lib.rs | 2 +- src/librustc/metadata/encoder.rs | 4 ++-- src/librustc/metadata/tydecode.rs | 26 ++++++++------------------ src/librustc/metadata/tyencode.rs | 29 +++++++++++++---------------- src/librustc/middle/ty/mod.rs | 1 - src/librustc_trans/back/link.rs | 2 +- 6 files changed, 25 insertions(+), 39 deletions(-) diff --git a/src/librbml/lib.rs b/src/librbml/lib.rs index dbd6ba700fa..4f7bbe9e027 100644 --- a/src/librbml/lib.rs +++ b/src/librbml/lib.rs @@ -914,7 +914,7 @@ pub mod writer { } } - fn write_vuint(w: &mut W, n: usize) -> EncodeResult { + pub fn write_vuint(w: &mut W, n: usize) -> EncodeResult { if n < 0x7f { return write_sized_vuint(w, n, 1); } if n < 0x4000 { return write_sized_vuint(w, n, 2); } if n < 0x200000 { return write_sized_vuint(w, n, 3); } diff --git a/src/librustc/metadata/encoder.rs b/src/librustc/metadata/encoder.rs index fd10639fbc7..822887a9c12 100644 --- a/src/librustc/metadata/encoder.rs +++ b/src/librustc/metadata/encoder.rs @@ -2150,7 +2150,7 @@ fn encode_metadata_inner(wr: &mut Cursor>, } // Get the encoded string for a type -pub fn encoded_ty<'tcx>(tcx: &ty::ctxt<'tcx>, t: Ty<'tcx>) -> String { +pub fn encoded_ty<'tcx>(tcx: &ty::ctxt<'tcx>, t: Ty<'tcx>) -> Vec { let mut wr = Cursor::new(Vec::new()); tyencode::enc_ty(&mut Encoder::new(&mut wr), &tyencode::ctxt { diag: tcx.sess.diagnostic(), @@ -2158,5 +2158,5 @@ pub fn encoded_ty<'tcx>(tcx: &ty::ctxt<'tcx>, t: Ty<'tcx>) -> String { tcx: tcx, abbrevs: &RefCell::new(FnvHashMap()) }, t); - String::from_utf8(wr.into_inner()).unwrap() + wr.into_inner() } diff --git a/src/librustc/metadata/tydecode.rs b/src/librustc/metadata/tydecode.rs index abc0429e7d2..8e5b09b81c7 100644 --- a/src/librustc/metadata/tydecode.rs +++ b/src/librustc/metadata/tydecode.rs @@ -125,6 +125,12 @@ impl<'a,'tcx> TyDecoder<'a,'tcx> { return &self.data[start_pos..end_pos]; } + fn parse_vuint(&mut self) -> usize { + let res = rbml::reader::vuint_at(self.data, self.pos).unwrap(); + self.pos = res.next; + res.val + } + fn parse_name(&mut self, last: char) -> ast::Name { fn is_last(b: char, c: char) -> bool { return c == b; } let bytes = self.scan(|a| is_last(last, a)); @@ -405,11 +411,8 @@ impl<'a,'tcx> TyDecoder<'a,'tcx> { // we return it (modulo closure types, see below). But if not, then we // jump to offset 123 and read the type from there. - let pos = self.parse_hex(); - assert_eq!(self.next(), ':'); - let len = self.parse_hex(); - assert_eq!(self.next(), '#'); - let key = ty::CReaderCacheKey {cnum: self.krate, pos: pos, len: len }; + let pos = self.parse_vuint(); + let key = ty::CReaderCacheKey { cnum: self.krate, pos: pos }; match tcx.rcache.borrow().get(&key).cloned() { Some(tt) => { // If there is a closure buried in the type some where, then we @@ -508,19 +511,6 @@ impl<'a,'tcx> TyDecoder<'a,'tcx> { subst::ParamSpace::from_uint(self.parse_uint()) } - fn parse_hex(&mut self) -> usize { - let mut n = 0; - loop { - let cur = self.peek(); - if (cur < '0' || cur > '9') && (cur < 'a' || cur > 'f') { return n; } - self.pos = self.pos + 1; - n *= 16; - if '0' <= cur && cur <= '9' { - n += (cur as usize) - ('0' as usize); - } else { n += 10 + (cur as usize) - ('a' as usize); } - }; - } - fn parse_abi_set(&mut self) -> abi::Abi { assert_eq!(self.next(), '['); let bytes = self.scan(|c| c == ']'); diff --git a/src/librustc/metadata/tyencode.rs b/src/librustc/metadata/tyencode.rs index 489d213879c..4a9257b60c4 100644 --- a/src/librustc/metadata/tyencode.rs +++ b/src/librustc/metadata/tyencode.rs @@ -14,7 +14,7 @@ #![allow(non_camel_case_types)] use std::cell::RefCell; -use std::str; +use std::io::Cursor; use std::io::prelude::*; use middle::def_id::DefId; @@ -31,7 +31,7 @@ use syntax::abi::Abi; use syntax::ast; use syntax::diagnostic::SpanHandler; -use rbml::writer::Encoder; +use rbml::writer::{self, Encoder}; macro_rules! mywrite { ($w:expr, $($arg:tt)*) => ({ write!($w.writer, $($arg)*); }) } @@ -48,14 +48,14 @@ pub struct ctxt<'a, 'tcx: 'a> { // Extra parameters are for converting to/from def_ids in the string rep. // Whatever format you choose should not contain pipe characters. pub struct ty_abbrev { - s: String + s: Vec } pub type abbrev_map<'tcx> = RefCell, ty_abbrev>>; pub fn enc_ty<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, t: Ty<'tcx>) { match cx.abbrevs.borrow_mut().get(&t) { - Some(a) => { w.writer.write_all(a.s.as_bytes()); return; } + Some(a) => { w.writer.write_all(&a.s); return; } None => {} } @@ -167,23 +167,20 @@ pub fn enc_ty<'a, 'tcx>(w: &mut Encoder, cx: &ctxt<'a, 'tcx>, t: Ty<'tcx>) { let end = w.mark_stable_position(); let len = end - pos; - fn estimate_sz(u: u64) -> u64 { - let mut n = u; - let mut len = 0; - while n != 0 { len += 1; n = n >> 4; } - return len; - } - let abbrev_len = 3 + estimate_sz(pos) + estimate_sz(len); + + let buf: &mut [u8] = &mut [0; 16]; // vuint < 15 bytes + let mut abbrev = Cursor::new(buf); + abbrev.write_all(b"#"); + writer::write_vuint(&mut abbrev, pos as usize); + cx.abbrevs.borrow_mut().insert(t, ty_abbrev { - s: if abbrev_len < len { - format!("#{:x}:{:x}#", pos, len) + s: if abbrev.position() < len { + abbrev.get_ref()[..abbrev.position() as usize].to_owned() } else { // if the abbreviation is longer than the real type, // don't use #-notation. However, insert it here so // other won't have to `mark_stable_position` - str::from_utf8( - &w.writer.get_ref()[pos as usize..end as usize] - ).unwrap().to_owned() + w.writer.get_ref()[pos as usize..end as usize].to_owned() } }); } diff --git a/src/librustc/middle/ty/mod.rs b/src/librustc/middle/ty/mod.rs index 7ec39619d9c..45522c4e457 100644 --- a/src/librustc/middle/ty/mod.rs +++ b/src/librustc/middle/ty/mod.rs @@ -366,7 +366,6 @@ pub type MethodMap<'tcx> = FnvHashMap>; pub struct CReaderCacheKey { pub cnum: CrateNum, pub pos: usize, - pub len: usize } /// A restriction that certain types must be the same size. The use of diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs index 2e18b50a45c..3a98df0bdfd 100644 --- a/src/librustc_trans/back/link.rs +++ b/src/librustc_trans/back/link.rs @@ -214,7 +214,7 @@ fn symbol_hash<'tcx>(tcx: &ty::ctxt<'tcx>, symbol_hasher.input_str(&meta[..]); } symbol_hasher.input_str("-"); - symbol_hasher.input_str(&encoder::encoded_ty(tcx, t)); + symbol_hasher.input(&encoder::encoded_ty(tcx, t)); // Prefix with 'h' so that it never blends into adjacent digits let mut hash = String::from("h"); hash.push_str(&truncated_hash_result(symbol_hasher));