auto merge of #10670 : eddyb/rust/node-u32, r=alexcrichton

### Rationale

There is no reason to support more than 2³² nodes or names at this moment, as compiling something that big (even without considering the quadratic space usage of some analysis passes) would take at least **64GB**. Meanwhile, some people can't (or barely can) compile rustc because it requires almost **1.5GB**.

### Potential problems

Can someone confirm this doesn't affect metadata (de)serialization? I can't tell myself; I know nothing about it.

### Results

Some structures have a size reduction of 25% to 50%: [before](https://gist.github.com/luqmana/3a82a51fa9c86d9191fa) - [after](https://gist.github.com/eddyb/5a75f8973d3d8018afd3). Sadly, there isn't a massive change in the memory used for compiling stage2 librustc (it doesn't go over **1.4GB** as [before](http://huonw.github.io/isrustfastyet/mem/), but I can barely see the difference). However, my own testcase (previously peaking at **1.6GB** in typeck) shows a reduction of **200–400MB**.
2013-11-26 22:07:44 -08:00 · 2013-11-26 22:07:44 -08:00 · faf4c939fb
commit faf4c939fb
parent 82d9033b67 7ed27b5531
10 changed files with 74 additions and 78 deletions
--- a/src/librustc/driver/session.rs
+++ b/src/librustc/driver/session.rs
@ -28,7 +28,6 @@
 use syntax::parse::token;
 use syntax;

-use std::int;
 use std::hashmap::{HashMap,HashSet};

 #[deriving(Clone)]
@ -209,7 +208,7 @@ pub struct Session_ {
    building_library: @mut bool,
    working_dir: Path,
    lints: @mut HashMap<ast::NodeId, ~[(lint::lint, codemap::Span, ~str)]>,
-    node_id: @mut uint,
+    node_id: @mut ast::NodeId,
 }

 pub type Session = @Session_;
@ -274,13 +273,15 @@ pub fn add_lint(&self,
    pub fn next_node_id(&self) -> ast::NodeId {
        self.reserve_node_ids(1)
    }
-    pub fn reserve_node_ids(&self, count: uint) -> ast::NodeId {
+    pub fn reserve_node_ids(&self, count: ast::NodeId) -> ast::NodeId {
        let v = *self.node_id;
-        *self.node_id += count;
-        if v > (int::max_value as uint) {
-            self.bug("Input too large, ran out of node ids!");
+
+        match v.checked_add(&count) {
+            Some(next) => { *self.node_id = next; }
+            None => self.bug("Input too large, ran out of node ids!")
        }
-        v as int
+
+        v
    }
    pub fn diagnostic(&self) -> @mut diagnostic::span_handler {
        self.span_diagnostic
--- a/src/librustc/metadata/creader.rs
+++ b/src/librustc/metadata/creader.rs
@ -67,7 +67,7 @@ fn visit_item(&mut self, a:@ast::item, _:()) {

 #[deriving(Clone)]
 struct cache_entry {
-    cnum: int,
+    cnum: ast::CrateNum,
    span: Span,
    hash: @str,
    metas: @~[@ast::MetaItem]
@ -242,7 +242,7 @@ fn metas_with_ident(ident: @str, metas: ~[@ast::MetaItem])
 }

 fn existing_match(e: &Env, metas: &[@ast::MetaItem], hash: &str)
-               -> Option<int> {
+               -> Option<ast::CrateNum> {
    for c in e.crate_cache.iter() {
        if loader::metadata_matches(*c.metas, metas)
            && (hash.is_empty() || c.hash.as_slice() == hash) {
--- a/src/librustc/metadata/decoder.rs
+++ b/src/librustc/metadata/decoder.rs
@ -76,10 +76,10 @@ fn lookup_hash(d: ebml::Doc, eq_fn: |&[u8]| -> bool, hash: u64) ->

 pub type GetCrateDataCb<'self> = 'self |ast::CrateNum| -> Cmd;

-pub fn maybe_find_item(item_id: int, items: ebml::Doc) -> Option<ebml::Doc> {
-    fn eq_item(bytes: &[u8], item_id: int) -> bool {
+pub fn maybe_find_item(item_id: ast::NodeId, items: ebml::Doc) -> Option<ebml::Doc> {
+    fn eq_item(bytes: &[u8], item_id: ast::NodeId) -> bool {
        return u64_from_be_bytes(
-            bytes.slice(0u, 4u), 0u, 4u) as int
+            bytes.slice(0u, 4u), 0u, 4u) as ast::NodeId
            == item_id;
    }
    lookup_hash(items,
@ -87,7 +87,7 @@ fn eq_item(bytes: &[u8], item_id: int) -> bool {
                (item_id as i64).hash())
 }

-fn find_item(item_id: int, items: ebml::Doc) -> ebml::Doc {
+fn find_item(item_id: ast::NodeId, items: ebml::Doc) -> ebml::Doc {
    match maybe_find_item(item_id, items) {
       None => fail!("lookup_item: id not found: {}", item_id),
       Some(d) => d
@ -96,7 +96,7 @@ fn find_item(item_id: int, items: ebml::Doc) -> ebml::Doc {

 // Looks up an item in the given metadata and returns an ebml doc pointing
 // to the item data.
-pub fn lookup_item(item_id: int, data: @~[u8]) -> ebml::Doc {
+pub fn lookup_item(item_id: ast::NodeId, data: @~[u8]) -> ebml::Doc {
    let items = reader::get_doc(reader::Doc(data), tag_items);
    find_item(item_id, items)
 }
@ -343,7 +343,7 @@ fn item_name(intr: @ident_interner, item: ebml::Doc) -> ast::Ident {
    let string = name.as_str_slice();
    match intr.find_equiv(&string) {
        None => token::str_to_ident(string),
-        Some(val) => ast::Ident::new(val),
+        Some(val) => ast::Ident::new(val as ast::Name),
    }
 }

--- a/src/librustc/metadata/tydecode.rs
+++ b/src/librustc/metadata/tydecode.rs
@ -58,7 +58,7 @@ pub enum DefIdSource {

 pub struct PState<'self> {
    data: &'self [u8],
-    crate: int,
+    crate: ast::CrateNum,
    pos: uint,
    tcx: ty::ctxt
 }
@ -101,7 +101,7 @@ fn parse_ident_(st: &mut PState, is_last: |char| -> bool) -> ast::Ident {
    return st.tcx.sess.ident_of(rslt);
 }

-pub fn parse_state_from_data<'a>(data: &'a [u8], crate_num: int,
+pub fn parse_state_from_data<'a>(data: &'a [u8], crate_num: ast::CrateNum,
                             pos: uint, tcx: ty::ctxt) -> PState<'a> {
    PState {
        data: data,
@ -111,19 +111,19 @@ pub fn parse_state_from_data<'a>(data: &'a [u8], crate_num: int,
    }
 }

-pub fn parse_ty_data(data: &[u8], crate_num: int, pos: uint, tcx: ty::ctxt,
+pub fn parse_ty_data(data: &[u8], crate_num: ast::CrateNum, pos: uint, tcx: ty::ctxt,
                     conv: conv_did) -> ty::t {
    let mut st = parse_state_from_data(data, crate_num, pos, tcx);
    parse_ty(&mut st, conv)
 }

-pub fn parse_bare_fn_ty_data(data: &[u8], crate_num: int, pos: uint, tcx: ty::ctxt,
+pub fn parse_bare_fn_ty_data(data: &[u8], crate_num: ast::CrateNum, pos: uint, tcx: ty::ctxt,
                             conv: conv_did) -> ty::BareFnTy {
    let mut st = parse_state_from_data(data, crate_num, pos, tcx);
    parse_bare_fn_ty(&mut st, conv)
 }

-pub fn parse_trait_ref_data(data: &[u8], crate_num: int, pos: uint, tcx: ty::ctxt,
+pub fn parse_trait_ref_data(data: &[u8], crate_num: ast::CrateNum, pos: uint, tcx: ty::ctxt,
                            conv: conv_did) -> ty::TraitRef {
    let mut st = parse_state_from_data(data, crate_num, pos, tcx);
    parse_trait_ref(&mut st, conv)
@ -251,7 +251,7 @@ fn parse_region(st: &mut PState, conv: conv_did) -> ty::Region {
    match next(st) {
      'b' => {
        assert_eq!(next(st), '[');
-        let id = parse_uint(st) as int;
+        let id = parse_uint(st) as ast::NodeId;
        assert_eq!(next(st), '|');
        let br = parse_bound_region(st, |x,y| conv(x,y));
        assert_eq!(next(st), ']');
@ -259,7 +259,7 @@ fn parse_region(st: &mut PState, conv: conv_did) -> ty::Region {
      }
      'B' => {
        assert_eq!(next(st), '[');
-        let node_id = parse_uint(st) as int;
+        let node_id = parse_uint(st) as ast::NodeId;
        assert_eq!(next(st), '|');
        let index = parse_uint(st);
        assert_eq!(next(st), '|');
@ -268,7 +268,7 @@ fn parse_region(st: &mut PState, conv: conv_did) -> ty::Region {
      }
      'f' => {
        assert_eq!(next(st), '[');
-        let id = parse_uint(st) as int;
+        let id = parse_uint(st) as ast::NodeId;
        assert_eq!(next(st), '|');
        let br = parse_bound_region(st, |x,y| conv(x,y));
        assert_eq!(next(st), ']');
@ -276,7 +276,7 @@ fn parse_region(st: &mut PState, conv: conv_did) -> ty::Region {
                                    bound_region: br})
      }
      's' => {
-        let id = parse_uint(st) as int;
+        let id = parse_uint(st) as ast::NodeId;
        assert_eq!(next(st), '|');
        ty::ReScope(id)
      }
@ -539,7 +539,7 @@ fn parse_bare_fn_ty(st: &mut PState, conv: conv_did) -> ty::BareFnTy {

 fn parse_sig(st: &mut PState, conv: conv_did) -> ty::FnSig {
    assert_eq!(next(st), '[');
-    let id = parse_uint(st) as int;
+    let id = parse_uint(st) as ast::NodeId;
    assert_eq!(next(st), '|');
    let mut inputs = ~[];
    while peek(st) != ']' {
@ -572,12 +572,12 @@ pub fn parse_def_id(buf: &[u8]) -> ast::DefId {
    let def_part = buf.slice(colon_idx + 1u, len);

    let crate_num = match uint::parse_bytes(crate_part, 10u) {
-       Some(cn) => cn as int,
+       Some(cn) => cn as ast::CrateNum,
       None => fail!("internal error: parse_def_id: crate number expected, but found {:?}",
                     crate_part)
    };
    let def_num = match uint::parse_bytes(def_part, 10u) {
-       Some(dn) => dn as int,
+       Some(dn) => dn as ast::NodeId,
       None => fail!("internal error: parse_def_id: id expected, but found {:?}",
                     def_part)
    };
@ -585,7 +585,7 @@ pub fn parse_def_id(buf: &[u8]) -> ast::DefId {
 }

 pub fn parse_type_param_def_data(data: &[u8], start: uint,
-                                 crate_num: int, tcx: ty::ctxt,
+                                 crate_num: ast::CrateNum, tcx: ty::ctxt,
                                 conv: conv_did) -> ty::TypeParameterDef
 {
    let mut st = parse_state_from_data(data, crate_num, start, tcx);
--- a/src/librustc/middle/astencode.rs
+++ b/src/librustc/middle/astencode.rs
@ -161,8 +161,7 @@ fn reserve_id_range(sess: Session,
    // Handle the case of an empty range:
    if from_id_range.empty() { return from_id_range; }
    let cnt = from_id_range.max - from_id_range.min;
-    assert!(cnt >= 0);
-    let to_id_min = sess.reserve_node_ids(cnt as uint);
+    let to_id_min = sess.reserve_node_ids(cnt);
    let to_id_max = to_id_min + cnt;
    ast_util::id_range { min: to_id_min, max: to_id_max }
 }
@ -1204,7 +1203,7 @@ fn decode_side_tables(xcx: @ExtendedDecodeContext,
    let tbl_doc = ast_doc.get(c::tag_table as uint);
    reader::docs(tbl_doc, |tag, entry_doc| {
        let id0 = entry_doc.get(c::tag_table_id as uint).as_int();
-        let id = xcx.tr_id(id0);
+        let id = xcx.tr_id(id0 as ast::NodeId);

        debug!(">> Side table document with tag 0x{:x} \
                found for id {} (orig {})",
--- a/src/librustc/middle/ty.rs
+++ b/src/librustc/middle/ty.rs
@ -169,7 +169,7 @@ pub struct field_ty {
 // the types of AST nodes.
 #[deriving(Eq,IterBytes)]
 pub struct creader_cache_key {
-    cnum: int,
+    cnum: CrateNum,
    pos: uint,
    len: uint
 }
--- a/src/libsyntax/ast.rs
+++ b/src/libsyntax/ast.rs
@ -69,7 +69,7 @@ fn ne(&self, other: &Ident) -> bool {

 // this uint is a reference to a table stored in thread-local
 // storage.
-pub type SyntaxContext = uint;
+pub type SyntaxContext = u32;

 // the SCTable contains a table of SyntaxContext_'s. It
 // represents a flattened tree structure, to avoid having
@ -87,8 +87,8 @@ pub struct SCTable {
 }

 // NB: these must be placed in any SCTable...
-pub static EMPTY_CTXT : uint = 0;
-pub static ILLEGAL_CTXT : uint = 1;
+pub static EMPTY_CTXT : SyntaxContext = 0;
+pub static ILLEGAL_CTXT : SyntaxContext = 1;

 #[deriving(Eq, Encodable, Decodable,IterBytes)]
 pub enum SyntaxContext_ {
@ -109,10 +109,10 @@ pub enum SyntaxContext_ {

 /// A name is a part of an identifier, representing a string or gensym. It's
 /// the result of interning.
-pub type Name = uint;
+pub type Name = u32;

 /// A mark represents a unique id associated with a macro expansion
-pub type Mrk = uint;
+pub type Mrk = u32;

 impl<S:Encoder> Encodable<S> for Ident {
    fn encode(&self, s: &mut S) {
@ -163,9 +163,9 @@ pub struct PathSegment {
    types: OptVec<Ty>,
 }

-pub type CrateNum = int;
+pub type CrateNum = u32;

-pub type NodeId = int;
+pub type NodeId = u32;

 #[deriving(Clone, TotalEq, TotalOrd, Eq, Encodable, Decodable, IterBytes, ToStr)]
 pub struct DefId {
--- a/src/libsyntax/ast_util.rs
+++ b/src/libsyntax/ast_util.rs
@ -18,7 +18,7 @@
 use visit;

 use std::hashmap::HashMap;
-use std::int;
+use std::u32;
 use std::local_data;
 use std::num;
 use std::option;
@ -382,8 +382,8 @@ pub struct id_range {
 impl id_range {
    pub fn max() -> id_range {
        id_range {
-            min: int::max_value,
-            max: int::min_value,
+            min: u32::max_value,
+            max: u32::min_value,
        }
    }

@ -803,9 +803,9 @@ pub fn display_sctable(table : &SCTable) {


 /// Add a value to the end of a vec, return its index
-fn idx_push<T>(vec: &mut ~[T], val: T) -> uint {
+fn idx_push<T>(vec: &mut ~[T], val: T) -> u32 {
    vec.push(val);
-    vec.len() - 1
+    (vec.len() - 1) as u32
 }

 /// Resolve a syntax object to a name, per MTWT.
@ -917,7 +917,7 @@ pub fn mtwt_outer_mark(ctxt: SyntaxContext) -> Mrk {

 /// Push a name... unless it matches the one on top, in which
 /// case pop and discard (so two of the same marks cancel)
-pub fn xorPush(marks: &mut ~[uint], mark: uint) {
+pub fn xorPush(marks: &mut ~[Mrk], mark: Mrk) {
    if ((marks.len() > 0) && (getLast(marks) == mark)) {
        marks.pop();
    } else {
@ -927,7 +927,7 @@ pub fn xorPush(marks: &mut ~[uint], mark: uint) {

 // get the last element of a mutable array.
 // FIXME #4903: , must be a separate procedure for now.
-pub fn getLast(arr: &~[Mrk]) -> uint {
+pub fn getLast(arr: &~[Mrk]) -> Mrk {
    *arr.last()
 }

@ -1000,14 +1000,8 @@ fn ident_to_segment(id : &Ident) -> PathSegment {
        assert_eq!(s.clone(),~[14]);
    }

-    // convert a list of uints to an @[ident]
-    // (ignores the interner completely)
-    fn uints_to_idents (uints: &~[uint]) -> @~[Ident] {
-        @uints.map(|u| Ident {name:*u, ctxt: EMPTY_CTXT})
-    }
-
-    fn id (u : uint, s: SyntaxContext) -> Ident {
-        Ident{name:u, ctxt: s}
+    fn id(n: Name, s: SyntaxContext) -> Ident {
+        Ident {name: n, ctxt: s}
    }

    // because of the SCTable, I now need a tidy way of
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@ -502,12 +502,12 @@ fn mk_fresh_ident_interner() -> @ident_interner {
    @interner::StrInterner::prefill(init_vec)
 }

-static SELF_KEYWORD_NAME: uint = 8;
-static STATIC_KEYWORD_NAME: uint = 27;
-static STRICT_KEYWORD_START: uint = 32;
-static STRICT_KEYWORD_FINAL: uint = 65;
-static RESERVED_KEYWORD_START: uint = 66;
-static RESERVED_KEYWORD_FINAL: uint = 72;
+static SELF_KEYWORD_NAME: Name = 8;
+static STATIC_KEYWORD_NAME: Name = 27;
+static STRICT_KEYWORD_START: Name = 32;
+static STRICT_KEYWORD_FINAL: Name = 65;
+static RESERVED_KEYWORD_START: Name = 66;
+static RESERVED_KEYWORD_FINAL: Name = 72;

 // if an interner exists in TLS, return it. Otherwise, prepare a
 // fresh one.
--- a/src/libsyntax/util/interner.rs
+++ b/src/libsyntax/util/interner.rs
@ -12,15 +12,17 @@
 // allows bidirectional lookup; i.e. given a value, one can easily find the
 // type, and vice versa.

+use ast::Name;
+
 use std::cmp::Equiv;
 use std::hashmap::HashMap;

 pub struct Interner<T> {
-    priv map: @mut HashMap<T, uint>,
+    priv map: @mut HashMap<T, Name>,
    priv vect: @mut ~[T],
 }

-// when traits can extend traits, we should extend index<uint,T> to get []
+// when traits can extend traits, we should extend index<Name,T> to get []
 impl<T:Eq + IterBytes + Hash + Freeze + Clone + 'static> Interner<T> {
    pub fn new() -> Interner<T> {
        Interner {
@ -37,37 +39,37 @@ pub fn prefill(init: &[T]) -> Interner<T> {
        rv
    }

-    pub fn intern(&self, val: T) -> uint {
+    pub fn intern(&self, val: T) -> Name {
        match self.map.find(&val) {
            Some(&idx) => return idx,
            None => (),
        }

        let vect = &mut *self.vect;
-        let new_idx = vect.len();
+        let new_idx = vect.len() as Name;
        self.map.insert(val.clone(), new_idx);
        vect.push(val);
        new_idx
    }

-    pub fn gensym(&self, val: T) -> uint {
+    pub fn gensym(&self, val: T) -> Name {
        let new_idx = {
            let vect = &*self.vect;
-            vect.len()
+            vect.len() as Name
        };
        // leave out of .map to avoid colliding
        self.vect.push(val);
        new_idx
    }

-    pub fn get(&self, idx: uint) -> T {
+    pub fn get(&self, idx: Name) -> T {
        self.vect[idx].clone()
    }

    pub fn len(&self) -> uint { let vect = &*self.vect; vect.len() }

    pub fn find_equiv<Q:Hash + IterBytes + Equiv<T>>(&self, val: &Q)
-                                              -> Option<uint> {
+                                              -> Option<Name> {
        match self.map.find_equiv(val) {
            Some(v) => Some(*v),
            None => None,
@ -78,11 +80,11 @@ pub fn find_equiv<Q:Hash + IterBytes + Equiv<T>>(&self, val: &Q)
 // A StrInterner differs from Interner<String> in that it accepts
 // borrowed pointers rather than @ ones, resulting in less allocation.
 pub struct StrInterner {
-    priv map: @mut HashMap<@str, uint>,
+    priv map: @mut HashMap<@str, Name>,
    priv vect: @mut ~[@str],
 }

-// when traits can extend traits, we should extend index<uint,T> to get []
+// when traits can extend traits, we should extend index<Name,T> to get []
 impl StrInterner {
    pub fn new() -> StrInterner {
        StrInterner {
@ -97,21 +99,21 @@ pub fn prefill(init: &[&str]) -> StrInterner {
        rv
    }

-    pub fn intern(&self, val: &str) -> uint {
+    pub fn intern(&self, val: &str) -> Name {
        match self.map.find_equiv(&val) {
            Some(&idx) => return idx,
            None => (),
        }

-        let new_idx = self.len();
+        let new_idx = self.len() as Name;
        let val = val.to_managed();
        self.map.insert(val, new_idx);
        self.vect.push(val);
        new_idx
    }

-    pub fn gensym(&self, val: &str) -> uint {
-        let new_idx = self.len();
+    pub fn gensym(&self, val: &str) -> Name {
+        let new_idx = self.len() as Name;
        // leave out of .map to avoid colliding
        self.vect.push(val.to_managed());
        new_idx
@ -127,19 +129,19 @@ pub fn gensym(&self, val: &str) -> uint {

    // create a gensym with the same name as an existing
    // entry.
-    pub fn gensym_copy(&self, idx : uint) -> uint {
-        let new_idx = self.len();
+    pub fn gensym_copy(&self, idx : Name) -> Name {
+        let new_idx = self.len() as Name;
        // leave out of map to avoid colliding
        self.vect.push(self.vect[idx]);
        new_idx
    }

-    pub fn get(&self, idx: uint) -> @str { self.vect[idx] }
+    pub fn get(&self, idx: Name) -> @str { self.vect[idx] }

    pub fn len(&self) -> uint { let vect = &*self.vect; vect.len() }

    pub fn find_equiv<Q:Hash + IterBytes + Equiv<@str>>(&self, val: &Q)
-                                                         -> Option<uint> {
+                                                         -> Option<Name> {
        match self.map.find_equiv(val) {
            Some(v) => Some(*v),
            None => None,