rustc: Write out a path index as well

This commit is contained in:
Patrick Walton 2011-03-31 17:38:03 -07:00
parent 1b65a61cad
commit fffc5d3b3d
2 changed files with 121 additions and 64 deletions

View File

@ -268,14 +268,14 @@ fn parse_def_id(vec[u8] buf) -> ast.def_id {
auto found = false;
while (ebml.bytes_left(ebml_r) > 0u && !found) {
auto ebml_tag = ebml.peek(ebml_r);
if ((ebml_tag.id == metadata.tag_paths_item) ||
(ebml_tag.id == metadata.tag_paths_mod)) {
if ((ebml_tag.id == metadata.tag_paths_data_item) ||
(ebml_tag.id == metadata.tag_paths_data_mod)) {
ebml.move_to_first_child(ebml_r);
auto did_opt = none[ast.def_id];
auto name_opt = none[ast.ident];
while (ebml.bytes_left(ebml_r) > 0u) {
auto inner_tag = ebml.peek(ebml_r);
if (inner_tag.id == metadata.tag_paths_name) {
if (inner_tag.id == metadata.tag_paths_data_name) {
ebml.move_to_first_child(ebml_r);
auto name_data = ebml.read_data(ebml_r);
ebml.move_to_parent(ebml_r);
@ -335,8 +335,8 @@ fn parse_def_id(vec[u8] buf) -> ast.def_id {
impure fn move_to_item(&ebml.reader ebml_r, int item_id) {
ebml.move_to_sibling_with_id(ebml_r, metadata.tag_items);
ebml.move_to_child_with_id(ebml_r, metadata.tag_items_index);
ebml.move_to_child_with_id(ebml_r, metadata.tag_items_index_table);
ebml.move_to_child_with_id(ebml_r, metadata.tag_index);
ebml.move_to_child_with_id(ebml_r, metadata.tag_index_table);
ebml.move_to_first_child(ebml_r);
// Move to the bucket.
@ -347,11 +347,10 @@ fn parse_def_id(vec[u8] buf) -> ast.def_id {
ebml.reset_reader(ebml_r, bucket_pos);
// Search to find the item ID in the bucket.
check (ebml.peek(ebml_r).id == metadata.tag_items_index_buckets_bucket);
check (ebml.peek(ebml_r).id == metadata.tag_index_buckets_bucket);
ebml.move_to_first_child(ebml_r);
while (ebml.bytes_left(ebml_r) > 0u) {
if (ebml.peek(ebml_r).id ==
metadata.tag_items_index_buckets_bucket_elt) {
if (ebml.peek(ebml_r).id == metadata.tag_index_buckets_bucket_elt) {
ebml.move_to_first_child(ebml_r);
auto pos = ebml_r.reader.read_be_uint(4u);
auto this_item_id = ebml_r.reader.read_be_uint(4u) as int;

View File

@ -19,27 +19,28 @@
const uint tag_paths = 0x01u;
const uint tag_items = 0x02u;
const uint tag_paths_name = 0x03u;
const uint tag_paths_item = 0x04u;
const uint tag_paths_mod = 0x05u;
const uint tag_paths_data = 0x03u;
const uint tag_paths_data_name = 0x04u;
const uint tag_paths_data_item = 0x05u;
const uint tag_paths_data_mod = 0x06u;
const uint tag_def_id = 0x06u;
const uint tag_def_id = 0x07u;
const uint tag_items_data = 0x07u;
const uint tag_items_data_item = 0x08u;
const uint tag_items_data_item_kind = 0x09u;
const uint tag_items_data_item_ty_param = 0x0au;
const uint tag_items_data_item_type = 0x0bu;
const uint tag_items_data_item_symbol = 0x0cu;
const uint tag_items_data_item_variant = 0x0du;
const uint tag_items_data_item_tag_id = 0x0eu;
const uint tag_items_data_item_obj_type_id = 0x0fu;
const uint tag_items_data = 0x08u;
const uint tag_items_data_item = 0x09u;
const uint tag_items_data_item_kind = 0x0au;
const uint tag_items_data_item_ty_param = 0x0bu;
const uint tag_items_data_item_type = 0x0cu;
const uint tag_items_data_item_symbol = 0x0du;
const uint tag_items_data_item_variant = 0x0eu;
const uint tag_items_data_item_tag_id = 0x0fu;
const uint tag_items_data_item_obj_type_id = 0x10u;
const uint tag_items_index = 0x10u;
const uint tag_items_index_buckets = 0x11u;
const uint tag_items_index_buckets_bucket = 0x12u;
const uint tag_items_index_buckets_bucket_elt = 0x13u;
const uint tag_items_index_table = 0x14u;
const uint tag_index = 0x11u;
const uint tag_index_buckets = 0x12u;
const uint tag_index_buckets_bucket = 0x13u;
const uint tag_index_buckets_bucket_elt = 0x14u;
const uint tag_index_table = 0x15u;
// Type encoding
@ -164,7 +165,7 @@ fn C_postr(str s) -> ValueRef {
// Path table encoding
fn encode_name(&ebml.writer ebml_w, str name) {
ebml.start_tag(ebml_w, tag_paths_name);
ebml.start_tag(ebml_w, tag_paths_data_name);
ebml_w.writer.write(_str.bytes(name));
ebml.end_tag(ebml_w);
}
@ -177,25 +178,37 @@ fn encode_def_id(&ebml.writer ebml_w, &ast.def_id id) {
fn encode_tag_variant_paths(&ebml.writer ebml_w, vec[ast.variant] variants) {
for (ast.variant variant in variants) {
ebml.start_tag(ebml_w, tag_paths_item);
ebml.start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, variant.node.name);
encode_def_id(ebml_w, variant.node.id);
ebml.end_tag(ebml_w);
}
}
fn add_to_index(&ebml.writer ebml_w,
vec[str] path,
&mutable vec[tup(str, uint)] index,
str name) {
auto full_path = path + vec(name);
index += vec(tup(_str.connect(full_path, "."), ebml_w.writer.tell()));
}
fn encode_native_module_item_paths(&ebml.writer ebml_w,
&ast.native_mod nmod) {
&ast.native_mod nmod,
vec[str] path,
&mutable vec[tup(str, uint)] index) {
for (@ast.native_item nitem in nmod.items) {
alt (nitem.node) {
case (ast.native_item_ty(?id, ?did)) {
ebml.start_tag(ebml_w, tag_paths_item);
add_to_index(ebml_w, path, index, id);
ebml.start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, id);
encode_def_id(ebml_w, did);
ebml.end_tag(ebml_w);
}
case (ast.native_item_fn(?id, _, _, _, ?did, _)) {
ebml.start_tag(ebml_w, tag_paths_item);
add_to_index(ebml_w, path, index, id);
ebml.start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, id);
encode_def_id(ebml_w, did);
ebml.end_tag(ebml_w);
@ -204,51 +217,62 @@ fn encode_native_module_item_paths(&ebml.writer ebml_w,
}
}
fn encode_module_item_paths(&ebml.writer ebml_w, &ast._mod module) {
fn encode_module_item_paths(&ebml.writer ebml_w,
&ast._mod module,
vec[str] path,
&mutable vec[tup(str, uint)] index) {
// TODO: only encode exported items
for (@ast.item it in module.items) {
alt (it.node) {
case (ast.item_const(?id, _, ?tps, ?did, ?ann)) {
ebml.start_tag(ebml_w, tag_paths_item);
add_to_index(ebml_w, path, index, id);
ebml.start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, id);
encode_def_id(ebml_w, did);
ebml.end_tag(ebml_w);
}
case (ast.item_fn(?id, _, ?tps, ?did, ?ann)) {
ebml.start_tag(ebml_w, tag_paths_item);
add_to_index(ebml_w, path, index, id);
ebml.start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, id);
encode_def_id(ebml_w, did);
ebml.end_tag(ebml_w);
}
case (ast.item_mod(?id, ?_mod, ?did)) {
ebml.start_tag(ebml_w, tag_paths_mod);
add_to_index(ebml_w, path, index, id);
ebml.start_tag(ebml_w, tag_paths_data_mod);
encode_name(ebml_w, id);
encode_def_id(ebml_w, did);
encode_module_item_paths(ebml_w, _mod);
encode_module_item_paths(ebml_w, _mod, path + vec(id), index);
ebml.end_tag(ebml_w);
}
case (ast.item_native_mod(?id, ?nmod, ?did)) {
ebml.start_tag(ebml_w, tag_paths_mod);
add_to_index(ebml_w, path, index, id);
ebml.start_tag(ebml_w, tag_paths_data_mod);
encode_name(ebml_w, id);
encode_def_id(ebml_w, did);
encode_native_module_item_paths(ebml_w, nmod);
encode_native_module_item_paths(ebml_w, nmod, path + vec(id),
index);
ebml.end_tag(ebml_w);
}
case (ast.item_ty(?id, _, ?tps, ?did, ?ann)) {
ebml.start_tag(ebml_w, tag_paths_item);
add_to_index(ebml_w, path, index, id);
ebml.start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, id);
encode_def_id(ebml_w, did);
ebml.end_tag(ebml_w);
}
case (ast.item_tag(?id, ?variants, ?tps, ?did)) {
ebml.start_tag(ebml_w, tag_paths_item);
add_to_index(ebml_w, path, index, id);
ebml.start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, id);
encode_tag_variant_paths(ebml_w, variants);
encode_def_id(ebml_w, did);
ebml.end_tag(ebml_w);
}
case (ast.item_obj(?id, _, ?tps, ?odid, ?ann)) {
ebml.start_tag(ebml_w, tag_paths_item);
add_to_index(ebml_w, path, index, id);
ebml.start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, id);
encode_def_id(ebml_w, odid.ctor);
encode_obj_type_id(ebml_w, odid.ty);
@ -258,10 +282,14 @@ fn encode_module_item_paths(&ebml.writer ebml_w, &ast._mod module) {
}
}
fn encode_item_paths(&ebml.writer ebml_w, @ast.crate crate) {
fn encode_item_paths(&ebml.writer ebml_w, @ast.crate crate)
-> vec[tup(str, uint)] {
let vec[tup(str, uint)] index = vec();
let vec[str] path = vec();
ebml.start_tag(ebml_w, tag_paths);
encode_module_item_paths(ebml_w, crate.node.module);
encode_module_item_paths(ebml_w, crate.node.module, path, index);
ebml.end_tag(ebml_w);
ret index;
}
@ -442,51 +470,61 @@ fn encode_info_for_items(@trans.crate_ctxt cx, &ebml.writer ebml_w)
}
// Definition ID indexing
// Path and definition ID indexing
fn hash_def_num(int def_num) -> uint {
// djb's cdb hashes.
fn hash_def_num(&int def_num) -> uint {
ret 177573u ^ (def_num as uint);
}
fn create_index(vec[tup(int, uint)] index) -> vec[vec[tup(int, uint)]] {
let vec[vec[tup(int, uint)]] buckets = vec();
fn hash_path(&str s) -> uint {
auto h = 5381u;
for (u8 ch in _str.bytes(s)) {
h = ((h << 5u) + h) ^ (ch as uint);
}
ret h;
}
fn create_index[T](vec[tup(T, uint)] index, fn(&T) -> uint hash_fn)
-> vec[vec[tup(T, uint)]] {
let vec[vec[tup(T, uint)]] buckets = vec();
for each (uint i in _uint.range(0u, 256u)) {
let vec[tup(int, uint)] bucket = vec();
let vec[tup(T, uint)] bucket = vec();
buckets += vec(bucket);
}
for (tup(int, uint) elt in index) {
auto h = hash_def_num(elt._0);
for (tup(T, uint) elt in index) {
auto h = hash_fn(elt._0);
buckets.(h % 256u) += vec(elt);
}
ret buckets;
}
impure fn encode_index(&ebml.writer ebml_w, vec[tup(int, uint)] index) {
impure fn encode_index[T](&ebml.writer ebml_w, vec[vec[tup(T, uint)]] buckets,
impure fn(io.writer, &T) write_fn) {
auto writer = io.new_writer_(ebml_w.writer);
auto buckets = create_index(index);
ebml.start_tag(ebml_w, tag_items_index);
ebml.start_tag(ebml_w, tag_index);
let vec[uint] bucket_locs = vec();
ebml.start_tag(ebml_w, tag_items_index_buckets);
for (vec[tup(int, uint)] bucket in buckets) {
ebml.start_tag(ebml_w, tag_index_buckets);
for (vec[tup(T, uint)] bucket in buckets) {
bucket_locs += vec(ebml_w.writer.tell());
ebml.start_tag(ebml_w, tag_items_index_buckets_bucket);
for (tup(int, uint) elt in bucket) {
ebml.start_tag(ebml_w, tag_items_index_buckets_bucket_elt);
ebml.start_tag(ebml_w, tag_index_buckets_bucket);
for (tup(T, uint) elt in bucket) {
ebml.start_tag(ebml_w, tag_index_buckets_bucket_elt);
writer.write_be_uint(elt._1, 4u);
writer.write_be_uint(elt._0 as uint, 4u);
write_fn(writer, elt._0);
ebml.end_tag(ebml_w);
}
ebml.end_tag(ebml_w);
}
ebml.end_tag(ebml_w);
ebml.start_tag(ebml_w, tag_items_index_table);
ebml.start_tag(ebml_w, tag_index_table);
for (uint pos in bucket_locs) {
writer.write_be_uint(pos, 4u);
}
@ -496,17 +534,37 @@ fn create_index(vec[tup(int, uint)] index) -> vec[vec[tup(int, uint)]] {
}
impure fn write_str(io.writer writer, &str s) {
writer.write_str(s);
}
impure fn write_int(io.writer writer, &int n) {
writer.write_be_uint(n as uint, 4u);
}
impure fn encode_metadata(@trans.crate_ctxt cx, @ast.crate crate)
-> ValueRef {
auto string_w = io.string_writer();
auto buf_w = string_w.get_writer().get_buf_writer();
auto ebml_w = ebml.create_writer(buf_w);
encode_item_paths(ebml_w, crate);
// Encode and index the paths.
ebml.start_tag(ebml_w, tag_paths);
auto paths_index = encode_item_paths(ebml_w, crate);
auto str_writer = write_str;
auto path_hasher = hash_path;
auto paths_buckets = create_index[str](paths_index, path_hasher);
encode_index[str](ebml_w, paths_buckets, str_writer);
ebml.end_tag(ebml_w);
// Encode and index the items.
ebml.start_tag(ebml_w, tag_items);
auto index = encode_info_for_items(cx, ebml_w);
encode_index(ebml_w, index);
auto items_index = encode_info_for_items(cx, ebml_w);
auto int_writer = write_int;
auto item_hasher = hash_def_num;
auto items_buckets = create_index[int](items_index, item_hasher);
encode_index[int](ebml_w, items_buckets, int_writer);
ebml.end_tag(ebml_w);
ret C_postr(string_w.get_str());