rust/src/comp/metadata/encoder.rs

558 lines
20 KiB
Rust
Raw Normal View History

2011-06-27 18:03:01 -05:00
// Metadata encoding
import std::str;
import std::vec;
import std::uint;
import std::io;
import std::option;
import std::option::some;
import std::option::none;
import std::ebml;
import syntax::ast::*;
import tags::*;
import middle::trans::crate_ctxt;
import middle::trans::node_id_type;
import middle::ty;
2011-06-30 01:42:35 -05:00
import front::attr;
export def_to_str;
export hash_path;
export hash_def_id;
export encode_metadata;
// Path table encoding
fn encode_name(&ebml::writer ebml_w, &str name) {
ebml::start_tag(ebml_w, tag_paths_data_name);
ebml_w.writer.write(str::bytes(name));
ebml::end_tag(ebml_w);
}
fn encode_def_id(&ebml::writer ebml_w, &def_id id) {
ebml::start_tag(ebml_w, tag_def_id);
ebml_w.writer.write(str::bytes(def_to_str(id)));
ebml::end_tag(ebml_w);
}
fn encode_tag_variant_paths(&ebml::writer ebml_w, &vec[variant] variants,
&vec[str] path,
&mutable vec[tup(str, uint)] index) {
for (variant variant in variants) {
add_to_index(ebml_w, path, index, variant.node.name);
ebml::start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, variant.node.name);
encode_def_id(ebml_w, local_def(variant.node.id));
ebml::end_tag(ebml_w);
}
}
// Appends to `index` a pair of (`path` plus `name`, joined with "::") and
// the writer's current position.
fn add_to_index(&ebml::writer ebml_w, &vec[str] path,
                &mutable vec[tup(str, uint)] index, &str name) {
    auto qualified = str::connect(path + [name], "::");
    auto offset = ebml_w.writer.tell();
    index += [tup(qualified, offset)];
}
// Writes one tag_paths_data_item element (name + local def id) for every
// item of a native module, indexing each one under its full path first.
fn encode_native_module_item_paths(&ebml::writer ebml_w,
                                   &native_mod nmod, &vec[str] path,
                                   &mutable vec[tup(str, uint)] index) {
    for (@native_item ni in nmod.items) {
        auto ni_name = ni.ident;
        add_to_index(ebml_w, path, index, ni_name);
        ebml::start_tag(ebml_w, tag_paths_data_item);
        encode_name(ebml_w, ni_name);
        encode_def_id(ebml_w, local_def(ni.id));
        ebml::end_tag(ebml_w);
    }
}
fn encode_module_item_paths(&ebml::writer ebml_w, &_mod module,
&vec[str] path,
&mutable vec[tup(str, uint)] index) {
for (@item it in module.items) {
if (!is_exported(it.ident, module)) { cont; }
alt (it.node) {
case (item_const(_, _)) {
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(it.id));
ebml::end_tag(ebml_w);
}
case (item_fn(_, ?tps)) {
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(it.id));
ebml::end_tag(ebml_w);
}
case (item_mod(?_mod)) {
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_mod);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(it.id));
encode_module_item_paths(ebml_w, _mod, path + [it.ident],
index);
ebml::end_tag(ebml_w);
}
case (item_native_mod(?nmod)) {
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_mod);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(it.id));
encode_native_module_item_paths(ebml_w, nmod,
path + [it.ident], index);
ebml::end_tag(ebml_w);
}
case (item_ty(_, ?tps)) {
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(it.id));
ebml::end_tag(ebml_w);
}
case (item_res(_, _, ?tps, ?ctor_id)) {
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(ctor_id));
ebml::end_tag(ebml_w);
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(it.id));
ebml::end_tag(ebml_w);
}
case (item_tag(?variants, ?tps)) {
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(it.id));
ebml::end_tag(ebml_w);
encode_tag_variant_paths(ebml_w, variants, path, index);
}
case (item_obj(_, ?tps, ?ctor_id)) {
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(ctor_id));
ebml::end_tag(ebml_w);
add_to_index(ebml_w, path, index, it.ident);
ebml::start_tag(ebml_w, tag_paths_data_item);
encode_name(ebml_w, it.ident);
encode_def_id(ebml_w, local_def(it.id));
ebml::end_tag(ebml_w);
}
}
}
}
// Writes a tag_paths element containing the path entries of the crate's
// top-level module (and, recursively, its contents) and returns the list of
// (full path, offset) pairs collected while doing so.
fn encode_item_paths(&ebml::writer ebml_w, &@crate crate) ->
   vec[tup(str, uint)] {
    let vec[tup(str, uint)] collected = [];
    // The crate root has an empty path prefix.
    let vec[str] root_path = [];
    ebml::start_tag(ebml_w, tag_paths);
    encode_module_item_paths(ebml_w, crate.node.module, root_path,
                             collected);
    ebml::end_tag(ebml_w);
    ret collected;
}
// Item info table encoding
fn encode_kind(&ebml::writer ebml_w, u8 c) {
ebml::start_tag(ebml_w, tag_items_data_item_kind);
ebml_w.writer.write([c]);
ebml::end_tag(ebml_w);
}
fn def_to_str(&def_id did) -> str { ret #fmt("%d:%d", did._0, did._1); }
fn encode_type_param_count(&ebml::writer ebml_w, &vec[ty_param] tps) {
ebml::start_tag(ebml_w, tag_items_data_item_ty_param_count);
ebml::write_vint(ebml_w.writer, vec::len[ty_param](tps));
ebml::end_tag(ebml_w);
}
fn encode_variant_id(&ebml::writer ebml_w, &def_id vid) {
ebml::start_tag(ebml_w, tag_items_data_item_variant);
ebml_w.writer.write(str::bytes(def_to_str(vid)));
ebml::end_tag(ebml_w);
}
fn encode_type(&@crate_ctxt cx, &ebml::writer ebml_w, &ty::t typ) {
ebml::start_tag(ebml_w, tag_items_data_item_type);
auto f = def_to_str;
auto ty_str_ctxt =
@rec(ds=f, tcx=cx.tcx,
abbrevs=tyencode::ac_use_abbrevs(cx.type_abbrevs));
tyencode::enc_ty(io::new_writer_(ebml_w.writer), ty_str_ctxt, typ);
ebml::end_tag(ebml_w);
}
fn encode_symbol(&@crate_ctxt cx, &ebml::writer ebml_w,
node_id id) {
ebml::start_tag(ebml_w, tag_items_data_item_symbol);
ebml_w.writer.write(str::bytes(cx.item_symbols.get(id)));
ebml::end_tag(ebml_w);
}
fn encode_discriminant(&@crate_ctxt cx, &ebml::writer ebml_w,
node_id id) {
ebml::start_tag(ebml_w, tag_items_data_item_symbol);
ebml_w.writer.write(str::bytes(cx.discrim_symbols.get(id)));
ebml::end_tag(ebml_w);
}
fn encode_tag_id(&ebml::writer ebml_w, &def_id id) {
ebml::start_tag(ebml_w, tag_items_data_item_tag_id);
ebml_w.writer.write(str::bytes(def_to_str(id)));
ebml::end_tag(ebml_w);
}
fn encode_tag_variant_info(&@crate_ctxt cx, &ebml::writer ebml_w,
node_id id, &vec[variant] variants,
&mutable vec[tup(int, uint)] index,
&vec[ty_param] ty_params) {
for (variant variant in variants) {
index += [tup(variant.node.id, ebml_w.writer.tell())];
ebml::start_tag(ebml_w, tag_items_data_item);
encode_def_id(ebml_w, local_def(variant.node.id));
encode_kind(ebml_w, 'v' as u8);
encode_tag_id(ebml_w, local_def(id));
encode_type(cx, ebml_w, node_id_type(cx, variant.node.id));
if (vec::len[variant_arg](variant.node.args) > 0u) {
encode_symbol(cx, ebml_w, variant.node.id);
}
encode_discriminant(cx, ebml_w, variant.node.id);
encode_type_param_count(ebml_w, ty_params);
ebml::end_tag(ebml_w);
}
}
// Writes the item-info entries for a single item.
//
// Every entry is a tag_items_data_item element that starts with a def id
// and a one-byte kind; depending on the item it also carries a
// type-parameter count, a type and a symbol. The kinds written here are:
//   'c' const, 'p' pure fn, 'f' impure fn / constructor fn,
//   'm' module, 'n' native module, 'y' type, 't' tag.
//
// This function does not index the first entry it writes; within this file
// encode_info_for_items records that offset under the item's node id before
// calling. Additional entries (tag variants, and the constructor of a
// resource or object) are added to `index` here at their own offsets.
fn encode_info_for_item(@crate_ctxt cx, &ebml::writer ebml_w,
                        @item item, &mutable vec[tup(int, uint)] index) {
    alt (item.node) {
        case (item_const(_, _)) {
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(item.id));
            encode_kind(ebml_w, 'c' as u8);
            encode_type(cx, ebml_w, node_id_type(cx, item.id));
            encode_symbol(cx, ebml_w, item.id);
            ebml::end_tag(ebml_w);
        }
        case (item_fn(?fd, ?tps)) {
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(item.id));
            // The kind byte distinguishes pure from impure functions.
            encode_kind(ebml_w, alt (fd.decl.purity) {
                                  case (pure_fn) { 'p' }
                                  case (impure_fn) { 'f' } } as u8);
            encode_type_param_count(ebml_w, tps);
            encode_type(cx, ebml_w, node_id_type(cx, item.id));
            encode_symbol(cx, ebml_w, item.id);
            ebml::end_tag(ebml_w);
        }
        case (item_mod(_)) {
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(item.id));
            encode_kind(ebml_w, 'm' as u8);
            ebml::end_tag(ebml_w);
        }
        case (item_native_mod(_)) {
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(item.id));
            encode_kind(ebml_w, 'n' as u8);
            ebml::end_tag(ebml_w);
        }
        case (item_ty(_, ?tps)) {
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(item.id));
            encode_kind(ebml_w, 'y' as u8);
            encode_type_param_count(ebml_w, tps);
            encode_type(cx, ebml_w, node_id_type(cx, item.id));
            ebml::end_tag(ebml_w);
        }
        case (item_tag(?variants, ?tps)) {
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(item.id));
            encode_kind(ebml_w, 't' as u8);
            encode_type_param_count(ebml_w, tps);
            encode_type(cx, ebml_w, node_id_type(cx, item.id));
            // The tag's own entry lists the ids of all of its variants ...
            for (variant v in variants) {
                encode_variant_id(ebml_w, local_def(v.node.id));
            }
            ebml::end_tag(ebml_w);
            // ... and each variant then gets a full entry of its own.
            encode_tag_variant_info(cx, ebml_w, item.id, variants, index,
                                    tps);
        }
        case (item_res(_, _, ?tps, ?ctor_id)) {
            auto fn_ty = node_id_type(cx, ctor_id);

            // Type entry: the resource's type is the return type of its
            // constructor function.
            // NOTE(review): this entry carries ctor_id as its def id and
            // also a symbol for item.id, whereas the item_obj case below
            // uses item.id and writes no symbol -- confirm the asymmetry is
            // intended.
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(ctor_id));
            encode_kind(ebml_w, 'y' as u8);
            encode_type_param_count(ebml_w, tps);
            encode_type(cx, ebml_w, ty::ty_fn_ret(cx.tcx, fn_ty));
            encode_symbol(cx, ebml_w, item.id);
            ebml::end_tag(ebml_w);

            // Constructor entry, indexed under the constructor's id.
            index += [tup(ctor_id, ebml_w.writer.tell())];
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(ctor_id));
            encode_kind(ebml_w, 'f' as u8);
            encode_type_param_count(ebml_w, tps);
            encode_type(cx, ebml_w, fn_ty);
            encode_symbol(cx, ebml_w, ctor_id);
            ebml::end_tag(ebml_w);
        }
        case (item_obj(_, ?tps, ?ctor_id)) {
            auto fn_ty = node_id_type(cx, ctor_id);

            // Type entry: the object's type is the return type of its
            // constructor function.
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(item.id));
            encode_kind(ebml_w, 'y' as u8);
            encode_type_param_count(ebml_w, tps);
            encode_type(cx, ebml_w, ty::ty_fn_ret(cx.tcx, fn_ty));
            ebml::end_tag(ebml_w);

            // Constructor entry, indexed under the constructor's id.
            index += [tup(ctor_id, ebml_w.writer.tell())];
            ebml::start_tag(ebml_w, tag_items_data_item);
            encode_def_id(ebml_w, local_def(ctor_id));
            encode_kind(ebml_w, 'f' as u8);
            encode_type_param_count(ebml_w, tps);
            encode_type(cx, ebml_w, fn_ty);
            encode_symbol(cx, ebml_w, ctor_id);
            ebml::end_tag(ebml_w);
        }
    }
}
// Writes the tag_items_data_item entry for one native item.
//
// A native type gets kind 'T' and a type built with ty::mk_native; a native
// function gets kind 'F', its type-parameter count, its type and its symbol.
fn encode_info_for_native_item(&@crate_ctxt cx, &ebml::writer ebml_w,
                               &@native_item nitem) {
    ebml::start_tag(ebml_w, tag_items_data_item);
    alt (nitem.node) {
        case (native_item_ty) {
            encode_def_id(ebml_w, local_def(nitem.id));
            encode_kind(ebml_w, 'T' as u8);
            auto native_ty = ty::mk_native(cx.tcx, local_def(nitem.id));
            encode_type(cx, ebml_w, native_ty);
        }
        case (native_item_fn(_, _, ?tps)) {
            encode_def_id(ebml_w, local_def(nitem.id));
            encode_kind(ebml_w, 'F' as u8);
            encode_type_param_count(ebml_w, tps);
            encode_type(cx, ebml_w, node_id_type(cx, nitem.id));
            encode_symbol(cx, ebml_w, nitem.id);
        }
    }
    ebml::end_tag(ebml_w);
}
// Writes a tag_items_data element containing the info entries of every item
// and native item found in cx.ast_map, and returns the (node id, offset)
// pairs describing where each entry starts. Other kinds of AST node in the
// map are skipped.
fn encode_info_for_items(&@crate_ctxt cx, &ebml::writer ebml_w) ->
   vec[tup(int, uint)] {
    let vec[tup(int, uint)] item_index = [];
    ebml::start_tag(ebml_w, tag_items_data);
    for each (@tup(node_id, middle::ast_map::ast_node) entry in
              cx.ast_map.items()) {
        alt (entry._1) {
            case (middle::ast_map::node_item(?it)) {
                // Index the entry before writing it so the offset points at
                // its first byte.
                item_index += [tup(entry._0, ebml_w.writer.tell())];
                encode_info_for_item(cx, ebml_w, it, item_index);
            }
            case (middle::ast_map::node_native_item(?ni)) {
                item_index += [tup(entry._0, ebml_w.writer.tell())];
                encode_info_for_native_item(cx, ebml_w, ni);
            }
            case (_) { }
        }
    }
    ebml::end_tag(ebml_w);
    ret item_index;
}
// Path and definition ID indexing
// hash_path is djb's cdb string hash; hash_def_id simply XORs the id with a
// constant.
fn hash_def_id(&int def_id) -> uint { ret 177573u ^ (def_id as uint); }
fn hash_path(&str s) -> uint {
auto h = 5381u;
for (u8 ch in str::bytes(s)) { h = (h << 5u) + h ^ (ch as uint); }
ret h;
}
// Distributes the entries of `index` over 256 buckets. An entry goes into
// bucket number hash_fn(key) % 256; entries keep their relative order within
// a bucket. The result always has exactly 256 (possibly empty) buckets.
fn create_index[T](&vec[tup(T, uint)] index, fn(&T) -> uint hash_fn) ->
   vec[vec[tup(T, uint)]] {
    let vec[mutable vec[tup(T, uint)]] table = vec::empty_mut();
    for each (uint i in uint::range(0u, 256u)) {
        table += [mutable []];
    }
    for (tup(T, uint) entry in index) {
        auto slot = hash_fn(entry._0) % 256u;
        table.(slot) += [entry];
    }
    ret vec::freeze(table);
}
// Writes a hashed index as a tag_index element with two children:
//
//   tag_index_buckets - one tag_index_buckets_bucket element per bucket, in
//                       order; every entry of a bucket becomes a
//                       tag_index_buckets_bucket_elt element holding the
//                       entry's position (write_be_uint with size 4)
//                       followed by the key as written by `write_fn`.
//   tag_index_table   - for each bucket, in order, the writer position at
//                       which that bucket's element starts (write_be_uint
//                       with size 4).
//
// NOTE(review): write_be_uint(.., 4u) is presumably a 4-byte big-endian
// encoding, so positions are assumed to fit in 32 bits -- confirm.
fn encode_index[T](&ebml::writer ebml_w, &vec[vec[tup(T, uint)]] buckets,
fn(&io::writer, &T) write_fn) {
// `writer` wraps the same underlying ebml_w.writer that the start_tag /
// end_tag calls below operate on.
auto writer = io::new_writer_(ebml_w.writer);
ebml::start_tag(ebml_w, tag_index);
let vec[uint] bucket_locs = [];
ebml::start_tag(ebml_w, tag_index_buckets);
for (vec[tup(T, uint)] bucket in buckets) {
// Remember where this bucket starts; the table written below refers to it.
bucket_locs += [ebml_w.writer.tell()];
ebml::start_tag(ebml_w, tag_index_buckets_bucket);
for (tup(T, uint) elt in bucket) {
ebml::start_tag(ebml_w, tag_index_buckets_bucket_elt);
// Position first, then the key.
writer.write_be_uint(elt._1, 4u);
write_fn(writer, elt._0);
ebml::end_tag(ebml_w);
}
ebml::end_tag(ebml_w);
}
ebml::end_tag(ebml_w);
// Table of bucket start positions, one per bucket in bucket order.
ebml::start_tag(ebml_w, tag_index_table);
for (uint pos in bucket_locs) { writer.write_be_uint(pos, 4u); }
ebml::end_tag(ebml_w);
ebml::end_tag(ebml_w);
}
// Key writer for string-keyed indices: forwards `s` to writer.write_str.
fn write_str(&io::writer writer, &str s) {
    writer.write_str(s);
}
fn write_int(&io::writer writer, &int n) {
writer.write_be_uint(n as uint, 4u);
}
fn encode_meta_item(&ebml::writer ebml_w, &meta_item mi) {
alt (mi.node) {
case (meta_word(?name)) {
ebml::start_tag(ebml_w, tag_meta_item_word);
ebml::start_tag(ebml_w, tag_meta_item_name);
ebml_w.writer.write(str::bytes(name));
ebml::end_tag(ebml_w);
ebml::end_tag(ebml_w);
}
case (meta_name_value(?name, ?value)) {
ebml::start_tag(ebml_w, tag_meta_item_name_value);
ebml::start_tag(ebml_w, tag_meta_item_name);
ebml_w.writer.write(str::bytes(name));
ebml::end_tag(ebml_w);
ebml::start_tag(ebml_w, tag_meta_item_value);
ebml_w.writer.write(str::bytes(value));
ebml::end_tag(ebml_w);
ebml::end_tag(ebml_w);
}
case (meta_list(?name, ?items)) {
ebml::start_tag(ebml_w, tag_meta_item_list);
ebml::start_tag(ebml_w, tag_meta_item_name);
ebml_w.writer.write(str::bytes(name));
ebml::end_tag(ebml_w);
for (@meta_item inner_item in items) {
encode_meta_item(ebml_w, *inner_item);
}
ebml::end_tag(ebml_w);
}
}
}
fn encode_attributes(&ebml::writer ebml_w, &vec[attribute] attrs) {
ebml::start_tag(ebml_w, tag_attributes);
for (attribute attr in attrs) {
ebml::start_tag(ebml_w, tag_attribute);
encode_meta_item(ebml_w, attr.node.value);
ebml::end_tag(ebml_w);
}
ebml::end_tag(ebml_w);
}
// So there's a special crate attribute called 'link' which defines the
// metadata that Rust cares about for linking crates. This attribute requires
2011-06-30 19:03:08 -05:00
// 'name' and 'vers' items, so if the user didn't provide them we will throw
// them in anyway with default values.
fn synthesize_crate_attrs(&@crate_ctxt cx,
&@crate crate) -> vec[attribute] {
fn synthesize_link_attr(&@crate_ctxt cx,
&vec[@meta_item] items)
-> attribute {
2011-06-30 19:03:08 -05:00
assert cx.link_meta.name != "";
assert cx.link_meta.vers != "";
2011-06-30 19:03:08 -05:00
auto name_item = attr::mk_name_value_item("name",
cx.link_meta.name);
auto vers_item = attr::mk_name_value_item("vers",
cx.link_meta.vers);
auto other_items = {
auto tmp = attr::remove_meta_items_by_name(items, "name");
attr::remove_meta_items_by_name(tmp, "vers")
};
2011-06-30 19:03:08 -05:00
auto meta_items = [name_item] + [vers_item] + other_items;
auto link_item = attr::mk_list_item("link", meta_items);
2011-06-30 19:03:08 -05:00
ret attr::mk_attr(link_item);
}
let vec[attribute] attrs = [];
auto found_link_attr = false;
for (attribute attr in crate.node.attrs) {
attrs += if (attr::get_attr_name(attr) != "link") {
[attr]
} else {
alt (attr.node.value.node) {
case (meta_list(?n, ?l)) {
found_link_attr = true;
[synthesize_link_attr(cx, l)]
}
case (_) { [attr] }
}
}
}
if (!found_link_attr) {
attrs += [synthesize_link_attr(cx, [])];
}
ret attrs;
}
// Serializes the crate's metadata into a string.
//
// The output consists of, in order: the (synthesized) crate attributes, a
// tag_paths element holding the item paths and their hashed index, a
// tag_items element holding the item info and its hashed index, and four
// zero bytes of padding.
fn encode_metadata(&@crate_ctxt cx, &@crate crate) -> str {
    auto string_w = io::string_writer();
    auto buf_w = string_w.get_writer().get_buf_writer();
    auto ebml_w = ebml::create_writer(buf_w);

    auto crate_attrs = synthesize_crate_attrs(cx, crate);
    encode_attributes(ebml_w, crate_attrs);

    // Encode and index the paths. Note that encode_item_paths opens a
    // tag_paths element of its own inside this one.
    auto emit_str_key = write_str;
    auto hash_str_key = hash_path;
    ebml::start_tag(ebml_w, tag_paths);
    auto path_entries = encode_item_paths(ebml_w, crate);
    auto path_buckets = create_index[str](path_entries, hash_str_key);
    encode_index[str](ebml_w, path_buckets, emit_str_key);
    ebml::end_tag(ebml_w);

    // Encode and index the items.
    auto emit_int_key = write_int;
    auto hash_int_key = hash_def_id;
    ebml::start_tag(ebml_w, tag_items);
    auto item_entries = encode_info_for_items(cx, ebml_w);
    auto item_buckets = create_index[int](item_entries, hash_int_key);
    encode_index[int](ebml_w, item_buckets, emit_int_key);
    ebml::end_tag(ebml_w);

    // Pad this, since something (LLVM, presumably) is cutting off the
    // remaining % 4 bytes.
    buf_w.write([0u8, 0u8, 0u8, 0u8]);
    ret string_w.get_str();
}
2011-06-27 21:16:16 -05:00
// Local Variables:
// mode: rust
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
// End: