Code sketch and comment notes on textual crate signatures.

This commit is contained in:
Graydon Hoare 2010-11-03 09:50:25 -07:00
parent 7abf6eb7c6
commit 1e22a55ff2
4 changed files with 244 additions and 0 deletions

View File

@ -1,4 +1,5 @@
open Common;;
open Fmt;;
let log (sess:Session.sess) =
Session.log "lib"
@ -12,6 +13,224 @@ let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit =
else ()
;;
(*
* Stuff associated with 'crate interfaces'.
*
* The interface of a crate used to be defined by the accompanying DWARF
* structure in the object file. This was an experiment -- we talked to
* DWARF hackers beforehand and they thought it worth trying -- which did
* work, and had the advantage of economy of metadata-emitting, but several
* downsides:
*
* - The reader -- which we want a copy of at runtime in the linker -- has
* to know how to read DWARF. It's not the simplest format.
*
* - The complexity of the encoding meant we didn't always keep pace with
* the AST, and maintaining any degree of inter-change compatibility was
* going to be a serious challenge.
*
* - Diagnostic tools are atrocious, as is the definition of
* well-formedness. It's largely trial and error when talking to gdb,
* say.
*
* - Because it was doing double-duty as *driving linkage*, we were never
* going to get to the linkage efficiency of native symbols (hash
* lookup) anyway. Runtime linkage -- even when lazy -- really ought to
* be fast.
*
* - LLVM, our "main" backend (in rustc) does not really want to make
* promises about preserving dwarf.
*
* - LLVM also *is* going to emit native symbols; complete with relocs and
* such. We'd actually have to do *extra work* to inhibit that.
*
* - Most tools are set up to think of DWARF as "debug", meaning
* "optional", and may well strip it or otherwise mangle it.
*
* - Many tools want native symbols anyways, and don't know how to look at
* DWARF.
*
* - All the tooling arguments go double on win32. Pretty much only
* objdump and gdb understand DWARF-in-PE. Everything else is just blank
* stares.
*
* For all these reasons we're moving to a self-made format for describing
* our interfaces. This will be stored in the .note.rust section as we
* presently store the meta tags. The encoding is ASCII-compatible (the set
* of "numbers" to encode is small enough, especially compared to dwarf,
* that we can just use a text form) and is very easy to read with a simple
* byte-at-a-time parser.
*
*)
(*
* Encoding goals:
*
* - Simple. Minimal state or read-ambiguity in reader.
*
* - Compact. Shouldn't add a lot to the size of the binary to glue this
* on to it.
*
* - Front-end-y. Doesn't need to contain much beyond parse-level of the
* crate's exported items; it'll be fed into the front-end of the
* pipeline anyway. No need to have all types or names resolved.
*
* - Testable. Human-legible and easy to identify/fix/test errors in.
*
* - Very fast to read the 'identifying' prefix (version, meta tags, hash)
*
* - Tolerably fast to read in its entirety.
*
* - Safe from version-drift (or at least able to notice it and abort).
*
* Anti-goals:
*
* - Random access.
*
* - Generality to other languages.
*
* Structure:
*
* - Line oriented.
*
* - Whitespace-separated and whitespace-agnostic. Indent for legibility.
*
* - Each line is a record. A record is either a full item, an item bracket,
* a comment, or metadata.
*
* - First byte describes type of record, unless first byte is +, in which
* case it's oh-no-we-ran-out-of-tags and it's followed by 2 type-bytes.
* (Continue to +++ if you happen to run out *there* as well. You
* won't.)
*
* - Metadata type is !
*
* - Comment type is #
*
* - Full item types are: y for type, c for const, f for fn, i for iter,
* g for tag constructor.
*
* - Item brackets are those that open/close a scope of
* sub-records. These would be obj (o), mod (m), tag (t) to open. The
* closer is always '.'. So a mod looks like:
*
* m foo
* c bar
* .
*
* - After first byte of openers and full items is whitespace, then an
* ident.
*
* - After that, if it's a ty, fn, iter, obj or tag, there may be [, a
* list of comma-separated ty param names, and ].
*
* - After that, if it's a fn, iter, obj or tag constructor, there is a (,
* a list of comma-separated type-encoded slot/ident pairs, and a ).
*
* - After that, if it's a fn or iter, there's a '->' and a type-encoded
* output.
*
* - After that, a newline '\n'.
*
* - Type encoding is a longer issue! We'll get to that.
*)
(*
 * Write the textual interface of [crate] to [ff], one record per module
 * item, walking nested modules recursively. Items are visited through
 * sorted_htab_iter.
 *
 * This is still a sketch: the type and name encoders below are defined
 * but not yet called from the item printer, and any type form that has
 * no encoding yet is written as a single Z.
 *)
let fmt_iface (ff:Format.formatter) (crate:Ast.crate) : unit =

  (* A type parameter is written as the first component of its node. *)
  let fmt_ty_param ff (param:Ast.ty_param identified) : unit =
    let (name, _) = param.node in
      fmt ff "%s" name
  in

  (* Type encoder: a short tag per type form, recursing into components. *)
  let rec fmt_ty ff (t:Ast.ty) : unit =
    match t with
        (* Tags with no payload. *)
        Ast.TY_any -> fmt ff "a"
      | Ast.TY_nil -> fmt ff "n"
      | Ast.TY_bool -> fmt ff "b"
      | Ast.TY_mach tm -> fmt ff "%s" (string_of_ty_mach tm)
      | Ast.TY_int -> fmt ff "i"
      | Ast.TY_uint -> fmt ff "u"
      | Ast.TY_char -> fmt ff "c"
      | Ast.TY_str -> fmt ff "s"

      (* Tuples: parenthesised, comma-separated component types. *)
      | Ast.TY_tup ttup ->
          fmt_bracketed_arr_sep "(" ")" "," fmt_ty ff ttup

      (* Single-argument constructors: tag, then the argument in [ ]. *)
      | Ast.TY_vec elt ->
          begin
            fmt ff "v[";
            fmt_ty ff elt;
            fmt ff "]"
          end
      | Ast.TY_chan elt ->
          begin
            fmt ff "C[";
            fmt_ty ff elt;
            fmt ff "]"
          end
      | Ast.TY_port elt ->
          begin
            fmt ff "P[";
            fmt_ty ff elt;
            fmt ff "]"
          end

      | Ast.TY_task -> fmt ff "T"
      | Ast.TY_named name ->
          begin
            fmt ff ":";
            fmt_name ff name
          end
      | Ast.TY_type -> fmt ff "Y"

      (* Prefix markers followed by the inner type. *)
      | Ast.TY_box inner ->
          begin
            fmt ff "@@";
            fmt_ty ff inner
          end
      | Ast.TY_mutable inner ->
          begin
            fmt ff "~";
            fmt_ty ff inner
          end

      (* FIXME: finish this. *)
      | Ast.TY_rec _
      | Ast.TY_tag _
      | Ast.TY_fn _
      | Ast.TY_obj _
      | Ast.TY_native _
      | Ast.TY_param _
      | Ast.TY_constrained _ -> fmt ff "Z"

  (* Name encoder: dot-separated components; indices are written as _N. *)
  and fmt_name ff n =
    match n with
        Ast.NAME_base (Ast.BASE_ident id) ->
          fmt ff "%s" id
      | Ast.NAME_base (Ast.BASE_temp _) ->
          failwith "temp in fmt_name"
      | Ast.NAME_base (Ast.BASE_app (id, tys)) ->
          fmt ff "%s" id;
          fmt_ty_args ff tys
      | Ast.NAME_ext (prefix, Ast.COMP_ident id) ->
          fmt_name ff prefix;
          fmt ff ".%s" id
      | Ast.NAME_ext (prefix, Ast.COMP_app (id, tys)) ->
          fmt_name ff prefix;
          fmt ff ".%s" id;
          fmt_ty_args ff tys
      | Ast.NAME_ext (prefix, Ast.COMP_idx idx) ->
          fmt_name ff prefix;
          fmt ff "._%d" idx

  (* Type-argument list of an applied name: [t1,t2,...]. *)
  and fmt_ty_args ff tys =
    fmt_bracketed_arr_sep "[" "]" "," fmt_ty ff tys
  in

  let rec fmt_mod_item (id:Ast.ident) (mi:Ast.mod_item) : unit =
    let decl = mi.node in

    (* Full item record: forced newline, tag byte, space, ident. *)
    let full_item tag = fmt ff "@\n%c %s" tag id in

    (*
     * Opening bracket record. fmt_obox / fmt_cbox come from Fmt;
     * presumably they open and close an indentation box -- confirm there.
     *)
    let open_bracket tag =
      fmt ff "@\n";
      fmt_obox ff;
      fmt ff "%c %s" tag id
    in

    (* Type parameter list; nothing is written when there are none. *)
    let ty_params () =
      match Array.length decl.Ast.decl_params with
          0 -> ()
        | _ ->
            fmt_bracketed_arr_sep "[" "]" ","
              fmt_ty_param ff decl.Ast.decl_params
    in

    (* Closing bracket record: a lone dot on its own line. *)
    let close_bracket () =
      fmt_cbox ff;
      fmt ff "@\n."
    in

      match decl.Ast.decl_item with
          Ast.MOD_ITEM_type _ ->
            begin
              full_item 'y';
              ty_params ()
            end
        | Ast.MOD_ITEM_tag _ ->
            begin
              full_item 'g';
              ty_params ()
            end
        | Ast.MOD_ITEM_fn _ ->
            begin
              full_item 'f';
              ty_params ()
            end
        | Ast.MOD_ITEM_const _ ->
            full_item 'c'
        | Ast.MOD_ITEM_obj _ ->
            begin
              open_bracket 'o';
              ty_params ();
              close_bracket ()
            end
        | Ast.MOD_ITEM_mod (_, sub_items) ->
            begin
              open_bracket 'm';
              fmt_mod_items sub_items;
              close_bracket ()
            end

  and fmt_mod_items items =
    sorted_htab_iter (fun id mi -> fmt_mod_item id mi) items
  in

  let (_, top_items) = crate.node.Ast.crate_items in
    fmt_mod_items top_items
;;
(* Mechanisms for scanning libraries. *)
(* FIXME (issue #67): move these to sess. *)
(*
 * Module-level mutable hash table, created with an initial size hint of 0.
 * NOTE(review): presumably memoizes per-archive lookups for the library
 * scanning code -- confirm against its users.
 *)
let ar_cache = Hashtbl.create 0 ;;
(*
 * Second module-level hash table, same construction as ar_cache.
 * NOTE(review): presumably memoizes object-file section lookups -- confirm
 * against its users.
 *)
let sects_cache = Hashtbl.create 0;;

View File

@ -33,6 +33,7 @@ let (sess:Session.sess) =
Session.sess_log_lex = false;
Session.sess_log_parse = false;
Session.sess_log_ast = false;
Session.sess_log_sig = false;
Session.sess_log_passes = false;
Session.sess_log_resolve = false;
Session.sess_log_type = false;
@ -116,6 +117,7 @@ let dump_sig (filename:filename) : unit =
exit 0
;;
let dump_meta (filename:filename) : unit =
begin
match Lib.get_meta sess filename with
@ -168,6 +170,8 @@ let argspecs =
"-lparse" "log parsing");
(flag (fun _ -> sess.Session.sess_log_ast <- true)
"-last" "log AST");
(flag (fun _ -> sess.Session.sess_log_sig <- true)
"-lsig" "log signature");
(flag (fun _ -> sess.Session.sess_log_passes <- true)
"-lpasses" "log passes at high-level");
(flag (fun _ -> sess.Session.sess_log_resolve <- true)
@ -358,6 +362,17 @@ then
Format.set_margin 80;
Printf.fprintf stdout "%s\n" (Fmt.fmt_to_str Ast.fmt_crate crate)
end
;;
(*
 * When signature logging is enabled (the -lsig flag sets sess_log_sig),
 * print a header line and then the crate interface as rendered by
 * Lib.fmt_iface. The margin is set before rendering, as the AST log does.
 *)
if sess.Session.sess_log_sig
then
  begin
    Printf.printf "Post-parse signature:\n";
    Format.set_margin 80;
    let signature = Fmt.fmt_to_str Lib.fmt_iface crate in
      Printf.printf "%s\n" signature
  end
;;
(* Convert the list [ls] to an array and wrap it in an Asm.SEQ node. *)
let list_to_seq ls =
  let elems = Array.of_list ls in
    Asm.SEQ elems
;;
let select_insns (quads:Il.quads) : Asm.frag =

View File

@ -19,6 +19,7 @@ type sess =
mutable sess_log_lex: bool;
mutable sess_log_parse: bool;
mutable sess_log_ast: bool;
mutable sess_log_sig: bool;
mutable sess_log_passes: bool;
mutable sess_log_resolve: bool;
mutable sess_log_type: bool;

View File

@ -218,6 +218,15 @@ let sorted_htab_keys (tab:('a, 'b) Hashtbl.t) : 'a array =
keys
;;
(*
 * Apply [f] to each key of [tab], in the order given by sorted_htab_keys,
 * together with the binding Hashtbl.find returns for that key.
 *)
let sorted_htab_iter
    (f:'a -> 'b -> unit)
    (tab:('a, 'b) Hashtbl.t)
    : unit =
  let keys = sorted_htab_keys tab in
  let visit key = f key (Hashtbl.find tab key) in
    Array.iter visit keys
;;
(*
 * Collect every value bound in [htab] into a list. The list is built by
 * consing in Hashtbl.fold traversal order, so no ordering is promised.
 *)
let htab_vals (htab:('a,'b) Hashtbl.t) : ('b list) =
  let prepend _ value rest = value :: rest in
    Hashtbl.fold prepend htab []
;;