From 1e22a55ff25612753665a76eb3b83628379334a0 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Wed, 3 Nov 2010 09:50:25 -0700 Subject: [PATCH] Code sketch and comment notes on textual crate signatures. --- src/boot/driver/lib.ml | 219 +++++++++++++++++++++++++++++++++++++ src/boot/driver/main.ml | 15 +++ src/boot/driver/session.ml | 1 + src/boot/util/common.ml | 9 ++ 4 files changed, 244 insertions(+) diff --git a/src/boot/driver/lib.ml b/src/boot/driver/lib.ml index 0d0e93be15e..a4769e83ed1 100644 --- a/src/boot/driver/lib.ml +++ b/src/boot/driver/lib.ml @@ -1,4 +1,5 @@ open Common;; +open Fmt;; let log (sess:Session.sess) = Session.log "lib" @@ -12,6 +13,224 @@ let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = else () ;; +(* + * Stuff associated with 'crate interfaces'. + * + * The interface of a crate used to be defined by the accompanying DWARF + * structure in the object file. This was an experiment -- we talked to + * DWARF hackers beforehand and they thought it worth trying -- which did + * work, and had the advantage of economy of metadata-emitting, but several + * downsides: + * + * - The reader -- which we want a copy of at runtime in the linker -- has + * to know how to read DWARF. It's not the simplest format. + * + * - The complexity of the encoding meant we didn't always keep pace with + * the AST, and maintaining any degree of inter-change compatibility was + * going to be a serious challenge. + * + * - Diagnostic tools are atrocious, as is the definition of + * well-formedness. It's largely trial and error when talking to gdb, + * say. + * + * - Because it was doing double-duty as *driving linkage*, we were never + * going to get to the linkage efficiency of native symbols (hash + * lookup) anyway. Runtime linkage -- even when lazy -- really ought to + * be fast. + * + * - LLVM, our "main" backend (in rustc) does not really want to make + * promises about preserving dwarf. 
+ * + * - LLVM also *is* going to emit native symbols; complete with relocs and + * such. We'd actually have to do *extra work* to inhibit that. + * + * - Most tools are set up to think of DWARF as "debug", meaning + * "optional", and may well strip it or otherwise mangle it. + * + * - Many tools want native symbols anyways, and don't know how to look at + * DWARF. + * + * - All the tooling arguments go double on win32. Pretty much only + * objdump and gdb understand DWARF-in-PE. Everything else is just blank + * stares. + * + * For all these reasons we're moving to a self-made format for describing + * our interfaces. This will be stored in the .note.rust section as we + * presently store the meta tags. The encoding is ASCII-compatible (the set + * of "numbers" to encode is small enough, especially compared to dwarf, + * that we can just use a text form) and is very easy to read with a simple + * byte-at-a-time parser. + * + *) + +(* + * Encoding goals: + * + * - Simple. Minimal state or read-ambiguity in reader. + * + * - Compact. Shouldn't add a lot to the size of the binary to glue this + * on to it. + * + * - Front-end-y. Doesn't need to contain much beyond parse-level of the + * crate's exported items; it'll be fed into the front-end of the + * pipeline anyway. No need to have all types or names resolved. + * + * - Testable. Human-legible and easy to identify/fix/test errors in. + * + * - Very fast to read the 'identifying' prefix (version, meta tags, hash) + * + * - Tolerably fast to read in its entirety. + * + * - Safe from version-drift (or at least able to notice it and abort). + * + * Anti-goals: + * + * - Random access. + * + * - Generality to other languages. + * + * Structure: + * + * - Line oriented. + * + * - Whitespace-separated and whitespace-agnostic. Indent for legibility. + * + * - Each line is a record. A record is either a full item, an item bracket, + * a comment, or metadata. 
+ * + * - First byte describes type of record, unless first byte is +, in which + * case it's oh-no-we-ran-out-of-tags and it's followed by 2 type-bytes. + * (Continue to +++ if you happen to run out *there* as well. You + * won't.) + * + * - Metadata type is ! + * + * - Comment type is # + * + * - Full item types are: y for type, c for const, f for fn, i for iter, + * g for tag constructor. + * + * - Item brackets are those that open/close a scope of + * sub-records. These would be obj (o), mod (m), tag (t) to open. The + * closer is always '.'. So a mod looks like: + * + * m foo + * c bar + * . + * + * - After first byte of openers and full items is whitespace, then an + * ident. + * + * - After that, if it's a ty, fn, iter, obj or tag, there may be [, a + * list of comma-separated ty param names, and ]. + * + * - After that, if it's a fn, iter, obj or tag constructor, there is a (, + * a list of comma-separated type-encoded slot/ident pairs, and a ). + * + * - After that, if it's a fn or iter, there's a '->' and a type-encoded + * output. + * + * - After that, a newline '\n'. + * + * - Type encoding is a longer issue! We'll get to that. 
+ *)
+(* fmt_iface: print a first-cut textual interface of [crate] on [ff].
+ *
+ * The items of the crate (second component of crate.node.Ast.crate_items)
+ * are visited with sorted_htab_iter.  Each item starts a new line holding a
+ * one-character record tag and the item's name:
+ *
+ *     y  MOD_ITEM_type      c  MOD_ITEM_const
+ *     g  MOD_ITEM_tag       o  MOD_ITEM_obj
+ *     f  MOD_ITEM_fn        m  MOD_ITEM_mod
+ *
+ * For y, g, f and o the name is followed by the item's type parameters,
+ * when it has any.  o and m additionally open a scope that is closed by a
+ * "." on a line of its own; a mod's items are printed recursively inside
+ * that scope, while nothing is printed inside an obj's scope yet.
+ *
+ * Slots and types are not part of the output so far: fmt_ty and fmt_name
+ * are defined here but the item printer does not call them.
+ *
+ * NOTE(review): fmt, fmt_obox, fmt_cbox and fmt_bracketed_arr_sep come from
+ * the Fmt module, which is not visible here.  This description assumes fmt
+ * follows Format.fprintf conventions ("@\n" forces a line break, "@@"
+ * prints a single @) and that fmt_bracketed_arr_sep takes the opening
+ * bracket, closing bracket and separator in that order -- confirm.
+ *)
+let fmt_iface (ff:Format.formatter) (crate:Ast.crate) : unit =
+  let fmt_ty_param ff (p:Ast.ty_param identified) : unit =
+    fmt ff "%s" (fst p.node)    (* only the first component is printed *)
+  in
+  let rec fmt_ty ff (t:Ast.ty) : unit =    (* short codes for types *)
+    match t with
+        Ast.TY_any -> fmt ff "a"
+      | Ast.TY_nil -> fmt ff "n"
+      | Ast.TY_bool -> fmt ff "b"
+      | Ast.TY_mach tm -> fmt ff "%s" (string_of_ty_mach tm)
+      | Ast.TY_int -> fmt ff "i"
+      | Ast.TY_uint -> fmt ff "u"
+      | Ast.TY_char -> fmt ff "c"
+      | Ast.TY_str -> fmt ff "s"
+      (* Types with components are encoded recursively. *)
+      | Ast.TY_tup ttup ->
+          fmt_bracketed_arr_sep "(" ")" ","
+            fmt_ty ff ttup
+      | Ast.TY_vec ty ->
+          fmt ff "v["; fmt_ty ff ty; fmt ff "]"
+      | Ast.TY_chan ty ->
+          fmt ff "C["; fmt_ty ff ty; fmt ff "]"
+
+      | Ast.TY_port ty ->
+          fmt ff "P["; fmt_ty ff ty; fmt ff "]"
+
+      | Ast.TY_task ->
+          fmt ff "T"
+
+      | Ast.TY_named n -> fmt ff ":"; fmt_name ff n
+      | Ast.TY_type -> fmt ff "Y"
+
+      | Ast.TY_box t -> fmt ff "@@"; fmt_ty ff t    (* presumably one @ *)
+      | Ast.TY_mutable t -> fmt ff "~"; fmt_ty ff t
+
+      (* FIXME: finish this. *)
+      | Ast.TY_rec _
+      | Ast.TY_tag _
+      | Ast.TY_fn _
+      | Ast.TY_obj _
+      | Ast.TY_native _
+      | Ast.TY_param _
+      | Ast.TY_constrained _ -> fmt ff "Z"    (* shared placeholder code *)
+  (* fmt_name: the base identifier followed by "."-prefixed components.
+   * Type arguments of BASE_app / COMP_app are handed to
+   * fmt_bracketed_arr_sep with "[" "]" ","; a COMP_idx component prints
+   * as _N.  A BASE_temp base raises Failure. *)
+  and fmt_name ff n =
+    match n with
+        Ast.NAME_base (Ast.BASE_ident id) -> fmt ff "%s" id
+      | Ast.NAME_base (Ast.BASE_temp _) -> failwith "temp in fmt_name"
+      | Ast.NAME_base (Ast.BASE_app (id, tys)) ->
+          fmt ff "%s" id;
+          fmt_bracketed_arr_sep "[" "]" ","
+            fmt_ty ff tys;
+      | Ast.NAME_ext (n, Ast.COMP_ident id) ->
+          fmt_name ff n;
+          fmt ff ".%s" id
+      | Ast.NAME_ext (n, Ast.COMP_app (id, tys)) ->
+          fmt_name ff n;
+          fmt ff ".%s" id;
+          fmt_bracketed_arr_sep "[" "]" ","
+            fmt_ty ff tys;
+      | Ast.NAME_ext (n, Ast.COMP_idx i) ->
+          fmt_name ff n;
+          fmt ff "._%d" i
+  in
+  let rec fmt_mod_item (id:Ast.ident) (mi:Ast.mod_item) : unit =
+    let i c = fmt ff "@\n%c %s" c id in    (* i: tag c and name, new line *)
+    (* o: like i, but calls fmt_obox between the line break and the tag. *)
+    let o c = fmt ff "@\n"; fmt_obox ff; fmt ff "%c %s" c id in
+    let p _ =    (* p: type parameters; prints nothing when there are none *)
+      if (Array.length mi.node.Ast.decl_params) <> 0
+      then
+        fmt_bracketed_arr_sep "[" "]" ","
+          fmt_ty_param ff mi.node.Ast.decl_params
+    in
+    let c _ = fmt_cbox ff; fmt ff "@\n." in    (* c: end the scope o began *)
+      match mi.node.Ast.decl_item with
+          Ast.MOD_ITEM_type _ -> i 'y'; p()
+        | Ast.MOD_ITEM_tag _ -> i 'g'; p()
+        | Ast.MOD_ITEM_fn _ -> i 'f'; p();
+        | Ast.MOD_ITEM_const _ -> i 'c'
+        | Ast.MOD_ITEM_obj _ ->
+            o 'o'; p();
+            c ()
+        | Ast.MOD_ITEM_mod (_, items) ->
+            o 'm';
+            fmt_mod_items items;
+            c ()
+  and fmt_mod_items items =
+    sorted_htab_iter fmt_mod_item items
+  in
+  let (_,items) = crate.node.Ast.crate_items in
+    fmt_mod_items items
+;;
+
+(* Mechanisms for scanning libraries. *)
+
 (* FIXME (issue #67): move these to sess. *)
 let ar_cache = Hashtbl.create 0 ;;
 let sects_cache = Hashtbl.create 0;;
diff --git a/src/boot/driver/main.ml b/src/boot/driver/main.ml
index 30310b100f0..82057766b37 100644
--- a/src/boot/driver/main.ml
+++ b/src/boot/driver/main.ml
@@ -33,6 +33,7 @@ let (sess:Session.sess) =
     Session.sess_log_lex = false;
     Session.sess_log_parse = false;
     Session.sess_log_ast = false;
+    Session.sess_log_sig = false;
     Session.sess_log_passes = false;
     Session.sess_log_resolve = false;
     Session.sess_log_type = false;
@@ -116,6 +117,7 @@ let dump_sig (filename:filename) : unit =
   exit 0
 ;;
 
+
 let dump_meta (filename:filename) : unit =
   begin
     match Lib.get_meta sess filename with
@@ -168,6 +170,8 @@ let argspecs =
        "-lparse" "log parsing");
     (flag (fun _ -> sess.Session.sess_log_ast <- true)
        "-last" "log AST");
+    (flag (fun _ -> sess.Session.sess_log_sig <- true)
+       "-lsig" "log signature");
     (flag (fun _ -> sess.Session.sess_log_passes <- true)
        "-lpasses" "log passes at high-level");
     (flag (fun _ -> sess.Session.sess_log_resolve <- true)
@@ -358,6 +362,17 @@ then
     Format.set_margin 80;
     Printf.fprintf stdout "%s\n" (Fmt.fmt_to_str Ast.fmt_crate crate)
   end
+;;
+(* When sess_log_sig is set (-lsig), print Lib.fmt_iface's output on stdout. *)
+if sess.Session.sess_log_sig
+then
+  begin
+    Printf.fprintf stdout "Post-parse signature:\n";
+    Format.set_margin 80;
+    Printf.fprintf stdout "%s\n" (Fmt.fmt_to_str Lib.fmt_iface crate);
+  end
+;;
+
 
 let list_to_seq ls = Asm.SEQ (Array.of_list
ls);;
 let select_insns (quads:Il.quads) : Asm.frag =
diff --git a/src/boot/driver/session.ml b/src/boot/driver/session.ml
index 49242ac6c26..d79b7d2067d 100644
--- a/src/boot/driver/session.ml
+++ b/src/boot/driver/session.ml
@@ -19,6 +19,7 @@ type sess =
     mutable sess_log_lex: bool;
     mutable sess_log_parse: bool;
     mutable sess_log_ast: bool;
+    mutable sess_log_sig: bool;
     mutable sess_log_passes: bool;
     mutable sess_log_resolve: bool;
     mutable sess_log_type: bool;
diff --git a/src/boot/util/common.ml b/src/boot/util/common.ml
index 838caa73c3c..7a95573cd4e 100644
--- a/src/boot/util/common.ml
+++ b/src/boot/util/common.ml
@@ -218,6 +218,15 @@ let sorted_htab_keys (tab:('a, 'b) Hashtbl.t) : 'a array =
   keys
 ;;
 
+(* Call [f k (Hashtbl.find tab k)] for each k in [sorted_htab_keys tab]. *)
+let sorted_htab_iter
+    (f:'a -> 'b -> unit)
+    (tab:('a, 'b) Hashtbl.t)
+    : unit =
+  let visit key = f key (Hashtbl.find tab key) in
+    Array.iter visit (sorted_htab_keys tab)
+;;
+
 let htab_vals (htab:('a,'b) Hashtbl.t) : ('b list) =
   Hashtbl.fold (fun _ v accum -> v :: accum) htab []
 ;;