From d07f7533b0f336ff27ca4ba90aec0e0204ca7b92 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Fri, 1 Oct 2010 14:54:40 -0700 Subject: [PATCH] Sketch out #fmt syntax extension in rustboot. --- src/Makefile | 5 +- src/boot/fe/extfmt.ml | 229 ++++++++++++++++++ src/boot/fe/pexp.ml | 220 +++++++++++++---- src/test/run-pass/syntax-extension-fmt.rs | 5 + ...extension.rs => syntax-extension-shell.rs} | 0 5 files changed, 415 insertions(+), 44 deletions(-) create mode 100644 src/boot/fe/extfmt.ml create mode 100644 src/test/run-pass/syntax-extension-fmt.rs rename src/test/run-pass/{syntax-extension.rs => syntax-extension-shell.rs} (100%) diff --git a/src/Makefile b/src/Makefile index 9ca5120d5f7..a0fe6cc3f1d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -235,8 +235,8 @@ IL_MLS := $(addprefix boot/be/, asm.ml il.ml abi.ml) ME_MLS := $(addprefix boot/me/, walk.ml semant.ml resolve.ml alias.ml \ simplify.ml type.ml dead.ml effect.ml typestate.ml loop.ml \ layout.ml transutil.ml trans.ml dwarf.ml) -FE_MLS := $(addprefix boot/fe/, ast.ml token.ml lexer.ml parser.ml pexp.ml \ - item.ml cexp.ml fuzz.ml) +FE_MLS := $(addprefix boot/fe/, ast.ml token.ml lexer.ml parser.ml \ + extfmt.ml pexp.ml item.ml cexp.ml fuzz.ml) DRIVER_TOP_MLS := $(addprefix boot/driver/, lib.ml $(VARIANT)/glue.ml main.ml) BOOT_MLS := $(UTIL_BOT_MLS) $(DRIVER_BOT_MLS) $(FE_MLS) $(IL_MLS) $(ME_MLS) \ @@ -540,6 +540,7 @@ TEST_XFAILS_LLVM := $(TASK_XFAILS) \ str-concat.rs \ str-idx.rs \ structured-compare.rs \ + syntax-extension-fmt.rs \ tag.rs \ tail-call-arg-leak.rs \ tail-cps.rs \ diff --git a/src/boot/fe/extfmt.ml b/src/boot/fe/extfmt.ml new file mode 100644 index 00000000000..8b0b149abc3 --- /dev/null +++ b/src/boot/fe/extfmt.ml @@ -0,0 +1,229 @@ +(* The 'fmt' extension is modeled on the posix printf system. + * + * A posix conversion ostensibly looks like this: + * + * %[parameter][flags][width][.precision][length]type + * + * Given the different numeric type bestiary we have, we omit the 'length' + * parameter and support slightly different conversions for 'type': + * + * %[parameter][flags][width][.precision]type + * + * we also only support translating-to-rust a tiny subset of the possible + * combinations at the moment. + *) + +exception Malformed of string +;; + +type case = + CASE_upper + | CASE_lower +;; + +type signedness = + SIGNED + | UNSIGNED +;; + +type ty = + TY_bool + | TY_str + | TY_char + | TY_int of signedness + | TY_bits + | TY_hex of case + (* FIXME: Support more later. *) +;; + +type flag = + FLAG_left_justify + | FLAG_left_zero_pad + | FLAG_left_space_pad + | FLAG_plus_if_positive + | FLAG_alternate +;; + +type count = + COUNT_is of int + | COUNT_is_param of int + | COUNT_is_next_param + | COUNT_implied + +type conv = + { conv_parameter: int option; + conv_flags: flag list; + conv_width: count; + conv_precision: count; + conv_ty: ty } + +type piece = + PIECE_string of string + | PIECE_conversion of conv + + +let rec peek_num (s:string) (i:int) (lim:int) + : (int * int) option = + if i >= lim + then None + else + let c = s.[i] in + if '0' <= c && c <= '9' + then + let n = (Char.code c) - (Char.code '0') in + match peek_num s (i+1) lim with + None -> Some (n, i+1) + | Some (m, i) -> Some (n * 10 + m, i) + else None +;; + +let parse_parameter (s:string) (i:int) (lim:int) + : (int option * int) = + if i >= lim + then (None, i) + else + match peek_num s i lim with + None -> (None, i) + | Some (n, j) -> + if j < (String.length s) && s.[j] = '$' + then (Some n, j+1) + else (None, i) +;; + +let rec parse_flags (s:string) (i:int) (lim:int) + : (flag list * int) = + if i >= lim + then ([], i) + else + let cont flag = + let (rest, j) = parse_flags s (i+1) lim in + (flag :: rest, j) + in + match s.[i] with + '-' -> cont FLAG_left_justify + | '0' -> cont FLAG_left_zero_pad + | ' ' -> cont FLAG_left_space_pad + | '+' -> cont FLAG_plus_if_positive + | '#' -> cont FLAG_alternate + | _ -> ([], i) +;; + +let parse_count (s:string) (i:int) (lim:int) + : (count * int) = + if i >= lim + then (COUNT_implied, i) + else + if s.[i] = '*' + then + begin + match parse_parameter s (i+1) lim with + (None, j) -> (COUNT_is_next_param, j) + | (Some n, j) -> (COUNT_is_param n, j) + end + else + begin + match peek_num s i lim with + None -> (COUNT_implied, i) + | Some (n, j) -> (COUNT_is n, j) + end +;; + +let parse_precision (s:string) (i:int) (lim:int) + : (count * int) = + if i >= lim + then (COUNT_implied, i) + else + if s.[i] = '.' + then parse_count s (i+1) lim + else (COUNT_implied, i) +;; + +let parse_type (s:string) (i:int) (lim:int) + : (ty * int) = + if i >= lim + then raise (Malformed "missing type in conversion") + else + let t = + match s.[i] with + 'b' -> TY_bool + | 's' -> TY_str + | 'c' -> TY_char + | 'd' | 'i' -> TY_int SIGNED + | 'u' -> TY_int UNSIGNED + | 'x' -> TY_hex CASE_lower + | 'X' -> TY_hex CASE_upper + | 't' -> TY_bits + | _ -> raise (Malformed "unknown type in conversion") + in + (t, i+1) +;; + +let parse_conversion (s:string) (i:int) (lim:int) + : (piece * int) = + let (parameter, i) = parse_parameter s i lim in + let (flags, i) = parse_flags s i lim in + let (width, i) = parse_count s i lim in + let (precision, i) = parse_precision s i lim in + let (ty, i) = parse_type s i lim in + (PIECE_conversion { conv_parameter = parameter; + conv_flags = flags; + conv_width = width; + conv_precision = precision; + conv_ty = ty }, i) +;; + +let parse_fmt_string (s:string) : piece array = + let pieces = Queue.create () in + let i = ref 0 in + let lim = String.length s in + let buf = Buffer.create 10 in + let flush_buf _ = + if (Buffer.length buf) <> 0 + then + let piece = + PIECE_string (Buffer.contents buf) + in + Queue.add piece pieces; + Buffer.clear buf; + in + while (!i) < lim + do + if s.[!i] = '%' + then + begin + incr i; + if (!i) >= lim + then raise (Malformed "unterminated conversion at end of string"); + if s.[!i] = '%' + then + begin + Buffer.add_char buf '%'; + incr i; + end + else + begin + flush_buf(); + let (piece, j) = parse_conversion s (!i) lim in + Queue.add piece pieces; + i := j + end + end + else + begin + Buffer.add_char buf s.[!i]; + incr i; + end + done; + flush_buf (); + Common.queue_to_arr pieces +;; + + +(* + * Local Variables: + * fill-column: 78; + * indent-tabs-mode: nil + * buffer-file-coding-system: utf-8-unix + * compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; + * End: + *) diff --git a/src/boot/fe/pexp.ml b/src/boot/fe/pexp.ml index 1ecc530140c..85eb32c46df 100644 --- a/src/boot/fe/pexp.ml +++ b/src/boot/fe/pexp.ml @@ -903,44 +903,6 @@ and parse_mutable_and_pexp_list (ps:pstate) ;; -(* - * FIXME: This is a crude approximation of the syntax-extension system, - * for purposes of prototyping and/or hard-wiring any extensions we - * wish to use in the bootstrap compiler. The eventual aim is to permit - * loading rust crates to process extensions, but this will likely - * require a rust-based frontend, or an ocaml-FFI-based connection to - * rust crates. At the moment we have neither. - *) - -let expand_pexp_custom - (ps:pstate) - (dst_lval:Ast.lval) - (name:Ast.name) - (args:Ast.atom array) - (body:string option) - (spanner:'a -> 'a identified) - : (Ast.stmt array) = - let nstr = Fmt.fmt_to_str Ast.fmt_name name in - match (nstr, (Array.length args), body) with - - ("shell", 0, Some cmd) -> - let c = Unix.open_process_in cmd in - let b = Buffer.create 32 in - let rec r _ = - try - Buffer.add_char b (input_char c); - r () - with - End_of_file -> - ignore (Unix.close_process_in c); - Buffer.contents b - in - [| spanner (Ast.STMT_new_str (dst_lval, r())) |] - - | _ -> - raise (err ("unknown syntax extension: " ^ nstr) ps) -;; - (* * Desugarings depend on context: * @@ -1253,11 +1215,185 @@ and desugar_expr_init aa arg_stmts [| stmt |] | Ast.PEXP_custom (n, a, b) -> - let (arg_stmts, args) = desugar_expr_atoms ps a in - let stmts = - expand_pexp_custom ps dst_lval n args b ss + expand_pexp_custom ps apos bpos dst_lval n a b + +(* + * FIXME: This is a crude approximation of the syntax-extension system, + * for purposes of prototyping and/or hard-wiring any extensions we + * wish to use in the bootstrap compiler. The eventual aim is to permit + * loading rust crates to process extensions, but this will likely + * require a rust-based frontend, or an ocaml-FFI-based connection to + * rust crates. At the moment we have neither. + *) + +and expand_pexp_custom + (ps:pstate) + (apos:pos) + (bpos:pos) + (dst_lval:Ast.lval) + (name:Ast.name) + (pexp_args:Ast.pexp array) + (body:string option) + : (Ast.stmt array) = + let nstr = Fmt.fmt_to_str Ast.fmt_name name in + match (nstr, (Array.length pexp_args), body) with + + ("shell", 0, Some cmd) -> + let c = Unix.open_process_in cmd in + let b = Buffer.create 32 in + let rec r _ = + try + Buffer.add_char b (input_char c); + r () + with + End_of_file -> + ignore (Unix.close_process_in c); + Buffer.contents b in - aa arg_stmts stmts + [| span ps apos bpos + (Ast.STMT_new_str (dst_lval, r())) |] + + | ("fmt", nargs, None) -> + if nargs = 0 + then raise (err "malformed #fmt call" ps) + else + begin + match pexp_args.(0).node with + Ast.PEXP_str s -> + let (arg_stmts, args) = + desugar_expr_atoms ps + (Array.sub pexp_args 1 (nargs-1)) + in + + let pieces = Extfmt.parse_fmt_string s in + let fmt_stmts = + fmt_pieces_to_stmts + ps apos bpos dst_lval pieces args + in + Array.append arg_stmts fmt_stmts + | _ -> + raise (err "malformed #fmt call" ps) + end + + | _ -> + raise (err ("unknown syntax extension: " ^ nstr) ps) + +and fmt_pieces_to_stmts + (ps:pstate) + (apos:pos) + (bpos:pos) + (dst_lval:Ast.lval) + (pieces:Extfmt.piece array) + (args:Ast.atom array) + : (Ast.stmt array) = + + let stmts = Queue.create () in + + let make_new_tmp _ = + let (_, tmp, decl_stmt) = build_tmp ps slot_auto apos bpos in + Queue.add decl_stmt stmts; + tmp + in + + let make_new_str s = + let tmp = make_new_tmp () in + let init_stmt = + span ps apos bpos (Ast.STMT_new_str (clone_lval ps tmp, s)) + in + Queue.add init_stmt stmts; + tmp + in + + let make_append dst_lval src_atom = + let stmt = + span ps apos bpos + (Ast.STMT_copy_binop + ((clone_lval ps dst_lval), Ast.BINOP_add, src_atom)) + in + Queue.add stmt stmts + in + + let make_append_lval dst_lval src_lval = + make_append dst_lval (Ast.ATOM_lval (clone_lval ps src_lval)) + in + + let rec make_lval' path = + match path with + [n] -> + Ast.LVAL_base (span ps apos bpos (Ast.BASE_ident n)) + + | x :: xs -> + Ast.LVAL_ext (make_lval' xs, + Ast.COMP_named (Ast.COMP_ident x)) + + | [] -> (bug () "make_lval on empty list in #fmt") + in + + let make_lval path = make_lval' (List.rev path) in + + let make_call dst path args = + let callee = make_lval path in + let stmt = + span ps apos bpos (Ast.STMT_call (dst, callee, args )) + in + Queue.add stmt stmts + in + + let ulit i = + Ast.ATOM_literal (span ps apos bpos (Ast.LIT_uint (Int64.of_int i))) + in + + let n = ref 0 in + let tmp_lval = make_new_str "" in + let final_stmt = + span ps apos bpos + (Ast.STMT_copy + (clone_lval ps dst_lval, + Ast.EXPR_atom (Ast.ATOM_lval tmp_lval))) + in + Array.iter + begin + fun piece -> + match piece with + Extfmt.PIECE_string s -> + let s_lval = make_new_str s in + make_append_lval tmp_lval s_lval + + | Extfmt.PIECE_conversion conv -> + if not + ((conv.Extfmt.conv_parameter = None) && + (conv.Extfmt.conv_flags = []) && + (conv.Extfmt.conv_width = Extfmt.COUNT_implied) && + (conv.Extfmt.conv_precision = Extfmt.COUNT_implied)) + then + raise (err "conversion not supported in #fmt string" ps); + if !n >= Array.length args + then raise (err "too many conversions in #fmt string" ps); + let arg = args.(!n) in + incr n; + match conv.Extfmt.conv_ty with + Extfmt.TY_str -> + make_append tmp_lval arg + + | Extfmt.TY_int Extfmt.SIGNED -> + let t = make_new_tmp () in + make_call t + ["std"; "_int"; "to_str" ] [| arg; ulit 10 |]; + + make_append_lval tmp_lval t + + | Extfmt.TY_int Extfmt.UNSIGNED -> + let t = make_new_tmp () in + make_call t + ["std"; "_uint"; "to_str" ] [| arg; ulit 10 |]; + make_append_lval tmp_lval t + + | _ -> + raise (err "conversion not supported in #fmt" ps); + end + pieces; + Queue.add final_stmt stmts; + queue_to_arr stmts; and atom_lval (_:pstate) (at:Ast.atom) : Ast.lval = diff --git a/src/test/run-pass/syntax-extension-fmt.rs b/src/test/run-pass/syntax-extension-fmt.rs new file mode 100644 index 00000000000..65e7647ee8b --- /dev/null +++ b/src/test/run-pass/syntax-extension-fmt.rs @@ -0,0 +1,5 @@ +use std; +fn main() { + auto s = #fmt("hello %d friends and %s things", 10, "formatted"); + log s; +} diff --git a/src/test/run-pass/syntax-extension.rs b/src/test/run-pass/syntax-extension-shell.rs similarity index 100% rename from src/test/run-pass/syntax-extension.rs rename to src/test/run-pass/syntax-extension-shell.rs