Pass the fmt! buffer to each conversion method

Achieves a little more speedup and avoids allocations around some strings in conv_str
2013-03-20 23:33:10 -04:00 · 2013-03-20 23:33:10 -04:00 · e93654c96d
commit e93654c96d
parent c0bbc6242f
2 changed files with 268 additions and 58 deletions
--- a/src/libcore/unstable/extfmt.rs
+++ b/src/libcore/unstable/extfmt.rs
@ -470,6 +470,215 @@ pub mod ct {
 // decisions made a runtime. If it proves worthwhile then some of these
 // conditions can be evaluated at compile-time. For now though it's cleaner to
 // implement it this way, I think.
+#[cfg(stage1)]
+#[cfg(stage2)]
+#[cfg(stage3)]
+#[doc(hidden)]
+pub mod rt {
+    use float;
+    use str;
+    use sys;
+    use int;
+    use uint;
+    use vec;
+    use option::{Some, None, Option};
+
+    pub const flag_none : u32 = 0u32;
+    pub const flag_left_justify   : u32 = 0b00000000000001u32;
+    pub const flag_left_zero_pad  : u32 = 0b00000000000010u32;
+    pub const flag_space_for_sign : u32 = 0b00000000000100u32;
+    pub const flag_sign_always    : u32 = 0b00000000001000u32;
+    pub const flag_alternate      : u32 = 0b00000000010000u32;
+
+    pub enum Count { CountIs(uint), CountImplied, }
+
+    pub enum Ty { TyDefault, TyBits, TyHexUpper, TyHexLower, TyOctal, }
+
+    pub struct Conv {
+        flags: u32,
+        width: Count,
+        precision: Count,
+        ty: Ty,
+    }
+
+    pub pure fn conv_int(cv: Conv, i: int, buf: &mut ~str) {
+        let radix = 10;
+        let prec = get_int_precision(cv);
+        let mut s : ~str = uint_to_str_prec(int::abs(i) as uint, radix, prec);
+
+        let head = if i >= 0 {
+            if have_flag(cv.flags, flag_sign_always) {
+                Some('+')
+            } else if have_flag(cv.flags, flag_space_for_sign) {
+                Some(' ')
+            } else {
+                None
+            }
+        } else { Some('-') };
+        unsafe { pad(cv, s, head, PadSigned, buf) };
+    }
+    pub pure fn conv_uint(cv: Conv, u: uint, buf: &mut ~str) {
+        let prec = get_int_precision(cv);
+        let mut rs =
+            match cv.ty {
+              TyDefault => uint_to_str_prec(u, 10, prec),
+              TyHexLower => uint_to_str_prec(u, 16, prec),
+              TyHexUpper => str::to_upper(uint_to_str_prec(u, 16, prec)),
+              TyBits => uint_to_str_prec(u, 2, prec),
+              TyOctal => uint_to_str_prec(u, 8, prec)
+            };
+        unsafe { pad(cv, rs, None, PadUnsigned, buf) };
+    }
+    pub pure fn conv_bool(cv: Conv, b: bool, buf: &mut ~str) {
+        let s = if b { "true" } else { "false" };
+        // run the boolean conversion through the string conversion logic,
+        // giving it the same rules for precision, etc.
+        conv_str(cv, s, buf);
+    }
+    pub pure fn conv_char(cv: Conv, c: char, buf: &mut ~str) {
+        unsafe { pad(cv, "", Some(c), PadNozero, buf) };
+    }
+    pub pure fn conv_str(cv: Conv, s: &str, buf: &mut ~str) {
+        // For strings, precision is the maximum characters
+        // displayed
+        let mut unpadded = match cv.precision {
+          CountImplied => s,
+          CountIs(max) => if (max as uint) < str::char_len(s) {
+            str::slice(s, 0, max as uint)
+          } else {
+            s
+          }
+        };
+        unsafe { pad(cv, unpadded, None, PadNozero, buf) };
+    }
+    pub pure fn conv_float(cv: Conv, f: float, buf: &mut ~str) {
+        let (to_str, digits) = match cv.precision {
+              CountIs(c) => (float::to_str_exact, c as uint),
+              CountImplied => (float::to_str_digits, 6u)
+        };
+        let mut s = unsafe { to_str(f, digits) };
+        let head = if 0.0 <= f {
+            if have_flag(cv.flags, flag_sign_always) {
+                Some('+')
+            } else if have_flag(cv.flags, flag_space_for_sign) {
+                Some(' ')
+            } else {
+                None
+            }
+        } else { None };
+        unsafe { pad(cv, s, head, PadFloat, buf) };
+    }
+    pub pure fn conv_poly<T>(cv: Conv, v: &T, buf: &mut ~str) {
+        let s = sys::log_str(v);
+        conv_str(cv, s, buf);
+    }
+
+    // Convert a uint to string with a minimum number of digits.  If precision
+    // is 0 and num is 0 then the result is the empty string. Could move this
+    // to uint: but it doesn't seem all that useful.
+    pub pure fn uint_to_str_prec(num: uint, radix: uint,
+                                 prec: uint) -> ~str {
+        return if prec == 0u && num == 0u {
+                ~""
+            } else {
+                let s = uint::to_str_radix(num, radix);
+                let len = str::char_len(s);
+                if len < prec {
+                    let diff = prec - len;
+                    let pad = str::from_chars(vec::from_elem(diff, '0'));
+                    pad + s
+                } else { s }
+            };
+    }
+    pub pure fn get_int_precision(cv: Conv) -> uint {
+        return match cv.precision {
+              CountIs(c) => c as uint,
+              CountImplied => 1u
+            };
+    }
+
+    #[deriving(Eq)]
+    pub enum PadMode { PadSigned, PadUnsigned, PadNozero, PadFloat }
+
+    pub fn pad(cv: Conv, mut s: &str, head: Option<char>, mode: PadMode,
+               buf: &mut ~str) {
+        let headsize = match head { Some(_) => 1, _ => 0 };
+        let uwidth : uint = match cv.width {
+            CountImplied => {
+                for head.each |&c| {
+                    buf.push_char(c);
+                }
+                return buf.push_str(s);
+            }
+            CountIs(width) => { width as uint }
+        };
+        let strlen = str::char_len(s) + headsize;
+        if uwidth <= strlen {
+            for head.each |&c| {
+                buf.push_char(c);
+            }
+            return buf.push_str(s);
+        }
+        let mut padchar = ' ';
+        let diff = uwidth - strlen;
+        if have_flag(cv.flags, flag_left_justify) {
+            for head.each |&c| {
+                buf.push_char(c);
+            }
+            buf.push_str(s);
+            for diff.times {
+                buf.push_char(padchar);
+            }
+            return;
+        }
+        let (might_zero_pad, signed) = match mode {
+          PadNozero   => (false, true),
+          PadSigned   => (true, true),
+          PadFloat    => (true, true),
+          PadUnsigned => (true, false)
+        };
+        pure fn have_precision(cv: Conv) -> bool {
+            return match cv.precision { CountImplied => false, _ => true };
+        }
+        let zero_padding = {
+            if might_zero_pad && have_flag(cv.flags, flag_left_zero_pad) &&
+                (!have_precision(cv) || mode == PadFloat) {
+                padchar = '0';
+                true
+            } else {
+                false
+            }
+        };
+        let padstr = str::from_chars(vec::from_elem(diff, padchar));
+        // This is completely heinous. If we have a signed value then
+        // potentially rip apart the intermediate result and insert some
+        // zeros. It may make sense to convert zero padding to a precision
+        // instead.
+
+        if signed && zero_padding {
+            for head.each |&head| {
+                if head == '+' || head == '-' || head == ' ' {
+                    buf.push_char(head);
+                    buf.push_str(padstr);
+                    buf.push_str(s);
+                    return;
+                }
+            }
+        }
+        buf.push_str(padstr);
+        for head.each |&c| {
+            buf.push_char(c);
+        }
+        buf.push_str(s);
+    }
+    #[inline(always)]
+    pub pure fn have_flag(flags: u32, f: u32) -> bool {
+        flags & f != 0
+    }
+}
+
+// XXX: remove after a snapshot of the above changes have gone in
+#[cfg(stage0)]
 #[doc(hidden)]
 pub mod rt {
    use float;
--- a/src/libsyntax/ext/fmt.rs
+++ b/src/libsyntax/ext/fmt.rs
@ -139,19 +139,17 @@ fn pieces_to_expr(cx: @ext_ctxt, sp: span,
        make_conv_struct(cx, sp, rt_conv_flags, rt_conv_width,
                         rt_conv_precision, rt_conv_ty)
    }
-    fn make_conv_call(cx: @ext_ctxt, sp: span, conv_type: ~str, cnv: &Conv,
-                      arg: @ast::expr) -> @ast::expr {
+    fn make_conv_call(cx: @ext_ctxt, sp: span, conv_type: &str, cnv: &Conv,
+                      arg: @ast::expr, buf: @ast::expr) -> @ast::expr {
        let fname = ~"conv_" + conv_type;
        let path = make_path_vec(cx, @fname);
        let cnv_expr = make_rt_conv_expr(cx, sp, cnv);
-        let args = ~[cnv_expr, arg];
+        let args = ~[cnv_expr, arg, buf];
        return mk_call_global(cx, arg.span, path, args);
    }

-    fn make_new_conv(cx: @ext_ctxt, sp: span, cnv: &Conv, arg: @ast::expr) ->
-       @ast::expr {
-        // FIXME: Move validation code into core::extfmt (Issue #2249)
-
+    fn make_new_conv(cx: @ext_ctxt, sp: span, cnv: &Conv,
+                     arg: @ast::expr, buf: @ast::expr) -> @ast::expr {
        fn is_signed_type(cnv: &Conv) -> bool {
            match cnv.ty {
              TyInt(s) => match s {
@ -198,27 +196,17 @@ fn pieces_to_expr(cx: @ext_ctxt, sp: span,
          CountIs(_) => (),
          _ => cx.span_unimpl(sp, unsupported)
        }
-        match cnv.ty {
-          TyStr => return make_conv_call(cx, arg.span, ~"str", cnv, arg),
-          TyInt(sign) => match sign {
-            Signed => return make_conv_call(cx, arg.span, ~"int", cnv, arg),
-            Unsigned => {
-                return make_conv_call(cx, arg.span, ~"uint", cnv, arg)
-            }
-          },
-          TyBool => return make_conv_call(cx, arg.span, ~"bool", cnv, arg),
-          TyChar => return make_conv_call(cx, arg.span, ~"char", cnv, arg),
-          TyHex(_) => {
-            return make_conv_call(cx, arg.span, ~"uint", cnv, arg);
-          }
-          TyBits => return make_conv_call(cx, arg.span, ~"uint", cnv, arg),
-          TyOctal => return make_conv_call(cx, arg.span, ~"uint", cnv, arg),
-          TyFloat => {
-            return make_conv_call(cx, arg.span, ~"float", cnv, arg);
-          }
-          TyPoly => return make_conv_call(cx, arg.span, ~"poly", cnv,
-                       mk_addr_of(cx, sp, arg))
-        }
+        let (name, actual_arg) = match cnv.ty {
+            TyStr => ("str", arg),
+            TyInt(Signed) => ("int", arg),
+            TyBool => ("bool", arg),
+            TyChar => ("char", arg),
+            TyBits | TyOctal | TyHex(_) | TyInt(Unsigned) => ("uint", arg),
+            TyFloat => ("float", arg),
+            TyPoly => ("poly", mk_addr_of(cx, sp, arg))
+        };
+        return make_conv_call(cx, arg.span, name, cnv, actual_arg,
+                              mk_mut_addr_of(cx, arg.span, buf));
    }
    fn log_conv(c: &Conv) {
        debug!("Building conversion:");
@ -270,14 +258,41 @@ fn pieces_to_expr(cx: @ext_ctxt, sp: span,
        }
    }

-    /* Translate each piece (portion of the fmt expression) into a ~str
-       expression to be concatenated below */
    let fmt_sp = args[0].span;
    let mut n = 0u;
    let nargs = args.len();
-    let pieces = do vec::map_consume(pieces) |pc| {
+
+    /* 'ident' is the local buffer building up the result of fmt! */
+    let ident = cx.parse_sess().interner.intern(@~"__fmtbuf");
+    let buf = || mk_path(cx, fmt_sp, ~[ident]);
+    let str_ident = cx.parse_sess().interner.intern(@~"str");
+    let push_ident = cx.parse_sess().interner.intern(@~"push_str");
+    let mut stms = ~[];
+
+    /* Translate each piece (portion of the fmt expression) by invoking the
+       corresponding function in core::unstable::extfmt. Each function takes a
+       buffer to insert data into along with the data being formatted. */
+    do vec::consume(pieces) |i, pc| {
        match pc {
-            PieceString(s) => mk_uniq_str(cx, fmt_sp, s),
+            /* Raw strings get appended via str::push_str */
+            PieceString(s) => {
+                let portion = mk_uniq_str(cx, fmt_sp, s);
+
+                /* If this is the first portion, then initialize the local
+                   buffer with it directly */
+                if i == 0 {
+                    stms.push(mk_local(cx, fmt_sp, true, ident, portion));
+                } else {
+                    let args = ~[mk_mut_addr_of(cx, fmt_sp, buf()), portion];
+                    let call = mk_call_global(cx,
+                                              fmt_sp,
+                                              ~[str_ident, push_ident],
+                                              args);
+                    stms.push(mk_stmt(cx, fmt_sp, call));
+                }
+            }
+
+            /* Invoke the correct conv function in extfmt */
            PieceConv(ref conv) => {
                n += 1u;
                if n >= nargs {
@ -285,11 +300,21 @@ fn pieces_to_expr(cx: @ext_ctxt, sp: span,
                                  ~"not enough arguments to fmt! " +
                                  ~"for the given format string");
                }
+
                log_conv(conv);
-                make_new_conv(cx, fmt_sp, conv, args[n])
+                /* If the first portion is a conversion, then the local buffer
+                   must be initialized as an empty string */
+                if i == 0 {
+                    stms.push(mk_local(cx, fmt_sp, true, ident,
+                                       mk_uniq_str(cx, fmt_sp, ~"")));
+                }
+                stms.push(mk_stmt(cx, fmt_sp,
+                                  make_new_conv(cx, fmt_sp, conv,
+                                                args[n], buf())));
            }
        }
-    };
+    }
+
    let expected_nargs = n + 1u; // n conversions + the fmt string
    if expected_nargs < nargs {
        cx.span_fatal
@ -297,30 +322,6 @@ fn pieces_to_expr(cx: @ext_ctxt, sp: span,
                           nargs, expected_nargs));
    }

-    /* Concatenate all of the strings together with str::push_str. This
-       involves storing the first piece into a local variable, and then
-       pushing each other piece onto the local. The local is contained in its
-       own block to not conflict with other names as much as possible */
-    let ident = cx.parse_sess().interner.intern(@~"__fmtbuf");
-    let buf = || mk_path(cx, fmt_sp, ~[ident]);
-    let str_ident = cx.parse_sess().interner.intern(@~"str");
-    let push_ident = cx.parse_sess().interner.intern(@~"push_str");
-
-    let mut first = true;
-    let stms = do vec::map_consume(pieces) |pc| {
-        if first {
-            first = false;
-            mk_local(cx, fmt_sp, true, ident, pc)
-        } else {
-            let call = mk_call_global(cx,
-                                      fmt_sp,
-                                      ~[str_ident, push_ident],
-                                      ~[mk_mut_addr_of(cx, fmt_sp, buf()),
-                                        pc]);
-            mk_stmt(cx, fmt_sp, call)
-        }
-    };
-
    return mk_block(cx, fmt_sp, ~[], stms, Some(buf()));
 }
 //