rust/src/libcore/extfmt.rs
2012-07-01 19:19:32 -07:00

443 lines
15 KiB
Rust

#[doc(hidden)];
/*
Syntax Extension: fmt
Format a string
The 'fmt' extension is modeled on the posix printf system.
A posix conversion ostensibly looks like this
> %~[parameter]~[flags]~[width]~[.precision]~[length]type
Given the different numeric type bestiary we have, we omit the 'length'
parameter and support slightly different conversions for 'type'
> %~[parameter]~[flags]~[width]~[.precision]type
we also only support translating-to-rust a tiny subset of the possible
combinations at the moment.
Example:
#debug("hello, %s!", "world");
*/
import option::{some, none};
/*
* We have a 'ct' (compile-time) module that parses format strings into a
* sequence of conversions. From those conversions AST fragments are built
* that call into properly-typed functions in the 'rt' (run-time) module.
* Each of those run-time conversion functions accepts another conversion
* description that specifies how to format its output.
*
* The building of the AST is currently done in a module inside the compiler,
* but should migrate over here as the plugin interface is defined.
*/
// Functions used by the fmt extension at compile time
mod ct {
// Whether an integer conversion is signed ("%d", "%i") or unsigned ("%u");
// see parse_type.
enum signedness { signed, unsigned, }
// Letter case requested for hexadecimal output ("%X" vs. "%x").
enum caseness { case_upper, case_lower, }
// The kind of conversion, selected by the single type character that ends
// a conversion specification (the mapping lives in parse_type).
enum ty {
// "%b"
ty_bool,
// "%s"
ty_str,
// "%c"
ty_char,
// "%d" / "%i" (signed) and "%u" (unsigned)
ty_int(signedness),
// "%t"
ty_bits,
// "%x" (case_lower) and "%X" (case_upper)
ty_hex(caseness),
// "%o"
ty_octal,
// "%f"
ty_float,
// "%?"
ty_poly,
}
// Flags that may follow the optional parameter in a conversion
// specification. Each one corresponds to a single character recognised by
// parse_flags.
enum flag {
// '-'
flag_left_justify,
// '0'
flag_left_zero_pad,
// ' '
flag_space_for_sign,
// '+'
flag_sign_always,
// '#'
flag_alternate,
}
// A width or precision as written in the format string (see parse_count
// and parse_precision).
enum count {
// A literal number, e.g. the 10 in "%10d"
count_is(int),
// '*' followed by an explicit parameter reference ("*N$")
count_is_param(int),
// A bare '*'
count_is_next_param,
// Nothing was specified
count_implied,
}
// A formatted conversion from an expression to a string.
//
// * param     - the explicit parameter number ("N$"), if one was given
// * flags     - the flags in the order they appeared in the format string
// * width     - the requested field width
// * precision - the requested precision
// * ty        - the conversion type
type conv =
{param: option<int>,
flags: ~[flag],
width: count,
precision: count,
ty: ty};
// A fragment of the output sequence: either literal text to be copied
// through or a conversion to be applied to an argument.
enum piece { piece_string(str), piece_conv(conv), }
// Callback used to report a malformed format string. It receives the
// message and is declared as never returning.
type error_fn = fn@(str) -> ! ;
// Splits the format string `s` into a sequence of pieces: runs of literal
// text and parsed conversions. "%%" contributes a literal '%' to the
// surrounding text. `error` is called (and does not return) when a '%' is
// the last byte of the string; it is also handed on to parse_conversion.
fn parse_fmt_string(s: str, error: error_fn) -> ~[piece] {
    // Appends the pending literal text, if there is any, to `pieces` and
    // hands back the empty string for the caller to continue with.
    fn flush_buf(buf: str, &pieces: ~[piece]) -> str {
        if str::len(buf) > 0u {
            vec::push(pieces, piece_string(buf));
        }
        ret "";
    }

    let end = str::len(s);
    let mut out: ~[piece] = ~[];
    let mut literal = "";
    let mut pos = 0u;
    while pos < end {
        // Step through the string one whole character at a time.
        let width = str::utf8_char_width(s[pos]);
        let ch = str::slice(s, pos, pos + width);
        if !str::eq(ch, "%") {
            literal += ch;
            pos += width;
        } else {
            pos += 1u;
            if pos >= end {
                error("unterminated conversion at end of string");
            }
            let after = str::slice(s, pos, pos + 1u);
            if str::eq(after, "%") {
                // An escaped percent sign
                literal += after;
                pos += 1u;
            } else {
                // Emit the text gathered so far before the conversion so
                // that the pieces stay in source order.
                literal = flush_buf(literal, out);
                let parsed = parse_conversion(s, pos, end, error);
                vec::push(out, parsed.piece);
                pos = parsed.next;
            }
        }
    }
    flush_buf(literal, out);
    ret out;
}
fn peek_num(s: str, i: uint, lim: uint) ->
option<{num: uint, next: uint}> {
if i >= lim { ret none; }
let c = s[i];
if !('0' as u8 <= c && c <= '9' as u8) { ret option::none; }
let n = (c - ('0' as u8)) as uint;
ret alt peek_num(s, i + 1u, lim) {
none { some({num: n, next: i + 1u}) }
some(next) {
let m = next.num;
let j = next.next;
some({num: n * 10u + m, next: j})
}
};
}
// Parses one conversion specification starting at byte offset `i`, i.e.
// just after the introducing '%'. The components are read in the order
// parameter, flags, width, precision, type; each sub-parser reports where
// it stopped and the next one resumes from there.
//
// Returns the conversion as a piece plus the offset following it.
fn parse_conversion(s: str, i: uint, lim: uint, error: error_fn) ->
   {piece: piece, next: uint} {
    let p_param = parse_parameter(s, i, lim);
    let p_flags = parse_flags(s, p_param.next, lim);
    let p_width = parse_count(s, p_flags.next, lim);
    let p_prec = parse_precision(s, p_width.next, lim);
    let p_ty = parse_type(s, p_prec.next, lim, error);
    let spec = {param: p_param.param,
                flags: p_flags.flags,
                width: p_width.count,
                precision: p_prec.count,
                ty: p_ty.ty};
    ret {piece: piece_conv(spec), next: p_ty.next};
}
// Parses an optional explicit parameter reference of the form "N$" at
// byte offset `i`.
//
// When a number immediately followed by '$' is present, returns it with
// `next` pointing past the '$'. In every other case nothing is consumed:
// `param` is `none` and `next` is `i`.
fn parse_parameter(s: str, i: uint, lim: uint) ->
   {param: option<int>, next: uint} {
    let absent: {param: option<int>, next: uint} = {param: none, next: i};
    if i >= lim { ret absent; }
    alt peek_num(s, i, lim) {
      some(t) {
        // Digits alone are not a parameter; they need the trailing '$'.
        if t.next < lim && s[t.next] == '$' as u8 {
            ret {param: some(t.num as int), next: t.next + 1u};
        }
      }
      none { }
    }
    ret absent;
}
// Parses the run of flag characters ('-', '0', ' ', '+', '#') that starts
// at byte offset `i`. Returns the flags in the order they appear and the
// offset of the first byte that is not a flag.
fn parse_flags(s: str, i: uint, lim: uint) ->
   {flags: ~[flag], next: uint} {
    let empty: ~[flag] = ~[];
    if i >= lim { ret {flags: empty, next: i}; }

    // Classify the current byte.
    let c = s[i];
    let found =
        if c == '-' as u8 {
            some(flag_left_justify)
        } else if c == '0' as u8 {
            some(flag_left_zero_pad)
        } else if c == ' ' as u8 {
            some(flag_space_for_sign)
        } else if c == '+' as u8 {
            some(flag_sign_always)
        } else if c == '#' as u8 {
            some(flag_alternate)
        } else { none };

    ret alt found {
      none { {flags: empty, next: i} }
      some(fl) {
        // Recurse for the remaining flags and put this one in front.
        let tail = parse_flags(s, i + 1u, lim);
        let rest = tail.flags;
        let head: ~[flag] = ~[fl];
        {flags: vec::append(head, rest), next: tail.next}
      }
    };
}
// Parses a width-style count at byte offset `i`:
//
// * "*N$"  - count_is_param(N)
// * "*"    - count_is_next_param
// * digits - count_is(value)
// * anything else, or end of input - count_implied, consuming nothing
fn parse_count(s: str, i: uint, lim: uint) -> {count: count, next: uint} {
    if i >= lim { ret {count: count_implied, next: i}; }

    if s[i] == '*' as u8 {
        // The count comes from an argument, optionally an explicit one.
        let arg = parse_parameter(s, i + 1u, lim);
        ret alt arg.param {
          some(n) { {count: count_is_param(n), next: arg.next} }
          none { {count: count_is_next_param, next: arg.next} }
        };
    }

    ret alt peek_num(s, i, lim) {
      some(digits) {
        {count: count_is(digits.num as int), next: digits.next}
      }
      none { {count: count_implied, next: i} }
    };
}
// Parses an optional ".count" precision at byte offset `i`. Without a
// leading '.' nothing is consumed and the precision is count_implied.
fn parse_precision(s: str, i: uint, lim: uint) ->
   {count: count, next: uint} {
    if i >= lim || s[i] != '.' as u8 {
        ret {count: count_implied, next: i};
    }
    let parsed = parse_count(s, i + 1u, lim);
    ret alt parsed.count {
      // A lone "." with no digits after it means a precision of 0.
      count_implied { {count: count_is(0), next: parsed.next} }
      _ { parsed }
    };
}
// Parses the type character that ends a conversion specification.
// `error` is called (and does not return) when the input ends before a
// type is found or when the character is not a known type. On success
// `next` is the offset just past the type character.
fn parse_type(s: str, i: uint, lim: uint, error: error_fn) ->
   {ty: ty, next: uint} {
    if i >= lim { error("missing type in conversion"); }
    // Type specifiers are exactly one byte long.
    let spec = str::slice(s, i, i+1u);
    // TODO: Do we really want two signed types ("d" and "i") here?
    // How important is it to be printf compatible?
    let kind =
        if str::eq(spec, "d") || str::eq(spec, "i") {
            ty_int(signed)
        } else if str::eq(spec, "u") {
            ty_int(unsigned)
        } else if str::eq(spec, "x") {
            ty_hex(case_lower)
        } else if str::eq(spec, "X") {
            ty_hex(case_upper)
        } else if str::eq(spec, "o") {
            ty_octal
        } else if str::eq(spec, "t") {
            ty_bits
        } else if str::eq(spec, "f") {
            ty_float
        } else if str::eq(spec, "c") {
            ty_char
        } else if str::eq(spec, "s") {
            ty_str
        } else if str::eq(spec, "b") {
            ty_bool
        } else if str::eq(spec, "?") {
            ty_poly
        } else { error("unknown type in conversion: " + spec) };
    ret {ty: kind, next: i + 1u};
}
}
// Functions used by the fmt extension at runtime. For now there are a lot of
// decisions made at runtime. If it proves worthwhile then some of these
// conditions can be evaluated at compile-time. For now though it's cleaner to
// implement it this way, I think.
mod rt {
// Conversion flags, packed as individual bits of a u32 and tested with
// have_flag.
const flag_none : u32 = 0u32;
const flag_left_justify : u32 = 0b00000000000000000000000000000001u32;
const flag_left_zero_pad : u32 = 0b00000000000000000000000000000010u32;
const flag_space_for_sign : u32 = 0b00000000000000000000000000000100u32;
const flag_sign_always : u32 = 0b00000000000000000000000000001000u32;
const flag_alternate : u32 = 0b00000000000000000000000000010000u32;
// A width or precision at run time: either an explicit value or not
// specified.
enum count { count_is(int), count_implied, }
// Output style for unsigned conversions. conv_uint uses radix 10 for
// ty_default, 2 for ty_bits, 16 for the two hex variants (ty_hex_upper
// additionally upper-cases the digits) and 8 for ty_octal.
enum ty { ty_default, ty_bits, ty_hex_upper, ty_hex_lower, ty_octal, }
// Run-time description of how a single value is to be formatted.
type conv = {flags: u32, width: count, precision: count, ty: ty};
// Formats a signed integer in decimal. The precision is the minimum
// number of digits. A non-negative value is prefixed with '+' when
// flag_sign_always is set, otherwise with ' ' when flag_space_for_sign is
// set. The result is then padded to the field width.
fn conv_int(cv: conv, i: int) -> str {
    let min_digits = get_int_precision(cv);
    let mut text : str = int_to_str_prec(i, 10u, min_digits);
    let nonneg = 0 <= i;
    if nonneg && have_flag(cv.flags, flag_sign_always) {
        str::unshift_char(text, '+');
    } else if nonneg && have_flag(cv.flags, flag_space_for_sign) {
        str::unshift_char(text, ' ');
    }
    ret pad(cv, text, pad_signed);
}
// Formats an unsigned integer in the radix selected by `cv.ty`, with the
// precision as the minimum number of digits, then pads it to the field
// width.
fn conv_uint(cv: conv, u: uint) -> str {
    let radix = alt cv.ty {
      ty_default { 10u }
      ty_bits { 2u }
      ty_octal { 8u }
      ty_hex_lower { 16u }
      ty_hex_upper { 16u }
    };
    let digits = uint_to_str_prec(u, radix, get_int_precision(cv));
    // Only the upper-case hex variant needs post-processing.
    let mut rs = alt cv.ty {
      ty_hex_upper { str::to_upper(digits) }
      _ { digits }
    };
    ret pad(cv, rs, pad_unsigned);
}
// Formats a boolean as "true" or "false". The word goes through conv_str
// so that precision and width behave exactly as they do for strings.
fn conv_bool(cv: conv, b: bool) -> str {
    ret conv_str(cv, if b { "true" } else { "false" });
}
// Formats a single character, padded to the field width. Characters are
// never zero padded.
fn conv_char(cv: conv, c: char) -> str {
    let mut one = str::from_char(c);
    ret pad(cv, one, pad_nozero);
}
fn conv_str(cv: conv, s: str) -> str {
// For strings, precision is the maximum characters
// displayed
let mut unpadded = alt cv.precision {
count_implied { s }
count_is(max) {
if max as uint < str::char_len(s) {
str::substr(s, 0u, max as uint)
} else { s }
}
};
ret pad(cv, unpadded, pad_nozero);
}
// Formats a float. An explicit precision goes through
// float::to_str_exact with that many digits; otherwise float::to_str is
// used with 6 digits. A value that is >= 0.0 gets a '+' or ' ' prefix
// according to the sign flags, and the result is padded to the field
// width.
fn conv_float(cv: conv, f: float) -> str {
    let mut text = alt cv.precision {
      count_implied { float::to_str(f, 6u) }
      count_is(c) { float::to_str_exact(f, c as uint) }
    };
    let nonneg = 0.0 <= f;
    if nonneg && have_flag(cv.flags, flag_sign_always) {
        text = "+" + text;
    } else if nonneg && have_flag(cv.flags, flag_space_for_sign) {
        text = " " + text;
    }
    ret pad(cv, text, pad_float);
}
// Formats an arbitrary value: renders it with sys::log_str and then
// treats the result like any other string.
fn conv_poly<T>(cv: conv, v: T) -> str {
    ret conv_str(cv, sys::log_str(v));
}
// Convert an int to string with minimum number of digits. If precision is
// 0 and num is 0 then the result is the empty string.
fn int_to_str_prec(num: int, radix: uint, prec: uint) -> str {
ret if num < 0 {
"-" + uint_to_str_prec(-num as uint, radix, prec)
} else { uint_to_str_prec(num as uint, radix, prec) };
}
// Convert a uint to string with a minimum number of digits. If precision
// is 0 and num is 0 then the result is the empty string. Could move this
// to uint: but it doesn't seem all that useful.
fn uint_to_str_prec(num: uint, radix: uint, prec: uint) -> str {
ret if prec == 0u && num == 0u {
""
} else {
let s = uint::to_str(num, radix);
let len = str::char_len(s);
if len < prec {
let diff = prec - len;
let pad = str::from_chars(vec::from_elem(diff, '0'));
pad + s
} else { s }
};
}
fn get_int_precision(cv: conv) -> uint {
ret alt cv.precision {
count_is(c) { c as uint }
count_implied { 1u }
};
}
// Tells pad what kind of value it is padding. pad_signed and pad_float
// may be zero padded and may carry a leading sign character; pad_unsigned
// may be zero padded but has no sign; pad_nozero is never zero padded.
enum pad_mode { pad_signed, pad_unsigned, pad_nozero, pad_float }
// Pads the already formatted text `s` out to the field width in `cv`.
//
// * No width, or `s` already at least that many characters: `s` is
//   returned as is.
// * flag_left_justify: spaces are appended on the right.
// * Otherwise padding goes on the left. It is '0' when `mode` allows zero
//   padding, flag_left_zero_pad is set, and either no precision was given
//   or the value is a float; in all other cases it is ' '.
//
// `s` is taken with the `&` mode and is modified in place by
// str::shift_char / str::unshift_char below (presumably a mutable
// by-reference argument; confirm against the language version in use).
fn pad(cv: conv, &s: str, mode: pad_mode) -> str {
let uwidth : uint = alt cv.width {
count_implied { ret s; }
count_is(width) {
// FIXME: width should probably be uint (see Issue #1996)
width as uint
}
};
// Widths are measured in characters, not bytes.
let strlen = str::char_len(s);
if uwidth <= strlen { ret s; }
let mut padchar = ' ';
let diff = uwidth - strlen;
// Left justification always pads with spaces; it is handled before the
// zero flag is even looked at.
if have_flag(cv.flags, flag_left_justify) {
let padstr = str::from_chars(vec::from_elem(diff, padchar));
ret s + padstr;
}
let {might_zero_pad, signed} = alt mode {
pad_nozero { {might_zero_pad:false, signed:false} }
pad_signed { {might_zero_pad:true, signed:true } }
pad_float { {might_zero_pad:true, signed:true } }
pad_unsigned { {might_zero_pad:true, signed:false} }
};
fn have_precision(cv: conv) -> bool {
ret alt cv.precision { count_implied { false } _ { true } };
}
// Decide whether to pad with zeros. As a side effect this block switches
// padchar to '0' when it does.
let zero_padding = {
if might_zero_pad && have_flag(cv.flags, flag_left_zero_pad) &&
(!have_precision(cv) || mode == pad_float) {
padchar = '0';
true
} else {
false
}
};
let padstr = str::from_chars(vec::from_elem(diff, padchar));
// This is completely heinous. If we have a signed value then
// potentially rip apart the intermediate result and insert some
// zeros. It may make sense to convert zero padding to a precision
// instead.
if signed && zero_padding && str::len(s) > 0u {
// Remove the first character to see whether it is a sign.
let head = str::shift_char(s);
if head == '+' || head == '-' || head == ' ' {
// The zeros go between the sign and the digits. Note that `s` is
// returned to the caller without its first character on this path.
let headstr = str::from_chars(vec::from_elem(1u, head));
ret headstr + padstr + s;
}
else {
// Not a sign after all: put the character back.
str::unshift_char(s, head);
}
}
ret padstr + s;
}
fn have_flag(flags: u32, f: u32) -> bool {
flags & f != 0
}
}
// Local Variables:
// mode: rust;
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: