rust/src/libcore/extfmt.rs
Marvin Löbel a612e49a21 Converted the floating point types to the new string conversion functions.
Also fixed all conflicting calls of the old functions in the rest of the codebase.

The set of string conversion functions for each float type now consists of those items:
- to_str(), converts to number in base 10
- to_str_hex(), converts to number in base 16
- to_str_radix(), converts to number in given radix
- to_str_exact(), converts to number in base 10 with a exact number of trailing digits
- to_str_digits(), converts to number in base 10 with a maximum number of trailing digits
- implementations for to_str::ToStr and num::ToStrRadix
- from_str(), parses a string as number in base 10 including decimal exponent and special values
- from_str_hex(), parses a string as a number in base 16 including binary exponent and special values
- from_str_radix(), parses a string as a number in a given base excluding any exponent and special values
- implementations for from_str::FromStr and num::FromStrRadix
2013-02-03 15:37:24 -08:00

690 lines
21 KiB
Rust

// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Support for fmt! expressions.
//!
//! The syntax is close to that of Posix format strings:
//!
//! ~~~~~~
//! Format := '%' Parameter? Flag* Width? Precision? Type
//! Parameter := [0-9]+ '$'
//! Flag := [ 0#+-]
//! Width := Parameter | [0-9]+
//! Precision := '.' [0-9]+
//! Type := [bcdfiostuxX?]
//! ~~~~~~
//!
//! * Parameter is the 1-based argument to apply the format to. Currently not
//! implemented.
//! * Flag 0 causes leading zeros to be used for padding when converting
//! numbers.
//! * Flag # causes the conversion to be done in an *alternative* manner.
//! Currently not implemented.
//! * Flag + causes signed numbers to always be prepended with a sign
//! character.
//! * Flag - left justifies the result
//! * Width specifies the minimum field width of the result. By default
//! leading spaces are added.
//! * Precision specifies the minimum number of digits for integral types
//! and the minimum number
//! of decimal places for float.
//!
//! The types currently supported are:
//!
//! * b - bool
//! * c - char
//! * d - int
//! * f - float
//! * i - int (same as d)
//! * o - uint as octal
//! * t - uint as binary
//! * u - uint
//! * x - uint as lower-case hexadecimal
//! * X - uint as upper-case hexadecimal
//! * s - str (any flavor)
//! * ? - arbitrary type (does not use the to_str trait)
// NB: transitionary, de-mode-ing.
#[forbid(deprecated_mode)];
#[forbid(deprecated_pattern)];
// Transitional
#[allow(structural_records)]; // Macros -- needs a snapshot
/*
Syntax Extension: fmt
Format a string
The 'fmt' extension is modeled on the posix printf system.
A posix conversion ostensibly looks like this
> %~[parameter]~[flags]~[width]~[.precision]~[length]type
Given the different numeric type bestiary we have, we omit the 'length'
parameter and support slightly different conversions for 'type'
> %~[parameter]~[flags]~[width]~[.precision]type
we also only support translating-to-rust a tiny subset of the possible
combinations at the moment.
Example:
debug!("hello, %s!", "world");
*/
use cmp::Eq;
use option::{Some, None};
use prelude::*;
use str;
/*
* We have a 'ct' (compile-time) module that parses format strings into a
* sequence of conversions. From those conversions AST fragments are built
* that call into properly-typed functions in the 'rt' (run-time) module.
* Each of those run-time conversion functions accepts another conversion
* description that specifies how to format its output.
*
* The building of the AST is currently done in a module inside the compiler,
* but should migrate over here as the plugin interface is defined.
*/
// Functions used by the fmt extension at compile time
#[doc(hidden)]
pub mod ct {
use char;
use prelude::*;
use str;
use vec;
#[deriving_eq]
pub enum Signedness { Signed, Unsigned, }
#[deriving_eq]
pub enum Caseness { CaseUpper, CaseLower, }
#[deriving_eq]
pub enum Ty {
TyBool,
TyStr,
TyChar,
TyInt(Signedness),
TyBits,
TyHex(Caseness),
TyOctal,
TyFloat,
TyPoly,
}
#[deriving_eq]
pub enum Flag {
FlagLeftJustify,
FlagLeftZeroPad,
FlagSpaceForSign,
FlagSignAlways,
FlagAlternate,
}
#[deriving_eq]
pub enum Count {
CountIs(uint),
CountIsParam(uint),
CountIsNextParam,
CountImplied,
}
#[deriving_eq]
struct Parsed<T> {
val: T,
next: uint
}
impl<T> Parsed<T> {
static pure fn new(val: T, next: uint) -> Parsed<T> {
Parsed {val: val, next: next}
}
}
// A formatted conversion from an expression to a string
#[deriving_eq]
pub struct Conv {
param: Option<uint>,
flags: ~[Flag],
width: Count,
precision: Count,
ty: Ty
}
// A fragment of the output sequence
#[deriving_eq]
pub enum Piece { PieceString(~str), PieceConv(Conv), }
pub type ErrorFn = @fn(&str) -> !;
pub fn parse_fmt_string(s: &str, err: ErrorFn) -> ~[Piece] {
fn push_slice(ps: &mut ~[Piece], s: &str, from: uint, to: uint) {
if to > from {
ps.push(PieceString(s.slice(from, to)));
}
}
let lim = s.len();
let mut h = 0;
let mut i = 0;
let mut pieces = ~[];
while i < lim {
if s[i] == '%' as u8 {
i += 1;
if i >= lim {
err(~"unterminated conversion at end of string");
} else if s[i] == '%' as u8 {
push_slice(&mut pieces, s, h, i);
i += 1;
} else {
push_slice(&mut pieces, s, h, i - 1);
let Parsed {val, next} = parse_conversion(s, i, lim, err);
pieces.push(val);
i = next;
}
h = i;
} else {
i += str::utf8_char_width(s[i]);
}
}
push_slice(&mut pieces, s, h, i);
pieces
}
pub fn peek_num(s: &str, i: uint, lim: uint) -> Option<Parsed<uint>> {
let mut i = i;
let mut accum = 0;
let mut found = false;
while i < lim {
match char::to_digit(s[i] as char, 10) {
Some(x) => {
found = true;
accum *= 10;
accum += x;
i += 1;
}
None => break
}
}
if found {
Some(Parsed::new(accum, i))
} else {
None
}
}
pub fn parse_conversion(s: &str, i: uint, lim: uint, err: ErrorFn) ->
Parsed<Piece> {
let param = parse_parameter(s, i, lim);
// avoid copying ~[Flag] by destructuring
let Parsed {val: flags_val, next: flags_next} = parse_flags(s,
param.next, lim);
let width = parse_count(s, flags_next, lim);
let prec = parse_precision(s, width.next, lim);
let ty = parse_type(s, prec.next, lim, err);
Parsed::new(PieceConv(Conv {
param: param.val,
flags: flags_val,
width: width.val,
precision: prec.val,
ty: ty.val}), ty.next)
}
pub fn parse_parameter(s: &str, i: uint, lim: uint) ->
Parsed<Option<uint>> {
if i >= lim { return Parsed::new(None, i); }
match peek_num(s, i, lim) {
Some(num) if num.next < lim && s[num.next] == '$' as u8 =>
Parsed::new(Some(num.val), num.next + 1),
_ => Parsed::new(None, i)
}
}
pub fn parse_flags(s: &str, i: uint, lim: uint) -> Parsed<~[Flag]> {
let mut i = i;
let mut flags = ~[];
while i < lim {
let f = match s[i] {
'-' as u8 => FlagLeftJustify,
'0' as u8 => FlagLeftZeroPad,
' ' as u8 => FlagSpaceForSign,
'+' as u8 => FlagSignAlways,
'#' as u8 => FlagAlternate,
_ => break
};
flags.push(f);
i += 1;
}
Parsed::new(flags, i)
}
pub fn parse_count(s: &str, i: uint, lim: uint) -> Parsed<Count> {
if i >= lim {
Parsed::new(CountImplied, i)
} else if s[i] == '*' as u8 {
let param = parse_parameter(s, i + 1, lim);
let j = param.next;
match param.val {
None => Parsed::new(CountIsNextParam, j),
Some(n) => Parsed::new(CountIsParam(n), j)
}
} else {
match peek_num(s, i, lim) {
None => Parsed::new(CountImplied, i),
Some(num) => Parsed::new(CountIs(num.val), num.next)
}
}
}
pub fn parse_precision(s: &str, i: uint, lim: uint) -> Parsed<Count> {
if i < lim && s[i] == '.' as u8 {
let count = parse_count(s, i + 1, lim);
// If there were no digits specified, i.e. the precision
// was ".", then the precision is 0
match count.val {
CountImplied => Parsed::new(CountIs(0), count.next),
_ => count
}
} else {
Parsed::new(CountImplied, i)
}
}
pub fn parse_type(s: &str, i: uint, lim: uint, err: ErrorFn) ->
Parsed<Ty> {
if i >= lim { err(~"missing type in conversion"); }
// FIXME (#2249): Do we really want two signed types here?
// How important is it to be printf compatible?
let t = match s[i] {
'b' as u8 => TyBool,
's' as u8 => TyStr,
'c' as u8 => TyChar,
'd' as u8 | 'i' as u8 => TyInt(Signed),
'u' as u8 => TyInt(Unsigned),
'x' as u8 => TyHex(CaseLower),
'X' as u8 => TyHex(CaseUpper),
't' as u8 => TyBits,
'o' as u8 => TyOctal,
'f' as u8 => TyFloat,
'?' as u8 => TyPoly,
_ => err(~"unknown type in conversion: " + s.substr(i, 1))
};
Parsed::new(t, i + 1)
}
#[cfg(test)]
fn die(s: &str) -> ! { die!(s.to_owned()) }
#[test]
fn test_parse_count() {
fn test(s: &str, count: Count, next: uint) -> bool {
parse_count(s, 0, s.len()) == Parsed::new(count, next)
}
assert test("", CountImplied, 0);
assert test("*", CountIsNextParam, 1);
assert test("*1", CountIsNextParam, 1);
assert test("*1$", CountIsParam(1), 3);
assert test("123", CountIs(123), 3);
}
#[test]
fn test_parse_flags() {
fn pack(fs: &[Flag]) -> uint {
fs.foldl(0, |&p, &f| p | (1 << f as uint))
}
fn test(s: &str, flags: &[Flag], next: uint) {
let f = parse_flags(s, 0, s.len());
assert pack(f.val) == pack(flags);
assert f.next == next;
}
test("", [], 0);
test("!#-+ 0", [], 0);
test("#-+", [FlagAlternate, FlagLeftJustify, FlagSignAlways], 3);
test(" 0", [FlagSpaceForSign, FlagLeftZeroPad], 2);
}
#[test]
fn test_parse_fmt_string() {
assert parse_fmt_string("foo %s bar", die) == ~[
PieceString(~"foo "),
PieceConv(Conv {
param: None,
flags: ~[],
width: CountImplied,
precision: CountImplied,
ty: TyStr,
}),
PieceString(~" bar")];
assert parse_fmt_string("%s", die) == ~[
PieceConv(Conv {
param: None,
flags: ~[],
width: CountImplied,
precision: CountImplied,
ty: TyStr,
})];
assert parse_fmt_string("%%%%", die) == ~[
PieceString(~"%"), PieceString(~"%")];
}
#[test]
fn test_parse_parameter() {
fn test(s: &str, param: Option<uint>, next: uint) -> bool {
parse_parameter(s, 0, s.len()) == Parsed::new(param, next)
}
assert test("", None, 0);
assert test("foo", None, 0);
assert test("123", None, 0);
assert test("123$", Some(123), 4);
}
#[test]
fn test_parse_precision() {
fn test(s: &str, count: Count, next: uint) -> bool {
parse_precision(s, 0, s.len()) == Parsed::new(count, next)
}
assert test("", CountImplied, 0);
assert test(".", CountIs(0), 1);
assert test(".*", CountIsNextParam, 2);
assert test(".*1", CountIsNextParam, 2);
assert test(".*1$", CountIsParam(1), 4);
assert test(".123", CountIs(123), 4);
}
#[test]
fn test_parse_type() {
fn test(s: &str, ty: Ty) -> bool {
parse_type(s, 0, s.len(), die) == Parsed::new(ty, 1)
}
assert test("b", TyBool);
assert test("c", TyChar);
assert test("d", TyInt(Signed));
assert test("f", TyFloat);
assert test("i", TyInt(Signed));
assert test("o", TyOctal);
assert test("s", TyStr);
assert test("t", TyBits);
assert test("x", TyHex(CaseLower));
assert test("X", TyHex(CaseUpper));
assert test("?", TyPoly);
}
#[test]
#[should_fail]
#[ignore(cfg(windows))]
fn test_parse_type_missing() {
parse_type("", 0, 0, die);
}
#[test]
#[should_fail]
#[ignore(cfg(windows))]
fn test_parse_type_unknown() {
parse_type("!", 0, 1, die);
}
#[test]
fn test_peek_num() {
let s1 = "";
assert peek_num(s1, 0, s1.len()).is_none();
let s2 = "foo";
assert peek_num(s2, 0, s2.len()).is_none();
let s3 = "123";
assert peek_num(s3, 0, s3.len()) == Some(Parsed::new(123, 3));
let s4 = "123foo";
assert peek_num(s4, 0, s4.len()) == Some(Parsed::new(123, 3));
}
}
// Functions used by the fmt extension at runtime. For now there are a lot of
// decisions made a runtime. If it proves worthwhile then some of these
// conditions can be evaluated at compile-time. For now though it's cleaner to
// implement it this way, I think.
#[doc(hidden)]
pub mod rt {
use float;
use str;
use sys;
use uint;
use vec;
pub const flag_none : u32 = 0u32;
pub const flag_left_justify : u32 = 0b00000000000001u32;
pub const flag_left_zero_pad : u32 = 0b00000000000010u32;
pub const flag_space_for_sign : u32 = 0b00000000000100u32;
pub const flag_sign_always : u32 = 0b00000000001000u32;
pub const flag_alternate : u32 = 0b00000000010000u32;
pub enum Count { CountIs(uint), CountImplied, }
pub enum Ty { TyDefault, TyBits, TyHexUpper, TyHexLower, TyOctal, }
#[cfg(stage0)]
pub type Conv = {flags: u32, width: Count, precision: Count, ty: Ty};
#[cfg(stage1)]
#[cfg(stage2)]
#[cfg(stage3)]
pub struct Conv {
flags: u32,
width: Count,
precision: Count,
ty: Ty,
}
pub pure fn conv_int(cv: Conv, i: int) -> ~str {
let radix = 10;
let prec = get_int_precision(cv);
let mut s : ~str = int_to_str_prec(i, radix, prec);
if 0 <= i {
if have_flag(cv.flags, flag_sign_always) {
unsafe { str::unshift_char(&mut s, '+') };
} else if have_flag(cv.flags, flag_space_for_sign) {
unsafe { str::unshift_char(&mut s, ' ') };
}
}
return unsafe { pad(cv, move s, PadSigned) };
}
pub pure fn conv_uint(cv: Conv, u: uint) -> ~str {
let prec = get_int_precision(cv);
let mut rs =
match cv.ty {
TyDefault => uint_to_str_prec(u, 10, prec),
TyHexLower => uint_to_str_prec(u, 16, prec),
TyHexUpper => str::to_upper(uint_to_str_prec(u, 16, prec)),
TyBits => uint_to_str_prec(u, 2, prec),
TyOctal => uint_to_str_prec(u, 8, prec)
};
return unsafe { pad(cv, move rs, PadUnsigned) };
}
pub pure fn conv_bool(cv: Conv, b: bool) -> ~str {
let s = if b { ~"true" } else { ~"false" };
// run the boolean conversion through the string conversion logic,
// giving it the same rules for precision, etc.
return conv_str(cv, s);
}
pub pure fn conv_char(cv: Conv, c: char) -> ~str {
let mut s = str::from_char(c);
return unsafe { pad(cv, move s, PadNozero) };
}
pub pure fn conv_str(cv: Conv, s: &str) -> ~str {
// For strings, precision is the maximum characters
// displayed
let mut unpadded = match cv.precision {
CountImplied => s.to_owned(),
CountIs(max) => if max as uint < str::char_len(s) {
str::substr(s, 0, max as uint)
} else {
s.to_owned()
}
};
return unsafe { pad(cv, move unpadded, PadNozero) };
}
pub pure fn conv_float(cv: Conv, f: float) -> ~str {
let (to_str, digits) = match cv.precision {
CountIs(c) => (float::to_str_exact, c as uint),
CountImplied => (float::to_str_digits, 6u)
};
let mut s = unsafe { to_str(f, digits) };
if 0.0 <= f {
if have_flag(cv.flags, flag_sign_always) {
s = ~"+" + s;
} else if have_flag(cv.flags, flag_space_for_sign) {
s = ~" " + s;
}
}
return unsafe { pad(cv, move s, PadFloat) };
}
pub pure fn conv_poly<T>(cv: Conv, v: &T) -> ~str {
let s = sys::log_str(v);
return conv_str(cv, s);
}
// Convert an int to string with minimum number of digits. If precision is
// 0 and num is 0 then the result is the empty string.
pub pure fn int_to_str_prec(num: int, radix: uint, prec: uint) -> ~str {
return if num < 0 {
~"-" + uint_to_str_prec(-num as uint, radix, prec)
} else { uint_to_str_prec(num as uint, radix, prec) };
}
// Convert a uint to string with a minimum number of digits. If precision
// is 0 and num is 0 then the result is the empty string. Could move this
// to uint: but it doesn't seem all that useful.
pub pure fn uint_to_str_prec(num: uint, radix: uint,
prec: uint) -> ~str {
return if prec == 0u && num == 0u {
~""
} else {
let s = uint::to_str_radix(num, radix);
let len = str::char_len(s);
if len < prec {
let diff = prec - len;
let pad = str::from_chars(vec::from_elem(diff, '0'));
pad + s
} else { move s }
};
}
pub pure fn get_int_precision(cv: Conv) -> uint {
return match cv.precision {
CountIs(c) => c as uint,
CountImplied => 1u
};
}
#[deriving_eq]
pub enum PadMode { PadSigned, PadUnsigned, PadNozero, PadFloat }
pub fn pad(cv: Conv, s: ~str, mode: PadMode) -> ~str {
let mut s = move s; // sadtimes
let uwidth : uint = match cv.width {
CountImplied => return (move s),
CountIs(width) => { width as uint }
};
let strlen = str::char_len(s);
if uwidth <= strlen { return (move s); }
let mut padchar = ' ';
let diff = uwidth - strlen;
if have_flag(cv.flags, flag_left_justify) {
let padstr = str::from_chars(vec::from_elem(diff, padchar));
return s + padstr;
}
let {might_zero_pad, signed} = match mode {
PadNozero => {might_zero_pad:false, signed:false},
PadSigned => {might_zero_pad:true, signed:true },
PadFloat => {might_zero_pad:true, signed:true},
PadUnsigned => {might_zero_pad:true, signed:false}
};
pure fn have_precision(cv: Conv) -> bool {
return match cv.precision { CountImplied => false, _ => true };
}
let zero_padding = {
if might_zero_pad && have_flag(cv.flags, flag_left_zero_pad) &&
(!have_precision(cv) || mode == PadFloat) {
padchar = '0';
true
} else {
false
}
};
let padstr = str::from_chars(vec::from_elem(diff, padchar));
// This is completely heinous. If we have a signed value then
// potentially rip apart the intermediate result and insert some
// zeros. It may make sense to convert zero padding to a precision
// instead.
if signed && zero_padding && s.len() > 0 {
let head = str::shift_char(&mut s);
if head == '+' || head == '-' || head == ' ' {
let headstr = str::from_chars(vec::from_elem(1u, head));
return headstr + padstr + s;
}
else {
str::unshift_char(&mut s, head);
}
}
return padstr + s;
}
pub pure fn have_flag(flags: u32, f: u32) -> bool {
flags & f != 0
}
}
// Bulk of the tests are in src/test/run-pass/syntax-extension-fmt.rs
#[cfg(test)]
mod test {
#[test]
fn fmt_slice() {
let s = "abc";
let _s = fmt!("%s", s);
}
}
// Local Variables:
// mode: rust;
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End: