Merge pull request #126 from erickt/json-f64

Update json number parsing to switch to floats if the number gets too big
This commit is contained in:
Erick Tryzelaar 2015-08-06 10:08:31 -07:00
commit 199ed417bd
5 changed files with 242 additions and 132 deletions

View File

@ -6,7 +6,7 @@ use std::str;
use serde::de;
use serde::iter::LineColIterator;
use super::error::{Error, ErrorCode};
use super::error::{Error, ErrorCode, Result};
pub struct Deserializer<Iter: Iterator<Item=io::Result<u8>>> {
rdr: LineColIterator<Iter>,
@ -28,7 +28,7 @@ impl<Iter> Deserializer<Iter>
{
/// Creates the JSON parser from an `std::iter::Iterator`.
#[inline]
pub fn new(rdr: Iter) -> Result<Deserializer<Iter>, Error> {
pub fn new(rdr: Iter) -> Result<Deserializer<Iter>> {
let mut deserializer = Deserializer {
rdr: LineColIterator::new(rdr),
ch: None,
@ -41,7 +41,7 @@ impl<Iter> Deserializer<Iter>
}
#[inline]
pub fn end(&mut self) -> Result<(), Error> {
pub fn end(&mut self) -> Result<()> {
try!(self.parse_whitespace());
if self.eof() {
Ok(())
@ -54,7 +54,7 @@ impl<Iter> Deserializer<Iter>
fn ch_or_null(&self) -> u8 { self.ch.unwrap_or(b'\x00') }
fn bump(&mut self) -> Result<(), Error> {
fn bump(&mut self) -> Result<()> {
self.ch = match self.rdr.next() {
Some(Err(err)) => { return Err(Error::IoError(err)); }
Some(Ok(ch)) => Some(ch),
@ -64,7 +64,7 @@ impl<Iter> Deserializer<Iter>
Ok(())
}
fn next_char(&mut self) -> Result<Option<u8>, Error> {
fn next_char(&mut self) -> Result<Option<u8>> {
try!(self.bump());
Ok(self.ch)
}
@ -77,7 +77,7 @@ impl<Iter> Deserializer<Iter>
Error::SyntaxError(reason, self.rdr.line(), self.rdr.col())
}
fn parse_whitespace(&mut self) -> Result<(), Error> {
fn parse_whitespace(&mut self) -> Result<()> {
while self.ch_is(b' ') ||
self.ch_is(b'\n') ||
self.ch_is(b'\t') ||
@ -86,7 +86,7 @@ impl<Iter> Deserializer<Iter>
Ok(())
}
fn parse_value<V>(&mut self, mut visitor: V) -> Result<V::Value, Error>
fn parse_value<V>(&mut self, mut visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
try!(self.parse_whitespace());
@ -108,7 +108,13 @@ impl<Iter> Deserializer<Iter>
try!(self.parse_ident(b"alse"));
visitor.visit_bool(false)
}
b'0' ... b'9' | b'-' => self.parse_number(visitor),
b'-' => {
try!(self.bump());
self.parse_integer(false, visitor)
}
b'0' ... b'9' => {
self.parse_integer(true, visitor)
}
b'"' => {
try!(self.parse_string());
let s = str::from_utf8(&self.str_buf).unwrap();
@ -134,7 +140,7 @@ impl<Iter> Deserializer<Iter>
}
}
fn parse_ident(&mut self, ident: &[u8]) -> Result<(), Error> {
fn parse_ident(&mut self, ident: &[u8]) -> Result<()> {
for c in ident {
if Some(*c) != try!(self.next_char()) {
return Err(self.error(ErrorCode::ExpectedSomeIdent));
@ -145,53 +151,9 @@ impl<Iter> Deserializer<Iter>
Ok(())
}
fn parse_number<V>(&mut self, mut visitor: V) -> Result<V::Value, Error>
fn parse_integer<V>(&mut self, pos: bool, visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
let mut neg = false;
if self.ch_is(b'-') {
try!(self.bump());
neg = true;
}
let res = try!(self.parse_integer());
if self.ch_is(b'.') || self.ch_is(b'e') || self.ch_is(b'E') {
let mut res = res as f64;
if self.ch_is(b'.') {
res = try!(self.parse_decimal(res));
}
if self.ch_is(b'e') || self.ch_is(b'E') {
res = try!(self.parse_exponent(res));
}
if neg {
visitor.visit_f64(-res)
} else {
visitor.visit_f64(res)
}
} else {
if neg {
let res = -(res as i64);
// Make sure we didn't underflow.
if res > 0 {
Err(self.error(ErrorCode::InvalidNumber))
} else {
visitor.visit_i64(res)
}
} else {
visitor.visit_u64(res)
}
}
}
fn parse_integer(&mut self) -> Result<u64, Error> {
let mut accum: u64 = 0;
match self.ch_or_null() {
b'0' => {
try!(self.bump());
@ -199,41 +161,138 @@ impl<Iter> Deserializer<Iter>
// There can be only one leading '0'.
match self.ch_or_null() {
b'0' ... b'9' => {
return Err(self.error(ErrorCode::InvalidNumber));
Err(self.error(ErrorCode::InvalidNumber))
}
_ => {
self.parse_number(pos, 0, visitor)
}
_ => ()
}
},
b'1' ... b'9' => {
while !self.eof() {
c @ b'1' ... b'9' => {
try!(self.bump());
let mut res: u64 = (c as u64) - ('0' as u64);
loop {
match self.ch_or_null() {
c @ b'0' ... b'9' => {
accum = try_or_invalid!(self, accum.checked_mul(10));
accum = try_or_invalid!(self, accum.checked_add((c as u64) - ('0' as u64)));
try!(self.bump());
let digit = (c as u64) - ('0' as u64);
// We need to be careful with overflow. If we can, try to keep the
// number as a `u64` until we grow too large. At that point, switch to
// parsing the value as a `f64`.
match res.checked_mul(10).and_then(|val| val.checked_add(digit)) {
Some(res_) => { res = res_; }
None => {
return self.parse_float(
pos,
(res as f64) * 10.0 + (digit as f64),
visitor);
}
}
}
_ => {
return self.parse_number(pos, res, visitor);
}
_ => break,
}
}
}
_ => { return Err(self.error(ErrorCode::InvalidNumber)); }
_ => {
Err(self.error(ErrorCode::InvalidNumber))
}
}
Ok(accum)
}
fn parse_decimal(&mut self, res: f64) -> Result<f64, Error> {
fn parse_float<V>(&mut self,
pos: bool,
mut res: f64,
mut visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
loop {
match self.ch_or_null() {
c @ b'0' ... b'9' => {
try!(self.bump());
let digit = (c as u64) - ('0' as u64);
res *= 10.0;
res += digit as f64;
}
_ => {
match self.ch_or_null() {
b'.' => {
return self.parse_decimal(pos, res, visitor);
}
b'e' | b'E' => {
return self.parse_exponent(pos, res, visitor);
}
_ => {
if !pos {
res = -res;
}
return visitor.visit_f64(res);
}
}
}
}
}
}
fn parse_number<V>(&mut self,
pos: bool,
res: u64,
mut visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
match self.ch_or_null() {
b'.' => {
self.parse_decimal(pos, res as f64, visitor)
}
b'e' | b'E' => {
self.parse_exponent(pos, res as f64, visitor)
}
_ => {
if pos {
visitor.visit_u64(res)
} else {
// FIXME: `wrapping_neg` will be stable in Rust 1.2
//let res_i64 = (res as i64).wrapping_neg();
let res_i64 = (!res + 1) as i64;
// Convert into a float if we underflow.
if res_i64 > 0 {
visitor.visit_f64(-(res as f64))
} else {
visitor.visit_i64(res_i64)
}
}
}
}
}
fn parse_decimal<V>(&mut self,
pos: bool,
mut res: f64,
mut visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
try!(self.bump());
let mut dec = 0.1;
// Make sure a digit follows the decimal place.
match self.ch_or_null() {
b'0' ... b'9' => (),
c @ b'0' ... b'9' => {
try!(self.bump());
res += (((c as u64) - (b'0' as u64)) as f64) * dec;
}
_ => { return Err(self.error(ErrorCode::InvalidNumber)); }
}
let mut res = res;
let mut dec = 1.0;
while !self.eof() {
match self.ch_or_null() {
c @ b'0' ... b'9' => {
@ -245,36 +304,53 @@ impl<Iter> Deserializer<Iter>
}
}
Ok(res)
match self.ch_or_null() {
b'e' | b'E' => {
self.parse_exponent(pos, res, visitor)
}
_ => {
if pos {
visitor.visit_f64(res)
} else {
visitor.visit_f64(-res)
}
}
}
}
fn parse_exponent(&mut self, mut res: f64) -> Result<f64, Error> {
fn parse_exponent<V>(&mut self,
pos: bool,
mut res: f64,
mut visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
try!(self.bump());
let mut exp: u64 = 0;
let mut neg_exp = false;
if self.ch_is(b'+') {
try!(self.bump());
} else if self.ch_is(b'-') {
try!(self.bump());
neg_exp = true;
}
let pos_exp = match self.ch_or_null() {
b'+' => { try!(self.bump()); true }
b'-' => { try!(self.bump()); false }
_ => { true }
};
// Make sure a digit follows the exponent place.
match self.ch_or_null() {
b'0' ... b'9' => (),
let mut exp = match self.ch_or_null() {
c @ b'0' ... b'9' => {
try!(self.bump());
(c as u64) - (b'0' as u64)
}
_ => { return Err(self.error(ErrorCode::InvalidNumber)); }
}
while !self.eof() {
};
loop {
match self.ch_or_null() {
c @ b'0' ... b'9' => {
try!(self.bump());
exp = try_or_invalid!(self, exp.checked_mul(10));
exp = try_or_invalid!(self, exp.checked_add((c as u64) - (b'0' as u64)));
try!(self.bump());
}
_ => break
_ => { break; }
}
}
@ -284,16 +360,20 @@ impl<Iter> Deserializer<Iter>
return Err(self.error(ErrorCode::InvalidNumber));
};
if neg_exp {
res /= exp;
} else {
if pos_exp {
res *= exp;
} else {
res /= exp;
}
Ok(res)
if pos {
visitor.visit_f64(res)
} else {
visitor.visit_f64(-res)
}
}
fn decode_hex_escape(&mut self) -> Result<u16, Error> {
fn decode_hex_escape(&mut self) -> Result<u16> {
let mut i = 0;
let mut n = 0u16;
while i < 4 && !self.eof() {
@ -320,7 +400,7 @@ impl<Iter> Deserializer<Iter>
Ok(n)
}
fn parse_string(&mut self) -> Result<(), Error> {
fn parse_string(&mut self) -> Result<()> {
self.str_buf.clear();
loop {
@ -409,7 +489,7 @@ impl<Iter> Deserializer<Iter>
}
}
fn parse_object_colon(&mut self) -> Result<(), Error> {
fn parse_object_colon(&mut self) -> Result<()> {
try!(self.parse_whitespace());
if self.ch_is(b':') {
@ -429,14 +509,14 @@ impl<Iter> de::Deserializer for Deserializer<Iter>
type Error = Error;
#[inline]
fn visit<V>(&mut self, visitor: V) -> Result<V::Value, Error>
fn visit<V>(&mut self, visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
self.parse_value(visitor)
}
#[inline]
fn visit_option<V>(&mut self, mut visitor: V) -> Result<V::Value, Error>
fn visit_option<V>(&mut self, mut visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
try!(self.parse_whitespace());
@ -456,7 +536,7 @@ impl<Iter> de::Deserializer for Deserializer<Iter>
#[inline]
fn visit_newtype_struct<V>(&mut self,
_name: &str,
mut visitor: V) -> Result<V::Value, Error>
mut visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
visitor.visit_newtype_struct(self)
@ -466,7 +546,7 @@ impl<Iter> de::Deserializer for Deserializer<Iter>
fn visit_enum<V>(&mut self,
_name: &str,
_variants: &'static [&'static str],
mut visitor: V) -> Result<V::Value, Error>
mut visitor: V) -> Result<V::Value>
where V: de::EnumVisitor,
{
try!(self.parse_whitespace());
@ -517,7 +597,7 @@ impl<'a, Iter> de::SeqVisitor for SeqVisitor<'a, Iter>
{
type Error = Error;
fn visit<T>(&mut self) -> Result<Option<T>, Error>
fn visit<T>(&mut self) -> Result<Option<T>>
where T: de::Deserialize,
{
try!(self.de.parse_whitespace());
@ -542,7 +622,7 @@ impl<'a, Iter> de::SeqVisitor for SeqVisitor<'a, Iter>
Ok(Some(value))
}
fn end(&mut self) -> Result<(), Error> {
fn end(&mut self) -> Result<()> {
try!(self.de.parse_whitespace());
if self.de.ch_is(b']') {
@ -574,7 +654,7 @@ impl<'a, Iter> de::MapVisitor for MapVisitor<'a, Iter>
{
type Error = Error;
fn visit_key<K>(&mut self) -> Result<Option<K>, Error>
fn visit_key<K>(&mut self) -> Result<Option<K>>
where K: de::Deserialize,
{
try!(self.de.parse_whitespace());
@ -607,7 +687,7 @@ impl<'a, Iter> de::MapVisitor for MapVisitor<'a, Iter>
Ok(Some(try!(de::Deserialize::deserialize(self.de))))
}
fn visit_value<V>(&mut self) -> Result<V, Error>
fn visit_value<V>(&mut self) -> Result<V>
where V: de::Deserialize,
{
try!(self.de.parse_object_colon());
@ -615,7 +695,7 @@ impl<'a, Iter> de::MapVisitor for MapVisitor<'a, Iter>
Ok(try!(de::Deserialize::deserialize(self.de)))
}
fn end(&mut self) -> Result<(), Error> {
fn end(&mut self) -> Result<()> {
try!(self.de.parse_whitespace());
if self.de.ch_is(b'}') {
@ -628,7 +708,7 @@ impl<'a, Iter> de::MapVisitor for MapVisitor<'a, Iter>
}
}
fn missing_field<V>(&mut self, _field: &'static str) -> Result<V, Error>
fn missing_field<V>(&mut self, _field: &'static str) -> Result<V>
where V: de::Deserialize,
{
let mut de = de::value::ValueDeserializer::into_deserializer(());
@ -641,7 +721,7 @@ impl<Iter> de::VariantVisitor for Deserializer<Iter>
{
type Error = Error;
fn visit_variant<V>(&mut self) -> Result<V, Error>
fn visit_variant<V>(&mut self) -> Result<V>
where V: de::Deserialize
{
let val = try!(de::Deserialize::deserialize(self));
@ -649,11 +729,11 @@ impl<Iter> de::VariantVisitor for Deserializer<Iter>
Ok(val)
}
fn visit_unit(&mut self) -> Result<(), Error> {
fn visit_unit(&mut self) -> Result<()> {
de::Deserialize::deserialize(self)
}
fn visit_newtype<T>(&mut self) -> Result<T, Error>
fn visit_newtype<T>(&mut self) -> Result<T>
where T: de::Deserialize,
{
de::Deserialize::deserialize(self)
@ -661,7 +741,7 @@ impl<Iter> de::VariantVisitor for Deserializer<Iter>
fn visit_tuple<V>(&mut self,
_len: usize,
visitor: V) -> Result<V::Value, Error>
visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
de::Deserializer::visit(self, visitor)
@ -669,7 +749,7 @@ impl<Iter> de::VariantVisitor for Deserializer<Iter>
fn visit_struct<V>(&mut self,
_fields: &'static [&'static str],
visitor: V) -> Result<V::Value, Error>
visitor: V) -> Result<V::Value>
where V: de::Visitor,
{
de::Deserializer::visit(self, visitor)
@ -677,7 +757,7 @@ impl<Iter> de::VariantVisitor for Deserializer<Iter>
}
/// Decodes a json value from a `std::io::Read`.
pub fn from_iter<I, T>(iter: I) -> Result<T, Error>
pub fn from_iter<I, T>(iter: I) -> Result<T>
where I: Iterator<Item=io::Result<u8>>,
T: de::Deserialize,
{
@ -690,7 +770,7 @@ pub fn from_iter<I, T>(iter: I) -> Result<T, Error>
}
/// Decodes a json value from a `std::io::Read`.
pub fn from_reader<R, T>(rdr: R) -> Result<T, Error>
pub fn from_reader<R, T>(rdr: R) -> Result<T>
where R: io::Read,
T: de::Deserialize,
{
@ -698,14 +778,14 @@ pub fn from_reader<R, T>(rdr: R) -> Result<T, Error>
}
/// Decodes a json value from a `&str`.
pub fn from_slice<T>(v: &[u8]) -> Result<T, Error>
pub fn from_slice<T>(v: &[u8]) -> Result<T>
where T: de::Deserialize
{
from_iter(v.iter().map(|byte| Ok(*byte)))
}
/// Decodes a json value from a `&str`.
pub fn from_str<T>(s: &str) -> Result<T, Error>
pub fn from_str<T>(s: &str) -> Result<T>
where T: de::Deserialize
{
from_slice(s.as_bytes())

View File

@ -1,6 +1,7 @@
use std::error;
use std::fmt;
use std::io;
use std::result;
use serde::de;
@ -183,3 +184,6 @@ impl de::Error for Error {
Error::MissingFieldError(field)
}
}
/// Helper alias for `Result` objects that return a JSON `Error`.
pub type Result<T> = result::Result<T, Error>;

View File

@ -96,7 +96,7 @@ extern crate num;
extern crate serde;
pub use self::de::{Deserializer, from_str};
pub use self::error::{Error, ErrorCode};
pub use self::error::{Error, ErrorCode, Result};
pub use self::ser::{
Serializer,
to_writer,

View File

@ -465,12 +465,7 @@ fn fmt_f32_or_null<W>(wr: &mut W, value: f32) -> io::Result<()>
match value.classify() {
FpCategory::Nan | FpCategory::Infinite => wr.write_all(b"null"),
_ => {
let s = format!("{:?}", value);
try!(wr.write_all(s.as_bytes()));
if !s.contains('.') {
try!(wr.write_all(b".0"))
}
Ok(())
write!(wr, "{:?}", value)
}
}
}
@ -481,12 +476,7 @@ fn fmt_f64_or_null<W>(wr: &mut W, value: f64) -> io::Result<()>
match value.classify() {
FpCategory::Nan | FpCategory::Infinite => wr.write_all(b"null"),
_ => {
let s = format!("{:?}", value);
try!(wr.write_all(s.as_bytes()));
if !s.contains('.') {
try!(wr.write_all(b".0"))
}
Ok(())
write!(wr, "{:?}", value)
}
}
}

View File

@ -1,6 +1,9 @@
use std::collections::BTreeMap;
use std::f64;
use std::fmt::Debug;
use std::i64;
use std::marker::PhantomData;
use std::u64;
use serde::de;
use serde::ser;
@ -82,12 +85,23 @@ fn test_write_null() {
test_pretty_encode_ok(tests);
}
#[test]
fn test_write_u64() {
let tests = &[
(3u64, "3"),
(u64::MAX, &u64::MAX.to_string()),
];
test_encode_ok(tests);
test_pretty_encode_ok(tests);
}
#[test]
fn test_write_i64() {
let tests = &[
(3i64, "3"),
(-2i64, "-2"),
(-1234i64, "-1234"),
(i64::MIN, &i64::MIN.to_string()),
];
test_encode_ok(tests);
test_pretty_encode_ok(tests);
@ -95,11 +109,18 @@ fn test_write_i64() {
#[test]
fn test_write_f64() {
let min_string = format!("{:?}", f64::MIN);
let max_string = format!("{:?}", f64::MAX);
let epsilon_string = format!("{:?}", f64::EPSILON);
let tests = &[
(3.0, "3.0"),
(3.0, "3"),
(3.1, "3.1"),
(-1.5, "-1.5"),
(0.5, "0.5"),
(f64::MIN, &min_string),
(f64::MAX, &max_string),
(f64::EPSILON, &epsilon_string),
];
test_encode_ok(tests);
test_pretty_encode_ok(tests);
@ -621,7 +642,7 @@ fn test_write_option() {
]);
}
fn test_parse_ok<T>(errors: Vec<(&'static str, T)>)
fn test_parse_ok<T>(errors: Vec<(&str, T)>)
where T: Clone + Debug + PartialEq + ser::Serialize + de::Deserialize,
{
for (s, value) in errors {
@ -707,8 +728,7 @@ fn test_parse_number_errors() {
("1e", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 2)),
("1e+", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 3)),
("1a", Error::SyntaxError(ErrorCode::TrailingCharacters, 1, 2)),
("777777777777777777777777777", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 20)),
("1e777777777777777777777777777", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 22)),
("1e777777777777777777777777777", Error::SyntaxError(ErrorCode::InvalidNumber, 1, 23)),
]);
}
@ -718,28 +738,44 @@ fn test_parse_i64() {
("-2", -2),
("-1234", -1234),
(" -1234 ", -1234),
(&i64::MIN.to_string(), i64::MIN),
(&i64::MAX.to_string(), i64::MAX),
]);
}
#[test]
fn test_parse_u64() {
test_parse_ok(vec![
("0", 0u64),
("3", 3u64),
("1234", 1234),
(&u64::MAX.to_string(), u64::MAX),
]);
}
#[test]
fn test_parse_f64() {
test_parse_ok(vec![
("0.0", 0.0f64),
("3.0", 3.0f64),
("3.00", 3.0f64),
("3.1", 3.1),
("-1.2", -1.2),
("0.4", 0.4),
("0.4e5", 0.4e5),
("0.4e+5", 0.4e5),
("0.4e15", 0.4e15),
("0.4e-01", 0.4e-01),
(" 0.4e-01 ", 0.4e-01),
("0.4e+15", 0.4e15),
("0.4e-01", 0.4e-1),
(" 0.4e-01 ", 0.4e-1),
("0.4e-001", 0.4e-1),
("0.4e-0", 0.4e0),
("0.00e00", 0.0),
("0.00e+00", 0.0),
("0.00e-00", 0.0),
(&format!("{:?}", (i64::MIN as f64) - 1.0), (i64::MIN as f64) - 1.0),
(&format!("{:?}", (u64::MAX as f64) + 1.0), (u64::MAX as f64) + 1.0),
(&format!("{:?}", f64::EPSILON), f64::EPSILON),
]);
}