From 55eb390778a32c4c87d085a5d3178c741995c77e Mon Sep 17 00:00:00 2001 From: Erick Tryzelaar Date: Tue, 31 Mar 2015 22:16:15 -0700 Subject: [PATCH] Update the readme --- README.md | 170 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 150 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 8beed340..0baf0b5d 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,164 @@ -Experimental Rust Serialization Library. +Serde Rust Serialization Framework +================================== [![Build Status](https://travis-ci.org/erickt/rust-serde.png?branch=master)](https://travis-ci.org/erickt/rust-serde) -This is an experiment to modernize rust's `libserialize` library. It is designed to implement https://github.com/rust-lang/rfcs/pull/22. `rust-serde` is an attempt to address a major shortcoming in `libserialize`. For normal structures, when you say you want to deserialize into: +Serde is a powerful framework that enables serialization libraries to +generically serialize Rust data structures without the overhead of runtime type +information. In many situations, the handshake protocol between serializers and +serializees can be completely optimized away, leaving serde to perform at roughly +the same speed as a hand-written serializer for a specific type. + +Documentation is available at http://erickt.github.io/rust-serde/serde + +Example +======= + +Serde works by threading visitors between the serializer and the serializee. +This allows data to be generically shared between the two without needing to +wrap the values in a separate structure. Here's an example struct serializer. 
+It works by reinterpreting the structure as a named map, with the keys +being the stringified field names, and a simple state machine to step +through each field: ```rust -struct Foo { - x: int, - y: int, +struct Point { + x: i32, + y: i32, +} + +impl serde::Serialize for Point { + fn serialize(&self, serializer: &mut S) -> Result<(), S::Error> + where S: serialize::Serializer + { + struct MapVisitor<'a> { + value: &'a Point, + state: u8, + } + + impl<'a> serde::ser::MapVisitor for MapVisitor { + fn visit(&mut self, serializer: &mut S) -> Result { + match self.state { + 0 => { + self.state += 1; + Ok(Some(try!(serializer.visit_map_elt("x", &self.x))) + } + 1 => { + self.state += 1; + Ok(Some(try!(serializer.visit_map_elt("y", &self.y)))) + } + _ => { + Ok(None) + } + } + } + } + + serializer.visit_named_map("Point", MapVisitor { + value: self, + state: 0, + }) + } } ``` -`libserialize`'s deserializer essentially asks for: +Deserialization is a bit trickier. We need to deserialize a field from a string, but in order to +avoid some borrow checker issues and unnecessary allocations, we deserialize field names +into an enum: -* Is the next value a struct named "Foo"? If not, error. -* Is the next field named "x"? If not, error. -* Is the next value an "int"? If not, error. -* Is the next field named "y"? If not, error. -* Is the next value an "int"? If not, error. -* Is the struct finished? If not, error. +```rust +enum PointField { + X, + Y, +} -While this works for user defined structures, it cannot support deserializing into a value like `json::Json`, which is an enum that can represent every JSON value. In order to support that, it needs to be able to do some lookahead: +impl serde::Deserialize for PointField { + fn deserialize(deserializer: &mut D) -> Result + where D: serde::de::Deserializer + { + struct FieldVisitor; -* What is the next value type? - * If a struct, parse a struct. - * If an integer, parse an integer. - * ... 
+ impl serde::de::Visitor for FieldVisitor { + type Value = Field; -More formally, `libserialize` implements a LL(0) grammar, whereas `json::Json` requires a LL(1) grammar. `rust-serde` provides this by implementing a serializer and deserializer that produces a tagged token stream of values. This enables a `Deserializable` for `json::Json` to look at the next token before deciding on how to parse the value. + fn visit_str(&mut self, value: &str) -> Result + where E: serde::de::Error + { + match value { + "x" => Ok(Field::X), + "y" => Ok(Field::Y), + _ => Err(serde::de::Error::syntax_error()), + } + } + } ---- + deserializer.visit(FieldVisitor) + } +} +``` -There is now also a new library variation called `serde2`. This removes the need for tagged values and replaces them with a `Visitor` pattern. This pattern is very similar to the `Iterator` pattern, but it threads some custom state through visiting each type. This gets many of the benefits of the `serde` library without needing to always pay for tagging the variants. 
+This is then used in our actual deserializer: + +```rust +impl serde::Deserialize for Point { + fn deserialize(deserializer: &mut D) -> Result + where D: serde::de::Deserializer + { + struct PointVisitor; + + impl serde::de::Visitor for PointVisitor { + type Value = Point; + + fn visit_map(&mut self, mut visitor: V) -> Result + where V: serde::de::MapVisitor + { + let mut x = None; + let mut y = None; + + loop { + match try!(visitor.visit_key()) { + Some(Field::X) => { x = Some(try!(visitor.visit_value())); } + Some(Field::Y) => { y = Some(try!(visitor.visit_value())); } + None => { break; } + } + } + + let x = match x { + Some(x) => x, + None => try!(visitor.missing_field("x")), + }; + + let y = match y { + Some(y) => y, + None => try!(visitor.missing_field("y")), + }; + + try!(visitor.end()); + + Ok(Point{ x: x, y: y }) + } + } + + deserializer.visit_named_map("Point", PointVisitor) + } +} + +``` + +There's a bit of machinery required to write implementations of `Serialize` and +`Deserialize`. Fortunately it is not necessary in most circumstances. Instead, +it's much easier to use the `serde_macros` plugin. The prior code can be +rewritten as: + +```rust +#![feature(custom_derive)] +#![plugin(serde_macros)] + +extern crate serde; + +#[derive(Serialize, Deserialize)] +struct Point { + x: i32, + y: i32, +} +```