// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! Really Bad Markup Language (rbml) is an internal serialization format of rustc. //! This is not intended to be used by users. //! //! Originally based on the Extensible Binary Markup Language //! (ebml; http://www.matroska.org/technical/specs/rfc/index.html), //! it is now a separate format tuned for the rust object metadata. //! //! # Encoding //! //! RBML document consists of the tag, length and data. //! The encoded data can contain multiple RBML documents concatenated. //! //! **Tags** are a hint for the following data. //! Tags are a number from 0x000 to 0xfff, where 0xf0 through 0xff is reserved. //! Tags less than 0xf0 are encoded in one literal byte. //! Tags greater than 0xff are encoded in two big-endian bytes, //! where the tag number is ORed with 0xf000. (E.g. tag 0x123 = `f1 23`) //! //! **Lengths** encode the length of the following data. //! It is a variable-length unsigned isize, and one of the following forms: //! //! - `80` through `fe` for lengths up to 0x7e; //! - `40 ff` through `7f ff` for lengths up to 0x3fff; //! - `20 40 00` through `3f ff ff` for lengths up to 0x1fffff; //! - `10 20 00 00` through `1f ff ff ff` for lengths up to 0xfffffff. //! //! The "overlong" form is allowed so that the length can be encoded //! without the prior knowledge of the encoded data. //! For example, the length 0 can be represented either by `80`, `40 00`, //! `20 00 00` or `10 00 00 00`. //! The encoder tries to minimize the length if possible. //! Also, some predefined tags listed below are so commonly used that //! their lengths are omitted ("implicit length"). //! //! **Data** can be either binary bytes or zero or more nested RBML documents. //! Nested documents cannot overflow, and should be entirely contained //! within a parent document. //! //! # Predefined Tags //! //! Most RBML tags are defined by the application. //! (For the rust object metadata, see also `rustc::metadata::common`.) //! RBML itself does define a set of predefined tags however, //! intended for the auto-serialization implementation. //! //! Predefined tags with an implicit length: //! //! - `U8` (`00`): 1-byte unsigned integer. //! - `U16` (`01`): 2-byte big endian unsigned integer. //! - `U32` (`02`): 4-byte big endian unsigned integer. //! - `U64` (`03`): 8-byte big endian unsigned integer. //! Any of `U*` tags can be used to encode primitive unsigned integer types, //! as long as it is no greater than the actual size. //! For example, `u8` can only be represented via the `U8` tag. //! //! - `I8` (`04`): 1-byte signed integer. //! - `I16` (`05`): 2-byte big endian signed integer. //! - `I32` (`06`): 4-byte big endian signed integer. //! - `I64` (`07`): 8-byte big endian signed integer. //! Similar to `U*` tags. Always uses two's complement encoding. //! //! - `Bool` (`08`): 1-byte boolean value, `00` for false and `01` for true. //! //! - `Char` (`09`): 4-byte big endian Unicode scalar value. //! Surrogate pairs or out-of-bound values are invalid. //! //! - `F32` (`0a`): 4-byte big endian unsigned integer representing //! IEEE 754 binary32 floating-point format. //! - `F64` (`0b`): 8-byte big endian unsigned integer representing //! IEEE 754 binary64 floating-point format. //! //! - `Sub8` (`0c`): 1-byte unsigned integer for supplementary information. //! - `Sub32` (`0d`): 4-byte unsigned integer for supplementary information. //! Those two tags normally occur as the first subdocument of certain tags, //! namely `Enum`, `Vec` and `Map`, to provide a variant or size information. //! They can be used interchangeably. //! //! Predefined tags with an explicit length: //! //! - `Str` (`10`): A UTF-8-encoded string. //! //! - `Enum` (`11`): An enum. //! The first subdocument should be `Sub*` tags with a variant ID. //! Subsequent subdocuments, if any, encode variant arguments. //! //! - `Vec` (`12`): A vector (sequence). //! - `VecElt` (`13`): A vector element. //! The first subdocument should be `Sub*` tags with the number of elements. //! Subsequent subdocuments should be `VecElt` tag per each element. //! //! - `Map` (`14`): A map (associated array). //! - `MapKey` (`15`): A key part of the map entry. //! - `MapVal` (`16`): A value part of the map entry. //! The first subdocument should be `Sub*` tags with the number of entries. //! Subsequent subdocuments should be an alternating sequence of //! `MapKey` and `MapVal` tags per each entry. //! //! - `Opaque` (`17`): An opaque, custom-format tag. //! Used to wrap ordinary custom tags or data in the auto-serialized context. //! Rustc typically uses this to encode type informations. //! //! First 0x20 tags are reserved by RBML; custom tags start at 0x20. // Do not remove on snapshot creation. Needed for bootstrap. (Issue #22364) #![cfg_attr(stage0, feature(custom_attribute))] #![crate_name = "rbml"] #![unstable(feature = "rustc_private", issue = "27812")] #![staged_api] #![crate_type = "rlib"] #![crate_type = "dylib"] #![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png", html_favicon_url = "https://doc.rust-lang.org/favicon.ico", html_root_url = "https://doc.rust-lang.org/nightly/", html_playground_url = "https://play.rust-lang.org/", test(attr(deny(warnings))))] #![feature(rustc_private)] #![feature(staged_api)] #![feature(slice_bytes)] #![cfg_attr(test, feature(test))] extern crate serialize; #[macro_use] extern crate log; #[cfg(test)] extern crate test; pub use self::EbmlEncoderTag::*; pub use self::Error::*; use std::str; use std::fmt; /// Common data structures #[derive(Clone, Copy)] pub struct Doc<'a> { pub data: &'a [u8], pub start: usize, pub end: usize, } impl<'doc> Doc<'doc> { pub fn new(data: &'doc [u8]) -> Doc<'doc> { Doc { data: data, start: 0, end: data.len() } } pub fn get<'a>(&'a self, tag: usize) -> Doc<'a> { reader::get_doc(*self, tag) } pub fn is_empty(&self) -> bool { self.start == self.end } pub fn as_str_slice<'a>(&'a self) -> &'a str { str::from_utf8(&self.data[self.start..self.end]).unwrap() } pub fn as_str(&self) -> String { self.as_str_slice().to_string() } } pub struct TaggedDoc<'a> { tag: usize, pub doc: Doc<'a>, } #[derive(Copy, Clone, Debug)] pub enum EbmlEncoderTag { // tags 00..1f are reserved for auto-serialization. // first NUM_IMPLICIT_TAGS tags are implicitly sized and lengths are not encoded. EsU8 = 0x00, // + 1 byte EsU16 = 0x01, // + 2 bytes EsU32 = 0x02, // + 4 bytes EsU64 = 0x03, // + 8 bytes EsI8 = 0x04, // + 1 byte EsI16 = 0x05, // + 2 bytes EsI32 = 0x06, // + 4 bytes EsI64 = 0x07, // + 8 bytes EsBool = 0x08, // + 1 byte EsChar = 0x09, // + 4 bytes EsF32 = 0x0a, // + 4 bytes EsF64 = 0x0b, // + 8 bytes EsSub8 = 0x0c, // + 1 byte EsSub32 = 0x0d, // + 4 bytes // 0x0e and 0x0f are reserved EsStr = 0x10, EsEnum = 0x11, // encodes the variant id as the first EsSub* EsVec = 0x12, // encodes the # of elements as the first EsSub* EsVecElt = 0x13, EsMap = 0x14, // encodes the # of pairs as the first EsSub* EsMapKey = 0x15, EsMapVal = 0x16, EsOpaque = 0x17, } const NUM_TAGS: usize = 0x1000; const NUM_IMPLICIT_TAGS: usize = 0x0e; static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [ 1, 2, 4, 8, // EsU* 1, 2, 4, 8, // ESI* 1, // EsBool 4, // EsChar 4, 8, // EsF* 1, 4, // EsSub* ]; #[derive(Debug)] pub enum Error { IntTooBig(usize), InvalidTag(usize), Expected(String), IoError(std::io::Error), ApplicationError(String) } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // FIXME: this should be a more useful display form fmt::Debug::fmt(self, f) } } // -------------------------------------- pub mod reader { use std::char; use std::isize; use std::mem::transmute; use std::slice::bytes; use serialize; use super::{ ApplicationError, EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey, EsU64, EsU32, EsU16, EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsOpaque, EbmlEncoderTag, Doc, TaggedDoc, Error, IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN }; pub type DecodeResult = Result; // rbml reading macro_rules! try_or { ($e:expr, $r:expr) => ( match $e { Ok(e) => e, Err(e) => { debug!("ignored error: {:?}", e); return $r } } ) } #[derive(Copy, Clone)] pub struct Res { pub val: usize, pub next: usize } pub fn tag_at(data: &[u8], start: usize) -> DecodeResult { let v = data[start] as usize; if v < 0xf0 { Ok(Res { val: v, next: start + 1 }) } else if v > 0xf0 { Ok(Res { val: ((v & 0xf) << 8) | data[start + 1] as usize, next: start + 2 }) } else { // every tag starting with byte 0xf0 is an overlong form, which is prohibited. Err(InvalidTag(v)) } } #[inline(never)] fn vuint_at_slow(data: &[u8], start: usize) -> DecodeResult { let a = data[start]; if a & 0x80 != 0 { return Ok(Res {val: (a & 0x7f) as usize, next: start + 1}); } if a & 0x40 != 0 { return Ok(Res {val: ((a & 0x3f) as usize) << 8 | (data[start + 1] as usize), next: start + 2}); } if a & 0x20 != 0 { return Ok(Res {val: ((a & 0x1f) as usize) << 16 | (data[start + 1] as usize) << 8 | (data[start + 2] as usize), next: start + 3}); } if a & 0x10 != 0 { return Ok(Res {val: ((a & 0x0f) as usize) << 24 | (data[start + 1] as usize) << 16 | (data[start + 2] as usize) << 8 | (data[start + 3] as usize), next: start + 4}); } Err(IntTooBig(a as usize)) } pub fn vuint_at(data: &[u8], start: usize) -> DecodeResult { if data.len() - start < 4 { return vuint_at_slow(data, start); } // Lookup table for parsing EBML Element IDs as per // http://ebml.sourceforge.net/specs/ The Element IDs are parsed by // reading a big endian u32 positioned at data[start]. Using the four // most significant bits of the u32 we lookup in the table below how // the element ID should be derived from it. // // The table stores tuples (shift, mask) where shift is the number the // u32 should be right shifted with and mask is the value the right // shifted value should be masked with. If for example the most // significant bit is set this means it's a class A ID and the u32 // should be right shifted with 24 and masked with 0x7f. Therefore we // store (24, 0x7f) at index 0x8 - 0xF (four bit numbers where the most // significant bit is set). // // By storing the number of shifts and masks in a table instead of // checking in order if the most significant bit is set, the second // most significant bit is set etc. we can replace up to three // "and+branch" with a single table lookup which gives us a measured // speedup of around 2x on x86_64. static SHIFT_MASK_TABLE: [(usize, u32); 16] = [ (0, 0x0), (0, 0x0fffffff), (8, 0x1fffff), (8, 0x1fffff), (16, 0x3fff), (16, 0x3fff), (16, 0x3fff), (16, 0x3fff), (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f) ]; unsafe { let ptr = data.as_ptr().offset(start as isize) as *const u32; let val = u32::from_be(*ptr); let i = (val >> 28) as usize; let (shift, mask) = SHIFT_MASK_TABLE[i]; Ok(Res { val: ((val >> shift) & mask) as usize, next: start + ((32 - shift) >> 3), }) } } pub fn tag_len_at(data: &[u8], tag: Res) -> DecodeResult { if tag.val < NUM_IMPLICIT_TAGS && TAG_IMPLICIT_LEN[tag.val] >= 0 { Ok(Res { val: TAG_IMPLICIT_LEN[tag.val] as usize, next: tag.next }) } else { vuint_at(data, tag.next) } } pub fn doc_at<'a>(data: &'a [u8], start: usize) -> DecodeResult> { let elt_tag = try!(tag_at(data, start)); let elt_size = try!(tag_len_at(data, elt_tag)); let end = elt_size.next + elt_size.val; Ok(TaggedDoc { tag: elt_tag.val, doc: Doc { data: data, start: elt_size.next, end: end } }) } pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: usize) -> Option> { let mut pos = d.start; while pos < d.end { let elt_tag = try_or!(tag_at(d.data, pos), None); let elt_size = try_or!(tag_len_at(d.data, elt_tag), None); pos = elt_size.next + elt_size.val; if elt_tag.val == tg { return Some(Doc { data: d.data, start: elt_size.next, end: pos }); } } None } pub fn get_doc<'a>(d: Doc<'a>, tg: usize) -> Doc<'a> { match maybe_get_doc(d, tg) { Some(d) => d, None => { error!("failed to find block with tag {:?}", tg); panic!(); } } } pub fn docs<'a>(d: Doc<'a>) -> DocsIterator<'a> { DocsIterator { d: d } } pub struct DocsIterator<'a> { d: Doc<'a>, } impl<'a> Iterator for DocsIterator<'a> { type Item = (usize, Doc<'a>); fn next(&mut self) -> Option<(usize, Doc<'a>)> { if self.d.start >= self.d.end { return None; } let elt_tag = try_or!(tag_at(self.d.data, self.d.start), { self.d.start = self.d.end; None }); let elt_size = try_or!(tag_len_at(self.d.data, elt_tag), { self.d.start = self.d.end; None }); let end = elt_size.next + elt_size.val; let doc = Doc { data: self.d.data, start: elt_size.next, end: end, }; self.d.start = end; return Some((elt_tag.val, doc)); } } pub fn tagged_docs<'a>(d: Doc<'a>, tag: usize) -> TaggedDocsIterator<'a> { TaggedDocsIterator { iter: docs(d), tag: tag, } } pub struct TaggedDocsIterator<'a> { iter: DocsIterator<'a>, tag: usize, } impl<'a> Iterator for TaggedDocsIterator<'a> { type Item = Doc<'a>; fn next(&mut self) -> Option> { while let Some((tag, doc)) = self.iter.next() { if tag == self.tag { return Some(doc); } } None } } pub fn with_doc_data(d: Doc, f: F) -> T where F: FnOnce(&[u8]) -> T, { f(&d.data[d.start..d.end]) } pub fn doc_as_u8(d: Doc) -> u8 { assert_eq!(d.end, d.start + 1); d.data[d.start] } pub fn doc_as_u64(d: Doc) -> u64 { if d.end >= 8 { // For performance, we read 8 big-endian bytes, // and mask off the junk if there is any. This // obviously won't work on the first 8 bytes // of a file - we will fall of the start // of the page and segfault. let mut b = [0; 8]; bytes::copy_memory(&d.data[d.end-8..d.end], &mut b); let data = unsafe { (*(b.as_ptr() as *const u64)).to_be() }; let len = d.end - d.start; if len < 8 { data & ((1<<(len*8))-1) } else { data } } else { let mut result = 0; for b in &d.data[d.start..d.end] { result = (result<<8) + (*b as u64); } result } } #[inline] pub fn doc_as_u16(d: Doc) -> u16 { doc_as_u64(d) as u16 } #[inline] pub fn doc_as_u32(d: Doc) -> u32 { doc_as_u64(d) as u32 } #[inline] pub fn doc_as_i8(d: Doc) -> i8 { doc_as_u8(d) as i8 } #[inline] pub fn doc_as_i16(d: Doc) -> i16 { doc_as_u16(d) as i16 } #[inline] pub fn doc_as_i32(d: Doc) -> i32 { doc_as_u32(d) as i32 } #[inline] pub fn doc_as_i64(d: Doc) -> i64 { doc_as_u64(d) as i64 } pub struct Decoder<'a> { parent: Doc<'a>, pos: usize, } impl<'doc> Decoder<'doc> { pub fn new(d: Doc<'doc>) -> Decoder<'doc> { Decoder { parent: d, pos: d.start } } fn next_doc(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult> { debug!(". next_doc(exp_tag={:?})", exp_tag); if self.pos >= self.parent.end { return Err(Expected(format!("no more documents in \ current node!"))); } let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos)); debug!("self.parent={:?}-{:?} self.pos={:?} r_tag={:?} r_doc={:?}-{:?}", self.parent.start, self.parent.end, self.pos, r_tag, r_doc.start, r_doc.end); if r_tag != (exp_tag as usize) { return Err(Expected(format!("expected EBML doc with tag {:?} but \ found tag {:?}", exp_tag, r_tag))); } if r_doc.end > self.parent.end { return Err(Expected(format!("invalid EBML, child extends to \ {:#x}, parent to {:#x}", r_doc.end, self.parent.end))); } self.pos = r_doc.end; Ok(r_doc) } fn push_doc(&mut self, exp_tag: EbmlEncoderTag, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { let d = try!(self.next_doc(exp_tag)); let old_parent = self.parent; let old_pos = self.pos; self.parent = d; self.pos = d.start; let r = try!(f(self)); self.parent = old_parent; self.pos = old_pos; Ok(r) } fn _next_sub(&mut self) -> DecodeResult { // empty vector/map optimization if self.parent.is_empty() { return Ok(0); } let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos)); let r = if r_tag == (EsSub8 as usize) { doc_as_u8(r_doc) as usize } else if r_tag == (EsSub32 as usize) { doc_as_u32(r_doc) as usize } else { return Err(Expected(format!("expected EBML doc with tag {:?} or {:?} but \ found tag {:?}", EsSub8, EsSub32, r_tag))); }; if r_doc.end > self.parent.end { return Err(Expected(format!("invalid EBML, child extends to \ {:#x}, parent to {:#x}", r_doc.end, self.parent.end))); } self.pos = r_doc.end; debug!("_next_sub result={:?}", r); Ok(r) } // variable-length unsigned integer with different tags. // `first_tag` should be a tag for u8 or i8. // `last_tag` should be the largest allowed integer tag with the matching signedness. // all tags between them should be valid, in the order of u8, u16, u32 and u64. fn _next_int(&mut self, first_tag: EbmlEncoderTag, last_tag: EbmlEncoderTag) -> DecodeResult { if self.pos >= self.parent.end { return Err(Expected(format!("no more documents in \ current node!"))); } let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos)); let r = if first_tag as usize <= r_tag && r_tag <= last_tag as usize { match r_tag - first_tag as usize { 0 => doc_as_u8(r_doc) as u64, 1 => doc_as_u16(r_doc) as u64, 2 => doc_as_u32(r_doc) as u64, 3 => doc_as_u64(r_doc), _ => unreachable!(), } } else { return Err(Expected(format!("expected EBML doc with tag {:?} through {:?} but \ found tag {:?}", first_tag, last_tag, r_tag))); }; if r_doc.end > self.parent.end { return Err(Expected(format!("invalid EBML, child extends to \ {:#x}, parent to {:#x}", r_doc.end, self.parent.end))); } self.pos = r_doc.end; debug!("_next_int({:?}, {:?}) result={:?}", first_tag, last_tag, r); Ok(r) } pub fn read_opaque(&mut self, op: F) -> DecodeResult where F: FnOnce(&mut Decoder, Doc) -> DecodeResult, { let doc = try!(self.next_doc(EsOpaque)); let (old_parent, old_pos) = (self.parent, self.pos); self.parent = doc; self.pos = doc.start; let result = try!(op(self, doc)); self.parent = old_parent; self.pos = old_pos; Ok(result) } } impl<'doc> serialize::Decoder for Decoder<'doc> { type Error = Error; fn read_nil(&mut self) -> DecodeResult<()> { Ok(()) } fn read_u64(&mut self) -> DecodeResult { self._next_int(EsU8, EsU64) } fn read_u32(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsU8, EsU32)) as u32) } fn read_u16(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsU8, EsU16)) as u16) } fn read_u8(&mut self) -> DecodeResult { Ok(doc_as_u8(try!(self.next_doc(EsU8)))) } fn read_uint(&mut self) -> DecodeResult { let v = try!(self._next_int(EsU8, EsU64)); if v > (::std::usize::MAX as u64) { Err(IntTooBig(v as usize)) } else { Ok(v as usize) } } fn read_i64(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI8, EsI64)) as i64) } fn read_i32(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI8, EsI32)) as i32) } fn read_i16(&mut self) -> DecodeResult { Ok(try!(self._next_int(EsI8, EsI16)) as i16) } fn read_i8(&mut self) -> DecodeResult { Ok(doc_as_u8(try!(self.next_doc(EsI8))) as i8) } fn read_int(&mut self) -> DecodeResult { let v = try!(self._next_int(EsI8, EsI64)) as i64; if v > (isize::MAX as i64) || v < (isize::MIN as i64) { debug!("FIXME \\#6122: Removing this makes this function miscompile"); Err(IntTooBig(v as usize)) } else { Ok(v as isize) } } fn read_bool(&mut self) -> DecodeResult { Ok(doc_as_u8(try!(self.next_doc(EsBool))) != 0) } fn read_f64(&mut self) -> DecodeResult { let bits = doc_as_u64(try!(self.next_doc(EsF64))); Ok(unsafe { transmute(bits) }) } fn read_f32(&mut self) -> DecodeResult { let bits = doc_as_u32(try!(self.next_doc(EsF32))); Ok(unsafe { transmute(bits) }) } fn read_char(&mut self) -> DecodeResult { Ok(char::from_u32(doc_as_u32(try!(self.next_doc(EsChar)))).unwrap()) } fn read_str(&mut self) -> DecodeResult { Ok(try!(self.next_doc(EsStr)).as_str()) } // Compound types: fn read_enum(&mut self, name: &str, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_enum({})", name); let doc = try!(self.next_doc(EsEnum)); let (old_parent, old_pos) = (self.parent, self.pos); self.parent = doc; self.pos = self.parent.start; let result = try!(f(self)); self.parent = old_parent; self.pos = old_pos; Ok(result) } fn read_enum_variant(&mut self, _: &[&str], mut f: F) -> DecodeResult where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult, { debug!("read_enum_variant()"); let idx = try!(self._next_sub()); debug!(" idx={}", idx); f(self, idx) } fn read_enum_variant_arg(&mut self, idx: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_enum_variant_arg(idx={})", idx); f(self) } fn read_enum_struct_variant(&mut self, _: &[&str], mut f: F) -> DecodeResult where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult, { debug!("read_enum_struct_variant()"); let idx = try!(self._next_sub()); debug!(" idx={}", idx); f(self, idx) } fn read_enum_struct_variant_field(&mut self, name: &str, idx: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_enum_struct_variant_arg(name={}, idx={})", name, idx); f(self) } fn read_struct(&mut self, name: &str, _: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_struct(name={})", name); f(self) } fn read_struct_field(&mut self, name: &str, idx: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_struct_field(name={}, idx={})", name, idx); f(self) } fn read_tuple(&mut self, tuple_len: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_tuple()"); self.read_seq(move |d, len| { if len == tuple_len { f(d) } else { Err(Expected(format!("Expected tuple of length `{}`, \ found tuple of length `{}`", tuple_len, len))) } }) } fn read_tuple_arg(&mut self, idx: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_tuple_arg(idx={})", idx); self.read_seq_elt(idx, f) } fn read_tuple_struct(&mut self, name: &str, len: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_tuple_struct(name={})", name); self.read_tuple(len, f) } fn read_tuple_struct_arg(&mut self, idx: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_tuple_struct_arg(idx={})", idx); self.read_tuple_arg(idx, f) } fn read_option(&mut self, mut f: F) -> DecodeResult where F: FnMut(&mut Decoder<'doc>, bool) -> DecodeResult, { debug!("read_option()"); self.read_enum("Option", move |this| { this.read_enum_variant(&["None", "Some"], move |this, idx| { match idx { 0 => f(this, false), 1 => f(this, true), _ => { Err(Expected(format!("Expected None or Some"))) } } }) }) } fn read_seq(&mut self, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult, { debug!("read_seq()"); self.push_doc(EsVec, move |d| { let len = try!(d._next_sub()); debug!(" len={}", len); f(d, len) }) } fn read_seq_elt(&mut self, idx: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_seq_elt(idx={})", idx); self.push_doc(EsVecElt, f) } fn read_map(&mut self, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult, { debug!("read_map()"); self.push_doc(EsMap, move |d| { let len = try!(d._next_sub()); debug!(" len={}", len); f(d, len) }) } fn read_map_elt_key(&mut self, idx: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_map_elt_key(idx={})", idx); self.push_doc(EsMapKey, f) } fn read_map_elt_val(&mut self, idx: usize, f: F) -> DecodeResult where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult, { debug!("read_map_elt_val(idx={})", idx); self.push_doc(EsMapVal, f) } fn error(&mut self, err: &str) -> Error { ApplicationError(err.to_string()) } } } pub mod writer { use std::mem; use std::io::prelude::*; use std::io::{self, SeekFrom, Cursor}; use std::slice::bytes; use super::{ EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey, EsU64, EsU32, EsU16, EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsOpaque, NUM_IMPLICIT_TAGS, NUM_TAGS }; use serialize; pub type EncodeResult = io::Result<()>; // rbml writing pub struct Encoder<'a> { pub writer: &'a mut Cursor>, size_positions: Vec, relax_limit: u64, // do not move encoded bytes before this position } fn write_tag(w: &mut W, n: usize) -> EncodeResult { if n < 0xf0 { w.write_all(&[n as u8]) } else if 0x100 <= n && n < NUM_TAGS { w.write_all(&[0xf0 | (n >> 8) as u8, n as u8]) } else { Err(io::Error::new(io::ErrorKind::Other, &format!("invalid tag: {}", n)[..])) } } fn write_sized_vuint(w: &mut W, n: usize, size: usize) -> EncodeResult { match size { 1 => w.write_all(&[0x80 | (n as u8)]), 2 => w.write_all(&[0x40 | ((n >> 8) as u8), n as u8]), 3 => w.write_all(&[0x20 | ((n >> 16) as u8), (n >> 8) as u8, n as u8]), 4 => w.write_all(&[0x10 | ((n >> 24) as u8), (n >> 16) as u8, (n >> 8) as u8, n as u8]), _ => Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..])) } } pub fn write_vuint(w: &mut W, n: usize) -> EncodeResult { if n < 0x7f { return write_sized_vuint(w, n, 1); } if n < 0x4000 { return write_sized_vuint(w, n, 2); } if n < 0x200000 { return write_sized_vuint(w, n, 3); } if n < 0x10000000 { return write_sized_vuint(w, n, 4); } Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..])) } impl<'a> Encoder<'a> { pub fn new(w: &'a mut Cursor>) -> Encoder<'a> { Encoder { writer: w, size_positions: vec!(), relax_limit: 0, } } pub fn start_tag(&mut self, tag_id: usize) -> EncodeResult { debug!("Start tag {:?}", tag_id); assert!(tag_id >= NUM_IMPLICIT_TAGS); // Write the enum ID: try!(write_tag(self.writer, tag_id)); // Write a placeholder four-byte size. let cur_pos = try!(self.writer.seek(SeekFrom::Current(0))); self.size_positions.push(cur_pos); let zeroes: &[u8] = &[0, 0, 0, 0]; self.writer.write_all(zeroes) } pub fn end_tag(&mut self) -> EncodeResult { let last_size_pos = self.size_positions.pop().unwrap(); let cur_pos = try!(self.writer.seek(SeekFrom::Current(0))); try!(self.writer.seek(SeekFrom::Start(last_size_pos))); let size = (cur_pos - last_size_pos - 4) as usize; // relax the size encoding for small tags (bigger tags are costly to move). // we should never try to move the stable positions, however. const RELAX_MAX_SIZE: usize = 0x100; if size <= RELAX_MAX_SIZE && last_size_pos >= self.relax_limit { // we can't alter the buffer in place, so have a temporary buffer let mut buf = [0u8; RELAX_MAX_SIZE]; { let last_size_pos = last_size_pos as usize; let data = &self.writer.get_ref()[last_size_pos+4..cur_pos as usize]; bytes::copy_memory(data, &mut buf); } // overwrite the size and data and continue try!(write_vuint(self.writer, size)); try!(self.writer.write_all(&buf[..size])); } else { // overwrite the size with an overlong encoding and skip past the data try!(write_sized_vuint(self.writer, size, 4)); try!(self.writer.seek(SeekFrom::Start(cur_pos))); } debug!("End tag (size = {:?})", size); Ok(()) } pub fn wr_tag(&mut self, tag_id: usize, blk: F) -> EncodeResult where F: FnOnce() -> EncodeResult, { try!(self.start_tag(tag_id)); try!(blk()); self.end_tag() } pub fn wr_tagged_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult { assert!(tag_id >= NUM_IMPLICIT_TAGS); try!(write_tag(self.writer, tag_id)); try!(write_vuint(self.writer, b.len())); self.writer.write_all(b) } pub fn wr_tagged_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult { let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) }; // tagged integers are emitted in big-endian, with no // leading zeros. let leading_zero_bytes = v.leading_zeros()/8; self.wr_tagged_bytes(tag_id, &bytes[leading_zero_bytes as usize..]) } #[inline] pub fn wr_tagged_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult { self.wr_tagged_u64(tag_id, v as u64) } #[inline] pub fn wr_tagged_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult { self.wr_tagged_u64(tag_id, v as u64) } #[inline] pub fn wr_tagged_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult { self.wr_tagged_bytes(tag_id, &[v]) } #[inline] pub fn wr_tagged_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult { self.wr_tagged_u64(tag_id, v as u64) } #[inline] pub fn wr_tagged_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult { self.wr_tagged_u32(tag_id, v as u32) } #[inline] pub fn wr_tagged_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult { self.wr_tagged_u16(tag_id, v as u16) } #[inline] pub fn wr_tagged_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult { self.wr_tagged_bytes(tag_id, &[v as u8]) } pub fn wr_tagged_str(&mut self, tag_id: usize, v: &str) -> EncodeResult { self.wr_tagged_bytes(tag_id, v.as_bytes()) } // for auto-serialization fn wr_tagged_raw_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult { try!(write_tag(self.writer, tag_id)); self.writer.write_all(b) } fn wr_tagged_raw_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult { let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) }; self.wr_tagged_raw_bytes(tag_id, &bytes) } fn wr_tagged_raw_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult{ let bytes: [u8; 4] = unsafe { mem::transmute(v.to_be()) }; self.wr_tagged_raw_bytes(tag_id, &bytes) } fn wr_tagged_raw_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult { let bytes: [u8; 2] = unsafe { mem::transmute(v.to_be()) }; self.wr_tagged_raw_bytes(tag_id, &bytes) } fn wr_tagged_raw_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult { self.wr_tagged_raw_bytes(tag_id, &[v]) } fn wr_tagged_raw_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult { self.wr_tagged_raw_u64(tag_id, v as u64) } fn wr_tagged_raw_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult { self.wr_tagged_raw_u32(tag_id, v as u32) } fn wr_tagged_raw_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult { self.wr_tagged_raw_u16(tag_id, v as u16) } fn wr_tagged_raw_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult { self.wr_tagged_raw_bytes(tag_id, &[v as u8]) } pub fn wr_bytes(&mut self, b: &[u8]) -> EncodeResult { debug!("Write {:?} bytes", b.len()); self.writer.write_all(b) } pub fn wr_str(&mut self, s: &str) -> EncodeResult { debug!("Write str: {:?}", s); self.writer.write_all(s.as_bytes()) } /// Returns the current position while marking it stable, i.e. /// generated bytes so far wouldn't be affected by relaxation. pub fn mark_stable_position(&mut self) -> u64 { let pos = self.writer.seek(SeekFrom::Current(0)).unwrap(); if self.relax_limit < pos { self.relax_limit = pos; } pos } } impl<'a> Encoder<'a> { // used internally to emit things like the vector length and so on fn _emit_tagged_sub(&mut self, v: usize) -> EncodeResult { if v as u8 as usize == v { self.wr_tagged_raw_u8(EsSub8 as usize, v as u8) } else if v as u32 as usize == v { self.wr_tagged_raw_u32(EsSub32 as usize, v as u32) } else { Err(io::Error::new(io::ErrorKind::Other, &format!("length or variant id too big: {}", v)[..])) } } pub fn emit_opaque(&mut self, f: F) -> EncodeResult where F: FnOnce(&mut Encoder) -> EncodeResult, { try!(self.start_tag(EsOpaque as usize)); try!(f(self)); self.end_tag() } } impl<'a> serialize::Encoder for Encoder<'a> { type Error = io::Error; fn emit_nil(&mut self) -> EncodeResult { Ok(()) } fn emit_uint(&mut self, v: usize) -> EncodeResult { self.emit_u64(v as u64) } fn emit_u64(&mut self, v: u64) -> EncodeResult { if v as u32 as u64 == v { self.emit_u32(v as u32) } else { self.wr_tagged_raw_u64(EsU64 as usize, v) } } fn emit_u32(&mut self, v: u32) -> EncodeResult { if v as u16 as u32 == v { self.emit_u16(v as u16) } else { self.wr_tagged_raw_u32(EsU32 as usize, v) } } fn emit_u16(&mut self, v: u16) -> EncodeResult { if v as u8 as u16 == v { self.emit_u8(v as u8) } else { self.wr_tagged_raw_u16(EsU16 as usize, v) } } fn emit_u8(&mut self, v: u8) -> EncodeResult { self.wr_tagged_raw_u8(EsU8 as usize, v) } fn emit_int(&mut self, v: isize) -> EncodeResult { self.emit_i64(v as i64) } fn emit_i64(&mut self, v: i64) -> EncodeResult { if v as i32 as i64 == v { self.emit_i32(v as i32) } else { self.wr_tagged_raw_i64(EsI64 as usize, v) } } fn emit_i32(&mut self, v: i32) -> EncodeResult { if v as i16 as i32 == v { self.emit_i16(v as i16) } else { self.wr_tagged_raw_i32(EsI32 as usize, v) } } fn emit_i16(&mut self, v: i16) -> EncodeResult { if v as i8 as i16 == v { self.emit_i8(v as i8) } else { self.wr_tagged_raw_i16(EsI16 as usize, v) } } fn emit_i8(&mut self, v: i8) -> EncodeResult { self.wr_tagged_raw_i8(EsI8 as usize, v) } fn emit_bool(&mut self, v: bool) -> EncodeResult { self.wr_tagged_raw_u8(EsBool as usize, v as u8) } fn emit_f64(&mut self, v: f64) -> EncodeResult { let bits = unsafe { mem::transmute(v) }; self.wr_tagged_raw_u64(EsF64 as usize, bits) } fn emit_f32(&mut self, v: f32) -> EncodeResult { let bits = unsafe { mem::transmute(v) }; self.wr_tagged_raw_u32(EsF32 as usize, bits) } fn emit_char(&mut self, v: char) -> EncodeResult { self.wr_tagged_raw_u32(EsChar as usize, v as u32) } fn emit_str(&mut self, v: &str) -> EncodeResult { self.wr_tagged_str(EsStr as usize, v) } fn emit_enum(&mut self, _name: &str, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsEnum as usize)); try!(f(self)); self.end_tag() } fn emit_enum_variant(&mut self, _: &str, v_id: usize, _: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self._emit_tagged_sub(v_id)); f(self) } fn emit_enum_variant_arg(&mut self, _: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { f(self) } fn emit_enum_struct_variant(&mut self, v_name: &str, v_id: usize, cnt: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_enum_variant(v_name, v_id, cnt, f) } fn emit_enum_struct_variant_field(&mut self, _: &str, idx: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_enum_variant_arg(idx, f) } fn emit_struct(&mut self, _: &str, _len: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { f(self) } fn emit_struct_field(&mut self, _name: &str, _: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { f(self) } fn emit_tuple(&mut self, len: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_seq(len, f) } fn emit_tuple_arg(&mut self, idx: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_seq_elt(idx, f) } fn emit_tuple_struct(&mut self, _: &str, len: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_seq(len, f) } fn emit_tuple_struct_arg(&mut self, idx: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_seq_elt(idx, f) } fn emit_option(&mut self, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_enum("Option", f) } fn emit_option_none(&mut self) -> EncodeResult { self.emit_enum_variant("None", 0, 0, |_| Ok(())) } fn emit_option_some(&mut self, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { self.emit_enum_variant("Some", 1, 1, f) } fn emit_seq(&mut self, len: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { if len == 0 { // empty vector optimization return self.wr_tagged_bytes(EsVec as usize, &[]); } try!(self.start_tag(EsVec as usize)); try!(self._emit_tagged_sub(len)); try!(f(self)); self.end_tag() } fn emit_seq_elt(&mut self, _idx: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsVecElt as usize)); try!(f(self)); self.end_tag() } fn emit_map(&mut self, len: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { if len == 0 { // empty map optimization return self.wr_tagged_bytes(EsMap as usize, &[]); } try!(self.start_tag(EsMap as usize)); try!(self._emit_tagged_sub(len)); try!(f(self)); self.end_tag() } fn emit_map_elt_key(&mut self, _idx: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsMapKey as usize)); try!(f(self)); self.end_tag() } fn emit_map_elt_val(&mut self, _idx: usize, f: F) -> EncodeResult where F: FnOnce(&mut Encoder<'a>) -> EncodeResult, { try!(self.start_tag(EsMapVal as usize)); try!(f(self)); self.end_tag() } } } // ___________________________________________________________________________ // Testing #[cfg(test)] mod tests { use super::{Doc, reader, writer}; use serialize::{Encodable, Decodable}; use std::io::Cursor; #[test] fn test_vuint_at() { let data = &[ 0x80, 0xff, 0x40, 0x00, 0x7f, 0xff, 0x20, 0x00, 0x00, 0x3f, 0xff, 0xff, 0x10, 0x00, 0x00, 0x00, 0x1f, 0xff, 0xff, 0xff ]; let mut res: reader::Res; // Class A res = reader::vuint_at(data, 0).unwrap(); assert_eq!(res.val, 0); assert_eq!(res.next, 1); res = reader::vuint_at(data, res.next).unwrap(); assert_eq!(res.val, (1 << 7) - 1); assert_eq!(res.next, 2); // Class B res = reader::vuint_at(data, res.next).unwrap(); assert_eq!(res.val, 0); assert_eq!(res.next, 4); res = reader::vuint_at(data, res.next).unwrap(); assert_eq!(res.val, (1 << 14) - 1); assert_eq!(res.next, 6); // Class C res = reader::vuint_at(data, res.next).unwrap(); assert_eq!(res.val, 0); assert_eq!(res.next, 9); res = reader::vuint_at(data, res.next).unwrap(); assert_eq!(res.val, (1 << 21) - 1); assert_eq!(res.next, 12); // Class D res = reader::vuint_at(data, res.next).unwrap(); assert_eq!(res.val, 0); assert_eq!(res.next, 16); res = reader::vuint_at(data, res.next).unwrap(); assert_eq!(res.val, (1 << 28) - 1); assert_eq!(res.next, 20); } #[test] fn test_option_int() { fn test_v(v: Option) { debug!("v == {:?}", v); let mut wr = Cursor::new(Vec::new()); { let mut rbml_w = writer::Encoder::new(&mut wr); let _ = v.encode(&mut rbml_w); } let rbml_doc = Doc::new(wr.get_ref()); let mut deser = reader::Decoder::new(rbml_doc); let v1 = Decodable::decode(&mut deser).unwrap(); debug!("v1 == {:?}", v1); assert_eq!(v, v1); } test_v(Some(22)); test_v(None); test_v(Some(3)); } } #[cfg(test)] mod bench { #![allow(non_snake_case)] use test::Bencher; use super::reader; #[bench] pub fn vuint_at_A_aligned(b: &mut Bencher) { let data = (0..4*100).map(|i| { match i % 2 { 0 => 0x80, _ => i as u8, } }).collect::>(); let mut sum = 0; b.iter(|| { let mut i = 0; while i < data.len() { sum += reader::vuint_at(&data, i).unwrap().val; i += 4; } }); } #[bench] pub fn vuint_at_A_unaligned(b: &mut Bencher) { let data = (0..4*100+1).map(|i| { match i % 2 { 1 => 0x80, _ => i as u8 } }).collect::>(); let mut sum = 0; b.iter(|| { let mut i = 1; while i < data.len() { sum += reader::vuint_at(&data, i).unwrap().val; i += 4; } }); } #[bench] pub fn vuint_at_D_aligned(b: &mut Bencher) { let data = (0..4*100).map(|i| { match i % 4 { 0 => 0x10, 3 => i as u8, _ => 0 } }).collect::>(); let mut sum = 0; b.iter(|| { let mut i = 0; while i < data.len() { sum += reader::vuint_at(&data, i).unwrap().val; i += 4; } }); } #[bench] pub fn vuint_at_D_unaligned(b: &mut Bencher) { let data = (0..4*100+1).map(|i| { match i % 4 { 1 => 0x10, 0 => i as u8, _ => 0 } }).collect::>(); let mut sum = 0; b.iter(|| { let mut i = 1; while i < data.len() { sum += reader::vuint_at(&data, i).unwrap().val; i += 4; } }); } }