rust/src/lib/ebml.rs

187 lines
5.4 KiB
Rust
Raw Normal View History

// Simple Extensible Binary Markup Language (ebml) reader and writer on a
// cursor model. See the specification here:
// http://www.matroska.org/technical/specs/rfc/index.html
import option::none;
import option::some;
2011-03-22 15:59:17 -05:00
// Record pairing an element's tag id with a size.
// NOTE(review): presumably `size` is the element's payload length in bytes;
// nothing in this view constructs an ebml_tag -- confirm against callers.
type ebml_tag = rec(uint id, uint size);
2011-03-25 19:53:46 -05:00
// Record bundling an ebml_tag with two positions, tag_pos and data_pos.
// NOTE(review): presumably a cursor snapshot (where the tag header and its
// data begin); nothing in this view reads or writes it -- confirm.
type ebml_state = rec(ebml_tag ebml_tag, uint tag_pos, uint data_pos);
2011-03-22 15:59:17 -05:00
// TODO: When we have module renaming, make "reader" and "writer" separate
// modules within this file.
// ebml reading
2011-03-22 15:59:17 -05:00
// A view onto part of an ebml byte buffer.
//   data  - the whole backing buffer
//   start - index of the first byte belonging to this document
//   end   - index one past the last byte (new_doc sets it to the buffer
//           length, and the readers below loop while pos < end)
type doc = rec(vec[u8] data,
uint start,
uint end);
fn vint_at(vec[u8] data, uint start) -> tup(uint, uint) {
auto a = data.(start);
if (a & 0x80u8 != 0u8) { ret tup((a & 0x7fu8) as uint, start + 1u); }
2011-03-22 15:59:17 -05:00
if (a & 0x40u8 != 0u8) {
ret tup((((a & 0x3fu8) as uint) << 8u) | (data.(start + 1u) as uint),
start + 2u);
} else if (a & 0x20u8 != 0u8) {
ret tup((((a & 0x1fu8) as uint) << 16u) |
((data.(start + 1u) as uint) << 8u) |
(data.(start + 2u) as uint), start + 3u);
} else if (a & 0x10u8 != 0u8) {
ret tup((((a & 0x0fu8) as uint) << 24u) |
((data.(start + 1u) as uint) << 16u) |
((data.(start + 2u) as uint) << 8u) |
(data.(start + 3u) as uint), start + 4u);
} else {
log_err "vint too big"; fail;
2011-03-22 15:59:17 -05:00
}
}
fn new_doc(vec[u8] data) -> doc {
ret rec(data=data, start=0u, end=_vec::len[u8](data));
}
2011-03-22 15:59:17 -05:00
// Reads the element header (tag vint, then size vint) located at `start`
// and returns a doc covering that element's payload. The tag value itself
// is discarded; only the position following it is used.
fn doc_at(vec[u8] data, uint start) -> doc {
    auto tag = vint_at(data, start);
    auto size = vint_at(data, tag._1);
    // The payload begins right after the size vint and runs for size._0 bytes.
    auto body_start = size._1;
    auto body_end = body_start + size._0;
    ret rec(data=data, start=body_start, end=body_end);
}
// Scans the elements stored directly inside `d` and returns the payload of
// the first one whose tag equals `tg`, or none if the scan reaches d.end
// without a match.
fn maybe_get_doc(doc d, uint tg) -> option::t[doc] {
    auto cursor = d.start;
    while (cursor < d.end) {
        auto tag = vint_at(d.data, cursor);
        auto size = vint_at(d.data, tag._1);
        auto body_start = size._1;
        auto body_end = body_start + size._0;
        if (tag._0 == tg) {
            ret some[doc](rec(data=d.data, start=body_start, end=body_end));
        }
        // Skip over this element's payload to the next sibling header.
        cursor = body_end;
    }
    ret none[doc];
}
// Like maybe_get_doc, but a missing tag is treated as fatal: the tag number
// is logged in decimal and the task fails.
fn get_doc(doc d, uint tg) -> doc {
    alt (maybe_get_doc(d, tg)) {
        case (none[doc]) {
            log_err "failed to find block with tag " + _uint::to_str(tg, 10u);
            fail;
        }
        case (some[doc](?child)) { ret child; }
    }
}
// Iterates over every element stored directly inside `d`, yielding
// tup(tag id, payload doc) for each one in stream order.
iter docs(doc d) -> tup(uint, doc) {
    auto cursor = d.start;
    while (cursor < d.end) {
        auto tag = vint_at(d.data, cursor);
        auto size = vint_at(d.data, tag._1);
        auto body_start = size._1;
        // Advance past the payload before yielding, as the payload's end is
        // also where the next sibling header starts.
        cursor = body_start + size._0;
        put tup(tag._0, rec(data=d.data, start=body_start, end=cursor));
    }
}
// Iterates over the elements stored directly inside `d`, yielding the
// payload doc of each one whose tag equals `tg`; other elements are skipped.
iter tagged_docs(doc d, uint tg) -> doc {
    auto cursor = d.start;
    while (cursor < d.end) {
        auto tag = vint_at(d.data, cursor);
        auto size = vint_at(d.data, tag._1);
        auto body_start = size._1;
        cursor = body_start + size._0;
        if (tag._0 == tg) {
            put rec(data=d.data, start=body_start, end=cursor);
        }
    }
}
// Returns the bytes of `d` as a vector of their own: the slice of the
// backing buffer from d.start to d.end.
fn doc_data(doc d) -> vec[u8] {
    auto bytes = _vec::slice[u8](d.data, d.start, d.end);
    ret bytes;
}
fn be_uint_from_bytes(vec[u8] data, uint start, uint size) -> uint {
auto sz = size;
assert (sz <= 4u);
auto val = 0u;
auto pos = start;
while (sz > 0u) {
sz -= 1u;
val += (data.(pos) as uint) << (sz * 8u);
pos += 1u;
}
ret val;
}
fn doc_as_uint(doc d) -> uint {
ret be_uint_from_bytes(d.data, d.start, d.end - d.start);
2011-03-22 15:59:17 -05:00
}
// ebml writing
2011-03-22 15:59:17 -05:00
// State for writing ebml: the underlying buffered writer plus a stack of
// stream positions. start_tag pushes the position of each placeholder size
// field onto size_positions; end_tag pops it to fill in the real size.
type writer = rec(io::buf_writer writer, mutable vec[uint] size_positions);
2011-03-22 19:28:35 -05:00
fn write_sized_vint(&io::buf_writer w, uint n, uint size) {
2011-03-22 19:28:35 -05:00
let vec[u8] buf;
alt (size) {
case (1u) {
buf = [0x80u8 | (n as u8)];
2011-03-22 19:28:35 -05:00
}
case (2u) {
buf = [0x40u8 | ((n >> 8u) as u8),
(n & 0xffu) as u8];
2011-03-22 19:28:35 -05:00
}
case (3u) {
buf = [0x20u8 | ((n >> 16u) as u8),
2011-03-22 19:28:35 -05:00
((n >> 8u) & 0xffu) as u8,
(n & 0xffu) as u8];
2011-03-22 19:28:35 -05:00
}
case (4u) {
buf = [0x10u8 | ((n >> 24u) as u8),
2011-03-22 19:28:35 -05:00
((n >> 16u) & 0xffu) as u8,
((n >> 8u) & 0xffu) as u8,
(n & 0xffu) as u8];
2011-03-22 19:28:35 -05:00
}
case (_) {
log_err "vint to write too big";
2011-03-22 19:28:35 -05:00
fail;
}
}
w.write(buf);
}
fn write_vint(&io::buf_writer w, uint n) {
2011-03-22 19:28:35 -05:00
if (n < 0x7fu) { write_sized_vint(w, n, 1u); ret; }
if (n < 0x4000u) { write_sized_vint(w, n, 2u); ret; }
if (n < 0x200000u) { write_sized_vint(w, n, 3u); ret; }
if (n < 0x10000000u) { write_sized_vint(w, n, 4u); ret; }
log_err "vint to write too big";
2011-03-22 19:28:35 -05:00
fail;
}
fn create_writer(&io::buf_writer w) -> writer {
let vec[uint] size_positions = [];
2011-03-22 19:28:35 -05:00
ret rec(writer=w, mutable size_positions=size_positions);
}
// TODO: Provide a function to write the standard ebml header.
2011-03-22 19:28:35 -05:00
fn start_tag(&writer w, uint tag_id) {
// Write the tag ID:
2011-03-22 19:28:35 -05:00
write_vint(w.writer, tag_id);
// Write a placeholder four-byte size.
w.size_positions += [w.writer.tell()];
let vec[u8] zeroes = [0u8, 0u8, 0u8, 0u8];
2011-03-22 19:28:35 -05:00
w.writer.write(zeroes);
}
fn end_tag(&writer w) {
auto last_size_pos = _vec::pop[uint](w.size_positions);
2011-03-22 19:28:35 -05:00
auto cur_pos = w.writer.tell();
w.writer.seek(last_size_pos as int, io::seek_set);
2011-03-22 19:28:35 -05:00
write_sized_vint(w.writer, cur_pos - last_size_pos - 4u, 4u);
w.writer.seek(cur_pos as int, io::seek_set);
2011-03-22 19:28:35 -05:00
}
2011-03-22 15:59:17 -05:00
2011-03-22 19:28:35 -05:00
// TODO: optionally perform "relaxations" on end_tag to more efficiently
// encode sizes; this is a fixed point iteration