2011-05-12 10:24:54 -05:00
|
|
|
// Simple Extensible Binary Markup Language (ebml) reader and writer on a
// cursor model. See the specification here:
// http://www.matroska.org/technical/specs/rfc/index.html
|
|
|
|
|
2011-05-12 10:24:54 -05:00
|
|
|
import option::none;
|
|
|
|
import option::some;
|
2011-03-22 15:59:17 -05:00
|
|
|
|
|
|
|
// An ebml tag: a numeric id together with a size.
type ebml_tag = rec(uint id, uint size);
|
2011-03-25 19:53:46 -05:00
|
|
|
// A tag plus two positions, tag_pos and data_pos (presumably the offsets of
// the tag header and of its payload -- TODO confirm against the users of
// this type, none of which are visible here).
type ebml_state = rec(ebml_tag ebml_tag, uint tag_pos, uint data_pos);
|
2011-03-22 15:59:17 -05:00
|
|
|
|
|
|
|
// TODO: When we have module renaming, make "reader" and "writer" separate
// modules within this file.
|
|
|
|
|
2011-05-12 10:24:54 -05:00
|
|
|
// ebml reading
|
2011-03-22 15:59:17 -05:00
|
|
|
|
2011-04-06 19:53:25 -05:00
|
|
|
type doc = rec(vec[u8] data,
|
|
|
|
uint start,
|
|
|
|
uint end);
|
|
|
|
|
|
|
|
fn vint_at(vec[u8] data, uint start) -> tup(uint, uint) {
|
|
|
|
auto a = data.(start);
|
|
|
|
if (a & 0x80u8 != 0u8) { ret tup((a & 0x7fu8) as uint, start + 1u); }
|
2011-03-22 15:59:17 -05:00
|
|
|
if (a & 0x40u8 != 0u8) {
|
2011-04-06 19:53:25 -05:00
|
|
|
ret tup((((a & 0x3fu8) as uint) << 8u) | (data.(start + 1u) as uint),
|
|
|
|
start + 2u);
|
|
|
|
} else if (a & 0x20u8 != 0u8) {
|
|
|
|
ret tup((((a & 0x1fu8) as uint) << 16u) |
|
|
|
|
((data.(start + 1u) as uint) << 8u) |
|
|
|
|
(data.(start + 2u) as uint), start + 3u);
|
|
|
|
} else if (a & 0x10u8 != 0u8) {
|
|
|
|
ret tup((((a & 0x0fu8) as uint) << 24u) |
|
|
|
|
((data.(start + 1u) as uint) << 16u) |
|
|
|
|
((data.(start + 2u) as uint) << 8u) |
|
|
|
|
(data.(start + 3u) as uint), start + 4u);
|
|
|
|
} else {
|
2011-04-19 05:02:06 -05:00
|
|
|
log_err "vint too big"; fail;
|
2011-03-22 15:59:17 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-04-06 19:53:25 -05:00
|
|
|
fn new_doc(vec[u8] data) -> doc {
|
2011-05-17 13:41:41 -05:00
|
|
|
ret rec(data=data, start=0u, end=vec::len[u8](data));
|
2011-04-06 19:53:25 -05:00
|
|
|
}
|
2011-03-22 15:59:17 -05:00
|
|
|
|
2011-04-06 19:53:25 -05:00
|
|
|
fn doc_at(vec[u8] data, uint start) -> doc {
|
|
|
|
auto elt_tag = vint_at(data, start);
|
|
|
|
auto elt_size = vint_at(data, elt_tag._1);
|
|
|
|
auto end = elt_size._1 + elt_size._0;
|
|
|
|
ret rec(data=data, start=elt_size._1, end=end);
|
2011-03-22 15:59:17 -05:00
|
|
|
}
|
|
|
|
|
2011-05-12 10:24:54 -05:00
|
|
|
// Scans the elements stored back to back inside d and returns the body of
// the first one whose tag id equals tg, or none when the scan reaches d.end
// without a match. Nested elements are not descended into.
fn maybe_get_doc(doc d, uint tg) -> option::t[doc] {
    auto cursor = d.start;
    while (cursor < d.end) {
        auto id = vint_at(d.data, cursor);
        auto len = vint_at(d.data, id._1);
        auto body_start = len._1;
        auto body_end = body_start + len._0;
        if (id._0 == tg) {
            ret some[doc](rec(data=d.data, start=body_start, end=body_end));
        }
        // Not the tag we want: step over this element's body.
        cursor = body_end;
    }
    ret none[doc];
}
|
|
|
|
|
2011-04-06 19:53:25 -05:00
|
|
|
fn get_doc(doc d, uint tg) -> doc {
|
|
|
|
alt (maybe_get_doc(d, tg)) {
|
2011-05-30 23:13:37 -05:00
|
|
|
case (some(?d)) {ret d;}
|
|
|
|
case (none) {
|
2011-05-17 13:41:41 -05:00
|
|
|
log_err "failed to find block with tag " + uint::to_str(tg, 10u);
|
2011-04-06 19:53:25 -05:00
|
|
|
fail;
|
|
|
|
}
|
|
|
|
}
|
2011-03-22 15:59:17 -05:00
|
|
|
}
|
|
|
|
|
2011-04-06 19:53:25 -05:00
|
|
|
// Iterates over the elements stored back to back inside d, yielding for
// each one tup(tag id, doc covering the element's body).
iter docs(doc d) -> tup(uint, doc) {
    auto cursor = d.start;
    while (cursor < d.end) {
        auto id = vint_at(d.data, cursor);
        auto len = vint_at(d.data, id._1);
        auto body_start = len._1;
        auto body_end = body_start + len._0;
        // The next element starts where this body ends.
        cursor = body_end;
        put tup(id._0, rec(data=d.data, start=body_start, end=body_end));
    }
}
|
|
|
|
|
2011-04-06 19:53:25 -05:00
|
|
|
// Iterates over the elements stored back to back inside d and yields the
// body of each one whose tag id equals tg; other elements are skipped.
iter tagged_docs(doc d, uint tg) -> doc {
    auto cursor = d.start;
    while (cursor < d.end) {
        auto id = vint_at(d.data, cursor);
        auto len = vint_at(d.data, id._1);
        auto body_start = len._1;
        auto body_end = body_start + len._0;
        // The next element starts where this body ends.
        cursor = body_end;
        if (id._0 == tg) {
            put rec(data=d.data, start=body_start, end=body_end);
        }
    }
}
|
|
|
|
|
2011-04-06 19:53:25 -05:00
|
|
|
// Copies out the bytes a doc refers to: the slice of d.data between d.start
// and d.end.
fn doc_data(doc d) -> vec[u8] {
    auto payload = vec::slice[u8](d.data, d.start, d.end);
    ret payload;
}
|
|
|
|
|
2011-04-06 19:53:25 -05:00
|
|
|
fn be_uint_from_bytes(vec[u8] data, uint start, uint size) -> uint {
|
|
|
|
auto sz = size;
|
2011-05-02 19:47:24 -05:00
|
|
|
assert (sz <= 4u);
|
2011-04-06 19:53:25 -05:00
|
|
|
auto val = 0u;
|
|
|
|
auto pos = start;
|
|
|
|
while (sz > 0u) {
|
|
|
|
sz -= 1u;
|
|
|
|
val += (data.(pos) as uint) << (sz * 8u);
|
|
|
|
pos += 1u;
|
|
|
|
}
|
|
|
|
ret val;
|
2011-03-31 16:37:09 -05:00
|
|
|
}
|
|
|
|
|
2011-04-06 19:53:25 -05:00
|
|
|
fn doc_as_uint(doc d) -> uint {
|
|
|
|
ret be_uint_from_bytes(d.data, d.start, d.end - d.start);
|
2011-03-22 15:59:17 -05:00
|
|
|
}
|
|
|
|
|
2011-05-12 10:24:54 -05:00
|
|
|
// ebml writing
|
2011-03-22 15:59:17 -05:00
|
|
|
|
2011-05-12 10:24:54 -05:00
|
|
|
// An ebml writer: the underlying output buffer plus a stack of stream
// positions of size fields that still hold a placeholder (start_tag pushes
// a position, end_tag pops it and fills the size in).
type writer = rec(io::buf_writer writer, mutable vec[uint] size_positions);
|
2011-03-22 19:28:35 -05:00
|
|
|
|
2011-05-12 10:24:54 -05:00
|
|
|
fn write_sized_vint(&io::buf_writer w, uint n, uint size) {
|
2011-03-22 19:28:35 -05:00
|
|
|
let vec[u8] buf;
|
|
|
|
alt (size) {
|
|
|
|
case (1u) {
|
2011-05-16 20:21:22 -05:00
|
|
|
buf = [0x80u8 | (n as u8)];
|
2011-03-22 19:28:35 -05:00
|
|
|
}
|
|
|
|
case (2u) {
|
2011-05-16 20:21:22 -05:00
|
|
|
buf = [0x40u8 | ((n >> 8u) as u8),
|
|
|
|
(n & 0xffu) as u8];
|
2011-03-22 19:28:35 -05:00
|
|
|
}
|
|
|
|
case (3u) {
|
2011-05-16 20:21:22 -05:00
|
|
|
buf = [0x20u8 | ((n >> 16u) as u8),
|
2011-03-22 19:28:35 -05:00
|
|
|
((n >> 8u) & 0xffu) as u8,
|
2011-05-16 20:21:22 -05:00
|
|
|
(n & 0xffu) as u8];
|
2011-03-22 19:28:35 -05:00
|
|
|
}
|
|
|
|
case (4u) {
|
2011-05-16 20:21:22 -05:00
|
|
|
buf = [0x10u8 | ((n >> 24u) as u8),
|
2011-03-22 19:28:35 -05:00
|
|
|
((n >> 16u) & 0xffu) as u8,
|
|
|
|
((n >> 8u) & 0xffu) as u8,
|
2011-05-16 20:21:22 -05:00
|
|
|
(n & 0xffu) as u8];
|
2011-03-22 19:28:35 -05:00
|
|
|
}
|
|
|
|
case (_) {
|
2011-04-19 05:02:06 -05:00
|
|
|
log_err "vint to write too big";
|
2011-03-22 19:28:35 -05:00
|
|
|
fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
w.write(buf);
|
|
|
|
}
|
|
|
|
|
2011-05-12 10:24:54 -05:00
|
|
|
fn write_vint(&io::buf_writer w, uint n) {
|
2011-03-22 19:28:35 -05:00
|
|
|
if (n < 0x7fu) { write_sized_vint(w, n, 1u); ret; }
|
|
|
|
if (n < 0x4000u) { write_sized_vint(w, n, 2u); ret; }
|
|
|
|
if (n < 0x200000u) { write_sized_vint(w, n, 3u); ret; }
|
|
|
|
if (n < 0x10000000u) { write_sized_vint(w, n, 4u); ret; }
|
2011-04-19 05:02:06 -05:00
|
|
|
log_err "vint to write too big";
|
2011-03-22 19:28:35 -05:00
|
|
|
fail;
|
|
|
|
}
|
|
|
|
|
2011-05-12 10:24:54 -05:00
|
|
|
fn create_writer(&io::buf_writer w) -> writer {
|
2011-05-16 20:21:22 -05:00
|
|
|
let vec[uint] size_positions = [];
|
2011-03-22 19:28:35 -05:00
|
|
|
ret rec(writer=w, mutable size_positions=size_positions);
|
|
|
|
}
|
|
|
|
|
2011-05-12 10:24:54 -05:00
|
|
|
// TODO: Provide a function to write the standard ebml header.
|
2011-03-22 19:28:35 -05:00
|
|
|
|
|
|
|
fn start_tag(&writer w, uint tag_id) {
|
2011-05-12 10:24:54 -05:00
|
|
|
// Write the tag ID:
|
2011-03-22 19:28:35 -05:00
|
|
|
write_vint(w.writer, tag_id);
|
|
|
|
|
|
|
|
// Write a placeholder four-byte size.
|
2011-05-16 20:21:22 -05:00
|
|
|
w.size_positions += [w.writer.tell()];
|
|
|
|
let vec[u8] zeroes = [0u8, 0u8, 0u8, 0u8];
|
2011-03-22 19:28:35 -05:00
|
|
|
w.writer.write(zeroes);
|
|
|
|
}
|
|
|
|
|
|
|
|
fn end_tag(&writer w) {
|
2011-05-17 13:41:41 -05:00
|
|
|
auto last_size_pos = vec::pop[uint](w.size_positions);
|
2011-03-22 19:28:35 -05:00
|
|
|
auto cur_pos = w.writer.tell();
|
2011-05-12 10:24:54 -05:00
|
|
|
w.writer.seek(last_size_pos as int, io::seek_set);
|
2011-03-22 19:28:35 -05:00
|
|
|
write_sized_vint(w.writer, cur_pos - last_size_pos - 4u, 4u);
|
2011-05-12 10:24:54 -05:00
|
|
|
w.writer.seek(cur_pos as int, io::seek_set);
|
2011-03-22 19:28:35 -05:00
|
|
|
}
|
2011-03-22 15:59:17 -05:00
|
|
|
|
2011-03-22 19:28:35 -05:00
|
|
|
// TODO: optionally perform "relaxations" on end_tag to more efficiently
// encode sizes; this is a fixed point iteration
|