209 lines
6.6 KiB
Rust
Raw Normal View History

// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::io::{Cursor, Write};
use std::slice;
use std::u32;
use syntax::ast::NodeId;
/// A single item to be recorded in the index: the item's node id together
/// with the position of its data in the metadata.
///
/// The derived ordering compares `node` first and `pos` second; this is the
/// order `write_index` puts the entries in before serializing them.
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Eq, Ord)]
pub struct IndexEntry {
    /// Node id of the item.
    pub node: NodeId,
    /// Position of the item in the metadata. `write_index` requires this to
    /// be smaller than `u32::MAX` because it is stored as a 32-bit word.
    pub pos: u64,
}
/// One element of the index-array, describing a group of 32 consecutive
/// node ids.
#[derive(Debug)]
pub struct IndexArrayEntry {
    /// Bit-array for the group: bit `n` is set when the `n`-th node of the
    /// group holds an item.
    bits: u32,
    /// Index in the position-array of the first item belonging to the group.
    first_pos: u32,
}
impl IndexArrayEntry {
    /// Serializes the entry into `b` as two big-endian words: `bits`
    /// followed by `first_pos`.
    fn encode_to<W: Write>(&self, b: &mut W) {
        for &word in &[self.bits, self.first_pos] {
            write_be_u32(b, word);
        }
    }

    /// Rebuilds an entry from two raw words as they appear in the serialized
    /// index (`b[0]` is `bits`, `b[1]` is `first_pos`).
    ///
    /// Panics if `b` holds fewer than two words.
    fn decode_from(b: &[u32]) -> Self {
        // The words were written big-endian. `to_be` swaps bytes on
        // little-endian hosts and does nothing on big-endian ones, so it also
        // converts a stored big-endian word back to its native value.
        let (bits, first_pos) = (b[0].to_be(), b[1].to_be());
        IndexArrayEntry {
            bits: bits,
            first_pos: first_pos,
        }
    }
}
/// The Item Index
///
/// This index maps the NodeId of each item to its location in the
/// metadata.
///
/// The index is a sparse bit-vector made of two parts: an index-array and
/// a position-array. Every index-array entry covers 32 consecutive nodes
/// and consists of two words: the first is a bit-array marking which of
/// those nodes hold items, the second is the index in the position-array
/// of the first of those items. A large run of trailing nodes without
/// items does not need any index-array entries; looking up a node past the
/// end of the index-array simply finds nothing.
///
/// The index is serialized as an array of big-endian 32-bit words.
/// The first word is the number of items in the position-array.
/// Then, for each item, its position in the metadata follows.
/// After that the index-array is stored.
///
/// ```text
/// struct index {
///     u32 item_count;
///     u32 positions[item_count];
///     struct { u32 bits; u32 first_pos; } index_array[..];
/// }
/// ```
///
/// An `Index` value does not own any of that data; it only remembers the
/// byte offsets of the two arrays inside a metadata buffer.
pub struct Index {
    /// Byte offset of the first position-array word, i.e. just past the
    /// item count.
    position_start: usize,
    /// Byte offset at which the position-array ends and the index-array
    /// begins.
    index_start: usize,
    /// Byte offset one past the end of the index-array.
    index_end: usize,
}
/// Serializes `entries` into `buf` using the item-index format.
///
/// The entries are sorted (by node id, then position) and written as:
///
/// 1. the number of entries,
/// 2. the metadata position of every entry, in sorted order,
/// 3. the index-array: one `(bits, first_pos)` pair per group of 32 node
///    ids, from group 0 up to and including the group that contains the
///    largest node id. At least one pair is always written, even when
///    `entries` is empty.
///
/// Every value is written as a big-endian `u32`.
///
/// # Panics
///
/// Panics if there are `u32::MAX` or more entries, or if the `pos` of any
/// entry is `u32::MAX` or larger.
pub fn write_index(mut entries: Vec<IndexEntry>, buf: &mut Cursor<Vec<u8>>) {
    assert!(entries.len() < u32::MAX as usize);
    entries.sort();

    // Item count followed by the position-array.
    write_be_u32(buf, entries.len() as u32);
    for &IndexEntry { pos, .. } in &entries {
        assert!(pos < u32::MAX as u64);
        write_be_u32(buf, pos as u32);
    }

    // Index-array. `last_entry` accumulates the bits of the group currently
    // being filled and is flushed whenever a node of a later group shows up.
    let mut last_entry = IndexArrayEntry { bits: 0, first_pos: 0 };
    let mut pos_in_index_array = 0;
    for (i, &IndexEntry { node, .. }) in entries.iter().enumerate() {
        let (x, s) = (node / 32 as u32, node % 32 as u32);
        while x > pos_in_index_array {
            pos_in_index_array += 1;
            last_entry.encode_to(buf);
            // Groups skipped on the way to `x` stay empty; they, like the
            // target group, start at the current item.
            last_entry = IndexArrayEntry { bits: 0, first_pos: i as u32 };
        }
        last_entry.bits |= 1 << s;
    }
    last_entry.encode_to(buf);

    // `pos_in_index_array` is the index of the final array entry, so the
    // number of entries actually written is one more than that.
    info!("write_index: {} items, {} array entries",
          entries.len(), pos_in_index_array + 1);
}
impl Index {
    /// Decodes the `i`-th entry of the index-array `index`, or returns
    /// `None` when the array has no such entry.
    fn lookup_index(&self, index: &[u32], i: u32) -> Option<IndexArrayEntry> {
        // Every entry takes up two consecutive words.
        let first_word = (i as usize) * 2;
        if first_word < index.len() {
            Some(IndexArrayEntry::decode_from(&index[first_word..first_word + 2]))
        } else {
            None
        }
    }

    /// Reads element `pos` of the position-array, converted from its stored
    /// big-endian form.
    ///
    /// Panics if `pos` is out of bounds for `positions`.
    fn item_from_pos(&self, positions: &[u32], pos: u32) -> u32 {
        let stored = positions[pos as usize];
        stored.to_be()
    }

    /// Looks up the metadata position recorded for `node`.
    ///
    /// `buf` must be the buffer whose offsets this `Index` was built from.
    /// Returns `None` when the node's bit is not set in its index-array
    /// entry or when the node lies beyond the end of the index-array.
    #[inline(never)]
    pub fn lookup_item(&self, buf: &[u8], node: NodeId) -> Option<u32> {
        let index = bytes_to_words(&buf[self.index_start..self.index_end]);
        let positions = bytes_to_words(&buf[self.position_start..self.index_start]);
        let (x, s) = (node / 32 as u32, node % 32 as u32);

        // A missing index-array entry means the node is in the omitted
        // trailing part of the index and therefore has no item.
        let result = self.lookup_index(index, x).and_then(|entry| {
            let mask: u32 = 1 << s;
            if entry.bits & mask != 0 {
                // Items of the same group stored before this one are exactly
                // the set bits below the node's own bit.
                let earlier_in_group = (entry.bits & (mask - 1)).count_ones();
                Some(self.item_from_pos(positions, entry.first_pos + earlier_in_group))
            } else {
                None
            }
        });

        debug!("lookup_item({:?}) = {:?}", node, result);
        result
    }

    /// Builds an `Index` for the serialized index stored in
    /// `buf[start..end]`.
    ///
    /// Only the leading item count is read; it determines where the
    /// position-array ends and the index-array begins.
    ///
    /// Panics if the range is empty or not a whole number of words, if it is
    /// too short to hold the announced number of positions, or if what
    /// remains after the positions is not a whole number of two-word
    /// index-array entries.
    pub fn from_buf(buf: &[u8], start: usize, end: usize) -> Self {
        let words = bytes_to_words(&buf[start..end]);
        let position_count = words[0].to_be() as usize;
        let position_len = position_count * 4;
        info!("loaded index - position: {}-{}-{}", start, start+position_len, end);
        debug!("index contents are {:?}",
               words.iter().map(|w| format!("{:08x}", w)).collect::<Vec<_>>().concat());

        // The 4 bytes subtracted here are the item-count word itself.
        assert!(end-4-start >= position_len);
        assert_eq!((end-4-start-position_len)%8, 0);

        let positions_begin = start + 4;
        Index {
            position_start: positions_begin,
            index_start: positions_begin + position_len,
            index_end: end,
        }
    }
}
/// Writes `u` to `w` as four bytes, most significant byte first.
///
/// Any error reported by the writer is ignored.
// NOTE(review): the only writer visible in this file is an in-memory
// `Cursor<Vec<u8>>`; confirm that ignoring errors is intended for any other
// writer passed in.
fn write_be_u32<W: Write>(w: &mut W, u: u32) {
    let be_bytes = u.to_be_bytes();
    let _ = w.write_all(&be_bytes);
}
/// Reinterprets a byte slice as a slice of native-endian `u32` words,
/// without copying.
///
/// # Panics
///
/// Panics if the length of `b` is not a multiple of four.
fn bytes_to_words(b: &[u8]) -> &[u32] {
    assert!(b.len() % 4 == 0);
    let word_count = b.len() / 4;
    let first_word = b.as_ptr() as *const u32;
    // SAFETY: the returned slice covers exactly the bytes of `b` (the length
    // was checked above to be a whole number of words) and borrows for the
    // same lifetime as `b`.
    // NOTE(review): nothing here checks that `b` starts on a 4-byte
    // boundary, which `slice::from_raw_parts::<u32>` requires; callers slice
    // at arbitrary byte offsets, so this should be confirmed or reworked.
    unsafe { slice::from_raw_parts(first_word, word_count) }
}
// Checks the exact serialized form produced by `write_index` and then reads
// every entry back through `Index::from_buf` / `Index::lookup_item`.
#[test]
fn test_index() {
// The entries are deliberately not in node order: `write_index` has to sort
// them itself. Nodes 33..=127 and 193..=199 etc. leave gaps, and groups 2, 3,
// 7 and 8 (of 32 nodes each) contain no item at all.
let entries = vec![
IndexEntry { node: 0, pos: 17 },
IndexEntry { node: 31, pos: 29 },
IndexEntry { node: 32, pos: 1175 },
IndexEntry { node: 191, pos: 21 },
IndexEntry { node: 128, pos: 34 },
IndexEntry { node: 145, pos: 70 },
IndexEntry { node: 305, pos: 93214 },
IndexEntry { node: 138, pos: 64 },
IndexEntry { node: 129, pos: 53 },
IndexEntry { node: 192, pos: 33334 },
IndexEntry { node: 200, pos: 80123 },
];
let mut c = Cursor::new(vec![]);
write_index(entries.clone(), &mut c);
let mut buf = c.into_inner();
// Expected bytes: every group of four is one big-endian 32-bit word.
let expected: &[u8] = &[
0, 0, 0, 11, // # entries
// values:
// (the `pos` of each entry, listed in ascending node order)
0,0,0,17, 0,0,0,29, 0,0,4,151, 0,0,0,34,
0,0,0,53, 0,0,0,64, 0,0,0,70, 0,0,0,21,
0,0,130,54, 0,1,56,251, 0,1,108,30,
// index:
// (ten pairs of words, one pair per group of 32 nodes: the bit-array of
// the group, then the index of the group's first item among the values)
128,0,0,1,0,0,0,0, 0,0,0,1,0,0,0,2,
0,0,0,0,0,0,0,3, 0,0,0,0,0,0,0,3,
0,2,4,3,0,0,0,3, 128,0,0,0,0,0,0,7,
0,0,1,1,0,0,0,8, 0,0,0,0,0,0,0,10,
0,0,0,0,0,0,0,10, 0,2,0,0,0,0,0,10
];
assert_eq!(buf, expected);
// insert some junk padding
// (17 bytes in front and 17 behind, so the index now starts at offset 17,
// which is not a multiple of the word size, and ends 17 bytes before the end
// of the buffer)
for i in 0..17 { buf.insert(0, i); buf.push(i) }
let index = Index::from_buf(&buf, 17, buf.len()-17);
// test round-trip
// (every node id below 4096 must yield its recorded position, or `None` when
// no entry was written for it; this includes ids past the last index-array
// entry)
for i in 0..4096 {
assert_eq!(index.lookup_item(&buf, i),
entries.iter().find(|e| e.node == i).map(|n| n.pos as u32));
}
}