rust/src/librustc/metadata/index.rs

// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::io::{Cursor, Write};
use std::slice;
use std::u32;
use syntax::ast::NodeId;

/// One item to be recorded in the index: the node that identifies the item
/// and where the item lives in the metadata.
///
/// The derived ordering compares `node` first and `pos` second, so sorting
/// a list of entries arranges them by node.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct IndexEntry {
    /// The node this entry is keyed by.
    pub node: NodeId,
    /// The item's position in the metadata.
    pub pos: u64,
}

/// One entry of the index-array, describing a group of 32 consecutive nodes.
#[derive(Debug)]
pub struct IndexArrayEntry {
    /// Bit-array with one bit per node of the group; a set bit means the
    /// node holds an item.
    bits: u32,
    /// Index in the position-array of the first item belonging to this
    /// group.
    first_pos: u32,
}

impl IndexArrayEntry {
    /// Serializes this entry to `b` as two big-endian words: `bits` first,
    /// `first_pos` second.
    fn encode_to<W: Write>(&self, b: &mut W) {
        for &word in &[self.bits, self.first_pos] {
            write_be_u32(b, word);
        }
    }

    /// Rebuilds an entry from two words that were loaded verbatim from a
    /// big-endian buffer: `b[0]` holds `bits`, `b[1]` holds `first_pos`.
    ///
    /// Panics if `b` has fewer than two elements.
    fn decode_from(b: &[u32]) -> Self {
        let (raw_bits, raw_first_pos) = (b[0], b[1]);
        IndexArrayEntry {
            // `to_be` performs the same conditional byte swap as `from_be`,
            // so it also turns a stored big-endian word into a native value.
            bits: raw_bits.to_be(),
            first_pos: raw_first_pos.to_be(),
        }
    }
}

/// The item index: a lookup table from the `NodeId` of each item to the
/// item's location in the metadata.
///
/// Conceptually the index is a sparse bit-vector made of two parts, an
/// index-array and a position-array. Every index-array entry covers 32
/// consecutive nodes and consists of two words: a bit-array marking which
/// of those nodes hold items, and the index in the position-array of the
/// first of those items. A long run of trailing nodes without items may
/// simply be left out of the index-array.
///
/// The serialized form is an array of big-endian 32-bit words: first the
/// number of items in the position-array, then the metadata position of
/// each item, and finally the index-array.
///
/// ```text
/// struct index {
///     u32 item_count;
///     u32 positions[item_count];
///     struct { u32 bits; u32 first_pos; } index_array[..];
/// }
/// ```
pub struct Index {
    /// Byte offset of the position-array, i.e. just past the item-count word.
    position_start: usize,
    /// Byte offset where the position-array ends and the index-array begins.
    index_start: usize,
    /// Byte offset one past the last byte of the index-array.
    index_end: usize,
}

/// Serializes `entries` into `buf` in the item-index format.
///
/// The entries are sorted, then written as: the entry count, the position
/// of every entry (in sorted order), and finally one index-array entry per
/// group of 32 nodes up to and including the group of the largest node.
///
/// Panics if there are `u32::MAX` or more entries, or if any position does
/// not fit below `u32::MAX`.
pub fn write_index(mut entries: Vec<IndexEntry>, buf: &mut Cursor<Vec<u8>>) {
    assert!(entries.len() < u32::MAX as usize);
    entries.sort();

    // Header word, followed by the position-array.
    write_be_u32(buf, entries.len() as u32);
    for entry in &entries {
        assert!(entry.pos < u32::MAX as u64);
        write_be_u32(buf, entry.pos as u32);
    }

    // `current` accumulates the bits of the group being filled; `flushed`
    // counts the groups that have already been written out.
    let mut current = IndexArrayEntry { bits: 0, first_pos: 0 };
    let mut flushed = 0;
    for (rank, entry) in entries.iter().enumerate() {
        let group = entry.node / 32u32;
        let bit = entry.node % 32u32;
        // Emit the finished group, plus an empty entry for every group
        // that was skipped, until we reach the group this node lives in.
        while flushed < group {
            flushed += 1;
            current.encode_to(buf);
            current = IndexArrayEntry { bits: 0, first_pos: rank as u32 };
        }
        current.bits |= 1 << bit;
    }
    // The group still being filled is always written, even when empty.
    current.encode_to(buf);

    // NOTE: `flushed` does not include the final entry written just above.
    info!("write_index: {} items, {} array entries",
          entries.len(), flushed);
}

impl Index {
    /// Returns the `i`-th entry of the index-array, or `None` when the
    /// serialized array stops before that entry.
    ///
    /// `index` is the raw index-array, two words per entry.
    fn lookup_index(&self, index: &[UnalignedWord], i: u32) -> Option<IndexArrayEntry> {
        let ix = (i as usize)*2;
        if ix >= index.len() {
            None
        } else {
            // Copy both raw words out of the possibly-unaligned buffer so
            // that `decode_from` receives an ordinary, aligned `[u32]`.
            let raw = [index[ix].get(), index[ix+1].get()];
            Some(IndexArrayEntry::decode_from(&raw))
        }
    }

    /// Reads entry `pos` of the position-array and converts it from its
    /// stored big-endian form to a native integer.
    ///
    /// Panics if `pos` is out of bounds for `positions`.
    fn item_from_pos(&self, positions: &[UnalignedWord], pos: u32) -> u32 {
        positions[pos as usize].get().to_be()
    }

    /// Looks up the metadata position recorded for `node`.
    ///
    /// `buf` must be the same buffer (with the same offsets) that was given
    /// to `from_buf`. Returns `None` if the index holds no item for `node`.
    #[inline(never)]
    pub fn lookup_item(&self, buf: &[u8], node: NodeId) -> Option<u32> {
        let index = bytes_to_words(&buf[self.index_start..self.index_end]);
        let positions = bytes_to_words(&buf[self.position_start..self.index_start]);
        // `x` selects the index-array entry, `s` the bit inside that entry.
        let (x, s) = (node / 32 as u32, node % 32 as u32);
        let result = match self.lookup_index(index, x) {
            Some(IndexArrayEntry { bits, first_pos }) => {
                let bit = 1<<s;
                if bits & bit == 0 {
                    None
                } else {
                    // Items of one group are stored consecutively, so the
                    // number of set bits below ours is our offset from the
                    // group's first item.
                    let prev_nodes_for_entry = (bits&(bit-1)).count_ones();
                    Some(self.item_from_pos(
                        positions,
                        first_pos+prev_nodes_for_entry))
                }
            }
            None => None // trailing zero
        };
        debug!("lookup_item({:?}) = {:?}", node, result);
        result
    }

    /// Locates a serialized index occupying `buf[start..end]`.
    ///
    /// Only the offsets of the position-array and the index-array are
    /// recorded; the same buffer has to be passed to `lookup_item` later.
    /// `start` does not need to be aligned in any way.
    ///
    /// Panics if the region is empty, if its length is not a multiple of 4,
    /// if it is too short for the item count stored in its first word, or
    /// if what remains for the index-array is not a multiple of 8 bytes.
    pub fn from_buf(buf: &[u8], start: usize, end: usize) -> Self {
        let words = bytes_to_words(&buf[start..end]);
        let position_count = words[0].get().to_be() as usize;
        let position_len = position_count*4;
        info!("loaded index - position: {}-{}-{}", start, start+position_len, end);
        debug!("index contents are {:?}",
               words.iter().map(|w| format!("{:08x}", w.get())).collect::<Vec<_>>().concat());
        assert!(end-4-start >= position_len);
        assert_eq!((end-4-start-position_len)%8, 0);
        Index {
            position_start: start+4,
            index_start: start+position_len+4,
            index_end: end
        }
    }
}

/// Writes `u` to `w` as four bytes, most significant byte first.
///
/// The result of `write_all` is discarded.
fn write_be_u32<W: Write>(w: &mut W, u: u32) {
    let _ = w.write_all(&[
        (u >> 24) as u8,
        (u >> 16) as u8,
        (u >>  8) as u8,
        (u >>  0) as u8,
    ]);
}

/// A 32-bit word that may sit at any byte address.
///
/// The index can start at an arbitrary byte offset inside the metadata, so
/// its words must not be accessed through a plain `&u32`, which requires
/// 4-byte alignment. `repr(packed)` lowers the alignment of this type to 1.
#[repr(packed)]
#[derive(Copy, Clone)]
struct UnalignedWord(u32);

impl UnalignedWord {
    /// Copies the word out exactly as stored, without any byte-order
    /// conversion.
    fn get(self) -> u32 {
        self.0
    }
}

/// Reinterprets `b` as a slice of 32-bit words without copying.
///
/// No byte-order conversion is performed. Panics if the length of `b` is
/// not a multiple of 4.
fn bytes_to_words(b: &[u8]) -> &[UnalignedWord] {
    assert!(b.len() % 4 == 0);
    // SAFETY: `UnalignedWord` is a packed wrapper around `u32`: it is 4 bytes
    // large, has alignment 1 and every bit pattern is a valid value. The
    // pointer comes from a live slice, `b.len()/4` words span exactly
    // `b.len()` bytes, and the returned slice borrows from `b`.
    unsafe { slice::from_raw_parts(b.as_ptr() as *const UnalignedWord, b.len()/4) }
}

/// Checks the exact serialized form of a small index and that every node
/// id below 4096 round-trips through `Index::lookup_item`.
#[test]
fn test_index() {
    // Deliberately unsorted; `write_index` sorts the entries itself.
    let entries = vec![
        IndexEntry { node: 0, pos: 17 },
        IndexEntry { node: 31, pos: 29 },
        IndexEntry { node: 32, pos: 1175 },
        IndexEntry { node: 191, pos: 21 },
        IndexEntry { node: 128, pos: 34 },
        IndexEntry { node: 145, pos: 70 },
        IndexEntry { node: 305, pos: 93214 },
        IndexEntry { node: 138, pos: 64 },
        IndexEntry { node: 129, pos: 53 },
        IndexEntry { node: 192, pos: 33334 },
        IndexEntry { node: 200, pos: 80123 },
    ];
    let mut cursor = Cursor::new(vec![]);
    write_index(entries.clone(), &mut cursor);
    let mut bytes = cursor.into_inner();
    let expected: &[u8] = &[
        0, 0, 0, 11, // # entries
        // values:
        0,0,0,17, 0,0,0,29, 0,0,4,151, 0,0,0,34,
        0,0,0,53, 0,0,0,64, 0,0,0,70, 0,0,0,21,
        0,0,130,54, 0,1,56,251, 0,1,108,30,
        // index:
        128,0,0,1,0,0,0,0, 0,0,0,1,0,0,0,2,
        0,0,0,0,0,0,0,3,   0,0,0,0,0,0,0,3,
        0,2,4,3,0,0,0,3,   128,0,0,0,0,0,0,7,
        0,0,1,1,0,0,0,8,   0,0,0,0,0,0,0,10,
        0,0,0,0,0,0,0,10,  0,2,0,0,0,0,0,10
    ];
    assert_eq!(bytes, expected);

    // Surround the index with 17 junk bytes on each side. 17 is not a
    // multiple of 4, so the index no longer starts on a word boundary
    // relative to the start of the buffer.
    for junk in 0..17 {
        bytes.insert(0, junk);
        bytes.push(junk);
    }
    let end = bytes.len() - 17;
    let index = Index::from_buf(&bytes, 17, end);

    // Round-trip: nodes that were written map to their position, all
    // other nodes map to nothing.
    for node in 0..4096 {
        let want = entries.iter().find(|e| e.node == node).map(|e| e.pos as u32);
        assert_eq!(index.lookup_item(&bytes, node), want);
    }
}
rewrite metadata indexing this improves the compilation time for small crates by ~20% 2015-09-03 01:22:31 +03:00			`// Copyright 2015 The Rust Project Developers. See the COPYRIGHT`
			`// file at the top-level directory of this distribution and at`
			`// http://rust-lang.org/COPYRIGHT.`
			`//`
			`// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or`
			`// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license`
			`// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your`
			`// option. This file may not be copied, modified, or distributed`
			`// except according to those terms.`

			`use std::io::{Cursor, Write};`
			`use std::slice;`
			`use std::u32;`
			`use syntax::ast::NodeId;`

			`#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)]`
			`pub struct IndexEntry {`
			`pub node: NodeId,`
			`pub pos: u64`
			`}`

			`#[derive(Debug)]`
			`pub struct IndexArrayEntry {`
			`bits: u32,`
			`first_pos: u32`
			`}`

			`impl IndexArrayEntry {`
			`fn encode_to<W: Write>(&self, b: &mut W) {`
			`write_be_u32(b, self.bits);`
			`write_be_u32(b, self.first_pos);`
			`}`

			`fn decode_from(b: &[u32]) -> Self {`
			`IndexArrayEntry {`
			`bits: b[0].to_be(),`
			`first_pos: b[1].to_be()`
			`}`
			`}`
			`}`

			`/// The Item Index`
			`///`
			`/// This index maps the NodeId of each item to its location in the`
			`/// metadata.`
			`///`
			`/// The index is a sparse bit-vector consisting of a index-array`
			`/// and a position-array. Each entry in the index-array handles 32 nodes.`
			`/// The first word is a bit-array consisting of the nodes that hold items,`
			`/// the second is the index of the first of the items in the position-array.`
			`/// If there is a large set of non-item trailing nodes, they can be omitted`
			`/// from the index-array.`
			`///`
			`/// The index is serialized as an array of big-endian 32-bit words.`
			`/// The first word is the number of items in the position-array.`
			`/// Then, for each item, its position in the metadata follows.`
			`/// After that the index-array is stored.`
			`///`
			`/// struct index {`
			`/// u32 item_count;`
			`/// u32 items[self.item_count];`
			`/// struct { u32 bits; u32 offset; } positions[..];`
			`/// }`
			`pub struct Index {`
			`position_start: usize,`
			`index_start: usize,`
			`index_end: usize,`
			`}`

			`pub fn write_index(mut entries: Vec<IndexEntry>, buf: &mut Cursor<Vec<u8>>) {`
			`assert!(entries.len() < u32::MAX as usize);`
			`entries.sort();`

			`let mut last_entry = IndexArrayEntry { bits: 0, first_pos: 0 };`

			`write_be_u32(buf, entries.len() as u32);`
			`for &IndexEntry { pos, .. } in &entries {`
			`assert!(pos < u32::MAX as u64);`
			`write_be_u32(buf, pos as u32);`
			`}`

			`let mut pos_in_index_array = 0;`
			`for (i, &IndexEntry { node, .. }) in entries.iter().enumerate() {`
			`let (x, s) = (node / 32 as u32, node % 32 as u32);`
			`while x > pos_in_index_array {`
			`pos_in_index_array += 1;`
			`last_entry.encode_to(buf);`
			`last_entry = IndexArrayEntry { bits: 0, first_pos: i as u32 };`
			`}`
			`last_entry.bits \|= 1<<s;`
			`}`
			`last_entry.encode_to(buf);`

			`info!("write_index: {} items, {} array entries",`
			`entries.len(), pos_in_index_array);`
			`}`

			`impl Index {`
			`fn lookup_index(&self, index: &[u32], i: u32) -> Option<IndexArrayEntry> {`
			`let ix = (i as usize)*2;`
			`if ix >= index.len() {`
			`None`
			`} else {`
			`Some(IndexArrayEntry::decode_from(&index[ix..ix+2]))`
			`}`
			`}`

			`fn item_from_pos(&self, positions: &[u32], pos: u32) -> u32 {`
			`positions[pos as usize].to_be()`
			`}`

			`#[inline(never)]`
			`pub fn lookup_item(&self, buf: &[u8], node: NodeId) -> Option<u32> {`
			`let index = bytes_to_words(&buf[self.index_start..self.index_end]);`
			`let positions = bytes_to_words(&buf[self.position_start..self.index_start]);`
			`let (x, s) = (node / 32 as u32, node % 32 as u32);`
			`let result = match self.lookup_index(index, x) {`
			`Some(IndexArrayEntry { bits, first_pos }) => {`
			`let bit = 1<<s;`
			`if bits & bit == 0 {`
			`None`
			`} else {`
			`let prev_nodes_for_entry = (bits&(bit-1)).count_ones();`
			`Some(self.item_from_pos(`
			`positions,`
			`first_pos+prev_nodes_for_entry))`
			`}`
			`}`
			`None => None // trailing zero`
			`};`
			`debug!("lookup_item({:?}) = {:?}", node, result);`
			`result`
			`}`

			`pub fn from_buf(buf: &[u8], start: usize, end: usize) -> Self {`
			`let buf = bytes_to_words(&buf[start..end]);`
			`let position_count = buf[0].to_be() as usize;`
			`let position_len = position_count*4;`
			`info!("loaded index - position: {}-{}-{}", start, start+position_len, end);`
			`debug!("index contents are {:?}",`
			`buf.iter().map(\|b\| format!("{:08x}", b)).collect::<Vec<_>>().concat());`
			`assert!(end-4-start >= position_len);`
			`assert_eq!((end-4-start-position_len)%8, 0);`
			`Index {`
			`position_start: start+4,`
			`index_start: start+position_len+4,`
			`index_end: end`
			`}`
			`}`
			`}`

			`fn write_be_u32<W: Write>(w: &mut W, u: u32) {`
			`let _ = w.write_all(&[`
			`(u >> 24) as u8,`
			`(u >> 16) as u8,`
			`(u >> 8) as u8,`
			`(u >> 0) as u8,`
			`]);`
			`}`

			`fn bytes_to_words(b: &[u8]) -> &[u32] {`
			`assert!(b.len() % 4 == 0);`
			`unsafe { slice::from_raw_parts(b.as_ptr() as *const u32, b.len()/4) }`
			`}`

			`#[test]`
			`fn test_index() {`
			`let entries = vec![`
			`IndexEntry { node: 0, pos: 17 },`
			`IndexEntry { node: 31, pos: 29 },`
			`IndexEntry { node: 32, pos: 1175 },`
			`IndexEntry { node: 191, pos: 21 },`
			`IndexEntry { node: 128, pos: 34 },`
			`IndexEntry { node: 145, pos: 70 },`
			`IndexEntry { node: 305, pos: 93214 },`
			`IndexEntry { node: 138, pos: 64 },`
			`IndexEntry { node: 129, pos: 53 },`
			`IndexEntry { node: 192, pos: 33334 },`
			`IndexEntry { node: 200, pos: 80123 },`
			`];`
			`let mut c = Cursor::new(vec![]);`
			`write_index(entries.clone(), &mut c);`
			`let mut buf = c.into_inner();`
			`let expected: &[u8] = &[`
			`0, 0, 0, 11, // # entries`
			`// values:`
			`0,0,0,17, 0,0,0,29, 0,0,4,151, 0,0,0,34,`
			`0,0,0,53, 0,0,0,64, 0,0,0,70, 0,0,0,21,`
			`0,0,130,54, 0,1,56,251, 0,1,108,30,`
			`// index:`
			`128,0,0,1,0,0,0,0, 0,0,0,1,0,0,0,2,`
			`0,0,0,0,0,0,0,3, 0,0,0,0,0,0,0,3,`
			`0,2,4,3,0,0,0,3, 128,0,0,0,0,0,0,7,`
			`0,0,1,1,0,0,0,8, 0,0,0,0,0,0,0,10,`
			`0,0,0,0,0,0,0,10, 0,2,0,0,0,0,0,10`
			`];`
			`assert_eq!(buf, expected);`

			`// insert some junk padding`
			`for i in 0..17 { buf.insert(0, i); buf.push(i) }`
			`let index = Index::from_buf(&buf, 17, buf.len()-17);`

			`// test round-trip`
			`for i in 0..4096 {`
			`assert_eq!(index.lookup_item(&buf, i),`
			`entries.iter().find(\|e\| e.node == i).map(\|n\| n.pos as u32));`
			`}`
			`}`