2012-12-03 18:48:01 -06:00
|
|
|
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
|
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2011-07-05 04:48:19 -05:00
|
|
|
// An "interner" is a data structure that associates values with uint tags and
|
|
|
|
// allows bidirectional lookup; i.e. given a value, one can easily find the
|
|
|
|
// tag, and vice versa.
|
2013-01-08 21:37:25 -06:00
|
|
|
|
2013-11-26 23:02:25 -06:00
|
|
|
use ast::Name;
|
|
|
|
|
2014-01-08 12:35:15 -06:00
|
|
|
use std::cast;
|
2013-12-27 18:20:10 -06:00
|
|
|
use std::cell::RefCell;
|
2013-06-24 19:40:33 -05:00
|
|
|
use std::cmp::Equiv;
|
|
|
|
use std::hashmap::HashMap;
|
2011-07-05 04:48:19 -05:00
|
|
|
|
2013-02-09 15:22:21 -06:00
|
|
|
pub struct Interner<T> {
|
2013-12-27 18:20:10 -06:00
|
|
|
priv map: @RefCell<HashMap<T, Name>>,
|
2013-12-30 18:33:52 -06:00
|
|
|
priv vect: @RefCell<~[T]>,
|
2012-08-02 17:34:13 -05:00
|
|
|
}
|
|
|
|
|
2013-11-26 23:02:25 -06:00
|
|
|
// when traits can extend traits, we should extend index<Name,T> to get []
|
2013-07-18 19:12:46 -05:00
|
|
|
impl<T:Eq + IterBytes + Hash + Freeze + Clone + 'static> Interner<T> {
|
2013-05-31 17:17:22 -05:00
|
|
|
pub fn new() -> Interner<T> {
|
2013-02-09 15:22:21 -06:00
|
|
|
Interner {
|
2013-12-27 18:20:10 -06:00
|
|
|
map: @RefCell::new(HashMap::new()),
|
2013-12-30 18:33:52 -06:00
|
|
|
vect: @RefCell::new(~[]),
|
2013-02-09 15:22:21 -06:00
|
|
|
}
|
|
|
|
}
|
2012-08-02 17:34:13 -05:00
|
|
|
|
2013-05-31 17:17:22 -05:00
|
|
|
pub fn prefill(init: &[T]) -> Interner<T> {
|
2013-02-09 15:22:21 -06:00
|
|
|
let rv = Interner::new();
|
2013-08-03 11:45:23 -05:00
|
|
|
for v in init.iter() {
|
2013-07-02 14:47:32 -05:00
|
|
|
rv.intern((*v).clone());
|
|
|
|
}
|
2013-02-09 15:22:21 -06:00
|
|
|
rv
|
|
|
|
}
|
2011-08-04 12:46:10 -05:00
|
|
|
|
2013-11-26 23:02:25 -06:00
|
|
|
pub fn intern(&self, val: T) -> Name {
|
2013-12-27 18:20:10 -06:00
|
|
|
let mut map = self.map.borrow_mut();
|
|
|
|
match map.get().find(&val) {
|
2013-02-09 15:22:21 -06:00
|
|
|
Some(&idx) => return idx,
|
|
|
|
None => (),
|
2012-07-17 13:22:11 -05:00
|
|
|
}
|
2013-02-09 15:22:21 -06:00
|
|
|
|
2013-12-30 18:33:52 -06:00
|
|
|
let mut vect = self.vect.borrow_mut();
|
|
|
|
let new_idx = vect.get().len() as Name;
|
2013-12-27 18:20:10 -06:00
|
|
|
map.get().insert(val.clone(), new_idx);
|
2013-12-30 18:33:52 -06:00
|
|
|
vect.get().push(val);
|
2013-02-09 15:22:21 -06:00
|
|
|
new_idx
|
2011-09-24 18:33:26 -05:00
|
|
|
}
|
2013-02-09 15:22:21 -06:00
|
|
|
|
2013-11-26 23:02:25 -06:00
|
|
|
pub fn gensym(&self, val: T) -> Name {
|
2013-12-30 18:33:52 -06:00
|
|
|
let mut vect = self.vect.borrow_mut();
|
|
|
|
let new_idx = vect.get().len() as Name;
|
2012-07-18 18:18:02 -05:00
|
|
|
// leave out of .map to avoid colliding
|
2013-12-30 18:33:52 -06:00
|
|
|
vect.get().push(val);
|
2013-02-09 15:22:21 -06:00
|
|
|
new_idx
|
2012-07-18 18:18:02 -05:00
|
|
|
}
|
2011-08-04 12:46:10 -05:00
|
|
|
|
2013-11-26 23:02:25 -06:00
|
|
|
pub fn get(&self, idx: Name) -> T {
|
2013-12-30 18:33:52 -06:00
|
|
|
let vect = self.vect.borrow();
|
|
|
|
vect.get()[idx].clone()
|
2013-07-02 14:47:32 -05:00
|
|
|
}
|
2012-07-17 19:05:38 -05:00
|
|
|
|
2013-12-30 18:33:52 -06:00
|
|
|
pub fn len(&self) -> uint {
|
|
|
|
let vect = self.vect.borrow();
|
|
|
|
vect.get().len()
|
|
|
|
}
|
2013-04-02 18:20:02 -05:00
|
|
|
|
2013-05-31 17:17:22 -05:00
|
|
|
pub fn find_equiv<Q:Hash + IterBytes + Equiv<T>>(&self, val: &Q)
|
2013-11-26 23:02:25 -06:00
|
|
|
-> Option<Name> {
|
2013-12-27 18:20:10 -06:00
|
|
|
let map = self.map.borrow();
|
|
|
|
match map.get().find_equiv(val) {
|
2013-04-02 18:20:02 -05:00
|
|
|
Some(v) => Some(*v),
|
|
|
|
None => None,
|
|
|
|
}
|
|
|
|
}
|
2012-09-19 11:41:06 -05:00
|
|
|
}
|
2013-01-23 19:29:08 -06:00
|
|
|
|
2013-05-07 14:34:52 -05:00
|
|
|
// A StrInterner differs from Interner<String> in that it accepts
|
2014-01-07 20:49:13 -06:00
|
|
|
// borrowed `&str` references rather than managed `@str` ones, resulting in less allocation.
|
2013-05-02 03:16:07 -05:00
|
|
|
pub struct StrInterner {
|
2013-12-27 18:20:10 -06:00
|
|
|
priv map: @RefCell<HashMap<@str, Name>>,
|
2013-12-30 18:33:52 -06:00
|
|
|
priv vect: @RefCell<~[@str]>,
|
2013-05-02 03:16:07 -05:00
|
|
|
}
|
|
|
|
|
2013-11-26 23:02:25 -06:00
|
|
|
// when traits can extend traits, we should extend index<Name,T> to get []
|
2013-05-31 17:17:22 -05:00
|
|
|
impl StrInterner {
|
|
|
|
pub fn new() -> StrInterner {
|
2013-05-02 03:16:07 -05:00
|
|
|
StrInterner {
|
2013-12-27 18:20:10 -06:00
|
|
|
map: @RefCell::new(HashMap::new()),
|
2013-12-30 18:33:52 -06:00
|
|
|
vect: @RefCell::new(~[]),
|
2013-05-02 03:16:07 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-31 17:17:22 -05:00
|
|
|
pub fn prefill(init: &[&str]) -> StrInterner {
|
2013-05-02 03:16:07 -05:00
|
|
|
let rv = StrInterner::new();
|
2013-08-03 11:45:23 -05:00
|
|
|
for &v in init.iter() { rv.intern(v); }
|
2013-05-02 03:16:07 -05:00
|
|
|
rv
|
|
|
|
}
|
|
|
|
|
2013-11-26 23:02:25 -06:00
|
|
|
pub fn intern(&self, val: &str) -> Name {
|
2013-12-27 18:20:10 -06:00
|
|
|
let mut map = self.map.borrow_mut();
|
|
|
|
match map.get().find_equiv(&val) {
|
2013-05-02 03:16:07 -05:00
|
|
|
Some(&idx) => return idx,
|
|
|
|
None => (),
|
|
|
|
}
|
|
|
|
|
2013-11-26 23:02:25 -06:00
|
|
|
let new_idx = self.len() as Name;
|
2013-06-12 12:02:55 -05:00
|
|
|
let val = val.to_managed();
|
2013-12-27 18:20:10 -06:00
|
|
|
map.get().insert(val, new_idx);
|
2013-12-30 18:33:52 -06:00
|
|
|
let mut vect = self.vect.borrow_mut();
|
|
|
|
vect.get().push(val);
|
2013-05-02 03:16:07 -05:00
|
|
|
new_idx
|
|
|
|
}
|
|
|
|
|
2013-11-26 23:02:25 -06:00
|
|
|
pub fn gensym(&self, val: &str) -> Name {
|
|
|
|
let new_idx = self.len() as Name;
|
2013-05-02 03:16:07 -05:00
|
|
|
// leave out of .map to avoid colliding
|
2013-12-30 18:33:52 -06:00
|
|
|
let mut vect = self.vect.borrow_mut();
|
|
|
|
vect.get().push(val.to_managed());
|
2013-05-02 03:16:07 -05:00
|
|
|
new_idx
|
|
|
|
}
|
|
|
|
|
2013-06-26 12:11:19 -05:00
|
|
|
// I want these gensyms to share name pointers
|
|
|
|
// with existing entries. This would be automatic,
|
|
|
|
// except that the existing gensym creates its
|
|
|
|
// own managed ptr using to_managed. I think that
|
|
|
|
// adding this utility function is the most
|
|
|
|
// lightweight way to get what I want, though not
|
|
|
|
// necessarily the cleanest.
|
|
|
|
|
|
|
|
// create a gensym with the same name as an existing
|
|
|
|
// entry.
|
2013-11-26 23:02:25 -06:00
|
|
|
pub fn gensym_copy(&self, idx : Name) -> Name {
|
|
|
|
let new_idx = self.len() as Name;
|
2013-06-26 12:11:19 -05:00
|
|
|
// leave out of map to avoid colliding
|
2013-12-30 18:33:52 -06:00
|
|
|
let mut vect = self.vect.borrow_mut();
|
|
|
|
let existing = vect.get()[idx];
|
|
|
|
vect.get().push(existing);
|
2013-06-26 12:11:19 -05:00
|
|
|
new_idx
|
|
|
|
}
|
|
|
|
|
2013-12-30 18:33:52 -06:00
|
|
|
pub fn get(&self, idx: Name) -> @str {
|
|
|
|
let vect = self.vect.borrow();
|
|
|
|
vect.get()[idx]
|
|
|
|
}
|
2013-05-02 03:16:07 -05:00
|
|
|
|
2014-01-08 12:35:15 -06:00
|
|
|
/// Returns this string with lifetime tied to the interner. Since
|
|
|
|
/// strings may never be removed from the interner, this is safe.
|
|
|
|
pub fn get_ref<'a>(&'a self, idx: Name) -> &'a str {
|
|
|
|
let vect = self.vect.borrow();
|
|
|
|
let s: &str = vect.get()[idx];
|
|
|
|
unsafe {
|
|
|
|
cast::transmute(s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-30 18:33:52 -06:00
|
|
|
pub fn len(&self) -> uint {
|
|
|
|
let vect = self.vect.borrow();
|
|
|
|
vect.get().len()
|
|
|
|
}
|
2013-05-02 03:16:07 -05:00
|
|
|
|
2013-06-12 12:02:55 -05:00
|
|
|
pub fn find_equiv<Q:Hash + IterBytes + Equiv<@str>>(&self, val: &Q)
|
2013-11-26 23:02:25 -06:00
|
|
|
-> Option<Name> {
|
2013-12-27 18:20:10 -06:00
|
|
|
let map = self.map.borrow();
|
|
|
|
match map.get().find_equiv(val) {
|
2013-05-02 03:16:07 -05:00
|
|
|
Some(v) => Some(*v),
|
|
|
|
None => None,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-04-15 10:08:52 -05:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
|
|
|
use super::*;
|
|
|
|
#[test]
|
|
|
|
#[should_fail]
|
|
|
|
fn i1 () {
|
2013-06-12 12:02:55 -05:00
|
|
|
let i : Interner<@str> = Interner::new();
|
2013-04-15 10:08:52 -05:00
|
|
|
i.get(13);
|
|
|
|
}
|
2013-01-23 19:29:08 -06:00
|
|
|
|
2013-04-15 10:08:52 -05:00
|
|
|
#[test]
|
2013-06-26 12:11:19 -05:00
|
|
|
fn interner_tests () {
|
2013-06-12 12:02:55 -05:00
|
|
|
let i : Interner<@str> = Interner::new();
|
2013-04-15 10:08:52 -05:00
|
|
|
// first one is zero:
|
2013-06-26 12:11:19 -05:00
|
|
|
assert_eq!(i.intern(@"dog"), 0);
|
2013-04-15 10:08:52 -05:00
|
|
|
// re-use gets the same entry:
|
2013-06-26 12:11:19 -05:00
|
|
|
assert_eq!(i.intern(@"dog"), 0);
|
2013-04-15 10:08:52 -05:00
|
|
|
// different string gets a different #:
|
2013-06-26 12:11:19 -05:00
|
|
|
assert_eq!(i.intern(@"cat"), 1);
|
|
|
|
assert_eq!(i.intern(@"cat"), 1);
|
2013-04-15 10:08:52 -05:00
|
|
|
// dog is still at zero
|
2013-06-26 12:11:19 -05:00
|
|
|
assert_eq!(i.intern(@"dog"), 0);
|
2013-04-15 10:08:52 -05:00
|
|
|
// gensym gets 3
|
2013-06-26 12:11:19 -05:00
|
|
|
assert_eq!(i.gensym(@"zebra" ), 2);
|
2013-04-15 10:08:52 -05:00
|
|
|
// gensym of same string gets new number :
|
2013-06-12 12:02:55 -05:00
|
|
|
assert_eq!(i.gensym (@"zebra" ), 3);
|
2013-04-15 10:08:52 -05:00
|
|
|
// gensym of *existing* string gets new number:
|
2013-06-26 12:11:19 -05:00
|
|
|
assert_eq!(i.gensym(@"dog"), 4);
|
2013-06-12 12:02:55 -05:00
|
|
|
assert_eq!(i.get(0), @"dog");
|
|
|
|
assert_eq!(i.get(1), @"cat");
|
|
|
|
assert_eq!(i.get(2), @"zebra");
|
|
|
|
assert_eq!(i.get(3), @"zebra");
|
|
|
|
assert_eq!(i.get(4), @"dog");
|
2013-04-15 10:08:52 -05:00
|
|
|
}
|
2013-01-23 19:29:08 -06:00
|
|
|
|
2013-04-15 10:08:52 -05:00
|
|
|
#[test]
|
|
|
|
fn i3 () {
|
2013-06-12 12:02:55 -05:00
|
|
|
let i : Interner<@str> = Interner::prefill([@"Alan",@"Bob",@"Carol"]);
|
|
|
|
assert_eq!(i.get(0), @"Alan");
|
|
|
|
assert_eq!(i.get(1), @"Bob");
|
|
|
|
assert_eq!(i.get(2), @"Carol");
|
|
|
|
assert_eq!(i.intern(@"Bob"), 1);
|
2013-04-15 10:08:52 -05:00
|
|
|
}
|
2013-06-26 12:11:19 -05:00
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn string_interner_tests() {
|
|
|
|
let i : StrInterner = StrInterner::new();
|
|
|
|
// first one is zero:
|
|
|
|
assert_eq!(i.intern("dog"), 0);
|
|
|
|
// re-use gets the same entry:
|
|
|
|
assert_eq!(i.intern ("dog"), 0);
|
|
|
|
// different string gets a different #:
|
|
|
|
assert_eq!(i.intern("cat"), 1);
|
|
|
|
assert_eq!(i.intern("cat"), 1);
|
|
|
|
// dog is still at zero
|
|
|
|
assert_eq!(i.intern("dog"), 0);
|
|
|
|
// gensym gets 3
|
|
|
|
assert_eq!(i.gensym("zebra"), 2);
|
|
|
|
// gensym of same string gets new number :
|
|
|
|
assert_eq!(i.gensym("zebra"), 3);
|
|
|
|
// gensym of *existing* string gets new number:
|
|
|
|
assert_eq!(i.gensym("dog"), 4);
|
|
|
|
// gensym tests again with gensym_copy:
|
|
|
|
assert_eq!(i.gensym_copy(2), 5);
|
|
|
|
assert_eq!(i.get(5), @"zebra");
|
|
|
|
assert_eq!(i.gensym_copy(2), 6);
|
|
|
|
assert_eq!(i.get(6), @"zebra");
|
|
|
|
assert_eq!(i.get(0), @"dog");
|
|
|
|
assert_eq!(i.get(1), @"cat");
|
|
|
|
assert_eq!(i.get(2), @"zebra");
|
|
|
|
assert_eq!(i.get(3), @"zebra");
|
|
|
|
assert_eq!(i.get(4), @"dog");
|
|
|
|
}
|
2013-04-03 11:41:40 -05:00
|
|
|
}
|