2012-12-03 16:48:01 -08:00
|
|
|
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
|
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2014-06-09 13:12:30 -07:00
|
|
|
//! An "interner" is a data structure that associates values with uint tags and
|
|
|
|
//! allows bidirectional lookup; i.e. given a value, one can easily find the
|
|
|
|
//! tag, and vice versa.
|
2013-01-08 19:37:25 -08:00
|
|
|
|
2013-11-27 07:02:25 +02:00
|
|
|
use ast::Name;
|
|
|
|
|
2014-11-12 15:51:51 -08:00
|
|
|
use std::borrow::BorrowFrom;
|
2014-05-29 19:03:06 -07:00
|
|
|
use std::collections::HashMap;
|
2013-12-27 16:20:10 -08:00
|
|
|
use std::cell::RefCell;
|
2014-02-28 01:23:06 -08:00
|
|
|
use std::fmt;
|
2014-02-23 21:29:35 +11:00
|
|
|
use std::hash::Hash;
|
2014-01-31 16:42:33 -08:00
|
|
|
use std::rc::Rc;
|
2011-07-05 11:48:19 +02:00
|
|
|
|
2013-02-09 13:22:21 -08:00
|
|
|
pub struct Interner<T> {
|
2014-03-27 15:39:48 -07:00
|
|
|
map: RefCell<HashMap<T, Name>>,
|
|
|
|
vect: RefCell<Vec<T> >,
|
2012-08-02 15:34:13 -07:00
|
|
|
}
|
|
|
|
|
2013-11-27 07:02:25 +02:00
|
|
|
// when traits can extend traits, we should extend index<Name,T> to get []
|
2014-05-31 10:43:52 -07:00
|
|
|
impl<T: Eq + Hash + Clone + 'static> Interner<T> {
|
2013-05-31 15:17:22 -07:00
|
|
|
pub fn new() -> Interner<T> {
|
2013-02-09 13:22:21 -08:00
|
|
|
Interner {
|
2014-02-14 07:07:09 +02:00
|
|
|
map: RefCell::new(HashMap::new()),
|
2014-02-28 13:09:09 -08:00
|
|
|
vect: RefCell::new(Vec::new()),
|
2013-02-09 13:22:21 -08:00
|
|
|
}
|
|
|
|
}
|
2012-08-02 15:34:13 -07:00
|
|
|
|
2013-05-31 15:17:22 -07:00
|
|
|
pub fn prefill(init: &[T]) -> Interner<T> {
|
2013-02-09 13:22:21 -08:00
|
|
|
let rv = Interner::new();
|
2013-08-03 12:45:23 -04:00
|
|
|
for v in init.iter() {
|
2013-07-02 12:47:32 -07:00
|
|
|
rv.intern((*v).clone());
|
|
|
|
}
|
2013-02-09 13:22:21 -08:00
|
|
|
rv
|
|
|
|
}
|
2011-08-04 10:46:10 -07:00
|
|
|
|
2013-11-27 07:02:25 +02:00
|
|
|
pub fn intern(&self, val: T) -> Name {
|
2013-12-27 16:20:10 -08:00
|
|
|
let mut map = self.map.borrow_mut();
|
2014-11-06 12:25:16 -05:00
|
|
|
match (*map).get(&val) {
|
2013-02-09 13:22:21 -08:00
|
|
|
Some(&idx) => return idx,
|
|
|
|
None => (),
|
2012-07-17 11:22:11 -07:00
|
|
|
}
|
2013-02-09 13:22:21 -08:00
|
|
|
|
2013-12-30 16:33:52 -08:00
|
|
|
let mut vect = self.vect.borrow_mut();
|
2014-07-06 01:17:59 -07:00
|
|
|
let new_idx = Name((*vect).len() as u32);
|
2014-03-20 15:05:37 -07:00
|
|
|
(*map).insert(val.clone(), new_idx);
|
|
|
|
(*vect).push(val);
|
2013-02-09 13:22:21 -08:00
|
|
|
new_idx
|
2011-09-24 16:33:26 -07:00
|
|
|
}
|
2013-02-09 13:22:21 -08:00
|
|
|
|
2013-11-27 07:02:25 +02:00
|
|
|
pub fn gensym(&self, val: T) -> Name {
|
2013-12-30 16:33:52 -08:00
|
|
|
let mut vect = self.vect.borrow_mut();
|
2014-07-06 01:17:59 -07:00
|
|
|
let new_idx = Name((*vect).len() as u32);
|
2012-07-18 16:18:02 -07:00
|
|
|
// leave out of .map to avoid colliding
|
2014-03-20 15:05:37 -07:00
|
|
|
(*vect).push(val);
|
2013-02-09 13:22:21 -08:00
|
|
|
new_idx
|
2012-07-18 16:18:02 -07:00
|
|
|
}
|
2011-08-04 10:46:10 -07:00
|
|
|
|
2013-11-27 07:02:25 +02:00
|
|
|
pub fn get(&self, idx: Name) -> T {
|
2013-12-30 16:33:52 -08:00
|
|
|
let vect = self.vect.borrow();
|
2014-10-14 23:05:01 -07:00
|
|
|
(*vect)[idx.uint()].clone()
|
2013-07-02 12:47:32 -07:00
|
|
|
}
|
2012-07-17 17:05:38 -07:00
|
|
|
|
2013-12-30 16:33:52 -08:00
|
|
|
pub fn len(&self) -> uint {
|
|
|
|
let vect = self.vect.borrow();
|
2014-03-20 15:05:37 -07:00
|
|
|
(*vect).len()
|
2013-12-30 16:33:52 -08:00
|
|
|
}
|
2013-04-02 16:20:02 -07:00
|
|
|
|
2014-11-12 15:51:51 -08:00
|
|
|
pub fn find<Sized? Q>(&self, val: &Q) -> Option<Name>
|
|
|
|
where Q: BorrowFrom<T> + Eq + Hash {
|
2013-12-27 16:20:10 -08:00
|
|
|
let map = self.map.borrow();
|
2014-11-12 15:51:51 -08:00
|
|
|
match (*map).get(val) {
|
2013-04-02 16:20:02 -07:00
|
|
|
Some(v) => Some(*v),
|
|
|
|
None => None,
|
|
|
|
}
|
|
|
|
}
|
2014-03-09 00:18:58 +02:00
|
|
|
|
|
|
|
pub fn clear(&self) {
|
2014-03-20 15:05:37 -07:00
|
|
|
*self.map.borrow_mut() = HashMap::new();
|
|
|
|
*self.vect.borrow_mut() = Vec::new();
|
2014-03-09 00:18:58 +02:00
|
|
|
}
|
2012-09-19 09:41:06 -07:00
|
|
|
}
|
2013-01-23 17:29:08 -08:00
|
|
|
|
2014-05-29 17:45:07 -07:00
|
|
|
#[deriving(Clone, PartialEq, Hash, PartialOrd)]
|
2014-01-31 16:42:33 -08:00
|
|
|
pub struct RcStr {
|
2014-05-22 16:57:53 -07:00
|
|
|
string: Rc<String>,
|
2014-01-31 16:42:33 -08:00
|
|
|
}
|
|
|
|
|
2014-12-10 19:46:38 -08:00
|
|
|
impl RcStr {
|
|
|
|
pub fn new(string: &str) -> RcStr {
|
|
|
|
RcStr {
|
|
|
|
string: Rc::new(string.to_string()),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-31 10:43:52 -07:00
|
|
|
// Marker impl with an empty body: the comparison itself comes from the
// `PartialEq` derived on `RcStr`.
impl Eq for RcStr {}
|
2014-01-31 16:42:33 -08:00
|
|
|
|
2014-05-31 10:43:52 -07:00
|
|
|
impl Ord for RcStr {
|
2014-10-29 20:21:37 -05:00
|
|
|
fn cmp(&self, other: &RcStr) -> Ordering {
|
2014-12-10 19:46:38 -08:00
|
|
|
self[].cmp(other[])
|
2014-01-31 16:42:33 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-28 01:23:06 -08:00
|
|
|
impl fmt::Show for RcStr {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
use std::fmt::Show;
|
2014-12-10 19:46:38 -08:00
|
|
|
self[].fmt(f)
|
2014-02-28 01:23:06 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-12 15:51:51 -08:00
|
|
|
impl BorrowFrom<RcStr> for str {
|
|
|
|
fn borrow_from(owned: &RcStr) -> &str {
|
2014-12-10 19:46:38 -08:00
|
|
|
owned.string[]
|
2014-11-12 15:51:51 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-12-10 19:46:38 -08:00
|
|
|
impl Deref<str> for RcStr {
|
|
|
|
fn deref(&self) -> &str { self.string[] }
|
2014-01-31 16:42:33 -08:00
|
|
|
}
|
|
|
|
|
2014-05-21 17:20:52 -07:00
|
|
|
/// A StrInterner differs from Interner<String> in that it accepts
|
|
|
|
/// &str rather than RcStr, resulting in less allocation.
|
2013-05-02 10:16:07 +02:00
|
|
|
pub struct StrInterner {
|
2014-03-27 15:39:48 -07:00
|
|
|
map: RefCell<HashMap<RcStr, Name>>,
|
|
|
|
vect: RefCell<Vec<RcStr> >,
|
2013-05-02 10:16:07 +02:00
|
|
|
}
|
|
|
|
|
2014-05-21 17:20:52 -07:00
|
|
|
/// When traits can extend traits, we should extend index<Name,T> to get []
|
2013-05-31 15:17:22 -07:00
|
|
|
impl StrInterner {
|
|
|
|
pub fn new() -> StrInterner {
|
2013-05-02 10:16:07 +02:00
|
|
|
StrInterner {
|
2014-02-14 07:07:09 +02:00
|
|
|
map: RefCell::new(HashMap::new()),
|
2014-02-28 13:09:09 -08:00
|
|
|
vect: RefCell::new(Vec::new()),
|
2013-05-02 10:16:07 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-31 15:17:22 -07:00
|
|
|
pub fn prefill(init: &[&str]) -> StrInterner {
|
2013-05-02 10:16:07 +02:00
|
|
|
let rv = StrInterner::new();
|
2013-08-03 12:45:23 -04:00
|
|
|
for &v in init.iter() { rv.intern(v); }
|
2013-05-02 10:16:07 +02:00
|
|
|
rv
|
|
|
|
}
|
|
|
|
|
2013-11-27 07:02:25 +02:00
|
|
|
pub fn intern(&self, val: &str) -> Name {
|
2013-12-27 16:20:10 -08:00
|
|
|
let mut map = self.map.borrow_mut();
|
2014-11-12 15:51:51 -08:00
|
|
|
match map.get(val) {
|
2013-05-02 10:16:07 +02:00
|
|
|
Some(&idx) => return idx,
|
|
|
|
None => (),
|
|
|
|
}
|
|
|
|
|
2014-07-06 01:17:59 -07:00
|
|
|
let new_idx = Name(self.len() as u32);
|
2014-01-31 16:42:33 -08:00
|
|
|
let val = RcStr::new(val);
|
2014-03-20 15:05:37 -07:00
|
|
|
map.insert(val.clone(), new_idx);
|
|
|
|
self.vect.borrow_mut().push(val);
|
2013-05-02 10:16:07 +02:00
|
|
|
new_idx
|
|
|
|
}
|
|
|
|
|
2013-11-27 07:02:25 +02:00
|
|
|
pub fn gensym(&self, val: &str) -> Name {
|
2014-07-06 01:17:59 -07:00
|
|
|
let new_idx = Name(self.len() as u32);
|
2013-05-02 10:16:07 +02:00
|
|
|
// leave out of .map to avoid colliding
|
2014-03-20 15:05:37 -07:00
|
|
|
self.vect.borrow_mut().push(RcStr::new(val));
|
2013-05-02 10:16:07 +02:00
|
|
|
new_idx
|
|
|
|
}
|
|
|
|
|
2013-06-26 10:11:19 -07:00
|
|
|
// I want these gensyms to share name pointers
|
|
|
|
// with existing entries. This would be automatic,
|
|
|
|
// except that the existing gensym creates its
|
|
|
|
// own managed ptr using to_managed. I think that
|
|
|
|
// adding this utility function is the most
|
|
|
|
// lightweight way to get what I want, though not
|
|
|
|
// necessarily the cleanest.
|
|
|
|
|
2014-05-21 17:20:52 -07:00
|
|
|
/// Create a gensym with the same name as an existing
|
|
|
|
/// entry.
|
2013-11-27 07:02:25 +02:00
|
|
|
pub fn gensym_copy(&self, idx : Name) -> Name {
|
2014-07-06 01:17:59 -07:00
|
|
|
let new_idx = Name(self.len() as u32);
|
2013-06-26 10:11:19 -07:00
|
|
|
// leave out of map to avoid colliding
|
2013-12-30 16:33:52 -08:00
|
|
|
let mut vect = self.vect.borrow_mut();
|
2014-10-14 23:05:01 -07:00
|
|
|
let existing = (*vect)[idx.uint()].clone();
|
2014-03-20 15:05:37 -07:00
|
|
|
vect.push(existing);
|
2013-06-26 10:11:19 -07:00
|
|
|
new_idx
|
|
|
|
}
|
|
|
|
|
2014-01-31 16:42:33 -08:00
|
|
|
pub fn get(&self, idx: Name) -> RcStr {
|
2014-10-14 23:05:01 -07:00
|
|
|
(*self.vect.borrow())[idx.uint()].clone()
|
2013-12-30 16:33:52 -08:00
|
|
|
}
|
2013-05-02 10:16:07 +02:00
|
|
|
|
2013-12-30 16:33:52 -08:00
|
|
|
pub fn len(&self) -> uint {
|
2014-03-20 15:05:37 -07:00
|
|
|
self.vect.borrow().len()
|
2013-12-30 16:33:52 -08:00
|
|
|
}
|
2013-05-02 10:16:07 +02:00
|
|
|
|
2014-11-12 15:51:51 -08:00
|
|
|
pub fn find<Sized? Q>(&self, val: &Q) -> Option<Name>
|
|
|
|
where Q: BorrowFrom<RcStr> + Eq + Hash {
|
|
|
|
match (*self.map.borrow()).get(val) {
|
2013-05-02 10:16:07 +02:00
|
|
|
Some(v) => Some(*v),
|
|
|
|
None => None,
|
|
|
|
}
|
|
|
|
}
|
2014-03-09 00:18:58 +02:00
|
|
|
|
|
|
|
pub fn clear(&self) {
|
2014-03-20 15:05:37 -07:00
|
|
|
*self.map.borrow_mut() = HashMap::new();
|
|
|
|
*self.vect.borrow_mut() = Vec::new();
|
2014-03-09 00:18:58 +02:00
|
|
|
}
|
2014-11-28 21:56:09 -07:00
|
|
|
|
|
|
|
pub fn reset(&self, other: StrInterner) {
|
|
|
|
*self.map.borrow_mut() = other.map.into_inner();
|
|
|
|
*self.vect.borrow_mut() = other.vect.into_inner();
|
|
|
|
}
|
2013-05-02 10:16:07 +02:00
|
|
|
}
|
|
|
|
|
2013-04-16 01:08:52 +10:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use ast::Name;

    // Asking an empty interner for an entry must fail.
    #[test]
    #[should_fail]
    fn i1 () {
        let interner : Interner<RcStr> = Interner::new();
        interner.get(Name(13));
    }

    #[test]
    fn interner_tests () {
        let interner : Interner<RcStr> = Interner::new();
        // the first interned value gets index zero:
        assert_eq!(interner.intern(RcStr::new("dog")), Name(0));
        // interning an equal value again returns the same entry:
        assert_eq!(interner.intern(RcStr::new("dog")), Name(0));
        // a different string gets a different index:
        assert_eq!(interner.intern(RcStr::new("cat")), Name(1));
        assert_eq!(interner.intern(RcStr::new("cat")), Name(1));
        // dog is still at zero:
        assert_eq!(interner.intern(RcStr::new("dog")), Name(0));
        // gensym takes the next free index (the third entry, index 2):
        assert_eq!(interner.gensym(RcStr::new("zebra")), Name(2));
        // gensym of the same string gets a new index:
        assert_eq!(interner.gensym(RcStr::new("zebra")), Name(3));
        // gensym of an *interned* string also gets a new index:
        assert_eq!(interner.gensym(RcStr::new("dog")), Name(4));
        // every entry reads back in insertion order:
        let stored: &[&str] = &["dog", "cat", "zebra", "zebra", "dog"];
        for (pos, &text) in stored.iter().enumerate() {
            assert_eq!(interner.get(Name(pos as u32)), RcStr::new(text));
        }
    }

    #[test]
    fn i3 () {
        let interner : Interner<RcStr> = Interner::prefill(&[
            RcStr::new("Alan"),
            RcStr::new("Bob"),
            RcStr::new("Carol")
        ]);
        // prefilled values are numbered in the order they were given:
        let stored: &[&str] = &["Alan", "Bob", "Carol"];
        for (pos, &text) in stored.iter().enumerate() {
            assert_eq!(interner.get(Name(pos as u32)), RcStr::new(text));
        }
        // interning a prefilled value returns its existing index:
        assert_eq!(interner.intern(RcStr::new("Bob")), Name(1));
    }

    #[test]
    fn string_interner_tests() {
        let interner : StrInterner = StrInterner::new();
        // the first interned string gets index zero:
        assert_eq!(interner.intern("dog"), Name(0));
        // interning an equal string again returns the same entry:
        assert_eq!(interner.intern("dog"), Name(0));
        // a different string gets a different index:
        assert_eq!(interner.intern("cat"), Name(1));
        assert_eq!(interner.intern("cat"), Name(1));
        // dog is still at zero:
        assert_eq!(interner.intern("dog"), Name(0));
        // gensym takes the next free index (the third entry, index 2):
        assert_eq!(interner.gensym("zebra"), Name(2));
        // gensym of the same string gets a new index:
        assert_eq!(interner.gensym("zebra"), Name(3));
        // gensym of an *interned* string also gets a new index:
        assert_eq!(interner.gensym("dog"), Name(4));
        // gensym_copy appends a new entry with the copied entry's text:
        assert_eq!(interner.gensym_copy(Name(2)), Name(5));
        assert_eq!(interner.get(Name(5)), RcStr::new("zebra"));
        assert_eq!(interner.gensym_copy(Name(2)), Name(6));
        assert_eq!(interner.get(Name(6)), RcStr::new("zebra"));
        // the earlier entries are unchanged:
        let stored: &[&str] = &["dog", "cat", "zebra", "zebra", "dog"];
        for (pos, &text) in stored.iter().enumerate() {
            assert_eq!(interner.get(Name(pos as u32)), RcStr::new(text));
        }
    }
}
|