2012-12-03 16:48:01 -08:00
|
|
|
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
|
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2011-07-05 11:48:19 +02:00
|
|
|
// An "interner" is a data structure that associates values with uint tags and
|
|
|
|
// allows bidirectional lookup; i.e. given a value, one can easily find the
|
|
|
|
// tag, and vice versa.
|
2013-01-08 19:37:25 -08:00
|
|
|
|
|
|
|
use core::prelude::*;
|
|
|
|
|
|
|
|
use core::dvec::DVec;
|
2013-02-01 02:13:36 -05:00
|
|
|
use std::oldmap::HashMap;
|
|
|
|
use std::oldmap;
|
2011-07-05 11:48:19 +02:00
|
|
|
|
2013-01-29 14:41:40 -08:00
|
|
|
// Concrete interner state: `map` is the value -> index table consulted by
// `intern`, and `vect` holds the stored values in index order (read by `get`).
pub type hash_interner<T> = {map: HashMap<T, uint>, vect: DVec<T>};
|
2011-07-05 11:48:19 +02:00
|
|
|
|
2013-01-29 14:41:40 -08:00
|
|
|
// Builds an empty interner: a fresh value -> index map paired with an empty
// vector of values, handed back behind the `Interner` trait.
pub fn mk<T:Eq IterBytes Hash Const Copy>() -> Interner<T> {
    let table: hash_interner<T> = {
        map: oldmap::HashMap::<T, uint>(),
        vect: DVec()
    };
    // the record is moved into the trait object rather than copied
    move ((move table) as Interner::<T>)
}
|
2011-08-04 10:46:10 -07:00
|
|
|
|
2013-01-29 14:41:40 -08:00
|
|
|
// Builds an interner and interns every element of `init`, in order, before
// returning it.
pub fn mk_prefill<T:Eq IterBytes Hash Const Copy>(init: &[T]) -> Interner<T> {
    let interner = mk();
    for init.each() |elem| {
        interner.intern(*elem);
    }
    interner
}
|
|
|
|
|
|
|
|
|
2012-07-17 11:22:11 -07:00
|
|
|
/* when traits can extend traits, we should extend index<uint,T> to get [] */
|
2013-01-29 14:41:40 -08:00
|
|
|
pub trait Interner<T:Eq IterBytes Hash Const Copy> {
|
2012-07-17 11:22:11 -07:00
|
|
|
fn intern(T) -> uint;
|
2012-07-18 16:18:02 -07:00
|
|
|
fn gensym(T) -> uint;
|
2012-07-17 11:22:11 -07:00
|
|
|
pure fn get(uint) -> T;
|
|
|
|
fn len() -> uint;
|
2011-07-05 11:48:19 +02:00
|
|
|
}
|
2011-08-04 10:46:10 -07:00
|
|
|
|
2013-01-29 14:41:40 -08:00
|
|
|
// Interner backed by a hash map (value -> index) and a vector (index -> value).
pub impl <T:Eq IterBytes Hash Const Copy> hash_interner<T>: Interner<T> {
    // Looks the value up in the map; on a miss, the value is appended to the
    // vector and its new index is recorded in the map.
    fn intern(val: T) -> uint {
        match self.map.find(val) {
            Some(existing) => existing,
            None => {
                // the next free index is the current length of the vector
                let fresh = self.vect.len();
                self.map.insert(val, fresh);
                self.vect.push(val);
                fresh
            }
        }
    }

    // Appends the value to the vector and returns its index.
    fn gensym(val: T) -> uint {
        let fresh = self.vect.len();
        // deliberately not entered into .map, so it cannot collide with
        // interned values
        self.vect.push(val);
        fresh
    }

    // Not "pure" in the traditional sense: a call that fails now can start
    // returning a value once more items have been interned. For typestate,
    // where a pred is checked first and then relied upon, ceasing to fail
    // is acceptable.
    pure fn get(idx: uint) -> T { self.vect.get_elt(idx) }

    // Number of values currently held in the vector.
    fn len() -> uint { self.vect.len() }
}
|
2013-01-23 17:29:08 -08:00
|
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
#[should_fail]
|
2013-01-29 14:41:40 -08:00
|
|
|
pub fn i1 () {
|
2013-01-23 17:29:08 -08:00
|
|
|
let i : Interner<@~str> = mk();
|
|
|
|
i.get(13);
|
2013-01-23 17:41:54 -08:00
|
|
|
}
|
2013-01-23 17:29:08 -08:00
|
|
|
|
|
|
|
#[test]
|
2013-01-29 14:41:40 -08:00
|
|
|
pub fn i2 () {
|
2013-01-23 17:29:08 -08:00
|
|
|
let i : Interner<@~str> = mk();
|
|
|
|
// first one is zero:
|
|
|
|
assert i.intern (@~"dog") == 0;
|
|
|
|
// re-use gets the same entry:
|
|
|
|
assert i.intern (@~"dog") == 0;
|
|
|
|
// different string gets a different #:
|
|
|
|
assert i.intern (@~"cat") == 1;
|
|
|
|
assert i.intern (@~"cat") == 1;
|
|
|
|
// dog is still at zero
|
|
|
|
assert i.intern (@~"dog") == 0;
|
|
|
|
// gensym gets 3
|
|
|
|
assert i.gensym (@~"zebra" ) == 2;
|
|
|
|
// gensym of same string gets new number :
|
|
|
|
assert i.gensym (@~"zebra" ) == 3;
|
|
|
|
// gensym of *existing* string gets new number:
|
|
|
|
assert i.gensym (@~"dog") == 4;
|
|
|
|
assert i.get(0) == @~"dog";
|
|
|
|
assert i.get(1) == @~"cat";
|
|
|
|
assert i.get(2) == @~"zebra";
|
|
|
|
assert i.get(3) == @~"zebra";
|
|
|
|
assert i.get(4) == @~"dog";
|
2013-01-23 17:41:54 -08:00
|
|
|
}
|
2013-01-23 17:29:08 -08:00
|
|
|
|
|
|
|
#[test]
|
2013-01-29 14:41:40 -08:00
|
|
|
pub fn i3 () {
|
2013-01-23 17:29:08 -08:00
|
|
|
let i : Interner<@~str> = mk_prefill([@~"Alan",@~"Bob",@~"Carol"]);
|
|
|
|
assert i.get(0) == @~"Alan";
|
|
|
|
assert i.get(1) == @~"Bob";
|
|
|
|
assert i.get(2) == @~"Carol";
|
|
|
|
assert i.intern(@~"Bob") == 1;
|
|
|
|
}
|