From a4c0468a21ce48998310947869dcf028ee468b8d Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 23 Jun 2014 17:35:43 -0700 Subject: [PATCH 01/10] collections::bitv: implement BitvSet directly as a Bitv --- src/libcollections/bitv.rs | 245 ++++++++++++++++++++++--------------- 1 file changed, 143 insertions(+), 102 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index 234393ce561..59ad188b530 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -15,7 +15,8 @@ use core::prelude::*; use core::cmp; use core::default::Default; use core::fmt; -use core::iter::{Enumerate, Repeat, Map, Zip}; +use core::iter::{Map, Zip}; +use core::option; use core::ops; use core::slice; use core::uint; @@ -268,6 +269,33 @@ fn die() -> ! { fail!("Tried to do operation on bit vectors with different sizes"); } +enum WordsVariant<'a> { + NoneIter, + OneIter(option::Item), + VecIter(slice::Items<'a, uint>) +} + +struct Words<'a> { + rep: WordsVariant<'a>, + offset: uint +} + +impl<'a> Iterator<(uint, uint)> for Words<'a> { + /// Returns (offset, word) + fn next<'a>(&'a mut self) -> Option<(uint, uint)> { + let ret = match self.rep { + NoneIter => None, + OneIter(ref mut it) => it.next(), + VecIter(ref mut it) => it.next().map(|n| *n) + }; + self.offset += 1; + match ret { + Some(n) => Some((self.offset - 1, n)), + None => None + } + } +} + impl Bitv { #[inline] fn do_op(&mut self, op: Op, other: &Bitv) -> bool { @@ -295,6 +323,18 @@ impl Bitv { } } } + + #[inline] + fn words<'a>(&'a self, start: uint) -> Words<'a> { + Words { + rep: match self.rep { + Small(_) if start > 0 => NoneIter, + Small(ref s) => OneIter(Some(s.bits).move_iter()), + Big(ref b) => VecIter(b.storage.slice_from(start).iter()) + }, + offset: start + } + } } impl Bitv { @@ -687,15 +727,8 @@ impl<'a> RandomAccessIterator for Bits<'a> { /// It should also be noted that the amount of storage necessary for holding a /// set of objects is proportional to the maximum of the objects when viewed /// as a `uint`. -#[deriving(Clone)] -pub struct BitvSet { - size: uint, - - // In theory this is a `Bitv` instead of always a `BigBitv`, but knowing that - // there's an array of storage makes our lives a whole lot easier when - // performing union/intersection/etc operations - bitv: BigBitv -} +#[deriving(Clone, PartialEq, Eq)] +pub struct BitvSet(Bitv); impl Default for BitvSet { #[inline] @@ -705,56 +738,87 @@ impl Default for BitvSet { impl BitvSet { /// Creates a new bit vector set with initially no contents pub fn new() -> BitvSet { - BitvSet{ size: 0, bitv: BigBitv::new(vec!(0)) } + BitvSet(Bitv::new(0, false)) } /// Creates a new bit vector set from the given bit vector pub fn from_bitv(bitv: Bitv) -> BitvSet { - let mut size = 0; - bitv.ones(|_| { - size += 1; - true - }); - let Bitv{rep, ..} = bitv; - match rep { - Big(b) => BitvSet{ size: size, bitv: b }, - Small(SmallBitv{bits}) => - BitvSet{ size: size, bitv: BigBitv{ storage: vec!(bits) } }, - } + BitvSet(bitv) } /// Returns the capacity in bits for this bit vector. Inserting any /// element less than this amount will not trigger a resizing. - pub fn capacity(&self) -> uint { self.bitv.storage.len() * uint::BITS } + pub fn capacity(&self) -> uint { + let &BitvSet(ref bitv) = self; + match bitv.rep { + Small(_) => uint::BITS, + Big(ref s) => s.storage.len() * uint::BITS + } + } /// Consumes this set to return the underlying bit vector pub fn unwrap(self) -> Bitv { - let cap = self.capacity(); - let BitvSet{bitv, ..} = self; - return Bitv{ nbits:cap, rep: Big(bitv) }; + let BitvSet(bitv) = self; + bitv + } + + #[inline] + /// Grows the vector to be able to store bits with indices `[0, size - 1]` + fn grow(&mut self, size: uint) { + let &BitvSet(ref mut bitv) = self; + let small_to_big = match bitv.rep { Small(s) if size >= uint::BITS => Some(s.bits), _ => None }; + if small_to_big.is_some() { + bitv.rep = Big(BigBitv { storage: vec![small_to_big.unwrap()] }); + } + match bitv.rep { + Small(_) => {}, + Big(ref mut b) => { + let size = (size + uint::BITS - 1) / uint::BITS; + if b.storage.len() < size { + b.storage.grow(size, &0); + } + } + }; } #[inline] fn other_op(&mut self, other: &BitvSet, f: |uint, uint| -> uint) { - fn nbits(mut w: uint) -> uint { - let mut bits = 0; - for _ in range(0u, uint::BITS) { - if w == 0 { - break; + // Expand the vector if necessary + self.grow(other.capacity()); + // Unwrap Bitvs + let &BitvSet(ref mut self_bitv) = self; + let &BitvSet(ref other_bitv) = other; + for (i, w) in other_bitv.words(0) { + match self_bitv.rep { + Small(ref mut s) => { s.bits = f(s.bits, w); } + Big(ref mut b) => { + let old = *b.storage.get(i); + let new = f(old, w); + *b.storage.get_mut(i) = new; + *b.storage.get_mut(i) = f(*b.storage.get(i), w); } - bits += w & 1; - w >>= 1; } - return bits; } - if self.capacity() < other.capacity() { - self.bitv.storage.grow(other.capacity() / uint::BITS, &0); - } - for (i, &w) in other.bitv.storage.iter().enumerate() { - let old = *self.bitv.storage.get(i); - let new = f(old, w); - *self.bitv.storage.get_mut(i) = new; - self.size += nbits(new) - nbits(old); + } + + #[inline] + /// Truncate the underlying vector to the least length required + pub fn shrink_to_fit(&mut self) { + let &BitvSet(ref mut bitv) = self; + // Two steps: we borrow b as immutable to get the length... + let old_len = match bitv.rep { + Small(_) => 1, + Big(ref b) => b.storage.len() + }; + // ...and as mutable to change it. + match bitv.rep { + Small(_) => {}, + Big(ref mut b) => { + let n = b.storage.iter().rev().take_while(|&&n| n == 0).count(); + let trunc_len = cmp::max(old_len - n, 1); + b.storage.truncate(trunc_len); + bitv.nbits = trunc_len * uint::BITS; + } } } @@ -818,29 +882,6 @@ impl BitvSet { } } -impl cmp::PartialEq for BitvSet { - fn eq(&self, other: &BitvSet) -> bool { - if self.size != other.size { - return false; - } - for (_, w1, w2) in self.commons(other) { - if w1 != w2 { - return false; - } - } - for (_, _, w) in self.outliers(other) { - if w != 0 { - return false; - } - } - return true; - } - - fn ne(&self, other: &BitvSet) -> bool { !self.eq(other) } -} - -impl cmp::Eq for BitvSet {} - impl fmt::Show for BitvSet { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { try!(write!(fmt, "{{")); @@ -866,19 +907,26 @@ impl hash::Hash for BitvSet { impl Collection for BitvSet { #[inline] - fn len(&self) -> uint { self.size } + fn len(&self) -> uint { + let &BitvSet(ref bitv) = self; + match bitv.rep { + Small(ref s) => s.bits.count_ones(), + Big(ref b) => b.storage.iter().fold(0, |acc, &n| acc + n.count_ones()) + } + } } impl Mutable for BitvSet { fn clear(&mut self) { - self.bitv.each_storage(|w| { *w = 0; true }); - self.size = 0; + let &BitvSet(ref mut bitv) = self; + bitv.clear(); } } impl Set for BitvSet { fn contains(&self, value: &uint) -> bool { - *value < self.bitv.storage.len() * uint::BITS && self.bitv.get(*value) + let &BitvSet(ref bitv) = self; + *value < bitv.nbits && bitv.get(*value) } fn is_disjoint(&self, other: &BitvSet) -> bool { @@ -914,14 +962,15 @@ impl MutableSet for BitvSet { if self.contains(&value) { return false; } - let nbits = self.capacity(); - if value >= nbits { - let newsize = cmp::max(value, nbits * 2) / uint::BITS + 1; - assert!(newsize > self.bitv.storage.len()); - self.bitv.storage.grow(newsize, &0); + if value >= self.capacity() { + let new_cap = cmp::max(value + 1, self.capacity() * 2); + self.grow(new_cap); } - self.size += 1; - self.bitv.set(value, true); + let &BitvSet(ref mut bitv) = self; + if value >= bitv.nbits { + bitv.nbits = value + 1; + } + bitv.set(value, true); return true; } @@ -929,16 +978,8 @@ impl MutableSet for BitvSet { if !self.contains(value) { return false; } - self.size -= 1; - self.bitv.set(*value, false); - - // Attempt to truncate our storage - let mut i = self.bitv.storage.len(); - while i > 1 && *self.bitv.storage.get(i - 1) == 0 { - i -= 1; - } - self.bitv.storage.truncate(i); - + let &BitvSet(ref mut bitv) = self; + bitv.set(*value, false); return true; } } @@ -949,12 +990,12 @@ impl BitvSet { /// w1, w2) where the bit location is the number of bits offset so far, /// and w1/w2 are the words coming from the two vectors self, other. fn commons<'a>(&'a self, other: &'a BitvSet) - -> Map<'static, ((uint, &'a uint), &'a Vec), (uint, uint, uint), - Zip>, Repeat<&'a Vec>>> { - let min = cmp::min(self.bitv.storage.len(), other.bitv.storage.len()); - self.bitv.storage.slice(0, min).iter().enumerate() - .zip(Repeat::new(&other.bitv.storage)) - .map(|((i, &w), o_store)| (i * uint::BITS, w, *o_store.get(i))) + -> Map<((uint, uint), (uint, uint)), (uint, uint, uint), + Zip, Words<'a>>> { + let &BitvSet(ref self_bitv) = self; + let &BitvSet(ref other_bitv) = other; + self_bitv.words(0).zip(other_bitv.words(0)) + .map(|((i, w1), (_, w2))| (i * uint::BITS, w1, w2)) } /// Visits each word in `self` or `other` that extends beyond the other. This @@ -965,19 +1006,18 @@ impl BitvSet { /// is true if the word comes from `self`, and `false` if it comes from /// `other`. fn outliers<'a>(&'a self, other: &'a BitvSet) - -> Map<'static, ((uint, &'a uint), uint), (bool, uint, uint), - Zip>, Repeat>> { - let slen = self.bitv.storage.len(); - let olen = other.bitv.storage.len(); + -> Map<(uint, uint), (bool, uint, uint), Words<'a>> { + let slen = self.capacity() / uint::BITS; + let olen = other.capacity() / uint::BITS; + let &BitvSet(ref self_bitv) = self; + let &BitvSet(ref other_bitv) = other; if olen < slen { - self.bitv.storage.slice_from(olen).iter().enumerate() - .zip(Repeat::new(olen)) - .map(|((i, &w), min)| (true, (i + min) * uint::BITS, w)) + self_bitv.words(olen) + .map(|(i, w)| (true, i * uint::BITS, w)) } else { - other.bitv.storage.slice_from(slen).iter().enumerate() - .zip(Repeat::new(slen)) - .map(|((i, &w), min)| (false, (i + min) * uint::BITS, w)) + other_bitv.words(slen) + .map(|(i, w)| (false, i * uint::BITS, w)) } } } @@ -1600,6 +1640,7 @@ mod tests { assert!(a.insert(1000)); assert!(a.remove(&1000)); + a.shrink_to_fit(); assert_eq!(a.capacity(), uint::BITS); } From 2d23319e334a4fa1a6ec440b2cb0e48d0cadc542 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 30 Jun 2014 06:43:39 -0700 Subject: [PATCH 02/10] collections::bitv: Remove SmallBitv/BigBitv dichotomy The old `Bitv` structure had two variations: one represented by a vector of uints, and another represented by a single uint for bit vectors containing fewer than uint::BITS bits. The purpose of this is to avoid the indirection of using a Vec, but the speedup is only available to users who (a) are storing less than uints::BITS bits (b) know this when they create the vector (since `Bitv`s cannot be resized) (c) don't know this at compile time (else they could use uint directly) Giving such specific users a (questionable) speed benefit at the cost of adding explicit checks to almost every single bit call, frequently writing the same method twice and making iteration much much more difficult, does not seem like a worthwhile tradeoff to me. Also, rustc does not use Bitv anywhere, only through BitvSet, which does not have this optimization. For reference, here is some speed data from before and after this PR: BEFORE: test bitv::tests::bench_bitv_big ... bench: 4 ns/iter (+/- 1) test bitv::tests::bench_bitv_big_iter ... bench: 4858 ns/iter (+/- 22) test bitv::tests::bench_bitv_big_union ... bench: 507 ns/iter (+/- 35) test bitv::tests::bench_bitv_set_big ... bench: 6 ns/iter (+/- 1) test bitv::tests::bench_bitv_set_small ... bench: 6 ns/iter (+/- 0) test bitv::tests::bench_bitv_small ... bench: 5 ns/iter (+/- 1) test bitv::tests::bench_bitvset_iter ... bench: 12930 ns/iter (+/- 662) test bitv::tests::bench_btv_small_iter ... bench: 39 ns/iter (+/- 1) test bitv::tests::bench_uint_small ... bench: 4 ns/iter (+/- 1) AFTER: test bitv::tests::bench_bitv_big ... bench: 5 ns/iter (+/- 1) test bitv::tests::bench_bitv_big_iter ... bench: 5004 ns/iter (+/- 102) test bitv::tests::bench_bitv_big_union ... bench: 356 ns/iter (+/- 26) test bitv::tests::bench_bitv_set_big ... bench: 6 ns/iter (+/- 0) test bitv::tests::bench_bitv_set_small ... bench: 6 ns/iter (+/- 1) test bitv::tests::bench_bitv_small ... bench: 4 ns/iter (+/- 1) test bitv::tests::bench_bitvset_iter ... bench: 12918 ns/iter (+/- 621) test bitv::tests::bench_btv_small_iter ... bench: 50 ns/iter (+/- 5) test bitv::tests::bench_uint_small ... bench: 4 ns/iter (+/- 1) --- src/libcollections/bitv.rs | 503 ++++++++----------------------------- 1 file changed, 106 insertions(+), 397 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index 59ad188b530..20d7c3ef2cf 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -16,7 +16,6 @@ use core::cmp; use core::default::Default; use core::fmt; use core::iter::{Map, Zip}; -use core::option; use core::ops; use core::slice; use core::uint; @@ -25,110 +24,14 @@ use std::hash; use {Collection, Mutable, Set, MutableSet}; use vec::Vec; -#[deriving(Clone)] -struct SmallBitv { - /// only the lowest nbits of this value are used. the rest is undefined. - bits: uint -} - -/// a mask that has a 1 for each defined bit in a small_bitv, assuming n bits -#[inline] -fn small_mask(nbits: uint) -> uint { - (1 << nbits) - 1 -} - -impl SmallBitv { - fn new(bits: uint) -> SmallBitv { - SmallBitv {bits: bits} - } - - #[inline] - fn bits_op(&mut self, - right_bits: uint, - nbits: uint, - f: |uint, uint| -> uint) - -> bool { - let mask = small_mask(nbits); - let old_b: uint = self.bits; - let new_b = f(old_b, right_bits); - self.bits = new_b; - mask & old_b != mask & new_b - } - - #[inline] - fn union(&mut self, s: &SmallBitv, nbits: uint) -> bool { - self.bits_op(s.bits, nbits, |u1, u2| u1 | u2) - } - - #[inline] - fn intersect(&mut self, s: &SmallBitv, nbits: uint) -> bool { - self.bits_op(s.bits, nbits, |u1, u2| u1 & u2) - } - - #[inline] - fn become(&mut self, s: &SmallBitv, nbits: uint) -> bool { - self.bits_op(s.bits, nbits, |_u1, u2| u2) - } - - #[inline] - fn difference(&mut self, s: &SmallBitv, nbits: uint) -> bool { - self.bits_op(s.bits, nbits, |u1, u2| u1 & !u2) - } - - #[inline] - fn get(&self, i: uint) -> bool { - (self.bits & (1 << i)) != 0 - } - - #[inline] - fn set(&mut self, i: uint, x: bool) { - if x { - self.bits |= 1< bool { - let mask = small_mask(nbits); - mask & self.bits == mask & b.bits - } - - #[inline] - fn clear(&mut self) { self.bits = 0; } - - #[inline] - fn set_all(&mut self) { self.bits = !0; } - - #[inline] - fn all(&self, nbits: uint) -> bool { - small_mask(nbits) & !self.bits == 0 - } - - #[inline] - fn none(&self, nbits: uint) -> bool { - small_mask(nbits) & self.bits == 0 - } - - #[inline] - fn negate(&mut self) { self.bits = !self.bits; } -} - -#[deriving(Clone)] -struct BigBitv { - storage: Vec -} - /** - * A mask that has a 1 for each defined bit in the n'th element of a `BigBitv`, + * A mask that has a 1 for each defined bit in the n'th element of a `Bitv`, * assuming n bits. */ #[inline] fn big_mask(nbits: uint, elem: uint) -> uint { let rmd = nbits % uint::BITS; - let nelems = nbits/uint::BITS + if rmd == 0 {0} else {1}; + let nelems = (nbits + uint::BITS - 1) / uint::BITS; if elem < nelems - 1 || rmd == 0 { !0 @@ -137,99 +40,6 @@ fn big_mask(nbits: uint, elem: uint) -> uint { } } -impl BigBitv { - fn new(storage: Vec) -> BigBitv { - BigBitv {storage: storage} - } - - #[inline] - fn process(&mut self, - b: &BigBitv, - nbits: uint, - op: |uint, uint| -> uint) - -> bool { - let len = b.storage.len(); - assert_eq!(self.storage.len(), len); - let mut changed = false; - for (i, (a, b)) in self.storage.mut_iter() - .zip(b.storage.iter()) - .enumerate() { - let mask = big_mask(nbits, i); - let w0 = *a & mask; - let w1 = *b & mask; - let w = op(w0, w1) & mask; - if w0 != w { - changed = true; - *a = w; - } - } - changed - } - - #[inline] - fn each_storage(&mut self, op: |v: &mut uint| -> bool) -> bool { - self.storage.mut_iter().advance(|elt| op(elt)) - } - - #[inline] - fn negate(&mut self) { - self.each_storage(|w| { *w = !*w; true }); - } - - #[inline] - fn union(&mut self, b: &BigBitv, nbits: uint) -> bool { - self.process(b, nbits, |w1, w2| w1 | w2) - } - - #[inline] - fn intersect(&mut self, b: &BigBitv, nbits: uint) -> bool { - self.process(b, nbits, |w1, w2| w1 & w2) - } - - #[inline] - fn become(&mut self, b: &BigBitv, nbits: uint) -> bool { - self.process(b, nbits, |_, w| w) - } - - #[inline] - fn difference(&mut self, b: &BigBitv, nbits: uint) -> bool { - self.process(b, nbits, |w1, w2| w1 & !w2) - } - - #[inline] - fn get(&self, i: uint) -> bool { - let w = i / uint::BITS; - let b = i % uint::BITS; - let x = 1 & self.storage.get(w) >> b; - x == 1 - } - - #[inline] - fn set(&mut self, i: uint, x: bool) { - let w = i / uint::BITS; - let b = i % uint::BITS; - let flag = 1 << b; - *self.storage.get_mut(w) = if x { *self.storage.get(w) | flag } - else { *self.storage.get(w) & !flag }; - } - - #[inline] - fn equals(&self, b: &BigBitv, nbits: uint) -> bool { - for (i, elt) in b.storage.iter().enumerate() { - let mask = big_mask(nbits, i); - if mask & *self.storage.get(i) != mask & *elt { - return false; - } - } - true - } -} - -#[deriving(Clone)] -enum BitvVariant { Big(BigBitv), Small(SmallBitv) } - -enum Op {Union, Intersect, Assign, Difference} - /// The bitvector type /// /// # Example @@ -259,108 +69,72 @@ enum Op {Union, Intersect, Assign, Difference} /// ``` #[deriving(Clone)] pub struct Bitv { - /// Internal representation of the bit vector (small or large) - rep: BitvVariant, + /// Internal representation of the bit vector + storage: Vec, /// The number of valid bits in the internal representation nbits: uint } -fn die() -> ! { - fail!("Tried to do operation on bit vectors with different sizes"); -} - -enum WordsVariant<'a> { - NoneIter, - OneIter(option::Item), - VecIter(slice::Items<'a, uint>) -} - struct Words<'a> { - rep: WordsVariant<'a>, + iter: slice::Items<'a, uint>, offset: uint } impl<'a> Iterator<(uint, uint)> for Words<'a> { /// Returns (offset, word) fn next<'a>(&'a mut self) -> Option<(uint, uint)> { - let ret = match self.rep { - NoneIter => None, - OneIter(ref mut it) => it.next(), - VecIter(ref mut it) => it.next().map(|n| *n) - }; + let ret = self.iter.next().map(|&n| (self.offset, n)); self.offset += 1; - match ret { - Some(n) => Some((self.offset - 1, n)), - None => None - } + ret } } impl Bitv { #[inline] - fn do_op(&mut self, op: Op, other: &Bitv) -> bool { - if self.nbits != other.nbits { - die(); - } - match self.rep { - Small(ref mut s) => match other.rep { - Small(ref s1) => match op { - Union => s.union(s1, self.nbits), - Intersect => s.intersect(s1, self.nbits), - Assign => s.become(s1, self.nbits), - Difference => s.difference(s1, self.nbits) - }, - Big(_) => die() - }, - Big(ref mut s) => match other.rep { - Small(_) => die(), - Big(ref s1) => match op { - Union => s.union(s1, self.nbits), - Intersect => s.intersect(s1, self.nbits), - Assign => s.become(s1, self.nbits), - Difference => s.difference(s1, self.nbits) + fn process(&mut self, other: &Bitv, nbits: uint, + op: |uint, uint| -> uint) -> bool { + let len = other.storage.len(); + assert_eq!(self.storage.len(), len); + let mut changed = false; + for (i, (a, b)) in self.storage.mut_iter() + .zip(other.storage.iter()) + .enumerate() { + let mask = big_mask(nbits, i); + let w0 = *a & mask; + let w1 = *b & mask; + let w = op(w0, w1) & mask; + if w0 != w { + changed = true; + *a = w; } - } } + changed } + #[inline] #[inline] fn words<'a>(&'a self, start: uint) -> Words<'a> { Words { - rep: match self.rep { - Small(_) if start > 0 => NoneIter, - Small(ref s) => OneIter(Some(s.bits).move_iter()), - Big(ref b) => VecIter(b.storage.slice_from(start).iter()) - }, + iter: self.storage.slice_from(start).iter(), offset: start } } -} -impl Bitv { /// Creates an empty Bitv that holds `nbits` elements, setting each element /// to `init`. pub fn new(nbits: uint, init: bool) -> Bitv { - let rep = if nbits < uint::BITS { - Small(SmallBitv::new(if init {(1< 0 { + *v.get_mut(nelems - 1) &= (1 << nbits % uint::BITS) - 1; + } + v + }, + nbits: nbits + } } /** @@ -370,7 +144,10 @@ impl Bitv { * the same length. Returns `true` if `self` changed. */ #[inline] - pub fn union(&mut self, v1: &Bitv) -> bool { self.do_op(Union, v1) } + pub fn union(&mut self, other: &Bitv) -> bool { + let nbits = self.nbits; + self.process(other, nbits, |w1, w2| w1 | w2) + } /** * Calculates the intersection of two bitvectors @@ -379,8 +156,9 @@ impl Bitv { * must be the same length. Returns `true` if `self` changed. */ #[inline] - pub fn intersect(&mut self, v1: &Bitv) -> bool { - self.do_op(Intersect, v1) + pub fn intersect(&mut self, other: &Bitv) -> bool { + let nbits = self.nbits; + self.process(other, nbits, |w1, w2| w1 & w2) } /** @@ -390,16 +168,19 @@ impl Bitv { * changed */ #[inline] - pub fn assign(&mut self, v: &Bitv) -> bool { self.do_op(Assign, v) } + pub fn assign(&mut self, other: &Bitv) -> bool { + let nbits = self.nbits; + self.process(other, nbits, |_, w| w) + } /// Retrieve the value at index `i` #[inline] pub fn get(&self, i: uint) -> bool { - assert!((i < self.nbits)); - match self.rep { - Big(ref b) => b.get(i), - Small(ref s) => s.get(i) - } + assert!(i < self.nbits); + let w = i / uint::BITS; + let b = i % uint::BITS; + let x = self.storage.get(w) & (1 << b); + x != 0 } /** @@ -409,42 +190,30 @@ impl Bitv { */ #[inline] pub fn set(&mut self, i: uint, x: bool) { - assert!((i < self.nbits)); - match self.rep { - Big(ref mut b) => b.set(i, x), - Small(ref mut s) => s.set(i, x) - } + assert!(i < self.nbits); + let w = i / uint::BITS; + let b = i % uint::BITS; + let flag = 1 << b; + *self.storage.get_mut(w) = if x { *self.storage.get(w) | flag } + else { *self.storage.get(w) & !flag }; } /// Set all bits to 0 #[inline] pub fn clear(&mut self) { - match self.rep { - Small(ref mut b) => b.clear(), - Big(ref mut s) => { - s.each_storage(|w| { *w = 0u; true }); - } - } + for w in self.storage.mut_iter() { *w = 0u; } } /// Set all bits to 1 #[inline] pub fn set_all(&mut self) { - match self.rep { - Small(ref mut b) => b.set_all(), - Big(ref mut s) => { - s.each_storage(|w| { *w = !0u; true }); - } - } + for w in self.storage.mut_iter() { *w = !0u; } } /// Flip all bits #[inline] pub fn negate(&mut self) { - match self.rep { - Small(ref mut s) => s.negate(), - Big(ref mut b) => b.negate(), - } + for w in self.storage.mut_iter() { *w = !*w; } } /** @@ -457,17 +226,21 @@ impl Bitv { * Returns `true` if `v0` was changed. */ #[inline] - pub fn difference(&mut self, v: &Bitv) -> bool { - self.do_op(Difference, v) + pub fn difference(&mut self, other: &Bitv) -> bool { + let nbits = self.nbits; + self.process(other, nbits, |w1, w2| w1 & !w2) } /// Returns `true` if all bits are 1 #[inline] pub fn all(&self) -> bool { - match self.rep { - Small(ref b) => b.all(self.nbits), - _ => self.iter().all(|x| x) - } + for (i, &elem) in self.storage.iter().enumerate() { + let mask = big_mask(self.nbits, i); + if elem & mask != mask { + return false; + } + } + true } /// Returns an iterator over the elements of the vector in order. @@ -492,10 +265,13 @@ impl Bitv { /// Returns `true` if all bits are 0 pub fn none(&self) -> bool { - match self.rep { - Small(ref b) => b.none(self.nbits), - _ => self.iter().all(|x| !x) - } + for (i, &elem) in self.storage.iter().enumerate() { + let mask = big_mask(self.nbits, i); + if elem & mask != 0 { + return false; + } + } + true } #[inline] @@ -621,13 +397,8 @@ impl fmt::Show for Bitv { impl hash::Hash for Bitv { fn hash(&self, state: &mut S) { self.nbits.hash(state); - match self.rep { - Small(ref s) => (s.bits & small_mask(self.nbits)).hash(state), - Big(ref b) => { - for (i, ele) in b.storage.iter().enumerate() { - (ele & big_mask(self.nbits, i)).hash(state); - } - } + for (i, elem) in self.storage.iter().enumerate() { + (elem & big_mask(self.nbits, i)).hash(state); } } } @@ -635,17 +406,16 @@ impl hash::Hash for Bitv { impl cmp::PartialEq for Bitv { #[inline] fn eq(&self, other: &Bitv) -> bool { - if self.nbits != other.nbits { return false; } - match self.rep { - Small(ref b) => match other.rep { - Small(ref b1) => b.equals(b1, self.nbits), - _ => false - }, - Big(ref s) => match other.rep { - Big(ref s1) => s.equals(s1, self.nbits), - Small(_) => return false + if self.nbits != other.nbits { + return false; + } + for (i, (&w1, &w2)) in self.storage.iter().zip(other.storage.iter()).enumerate() { + let mask = big_mask(self.nbits, i); + if w1 & mask != w2 & mask { + return false; } } + true } } @@ -750,10 +520,7 @@ impl BitvSet { /// element less than this amount will not trigger a resizing. pub fn capacity(&self) -> uint { let &BitvSet(ref bitv) = self; - match bitv.rep { - Small(_) => uint::BITS, - Big(ref s) => s.storage.len() * uint::BITS - } + bitv.storage.len() * uint::BITS } /// Consumes this set to return the underlying bit vector @@ -766,19 +533,10 @@ impl BitvSet { /// Grows the vector to be able to store bits with indices `[0, size - 1]` fn grow(&mut self, size: uint) { let &BitvSet(ref mut bitv) = self; - let small_to_big = match bitv.rep { Small(s) if size >= uint::BITS => Some(s.bits), _ => None }; - if small_to_big.is_some() { - bitv.rep = Big(BigBitv { storage: vec![small_to_big.unwrap()] }); + let size = (size + uint::BITS - 1) / uint::BITS; + if bitv.storage.len() < size { + bitv.storage.grow(size, &0); } - match bitv.rep { - Small(_) => {}, - Big(ref mut b) => { - let size = (size + uint::BITS - 1) / uint::BITS; - if b.storage.len() < size { - b.storage.grow(size, &0); - } - } - }; } #[inline] @@ -789,15 +547,9 @@ impl BitvSet { let &BitvSet(ref mut self_bitv) = self; let &BitvSet(ref other_bitv) = other; for (i, w) in other_bitv.words(0) { - match self_bitv.rep { - Small(ref mut s) => { s.bits = f(s.bits, w); } - Big(ref mut b) => { - let old = *b.storage.get(i); - let new = f(old, w); - *b.storage.get_mut(i) = new; - *b.storage.get_mut(i) = f(*b.storage.get(i), w); - } - } + let old = *self_bitv.storage.get(i); + let new = f(old, w); + *self_bitv.storage.get_mut(i) = new; } } @@ -805,21 +557,14 @@ impl BitvSet { /// Truncate the underlying vector to the least length required pub fn shrink_to_fit(&mut self) { let &BitvSet(ref mut bitv) = self; - // Two steps: we borrow b as immutable to get the length... - let old_len = match bitv.rep { - Small(_) => 1, - Big(ref b) => b.storage.len() - }; - // ...and as mutable to change it. - match bitv.rep { - Small(_) => {}, - Big(ref mut b) => { - let n = b.storage.iter().rev().take_while(|&&n| n == 0).count(); - let trunc_len = cmp::max(old_len - n, 1); - b.storage.truncate(trunc_len); - bitv.nbits = trunc_len * uint::BITS; - } - } + // Obtain original length + let old_len = bitv.storage.len(); + // Obtain coarse trailing zero length + let n = bitv.storage.iter().rev().take_while(|&&n| n == 0).count(); + // Truncate + let trunc_len = cmp::max(old_len - n, 1); + bitv.storage.truncate(cmp::max(old_len - n, 1)); + bitv.nbits = trunc_len * uint::BITS; } /// Union in-place with the specified other bit vector @@ -909,10 +654,7 @@ impl Collection for BitvSet { #[inline] fn len(&self) -> uint { let &BitvSet(ref bitv) = self; - match bitv.rep { - Small(ref s) => s.bits.count_ones(), - Big(ref b) => b.storage.iter().fold(0, |acc, &n| acc + n.count_ones()) - } + bitv.storage.iter().fold(0, |acc, &n| acc + n.count_ones()) } } @@ -1056,8 +798,7 @@ mod tests { use test::Bencher; use {Set, Mutable, MutableSet}; - use bitv::{Bitv, SmallBitv, BigBitv, BitvSet, from_bools, from_fn, - from_bytes}; + use bitv::{Bitv, BitvSet, from_bools, from_fn, from_bytes}; use bitv; use vec::Vec; @@ -1733,38 +1474,6 @@ mod tests { }) } - #[bench] - fn bench_small_bitv_small(b: &mut Bencher) { - let mut r = rng(); - let mut bitv = SmallBitv::new(uint::BITS); - b.iter(|| { - bitv.set((r.next_u32() as uint) % uint::BITS, true); - &bitv - }) - } - - #[bench] - fn bench_big_bitv_small(b: &mut Bencher) { - let mut r = rng(); - let mut bitv = BigBitv::new(vec!(0)); - b.iter(|| { - bitv.set((r.next_u32() as uint) % uint::BITS, true); - &bitv - }) - } - - #[bench] - fn bench_big_bitv_big(b: &mut Bencher) { - let mut r = rng(); - let mut storage = vec!(); - storage.grow(BENCH_BITS / uint::BITS, &0u); - let mut bitv = BigBitv::new(storage); - b.iter(|| { - bitv.set((r.next_u32() as uint) % BENCH_BITS, true); - &bitv - }) - } - #[bench] fn bench_bitv_big(b: &mut Bencher) { let mut r = rng(); From a698b81ebfe02a614f9b41e68c6b604597a81229 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Wed, 2 Jul 2014 08:24:55 -0700 Subject: [PATCH 03/10] collections::bitv: ensure correct masking behaviour The internal masking behaviour for `Bitv` is now defined as: - Any entirely words in self.storage must be all zeroes. - Any partially used words may have anything at all in their unused bits. This means: - When decreasing self.nbits, care must be taken that any no-longer-used words are zeroed out. - When increasing self.nbits, care must be taken that any newly-unmasked bits are set to their correct values. - When reading words, care should be taken that the values of unused bits are not used. (Preferably, use `Bitv::mask_words` which zeroes them out for you.) The old behaviour was that every unused bit was always set to zero. The problem with this is that unused bits are almost never read, so forgetting to do this will result in very subtle and hard-to-track down bugs. This way the responsibility for masking falls on the places which might cause unused bits to be read: for now, this is only `Bitv::mask_words` and `BitvSet::insert`. --- src/libcollections/bitv.rs | 170 +++++++++++++++++++------------------ 1 file changed, 87 insertions(+), 83 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index 20d7c3ef2cf..b480b88b4d4 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -24,22 +24,6 @@ use std::hash; use {Collection, Mutable, Set, MutableSet}; use vec::Vec; -/** - * A mask that has a 1 for each defined bit in the n'th element of a `Bitv`, - * assuming n bits. - */ -#[inline] -fn big_mask(nbits: uint, elem: uint) -> uint { - let rmd = nbits % uint::BITS; - let nelems = (nbits + uint::BITS - 1) / uint::BITS; - - if elem < nelems - 1 || rmd == 0 { - !0 - } else { - (1 << rmd) - 1 - } -} - /// The bitvector type /// /// # Example @@ -75,35 +59,47 @@ pub struct Bitv { nbits: uint } -struct Words<'a> { +struct MaskWords<'a> { iter: slice::Items<'a, uint>, + next_word: Option<&'a uint>, + last_word_mask: uint, offset: uint } -impl<'a> Iterator<(uint, uint)> for Words<'a> { +impl<'a> Iterator<(uint, uint)> for MaskWords<'a> { /// Returns (offset, word) fn next<'a>(&'a mut self) -> Option<(uint, uint)> { - let ret = self.iter.next().map(|&n| (self.offset, n)); - self.offset += 1; - ret + let ret = self.next_word; + match ret { + Some(&w) => { + self.next_word = self.iter.next(); + self.offset += 1; + // The last word may need to be masked + if self.next_word.is_none() { + Some((self.offset - 1, w & self.last_word_mask)) + } else { + Some((self.offset - 1, w)) + } + }, + None => None + } } } impl Bitv { #[inline] - fn process(&mut self, other: &Bitv, nbits: uint, - op: |uint, uint| -> uint) -> bool { + fn process(&mut self, other: &Bitv, op: |uint, uint| -> uint) -> bool { let len = other.storage.len(); assert_eq!(self.storage.len(), len); let mut changed = false; - for (i, (a, b)) in self.storage.mut_iter() - .zip(other.storage.iter()) - .enumerate() { - let mask = big_mask(nbits, i); - let w0 = *a & mask; - let w1 = *b & mask; - let w = op(w0, w1) & mask; - if w0 != w { + // Notice: `a` is *not* masked here, which is fine as long as + // `op` is a bitwise operation, since any bits that should've + // been masked were fine to change anyway. `b` is masked to + // make sure its unmasked bits do not cause damage. + for (a, (_, b)) in self.storage.mut_iter() + .zip(other.mask_words(0)) { + let w = op(*a, b); + if *a != w { changed = true; *a = w; } @@ -112,10 +108,20 @@ impl Bitv { } #[inline] - #[inline] - fn words<'a>(&'a self, start: uint) -> Words<'a> { - Words { - iter: self.storage.slice_from(start).iter(), + fn mask_words<'a>(&'a self, mut start: uint) -> MaskWords<'a> { + if start > self.storage.len() { + start = self.storage.len(); + } + let mut iter = self.storage.slice_from(start).iter(); + MaskWords { + next_word: iter.next(), + iter: iter, + last_word_mask: { + let rem = self.nbits % uint::BITS; + if rem > 0 { + (1 << rem) - 1 + } else { !0 } + }, offset: start } } @@ -124,15 +130,8 @@ impl Bitv { /// to `init`. pub fn new(nbits: uint, init: bool) -> Bitv { Bitv { - storage: { - let nelems = (nbits + uint::BITS - 1) / uint::BITS; - let mut v = Vec::from_elem(nelems, if init { !0u } else { 0u }); - // Zero out any remainder bits - if nbits % uint::BITS > 0 { - *v.get_mut(nelems - 1) &= (1 << nbits % uint::BITS) - 1; - } - v - }, + storage: Vec::from_elem((nbits + uint::BITS - 1) / uint::BITS, + if init { !0u } else { 0u }), nbits: nbits } } @@ -145,8 +144,7 @@ impl Bitv { */ #[inline] pub fn union(&mut self, other: &Bitv) -> bool { - let nbits = self.nbits; - self.process(other, nbits, |w1, w2| w1 | w2) + self.process(other, |w1, w2| w1 | w2) } /** @@ -157,8 +155,7 @@ impl Bitv { */ #[inline] pub fn intersect(&mut self, other: &Bitv) -> bool { - let nbits = self.nbits; - self.process(other, nbits, |w1, w2| w1 & w2) + self.process(other, |w1, w2| w1 & w2) } /** @@ -169,8 +166,7 @@ impl Bitv { */ #[inline] pub fn assign(&mut self, other: &Bitv) -> bool { - let nbits = self.nbits; - self.process(other, nbits, |_, w| w) + self.process(other, |_, w| w) } /// Retrieve the value at index `i` @@ -227,20 +223,18 @@ impl Bitv { */ #[inline] pub fn difference(&mut self, other: &Bitv) -> bool { - let nbits = self.nbits; - self.process(other, nbits, |w1, w2| w1 & !w2) + self.process(other, |w1, w2| w1 & !w2) } /// Returns `true` if all bits are 1 #[inline] pub fn all(&self) -> bool { - for (i, &elem) in self.storage.iter().enumerate() { - let mask = big_mask(self.nbits, i); - if elem & mask != mask { - return false; - } - } - true + let mut last_word = !0u; + // Check that every word but the last is all-ones... + self.mask_words(0).all(|(_, elem)| + { let tmp = last_word; last_word = elem; tmp == !0u }) && + // ...and that the last word is ones as far as it needs to be + (last_word == ((1 << self.nbits % uint::BITS) - 1) || last_word == !0u) } /// Returns an iterator over the elements of the vector in order. @@ -265,13 +259,7 @@ impl Bitv { /// Returns `true` if all bits are 0 pub fn none(&self) -> bool { - for (i, &elem) in self.storage.iter().enumerate() { - let mask = big_mask(self.nbits, i); - if elem & mask != 0 { - return false; - } - } - true + self.mask_words(0).all(|(_, w)| w == 0) } #[inline] @@ -397,8 +385,8 @@ impl fmt::Show for Bitv { impl hash::Hash for Bitv { fn hash(&self, state: &mut S) { self.nbits.hash(state); - for (i, elem) in self.storage.iter().enumerate() { - (elem & big_mask(self.nbits, i)).hash(state); + for (_, elem) in self.mask_words(0) { + elem.hash(state); } } } @@ -409,13 +397,7 @@ impl cmp::PartialEq for Bitv { if self.nbits != other.nbits { return false; } - for (i, (&w1, &w2)) in self.storage.iter().zip(other.storage.iter()).enumerate() { - let mask = big_mask(self.nbits, i); - if w1 & mask != w2 & mask { - return false; - } - } - true + self.mask_words(0).zip(other.mask_words(0)).all(|((_, w1), (_, w2))| w1 == w2) } } @@ -546,7 +528,7 @@ impl BitvSet { // Unwrap Bitvs let &BitvSet(ref mut self_bitv) = self; let &BitvSet(ref other_bitv) = other; - for (i, w) in other_bitv.words(0) { + for (i, w) in other_bitv.mask_words(0) { let old = *self_bitv.storage.get(i); let new = f(old, w); *self_bitv.storage.get_mut(i) = new; @@ -563,7 +545,7 @@ impl BitvSet { let n = bitv.storage.iter().rev().take_while(|&&n| n == 0).count(); // Truncate let trunc_len = cmp::max(old_len - n, 1); - bitv.storage.truncate(cmp::max(old_len - n, 1)); + bitv.storage.truncate(trunc_len); bitv.nbits = trunc_len * uint::BITS; } @@ -710,6 +692,12 @@ impl MutableSet for BitvSet { } let &BitvSet(ref mut bitv) = self; if value >= bitv.nbits { + // If we are increasing nbits, make sure we mask out any previously-unconsidered bits + let old_rem = bitv.nbits % uint::BITS; + if old_rem != 0 { + let old_last_word = (bitv.nbits + uint::BITS - 1) / uint::BITS - 1; + *bitv.storage.get_mut(old_last_word) &= (1 << old_rem) - 1; + } bitv.nbits = value + 1; } bitv.set(value, true); @@ -733,10 +721,10 @@ impl BitvSet { /// and w1/w2 are the words coming from the two vectors self, other. fn commons<'a>(&'a self, other: &'a BitvSet) -> Map<((uint, uint), (uint, uint)), (uint, uint, uint), - Zip, Words<'a>>> { + Zip, MaskWords<'a>>> { let &BitvSet(ref self_bitv) = self; let &BitvSet(ref other_bitv) = other; - self_bitv.words(0).zip(other_bitv.words(0)) + self_bitv.mask_words(0).zip(other_bitv.mask_words(0)) .map(|((i, w1), (_, w2))| (i * uint::BITS, w1, w2)) } @@ -748,17 +736,17 @@ impl BitvSet { /// is true if the word comes from `self`, and `false` if it comes from /// `other`. fn outliers<'a>(&'a self, other: &'a BitvSet) - -> Map<(uint, uint), (bool, uint, uint), Words<'a>> { + -> Map<(uint, uint), (bool, uint, uint), MaskWords<'a>> { let slen = self.capacity() / uint::BITS; let olen = other.capacity() / uint::BITS; let &BitvSet(ref self_bitv) = self; let &BitvSet(ref other_bitv) = other; if olen < slen { - self_bitv.words(olen) + self_bitv.mask_words(olen) .map(|(i, w)| (true, i * uint::BITS, w)) } else { - other_bitv.words(slen) + other_bitv.mask_words(slen) .map(|(i, w)| (false, i * uint::BITS, w)) } } @@ -1250,16 +1238,32 @@ mod tests { }); } + #[test] + fn test_bitv_masking() { + let b = Bitv::new(140, true); + let mut bs = BitvSet::from_bitv(b); + assert!(bs.contains(&139)); + assert!(!bs.contains(&140)); + assert!(bs.insert(150)); + assert!(!bs.contains(&140)); + assert!(!bs.contains(&149)); + assert!(bs.contains(&150)); + assert!(!bs.contains(&151)); + } + #[test] fn test_bitv_set_basic() { let mut b = BitvSet::new(); assert!(b.insert(3)); assert!(!b.insert(3)); assert!(b.contains(&3)); + assert!(b.insert(4)); + assert!(!b.insert(4)); + assert!(b.contains(&3)); assert!(b.insert(400)); assert!(!b.insert(400)); assert!(b.contains(&400)); - assert_eq!(b.len(), 2); + assert_eq!(b.len(), 3); } #[test] From 7a7ae993ce694bf75a11632b394916e055a4d8ec Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 30 Jun 2014 13:05:05 -0700 Subject: [PATCH 04/10] collections::bitv: correct use of Vec::grow The argument passed to Vec::grow is the number of elements to grow the vector by, not the target number of elements. The old `Bitv` code did the wrong thing, allocating more memory than it needed to. --- src/libcollections/bitv.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index b480b88b4d4..18e3390dff5 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -515,9 +515,10 @@ impl BitvSet { /// Grows the vector to be able to store bits with indices `[0, size - 1]` fn grow(&mut self, size: uint) { let &BitvSet(ref mut bitv) = self; + let old_size = bitv.storage.len(); let size = (size + uint::BITS - 1) / uint::BITS; - if bitv.storage.len() < size { - bitv.storage.grow(size, &0); + if old_size < size { + bitv.storage.grow(size - old_size, &0); } } @@ -1253,14 +1254,22 @@ mod tests { #[test] fn test_bitv_set_basic() { + // calculate nbits with uint::BITS granularity + fn calc_nbits(bits: uint) -> uint { + uint::BITS * ((bits + uint::BITS - 1) / uint::BITS) + } + let mut b = BitvSet::new(); + assert_eq!(b.capacity(), calc_nbits(0)); assert!(b.insert(3)); + assert_eq!(b.capacity(), calc_nbits(3)); assert!(!b.insert(3)); assert!(b.contains(&3)); assert!(b.insert(4)); assert!(!b.insert(4)); assert!(b.contains(&3)); assert!(b.insert(400)); + assert_eq!(b.capacity(), calc_nbits(400)); assert!(!b.insert(400)); assert!(b.contains(&400)); assert_eq!(b.len(), 3); From da0d4be378d289e9e90a48deec674d42205ae4c9 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 30 Jun 2014 07:25:58 -0700 Subject: [PATCH 05/10] collections::bitv: remove some ancient interfaces Removes the following methods from `Bitv`: - `to_vec`: translates a `Bitv` into a bulky `Vec` of 0's and 1's replace with: `bitv.iter().map(|b| if b {1} else {0}).collect()` - `to_bools`: translates a `Bitv` into a `Vec` replace with: `bitv.iter().collect()` - `ones`: internal iterator over all 1 bits in a `Bitv` replace with: `BitvSet::from_bitv(bitv).iter().advance(fn)` These methods had specific functionality which can be replicated more generally by the modern iterator system. (Also `to_vec` was not even unit tested!) --- src/libcollections/bitv.rs | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index 18e3390dff5..3aeb15eef6f 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -268,15 +268,6 @@ impl Bitv { !self.none() } - /** - * Converts `self` to a vector of `uint` with the same length. - * - * Each `uint` in the resulting vector has either value `0u` or `1u`. - */ - pub fn to_vec(&self) -> Vec { - Vec::from_fn(self.nbits, |i| if self.get(i) { 1 } else { 0 }) - } - /** * Organise the bits into bytes, such that the first bit in the * `Bitv` becomes the high-order bit of the first byte. If the @@ -307,13 +298,6 @@ impl Bitv { ) } - /** - * Transform `self` into a `Vec` by turning each bit into a `bool`. - */ - pub fn to_bools(&self) -> Vec { - Vec::from_fn(self.nbits, |i| self[i]) - } - /** * Compare a bitvector to a vector of `bool`. * @@ -328,11 +312,6 @@ impl Bitv { } true } - - pub fn ones(&self, f: |uint| -> bool) -> bool { - range(0u, self.nbits).advance(|i| !self.get(i) || f(i)) - } - } /** @@ -1157,7 +1136,7 @@ mod tests { #[test] fn test_to_bools() { let bools = vec!(false, false, true, false, false, true, true, false); - assert_eq!(from_bytes([0b00100110]).to_bools(), bools); + assert_eq!(from_bytes([0b00100110]).iter().collect::>(), bools); } #[test] @@ -1225,7 +1204,7 @@ mod tests { fn test_small_clear() { let mut b = Bitv::new(14, true); b.clear(); - b.ones(|i| { + BitvSet::from_bitv(b).iter().advance(|i| { fail!("found 1 at {:?}", i) }); } @@ -1234,7 +1213,7 @@ mod tests { fn test_big_clear() { let mut b = Bitv::new(140, true); b.clear(); - b.ones(|i| { + BitvSet::from_bitv(b).iter().advance(|i| { fail!("found 1 at {:?}", i) }); } From a7f335a09ccfa0777847cd7b36d117322b965ad1 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 30 Jun 2014 08:25:11 -0700 Subject: [PATCH 06/10] collections::bitv: replace internal iterators with external ones --- src/libcollections/bitv.rs | 138 +++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 52 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index 3aeb15eef6f..7dd4535f205 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -15,7 +15,7 @@ use core::prelude::*; use core::cmp; use core::default::Default; use core::fmt; -use core::iter::{Map, Zip}; +use core::iter::{Map, Take, Zip}; use core::ops; use core::slice; use core::uint; @@ -382,21 +382,6 @@ impl cmp::PartialEq for Bitv { impl cmp::Eq for Bitv {} -#[inline] -fn iterate_bits(base: uint, bits: uint, f: |uint| -> bool) -> bool { - if bits == 0 { - return true; - } - for i in range(0u, uint::BITS) { - if bits & (1 << i) != 0 { - if !f(base + i) { - return false; - } - } - } - return true; -} - /// An iterator for `Bitv`. pub struct Bits<'a> { bitv: &'a Bitv, @@ -553,39 +538,45 @@ impl BitvSet { BitPositions {set: self, next_idx: 0} } - pub fn difference(&self, other: &BitvSet, f: |&uint| -> bool) -> bool { - for (i, w1, w2) in self.commons(other) { - if !iterate_bits(i, w1 & !w2, |b| f(&b)) { - return false - } - }; - /* everything we have that they don't also shows up */ - self.outliers(other).advance(|(mine, i, w)| - !mine || iterate_bits(i, w, |b| f(&b)) - ) + pub fn difference<'a>(&'a self, other: &'a BitvSet) -> TwoBitPositions<'a> { + TwoBitPositions { + set: self, + other: other, + merge: |w1, w2| w1 & !w2, + current_word: 0, + next_idx: 0 + } } - pub fn symmetric_difference(&self, other: &BitvSet, f: |&uint| -> bool) - -> bool { - for (i, w1, w2) in self.commons(other) { - if !iterate_bits(i, w1 ^ w2, |b| f(&b)) { - return false - } - }; - self.outliers(other).advance(|(_, i, w)| iterate_bits(i, w, |b| f(&b))) + pub fn symmetric_difference<'a>(&'a self, other: &'a BitvSet) -> TwoBitPositions<'a> { + TwoBitPositions { + set: self, + other: other, + merge: |w1, w2| w1 ^ w2, + current_word: 0, + next_idx: 0 + } } - pub fn intersection(&self, other: &BitvSet, f: |&uint| -> bool) -> bool { - self.commons(other).advance(|(i, w1, w2)| iterate_bits(i, w1 & w2, |b| f(&b))) + pub fn intersection<'a>(&'a self, other: &'a BitvSet) -> Take> { + let min = cmp::min(self.capacity(), other.capacity()); + TwoBitPositions { + set: self, + other: other, + merge: |w1, w2| w1 & w2, + current_word: 0, + next_idx: 0 + }.take(min) } - pub fn union(&self, other: &BitvSet, f: |&uint| -> bool) -> bool { - for (i, w1, w2) in self.commons(other) { - if !iterate_bits(i, w1 | w2, |b| f(&b)) { - return false - } - }; - self.outliers(other).advance(|(_, i, w)| iterate_bits(i, w, |b| f(&b))) + pub fn union<'a>(&'a self, other: &'a BitvSet) -> TwoBitPositions<'a> { + TwoBitPositions { + set: self, + other: other, + merge: |w1, w2| w1 | w2, + current_word: 0, + next_idx: 0 + } } } @@ -634,7 +625,7 @@ impl Set for BitvSet { } fn is_disjoint(&self, other: &BitvSet) -> bool { - self.intersection(other, |_| false) + self.intersection(other).count() > 0 } fn is_subset(&self, other: &BitvSet) -> bool { @@ -737,6 +728,14 @@ pub struct BitPositions<'a> { next_idx: uint } +pub struct TwoBitPositions<'a> { + set: &'a BitvSet, + other: &'a BitvSet, + merge: |uint, uint|: 'a -> uint, + current_word: uint, + next_idx: uint +} + impl<'a> Iterator for BitPositions<'a> { #[inline] fn next(&mut self) -> Option { @@ -757,6 +756,41 @@ impl<'a> Iterator for BitPositions<'a> { } } +impl<'a> Iterator for TwoBitPositions<'a> { + #[inline] + fn next(&mut self) -> Option { + while self.next_idx < self.set.capacity() || + self.next_idx < self.other.capacity() { + let bit_idx = self.next_idx % uint::BITS; + if bit_idx == 0 { + let &BitvSet(ref s_bitv) = self.set; + let &BitvSet(ref o_bitv) = self.other; + // Merging the two words is a bit of an awkward dance since + // one Bitv might be longer than the other + let word_idx = self.next_idx / uint::BITS; + let w1 = if word_idx < s_bitv.storage.len() { + *s_bitv.storage.get(word_idx) + } else { 0 }; + let w2 = if word_idx < o_bitv.storage.len() { + *o_bitv.storage.get(word_idx) + } else { 0 }; + self.current_word = (self.merge)(w1, w2); + } + + self.next_idx += 1; + if self.current_word & (1 << bit_idx) != 0 { + return Some(self.next_idx - 1); + } + } + return None; + } + + fn size_hint(&self) -> (uint, Option) { + let cap = cmp::max(self.set.capacity(), self.other.capacity()); + (0, Some(cap - self.next_idx)) + } +} + #[cfg(test)] mod tests { use std::prelude::*; @@ -1274,8 +1308,8 @@ mod tests { let mut i = 0; let expected = [3, 5, 11, 77]; - a.intersection(&b, |x| { - assert_eq!(*x, expected[i]); + a.intersection(&b).advance(|x| { + assert_eq!(x, expected[i]); i += 1; true }); @@ -1298,8 +1332,8 @@ mod tests { let mut i = 0; let expected = [1, 5, 500]; - a.difference(&b, |x| { - assert_eq!(*x, expected[i]); + a.difference(&b).advance(|x| { + assert_eq!(x, expected[i]); i += 1; true }); @@ -1324,8 +1358,8 @@ mod tests { let mut i = 0; let expected = [1, 5, 11, 14, 220]; - a.symmetric_difference(&b, |x| { - assert_eq!(*x, expected[i]); + a.symmetric_difference(&b).advance(|x| { + assert_eq!(x, expected[i]); i += 1; true }); @@ -1353,8 +1387,8 @@ mod tests { let mut i = 0; let expected = [1, 3, 5, 9, 11, 13, 19, 24, 160]; - a.union(&b, |x| { - assert_eq!(*x, expected[i]); + a.union(&b).advance(|x| { + assert_eq!(x, expected[i]); i += 1; true }); From b5c54df59fa0a99fa860db82e6e82f9c0fbd7115 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 30 Jun 2014 09:41:23 -0700 Subject: [PATCH 07/10] collections::bitv: Add documentation and #[inline]'s Add documentation to methods on BitvSet that were missing them. Also make sure #[inline] is on all methods that are (a) one-liners or (b) private methods whose only purpose is code deduplication. --- src/libcollections/bitv.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index 7dd4535f205..0f8e93cbaef 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -68,6 +68,7 @@ struct MaskWords<'a> { impl<'a> Iterator<(uint, uint)> for MaskWords<'a> { /// Returns (offset, word) + #[inline] fn next<'a>(&'a mut self) -> Option<(uint, uint)> { let ret = self.next_word; match ret { @@ -347,6 +348,7 @@ pub fn from_fn(len: uint, f: |index: uint| -> bool) -> Bitv { } impl ops::Index for Bitv { + #[inline] fn index(&self, i: &uint) -> bool { self.get(*i) } @@ -453,23 +455,27 @@ impl Default for BitvSet { impl BitvSet { /// Creates a new bit vector set with initially no contents + #[inline] pub fn new() -> BitvSet { BitvSet(Bitv::new(0, false)) } /// Creates a new bit vector set from the given bit vector + #[inline] pub fn from_bitv(bitv: Bitv) -> BitvSet { BitvSet(bitv) } /// Returns the capacity in bits for this bit vector. Inserting any /// element less than this amount will not trigger a resizing. + #[inline] pub fn capacity(&self) -> uint { let &BitvSet(ref bitv) = self; bitv.storage.len() * uint::BITS } /// Consumes this set to return the underlying bit vector + #[inline] pub fn unwrap(self) -> Bitv { let BitvSet(bitv) = self; bitv @@ -515,29 +521,37 @@ impl BitvSet { } /// Union in-place with the specified other bit vector + #[inline] pub fn union_with(&mut self, other: &BitvSet) { self.other_op(other, |w1, w2| w1 | w2); } /// Intersect in-place with the specified other bit vector + #[inline] pub fn intersect_with(&mut self, other: &BitvSet) { self.other_op(other, |w1, w2| w1 & w2); } /// Difference in-place with the specified other bit vector + #[inline] pub fn difference_with(&mut self, other: &BitvSet) { self.other_op(other, |w1, w2| w1 & !w2); } /// Symmetric difference in-place with the specified other bit vector + #[inline] pub fn symmetric_difference_with(&mut self, other: &BitvSet) { self.other_op(other, |w1, w2| w1 ^ w2); } + /// Iterator over each uint stored in the BitvSet + #[inline] pub fn iter<'a>(&'a self) -> BitPositions<'a> { BitPositions {set: self, next_idx: 0} } + /// Iterator over each uint stored in the `self` setminus `other` + #[inline] pub fn difference<'a>(&'a self, other: &'a BitvSet) -> TwoBitPositions<'a> { TwoBitPositions { set: self, @@ -548,6 +562,8 @@ impl BitvSet { } } + /// Iterator over each uint stored in the symmetric difference of `self` and `other` + #[inline] pub fn symmetric_difference<'a>(&'a self, other: &'a BitvSet) -> TwoBitPositions<'a> { TwoBitPositions { set: self, @@ -558,6 +574,8 @@ impl BitvSet { } } + /// Iterator over each uint stored in `self` intersect `other` + #[inline] pub fn intersection<'a>(&'a self, other: &'a BitvSet) -> Take> { let min = cmp::min(self.capacity(), other.capacity()); TwoBitPositions { @@ -569,6 +587,8 @@ impl BitvSet { }.take(min) } + /// Iterator over each uint stored in `self` union `other` + #[inline] pub fn union<'a>(&'a self, other: &'a BitvSet) -> TwoBitPositions<'a> { TwoBitPositions { set: self, @@ -612,6 +632,7 @@ impl Collection for BitvSet { } impl Mutable for BitvSet { + #[inline] fn clear(&mut self) { let &BitvSet(ref mut bitv) = self; bitv.clear(); @@ -619,11 +640,13 @@ impl Mutable for BitvSet { } impl Set for BitvSet { + #[inline] fn contains(&self, value: &uint) -> bool { let &BitvSet(ref bitv) = self; *value < bitv.nbits && bitv.get(*value) } + #[inline] fn is_disjoint(&self, other: &BitvSet) -> bool { self.intersection(other).count() > 0 } @@ -647,6 +670,7 @@ impl Set for BitvSet { return true; } + #[inline] fn is_superset(&self, other: &BitvSet) -> bool { other.is_subset(self) } @@ -737,7 +761,6 @@ pub struct TwoBitPositions<'a> { } impl<'a> Iterator for BitPositions<'a> { - #[inline] fn next(&mut self) -> Option { while self.next_idx < self.set.capacity() { let idx = self.next_idx; @@ -751,13 +774,13 @@ impl<'a> Iterator for BitPositions<'a> { return None; } + #[inline] fn size_hint(&self) -> (uint, Option) { (0, Some(self.set.capacity() - self.next_idx)) } } impl<'a> Iterator for TwoBitPositions<'a> { - #[inline] fn next(&mut self) -> Option { while self.next_idx < self.set.capacity() || self.next_idx < self.other.capacity() { @@ -785,6 +808,7 @@ impl<'a> Iterator for TwoBitPositions<'a> { return None; } + #[inline] fn size_hint(&self) -> (uint, Option) { let cap = cmp::max(self.set.capacity(), self.other.capacity()); (0, Some(cap - self.next_idx)) From 9eb81edfea77603b1cf742817dd46a8b1ec0455e Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Mon, 30 Jun 2014 10:53:52 -0700 Subject: [PATCH 08/10] collections::bitv: Implement several methods for `Bitv` and `BitvSet` On Bitv: - Add .push() and .pop() which take and return bool, respectively - Add .truncate() which truncates a Bitv to a specific length - Add .grow() which grows a Bitv by a specific length - Add .reserve() which grows the underlying storage to be able to hold a specified number of bits without resizing - Implement FromIterator> - Implement Extendable - Implement Collection - Implement Mutable - Remove .from_bools() since FromIterator> now accomplishes this. - Remove .assign() since Clone::clone_from() accomplishes this. On BitvSet: - Add .reserve() which grows the underlying storage to be able to hold a specified number of bits without resizing - Add .get_ref() and .get_mut_ref() to return references to the underlying Bitv --- src/libcollections/bitv.rs | 336 ++++++++++++++++++++++++++++++++----- 1 file changed, 295 insertions(+), 41 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index 0f8e93cbaef..e041906839c 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -51,7 +51,6 @@ use vec::Vec; /// println!("{}", bv.to_str()); /// println!("total bits set to true: {}", bv.iter().filter(|x| *x).count()); /// ``` -#[deriving(Clone)] pub struct Bitv { /// Internal representation of the bit vector storage: Vec, @@ -159,17 +158,6 @@ impl Bitv { self.process(other, |w1, w2| w1 & w2) } - /** - * Assigns the value of `v1` to `self` - * - * Both bitvectors must be the same length. Returns `true` if `self` was - * changed - */ - #[inline] - pub fn assign(&mut self, other: &Bitv) -> bool { - self.process(other, |_, w| w) - } - /// Retrieve the value at index `i` #[inline] pub fn get(&self, i: uint) -> bool { @@ -195,12 +183,6 @@ impl Bitv { else { *self.storage.get(w) & !flag }; } - /// Set all bits to 0 - #[inline] - pub fn clear(&mut self) { - for w in self.storage.mut_iter() { *w = 0u; } - } - /// Set all bits to 1 #[inline] pub fn set_all(&mut self) { @@ -313,6 +295,132 @@ impl Bitv { } true } + + /// Shorten a Bitv, dropping excess elements. + /// + /// If `len` is greater than the vector's current length, this has no + /// effect. + /// + /// # Example + /// + /// ```rust + /// use collections::bitv::Bitv; + /// let mut bvec: Bitv = vec![false, true, true, false].iter().map(|n| *n).collect(); + /// let expected: Bitv = vec![false, true].iter().map(|n| *n).collect(); + /// bvec.truncate(2); + /// assert_eq!(bvec, expected); + /// ``` + pub fn truncate(&mut self, len: uint) { + if len < self.len() { + self.nbits = len; + let word_len = (len + uint::BITS - 1) / uint::BITS; + self.storage.truncate(word_len); + if len % uint::BITS > 0 { + let mask = (1 << len % uint::BITS) - 1; + *self.storage.get_mut(word_len - 1) &= mask; + } + } + } + + /// Grows the vector to be able to store `size` bits without resizing + pub fn reserve(&mut self, size: uint) { + let old_size = self.storage.len(); + let size = (size + uint::BITS - 1) / uint::BITS; + if old_size < size { + self.storage.grow(size - old_size, &0); + } + } + + /// Returns the capacity in bits for this bit vector. Inserting any + /// element less than this amount will not trigger a resizing. + #[inline] + pub fn capacity(&self) -> uint { + self.storage.len() * uint::BITS + } + + /// Grows the `Bitv` in-place. + /// + /// Adds `n` copies of `value` to the `Bitv`. + /// + /// # Example + /// + /// ```rust + /// use collections::bitv::Bitv; + /// let mut bvec: Bitv = vec![false, true, true, false].iter().map(|n| *n).collect(); + /// bvec.grow(2, true); + /// assert_eq!(bvec, vec![false, true, true, false, true, true].iter().map(|n| *n).collect()); + /// ``` + pub fn grow(&mut self, n: uint, value: bool) { + let new_nbits = self.nbits + n; + let new_nwords = (new_nbits + uint::BITS - 1) / uint::BITS; + let full_value = if value { !0 } else { 0 }; + // Correct the old tail word + let old_last_word = (self.nbits + uint::BITS - 1) / uint::BITS - 1; + if self.nbits % uint::BITS > 0 { + let overhang = self.nbits % uint::BITS; // # of already-used bits + let mask = !((1 << overhang) - 1); // e.g. 5 unused bits => 111110....0 + if value { + *self.storage.get_mut(old_last_word) |= mask; + } else { + *self.storage.get_mut(old_last_word) &= !mask; + } + } + // Fill in words after the old tail word + let stop_idx = cmp::min(self.storage.len(), new_nwords); + for idx in range(old_last_word + 1, stop_idx) { + *self.storage.get_mut(idx) = full_value; + } + // Allocate new words, if needed + if new_nwords > self.storage.len() { + let to_add = new_nwords - self.storage.len(); + self.storage.grow(to_add, &full_value); + } + // Adjust internal bit count + self.nbits = new_nbits; + } + + /// Shorten a `Bitv` by one, returning the removed element + /// + /// # Example + /// + /// ```rust + /// use collections::bitv::Bitv; + /// let mut bvec: Bitv = vec![false, true, true, false].iter().map(|n| *n).collect(); + /// let expected: Bitv = vec![false, true, true].iter().map(|n| *n).collect(); + /// let popped = bvec.pop(); + /// assert_eq!(popped, false); + /// assert_eq!(bvec, expected); + /// ``` + pub fn pop(&mut self) -> bool { + let ret = self.get(self.nbits - 1); + // If we are unusing a whole word, make sure it is zeroed out + if self.nbits % uint::BITS == 1 { + *self.storage.get_mut(self.nbits / uint::BITS) = 0; + } + self.nbits -= 1; + ret + } + + /// Pushes a `bool` onto the `Bitv` + /// + /// # Example + /// + /// ```rust + /// use collections::bitv::Bitv; + /// let prototype: Bitv = vec![false, true, true, false].iter().map(|n| *n).collect(); + /// let mut bvec: Bitv = vec![false, true].iter().map(|n| *n).collect(); + /// bvec.push(true); + /// bvec.push(false); + /// assert_eq!(prototype, bvec); + /// ``` + pub fn push(&mut self, elem: bool) { + let insert_pos = self.nbits; + self.nbits += 1; + if self.storage.len() * uint::BITS < self.nbits { + self.storage.push(0); + } + self.set(insert_pos, elem); + } } /** @@ -328,13 +436,6 @@ pub fn from_bytes(bytes: &[u8]) -> Bitv { }) } -/** - * Transform a `[bool]` into a `Bitv` by converting each `bool` into a bit. - */ -pub fn from_bools(bools: &[bool]) -> Bitv { - from_fn(bools.len(), |i| bools[i]) -} - /** * Create a `Bitv` of the specified length where the value at each * index is `f(index)`. @@ -347,6 +448,57 @@ pub fn from_fn(len: uint, f: |index: uint| -> bool) -> Bitv { bitv } +impl Default for Bitv { + #[inline] + fn default() -> Bitv { Bitv::new(0, false) } +} + +impl Collection for Bitv { + #[inline] + fn len(&self) -> uint { self.nbits } +} + +impl Mutable for Bitv { + #[inline] + fn clear(&mut self) { + for w in self.storage.mut_iter() { *w = 0u; } + } +} + +impl FromIterator for Bitv { + fn from_iter>(iterator: I) -> Bitv { + let mut ret = Bitv::new(0, false); + ret.extend(iterator); + ret + } +} + +impl Extendable for Bitv { + #[inline] + fn extend>(&mut self, mut iterator: I) { + let (min, _) = iterator.size_hint(); + let nbits = self.nbits; + self.reserve(nbits + min); + for element in iterator { + self.push(element) + } + } +} + +impl Clone for Bitv { + #[inline] + fn clone(&self) -> Bitv { + Bitv { storage: self.storage.clone(), nbits: self.nbits } + } + + #[inline] + fn clone_from(&mut self, source: &Bitv) { + self.nbits = source.nbits; + self.storage.reserve(source.storage.len()); + for (i, w) in self.storage.mut_iter().enumerate() { *w = *source.storage.get(i); } + } +} + impl ops::Index for Bitv { #[inline] fn index(&self, i: &uint) -> bool { @@ -471,7 +623,13 @@ impl BitvSet { #[inline] pub fn capacity(&self) -> uint { let &BitvSet(ref bitv) = self; - bitv.storage.len() * uint::BITS + bitv.capacity() + } + + /// Grows the underlying vector to be able to store `size` bits + pub fn reserve(&mut self, size: uint) { + let &BitvSet(ref mut bitv) = self; + bitv.reserve(size) } /// Consumes this set to return the underlying bit vector @@ -481,24 +639,28 @@ impl BitvSet { bitv } + /// Returns a reference to the underlying bit vector #[inline] - /// Grows the vector to be able to store bits with indices `[0, size - 1]` - fn grow(&mut self, size: uint) { + pub fn get_ref<'a>(&'a self) -> &'a Bitv { + let &BitvSet(ref bitv) = self; + bitv + } + + /// Returns a mutable reference to the underlying bit vector + #[inline] + pub fn get_mut_ref<'a>(&'a mut self) -> &'a mut Bitv { let &BitvSet(ref mut bitv) = self; - let old_size = bitv.storage.len(); - let size = (size + uint::BITS - 1) / uint::BITS; - if old_size < size { - bitv.storage.grow(size - old_size, &0); - } + bitv } #[inline] fn other_op(&mut self, other: &BitvSet, f: |uint, uint| -> uint) { - // Expand the vector if necessary - self.grow(other.capacity()); // Unwrap Bitvs let &BitvSet(ref mut self_bitv) = self; let &BitvSet(ref other_bitv) = other; + // Expand the vector if necessary + self_bitv.reserve(other_bitv.capacity()); + // Apply values for (i, w) in other_bitv.mask_words(0) { let old = *self_bitv.storage.get(i); let new = f(old, w); @@ -683,7 +845,7 @@ impl MutableSet for BitvSet { } if value >= self.capacity() { let new_cap = cmp::max(value + 1, self.capacity() * 2); - self.grow(new_cap); + self.reserve(new_cap); } let &BitvSet(ref mut bitv) = self; if value >= bitv.nbits { @@ -824,7 +986,7 @@ mod tests { use test::Bencher; use {Set, Mutable, MutableSet}; - use bitv::{Bitv, BitvSet, from_bools, from_fn, from_bytes}; + use bitv::{Bitv, BitvSet, from_fn, from_bytes}; use bitv; use vec::Vec; @@ -1187,8 +1349,9 @@ mod tests { #[test] fn test_from_bools() { - assert!(from_bools([true, false, true, true]).to_str().as_slice() == - "1011"); + let bools = vec![true, false, true, true]; + let bitv: Bitv = bools.iter().map(|n| *n).collect(); + assert_eq!(bitv.to_str().as_slice(), "1011"); } #[test] @@ -1200,7 +1363,7 @@ mod tests { #[test] fn test_bitv_iterator() { let bools = [true, false, true, true]; - let bitv = from_bools(bools); + let bitv: Bitv = bools.iter().map(|n| *n).collect(); for (act, &ex) in bitv.iter().zip(bools.iter()) { assert_eq!(ex, act); @@ -1210,7 +1373,7 @@ mod tests { #[test] fn test_bitv_set_iterator() { let bools = [true, false, true, true]; - let bitv = BitvSet::from_bitv(from_bools(bools)); + let bitv = BitvSet::from_bitv(bools.iter().map(|n| *n).collect()); let idxs: Vec = bitv.iter().collect(); assert_eq!(idxs, vec!(0, 2, 3)); @@ -1499,6 +1662,97 @@ mod tests { assert!(!v.none()); } + #[test] + fn test_bitv_push_pop() { + let mut s = Bitv::new(5 * uint::BITS - 2, false); + assert_eq!(s.len(), 5 * uint::BITS - 2); + assert_eq!(s.get(5 * uint::BITS - 3), false); + s.push(true); + s.push(true); + assert_eq!(s.get(5 * uint::BITS - 2), true); + assert_eq!(s.get(5 * uint::BITS - 1), true); + // Here the internal vector will need to be extended + s.push(false); + assert_eq!(s.get(5 * uint::BITS), false); + s.push(false); + assert_eq!(s.get(5 * uint::BITS + 1), false); + assert_eq!(s.len(), 5 * uint::BITS + 2); + // Pop it all off + assert_eq!(s.pop(), false); + assert_eq!(s.pop(), false); + assert_eq!(s.pop(), true); + assert_eq!(s.pop(), true); + assert_eq!(s.len(), 5 * uint::BITS - 2); + } + + #[test] + fn test_bitv_truncate() { + let mut s = Bitv::new(5 * uint::BITS, true); + + assert_eq!(s, Bitv::new(5 * uint::BITS, true)); + assert_eq!(s.len(), 5 * uint::BITS); + s.truncate(4 * uint::BITS); + assert_eq!(s, Bitv::new(4 * uint::BITS, true)); + assert_eq!(s.len(), 4 * uint::BITS); + // Truncating to a size > s.len() should be a noop + s.truncate(5 * uint::BITS); + assert_eq!(s, Bitv::new(4 * uint::BITS, true)); + assert_eq!(s.len(), 4 * uint::BITS); + s.truncate(3 * uint::BITS - 10); + assert_eq!(s, Bitv::new(3 * uint::BITS - 10, true)); + assert_eq!(s.len(), 3 * uint::BITS - 10); + s.truncate(0); + assert_eq!(s, Bitv::new(0, true)); + assert_eq!(s.len(), 0); + } + + #[test] + fn test_bitv_reserve() { + let mut s = Bitv::new(5 * uint::BITS, true); + // Check capacity + assert_eq!(s.capacity(), 5 * uint::BITS); + s.reserve(2 * uint::BITS); + assert_eq!(s.capacity(), 5 * uint::BITS); + s.reserve(7 * uint::BITS); + assert_eq!(s.capacity(), 7 * uint::BITS); + s.reserve(7 * uint::BITS); + assert_eq!(s.capacity(), 7 * uint::BITS); + s.reserve(7 * uint::BITS + 1); + assert_eq!(s.capacity(), 8 * uint::BITS); + // Check that length hasn't changed + assert_eq!(s.len(), 5 * uint::BITS); + s.push(true); + s.push(false); + s.push(true); + assert_eq!(s.get(5 * uint::BITS - 1), true); + assert_eq!(s.get(5 * uint::BITS - 0), true); + assert_eq!(s.get(5 * uint::BITS + 1), false); + assert_eq!(s.get(5 * uint::BITS + 2), true); + } + + #[test] + fn test_bitv_grow() { + let mut bitv = from_bytes([0b10110110, 0b00000000, 0b10101010]); + bitv.grow(32, true); + assert_eq!(bitv, from_bytes([0b10110110, 0b00000000, 0b10101010, + 0xFF, 0xFF, 0xFF, 0xFF])); + bitv.grow(64, false); + assert_eq!(bitv, from_bytes([0b10110110, 0b00000000, 0b10101010, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0])); + bitv.grow(16, true); + assert_eq!(bitv, from_bytes([0b10110110, 0b00000000, 0b10101010, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF])); + } + + #[test] + fn test_bitv_extend() { + let mut bitv = from_bytes([0b10110110, 0b00000000, 0b11111111]); + let ext = from_bytes([0b01001001, 0b10010010, 0b10111101]); + bitv.extend(ext.iter()); + assert_eq!(bitv, from_bytes([0b10110110, 0b00000000, 0b11111111, + 0b01001001, 0b10010010, 0b10111101])); + } + #[test] fn test_bitv_set_show() { let mut s = BitvSet::new(); From 78b674152ed09f2e7f30f0080171b43e11f56f31 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Wed, 2 Jul 2014 10:17:27 -0700 Subject: [PATCH 09/10] collections::bitv: change constructors for `Bitv` and `BitvSet` `Bitv::new` has been renamed `Bitv::with_capacity`. The new function `Bitv::new` now creates a `Bitv` with no elements. The new function `BitvSet::with_capacity` creates a `BitvSet` with a specified capacity. --- src/libcollections/bitv.rs | 150 +++++++++++++++------------- src/test/run-pass/bitv-perf-test.rs | 4 +- src/test/run-pass/issue-11736.rs | 2 +- 3 files changed, 84 insertions(+), 72 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index e041906839c..fdd690ccdd9 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -31,7 +31,7 @@ use vec::Vec; /// ```rust /// use collections::bitv::Bitv; /// -/// let mut bv = Bitv::new(10, false); +/// let mut bv = Bitv::with_capacity(10, false); /// /// // insert all primes less than 10 /// bv.set(2, true); @@ -126,9 +126,14 @@ impl Bitv { } } - /// Creates an empty Bitv that holds `nbits` elements, setting each element + /// Creates an empty Bitv + pub fn new() -> Bitv { + Bitv { storage: Vec::new(), nbits: 0 } + } + + /// Creates a Bitv that holds `nbits` elements, setting each element /// to `init`. - pub fn new(nbits: uint, init: bool) -> Bitv { + pub fn with_capacity(nbits: uint, init: bool) -> Bitv { Bitv { storage: Vec::from_elem((nbits + uint::BITS - 1) / uint::BITS, if init { !0u } else { 0u }), @@ -226,7 +231,7 @@ impl Bitv { /// /// ```rust /// use collections::bitv::Bitv; - /// let mut bv = Bitv::new(10, false); + /// let mut bv = Bitv::with_capacity(10, false); /// bv.set(1, true); /// bv.set(2, true); /// bv.set(3, true); @@ -441,7 +446,7 @@ pub fn from_bytes(bytes: &[u8]) -> Bitv { * index is `f(index)`. */ pub fn from_fn(len: uint, f: |index: uint| -> bool) -> Bitv { - let mut bitv = Bitv::new(len, false); + let mut bitv = Bitv::with_capacity(len, false); for i in range(0u, len) { bitv.set(i, f(i)); } @@ -450,7 +455,7 @@ pub fn from_fn(len: uint, f: |index: uint| -> bool) -> Bitv { impl Default for Bitv { #[inline] - fn default() -> Bitv { Bitv::new(0, false) } + fn default() -> Bitv { Bitv::new() } } impl Collection for Bitv { @@ -467,7 +472,7 @@ impl Mutable for Bitv { impl FromIterator for Bitv { fn from_iter>(iterator: I) -> Bitv { - let mut ret = Bitv::new(0, false); + let mut ret = Bitv::new(); ret.extend(iterator); ret } @@ -609,7 +614,14 @@ impl BitvSet { /// Creates a new bit vector set with initially no contents #[inline] pub fn new() -> BitvSet { - BitvSet(Bitv::new(0, false)) + BitvSet(Bitv::new()) + } + + /// Creates a new bit vector set with initially no contents, able to + /// hold `nbits` elements without resizing + #[inline] + pub fn with_capacity(nbits: uint) -> BitvSet { + BitvSet(Bitv::with_capacity(nbits, false)) } /// Creates a new bit vector set from the given bit vector @@ -994,31 +1006,31 @@ mod tests { #[test] fn test_to_str() { - let zerolen = Bitv::new(0u, false); + let zerolen = Bitv::new(); assert_eq!(zerolen.to_str().as_slice(), ""); - let eightbits = Bitv::new(8u, false); + let eightbits = Bitv::with_capacity(8u, false); assert_eq!(eightbits.to_str().as_slice(), "00000000") } #[test] fn test_0_elements() { - let act = Bitv::new(0u, false); + let act = Bitv::new(); let exp = Vec::from_elem(0u, false); assert!(act.eq_vec(exp.as_slice())); } #[test] fn test_1_element() { - let mut act = Bitv::new(1u, false); + let mut act = Bitv::with_capacity(1u, false); assert!(act.eq_vec([false])); - act = Bitv::new(1u, true); + act = Bitv::with_capacity(1u, true); assert!(act.eq_vec([true])); } #[test] fn test_2_elements() { - let mut b = bitv::Bitv::new(2, false); + let mut b = bitv::Bitv::with_capacity(2, false); b.set(0, true); b.set(1, false); assert_eq!(b.to_str().as_slice(), "10"); @@ -1029,16 +1041,16 @@ mod tests { let mut act; // all 0 - act = Bitv::new(10u, false); + act = Bitv::with_capacity(10u, false); assert!((act.eq_vec( [false, false, false, false, false, false, false, false, false, false]))); // all 1 - act = Bitv::new(10u, true); + act = Bitv::with_capacity(10u, true); assert!((act.eq_vec([true, true, true, true, true, true, true, true, true, true]))); // mixed - act = Bitv::new(10u, false); + act = Bitv::with_capacity(10u, false); act.set(0u, true); act.set(1u, true); act.set(2u, true); @@ -1047,7 +1059,7 @@ mod tests { assert!((act.eq_vec([true, true, true, true, true, false, false, false, false, false]))); // mixed - act = Bitv::new(10u, false); + act = Bitv::with_capacity(10u, false); act.set(5u, true); act.set(6u, true); act.set(7u, true); @@ -1056,7 +1068,7 @@ mod tests { assert!((act.eq_vec([false, false, false, false, false, true, true, true, true, true]))); // mixed - act = Bitv::new(10u, false); + act = Bitv::with_capacity(10u, false); act.set(0u, true); act.set(3u, true); act.set(6u, true); @@ -1069,21 +1081,21 @@ mod tests { let mut act; // all 0 - act = Bitv::new(31u, false); + act = Bitv::with_capacity(31u, false); assert!(act.eq_vec( [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false])); // all 1 - act = Bitv::new(31u, true); + act = Bitv::with_capacity(31u, true); assert!(act.eq_vec( [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true])); // mixed - act = Bitv::new(31u, false); + act = Bitv::with_capacity(31u, false); act.set(0u, true); act.set(1u, true); act.set(2u, true); @@ -1098,7 +1110,7 @@ mod tests { false, false, false, false, false, false])); // mixed - act = Bitv::new(31u, false); + act = Bitv::with_capacity(31u, false); act.set(16u, true); act.set(17u, true); act.set(18u, true); @@ -1113,7 +1125,7 @@ mod tests { false, false, false, false, false, false, false])); // mixed - act = Bitv::new(31u, false); + act = Bitv::with_capacity(31u, false); act.set(24u, true); act.set(25u, true); act.set(26u, true); @@ -1127,7 +1139,7 @@ mod tests { false, true, true, true, true, true, true, true])); // mixed - act = Bitv::new(31u, false); + act = Bitv::with_capacity(31u, false); act.set(3u, true); act.set(17u, true); act.set(30u, true); @@ -1142,21 +1154,21 @@ mod tests { let mut act; // all 0 - act = Bitv::new(32u, false); + act = Bitv::with_capacity(32u, false); assert!(act.eq_vec( [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false])); // all 1 - act = Bitv::new(32u, true); + act = Bitv::with_capacity(32u, true); assert!(act.eq_vec( [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true])); // mixed - act = Bitv::new(32u, false); + act = Bitv::with_capacity(32u, false); act.set(0u, true); act.set(1u, true); act.set(2u, true); @@ -1171,7 +1183,7 @@ mod tests { false, false, false, false, false, false, false])); // mixed - act = Bitv::new(32u, false); + act = Bitv::with_capacity(32u, false); act.set(16u, true); act.set(17u, true); act.set(18u, true); @@ -1186,7 +1198,7 @@ mod tests { false, false, false, false, false, false, false, false])); // mixed - act = Bitv::new(32u, false); + act = Bitv::with_capacity(32u, false); act.set(24u, true); act.set(25u, true); act.set(26u, true); @@ -1201,7 +1213,7 @@ mod tests { false, true, true, true, true, true, true, true, true])); // mixed - act = Bitv::new(32u, false); + act = Bitv::with_capacity(32u, false); act.set(3u, true); act.set(17u, true); act.set(30u, true); @@ -1217,21 +1229,21 @@ mod tests { let mut act; // all 0 - act = Bitv::new(33u, false); + act = Bitv::with_capacity(33u, false); assert!(act.eq_vec( [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false])); // all 1 - act = Bitv::new(33u, true); + act = Bitv::with_capacity(33u, true); assert!(act.eq_vec( [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true])); // mixed - act = Bitv::new(33u, false); + act = Bitv::with_capacity(33u, false); act.set(0u, true); act.set(1u, true); act.set(2u, true); @@ -1246,7 +1258,7 @@ mod tests { false, false, false, false, false, false, false, false])); // mixed - act = Bitv::new(33u, false); + act = Bitv::with_capacity(33u, false); act.set(16u, true); act.set(17u, true); act.set(18u, true); @@ -1261,7 +1273,7 @@ mod tests { false, false, false, false, false, false, false, false, false])); // mixed - act = Bitv::new(33u, false); + act = Bitv::with_capacity(33u, false); act.set(24u, true); act.set(25u, true); act.set(26u, true); @@ -1276,7 +1288,7 @@ mod tests { false, true, true, true, true, true, true, true, true, false])); // mixed - act = Bitv::new(33u, false); + act = Bitv::with_capacity(33u, false); act.set(3u, true); act.set(17u, true); act.set(30u, true); @@ -1290,24 +1302,24 @@ mod tests { #[test] fn test_equal_differing_sizes() { - let v0 = Bitv::new(10u, false); - let v1 = Bitv::new(11u, false); + let v0 = Bitv::with_capacity(10u, false); + let v1 = Bitv::with_capacity(11u, false); assert!(v0 != v1); } #[test] fn test_equal_greatly_differing_sizes() { - let v0 = Bitv::new(10u, false); - let v1 = Bitv::new(110u, false); + let v0 = Bitv::with_capacity(10u, false); + let v1 = Bitv::with_capacity(110u, false); assert!(v0 != v1); } #[test] fn test_equal_sneaky_small() { - let mut a = bitv::Bitv::new(1, false); + let mut a = bitv::Bitv::with_capacity(1, false); a.set(0, true); - let mut b = bitv::Bitv::new(1, true); + let mut b = bitv::Bitv::with_capacity(1, true); b.set(0, true); assert_eq!(a, b); @@ -1315,12 +1327,12 @@ mod tests { #[test] fn test_equal_sneaky_big() { - let mut a = bitv::Bitv::new(100, false); + let mut a = bitv::Bitv::with_capacity(100, false); for i in range(0u, 100) { a.set(i, true); } - let mut b = bitv::Bitv::new(100, true); + let mut b = bitv::Bitv::with_capacity(100, true); for i in range(0u, 100) { b.set(i, true); } @@ -1337,11 +1349,11 @@ mod tests { #[test] fn test_to_bytes() { - let mut bv = Bitv::new(3, true); + let mut bv = Bitv::with_capacity(3, true); bv.set(1, false); assert_eq!(bv.to_bytes(), vec!(0b10100000)); - let mut bv = Bitv::new(9, false); + let mut bv = Bitv::with_capacity(9, false); bv.set(2, true); bv.set(8, true); assert_eq!(bv.to_bytes(), vec!(0b00100000, 0b10000000)); @@ -1385,7 +1397,7 @@ mod tests { let lengths = [10, 64, 100]; for &b in bools.iter() { for &l in lengths.iter() { - let bitset = BitvSet::from_bitv(Bitv::new(l, b)); + let bitset = BitvSet::from_bitv(Bitv::with_capacity(l, b)); assert_eq!(bitset.contains(&1u), b) assert_eq!(bitset.contains(&(l-1u)), b) assert!(!bitset.contains(&l)) @@ -1395,8 +1407,8 @@ mod tests { #[test] fn test_small_difference() { - let mut b1 = Bitv::new(3, false); - let mut b2 = Bitv::new(3, false); + let mut b1 = Bitv::with_capacity(3, false); + let mut b2 = Bitv::with_capacity(3, false); b1.set(0, true); b1.set(1, true); b2.set(1, true); @@ -1409,8 +1421,8 @@ mod tests { #[test] fn test_big_difference() { - let mut b1 = Bitv::new(100, false); - let mut b2 = Bitv::new(100, false); + let mut b1 = Bitv::with_capacity(100, false); + let mut b2 = Bitv::with_capacity(100, false); b1.set(0, true); b1.set(40, true); b2.set(40, true); @@ -1423,7 +1435,7 @@ mod tests { #[test] fn test_small_clear() { - let mut b = Bitv::new(14, true); + let mut b = Bitv::with_capacity(14, true); b.clear(); BitvSet::from_bitv(b).iter().advance(|i| { fail!("found 1 at {:?}", i) @@ -1432,7 +1444,7 @@ mod tests { #[test] fn test_big_clear() { - let mut b = Bitv::new(140, true); + let mut b = Bitv::with_capacity(140, true); b.clear(); BitvSet::from_bitv(b).iter().advance(|i| { fail!("found 1 at {:?}", i) @@ -1441,7 +1453,7 @@ mod tests { #[test] fn test_bitv_masking() { - let b = Bitv::new(140, true); + let b = Bitv::with_capacity(140, true); let mut bs = BitvSet::from_bitv(b); assert!(bs.contains(&139)); assert!(!bs.contains(&140)); @@ -1664,7 +1676,7 @@ mod tests { #[test] fn test_bitv_push_pop() { - let mut s = Bitv::new(5 * uint::BITS - 2, false); + let mut s = Bitv::with_capacity(5 * uint::BITS - 2, false); assert_eq!(s.len(), 5 * uint::BITS - 2); assert_eq!(s.get(5 * uint::BITS - 3), false); s.push(true); @@ -1687,28 +1699,28 @@ mod tests { #[test] fn test_bitv_truncate() { - let mut s = Bitv::new(5 * uint::BITS, true); + let mut s = Bitv::with_capacity(5 * uint::BITS, true); - assert_eq!(s, Bitv::new(5 * uint::BITS, true)); + assert_eq!(s, Bitv::with_capacity(5 * uint::BITS, true)); assert_eq!(s.len(), 5 * uint::BITS); s.truncate(4 * uint::BITS); - assert_eq!(s, Bitv::new(4 * uint::BITS, true)); + assert_eq!(s, Bitv::with_capacity(4 * uint::BITS, true)); assert_eq!(s.len(), 4 * uint::BITS); // Truncating to a size > s.len() should be a noop s.truncate(5 * uint::BITS); - assert_eq!(s, Bitv::new(4 * uint::BITS, true)); + assert_eq!(s, Bitv::with_capacity(4 * uint::BITS, true)); assert_eq!(s.len(), 4 * uint::BITS); s.truncate(3 * uint::BITS - 10); - assert_eq!(s, Bitv::new(3 * uint::BITS - 10, true)); + assert_eq!(s, Bitv::with_capacity(3 * uint::BITS - 10, true)); assert_eq!(s.len(), 3 * uint::BITS - 10); s.truncate(0); - assert_eq!(s, Bitv::new(0, true)); + assert_eq!(s, Bitv::with_capacity(0, true)); assert_eq!(s.len(), 0); } #[test] fn test_bitv_reserve() { - let mut s = Bitv::new(5 * uint::BITS, true); + let mut s = Bitv::with_capacity(5 * uint::BITS, true); // Check capacity assert_eq!(s.capacity(), 5 * uint::BITS); s.reserve(2 * uint::BITS); @@ -1781,7 +1793,7 @@ mod tests { #[bench] fn bench_bitv_big(b: &mut Bencher) { let mut r = rng(); - let mut bitv = Bitv::new(BENCH_BITS, false); + let mut bitv = Bitv::with_capacity(BENCH_BITS, false); b.iter(|| { bitv.set((r.next_u32() as uint) % BENCH_BITS, true); &bitv @@ -1791,7 +1803,7 @@ mod tests { #[bench] fn bench_bitv_small(b: &mut Bencher) { let mut r = rng(); - let mut bitv = Bitv::new(uint::BITS, false); + let mut bitv = Bitv::with_capacity(uint::BITS, false); b.iter(|| { bitv.set((r.next_u32() as uint) % uint::BITS, true); &bitv @@ -1820,8 +1832,8 @@ mod tests { #[bench] fn bench_bitv_big_union(b: &mut Bencher) { - let mut b1 = Bitv::new(BENCH_BITS, false); - let b2 = Bitv::new(BENCH_BITS, false); + let mut b1 = Bitv::with_capacity(BENCH_BITS, false); + let b2 = Bitv::with_capacity(BENCH_BITS, false); b.iter(|| { b1.union(&b2); }) @@ -1829,7 +1841,7 @@ mod tests { #[bench] fn bench_btv_small_iter(b: &mut Bencher) { - let bitv = Bitv::new(uint::BITS, false); + let bitv = Bitv::with_capacity(uint::BITS, false); b.iter(|| { let mut _sum = 0; for pres in bitv.iter() { @@ -1840,7 +1852,7 @@ mod tests { #[bench] fn bench_bitv_big_iter(b: &mut Bencher) { - let bitv = Bitv::new(BENCH_BITS, false); + let bitv = Bitv::with_capacity(BENCH_BITS, false); b.iter(|| { let mut _sum = 0; for pres in bitv.iter() { diff --git a/src/test/run-pass/bitv-perf-test.rs b/src/test/run-pass/bitv-perf-test.rs index 8c2dba243c8..3823a7033f5 100644 --- a/src/test/run-pass/bitv-perf-test.rs +++ b/src/test/run-pass/bitv-perf-test.rs @@ -13,8 +13,8 @@ extern crate collections; use std::collections::Bitv; fn bitv_test() { - let mut v1 = box Bitv::new(31, false); - let v2 = box Bitv::new(31, true); + let mut v1 = box Bitv::with_capacity(31, false); + let v2 = box Bitv::with_capacity(31, true); v1.union(v2); } diff --git a/src/test/run-pass/issue-11736.rs b/src/test/run-pass/issue-11736.rs index c300b8c9335..255807b4c0e 100644 --- a/src/test/run-pass/issue-11736.rs +++ b/src/test/run-pass/issue-11736.rs @@ -16,7 +16,7 @@ use std::collections::Bitv; fn main() { // Generate sieve of Eratosthenes for n up to 1e6 let n = 1000000u; - let sieve = Bitv::new(n+1, true); + let sieve = Bitv::with_capacity(n+1, true); let limit: uint = (n as f32).sqrt() as uint; for i in range(2, limit+1) { if sieve[i] { From 8ef0165a56bc788968fcec6debb510b58134f0d9 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Wed, 2 Jul 2014 11:10:32 -0700 Subject: [PATCH 10/10] collections::bitv: clean up and unit test `BitvSet::is_subset` --- src/libcollections/bitv.rs | 91 +++++++++++++++----------------------- 1 file changed, 36 insertions(+), 55 deletions(-) diff --git a/src/libcollections/bitv.rs b/src/libcollections/bitv.rs index fdd690ccdd9..6d7c91ccfee 100644 --- a/src/libcollections/bitv.rs +++ b/src/libcollections/bitv.rs @@ -15,7 +15,7 @@ use core::prelude::*; use core::cmp; use core::default::Default; use core::fmt; -use core::iter::{Map, Take, Zip}; +use core::iter::Take; use core::ops; use core::slice; use core::uint; @@ -825,23 +825,16 @@ impl Set for BitvSet { self.intersection(other).count() > 0 } + #[inline] fn is_subset(&self, other: &BitvSet) -> bool { - for (_, w1, w2) in self.commons(other) { - if w1 & w2 != w1 { - return false; - } - } - /* If anything is not ours, then everything is not ours so we're - definitely a subset in that case. Otherwise if there's any stray - ones that 'other' doesn't have, we're not a subset. */ - for (mine, _, w) in self.outliers(other) { - if !mine { - return true; - } else if w != 0 { - return false; - } - } - return true; + let &BitvSet(ref self_bitv) = self; + let &BitvSet(ref other_bitv) = other; + + // Check that `self` intersect `other` is self + self_bitv.mask_words(0).zip(other_bitv.mask_words(0)) + .all(|((_, w1), (_, w2))| w1 & w2 == w1) && + // Check that `self` setminus `other` is empty + self_bitv.mask_words(other_bitv.storage.len()).all(|(_, w)| w == 0) } #[inline] @@ -883,44 +876,6 @@ impl MutableSet for BitvSet { } } -impl BitvSet { - /// Visits each of the words that the two bit vectors (`self` and `other`) - /// both have in common. The three yielded arguments are (bit location, - /// w1, w2) where the bit location is the number of bits offset so far, - /// and w1/w2 are the words coming from the two vectors self, other. - fn commons<'a>(&'a self, other: &'a BitvSet) - -> Map<((uint, uint), (uint, uint)), (uint, uint, uint), - Zip, MaskWords<'a>>> { - let &BitvSet(ref self_bitv) = self; - let &BitvSet(ref other_bitv) = other; - self_bitv.mask_words(0).zip(other_bitv.mask_words(0)) - .map(|((i, w1), (_, w2))| (i * uint::BITS, w1, w2)) - } - - /// Visits each word in `self` or `other` that extends beyond the other. This - /// will only iterate through one of the vectors, and it only iterates - /// over the portion that doesn't overlap with the other one. - /// - /// The yielded arguments are a `bool`, the bit offset, and a word. The `bool` - /// is true if the word comes from `self`, and `false` if it comes from - /// `other`. - fn outliers<'a>(&'a self, other: &'a BitvSet) - -> Map<(uint, uint), (bool, uint, uint), MaskWords<'a>> { - let slen = self.capacity() / uint::BITS; - let olen = other.capacity() / uint::BITS; - let &BitvSet(ref self_bitv) = self; - let &BitvSet(ref other_bitv) = other; - - if olen < slen { - self_bitv.mask_words(olen) - .map(|(i, w)| (true, i * uint::BITS, w)) - } else { - other_bitv.mask_words(slen) - .map(|(i, w)| (false, i * uint::BITS, w)) - } - } -} - pub struct BitPositions<'a> { set: &'a BitvSet, next_idx: uint @@ -1594,6 +1549,32 @@ mod tests { assert_eq!(i, expected.len()); } + #[test] + fn test_bitv_set_subset() { + let mut set1 = BitvSet::new(); + let mut set2 = BitvSet::new(); + + assert!(set1.is_subset(&set2)); // {} {} + set2.insert(100); + assert!(set1.is_subset(&set2)); // {} { 1 } + set2.insert(200); + assert!(set1.is_subset(&set2)); // {} { 1, 2 } + set1.insert(200); + assert!(set1.is_subset(&set2)); // { 2 } { 1, 2 } + set1.insert(300); + assert!(!set1.is_subset(&set2)); // { 2, 3 } { 1, 2 } + set2.insert(300); + assert!(set1.is_subset(&set2)); // { 2, 3 } { 1, 2, 3 } + set2.insert(400); + assert!(set1.is_subset(&set2)); // { 2, 3 } { 1, 2, 3, 4 } + set2.remove(&100); + assert!(set1.is_subset(&set2)); // { 2, 3 } { 2, 3, 4 } + set2.remove(&300); + assert!(!set1.is_subset(&set2)); // { 2, 3 } { 2, 4 } + set1.remove(&300); + assert!(set1.is_subset(&set2)); // { 2 } { 2, 4 } + } + #[test] fn test_bitv_remove() { let mut a = BitvSet::new();