// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#![allow(missing_docs)]

use std::cmp::Ordering::{self, Less, Greater, Equal};
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::hash_map::{self, Hasher};
use std::hash::Hash;
use std::mem;
use std::num::{Float, FromPrimitive};

fn local_cmp<T: Float>(x: T, y: T) -> Ordering {
    // arbitrarily decide that NaNs are larger than everything.
    if y.is_nan() {
        Less
    } else if x.is_nan() {
        Greater
    } else if x < y {
        Less
    } else if x == y {
        Equal
    } else {
        Greater
    }
}

fn local_sort<T: Float>(v: &mut [T]) {
    v.sort_by(|x: &T, y: &T| local_cmp(*x, *y));
}

/// Trait that provides simple descriptive statistics on a univariate set of numeric samples.
pub trait Stats<T: Float + FromPrimitive> {

    /// Sum of the samples.
    ///
    /// Note: this method sacrifices performance at the altar of accuracy.
    /// Depends on IEEE-754 arithmetic guarantees. See proof of correctness at:
    /// ["Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric Predicates"]
    /// (http://www.cs.cmu.edu/~quake-papers/robust-arithmetic.ps)
    /// *Discrete & Computational Geometry 18*, 3 (Oct 1997), 305-363, Shewchuk J.R.
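    ///
    /// A minimal usage sketch (the values mirror this module's own tests; the point is
    /// that the compensated summation keeps the small term a naive left-to-right sum
    /// would round away):
    ///
    /// ```ignore
    /// let samples = [1e30f64, 1.2f64, -1e30f64];
    /// // naive addition gives 0.0 because 1.2 is lost against 1e30;
    /// // the exact sum recovers it.
    /// assert_eq!(samples.sum(), 1.2);
    /// ```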
    fn sum(&self) -> T;

    /// Minimum value of the samples.
    fn min(&self) -> T;

    /// Maximum value of the samples.
    fn max(&self) -> T;

    /// Arithmetic mean (average) of the samples: sum divided by sample-count.
    ///
    /// See: https://en.wikipedia.org/wiki/Arithmetic_mean
    fn mean(&self) -> T;

    /// Median of the samples: value separating the lower half of the samples from the higher half.
    /// Equal to `self.percentile(50.0)`.
    ///
    /// See: https://en.wikipedia.org/wiki/Median
    fn median(&self) -> T;

    /// Variance of the samples: bias-corrected mean of the squares of the differences of each
    /// sample from the sample mean. Note that this calculates the _sample variance_ rather than the
    /// population variance, which is assumed to be unknown. It therefore corrects the `(n-1)/n`
    /// bias that would appear if we calculated a population variance, by dividing by `(n-1)` rather
    /// than `n`.
    ///
    /// See: https://en.wikipedia.org/wiki/Variance
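    ///
    /// A small worked example (illustrative): for the samples `[1.0, 2.0, 3.0, 4.0, 5.0]`
    /// the mean is `3.0` and the squared deviations sum to `10.0`. Dividing by `n - 1 = 4`
    /// gives a sample variance of `2.5`, whereas dividing by `n = 5` would give the
    /// population variance `2.0`.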
    fn var(&self) -> T;

    /// Standard deviation: the square root of the sample variance.
    ///
    /// Note: this is not a robust statistic for non-normal distributions. Prefer
    /// `median_abs_dev` for unknown distributions.
    ///
    /// See: https://en.wikipedia.org/wiki/Standard_deviation
    fn std_dev(&self) -> T;

    /// Standard deviation as a percent of the mean value. See `std_dev` and `mean`.
    ///
    /// Note: this is not a robust statistic for non-normal distributions. Prefer
    /// `median_abs_dev_pct` for unknown distributions.
    fn std_dev_pct(&self) -> T;

    /// Scaled median of the absolute deviations of each sample from the sample median. This is a
    /// robust (distribution-agnostic) estimator of sample variability. Use this in preference to
    /// `std_dev` if you cannot assume your sample is normally distributed. Note that this is scaled
    /// by the constant `1.4826` to allow its use as a consistent estimator for the standard
    /// deviation.
    ///
    /// See: http://en.wikipedia.org/wiki/Median_absolute_deviation
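    ///
    /// A small worked example (illustrative): for the samples `[1.0, 2.0, 3.0, 4.0, 100.0]`
    /// the median is `3.0`, the absolute deviations are `[2.0, 1.0, 0.0, 1.0, 97.0]`, and their
    /// median is `1.0`, so the scaled result is `1.4826`; the outlier barely moves it,
    /// unlike `std_dev`.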
    fn median_abs_dev(&self) -> T;

    /// Median absolute deviation as a percent of the median. See `median_abs_dev` and `median`.
    fn median_abs_dev_pct(&self) -> T;

    /// Percentile: the value below which `pct` percent of the values in `self` fall. For example,
    /// percentile(95.0) will return the value `v` such that 95% of the samples `s` in `self`
    /// satisfy `s <= v`.
    ///
    /// Calculated by linear interpolation between closest ranks.
    ///
    /// See: http://en.wikipedia.org/wiki/Percentile
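    ///
    /// A small worked example (illustrative): for the samples `[10.0, 20.0, 30.0, 40.0]`,
    /// `percentile(25.0)` lands at rank `0.25 * 3 = 0.75` and interpolates three quarters
    /// of the way from `10.0` towards `20.0`, giving `17.5`.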
    fn percentile(&self, pct: T) -> T;

    /// Quartiles of the sample: three values that divide the sample into four equal groups, each
    /// with 1/4 of the data. The middle value is the median. See `median` and `percentile`. This
    /// function may calculate the 3 quartiles more efficiently than 3 calls to `percentile`, but
    /// is otherwise equivalent.
    ///
    /// See also: https://en.wikipedia.org/wiki/Quartile
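    ///
    /// For example (illustrative, using this implementation's interpolation rule), the
    /// quartiles of `[1.0, 2.0, 3.0, 4.0, 5.0]` are `(2.0, 3.0, 4.0)`, so the
    /// interquartile range is `2.0`.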
    fn quartiles(&self) -> (T, T, T);

    /// Inter-quartile range: the difference between the 25th percentile (1st quartile) and the 75th
    /// percentile (3rd quartile). See `quartiles`.
    ///
    /// See also: https://en.wikipedia.org/wiki/Interquartile_range
    fn iqr(&self) -> T;
}

/// Extracted collection of all the summary statistics of a sample set.
#[derive(Clone, PartialEq)]
#[allow(missing_docs)]
pub struct Summary<T> {
    pub sum: T,
    pub min: T,
    pub max: T,
    pub mean: T,
    pub median: T,
    pub var: T,
    pub std_dev: T,
    pub std_dev_pct: T,
    pub median_abs_dev: T,
    pub median_abs_dev_pct: T,
    pub quartiles: (T, T, T),
    pub iqr: T,
}

impl<T: Float + FromPrimitive> Summary<T> {
    /// Construct a new summary of a sample set.
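    ///
    /// A minimal usage sketch (illustrative values):
    ///
    /// ```ignore
    /// let samples = [1.0f64, 2.0, 3.0, 4.0];
    /// let summ = Summary::new(&samples);
    /// assert_eq!(summ.sum, 10.0);
    /// assert_eq!(summ.median, 2.5);
    /// ```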
    pub fn new(samples: &[T]) -> Summary<T> {
        Summary {
            sum: samples.sum(),
            min: samples.min(),
            max: samples.max(),
            mean: samples.mean(),
            median: samples.median(),
            var: samples.var(),
            std_dev: samples.std_dev(),
            std_dev_pct: samples.std_dev_pct(),
            median_abs_dev: samples.median_abs_dev(),
            median_abs_dev_pct: samples.median_abs_dev_pct(),
            quartiles: samples.quartiles(),
            iqr: samples.iqr()
        }
    }
}

impl<T: Float + FromPrimitive> Stats<T> for [T] {
    // FIXME #11059 handle NaN, inf and overflow
    fn sum(&self) -> T {
        let mut partials = vec![];

        for &x in self {
            let mut x = x;
            let mut j = 0;
            // This inner loop applies `hi`/`lo` summation to each
            // partial so that the list of partial sums remains exact.
            for i in 0..partials.len() {
                let mut y: T = partials[i];
                if x.abs() < y.abs() {
                    mem::swap(&mut x, &mut y);
                }
                // Rounded `x+y` is stored in `hi` with round-off stored in
                // `lo`. Together `hi+lo` are exactly equal to `x+y`.
                let hi = x + y;
                let lo = y - (hi - x);
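                // For example (illustrative, with `f64`): if x = 1.0 and y = 1e-16,
                // then `hi = x + y` rounds to 1.0, and `lo = y - (hi - x)` recovers the
                // 1e-16 that rounding discarded, so the pair (hi, lo) still represents
                // the exact sum of x and y.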
                if lo != Float::zero() {
                    partials[j] = lo;
                    j += 1;
                }
                x = hi;
            }
            if j >= partials.len() {
                partials.push(x);
            } else {
                partials[j] = x;
                partials.truncate(j+1);
            }
        }
        let zero: T = Float::zero();
        partials.iter().fold(zero, |p, q| p + *q)
    }

    fn min(&self) -> T {
        assert!(self.len() != 0);
        self.iter().fold(self[0], |p, q| p.min(*q))
    }

    fn max(&self) -> T {
        assert!(self.len() != 0);
        self.iter().fold(self[0], |p, q| p.max(*q))
    }

    fn mean(&self) -> T {
        assert!(self.len() != 0);
        self.sum() / FromPrimitive::from_uint(self.len()).unwrap()
    }

    fn median(&self) -> T {
        self.percentile(FromPrimitive::from_uint(50).unwrap())
    }

    fn var(&self) -> T {
        if self.len() < 2 {
            Float::zero()
        } else {
            let mean = self.mean();
            let mut v: T = Float::zero();
            for s in self {
                let x = *s - mean;
                v = v + x*x;
            }
            // NB: this is _supposed to be_ len-1, not len. If you
            // change it back to len, you will be calculating a
            // population variance, not a sample variance.
            let denom = FromPrimitive::from_uint(self.len()-1).unwrap();
            v/denom
        }
    }

    fn std_dev(&self) -> T {
        self.var().sqrt()
    }

    fn std_dev_pct(&self) -> T {
        let hundred = FromPrimitive::from_uint(100).unwrap();
        (self.std_dev() / self.mean()) * hundred
    }

    fn median_abs_dev(&self) -> T {
        let med = self.median();
        let abs_devs: Vec<T> = self.iter().map(|&v| (med - v).abs()).collect();
        // This constant is derived by smarter statistics brains than me, but it is
        // consistent with how R and other packages treat the MAD.
        let number = FromPrimitive::from_f64(1.4826).unwrap();
        abs_devs.median() * number
    }

    fn median_abs_dev_pct(&self) -> T {
        let hundred = FromPrimitive::from_uint(100).unwrap();
        (self.median_abs_dev() / self.median()) * hundred
    }

    fn percentile(&self, pct: T) -> T {
        let mut tmp = self.to_vec();
        local_sort(tmp.as_mut_slice());
        percentile_of_sorted(tmp.as_slice(), pct)
    }

    fn quartiles(&self) -> (T, T, T) {
        let mut tmp = self.to_vec();
        local_sort(tmp.as_mut_slice());
        let first = FromPrimitive::from_uint(25).unwrap();
        let a = percentile_of_sorted(tmp.as_slice(), first);
        let second = FromPrimitive::from_uint(50).unwrap();
        let b = percentile_of_sorted(tmp.as_slice(), second);
        let third = FromPrimitive::from_uint(75).unwrap();
        let c = percentile_of_sorted(tmp.as_slice(), third);
        (a, b, c)
    }

    fn iqr(&self) -> T {
        let (a, _, c) = self.quartiles();
        c - a
    }
}


// Helper function: extract a value representing the `pct` percentile of a sorted sample-set, using
// linear interpolation. If samples are not sorted, returns a nonsensical value.
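// For example (illustrative): with sorted_samples = [10.0, 20.0, 30.0, 40.0] and pct = 25.0,
// the rank is 0.25 * 3 = 0.75, so the result interpolates three quarters of the way from
// 10.0 towards 20.0, giving 17.5.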
fn percentile_of_sorted<T: Float + FromPrimitive>(sorted_samples: &[T],
                                                  pct: T) -> T {
    assert!(sorted_samples.len() != 0);
    if sorted_samples.len() == 1 {
        return sorted_samples[0];
    }
    let zero: T = Float::zero();
    assert!(zero <= pct);
    let hundred = FromPrimitive::from_uint(100).unwrap();
    assert!(pct <= hundred);
    if pct == hundred {
        return sorted_samples[sorted_samples.len() - 1];
    }
    let length = FromPrimitive::from_uint(sorted_samples.len() - 1).unwrap();
    let rank = (pct / hundred) * length;
    let lrank = rank.floor();
    let d = rank - lrank;
    let n = lrank.to_uint().unwrap();
    let lo = sorted_samples[n];
    let hi = sorted_samples[n+1];
    lo + (hi - lo) * d
}

/// Winsorize a set of samples, replacing values above the `100-pct` percentile and below the `pct`
/// percentile with those percentiles themselves. This is a way of minimizing the effect of
/// outliers, at the cost of biasing the sample. It differs from trimming in that it does not
/// change the number of samples, just changes the values of those that are outliers.
///
/// See: http://en.wikipedia.org/wiki/Winsorising
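///
/// A minimal usage sketch (illustrative values):
///
/// ```ignore
/// let mut samples = [1.0f64, 2.0, 3.0, 4.0, 100.0];
/// winsorize(&mut samples, 25.0);
/// // values below the 25th percentile (2.0) or above the 75th (4.0) are clamped:
/// assert_eq!(samples, [2.0, 2.0, 3.0, 4.0, 4.0]);
/// ```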
pub fn winsorize<T: Float + FromPrimitive>(samples: &mut [T], pct: T) {
    let mut tmp = samples.to_vec();
    local_sort(tmp.as_mut_slice());
    let lo = percentile_of_sorted(tmp.as_slice(), pct);
    let hundred: T = FromPrimitive::from_uint(100).unwrap();
    let hi = percentile_of_sorted(tmp.as_slice(), hundred-pct);
    for samp in samples {
        if *samp > hi {
            *samp = hi
        } else if *samp < lo {
            *samp = lo
        }
    }
}

/// Returns a HashMap with the number of occurrences of every element in the
/// sequence that the iterator exposes.
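///
/// A minimal usage sketch (illustrative values and literal suffixes):
///
/// ```ignore
/// let counts = freq_count(vec![1us, 1, 2, 3, 3, 3].into_iter());
/// assert_eq!(counts.get(&1), Some(&2));
/// assert_eq!(counts.get(&3), Some(&3));
/// ```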
pub fn freq_count<T, U>(iter: T) -> hash_map::HashMap<U, uint>
    where T: Iterator<Item=U>, U: Eq + Clone + Hash<Hasher>
{
    let mut map: hash_map::HashMap<U, uint> = hash_map::HashMap::new();
    for elem in iter {
        match map.entry(elem) {
            Occupied(mut entry) => { *entry.get_mut() += 1; },
            Vacant(entry) => { entry.insert(1); },
        }
    }
    map
}

// Test vectors generated from R, using the script src/etc/stat-test-vectors.r.

#[cfg(test)]
mod tests {
    use stats::Stats;
    use stats::Summary;
    use std::old_io;
    use std::f64;

    macro_rules! assert_approx_eq {
        ($a:expr, $b:expr) => ({
            use std::num::Float;
            let (a, b) = (&$a, &$b);
            assert!((*a - *b).abs() < 1.0e-6,
                    "{} is not approximately equal to {}", *a, *b);
        })
    }

    fn check(samples: &[f64], summ: &Summary<f64>) {

        let summ2 = Summary::new(samples);

        let mut w = old_io::stdout();
        let w = &mut w;
        (write!(w, "\n")).unwrap();

        assert_eq!(summ.sum, summ2.sum);
        assert_eq!(summ.min, summ2.min);
        assert_eq!(summ.max, summ2.max);
        assert_eq!(summ.mean, summ2.mean);
        assert_eq!(summ.median, summ2.median);

        // We needed a few more digits to get exact equality on these,
        // but they agree within the 1.0e-6 tolerance that assert_approx_eq! allows.
        assert_approx_eq!(summ.var, summ2.var);
        assert_approx_eq!(summ.std_dev, summ2.std_dev);
        assert_approx_eq!(summ.std_dev_pct, summ2.std_dev_pct);
        assert_approx_eq!(summ.median_abs_dev, summ2.median_abs_dev);
        assert_approx_eq!(summ.median_abs_dev_pct, summ2.median_abs_dev_pct);

        assert_eq!(summ.quartiles, summ2.quartiles);
        assert_eq!(summ.iqr, summ2.iqr);
    }

    #[test]
    fn test_min_max_nan() {
        let xs = &[1.0, 2.0, f64::NAN, 3.0, 4.0];
        let summary = Summary::new(xs);
        assert_eq!(summary.min, 1.0);
        assert_eq!(summary.max, 4.0);
    }

    #[test]
    fn test_norm2() {
        let val = &[958.0000000000, 924.0000000000];
        let summ = &Summary {
            sum: 1882.0000000000,
            min: 924.0000000000,
            max: 958.0000000000,
            mean: 941.0000000000,
            median: 941.0000000000,
            var: 578.0000000000,
            std_dev: 24.0416305603,
            std_dev_pct: 2.5549022912,
            median_abs_dev: 25.2042000000,
            median_abs_dev_pct: 2.6784484591,
            quartiles: (932.5000000000,941.0000000000,949.5000000000),
            iqr: 17.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_norm10narrow() {
        let val = &[966.0000000000, 985.0000000000, 1110.0000000000, 848.0000000000, 821.0000000000,
                    975.0000000000, 962.0000000000, 1157.0000000000, 1217.0000000000, 955.0000000000];
        let summ = &Summary {
            sum: 9996.0000000000,
            min: 821.0000000000,
            max: 1217.0000000000,
            mean: 999.6000000000,
            median: 970.5000000000,
            var: 16050.7111111111,
            std_dev: 126.6914010938,
            std_dev_pct: 12.6742097933,
            median_abs_dev: 102.2994000000,
            median_abs_dev_pct: 10.5408964451,
            quartiles: (956.7500000000,970.5000000000,1078.7500000000),
            iqr: 122.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_norm10medium() {
        let val = &[954.0000000000, 1064.0000000000, 855.0000000000, 1000.0000000000, 743.0000000000,
                    1084.0000000000, 704.0000000000, 1023.0000000000, 357.0000000000, 869.0000000000];
        let summ = &Summary {
            sum: 8653.0000000000,
            min: 357.0000000000,
            max: 1084.0000000000,
            mean: 865.3000000000,
            median: 911.5000000000,
            var: 48628.4555555556,
            std_dev: 220.5186059170,
            std_dev_pct: 25.4846418487,
            median_abs_dev: 195.7032000000,
            median_abs_dev_pct: 21.4704552935,
            quartiles: (771.0000000000,911.5000000000,1017.2500000000),
            iqr: 246.2500000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_norm10wide() {
        let val = &[505.0000000000, 497.0000000000, 1591.0000000000, 887.0000000000, 1026.0000000000,
                    136.0000000000, 1580.0000000000, 940.0000000000, 754.0000000000, 1433.0000000000];
        let summ = &Summary {
            sum: 9349.0000000000,
            min: 136.0000000000,
            max: 1591.0000000000,
            mean: 934.9000000000,
            median: 913.5000000000,
            var: 239208.9888888889,
            std_dev: 489.0899599142,
            std_dev_pct: 52.3146817750,
            median_abs_dev: 611.5725000000,
            median_abs_dev_pct: 66.9482758621,
            quartiles: (567.2500000000,913.5000000000,1331.2500000000),
            iqr: 764.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_norm25verynarrow() {
        let val = &[991.0000000000, 1018.0000000000, 998.0000000000, 1013.0000000000, 974.0000000000,
                    1007.0000000000, 1014.0000000000, 999.0000000000, 1011.0000000000, 978.0000000000,
                    985.0000000000, 999.0000000000, 983.0000000000, 982.0000000000, 1015.0000000000,
                    1002.0000000000, 977.0000000000, 948.0000000000, 1040.0000000000, 974.0000000000,
                    996.0000000000, 989.0000000000, 1015.0000000000, 994.0000000000, 1024.0000000000];
        let summ = &Summary {
            sum: 24926.0000000000,
            min: 948.0000000000,
            max: 1040.0000000000,
            mean: 997.0400000000,
            median: 998.0000000000,
            var: 393.2066666667,
            std_dev: 19.8294393937,
            std_dev_pct: 1.9888308788,
            median_abs_dev: 22.2390000000,
            median_abs_dev_pct: 2.2283567134,
            quartiles: (983.0000000000,998.0000000000,1013.0000000000),
            iqr: 30.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_exp10a() {
        let val = &[23.0000000000, 11.0000000000, 2.0000000000, 57.0000000000, 4.0000000000,
                    12.0000000000, 5.0000000000, 29.0000000000, 3.0000000000, 21.0000000000];
        let summ = &Summary {
            sum: 167.0000000000,
            min: 2.0000000000,
            max: 57.0000000000,
            mean: 16.7000000000,
            median: 11.5000000000,
            var: 287.7888888889,
            std_dev: 16.9643416875,
            std_dev_pct: 101.5828843560,
            median_abs_dev: 13.3434000000,
            median_abs_dev_pct: 116.0295652174,
            quartiles: (4.2500000000,11.5000000000,22.5000000000),
            iqr: 18.2500000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_exp10b() {
        let val = &[24.0000000000, 17.0000000000, 6.0000000000, 38.0000000000, 25.0000000000,
                    7.0000000000, 51.0000000000, 2.0000000000, 61.0000000000, 32.0000000000];
        let summ = &Summary {
            sum: 263.0000000000,
            min: 2.0000000000,
            max: 61.0000000000,
            mean: 26.3000000000,
            median: 24.5000000000,
            var: 383.5666666667,
            std_dev: 19.5848580967,
            std_dev_pct: 74.4671410520,
            median_abs_dev: 22.9803000000,
            median_abs_dev_pct: 93.7971428571,
            quartiles: (9.5000000000,24.5000000000,36.5000000000),
            iqr: 27.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_exp10c() {
        let val = &[71.0000000000, 2.0000000000, 32.0000000000, 1.0000000000, 6.0000000000,
                    28.0000000000, 13.0000000000, 37.0000000000, 16.0000000000, 36.0000000000];
        let summ = &Summary {
            sum: 242.0000000000,
            min: 1.0000000000,
            max: 71.0000000000,
            mean: 24.2000000000,
            median: 22.0000000000,
            var: 458.1777777778,
            std_dev: 21.4050876611,
            std_dev_pct: 88.4507754589,
            median_abs_dev: 21.4977000000,
            median_abs_dev_pct: 97.7168181818,
            quartiles: (7.7500000000,22.0000000000,35.0000000000),
            iqr: 27.2500000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_exp25() {
        let val = &[3.0000000000, 24.0000000000, 1.0000000000, 19.0000000000, 7.0000000000,
                    5.0000000000, 30.0000000000, 39.0000000000, 31.0000000000, 13.0000000000,
                    25.0000000000, 48.0000000000, 1.0000000000, 6.0000000000, 42.0000000000,
                    63.0000000000, 2.0000000000, 12.0000000000, 108.0000000000, 26.0000000000,
                    1.0000000000, 7.0000000000, 44.0000000000, 25.0000000000, 11.0000000000];
        let summ = &Summary {
            sum: 593.0000000000,
            min: 1.0000000000,
            max: 108.0000000000,
            mean: 23.7200000000,
            median: 19.0000000000,
            var: 601.0433333333,
            std_dev: 24.5161851301,
            std_dev_pct: 103.3565983562,
            median_abs_dev: 19.2738000000,
            median_abs_dev_pct: 101.4410526316,
            quartiles: (6.0000000000,19.0000000000,31.0000000000),
            iqr: 25.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_binom25() {
        let val = &[18.0000000000, 17.0000000000, 27.0000000000, 15.0000000000, 21.0000000000,
                    25.0000000000, 17.0000000000, 24.0000000000, 25.0000000000, 24.0000000000,
                    26.0000000000, 26.0000000000, 23.0000000000, 15.0000000000, 23.0000000000,
                    17.0000000000, 18.0000000000, 18.0000000000, 21.0000000000, 16.0000000000,
                    15.0000000000, 31.0000000000, 20.0000000000, 17.0000000000, 15.0000000000];
        let summ = &Summary {
            sum: 514.0000000000,
            min: 15.0000000000,
            max: 31.0000000000,
            mean: 20.5600000000,
            median: 20.0000000000,
            var: 20.8400000000,
            std_dev: 4.5650848842,
            std_dev_pct: 22.2037202539,
            median_abs_dev: 5.9304000000,
            median_abs_dev_pct: 29.6520000000,
            quartiles: (17.0000000000,20.0000000000,24.0000000000),
            iqr: 7.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_pois25lambda30() {
        let val = &[27.0000000000, 33.0000000000, 34.0000000000, 34.0000000000, 24.0000000000,
                    39.0000000000, 28.0000000000, 27.0000000000, 31.0000000000, 28.0000000000,
                    38.0000000000, 21.0000000000, 33.0000000000, 36.0000000000, 29.0000000000,
                    37.0000000000, 32.0000000000, 34.0000000000, 31.0000000000, 39.0000000000,
                    25.0000000000, 31.0000000000, 32.0000000000, 40.0000000000, 24.0000000000];
        let summ = &Summary {
            sum: 787.0000000000,
            min: 21.0000000000,
            max: 40.0000000000,
            mean: 31.4800000000,
            median: 32.0000000000,
            var: 26.5933333333,
            std_dev: 5.1568724372,
            std_dev_pct: 16.3814245145,
            median_abs_dev: 5.9304000000,
            median_abs_dev_pct: 18.5325000000,
            quartiles: (28.0000000000,32.0000000000,34.0000000000),
            iqr: 6.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_pois25lambda40() {
        let val = &[42.0000000000, 50.0000000000, 42.0000000000, 46.0000000000, 34.0000000000,
                    45.0000000000, 34.0000000000, 49.0000000000, 39.0000000000, 28.0000000000,
                    40.0000000000, 35.0000000000, 37.0000000000, 39.0000000000, 46.0000000000,
                    44.0000000000, 32.0000000000, 45.0000000000, 42.0000000000, 37.0000000000,
                    48.0000000000, 42.0000000000, 33.0000000000, 42.0000000000, 48.0000000000];
        let summ = &Summary {
            sum: 1019.0000000000,
            min: 28.0000000000,
            max: 50.0000000000,
            mean: 40.7600000000,
            median: 42.0000000000,
            var: 34.4400000000,
            std_dev: 5.8685603004,
            std_dev_pct: 14.3978417577,
            median_abs_dev: 5.9304000000,
            median_abs_dev_pct: 14.1200000000,
            quartiles: (37.0000000000,42.0000000000,45.0000000000),
            iqr: 8.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_pois25lambda50() {
        let val = &[45.0000000000, 43.0000000000, 44.0000000000, 61.0000000000, 51.0000000000,
                    53.0000000000, 59.0000000000, 52.0000000000, 49.0000000000, 51.0000000000,
                    51.0000000000, 50.0000000000, 49.0000000000, 56.0000000000, 42.0000000000,
                    52.0000000000, 51.0000000000, 43.0000000000, 48.0000000000, 48.0000000000,
                    50.0000000000, 42.0000000000, 43.0000000000, 42.0000000000, 60.0000000000];
        let summ = &Summary {
            sum: 1235.0000000000,
            min: 42.0000000000,
            max: 61.0000000000,
            mean: 49.4000000000,
            median: 50.0000000000,
            var: 31.6666666667,
            std_dev: 5.6273143387,
            std_dev_pct: 11.3913245723,
            median_abs_dev: 4.4478000000,
            median_abs_dev_pct: 8.8956000000,
            quartiles: (44.0000000000,50.0000000000,52.0000000000),
            iqr: 8.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_unif25() {
        let val = &[99.0000000000, 55.0000000000, 92.0000000000, 79.0000000000, 14.0000000000,
                    2.0000000000, 33.0000000000, 49.0000000000, 3.0000000000, 32.0000000000,
                    84.0000000000, 59.0000000000, 22.0000000000, 86.0000000000, 76.0000000000,
                    31.0000000000, 29.0000000000, 11.0000000000, 41.0000000000, 53.0000000000,
                    45.0000000000, 44.0000000000, 98.0000000000, 98.0000000000, 7.0000000000];
        let summ = &Summary {
            sum: 1242.0000000000,
            min: 2.0000000000,
            max: 99.0000000000,
            mean: 49.6800000000,
            median: 45.0000000000,
            var: 1015.6433333333,
            std_dev: 31.8691595957,
            std_dev_pct: 64.1488719719,
            median_abs_dev: 45.9606000000,
            median_abs_dev_pct: 102.1346666667,
            quartiles: (29.0000000000,45.0000000000,79.0000000000),
            iqr: 50.0000000000,
        };
        check(val, summ);
    }

    #[test]
    fn test_sum_f64s() {
        assert_eq!([0.5f64, 3.2321f64, 1.5678f64].sum(), 5.2999);
    }

    #[test]
    fn test_sum_f64_between_ints_that_sum_to_0() {
        assert_eq!([1e30f64, 1.2f64, -1e30f64].sum(), 1.2);
    }
}

#[cfg(test)]
mod bench {
    use Bencher;
    use stats::Stats;

    #[bench]
    pub fn sum_three_items(b: &mut Bencher) {
        b.iter(|| {
            [1e20f64, 1.5f64, -1e20f64].sum();
        })
    }

    #[bench]
    pub fn sum_many_f64(b: &mut Bencher) {
        let nums = [-1e30f64, 1e60, 1e30, 1.0, -1e60];
        let v = (0us..500).map(|i| nums[i%5]).collect::<Vec<_>>();

        b.iter(|| {
            v.sum();
        })
    }
}