rust/src/test/bench/shootout-k-nucleotide.rs

319 lines
8.6 KiB
Rust
Raw Normal View History

// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
2014-02-05 16:33:10 -06:00
//
// contributed by the Rust Project Developers
// Copyright (c) 2014 The Rust Project Developers
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// - Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in
// the documentation and/or other materials provided with the
// distribution.
//
// - Neither the name of "The Computer Language Benchmarks Game" nor
// the name of "The Computer Language Shootout Benchmarks" nor the
// names of its contributors may be used to endorse or promote
// products derived from this software without specific prior
// written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.
2014-02-05 16:33:10 -06:00
// ignore-android see #10393 #13206
2014-12-26 01:17:30 +01:00
use std::ascii::OwnedAsciiExt;
use std::iter::repeat;
use std::slice;
use std::sync::Arc;
use std::thread::Thread;
2013-04-17 18:59:54 -07:00
/// Nucleotide letters indexed by their 2-bit symbol value (see `unpack_symbol`).
static TABLE: [u8; 4] = [b'A', b'C', b'G', b'T'];
2013-04-17 18:59:54 -07:00
/// Number of bucket slots allocated by `Table::new`; `Table::lookup` reduces a
/// key's hash modulo this value. `2 << 16` evaluates to 131072.
static TABLE_SIZE: uint = 2 << 16;
/// Sequences whose occurrence counts `main` reports, one frequency table per
/// entry (each table is built with a frame equal to the sequence's length).
static OCCURRENCES: [&'static str; 5] = [
    "GGT",
    "GGTA",
    "GGTATT",
    "GGTATTTTAATT",
    "GGTATTTTAATTTATAGT",
];
// Code implementation
/// A nucleotide sequence packed two bits per symbol into a `u64`.
///
/// Ordering and equality compare the packed integer directly. `Clone` is
/// derived alongside the `Copy` impl because `Copy` has `Clone` as a
/// supertrait from Rust 1.0 onwards.
#[derive(Clone, PartialEq, PartialOrd, Ord, Eq)]
struct Code(u64);

impl Copy for Code {}
2013-04-17 18:59:54 -07:00
impl Code {
fn hash(&self) -> u64 {
let Code(ret) = *self;
return ret;
2013-04-17 18:59:54 -07:00
}
fn push_char(&self, c: u8) -> Code {
Code((self.hash() << 2) + (pack_symbol(c) as u64))
2013-04-17 18:59:54 -07:00
}
fn rotate(&self, c: u8, frame: uint) -> Code {
Code(self.push_char(c).hash() & ((1u64 << (2 * frame)) - 1))
2013-04-17 18:59:54 -07:00
}
fn pack(string: &str) -> Code {
string.bytes().fold(Code(0u64), |a, b| a.push_char(b))
2013-04-17 18:59:54 -07:00
}
fn unpack(&self, frame: uint) -> String {
let mut key = self.hash();
let mut result = Vec::new();
for _ in range(0, frame) {
2013-04-17 18:59:54 -07:00
result.push(unpack_symbol((key as u8) & 3));
key >>= 2;
}
result.reverse();
String::from_utf8(result).unwrap()
2013-04-17 18:59:54 -07:00
}
}
// Hash table implementation
/// Action that `Table::lookup` applies to the entry for a key, whether the
/// entry already existed or was just created with a count of zero.
trait TableCallback {
/// Called with mutable access to the matching entry.
fn f(&self, entry: &mut Entry);
}
/// Callback that adds one to the visited entry's count.
struct BumpCallback;

impl TableCallback for BumpCallback {
    fn f(&self, entry: &mut Entry) {
        entry.count = entry.count + 1;
    }
}
/// Callback that prints the visited entry's count followed by a tab and the
/// label carried in the callback.
struct PrintCallback(&'static str);

impl TableCallback for PrintCallback {
    fn f(&self, entry: &mut Entry) {
        let PrintCallback(s) = *self;
        println!("{}\t{}", entry.count as int, s);
    }
}
/// One node in a bucket's singly linked chain.
struct Entry {
    /// Packed sequence this entry counts.
    code: Code,
    /// Value updated by the callbacks passed to `Table::lookup`.
    count: uint,
    /// Next entry stored in the same bucket, if any.
    next: Option<Box<Entry>>,
}
struct Table {
2014-10-11 01:46:59 +02:00
items: Vec<Option<Box<Entry>>>
}
/// Iterator over every entry of a `Table`, bucket by bucket.
struct Items<'a> {
    /// Next entry to yield from the chain being walked, or `None` when the
    /// iterator must advance to another bucket.
    cur: Option<&'a Entry>,
    /// Remaining bucket heads.
    items: slice::Iter<'a, Option<Box<Entry>>>,
}
impl Table {
fn new() -> Table {
Table {
items: range(0, TABLE_SIZE).map(|_| None).collect()
2013-04-17 18:59:54 -07:00
}
}
fn search_remainder<C:TableCallback>(item: &mut Entry, key: Code, c: C) {
match item.next {
None => {
let mut entry = box Entry {
2013-04-17 18:59:54 -07:00
code: key,
count: 0,
next: None,
};
c.f(&mut *entry);
2013-04-17 18:59:54 -07:00
item.next = Some(entry);
}
Some(ref mut entry) => {
if entry.code == key {
c.f(&mut **entry);
2013-04-17 18:59:54 -07:00
return;
}
Table::search_remainder(&mut **entry, key, c)
2013-04-17 18:59:54 -07:00
}
}
}
fn lookup<C:TableCallback>(&mut self, key: Code, c: C) {
let index = key.hash() % (TABLE_SIZE as u64);
2013-04-17 18:59:54 -07:00
{
2014-10-11 01:46:59 +02:00
if self.items[index as uint].is_none() {
let mut entry = box Entry {
2013-04-17 18:59:54 -07:00
code: key,
count: 0,
next: None,
};
c.f(&mut *entry);
2014-11-06 12:25:16 -05:00
self.items[index as uint] = Some(entry);
2013-04-17 18:59:54 -07:00
return;
}
}
{
2014-11-06 12:25:16 -05:00
let entry = self.items[index as uint].as_mut().unwrap();
2013-04-17 18:59:54 -07:00
if entry.code == key {
c.f(&mut **entry);
2013-04-17 18:59:54 -07:00
return;
}
Table::search_remainder(&mut **entry, key, c)
2013-04-17 18:59:54 -07:00
}
}
fn iter(&self) -> Items {
Items { cur: None, items: self.items.iter() }
}
}
2015-01-02 14:54:01 -05:00
impl<'a> Iterator for Items<'a> {
type Item = &'a Entry;
fn next(&mut self) -> Option<&'a Entry> {
let ret = match self.cur {
None => {
let i;
loop {
match self.items.next() {
None => return None,
Some(&None) => {}
Some(&Some(ref a)) => { i = &**a; break }
2013-04-17 18:59:54 -07:00
}
}
self.cur = Some(&*i);
&*i
}
Some(c) => c
};
match ret.next {
None => { self.cur = None; }
Some(ref next) => { self.cur = Some(&**next); }
2013-04-17 18:59:54 -07:00
}
return Some(ret);
2013-04-17 18:59:54 -07:00
}
}
// Main program
/// Maps a nucleotide letter to its 2-bit value: `A` = 0, `C` = 1, `G` = 2,
/// `T` = 3.
///
/// Panics with the offending character as the message for any other byte.
fn pack_symbol(c: u8) -> u8 {
    match c {
        b'A' => 0,
        b'C' => 1,
        b'G' => 2,
        b'T' => 3,
        _ => panic!("{}", c as char),
    }
}
fn unpack_symbol(c: u8) -> u8 {
2014-04-01 20:39:26 -07:00
TABLE[c as uint]
2013-04-17 18:59:54 -07:00
}
fn generate_frequencies(mut input: &[u8], frame: uint) -> Table {
let mut frequencies = Table::new();
if input.len() < frame { return frequencies; }
2013-04-17 18:59:54 -07:00
let mut code = Code(0);
2013-04-17 18:59:54 -07:00
// Pull first frame.
for _ in range(0, frame) {
2013-04-17 18:59:54 -07:00
code = code.push_char(input[0]);
2015-01-03 13:34:13 +13:00
input = &input[1..];
2013-04-17 18:59:54 -07:00
}
frequencies.lookup(code, BumpCallback);
while input.len() != 0 && input[0] != ('>' as u8) {
code = code.rotate(input[0], frame);
frequencies.lookup(code, BumpCallback);
2015-01-03 13:34:13 +13:00
input = &input[1..];
2013-04-17 18:59:54 -07:00
}
frequencies
2013-04-17 18:59:54 -07:00
}
/// Prints each sequence in `frequencies` with its share of the total count as
/// a percentage with three decimals, followed by a blank line.
///
/// Rows are ordered by descending count; equal counts are ordered by
/// ascending key, as the benchmark's output specification requires.
/// `frame` is the sequence length used to decode each key.
fn print_frequencies(frequencies: &Table, frame: uint) {
    let mut vector = Vec::new();
    for entry in frequencies.iter() {
        vector.push((entry.count, entry.code));
    }

    // Swapping only the counts between the two compared tuples yields
    // descending count with ascending code as the tie-break. Codes of equal
    // length compare in the same order as their letter strings because
    // A < C < G < T maps to 0 < 1 < 2 < 3.
    vector.as_mut_slice().sort_by(|&(count_a, key_a), &(count_b, key_b)| {
        (count_b, key_a).cmp(&(count_a, key_b))
    });

    let mut total_count = 0;
    for &(count, _) in vector.iter() {
        total_count += count;
    }

    for &(count, key) in vector.iter() {
        println!("{} {:.3}",
                 key.unpack(frame).as_slice(),
                 (count as f32 * 100.0) / (total_count as f32));
    }
    println!("");
}
/// Prints how often `occurrence` appears in `frequencies`.
///
/// The lookup inserts a zero-count entry when the sequence is absent, which
/// is why the table is taken mutably.
fn print_occurrences(frequencies: &mut Table, occurrence: &'static str) {
    let wanted = Code::pack(occurrence);
    let printer = PrintCallback(occurrence);
    frequencies.lookup(wanted, printer);
}
fn get_sequence<R: Buffer>(r: &mut R, key: &str) -> Vec<u8> {
let mut res = Vec::new();
for l in r.lines().map(|l| l.ok().unwrap())
.skip_while(|l| key != l.as_slice().slice_to(key.len())).skip(1)
{
res.push_all(l.as_slice().trim().as_bytes());
}
2014-12-26 01:17:30 +01:00
res.into_ascii_uppercase()
}
2013-04-17 18:59:54 -07:00
fn main() {
let input = if std::os::getenv("RUST_BENCH").is_some() {
let fd = std::io::File::open(&Path::new("shootout-k-nucleotide.data"));
get_sequence(&mut std::io::BufferedReader::new(fd), ">THREE")
} else {
get_sequence(&mut *std::io::stdin().lock(), ">THREE")
};
let input = Arc::new(input);
let nb_freqs: Vec<_> = range(1u, 3).map(|i| {
let input = input.clone();
2015-01-05 21:59:45 -08:00
(i, Thread::scoped(move|| generate_frequencies(input.as_slice(), i)))
}).collect();
let occ_freqs: Vec<_> = OCCURRENCES.iter().map(|&occ| {
let input = input.clone();
2015-01-05 21:59:45 -08:00
Thread::scoped(move|| generate_frequencies(input.as_slice(), occ.len()))
}).collect();
2014-09-14 20:27:36 -07:00
for (i, freq) in nb_freqs.into_iter() {
print_frequencies(&freq.join().ok().unwrap(), i);
}
2014-09-14 20:27:36 -07:00
for (&occ, freq) in OCCURRENCES.iter().zip(occ_freqs.into_iter()) {
print_occurrences(&mut freq.join().ok().unwrap(), occ);
2013-04-17 18:59:54 -07:00
}
}