rust/src/test/bench/shootout-k-nucleotide.rs

284 lines
6.9 KiB
Rust
Raw Normal View History

2014-02-05 16:33:10 -06:00
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// ignore-pretty
use std::str;
use std::vec;
2013-04-17 18:59:54 -07:00
// Nucleotide letters indexed by their 2-bit symbol value; this is the
// inverse of the mapping performed by `pack_symbol` (A=0, C=1, G=2, T=3).
static TABLE: [u8, ..4] = [ 'A' as u8, 'C' as u8, 'G' as u8, 'T' as u8 ];
// Number of bucket slots created by `Table::new`, and the modulus applied to
// a code's hash in `Table::lookup`. Note that `2 << 16` is 131072.
static TABLE_SIZE: uint = 2 << 16;
// Sequences whose occurrence counts `main` prints, in this order.
static OCCURRENCES: [&'static str, ..5] = [
"GGT",
"GGTA",
"GGTATT",
"GGTATTTTAATT",
"GGTATTTTAATTTATAGT",
];
// Code implementation
#[deriving(Eq, TotalOrd, TotalEq)]
2013-04-17 18:59:54 -07:00
struct Code(u64);
impl Code {
fn hash(&self) -> u64 {
let Code(ret) = *self;
return ret;
2013-04-17 18:59:54 -07:00
}
fn push_char(&self, c: u8) -> Code {
Code((self.hash() << 2) + (pack_symbol(c) as u64))
2013-04-17 18:59:54 -07:00
}
fn rotate(&self, c: u8, frame: i32) -> Code {
Code(self.push_char(c).hash() & ((1u64 << (2 * (frame as u64))) - 1))
2013-04-17 18:59:54 -07:00
}
fn pack(string: &str) -> Code {
string.bytes().fold(Code(0u64), |a, b| a.push_char(b))
2013-04-17 18:59:54 -07:00
}
// FIXME: Inefficient.
2013-04-17 18:59:54 -07:00
fn unpack(&self, frame: i32) -> ~str {
let mut key = self.hash();
2013-04-17 18:59:54 -07:00
let mut result = ~[];
for _ in range(0, frame) {
2013-04-17 18:59:54 -07:00
result.push(unpack_symbol((key as u8) & 3));
key >>= 2;
}
result.reverse();
str::from_utf8_owned(result).unwrap()
2013-04-17 18:59:54 -07:00
}
}
// Hash table implementation
// Action that `Table::lookup` applies to the entry matching the looked-up
// key, whether that entry already existed or was just created.
trait TableCallback {
// Invoked with mutable access to the matching entry.
fn f(&self, entry: &mut Entry);
}
// Callback that counts one more occurrence of the entry it is given.
struct BumpCallback;

impl TableCallback for BumpCallback {
    fn f(&self, entry: &mut Entry) {
        entry.count = entry.count + 1;
    }
}
// Callback that prints the entry's count followed by a tab and the label
// this callback was constructed with.
struct PrintCallback(&'static str);

impl TableCallback for PrintCallback {
    fn f(&self, entry: &mut Entry) {
        match *self {
            PrintCallback(label) => {
                println!("{}\t{}", entry.count as int, label);
            }
        }
    }
}
// One node in a hash bucket's singly linked chain.
struct Entry {
// Packed sequence this entry stands for; compared against lookup keys.
code: Code,
// Occurrence count; incremented by `BumpCallback`, printed by `PrintCallback`.
count: i32,
// Next entry in the same bucket, if any.
next: Option<~Entry>,
}
// Fixed-size hash table from `Code` to `Entry`, with chaining on collision.
struct Table {
// Initialised to 0 by `Table::new`; not read or updated by any code
// visible in this file.
count: i32,
// Bucket heads; `Table::lookup` indexes this by `key.hash() % TABLE_SIZE`.
items: ~[Option<~Entry>]
}
// Iterator over every entry stored in a `Table`.
struct Items<'a> {
    // Entry to yield on the next call, or `None` when the next bucket has to
    // be fetched from `items` first.
    cur: Option<&'a Entry>,
    // Iterator over the table's bucket heads.
    items: vec::Items<'a, Option<~Entry>>,
}
impl Table {
fn new() -> Table {
Table {
count: 0,
items: vec::from_fn(TABLE_SIZE, |_| None),
2013-04-17 18:59:54 -07:00
}
}
fn search_remainder<C:TableCallback>(item: &mut Entry, key: Code, c: C) {
match item.next {
None => {
let mut entry = ~Entry {
code: key,
count: 0,
next: None,
};
c.f(entry);
item.next = Some(entry);
}
Some(ref mut entry) => {
if entry.code == key {
c.f(*entry);
return;
}
Table::search_remainder(*entry, key, c)
}
}
}
fn lookup<C:TableCallback>(&mut self, key: Code, c: C) {
let index = key.hash() % (TABLE_SIZE as u64);
2013-04-17 18:59:54 -07:00
{
if self.items[index].is_none() {
let mut entry = ~Entry {
code: key,
count: 0,
next: None,
};
c.f(entry);
self.items[index] = Some(entry);
return;
}
}
{
let entry = &mut *self.items[index].get_mut_ref();
2013-04-17 18:59:54 -07:00
if entry.code == key {
c.f(*entry);
return;
}
Table::search_remainder(*entry, key, c)
}
}
fn iter<'a>(&'a self) -> Items<'a> {
Items { cur: None, items: self.items.iter() }
}
}
impl<'a> Iterator<&'a Entry> for Items<'a> {
fn next(&mut self) -> Option<&'a Entry> {
let ret = match self.cur {
None => {
let i;
loop {
match self.items.next() {
None => return None,
Some(&None) => {}
Some(&Some(ref a)) => { i = &**a; break }
2013-04-17 18:59:54 -07:00
}
}
self.cur = Some(&*i);
&*i
}
Some(c) => c
};
match ret.next {
None => { self.cur = None; }
Some(ref next) => { self.cur = Some(&**next); }
2013-04-17 18:59:54 -07:00
}
return Some(ret);
2013-04-17 18:59:54 -07:00
}
}
// Main program
fn pack_symbol(c: u8) -> u8 {
match c as char {
'a' | 'A' => 0,
'c' | 'C' => 1,
'g' | 'G' => 2,
't' | 'T' => 3,
_ => fail!("{}", c as char),
2013-04-17 18:59:54 -07:00
}
}
fn unpack_symbol(c: u8) -> u8 {
TABLE[c]
}
fn next_char<'a>(mut buf: &'a [u8]) -> &'a [u8] {
loop {
buf = buf.slice(1, buf.len());
2013-04-17 18:59:54 -07:00
if buf.len() == 0 {
break;
}
if buf[0] != (' ' as u8) && buf[0] != ('\t' as u8) &&
buf[0] != ('\n' as u8) && buf[0] != 0 {
break;
}
}
buf
}
fn generate_frequencies(frequencies: &mut Table,
mut input: &[u8],
frame: i32) {
let mut code = Code(0);
2013-04-17 18:59:54 -07:00
// Pull first frame.
for _ in range(0, frame) {
2013-04-17 18:59:54 -07:00
code = code.push_char(input[0]);
input = next_char(input);
}
frequencies.lookup(code, BumpCallback);
while input.len() != 0 && input[0] != ('>' as u8) {
code = code.rotate(input[0], frame);
frequencies.lookup(code, BumpCallback);
input = next_char(input);
}
}
fn print_frequencies(frequencies: &Table, frame: i32) {
let mut vector = ~[];
for entry in frequencies.iter() {
2013-04-17 18:59:54 -07:00
vector.push((entry.code, entry.count));
}
vector.sort();
2013-04-17 18:59:54 -07:00
let mut total_count = 0;
for &(_, count) in vector.iter() {
2013-04-17 18:59:54 -07:00
total_count += count;
}
for &(key, count) in vector.iter() {
println!("{} {:.3f}",
key.unpack(frame),
(count as f32 * 100.0) / (total_count as f32));
2013-04-17 18:59:54 -07:00
}
}
// Prints the count stored in `frequencies` for `occurrence`, followed by a
// tab and the sequence itself. A sequence that was never counted is inserted
// with a count of zero by the lookup.
fn print_occurrences(frequencies: &mut Table, occurrence: &'static str) {
    let key = Code::pack(occurrence);
    let printer = PrintCallback(occurrence);
    frequencies.lookup(key, printer)
}
// Reads the embedded data file, takes everything after the ">THREE" header
// line, prints the frequency tables for windows of 1 and 2 symbols, and then
// prints the occurrence count of each sequence in `OCCURRENCES`.
fn main() {
    let data = include_str!("shootout-k-nucleotide.data");

    // Locate the header line and skip past its terminating newline.
    let header_start = data.find_str(">THREE").unwrap();
    let header_len = data.slice_from(header_start).find_str("\n").unwrap();
    let sequence = data.slice_from(header_start + header_len + 1).as_bytes();

    // Each table lives in its own scope so it is freed before the next one
    // is filled.
    {
        let mut singles = Table::new();
        generate_frequencies(&mut singles, sequence, 1);
        print_frequencies(&singles, 1);
    }
    {
        let mut pairs = Table::new();
        generate_frequencies(&mut pairs, sequence, 2);
        print_frequencies(&pairs, 2);
    }

    for occurrence in OCCURRENCES.iter() {
        let mut counts = Table::new();
        let frame = occurrence.len() as i32;
        generate_frequencies(&mut counts, sequence, frame);
        print_occurrences(&mut counts, *occurrence);
    }
}