2014-02-05 16:33:10 -06:00
|
|
|
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2014-03-30 12:04:57 +02:00
|
|
|
// ignore-android see #10393 #13206
|
2013-04-19 19:21:53 -07:00
|
|
|
|
2014-05-22 16:57:53 -07:00
|
|
|
use std::string::String;
|
2014-03-08 18:11:52 -05:00
|
|
|
use std::slice;
|
2014-06-07 11:13:26 -07:00
|
|
|
use std::sync::{Arc, Future};
|
2013-04-17 18:59:54 -07:00
|
|
|
|
|
|
|
/// Nucleotide letter for each 2-bit symbol value. Indexing this table is the
/// inverse of `pack_symbol` ('A' = 0, 'C' = 1, 'G' = 2, 'T' = 3).
static TABLE: [u8, ..4] = [
    'A' as u8,
    'C' as u8,
    'G' as u8,
    'T' as u8,
];

/// Number of bucket slots allocated by `Table::new` (2 << 16 == 131072).
/// `Table::lookup` reduces a code's hash modulo this value to pick a bucket.
static TABLE_SIZE: uint = 2 << 16;

/// Sequences whose occurrence counts `main` reports; each one is counted
/// using a frame equal to its own length.
static OCCURRENCES: [&'static str, ..5] = [
    "GGT",
    "GGTA",
    "GGTATT",
    "GGTATTTTAATT",
    "GGTATTTTAATTTATAGT",
];
|
|
|
|
|
|
|
|
// Code implementation
|
|
|
|
|
2014-05-31 10:43:52 -07:00
|
|
|
#[deriving(PartialEq, PartialOrd, Ord, Eq)]
|
2013-04-17 18:59:54 -07:00
|
|
|
struct Code(u64);
|
|
|
|
|
|
|
|
impl Code {
|
|
|
|
fn hash(&self) -> u64 {
|
2014-02-21 15:41:51 -08:00
|
|
|
let Code(ret) = *self;
|
|
|
|
return ret;
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn push_char(&self, c: u8) -> Code {
|
2014-02-21 15:41:51 -08:00
|
|
|
Code((self.hash() << 2) + (pack_symbol(c) as u64))
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
|
2014-03-30 12:04:57 +02:00
|
|
|
fn rotate(&self, c: u8, frame: uint) -> Code {
|
|
|
|
Code(self.push_char(c).hash() & ((1u64 << (2 * frame)) - 1))
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn pack(string: &str) -> Code {
|
2014-02-21 15:41:51 -08:00
|
|
|
string.bytes().fold(Code(0u64), |a, b| a.push_char(b))
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
|
2014-05-22 16:57:53 -07:00
|
|
|
fn unpack(&self, frame: uint) -> String {
|
2014-02-21 15:41:51 -08:00
|
|
|
let mut key = self.hash();
|
2014-03-05 14:02:44 -08:00
|
|
|
let mut result = Vec::new();
|
2013-08-05 23:43:06 -04:00
|
|
|
for _ in range(0, frame) {
|
2013-04-17 18:59:54 -07:00
|
|
|
result.push(unpack_symbol((key as u8) & 3));
|
|
|
|
key >>= 2;
|
|
|
|
}
|
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
result.reverse();
|
2014-05-22 16:57:53 -07:00
|
|
|
String::from_utf8(result).unwrap()
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Hash table implementation
|
|
|
|
|
|
|
|
trait TableCallback {
|
|
|
|
fn f(&self, entry: &mut Entry);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct BumpCallback;
|
|
|
|
|
|
|
|
impl TableCallback for BumpCallback {
|
|
|
|
fn f(&self, entry: &mut Entry) {
|
|
|
|
entry.count += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
struct PrintCallback(&'static str);
|
|
|
|
|
|
|
|
impl TableCallback for PrintCallback {
|
|
|
|
fn f(&self, entry: &mut Entry) {
|
2014-02-21 15:41:51 -08:00
|
|
|
let PrintCallback(s) = *self;
|
|
|
|
println!("{}\t{}", entry.count as int, s);
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
struct Entry {
|
|
|
|
code: Code,
|
2014-03-30 12:04:57 +02:00
|
|
|
count: uint,
|
2014-05-05 18:56:44 -07:00
|
|
|
next: Option<Box<Entry>>,
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
struct Table {
|
2014-03-30 12:04:57 +02:00
|
|
|
count: uint,
|
2014-05-05 18:56:44 -07:00
|
|
|
items: Vec<Option<Box<Entry>>> }
|
2014-02-21 15:41:51 -08:00
|
|
|
|
|
|
|
struct Items<'a> {
|
|
|
|
cur: Option<&'a Entry>,
|
2014-05-05 18:56:44 -07:00
|
|
|
items: slice::Items<'a, Option<Box<Entry>>>,
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Table {
|
|
|
|
fn new() -> Table {
|
|
|
|
Table {
|
|
|
|
count: 0,
|
2014-03-05 15:28:08 -08:00
|
|
|
items: Vec::from_fn(TABLE_SIZE, |_| None),
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn search_remainder<C:TableCallback>(item: &mut Entry, key: Code, c: C) {
|
|
|
|
match item.next {
|
|
|
|
None => {
|
2014-05-05 18:56:44 -07:00
|
|
|
let mut entry = box Entry {
|
2013-04-17 18:59:54 -07:00
|
|
|
code: key,
|
|
|
|
count: 0,
|
|
|
|
next: None,
|
|
|
|
};
|
2014-06-24 23:11:57 -07:00
|
|
|
c.f(&mut *entry);
|
2013-04-17 18:59:54 -07:00
|
|
|
item.next = Some(entry);
|
|
|
|
}
|
|
|
|
Some(ref mut entry) => {
|
|
|
|
if entry.code == key {
|
2014-06-24 23:11:57 -07:00
|
|
|
c.f(&mut **entry);
|
2013-04-17 18:59:54 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-06-24 23:11:57 -07:00
|
|
|
Table::search_remainder(&mut **entry, key, c)
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn lookup<C:TableCallback>(&mut self, key: Code, c: C) {
|
2014-02-21 15:41:51 -08:00
|
|
|
let index = key.hash() % (TABLE_SIZE as u64);
|
2013-04-17 18:59:54 -07:00
|
|
|
|
|
|
|
{
|
2014-03-05 15:28:08 -08:00
|
|
|
if self.items.get(index as uint).is_none() {
|
2014-05-05 18:56:44 -07:00
|
|
|
let mut entry = box Entry {
|
2013-04-17 18:59:54 -07:00
|
|
|
code: key,
|
|
|
|
count: 0,
|
|
|
|
next: None,
|
|
|
|
};
|
2014-06-24 23:11:57 -07:00
|
|
|
c.f(&mut *entry);
|
2014-03-05 15:28:08 -08:00
|
|
|
*self.items.get_mut(index as uint) = Some(entry);
|
2013-04-17 18:59:54 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
2014-03-05 15:28:08 -08:00
|
|
|
let entry = &mut *self.items.get_mut(index as uint).get_mut_ref();
|
2013-04-17 18:59:54 -07:00
|
|
|
if entry.code == key {
|
2014-06-24 23:11:57 -07:00
|
|
|
c.f(&mut **entry);
|
2013-04-17 18:59:54 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-06-24 23:11:57 -07:00
|
|
|
Table::search_remainder(&mut **entry, key, c)
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
fn iter<'a>(&'a self) -> Items<'a> {
|
|
|
|
Items { cur: None, items: self.items.iter() }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Iterator<&'a Entry> for Items<'a> {
    /// Yields the next entry: first the rest of the chain being walked, then
    /// the head of the next non-empty bucket. Returns `None` once every
    /// bucket slot has been consumed.
    fn next(&mut self) -> Option<&'a Entry> {
        let current: &'a Entry = match self.cur {
            Some(pending) => pending,
            None => {
                // Skip empty buckets until a chain head turns up.
                let head;
                loop {
                    match self.items.next() {
                        Some(&Some(ref boxed)) => { head = &**boxed; break }
                        Some(&None) => {}
                        None => return None,
                    }
                }
                head
            }
        };

        // Remember where to resume: the successor in this chain, or nothing
        // when this entry was the last one of its bucket.
        self.cur = match current.next {
            Some(ref following) => Some(&**following),
            None => None,
        };
        Some(current)
    }
}
|
|
|
|
|
|
|
|
// Main program
|
|
|
|
|
|
|
|
fn pack_symbol(c: u8) -> u8 {
|
2014-02-21 15:41:51 -08:00
|
|
|
match c as char {
|
2014-03-30 12:04:57 +02:00
|
|
|
'A' => 0,
|
|
|
|
'C' => 1,
|
|
|
|
'G' => 2,
|
|
|
|
'T' => 3,
|
2014-02-21 15:41:51 -08:00
|
|
|
_ => fail!("{}", c as char),
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn unpack_symbol(c: u8) -> u8 {
|
2014-04-01 20:39:26 -07:00
|
|
|
TABLE[c as uint]
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
|
2014-04-17 09:38:55 +02:00
|
|
|
fn generate_frequencies(mut input: &[u8], frame: uint) -> Table {
|
|
|
|
let mut frequencies = Table::new();
|
|
|
|
if input.len() < frame { return frequencies; }
|
2013-04-17 18:59:54 -07:00
|
|
|
let mut code = Code(0);
|
2013-05-03 19:25:04 -04:00
|
|
|
|
2013-04-17 18:59:54 -07:00
|
|
|
// Pull first frame.
|
2013-08-05 23:43:06 -04:00
|
|
|
for _ in range(0, frame) {
|
2013-04-17 18:59:54 -07:00
|
|
|
code = code.push_char(input[0]);
|
2014-03-30 12:04:57 +02:00
|
|
|
input = input.slice_from(1);
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
frequencies.lookup(code, BumpCallback);
|
|
|
|
|
|
|
|
while input.len() != 0 && input[0] != ('>' as u8) {
|
|
|
|
code = code.rotate(input[0], frame);
|
|
|
|
frequencies.lookup(code, BumpCallback);
|
2014-03-30 12:04:57 +02:00
|
|
|
input = input.slice_from(1);
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
2014-04-17 09:38:55 +02:00
|
|
|
frequencies
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
|
2014-03-30 12:04:57 +02:00
|
|
|
fn print_frequencies(frequencies: &Table, frame: uint) {
|
2014-03-05 14:02:44 -08:00
|
|
|
let mut vector = Vec::new();
|
2014-02-21 15:41:51 -08:00
|
|
|
for entry in frequencies.iter() {
|
2014-03-30 12:04:57 +02:00
|
|
|
vector.push((entry.count, entry.code));
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
2014-03-05 15:28:08 -08:00
|
|
|
vector.as_mut_slice().sort();
|
2013-04-17 18:59:54 -07:00
|
|
|
|
|
|
|
let mut total_count = 0;
|
2014-03-30 12:04:57 +02:00
|
|
|
for &(count, _) in vector.iter() {
|
2013-04-17 18:59:54 -07:00
|
|
|
total_count += count;
|
|
|
|
}
|
|
|
|
|
2014-03-30 12:04:57 +02:00
|
|
|
for &(count, key) in vector.iter().rev() {
|
2013-09-24 22:16:43 -07:00
|
|
|
println!("{} {:.3f}",
|
2014-04-10 20:55:34 +10:00
|
|
|
key.unpack(frame).as_slice(),
|
2014-02-21 15:41:51 -08:00
|
|
|
(count as f32 * 100.0) / (total_count as f32));
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
2014-03-30 12:04:57 +02:00
|
|
|
println!("");
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
fn print_occurrences(frequencies: &mut Table, occurrence: &'static str) {
|
|
|
|
frequencies.lookup(Code::pack(occurrence), PrintCallback(occurrence))
|
|
|
|
}
|
|
|
|
|
2014-04-10 20:55:34 +10:00
|
|
|
fn get_sequence<R: Buffer>(r: &mut R, key: &str) -> Vec<u8> {
|
|
|
|
let mut res = Vec::new();
|
2014-03-30 12:04:57 +02:00
|
|
|
for l in r.lines().map(|l| l.ok().unwrap())
|
2014-05-19 23:19:56 -07:00
|
|
|
.skip_while(|l| key != l.as_slice().slice_to(key.len())).skip(1)
|
2014-03-30 12:04:57 +02:00
|
|
|
{
|
2014-05-19 23:19:56 -07:00
|
|
|
res.push_all(l.as_slice().trim().as_bytes());
|
2014-03-30 12:04:57 +02:00
|
|
|
}
|
2014-04-10 20:55:34 +10:00
|
|
|
for b in res.mut_iter() {
|
|
|
|
*b = b.to_ascii().to_upper().to_byte();
|
|
|
|
}
|
|
|
|
res
|
2014-03-30 12:04:57 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 18:59:54 -07:00
|
|
|
fn main() {
|
2014-03-30 12:04:57 +02:00
|
|
|
let input = if std::os::getenv("RUST_BENCH").is_some() {
|
|
|
|
let fd = std::io::File::open(&Path::new("shootout-k-nucleotide.data"));
|
|
|
|
get_sequence(&mut std::io::BufferedReader::new(fd), ">THREE")
|
|
|
|
} else {
|
|
|
|
get_sequence(&mut std::io::stdin(), ">THREE")
|
|
|
|
};
|
2014-04-17 09:38:55 +02:00
|
|
|
let input = Arc::new(input);
|
|
|
|
|
|
|
|
let nb_freqs: Vec<(uint, Future<Table>)> = range(1u, 3).map(|i| {
|
|
|
|
let input = input.clone();
|
|
|
|
(i, Future::spawn(proc() generate_frequencies(input.as_slice(), i)))
|
|
|
|
}).collect();
|
|
|
|
let occ_freqs: Vec<Future<Table>> = OCCURRENCES.iter().map(|&occ| {
|
|
|
|
let input = input.clone();
|
|
|
|
Future::spawn(proc() generate_frequencies(input.as_slice(), occ.len()))
|
|
|
|
}).collect();
|
|
|
|
|
|
|
|
for (i, freq) in nb_freqs.move_iter() {
|
|
|
|
print_frequencies(&freq.unwrap(), i);
|
|
|
|
}
|
|
|
|
for (&occ, freq) in OCCURRENCES.iter().zip(occ_freqs.move_iter()) {
|
|
|
|
print_occurrences(&mut freq.unwrap(), occ);
|
2013-04-17 18:59:54 -07:00
|
|
|
}
|
|
|
|
}
|