2011-06-15 13:19:50 -05:00
|
|
|
|
|
|
|
|
2010-09-15 20:22:10 -05:00
|
|
|
/* -*- mode: rust; indent-tabs-mode: nil -*-
 * Implementation of 'fasta' benchmark from
 * Computer Language Benchmarks Game
 * http://shootout.alioth.debian.org/
 */
|
|
|
|
use std;
|
2011-12-13 18:25:51 -06:00
|
|
|
import vec;
|
|
|
|
import uint;
|
|
|
|
import int;
|
|
|
|
import str;
|
2010-09-15 20:22:10 -05:00
|
|
|
|
2011-06-15 13:19:50 -05:00
|
|
|
fn LINE_LENGTH() -> uint { ret 60u; }
|
2010-09-15 20:22:10 -05:00
|
|
|
|
2012-03-26 20:35:18 -05:00
|
|
|
// State of the benchmark's pseudo-random generator: a boxed record whose
// mutable `last` field holds the most recently generated value.
// `myrandom_next` reads and overwrites `last` on every call.
type myrandom = @{mut last: u32};
|
2012-01-13 04:48:55 -06:00
|
|
|
|
|
|
|
fn myrandom_next(r: myrandom, mx: u32) -> u32 {
|
|
|
|
r.last = (r.last * 3877u32 + 29573u32) % 139968u32;
|
|
|
|
mx * r.last / 139968u32
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
|
|
|
|
2011-07-27 07:19:39 -05:00
|
|
|
// One entry of a symbol table: the character `ch` and its weight `prob`.
// `make_cumulative` rewrites `prob` into a running total, which is the form
// `select_random` compares its argument against.
type aminoacids = {ch: char, prob: u32};
|
2010-09-15 20:22:10 -05:00
|
|
|
|
2011-09-12 04:27:30 -05:00
|
|
|
fn make_cumulative(aa: [aminoacids]) -> [aminoacids] {
|
2012-03-22 10:39:41 -05:00
|
|
|
let mut cp: u32 = 0u32;
|
|
|
|
let mut ans: [aminoacids] = [];
|
2012-04-06 13:01:43 -05:00
|
|
|
for aa.each {|a| cp += a.prob; ans += [{ch: a.ch, prob: cp}]; }
|
2011-06-15 13:19:50 -05:00
|
|
|
ret ans;
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
|
|
|
|
2011-09-12 04:27:30 -05:00
|
|
|
fn select_random(r: u32, genelist: [aminoacids]) -> char {
|
2011-08-19 17:16:48 -05:00
|
|
|
if r < genelist[0].prob { ret genelist[0].ch; }
|
2011-09-12 04:27:30 -05:00
|
|
|
fn bisect(v: [aminoacids], lo: uint, hi: uint, target: u32) -> char {
|
2011-07-27 07:19:39 -05:00
|
|
|
if hi > lo + 1u {
|
|
|
|
let mid: uint = lo + (hi - lo) / 2u;
|
2011-08-19 17:16:48 -05:00
|
|
|
if target < v[mid].prob {
|
2012-05-14 18:55:01 -05:00
|
|
|
ret bisect(v, lo, mid, target);
|
|
|
|
} else { ret bisect(v, mid, hi, target); }
|
2011-08-19 17:16:48 -05:00
|
|
|
} else { ret v[hi].ch; }
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
2011-08-13 02:10:18 -05:00
|
|
|
ret bisect(genelist, 0u, vec::len::<aminoacids>(genelist) - 1u, r);
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
|
|
|
|
2011-09-12 04:27:30 -05:00
|
|
|
fn make_random_fasta(id: str, desc: str, genelist: [aminoacids], n: int) {
|
2011-12-22 19:53:53 -06:00
|
|
|
log(debug, ">" + id + " " + desc);
|
2012-03-26 20:35:18 -05:00
|
|
|
let rng = @{mut last: std::rand::rng().next()};
|
2012-03-22 10:39:41 -05:00
|
|
|
let mut op: str = "";
|
2012-01-13 04:48:55 -06:00
|
|
|
uint::range(0u, n as uint) {|_i|
|
2012-02-12 01:49:03 -06:00
|
|
|
str::push_char(op, select_random(myrandom_next(rng, 100u32),
|
|
|
|
genelist));
|
2012-02-23 03:44:04 -06:00
|
|
|
if str::len(op) >= LINE_LENGTH() {
|
2011-12-22 19:53:53 -06:00
|
|
|
log(debug, op);
|
2011-12-22 16:42:52 -06:00
|
|
|
op = "";
|
|
|
|
}
|
2011-10-21 07:12:12 -05:00
|
|
|
}
|
2012-02-23 03:44:04 -06:00
|
|
|
if str::len(op) > 0u { log(debug, op); }
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
|
|
|
|
2012-02-12 01:49:03 -06:00
|
|
|
// Logs one FASTA section made by cycling through the bytes of `s`.
//
// A header line of the form ">id desc" is logged first. Then, for each index
// i in [0, n), the byte `s[i % len(s)]` is appended to a buffer that is
// logged and cleared whenever it reaches LINE_LENGTH() bytes; a final partial
// line is logged if anything is left over. All output goes through
// `log(debug, ...)`.
//
// The function is marked `unsafe` because bytes are appended with
// `str::unsafe::push_byte`.
// NOTE(review): `s` is presumably non-empty; an empty `s` makes the modulus
// zero.
fn make_repeat_fasta(id: str, desc: str, s: str, n: int) unsafe {
    let header: str = ">" + id + " " + desc;
    log(debug, header);

    let src_len: uint = str::len(s);
    let width: uint = LINE_LENGTH();
    let count: uint = n as uint;
    let mut line: str = "";

    uint::range(0u, count) {|pos|
        let b: u8 = s[pos % src_len];
        str::unsafe::push_byte(line, b);

        // Flush a full line and start a fresh one.
        if str::len(line) >= width {
            log(debug, line);
            line = "";
        }
    }

    // Emit whatever did not fill a complete line.
    if str::len(line) != 0u { log(debug, line); }
}
|
|
|
|
|
2011-07-27 07:19:39 -05:00
|
|
|
fn acid(ch: char, prob: u32) -> aminoacids { ret {ch: ch, prob: prob}; }
|
2011-07-26 07:49:40 -05:00
|
|
|
|
2012-05-24 00:53:50 -05:00
|
|
|
fn main(args: [str]) {
|
|
|
|
let args = if os::getenv("RUST_BENCH").is_some() {
|
|
|
|
["", "300000"]
|
|
|
|
} else if args.len() <= 1u {
|
|
|
|
["", "1000"]
|
|
|
|
} else {
|
|
|
|
args
|
|
|
|
};
|
|
|
|
|
|
|
|
let n = int::from_str(args[1]).get();
|
|
|
|
|
2011-08-11 23:37:27 -05:00
|
|
|
let iub: [aminoacids] =
|
2011-08-19 17:16:48 -05:00
|
|
|
make_cumulative([acid('a', 27u32), acid('c', 12u32), acid('g', 12u32),
|
|
|
|
acid('t', 27u32), acid('B', 2u32), acid('D', 2u32),
|
|
|
|
acid('H', 2u32), acid('K', 2u32), acid('M', 2u32),
|
|
|
|
acid('N', 2u32), acid('R', 2u32), acid('S', 2u32),
|
|
|
|
acid('V', 2u32), acid('W', 2u32), acid('Y', 2u32)]);
|
2011-08-11 23:37:27 -05:00
|
|
|
let homosapiens: [aminoacids] =
|
2011-08-19 17:16:48 -05:00
|
|
|
make_cumulative([acid('a', 30u32), acid('c', 20u32), acid('g', 20u32),
|
|
|
|
acid('t', 30u32)]);
|
2011-09-02 17:34:58 -05:00
|
|
|
let alu: str =
|
|
|
|
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
|
|
|
|
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
|
|
|
|
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
|
|
|
|
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
|
|
|
|
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
|
|
|
|
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
|
|
|
|
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
|
|
|
|
make_repeat_fasta("ONE", "Homo sapiens alu", alu, n * 2);
|
|
|
|
make_random_fasta("TWO", "IUB ambiguity codes", iub, n * 3);
|
|
|
|
make_random_fasta("THREE", "Homo sapiens frequency", homosapiens, n * 5);
|
2011-08-15 23:54:52 -05:00
|
|
|
}
|