rust/src/test/bench/shootout-fasta.rs

117 lines
3.9 KiB
Rust
Raw Normal View History

/* -*- mode: rust; indent-tabs-mode: nil -*-
* Implementation of 'fasta' benchmark from
* Computer Language Benchmarks Game
* http://shootout.alioth.debian.org/
*/
use std;
import vec;
import uint;
import int;
import str;
import io::writer_util;
fn LINE_LENGTH() -> uint { ret 60u; }
2012-03-26 20:35:18 -05:00
// Shared, mutable state of the generator: the most recently produced value.
type myrandom = @{mut last: u32};

// Steps the linear congruential generator held in `r` once and scales the
// new state into the range selected by `mx`.
//
// The state is updated in place to (last * 3877 + 29573) % 139968, and the
// value returned is mx * state / 139968 using integer division. Because the
// new state is always below 139968, the result is below `mx` as long as
// `mx * state` fits in a u32.
fn myrandom_next(r: myrandom, mx: u32) -> u32 {
    let modulus: u32 = 139968u32;
    let next: u32 = (r.last * 3877u32 + 29573u32) % modulus;
    r.last = next;
    ret mx * next / modulus;
}
2011-07-27 07:19:39 -05:00
// A symbol together with a weight. Depending on context `prob` is either the
// symbol's own weight or a running total (see make_cumulative).
type aminoacids = {ch: char, prob: u32};

// Builds a new table with the same symbols, in the same order, where each
// entry's `prob` is the sum of the input weights up to and including that
// entry. An empty input yields an empty table.
fn make_cumulative(aa: ~[aminoacids]) -> ~[aminoacids] {
    let mut running: u32 = 0u32;
    let mut table: ~[aminoacids] = ~[];
    for uint::range(0u, aa.len()) |i| {
        running += aa[i].prob;
        table += ~[{ch: aa[i].ch, prob: running}];
    }
    ret table;
}
fn select_random(r: u32, genelist: ~[aminoacids]) -> char {
if r < genelist[0].prob { ret genelist[0].ch; }
fn bisect(v: ~[aminoacids], lo: uint, hi: uint, target: u32) -> char {
2011-07-27 07:19:39 -05:00
if hi > lo + 1u {
let mid: uint = lo + (hi - lo) / 2u;
if target < v[mid].prob {
2012-05-14 18:55:01 -05:00
ret bisect(v, lo, mid, target);
} else { ret bisect(v, mid, hi, target); }
} else { ret v[hi].ch; }
}
ret bisect(genelist, 0u, vec::len::<aminoacids>(genelist) - 1u, r);
}
fn make_random_fasta(wr: io::writer, id: str, desc: str, genelist: ~[aminoacids], n: int) {
wr.write_line(">" + id + " " + desc);
2012-03-26 20:35:18 -05:00
let rng = @{mut last: std::rand::rng().next()};
let mut op: str = "";
2012-06-30 18:19:07 -05:00
for uint::range(0u, n as uint) |_i| {
str::push_char(op, select_random(myrandom_next(rng, 100u32),
genelist));
2012-02-23 03:44:04 -06:00
if str::len(op) >= LINE_LENGTH() {
wr.write_line(op);
op = "";
}
}
if str::len(op) > 0u { wr.write_line(op); }
}
// Writes one FASTA record to `wr`: a header line ">id desc" followed by `n`
// bytes taken from `s`, cycling back to the start of `s` whenever its end is
// reached, broken into lines of at most LINE_LENGTH() bytes.
//
// Bytes are appended with str::unsafe::push_byte, so `s` is copied byte by
// byte rather than character by character. `s` must be non-empty whenever
// `n` is positive, because the source index is computed as `i % len(s)`.
//
// A negative `n` is treated as zero (only the header is written). Casting a
// negative int straight to uint would wrap around to a huge loop bound.
fn make_repeat_fasta(wr: io::writer, id: str, desc: str, s: str, n: int) unsafe {
    wr.write_line(">" + id + " " + desc);
    let count: uint = if n > 0 { n as uint } else { 0u };
    let mut op: str = "";
    let sl: uint = str::len(s);
    for uint::range(0u, count) |i| {
        str::unsafe::push_byte(op, s[i % sl]);
        // Flush the buffer as soon as a full line has been collected.
        if str::len(op) >= LINE_LENGTH() {
            wr.write_line(op);
            op = "";
        }
    }
    // Emit the trailing partial line, if any.
    if str::len(op) > 0u { wr.write_line(op); }
}
2011-07-27 07:19:39 -05:00
fn acid(ch: char, prob: u32) -> aminoacids { ret {ch: ch, prob: prob}; }
fn main(args: ~[str]) {
let args = if os::getenv("RUST_BENCH").is_some() {
// alioth tests k-nucleotide with this data at 25,000,000
~["", "5000000"]
} else if args.len() <= 1u {
~["", "1000"]
} else {
args
};
let writer = if os::getenv("RUST_BENCH").is_some() {
result::get(io::file_writer("./shootout-fasta.data", ~[io::truncate, io::create]))
} else {
io::stdout()
};
let n = int::from_str(args[1]).get();
let iub: ~[aminoacids] =
make_cumulative(~[acid('a', 27u32), acid('c', 12u32), acid('g', 12u32),
acid('t', 27u32), acid('B', 2u32), acid('D', 2u32),
acid('H', 2u32), acid('K', 2u32), acid('M', 2u32),
acid('N', 2u32), acid('R', 2u32), acid('S', 2u32),
acid('V', 2u32), acid('W', 2u32), acid('Y', 2u32)]);
let homosapiens: ~[aminoacids] =
make_cumulative(~[acid('a', 30u32), acid('c', 20u32), acid('g', 20u32),
acid('t', 30u32)]);
2011-09-02 17:34:58 -05:00
let alu: str =
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
make_repeat_fasta(writer, "ONE", "Homo sapiens alu", alu, n * 2);
make_random_fasta(writer, "TWO", "IUB ambiguity codes", iub, n * 3);
make_random_fasta(writer, "THREE",
"Homo sapiens frequency", homosapiens, n * 5);
}