rust/src/test/bench/shootout-fasta.rs

156 lines
4.8 KiB
Rust
Raw Normal View History

// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
2013-10-23 03:49:18 -05:00
#[feature(managed_boxes)];
/* -*- mode: rust; indent-tabs-mode: nil -*-
* Implementation of 'fasta' benchmark from
* Computer Language Benchmarks Game
* http://shootout.alioth.debian.org/
*/
extern mod extra;
use std::int;
use std::rt::io;
use std::os;
use std::rand::Rng;
use std::rand;
use std::str;
2013-06-16 16:46:31 -05:00
/// Length (as reported by `len()`) a line buffer must reach before the
/// FASTA writers below flush it as one output line.
static LINE_LENGTH: uint = 60u;
2013-01-28 20:55:44 -06:00
/// State of the benchmark's linear congruential pseudo-random generator.
struct MyRandom {
    /// Most recent generator state; `myrandom_next` overwrites it on every
    /// call.
    last: u32
}
/// Advances the generator `r` by one step and returns a value scaled by `mx`.
///
/// The state becomes `(last * 3877 + 29573) % 139968`, and the result is
/// `mx * last / 139968` computed with integer arithmetic on the new state.
/// As long as `mx * last` fits in a `u32`, the result is below `mx` for any
/// non-zero `mx`.
fn myrandom_next(r: @mut MyRandom, mx: u32) -> u32 {
    let multiplier = 3877u32;
    let increment = 29573u32;
    let modulus = 139968u32;

    let advanced = (r.last * multiplier + increment) % modulus;
    r.last = advanced;
    mx * advanced / modulus
}
2013-07-02 14:47:32 -05:00
#[deriving(Clone)]
2013-01-28 20:55:44 -06:00
struct AminoAcids {
ch: char,
prob: u32
}
2013-01-28 20:55:44 -06:00
/// Converts per-symbol weights into cumulative weights.
///
/// Returns a new vector holding the same symbols in the same order, where
/// each entry's `prob` is the sum of the input `prob` values up to and
/// including that entry. An empty input yields an empty vector.
fn make_cumulative(aa: ~[AminoAcids]) -> ~[AminoAcids] {
    let mut running_total = 0u32;
    let mut cumulative: ~[AminoAcids] = ~[];
    for entry in aa.iter() {
        running_total += entry.prob;
        cumulative.push(AminoAcids { ch: entry.ch, prob: running_total });
    }
    cumulative
}
2013-01-28 20:55:44 -06:00
fn select_random(r: u32, genelist: ~[AminoAcids]) -> char {
2012-08-01 19:30:05 -05:00
if r < genelist[0].prob { return genelist[0].ch; }
2013-01-28 20:55:44 -06:00
fn bisect(v: ~[AminoAcids], lo: uint, hi: uint, target: u32) -> char {
2011-07-27 07:19:39 -05:00
if hi > lo + 1u {
let mid: uint = lo + (hi - lo) / 2u;
if target < v[mid].prob {
2012-08-01 19:30:05 -05:00
return bisect(v, lo, mid, target);
2013-07-02 14:47:32 -05:00
} else {
return bisect(v, mid, hi, target);
}
} else {
return v[hi].ch;
}
}
2013-07-02 14:47:32 -05:00
bisect(genelist.clone(), 0, genelist.len() - 1, r)
}
fn make_random_fasta(wr: @mut io::Writer,
id: ~str,
desc: ~str,
genelist: ~[AminoAcids],
n: int) {
writeln!(wr, ">{} {}", id, desc);
2013-05-06 21:29:04 -05:00
let mut rng = rand::rng();
let rng = @mut MyRandom {
last: rng.gen()
2013-05-06 21:29:04 -05:00
};
let mut op: ~str = ~"";
for _ in range(0u, n as uint) {
op.push_char(select_random(myrandom_next(rng, 100u32),
2013-07-02 14:47:32 -05:00
genelist.clone()));
2013-06-16 16:46:31 -05:00
if op.len() >= LINE_LENGTH {
writeln!(wr, "{}", op);
op = ~"";
}
}
if op.len() > 0u { writeln!(wr, "{}", op); }
}
fn make_repeat_fasta(wr: @mut io::Writer, id: ~str, desc: ~str, s: ~str, n: int) {
writeln!(wr, ">{} {}", id, desc);
2013-06-16 16:46:31 -05:00
let mut op = str::with_capacity( LINE_LENGTH );
let sl = s.len();
for i in range(0u, n as uint) {
if (op.len() >= LINE_LENGTH) {
writeln!(wr, "{}", op);
op = str::with_capacity( LINE_LENGTH );
}
op.push_char( s[i % sl] as char );
}
if op.len() > 0 {
writeln!(wr, "{}", op);
}
}
2013-01-28 20:55:44 -06:00
fn acid(ch: char, prob: u32) -> AminoAcids {
AminoAcids {ch: ch, prob: prob}
2013-01-28 20:55:44 -06:00
}
fn main() {
2013-10-17 23:08:48 -05:00
use std::rt::io::file::FileInfo;
let args = os::args();
let args = if os::getenv("RUST_BENCH").is_some() {
// alioth tests k-nucleotide with this data at 25,000,000
~[~"", ~"5000000"]
} else if args.len() <= 1u {
~[~"", ~"1000"]
} else {
args
};
let writer = if os::getenv("RUST_BENCH").is_some() {
2013-10-17 23:08:48 -05:00
let file = Path::new("./shootout-fasta.data").open_writer(io::CreateOrTruncate);
@mut file as @mut io::Writer
} else {
@mut io::stdout() as @mut io::Writer
};
let n = from_str::<int>(args[1]).unwrap();
2013-01-28 20:55:44 -06:00
let iub: ~[AminoAcids] =
make_cumulative(~[acid('a', 27u32), acid('c', 12u32), acid('g', 12u32),
acid('t', 27u32), acid('B', 2u32), acid('D', 2u32),
acid('H', 2u32), acid('K', 2u32), acid('M', 2u32),
acid('N', 2u32), acid('R', 2u32), acid('S', 2u32),
acid('V', 2u32), acid('W', 2u32), acid('Y', 2u32)]);
2013-01-28 20:55:44 -06:00
let homosapiens: ~[AminoAcids] =
make_cumulative(~[acid('a', 30u32), acid('c', 20u32), acid('g', 20u32),
acid('t', 30u32)]);
let alu: ~str =
~"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
make_repeat_fasta(writer, ~"ONE", ~"Homo sapiens alu", alu, n * 2);
make_random_fasta(writer, ~"TWO", ~"IUB ambiguity codes", iub, n * 3);
make_random_fasta(writer, ~"THREE",
~"Homo sapiens frequency", homosapiens, n * 5);
}