2013-06-16 23:48:46 +02:00
|
|
|
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
|
|
|
|
|
2011-06-15 11:19:50 -07:00
|
|
|
|
|
|
|
|
2010-09-15 18:22:10 -07:00
|
|
|
/* -*- mode: rust; indent-tabs-mode: nil -*-
 * Implementation of 'fasta' benchmark from
 * Computer Language Benchmarks Game
 * http://shootout.alioth.debian.org/
 */
|
2013-05-20 17:07:24 -07:00
|
|
|
extern mod extra;
|
2013-05-24 19:35:29 -07:00
|
|
|
|
|
|
|
use std::int;
|
|
|
|
use std::io;
|
|
|
|
use std::os;
|
2013-05-20 17:07:24 -07:00
|
|
|
use std::rand::Rng;
|
2013-05-24 19:35:29 -07:00
|
|
|
use std::rand;
|
|
|
|
use std::result;
|
|
|
|
use std::str;
|
|
|
|
use std::uint;
|
2010-09-15 18:22:10 -07:00
|
|
|
|
2013-06-16 23:46:31 +02:00
|
|
|
/// Number of sequence characters accumulated before a line is written out.
static LINE_LENGTH: uint = 60u;
|
|
|
|
|
2013-01-28 18:55:44 -08:00
|
|
|
/// State of the small linear congruential generator driven by
/// `myrandom_next`.
struct MyRandom {
    // Most recent generator state; overwritten on every `myrandom_next` call.
    last: u32
}
|
2012-01-13 11:48:55 +01:00
|
|
|
|
2013-02-22 16:08:16 -08:00
|
|
|
fn myrandom_next(r: @mut MyRandom, mx: u32) -> u32 {
|
2012-01-13 11:48:55 +01:00
|
|
|
r.last = (r.last * 3877u32 + 29573u32) % 139968u32;
|
|
|
|
mx * r.last / 139968u32
|
2010-09-15 18:22:10 -07:00
|
|
|
}
|
|
|
|
|
2013-07-02 12:47:32 -07:00
|
|
|
#[deriving(Clone)]
|
2013-01-28 18:55:44 -08:00
|
|
|
struct AminoAcids {
|
|
|
|
ch: char,
|
|
|
|
prob: u32
|
|
|
|
}
|
2010-09-15 18:22:10 -07:00
|
|
|
|
2013-01-28 18:55:44 -08:00
|
|
|
/// Converts a table of per-symbol weights into a table of cumulative weights.
///
/// The returned vector has the same characters in the same order as `aa`;
/// each `prob` is the sum of the input `prob` values up to and including
/// that entry.
fn make_cumulative(aa: ~[AminoAcids]) -> ~[AminoAcids] {
    // Running total of the weights seen so far.
    let mut cp: u32 = 0u32;
    let mut ans: ~[AminoAcids] = ~[];
    for aa.iter().advance |a| {
        cp += a.prob;
        ans.push(AminoAcids {ch: a.ch, prob: cp});
    }
    ans
}
|
|
|
|
|
2013-01-28 18:55:44 -08:00
|
|
|
fn select_random(r: u32, genelist: ~[AminoAcids]) -> char {
|
2012-08-01 17:30:05 -07:00
|
|
|
if r < genelist[0].prob { return genelist[0].ch; }
|
2013-01-28 18:55:44 -08:00
|
|
|
fn bisect(v: ~[AminoAcids], lo: uint, hi: uint, target: u32) -> char {
|
2011-07-27 14:19:39 +02:00
|
|
|
if hi > lo + 1u {
|
|
|
|
let mid: uint = lo + (hi - lo) / 2u;
|
2011-08-19 15:16:48 -07:00
|
|
|
if target < v[mid].prob {
|
2012-08-01 17:30:05 -07:00
|
|
|
return bisect(v, lo, mid, target);
|
2013-07-02 12:47:32 -07:00
|
|
|
} else {
|
|
|
|
return bisect(v, mid, hi, target);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return v[hi].ch;
|
|
|
|
}
|
2010-09-15 18:22:10 -07:00
|
|
|
}
|
2013-07-02 12:47:32 -07:00
|
|
|
bisect(genelist.clone(), 0, genelist.len() - 1, r)
|
2010-09-15 18:22:10 -07:00
|
|
|
}
|
|
|
|
|
2013-03-14 11:22:14 -07:00
|
|
|
fn make_random_fasta(wr: @io::Writer,
|
|
|
|
id: ~str,
|
|
|
|
desc: ~str,
|
|
|
|
genelist: ~[AminoAcids],
|
|
|
|
n: int) {
|
2013-05-29 20:10:16 +02:00
|
|
|
wr.write_line(~">" + id + " " + desc);
|
2013-05-06 19:29:04 -07:00
|
|
|
let mut rng = rand::rng();
|
|
|
|
let rng = @mut MyRandom {
|
|
|
|
last: rng.next()
|
|
|
|
};
|
2012-07-13 22:57:48 -07:00
|
|
|
let mut op: ~str = ~"";
|
2012-06-30 16:19:07 -07:00
|
|
|
for uint::range(0u, n as uint) |_i| {
|
2013-06-10 17:42:24 +10:00
|
|
|
op.push_char(select_random(myrandom_next(rng, 100u32),
|
2013-07-02 12:47:32 -07:00
|
|
|
genelist.clone()));
|
2013-06-16 23:46:31 +02:00
|
|
|
if op.len() >= LINE_LENGTH {
|
2012-06-09 01:08:26 -07:00
|
|
|
wr.write_line(op);
|
2012-07-13 22:57:48 -07:00
|
|
|
op = ~"";
|
2011-12-22 14:42:52 -08:00
|
|
|
}
|
2011-10-21 14:12:12 +02:00
|
|
|
}
|
2013-06-10 00:44:58 +10:00
|
|
|
if op.len() > 0u { wr.write_line(op); }
|
2010-09-15 18:22:10 -07:00
|
|
|
}
|
|
|
|
|
2013-03-14 11:22:14 -07:00
|
|
|
fn make_repeat_fasta(wr: @io::Writer, id: ~str, desc: ~str, s: ~str, n: int) {
|
2013-06-16 23:46:31 +02:00
|
|
|
wr.write_line(~">" + id + " " + desc);
|
|
|
|
let mut op = str::with_capacity( LINE_LENGTH );
|
|
|
|
let sl = s.len();
|
2013-06-16 23:48:46 +02:00
|
|
|
for uint::range(0u, n as uint) |i| {
|
|
|
|
if (op.len() >= LINE_LENGTH) {
|
|
|
|
wr.write_line( op );
|
|
|
|
op = str::with_capacity( LINE_LENGTH );
|
2011-12-22 14:42:52 -08:00
|
|
|
}
|
2013-06-16 23:48:46 +02:00
|
|
|
op.push_char( s[i % sl] as char );
|
|
|
|
}
|
|
|
|
if op.len() > 0 {
|
|
|
|
wr.write_line(op)
|
2011-10-21 14:12:12 +02:00
|
|
|
}
|
2010-09-15 18:22:10 -07:00
|
|
|
}
|
|
|
|
|
2013-01-28 18:55:44 -08:00
|
|
|
fn acid(ch: char, prob: u32) -> AminoAcids {
|
2013-06-21 08:29:53 -04:00
|
|
|
AminoAcids {ch: ch, prob: prob}
|
2013-01-28 18:55:44 -08:00
|
|
|
}
|
2011-07-26 14:49:40 +02:00
|
|
|
|
2012-10-03 19:16:27 -07:00
|
|
|
fn main() {
|
|
|
|
let args = os::args();
|
2013-06-16 23:48:46 +02:00
|
|
|
let args = if os::getenv("RUST_BENCH").is_some() {
|
2012-06-09 01:08:26 -07:00
|
|
|
// alioth tests k-nucleotide with this data at 25,000,000
|
2012-07-13 22:57:48 -07:00
|
|
|
~[~"", ~"5000000"]
|
2012-05-23 22:53:50 -07:00
|
|
|
} else if args.len() <= 1u {
|
2012-07-13 22:57:48 -07:00
|
|
|
~[~"", ~"1000"]
|
2012-05-23 22:53:50 -07:00
|
|
|
} else {
|
|
|
|
args
|
|
|
|
};
|
|
|
|
|
2013-06-16 23:48:46 +02:00
|
|
|
let writer = if os::getenv("RUST_BENCH").is_some() {
|
2013-07-26 18:36:51 -07:00
|
|
|
io::file_writer(&Path("./shootout-fasta.data"),
|
|
|
|
[io::Truncate, io::Create]).unwrap()
|
2012-06-09 01:08:26 -07:00
|
|
|
} else {
|
|
|
|
io::stdout()
|
|
|
|
};
|
|
|
|
|
2012-05-23 22:53:50 -07:00
|
|
|
let n = int::from_str(args[1]).get();
|
|
|
|
|
2013-01-28 18:55:44 -08:00
|
|
|
let iub: ~[AminoAcids] =
|
2012-06-29 16:26:56 -07:00
|
|
|
make_cumulative(~[acid('a', 27u32), acid('c', 12u32), acid('g', 12u32),
|
2011-08-19 15:16:48 -07:00
|
|
|
acid('t', 27u32), acid('B', 2u32), acid('D', 2u32),
|
|
|
|
acid('H', 2u32), acid('K', 2u32), acid('M', 2u32),
|
|
|
|
acid('N', 2u32), acid('R', 2u32), acid('S', 2u32),
|
2012-06-29 16:26:56 -07:00
|
|
|
acid('V', 2u32), acid('W', 2u32), acid('Y', 2u32)]);
|
2013-01-28 18:55:44 -08:00
|
|
|
let homosapiens: ~[AminoAcids] =
|
2012-06-29 16:26:56 -07:00
|
|
|
make_cumulative(~[acid('a', 30u32), acid('c', 20u32), acid('g', 20u32),
|
|
|
|
acid('t', 30u32)]);
|
2012-07-13 22:57:48 -07:00
|
|
|
let alu: ~str =
|
2013-05-29 20:10:16 +02:00
|
|
|
~"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
|
|
|
|
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
|
|
|
|
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
|
|
|
|
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
|
|
|
|
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
|
|
|
|
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
|
|
|
|
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
|
2012-07-13 22:57:48 -07:00
|
|
|
make_repeat_fasta(writer, ~"ONE", ~"Homo sapiens alu", alu, n * 2);
|
|
|
|
make_random_fasta(writer, ~"TWO", ~"IUB ambiguity codes", iub, n * 3);
|
|
|
|
make_random_fasta(writer, ~"THREE",
|
|
|
|
~"Homo sapiens frequency", homosapiens, n * 5);
|
2011-08-15 21:54:52 -07:00
|
|
|
}
|