2011-06-15 13:19:50 -05:00
|
|
|
|
|
|
|
|
2010-09-15 20:22:10 -05:00
|
|
|
/* -*- mode: rust; indent-tabs-mode: nil -*-
 * Implementation of the 'fasta' benchmark from the
 * Computer Language Benchmarks Game
 * http://shootout.alioth.debian.org/
 */
|
|
|
|
use std;
|
2011-05-17 13:41:41 -05:00
|
|
|
import std::vec;
|
|
|
|
import std::str;
|
|
|
|
import std::uint;
|
|
|
|
import std::int;
|
2010-09-15 20:22:10 -05:00
|
|
|
|
2011-06-15 13:19:50 -05:00
|
|
|
fn LINE_LENGTH() -> uint { ret 60u; }
|
2010-09-15 20:22:10 -05:00
|
|
|
|
|
|
|
// Linear congruential pseudo-random generator.
//
// `last` is the generator state; it is supplied by the caller as the seed
// and overwritten on every call to next().
obj myrandom(mutable u32 last) {
    // Advances the state and returns a value scaled by `mx`.
    //
    // After the update `last` is always below the modulus (139968), so the
    // result is below `mx`. The product `mx * last` is computed in u32, so
    // very large `mx` values (above roughly 30000) can exceed the u32 range.
    fn next(u32 mx) -> u32 {
        let u32 multiplier = 3877u32;
        let u32 increment = 29573u32;
        let u32 modulus = 139968u32;
        last = (last * multiplier + increment) % modulus;
        ret mx * last / modulus;
    }
}
|
|
|
|
|
2011-07-26 07:49:40 -05:00
|
|
|
// A symbol (`ch`) paired with an integer weight (`prob`). In this file `prob`
// holds either an individual weight (as built by acid) or, after
// make_cumulative, the running total of weights up to and including this
// entry.
type aminoacids = rec(char ch, u32 prob);
|
2010-09-15 20:22:10 -05:00
|
|
|
|
|
|
|
// Builds a new table in which each entry's `prob` is the sum of the `prob`
// values of all entries of `aa` up to and including that position. The `ch`
// values and their order are preserved; `aa` itself is not modified.
fn make_cumulative(vec[aminoacids] aa) -> vec[aminoacids] {
    let vec[aminoacids] cumulative = [];
    let u32 running_total = 0u32;
    for (aminoacids entry in aa) {
        running_total += entry.prob;
        cumulative += [rec(ch=entry.ch, prob=running_total)];
    }
    ret cumulative;
}
|
|
|
|
|
|
|
|
fn select_random(u32 r, vec[aminoacids] genelist) -> char {
|
2011-07-26 07:49:40 -05:00
|
|
|
if (r < genelist.(0).prob) { ret genelist.(0).ch; }
|
2011-06-15 13:19:50 -05:00
|
|
|
fn bisect(vec[aminoacids] v, uint lo, uint hi, u32 target) -> char {
|
|
|
|
if (hi > lo + 1u) {
|
|
|
|
let uint mid = lo + (hi - lo) / 2u;
|
2011-07-26 07:49:40 -05:00
|
|
|
if (target < v.(mid).prob) {
|
2011-06-15 13:19:50 -05:00
|
|
|
be bisect(v, lo, mid, target);
|
|
|
|
} else { be bisect(v, mid, hi, target); }
|
2011-07-26 07:49:40 -05:00
|
|
|
} else { ret v.(hi).ch; }
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
2011-06-15 13:19:50 -05:00
|
|
|
ret bisect(genelist, 0u, vec::len[aminoacids](genelist) - 1u, r);
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn make_random_fasta(str id, str desc, vec[aminoacids] genelist, int n) {
|
2011-06-15 13:19:50 -05:00
|
|
|
log ">" + id + " " + desc;
|
|
|
|
auto rng = myrandom(std::rand::mk_rng().next());
|
|
|
|
let str op = "";
|
|
|
|
for each (uint i in uint::range(0u, n as uint)) {
|
|
|
|
str::push_byte(op, select_random(rng.next(100u32), genelist) as u8);
|
|
|
|
if (str::byte_len(op) >= LINE_LENGTH()) { log op; op = ""; }
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
2011-06-15 13:19:50 -05:00
|
|
|
if (str::byte_len(op) > 0u) { log op; }
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn make_repeat_fasta(str id, str desc, str s, int n) {
|
2011-06-15 13:19:50 -05:00
|
|
|
log ">" + id + " " + desc;
|
|
|
|
let str op = "";
|
|
|
|
let uint sl = str::byte_len(s);
|
|
|
|
for each (uint i in uint::range(0u, n as uint)) {
|
|
|
|
str::push_byte(op, s.(i % sl));
|
|
|
|
if (str::byte_len(op) >= LINE_LENGTH()) { log op; op = ""; }
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
2011-06-15 13:19:50 -05:00
|
|
|
if (str::byte_len(op) > 0u) { log op; }
|
2010-09-15 20:22:10 -05:00
|
|
|
}
|
|
|
|
|
2011-07-26 07:56:24 -05:00
|
|
|
fn acid(char ch, u32 prob) -> aminoacids { ret rec(ch=ch, prob=prob); }
|
2011-07-26 07:49:40 -05:00
|
|
|
|
2010-09-15 20:22:10 -05:00
|
|
|
fn main(vec[str] args) {
|
2011-06-15 13:19:50 -05:00
|
|
|
let vec[aminoacids] iub =
|
2011-07-26 07:49:40 -05:00
|
|
|
make_cumulative([acid('a', 27u32), acid('c', 12u32), acid('g', 12u32),
|
|
|
|
acid('t', 27u32), acid('B', 2u32), acid('D', 2u32),
|
|
|
|
acid('H', 2u32), acid('K', 2u32), acid('M', 2u32),
|
|
|
|
acid('N', 2u32), acid('R', 2u32), acid('S', 2u32),
|
|
|
|
acid('V', 2u32), acid('W', 2u32), acid('Y', 2u32)]);
|
2011-06-15 13:19:50 -05:00
|
|
|
let vec[aminoacids] homosapiens =
|
2011-07-26 07:49:40 -05:00
|
|
|
make_cumulative([acid('a', 30u32), acid('c', 20u32), acid('g', 20u32),
|
|
|
|
acid('t', 30u32)]);
|
2011-06-15 13:19:50 -05:00
|
|
|
let str alu =
|
|
|
|
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
|
|
|
|
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
|
|
|
|
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
|
|
|
|
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
|
|
|
|
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
|
|
|
|
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
|
|
|
|
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
|
|
|
|
let int n = 512;
|
|
|
|
make_repeat_fasta("ONE", "Homo sapiens alu", alu, n * 2);
|
|
|
|
make_random_fasta("TWO", "IUB ambiguity codes", iub, n * 3);
|
|
|
|
make_random_fasta("THREE", "Homo sapiens frequency", homosapiens, n * 5);
|
|
|
|
}
|