2014-02-05 16:33:10 -06:00
|
|
|
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
|
|
|
// file at the top-level directory of this distribution and at
|
|
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
|
|
// option. This file may not be copied, modified, or distributed
|
|
|
|
// except according to those terms.
|
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
use std::cmp::min;
|
2014-02-28 12:55:30 -08:00
|
|
|
use std::io::{stdout, IoResult};
|
2013-05-24 19:35:29 -07:00
|
|
|
use std::os;
|
2014-03-08 18:11:52 -05:00
|
|
|
use std::slice::bytes::copy_memory;
|
2013-04-17 14:19:25 -07:00
|
|
|
|
|
|
|
static LINE_LEN: uint = 60;
|
|
|
|
static LOOKUP_SIZE: uint = 4 * 1024;
|
|
|
|
static LOOKUP_SCALE: f32 = (LOOKUP_SIZE - 1) as f32;
|
|
|
|
|
|
|
|
// Random number generator constants
|
|
|
|
static IM: u32 = 139968;
|
|
|
|
static IA: u32 = 3877;
|
|
|
|
static IC: u32 = 29573;
|
|
|
|
|
|
|
|
static ALU: &'static str = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG\
|
|
|
|
GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA\
|
|
|
|
GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA\
|
|
|
|
AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT\
|
|
|
|
CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC\
|
|
|
|
CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG\
|
|
|
|
CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
|
|
|
|
|
|
|
|
static NULL_AMINO_ACID: AminoAcid = AminoAcid { c: ' ' as u8, p: 0.0 };
|
|
|
|
|
|
|
|
static IUB: [AminoAcid, ..15] = [
|
|
|
|
AminoAcid { c: 'a' as u8, p: 0.27 },
|
|
|
|
AminoAcid { c: 'c' as u8, p: 0.12 },
|
|
|
|
AminoAcid { c: 'g' as u8, p: 0.12 },
|
|
|
|
AminoAcid { c: 't' as u8, p: 0.27 },
|
|
|
|
AminoAcid { c: 'B' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'D' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'H' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'K' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'M' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'N' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'R' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'S' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'V' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'W' as u8, p: 0.02 },
|
|
|
|
AminoAcid { c: 'Y' as u8, p: 0.02 },
|
|
|
|
];
|
|
|
|
|
|
|
|
static HOMO_SAPIENS: [AminoAcid, ..4] = [
|
|
|
|
AminoAcid { c: 'a' as u8, p: 0.3029549426680 },
|
|
|
|
AminoAcid { c: 'c' as u8, p: 0.1979883004921 },
|
|
|
|
AminoAcid { c: 'g' as u8, p: 0.1975473066391 },
|
|
|
|
AminoAcid { c: 't' as u8, p: 0.3015094502008 },
|
|
|
|
];
|
|
|
|
|
2014-01-26 03:43:42 -05:00
|
|
|
// FIXME: Use map().
|
2014-03-05 14:02:44 -08:00
|
|
|
fn sum_and_scale(a: &'static [AminoAcid]) -> Vec<AminoAcid> {
|
|
|
|
let mut result = Vec::new();
|
2013-04-17 14:19:25 -07:00
|
|
|
let mut p = 0f32;
|
2013-08-03 12:45:23 -04:00
|
|
|
for a_i in a.iter() {
|
2013-04-17 14:19:25 -07:00
|
|
|
let mut a_i = *a_i;
|
|
|
|
p += a_i.p;
|
|
|
|
a_i.p = p * LOOKUP_SCALE;
|
|
|
|
result.push(a_i);
|
|
|
|
}
|
2014-03-05 15:28:08 -08:00
|
|
|
let result_len = result.len();
|
|
|
|
result.get_mut(result_len - 1).p = LOOKUP_SCALE;
|
2013-04-17 14:19:25 -07:00
|
|
|
result
|
|
|
|
}
|
|
|
|
|
|
|
|
struct AminoAcid {
|
|
|
|
c: u8,
|
|
|
|
p: f32,
|
|
|
|
}
|
|
|
|
|
2014-08-27 21:46:52 -04:00
|
|
|
struct RepeatFasta<'a, W:'a> {
|
2013-04-17 14:19:25 -07:00
|
|
|
alu: &'static str,
|
2014-02-21 15:41:51 -08:00
|
|
|
out: &'a mut W
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
impl<'a, W: Writer> RepeatFasta<'a, W> {
|
|
|
|
fn new(alu: &'static str, w: &'a mut W) -> RepeatFasta<'a, W> {
|
|
|
|
RepeatFasta { alu: alu, out: w }
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
fn make(&mut self, n: uint) -> IoResult<()> {
|
|
|
|
let alu_len = self.alu.len();
|
2014-04-17 15:59:07 -07:00
|
|
|
let mut buf = Vec::from_elem(alu_len + LINE_LEN, 0u8);
|
2014-02-21 15:41:51 -08:00
|
|
|
let alu: &[u8] = self.alu.as_bytes();
|
|
|
|
|
2014-04-17 15:59:07 -07:00
|
|
|
copy_memory(buf.as_mut_slice(), alu);
|
2014-02-21 15:41:51 -08:00
|
|
|
let buf_len = buf.len();
|
|
|
|
copy_memory(buf.mut_slice(alu_len, buf_len),
|
|
|
|
alu.slice_to(LINE_LEN));
|
|
|
|
|
|
|
|
let mut pos = 0;
|
|
|
|
let mut bytes;
|
|
|
|
let mut n = n;
|
|
|
|
while n > 0 {
|
|
|
|
bytes = min(LINE_LEN, n);
|
|
|
|
try!(self.out.write(buf.slice(pos, pos + bytes)));
|
|
|
|
try!(self.out.write_u8('\n' as u8));
|
|
|
|
pos += bytes;
|
|
|
|
if pos > alu_len {
|
|
|
|
pos -= alu_len;
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|
2014-02-21 15:41:51 -08:00
|
|
|
n -= bytes;
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|
2014-02-21 15:41:51 -08:00
|
|
|
Ok(())
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
fn make_lookup(a: &[AminoAcid]) -> [AminoAcid, ..LOOKUP_SIZE] {
|
|
|
|
let mut lookup = [ NULL_AMINO_ACID, ..LOOKUP_SIZE ];
|
|
|
|
let mut j = 0;
|
|
|
|
for (i, slot) in lookup.mut_iter().enumerate() {
|
|
|
|
while a[j].p < (i as f32) {
|
|
|
|
j += 1;
|
|
|
|
}
|
|
|
|
*slot = a[j];
|
|
|
|
}
|
|
|
|
lookup
|
|
|
|
}
|
|
|
|
|
2014-08-27 21:46:52 -04:00
|
|
|
struct RandomFasta<'a, W:'a> {
|
2013-04-17 14:19:25 -07:00
|
|
|
seed: u32,
|
|
|
|
lookup: [AminoAcid, ..LOOKUP_SIZE],
|
2014-02-21 15:41:51 -08:00
|
|
|
out: &'a mut W,
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
impl<'a, W: Writer> RandomFasta<'a, W> {
|
|
|
|
fn new(w: &'a mut W, a: &[AminoAcid]) -> RandomFasta<'a, W> {
|
2013-04-17 14:19:25 -07:00
|
|
|
RandomFasta {
|
|
|
|
seed: 42,
|
2014-02-21 15:41:51 -08:00
|
|
|
out: w,
|
|
|
|
lookup: make_lookup(a),
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn rng(&mut self, max: f32) -> f32 {
|
|
|
|
self.seed = (self.seed * IA + IC) % IM;
|
|
|
|
max * (self.seed as f32) / (IM as f32)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn nextc(&mut self) -> u8 {
|
|
|
|
let r = self.rng(1.0);
|
2013-08-03 12:45:23 -04:00
|
|
|
for a in self.lookup.iter() {
|
2013-04-17 14:19:25 -07:00
|
|
|
if a.p >= r {
|
|
|
|
return a.c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
0
|
|
|
|
}
|
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
fn make(&mut self, n: uint) -> IoResult<()> {
|
|
|
|
let lines = n / LINE_LEN;
|
|
|
|
let chars_left = n % LINE_LEN;
|
|
|
|
let mut buf = [0, ..LINE_LEN + 1];
|
|
|
|
|
|
|
|
for _ in range(0, lines) {
|
|
|
|
for i in range(0u, LINE_LEN) {
|
2013-04-17 14:19:25 -07:00
|
|
|
buf[i] = self.nextc();
|
|
|
|
}
|
2014-02-21 15:41:51 -08:00
|
|
|
buf[LINE_LEN] = '\n' as u8;
|
|
|
|
try!(self.out.write(buf));
|
|
|
|
}
|
|
|
|
for i in range(0u, chars_left) {
|
|
|
|
buf[i] = self.nextc();
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|
2014-02-21 15:41:51 -08:00
|
|
|
self.out.write(buf.slice_to(chars_left))
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn main() {
|
2014-02-21 15:41:51 -08:00
|
|
|
let args = os::args();
|
2014-05-05 00:29:59 -07:00
|
|
|
let args = args.as_slice();
|
2014-02-21 15:41:51 -08:00
|
|
|
let n = if args.len() > 1 {
|
2014-05-19 23:19:56 -07:00
|
|
|
from_str::<uint>(args[1].as_slice()).unwrap()
|
2014-02-21 15:41:51 -08:00
|
|
|
} else {
|
|
|
|
5
|
|
|
|
};
|
|
|
|
|
2014-02-28 12:55:30 -08:00
|
|
|
let mut out = stdout();
|
2014-02-21 15:41:51 -08:00
|
|
|
|
|
|
|
out.write_line(">ONE Homo sapiens alu").unwrap();
|
|
|
|
{
|
|
|
|
let mut repeat = RepeatFasta::new(ALU, &mut out);
|
|
|
|
repeat.make(n * 2).unwrap();
|
|
|
|
}
|
2013-04-17 14:19:25 -07:00
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
out.write_line(">TWO IUB ambiguity codes").unwrap();
|
|
|
|
let iub = sum_and_scale(IUB);
|
2014-03-05 15:28:08 -08:00
|
|
|
let mut random = RandomFasta::new(&mut out, iub.as_slice());
|
2014-02-21 15:41:51 -08:00
|
|
|
random.make(n * 3).unwrap();
|
2013-04-17 14:19:25 -07:00
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
random.out.write_line(">THREE Homo sapiens frequency").unwrap();
|
|
|
|
let homo_sapiens = sum_and_scale(HOMO_SAPIENS);
|
2014-03-05 15:28:08 -08:00
|
|
|
random.lookup = make_lookup(homo_sapiens.as_slice());
|
2014-02-21 15:41:51 -08:00
|
|
|
random.make(n * 5).unwrap();
|
2013-04-17 14:19:25 -07:00
|
|
|
|
2014-02-21 15:41:51 -08:00
|
|
|
random.out.write_str("\n").unwrap();
|
2013-04-17 14:19:25 -07:00
|
|
|
}
|