├── .copier-answers.yml ├── .github └── workflows │ ├── msrv.yml │ ├── format.yml │ ├── doc.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── Readme.md ├── LICENSE ├── Cargo.toml ├── src ├── lib.rs ├── error.rs ├── format.rs ├── constants.rs ├── format │ ├── cigar.rs │ ├── quality.rs │ ├── sequence.rs │ ├── fasta.rs │ ├── gff.rs │ ├── fastq.rs │ ├── vcf │ │ ├── header.rs │ │ └── record.rs │ └── vcf.rs └── values.rs └── examples └── default.rs /.copier-answers.yml: -------------------------------------------------------------------------------- 1 | _commit: 1.7.1 2 | _src_path: gh:natir/copier-rust 3 | author_email: pierre@marijon.fr 4 | author_name: Pierre Marijon 5 | ci: github 6 | cli: false 7 | copyright_date: '2024' 8 | copyright_holder: Pierre Marijon 9 | copyright_holder_email: pierre@marijon.fr 10 | copyright_license: MIT 11 | forge_host: github.com 12 | forge_namespace: natir 13 | forge_repo_name: biotest 14 | msrv: '1.75' 15 | proc_macro: true 16 | project_description: Generate random test data for bioinformatics 17 | project_name: biotest 18 | -------------------------------------------------------------------------------- /.github/workflows/msrv.yml: -------------------------------------------------------------------------------- 1 | name: MSRV 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: ["**"] 8 | 9 | jobs: 10 | minimum_rust_version: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v1 14 | 15 | - uses: actions-rs/toolchain@v1 16 | with: 17 | toolchain: 1.74 18 | override: true 19 | 20 | - name: check if Readme matches MSRV defined here 21 | run: grep '1.74' Readme.md 22 | 23 | - name: Run tests 24 | uses: actions-rs/cargo@v1 25 | with: 26 | command: test 27 | args: --all-features --no-fail-fast 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Rust ### 2 | # 
Generated by Cargo 3 | # will have compiled files and executables 4 | debug/ 5 | target/ 6 | 7 | {% if not cli -%} 8 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 9 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 10 | Cargo.lock 11 | {% endif %} 12 | # These are backup files generated by rustfmt 13 | **/*.rs.bk 14 | 15 | # MSVC Windows builds of rustc generate these, which store debugging information 16 | *.pdb 17 | 18 | # Ignore tarpauline report 19 | tarpaulin-report* 20 | cobertura.xml 21 | lcov.info 22 | 23 | # Ignore flamegraphe output 24 | flamegraph.svg 25 | perf.data* -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [Unreleased] yyyy-mm-dd 4 | 5 | ### Added 6 | 7 | - Add support of Cigar format 8 | - Add support of Gff format 9 | 10 | ### Changed 11 | 12 | - Replace derive_builder by typed_builder 13 | 14 | ### Deprecated 15 | 16 | ### Removed 17 | 18 | - All error related to Builder 19 | 20 | ### Fixed 21 | 22 | ### Security 23 | 24 | ## biotest 0.2 Sandslash 2024-06-04 25 | 26 | ### Added 27 | 28 | - Add a single Sequence generator 29 | - Add a single Quality generator 30 | - Sequence, Quality, Fasta and Fastq generator case use weighted probability distribution 31 | - Add method to let user choose seed of RNG 32 | 33 | ### Changed 34 | 35 | ### Deprecated 36 | 37 | ### Removed 38 | 39 | ### Fixed 40 | 41 | ### Security 42 | -------------------------------------------------------------------------------- /.github/workflows/format.yml: -------------------------------------------------------------------------------- 1 | name: Lints 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: ["**"] 8 | 9 | jobs: 10 | lints: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout sources 14 | uses: 
actions/checkout@v1 15 | 16 | - name: Install stable toolchain 17 | uses: actions-rs/toolchain@v1 18 | with: 19 | profile: minimal 20 | toolchain: stable 21 | override: true 22 | components: rustfmt, clippy 23 | 24 | - name: Run cargo fmt 25 | uses: actions-rs/cargo@v1 26 | with: 27 | command: fmt 28 | args: --all -- --check 29 | 30 | - name: Run cargo clippy 31 | uses: actions-rs/cargo@v1 32 | with: 33 | command: clippy 34 | args: --all-features -- -D warnings 35 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # biotest 🧬 💻 2 | 3 | [![License](https://img.shields.io/badge/license-MIT-green)](https://github.com/natir/biotest/blob/master/LICENSE) 4 | ![Test](https://github.com/natir/biotest/workflows/Test/badge.svg) 5 | ![Lints](https://github.com/natir/biotest/workflows/Lints/badge.svg) 6 | ![MSRV](https://github.com/natir/biotest/workflows/MSRV/badge.svg) 7 | [![codecov](https://codecov.io/gh/natir/biotest/graph/badge.svg?token=7KY1Z4RHDB)](https://codecov.io/gh/natir/biotest) 8 | [![Documentation](https://github.com/natir/biotest/workflows/Documentation/badge.svg)](https://natir.github.io/biotest/biotest) 9 | 10 | Generate random test data for bioinformatics 11 | 12 | ## Usage 13 | 14 | In your Cargo.toml add 15 | ```toml 16 | biotest = { version = "0.2", features = ["fasta", "fastq", "vcf", "sequence", "quality"] } 17 | ``` 18 | 19 | ## Minimum supported Rust version 20 | 21 | Currently the minimum supported Rust version is 1.74. 
22 | -------------------------------------------------------------------------------- /.github/workflows/doc.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | release: 10 | name: Github Pages 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout Repository 15 | uses: actions/checkout@v2 16 | 17 | - name: Install Rust toolchain 18 | uses: actions-rs/toolchain@v1 19 | with: 20 | toolchain: stable 21 | profile: minimal 22 | override: true 23 | components: rustfmt, rust-src 24 | 25 | 26 | - name: Setup mdBook 27 | uses: peaceiris/actions-mdbook@v1 28 | with: 29 | mdbook-version: 'latest' 30 | 31 | - name: Build Documentation 32 | uses: actions-rs/cargo@v1 33 | with: 34 | command: doc 35 | args: --all --no-deps --all-features 36 | 37 | - name: Deploy Documentation 38 | uses: peaceiris/actions-gh-pages@v3 39 | with: 40 | github_token: ${{ secrets.GITHUB_TOKEN }} 41 | publish_dir: ./target/doc 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT 2 | 3 | Copyright (c) 2024 Pierre Marijon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "biotest" 3 | version = "0.2.0" 4 | authors = ["Pierre Marijon "] 5 | edition = "2021" 6 | description = "Generate random test data for bioinformatics" 7 | rust-version = "1.74" 8 | 9 | homepage = "https://github.com/natir/biotest" 10 | repository = "https://github.com/natir/biotest" 11 | documentation = "https://natir.github.io/biotest/biotest" 12 | 13 | readme = "Readme.md" 14 | license-file = "LICENSE" 15 | 16 | 17 | [features] 18 | cigar = [] 19 | fasta = [] 20 | fastq = [] 21 | gff = [] 22 | quality = [] 23 | sequence = [] 24 | vcf = [] 25 | 26 | 27 | [dependencies] 28 | rand = { version = "0.8" } 29 | typed-builder = { version = "0.18" } 30 | 31 | # Error management 32 | thiserror = { version = "1" } 33 | 34 | # Logging and error management 35 | log = { version = "0.4" } 36 | 37 | 38 | [dev-dependencies] 39 | # Test 40 | tempfile = { version = "3" } 41 | assert_matches = { version = "1" } 42 | 43 | # Examples 44 | clap = { version = "4", features = ["derive"] } 45 | stderrlog = { version = "0.6" } 46 | 47 | 48 | [[example]] 49 | name = "default" 50 | required-features = ["fasta", "fastq", "vcf", "sequence", "quality"] 51 | 52 | 53 | [profile.release] 54 | lto = 'thin' 55 | opt-level = 3 56 | overflow-checks = false 57 | panic = 'abort' 58 | incremental = false 
59 | 60 | 61 | [profile.profiling] 62 | inherits = "release" 63 | debug = true 64 | 65 | 66 | [package.metadata.docs.rs] 67 | all-features = true -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Generate random test data for bioinformatics 2 | //! 3 | //! There's a feature for every file format that can be generated: 4 | //! - [`fasta`](module@format::fasta) 5 | //! - [`fastq`](module@format::fastq) 6 | //! - [`vcf`](module@format::vcf) 7 | //! - [`sequence`](module@format::sequence) 8 | 9 | #![warn(missing_docs)] 10 | 11 | /* std use */ 12 | 13 | /* crate use */ 14 | use rand::SeedableRng; 15 | 16 | /* project use */ 17 | 18 | /* mod declaration */ 19 | pub mod constants; 20 | pub mod values; 21 | #[macro_use] 22 | pub mod error; 23 | pub mod format; 24 | 25 | /* reexport */ 26 | pub use format::Format; 27 | 28 | #[cfg(feature = "fasta")] 29 | pub use format::fasta::Fasta; 30 | 31 | #[cfg(feature = "fastq")] 32 | pub use format::fastq::Fastq; 33 | 34 | #[cfg(feature = "vcf")] 35 | pub use format::vcf::Vcf; 36 | 37 | #[cfg(feature = "sequence")] 38 | pub use format::sequence::Sequence; 39 | 40 | #[cfg(feature = "quality")] 41 | pub use format::quality::Quality; 42 | 43 | #[cfg(feature = "cigar")] 44 | pub use format::cigar::Cigar; 45 | 46 | /// Create a random generator with [constants::SEED] 47 | pub fn rand() -> rand::rngs::StdRng { 48 | rand::rngs::StdRng::from_seed(constants::SEED) 49 | } 50 | 51 | /// Create a random generator with a user seed 52 | pub fn seeded_rand(seed: u64) -> rand::rngs::StdRng { 53 | rand::rngs::StdRng::seed_from_u64(seed) 54 | } 55 | 56 | #[cfg(test)] 57 | mod tests { 58 | /* crate use */ 59 | use rand::Rng; 60 | 61 | /* local use */ 62 | use super::*; 63 | 64 | #[test] 65 | fn check_rand() { 66 | let mut rng = rand(); 67 | 68 | assert_eq!(rng.gen::(), 27); 69 | } 70 | 71 | #[test] 72 | fn 
check_seeded_rand() { 73 | let mut rng = seeded_rand(42); 74 | 75 | assert_eq!(rng.gen::(), 162); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | //! Error struct of project biotest 2 | 3 | /* crate use */ 4 | use thiserror; 5 | 6 | /// Enum to manage error 7 | #[derive(std::fmt::Debug, thiserror::Error)] 8 | pub enum Error { 9 | /// WeightedDistribution is larger than value 10 | #[error("Weight array is larger than value array")] 11 | WeightArrayLargerValueArray, 12 | 13 | /// unreachable 14 | #[error("Unreachable error from file {file} in line {line}")] 15 | Unreachable { 16 | /// line number 17 | line: u32, 18 | /// file name 19 | file: &'static str, 20 | }, 21 | 22 | /// std::io::Error error 23 | #[error(transparent)] 24 | StdIo(#[from] std::io::Error), 25 | 26 | /// rand::distributions::weighted::WeightedError 27 | #[error(transparent)] 28 | RandWeightedError(#[from] rand::distributions::weighted::WeightedError), 29 | } 30 | 31 | macro_rules! 
create_unreachable { 32 | () => { 33 | crate::error::Error::Unreachable { 34 | line: std::line!(), 35 | file: std::file!(), 36 | } 37 | }; 38 | } 39 | 40 | pub(crate) use create_unreachable; 41 | 42 | /// Alias of result 43 | pub type Result = core::result::Result; 44 | 45 | #[cfg(test)] 46 | mod tests { 47 | /* local use */ 48 | use super::*; 49 | 50 | #[test] 51 | fn unreachable_macro() { 52 | assert_matches::assert_matches!( 53 | create_unreachable!(), 54 | crate::error::Error::Unreachable { 55 | line: 53, 56 | #[cfg(target_family = "windows")] 57 | file: "src\\error.rs", 58 | #[cfg(target_family = "unix")] 59 | file: "src/error.rs", 60 | #[cfg(target_family = "wasm")] 61 | file: "src/error.rs", 62 | } 63 | ); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/format.rs: -------------------------------------------------------------------------------- 1 | //! Format data generation 2 | 3 | /* std use */ 4 | 5 | /* crates use */ 6 | 7 | /* module declaration */ 8 | #[cfg(feature = "cigar")] 9 | pub mod cigar; 10 | 11 | #[cfg(feature = "fasta")] 12 | pub mod fasta; 13 | 14 | #[cfg(feature = "fastq")] 15 | pub mod fastq; 16 | 17 | #[cfg(feature = "gff")] 18 | pub mod gff; 19 | 20 | #[cfg(feature = "sequence")] 21 | pub mod sequence; 22 | 23 | #[cfg(feature = "quality")] 24 | pub mod quality; 25 | 26 | #[cfg(feature = "vcf")] 27 | pub mod vcf; 28 | 29 | /* projet use */ 30 | use crate::error; 31 | 32 | /// Trait of Format 33 | pub trait Format { 34 | /// Write header of format in output 35 | fn header( 36 | &self, 37 | output: &mut dyn std::io::Write, 38 | rng: &mut rand::rngs::StdRng, 39 | ) -> error::Result<()>; 40 | 41 | /// Write a record in output 42 | fn record( 43 | &self, 44 | output: &mut dyn std::io::Write, 45 | rng: &mut rand::rngs::StdRng, 46 | ) -> error::Result<()>; 47 | 48 | /// Write multiple record in output 49 | fn records( 50 | &self, 51 | output: &mut dyn std::io::Write, 52 | rng: &mut 
rand::rngs::StdRng, 53 | number: usize, 54 | ) -> error::Result<()> { 55 | for _ in 0..number { 56 | self.record(output, rng)?; 57 | output.write_all(&[b'\n'])?; 58 | } 59 | Ok(()) 60 | } 61 | 62 | /// Create a file at path with header and multiple records 63 | fn create

(&self, path: P, rng: &mut rand::rngs::StdRng, number: usize) -> error::Result<()> 64 | where 65 | P: core::convert::AsRef, 66 | { 67 | let mut output = std::io::BufWriter::new(std::fs::File::create(path)?); 68 | 69 | self.header(&mut output, rng)?; 70 | self.records(&mut output, rng, number)?; 71 | 72 | Ok(()) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: ["**"] 8 | 9 | jobs: 10 | check: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout sources 14 | uses: actions/checkout@v1 15 | 16 | - name: Install stable toolchain 17 | uses: actions-rs/toolchain@v1 18 | with: 19 | profile: minimal 20 | toolchain: stable 21 | override: true 22 | 23 | - name: Run cargo check 24 | uses: actions-rs/cargo@v1 25 | with: 26 | command: check 27 | args: --all-features 28 | 29 | test: 30 | runs-on: ${{ matrix.os }} 31 | strategy: 32 | fail-fast: false 33 | matrix: 34 | build: [beta, stable, windows, macos] 35 | include: 36 | - build: macos 37 | os: macos-latest 38 | rust: stable 39 | - build: windows 40 | os: windows-latest 41 | rust: stable 42 | - build: beta 43 | os: ubuntu-latest 44 | rust: beta 45 | - build: stable 46 | os: ubuntu-latest 47 | rust: stable 48 | steps: 49 | - uses: actions/checkout@v1 50 | 51 | - uses: actions-rs/toolchain@v1 52 | with: 53 | toolchain: ${{ matrix.rust }} 54 | override: true 55 | 56 | - name: Run tests 57 | uses: actions-rs/cargo@v1 58 | with: 59 | command: test 60 | args: --no-fail-fast --all-features 61 | 62 | coverage: 63 | runs-on: ubuntu-latest 64 | steps: 65 | - uses: actions/checkout@v1 66 | 67 | - uses: actions-rs/toolchain@v1 68 | with: 69 | toolchain: nightly 70 | override: true 71 | 72 | - name: Install cargo-tarpaulin 73 | uses: actions-rs/install@v0.1 74 | with: 75 | crate: 
/// Build, at compile time, an array of `N` consecutive byte values starting at `B`.
///
/// Element `i` of the result is `B + i`, e.g. `gen_array::<3, 65>()` is `*b"ABC"`.
///
/// # Panics
///
/// Panics (at compile time when evaluated in a const context) if the last
/// value `B + N - 1` does not fit in a `u8`, instead of silently wrapping.
pub(crate) const fn gen_array<const N: usize, const B: usize>() -> [u8; N] {
    // Written so that neither side of the comparison can overflow or underflow
    assert!(
        N == 0 || (B < 256 && N <= 256 - B),
        "gen_array: B + N must not exceed 256"
    );

    let mut array = [0; N];

    // `for` loops are not allowed in const fn, hence the manual index loop
    let mut i = 0;
    while i < N {
        array[i] = (B + i) as u8;
        i += 1;
    }

    array
}
usize = 5; 58 | 59 | /// Strand 60 | pub static STRAND: [&[u8]; 3] = [b".", b"+", b"-"]; 61 | 62 | /// Gff feature 63 | pub static GFF_FEATURE: [&[u8]; 4] = [b"gene", b"transcript", b"repeat", b"exon"]; 64 | 65 | /// Gff feature 66 | pub static GFF_PHASE: [&[u8]; 4] = [b".", b"0", b"1", b"2"]; 67 | 68 | /// CIGAR SAM 69 | pub static CIGAR_SAM: [u8; 9] = *b"MIDNSHP=X"; 70 | 71 | /// CIGAR GFF 72 | pub static CIGAR_GFF: [u8; 5] = *b"MIDFR"; 73 | 74 | /// biotest version 75 | pub const BIOTEST_VERSION: &[u8] = env!("CARGO_PKG_VERSION").as_bytes(); 76 | 77 | #[cfg(test)] 78 | mod tests { 79 | /* project use */ 80 | use super::*; 81 | 82 | #[test] 83 | fn ascii_visible() { 84 | assert_eq!(ASCII_VISIBLE, gen_array::<94, 33>()) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /examples/default.rs: -------------------------------------------------------------------------------- 1 | //! Generate file with default value 2 | 3 | #![warn(missing_docs)] 4 | 5 | /* std use */ 6 | 7 | /* crate use */ 8 | use clap::Parser as _; 9 | use rand::SeedableRng; 10 | 11 | /* project use */ 12 | use biotest::error; 13 | 14 | use biotest::Format as _; 15 | 16 | /// Select type of file too generate 17 | #[derive(Debug, Clone, clap::ValueEnum)] 18 | pub enum Type { 19 | /// Generate a fasta file 20 | Fasta, 21 | 22 | /// Generate a fastq file 23 | Fastq, 24 | 25 | /// Generate a vcf file 26 | Vcf, 27 | 28 | /// Generate a sequence file 29 | Sequence, 30 | 31 | /// Generate a sequence file 32 | Quality, 33 | } 34 | 35 | /// Example: {{project_description}} 36 | #[derive(clap::Parser, std::fmt::Debug)] 37 | #[clap( 38 | name = "biotest_default", 39 | version = "0.1", 40 | author = "Pierre Marijon " 41 | )] 42 | pub struct Command { 43 | /// Output path 44 | #[clap(short = 'o', long = "output")] 45 | pub output_path: std::path::PathBuf, 46 | 47 | /// Number of record 48 | #[clap(short = 'n', long = "number-record")] 49 | pub number_record: u64, 
50 | 51 | /// Type of output 52 | #[clap(short = 't', long = "type")] 53 | pub out_type: Type, 54 | 55 | /// Silence all output 56 | #[clap(short = 'q', long = "quiet")] 57 | pub quiet: bool, 58 | 59 | /// Verbose mode (-v, -vv, -vvv, etc) 60 | #[clap(short = 'v', long = "verbosity", action = clap::ArgAction::Count)] 61 | pub verbosity: u8, 62 | 63 | /// Timestamp (sec, ms, ns, none) 64 | #[clap(short = 'T', long = "timestamp")] 65 | pub ts: Option, 66 | } 67 | 68 | fn main() -> error::Result<()> { 69 | // parse cli 70 | let params = Command::parse(); 71 | 72 | // Setup logger 73 | stderrlog::new() 74 | .quiet(params.quiet) 75 | .verbosity(params.verbosity as usize) 76 | .timestamp(params.ts.unwrap_or(stderrlog::Timestamp::Off)) 77 | .init() 78 | .unwrap(); 79 | 80 | let mut rng = rand::rngs::StdRng::from_entropy(); 81 | 82 | match params.out_type { 83 | Type::Fasta => biotest::Fasta::default().create( 84 | params.output_path, 85 | &mut rng, 86 | params.number_record as usize, 87 | ), 88 | Type::Fastq => biotest::Fastq::default().create( 89 | params.output_path, 90 | &mut rng, 91 | params.number_record as usize, 92 | ), 93 | Type::Vcf => biotest::Vcf::default().create( 94 | params.output_path, 95 | &mut rng, 96 | params.number_record as usize, 97 | ), 98 | Type::Sequence => biotest::Sequence::default().create( 99 | params.output_path, 100 | &mut rng, 101 | params.number_record as usize, 102 | ), 103 | Type::Quality => biotest::Quality::default().create( 104 | params.output_path, 105 | &mut rng, 106 | params.number_record as usize, 107 | ), 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/format/cigar.rs: -------------------------------------------------------------------------------- 1 | //! 
CIGAR 2 | 3 | /* std use */ 4 | 5 | /* crates use */ 6 | use rand::Rng as _; 7 | 8 | /* project use */ 9 | use crate::error; 10 | use crate::format; 11 | use crate::values; 12 | 13 | use crate::values::Generate as _; 14 | 15 | /// Struct to generate cigar record 16 | #[derive(typed_builder::TypedBuilder)] 17 | pub struct Cigar { 18 | /// Cigar length 19 | #[builder(default = 20)] 20 | length: u64, 21 | 22 | /// Cigar Alphabet 23 | #[builder(default = values::Cigar::Sam)] 24 | alphabet: values::Cigar, 25 | 26 | /// Cigar weights 27 | #[builder(default = vec![1; 0])] 28 | alphabet_weights: Vec, 29 | } 30 | 31 | impl core::default::Default for Cigar { 32 | fn default() -> Self { 33 | Cigar::builder().build() 34 | } 35 | } 36 | 37 | impl format::Format for Cigar { 38 | fn header( 39 | &self, 40 | _output: &mut dyn std::io::Write, 41 | _rng: &mut rand::rngs::StdRng, 42 | ) -> error::Result<()> { 43 | Ok(()) 44 | } 45 | 46 | fn record( 47 | &self, 48 | output: &mut dyn std::io::Write, 49 | rng: &mut rand::rngs::StdRng, 50 | ) -> error::Result<()> { 51 | let mut len = 0; 52 | while len < self.length { 53 | let size = if self.length - len > 1 { 54 | rng.gen_range::>(1..(self.length - len) as usize) 55 | } else { 56 | 1 57 | }; 58 | 59 | let letter = if self.alphabet_weights.is_empty() { 60 | self.alphabet.generate(rng, 1) 61 | } else { 62 | self.alphabet.weighted(rng, 1, &self.alphabet_weights) 63 | }; 64 | 65 | output.write_all(size.to_string().as_bytes())?; 66 | output.write_all(&letter?)?; 67 | 68 | len += size as u64; 69 | } 70 | 71 | Ok(()) 72 | } 73 | } 74 | 75 | #[cfg(test)] 76 | mod tests { 77 | /* std use */ 78 | use std::io::Read as _; 79 | 80 | /* project use */ 81 | use super::format::Format as _; 82 | use super::*; 83 | 84 | const TRUTH: &[u8] = b"12F20I1M13D1D1I1D1R 85 | 23R17I1F7M1D1I 86 | 33I15M1M1M 87 | 44F1D2I2R1R 88 | 16F29D2D1I1F1F 89 | "; 90 | 91 | const DEFAULT: &[u8] = b"5P8D1S1N1S1N1S1D1D"; 92 | 93 | const WEIGHTED_TRUTH: &[u8] = 
b"12R20D1D13R1I1D1M1F"; 94 | 95 | #[test] 96 | fn default() -> error::Result<()> { 97 | let mut output = Vec::new(); 98 | let mut rng = crate::rand(); 99 | 100 | let generator = Cigar::default(); 101 | 102 | generator.record(&mut output, &mut rng)?; 103 | 104 | assert_eq!(output, DEFAULT); 105 | 106 | Ok(()) 107 | } 108 | 109 | #[test] 110 | fn record() -> error::Result<()> { 111 | let mut output = Vec::new(); 112 | let mut rng = crate::rand(); 113 | 114 | let generator = Cigar::builder() 115 | .length(50) 116 | .alphabet(values::Cigar::Gff) 117 | .build(); 118 | 119 | generator.record(&mut output, &mut rng)?; 120 | 121 | assert_eq!(output, TRUTH.to_vec()[..19]); 122 | 123 | Ok(()) 124 | } 125 | 126 | #[test] 127 | fn weigthed_record() -> error::Result<()> { 128 | let mut output = Vec::new(); 129 | let mut rng = crate::rand(); 130 | 131 | let generator = Cigar::builder() 132 | .length(50) 133 | .alphabet(values::Cigar::Gff) 134 | .alphabet_weights(vec![1, 2, 3, 4, 5]) 135 | .build(); 136 | 137 | generator.record(&mut output, &mut rng)?; 138 | 139 | assert_eq!(output, WEIGHTED_TRUTH.to_vec()); 140 | 141 | Ok(()) 142 | } 143 | 144 | #[test] 145 | fn records() -> error::Result<()> { 146 | let mut output = Vec::new(); 147 | let mut rng = crate::rand(); 148 | 149 | let generator = Cigar::builder() 150 | .length(50) 151 | .alphabet(values::Cigar::Gff) 152 | .build(); 153 | 154 | generator.records(&mut output, &mut rng, 5)?; 155 | 156 | assert_eq!(output, TRUTH.to_vec()); 157 | 158 | Ok(()) 159 | } 160 | 161 | #[test] 162 | fn create() -> error::Result<()> { 163 | let mut rng = crate::rand(); 164 | 165 | let temp_dir = tempfile::tempdir()?; 166 | let temp_path = temp_dir.path(); 167 | 168 | let temp_file = temp_path.join("tmp.fasta"); 169 | 170 | let generator = Cigar::builder() 171 | .length(50) 172 | .alphabet(values::Cigar::Gff) 173 | .build(); 174 | 175 | generator.create(&temp_file, &mut rng, 5)?; 176 | 177 | let mut data = Vec::new(); 178 | let mut input = 
std::fs::File::open(&temp_file)?; 179 | input.read_to_end(&mut data)?; 180 | 181 | assert_eq!(data, TRUTH.to_vec()); 182 | 183 | Ok(()) 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/format/quality.rs: -------------------------------------------------------------------------------- 1 | //! quality generation 2 | //! 3 | //! Usage: 4 | //! ```no_run 5 | //! use biotest::Format as _; // importing the Format trait is required 6 | //! 7 | //! # fn main() -> Result<(), biotest::error::Error> { 8 | //! let mut rng = biotest::rand(); // Create a random generator with a fixed seed 9 | //! 10 | //! let mut output = Vec::new(); 11 | //! let generator = biotest::Quality::default(); 12 | //! 13 | //! generator.record(&mut output, &mut rng)?; // Write one quality record in output 14 | //! generator.records(&mut output, &mut rng, 5)?; // Write five quality records in output 15 | //! 16 | //! generator.create("test.sequence", &mut rng, 5)?; // Write five quality records in "test.sequence" 17 | //! # Ok(()) 18 | //! # } 19 | //! ``` 20 | //! 21 | //! Generated files follow this template: 22 | //! ```no_compile 23 | //! {quality} 24 | //! {quality} 25 | //! . 26 | //! . 27 | //! ``` 28 | //! 29 | //! Many things can be configured with the builder pattern: 30 | //! ```no_run 31 | //! use rand; 32 | //! use rand::SeedableRng; 33 | //! use biotest::Format; 34 | //! 35 | //! # fn main() -> Result<(), biotest::error::Error> { 36 | //! let mut rng = rand::rngs::StdRng::from_entropy(); // Create a random generator with a 'random' seed 37 | //! 38 | //! let generator = biotest::Quality::builder() 39 | //! .quality_len(50) // Set quality length 40 | //! .build(); 41 | //! 42 | //! generator.create("test.sequence", &mut rng, 5)?; // Write five quality records in "test.sequence" 43 | //! # Ok(()) 44 | //! # } 45 | //! 
``` 46 | 47 | /* std use */ 48 | 49 | /* crates use */ 50 | 51 | /* projet use */ 52 | use crate::error; 53 | use crate::format; 54 | use crate::values; 55 | 56 | use crate::values::Generate as _; 57 | 58 | /// Struct to generate random DNA sequence 59 | #[derive(typed_builder::TypedBuilder)] 60 | pub struct Quality { 61 | /// Alphabet use for sequence generation 62 | #[builder(default = values::Quality::Illumina)] 63 | quality: values::Quality, 64 | 65 | /// quality length 66 | #[builder(default = 150)] 67 | quality_len: usize, 68 | 69 | /// quality weights 70 | #[builder(default = vec![1; 0])] 71 | quality_weights: Vec, 72 | } 73 | 74 | impl core::default::Default for Quality { 75 | fn default() -> Self { 76 | Quality::builder().build() 77 | } 78 | } 79 | 80 | impl format::Format for Quality { 81 | fn header( 82 | &self, 83 | _output: &mut dyn std::io::Write, 84 | _rng: &mut rand::rngs::StdRng, 85 | ) -> error::Result<()> { 86 | Ok(()) 87 | } 88 | 89 | fn record( 90 | &self, 91 | output: &mut dyn std::io::Write, 92 | rng: &mut rand::rngs::StdRng, 93 | ) -> error::Result<()> { 94 | // quality 95 | if self.quality_weights.is_empty() { 96 | output.write_all(&self.quality.generate(rng, self.quality_len)?)?; 97 | } else { 98 | output.write_all(&self.quality.weighted( 99 | rng, 100 | self.quality_len, 101 | &self.quality_weights, 102 | )?)?; 103 | } 104 | 105 | Ok(()) 106 | } 107 | } 108 | 109 | #[cfg(test)] 110 | mod tests { 111 | /* std use */ 112 | use std::io::Read as _; 113 | 114 | /* project use */ 115 | use super::format::Format as _; 116 | use super::*; 117 | 118 | const TRUTH: &[u8] = b"=DI3E\"?#?3(\'5FAI2C+,\"E*=)#=G4A%H53A1).!83@9-660D.5-E.F/$*::>A2A>)\'=0B<$E/&411+! 
120 | )AF,E;7.8.3GF2%\"%:4%#<399BE%$8900(08#,.;&2*@3,\"\"<) 121 | 79HH127*A+:%7,(<2H3F*!)H#BH<3?=@/-%%&3#.EFG@@D\'*98<-:,1+F?>\"?(B4-?C 123 | "; 124 | 125 | const DEFAULT: &[u8] = b"=DI3E\"?#?3(\'5FAI2C+,\"E*=)#=G4A%H53A1).!83@9-660D.5-E.F/$*::>A2A>)\'=0B<$E/&411+!)AF,E;7.8.3GF2%\"%:4%#<399BE%$8900(08#,.;&2*@3,\"\"<)"; 126 | 127 | const WEIGHTED_TRUTH: &[u8] = b"&$&&%$%&&!$%&!$&##&&%$#$&#%&&&$$$&!$$$!%&%%&#%#$&&"; 128 | 129 | #[test] 130 | fn default() -> error::Result<()> { 131 | let mut output = Vec::new(); 132 | let mut rng = crate::rand(); 133 | 134 | let generator = Quality::default(); 135 | 136 | generator.record(&mut output, &mut rng)?; 137 | 138 | assert_eq!(output, DEFAULT); 139 | 140 | Ok(()) 141 | } 142 | 143 | #[test] 144 | fn record() -> error::Result<()> { 145 | let mut output = Vec::new(); 146 | let mut rng = crate::rand(); 147 | 148 | let generator = Quality::builder().quality_len(50).build(); 149 | 150 | generator.record(&mut output, &mut rng)?; 151 | 152 | assert_eq!(output, TRUTH.to_vec()[..50]); 153 | 154 | Ok(()) 155 | } 156 | 157 | #[test] 158 | fn weigthed_record() -> error::Result<()> { 159 | let mut output = Vec::new(); 160 | let mut rng = crate::rand(); 161 | 162 | let generator = Quality::builder() 163 | .quality_len(50) 164 | .quality_weights(vec![1, 0, 3, 4, 5, 6]) 165 | .build(); 166 | 167 | generator.record(&mut output, &mut rng)?; 168 | 169 | assert_eq!(output, WEIGHTED_TRUTH.to_vec()); 170 | 171 | Ok(()) 172 | } 173 | 174 | #[test] 175 | fn records() -> error::Result<()> { 176 | let mut output = Vec::new(); 177 | let mut rng = crate::rand(); 178 | 179 | let generator = Quality::builder().quality_len(50).build(); 180 | 181 | generator.records(&mut output, &mut rng, 5)?; 182 | 183 | assert_eq!(output, TRUTH.to_vec()); 184 | 185 | Ok(()) 186 | } 187 | 188 | #[test] 189 | fn create() -> error::Result<()> { 190 | let mut rng = crate::rand(); 191 | 192 | let temp_dir = tempfile::tempdir()?; 193 | let temp_path = temp_dir.path(); 194 | 195 
| let temp_file = temp_path.join("tmp.quality"); 196 | 197 | let generator = Quality::builder().quality_len(50).build(); 198 | 199 | generator.create(&temp_file, &mut rng, 5)?; 200 | 201 | let mut data = Vec::new(); 202 | let mut input = std::fs::File::open(&temp_file)?; 203 | input.read_to_end(&mut data)?; 204 | 205 | assert_eq!(data, TRUTH.to_vec()); 206 | 207 | Ok(()) 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /src/format/sequence.rs: -------------------------------------------------------------------------------- 1 | //! Sequence generation 2 | //! 3 | //! Usage: 4 | //! ```no_run 5 | //! use biotest::Format as _; // importing the Format trait is required 6 | //! 7 | //! # fn main() -> Result<(), biotest::error::Error> { 8 | //! let mut rng = biotest::rand(); // Create a random generator with a fixed seed 9 | //! 10 | //! let mut output = Vec::new(); 11 | //! let generator = biotest::Sequence::default(); 12 | //! 13 | //! generator.record(&mut output, &mut rng)?; // Write one sequence record in output 14 | //! generator.records(&mut output, &mut rng, 5)?; // Write five sequence records in output 15 | //! 16 | //! generator.create("test.sequence", &mut rng, 5)?; // Write five sequence records in "test.sequence" 17 | //! # Ok(()) 18 | //! # } 19 | //! ``` 20 | //! 21 | //! Generated files follow this template: 22 | //! ```no_compile 23 | //! {sequence} 24 | //! {sequence} 25 | //! . 26 | //! . 27 | //! ``` 28 | //! 29 | //! Many things can be configured with the builder pattern: 30 | //! ```no_run 31 | //! use rand; 32 | //! use rand::SeedableRng; 33 | //! use biotest::Format; 34 | //! 35 | //! # fn main() -> Result<(), biotest::error::Error> { 36 | //! let mut rng = rand::rngs::StdRng::from_entropy(); // Create a random generator with a 'random' seed 37 | //! 38 | //! let generator = biotest::Sequence::builder() 39 | //! .sequence_len(50) // Set sequence length 40 | //! .build(); 41 | //! 42 | //! 
generator.create("test.sequence", &mut rng, 5)?; // Write five sequence records in "test.sequence" 43 | //! # Ok(()) 44 | //! # } 45 | //! ``` 46 | 47 | /* std use */ 48 | 49 | /* crates use */ 50 | 51 | /* project use */ 52 | use crate::error; 53 | use crate::format; 54 | use crate::values; 55 | 56 | use crate::values::Generate as _; 57 | 58 | /// Struct to generate random nucleotide sequences (DNA alphabet by default) 59 | #[derive(typed_builder::TypedBuilder)] 60 | pub struct Sequence { 61 | /// Alphabet used for sequence generation 62 | #[builder(default = values::Nucleotides::Dna)] 63 | sequence: values::Nucleotides, 64 | 65 | /// Sequence length (default: 150) 66 | #[builder(default = 150)] 67 | sequence_len: usize, 68 | 69 | /// Sequence weights; when empty (the default) generation is unweighted 70 | #[builder(default = vec![1; 0])] 71 | sequence_weights: Vec, 72 | } 73 | 74 | impl core::default::Default for Sequence { 75 | fn default() -> Self { 76 | Sequence::builder().build() 77 | } 78 | } 79 | 80 | impl format::Format for Sequence { 81 | fn header( 82 | &self, 83 | _output: &mut dyn std::io::Write, 84 | _rng: &mut rand::rngs::StdRng, 85 | ) -> error::Result<()> { 86 | Ok(()) 87 | } 88 | 89 | fn record( 90 | &self, 91 | output: &mut dyn std::io::Write, 92 | rng: &mut rand::rngs::StdRng, 93 | ) -> error::Result<()> { 94 | // sequence: weighted generation is used only when weights were provided 95 | if self.sequence_weights.is_empty() { 96 | output.write_all(&self.sequence.generate(rng, self.sequence_len)?)?; 97 | } else { 98 | output.write_all(&self.sequence.weighted( 99 | rng, 100 | self.sequence_len, 101 | &self.sequence_weights, 102 | )?)?; 103 | } 104 | 105 | Ok(()) 106 | } 107 | } 108 | 109 | #[cfg(test)] 110 | mod tests { 111 | /* std use */ 112 | use std::io::Read as _; 113 | 114 | /* project use */ 115 | use super::format::Format as _; 116 | use super::*; 117 | 118 | const TRUTH: &[u8] = b"taTATgAAtCGCgtGTTAGTTAagccAcggtAatGcTtgtaCgcAGgAta 119 | TcgAAtTaTaGaTggttGCtCatGtctgCTGGTACtgTgcaaaagggGAG 120 | acAtgCtGCAAtTacCGtTAAcaGGtatTCaTCctcTGgAActTgCGAca 121 | AgaAAtaTCCcAgagggaCcttCcGcTTGcgAACcTtCttAacGtTtAtG 
122 | TgACAGCCaCGctGagattTGtgCttaAGggTcCTGcGTAGCTGTCCACg 123 | "; 124 | 125 | const DEFAULT: &[u8] = b"taTATgAAtCGCgtGTTAGTTAagccAcggtAatGcTtgtaCgcAGgAtaTcgAAtTaTaGaTggttGCtCatGtctgCTGGTACtgTgcaaaagggGAGacAtgCtGCAAtTacCGtTAAcaGGtatTCaTCctcTGgAActTgCGAca"; 126 | 127 | const WEIGHTED_TRUTH: &[u8] = b"GCGGTCGGGACTGATGAAGGTCCTGCTGGGTCCGATCCATGTTGAGCCGG"; 128 | 129 | #[test] 130 | fn default() -> error::Result<()> { 131 | let mut output = Vec::new(); 132 | let mut rng = crate::rand(); 133 | 134 | let generator = Sequence::default(); 135 | 136 | generator.record(&mut output, &mut rng)?; 137 | 138 | assert_eq!(output, DEFAULT); 139 | 140 | Ok(()) 141 | } 142 | 143 | #[test] 144 | fn record() -> error::Result<()> { 145 | let mut output = Vec::new(); 146 | let mut rng = crate::rand(); 147 | 148 | let generator = Sequence::builder().sequence_len(50).build(); 149 | 150 | generator.record(&mut output, &mut rng)?; 151 | 152 | assert_eq!(output, TRUTH.to_vec()[..50]); 153 | 154 | Ok(()) 155 | } 156 | 157 | #[test] 158 | fn weigthed_record() -> error::Result<()> { 159 | let mut output = Vec::new(); 160 | let mut rng = crate::rand(); 161 | 162 | let generator = Sequence::builder() 163 | .sequence_len(50) 164 | .sequence_weights(vec![1, 2, 3, 4]) 165 | .build(); 166 | 167 | generator.record(&mut output, &mut rng)?; 168 | 169 | assert_eq!(output, WEIGHTED_TRUTH.to_vec()); 170 | 171 | Ok(()) 172 | } 173 | 174 | #[test] 175 | fn records() -> error::Result<()> { 176 | let mut output = Vec::new(); 177 | let mut rng = crate::rand(); 178 | 179 | let generator = Sequence::builder().sequence_len(50).build(); 180 | 181 | generator.records(&mut output, &mut rng, 5)?; 182 | 183 | assert_eq!(output, TRUTH.to_vec()); 184 | 185 | Ok(()) 186 | } 187 | 188 | #[test] 189 | fn create() -> error::Result<()> { 190 | let mut rng = crate::rand(); 191 | 192 | let temp_dir = tempfile::tempdir()?; 193 | let temp_path = temp_dir.path(); 194 | 195 | let temp_file = temp_path.join("tmp.sequence"); 196 | 197 | 
let generator = Sequence::builder().sequence_len(50).build(); 198 | 199 | generator.create(&temp_file, &mut rng, 5)?; 200 | 201 | let mut data = Vec::new(); 202 | let mut input = std::fs::File::open(&temp_file)?; 203 | input.read_to_end(&mut data)?; 204 | 205 | assert_eq!(data, TRUTH.to_vec()); 206 | 207 | Ok(()) 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /src/format/fasta.rs: -------------------------------------------------------------------------------- 1 | //! Fasta generation 2 | //! 3 | //! Usage: 4 | //! ```no_run 5 | //! use biotest::Format as _; // importing the Format trait is required 6 | //! 7 | //! # fn main() -> Result<(), biotest::error::Error> { 8 | //! let mut rng = biotest::rand(); // Create a random generator with a fixed seed 9 | //! 10 | //! let mut output = Vec::new(); 11 | //! let generator = biotest::Fasta::default(); 12 | //! 13 | //! generator.record(&mut output, &mut rng)?; // Write one fasta record in output 14 | //! generator.records(&mut output, &mut rng, 5)?; // Write five fasta records in output 15 | //! 16 | //! generator.create("test.fasta", &mut rng, 5)?; // Write five fasta records in "test.fasta" 17 | //! # Ok(()) 18 | //! # } 19 | //! ``` 20 | //! 21 | //! Generated records follow this template: 22 | //! ```no_compile 23 | //! >{id_prefix}{id}{id_suffix} {comment_prefix}{comment}{comment_suffix} 24 | //! {sequence} 25 | //! ``` 26 | //! 27 | //! Many things can be configured with the builder pattern: 28 | //! ```no_run 29 | //! use rand; 30 | //! use rand::SeedableRng; 31 | //! use biotest::Format; 32 | //! 33 | //! # fn main() -> Result<(), biotest::error::Error> { 34 | //! let mut rng = rand::rngs::StdRng::from_entropy(); // Create a random generator with a 'random' seed 35 | //! 36 | //! let generator = biotest::Fasta::builder() 37 | //! .id(biotest::values::Alphabet::Lower) // Set alphabet used to generate the sequence id 38 | //! .id_len(10) // Set length of id 39 | //! 
.id_prefix(b"prefix".to_vec()) // Set read id prefix 40 | //! .id_suffix(b"suffix".to_vec()) // Set read id suffix 41 | //! .comment(biotest::values::Alphabet::Upper) // Set alphabet used to generate the sequence comment 42 | //! .comment_len(0) // Set comment length; the prefix and suffix are still written when it is 0 43 | //! .comment_prefix(b"prefix".to_vec()) // Set comment prefix 44 | //! .comment_suffix(b"suffix".to_vec()) // Set comment suffix 45 | //! .build(); 46 | //! 47 | //! generator.create("test.fasta", &mut rng, 5)?; // Write five fasta records in "test.fasta" 48 | //! # Ok(()) 49 | //! # } 50 | //! ``` 51 | 52 | /* std use */ 53 | 54 | /* crates use */ 55 | 56 | /* project use */ 57 | use crate::error; 58 | use crate::format; 59 | use crate::values; 60 | 61 | use crate::values::Generate as _; 62 | 63 | /// Struct to generate random fasta records 64 | #[derive(typed_builder::TypedBuilder)] 65 | pub struct Fasta { 66 | /// Alphabet used for id generation 67 | #[builder(default = values::Alphabet::Upper)] 68 | id: values::Alphabet, 69 | 70 | /// Length of id 71 | #[builder(default = 10)] 72 | id_len: usize, 73 | 74 | /// Id prefix 75 | #[builder(default = b"".to_vec())] 76 | id_prefix: Vec, 77 | 78 | /// Id suffix 79 | #[builder(default = b"".to_vec())] 80 | id_suffix: Vec, 81 | 82 | /// Id weights; when empty (the default) id generation is unweighted 83 | #[builder(default = vec![1; 0])] 84 | id_weights: Vec, 85 | 86 | /// Alphabet used for comment generation 87 | #[builder(default = values::Alphabet::Lower)] 88 | comment: values::Alphabet, 89 | 90 | /// Comment length 91 | #[builder(default = 20)] 92 | comment_len: usize, 93 | 94 | /// Comment prefix 95 | #[builder(default = b"".to_vec())] 96 | comment_prefix: Vec, 97 | 98 | /// Comment suffix 99 | #[builder(default = b"".to_vec())] 100 | comment_suffix: Vec, 101 | 102 | /// Comment weights; when empty (the default) comment generation is unweighted 103 | #[builder(default = vec![1; 0])] 104 | comment_weights: Vec, 105 | 106 | /// Alphabet used for sequence generation 107 | #[builder(default = values::Nucleotides::Dna)] 108 | 
sequence: values::Nucleotides, 109 | 110 | /// Sequence length 111 | #[builder(default = 150)] 112 | sequence_len: usize, 113 | 114 | /// Sequence weights 115 | #[builder(default = vec![1; 0])] 116 | sequence_weights: Vec, 117 | } 118 | 119 | impl core::default::Default for Fasta { 120 | fn default() -> Self { 121 | Fasta::builder().build() 122 | } 123 | } 124 | 125 | impl format::Format for Fasta { 126 | fn header( 127 | &self, 128 | _output: &mut dyn std::io::Write, 129 | _rng: &mut rand::rngs::StdRng, 130 | ) -> error::Result<()> { 131 | Ok(()) 132 | } 133 | 134 | fn record( 135 | &self, 136 | output: &mut dyn std::io::Write, 137 | rng: &mut rand::rngs::StdRng, 138 | ) -> error::Result<()> { 139 | // id 140 | output.write_all(&[b'>'])?; 141 | output.write_all(&self.id_prefix)?; 142 | if self.id_weights.is_empty() { 143 | output.write_all(&self.id.generate(rng, self.id_len)?)?; 144 | } else { 145 | output.write_all(&self.id.weighted(rng, self.id_len, &self.id_weights)?)?; 146 | } 147 | output.write_all(&self.id_suffix)?; 148 | if self.id_prefix.len() + self.id_len + self.id_suffix.len() != 0 { 149 | output.write_all(&[b' '])?; 150 | } 151 | 152 | // comment 153 | output.write_all(&self.comment_prefix)?; 154 | if self.comment_weights.is_empty() { 155 | output.write_all(&self.comment.generate(rng, self.comment_len)?)?; 156 | } else { 157 | output.write_all(&self.comment.weighted( 158 | rng, 159 | self.comment_len, 160 | &self.comment_weights, 161 | )?)?; 162 | } 163 | output.write_all(&self.comment_suffix)?; 164 | output.write_all(b"\n")?; 165 | 166 | // sequence 167 | if self.sequence_weights.is_empty() { 168 | output.write_all(&self.sequence.generate(rng, self.sequence_len)?)?; 169 | } else { 170 | output.write_all(&self.sequence.weighted( 171 | rng, 172 | self.sequence_len, 173 | &self.sequence_weights, 174 | )?)?; 175 | } 176 | 177 | Ok(()) 178 | } 179 | } 180 | 181 | #[cfg(test)] 182 | mod tests { 183 | /* std use */ 184 | use std::io::Read as _; 185 | 186 | 
/* project use */ 187 | use super::format::Format as _; 188 | use super::*; 189 | 190 | const TRUTH: &[u8] = b">GSWNP zybhlatbbu 191 | CGCgtGTTAGTTAagccAcggtAatGcTtgtaCgcAGgAtaTcgAAtTaT 192 | >NJWIN icfqqisulj 193 | CtCatGtctgCTGGTACtgTgcaaaagggGAGacAtgCtGCAAtTacCGt 194 | >LHABR foipykuoug 195 | CaTCctcTGgAActTgCGAcaAgaAAtaTCCcAgagggaCcttCcGcTTG 196 | >GZCGR xtisataesr 197 | TtCttAacGtTtAtGTgACAGCCaCGctGagattTGtgCttaAGggTcCT 198 | >CKRPH yaldfvgykz 199 | TCCACgTTTGagtGaGCatAGGACAAaacTaTTagagGtatAGCcTatTt 200 | "; 201 | 202 | const DEFAULT: &[u8] = b">GSWNPZYBHL atbbutlfemxuzgaghmwn 203 | gccAcggtAatGcTtgtaCgcAGgAtaTcgAAtTaTaGaTggttGCtCatGtctgCTGGTACtgTgcaaaagggGAGacAtgCtGCAAtTacCGtTAAcaGGtatTCaTCctcTGgAActTgCGAcaAgaAAtaTCCcAgagggaCcttC"; 204 | 205 | const WEIGHTED_TRUTH: &[u8] = b">ECEED cdeeacdeac 206 | GAAGGTCCTGCTGGGTCCGATCCATGTTGAGCCGGTGCAGGTGGACGGTT"; 207 | 208 | #[test] 209 | fn default() -> error::Result<()> { 210 | let mut output = Vec::new(); 211 | let mut rng = crate::rand(); 212 | 213 | let generator = Fasta::default(); 214 | 215 | generator.record(&mut output, &mut rng)?; 216 | 217 | assert_eq!(output, DEFAULT); 218 | 219 | Ok(()) 220 | } 221 | 222 | #[test] 223 | fn record() -> error::Result<()> { 224 | let mut output = Vec::new(); 225 | let mut rng = crate::rand(); 226 | 227 | let generator = Fasta::builder() 228 | .id_len(5) 229 | .comment_len(10) 230 | .sequence_len(50) 231 | .build(); 232 | 233 | generator.record(&mut output, &mut rng)?; 234 | 235 | assert_eq!(output, TRUTH.to_vec()[..68]); 236 | 237 | Ok(()) 238 | } 239 | 240 | #[test] 241 | fn weigthed_record() -> error::Result<()> { 242 | let mut output = Vec::new(); 243 | let mut rng = crate::rand(); 244 | 245 | let generator = Fasta::builder() 246 | .id_len(5) 247 | .id_weights(vec![1, 2, 3, 4, 5]) 248 | .comment_len(10) 249 | .comment_weights(vec![1, 2, 3, 4, 5]) 250 | .sequence_len(50) 251 | .sequence_weights(vec![1, 2, 3, 4]) 252 | .build(); 253 | 254 | generator.record(&mut output, &mut 
rng)?; 255 | 256 | assert_eq!(output, WEIGHTED_TRUTH.to_vec()); 257 | 258 | Ok(()) 259 | } 260 | 261 | #[test] 262 | fn pre_suf_ix() -> error::Result<()> { 263 | let mut output = Vec::new(); 264 | let mut rng = crate::rand(); 265 | 266 | let generator = Fasta::builder() 267 | .id_len(5) 268 | .comment_len(10) 269 | .sequence_len(50) 270 | .id_prefix(b"id_prefix_".to_vec()) 271 | .id_suffix(b"_id_suffix".to_vec()) 272 | .comment_prefix(b"comment_prefix_".to_vec()) 273 | .comment_suffix(b"_comment_suffix".to_vec()) 274 | .build(); 275 | 276 | generator.record(&mut output, &mut rng)?; 277 | 278 | assert_eq!( 279 | output, 280 | b">id_prefix_GSWNP_id_suffix comment_prefix_zybhlatbbu_comment_suffix 281 | CGCgtGTTAGTTAagccAcggtAatGcTtgtaCgcAGgAtaTcgAAtTaT" 282 | .to_vec() 283 | ); 284 | 285 | Ok(()) 286 | } 287 | 288 | #[test] 289 | fn records() -> error::Result<()> { 290 | let mut output = Vec::new(); 291 | let mut rng = crate::rand(); 292 | 293 | let generator = Fasta::builder() 294 | .id_len(5) 295 | .comment_len(10) 296 | .sequence_len(50) 297 | .build(); 298 | 299 | generator.records(&mut output, &mut rng, 5)?; 300 | 301 | assert_eq!(output, TRUTH.to_vec()); 302 | 303 | Ok(()) 304 | } 305 | 306 | #[test] 307 | fn create() -> error::Result<()> { 308 | let mut rng = crate::rand(); 309 | 310 | let temp_dir = tempfile::tempdir()?; 311 | let temp_path = temp_dir.path(); 312 | 313 | let temp_file = temp_path.join("tmp.fasta"); 314 | 315 | let generator = Fasta::builder() 316 | .id_len(5) 317 | .comment_len(10) 318 | .sequence_len(50) 319 | .build(); 320 | 321 | generator.create(&temp_file, &mut rng, 5)?; 322 | 323 | let mut data = Vec::new(); 324 | let mut input = std::fs::File::open(&temp_file)?; 325 | input.read_to_end(&mut data)?; 326 | 327 | assert_eq!(data, TRUTH.to_vec()); 328 | 329 | Ok(()) 330 | } 331 | } 332 | -------------------------------------------------------------------------------- /src/format/gff.rs: 
-------------------------------------------------------------------------------- 1 | //! GFF3 generation 2 | 3 | /* std use */ 4 | 5 | /* crates use */ 6 | use rand::seq::SliceRandom as _; 7 | use rand::Rng as _; 8 | 9 | /* project use */ 10 | use crate::error; 11 | use crate::format; 12 | use crate::values; 13 | 14 | use crate::values::Generate as _; 15 | use crate::values::Get as _; 16 | 17 | /// Struct to generate random GFF3 records 18 | #[derive(typed_builder::TypedBuilder)] 19 | pub struct Gff { 20 | /// Sequence ids (seqid column) a record can be attached to 21 | #[builder(default = values::Chromosomes::Default)] 22 | contigs: values::Chromosomes, 23 | 24 | /// Feature types (type column) a record can use 25 | #[builder(default = values::GffFeature::All)] 26 | features: values::GffFeature, 27 | 28 | /// Range the start position is drawn from 29 | #[builder(default = values::Integer::Position)] 30 | position: values::Integer, 31 | 32 | /// Range the feature length (end - start) is drawn from 33 | #[builder(default = values::Integer::UserDefine(1..100_000))] 34 | length: values::Integer, 35 | 36 | /// Range the score is drawn from 37 | #[builder(default = values::Float::Default)] 38 | score: values::Float, 39 | 40 | /// Strand values a record can use 41 | #[builder(default = values::Strand::All)] 42 | strand: values::Strand, 43 | 44 | /// Phase values a record can use 45 | #[builder(default = values::GffPhase::All)] 46 | phase: values::GffPhase, 47 | 48 | /// Alphabet used for the ID attribute 49 | #[builder(default = values::Alphabet::A2z)] 50 | id: values::Alphabet, 51 | 52 | /// Length of id 53 | #[builder(default = 10)] 54 | id_len: usize, 55 | 56 | /// Id prefix 57 | #[builder(default = b"".to_vec())] 58 | id_prefix: Vec, 59 | 60 | /// Id suffix 61 | #[builder(default = b"".to_vec())] 62 | id_suffix: Vec, 63 | 64 | /// Alphabet used for the Name attribute 65 | #[builder(default = values::Alphabet::Lower)] 66 | name: values::Alphabet, 67 | 68 | /// Length of name 69 | #[builder(default = 10)] 70 | name_len: usize, 71 | 72 | /// Name prefix 73 | #[builder(default = b"".to_vec())] 74 | name_prefix: Vec, 75 | 76 | /// Name suffix 77 | #[builder(default = b"".to_vec())] 78 | name_suffix: Vec, 79 | 80 | /// Alphabet used for the Alias attribute 81 | #[builder(default = 
values::Alphabet::A2z)] 82 | alias: values::Alphabet, 83 | 84 | /// Length of alias 85 | #[builder(default = 10)] 86 | alias_len: usize, 87 | 88 | /// Alias prefix 89 | #[builder(default = b"".to_vec())] 90 | alias_prefix: Vec, 91 | 92 | /// Alias suffix 93 | #[builder(default = b"".to_vec())] 94 | alias_suffix: Vec, 95 | 96 | /// Parent 97 | #[builder(default = values::Alphabet::A2z)] 98 | parent: values::Alphabet, 99 | 100 | /// Length of parent 101 | #[builder(default = 10)] 102 | parent_len: usize, 103 | 104 | /// Parent prefix 105 | #[builder(default = b"".to_vec())] 106 | parent_prefix: Vec, 107 | 108 | /// Parent suffix 109 | #[builder(default = b"".to_vec())] 110 | parent_suffix: Vec, 111 | } 112 | 113 | impl Gff { 114 | fn produce_gap_value(rng: &mut rand::rngs::StdRng, length: u64) -> error::Result> { 115 | let mut output = Vec::new(); 116 | let mut lengths = Vec::new(); 117 | let mut len = 0; 118 | while len < length { 119 | let size = if length - len > 1 { 120 | rng.gen_range::>(1..(length - len) as usize) 121 | } else { 122 | 1 123 | }; 124 | 125 | lengths.push(size); 126 | len += size as u64; 127 | } 128 | 129 | for len in lengths { 130 | let letter = values::Cigar::Gff.generate(rng, 1)?; 131 | output.extend(letter); 132 | output.extend(len.to_string().as_bytes().to_vec()); 133 | output.push(b' '); 134 | } 135 | output.pop(); 136 | 137 | Ok(output) 138 | } 139 | } 140 | 141 | impl core::default::Default for Gff { 142 | fn default() -> Self { 143 | Gff::builder().build() 144 | } 145 | } 146 | 147 | impl format::Format for Gff { 148 | fn header( 149 | &self, 150 | _output: &mut dyn std::io::Write, 151 | _rng: &mut rand::rngs::StdRng, 152 | ) -> error::Result<()> { 153 | Ok(()) 154 | } 155 | 156 | fn record( 157 | &self, 158 | output: &mut dyn std::io::Write, 159 | rng: &mut rand::rngs::StdRng, 160 | ) -> error::Result<()> { 161 | // seqid 162 | output.write_all( 163 | self.contigs 164 | .as_ref() 165 | .choose(rng) 166 | 
.ok_or(error::create_unreachable!())?, 167 | )?; 168 | output.write_all(b"\t")?; 169 | 170 | // source (always the literal "biotest") 171 | output.write_all(b"biotest\t")?; 172 | 173 | // type 174 | output.write_all( 175 | self.features 176 | .as_ref() 177 | .choose(rng) 178 | .ok_or(error::create_unreachable!())?, 179 | )?; 180 | output.write_all(b"\t")?; 181 | 182 | // start: drawn from self.position 183 | let start = rng.gen_range::>(self.position.clone().into()); 184 | output.write_all(start.to_string().as_bytes())?; 185 | output.write_all(b"\t")?; 186 | 187 | // end: start plus a length drawn from self.length. NOTE(review): this is an unchecked i32 addition and may overflow when start is close to i32::MAX -- confirm the position range prevents that 188 | let end: i32 = 189 | start + rng.gen_range::>(self.length.clone().into()); 190 | output.write_all(end.to_string().as_bytes())?; 191 | output.write_all(b"\t")?; 192 | 193 | // score 194 | output.write_all(&self.score.clone().get(rng))?; 195 | output.write_all(b"\t")?; 196 | 197 | // strand 198 | output.write_all( 199 | self.strand 200 | .as_ref() 201 | .choose(rng) 202 | .ok_or(error::create_unreachable!())?, 203 | )?; 204 | output.write_all(b"\t")?; 205 | 206 | // phase 207 | output.write_all( 208 | self.phase 209 | .as_ref() 210 | .choose(rng) 211 | .ok_or(error::create_unreachable!())?, 212 | )?; 213 | output.write_all(b"\t")?; 214 | 215 | // attributes: written as `Key=value` pairs separated by ';' 216 | // id 217 | output.write_all(b"ID=")?; 218 | output.write_all(&self.id_prefix)?; 219 | output.write_all(&self.id.generate(rng, self.id_len)?)?; 220 | output.write_all(&self.id_suffix)?; 221 | output.write_all(b";")?; 222 | 223 | // name 224 | output.write_all(b"Name=")?; 225 | output.write_all(&self.name_prefix)?; 226 | output.write_all(&self.name.generate(rng, self.name_len)?)?; 227 | output.write_all(&self.name_suffix)?; 228 | output.write_all(b";")?; 229 | 230 | // alias 231 | output.write_all(b"Alias=")?; 232 | output.write_all(&self.alias_prefix)?; 233 | output.write_all(&self.alias.generate(rng, self.alias_len)?)?; 234 | output.write_all(&self.alias_suffix)?; 235 | output.write_all(b";")?; 236 | 237 | // parent 238 | output.write_all(b"Parent=")?; 239 | 
output.write_all(&self.parent_prefix)?; 240 | output.write_all(&self.parent.generate(rng, self.parent_len)?)?; 241 | output.write_all(&self.parent_suffix)?; 242 | output.write_all(b";")?; 243 | 244 | // gap 245 | output.write_all(b"Gap=")?; 246 | output.write_all(&Gff::produce_gap_value(rng, (end - start) as u64)?)?; 247 | 248 | Ok(()) 249 | } 250 | } 251 | 252 | #[cfg(test)] 253 | mod tests { 254 | /* std use */ 255 | use std::io::Read as _; 256 | 257 | /* project use */ 258 | use super::format::Format as _; 259 | use super::*; 260 | 261 | const TRUTH: &[u8] = b"YAR028W\tbiotest\texon\t6057\t6155\t9.429573\t.\t0\tID=[tBTlDDl[M;Name=emxuzgaghm;Alias=s^[teLMir[;Parent=gMDhw\\voCG;Gap=M48 D11 R26 F10 M1 D1 D1 262 | 93\tbiotest\texon\t8323\t8381\t3.2013047\t-\t.\tID=dbZcRFrrQ_;Name=jwinicfqqi;Alias=jonYVInjLI;Parent=i`oWogntTH;Gap=I28 R12 F5 I1 R3 D8 I1 263 | X\tbiotest\texon\t9176\t9219\t1.0694146\t.\t.\tID=zkT\\Wk_sGD;Name=rlpbpvmdcp;Alias=nVWJVaDBnQ;Parent=SHYNm[QBCg;Gap=F24 R15 F2 I1 D1 264 | ENA|LT795502|LT795502.1\tbiotest\tgene\t2073\t2169\t0.5253875\t-\t2\tID=gZliSmUzRv;Name=ccdkarvolo;Alias=Bw_ZxxkAFA;Parent=[o`OIdJgjZ;Gap=R31 F13 I47 D4 F1 265 | ENA|LT795502|LT795502.1\tbiotest\ttranscript\t3919\t3944\t9.702128\t.\t0\tID=jBlBKigqzn;Name=gultrkslsv;Alias=\\RlwOmAiZP;Parent=wyAsKBssXJ;Gap=R6 R4 D2 D10 F2 I1 266 | "; 267 | 268 | const DEFAULT: &[u8] = b"YAR028W\tbiotest\texon\t1133862760\t1133889429\t21.144531\t.\t0\tID=[tBTlDDl[M;Name=emxuzgaghm;Alias=s^[teLMir[;Parent=gMDhw\\voCG;Gap=D21168 D1146 R2911 I1127 D50 I96 R103 R1 M46 I7 F1 F1 F7 R1 D2 F1 F1"; 269 | 270 | #[test] 271 | fn default() -> error::Result<()> { 272 | let mut output = Vec::new(); 273 | let mut rng = crate::rand(); 274 | 275 | let generator = Gff::default(); 276 | 277 | generator.record(&mut output, &mut rng)?; 278 | 279 | assert_eq!(output, DEFAULT); 280 | 281 | Ok(()) 282 | } 283 | 284 | #[test] 285 | fn record() -> error::Result<()> { 286 | let mut output = Vec::new(); 287 | let mut rng = 
crate::rand(); 288 | 289 | let generator = Gff::builder() 290 | .position(values::Integer::UserDefine(0..10_000)) 291 | .length(values::Integer::UserDefine(2..100)) 292 | .score(values::Float::UserDefine(0.0..10.0)) 293 | .build(); 294 | 295 | generator.record(&mut output, &mut rng)?; 296 | 297 | assert_eq!(output, TRUTH.to_vec()[..137]); 298 | 299 | Ok(()) 300 | } 301 | 302 | #[test] 303 | fn records() -> error::Result<()> { 304 | let mut output = Vec::new(); 305 | let mut rng = crate::rand(); 306 | 307 | let generator = Gff::builder() 308 | .position(values::Integer::UserDefine(0..10_000)) 309 | .length(values::Integer::UserDefine(2..100)) 310 | .score(values::Float::UserDefine(0.0..10.0)) 311 | .build(); 312 | 313 | generator.records(&mut output, &mut rng, 5)?; 314 | 315 | assert_eq!(output, TRUTH.to_vec()); 316 | 317 | Ok(()) 318 | } 319 | 320 | #[test] 321 | fn create() -> error::Result<()> { 322 | let mut rng = crate::rand(); 323 | 324 | let temp_dir = tempfile::tempdir()?; 325 | let temp_path = temp_dir.path(); 326 | 327 | let temp_file = temp_path.join("tmp.fasta"); 328 | 329 | let generator = Gff::builder() 330 | .position(values::Integer::UserDefine(0..10_000)) 331 | .length(values::Integer::UserDefine(2..100)) 332 | .score(values::Float::UserDefine(0.0..10.0)) 333 | .build(); 334 | 335 | generator.create(&temp_file, &mut rng, 5)?; 336 | 337 | let mut data = Vec::new(); 338 | let mut input = std::fs::File::open(&temp_file)?; 339 | input.read_to_end(&mut data)?; 340 | 341 | assert_eq!(data, TRUTH.to_vec()); 342 | 343 | Ok(()) 344 | } 345 | } 346 | -------------------------------------------------------------------------------- /src/format/fastq.rs: -------------------------------------------------------------------------------- 1 | //! Fastq generation 2 | //! 3 | //! Usage: 4 | //! ```no_run 5 | //! use biotest::Format; 6 | //! 7 | //! # fn main() -> Result<(), biotest::error::Error> { 8 | //! 
let mut rng = biotest::rand(); // Create a random generator with a fixed seed 9 | //! 10 | //! let mut output = Vec::new(); 11 | //! let generator = biotest::Fastq::default(); 12 | //! 13 | //! generator.record(&mut output, &mut rng)?; // Write one fastq record in output 14 | //! generator.records(&mut output, &mut rng, 5)?; // Write five fastq records in output 15 | //! 16 | //! generator.create("test.fastq", &mut rng, 5)?; // Write five fastq records in "test.fastq" 17 | //! # Ok(()) 18 | //! # } 19 | //! ``` 20 | //! 21 | //! Generated records follow this template: 22 | //! ```no_compile 23 | //! @{id_prefix}{id}{id_suffix} {comment_prefix}{comment}{comment_suffix} 24 | //! {sequence} 25 | //! +{plus_prefix}{plus}{plus_suffix} 26 | //! {quality} 27 | //! ``` 28 | //! 29 | //! Many things can be configured with the builder pattern: 30 | //! ```no_run 31 | //! use rand; 32 | //! use rand::SeedableRng; 33 | //! use biotest::Format; 34 | //! 35 | //! # fn main() -> Result<(), biotest::error::Error> { 36 | //! let mut rng = rand::rngs::StdRng::from_entropy(); // Create a random generator with a 'random' seed 37 | //! 38 | //! let generator = biotest::Fastq::builder() 39 | //! .id(biotest::values::Alphabet::Lower) // Set alphabet used to generate the sequence id 40 | //! .id_len(10) // Set length of id 41 | //! .id_prefix(b"prefix".to_vec()) // Set read id prefix 42 | //! .id_suffix(b"suffix".to_vec()) // Set read id suffix 43 | //! .comment(biotest::values::Alphabet::Upper) // Set alphabet used to generate the sequence comment 44 | //! .comment_len(0) // Set comment length; the prefix and suffix are still written when it is 0 45 | //! .comment_prefix(b"prefix".to_vec()) // Set comment prefix 46 | //! .comment_suffix(b"suffix".to_vec()) // Set comment suffix 47 | //! .plus(biotest::values::Alphabet::Upper) // Set alphabet used to generate the plus line comment 48 | //! .plus_len(0) // Set plus comment length; the prefix and suffix are still written when it is 0 49 | //! 
.plus_prefix(b"prefix".to_vec()) // Set plus prefix 50 | //! .plus_suffix(b"suffix".to_vec()) // Set plus suffix 51 | //! .build(); 52 | //! 53 | //! generator.create("test.fastq", &mut rng, 5)?; // Write five fastq records in "test.fastq" 54 | //! # Ok(()) 55 | //! # } 56 | //! ``` 57 | 58 | /* std use */ 59 | 60 | /* crates use */ 61 | 62 | /* project use */ 63 | use crate::error; 64 | use crate::format; 65 | use crate::values; 66 | 67 | use crate::values::Generate as _; 68 | 69 | /// Struct to generate random fastq records 70 | #[derive(typed_builder::TypedBuilder)] 71 | pub struct Fastq { 72 | /// Alphabet used for id generation 73 | #[builder(default = values::Alphabet::Upper )] 74 | id: values::Alphabet, 75 | 76 | /// Length of id 77 | #[builder(default = 10)] 78 | id_len: usize, 79 | 80 | /// Id prefix 81 | #[builder(default = b"".to_vec())] 82 | id_prefix: Vec, 83 | 84 | /// Id suffix 85 | #[builder(default = b"".to_vec())] 86 | id_suffix: Vec, 87 | 88 | /// Id weights; when empty (the default) id generation is unweighted 89 | #[builder(default = vec![1; 0])] 90 | id_weights: Vec, 91 | 92 | /// Alphabet used for comment generation 93 | #[builder(default = values::Alphabet::Lower)] 94 | comment: values::Alphabet, 95 | /// Comment length 96 | #[builder(default = 20)] 97 | comment_len: usize, 98 | 99 | /// Comment prefix 100 | #[builder(default = b"".to_vec())] 101 | comment_prefix: Vec, 102 | 103 | /// Comment suffix 104 | #[builder(default = b"".to_vec())] 105 | comment_suffix: Vec, 106 | 107 | /// Comment weights; when empty (the default) comment generation is unweighted 108 | #[builder(default = vec![1; 0])] 109 | comment_weights: Vec, 110 | 111 | /// Alphabet used for sequence generation 112 | #[builder(default = values::Nucleotides::Dna)] 113 | sequence: values::Nucleotides, 114 | 115 | /// Sequence length; the quality string is generated with this same length 116 | #[builder(default = 150)] 117 | sequence_len: usize, 118 | 119 | /// Sequence weights; when empty (the default) sequence generation is unweighted 120 | #[builder(default = vec![1; 0])] 121 | sequence_weights: Vec, 122 | 123 | /// Alphabet used for plus comment generation 124 | #[builder(default = 
values::Alphabet::A2z)] 125 | plus: values::Alphabet, 126 | 127 | /// Plus comment len 128 | #[builder(default = 5)] 129 | plus_len: usize, 130 | 131 | /// Plus prefix 132 | #[builder(default = b"".to_vec())] 133 | plus_prefix: Vec, 134 | 135 | /// Plus suffix 136 | #[builder(default = b"".to_vec())] 137 | plus_suffix: Vec, 138 | 139 | /// Plus weights 140 | #[builder(default = vec![1; 0])] 141 | plus_weights: Vec, 142 | 143 | /// Alphabet use for quality generation 144 | #[builder(default = values::Quality::Illumina)] 145 | quality: values::Quality, 146 | 147 | /// Quality weights 148 | #[builder(default = vec![1; 0])] 149 | quality_weights: Vec, 150 | } 151 | 152 | impl core::default::Default for Fastq { 153 | fn default() -> Self { 154 | Fastq::builder().build() 155 | } 156 | } 157 | 158 | impl format::Format for Fastq { 159 | fn header( 160 | &self, 161 | _output: &mut dyn std::io::Write, 162 | _rng: &mut rand::rngs::StdRng, 163 | ) -> error::Result<()> { 164 | Ok(()) 165 | } 166 | 167 | fn record( 168 | &self, 169 | output: &mut dyn std::io::Write, 170 | rng: &mut rand::rngs::StdRng, 171 | ) -> error::Result<()> { 172 | // id 173 | output.write_all(&[b'@'])?; 174 | output.write_all(&self.id_prefix)?; 175 | if self.id_weights.is_empty() { 176 | output.write_all(&self.id.generate(rng, self.id_len)?)?; 177 | } else { 178 | output.write_all(&self.id.weighted(rng, self.id_len, &self.id_weights)?)?; 179 | } 180 | output.write_all(&self.id_suffix)?; 181 | if self.id_prefix.len() + self.id_len + self.id_suffix.len() != 0 { 182 | output.write_all(&[b' '])?; 183 | } 184 | 185 | // comment 186 | output.write_all(&self.comment_prefix)?; 187 | if self.comment_weights.is_empty() { 188 | output.write_all(&self.comment.generate(rng, self.comment_len)?)?; 189 | } else { 190 | output.write_all(&self.comment.weighted( 191 | rng, 192 | self.comment_len, 193 | &self.comment_weights, 194 | )?)?; 195 | } 196 | output.write_all(&self.comment_suffix)?; 197 | output.write_all(b"\n")?; 
198 | 199 | // sequence 200 | if self.sequence_weights.is_empty() { 201 | output.write_all(&self.sequence.generate(rng, self.sequence_len)?)?; 202 | } else { 203 | output.write_all(&self.sequence.weighted( 204 | rng, 205 | self.sequence_len, 206 | &self.sequence_weights, 207 | )?)?; 208 | } 209 | output.write_all(b"\n")?; 210 | 211 | // plus 212 | output.write_all(b"+")?; 213 | output.write_all(&self.plus_prefix)?; 214 | if self.plus_weights.is_empty() { 215 | output.write_all(&self.plus.generate(rng, self.plus_len)?)?; 216 | } else { 217 | output.write_all(&self.plus.weighted(rng, self.plus_len, &self.plus_weights)?)?; 218 | } 219 | output.write_all(&self.plus_suffix)?; 220 | output.write_all(b"\n")?; 221 | 222 | // quality 223 | if self.quality_weights.is_empty() { 224 | output.write_all(&self.quality.generate(rng, self.sequence_len)?)?; 225 | } else { 226 | output.write_all(&self.quality.weighted( 227 | rng, 228 | self.sequence_len, 229 | &self.quality_weights, 230 | )?)?; 231 | } 232 | 233 | Ok(()) 234 | } 235 | } 236 | 237 | #[cfg(test)] 238 | mod tests { 239 | /* std use */ 240 | use std::io::Read as _; 241 | 242 | /* project use */ 243 | use super::format::Format as _; 244 | use super::*; 245 | 246 | const TRUTH: &[u8] = b"@GSWNP zybhlatbbu 247 | CGCgtGTTAGTTAagccAcggtAatGcTtgtaCgcAGgAtaTcgAAtTaT 248 | + 249 | 60D.5-E.F/$*::>A2A>)\'=0B<$E/&411+!)AF,E;7.8.3GF2%\" 250 | @CTQMK tnwbrlpbpv 251 | gCtGCAAtTacCGtTAAcaGGtatTCaTCctcTGgAActTgCGAcaAgaA 252 | + 253 | !B+\':<>3#.EFG@@D\'*98<-:,1+F?>\"?(B4- 254 | @TYGTA sulyzaweta 255 | GCCaCGctGagattTGtgCttaAGggTcCTGcGTAGCTGTCCACgTTTGa 256 | + 257 | E/A71C4\'6&!1\"\'!I\")67,8>$$*,:6GIE0@C6B2<7A53478?%I\' 258 | @KWRUD mkpkjusxnd 259 | tgACTacgtCTaTgTCAGgCtaGTtcCCTcgcTgAgGgAtCAAatTCTAT 260 | + 261 | -I2IIGC27.=)(&3=8(A!,=EB-E/HC)-%\'9>0$&&?A/DAIC!1)1 262 | @DPOXN glybtnsmjj 263 | atcaCtGcTAGCCAgaTTgcAaTtaTGgACTTagGgtATACCtcTctCAt 264 | + 265 | E(1)(8E,\'HC4<55;&3!,*$G>A)@H149G@/7.D$$6-CGI5#@$F= 266 | "; 267 | 268 | const 
DEFAULT: &[u8] = b"@GSWNPZYBHL atbbutlfemxuzgaghmwn 269 | gccAcggtAatGcTtgtaCgcAGgAtaTcgAAtTaTaGaTggttGCtCatGtctgCTGGTACtgTgcaaaagggGAGacAtgCtGCAAtTacCGtTAAcaGGtatTCaTCctcTGgAActTgCGAcaAgaAAtaTCCcAgagggaCcttC 270 | +gNXcb 271 | <-:,1+F?>\"?(B4-?CG@!GI!(?\"1\'%))7&08<27F?3AA$E(/@A#FBBF<\')G+%2-\"&*C+,\'!&F4,,7E/A71C4\'6&!1\"\'!I\")67,8>$$*,:6GIE0@C6B2<7A53478?%I\'E<@AH5189"; 272 | 273 | const WEIGHTED_TRUTH: &[u8] = b"@ECEED cdeeacdeac 274 | GAAGGTCCTGCTGGGTCCGATCCATGTTGAGCCGGTGCAGGTGGACGGTT 275 | +DEDCC 276 | (%))&$)'$)(&##&'*)(*!)*''%%)())'&(!(!'')(&$)')%''&"; 277 | 278 | #[test] 279 | fn default() -> error::Result<()> { 280 | let mut output = Vec::new(); 281 | let mut rng = crate::rand(); 282 | 283 | let generator = Fastq::default(); 284 | 285 | generator.record(&mut output, &mut rng)?; 286 | 287 | assert_eq!(output, DEFAULT); 288 | 289 | Ok(()) 290 | } 291 | 292 | #[test] 293 | fn record() -> error::Result<()> { 294 | let mut output = Vec::new(); 295 | let mut rng = crate::rand(); 296 | 297 | let generator = Fastq::builder() 298 | .id_len(5) 299 | .comment_len(10) 300 | .plus_len(0) 301 | .sequence_len(50) 302 | .build(); 303 | 304 | generator.record(&mut output, &mut rng)?; 305 | 306 | assert_eq!(output, TRUTH.to_vec()[..121]); 307 | 308 | Ok(()) 309 | } 310 | 311 | #[test] 312 | fn weigthed_record() -> error::Result<()> { 313 | let mut output = Vec::new(); 314 | let mut rng = crate::rand(); 315 | 316 | let generator = Fastq::builder() 317 | .id_len(5) 318 | .id_weights(vec![1, 2, 3, 4, 5]) 319 | .comment_len(10) 320 | .comment_weights(vec![1, 2, 3, 4, 5]) 321 | .sequence_len(50) 322 | .sequence_weights(vec![1, 2, 3, 4]) 323 | .plus_weights(vec![1, 2, 3, 4, 5]) 324 | .quality_weights(vec![1, 0, 3, 4, 5, 6, 7, 8, 9, 2]) 325 | .build(); 326 | 327 | generator.record(&mut output, &mut rng)?; 328 | 329 | assert_eq!(output, WEIGHTED_TRUTH.to_vec()); 330 | 331 | Ok(()) 332 | } 333 | 334 | #[test] 335 | fn pre_suf_ix() -> error::Result<()> { 336 | let mut output = 
Vec::new(); 337 | let mut rng = crate::rand(); 338 | 339 | let generator = Fastq::builder() 340 | .id_len(5) 341 | .comment_len(10) 342 | .plus_len(0) 343 | .sequence_len(50) 344 | .id_prefix(b"id_prefix_".to_vec()) 345 | .id_suffix(b"_id_suffix".to_vec()) 346 | .comment_prefix(b"comment_prefix_".to_vec()) 347 | .comment_suffix(b"_comment_suffix".to_vec()) 348 | .plus_prefix(b"plus_prefix_".to_vec()) 349 | .plus_suffix(b"_plus_suffix".to_vec()) 350 | .build(); 351 | 352 | generator.record(&mut output, &mut rng)?; 353 | 354 | assert_eq!( 355 | output, 356 | b"@id_prefix_GSWNP_id_suffix comment_prefix_zybhlatbbu_comment_suffix 357 | CGCgtGTTAGTTAagccAcggtAatGcTtgtaCgcAGgAtaTcgAAtTaT 358 | +plus_prefix__plus_suffix 359 | 60D.5-E.F/$*::>A2A>)\'=0B<$E/&411+!)AF,E;7.8.3GF2%\"" 360 | .to_vec() 361 | ); 362 | 363 | Ok(()) 364 | } 365 | 366 | #[test] 367 | fn records() -> error::Result<()> { 368 | let mut output = Vec::new(); 369 | let mut rng = crate::rand(); 370 | 371 | let generator = Fastq::builder() 372 | .id_len(5) 373 | .comment_len(10) 374 | .plus_len(0) 375 | .sequence_len(50) 376 | .build(); 377 | 378 | generator.records(&mut output, &mut rng, 5)?; 379 | 380 | assert_eq!(output, TRUTH.to_vec()); 381 | 382 | Ok(()) 383 | } 384 | 385 | #[test] 386 | fn create() -> error::Result<()> { 387 | let mut rng = crate::rand(); 388 | 389 | let temp_dir = tempfile::tempdir()?; 390 | let temp_path = temp_dir.path(); 391 | 392 | let temp_file = temp_path.join("tmp.fasta"); 393 | 394 | let generator = Fastq::builder() 395 | .id_len(5) 396 | .comment_len(10) 397 | .plus_len(0) 398 | .sequence_len(50) 399 | .build(); 400 | 401 | generator.create(&temp_file, &mut rng, 5)?; 402 | 403 | let mut data = Vec::new(); 404 | let mut input = std::fs::File::open(&temp_file)?; 405 | input.read_to_end(&mut data)?; 406 | 407 | assert_eq!(data, TRUTH.to_vec()); 408 | 409 | Ok(()) 410 | } 411 | } 412 | -------------------------------------------------------------------------------- 
/src/format/vcf/header.rs: -------------------------------------------------------------------------------- 1 | //! VCF header generation 2 | 3 | /* std use */ 4 | 5 | /* crates use */ 6 | 7 | /* projet use */ 8 | use crate::constants; 9 | use crate::error; 10 | use crate::values; 11 | 12 | #[derive(typed_builder::TypedBuilder)] 13 | /// Struct to generate header 14 | pub struct Header { 15 | /// Value use for chromosomes 16 | #[builder(default = values::Chromosomes::Default)] 17 | contigs: values::Chromosomes, 18 | 19 | /// vcf version number 20 | #[builder(default = b"VCFv4.3".to_vec())] 21 | version: Vec, 22 | 23 | /// contig species 24 | #[builder(default = b"random".to_vec())] 25 | contig_species: Vec, 26 | 27 | /// contig length 28 | #[builder(default = u32::MAX)] 29 | contig_length: u32, 30 | 31 | /// filter range 32 | #[builder(default = values::Integer::UserDefine(0..3))] 33 | filter: values::Integer, 34 | 35 | /// filter prefix 36 | #[builder(default = b"filter_".to_vec())] 37 | filter_prefix: Vec, 38 | 39 | /// filter description 40 | #[builder(default = b"generated vcf filter field".to_vec())] 41 | filter_description: Vec, 42 | 43 | /// info prefix 44 | #[builder(default = b"info_".to_vec())] 45 | info_prefix: Vec, 46 | 47 | /// info description 48 | #[builder(default = b"generated vcf info field".to_vec())] 49 | info_description: Vec, 50 | 51 | /// InfoType 52 | #[builder(default = values::VcfInfoType::All)] 53 | info_type: values::VcfInfoType, 54 | 55 | /// InfoNumber 56 | #[builder(default = values::VcfInfoNumber::All)] 57 | info_number: values::VcfInfoNumber, 58 | 59 | /// format prefix 60 | #[builder(default = b"format_".to_vec())] 61 | format_prefix: Vec, 62 | 63 | /// format description 64 | #[builder(default = b"generated vcf format field".to_vec())] 65 | format_description: Vec, 66 | 67 | /// FormatType 68 | #[builder(default = values::VcfFormatType::All)] 69 | format_type: values::VcfFormatType, 70 | 71 | /// FormatNumber 72 | 
#[builder(default = values::VcfFormatNumber::All)] 73 | format_number: values::VcfFormatNumber, 74 | 75 | /// Number of sample 76 | #[builder(default = 3)] 77 | sample: usize, 78 | 79 | /// Sample prefix 80 | #[builder(default = b"sample_".to_vec())] 81 | sample_prefix: Vec, 82 | 83 | /// Sample suffix 84 | #[builder(default = b"".to_vec())] 85 | sample_suffix: Vec, 86 | } 87 | 88 | impl Header { 89 | /// Generate vcf header 90 | pub fn generate(&self, output: &mut dyn std::io::Write) -> error::Result<()> { 91 | // version 92 | output.write_all(b"##fileformat=")?; 93 | output.write_all(&self.version)?; 94 | output.write_all(b"\n")?; 95 | 96 | // contig 97 | for chr in self.contigs.as_ref() { 98 | output.write_all(b"##contig=\n")?; 105 | } 106 | 107 | // filters 108 | for n in >>::into( 109 | ::clone(&self.filter), 110 | ) { 111 | output.write_all(b"##FILTER=\n")?; 117 | } 118 | 119 | // infos 120 | for vcf_type in self.info_type.as_ref() { 121 | if vcf_type == b"Flag" { 122 | output.write_all(b"##INFO=\n")?; 130 | } else { 131 | for vcf_number in self.info_number.as_ref() { 132 | // IDentifiant 133 | output.write_all(b"##INFO=\n")?; 154 | } 155 | } 156 | } 157 | 158 | // formats 159 | for vcf_type in self.format_type.as_ref() { 160 | for vcf_number in self.format_number.as_ref() { 161 | // ID 162 | output.write_all(b"##FORMAT=\n")?; 180 | } 181 | } 182 | 183 | // column name 184 | output.write_all(b"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO")?; 185 | 186 | if self.sample != 0 { 187 | output.write_all(b"\tFORMAT")?; 188 | for n in 0..self.sample { 189 | output.write_all(b"\t")?; 190 | output.write_all(&self.sample_prefix)?; 191 | output.write_all(n.to_string().as_bytes())?; 192 | output.write_all(&self.sample_suffix)?; 193 | } 194 | } 195 | 196 | output.write_all(b"\n")?; 197 | 198 | Ok(()) 199 | } 200 | } 201 | 202 | impl core::default::Default for Header { 203 | fn default() -> Self { 204 | Header::builder().build() 205 | } 206 | } 207 | 208 | #[cfg(test)] 209 
| mod tests { 210 | /* project use */ 211 | use super::*; 212 | 213 | const DEFAULT: &[u8] = b"##fileformat=VCFv4.3 214 | ##contig= 215 | ##contig= 216 | ##contig= 217 | ##contig= 218 | ##contig= 219 | ##contig= 220 | ##contig= 221 | ##contig= 222 | ##contig= 223 | ##contig= 224 | ##FILTER= 225 | ##FILTER= 226 | ##FILTER= 227 | ##INFO= 228 | ##INFO= 229 | ##INFO= 230 | ##INFO= 231 | ##INFO= 232 | ##INFO= 233 | ##INFO= 234 | ##INFO= 235 | ##INFO= 236 | ##INFO= 237 | ##INFO= 238 | ##INFO= 239 | ##INFO= 240 | ##INFO= 241 | ##INFO= 242 | ##INFO= 243 | ##INFO= 244 | ##INFO= 245 | ##INFO= 246 | ##INFO= 247 | ##INFO= 248 | ##INFO= 249 | ##INFO= 250 | ##INFO= 251 | ##INFO= 252 | ##FORMAT= 253 | ##FORMAT= 254 | ##FORMAT= 255 | ##FORMAT= 256 | ##FORMAT= 257 | ##FORMAT= 258 | ##FORMAT= 259 | ##FORMAT= 260 | ##FORMAT= 261 | ##FORMAT= 262 | ##FORMAT= 263 | ##FORMAT= 264 | ##FORMAT= 265 | ##FORMAT= 266 | ##FORMAT= 267 | ##FORMAT= 268 | ##FORMAT= 269 | ##FORMAT= 270 | ##FORMAT= 271 | ##FORMAT= 272 | ##FORMAT= 273 | ##FORMAT= 274 | ##FORMAT= 275 | ##FORMAT= 276 | #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample_0 sample_1 sample_2 277 | "; 278 | 279 | const SET: &[u8] = b"##fileformat=VCFv4.3 280 | ##contig= 281 | ##contig= 282 | ##contig= 283 | ##INFO= 284 | ##INFO= 285 | ##INFO= 286 | ##INFO= 287 | ##FORMAT= 288 | ##FORMAT= 289 | ##FORMAT= 290 | ##FORMAT= 291 | #CHROM POS ID REF ALT QUAL FILTER INFO 292 | "; 293 | 294 | #[test] 295 | fn default() -> error::Result<()> { 296 | let mut output = Vec::new(); 297 | 298 | let generator = Header::builder().build(); 299 | 300 | generator.generate(&mut output)?; 301 | 302 | assert_eq!(output, DEFAULT); 303 | 304 | Ok(()) 305 | } 306 | 307 | #[test] 308 | fn set() -> error::Result<()> { 309 | let mut output = Vec::new(); 310 | 311 | let generator = Header::builder() 312 | .contigs(values::Chromosomes::UserDefine(vec![b"A", b"B", b"C"])) 313 | .contig_species(b"alphabet".to_vec()) 314 | .contig_length(4223) 315 | 
.filter(values::Integer::UserDefine(0..0)) 316 | .filter_prefix(b"filter_".to_vec()) 317 | .filter_description(b"description".to_vec()) 318 | .info_prefix(b"INFO_".to_vec()) 319 | .info_description(b"description".to_vec()) 320 | .info_type(values::VcfInfoType::UserDefine(vec![b"Integer", b"Float"])) 321 | .info_number(values::VcfInfoNumber::UserDefine(vec![b"1", b"2"])) 322 | .format_prefix(b"FORMAT_".to_vec()) 323 | .format_description(b"description".to_vec()) 324 | .format_type(values::VcfFormatType::UserDefine(vec![ 325 | b"Integer", b"Float", 326 | ])) 327 | .format_number(values::VcfFormatNumber::UserDefine(vec![b"1", b"2"])) 328 | .sample(0) 329 | .sample_prefix(b"individual_".to_vec()) 330 | .sample_suffix(b" auie".to_vec()) 331 | .build(); 332 | 333 | generator.generate(&mut output)?; 334 | 335 | assert_eq!(output, SET); 336 | 337 | Ok(()) 338 | } 339 | } 340 | -------------------------------------------------------------------------------- /src/format/vcf.rs: -------------------------------------------------------------------------------- 1 | //! VCF generation 2 | //! 3 | //! Usage: 4 | //! ```no_run 5 | //! use biotest::Format as _; // import Format trait is required 6 | //! 7 | //! # fn main() -> Result<(), biotest::error::Error> { 8 | //! let mut rng = biotest::rand(); // Create a random generator with a fixed seed 9 | //! 10 | //! let mut output = Vec::new(); 11 | //! let generator = biotest::Vcf::default(); 12 | //! 13 | //! // Write one vcf record in output with 3 samples and all possible INFO and FORMAT 14 | //! generator.record(&mut output, &mut rng)?; 15 | //! generator.records(&mut output, &mut rng, 5)?; // Write five vcf records in output 16 | //! 17 | //! // Write five vcf record in "test.vcf" with complete header 18 | //! generator.create("test.vcf", &mut rng, 5)?; 19 | //! # Ok(()) 20 | //! # } 21 | //! ``` 22 | //! 
23 | 24 | /* std use */ 25 | 26 | /* crates use */ 27 | 28 | /* project use */ 29 | use crate::error; 30 | 31 | /* module declaration */ 32 | pub mod header; 33 | pub mod record; 34 | 35 | /// Struct to generate a random vcf header and random vcf records 36 | #[derive(typed_builder::TypedBuilder)] 37 | pub struct Vcf { 38 | /// Structure to define header 39 | #[builder(default = header::Header::default())] 40 | header: header::Header, 41 | 42 | /// Structure to define record 43 | #[builder(default = record::Record::default())] 44 | record: record::Record, 45 | } 46 | 47 | impl core::default::Default for Vcf { 48 | fn default() -> Self { 49 | Vcf::builder().build() 50 | } 51 | } 52 | 53 | impl crate::format::Format for Vcf { 54 | fn header( 55 | &self, 56 | output: &mut dyn std::io::Write, 57 | _rng: &mut rand::rngs::StdRng, 58 | ) -> error::Result<()> { 59 | self.header.generate(output) 60 | } 61 | 62 | fn record( 63 | &self, 64 | output: &mut dyn std::io::Write, 65 | rng: &mut rand::rngs::StdRng, 66 | ) -> error::Result<()> { 67 | self.record.generate(output, rng) 68 | } 69 | } 70 | 71 | #[cfg(test)] 72 | mod tests { 73 | /* std use */ 74 | use std::io::Read as _; 75 | 76 | /* project use */ 77 | use super::*; 78 | use crate::format::Format as _; 79 | 80 | const DEFAULT: &[u8] =
b"##fileformat=VCFv4.3\n##contig=\n##contig=\n##contig=\n##contig=\n##contig=\n##contig=\n##contig=\n##contig=\n##contig=\n##contig=\n##FILTER=\n##FILTER=\n##FILTER=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n##FORMAT=\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample_0\tsample_1\tsample_2\nYAR028W\t509242864\t.\tA\t.\t224\tfilter_0\tinfo_Integer_1=-1867486109;info_Integer_2=1180908492,1041698939;info_Integer_A=-207506017;info_Integer_R=-1221871790,-1356802783;info_Integer_G=-496257857,2127853583,-1498117423;info_Integer_.=2082620030,-344161843,-1022296784,-1007334138;info_Float_1=68.286865;info_Float_2=-96.154594,-23.433853;info_Float_A=-48.782158;info_Float_R=-46.15216,-92.639305;info_Float_G=-7.5115204,74.78337,1.5983124;info_Float_.=26.825455;info_Flag_0;info_Character_1=i;info_Character_2=r,[;info_Character_A=g;info_Character_R=M,D;info_Character_G=h,w,\\;info_Character_.=C,G,p,];info_String_1=ZoXMT;info_String_2=gQouV,Gn`Jw;info_String_A=eVDDU;info_String_R=YytzA,ny[_P;info_String_G=Oshsq,bSjAd,bZcRF;info_String_.=rQ_[V,S^RtS,vzMeT,jonYV\tformat_Integer_1:format_Integer_2:format_Integer_A:format_Integer_R:format_Integer_G:format_Integer_.:format_Float_1:format_Float_2:format_Float_A:format_Float_R:format_Float_G:format_Float_.:format_Character_1:format_Character_2:format_Character_A:format_Character_R:format_Character_G:format_Character_.:format_String_1:format_String_2:format_String_A:format_String_R:format_String_G:format_String_.\t-1552897203:1249370088,894744660:-1298826907:-1500526673,846767901:154354090,1292630937
,-513388490:730433769,-1782228224,1193004039,1639963889:-31.463745:-74.13223,44.792007:-4.5392303:-42.586063,-20.249939:-19.714546,-48.754406,40.519638:-27.838158:L:J,L:n:u,P:t,f,`:r,^:aaSsw:svYGC,zkT\\W:k_sGD:gZcCc,]tIGE:bcnVW,JVaDB,nQSHY:[QBCg,L`Scx,xXYm`,NnOG[\t-1345745815:173280036,-939420073:-1365650667:679852521,1295053734:732715199,-819759668,-308523151:1942972144,-249711286,1737760149:-53.047443:-97.35165,-58.53014:93.27409:-89.49225,65.68997:62.677032,92.94722,32.79944:52.132156,-30.33149:z:R,v:G:G,X:B,g,q:[,a,B:w_Zxx:kAFA[,o`OId:JgjZD:StKau,vtaIh:wmmrI,gNXcb,hRd]Q:OgukS\t946791943:-2019035904,1055813342:-2045085244:-1401538285,878536766:731752434,1439145027,-966674455:-1096509554,-1513894259,1176983779,-199713084:51.48242:-93.36465,6.6719513:32.869843:-77.50437,-17.745377:38.63495,-9.558914,42.16661:-6.823944,-39.047478,48.595016,68.83052:w:O,m:A:i,Z:P,w,y:s:KBssX:JGMMK,`HVkg:oY`vk:xarZo,yTnQF:EntKU,mnaDW,uppug:FhYRx,BZHMq\n93\t2036067340\t.\tT\t.\t3\tfilter_2\tinfo_Integer_1=-945359064;info_Integer_2=2042408529,-281042636;info_Integer_A=1400195836;info_Integer_R=158409543,1664317966;info_Integer_G=-678096394,1218409815,-280169010;info_Integer_.=-429223473,-798239102,1447160136;info_Float_1=-6.572792;info_Float_2=-69.61241,4.734352;info_Float_A=-75.17469;info_Float_R=54.42581,-98.062325;info_Float_G=-23.31765,-19.276001,-94.52958;info_Float_.=-96.97473;info_Flag_0;info_Character_1=L;info_Character_2=_,`;info_Character_A=e;info_Character_R=b,j;info_Character_G=E,N,F;info_Character_.=`;info_String_1=pNSPd;info_String_2=^wz^t,ZVmq_;info_String_A=oBYJg;info_String_R=Q`oPn,^Z\\`b;info_String_G=la^yz,IWtrg,moGx];info_String_.=cWVPn,iuT_I,lSskB\tformat_Integer_1:format_Integer_2:format_Integer_A:format_Integer_R:format_Integer_G:format_Integer_.:format_Float_1:format_Float_2:format_Float_A:format_Float_R:format_Float_G:format_Float_.:format_Character_1:format_Character_2:format_Character_A:format_Character_R:format_Character_G:format_Character_.:format_String_1:f
ormat_String_2:format_String_A:format_String_R:format_String_G:format_String_.\t-1281703324:1908114858,-1293150474:-1602964452:1056979575,-963728372:1402654398,-1718558894,95336780:-1647348167,1565404409,1392446648,-1451772547:-40.51242:-35.81727,89.454605:-6.7819138:11.500885,-47.349976:86.69888,-0.38061523,-49.81668:-85.17905:B:b,Z:v:x,F:j,H,G:`,Y,P,f:IFpIP:NevhO,TNvrC:wSEWb:LtHSU,iCnsI:sSMCB,y^pRI,Q\\eLD:RzYyz,V_szw\t1429610357:667106503,-347005078:-1450892043:187576632,-731059940:2111030852,810139033,-1298935060:312049871,870382568,-1387207741,1225417725:21.764038:-99.951675,-45.165207:39.073135:64.91058,77.78046:63.085556,-47.436356,-39.503193:-42.086887,93.22655:B:P,e:N:q,L:a,R,r:F,J:cjVaE:rHHlo,MTrco:Gyzgq:iA_WI,LkXIc:`ot_P,Zwl^\\,Uz^rc:ndZg_,IpyMn\t-298362881:554100151,-1638105763:-813444979:285964333,-159387058:-2103781190,-330220715,-544136330:-1391083738,-712907221:63.459488:48.598007,-64.342575:65.56694:-0.56829834,-94.79799:-28.613068,81.39114,68.24536:39.960556,73.48726:`:N,R:y:Q,G:D,w,G:`:RjrNl:hQqYM,wuBIP:QMGV`:vWunB,R\\Okz:KMcfE,GOhnT,sJUAE:tKWGL,KatQJ,Qxq\\g,^^xfH\nX\t2138516245\t.\tA\t.\t58\tfilter_0\tinfo_Integer_1=1885408291;info_Integer_2=922014433,1238890301;info_Integer_A=-474357293;info_Integer_R=-1290577166,1120909097;info_Integer_G=-186729156,1973040123,-422535277;info_Integer_.=-183630805,1867038567,1281678071;info_Float_1=-95.3896;info_Float_2=51.682953,-31.031967;info_Float_A=46.307205;info_Float_R=8.105705,35.940765;info_Float_G=-34.93447,72.228195,-83.08275;info_Float_.=-82.36182;info_Character_1=H;info_Character_2=o,R;info_Character_A=q;info_Character_R=Z,v;info_Character_G=z,y,];info_Character_.=m;info_String_1=EhXGl;info_String_2=\\R[nM,ljhET;info_String_A=zoPwT;info_String_R=hoTOP,mgWvu;info_String_G=BiQUb,oxchY,cBuGc;info_String_.=LjZa],OQjTt,_wcgT,tO\\lL\tformat_Integer_1:format_Integer_2:format_Integer_A:format_Integer_R:format_Integer_G:format_Integer_.:format_Float_1:format_Float_2:format_Float_A:format_Float_R:format_Float_G
:format_Float_.:format_Character_1:format_Character_2:format_Character_A:format_Character_R:format_Character_G:format_Character_.:format_String_1:format_String_2:format_String_A:format_String_R:format_String_G:format_String_.\t1581362196:-1447784025,-1525232815:-1035525905:-1979111486,-1258086770:428899970,1146438819,-1211106230:634110781:61.013245:11.870453,-10.159973:-17.56308:-89.45878,-27.66838:-34.221336,81.52295,87.46088:22.250458:T:G,D:Y:X,C:r,I,y:b,B,D:UVwho:[vJ`a,muhok:xubhH:L^ANq,ulNzZ:Mpgwy,XlyYC,qgVMS:zrlVo\t1082793408:-1252548594,195759635:-1056145565:1294511812,1376570218:-1300715418,-1097690924,-72191116:-416974155,1624764853,-68133459,-693202153:-81.10368:-62.194897,-10.530067:-69.0829:-94.7505,48.184113:-68.95244,86.32448,-70.56353:74.956985:]:d,Z:Q:F,m:e,W,J:^:PamSD:QW[XW,\\\\Hpl:aP_dw:Tkfvm,vkqDI:xIXno,EQsVH,lECpM:P_hBj,XI`Pk,lhoEn,nDGdq\t-1756403182:-1210584648,1067164580:-2026752630:1524204479,2063402043:-1671581241,1992411203,-255678314:387204102,-2048329797:29.60765:-70.24462,91.82048:-57.780792:-19.511703,87.46164:17.362617,-10.059616,-89.640594:-4.2216034,31.744385:n:v,T:E:N,p:],Y,R:p,A,W,i:yIqiC:w]gp_,[s]vD:hY\\Sm:ynkIV,^tOuG:kqHsi,EQDdb,nppEh:gQkWC,CgEHr\nNC_000015.10\t1204106469\t.\tc\t.\t163\tfilter_2\tinfo_Integer_1=1856695899;info_Integer_2=-1228532925,-1558813844;info_Integer_A=-1669649672;info_Integer_R=-2009180694,-858629871;info_Integer_G=-1571758790,-1808158134,-952576567;info_Integer_.=-839104859,-1425897419,1479780909;info_Float_1=86.42102;info_Float_2=47.610138,-21.43116;info_Float_A=22.063469;info_Float_R=31.69635,81.844086;info_Float_G=-0.43652344,34.970734,6.893921;info_Float_.=-70.91541,88.53403,11.178253,-25.09742;info_Character_1=w;info_Character_2=K,O;info_Character_A=R;info_Character_R=q,B;info_Character_G=P,P,[;info_Character_.=S,h,I;info_String_1=Z^Mr\\;info_String_2=Pozxs,[sGNN;info_String_A=uycmn;info_String_R=jXNUP,kaQaF;info_String_G=rhEZa,IB_Tj,XJMdW;info_String_.=MRIyw\tformat_Integer_1:format_Integer_2:format_I
nteger_A:format_Integer_R:format_Integer_G:format_Integer_.:format_Float_1:format_Float_2:format_Float_A:format_Float_R:format_Float_G:format_Float_.:format_Character_1:format_Character_2:format_Character_A:format_Character_R:format_Character_G:format_Character_.:format_String_1:format_String_2:format_String_A:format_String_R:format_String_G:format_String_.\t-1127684345:-447879000,-1851298129:1367475938:1988967275,-439362504:-447904683,-1278415490,-1965808177:-1579759849,-1859788951:-31.910011:6.785797,-95.413086:19.286415:9.942863,-23.623634:31.06224,42.57071,92.734314:-51.402973,-25.126984,73.030045:J:K,Y:K:c,^:y,P,V:S,f,s:UwMXI:KZ]ZM,Dszwa:pRf\\B:VTcUU,OJHFc:gkajf,Ijt]R,ZCsdT:RPBHo,RSsDF,^wkt`,MKboQ\t-17276091:-1084847601,-962513528:1986061217:1121398096,539835016:398616881,544166366,-1410097944:1412514655:18.60447:86.08472,21.346832:12.448051:82.72441,61.860733:-29.544258,33.810944,17.074417:59.14827,-3.4019012:B:k,S:Y:K,b:I,J,\\:Q,x,t,j:ZBq\\n:QWJTe,YEfMY:iUtHu:jPdNd,e\\EWF:fR`mi,g_`q_,CHPa^:kUBhP,n\\^lc,zNX^D,Bir[s\t1513956177:842026642,704497487:1383022793:-798417759,1694579519:122079663,1485654311,1917949394:-1493128866,-669588433,-1130379312,-279135289:6.5678864:-39.898037,40.419724:-27.728035:27.380325,59.674316:-78.01762,-46.58332,82.15553:-9.7939,3.7038345,-10.703232:M:`,V:M:m,[:m,p,f:T,a,W:j\\c`P:mbjEz,GJcxU:gIaec:]IAHY,eyWTl:fKaF],eJYeM,oCeFY:lwAXv,GHCdL,yzGwQ,fB_YF\nNC_016845.1\t1745241132\t.\tc\t.\t178\tfilter_1\tinfo_Integer_1=1255642541;info_Integer_2=-495098950,-163997913;info_Integer_A=1186186199;info_Integer_R=-572056065,-813811802;info_Integer_G=1802548524,-1453024998,1578487959;info_Integer_.=135778623,772025838,1954620613,662130014;info_Float_1=-38.989304;info_Float_2=89.58476,9.665085;info_Float_A=90.821075;info_Float_R=-69.59674,45.46039;info_Float_G=-22.850967,40.54007,-72.7124;info_Float_.=85.93584,22.705269,-53.66709,-12.823059;info_Flag_0;info_Character_1=`;info_Character_2=_,e;info_Character_A=k;info_Character_R=u,a;info_Character_G=I,
q,f;info_Character_.=p;info_String_1=mSQrS;info_String_2=lEvUS,zv[RB;info_String_A=\\lKRx;info_String_R=HP`gj,ftMtl;info_String_G=uGX\\V,D^xYp,\\]fYN;info_String_.=hvzef,hZ_x]\tformat_Integer_1:format_Integer_2:format_Integer_A:format_Integer_R:format_Integer_G:format_Integer_.:format_Float_1:format_Float_2:format_Float_A:format_Float_R:format_Float_G:format_Float_.:format_Character_1:format_Character_2:format_Character_A:format_Character_R:format_Character_G:format_Character_.:format_String_1:format_String_2:format_String_A:format_String_R:format_String_G:format_String_.\t-1818220937:-1716370196,1471873252:1693568706:-1384353442,-531174370:290881760,-1611919520,-30861859:1820031599,1223028519,-241837959,-130516933:97.7139:-71.66517,-12.71994:-10.58564:25.678925,-49.043964:45.110657,-93.51485,27.87349:98.51344,89.07881,67.16724:L:z,B:d:r,m:^,r,Q:C,M:\\wEaP:Iq`oJ,tZfNB:cc[uV:_`pF`,wSolG:OBI\\a,L`htu,t^OEb:yBTff,Eql_^\t-519243625:1394920398,970547643:-1339598250:2069050503,1673139727:-383366852,819217239,-1582651946:394275479,1405093414:-88.27617:48.00322,-98.616264:-35.968445:-16.261383,64.94083:23.264336,55.233124,-12.705566:-31.018303,32.29892,88.60808:c:U,s:k:k,h:H,V,V:X,f,N:S^fl[:q[ODL,Tk[Kk:XkZnS:kkStd,H_Ns[:RRu[f,wNYIM,Skoky:KIrEa,Xxim^,nfhEz,mMSVs\t711611455:-246472977,1741690154:1085519110:-567213617,-400517020:1344286726,-1099251448,-600330030:-1239546736,247739344:4.053589:-22.763847,15.909172:47.2063:-13.576843,27.678421:8.278969,-50.282,57.042984:-70.92239,25.547836:e:s,u:c:j,A:E,],^:Q,V,t:WSfOh:IoRqu,HnRI`:MpI_G:XQKWn,sVHVf:s[UFc,tFryv,aJzJt:XVSVn,eOcXy\n"; 81 | 82 | #[test] 83 | fn default() -> error::Result<()> { 84 | let mut output = Vec::new(); 85 | let mut rng = crate::rand(); 86 | 87 | let generator = Vcf::default(); 88 | 89 | generator.header(&mut output, &mut rng)?; 90 | generator.records(&mut output, &mut rng, 5)?; 91 | 92 | assert_eq!(output, DEFAULT); 93 | 94 | Ok(()) 95 | } 96 | 97 | #[test] 98 | fn create() -> error::Result<()> { 99 | let 
mut rng = crate::rand(); 100 | 101 | let temp_dir = tempfile::tempdir()?; 102 | let temp_path = temp_dir.path(); 103 | 104 | let temp_file = temp_path.join("tmp.vcf"); 105 | 106 | let generator = Vcf::builder().build(); 107 | 108 | generator.create(&temp_file, &mut rng, 5)?; 109 | 110 | let mut data = Vec::new(); 111 | let mut input = std::fs::File::open(&temp_file)?; 112 | input.read_to_end(&mut data)?; 113 | 114 | assert_eq!(data, DEFAULT.to_vec()); 115 | 116 | Ok(()) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/values.rs: -------------------------------------------------------------------------------- 1 | //! Declarations of many possible values 2 | 3 | /* std use */ 4 | 5 | /* crates use */ 6 | use rand::distributions::Distribution as _; 7 | use rand::seq::SliceRandom as _; 8 | use rand::Rng as _; 9 | 10 | /* project use */ 11 | use crate::constants; 12 | use crate::error; 13 | 14 | #[derive(Debug, Clone, Default)] 15 | /// Different generic ASCII alphabets 16 | pub enum Alphabet { 17 | #[default] 18 | /// Any visible ascii character 19 | Visible, 20 | 21 | /// Upper case latin alphabet 22 | Upper, 23 | 24 | /// Lower case latin alphabet 25 | Lower, 26 | 27 | /// Ascii character between A-z 28 | A2z, 29 | 30 | /// Vcf default value 31 | VcfDefault, 32 | } 33 | 34 | impl core::convert::AsRef<[u8]> for Alphabet { 35 | fn as_ref(&self) -> &[u8] { 36 | match self { 37 | Alphabet::Visible => &constants::ASCII_VISIBLE, 38 | Alphabet::Upper => &constants::ASCII_VISIBLE[32..58], 39 | Alphabet::Lower => &constants::ASCII_VISIBLE[64..90], 40 | Alphabet::A2z => &constants::ASCII_VISIBLE[32..90], 41 | Alphabet::VcfDefault => &constants::ASCII_VISIBLE[13..14], 42 | } 43 | } 44 | } 45 | 46 | #[derive(Debug, Clone, Default)] 47 | /// Fastq quality range 48 | pub enum Quality { 49 | /// Sanger fastq quality range 50 | Sanger, 51 | 52 | /// Solexa fastq quality range 53 | Solexa, 54 | 55 | /// Illumina quality range
version 1.3 56 | Illumina13, 57 | 58 | /// Illumina quality range version 1.5 59 | Illumina15, 60 | 61 | /// Illumina quality range version 1.8 62 | Illumina18, 63 | 64 | #[default] 65 | /// Illumina quality range version 1.8 66 | Illumina, 67 | } 68 | 69 | impl core::convert::AsRef<[u8]> for Quality { 70 | fn as_ref(&self) -> &[u8] { 71 | match self { 72 | Quality::Sanger => &constants::ASCII_VISIBLE[0..40], 73 | Quality::Solexa => &constants::ASCII_VISIBLE[26..71], 74 | Quality::Illumina13 => &constants::ASCII_VISIBLE[31..71], 75 | Quality::Illumina15 => &constants::ASCII_VISIBLE[34..71], 76 | Quality::Illumina18 | Quality::Illumina => &constants::ASCII_VISIBLE[0..41], 77 | } 78 | } 79 | } 80 | 81 | #[derive(Debug, Clone, Default)] 82 | /// Any nucleotides 83 | pub enum Nucleotides { 84 | #[default] 85 | /// Dna any case 86 | Dna, 87 | 88 | /// Dna lower case 89 | DnaLower, 90 | 91 | /// Dna upper case 92 | DnaUpper, 93 | 94 | /// Rna any case 95 | Rna, 96 | 97 | /// Rna lower case 98 | RnaLower, 99 | 100 | /// Rna upper case 101 | RnaUpper, 102 | } 103 | 104 | impl core::convert::AsRef<[u8]> for Nucleotides { 105 | fn as_ref(&self) -> &[u8] { 106 | match self { 107 | Nucleotides::Dna => &constants::DNA_NUCLEOTIDES, 108 | Nucleotides::DnaLower => &constants::DNA_NUCLEOTIDES[4..], 109 | Nucleotides::DnaUpper => &constants::DNA_NUCLEOTIDES[..4], 110 | Nucleotides::Rna => &constants::RNA_NUCLEOTIDES, 111 | Nucleotides::RnaLower => &constants::RNA_NUCLEOTIDES[4..], 112 | Nucleotides::RnaUpper => &constants::RNA_NUCLEOTIDES[..4], 113 | } 114 | } 115 | } 116 | 117 | /// Trait use to generate random data from values Enum 118 | pub trait Generate 119 | where 120 | Self: core::convert::AsRef<[u8]>, 121 | { 122 | /// Generate n bytes 123 | fn generate(&self, rng: &mut rand::rngs::StdRng, n: usize) -> error::Result> { 124 | (0..n) 125 | .map(|_| { 126 | self.as_ref() 127 | .choose(rng) 128 | .cloned() 129 | .ok_or(error::create_unreachable!()) 130 | }) 131 | .collect::>>() 
132 | } 133 | 134 | /// Generate n bytes with a weighted distribution, the i-th weight applies to the i-th value and a sampled index outside the values yields WeightArrayLargerValueArray 135 | fn weighted( 136 | &self, 137 | rng: &mut rand::rngs::StdRng, 138 | n: usize, 139 | weights: I, 140 | ) -> error::Result> 141 | where 142 | I: core::iter::IntoIterator, 143 | I::Item: rand::distributions::uniform::SampleBorrow, 144 | X: rand::distributions::uniform::SampleUniform 145 | + PartialOrd 146 | + for<'a> core::ops::AddAssign<&'a X> 147 | + Clone 148 | + Default, 149 | { 150 | let dist = rand::distributions::WeightedIndex::new(weights)?; 151 | 152 | (0..n) 153 | .map(|_| { 154 | self.as_ref() 155 | .get(dist.sample(rng)) 156 | .cloned() 157 | .ok_or(error::Error::WeightArrayLargerValueArray) 158 | }) 159 | .collect::>>() 160 | } 161 | } 162 | 163 | impl Generate for Alphabet {} 164 | impl Generate for Quality {} 165 | impl Generate for Nucleotides {} 166 | 167 | /// Range of integer value 168 | #[derive(Debug, Clone, Default)] 169 | pub enum Integer { 170 | /// Vcf possible position 171 | Position, 172 | 173 | /// Vcf integer possible value 174 | Vcf, 175 | 176 | /// Quality value range 177 | Quality, 178 | 179 | #[default] 180 | /// Full i32 range 181 | Full, 182 | 183 | /// User defined range 184 | UserDefine(core::ops::Range), 185 | } 186 | 187 | impl core::convert::From for core::ops::Range { 188 | fn from(val: Integer) -> Self { 189 | match val { 190 | Integer::Position => 0..i32::MAX, 191 | Integer::Vcf => (i32::MIN + 7)..i32::MAX, 192 | Integer::Quality => 0..255, 193 | Integer::Full => i32::MIN..i32::MAX, 194 | Integer::UserDefine(x) => x, 195 | } 196 | } 197 | } 198 | 199 | /// Range of float value 200 | #[derive(Debug, Clone, Default)] 201 | pub enum Float { 202 | #[default] 203 | /// between -100.0 and 100.0 204 | Default, 205 | 206 | /// Full f32 range 207 | Full, 208 | 209 | /// User defined range 210 | UserDefine(core::ops::Range), 211 | } 212 | 213 | impl core::convert::From for core::ops::Range { 214 | fn from(val: Float) -> Self { 215 | match val { 216 | Float::Default => -100.0..100.0, 217
| Float::Full => f32::MIN..f32::MAX, 218 | Float::UserDefine(x) => x, 219 | } 220 | } 221 | } 222 | 223 | /// Trait to choose a random value in a range and convert it to an ASCII string 224 | pub trait Get 225 | where 226 | Self: core::convert::Into>, 227 | T: std::string::ToString + rand::distributions::uniform::SampleUniform + core::cmp::PartialOrd, 228 | { 229 | /// Get a value 230 | fn get(self, rng: &mut rand::rngs::StdRng) -> Vec { 231 | rng.gen_range::>(self.into()) 232 | .to_string() 233 | .as_bytes() 234 | .to_vec() 235 | } 236 | } 237 | 238 | impl Get for Integer {} 239 | impl Get for Float {} 240 | 241 | #[derive(Debug, Clone, Default)] 242 | /// Possible chromosomes names 243 | pub enum Chromosomes { 244 | #[default] 245 | /// Default chromosomes names 246 | Default, 247 | 248 | /// User defined chromosomes names 249 | UserDefine(Vec<&'static [u8]>), 250 | } 251 | 252 | impl core::convert::AsRef<[&'static [u8]]> for Chromosomes { 253 | fn as_ref(&self) -> &[&'static [u8]] { 254 | match self { 255 | Chromosomes::Default => &constants::CHROMOSOMES, 256 | Chromosomes::UserDefine(a) => a.as_ref(), 257 | } 258 | } 259 | } 260 | 261 | #[derive(Debug, Clone, Default)] 262 | /// Possible vcf info type 263 | pub enum VcfInfoType { 264 | #[default] 265 | /// All possible Vcf info type 266 | All, 267 | 268 | /// User defined vcf info type 269 | UserDefine(Vec<&'static [u8]>), 270 | } 271 | 272 | impl core::convert::AsRef<[&'static [u8]]> for VcfInfoType { 273 | fn as_ref(&self) -> &[&'static [u8]] { 274 | match self { 275 | VcfInfoType::All => &constants::VCF_INFO_TYPE, 276 | VcfInfoType::UserDefine(a) => a.as_ref(), 277 | } 278 | } 279 | } 280 | 281 | #[derive(Debug, Clone, Default)] 282 | /// Possible vcf info number 283 | pub enum VcfInfoNumber { 284 | #[default] 285 | /// All possible Vcf info number 286 | All, 287 | 288 | /// User defined vcf info number 289 | UserDefine(Vec<&'static [u8]>), 290 | } 291 | 292 | impl core::convert::AsRef<[&'static [u8]]> for VcfInfoNumber { 293 | fn as_ref(&self) -> &[&'static [u8]] { 294
| match self { 295 | VcfInfoNumber::All => &constants::VCF_INFO_NUMBER, 296 | VcfInfoNumber::UserDefine(a) => a.as_ref(), 297 | } 298 | } 299 | } 300 | 301 | #[derive(Debug, Clone, Default)] 302 | /// Possible vcf format type 303 | pub enum VcfFormatType { 304 | #[default] 305 | /// All possible Vcf format type 306 | All, 307 | 308 | /// UserDefine 309 | UserDefine(Vec<&'static [u8]>), 310 | } 311 | 312 | impl core::convert::AsRef<[&'static [u8]]> for VcfFormatType { 313 | fn as_ref(&self) -> &[&'static [u8]] { 314 | match self { 315 | VcfFormatType::All => &constants::VCF_FORMAT_TYPE, 316 | VcfFormatType::UserDefine(a) => a.as_ref(), 317 | } 318 | } 319 | } 320 | 321 | #[derive(Debug, Clone, Default)] 322 | /// Possible vcf format type 323 | pub enum VcfFormatNumber { 324 | #[default] 325 | /// All possible Vcf format type 326 | All, 327 | 328 | /// UserDefine 329 | UserDefine(Vec<&'static [u8]>), 330 | } 331 | 332 | impl core::convert::AsRef<[&'static [u8]]> for VcfFormatNumber { 333 | fn as_ref(&self) -> &[&'static [u8]] { 334 | match self { 335 | VcfFormatNumber::All => &constants::VCF_FORMAT_NUMBER, 336 | VcfFormatNumber::UserDefine(a) => a.as_ref(), 337 | } 338 | } 339 | } 340 | 341 | #[derive(Debug, Clone, Default)] 342 | /// Possible value for strand 343 | pub enum Strand { 344 | #[default] 345 | /// All possible value for strand 346 | All, 347 | 348 | /// No unknow 349 | NoUnknow, 350 | 351 | /// UserDefine 352 | UserDefine(Vec<&'static [u8]>), 353 | } 354 | 355 | impl core::convert::AsRef<[&'static [u8]]> for Strand { 356 | fn as_ref(&self) -> &[&'static [u8]] { 357 | match self { 358 | Strand::All => &constants::STRAND, 359 | Strand::NoUnknow => &constants::STRAND[1..], 360 | Strand::UserDefine(a) => a.as_ref(), 361 | } 362 | } 363 | } 364 | 365 | #[derive(Default)] 366 | /// Possible cigar alphabet 367 | pub enum Cigar { 368 | #[default] 369 | /// Sam CIGAR value 370 | Sam, 371 | 372 | /// Gff CIGAR value 373 | Gff, 374 | } 375 | 376 | impl 
core::convert::AsRef<[u8]> for Cigar { 377 | fn as_ref(&self) -> &[u8] { 378 | match self { 379 | Cigar::Sam => &constants::CIGAR_SAM, 380 | Cigar::Gff => &constants::CIGAR_GFF, 381 | } 382 | } 383 | } 384 | 385 | impl Generate for Cigar {} 386 | 387 | #[derive(Debug, Clone, Default)] 388 | /// Possible value for frame 389 | pub enum GffFeature { 390 | #[default] 391 | /// All possible gff frame format type 392 | All, 393 | 394 | /// No unknow 395 | NoUnknow, 396 | 397 | /// UserDefine 398 | UserDefine(Vec<&'static [u8]>), 399 | } 400 | 401 | impl core::convert::AsRef<[&'static [u8]]> for GffFeature { 402 | fn as_ref(&self) -> &[&'static [u8]] { 403 | match self { 404 | GffFeature::All => &constants::GFF_FEATURE, 405 | GffFeature::NoUnknow => &constants::GFF_FEATURE[1..], 406 | GffFeature::UserDefine(a) => a.as_ref(), 407 | } 408 | } 409 | } 410 | 411 | #[derive(Debug, Clone, Default)] 412 | /// Possible value for frame 413 | pub enum GffPhase { 414 | #[default] 415 | /// All possible gff frame format type 416 | All, 417 | 418 | /// No unknow 419 | NoUnknow, 420 | 421 | /// UserDefine 422 | UserDefine(Vec<&'static [u8]>), 423 | } 424 | 425 | impl core::convert::AsRef<[&'static [u8]]> for GffPhase { 426 | fn as_ref(&self) -> &[&'static [u8]] { 427 | match self { 428 | GffPhase::All => &constants::GFF_PHASE, 429 | GffPhase::NoUnknow => &constants::GFF_PHASE[1..], 430 | GffPhase::UserDefine(a) => a.as_ref(), 431 | } 432 | } 433 | } 434 | 435 | #[cfg(test)] 436 | mod tests { 437 | /* project use */ 438 | use super::*; 439 | 440 | #[test] 441 | fn alphabet() -> error::Result<()> { 442 | assert_eq!(Alphabet::Visible.as_ref(), b"!\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"); 443 | assert_eq!(Alphabet::Upper.as_ref(), b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"); 444 | assert_eq!(Alphabet::Lower.as_ref(), b"abcdefghijklmnopqrstuvwxyz"); 445 | assert_eq!( 446 | Alphabet::A2z.as_ref(), 447 | 
b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz" 448 | ); 449 | assert_eq!(Alphabet::VcfDefault.as_ref(), b"."); 450 | 451 | let mut rng = crate::rand(); 452 | assert_eq!(Alphabet::Visible.generate(&mut rng, 5)?, b"l7bR:".to_vec()); 453 | 454 | assert_eq!( 455 | Alphabet::Visible.weighted(&mut rng, 5, [1, 1, 1, 1])?, 456 | b"#$$!\"".to_vec() 457 | ); 458 | 459 | Ok(()) 460 | } 461 | 462 | #[test] 463 | fn quality() -> error::Result<()> { 464 | assert_eq!( 465 | Quality::Sanger.as_ref(), 466 | b"!\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGH" 467 | ); 468 | assert_eq!( 469 | Quality::Solexa.as_ref(), 470 | b";<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefg" 471 | ); 472 | assert_eq!( 473 | Quality::Illumina13.as_ref(), 474 | b"@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefg" 475 | ); 476 | assert_eq!( 477 | Quality::Illumina15.as_ref(), 478 | b"CDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefg" 479 | ); 480 | assert_eq!( 481 | Quality::Illumina18.as_ref(), 482 | b"!\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHI" 483 | ); 484 | assert_eq!( 485 | Quality::Illumina.as_ref(), 486 | b"!\"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHI" 487 | ); 488 | 489 | let mut rng = crate::rand(); 490 | assert_eq!(Quality::Illumina.generate(&mut rng, 5)?, b"=DI3E".to_vec()); 491 | 492 | assert_eq!( 493 | Quality::Sanger.weighted(&mut rng, 5, [2, 0, 2, 1, 1])?, 494 | b"!#$!!".to_vec() 495 | ); 496 | assert_eq!( 497 | Quality::Solexa.weighted(&mut rng, 5, [1, 0, 1, 0, 1])?, 498 | b"??=;;".to_vec() 499 | ); 500 | assert_eq!( 501 | Quality::Illumina13.weighted(&mut rng, 5, [5, 2, 1, 3, 1])?, 502 | b"@D@AC".to_vec() 503 | ); 504 | assert_eq!( 505 | Quality::Illumina15.weighted(&mut rng, 5, [50, 25, 10, 1, 2])?, 506 | b"DGCCC".to_vec() 507 | ); 508 | assert_eq!( 509 | Quality::Illumina18.weighted(&mut rng, 5, [1, 2, 3, 4, 5])?, 510 | b"%!###".to_vec() 511 | ); 512 | assert_eq!( 513 | Quality::Illumina.weighted(&mut rng, 5, [1, 0, 2, 1, 1])?, 514 | b"!#%##".to_vec() 515 | ); 516 | 517 | Ok(()) 518 | } 519 | 
520 | #[test] 521 | fn nucleotides() -> error::Result<()> { 522 | assert_eq!(Nucleotides::Dna.as_ref(), b"ACTGactg"); 523 | assert_eq!(Nucleotides::DnaLower.as_ref(), b"actg"); 524 | assert_eq!(Nucleotides::DnaUpper.as_ref(), b"ACTG"); 525 | 526 | assert_eq!(Nucleotides::Rna.as_ref(), b"ACUGacug"); 527 | assert_eq!(Nucleotides::RnaLower.as_ref(), b"acug"); 528 | assert_eq!(Nucleotides::RnaUpper.as_ref(), b"ACUG"); 529 | 530 | let mut rng = crate::rand(); 531 | assert_eq!( 532 | Nucleotides::RnaUpper.generate(&mut rng, 5)?, 533 | b"GGUCU".to_vec() 534 | ); 535 | 536 | assert!(matches!( 537 | Nucleotides::DnaUpper.weighted(&mut rng, 1, [0, 0, 0, 0, 1]), 538 | Err(error::Error::WeightArrayLargerValueArray) 539 | )); 540 | 541 | assert_eq!( 542 | Nucleotides::Dna.weighted(&mut rng, 5, [1, 1, 1, 1, 2, 2, 2, 2])?, 543 | b"gAGag".to_vec() 544 | ); 545 | assert_eq!( 546 | Nucleotides::DnaLower.weighted(&mut rng, 5, [1, 1, 1, 1])?, 547 | b"actaa".to_vec() 548 | ); 549 | assert_eq!( 550 | Nucleotides::DnaUpper.weighted(&mut rng, 5, [1, 1, 5, 5])?, 551 | b"GGTTT".to_vec() 552 | ); 553 | assert_eq!( 554 | Nucleotides::Rna.weighted(&mut rng, 5, [1, 1, 1, 1, 2, 2, 2, 2])?, 555 | b"agCag".to_vec() 556 | ); 557 | assert_eq!( 558 | Nucleotides::RnaLower.weighted(&mut rng, 5, [1, 5, 5, 1])?, 559 | b"ugccc".to_vec() 560 | ); 561 | assert_eq!( 562 | Nucleotides::RnaUpper.weighted(&mut rng, 5, [1, 1, 5, 5])?, 563 | b"GAUUU".to_vec() 564 | ); 565 | 566 | Ok(()) 567 | } 568 | 569 | #[test] 570 | fn chromosomes() { 571 | assert_eq!(Chromosomes::Default.as_ref(), constants::CHROMOSOMES); 572 | assert_eq!(Chromosomes::UserDefine(vec![b"A"]).as_ref(), &[b"A"]); 573 | } 574 | 575 | #[test] 576 | fn info() { 577 | assert_eq!(VcfInfoType::All.as_ref(), constants::VCF_INFO_TYPE); 578 | assert_eq!(VcfInfoType::UserDefine(vec![b"A"]).as_ref(), &[b"A"]); 579 | 580 | assert_eq!(VcfInfoNumber::All.as_ref(), constants::VCF_INFO_NUMBER); 581 | assert_eq!(VcfInfoNumber::UserDefine(vec![b"A"]).as_ref(), 
&[b"A"]); 582 | } 583 | 584 | #[test] 585 | fn format() { 586 | assert_eq!(VcfFormatType::All.as_ref(), constants::VCF_FORMAT_TYPE); 587 | assert_eq!(VcfFormatType::UserDefine(vec![b"A"]).as_ref(), &[b"A"]); 588 | 589 | assert_eq!(VcfFormatNumber::All.as_ref(), constants::VCF_FORMAT_NUMBER); 590 | assert_eq!(VcfFormatNumber::UserDefine(vec![b"A"]).as_ref(), &[b"A"]); 591 | } 592 | 593 | #[test] 594 | fn interger() { 595 | assert_eq!( 596 | core::ops::Range::::from(Integer::Position), 597 | 0..i32::MAX 598 | ); 599 | assert_eq!( 600 | core::ops::Range::::from(Integer::Vcf), 601 | (i32::MIN + 7)..i32::MAX 602 | ); 603 | assert_eq!( 604 | core::ops::Range::::from(Integer::Full), 605 | i32::MIN..i32::MAX 606 | ); 607 | assert_eq!( 608 | core::ops::Range::::from(Integer::UserDefine(-92..108)), 609 | -92..108 610 | ); 611 | 612 | let mut rng = crate::rand(); 613 | assert_eq!(Integer::Position.get(&mut rng,), b"1720731148".to_vec()); 614 | } 615 | 616 | #[test] 617 | fn float() { 618 | assert_eq!( 619 | >>::into(Float::Full), 620 | f32::MIN..f32::MAX 621 | ); 622 | assert_eq!( 623 | >>::into(Float::UserDefine(-1023.3..3002.5)), 624 | -1023.3..3002.5 625 | ); 626 | 627 | let mut rng = crate::rand(); 628 | assert_eq!( 629 | Float::UserDefine(-1023.3..3002.5).get(&mut rng,), 630 | b"2202.4844".to_vec() 631 | ); 632 | } 633 | 634 | #[test] 635 | fn strand() { 636 | assert_eq!(Strand::All.as_ref(), constants::STRAND); 637 | assert_eq!(Strand::NoUnknow.as_ref(), &constants::STRAND[1..]); 638 | assert_eq!( 639 | Strand::UserDefine(vec![b"Forward", b"Reverse"]).as_ref(), 640 | &[b"Forward", b"Reverse"] 641 | ); 642 | } 643 | 644 | #[test] 645 | fn gff_feature() { 646 | assert_eq!(GffFeature::All.as_ref(), constants::GFF_FEATURE); 647 | assert_eq!(GffFeature::NoUnknow.as_ref(), &constants::GFF_FEATURE[1..]); 648 | assert_eq!( 649 | GffFeature::UserDefine(vec![b"intron", b"exon__"]).as_ref(), 650 | &[b"intron", b"exon__"] 651 | ); 652 | } 653 | 654 | #[test] 655 | fn gff_phase() { 
656 | assert_eq!(GffPhase::All.as_ref(), constants::GFF_PHASE); 657 | assert_eq!(GffPhase::NoUnknow.as_ref(), &constants::GFF_PHASE[1..]); 658 | assert_eq!( 659 | GffPhase::UserDefine(vec![b"One", b"Two"]).as_ref(), 660 | &[b"One", b"Two"] 661 | ); 662 | } 663 | 664 | #[test] 665 | fn cigar() -> error::Result<()> { 666 | assert_eq!(Cigar::Sam.as_ref(), constants::CIGAR_SAM); 667 | assert_eq!(Cigar::Gff.as_ref(), constants::CIGAR_GFF); 668 | 669 | let mut rng = crate::rand(); 670 | assert_eq!(Cigar::Sam.generate(&mut rng, 5)?, b"=DPDH".to_vec()); 671 | assert_eq!( 672 | Cigar::Sam.weighted(&mut rng, 5, [1, 1, 1, 1])?, 673 | b"NNMII".to_vec() 674 | ); 675 | assert!(matches!( 676 | Cigar::Sam.weighted(&mut rng, 1, [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]), 677 | Err(error::Error::WeightArrayLargerValueArray) 678 | )); 679 | 680 | assert_eq!(Cigar::Gff.generate(&mut rng, 5)?, b"MMMDI".to_vec()); 681 | assert_eq!( 682 | Cigar::Gff.weighted(&mut rng, 5, [1, 1, 1])?, 683 | b"MIDMI".to_vec() 684 | ); 685 | assert!(matches!( 686 | Cigar::Gff.weighted(&mut rng, 1, [0, 0, 0, 0, 0, 1]), 687 | Err(error::Error::WeightArrayLargerValueArray) 688 | )); 689 | 690 | Ok(()) 691 | } 692 | } 693 | -------------------------------------------------------------------------------- /src/format/vcf/record.rs: -------------------------------------------------------------------------------- 1 | //! 
VCF header generation 2 | 3 | /* std use */ 4 | 5 | /* crates use */ 6 | use rand::seq::SliceRandom as _; 7 | use rand::Rng as _; 8 | 9 | /* projet use */ 10 | use crate::constants; 11 | use crate::error; 12 | use crate::values; 13 | 14 | use crate::values::Generate as _; 15 | use crate::values::Get as _; 16 | 17 | #[derive(typed_builder::TypedBuilder)] 18 | /// Struct to generate record 19 | pub struct Record { 20 | /// Value use for chromosomes 21 | #[builder(default = values::Chromosomes::Default)] 22 | contigs: values::Chromosomes, 23 | 24 | /// Possible position 25 | #[builder(default = values::Integer::Position)] 26 | position: values::Integer, 27 | 28 | /// Alphabet use to variant id 29 | #[builder(default = values::Alphabet::VcfDefault)] 30 | id: values::Alphabet, 31 | 32 | /// Length of id 33 | #[builder(default = 1)] 34 | id_len: usize, 35 | 36 | /// Id prefix 37 | #[builder(default = b"".to_vec())] 38 | id_prefix: Vec, 39 | 40 | /// Id suffix 41 | #[builder(default = b"".to_vec())] 42 | id_suffix: Vec, 43 | 44 | /// Alphabet use to reference sequence 45 | #[builder(default = values::Nucleotides::Dna)] 46 | reference: values::Nucleotides, 47 | 48 | /// Alphabet use to reference sequence 49 | #[builder(default = 1)] 50 | reference_len: usize, 51 | 52 | /// Alphabet use to alternative sequence 53 | #[builder(default = values::Nucleotides::DnaUpper)] 54 | alternative: values::Nucleotides, 55 | 56 | /// Alphabet use to alternative sequence 57 | #[builder(default = 1)] 58 | alternative_len: usize, 59 | 60 | /// Quality range 61 | #[builder(default = values::Integer::Quality)] 62 | quality: values::Integer, 63 | 64 | /// filter range 65 | #[builder(default = values::Integer::UserDefine(0..3))] 66 | filter: values::Integer, 67 | 68 | /// filter prefix 69 | #[builder(default = b"filter_".to_vec())] 70 | filter_prefix: Vec, 71 | 72 | /// info prefix 73 | #[builder(default = b"info_".to_vec())] 74 | info_prefix: Vec, 75 | 76 | /// InfoType 77 | #[builder(default = 
values::VcfInfoType::All)] 78 | info_type: values::VcfInfoType, 79 | 80 | /// InfoNumber 81 | #[builder(default = values::VcfInfoNumber::All)] 82 | info_number: values::VcfInfoNumber, 83 | 84 | /// format prefix 85 | #[builder(default = b"format_".to_vec())] 86 | format_prefix: Vec, 87 | 88 | /// FormatType 89 | #[builder(default = values::VcfFormatType::All)] 90 | format_type: values::VcfFormatType, 91 | 92 | /// FormatNumber 93 | #[builder(default = values::VcfFormatNumber::All)] 94 | format_number: values::VcfFormatNumber, 95 | 96 | /// Number of sample 97 | #[builder(default = 3)] 98 | sample: usize, 99 | } 100 | 101 | impl Record { 102 | fn format(&self, output: &mut W) -> error::Result<()> 103 | where 104 | W: std::io::Write + ?Sized, 105 | { 106 | for vcf_type in self.format_type.as_ref() { 107 | for vcf_number in self.format_number.as_ref() { 108 | output.write_all(&self.format_prefix)?; 109 | output.write_all(vcf_type)?; 110 | output.write_all(b"_")?; 111 | output.write_all(vcf_number)?; 112 | 113 | if Some(vcf_number) != self.format_number.as_ref().last() 114 | || Some(vcf_type) != self.format_type.as_ref().last() 115 | { 116 | output.write_all(b":")?; 117 | } 118 | } 119 | } 120 | 121 | Ok(()) 122 | } 123 | 124 | fn sample(&self, output: &mut W, rng: &mut rand::rngs::StdRng) -> error::Result<()> 125 | where 126 | W: std::io::Write + ?Sized, 127 | { 128 | for vcf_type in self.format_type.as_ref() { 129 | for vcf_number in self.format_number.as_ref() { 130 | output.write_all(&generate_value( 131 | rng, 132 | vcf_type, 133 | vcf_number, 134 | self.sample as u8, 135 | )?)?; 136 | if Some(vcf_number) != self.format_number.as_ref().last() 137 | || Some(vcf_type) != self.format_type.as_ref().last() 138 | { 139 | output.write_all(b":")?; 140 | } 141 | } 142 | } 143 | 144 | Ok(()) 145 | } 146 | 147 | fn info(&self, output: &mut W, rng: &mut rand::rngs::StdRng) -> error::Result<()> 148 | where 149 | W: std::io::Write + ?Sized, 150 | { 151 | for vcf_type in 
self.info_type.as_ref() { 152 | if vcf_type == b"Flag" { 153 | if rng.gen_bool(0.5) { 154 | output.write_all(&self.info_prefix)?; 155 | output.write_all(b"Flag_0;")?; 156 | } 157 | } else { 158 | for vcf_number in self.info_number.as_ref() { 159 | output.write_all(&self.info_prefix)?; 160 | output.write_all(vcf_type)?; 161 | output.write_all(b"_")?; 162 | output.write_all(vcf_number)?; 163 | output.write_all(b"=")?; 164 | output.write_all(&generate_value( 165 | rng, 166 | vcf_type, 167 | vcf_number, 168 | self.sample as u8, 169 | )?)?; 170 | 171 | if Some(vcf_number) != self.info_number.as_ref().last() 172 | || Some(vcf_type) != self.info_type.as_ref().last() 173 | { 174 | output.write_all(b";")?; 175 | } 176 | } 177 | } 178 | } 179 | 180 | Ok(()) 181 | } 182 | 183 | /// Generate vcf record 184 | pub fn generate( 185 | &self, 186 | output: &mut dyn std::io::Write, 187 | rng: &mut rand::rngs::StdRng, 188 | ) -> error::Result<()> { 189 | // chromosomes 190 | output.write_all( 191 | self.contigs 192 | .as_ref() 193 | .choose(rng) 194 | .ok_or(error::create_unreachable!())?, 195 | )?; 196 | output.write_all(b"\t")?; 197 | 198 | // position 199 | output.write_all(&self.position.clone().get(rng))?; 200 | output.write_all(b"\t")?; 201 | 202 | // identifiant 203 | let id_len = self.id_prefix.len() + self.id_len + self.id_suffix.len(); 204 | output.write_all(&self.id_prefix)?; 205 | output.write_all(&self.id.generate(rng, self.id_len)?)?; 206 | output.write_all(&self.id_suffix)?; 207 | if id_len == 0 { 208 | output.write_all(b".")?; 209 | } 210 | output.write_all(b"\t")?; 211 | 212 | // reference 213 | output.write_all(&self.reference.generate(rng, self.reference_len)?)?; 214 | output.write_all(b"\t")?; 215 | 216 | // alternative 217 | let alt_len = rng.gen_range(0..self.alternative_len); 218 | if alt_len == 0 { 219 | output.write_all(b".")?; 220 | } else { 221 | output.write_all(&self.alternative.generate(rng, alt_len)?)?; 222 | } 223 | output.write_all(b"\t")?; 224 | 225 
| // quality 226 | output.write_all(&self.quality.clone().get(rng))?; 227 | output.write_all(b"\t")?; 228 | 229 | // filter 230 | let nb_filters = 231 | >>::into(self.filter.clone()).len(); 232 | if nb_filters == 0 || rng.gen_bool(1.0 / nb_filters as f64) { 233 | output.write_all(b".")?; 234 | } else { 235 | output.write_all(&self.filter_prefix)?; 236 | output.write_all(rng.gen_range(0..nb_filters).to_string().as_bytes())?; 237 | } 238 | 239 | if (!self.info_type.as_ref().is_empty() && !self.info_number.as_ref().is_empty()) 240 | && (!self.format_type.as_ref().is_empty() && !self.format_number.as_ref().is_empty() 241 | || self.sample != 0) 242 | { 243 | output.write_all(b"\t")?; 244 | } 245 | 246 | // info 247 | if !self.info_type.as_ref().is_empty() && !self.info_number.as_ref().is_empty() { 248 | self.info(output, rng)?; 249 | } 250 | // check end of line 251 | if (!self.format_type.as_ref().is_empty() || !self.format_number.as_ref().is_empty()) 252 | && self.sample != 0 253 | { 254 | output.write_all(b"\t")?; 255 | } 256 | 257 | // format 258 | if !self.format_type.as_ref().is_empty() 259 | && !self.format_number.as_ref().is_empty() 260 | && self.sample != 0 261 | { 262 | self.format(output)?; 263 | output.write_all(b"\t")?; 264 | } 265 | 266 | // sample 267 | for s in 0..self.sample { 268 | self.sample(output, rng)?; 269 | if s != self.sample - 1 { 270 | output.write_all(b"\t")?; 271 | } 272 | } 273 | 274 | Ok(()) 275 | } 276 | } 277 | 278 | fn generate_value( 279 | rng: &mut rand::rngs::StdRng, 280 | vcf_type: &[u8], 281 | vcf_number: &[u8], 282 | nb_samples: u8, 283 | ) -> error::Result> { 284 | match vcf_type { 285 | b"Integer" => match vcf_number { 286 | b"1" | b"A" => Ok(values::Integer::default().get(rng)), 287 | b"2" | b"R" => { 288 | let mut ret = Vec::new(); 289 | for _ in 0..2 { 290 | ret.extend(values::Integer::default().get(rng)); 291 | ret.push(b','); 292 | } 293 | ret.pop(); 294 | 295 | Ok(ret) 296 | } 297 | b"G" => { 298 | let mut ret = 
Vec::new(); 299 | for _ in 0..nb_samples { 300 | ret.extend(values::Integer::default().get(rng)); 301 | ret.push(b','); 302 | } 303 | ret.pop(); 304 | 305 | Ok(ret) 306 | } 307 | b"." => { 308 | let mut ret = Vec::new(); 309 | for _ in 0..rng.gen_range(1..5) { 310 | ret.extend(values::Integer::default().get(rng)); 311 | ret.push(b','); 312 | } 313 | ret.pop(); 314 | 315 | Ok(ret) 316 | } 317 | _ => Err(create_unreachable!()), 318 | }, 319 | b"Float" => match vcf_number { 320 | b"1" | b"A" => Ok(values::Float::default().get(rng)), 321 | b"2" | b"R" => { 322 | let mut ret = Vec::new(); 323 | for _ in 0..2 { 324 | ret.extend(values::Float::default().get(rng)); 325 | ret.push(b','); 326 | } 327 | ret.pop(); 328 | 329 | Ok(ret) 330 | } 331 | b"G" => { 332 | let mut ret = Vec::new(); 333 | for _ in 0..nb_samples { 334 | ret.extend(values::Float::default().get(rng)); 335 | ret.push(b','); 336 | } 337 | ret.pop(); 338 | 339 | Ok(ret) 340 | } 341 | b"." => { 342 | let mut ret = Vec::new(); 343 | for _ in 0..rng.gen_range(1..5) { 344 | ret.extend(values::Float::default().get(rng)); 345 | ret.push(b','); 346 | } 347 | ret.pop(); 348 | 349 | Ok(ret) 350 | } 351 | _ => Err(create_unreachable!()), 352 | }, 353 | b"Character" => match vcf_number { 354 | b"1" | b"A" => Ok(values::Alphabet::A2z.generate(rng, 1)?), 355 | b"2" | b"R" => { 356 | let mut ret = Vec::new(); 357 | for _ in 0..2 { 358 | ret.extend(values::Alphabet::A2z.generate(rng, 1)?); 359 | ret.push(b','); 360 | } 361 | ret.pop(); 362 | 363 | Ok(ret) 364 | } 365 | b"G" => { 366 | let mut ret = Vec::new(); 367 | for _ in 0..nb_samples { 368 | ret.extend(values::Alphabet::A2z.generate(rng, 1)?); 369 | ret.push(b','); 370 | } 371 | ret.pop(); 372 | 373 | Ok(ret) 374 | } 375 | b"." 
=> { 376 | let mut ret = Vec::new(); 377 | for _ in 0..rng.gen_range(1..5) { 378 | ret.extend(values::Alphabet::A2z.generate(rng, 1)?); 379 | ret.push(b','); 380 | } 381 | ret.pop(); 382 | 383 | Ok(ret) 384 | } 385 | _ => Err(create_unreachable!()), 386 | }, 387 | b"String" => match vcf_number { 388 | b"1" | b"A" => values::Alphabet::A2z.generate(rng, constants::VCF_STRING_LENGTH), 389 | b"2" | b"R" => { 390 | let mut ret = Vec::new(); 391 | for _ in 0..2 { 392 | ret.extend(values::Alphabet::A2z.generate(rng, constants::VCF_STRING_LENGTH)?); 393 | ret.push(b','); 394 | } 395 | ret.pop(); 396 | 397 | Ok(ret) 398 | } 399 | b"G" => { 400 | let mut ret = Vec::new(); 401 | for _ in 0..nb_samples { 402 | ret.extend(values::Alphabet::A2z.generate(rng, constants::VCF_STRING_LENGTH)?); 403 | ret.push(b','); 404 | } 405 | ret.pop(); 406 | 407 | Ok(ret) 408 | } 409 | b"." => { 410 | let mut ret = Vec::new(); 411 | for _ in 0..rng.gen_range(1..5) { 412 | ret.extend(values::Alphabet::A2z.generate(rng, constants::VCF_STRING_LENGTH)?); 413 | ret.push(b','); 414 | } 415 | ret.pop(); 416 | 417 | Ok(ret) 418 | } 419 | _ => Err(create_unreachable!()), 420 | }, 421 | _ => Err(create_unreachable!()), 422 | } 423 | } 424 | 425 | impl core::default::Default for Record { 426 | fn default() -> Self { 427 | Record::builder().build() 428 | } 429 | } 430 | 431 | #[cfg(test)] 432 | mod tests { 433 | /* std use */ 434 | 435 | /* project use */ 436 | use super::*; 437 | 438 | const DEFAULT: &[u8] = b"YAR028W 509242864 . A . 
224 filter_0 info_Integer_1=-1867486109;info_Integer_2=1180908492,1041698939;info_Integer_A=-207506017;info_Integer_R=-1221871790,-1356802783;info_Integer_G=-496257857,2127853583,-1498117423;info_Integer_.=2082620030,-344161843,-1022296784,-1007334138;info_Float_1=68.286865;info_Float_2=-96.154594,-23.433853;info_Float_A=-48.782158;info_Float_R=-46.15216,-92.639305;info_Float_G=-7.5115204,74.78337,1.5983124;info_Float_.=26.825455;info_Flag_0;info_Character_1=i;info_Character_2=r,[;info_Character_A=g;info_Character_R=M,D;info_Character_G=h,w,\\;info_Character_.=C,G,p,];info_String_1=ZoXMT;info_String_2=gQouV,Gn`Jw;info_String_A=eVDDU;info_String_R=YytzA,ny[_P;info_String_G=Oshsq,bSjAd,bZcRF;info_String_.=rQ_[V,S^RtS,vzMeT,jonYV format_Integer_1:format_Integer_2:format_Integer_A:format_Integer_R:format_Integer_G:format_Integer_.:format_Float_1:format_Float_2:format_Float_A:format_Float_R:format_Float_G:format_Float_.:format_Character_1:format_Character_2:format_Character_A:format_Character_R:format_Character_G:format_Character_.:format_String_1:format_String_2:format_String_A:format_String_R:format_String_G:format_String_. 
-1552897203:1249370088,894744660:-1298826907:-1500526673,846767901:154354090,1292630937,-513388490:730433769,-1782228224,1193004039,1639963889:-31.463745:-74.13223,44.792007:-4.5392303:-42.586063,-20.249939:-19.714546,-48.754406,40.519638:-27.838158:L:J,L:n:u,P:t,f,`:r,^:aaSsw:svYGC,zkT\\W:k_sGD:gZcCc,]tIGE:bcnVW,JVaDB,nQSHY:[QBCg,L`Scx,xXYm`,NnOG[ -1345745815:173280036,-939420073:-1365650667:679852521,1295053734:732715199,-819759668,-308523151:1942972144,-249711286,1737760149:-53.047443:-97.35165,-58.53014:93.27409:-89.49225,65.68997:62.677032,92.94722,32.79944:52.132156,-30.33149:z:R,v:G:G,X:B,g,q:[,a,B:w_Zxx:kAFA[,o`OId:JgjZD:StKau,vtaIh:wmmrI,gNXcb,hRd]Q:OgukS 946791943:-2019035904,1055813342:-2045085244:-1401538285,878536766:731752434,1439145027,-966674455:-1096509554,-1513894259,1176983779,-199713084:51.48242:-93.36465,6.6719513:32.869843:-77.50437,-17.745377:38.63495,-9.558914,42.16661:-6.823944,-39.047478,48.595016,68.83052:w:O,m:A:i,Z:P,w,y:s:KBssX:JGMMK,`HVkg:oY`vk:xarZo,yTnQF:EntKU,mnaDW,uppug:FhYRx,BZHMq"; 439 | 440 | const NO_SAMPLE: &[u8] = b"YAR028W 509242864 . A . 224 filter_0 info_Integer_1=-1867486109;info_Integer_2=1180908492,1041698939;info_Integer_A=-207506017;info_Integer_R=-1221871790,-1356802783;info_Integer_G=;info_Integer_.=2082620030,-344161843,-1022296784,-1007334138;info_Float_1=68.286865;info_Float_2=-96.154594,-23.433853;info_Float_A=-48.782158;info_Float_R=-46.15216,-92.639305;info_Float_G=;info_Float_.=1.5983124,-8.867523,77.741455,-86.29277;info_Flag_0;info_Character_1=M;info_Character_2=i,r;info_Character_A=[;info_Character_R=g,M;info_Character_G=;info_Character_.=h;info_String_1=w\\voC;info_String_2=Gp]Zo,XMTgQ;info_String_A=ouVGn;info_String_R=`JweV,DDUYy;info_String_G=;info_String_.=zAny[,_POsh,sqbSj"; 441 | 442 | const NO_INFO: &[u8] = b"YAR028W 509242864 . A . 
224 filter_0 format_Integer_1:format_Integer_2:format_Integer_A:format_Integer_R:format_Integer_G:format_Integer_.:format_Float_1:format_Float_2:format_Float_A:format_Float_R:format_Float_G:format_Float_.:format_Character_1:format_Character_2:format_Character_A:format_Character_R:format_Character_G:format_Character_.:format_String_1:format_String_2:format_String_A:format_String_R:format_String_G:format_String_. -1867486109:1180908492,1041698939:-207506017:-1221871790,-1356802783:-496257857,2127853583,-1498117423:2082620030,-344161843,-1022296784,-1007334138:68.286865:-96.154594,-23.433853:-48.782158:-46.15216,-92.639305:-7.5115204,74.78337,1.5983124:26.825455:L:M,i:r:[,g:M,D,h:C,G,p,]:ZoXMT:gQouV,Gn`Jw:eVDDU:YytzA,ny[_P:Oshsq,bSjAd,bZcRF:rQ_[V,S^RtS,vzMeT,jonYV -1552897203:1249370088,894744660:-1298826907:-1500526673,846767901:154354090,1292630937,-513388490:730433769,-1782228224,1193004039,1639963889:-31.463745:-74.13223,44.792007:-4.5392303:-42.586063,-20.249939:-19.714546,-48.754406,40.519638:-27.838158:L:J,L:n:u,P:t,f,`:r,^:aaSsw:svYGC,zkT\\W:k_sGD:gZcCc,]tIGE:bcnVW,JVaDB,nQSHY:[QBCg,L`Scx,xXYm`,NnOG[ -1345745815:173280036,-939420073:-1365650667:679852521,1295053734:732715199,-819759668,-308523151:1942972144,-249711286,1737760149:-53.047443:-97.35165,-58.53014:93.27409:-89.49225,65.68997:62.677032,92.94722,32.79944:52.132156,-30.33149:z:R,v:G:G,X:B,g,q:[,a,B:w_Zxx:kAFA[,o`OId:JgjZD:StKau,vtaIh:wmmrI,gNXcb,hRd]Q:OgukS"; 443 | 444 | const NO_INFO_SAMPLE: &[u8] = b"YAR028W 509242864 . A . 224 filter_0"; 445 | 446 | const NO_INFO_SAMPLE_FILTER: &[u8] = b"YAR028W 509242864 . A . 224 ."; 447 | 448 | const SET_ID: &[u8] = b"YAR028W 509242864 id_i_Pdz! A . 224 ."; 449 | 450 | const ID_0: &[u8] = b"YAR028W 509242864 . a . 
114 ."; 451 | 452 | const LARGE_ALT: &[u8] = b"YAR028W\t509242864\t.\ta\tACA\t86\t."; 453 | 454 | #[test] 455 | fn default() -> error::Result<()> { 456 | let mut output = Vec::new(); 457 | let mut rng = crate::rand(); 458 | 459 | let generator = Record::builder().build(); 460 | 461 | generator.generate(&mut output, &mut rng)?; 462 | 463 | assert_eq!(output, DEFAULT); 464 | 465 | Ok(()) 466 | } 467 | 468 | #[test] 469 | fn no_sample() -> error::Result<()> { 470 | let mut output = Vec::new(); 471 | let mut rng = crate::rand(); 472 | 473 | let generator = Record::builder().sample(0).build(); 474 | 475 | generator.generate(&mut output, &mut rng)?; 476 | 477 | assert_eq!(output, NO_SAMPLE); 478 | 479 | Ok(()) 480 | } 481 | 482 | #[test] 483 | fn no_info() -> error::Result<()> { 484 | let mut output = Vec::new(); 485 | let mut rng = crate::rand(); 486 | 487 | let generator = Record::builder() 488 | .info_number(values::VcfInfoNumber::UserDefine(vec![])) 489 | .build(); 490 | 491 | generator.generate(&mut output, &mut rng)?; 492 | 493 | assert_eq!(output, NO_INFO); 494 | 495 | Ok(()) 496 | } 497 | 498 | #[test] 499 | fn no_info_sample() -> error::Result<()> { 500 | let mut output = Vec::new(); 501 | let mut rng = crate::rand(); 502 | 503 | let generator = Record::builder() 504 | .info_number(values::VcfInfoNumber::UserDefine(vec![])) 505 | .sample(0) 506 | .build(); 507 | 508 | generator.generate(&mut output, &mut rng)?; 509 | 510 | assert_eq!(output, NO_INFO_SAMPLE); 511 | 512 | Ok(()) 513 | } 514 | 515 | #[test] 516 | fn no_info_sample_filter() -> error::Result<()> { 517 | let mut output = Vec::new(); 518 | let mut rng = crate::rand(); 519 | 520 | let generator = Record::builder() 521 | .filter(values::Integer::UserDefine(0..0)) 522 | .info_number(values::VcfInfoNumber::UserDefine(vec![])) 523 | .format_number(values::VcfFormatNumber::UserDefine(vec![])) 524 | .sample(0) 525 | .build(); 526 | 527 | generator.generate(&mut output, &mut rng)?; 528 | 529 | 
assert_eq!(output, NO_INFO_SAMPLE_FILTER); 530 | 531 | Ok(()) 532 | } 533 | 534 | #[test] 535 | fn set_id() -> error::Result<()> { 536 | let mut output = Vec::new(); 537 | let mut rng = crate::rand(); 538 | 539 | let generator = Record::builder() 540 | .filter(values::Integer::UserDefine(0..0)) 541 | .info_number(values::VcfInfoNumber::UserDefine(vec![])) 542 | .format_number(values::VcfFormatNumber::UserDefine(vec![])) 543 | .sample(0) 544 | .id_len(5) 545 | .id(values::Alphabet::A2z) 546 | .id_prefix(b"id_".to_vec()) 547 | .id_suffix(b"!".to_vec()) 548 | .build(); 549 | 550 | generator.generate(&mut output, &mut rng)?; 551 | 552 | assert_eq!(output, SET_ID); 553 | 554 | Ok(()) 555 | } 556 | 557 | #[test] 558 | fn id_0() -> error::Result<()> { 559 | let mut output = Vec::new(); 560 | let mut rng = crate::rand(); 561 | 562 | let generator = Record::builder() 563 | .filter(values::Integer::UserDefine(0..0)) 564 | .info_number(values::VcfInfoNumber::UserDefine(vec![])) 565 | .format_number(values::VcfFormatNumber::UserDefine(vec![])) 566 | .sample(0) 567 | .id_len(0) 568 | .id(values::Alphabet::A2z) 569 | .build(); 570 | 571 | generator.generate(&mut output, &mut rng)?; 572 | 573 | assert_eq!(output, ID_0); 574 | 575 | Ok(()) 576 | } 577 | 578 | #[test] 579 | fn large_alt() -> error::Result<()> { 580 | let mut output = Vec::new(); 581 | let mut rng = crate::rand(); 582 | 583 | let generator = Record::builder() 584 | .filter(values::Integer::UserDefine(0..0)) 585 | .info_number(values::VcfInfoNumber::UserDefine(vec![])) 586 | .format_number(values::VcfFormatNumber::UserDefine(vec![])) 587 | .alternative_len(6) 588 | .sample(0) 589 | .id_len(0) 590 | .id(values::Alphabet::A2z) 591 | .build(); 592 | 593 | generator.generate(&mut output, &mut rng)?; 594 | 595 | assert_eq!(output, LARGE_ALT); 596 | 597 | Ok(()) 598 | } 599 | } 600 | --------------------------------------------------------------------------------