├── www ├── png.html.haml ├── input_files.html.haml ├── pipeline.html.haml ├── output_files.html.haml ├── status.html.haml ├── time.html.haml ├── layout.html.haml ├── overview.html.haml ├── command.html.haml └── css.html.haml ├── .gitignore ├── Gemfile ├── examples └── fastq_to_fasta.rb ├── lib ├── BioDSL │ ├── commands.rb │ ├── version.rb │ ├── helpers.rb │ ├── test.rb │ ├── debug.rb │ ├── verbose.rb │ ├── helpers │ │ ├── history_helper.rb │ │ ├── aux_helper.rb │ │ ├── log_helper.rb │ │ └── email_helper.rb │ ├── seq │ │ ├── homopolymer.rb │ │ ├── ambiguity.rb │ │ ├── digest.rb │ │ └── levenshtein.rb │ ├── math.rb │ ├── config.rb │ ├── fastq.rb │ ├── tmp_dir.rb │ ├── stream.rb │ ├── serializer.rb │ └── commands │ │ ├── reverse_seq.rb │ │ ├── merge_values.rb │ │ └── complement_seq.rb └── BioDSL.rb ├── test ├── BioDSL │ ├── test_usearch.rb │ ├── test_debug.rb │ ├── test_test.rb │ ├── test_verbose.rb │ ├── test_math.rb │ ├── test_command.rb │ ├── test_fastq.rb │ ├── commands │ │ ├── test_classify_seq.rb │ │ ├── test_genecall.rb │ │ ├── test_assemble_seq_idba.rb │ │ ├── test_assemble_seq_spades.rb │ │ ├── test_assemble_seq_ray.rb │ │ ├── test_index_taxonomy.rb │ │ ├── test_classify_seq_mothur.rb │ │ ├── test_reverse_seq.rb │ │ ├── test_random.rb │ │ ├── test_complement_seq.rb │ │ ├── test_merge_values.rb │ │ ├── test_count_values.rb │ │ ├── test_collapse_otus.rb │ │ ├── test_unique_values.rb │ │ ├── test_collect_otus.rb │ │ ├── test_degap_seq.rb │ │ ├── test_split_values.rb │ │ ├── test_dereplicate_seq.rb │ │ ├── test_align_seq_mothur.rb │ │ ├── test_mask_seq.rb │ │ └── test_count.rb │ ├── test_stream.rb │ ├── test_tmp_dir.rb │ ├── seq │ │ ├── test_homopolymer.rb │ │ ├── test_translate.rb │ │ └── test_digest.rb │ ├── test_serializer.rb │ ├── test_fork.rb │ └── test_mummer.rb └── helper.rb ├── BioDSL.gemspec └── Rakefile /www/png.html.haml: -------------------------------------------------------------------------------- 1 | %p 2 | %img{:src => png_data}/ 3 | 
-------------------------------------------------------------------------------- /www/input_files.html.haml: -------------------------------------------------------------------------------- 1 | %h3 Input 2 | - files.each do |file| 3 | %p #{file} 4 | -------------------------------------------------------------------------------- /www/pipeline.html.haml: -------------------------------------------------------------------------------- 1 | .section 2 | %h2 Pipeline 3 | .command 4 | = pipeline 5 | -------------------------------------------------------------------------------- /www/output_files.html.haml: -------------------------------------------------------------------------------- 1 | %h3 Output 2 | %p 3 | %a{:href => options[:output].split('/').last} #{options[:output].split('/').last} 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.gem 3 | mothur.*.logfile 4 | coverage/ 5 | doc/ 6 | pkg/ 7 | .yardoc/ 8 | .tags* 9 | tags 10 | 8mer 11 | Gemfile.lock 12 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # A sample Gemfile 2 | source "https://rubygems.org" 3 | 4 | gem 'bundler' #, '1.7.4' 5 | gem 'mocha' #, '1.0.0' 6 | gem 'simplecov' #, '0.9.2' 7 | -------------------------------------------------------------------------------- /www/status.html.haml: -------------------------------------------------------------------------------- 1 | %h3 Status: #{exit_status} 2 | %table 3 | - statsus.each do |key, val| 4 | %tr 5 | %td.bold #{key} 6 | - if val.is_a? 
Numeric 7 | %td.right #{val.commify} 8 | - else 9 | %td.right #{val} 10 | -------------------------------------------------------------------------------- /examples/fastq_to_fasta.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'BioDSL' 4 | 5 | # Read in sequences in FASTQ format from the file `test.fq` and save them in 6 | # FASTA format in the file `test.fna`. 7 | 8 | BD.new.read_fastq(input: "test.fq").write_fasta(output: "test.fna").run 9 | -------------------------------------------------------------------------------- /www/time.html.haml: -------------------------------------------------------------------------------- 1 | %h3 Time 2 | %table 3 | %tr 4 | %td.bold Time start 5 | %td.right #{status[:time_start]} 6 | %tr 7 | %td.bold Time stop 8 | %td.right #{status[:time_stop]} 9 | %tr 10 | %td.bold Time elapsed: 11 | %td.right #{status[:time_elapsed]} 12 | -------------------------------------------------------------------------------- /www/layout.html.haml: -------------------------------------------------------------------------------- 1 | !!! 
2 | %head 3 | %title BioDSL report 4 | = render_css 5 | %body 6 | %h1.center BioDSL report 7 | %p.center.bold Version #{BioDSL::VERSION} 8 | %p.center.bold Generated #{Time.now} 9 | = render_pipeline pipeline 10 | = render_overview commands 11 | - commands.each_with_index do |command, i| 12 | = render_command command, i 13 | -------------------------------------------------------------------------------- /www/overview.html.haml: -------------------------------------------------------------------------------- 1 | .section 2 | %h2 Overview 3 | %table 4 | %tr.left 5 | %th.bold Command 6 | %th.bold Records in 7 | %th.bold Records out 8 | %th.bold Time elapsed 9 | - commands.each_with_index do |command, i| 10 | %tr 11 | %td 12 | %a{:href => "##{command.name}#{i}"} #{command.name} 13 | %td.right #{command.status[:records_in].commify} 14 | %td.right #{command.status[:records_out].commify} 15 | %td.right #{command.status[:time_elapsed]} 16 | -------------------------------------------------------------------------------- /www/command.html.haml: -------------------------------------------------------------------------------- 1 | .section 2 | %a{:name => "#{command.name}#{index}"} 3 | %h2 4 | Command: #{command.name} 5 | %a{:href => help_url(command.name), :title => "Click for help on #{command.name}", :target => "_blank"} ? 6 | %p.command #{command.to_s} 7 | = render_status command 8 | = render_time command.status 9 | - if input? command.options 10 | = render_input_files command.options 11 | - if output? command.options 12 | = render_output_files command.options 13 | - if png? 
command.options 14 | = render_png command.options 15 | -------------------------------------------------------------------------------- /www/css.html.haml: -------------------------------------------------------------------------------- 1 | :css 2 | .center { 3 | text-align: center; 4 | } 5 | 6 | .right { 7 | text-align: right; 8 | } 9 | 10 | .left { 11 | text-align: left; 12 | } 13 | 14 | .bold { 15 | font-weight: bold; 16 | } 17 | 18 | img { 19 | display: block; 20 | margin-left: auto; 21 | margin-right: auto; 22 | } 23 | 24 | table { 25 | border-collapse: collapse; 26 | } 27 | 28 | th { 29 | padding-left: 10px; 30 | padding-right: 10px; 31 | } 32 | 33 | td { 34 | padding-left: 10px; 35 | padding-right: 10px; 36 | } 37 | 38 | body { 39 | font-family: Arial, Helvetica, Verdana, sans-serif; 40 | min-width: 1000px; 41 | margin-left: 10%; 42 | margin-right: 10%; 43 | } 44 | 45 | .section { 46 | padding-bottom: 1em; 47 | border-bottom: 2px solid #ccc; 48 | } 49 | 50 | .command { 51 | font-family: Courier, monospace, sans-serif; 52 | border: 1px solid black; 53 | padding: 1em; 54 | white-space: pre; 55 | } 56 | -------------------------------------------------------------------------------- /lib/BioDSL/commands.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # http://www.gnu.org/copyleft/gpl.html # 16 | # # 17 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 18 | # # 19 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 20 | # # 21 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 22 | 23 | module BioDSL 24 | # Module that requires all files in the BioDSL/commands/ directory 25 | module Commands 26 | Dir[File.join(File.dirname(__FILE__), 'commands', '*')].each do |file| 27 | require file.split(File::SEPARATOR)[-3..-1].join(File::SEPARATOR). 28 | chomp('.rb') 29 | end 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /lib/BioDSL/version.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. 
# 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of the BioDSL (www.BioDSL.org). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | # Namespace for BioDSL. 29 | module BioDSL 30 | VERSION = '1.0.2' 31 | end 32 | -------------------------------------------------------------------------------- /test/BioDSL/test_usearch.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). 
# 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | class TestUsearch < Test::Unit::TestCase 34 | end 35 | -------------------------------------------------------------------------------- /lib/BioDSL/helpers.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | # Namespace for BioDSL. 
29 | module BioDSL 30 | require 'BioDSL/helpers/history_helper' 31 | require 'BioDSL/helpers/log_helper' 32 | require 'BioDSL/helpers/options_helper' 33 | require 'BioDSL/helpers/status_helper' 34 | require 'BioDSL/helpers/aux_helper' 35 | end 36 | -------------------------------------------------------------------------------- /lib/BioDSL/test.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of the BioDSL (www.BioDSL.org). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | # Namespace for BioDSL. 29 | module BioDSL 30 | # Class variable visible across the BioDSL module scope. 31 | @@test = false 32 | 33 | # Class variable getter method. 34 | def self.test 35 | @@test 36 | end 37 | 38 | # Class variable setter method. 
39 | def self.test=(x) 40 | @@test = x 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /lib/BioDSL/debug.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of the BioDSL (www.BioDSL.org). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | # Namespace for BioDSL. 29 | module BioDSL 30 | # Class variable visible across the BioDSL module scope. 31 | @@debug = false 32 | 33 | # Class variable getter method. 34 | def self.debug 35 | @@debug 36 | end 37 | 38 | # Class variable setter method. 
39 | def self.debug=(x) 40 | @@debug = x 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /lib/BioDSL/verbose.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of the BioDSL (www.BioDSL.org). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | # Namespace for BioDSL. 29 | module BioDSL 30 | # Class variable visible across the BioDSL module scope. 31 | @@verbose = false 32 | 33 | # Class variable getter method. 34 | def self.verbose 35 | @@verbose 36 | end 37 | 38 | # Class variable setter method. 
39 | def self.verbose=(x) 40 | @@verbose = x 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /lib/BioDSL/helpers/history_helper.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | module BioDSL 29 | # Namespace for HistoryHelper. 30 | module HistoryHelper 31 | # Save pipeline to history file unless test is set. 
32 | def save_history 33 | return if BioDSL.test 34 | 35 | File.open(BioDSL::Config::HISTORY_FILE, 'a') do |ios| 36 | ios.puts to_s 37 | end 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /test/BioDSL/test_debug.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). 
# 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | class DebugTest < Test::Unit::TestCase 34 | def teardown 35 | BioDSL.debug = false 36 | end 37 | 38 | test 'BioDSL::debug returns correctly' do 39 | BioDSL.debug = true 40 | assert_equal(true, BioDSL.debug) 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /test/BioDSL/test_test.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). 
# 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for Test. 34 | class TestTest < Test::Unit::TestCase 35 | def teardown 36 | BioDSL.test = false 37 | end 38 | 39 | test 'BioDSL::test returns correctly' do 40 | BioDSL.test = true 41 | assert_equal(true, BioDSL.test) 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /test/BioDSL/test_verbose.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). 
# 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for Verbose. 34 | class VerboseTest < Test::Unit::TestCase 35 | def teardown 36 | BioDSL.verbose = false 37 | end 38 | 39 | test 'BioDSL::verbose returns correctly' do 40 | BioDSL.verbose = true 41 | assert_equal(true, BioDSL.verbose) 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /test/BioDSL/test_math.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). 
# 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for Math. 34 | class MathTest < Test::Unit::TestCase 35 | test 'BioDSL::Math#dist_point2point returns correctly' do 36 | assert_equal(1.5, BioDSL::Math.dist_point2point(1.0, 1.0, 1.0, 2.5)) 37 | end 38 | 39 | test 'BioDSL::Math#dist_point2line returns correctly' do 40 | assert_equal(1.5, BioDSL::Math.dist_point2line(3, 3, 0, 4.5, 5, 4.5)) 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /lib/BioDSL/helpers/aux_helper.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). 
# 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | # Namespace for BioDSL. 29 | module BioDSL 30 | # Namespace for AuxHelper. 31 | module AuxHelper 32 | BioDSL::AuxiliaryError = Class.new(StandardError) 33 | 34 | # Method that raises if the given command is not found on the system. 35 | # 36 | # @param command [String] Command that must exist. 37 | # 38 | # @raise [AuxiliaryError] if command is not found. 39 | def aux_exist(command) 40 | return if BioDSL::Filesys.which(command) 41 | fail AuxiliaryError, "command: #{command} not found" 42 | end 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /test/BioDSL/test_command.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 6 | # # 7 | # This program is free software; you can redistribute it and/or # 8 | # modify it under the terms of the GNU General Public License # 9 | # as published by the Free Software Foundation; either version 2 # 10 | # of the License, or (at your option) any later version. # 11 | # # 12 | # This program is distributed in the hope that it will be useful, # 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 | # GNU General Public License for more details. # 16 | # # 17 | # You should have received a copy of the GNU General Public License # 18 | # along with this program; if not, write to the Free Software # 19 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 20 | # USA. 
# 21 | # # 22 | # http://www.gnu.org/copyleft/gpl.html # 23 | # # 24 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 25 | # # 26 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 27 | # # 28 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 29 | 30 | require 'test/helper' 31 | 32 | # Test class for Command. 33 | class CommandTest < Test::Unit::TestCase 34 | test 'BioDSL::Command#to_s w/o options returns OK' do 35 | command = BioDSL::Command.new('dump', nil, {}) 36 | expected = %(dump) 37 | assert_equal(expected, command.to_s) 38 | end 39 | 40 | test 'BioDSL::Command#to_s with options returns OK' do 41 | command = BioDSL::Command.new('read_fasta', nil, input: 'test.fna') 42 | expected = %{read_fasta(input: "test.fna")} 43 | assert_equal(expected, command.to_s) 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /test/BioDSL/test_fastq.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. 
# 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | class FastqTest < Test::Unit::TestCase 34 | def setup 35 | @io = StringIO.new("@test1\nATCG\n+\nABCD\n@test2\natcg\n+test2\n@ABG\n") 36 | @fastq = BioDSL::Fastq.new(@io) 37 | end 38 | 39 | test "#next_entry obtains the correct seq_name" do 40 | assert_equal("test1", @fastq.next_entry.seq_name) 41 | end 42 | 43 | test "#next_entry with two entries obtain correct sequences" do 44 | assert_equal("ATCG", @fastq.next_entry.seq) 45 | assert_equal("atcg", @fastq.next_entry.seq) 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /test/BioDSL/commands/test_classify_seq.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. 
# 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for ClassifySeq. 34 | class TestClassifySeq < Test::Unit::TestCase 35 | def setup 36 | @p = BD.new 37 | end 38 | 39 | test 'BioDSL::Pipeline#classify_seq with disallowed option raises' do 40 | assert_raise(BioDSL::OptionError) do 41 | @p.classify_seq(dir: Dir.pwd, foo: 'bar') 42 | end 43 | end 44 | 45 | test 'BioDSL::Pipeline#classify_seq with allowed option dont raise' do 46 | assert_nothing_raised { @p.classify_seq(dir: Dir.pwd) } 47 | end 48 | 49 | # TODO: write tests! 50 | end 51 | -------------------------------------------------------------------------------- /test/BioDSL/commands/test_genecall.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). 
# 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for Genecall. 34 | class TestGenecall < Test::Unit::TestCase 35 | def setup 36 | omit('prodigal not found') unless BioDSL::Filesys.which('ray') 37 | 38 | @p = BioDSL::Pipeline.new 39 | end 40 | 41 | test 'BioDSL::Pipeline::Genecall with invalid options raises' do 42 | assert_raise(BioDSL::OptionError) { @p.assemble_seq_ray(foo: 'bar') } 43 | end 44 | 45 | test 'BioDSL::Pipeline::Genecall with valid options don\'t raise' do 46 | assert_nothing_raised { @p.assemble_seq_ray(cpus: 1) } 47 | end 48 | 49 | # FIXME: tests missing! NOTE(review): setup looks up 'ray' although the omit message names prodigal, and both tests call assemble_seq_ray instead of a genecall command - this looks copied from the Ray assembler test; confirm the intended executable and pipeline method and fix.
50 | end 51 | -------------------------------------------------------------------------------- /test/BioDSL/commands/test_assemble_seq_idba.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for AssembleSeqIdba. 
34 | class TestAssembleSeqIdba < Test::Unit::TestCase 35 | def setup 36 | omit('idba_ud not found') unless BioDSL::Filesys.which('idba_ud') 37 | 38 | @p = BioDSL::Pipeline.new 39 | end 40 | 41 | test 'BioDSL::Pipeline::AssembleSeqIdba with invalid options raises' do 42 | assert_raise(BioDSL::OptionError) { @p.assemble_seq_idba(foo: 'bar') } 43 | end 44 | 45 | test 'BioDSL::Pipeline::AssembleSeqIdba with valid options don\'t raise' do 46 | assert_nothing_raised { @p.assemble_seq_idba(cpus: 1) } 47 | end 48 | 49 | # FIXME: tests missing! 50 | end 51 | -------------------------------------------------------------------------------- /lib/BioDSL/helpers/log_helper.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). 
# 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | module BioDSL 29 | # Namespace for LogHelper. 30 | module LogHelper 31 | require 'yaml' 32 | 33 | # Log an OK message to the log file: appends to_s, status as YAML and 'OK' to Config::LOG_FILE; does nothing when BioDSL.test is set. 34 | def log_ok 35 | return if BioDSL.test 36 | 37 | File.open(BioDSL::Config::LOG_FILE, 'a') do |ios| 38 | ios.puts to_s 39 | ios.puts status.to_yaml 40 | ios.puts 'OK' 41 | end 42 | end 43 | 44 | # Log an ERROR message to the log file: appends to_s, status as YAML (only if the receiver responds to status), 'ERROR', and the exception message and backtrace. 45 | def log_error(exception) 46 | File.open(BioDSL::Config::LOG_FILE, 'a') do |ios| 47 | ios.puts to_s 48 | ios.puts status.to_yaml if self.respond_to? :status 49 | ios.puts 'ERROR' 50 | ios.puts exception.message 51 | ios.puts exception.backtrace 52 | end 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /test/BioDSL/commands/test_assemble_seq_spades.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. 
# 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for AssembleSeqSpades. 34 | class TestAssembleSeqSpades < Test::Unit::TestCase 35 | def setup 36 | omit('spades.py not found') unless BioDSL::Filesys.which('spades.py') 37 | 38 | @p = BioDSL::Pipeline.new 39 | end 40 | 41 | test 'BioDSL::Pipeline::AssembleSeqSpades with invalid options raises' do 42 | assert_raise(BioDSL::OptionError) { @p.assemble_seq_spades(foo: 'bar') } 43 | end 44 | 45 | test 'BioDSL::Pipeline::AssembleSeqSpades with OK options dont raise' do 46 | assert_nothing_raised { @p.assemble_seq_spades(cpus: 1) } 47 | end 48 | 49 | # TODO: Fix missing testing here! 50 | end 51 | -------------------------------------------------------------------------------- /test/BioDSL/commands/test_assemble_seq_ray.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. 
# 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for AssembleSeqRay. 34 | class TestAssembleSeqRay < Test::Unit::TestCase 35 | def setup 36 | omit('ray not found') unless BioDSL::Filesys.which('ray') 37 | omit('mpiexec not found') unless BioDSL::Filesys.which('mpiexec') 38 | 39 | @p = BioDSL::Pipeline.new 40 | end 41 | 42 | test 'BioDSL::Pipeline::AssembleSeqRay with invalid options raises' do 43 | assert_raise(BioDSL::OptionError) { @p.assemble_seq_ray(foo: 'bar') } 44 | end 45 | 46 | test 'BioDSL::Pipeline::AssembleSeqRay with valid options don\'t raise' do 47 | assert_nothing_raised { @p.assemble_seq_ray(cpus: 1) } 48 | end 49 | 50 | # FIXME: tests missing! 51 | end 52 | -------------------------------------------------------------------------------- /lib/BioDSL/seq/homopolymer.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). 
# 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | # Namespace for BioDSL. 29 | module BioDSL 30 | # Error class for all exceptions to do with Homopolymer. 31 | class HomopolymerError < StandardError; end 32 | 33 | # Namespace for Homopolymer 34 | module Homopolymer 35 | def each_homopolymer(min = 1) 36 | fail HomopolymerError, "Bad min value: #{min}" if min <= 0 37 | list = [] 38 | 39 | regex = Regexp.new("A{#{min},}|T{#{min},}|G{#{min},}|C{#{min},}|" \ 40 | "N{#{min},}") 41 | 42 | @seq.upcase.scan(regex) do |match| 43 | hp = Homopolymer.new(match, match.length, $`.length) 44 | 45 | if block_given? 46 | yield hp 47 | else 48 | list << hp 49 | end 50 | end 51 | 52 | block_given? ? 
self : list 53 | end 54 | 55 | Homopolymer = Struct.new(:pattern, :length, :pos) 56 | end 57 | end 58 | -------------------------------------------------------------------------------- /test/BioDSL/test_stream.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | class TestStream < Test::Unit::TestCase 34 | def setup 35 | @obj = {foo: "bar"} 36 | @reader, @writer = BioDSL::Stream.pipe 37 | end 38 | 39 | def teardown 40 | @reader.close unless @reader.closed? 41 | @writer.close unless @writer.closed? 
42 | end 43 | 44 | test "BioDSL::Stream.pipe writing and reading an object returns correctly" do 45 | @writer.write @obj 46 | @writer.close 47 | assert_equal(@obj, @reader.read) 48 | end 49 | 50 | test "BioDSL::Stream.pipe writing and reading multiple object returns correctly" do 51 | 10.times { @writer.write @obj } 52 | @writer.close 53 | result = @reader.inject([]) { |memo, obj| memo << obj } 54 | assert_equal(Array.new(10, @obj), result) 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /test/BioDSL/test_tmp_dir.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). 
# 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for TmpDir 34 | class TmpDirTest < Test::Unit::TestCase 35 | test 'BioDSL::TmpDir#create with no files returns correctly' do 36 | dir = '' 37 | 38 | BioDSL::TmpDir.create do |tmp_dir| 39 | dir = tmp_dir 40 | assert_true(File.directory? dir) 41 | end 42 | 43 | assert_false(File.directory? dir) 44 | end 45 | 46 | test 'BioDSL::TmpDir#create with files returns correctly' do 47 | dir = '' 48 | 49 | BioDSL::TmpDir.create('foo', 'bar') do |foo, bar, tmp_dir| 50 | dir = tmp_dir 51 | assert_true(File.directory? dir) 52 | assert_equal(File.join(dir, 'foo'), foo) 53 | assert_equal(File.join(dir, 'bar'), bar) 54 | end 55 | 56 | assert_false(File.directory? dir) 57 | end 58 | end 59 | -------------------------------------------------------------------------------- /lib/BioDSL/math.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. 
# 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | # Namespace for BioDSL. 29 | module BioDSL 30 | # Adding methods to Math module. 31 | module Math 32 | # Class method to calculate the distance from a point to a line. 33 | # The point and line are given as pairs of coordinates. NOTE(review): the slope is computed as (y2 - y1) / (x2 - x1) in floats, so a vertical line (x1 == x2) divides by 0.0 and the result is not a finite distance - confirm callers never pass one. 34 | def self.dist_point2line( 35 | px, # point x coordinate 36 | py, # point y coordinate 37 | x1, # line 1 x coordinate 38 | y1, # line 1 y coordinate 39 | x2, # line 2 x coordinate 40 | y2 # line 2 y coordinate 41 | ) 42 | 43 | a = (y2 - y1).to_f / (x2 - x1).to_f 44 | b = y1 - a * x1 45 | 46 | (a * px + b - py).abs / ::Math.sqrt(a**2 + 1) 47 | end 48 | 49 | # Class method to calculate the distance between two points given 50 | # as pairs of coordinates. 51 | def self.dist_point2point(x1, y1, x2, y2) 52 | ::Math.sqrt((x2.to_f - x1.to_f)**2 + (y2.to_f - y1.to_f)**2) 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /lib/BioDSL/config.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. 
# 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of the BioDSL (www.BioDSL.org). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | module BioDSL 29 | # Module with Config constants. 30 | module Config 31 | require 'parallel' 32 | require 'BioDSL/helpers/options_helper' 33 | 34 | extend OptionsHelper 35 | 36 | HISTORY_FILE = File.join(ENV['HOME'], '.BioDSL_history') 37 | LOG_FILE = File.join(ENV['HOME'], '.BioDSL_log') 38 | RC_FILE = File.join(ENV['HOME'], '.BioDSLrc') 39 | STATUS_PROGRESS_INTERVAL = 0.1 # update progress every n second. 40 | 41 | options = options_load_rc({}, :pipeline) 42 | 43 | TMP_DIR = if options && !options[:tmp_dir].empty? 44 | options[:tmp_dir].first 45 | else 46 | Dir.tmpdir 47 | end 48 | 49 | CORES_MAX = if options && !options[:processor_count].empty? 
50 | options[:processor_count].first.to_i 51 | else 52 | Parallel.processor_count 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /test/BioDSL/commands/test_index_taxonomy.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for IndexTaxonomy. 
34 | class TestIndexTaxonomy < Test::Unit::TestCase 35 | def setup 36 | @tmpdir = Dir.mktmpdir('BioDSL') 37 | 38 | @input, @output = BioDSL::Stream.pipe 39 | @input2, @output2 = BioDSL::Stream.pipe 40 | 41 | @p = BioDSL::Pipeline.new 42 | end 43 | 44 | def teardown 45 | FileUtils.rm_r @tmpdir 46 | end 47 | 48 | test 'BioDSL::Pipeline::IndexTaxonomy with invalid options raises' do 49 | assert_raise(BioDSL::OptionError) do 50 | @p.index_taxonomy(output_dir: @tmpdir, foo: 'bar') 51 | end 52 | end 53 | 54 | test 'BioDSL::Pipeline::IndexTaxonomy with valid options don\'t raise' do 55 | assert_nothing_raised do 56 | @p.index_taxonomy(output_dir: @tmpdir, kmer_size: 8, step_size: 1, 57 | prefix: 'foo') 58 | end 59 | end 60 | 61 | # TODO: write some tests! 62 | end 63 | -------------------------------------------------------------------------------- /test/BioDSL/commands/test_classify_seq_mothur.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. 
# 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for ClassifySeqMothur. 34 | class TestClassifySeqMothur < Test::Unit::TestCase 35 | def setup 36 | omit('mothur not found') unless BioDSL::Filesys.which('mothur') 37 | 38 | @p = BD.new 39 | @database = __FILE__ 40 | @taxonomy = __FILE__ 41 | end 42 | 43 | test 'BioDSL::Pipeline#classify_seq_mothur with disallowed option fail' do 44 | assert_raise(BioDSL::OptionError) do 45 | @p.classify_seq_mothur(database: @database, taxonomy: @taxonomy, 46 | foo: 'bar') 47 | end 48 | end 49 | 50 | test 'BioDSL::Pipeline#classify_seq_mothur w. allowed option dont fail' do 51 | assert_nothing_raised do 52 | @p.classify_seq_mothur(database: @database, taxonomy: @taxonomy, cpus: 2) 53 | end 54 | end 55 | 56 | # test "BioDSL::Pipeline#classify_seq_mothur outputs correctly" do 57 | # # TODO: mock this sucker. 58 | # end 59 | end 60 | -------------------------------------------------------------------------------- /test/BioDSL/seq/test_homopolymer.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). 
# 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for Homopolymer. 
# Test class for Homopolymer.
class TestHomopolymer < Test::Unit::TestCase
  def setup
    @entry = BioDSL::Seq.new(seq: 'atcgatTTTTTTcggttga')
  end

  test '#each_homopolymer with bad min raises' do
    [0, -1].each do |bad_min|
      assert_raise(BioDSL::HomopolymerError) do
        @entry.each_homopolymer(bad_min)
      end
    end
  end

  test '#each_homopolymer returns correctly' do
    found = @entry.each_homopolymer(3)
    assert_equal(1, found.size)

    hit = found.first
    assert_equal(7, hit.length)
    assert_equal('TTTTTTT', hit.pattern)
    assert_equal(5, hit.pos)
  end

  test '#each_homopolymer in block context returns correctly' do
    @entry.each_homopolymer(3) do |hit|
      assert_equal(7, hit.length)
      assert_equal('TTTTTTT', hit.pattern)
      assert_equal(5, hit.pos)
      break # only the first yielded homopolymer is inspected
    end
  end
end
# Test class for Serializer.
class TestSerializer < Test::Unit::TestCase
  def setup
    @records = [{'foo' => 1}, {'bar' => 2}]
  end

  test 'BioDSL::Serializer with no block raises' do
    assert_raise(BioDSL::SerializerError) { BioDSL::Serializer.new('foo') }
  end

  test 'BioDSL::Serializer returns correctly' do
    require 'tempfile'

    file = Tempfile.new('serializer')

    begin
      # Round trip: write all records, then read them back from the same file.
      File.open(file, 'wb') do |io|
        BioDSL::Serializer.new(io) do |writer|
          @records.each { |record| writer << record }
        end
      end

      result = []

      File.open(file, 'rb') do |io|
        BioDSL::Serializer.new(io) do |reader|
          reader.each { |record| result << record }
        end
      end

      assert_equal(@records, result)
    ensure
      file.close
      file.unlink
    end
  end
end
# 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). 
# Kernel namespace
module Kernel
  # Run the given block with $stdout redirected to an in-memory buffer.
  #
  # The stream that was active when the method was entered is put back
  # afterwards (also when the block raises), so captures can be nested and an
  # already redirected $stdout is not clobbered.
  #
  # @yield Code whose standard output should be captured.
  #
  # @return [String] Everything written to $stdout while the block ran.
  def capture_stdout
    previous = $stdout
    buffer   = StringIO.new
    $stdout  = buffer
    yield
    buffer.string
  ensure
    $stdout = previous
  end

  # Run the given block with $stderr redirected to an in-memory buffer.
  #
  # The stream that was active when the method was entered is put back
  # afterwards (also when the block raises), so captures can be nested and an
  # already redirected $stderr is not clobbered.
  #
  # @yield Code whose standard error should be captured.
  #
  # @return [String] Everything written to $stderr while the block ran.
  def capture_stderr
    previous = $stderr
    buffer   = StringIO.new
    $stderr  = buffer
    yield
    buffer.string
  ensure
    $stderr = previous
  end
end
# Namespace for BioDSL.
module BioDSL
  # Error class for all exceptions to do with FASTQ.
  class FastqError < StandardError; end

  # Class for parsing FASTQ entries from an ios and return as Seq objects.
  class Fastq < BioDSL::Filesys
    # Open a FASTQ source and wrap it in a Fastq parser.
    #
    # @param args Arguments forwarded unchanged to Fastq::IO.open.
    #
    # @yield [Fastq] The parser; the underlying ios is closed after the block.
    #
    # @return [Fastq, Object] The parser when no block is given, otherwise
    #   the value of the block.
    def self.open(*args)
      ios = IO.open(*args)

      return new(ios) unless block_given?

      begin
        yield new(ios)
      ensure
        ios.close
      end
    end

    # @param io Object to read lines from; it must respond to #gets and #eof?.
    def initialize(io)
      @io = io
    end

    # Iterate over all FASTQ entries.
    #
    # @yield [Seq] Each parsed entry.
    #
    # @return [Enumerator] When called without a block.
    def each
      return to_enum(:each) unless block_given?

      while (entry = next_entry)
        yield entry
      end
    end

    # Method to get the next FASTQ entry from an ios and return this
    # as a Seq object. If no entry is found or eof then nil is returned.
    #
    # @raise [FastqError] If the input ends in the middle of an entry.
    #
    # @return [Seq, nil] The next entry or nil at end of input.
    def next_entry
      return nil if @io.eof?

      # An entry is four lines: header, sequence, separator and scores.
      header, seq, _separator, qual = Array.new(4) { @io.gets }

      fail FastqError, 'Truncated FASTQ entry' if header.nil? || qual.nil?

      # chomp copes with "\n", "\r\n" and a missing final line terminator;
      # the first character of the header (the '@' marker) is dropped.
      seq_name = header.chomp[1..-1] || ''

      Seq.new(seq_name: seq_name, seq: seq.chomp, qual: qual.chomp)
    end

    # Class for FASTQ IO.
    class IO < Filesys
      # Yield each line read from the underlying ios until eof.
      def each
        until @io.eof?
          yield @io.gets
        end
      end
    end
  end
end
module BioDSL
  # Module to provide a temporary directory.
  module TmpDir
    require 'tempfile'

    # Create a temporary directory below BioDSL::Config::TMP_DIR and hand it
    # to the given block. Dir.mktmpdir is used in block form here, so the
    # directory is removed again when the block returns.
    #
    # If called with a list of file names, each name is joined onto the
    # temporary directory and the resulting paths are passed as block
    # arguments, followed by the path of the temporary directory itself as
    # the last block argument.
    #
    # @param files [Array] List of file names.
    #
    # @raise [RuntimeError] If no block is given.
    #
    # @return [Object] The value returned by the block.
    #
    # @example
    #   BioDSL::TmpDir.create do |dir|
    #     puts dir
    #       # => "<tmp_dir>"
    #   end
    #
    # @example
    #   BioDSL::TmpDir.create("foo", "bar") do |foo, bar, dir|
    #     puts foo
    #       # => "<tmp_dir>/foo"
    #     puts bar
    #       # => "<tmp_dir>/bar"
    #     puts dir
    #       # => "<tmp_dir>"
    #   end
    def self.create(*files, &block)
      fail 'no block given' unless block

      Dir.mktmpdir(nil, BioDSL::Config::TMP_DIR) do |tmp_dir|
        paths = files.map { |name| File.join(tmp_dir, name) }

        next block.call(tmp_dir) if paths.empty?

        # A single array is passed; a block with several parameters
        # destructures it into the individual paths plus the directory.
        block.call(paths.push(tmp_dir))
      end
    end
  end
end
# Commify numbers.
class Numeric
  # Insert thousands separators into the integer part of the number.
  #
  # @example
  #   1234567.commify # => "1,234,567"
  #
  # @return [String] The number as a string with commas inserted.
  def commify
    to_s.gsub(/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/, '\1,')
  end
end

# Convert string to float or integer if applicable.
class String
  # Convert the string to a number when it looks like one.
  #
  # Integers are parsed strictly as base 10 and the parsed value itself is
  # returned. Validating with prefix rules but converting with #to_i made
  # "0x1A" come out as 0 and "08" as the Float 8.0 (while "07" gave 7).
  # Anything that is not a decimal integer is handed to Kernel#Float, and a
  # string that Float rejects as well is returned unchanged.
  #
  # @example
  #   "42".to_num   # => 42
  #   "08".to_num   # => 8
  #   "3.14".to_num # => 3.14
  #   "abc".to_num  # => "abc"
  #
  # @return [Integer, Float, String] The numeric value, or self.
  def to_num
    Integer(self, 10)
  rescue ArgumentError
    begin
      Float(self)
    rescue ArgumentError
      self
    end
  end
end
module BioDSL
  # Namespace for EmailHelper.
  module EmailHelper
    # Send email notification to email address specified in @options[:email],
    # including an optional subject specified in @options[:subject], that
    # will otherwise default to the first 30 characters of self.to_s. The body
    # of the email will be an HTML report.
    #
    # Nothing is sent when @options[:email] is not set.
    #
    # @param pipeline [BioDSL::Pipeline] Pipeline object
    def send_email(pipeline)
      return unless @options[:email]

      html_part = Mail::Part.new do
        content_type 'text/html; charset=UTF-8'
        body BioDSL::HtmlReport.new(pipeline).to_html
      end

      compose_mail(html_part).deliver!
    end

    # Compose an email.
    #
    # The sender address is built from the output of `hostname -f` and the
    # mail is set up for delivery through SMTP on localhost port 25.
    #
    # @param html_part [Mail::Part] The email body.
    #
    # @return [Mail] Mail to be sent.
    def compose_mail(html_part)
      mail = Mail.new
      mail[:from]    = "do-not-reply@#{`hostname -f`.strip}"
      mail[:to]      = @options[:email]
      # String#[] is core Ruby; String#first is not and is only available
      # when some library happens to patch it in.
      mail[:subject] = @options[:subject] || to_s[0, 30]
      mail.html_part = html_part
      mail.delivery_method :smtp,
                           address: 'localhost',
                           port: 25,
                           enable_starttls_auto: false
      mail
    end
  end
end
# Test class for Fork.
class TestFork < Test::Unit::TestCase
  def setup
    @obj = {foo: 'bar'}
  end

  test 'BioDSL::Fork.new without block raises' do
    assert_raise(ArgumentError) { BioDSL::Fork.new }
  end

  test 'BioDSL::Fork.read with no running fork raises' do
    idle = BioDSL::Fork.new { |_child| }

    assert_raise(BioDSL::ForkError) { idle.read }
  end

  test 'BioDSL::Fork.write with no running fork raises' do
    idle = BioDSL::Fork.new { |_child| }

    assert_raise(BioDSL::ForkError) { idle.write @obj }
  end

  test 'BioDSL::Fork.wait with no running fork raises' do
    idle = BioDSL::Fork.new { |_child| }

    assert_raise(BioDSL::ForkError) { idle.wait }
  end

  test "BioDSL::Fork.wait with running fork don't raise" do
    running = BioDSL::Fork.execute { |_child| }

    assert_nothing_raised { running.wait }
  end

  test 'BioDSL::Fork IPC returns correctly' do
    # The child reads one object, tags it and writes it back.
    parent = BioDSL::Fork.execute do |child|
      received = child.read
      received[:child] = true
      child.write received
    end

    parent.write @obj
    parent.output.close

    result = parent.read

    parent.wait

    assert_equal({foo: 'bar', child: true}, result)
  end
end
# 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for Translate. 
# Test class for Translate.
class TestTranslate < Test::Unit::TestCase
  def setup
    @entry = BioDSL::Seq.new(seq: 'atcgatcgatcgtacggttga', type: :dna)
  end

  test '#tranlate with bad type raises' do
    @entry.type = nil

    assert_raise(BioDSL::SeqError) { @entry.translate }
  end

  test '#tranlate with bad length raises' do
    assert_translate_raises('atcgatcgatcgtacggtga')
  end

  test '#tranlate with bad translation table raises' do
    @entry.seq = 'atcgatcgatcgtacggttga'

    assert_raise(BioDSL::SeqError) { @entry.translate(0) }
  end

  test '#tranlate with bad start codon raises' do
    assert_translate_raises('ttagatcgatcgtacggttga')
  end

  test '#tranlate with bad codon raises' do
    assert_translate_raises('atggatcgaxxxtcgtacggttga')
  end

  test '#tranlate returns correctly' do
    protein = @entry.translate

    assert_equal('MDRSYG', protein.seq)
    assert_equal(:protein, protein.type)

    # The receiver must be left untouched by the non-bang variant.
    assert_equal('atcgatcgatcgtacggttga', @entry.seq)
    assert_equal(:dna, @entry.type)
  end

  test '#tranlate! returns correctly' do
    @entry.translate!

    assert_equal('MDRSYG', @entry.seq)
    assert_equal(:protein, @entry.type)
  end

  private

  # Replace the sequence of the fixture entry and assert that translating
  # it with default arguments raises BioDSL::SeqError.
  def assert_translate_raises(seq)
    @entry.seq = seq

    assert_raise(BioDSL::SeqError) { @entry.translate }
  end
end
$LOAD_PATH.push File.expand_path('../lib', __FILE__)

require 'BioDSL/version'

# Gem specification for BioDSL. File lists are taken from `git ls-files`,
# so the gem has to be built from inside a git checkout.
Gem::Specification.new do |spec|
  # Identity and metadata.
  spec.name              = 'BioDSL'
  spec.version           = BioDSL::VERSION
  spec.platform          = Gem::Platform::RUBY
  spec.date              = Time.now.strftime('%F')
  spec.summary           = 'BioDSL'
  spec.description       = 'BioDSL is a Bioinformatics Domain Specific Language.'
  spec.authors           = ['Martin A. Hansen']
  spec.email             = 'mail@maasha.dk'
  spec.rubyforge_project = 'BioDSL'
  spec.homepage          = 'http://www.github.com/maasha/BioDSL'
  spec.license           = 'GPL2'
  spec.rubygems_version  = '2.0.0'

  # Packaged files.
  tracked_bins = `git ls-files -- bin/*`.split("\n")

  spec.files            = `git ls-files`.split("\n")
  spec.test_files       = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables      = tracked_bins.map { |path| File.basename(path) }
  spec.extra_rdoc_files = Dir['wiki/*.rdoc']
  spec.require_paths    = ['lib']

  # Runtime dependencies, declared in the same order as before.
  [
    ['haml',           '>= 4.0.5'],
    ['RubyInline',     '>= 3.12.2'],
    ['narray',         '>= 0.6.0'],
    ['mail',           '>= 2.5.4'],
    ['msgpack',        '>= 0.5.8'],
    ['gnuplotter',     '>= 1.0.2'],
    ['parallel',       '>= 1.0.0'],
    ['pqueue',         '>= 2.0.2'],
    ['terminal-table', '>= 1.4.5'],
    ['tilt',           '>= 2.0.1']
  ].each { |gem_name, requirement| spec.add_dependency(gem_name, requirement) }

  # Development dependencies.
  [
    ['bundler',   '>= 1.7.4'],
    ['simplecov', '>= 0.9.2'],
    ['mocha',     '>= 1.0.0']
  ].each do |gem_name, requirement|
    spec.add_development_dependency(gem_name, requirement)
  end
end
# 16 | # # 17 | # You should have received a copy of the GNU General Public License # 18 | # along with this program; if not, write to the Free Software # 19 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 20 | # USA. # 21 | # # 22 | # http://www.gnu.org/copyleft/gpl.html # 23 | # # 24 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 25 | # # 26 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 27 | # # 28 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 29 | 30 | require 'test/helper' 31 | 32 | # Test class for Mummer. 33 | class TestMummer < Test::Unit::TestCase 34 | def setup 35 | omit('mummer not found') unless BioDSL::Filesys.which('mummer') 36 | 37 | @entry1 = BioDSL::Seq.new(seq_name: 'test1', seq: 'ctagcttcaacctagctag') 38 | @entry2 = BioDSL::Seq.new(seq_name: 'test2', seq: 'ctagcttcaGacctagctag') 39 | end 40 | 41 | test 'Mummer.each_mem with bad :length_min fails' do 42 | assert_raise(BioDSL::MummerError) do 43 | BioDSL::Mummer.each_mem(@entry1, @entry2, length_min: 0) 44 | end 45 | 46 | assert_raise(BioDSL::MummerError) do 47 | BioDSL::Mummer.each_mem(@entry1, @entry2, length_min: 5.5) 48 | end 49 | end 50 | 51 | test 'Mummer.each_mem with bad :direction fails' do 52 | assert_raise(BioDSL::MummerError) do 53 | BioDSL::Mummer.each_mem(@entry1, @entry2, direction: 'up') 54 | end 55 | end 56 | 57 | test 'Mummer#each_mem returns OK' do 58 | mems = BioDSL::Mummer.each_mem(@entry1, @entry2, length_min: 9) 59 | expected = <<-END.gsub(/^\s+\|/, '') 60 | |[#, 67 | | #] 74 | END 75 | 76 | assert_equal(Enumerator, mems.class) 77 | assert_equal(expected.gsub("\n", '').gsub(' ', ' '), mems.to_a.to_s) 78 | end 79 | end 80 | -------------------------------------------------------------------------------- /test/BioDSL/commands/test_reverse_seq.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 
2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for ReverseSeq. 
# Test class for ReverseSeq.
class TestReverseSeq < Test::Unit::TestCase
  # Feed a single sequence record into the input stream and close it, so a
  # pipeline run in a test sees exactly one record.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(SEQ_NAME: 'test',
                  SEQ:      'gatcgatcgt',
                  SEQ_LEN:  10,
                  SCORES:   'ABCDEFGHII')
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::ReverseSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.reverse_seq(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::ReverseSeq returns correctly' do
    @p.reverse_seq.run(input: @input, output: @output2)

    expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test",
      | :SEQ=>"tgctagctag",
      | :SEQ_LEN=>10,
      | :SCORES=>"IIHGFEDCBA"}
    EXP

    assert_equal(expected.delete("\n"), collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline::ReverseSeq status returns correctly' do
    @p.reverse_seq.run(input: @input, output: @output2)

    status = @p.status.first

    assert_equal(1, status[:records_in])
    assert_equal(1, status[:records_out])
    assert_equal(1, status[:sequences_in])
    assert_equal(1, status[:sequences_out])
    assert_equal(10, status[:residues_in])
    assert_equal(10, status[:residues_out])
  end
end
# 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for Digest. 
class TestDigest < Test::Unit::TestCase
  # The fixture contains one GGATCC site and one GAATTC site in upper case.
  def setup
    @entry = BioDSL::Seq.new(seq: 'cgatcgatcGGATCCgagagggtgtgtagtgGAATTCcgctgc')
  end

  test '#each_digest with bad residue in pattern raises' do
    assert_raise(BioDSL::DigestError) { @entry.each_digest('X', 0).to_a }
  end

  test '#each_digest returns correctly' do
    products = @entry.each_digest('GGATCC', 1).to_a

    assert_equal(2, products.size)

    head, tail = products

    assert_equal('[0-9]', head.seq_name)
    assert_equal('cgatcgatcG', head.seq)
    assert_equal('[10-42]', tail.seq_name)
    assert_equal('GATCCgagagggtgtgtagtgGAATTCcgctgc', tail.seq)
  end

  test '#each_digest with negavive offset returns correctly' do
    assert_undigested(@entry.each_digest('CGATCG', -1).to_a)
  end

  test '#each_digest with offset out of bounds returns correctly' do
    assert_undigested(@entry.each_digest('AATTCcgctgc', 15).to_a)
  end

  test '#each_digest in block context returns correctly' do
    # Only the first product is inspected; +break+ leaves the iteration.
    @entry.each_digest('GGATCC', 1) do |product|
      assert_equal('[0-9]', product.seq_name)
      assert_equal('cgatcgatcG', product.seq)
      break
    end
  end

  private

  # Assert that digestion produced exactly one product covering the whole
  # fixture sequence.
  def assert_undigested(products)
    assert_equal(1, products.size)
    assert_equal('[0-42]', products.first.seq_name)
    assert_equal(@entry.seq, products.first.seq)
  end
end
desc 'Run test suite with simplecov'
task :simplecov do
  # Sets SIMPLECOV before invoking the regular test task; presumably the test
  # helper reads it to enable coverage -- TODO confirm.
  ENV['SIMPLECOV'] = 'true'
  Rake::Task['test'].invoke
end

desc 'Add or update yardoc'
task :doc do
  run_docgen
end

# The license headers are refreshed before every gem build.
task build: :boilerplate

desc 'Add or update license boilerplate in source files'
task :boilerplate do
  run_boilerplate
end

# Run yardoc on lib/ and report progress on stderr. The command's stdout is
# captured by the backticks and discarded.
def run_docgen
  $stderr.puts 'Building docs'
  `yardoc lib/`
  $stderr.puts 'Docs done'
end

# Bring the copyright year in the license boilerplate of all files matched by
# bin/**/*, lib/**/*.rb and test/**/*.rb up to the current year.
#
# Files whose header already names the current year are left untouched. The
# first file found without any 'Copyright' text is reported on stderr together
# with its first ten lines, and the process terminates with exit status 1 so
# that dependent tasks (e.g. build) fail visibly instead of stopping silently.
def run_boilerplate
  copyright = /Copyright \(C\) 2007-(\d{4}) Martin Asser Hansen/
  this_year = Time.now.year

  files = Rake::FileList.new('bin/**/*', 'lib/**/*.rb', 'test/**/*.rb')

  files.each do |file|
    # 'bin/**/*' also matches directories, which cannot be read.
    next unless File.file?(file)

    body  = File.read(file)
    found = copyright.match(body)

    if found && found[1].to_i != this_year
      STDERR.puts "Updating boilerplate: #{file}"

      body.sub!(copyright,
                "Copyright (C) 2007-#{this_year} Martin Asser Hansen")

      File.open(file, 'w') { |ios| ios.puts body }
    end

    next if body.include?('Copyright')

    STDERR.puts "Warning: missing boilerplate in #{file}"
    STDERR.puts body.lines.first(10).join
    exit 1
  end
end
require 'test/helper'

# Test class for Random.
class TestRandom < Test::Unit::TestCase
  # Write six records, TEST => 1 through TEST => 6, to the input pipe and
  # close its write end before each test.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    1.upto(6) { |number| @output.write(TEST: number) }

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline#random with disallowed option raises' do
    assert_raise(BioDSL::OptionError) { @p.random(foo: 'bar') }
  end

  test 'BioDSL::Pipeline#random with allowed options don\'t raise' do
    assert_nothing_raised { @p.random(number: 2) }
  end

  test 'BioDSL::Pipeline#random returns correctly' do
    @p.random(number: 3).run(input: @input, output: @output2)

    # Only the number of emitted records is checked, not which ones.
    assert_equal(3, @input2.count)
  end

  test 'BioDSL::Pipeline#random status returns correctly' do
    @p.random(number: 3).run(input: @input, output: @output2)

    status = @p.status.first

    assert_equal(6, status[:records_in])
    assert_equal(3, status[:records_out])
  end

  test 'BioDSL::Pipeline#random with pairs: true returns correctly' do
    @p.random(number: 4, pairs: true).run(input: @input, output: @output2)

    seen = 0

    # Records are read two at a time; the second of each pair must carry the
    # TEST value following the first.
    @input2.each_slice(2) do |pair|
      leading, trailing = pair

      assert_equal(leading[:TEST].to_i, trailing[:TEST].to_i - 1)
      seen += 2
    end

    assert_equal(4, seen)
  end
end
class TestComplementSeq < Test::Unit::TestCase
  # Only the pipes and the pipeline are prepared here; each test writes its
  # own record.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::ComplementSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.complement_seq(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::ComplementSeq of DNA returns correctly' do
    complement('gatcGATCGT')

    assert_equal('{:SEQ=>"ctagCTAGCA", :SEQ_LEN=>10}', collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ComplementSeq of RNA returns correctly' do
    complement('gaucGAUCGU')

    assert_equal('{:SEQ=>"cuagCUAGCA", :SEQ_LEN=>10}', collect_result.chomp)
  end

  test 'BioDSL::Pipeline::ComplementSeq status returns correctly' do
    complement('gaucGAUCGU')

    status = @p.status.first

    assert_equal(1, status[:records_in])
    assert_equal(1, status[:records_out])
    assert_equal(1, status[:sequences_in])
    assert_equal(1, status[:sequences_out])
    assert_equal(10, status[:residues_in])
    assert_equal(10, status[:residues_out])
  end

  private

  # Write a single SEQ record, close the input pipe and run complement_seq
  # from @input to @output2.
  def complement(seq)
    @output.write(SEQ: seq)
    @output.close
    @p.complement_seq.run(input: @input, output: @output2)
  end
end
# Namespace for BioDSL.
module BioDSL
  # Namespace for Ambiguity.
  module Ambiguity
    # Prefix an Inline::C builder with the C definitions used for matching
    # nucleotides, including IUPAC ambiguity codes.
    #
    # Two snippets are added, in this order:
    #
    # * the MATCH(A,B) macro, true when the bitmap entries of the two
    #   residues share at least one bit;
    # * the 256 entry +bitmap+ lookup table indexed by character code, with
    #   identical entries for upper and lower case letters. The values are
    #   A = 1, T and U = 2, C = 4 and G = 8; ambiguity codes are the sums of
    #   the residues they cover (e.g. N = 15) and all other characters are 0.
    #
    # @param inline_builder [Inline::C] Inline C object.
    def add_ambiguity_macro(inline_builder)
      match_macro = %(
        #define MATCH(A,B) ((bitmap[(int) A] & bitmap[(int) B]) != 0)
      )

      bitmap_table = %(
        char bitmap[256] = {
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
            0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
            0, 1,14, 4,11, 0, 0, 8, 7, 0, 0,10, 0, 5,15, 0,
            0, 0, 9,12, 2, 2,13, 3, 0, 6, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        };
      )

      inline_builder.prefix(match_macro)
      inline_builder.prefix(bitmap_table)
    end
  end
end
require 'test/helper'

# Test class for MergeValues.
class TestMergeValues < Test::Unit::TestCase
  # Write two records to the input pipe: one with ID, COUNT and SEQ, and one
  # that lacks COUNT.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    records = [
      {ID: 'FOO', COUNT: 10, SEQ: 'gataag'},
      {ID: 'FOO', SEQ: 'gataag'}
    ]

    records.each { |record| @output.write(record) }
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::MergeValues with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.merge_values(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::MergeValues with valid options don\'t raise' do
    assert_nothing_raised { @p.merge_values(keys: [:ID]) }
  end

  test 'BioDSL::Pipeline::MergeValues returns correctly' do
    merge(keys: [:COUNT, :ID])

    # The record without COUNT is expected back unchanged.
    expected = ['{:ID=>"FOO", :COUNT=>"10_FOO", :SEQ=>"gataag"}',
                '{:ID=>"FOO", :SEQ=>"gataag"}'].join("\n") + "\n"

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline::MergeValues status returns correctly' do
    merge(keys: [:COUNT, :ID])

    status = @p.status.first

    assert_equal(2, status[:records_in])
    assert_equal(2, status[:records_out])
  end

  test 'BioDSL::Pipeline::MergeValues with :delimiter returns correctly' do
    merge(keys: [:ID, :COUNT], delimiter: ':count=')

    expected = ['{:ID=>"FOO:count=10", :COUNT=>10, :SEQ=>"gataag"}',
                '{:ID=>"FOO", :SEQ=>"gataag"}'].join("\n") + "\n"

    assert_equal(expected, collect_result)
  end

  private

  # Run merge_values with the given options from @input to @output2.
  def merge(options)
    @p.merge_values(options).run(input: @input, output: @output2)
  end
end
# Namespace for BioDSL.
module BioDSL
  # Class for Inter Process Communication between forked processes using msgpack
  # to serialize and deserialize objects.
  #
  # Every message is framed as a 4 byte length header (pack format 'I', i.e.
  # native byte order) followed by the msgpack payload.
  class Stream
    require 'msgpack'

    include Enumerable

    # Create a pair of connected pipe endpoints. The connection uses msgpack
    # allowing objects to be written and read.
    #
    # Stream.pipe -> [read_io, write_io]
    def self.pipe
      read, write = IO.pipe(Encoding::BINARY)

      [new(read), new(write)]
    end

    # @param io [IO] Underlying IO object to read from or write to.
    def initialize(io)
      @io = io
    end

    # Close the underlying IO.
    def close
      @io.close
    end

    # @return [Boolean] true if the underlying IO is closed.
    def closed?
      @io.closed?
    end

    # Yield every remaining object until the underlying IO reaches EOF.
    #
    # @return [Enumerator] if no block is given.
    def each
      return to_enum(:each) unless block_given?

      yield read until @io.eof?
    end

    # Read and deserialize the next object. Hash keys are returned as symbols.
    #
    # @raise [EOFError] at end of stream, or if the stream ends in the middle
    #   of a message.
    #
    # @return [Object] Deserialized object.
    def read
      header = @io.read(4)
      fail EOFError unless header
      fail EOFError, 'Truncated message header' if header.bytesize < 4

      size = header.unpack('I').first
      msg  = @io.read(size)

      # A short read means the writer went away in the middle of a message.
      fail EOFError, 'Truncated message' if msg.nil? || msg.bytesize < size

      MessagePack.unpack(msg, symbolize_keys: true)
    end

    # Serialize an object and write it to the underlying IO.
    #
    # @param obj [Object] Object to serialize with msgpack.
    def write(obj)
      msg = MessagePack.pack(obj)
      # The header counts bytes, so bytesize is used rather than size.
      @io.write([msg.bytesize].pack('I'))
      @io.write(msg)
    end

    alias_method :<<, :write
  end

  # Object channel backed by a Queue. A nil in the queue marks the end of the
  # stream.
  class Channel
    include Enumerable

    # Create two Channel objects sharing one Queue.
    #
    # Channel.pair -> [channel, channel]
    def self.pair
      queue = Queue.new

      [new(queue), new(queue)]
    end

    # @param queue [Queue] Queue holding the objects in transit.
    def initialize(queue)
      @queue = queue
    end

    # Yield objects popped from the queue until the nil written by #terminate
    # is encountered. Only nil ends the iteration, so +false+ is passed on
    # like any other value.
    #
    # @return [Enumerator] if no block is given.
    def each
      return to_enum(:each) unless block_given?

      until (obj = read).nil?
        yield obj
      end
    end

    # Pop the next object off the queue (Queue#pop waits while it is empty).
    def read
      @queue.pop
    end

    # Push an object onto the queue.
    def write(obj)
      @queue << obj
    end

    # Signal end of stream to the reader.
    def terminate
      @queue << nil
    end

    alias_method :<<, :write
  end
end
# Namespace for BioDSL.
module BioDSL
  # Error class for Serializer.
  SerializerError = Class.new(StandardError)

  # Class for serializing and de-serializing data using Marshal.
  #
  # Every entry is stored as a 4 byte big-endian length (pack format 'N')
  # followed by the Marshal dump of the object.
  class Serializer
    include Enumerable

    # Constructor for serializer.
    #
    # @param io    [IO]   IO object.
    # @param block [Proc] Block context.
    #
    # @raise [SerializerError] if no block given.
    #
    # @return [Serializer] class instance.
    def initialize(io, &block)
      @io = io

      fail SerializerError, 'No block given' unless block

      block.call(self)
    end

    # Method to write serialized data using Marshal to a given IO.
    #
    # @param obj [Object] Object to serialize.
    #
    # @example
    #   File.open("foo.dat", 'wb') do |io|
    #     BioDSL::Serializer.new(io) do |s|
    #       s << {"foo": 0}
    #       s << {"bar": 1}
    #     end
    #   end
    def <<(obj)
      data = Marshal.dump(obj)
      # The header counts bytes, so bytesize is used rather than size.
      @io.write([data.bytesize].pack('N'))
      @io.write(data)
    end

    # NOTE(review): +writei+ may be a typo for +write+; kept as is because
    # callers may rely on the name.
    alias_method :writei, :<<

    # Iterator for reading and de-serialized data from a given IO.
    #
    # @example
    #   File.open("foo.dat", 'rb') do |io|
    #     BioDSL::Serializer.new(io) do |s|
    #       s.each do |record|
    #         puts record
    #       end
    #     end
    #   end
    #
    # @yield [Object]
    #
    # @return [Enumerator] if no block is given.
    def each
      return to_enum(:each) unless block_given?

      yield next_entry until @io.eof?
    end

    # Read next entry from serialized stream.
    #
    # NOTE: Marshal.load can instantiate arbitrary objects and must only be
    # used on data from a trusted source.
    #
    # @raise [EOFError] at end of stream, or if the stream ends in the middle
    #   of an entry.
    #
    # @return [Object] Deserialized Object.
    def next_entry
      header = @io.read(4)
      fail EOFError unless header
      fail EOFError, 'Truncated entry header' if header.bytesize < 4

      size = header.unpack('N').first
      data = @io.read(size)

      # A short read would otherwise surface as an unrelated Marshal error.
      fail EOFError, 'Truncated entry' if data.nil? || data.bytesize < size

      Marshal.load(data)
    end
  end
end
require 'test/helper'

# Test class for CountValues.
class TestCountValues < Test::Unit::TestCase
  # Write six V0/V1 records to the input pipe: three HUMAN, two DOG and one
  # MOUSE, each with a distinct V1 value.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    rows = [
      ['HUMAN', 'H1'],
      ['HUMAN', 'H2'],
      ['HUMAN', 'H3'],
      ['DOG', 'D1'],
      ['DOG', 'D2'],
      ['MOUSE', 'M1']
    ]

    rows.each { |v0, v1| @output.write(V0: v0, V1: v1) }

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline#count_values with disallowed option raises' do
    assert_raise(BioDSL::OptionError) { @p.count_values(foo: 'bar') }
  end

  test 'BioDSL::Pipeline#count_values with allowed options don\'t raise' do
    assert_nothing_raised { @p.count_values(keys: [:V0]) }
  end

  test 'BioDSL::Pipeline#count_values returns correctly' do
    count_v0_v1_foo

    # No FOO_COUNT is expected: FOO is not a key of any record.
    expected = [
      '{:V0=>"HUMAN", :V1=>"H1", :V0_COUNT=>3, :V1_COUNT=>1}',
      '{:V0=>"HUMAN", :V1=>"H2", :V0_COUNT=>3, :V1_COUNT=>1}',
      '{:V0=>"HUMAN", :V1=>"H3", :V0_COUNT=>3, :V1_COUNT=>1}',
      '{:V0=>"DOG", :V1=>"D1", :V0_COUNT=>2, :V1_COUNT=>1}',
      '{:V0=>"DOG", :V1=>"D2", :V0_COUNT=>2, :V1_COUNT=>1}',
      '{:V0=>"MOUSE", :V1=>"M1", :V0_COUNT=>1, :V1_COUNT=>1}'
    ].map { |line| line + "\n" }.join

    assert_equal(expected, collect_result)
  end

  test 'BioDSL::Pipeline#count_values status returns correctly' do
    count_v0_v1_foo

    status = @p.status.first

    assert_equal(6, status[:records_in])
    assert_equal(6, status[:records_out])
  end

  private

  # Run count_values from @input to @output2 with a String key, a Symbol key
  # and a key (:FOO) that no record has.
  def count_v0_v1_foo
    @p.count_values(keys: ['V0', :V1, :FOO]).
      run(input: @input, output: @output2)
  end
end
require 'test/helper'

# Test class for CollapseOtus.
class TestCollapseOtus < Test::Unit::TestCase
  # Write four OTU records to the input pipe. OTU_0 and OTU_2 carry the same
  # TAXONOMY string.
  def setup
    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(OTU: 'OTU_0', SAMPLE1_COUNT: 3352,
                  TAXONOMY: 'Streptococcaceae(100);Lactococcus(100)')
    @output.write(OTU: 'OTU_1', SAMPLE1_COUNT: 881,
                  TAXONOMY: 'Leuconostocaceae(100);Leuconostoc(100)')
    @output.write(OTU: 'OTU_2', SAMPLE1_COUNT: 228,
                  TAXONOMY: 'Streptococcaceae(100);Lactococcus(100)')
    @output.write(OTU: 'OTU_3', SAMPLE1_COUNT: 5,
                  TAXONOMY: 'Pseudomonadaceae(100);Pseudomonas(100)')

    @output.close

    @p = BD.new
  end

  test 'BioDSL::Pipeline::CollapseOtus with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.collapse_otus(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::CollapseOtus outputs correctly' do
    @p.collapse_otus.run(input: @input, output: @output2)

    # OTU_2 is expected to be folded into OTU_0: 3352 + 228 = 3580.
    expected = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:OTU=>"OTU_0",
      | :SAMPLE1_COUNT=>3580,
      | :TAXONOMY=>"Streptococcaceae(100);Lactococcus(100)"}
      |{:OTU=>"OTU_1",
      | :SAMPLE1_COUNT=>881,
      | :TAXONOMY=>"Leuconostocaceae(100);Leuconostoc(100)"}
      |{:OTU=>"OTU_3",
      | :SAMPLE1_COUNT=>5,
      | :TAXONOMY=>"Pseudomonadaceae(100);Pseudomonas(100)"}
    EXP

    assert_equal(expected, collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline::CollapseOtus status outputs correctly' do
    @p.collapse_otus.run(input: @input, output: @output2)

    assert_equal(4, @p.status.first[:records_in])
    assert_equal(3, @p.status.first[:records_out])
    assert_equal(4, @p.status.first[:otus_in])
    assert_equal(3, @p.status.first[:otus_out])
  end
end
# Namespace for BioDSL.
module BioDSL
  # Error class for all exceptions to do with Digest.
  DigestError = Class.new(StandardError)

  # Namespace for Digest.
  #
  # Mixin that cuts a sequence at every occurrence of a restriction pattern.
  # The including object must respond to +seq+, +seq_name+, +length+ and +[]+
  # (called with a Range); the object returned by +[]+ must respond to
  # +seq_name=+.
  module Digest
    # Regexp source for every residue accepted in a restriction pattern.
    # Kept as a frozen constant so the table is not rebuilt on every call.
    AMBIGUITY_PATTERNS = {
      'A' => 'A',
      'T' => 'T',
      'U' => 'T',
      'C' => 'C',
      'G' => 'G',
      'M' => '[AC]',
      'R' => '[AG]',
      'W' => '[AT]',
      'S' => '[CG]',
      'Y' => '[CT]',
      'K' => '[GT]',
      'V' => '[ACG]',
      'H' => '[ACT]',
      'D' => '[AGT]',
      'B' => '[CGT]',
      'N' => '[GATC]'
    }.freeze

    # Method to get the next digestion product from a sequence.
    #
    # The upcased sequence is scanned for +pattern+. For each match the cut
    # position is the match start plus +cut_pos+. A fragment from the previous
    # cut up to (not including) the new cut is yielded when the cut position
    # lies in 0...(length - 2). The remainder of the sequence is always
    # yielded last, so at least one fragment is produced.
    #
    # @param pattern [String] Restriction pattern; ambiguity codes allowed.
    # @param cut_pos [Integer] Offset added to each match start.
    #
    # @yield [subseq] Each digestion product, with +seq_name+ set to
    #   "<seq_name>[<from>-<to>]".
    #
    # @raise [DigestError] If +pattern+ contains an unknown residue.
    #
    # @return [Enumerator] When no block is given.
    def each_digest(pattern, cut_pos)
      return to_enum(:each_digest, pattern, cut_pos) unless block_given?

      regex  = disambiguate(pattern)
      offset = 0

      seq.upcase.scan(regex) do
        # Length of the text before the current match == match start index.
        pos = Regexp.last_match.pre_match.length + cut_pos

        if pos >= 0 && pos < length - 2
          subseq = self[offset...pos]
          # NOTE(review): the second coordinate is (fragment length - 1), not
          # the end index (pos - 1) used for the final fragment below. Looks
          # like a bug for every fragment but the first; left unchanged here
          # because callers may depend on the emitted names - confirm.
          subseq.seq_name = "#{seq_name}[#{offset}-#{pos - offset - 1}]"

          yield subseq
        end

        offset = pos
      end

      # A cut position outside the sequence cannot start the last fragment.
      offset = 0 if offset < 0 || offset > length
      subseq = self[offset..-1]
      subseq.seq_name = "#{seq_name}[#{offset}-#{length - 1}]"

      yield subseq
    end

    private

    # Method that returns a regexp object with a restriction
    # enzyme pattern with ambiguity codes substituted to the
    # appropriate regexp.
    #
    # @param pattern [String] Restriction pattern (case insensitive).
    #
    # @raise [DigestError] If a residue has no entry in AMBIGUITY_PATTERNS.
    #
    # @return [Regexp] Compiled pattern.
    def disambiguate(pattern)
      source = pattern.upcase.each_char.map do |char|
        AMBIGUITY_PATTERNS.fetch(char) do
          raise DigestError, "Could not disambiguate residue: #{char}"
        end
      end

      Regexp.new(source.join)
    end
  end
end
require 'test/helper'

# Test class for UniqueValues.
class TestUniqueValues < Test::Unit::TestCase
  # Loads the input stream with six records keyed on V0 (three HUMAN, two DOG,
  # one MOUSE) followed by one record that has no V0 key at all.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    records = [
      {V0: 'HUMAN', V1: 'H1'},
      {V0: 'HUMAN', V1: 'H2'},
      {V0: 'HUMAN', V1: 'H3'},
      {V0: 'DOG', V1: 'D1'},
      {V0: 'DOG', V1: 'D2'},
      {V0: 'MOUSE', V1: 'M1'},
      {FOO: 'BAR'}
    ]

    records.each { |rec| @output.write(rec) }
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline#unique_values with disallowed option raises' do
    assert_raise(BioDSL::OptionError) { @p.unique_values(key: :V0, foo: 'bar') }
  end

  test 'BioDSL::Pipeline#unique_values with allowed options dont raise' do
    assert_nothing_raised do
      @p.unique_values(key: :V0)
    end
  end

  test 'BioDSL::Pipeline#unique_values returns correctly' do
    command = @p.unique_values(key: 'V0')
    command.run(input: @input, output: @output2)

    # First record seen for each V0 value, plus the record lacking the key.
    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:V0=>"HUMAN", :V1=>"H1"}
      |{:V0=>"DOG", :V1=>"D1"}
      |{:V0=>"MOUSE", :V1=>"M1"}
      |{:FOO=>"BAR"}
    EXP

    assert_equal(wanted, collect_result)
  end

  test 'BioDSL::Pipeline#unique_values status returns correctly' do
    command = @p.unique_values(key: 'V0')
    command.run(input: @input, output: @output2)

    {records_in: 7, records_out: 4}.each do |stat, count|
      assert_equal(count, @p.status.first[stat])
    end
  end

  test 'BioDSL::Pipeline#unique_values with :invert returns correctly' do
    command = @p.unique_values(key: 'V0', invert: true)
    command.run(input: @input, output: @output2)

    # With :invert the repeated V0 records are the ones passed through.
    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:V0=>"HUMAN", :V1=>"H2"}
      |{:V0=>"HUMAN", :V1=>"H3"}
      |{:V0=>"DOG", :V1=>"D2"}
      |{:FOO=>"BAR"}
    EXP

    assert_equal(wanted, collect_result)
  end
end
class TestCollectOtus < Test::Unit::TestCase
  # Writes one unrelated record followed by six hit records (TYPE 'H') that
  # spread two OTUs over two samples.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(one: 1, two: 2, three: 3)

    hits = [
      %w(OTU_0 Sample0), %w(OTU_0 Sample0), %w(OTU_0 Sample1),
      %w(OTU_1 Sample0), %w(OTU_1 Sample1), %w(OTU_1 Sample1)
    ]

    hits.each do |otu, sample|
      @output.write(TYPE: 'H', S_ID: otu, SAMPLE: sample)
    end

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline#collect_otus with disallowed option raises' do
    assert_raise(BioDSL::OptionError) do
      @p.collect_otus(foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline#collect_otus outputs correctly' do
    @p.collect_otus.run(input: @input, output: @output2)

    # All input records pass through, then one OTU record per S_ID with a
    # per-sample hit count.
    wanted = <<-EXP.gsub(/^\s+\|/, '').delete("\n")
      |{:one=>1, :two=>2, :three=>3}
      |{:TYPE=>"H", :S_ID=>"OTU_0", :SAMPLE=>"Sample0"}
      |{:TYPE=>"H", :S_ID=>"OTU_0", :SAMPLE=>"Sample0"}
      |{:TYPE=>"H", :S_ID=>"OTU_0", :SAMPLE=>"Sample1"}
      |{:TYPE=>"H", :S_ID=>"OTU_1", :SAMPLE=>"Sample0"}
      |{:TYPE=>"H", :S_ID=>"OTU_1", :SAMPLE=>"Sample1"}
      |{:TYPE=>"H", :S_ID=>"OTU_1", :SAMPLE=>"Sample1"}
      |{:RECORD_TYPE=>"OTU", :OTU=>"OTU_0", :SAMPLE0_COUNT=>2,
      | :SAMPLE1_COUNT=>1}
      |{:RECORD_TYPE=>"OTU", :OTU=>"OTU_1", :SAMPLE0_COUNT=>1,
      | :SAMPLE1_COUNT=>2}
    EXP

    assert_equal(wanted, collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline#collect_otus status outputs correctly' do
    @p.collect_otus.run(input: @input, output: @output2)

    counts = {records_in: 7, records_out: 9, hits_in: 6, hits_out: 2}

    counts.each do |stat, count|
      assert_equal(count, @p.status.first[stat])
    end
  end
end
-------------------------------------------------------------------------------- /test/BioDSL/commands/test_degap_seq.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for DegapSeq. 
class TestDegapSeq < Test::Unit::TestCase
  # Only the pipes and the pipeline are created here; each test writes its own
  # input records.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::DegapSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.degap_seq(foo: 'bar')
    end
  end

  test "BioDSL::Pipeline::DegapSeq with valid options don't raise" do
    assert_nothing_raised do
      @p.degap_seq(columns_only: true)
    end
  end

  test 'BioDSL::Pipeline::DegapSeq returns correctly' do
    write_seqs('AT--C.G~')
    @p.degap_seq.run(input: @input, output: @output2)

    wanted = '{:SEQ=>"ATCG", :SEQ_LEN=>4}'

    assert_equal(wanted, collect_result.chomp)
  end

  test 'BioDSL::Pipeline::DegapSeq status returns correctly' do
    write_seqs('AT--C.G~')
    @p.degap_seq.run(input: @input, output: @output2)

    counts = {
      records_in: 1, records_out: 1,
      sequences_in: 1, sequences_out: 1,
      residues_in: 8, residues_out: 4
    }

    counts.each do |stat, count|
      assert_equal(count, @p.status.first[stat])
    end
  end

  test 'BioDSL::Pipeline::DegapSeq with :columns_only and uneven seq ' \
       'lengths raises' do
    write_seqs('AT--C.G~', 'AT--C.G')

    command = @p.degap_seq(columns_only: true)

    assert_raise(BioDSL::SeqError) do
      command.run(input: @input, output: @output2)
    end
  end

  test 'BioDSL::Pipeline::DegapSeq with :columns_only returns correctly' do
    write_seqs('ATA-C.G~', 'AT--C.G.')
    @p.degap_seq(columns_only: true).run(input: @input, output: @output2)

    # The gap at the third position of the second sequence is kept in the
    # expected output; the first sequence has a residue in that column.
    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ=>"ATACG", :SEQ_LEN=>5}
      |{:SEQ=>"AT-CG", :SEQ_LEN=>5}
    EXP

    assert_equal(wanted, collect_result)
  end

  private

  # Write one SEQ record per given sequence and close the input stream.
  def write_seqs(*seqs)
    seqs.each { |seq| @output.write(SEQ: seq) }
    @output.close
  end
end
class TestSplitValues < Test::Unit::TestCase
  # Two records with the same SEQ: one ID uses ':count=' as separator, the
  # other uses underscores.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    %w(FOO:count=10 FOO_10_20).each do |id|
      @output.write(ID: id, SEQ: 'gataag')
    end

    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::SplitValues with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.split_values(foo: 'bar')
    end
  end

  test "BioDSL::Pipeline::SplitValues with valid options don't raise" do
    assert_nothing_raised do
      @p.split_values(key: :ID)
    end
  end

  test 'BioDSL::Pipeline::SplitValues returns correctly' do
    command = @p.split_values(key: :ID)
    command.run(input: @input, output: @output2)

    # Without :delimiter only the underscore-separated ID is expected to be
    # split; numeric-looking parts come back as integers.
    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:ID=>"FOO:count=10", :SEQ=>"gataag"}
      |{:ID=>"FOO_10_20", :SEQ=>"gataag", :ID_0=>"FOO", :ID_1=>10, :ID_2=>20}
    EXP

    assert_equal(wanted, collect_result)
  end

  test 'BioDSL::Pipeline::SplitValues status returns correctly' do
    command = @p.split_values(key: :ID)
    command.run(input: @input, output: @output2)

    {records_in: 2, records_out: 2}.each do |stat, count|
      assert_equal(count, @p.status.first[stat])
    end
  end

  test 'BioDSL::Pipeline::SplitValues with :delimiter returns correctly' do
    command = @p.split_values(key: 'ID', delimiter: ':count=')
    command.run(input: @input, output: @output2)

    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:ID=>"FOO:count=10", :SEQ=>"gataag", :ID_0=>"FOO", :ID_1=>10}
      |{:ID=>"FOO_10_20", :SEQ=>"gataag"}
    EXP

    assert_equal(wanted, collect_result)
  end

  test 'BioDSL::Pipeline::SplitValues w. :delimiter and :keys returns OK' do
    command = @p.split_values(key: 'ID', keys: ['ID', :COUNT],
                              delimiter: ':count=')
    command.run(input: @input, output: @output2)

    # With :keys the split parts are stored under the given names, so the
    # original ID value is overwritten by the first part.
    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:ID=>"FOO", :SEQ=>"gataag", :COUNT=>10}
      |{:ID=>"FOO_10_20", :SEQ=>"gataag"}
    EXP

    assert_equal(wanted, collect_result)
  end
end
class TestDereplicateSeq < Test::Unit::TestCase
  # Four sequence records (two identical, one differing only in case, one
  # distinct) followed by a record without sequence data.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    sequences = [%w(test1 ATCG), %w(test2 ATCG), %w(test3 atcg),
                 %w(test4 GCTA)]

    sequences.each do |name, seq|
      @output.write(SEQ_NAME: name, SEQ: seq)
    end

    @output.write(FISH: 'eel')
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::DereplicateSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.dereplicate_seq(foo: 'bar')
    end
  end

  test "BioDSL::Pipeline::DereplicateSeq with valid options don't raise" do
    assert_nothing_raised do
      @p.dereplicate_seq(ignore_case: true)
    end
  end

  test 'BioDSL::Pipeline::DereplicateSeq returns correctly' do
    @p.dereplicate_seq.run(input: @input, output: @output2)

    # The non-sequence record is expected first, then one record per unique
    # sequence with its count; case differences count as distinct here.
    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:FISH=>"eel"}
      |{:SEQ_NAME=>"test1", :SEQ=>"ATCG", :SEQ_COUNT=>2}
      |{:SEQ_NAME=>"test3", :SEQ=>"atcg", :SEQ_COUNT=>1}
      |{:SEQ_NAME=>"test4", :SEQ=>"GCTA", :SEQ_COUNT=>1}
    EXP

    assert_equal(wanted, collect_result)
  end

  test 'BioDSL::Pipeline::DereplicateSeq status returns correctly' do
    @p.dereplicate_seq.run(input: @input, output: @output2)

    counts = {
      records_in: 5, records_out: 4,
      sequences_in: 4, sequences_out: 3,
      residues_in: 16, residues_out: 12
    }

    counts.each do |stat, count|
      assert_equal(count, @p.status.first[stat])
    end
  end

  test 'BioDSL::Pipeline::DereplicateSeq with ignore_case returns OK' do
    command = @p.dereplicate_seq(ignore_case: true)
    command.run(input: @input, output: @output2)

    # With :ignore_case the lower-case duplicate is counted under test1.
    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:FISH=>"eel"}
      |{:SEQ_NAME=>"test1", :SEQ=>"ATCG", :SEQ_COUNT=>3}
      |{:SEQ_NAME=>"test4", :SEQ=>"GCTA", :SEQ_COUNT=>1}
    EXP

    assert_equal(wanted, collect_result)
  end
end
class TestAlignSeqMothur < Test::Unit::TestCase
  # Omits the test when the mothur executable is not found; otherwise creates
  # a template FASTA tempfile and one input sequence record.
  def setup
    require 'tempfile'

    omit('mothur not found') unless BioDSL::Filesys.which('mothur')

    @template = Tempfile.new('template')

    write_template

    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(SEQ_NAME: 'test', SEQ: 'gattccgatcgatcgatcga')
    @output.close

    @p = BD.new
  end

  # Write a single gapped reference sequence to the template file.
  def write_template
    seq_name = 'ref'
    seq = '--a-ttc--c-a-tcga----Ttcg-at---cCa---'
    BioDSL::Fasta.open(@template, 'w') do |ios|
      ios.puts BioDSL::Seq.new(seq_name: seq_name, seq: seq).to_fasta
    end
  end

  # Remove the template file. When setup was cut short by +omit+ the tempfile
  # was never created, so there is nothing to clean up; without this guard the
  # omitted test would fail in teardown with NoMethodError on nil.
  def teardown
    return unless @template

    @template.close
    @template.unlink
  end

  test 'BioDSL::Pipeline#align_seq_mothur with disallowed option raises' do
    assert_raise(BioDSL::OptionError) do
      @p.align_seq_mothur(template_file: @template, foo: 'bar')
    end
  end

  test 'BioDSL::Pipeline#align_seq_mothur w. allowed option don\'t raise' do
    assert_nothing_raised do
      @p.align_seq_mothur(template_file: @template, cpus: 2)
    end
  end

  test 'BioDSL::Pipeline#align_seq_mothur outputs correctly' do
    @p.align_seq_mothur(template_file: @template.path).
      run(input: @input, output: @output2)

    expected = '{:SEQ_NAME=>"test", ' \
      ':SEQ=>"..A-TTC--CGA-TCGA-----TCG-AT---CGA...", :SEQ_LEN=>37}'

    assert_equal(expected, collect_result.chomp)
  end

  test 'BioDSL::Pipeline#align_seq_mothur status returns correctly' do
    @p.align_seq_mothur(template_file: @template.path).
      run(input: @input, output: @output2)

    assert_equal(1, @p.status.first[:records_in])
    assert_equal(1, @p.status.first[:records_out])
    # NOTE(review): the second assertion of each pair below repeats the *_in
    # key; :sequences_out / :residues_out were presumably intended. The
    # expected *_out values are not visible from this file - confirm against
    # the command before changing.
    assert_equal(1, @p.status.first[:sequences_in])
    assert_equal(1, @p.status.first[:sequences_in])
    assert_equal(20, @p.status.first[:residues_in])
    assert_equal(20, @p.status.first[:residues_in])
  end
end
module BioDSL
  # == Reverse sequences in the stream.
  #
  # +reverse_seq+ reverses sequences in the stream. If a SCORES key is found
  # then the SCORES are also reversed.
  #
  # +reverse_seq+ can be used together with +complement_seq+ to reverse-
  # complement sequences.
  #
  # == Usage
  #
  #    reverse_seq()
  #
  # === Options
  #
  # == Examples
  #
  # Consider the following FASTQ entry in the file test.fq:
  #
  #    @M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185
  #    TTGTAAAACGACGGCCAGTG
  #    +
  #    >>>>>FFFFD@A?A0AE0FG
  #
  # To reverse the sequence simply do:
  #
  #    BD.new.read_fastq(input:"test.fq").reverse_seq.dump.run
  #
  #    {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185",
  #     :SEQ=>"GTGACCGGCAGCAAAATGTT",
  #     :SEQ_LEN=>20,
  #     :SCORES=>"GF0EA0A?A@DFFFF>>>>>"}
  class ReverseSeq
    # Status counters maintained by this command. Frozen so the shared
    # constant cannot be modified by accident.
    STATS = %i(records_in records_out sequences_in sequences_out residues_in
               residues_out).freeze

    # Constructor for ReverseSeq.
    #
    # @param options [Hash] Options hash.
    #
    # @return [ReverseSeq] Class instance.
    def initialize(options)
      @options = options

      check_options
    end

    # Return command lambda for reverse_seq.
    #
    # The lambda reads every record from +input+, reverses those that have a
    # :SEQ key, and appends each record (changed or not) to +output+.
    #
    # @return [Proc] Command lambda.
    def lmb
      lambda do |input, output, status|
        # status_init is defined outside this file; @status is used below, so
        # it is presumably assigned there - verify against the helper.
        status_init(status, STATS)

        input.each do |record|
          @status[:records_in] += 1
          reverse(record) if record[:SEQ]
          output << record
          @status[:records_out] += 1
        end
      end
    end

    private

    # Check options.
    #
    # NOTE(review): nil is passed as the only allowed option, which presumably
    # means no options are accepted - confirm against options_allowed.
    def check_options
      options_allowed(@options, nil)
    end

    # Reverse sequence.
    #
    # Builds a Seq from the record, reverses it in place and merges the result
    # back into the record. Residue counts in and out are equal because
    # reversal does not change the length.
    #
    # @param record [Hash] BioDSL record.
    def reverse(record)
      entry = BioDSL::Seq.new_bp(record)
      entry.reverse!

      @status[:sequences_in]  += 1
      @status[:sequences_out] += 1
      @status[:residues_in]   += entry.length
      @status[:residues_out]  += entry.length

      record.merge! entry.to_bp
    end
  end
end
#
# rubocop:disable Metrics/LineLength
class TestMaskSeq < Test::Unit::TestCase
  # Writes a single 64-residue record with quality scores. The record's
  # SEQ_LEN is deliberately left at 82 as in the fixture; the expected output
  # below carries 64.
  def setup
    @input, @output = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    record = {}
    record[:SEQ_NAME] = 'test'
    record[:SEQ]      = 'gatcgatcgtacgagcagcatctgacgtatcgatcatgcagtctacgacgagcatgctagctag'
    record[:SEQ_LEN]  = 82
    record[:SCORES]   = '!"#$%&()*+,-013456;<=>?@ABCDEIIHGCBA@?>=<;:9843210/.-,+*)(&%$III'

    @output.write(record)
    @output.close

    @p = BioDSL::Pipeline.new
  end

  test 'BioDSL::Pipeline::MaskSeq with invalid options raises' do
    assert_raise(BioDSL::OptionError) do
      @p.mask_seq(foo: 'bar')
    end
  end

  test "BioDSL::Pipeline::MaskSeq with valid options don't raise" do
    assert_nothing_raised do
      @p.mask_seq(mask: :hard)
    end
  end

  test 'BioDSL::Pipeline::MaskSeq with mask: :soft returns correctly' do
    @p.mask_seq.run(input: @input, output: @output2)

    # No :mask option is given; the expected output keeps the flanking
    # residues in lower case and upcases the rest.
    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test",
      | :SEQ=>"gatcgatcgtacgagcAGCATCTGACGTATCGATCATGCAGTCTAcgacgagcatgctagcTAG",
      | :SEQ_LEN=>64,
      | :SCORES=>"!\\\"\\\#$%&()*+,-013456;<=>?@ABCDEIIHGCBA@?>=<;:9843210/.-,+*)(&%$III"}
    EXP

    assert_equal(wanted.delete("\n"), collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline::MaskSeq with mask: :hard returns correctly' do
    command = @p.mask_seq(mask: 'hard')
    command.run(input: @input, output: @output2)

    # Same regions as the soft mask, but replaced by N.
    wanted = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test",
      | :SEQ=>"NNNNNNNNNNNNNNNNAGCATCTGACGTATCGATCATGCAGTCTANNNNNNNNNNNNNNNNTAG",
      | :SEQ_LEN=>64,
      | :SCORES=>"!\\\"\\\#$%&()*+,-013456;<=>?@ABCDEIIHGCBA@?>=<;:9843210/.-,+*)(&%$III"}
    EXP

    assert_equal(wanted.delete("\n"), collect_result.delete("\n"))
  end

  test 'BioDSL::Pipeline::MaskSeq status returns correctly' do
    command = @p.mask_seq(mask: 'hard')
    command.run(input: @input, output: @output2)

    counts = {
      records_in: 1, records_out: 1,
      sequences_in: 1, sequences_out: 1,
      residues_in: 64, residues_out: 64
    }

    counts.each do |stat, count|
      assert_equal(count, @p.status.first[stat])
    end
  end
end
  # http://en.wikipedia.org/wiki/Levenshtein_distance
  #
  # The distance is computed by an inline C function that keeps two score
  # vectors (the previous and the current row) instead of a full matrix.
  # Characters are compared with the MATCH macro, which is not defined in this
  # class - presumably it is installed by add_ambiguity_macro from
  # BioDSL::Ambiguity (TODO confirm).
  class Levenshtein
    extend BioDSL::Ambiguity

    # Bytes reserved per score vector element. The C code accesses the vectors
    # through an unsigned int pointer, so the buffers are sized on the
    # assumption that an unsigned int occupies 4 bytes.
    BYTES_IN_INT = 4

    # Calculate the Levenshtein distance between two strings.
    #
    # @param s [String] First string.
    # @param t [String] Second string.
    #
    # @return [Integer] Distance between s and t.
    def self.distance(s, t)
      # Trivial cases are answered without entering the C code.
      return 0 if s == t
      return t.length if s.length == 0
      return s.length if t.length == 0

      # Zero-filled byte strings handed to the C function as scratch space for
      # the two score vectors of t.length + 1 elements each.
      v0 = "\0" * (t.length + 1) * BYTES_IN_INT
      v1 = "\0" * (t.length + 1) * BYTES_IN_INT

      # The inlined function is called on a fresh instance.
      new.levenshtein_distance_C(s, t, s.length, t.length, v0, v1)
    end

    # >>>>>>>>>>>>>>> RubyInline C code <<<<<<<<<<<<<<<

    inline do |builder|
      # Called before the C sources are added; MATCH used below is expected to
      # come from here (see class comment).
      add_ambiguity_macro(builder)

      # Helper returning the smallest of three unsigned ints.
      builder.prefix %{
        unsigned int min(unsigned int a, unsigned int b, unsigned int c)
        {
          unsigned int m = a;

          if (m > b) m = b;
          if (m > c) m = c;

          return m;
        }
      }

      # Two-row dynamic programming kernel: v0 holds the previous row, v1 the
      # row being filled; after each character of s, v1 is copied into v0. The
      # result is the last element of the final row.
      builder.c %{
        VALUE levenshtein_distance_C(
          VALUE _s,       // string
          VALUE _t,       // string
          VALUE _s_len,   // string length
          VALUE _t_len,   // string length
          VALUE _v0,      // score vector
          VALUE _v1       // score vector
        )
        {
          char         *s     = (char *) StringValuePtr(_s);
          char         *t     = (char *) StringValuePtr(_t);
          unsigned int  s_len = FIX2UINT(_s_len);
          unsigned int  t_len = FIX2UINT(_t_len);
          unsigned int  *v0   = (unsigned int *) StringValuePtr(_v0);
          unsigned int  *v1   = (unsigned int *) StringValuePtr(_v1);

          unsigned int i    = 0;
          unsigned int j    = 0;
          unsigned int cost = 0;

          for (i = 0; i < t_len + 1; i++)
            v0[i] = i;

          for (i = 0; i < s_len; i++)
          {
            v1[0] = i + 1;

            for (j = 0; j < t_len; j++)
            {
              cost = (MATCH(s[i], t[j])) ? 0 : 1;
              v1[j + 1] = min(v1[j] + 1, v0[j + 1] + 1, v0[j] + cost);
            }

            for (j = 0; j < t_len + 1; j++)
              v0[j] = v1[j];
          }

          return UINT2NUM(v1[t_len]);
        }
      }
    end
  end
33 | # 34 | # == Usage 35 | # 36 | # merge_values(>[, delimiter: ]) 37 | # 38 | # === Options 39 | # 40 | # * keys: - List of keys to merge. 41 | # * delimiter: - Delimiter (default='_'). 42 | # 43 | # == Examples 44 | # 45 | # Consider the following record: 46 | # 47 | # {ID: "FOO", COUNT: 10, SEQ: "gataag"} 48 | # 49 | # To merge the values so that the COUNT and ID is merged in that order do: 50 | # 51 | # merge_values(keys: [:COUNT, :ID]) 52 | # 53 | # {:ID=>"FOO", :COUNT=>"10_FOO", :SEQ=>"gataag"} 54 | # 55 | # Changing the +delimiter+ and order: 56 | # 57 | # merge_values(keys: [:ID, :COUNT], delimiter: ':count=') 58 | # 59 | # {:ID=>"FOO:count=10", :COUNT=>10, :SEQ=>"gataag"} 60 | class MergeValues 61 | STATS = %i(records_in records_out) 62 | 63 | # Constructor for MergeValues. 64 | # 65 | # @param options [Hash] Options hash. 66 | # @option options [Array] :keys Keys whos values to merge. 67 | # @option options [String] :delimiter Delimiter for joining. 68 | # 69 | # @return [MergeValues] Class instance of MergeValues. 70 | def initialize(options) 71 | @options = options 72 | check_options 73 | defaults 74 | 75 | @keys = options[:keys] 76 | @delimiter = options[:delimiter] 77 | end 78 | 79 | # Return command lambda for merge_values. 80 | # 81 | # @return [Proc] Command lambda. 82 | def lmb 83 | lambda do |input, output, status| 84 | status_init(status, STATS) 85 | 86 | input.each do |record| 87 | @status[:records_in] += 1 88 | 89 | if @keys.all? { |key| record.key? key } 90 | values = @keys.inject([]) { |a, e| a << record[e.to_sym] } 91 | record[@keys.first] = values.join(@delimiter) 92 | end 93 | 94 | output << record 95 | @status[:records_out] += 1 96 | end 97 | end 98 | end 99 | 100 | private 101 | 102 | # Check options. 103 | def check_options 104 | options_allowed(@options, :keys, :delimiter) 105 | options_required(@options, :keys) 106 | end 107 | 108 | # Set default options. 
109 | def defaults 110 | @options[:delimiter] ||= '_' 111 | end 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /lib/BioDSL/commands/complement_seq.rb: -------------------------------------------------------------------------------- 1 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 2 | # # 3 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 4 | # # 5 | # This program is free software; you can redistribute it and/or # 6 | # modify it under the terms of the GNU General Public License # 7 | # as published by the Free Software Foundation; either version 2 # 8 | # of the License, or (at your option) any later version. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 18 | # USA. # 19 | # # 20 | # http://www.gnu.org/copyleft/gpl.html # 21 | # # 22 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 23 | # # 24 | # This software is part of the BioDSL (www.BioDSL.org). # 25 | # # 26 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 27 | 28 | module BioDSL 29 | # == Complment sequences in the stream. 30 | # 31 | # +complement_seq+ complements sequences in the stream. The sequence type - 32 | # DNA or RNA - is guessed by inspected the first sequence in the stream. 33 | # 34 | # +complement_seq+ can be used together with +reverse_seq+ to reverse- 35 | # complement sequences. 
36 | # 37 | # == Usage 38 | # 39 | # complement_seq() 40 | # 41 | # === Options 42 | # 43 | # == Examples 44 | # 45 | # Consider the following FASTQ entry in the file test.fq: 46 | # 47 | # @M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185 48 | # TTGTAAAACGACGGCCAGTG 49 | # + 50 | # >>>>>FFFFD@A?A0AE0FG 51 | # 52 | # To complement the sequence do: 53 | # 54 | # BD.new.read_fastq(input:"test.fq").complement_seq.dump.run 55 | # 56 | # {:SEQ_NAME=>"M02529:88:000000000-AC0WY:1:1101:12879:1928 2:N:0:185", 57 | # :SEQ=>"AACATTTTGCTGCCGGTCAC", 58 | # :SEQ_LEN=>20, 59 | # :SCORES=>">>>>>FFFFD@A?A0AE0FG"} 60 | class ComplementSeq 61 | STATS = %i(records_in records_out sequences_in sequences_out residues_in 62 | residues_out) 63 | 64 | # Constructor for ComplementSeq. 65 | # 66 | # @param options [Hash] Options hash. 67 | def initialize(options) 68 | @options = options 69 | @type = nil 70 | 71 | check_options 72 | end 73 | 74 | # Return the command lambda for ComplementSeq. 75 | # 76 | # @return [Proc] Command lambda 77 | def lmb 78 | lambda do |input, output, status| 79 | status_init(status, STATS) 80 | 81 | input.each do |record| 82 | @status[:records_in] += 1 83 | 84 | complement(record) if record.key? :SEQ 85 | 86 | output << record 87 | 88 | @status[:records_out] += 1 89 | end 90 | end 91 | end 92 | 93 | private 94 | 95 | # Check options. 96 | def check_options 97 | options_allowed(@options, nil) 98 | end 99 | 100 | # Complements sequence in record. 101 | # 102 | # @param record [Hash] BioDSL record with sequence. 103 | def complement(record) 104 | entry = BioDSL::Seq.new_bp(record) 105 | @type = entry.type_guess unless @type 106 | entry.type = @type 107 | entry.complement! 108 | 109 | @status[:sequences_in] += 1 110 | @status[:sequences_out] += 1 111 | @status[:residues_in] += entry.length 112 | @status[:residues_out] += entry.length 113 | 114 | record.merge! 
entry.to_bp 115 | end 116 | end 117 | end 118 | -------------------------------------------------------------------------------- /test/BioDSL/commands/test_count.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '..', '..', '..') 3 | 4 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 5 | # # 6 | # Copyright (C) 2007-2015 Martin Asser Hansen (mail@maasha.dk). # 7 | # # 8 | # This program is free software; you can redistribute it and/or # 9 | # modify it under the terms of the GNU General Public License # 10 | # as published by the Free Software Foundation; either version 2 # 11 | # of the License, or (at your option) any later version. # 12 | # # 13 | # This program is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU General Public License for more details. # 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # 21 | # USA. # 22 | # # 23 | # http://www.gnu.org/copyleft/gpl.html # 24 | # # 25 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 26 | # # 27 | # This software is part of BioDSL (http://maasha.github.io/BioDSL). # 28 | # # 29 | # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # 30 | 31 | require 'test/helper' 32 | 33 | # Test class for Count. 
# Covers option validation, writing the count table to a file (with and
# without :force), the count record emitted to the stream, and the status
# counters.
class TestCount < Test::Unit::TestCase
  # Creates a scratch directory, writes two sequence records to the input
  # stream and prepares an empty pipeline for each test.
  def setup
    @tmpdir = Dir.mktmpdir('BioDSL')
    @file   = File.join(@tmpdir, 'test.txt')
    # NOTE(review): same path as @file and not used by any test in this class;
    # confirm whether a distinct file name was intended.
    @file2  = File.join(@tmpdir, 'test.txt')

    @input, @output   = BioDSL::Stream.pipe
    @input2, @output2 = BioDSL::Stream.pipe

    @output.write(SEQ_NAME: 'test1', SEQ: 'atcg', SEQ_LEN: 4)
    @output.write(SEQ_NAME: 'test2', SEQ: 'gtac', SEQ_LEN: 4)
    @output.close

    @p = BioDSL::Pipeline.new
  end

  # Removes the scratch directory and everything in it.
  def teardown
    FileUtils.rm_r @tmpdir
  end

  test 'BioDSL::Pipeline::Count with invalid options raises' do
    assert_raise(BioDSL::OptionError) { @p.count(foo: 'bar') }
  end

  test 'BioDSL::Pipeline::Count with valid options don\'t raise' do
    assert_nothing_raised { @p.count(output: @file) }
  end

  test 'BioDSL::Pipeline::Count to file outputs correctly' do
    @p.count(output: @file).run(input: @input, output: @output2)
    # File.read closes the file again, unlike File.open(...).read.
    result   = File.read(@file)
    expected = "#RECORD_TYPE\tCOUNT\ncount\t2\n"
    assert_equal(expected, result)
  end

  test 'BioDSL::Pipeline::Count to existing file raises' do
    # Create the file without spawning a shell.
    FileUtils.touch(@file)
    assert_raise(BioDSL::OptionError) { @p.count(output: @file) }
  end

  test 'BioDSL::Pipeline::Count to existing file with :force outputs OK' do
    FileUtils.touch(@file)
    @p.count(output: @file, force: true).run(input: @input)
    result   = File.read(@file)
    expected = "#RECORD_TYPE\tCOUNT\ncount\t2\n"
    assert_equal(expected, result)
  end

  # Checks both the file output and the records passed on to the stream: the
  # two input records followed by the count record.
  test 'BioDSL::Pipeline::Count with flux outputs correctly' do
    @p.count(output: @file).run(input: @input, output: @output2)
    result   = File.read(@file)
    expected = "#RECORD_TYPE\tCOUNT\ncount\t2\n"
    assert_equal(expected, result)

    # The gsub strips the leading "|" margin from the heredoc lines.
    stream_expected = <<-EXP.gsub(/^\s+\|/, '')
      |{:SEQ_NAME=>"test1", :SEQ=>"atcg", :SEQ_LEN=>4}
      |{:SEQ_NAME=>"test2", :SEQ=>"gtac", :SEQ_LEN=>4}
      |{:RECORD_TYPE=>"count", :COUNT=>2}
    EXP

    assert_equal(stream_expected, collect_result)
  end

  # Two records in, three out: the extra one is the emitted count record.
  test 'BioDSL::Pipeline::Count status outputs correctly' do
    @p.count.run(input: @input, output: @output2)

    assert_equal(2, @p.status.first[:records_in])
    assert_equal(3, @p.status.first[:records_out])
  end
end