├── .github ├── FUNDING.yml └── workflows │ ├── ci.yml │ └── doc.yml ├── .gitignore ├── .gitmodules ├── .gitpod.yml ├── .rubocop.yml ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── examples ├── quick_start.rb ├── quick_start_bam.rb └── sr.rb ├── ext ├── Rakefile ├── cmappy │ ├── cmappy.c │ └── cmappy.h └── minimap2.patch ├── lib ├── minimap2.rb └── minimap2 │ ├── aligner.rb │ ├── alignment.rb │ ├── ffi.rb │ ├── ffi │ ├── constants.rb │ ├── functions.rb │ └── mappy.rb │ └── version.rb ├── minimap2.gemspec ├── renovate.json └── test ├── minimap2 ├── aligner_test.rb ├── alignment_test.rb ├── ffi_test.rb └── version_test.rb ├── minimap2_test.rb └── test_helper.rb /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: kojix2 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | name: ${{ matrix.os }} Ruby ${{ matrix.ruby }} 6 | runs-on: ${{ matrix.os }}-latest 7 | strategy: 8 | fail-fast: false 9 | matrix: 10 | os: ["ubuntu", "macos", "windows"] 11 | ruby: ["3.2", "3.3", "3.4"] 12 | steps: 13 | - uses: actions/checkout@v4 14 | with: 15 | submodules: true 16 | - uses: ruby/setup-ruby@v1 17 | with: 18 | ruby-version: ${{ matrix.ruby }} 19 | bundler-cache: true 20 | - run: bundle exec rake minimap2:build 21 | - run: bundle exec rake test 22 | -------------------------------------------------------------------------------- /.github/workflows/doc.yml: -------------------------------------------------------------------------------- 1 | name: doc 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: ruby/setup-ruby@v1 15 | with: 16 | ruby-version: ruby 17 | - name: Generate document 18 | run: gem 
install -N yard && yard doc 19 | - name: Publish Documentation on GitHub Pages 20 | uses: peaceiris/actions-gh-pages@v4 21 | with: 22 | github_token: ${{ secrets.GITHUB_TOKEN }} 23 | publish_dir: ./doc 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /_yardoc/ 4 | /coverage/ 5 | /doc/ 6 | /pkg/ 7 | /spec/reports/ 8 | /tmp/ 9 | /vendor/ 10 | /.rubocop 11 | 12 | *.lock 13 | 14 | *.so 15 | *.dylib 16 | *.dll 17 | 18 | *.sam 19 | *.bam 20 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "minimap2"] 2 | path = ext/minimap2 3 | url = https://github.com/lh3/minimap2 4 | -------------------------------------------------------------------------------- /.gitpod.yml: -------------------------------------------------------------------------------- 1 | # This configuration file was automatically generated by Gitpod. 2 | # Please adjust to your needs (see https://www.gitpod.io/docs/introduction/learn-gitpod/gitpod-yaml) 3 | # and commit this file to your remote git repository to share the goodness with others. 
4 | 5 | # Learn more from ready-to-use templates: https://www.gitpod.io/docs/introduction/getting-started/quickstart 6 | 7 | tasks: 8 | - name: Setup, Install & Build 9 | before: bundle install 10 | init: bundle exec rake minimap2:build 11 | command: bundle exec rake test 12 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | StringLiterals: 2 | EnforcedStyle: double_quotes 3 | 4 | Style/NumericPredicate: 5 | EnforcedStyle: comparison 6 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source "https://rubygems.org" 4 | 5 | gemspec 6 | 7 | group :test do 8 | gem "minitest" 9 | gem "rake" 10 | end 11 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 kojix2 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ruby-minimap2 2 | 3 | [![Gem Version](https://img.shields.io/gem/v/minimap2?color=brightgreen)](https://rubygems.org/gems/minimap2) 4 | [![test](https://github.com/kojix2/ruby-minimap2/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/ruby-minimap2/actions/workflows/ci.yml) 5 | [![Docs Latest](https://img.shields.io/badge/docs-latest-blue.svg)](https://kojix2.github.io/ruby-minimap2/) 6 | [![The MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE.txt) 7 | [![DOI](https://zenodo.org/badge/325711305.svg)](https://zenodo.org/badge/latestdoi/325711305) 8 | [![Lines of Code](https://img.shields.io/endpoint?url=https%3A%2F%2Ftokei.kojix2.net%2Fbadge%2Fgithub%2Fkojix2%2Fruby-minimap2%2Flines)](https://tokei.kojix2.net/github/kojix2/ruby-minimap2) 9 | 10 | :dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby) 11 | 12 | ## Installation 13 | 14 | ``` 15 | gem install minimap2 16 | ``` 17 | 18 |
19 | Compiling from source 20 | 21 | git clone --recursive https://github.com/kojix2/ruby-minimap2 22 | cd ruby-minimap2 23 | bundle install 24 | bundle exec rake minimap2:build 25 | bundle exec rake install 26 | 27 |
28 | 29 | ## Quick Start 30 | 31 | ```ruby 32 | require "minimap2" 33 | 34 | aligner = Minimap2::Aligner.new("ext/minimap2/test/MT-human.fa") 35 | seq = aligner.seq("MT_human", 100, 200) 36 | hits = aligner.align(seq) 37 | pp hits 38 | ``` 39 | 40 | ``` 41 | [#] 60 | ``` 61 | 62 | ## APIs Overview 63 | 64 | ```markdown 65 | * Minimap2 module 66 | - fastx_read Read fasta/fastq file. 67 | - revcomp Reverse complement sequence. 68 | - execute Calls the main function of Minimap2 with arguments. `Minimap2.execute("--version")` 69 | 70 | * Aligner class 71 | * attributes 72 | - index Returns the value of attribute index. 73 | - idx_opt Returns the value of attribute idx_opt. 74 | - map_opt Returns the value of attribute map_opt. 75 | * methods 76 | - new(path, preset: nil) Create a new aligner. (presets: sr, map-pb, map-ont, map-hifi, splice, asm5, etc.) 77 | - align Maps and returns alignments. 78 | - seq Retrieve a subsequence from the index. 79 | 80 | * Alignment class 81 | * attributes 82 | - ctg Returns name of the reference sequence the query is mapped to. 83 | - ctg_len Returns total length of the reference sequence. 84 | - r_st Returns start positions on the reference. 85 | - r_en Returns end positions on the reference. 86 | - strand Returns +1 if on the forward strand; -1 if on the reverse strand. 87 | - trans_strand Returns transcript strand. +1 if on the forward strand; -1 if on the reverse strand; 0 if unknown. 88 | - blen Returns length of the alignment, including both alignment matches and gaps but excluding ambiguous bases. 89 | - mlen Returns length of the matching bases in the alignment, excluding ambiguous base matches. 90 | - nm Returns number of mismatches, gaps and ambiguous positions in the alignment. 91 | - primary Returns if the alignment is primary (typically the best and the first to generate). 92 | - q_st Returns start positions on the query. 93 | - q_en Returns end positions on the query. 94 | - mapq Returns mapping quality. 
95 | - cigar Returns CIGAR returned as an array of shape (n_cigar,2). The two numbers give the length and the operator of each CIGAR operation. 96 | - read_num Returns read number that the alignment corresponds to; 1 for the first read and 2 for the second read. 97 | - cs Returns the cs tag. 98 | - md Returns the MD tag as in the SAM format. It is an empty string unless the md argument is applied when calling Aligner#align. 99 | - cigar_str Returns CIGAR string. 100 | * methods 101 | - to_h Convert Alignment to hash. 102 | - to_s Convert to the PAF format without the QueryName and QueryLength columns. 103 | 104 | ## FFI module 105 | * IdxOpt class Indexing options. 106 | * MapOpt class Mapping options. 107 | ``` 108 | 109 | - API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2. 110 | - `Aligner#map` has been changed to `align`, because `map` means iterator in Ruby. 111 | - See [documentation](https://kojix2.github.io/ruby-minimap2/) for details. 112 | 113 |
114 | C Structures and Functions 115 | 116 | ### FFI 117 | 118 | - Ruby-Minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi). 119 | - Native C functions can be called from the `Minimap2::FFI` module. 120 | - Native C structure members can be accessed. 121 | - Bitfields are supported by the [ffi-bitfield](https://github.com/kojix2/ffi-bitfield) gem. 122 | 123 | ```ruby 124 | aligner.idx_opt.members 125 | # => [:k, :w, :flag, :bucket_bits, :mini_batch_size, :batch_size] 126 | aligner.idx_opt.values 127 | # => [15, 10, 0, 14, 50000000, 9223372036854775807] 128 | aligner.idx_opt[:k] 129 | # => 15 130 | aligner.idx_opt[:k] = 14 131 | aligner.idx_opt[:k] 132 | # => 14 133 | ``` 134 | 135 |
136 | 137 | ## Contributing 138 | 139 |
140 | Development 141 | 142 | Fork the repository, 143 | then clone it. 144 | 145 | ```sh 146 | git clone --recursive https://github.com/kojix2/ruby-minimap2 147 | # git clone https://github.com/kojix2/ruby-minimap2 148 | # cd ruby-minimap2 149 | # git submodule update -i 150 | ``` 151 | 152 | Build Minimap2 and Mappy. 153 | 154 | ```sh 155 | cd ruby-minimap2 156 | bundle install # Install dependent packages including Ruby-FFI 157 | bundle exec rake minimap2:build 158 | ``` 159 | 160 | A shared library will be created in the vendor directory. 161 | 162 | ``` 163 | └── vendor 164 | └── libminimap2.so 165 | ``` 166 | 167 | Run tests. 168 | 169 | ``` 170 | bundle exec rake test 171 | ``` 172 | 173 | Release a Gem. 174 | 175 | ``` 176 | bundle exec rake minimap2:cleanall 177 | bundle exec rake build 178 | ls -l pkg # Check the size of the Gem and make sure it does not contain any unused code such as shared libraries or lib/simde. 179 | bundle exec rake release 180 | ``` 181 | 182 |
183 | 184 | ruby-minimap2 is a library under development and there are many points to be improved. 185 | 186 | Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and submit [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)! 187 | 188 | Many OSS projects become abandoned because only the founder has commit rights to the original repository. 189 | If you need commit rights to the ruby-minimap2 repository or want to get admin rights and take over the project, please feel free to contact me @kojix2. 190 | 191 | ## License 192 | 193 | [MIT License](https://opensource.org/licenses/MIT). 194 | 195 | ## Acknowledgements 196 | 197 | I would like to thank Heng Li for making Minimap2, and all the readers who read the README to the end. 198 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "bundler/gem_tasks" 4 | require "rake/testtask" 5 | 6 | # Prevent releasing the gem with the minimap2 shared library included. 7 | 8 | task :check_shared_library_exist do 9 | unless Dir.glob("vendor/*.{so,dylib,dll}").empty? 
10 | magenta = "\e[35m" 11 | clear = "\e[0m" 12 | abort "#{magenta}Shared library exists in the vendor directory.#{clear}" 13 | end 14 | end 15 | 16 | Rake::Task["release:guard_clean"].enhance(["check_shared_library_exist"]) 17 | 18 | Rake::TestTask.new(:test) do |t| 19 | t.libs << "test" 20 | t.libs << "lib" 21 | t.test_files = FileList["test/**/*_test.rb"] 22 | end 23 | 24 | task default: :test 25 | 26 | load "ext/Rakefile" 27 | -------------------------------------------------------------------------------- /examples/quick_start.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "minimap2" 4 | 5 | # load or build index 6 | aligner = Minimap2::Aligner.new("#{__dir__}/../ext/minimap2/test/MT-human.fa") 7 | 8 | # retrieve a subsequence from the index 9 | seq = aligner.seq("MT_human", 100, 200) 10 | 11 | # mapping 12 | hits = aligner.align(seq) 13 | 14 | # show result 15 | pp hits 16 | -------------------------------------------------------------------------------- /examples/quick_start_bam.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "minimap2" 4 | require "htslib" 5 | 6 | # load or build index 7 | aligner = Minimap2::Aligner.new("#{__dir__}/../ext/minimap2/test/MT-human.fa") 8 | 9 | # retrieve a subsequence from the index 10 | seq = aligner.seq("MT_human", 100, 200) 11 | 12 | # mapping 13 | hits = aligner.align(seq) 14 | hit = hits[0] 15 | 16 | # save result to BAM file 17 | HTS::Bam.open("test.bam", "wb") do |bam| 18 | header = HTS::Bam::Header.new do |h| 19 | h << "@SQ\tSN:MT_human\tLN:16569" 20 | h << "@PG\tID:ruby-minimap2\tPN:ruby-minimap2\tVN:#{Minimap2::VERSION}" 21 | end 22 | bam.header = header 23 | record = HTS::Bam::Record.new( 24 | header, 25 | qname: "Read1", 26 | flag: 0, 27 | tid: 0, 28 | pos: hit.r_st, 29 | mapq: hit.mapq, 30 | cigar: hit.cigar_str, 31 | mtid: 0, 32 | mpos: 0, 
33 | isize: 0, 34 | seq: seq, 35 | qual: [20] * 100, 36 | l_aux: 0 37 | ) 38 | bam << record 39 | end 40 | -------------------------------------------------------------------------------- /examples/sr.rb: -------------------------------------------------------------------------------- 1 | require "minimap2" 2 | 3 | # Usage 4 | 5 | if ARGV.size < 3 6 | puts "Usage: ruby sr.rb " 7 | exit 1 8 | end 9 | 10 | # Prepare aligner 11 | 12 | REFERENCE = ARGV[0] # reference.fa 13 | FASTQ1 = ARGV[1] # a_1.fa 14 | FASTQ2 = ARGV[2] # a_2.fa 15 | 16 | aligner = Minimap2::Aligner.new( 17 | REFERENCE, 18 | preset: "sr" # Paired short reads 19 | ) 20 | 21 | # Read Fastq file 22 | 23 | a1 = Minimap2.fastx_read(FASTQ1) # Enumerator 24 | a2 = Minimap2.fastx_read(FASTQ2) # Enumerator 25 | 26 | # Output 27 | 28 | loop do 29 | r1 = a1.first 30 | r2 = a2.first 31 | break if r1.nil? or r2.nil? 32 | 33 | s1 = r1[1] 34 | s2 = r2[1] 35 | 36 | aligner.align(s1, s2).each do |aln| 37 | puts aln 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /ext/Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "rake" 4 | require "fileutils" 5 | require "ffi" 6 | 7 | minimap2_dir = File.expand_path("minimap2", __dir__) 8 | target_dir = "../../vendor" 9 | target_fname = FFI.map_library_name("minimap2") 10 | target_path = File.join(target_dir, target_fname) 11 | 12 | task default: ["minimap2:build", "minimap2:clean"] 13 | 14 | namespace :minimap2 do 15 | desc "Compile Minimap2" 16 | task :build do 17 | Dir.chdir(minimap2_dir) do 18 | # Add -fPIC option to Makefile 19 | sh "git apply ../minimap2.patch" 20 | sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ." 
21 | case RbConfig::CONFIG["host_cpu"] 22 | when /arm64/ 23 | sh "make arm_neon=1 aarch64=1" 24 | when /arm/ 25 | sh "make arm_neon=1" 26 | else 27 | sh "make" 28 | end 29 | case RbConfig::CONFIG["host_os"] 30 | when /mswin|msys|mingw|cygwin|bccwin|wince|emc/ 31 | sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread" 32 | when /darwin|mac os/ 33 | sh "clang -dynamiclib -undefined dynamic_lookup -o #{target_fname} *.o -lm -lz -lpthread" 34 | sh "otool -L #{target_fname}" 35 | else 36 | sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread" 37 | sh "ldd -r #{target_fname}" 38 | end 39 | sh "rm cmappy.h cmappy.c" 40 | sh "git apply -R ../minimap2.patch" 41 | FileUtils.mkdir_p(target_dir) 42 | warn "mkdir -p #{target_dir}" 43 | sh "mv #{target_fname} #{target_path}" 44 | end 45 | end 46 | 47 | desc "`make clean`" 48 | task :clean do 49 | Dir.chdir(minimap2_dir) do 50 | sh "make clean" 51 | end 52 | end 53 | 54 | desc "`make clean` and remove shared lib" 55 | task cleanall: [:clean] do 56 | Dir.chdir(minimap2_dir) do 57 | sh "rm #{target_path}" if File.exist?(target_path) 58 | end 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /ext/cmappy/cmappy.c: -------------------------------------------------------------------------------- 1 | #include "cmappy.h" 2 | 3 | void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h) 4 | { 5 | h->ctg = mi->seq[r->rid].name; 6 | h->ctg_len = mi->seq[r->rid].len; 7 | h->ctg_start = r->rs, h->ctg_end = r->re; 8 | h->qry_start = r->qs, h->qry_end = r->qe; 9 | h->strand = r->rev? -1 : 1; 10 | h->mapq = r->mapq; 11 | h->mlen = r->mlen; 12 | h->blen = r->blen; 13 | h->NM = r->blen - r->mlen + r->p->n_ambi; 14 | h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? 
-1 : 0; 15 | h->is_primary = (r->id == r->parent); 16 | h->seg_id = r->seg_id; 17 | h->n_cigar32 = r->p->n_cigar; 18 | h->cigar32 = r->p->cigar; 19 | } 20 | 21 | void mm_free_reg1(mm_reg1_t *r) 22 | { 23 | free(r->p); 24 | } 25 | 26 | kseq_t *mm_fastx_open(const char *fn) 27 | { 28 | gzFile fp; 29 | fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); 30 | return kseq_init(fp); 31 | } 32 | 33 | void mm_fastx_close(kseq_t *ks) 34 | { 35 | gzFile fp; 36 | fp = ks->f->f; 37 | kseq_destroy(ks); 38 | gzclose(fp); 39 | } 40 | 41 | int mm_verbose_level(int v) 42 | { 43 | if (v >= 0) mm_verbose = v; 44 | return mm_verbose; 45 | } 46 | 47 | void mm_reset_timer(void) 48 | { 49 | extern double realtime(void); 50 | mm_realtime0 = realtime(); 51 | } 52 | 53 | mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt) 54 | { 55 | mm_reg1_t *r; 56 | 57 | // Py_BEGIN_ALLOW_THREADS 58 | if (seq2 == 0) { 59 | r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, seqname); 60 | } else { 61 | int _n_regs[2]; 62 | mm_reg1_t *regs[2]; 63 | char *seq[2]; 64 | int i, len[2]; 65 | 66 | len[0] = strlen(seq1); 67 | len[1] = strlen(seq2); 68 | seq[0] = (char*)seq1; 69 | seq[1] = strdup(seq2); 70 | for (i = 0; i < len[1]>>1; ++i) { 71 | int t = seq[1][len[1] - i - 1]; 72 | seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]]; 73 | seq[1][i] = seq_comp_table[t]; 74 | } 75 | if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]]; 76 | mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, seqname); 77 | for (i = 0; i < _n_regs[1]; ++i) 78 | regs[1][i].rev = !regs[1][i].rev; 79 | *n_regs = _n_regs[0] + _n_regs[1]; 80 | regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs)); 81 | memcpy(&regs[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t)); 82 | free(regs[1]); 83 | r = regs[0]; 84 | } 85 | // Py_END_ALLOW_THREADS 86 | 87 | 
return r; 88 | } 89 | 90 | char *mappy_revcomp(int len, const uint8_t *seq) 91 | { 92 | int i; 93 | char *rev; 94 | rev = (char*)malloc(len + 1); 95 | for (i = 0; i < len; ++i) 96 | rev[len - i - 1] = seq_comp_table[seq[i]]; 97 | rev[len] = 0; 98 | return rev; 99 | } 100 | 101 | char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len) 102 | { 103 | int i, rid; 104 | char *s; 105 | *len = 0; 106 | rid = mm_idx_name2id(mi, name); 107 | if (rid < 0) return 0; 108 | if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0; 109 | if (en < 0 || (uint32_t)en > mi->seq[rid].len) 110 | en = mi->seq[rid].len; 111 | s = (char*)malloc(en - st + 1); 112 | *len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s); 113 | for (i = 0; i < *len; ++i) 114 | s[i] = "ACGTN"[(uint8_t)s[i]]; 115 | s[*len] = 0; 116 | return s; 117 | } 118 | 119 | mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len) 120 | { 121 | const char *fake_name = "N/A"; 122 | char *s; 123 | mm_idx_t *mi; 124 | s = (char*)calloc(len + 1, 1); 125 | memcpy(s, seq, len); 126 | mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name); 127 | free(s); 128 | return mi; 129 | } 130 | -------------------------------------------------------------------------------- /ext/cmappy/cmappy.h: -------------------------------------------------------------------------------- 1 | #ifndef CMAPPY_H 2 | #define CMAPPY_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "minimap.h" 8 | #include "kseq.h" 9 | KSEQ_DECLARE(gzFile) 10 | 11 | typedef struct { 12 | const char *ctg; 13 | int32_t ctg_start, ctg_end; 14 | int32_t qry_start, qry_end; 15 | int32_t blen, mlen, NM, ctg_len; 16 | uint8_t mapq, is_primary; 17 | int8_t strand, trans_strand; 18 | int32_t seg_id; 19 | int32_t n_cigar32; 20 | uint32_t *cigar32; 21 | } mm_hitpy_t; 22 | 23 | void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h); 24 | 25 | void 
mm_free_reg1(mm_reg1_t *r); 26 | 27 | kseq_t *mm_fastx_open(const char *fn); 28 | 29 | void mm_fastx_close(kseq_t *ks); 30 | 31 | int mm_verbose_level(int v); 32 | 33 | void mm_reset_timer(void); 34 | 35 | extern unsigned char seq_comp_table[256]; 36 | mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt); 37 | 38 | char *mappy_revcomp(int len, const uint8_t *seq); 39 | 40 | char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len); 41 | 42 | mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len); 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /ext/minimap2.patch: -------------------------------------------------------------------------------- 1 | --- a/Makefile 2 | +++ b/Makefile 3 | @@ -1,9 +1,9 @@ 4 | -CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra 5 | +CFLAGS= -g -Wall -O2 -Wc++-compat -fPIC #-Wextra 6 | CPPFLAGS= -DHAVE_KALLOC 7 | INCLUDES= 8 | OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \ 9 | lchain.o align.o hit.o seed.o jump.o map.o format.o pe.o esterr.o splitidx.o \ 10 | - ksw2_ll_sse.o 11 | + ksw2_ll_sse.o cmappy.o 12 | PROG= minimap2 13 | PROG_EXTRA= sdust minimap2-lite 14 | LIBS= -lm -lz -lpthread 15 | @@ -135,3 +135,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h 16 | seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h 17 | sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h 18 | splitidx.o: mmpriv.h minimap.h bseq.h kseq.h 19 | +cmappy.o: cmappy.h 20 | -------------------------------------------------------------------------------- /lib/minimap2.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # dependencies 4 | require "ffi" 5 | 6 | # modules 7 | require_relative "minimap2/aligner" 8 | require_relative "minimap2/alignment" 9 
| require_relative "minimap2/version" 10 | 11 | # Minimap2 mapper for long read sequences 12 | # https://github.com/lh3/minimap2 13 | # Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34:3094-3100. 14 | # doi:10.1093/bioinformatics/bty191 15 | module Minimap2 16 | class Error < StandardError; end 17 | 18 | class << self 19 | attr_accessor :ffi_lib 20 | end 21 | 22 | lib_name = ::FFI.map_library_name("minimap2") 23 | self.ffi_lib = if ENV["MINIMAPDIR"] 24 | File.expand_path(lib_name, ENV["MINIMAPDIR"]) 25 | else 26 | File.expand_path("../vendor/#{lib_name}", __dir__) 27 | end 28 | 29 | # friendlier error message 30 | autoload :FFI, "minimap2/ffi" 31 | 32 | # methods from mappy 33 | class << self 34 | # Execute minimap2 command with given options. 35 | # @overload execute(arg0,arg1,...) 36 | # @param [String] arg minimap2 command option. 37 | # @example Get minimap2 version 38 | # Minimap2.execute('--version') 39 | 40 | def execute(*rb_argv) 41 | str_ptrs = [] 42 | # First argument is the program name. 43 | str_ptrs << ::FFI::MemoryPointer.from_string("minimap2") 44 | rb_argv.each do |arg| 45 | arg.to_s.split(/\s+/).each do |s| 46 | str_ptrs << ::FFI::MemoryPointer.from_string(s) 47 | end 48 | end 49 | str_ptrs << nil 50 | 51 | # Load all the pointers into a native memory block 52 | argv = ::FFI::MemoryPointer.new(:pointer, str_ptrs.length) 53 | str_ptrs.each_with_index do |p, i| 54 | argv[i].put_pointer(0, p) 55 | end 56 | 57 | FFI.main(str_ptrs.length - 1, argv) 58 | end 59 | 60 | # Get verbosity level. 61 | # @return [Integer] verbosity level. 62 | 63 | def verbose 64 | FFI.mm_verbose_level(-1) 65 | end 66 | 67 | # Set verbosity level. 68 | # @param [Integer] level verbosity level 69 | # @return [Integer] verbosity level. 70 | 71 | def verbose=(level) 72 | FFI.mm_verbose_level(level) 73 | end 74 | 75 | # Read fasta/fastq file. 76 | # @param [String] file_path 77 | # @param [Boolean] comment If true, the comment will be read. 
78 | # @yield [name, seq, qual, comment] 79 | # @return [Enumerator] enum Returns an Enumerator if no block is given. 80 | # Note: You can use BioRuby instead of this method. 81 | 82 | def fastx_read(file_path, comment: false, &block) 83 | path = File.expand_path(file_path) 84 | 85 | # raise error in Ruby because ks.null? is false even if the file does not exist. 86 | raise ArgumentError, "File not found: #{path}" unless File.exist?(path) 87 | 88 | ks = FFI.mm_fastx_open(path) 89 | 90 | if block_given? 91 | fastx_each(ks, comment, &block) 92 | else 93 | Enumerator.new do |y| 94 | # rewind does not work 95 | fastx_each(ks, comment) { |r| y << r } 96 | end 97 | end 98 | end 99 | 100 | # Reverse complement sequence. 101 | # @param [String] seq 102 | # @return [String] seq 103 | 104 | def revcomp(seq) 105 | l = seq.size 106 | bseq = ::FFI::MemoryPointer.new(:char, l) 107 | bseq.put_bytes(0, seq) 108 | FFI.mappy_revcomp(l, bseq) 109 | end 110 | 111 | private 112 | 113 | def fastx_each(ks, comment) 114 | yield fastx_next(ks, comment) while FFI.kseq_read(ks) >= 0 115 | FFI.mm_fastx_close(ks) 116 | end 117 | 118 | def fastx_next(ks, read_comment) 119 | qual = ks[:qual][:s] if ks[:qual][:l] > 0 120 | name = ks[:name][:s] 121 | seq = ks[:seq][:s] 122 | if read_comment 123 | comment = ks[:comment][:s] if ks[:comment][:l] > 0 124 | [name, seq, qual, comment] 125 | else 126 | [name, seq, qual] 127 | end 128 | end 129 | end 130 | end 131 | -------------------------------------------------------------------------------- /lib/minimap2/aligner.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Minimap2 4 | class Aligner 5 | attr_reader :idx_opt, :map_opt, :index 6 | 7 | # Create a new aligner. 8 | # 9 | # @param fn_idx_in [String] index or sequence file name. 10 | # @param seq [String] a single sequence to index. 11 | # @param preset [String] minimap2 preset. 
12 | # * map-pb : PacBio CLR genomic reads 13 | # * map-ont : Oxford Nanopore genomic reads 14 | # * map-hifi : PacBio HiFi/CCS genomic reads (v2.19 or later) 15 | # * asm20 : PacBio HiFi/CCS genomic reads (v2.18 or earlier) 16 | # * sr : short genomic paired-end reads 17 | # * splice : spliced long reads (strand unknown) 18 | # * splice:hq : Final PacBio Iso-seq or traditional cDNA 19 | # * asm5 : intra-species asm-to-asm alignment 20 | # * ava-pb : PacBio read overlap 21 | # * ava-ont : Nanopore read overlap 22 | # @param k [Integer] k-mer length, no larger than 28. 23 | # @param w [Integer] minimizer window size, no larger than 255. 24 | # @param min_cnt [Integer] minimum number of minimizers on a chain. 25 | # @param min_chain_score [Integer] minimum chain score. 26 | # @param min_dp_score 27 | # @param bw [Integer] chaining and alignment band width. (initial chaining and extension) 28 | # @param bw_long [Integer] chaining and alignment band width (RMQ-based rechaining and closing gaps) 29 | # @param best_n [Integer] max number of alignments to return. 30 | # @param n_threads [Integer] number of indexing threads. 31 | # @param fn_idx_out [String] name of file to which the index is written. 32 | # This parameter has no effect if seq is set. 33 | # @param max_frag_len [Integer] 34 | # @param extra_flags [Integer] additional flags defined in minimap.h. 35 | # @param scoring [Array] scoring system. 36 | # It is a tuple/list consisting of 4, 6 or 7 positive integers. 37 | # The first 4 elements specify match scoring, mismatch penalty, gap open and gap extension penalty. 38 | # The 5th and 6th elements, if present, set long-gap open and long-gap extension penalty. 39 | # The 7th sets a mismatch penalty involving ambiguous bases. 
def initialize(
  fn_idx_in = nil,
  seq: nil,
  preset: nil,
  k: nil,
  w: nil,
  min_cnt: nil,
  min_chain_score: nil,
  min_dp_score: nil,
  bw: nil,
  bw_long: nil,
  best_n: nil,
  n_threads: 3,
  fn_idx_out: nil,
  max_frag_len: nil,
  extra_flags: nil,
  scoring: nil,
  sc_ambi: nil,
  max_chain_skip: nil
)
  @idx_opt = FFI::IdxOpt.new
  @map_opt = FFI::MapOpt.new
  # Start with a NULL index handle so that the `index.null?` guards in the
  # other methods work even when neither fn_idx_in nor seq is given.
  @index = FFI::Idx.new(::FFI::Pointer::NULL)

  # Load the library defaults first (NULL preset), then layer the named
  # preset on top. Per minimap.h, a named preset only touches the fields it
  # affects, so applying it to freshly zeroed structs leaves the rest unset.
  FFI.mm_set_opt(nil, idx_opt, map_opt)
  if preset
    r = FFI.mm_set_opt(preset, idx_opt, map_opt)
    raise ArgumentError, "Unknown preset name: #{preset}" if r == -1
  end

  # always perform alignment
  map_opt[:flag] |= 4
  idx_opt[:batch_size] = 0x7fffffffffffffff

  # override preset options
  idx_opt[:k] = k if k
  idx_opt[:w] = w if w
  map_opt[:min_cnt] = min_cnt if min_cnt
  map_opt[:min_chain_score] = min_chain_score if min_chain_score
  map_opt[:min_dp_max] = min_dp_score if min_dp_score
  map_opt[:bw] = bw if bw
  map_opt[:bw_long] = bw_long if bw_long
  map_opt[:best_n] = best_n if best_n
  map_opt[:max_frag_len] = max_frag_len if max_frag_len
  map_opt[:flag] |= extra_flags if extra_flags
  if scoring && scoring.size >= 4
    map_opt[:a] = scoring[0]
    map_opt[:b] = scoring[1]
    map_opt[:q] = scoring[2]
    map_opt[:e] = scoring[3]
    # Long-gap penalties default to the short-gap ones ...
    map_opt[:q2] = map_opt[:q]
    map_opt[:e2] = map_opt[:e]
    if scoring.size >= 6
      # ... unless the 5th/6th (and optional 7th) elements are supplied.
      map_opt[:q2] = scoring[4]
      map_opt[:e2] = scoring[5]
      map_opt[:sc_ambi] = scoring[6] if scoring.size >= 7
    end
  end
  map_opt[:sc_ambi] = sc_ambi if sc_ambi
  map_opt[:max_chain_skip] = max_chain_skip if max_chain_skip

  if fn_idx_in
    warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
    reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)

    # The Ruby version raises an error here
    raise "Cannot open : #{fn_idx_in}" if reader.null?

    @index = FFI.mm_idx_reader_read(reader, n_threads)
    FFI.mm_idx_reader_close(reader)
    # Fail loudly instead of handing a NULL index to the C library below.
    raise "Cannot read index : #{fn_idx_in}" if index.null?

    FFI.mm_mapopt_update(map_opt, index)
    FFI.mm_idx_index_name(index)
  elsif seq
    # The last argument is a C length, so pass bytes, not characters.
    @index = FFI.mappy_idx_seq(
      idx_opt[:w], idx_opt[:k], idx_opt[:flag] & 1,
      idx_opt[:bucket_bits], seq, seq.bytesize
    )
    FFI.mm_mapopt_update(map_opt, index)
    map_opt[:mid_occ] = 1000 # don't filter high-occ seeds
  end
end

# Explicitly releases the memory of the index object.
# Calling it more than once is safe: after the first call the aligner holds
# a NULL index, so the native destructor is never run twice on one pointer.

def free_index
  return if index.nil? || index.null?

  FFI.mm_idx_destroy(index)
  @index = FFI::Idx.new(::FFI::Pointer::NULL)
end

# @param seq [String]
# @param seq2 [String]
# @param buf [FFI::TBuf]
# @param cs [true, false]
# @param md [true, false]
# @param max_frag_len [Integer] per-call override; does not persist.
# @param extra_flags [Integer] per-call extra flags; do not persist.
# @yieldparam alignment [Alignment] each alignment, when a block is given.
# @note Name change: map -> align
#   In the Ruby language, the name map means iterator.
#   The original name is map, but here I use the method name align.
# @note The use of Enumerator is being considered. The method names may change again.
# @return [Array] alignments (nil when no usable index is loaded)

def align(
  seq, seq2 = nil,
  name: nil,
  buf: nil,
  cs: false,
  md: false,
  max_frag_len: nil,
  extra_flags: nil
)
  return if index.null?
  # Base-level alignment (flag bit 4) needs the reference bases; give up when
  # the index was built without them (index flag bit 2).
  return if (map_opt[:flag] & 4) != 0 && (index[:flag] & 2) != 0

  # Per-call overrides go onto a private copy so that they do not leak into
  # later calls on the same aligner.
  opt = map_opt
  if max_frag_len || extra_flags
    opt = FFI::MapOpt.new
    opt.to_ptr.put_bytes(0, map_opt.to_ptr.get_bytes(0, FFI::MapOpt.size))
    opt[:max_frag_len] = max_frag_len if max_frag_len
    opt[:flag] |= extra_flags if extra_flags
  end

  buf ||= FFI::TBuf.new
  km = FFI.mm_tbuf_get_km(buf)

  n_regs_ptr = ::FFI::MemoryPointer.new(:int)
  regs_ptr = FFI.mm_map_aux(index, name, seq, seq2, n_regs_ptr, buf, opt)
  n_regs = n_regs_ptr.read_int
  # NOTE(review): the array behind regs_ptr itself is never released here;
  # only the per-hit payloads are (mm_free_reg1). Confirm who owns it.

  regs = Array.new(n_regs) do |i|
    FFI::Reg1.new(regs_ptr + i * FFI::Reg1.size)
  end

  hit = FFI::Hit.new

  # Output slot (char **) and capacity (int *) reused by mm_gen_cs/mm_gen_md.
  cs_str = ::FFI::MemoryPointer.new(:pointer)
  m_cs_str = ::FFI::MemoryPointer.new(:int)

  alignments = []

  # `i` counts the hits whose native payload has been freed. It is a manual
  # index on purpose: if anything raises mid-way, the ensure clause frees
  # exactly the hits that have not been released yet.
  i = 0
  begin
    while i < n_regs
      FFI.mm_reg2hitpy(index, regs[i], hit)

      c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
      cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array

      cs_tag = ""
      md_tag = ""
      if cs || md
        cur_seq = hit[:seg_id] > 0 && seq2 ? seq2 : seq

        if cs
          l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], cur_seq, 1)
          cs_tag = cs_str.read_pointer.read_string(l_cs_str)
        end

        if md
          l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], cur_seq)
          md_tag = cs_str.read_pointer.read_string(l_cs_str)
        end
      end

      alignments << Alignment.new(hit, cigar, cs_tag, md_tag)

      FFI.mm_free_reg1(regs[i])
      i += 1
    end
  ensure
    while i < n_regs
      FFI.mm_free_reg1(regs[i])
      i += 1
    end
  end

  alignments.each { |alignment| yield alignment } if block_given?
  alignments
end

# Retrieve a subsequence from the index.
# @param name [String] name of the reference sequence
# @param start [Integer] start coordinate handed to the native fetch
# @param stop [Integer] end coordinate handed to the native fetch
# @return [String, nil] the bases, or nil when nothing could be fetched

def seq(name, start = 0, stop = 0x7fffffff)
  return if index.null?
  # Same guard as in #align: nothing to fetch from an index without bases.
  return if (map_opt[:flag] & 4) != 0 && (index[:flag] & 2) != 0

  len_ptr = ::FFI::MemoryPointer.new(:int)
  bases = FFI.mappy_fetch_seq(index, name, start, stop, len_ptr)
  len = len_ptr.read_int
  return nil if len == 0

  # NOTE(review): the native buffer behind `bases` is not released here;
  # confirm whether the caller is expected to free it.
  bases.read_string(len)
end

# k-mer length, no larger than 28

def k
  index[:k]
end

# minimizer window size, no larger than 255

def w
  index[:w]
end

# Number of reference sequences stored in the index.

def n_seq
  index[:n_seq]
end

# Names of the reference sequences, in index order.
# @return [Array<String>]

def seq_names
  ptr = index[:seq].to_ptr
  Array.new(index[:n_seq]) do |i|
    FFI::IdxSeq.new(ptr + i * FFI::IdxSeq.size)[:name]
  end
end
# @!attribute primary
#   @return [Integer] if the alignment is primary (typically the best and the first to generate)
# @!attribute q_st
#   @return [Integer] start positions on the query.
# @!attribute q_en
#   @return [Integer] end positions on the query.
# @!attribute mapq
#   @return [Integer] mapping quality.
# @!attribute cigar
#   @return [Array] CIGAR returned as an array of shape (n_cigar,2).
#     The two numbers give the length and the operator of each CIGAR operation.
# @!attribute read_num
#   @return [Integer] read number that the alignment corresponds to;
#     1 for the first read and 2 for the second read.
# @!attribute cs
#   @return [String] the cs tag.
# @!attribute md
#   @return [String] the MD tag as in the SAM format.
#     It is an empty string unless the md argument is applied when calling Aligner#align.
# @!attribute cigar_str
#   @return [String] CIGAR string.

class Alignment
  # Attribute names, in the order used by #to_h.
  # @return [Array<Symbol>]
  def self.keys
    %i[ctg ctg_len r_st r_en strand trans_strand blen mlen nm primary
       q_st q_en mapq cigar read_num cs md cigar_str]
  end

  attr_reader(*keys)

  # @param h [#[]] hit record indexed by the field names read below
  # @param cigar [Array<Array(Integer, Integer)>] [length, operator] pairs
  # @param cs [String, nil] cs tag; nil or "" when not generated
  # @param md [String, nil] MD tag; nil or "" when not generated
  def initialize(h, cigar, cs = nil, md = nil)
    @ctg          = h[:ctg]
    @ctg_len      = h[:ctg_len]
    @r_st         = h[:ctg_start]
    @r_en         = h[:ctg_end]
    @strand       = h[:strand]
    @trans_strand = h[:trans_strand]
    @blen         = h[:blen]
    @mlen         = h[:mlen]
    @nm           = h[:NM]
    @primary      = h[:is_primary]
    @q_st         = h[:qry_start]
    @q_en         = h[:qry_end]
    @mapq         = h[:mapq]
    @cigar        = cigar
    @read_num     = h[:seg_id] + 1 # seg_id is 0-based, read_num is 1-based
    @cs           = cs
    @md           = md

    @cigar_str = cigar.map { |len, op| len.to_s + FFI::CIGAR_STR[op] }.join
  end

  # @return [Boolean] true when the primary flag equals 1
  def primary?
    @primary == 1
  end

  # Convert Alignment to hash.
  # @return [Hash{Symbol => Object}] one entry per name in .keys

  def to_h
    self.class.keys.map { |key| [key, __send__(key)] }.to_h
  end

  # Convert to the PAF format without the QueryName and QueryLength columns.
  # The cs:Z: and MD:Z: tags are appended only when they carry a value;
  # Aligner#align passes "" for tags that were not requested, and an empty
  # string is truthy in Ruby, so emptiness is checked explicitly.
  # @return [String] tab-separated fields

  def to_s
    strand = if @strand > 0
               "+"
             elsif @strand < 0
               "-"
             else
               "?"
             end
    tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
    ts = if @trans_strand > 0
           "ts:A:+"
         elsif @trans_strand < 0
           "ts:A:-"
         else
           "ts:A:."
         end
    fields = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
              @mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
    fields << "cs:Z:#{@cs}" unless @cs.nil? || @cs.empty?
    fields << "MD:Z:#{@md}" unless @md.nil? || @md.empty?
    fields.join("\t")
  end
end
# Wraps the inherited attach_function so that a symbol missing from the
# loaded library produces a warning instead of aborting the load.
def self.attach_function(*)
  super
rescue ::FFI::NotFoundError => e
  warn e.message
end

module Minimap2
  module FFI
    # flags
    # Bit values for the :flag field of MapOpt (see MM_F_* macros).
    NO_DIAG        = 0x001 # no exact diagonal hit
    NO_DUAL        = 0x002 # skip pairs where query name is lexicographically larger than target name
    CIGAR          = 0x004
    OUT_SAM        = 0x008
    NO_QUAL        = 0x010
    OUT_CG         = 0x020
    OUT_CS         = 0x040
    SPLICE         = 0x080 # splice mode
    SPLICE_FOR     = 0x100 # match GT-AG
    SPLICE_REV     = 0x200 # match CT-AC, the reverse complement of GT-AG
    NO_LJOIN       = 0x400
    OUT_CS_LONG    = 0x800
    SR             = 0x1000
    FRAG_MODE      = 0x2000
    NO_PRINT_2ND   = 0x4000
    TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
    LONG_CIGAR     = 0x10000
    INDEPEND_SEG   = 0x20000
    SPLICE_FLANK   = 0x40000
    SOFTCLIP       = 0x80000
    FOR_ONLY       = 0x100000
    REV_ONLY       = 0x200000
    HEAP_SORT      = 0x400000
    ALL_CHAINS     = 0x800000
    OUT_MD         = 0x1000000
    COPY_COMMENT   = 0x2000000
    EQX            = 0x4000000 # use =/X instead of M
    PAF_NO_HIT     = 0x8000000 # output unmapped reads to PAF
    NO_END_FLT     = 0x10000000
    HARD_MLEVEL    = 0x20000000
    SAM_HIT_ONLY   = 0x40000000
    RMQ            = 0x80000000
    QSTRAND        = 0x100000000
    NO_INV         = 0x200000000
    NO_HASH_NAME   = 0x400000000
    SPLICE_OLD     = 0x800000000
    SECONDARY_SEQ  = 0x1000000000 # output SEQ field for secondary alignments using hard clipping
    OUT_DS         = 0x2000000000
    WEAK_PAIRING   = 0x4000000000
    SR_RNA         = 0x8000000000
    OUT_JUNC       = 0x10000000000

    # Small-valued flags; presumably the index flags stored in the :flag
    # field of IdxOpt / Idx (TODO confirm against minimap.h).
    HPC = 0x1
    NO_SEQ = 0x2
    NO_NAME = 0x4

    IDX_MAGIC = "MMI\2"

    MAX_SEG = 255

    # Numeric CIGAR operator codes; each code is also the index of the
    # operator's letter in CIGAR_STR below.
    CIGAR_MATCH = 0
    CIGAR_INS = 1
    CIGAR_DEL = 2
    CIGAR_N_SKIP = 3
    CIGAR_SOFTCLIP = 4
    CIGAR_HARDCLIP = 5
    CIGAR_PADDING = 6
    CIGAR_EQ_MATCH = 7
    CIGAR_X_MISMATCH = 8

    CIGAR_STR = "MIDNSHP=XB"

    # emulate 128-bit integers
    class MM128 < ::FFI::Struct
      layout \
        :x, :uint64_t,
        :y, :uint64_t
    end

    # emulate 128-bit arrays
    class MM128V < ::FFI::Struct
      layout \
        :n, :size_t,
        :m, :size_t,
        :a, MM128.ptr
    end

    # minimap2 index
    # One entry of the reference-sequence table that Idx#[:seq] points to.
    class IdxSeq < ::FFI::Struct
      layout \
        :name,   :string,   # name of the db sequence
        :offset, :uint64_t, # offset in mm_idx_t::S
        :len,    :uint32,   # length
        :is_alt, :uint32
    end

    # The index handle itself. Field order must mirror the C struct.
    class Idx < ::FFI::Struct
      layout \
        :b,     :int32,
        :w,     :int32,
        :k,     :int32,
        :flag,  :int32,
        :n_seq, :uint32,    # number of reference sequences
        :index, :int32,
        :n_alt, :int32,
        :seq,   IdxSeq.ptr, # sequence name, length and offset
        :S,     :pointer,   # 4-bit packed sequence
        :B,     :pointer,   # index (hidden)
        :I,     :pointer,   # intervals (hidden)
        :spsc,  :pointer,   # splice score (hidden)
        :J,     :pointer,   # junctions to create jumps (hidden)
        :km,    :pointer,
        :h,     :pointer
    end

    # minimap2 alignment
    # Fixed-size header that precedes the variable-length CIGAR array.
    class Extra < ::FFI::BitStruct
      layout \
        :capacity,            :uint32, # the capacity of cigar[]
        :dp_score,            :int32,  # DP score
        :dp_max,              :int32,  # score of the max-scoring segment
        :dp_max2,             :int32,  # score of the best alternate mappings
        :dp_max0,             :int32,  # DP score before mm_update_dp_max() adjustment
        :n_ambi_trans_strand, :uint32,
        :n_cigar,             :uint32
      # :cigar, :pointer # variable length array (see cigar method below)

      bit_field :n_ambi_trans_strand,
                :n_ambi,       30, # number of ambiguous bases
                :trans_strand, 2   # transcript strand: 0 for unknown, 1 for +, 2 for -

      # variable length array
      # Reads :n_cigar uint32 values starting at byte offset `size`, i.e.
      # immediately after the fixed layout declared above.
      def cigar
        pointer.get_array_of_uint32(size, self[:n_cigar])
      end
    end

    # One alignment record; the packed :fields word is split up by the
    # bit_field declaration below.
    class Reg1 < ::FFI::BitStruct
      layout \
        :id,     :int32, # ID for internal uses (see also parent below)
        :cnt,    :int32, # number of minimizers; if on the reverse strand
        :rid,    :int32, # reference index; if this is an alignment from inversion rescue
        :score,  :int32, # DP alignment score
        :qs,     :int32, # query start
        :qe,     :int32, # query end
        :rs,     :int32, # reference start
        :re,     :int32, # reference end
        :parent, :int32, # parent==id if primary
        :subsc,  :int32, # best alternate mapping score
        :as,     :int32, # offset in the a[] array (for internal uses only)
        :mlen,   :int32, # seeded exact match length
        :blen,   :int32, # seeded alignment block length
        :n_sub,  :int32, # number of suboptimal mappings
        :score0, :int32, # initial chaining score (before chain merging/splitting)
        :fields, :uint32,
        :hash,   :uint32,
        :div,    :float,
        :p,      Extra.ptr

      bit_field :fields,
                :mapq,            8,
                :split,           2,
                :rev,             1,
                :inv,             1,
                :sam_pri,         1,
                :proper_frag,     1,
                :pe_thru,         1,
                :seg_split,       1,
                :seg_id,          8,
                :split_inv,       1,
                :is_alt,          1,
                :strand_retained, 1,
                :is_spliced,      1,
                :dummy,           4
    end

    # indexing option
    class IdxOpt < ::FFI::Struct
      layout \
        :k,               :short,
        :w,               :short,
        :flag,            :short,
        :bucket_bits,     :short,
        :mini_batch_size, :int64_t,
        :batch_size,      :uint64_t
    end

    # mapping option
    # Field order must mirror the C struct; do not reorder.
    class MapOpt < ::FFI::Struct
      layout \
        :flag,             :int64_t, # see MM_F_* macros
        :seed,             :int,
        :sdust_thres,      :int,     # score threshold for SDUST; 0 to disable
        :max_qlen,         :int,     # max query length
        :bw,               :int,     # bandwidth
        :bw_long,          :int,
        :max_gap,          :int,     # break a chain if there are no minimizers in a max_gap window
        :max_gap_ref,      :int,
        :max_frag_len,     :int,
        :max_chain_skip,   :int,
        :max_chain_iter,   :int,
        :min_cnt,          :int,     # min number of minimizers on each chain
        :min_chain_score,  :int,     # min chaining score
        :chain_gap_scale,  :float,
        :chain_skip_scale, :float,
        :rmq_size_cap,     :int,
        :rmq_inner_dist,   :int,
        :rmq_rescue_size,  :int,
        :rmq_rescue_ratio, :float,
        :mask_level,       :float,
        :mask_len,         :int,
        :pri_ratio,        :float,
        :best_n,           :int,     # top best_n chains are subjected to DP alignment
        :alt_drop,         :float,
        :a,                :int,     # matching score
        :b,                :int,     # mismatch
        :q,                :int,     # gap-open
        :e,                :int,     # gap-ext
        :q2,               :int,     # gap-open
        :e2,               :int,     # gap-ext
        :transition,       :int,     # transition mismatch score (A:G, C:T)
        :sc_ambi,          :int,     # score when one or both bases are "N"
        :noncan,           :int,     # cost of non-canonical splicing sites
        :junc_pen,         :int,
        :junc_bonus,       :int,
        :zdrop,            :int,     # break alignment if alignment score drops too fast along the diagonal
        :zdrop_inv,        :int,
        :end_bonus,        :int,
        :min_dp_max,       :int,     # drop an alignment if the score of the max scoring segment is below this threshold
        :min_ksw_len,      :int,
        :anchor_ext_len,   :int,
        :anchor_ext_shift, :int,
        :max_clip_ratio,   :float,   # drop an alignment if BOTH ends are clipped above this ratio
        :rank_min_len,     :int,
        :rank_frac,        :float,
        :pe_ori,           :int,
        :pe_bonus,         :int,
        :jump_min_match,   :int32,
        :mid_occ_frac,     :float,   # only used by mm_mapopt_update(); see below
        :q_occ_frac,       :float,
        :min_mid_occ,      :int32,
        :max_mid_occ,      :int32,
        :mid_occ,          :int32,   # ignore seeds with occurrences above this threshold
        :max_occ,          :int32,
        :max_max_occ,      :int32,
        :occ_dist,         :int32,
        :mini_batch_size,  :int64_t, # size of a batch of query bases to process in parallel
        :max_sw_mat,       :int64_t,
        :cap_kalloc,       :int64_t,
        :split_prefix,     :string
    end

    # index reader
    class IdxReader < ::FFI::Struct
      layout \
        :is_idx,     :int,
        :n_parts,    :int,
        :idx_size,   :int64_t,
        :opt,        IdxOpt,
        :fp_out,     :pointer, # FILE
        :seq_or_idx, :pointer  # FIXME: Union mm_bseq_files or FILE
    end

    # memory buffer for thread-local storage during mapping
    class TBuf < ::FFI::Struct
      layout \
        :km,       :pointer,
        :rep_len,  :int,
        :frag_gap, :int
    end
  end
end

# First two native bindings of the function list.
attach_function \
  :main,
  %i[int pointer],
  :int

# int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo);
# Attached under the Ruby name mm_set_opt_raw (first symbol); the second
# symbol is the name looked up in the native library.
attach_function \
  :mm_set_opt_raw, :mm_set_opt,
  [:pointer, IdxOpt.by_ref, MapOpt.by_ref],
  :int
private_class_method :mm_set_opt_raw

# Ruby-friendly front end for the native mm_set_opt.
# @param preset [String, Symbol, 0, nil] preset name; 0 or nil passes a NULL
#   pointer to the native function.
# @param io [IdxOpt] indexing options to fill in
# @param mo [MapOpt] mapping options to fill in
# @return [Integer] status code returned by the native function
def self.mm_set_opt(preset, io, mo)
  ptr = case preset
        when 0, nil
          ::FFI::Pointer::NULL
        else
          ::FFI::MemoryPointer.from_string(preset.to_s)
        end
  mm_set_opt_raw(ptr, io, mo)
end

# int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo);
attach_function \
  :mm_check_opt,
  [IdxOpt.by_ref, MapOpt.by_ref],
  :int

# void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi);
attach_function \
  :mm_mapopt_update,
  [MapOpt.by_ref, Idx.by_ref],
  :void

# void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len);
attach_function \
  :mm_mapopt_max_intron_len,
  [MapOpt.by_ref, :int],
  :void

# mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out);
attach_function \
  :mm_idx_reader_open,
  [:string, IdxOpt.by_ref, :string],
  IdxReader.by_ref

# mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads);
attach_function \
  :mm_idx_reader_read,
  [IdxReader.by_ref, :int],
  Idx.by_ref

# void mm_idx_reader_close(mm_idx_reader_t *r);
attach_function \
  :mm_idx_reader_close,
  [IdxReader.by_ref],
  :void

# int mm_idx_reader_eof(const mm_idx_reader_t *r);
attach_function \
  :mm_idx_reader_eof,
  [IdxReader.by_ref],
  :int

# int64_t mm_idx_is_idx(const char *fn);
attach_function \
  :mm_idx_is_idx,
  [:string],
  :int64_t

# mm_idx_t *mm_idx_load(FILE *fp);
attach_function \
  :mm_idx_load,
  [:pointer], # FILE pointer
  Idx.by_ref

# void mm_idx_dump(FILE *fp, const mm_idx_t *mi);
attach_function \
  :mm_idx_dump,
  [:pointer, Idx.by_ref], # FILE pointer
  :void

# mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const char **seq, const char **name);
attach_function \
  :mm_idx_str,
  %i[int int int int int pointer pointer],
  Idx.by_ref

# void mm_idx_stat(const mm_idx_t *idx);
attach_function \
  :mm_idx_stat,
  [Idx.by_ref],
  :void

# void mm_idx_destroy(mm_idx_t *mi);
attach_function \
  :mm_idx_destroy,
  [Idx.by_ref],
  :void

# mm_tbuf_t *mm_tbuf_init(void);
attach_function \
  :mm_tbuf_init,
  [],
  TBuf.by_ref

# void mm_tbuf_destroy(mm_tbuf_t *b);
attach_function \
  :mm_tbuf_destroy,
  [TBuf.by_ref],
  :void

# void *mm_tbuf_get_km(mm_tbuf_t *b);
attach_function \
  :mm_tbuf_get_km,
  [TBuf.by_ref],
  :pointer

# mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name);
attach_function \
  :mm_map,
  [Idx.by_ref, :int, :string, :pointer, TBuf.by_ref, MapOpt.by_ref, :string],
  Reg1.by_ref

# void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname);
# Nine parameters, matching the prototype above: qlens, seqs, n_regs and
# regs are the four raw pointers.
attach_function \
  :mm_map_frag,
  [Idx.by_ref, :int, :pointer, :pointer, :pointer, :pointer, TBuf.by_ref, MapOpt.by_ref, :string],
  :void

# int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads);
attach_function \
  :mm_map_file,
  [Idx.by_ref, :string, MapOpt.by_ref, :int],
  :int

# int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_mapopt_t *opt, int n_threads);
attach_function \
  :mm_map_file_frag,
  [Idx.by_ref, :int, :pointer, MapOpt.by_ref, :int],
  :int

# int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
attach_function \
  :mm_gen_cs,
  [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string, :int],
  :int

# int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq);
attach_function \
  :mm_gen_md, :mm_gen_MD, # Avoid uppercase letters in method names.
  [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string],
  :int

# int mm_idx_index_name(mm_idx_t *mi);
attach_function \
  :mm_idx_index_name,
  [Idx.by_ref],
  :int

# int mm_idx_name2id(const mm_idx_t *mi, const char *name);
attach_function \
  :mm_idx_name2id,
  [Idx.by_ref, :string],
  :int

# int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
attach_function \
  :mm_idx_getseq,
  [Idx.by_ref, :uint32, :uint32, :uint32, :pointer],
  :int

# int mm_idx_alt_read(mm_idx_t *mi, const char *fn);
attach_function \
  :mm_idx_alt_read,
  [Idx.by_ref, :string],
  :int

# int mm_idx_bed_read(mm_idx_t *mi, const char *fn, int read_junc);
attach_function \
  :mm_idx_bed_read,
  [Idx.by_ref, :string, :int],
  :int

# int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uint8_t *s);
attach_function \
  :mm_idx_bed_junc,
  [Idx.by_ref, :int32, :int32, :int32, :pointer],
  :int

# int mm_max_spsc_bonus(const mm_mapopt_t *mo);
attach_function \
  :mm_max_spsc_bonus,
  [MapOpt.by_ref],
  :int

# int32_t mm_idx_spsc_read(mm_idx_t *idx, const char *fn, int32_t max_sc);
attach_function \
  :mm_idx_spsc_read,
  [Idx.by_ref, :string, :int32],
  :int32

# int64_t mm_idx_spsc_get(const mm_idx_t *db, int32_t cid, int64_t st0, int64_t en0, int32_t rev, uint8_t *sc);
attach_function \
  :mm_idx_spsc_get,
  [Idx.by_ref, :int32, :int64, :int64, :int32, :pointer],
  :int64

# void mm_mapopt_init(mm_mapopt_t *opt);
attach_function \
  :mm_mapopt_init,
  [MapOpt.by_ref],
  :void

# mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads);
attach_function \
  :mm_idx_build,
  %i[string int int int int],
  Idx.by_ref

# mmpriv.h

attach_function \
  :mm_idxopt_init,
  [IdxOpt.by_ref],
  :void

# https://github.com/lh3/minimap2/blob/master/python/cmappy.h

# Flattened view of one alignment, filled in by mm_reg2hitpy.
class Hit < ::FFI::Struct
  layout \
    :ctg,          :string,
    :ctg_start,    :int32,
    :ctg_end,      :int32,
    :qry_start,    :int32,
    :qry_end,      :int32,
    :blen,         :int32,
    :mlen,         :int32,
    :NM,           :int32,
    :ctg_len,      :int32,
    :mapq,         :uint8_t,
    :is_primary,   :uint8_t,
    :strand,       :int8_t,
    :trans_strand, :int8_t,
    :seg_id,       :int32,
    :n_cigar32,    :int32,
    :cigar32,      :pointer
end

# Length / capacity / buffer triple used for the KSeq string fields.
class KString < ::FFI::Struct
  layout \
    :l, :size_t,
    :m, :size_t,
    :s, :string
end

# Sequence record handle returned by mm_fastx_open.
class KSeq < ::FFI::Struct
  layout \
    :name,      KString,
    :comment,   KString,
    :seq,       KString,
    :qual,      KString,
    :last_char, :int,
    :f,         :pointer # KStream
end

attach_function \
  :mm_reg2hitpy,
  [Idx.by_ref, Reg1.by_ref, Hit.by_ref],
  :void

attach_function \
  :mm_free_reg1,
  [Reg1.by_ref],
  :void

attach_function \
  :mm_fastx_open,
  [:string],
  KSeq.by_ref

attach_function \
  :mm_fastx_close,
  [KSeq.by_ref],
  :void

attach_function \
  :mm_verbose_level,
  [:int],
  :int

# The native function takes no arguments, so the parameter list is empty.
# A :void parameter would force callers to pass an argument that has no
# valid value.
attach_function \
  :mm_reset_timer,
  [],
  :void
# Maps one read (or a read pair when the second sequence is given) against
# the index; the number of hits is written through the :pointer argument.
attach_function :mm_map_aux,
                [Idx.by_ref, :string, :string, :string, :pointer, TBuf.by_ref, MapOpt.by_ref],
                :pointer # Reg1

# Takes a length and a sequence buffer; the result is converted to a String.
attach_function :mappy_revcomp, %i[int pointer], :string

# Fetches part of a reference sequence by name; the length of the result is
# written through the trailing :pointer argument.
# Use pointer instead of string to read with a specified length
attach_function :mappy_fetch_seq,
                [Idx.by_ref, :string, :int, :int, :pointer],
                :pointer

# Builds an index from a single in-memory sequence.
attach_function :mappy_idx_seq, %i[int int int int string int], Idx.by_ref

# Reads from an opened KSeq handle; returns the native status code.
attach_function :kseq_read, [KSeq.by_ref], :int
# Tests for MM2::Aligner against the bundled human mitochondrial reference.
class AlignerTest < Minitest::Test
  def fa_path
    File.expand_path("../../ext/minimap2/test/MT-human.fa", __dir__)
  end

  def setup
    @a = MM2::Aligner.new(fa_path)
  end

  # Checks the value returned by Aligner#align directly. The assertions are
  # deliberately not placed in a block passed to #align, so they run whether
  # or not #align yields.
  def assert_alignments(alignments)
    assert_instance_of Array, alignments
    alignments.each do |h|
      assert_instance_of MM2::Alignment, h
    end
  end

  def test_initialize
    assert_instance_of MM2::Aligner, @a
  end

  def test_initialize_preset_short
    assert_instance_of MM2::Aligner, MM2::Aligner.new(fa_path, preset: "short")
    assert_instance_of MM2::Aligner, MM2::Aligner.new(fa_path, preset: :short)
  end

  def test_initialize_preset_unknown
    assert_raises(ArgumentError) { MM2::Aligner.new(fa_path, preset: "sort") }
  end

  def test_initialize_with_seq
    assert_instance_of MM2::Aligner, MM2::Aligner.new(seq: "CACAGGTCGAAGGAGTAATTACCCAACAATGGGTCTCTAG")
  end

  def test_idx_opt
    assert_instance_of MM2::FFI::IdxOpt, @a.idx_opt
  end

  def test_map_opt
    assert_instance_of MM2::FFI::MapOpt, @a.map_opt
  end

  def test_index
    assert_instance_of MM2::FFI::Idx, @a.index
  end

  def test_align
    qseq = @a.seq("MT_human", 100, 200)
    assert_alignments @a.align(qseq)
  end

  def test_align2
    qseq = MM2.revcomp(@a.seq("MT_human", 300, 400))
    assert_alignments @a.align(qseq)
  end

  def test_align_seq
    qseq = @a.seq("MT_human", 100, 200)
    ref = @a.seq("MT_human", 0, 3000)
    a = MM2::Aligner.new(seq: ref)
    assert_alignments a.align(qseq)
  end

  def test_align2_seq
    qseq1 = @a.seq("MT_human", 100, 200)
    qseq2 = MM2.revcomp(@a.seq("MT_human", 300, 400))
    ref = @a.seq("MT_human", 0, 3000)
    a = MM2::Aligner.new(seq: ref)
    assert_alignments a.align(qseq1, qseq2)
  end

  def test_seq
    assert_nil @a.seq("MT_human", 0, 0)
    assert_equal "G", @a.seq("MT_human", 0, 1)
    assert_equal "GA", @a.seq("MT_human", 0, 2)
    assert_equal "CACAG", @a.seq("MT_human", 3, 8)
    assert_equal "ATCACGATG", @a.seq("MT_human", 16_560)
  end

  def test_k
    assert_equal 15, @a.k
  end

  def test_w
    assert_equal 10, @a.w
  end

  def test_n_seq
    assert_equal 1, @a.n_seq
  end

  def test_seq_names
    path = File.expand_path("../../ext/minimap2/test/q-inv.fa", __dir__)
    @a = MM2::Aligner.new(path)
    assert_equal %w[read1 read2], @a.seq_names
  end
end

# Tests for MM2::Alignment, using the first hit of a 200-base query cut from
# the reference itself (positions 100...300).
class AlignmentTest < Minitest::Test
  def setup
    path = File.expand_path("../../ext/minimap2/test/MT-human.fa", __dir__)
    aligner = MM2::Aligner.new(path)
    seq = aligner.seq("MT_human", 100, 300)
    @a = aligner.align(seq, cs: true, md: true).first
  end

  def test_keys
    assert_instance_of Array, MM2::Alignment.keys
  end

  def test_initialize
    assert_instance_of MM2::Alignment, @a
  end

  def test_ctg
    assert_equal "MT_human", @a.ctg
  end

  def test_ctg_len
    assert_equal 16_569, @a.ctg_len
  end

  def test_r_st
    assert_equal 100, @a.r_st
  end

  def test_r_en
    assert_equal 300, @a.r_en
  end

  def test_strand
    assert_equal 1, @a.strand
  end

  def test_trans_strand
    assert_equal 0, @a.trans_strand
  end

  def test_blen
    assert_equal 200, @a.blen
  end

  def test_mlen
    assert_equal 200, @a.mlen
  end

  def test_nm
    assert_equal 0, @a.nm
  end

  def test_primary
    assert_equal 1, @a.primary
  end

  def test_q_st
    assert_equal 0, @a.q_st
  end

  def test_q_en
    assert_equal 200, @a.q_en
  end

  def test_mapq
    assert_equal 60, @a.mapq
  end

  def test_cigar
    assert_equal [[200, 0]], @a.cigar
  end

  def test_read_num
    assert_equal 1, @a.read_num
  end

  def test_cs
    assert_equal ":200", @a.cs
  end

  def test_md
    assert_equal "200", @a.md
  end

  def test_cigar_str
    assert_equal "200M", @a.cigar_str
  end

  def test_primary?
    assert_equal true, @a.primary?
  end

  def test_to_h
    hit = {
      ctg: "MT_human",
      ctg_len: 16_569,
      r_st: 100,
      r_en: 300,
      strand: 1,
      trans_strand: 0,
      blen: 200,
      mlen: 200,
      nm: 0,
      primary: 1,
      q_st: 0,
      q_en: 200,
      mapq: 60,
      cigar: [[200, 0]],
      read_num: 1,
      cs: ":200",
      md: "200",
      cigar_str: "200M"
    }
    assert_equal hit, @a.to_h
  end

  def test_to_s
    assert_equal "0\t200\t+\tMT_human\t16569\t100\t300\t200\t200\t60\ttp:A:P\tts:A:.\tcg:Z:200M\tcs:Z::200\tMD:Z:200",
                 @a.to_s
  end
end
mo[:max_qlen] 43 | assert_equal 500, mo[:bw] 44 | assert_equal 20_000, mo[:bw_long] 45 | assert_equal 5000, mo[:max_gap] 46 | assert_equal(-1, mo[:max_gap_ref]) 47 | assert_equal 0, mo[:max_frag_len] 48 | assert_equal 25, mo[:max_chain_skip] 49 | assert_equal 5000, mo[:max_chain_iter] 50 | assert_equal 3, mo[:min_cnt] 51 | assert_equal 40, mo[:min_chain_score] 52 | assert_in_epsilon 0.8, mo[:chain_gap_scale] 53 | assert_in_epsilon 0, mo[:chain_skip_scale] 54 | assert_equal 100_000, mo[:rmq_size_cap] 55 | assert_equal 1000, mo[:rmq_inner_dist] 56 | assert_equal 1000, mo[:rmq_rescue_size] 57 | assert_in_epsilon 0.1, mo[:rmq_rescue_ratio] 58 | assert_in_epsilon 0.5, mo[:mask_level] 59 | # assert_equal INT_MAX, mo[:mask_len] 60 | assert_in_epsilon 0.8, mo[:pri_ratio] 61 | assert_equal 5, mo[:best_n] 62 | assert_in_epsilon 0.15, mo[:alt_drop] 63 | assert_equal 2, mo[:a] 64 | assert_equal 4, mo[:b] 65 | assert_equal 4, mo[:q] 66 | assert_equal 2, mo[:e] 67 | assert_equal 24, mo[:q2] 68 | assert_equal 1, mo[:e2] 69 | assert_equal 1, mo[:sc_ambi] 70 | assert_equal 0, mo[:noncan] 71 | assert_equal 0, mo[:junc_pen] 72 | assert_equal 0, mo[:junc_bonus] 73 | assert_equal 400, mo[:zdrop] 74 | assert_equal 200, mo[:zdrop_inv] 75 | assert_equal(-1, mo[:end_bonus]) 76 | assert_equal (mo[:min_chain_score] * mo[:a]), mo[:min_dp_max] 77 | assert_equal 200, mo[:min_ksw_len] 78 | assert_equal 20, mo[:anchor_ext_len] 79 | assert_equal 6, mo[:anchor_ext_shift] 80 | assert_in_epsilon 1.0, mo[:max_clip_ratio] 81 | assert_equal 500, mo[:rank_min_len] 82 | assert_in_epsilon 0.9, mo[:rank_frac] 83 | assert_equal 0, mo[:pe_ori] 84 | assert_equal 33, mo[:pe_bonus] 85 | assert_equal 3, mo[:jump_min_match] 86 | assert_in_epsilon 0.0002, mo[:mid_occ_frac] 87 | assert_in_epsilon 0.01, mo[:q_occ_frac] 88 | assert_equal 10, mo[:min_mid_occ] 89 | assert_equal 1_000_000, mo[:max_mid_occ] 90 | assert_equal 0, mo[:mid_occ] 91 | assert_equal 0, mo[:max_occ] 92 | assert_equal 4095, mo[:max_max_occ] 93 | 
assert_equal 500, mo[:occ_dist] 94 | assert_equal 500_000_000, mo[:mini_batch_size] 95 | assert_equal 100_000_000, mo[:max_sw_mat] 96 | assert_equal 500_000_000, mo[:cap_kalloc] 97 | assert_nil mo[:split_prefix] 98 | end 99 | 100 | def test_idxseq 101 | obj = MM2::FFI::IdxSeq.new 102 | assert_instance_of MM2::FFI::IdxSeq, obj 103 | assert_nil obj[:name] 104 | assert_equal 0, obj[:offset] 105 | assert_equal 0, obj[:len] 106 | assert_equal 0, obj[:is_alt] 107 | end 108 | 109 | def test_idx 110 | obj = MM2::FFI::Idx.new 111 | assert_instance_of MM2::FFI::Idx, obj 112 | assert_equal 0, obj[:b] 113 | assert_equal 0, obj[:w] 114 | assert_equal 0, obj[:flag] 115 | assert_equal 0, obj[:n_seq] 116 | assert_equal 0, obj[:index] 117 | assert_equal 0, obj[:n_alt] 118 | assert_equal true, obj[:seq].null? 119 | assert_equal true, obj[:S].null? 120 | assert_equal true, obj[:B].null? 121 | assert_equal true, obj[:I].null? 122 | assert_equal true, obj[:spsc].null? 123 | assert_equal true, obj[:J].null? 124 | assert_equal true, obj[:km].null? 125 | assert_equal true, obj[:h].null? 126 | end 127 | 128 | def test_Reader 129 | obj = MM2::FFI::IdxReader.new 130 | assert_instance_of MM2::FFI::IdxReader, obj 131 | assert_equal 0, obj[:is_idx] 132 | assert_equal 0, obj[:n_parts] 133 | assert_equal 0, obj[:idx_size] 134 | assert_instance_of MM2::FFI::IdxOpt, obj[:opt] 135 | assert_equal true, obj[:fp_out].null? 136 | assert_equal true, obj[:seq_or_idx].null? 
137 | end 138 | 139 | def test_extra 140 | cigar = [4, 5, 6] 141 | obj = MM2::FFI::Extra.new(::FFI::MemoryPointer.new(MM2::FFI::Extra.size + ::FFI.type_size(:uint32) * cigar.size)) 142 | assert_instance_of MM2::FFI::Extra, obj 143 | assert_equal 0, obj[:capacity] 144 | assert_equal 0, obj[:dp_score] 145 | assert_equal 0, obj[:dp_max] 146 | assert_equal 0, obj[:dp_max2] 147 | # assert_equal 0, obj[:n_ambi_trans_strand] 148 | assert_equal 0, obj[:n_ambi] 149 | assert_equal 0, obj[:trans_strand] 150 | cigar = [4, 5, 6] 151 | obj[:n_cigar] = cigar.size 152 | obj.pointer.put_array_of_uint32(obj.size, cigar) 153 | assert_equal cigar, obj.cigar 154 | end 155 | 156 | def test_reg1 157 | obj = MM2::FFI::Reg1.new 158 | assert_instance_of MM2::FFI::Reg1, obj 159 | assert_equal 0, obj[:id] 160 | assert_equal 0, obj[:cnt] 161 | assert_equal 0, obj[:rid] 162 | assert_equal 0, obj[:score] 163 | assert_equal 0, obj[:qs] 164 | assert_equal 0, obj[:qe] 165 | assert_equal 0, obj[:rs] 166 | assert_equal 0, obj[:re] 167 | assert_equal 0, obj[:parent] 168 | assert_equal 0, obj[:subsc] 169 | assert_equal 0, obj[:as] 170 | assert_equal 0, obj[:mlen] 171 | assert_equal 0, obj[:blen] 172 | assert_equal 0, obj[:n_sub] 173 | assert_equal 0, obj[:score0] 174 | # assert_equal 0, obj[:fields] 175 | assert_equal 0, obj[:hash] 176 | assert_equal 0, obj[:div] 177 | assert_equal true, obj[:p].null? 
178 | 179 | assert_equal 0, obj[:mapq] 180 | assert_equal 0, obj[:split] 181 | assert_equal 0, obj[:rev] 182 | assert_equal 0, obj[:inv] 183 | assert_equal 0, obj[:sam_pri] 184 | assert_equal 0, obj[:proper_frag] 185 | assert_equal 0, obj[:pe_thru] 186 | assert_equal 0, obj[:seg_split] 187 | assert_equal 0, obj[:seg_id] 188 | assert_equal 0, obj[:split_inv] 189 | assert_equal 0, obj[:is_alt] 190 | assert_equal 0, obj[:strand_retained] 191 | assert_equal 0, obj[:is_spliced] 192 | assert_equal 0, obj[:dummy] 193 | end 194 | 195 | def test_tbuf 196 | obj = MM2::FFI::TBuf.new 197 | assert_instance_of MM2::FFI::TBuf, obj 198 | assert_equal true, obj[:km].null? 199 | assert_equal 0, obj[:rep_len] 200 | assert_equal 0, obj[:frag_gap] 201 | end 202 | 203 | def test_mm_set_opt_0 204 | iopt = MM2::FFI::IdxOpt.new 205 | mopt = MM2::FFI::MapOpt.new 206 | MM2::FFI.mm_set_opt(nil, iopt, mopt) 207 | assert_equal [15, 10, 0, 14, 50_000_000, 8_000_000_000], iopt.values 208 | end 209 | 210 | def test_mm_set_opt_short 211 | iopt = MM2::FFI::IdxOpt.new 212 | mopt = MM2::FFI::MapOpt.new 213 | MM2::FFI.mm_set_opt("short", iopt, mopt) 214 | assert_equal [21, 11, 0, 0, 0, 0], iopt.values 215 | assert MM2::FFI.mm_set_opt(":asm10", iopt, mopt) 216 | end 217 | end 218 | -------------------------------------------------------------------------------- /test/minimap2/version_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative "../test_helper" 4 | class VersionTest < Minitest::Test 5 | def test_version 6 | refute_nil ::Minimap2::VERSION 7 | end 8 | end 9 | -------------------------------------------------------------------------------- /test/minimap2_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "test_helper" 4 | 5 | MM2 = Minimap2 6 | 7 | class Minimap2Test < Minitest::Test 8 | def 
test_that_it_has_a_version_number 9 | refute_nil ::Minimap2::VERSION 10 | end 11 | 12 | # unique features of ruby bindings 13 | 14 | def test_execute_with_string_arg 15 | assert_equal 0, MM2.execute("--version") 16 | assert_equal 1, MM2.execute("--lh 3") 17 | # After executing the "--version" command, the verbosity is changed to 3. 18 | # To prevent test_get_verbose from failing, set it back to 1. 19 | MM2.verbose = 1 20 | end 21 | 22 | def test_if_minimap2_version_numbers_match 23 | begin 24 | out, err = capture_subprocess_io do 25 | pid = fork do 26 | MM2.execute("--version") 27 | end 28 | Process.waitpid(pid) 29 | end 30 | rescue NotImplementedError 31 | # Windows does not support fork. 32 | skip "Fork not supported on this platform" 33 | end 34 | assert_match(/^[\d.\-r]+\n/, out) 35 | # The version number of the gem should match the version number of the 36 | # Minimap2 shared library. Prevent version mismatch before release. 37 | assert_includes Minimap2::VERSION, out.split("-r")[0] 38 | assert_equal "", err 39 | end 40 | 41 | # mappy 42 | 43 | def test_fastx_read 44 | n1, s1, n2, s2 = File.readlines("ext/minimap2/test/q-inv.fa").map(&:chomp) 45 | names = [n1, n2].map { |n| n.sub(">", "") } 46 | seqs = [s1, s2] 47 | MM2.fastx_read("ext/minimap2/test/q-inv.fa") do |n, s| 48 | assert_equal names.shift, n 49 | assert_equal seqs.shift, s 50 | end 51 | # comment should be nil if there is no comment. 
52 | MM2.fastx_read("ext/minimap2/test/q-inv.fa", comment: true) do |_n, _s, c| 53 | assert_nil c 54 | end 55 | end 56 | 57 | def test_fastx_read_comment 58 | require "tempfile" 59 | require "zlib" 60 | Tempfile.create("comment.fq.gz") do |fq| 61 | Zlib::GzipWriter.open(fq.path) do |gz| 62 | gz.write <<~FASTQ 63 | >chat katze 64 | CATCATCATCAT 65 | + 66 | GATOGATOGATO 67 | FASTQ 68 | end 69 | MM2.fastx_read(fq.path, comment: true) do |n, s, q, c| 70 | assert_equal "chat", n 71 | assert_equal "CATCATCATCAT", s 72 | assert_equal "GATOGATOGATO", q 73 | assert_equal "katze", c 74 | end 75 | end 76 | end 77 | 78 | def test_fastx_read_comment_enumerator 79 | require "tempfile" 80 | require "zlib" 81 | Tempfile.create("comment.fq.gz") do |fq| 82 | Zlib::GzipWriter.open(fq.path) do |gz| 83 | gz.write <<~FASTQ 84 | >chat katze 85 | CATCATCATCAT 86 | + 87 | GATOGATOGATO 88 | FASTQ 89 | end 90 | enum = MM2.fastx_read(fq.path, comment: true) 91 | arr = enum.to_a 92 | n, s, q, c = arr[0] 93 | assert_equal 1, arr.size 94 | assert_equal "chat", n 95 | assert_equal "CATCATCATCAT", s 96 | assert_equal "GATOGATOGATO", q 97 | assert_equal "katze", c 98 | end 99 | end 100 | 101 | def test_revcomp 102 | assert_equal "TCCCAAAGGGTTT", MM2.revcomp("AAACCCTTTGGGA") 103 | end 104 | 105 | def test_get_verbose 106 | assert_equal 1, MM2.verbose 107 | end 108 | 109 | def test_set_verbose 110 | assert_equal 3, MM2.verbose = 3 111 | assert_equal 3, MM2.verbose 112 | assert_equal 1, MM2.verbose = 1 113 | end 114 | end 115 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | $LOAD_PATH.unshift File.expand_path("../lib", __dir__) 4 | require "minimap2" 5 | 6 | require "minitest/autorun" 7 | require "minitest/pride" 8 | --------------------------------------------------------------------------------