├── .github
├── FUNDING.yml
└── workflows
│ ├── ci.yml
│ └── doc.yml
├── .gitignore
├── .gitmodules
├── .gitpod.yml
├── .rubocop.yml
├── Gemfile
├── LICENSE.txt
├── README.md
├── Rakefile
├── examples
├── quick_start.rb
├── quick_start_bam.rb
└── sr.rb
├── ext
├── Rakefile
├── cmappy
│ ├── cmappy.c
│ └── cmappy.h
└── minimap2.patch
├── lib
├── minimap2.rb
└── minimap2
│ ├── aligner.rb
│ ├── alignment.rb
│ ├── ffi.rb
│ ├── ffi
│ ├── constants.rb
│ ├── functions.rb
│ └── mappy.rb
│ └── version.rb
├── minimap2.gemspec
├── renovate.json
└── test
├── minimap2
├── aligner_test.rb
├── alignment_test.rb
├── ffi_test.rb
└── version_test.rb
├── minimap2_test.rb
└── test_helper.rb
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | ko_fi: kojix2
2 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: test
2 | on: [push, pull_request]
3 | jobs:
4 | build:
5 | name: ${{ matrix.os }} Ruby ${{ matrix.ruby }}
6 | runs-on: ${{ matrix.os }}-latest
7 | strategy:
8 | fail-fast: false
9 | matrix:
10 | os: ["ubuntu", "macos", "windows"]
11 | ruby: ["3.2", "3.3", "3.4"]
12 | steps:
13 | - uses: actions/checkout@v4
14 | with:
15 | submodules: true
16 | - uses: ruby/setup-ruby@v1
17 | with:
18 | ruby-version: ${{ matrix.ruby }}
19 | bundler-cache: true
20 | - run: bundle exec rake minimap2:build
21 | - run: bundle exec rake test
22 |
--------------------------------------------------------------------------------
/.github/workflows/doc.yml:
--------------------------------------------------------------------------------
1 | name: doc
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - uses: actions/checkout@v4
14 | - uses: ruby/setup-ruby@v1
15 | with:
16 | ruby-version: ruby
17 | - name: Generate document
18 | run: gem install -N yard && yard doc
19 | - name: Publish Documentation on GitHub Pages
20 | uses: peaceiris/actions-gh-pages@v4
21 | with:
22 | github_token: ${{ secrets.GITHUB_TOKEN }}
23 | publish_dir: ./doc
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.bundle/
2 | /.yardoc
3 | /_yardoc/
4 | /coverage/
5 | /doc/
6 | /pkg/
7 | /spec/reports/
8 | /tmp/
9 | /vendor/
10 | /.rubocop
11 |
12 | *.lock
13 |
14 | *.so
15 | *.dylib
16 | *.dll
17 |
18 | *.sam
19 | *.bam
20 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "minimap2"]
2 | path = ext/minimap2
3 | url = https://github.com/lh3/minimap2
4 |
--------------------------------------------------------------------------------
/.gitpod.yml:
--------------------------------------------------------------------------------
1 | # This configuration file was automatically generated by Gitpod.
2 | # Please adjust to your needs (see https://www.gitpod.io/docs/introduction/learn-gitpod/gitpod-yaml)
3 | # and commit this file to your remote git repository to share the goodness with others.
4 |
5 | # Learn more from ready-to-use templates: https://www.gitpod.io/docs/introduction/getting-started/quickstart
6 |
7 | tasks:
8 | - name: Setup, Install & Build
9 | before: bundle install
10 | init: bundle exec rake minimap2:build
11 | command: bundle exec rake test
12 |
--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
1 | StringLiterals:
2 | EnforcedStyle: double_quotes
3 |
4 | Style/NumericPredicate:
5 | EnforcedStyle: comparison
6 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | source "https://rubygems.org"
4 |
5 | gemspec
6 |
7 | group :test do
8 | gem "minitest"
9 | gem "rake"
10 | end
11 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2020 kojix2
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ruby-minimap2
2 |
3 | [Gem Version](https://rubygems.org/gems/minimap2)
4 | [CI test](https://github.com/kojix2/ruby-minimap2/actions/workflows/ci.yml)
5 | [Docs](https://kojix2.github.io/ruby-minimap2/)
6 | [License: MIT](LICENSE.txt)
7 | [DOI](https://zenodo.org/badge/latestdoi/325711305)
8 | [Lines of Code](https://tokei.kojix2.net/github/kojix2/ruby-minimap2)
9 |
10 | :dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
11 |
12 | ## Installation
13 |
14 | ```
15 | gem install minimap2
16 | ```
17 |
18 |
19 | Compiling from source
20 |
21 | git clone --recursive https://github.com/kojix2/ruby-minimap2
22 | cd ruby-minimap2
23 | bundle install
24 | bundle exec rake minimap2:build
25 | bundle exec rake install
26 |
27 |
28 |
29 | ## Quick Start
30 |
31 | ```ruby
32 | require "minimap2"
33 |
34 | aligner = Minimap2::Aligner.new("ext/minimap2/test/MT-human.fa")
35 | seq = aligner.seq("MT_human", 100, 200)
36 | hits = aligner.align(seq)
37 | pp hits
38 | ```
39 |
40 | ```
41 | [#]
60 | ```
61 |
62 | ## APIs Overview
63 |
64 | ```markdown
65 | * Minimap2 module
66 | - fastx_read Read fasta/fastq file.
67 | - revcomp Reverse complement sequence.
68 | - execute Calls the main function of Minimap2 with arguments. `Minimap2.execute("--version")`
69 |
70 | * Aligner class
71 | * attributes
72 | - index Returns the value of attribute index.
73 | - idx_opt Returns the value of attribute idx_opt.
74 | - map_opt Returns the value of attribute map_opt.
75 | * methods
76 | - new(path, preset: nil) Create a new aligner. (presets: sr, map-pb, map-ont, map-hifi, splice, asm5, etc.)
77 | - align Maps and returns alignments.
78 | - seq Retrieve a subsequence from the index.
79 |
80 | * Alignment class
81 | * attributes
82 | - ctg Returns name of the reference sequence the query is mapped to.
83 | - ctg_len Returns total length of the reference sequence.
84 | - r_st Returns start positions on the reference.
85 | - r_en Returns end positions on the reference.
86 | - strand Returns +1 if on the forward strand; -1 if on the reverse strand.
87 | - trans_strand Returns transcript strand. +1 if on the forward strand; -1 if on the reverse strand; 0 if unknown.
88 | - blen Returns length of the alignment, including both alignment matches and gaps but excluding ambiguous bases.
89 | - mlen Returns length of the matching bases in the alignment, excluding ambiguous base matches.
90 | - nm Returns number of mismatches, gaps and ambiguous positions in the alignment.
91 | - primary Returns if the alignment is primary (typically the best and the first to generate).
92 | - q_st Returns start positions on the query.
93 | - q_en Returns end positions on the query.
94 | - mapq Returns mapping quality.
95 | - cigar Returns CIGAR returned as an array of shape (n_cigar,2). The two numbers give the length and the operator of each CIGAR operation.
96 | - read_num Returns read number that the alignment corresponds to; 1 for the first read and 2 for the second read.
97 | - cs Returns the cs tag.
98 | - md Returns the MD tag as in the SAM format. It is an empty string unless the md argument is applied when calling Aligner#align.
99 | - cigar_str Returns CIGAR string.
100 | * methods
101 | - to_h Convert Alignment to hash.
102 | - to_s Convert to the PAF format without the QueryName and QueryLength columns.
103 |
104 | ## FFI module
105 | * IdxOpt class Indexing options.
106 | * MapOpt class Mapping options.
107 | ```
108 |
109 | - API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2.
110 | - `Aligner#map` has been changed to `align`, because `map` means iterator in Ruby.
111 | - See [documentation](https://kojix2.github.io/ruby-minimap2/) for details.
112 |
113 |
114 | C Structures and Functions
115 |
116 | ### FFI
117 |
118 | - Ruby-Minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi).
119 | - Native C functions can be called from the `Minimap2::FFI` module.
120 | - Native C structure members can be accessed.
121 | - Bitfields are supported by [ffi-bitfield](https://github.com/kojix2/ffi-bitfield) gems.
122 |
123 | ```ruby
124 | aligner.idx_opt.members
125 | # => [:k, :w, :flag, :bucket_bits, :mini_batch_size, :batch_size]
126 | aligner.idx_opt.values
127 | # => [15, 10, 0, 14, 50000000, 9223372036854775807]
128 | aligner.idx_opt[:k]
129 | # => 15
130 | aligner.idx_opt[:k] = 14
131 | aligner.idx_opt[:k]
132 | # => 14
133 | ```
134 |
135 |
136 |
137 | ## Contributing
138 |
139 |
140 | Development
141 |
142 | Fork the repository,
143 | then clone it.
144 |
145 | ```sh
146 | git clone --recursive https://github.com/kojix2/ruby-minimap2
147 | # git clone https://github.com/kojix2/ruby-minimap2
148 | # cd ruby-minimap2
149 | # git submodule update -i
150 | ```
151 |
152 | Build Minimap2 and Mappy.
153 |
154 | ```sh
155 | cd ruby-minimap2
156 | bundle install # Install dependent packages including Ruby-FFI
157 | bundle exec rake minimap2:build
158 | ```
159 |
160 | A shared library will be created in the vendor directory.
161 |
162 | ```
163 | └── vendor
164 | └── libminimap2.so
165 | ```
166 |
167 | Run tests.
168 |
169 | ```
170 | bundle exec rake test
171 | ```
172 |
173 | Release a Gem.
174 |
175 | ```
176 | bundle exec rake minimap2:cleanall
177 | bundle exec rake build
178 | ls -l pkg # Check the size of the Gem and make sure it does not contain any unused code such as shared libraries or lib/simde.
179 | bundle exec rake release
180 | ```
181 |
182 |
183 |
184 | ruby-minimap2 is a library under development and there are many points to be improved.
185 |
186 | Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)!
187 |
188 | Many OSS projects become abandoned because only the founder has commit rights to the original repository.
189 | If you need commit rights to ruby-minimap2 repository or want to get admin rights and take over the project, please feel free to contact me @kojix2.
190 |
191 | ## License
192 |
193 | [MIT License](https://opensource.org/licenses/MIT).
194 |
195 | ## Acknowledgements
196 |
197 | I would like to thank Heng Li for making Minimap2, and all the readers who read the README to the end.
198 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require "bundler/gem_tasks"
4 | require "rake/testtask"
5 |
6 | # Prevent releasing the gem including the minimap2 shared library.
7 |
# Abort (with a magenta message) when a built shared library is still sitting
# in vendor/, so that it cannot be packaged by accident.
task :check_shared_library_exist do
  next if Dir.glob("vendor/*.{so,dylib,dll}").empty?

  magenta = "\e[35m"
  clear = "\e[0m"
  abort "#{magenta}Shared library exists in the vendor directory.#{clear}"
end
15 |
16 | Rake::Task["release:guard_clean"].enhance(["check_shared_library_exist"])
17 |
18 | Rake::TestTask.new(:test) do |t|
19 | t.libs << "test"
20 | t.libs << "lib"
21 | t.test_files = FileList["test/**/*_test.rb"]
22 | end
23 |
24 | task default: :test
25 |
26 | load "ext/Rakefile"
27 |
--------------------------------------------------------------------------------
/examples/quick_start.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require "minimap2"
4 |
5 | # load or build index
6 | aligner = Minimap2::Aligner.new("#{__dir__}/../ext/minimap2/test/MT-human.fa")
7 |
8 | # retrieve a subsequence from the index
9 | seq = aligner.seq("MT_human", 100, 200)
10 |
11 | # mapping
12 | hits = aligner.align(seq)
13 |
14 | # show result
15 | pp hits
16 |
--------------------------------------------------------------------------------
/examples/quick_start_bam.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require "minimap2"
4 | require "htslib"
5 |
6 | # load or build index
7 | aligner = Minimap2::Aligner.new("#{__dir__}/../ext/minimap2/test/MT-human.fa")
8 |
9 | # retrieve a subsequence from the index
10 | seq = aligner.seq("MT_human", 100, 200)
11 |
12 | # mapping
13 | hits = aligner.align(seq)
14 | hit = hits[0]
15 |
16 | # save result to BAM file
17 | HTS::Bam.open("test.bam", "wb") do |bam|
18 | header = HTS::Bam::Header.new do |h|
19 | h << "@SQ\tSN:MT_human\tLN:16569"
20 | h << "@PG\tID:ruby-minimap2\tPN:ruby-minimap2\tVN:#{Minimap2::VERSION}"
21 | end
22 | bam.header = header
23 | record = HTS::Bam::Record.new(
24 | header,
25 | qname: "Read1",
26 | flag: 0,
27 | tid: 0,
28 | pos: hit.r_st,
29 | mapq: hit.mapq,
30 | cigar: hit.cigar_str,
31 | mtid: 0,
32 | mpos: 0,
33 | isize: 0,
34 | seq: seq,
35 | qual: [20] * 100,
36 | l_aux: 0
37 | )
38 | bam << record
39 | end
40 |
--------------------------------------------------------------------------------
/examples/sr.rb:
--------------------------------------------------------------------------------
# frozen_string_literal: true

# Example: map paired short reads with the "sr" preset and print every
# alignment (Alignment#to_s) to standard output.

require "minimap2"

# Usage

if ARGV.size < 3
  puts "Usage: ruby sr.rb <reference.fa> <a_1.fa> <a_2.fa>"
  exit 1
end

# Prepare aligner

REFERENCE = ARGV[0] # reference.fa
FASTQ1 = ARGV[1] # a_1.fa
FASTQ2 = ARGV[2] # a_2.fa

aligner = Minimap2::Aligner.new(
  REFERENCE,
  preset: "sr" # Paired short reads
)

# Read Fastq file

a1 = Minimap2.fastx_read(FASTQ1) # Enumerator
a2 = Minimap2.fastx_read(FASTQ2) # Enumerator

# Output

loop do
  # Each call to #first pulls one more record from the enumerator's underlying
  # reader; it returns nil once that reader has no records left.
  r1 = a1.first
  r2 = a2.first
  break if r1.nil? || r2.nil?

  # A record is [name, seq, qual]; only the sequence is needed for mapping.
  s1 = r1[1]
  s2 = r2[1]

  aligner.align(s1, s2).each do |aln|
    puts aln
  end
end
40 |
--------------------------------------------------------------------------------
/ext/Rakefile:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require "rake"
4 | require "fileutils"
5 | require "ffi"
6 |
7 | minimap2_dir = File.expand_path("minimap2", __dir__)
8 | target_dir = "../../vendor"
9 | target_fname = FFI.map_library_name("minimap2")
10 | target_path = File.join(target_dir, target_fname)
11 |
12 | task default: ["minimap2:build", "minimap2:clean"]
13 |
14 | namespace :minimap2 do
desc "Compile Minimap2"
# Builds the minimap2 submodule together with the cmappy shim and moves the
# resulting shared library into the vendor directory.
#
# The submodule is patched and cmappy.{h,c} are copied in only for the duration
# of the build; the `ensure` clause restores the submodule even when `make` or
# the link step fails, so a failed build can simply be retried.
task :build do
  Dir.chdir(minimap2_dir) do
    # Add -fPIC option to Makefile
    sh "git apply ../minimap2.patch"
    begin
      sh "cp ../cmappy/cmappy.h ../cmappy/cmappy.c ."
      case RbConfig::CONFIG["host_cpu"]
      when /arm64|aarch64/ # 64-bit ARM is reported as "arm64" or "aarch64" depending on the OS
        sh "make arm_neon=1 aarch64=1"
      when /arm/
        sh "make arm_neon=1"
      else
        sh "make"
      end
      case RbConfig::CONFIG["host_os"]
      when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
        sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
      when /darwin|mac os/
        sh "clang -dynamiclib -undefined dynamic_lookup -o #{target_fname} *.o -lm -lz -lpthread"
        sh "otool -L #{target_fname}"
      else
        sh "cc *.o -shared -o #{target_fname} -lm -lz -lpthread"
        sh "ldd -r #{target_fname}"
      end
    ensure
      # Always undo the temporary changes made to the submodule checkout.
      FileUtils.rm_f(%w[cmappy.h cmappy.c])
      sh "git apply -R ../minimap2.patch"
    end
    FileUtils.mkdir_p(target_dir)
    warn "mkdir -p #{target_dir}"
    sh "mv #{target_fname} #{target_path}"
  end
end
46 |
47 | desc "`make clean`"
48 | task :clean do
49 | Dir.chdir(minimap2_dir) do
50 | sh "make clean"
51 | end
52 | end
53 |
54 | desc "`make clean` and remove shared lib"
55 | task cleanall: [:clean] do
56 | Dir.chdir(minimap2_dir) do
57 | sh "rm #{target_path}" if File.exist?(target_path)
58 | end
59 | end
60 | end
61 |
--------------------------------------------------------------------------------
/ext/cmappy/cmappy.c:
--------------------------------------------------------------------------------
1 | #include "cmappy.h"
2 |
3 | void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
4 | {
5 | h->ctg = mi->seq[r->rid].name;
6 | h->ctg_len = mi->seq[r->rid].len;
7 | h->ctg_start = r->rs, h->ctg_end = r->re;
8 | h->qry_start = r->qs, h->qry_end = r->qe;
9 | h->strand = r->rev? -1 : 1;
10 | h->mapq = r->mapq;
11 | h->mlen = r->mlen;
12 | h->blen = r->blen;
13 | h->NM = r->blen - r->mlen + r->p->n_ambi;
14 | h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
15 | h->is_primary = (r->id == r->parent);
16 | h->seg_id = r->seg_id;
17 | h->n_cigar32 = r->p->n_cigar;
18 | h->cigar32 = r->p->cigar;
19 | }
20 |
/*
 * Release the extra alignment data attached to a region.
 * Only r->p is freed; the mm_reg1_t itself is left to the caller.
 */
void mm_free_reg1(mm_reg1_t *r)
{
	free(r->p);
}
25 |
26 | kseq_t *mm_fastx_open(const char *fn)
27 | {
28 | gzFile fp;
29 | fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
30 | return kseq_init(fp);
31 | }
32 |
33 | void mm_fastx_close(kseq_t *ks)
34 | {
35 | gzFile fp;
36 | fp = ks->f->f;
37 | kseq_destroy(ks);
38 | gzclose(fp);
39 | }
40 |
/*
 * Combined getter/setter for the global mm_verbose.
 *
 * v >= 0 stores v into mm_verbose; a negative v leaves it unchanged.
 * Returns the value of mm_verbose after the (optional) update.
 */
int mm_verbose_level(int v)
{
	if (v >= 0) mm_verbose = v;
	return mm_verbose;
}
46 |
/* Reset the global mm_realtime0 to the current value returned by realtime(). */
void mm_reset_timer(void)
{
	extern double realtime(void); /* declared locally; defined outside this file */
	mm_realtime0 = realtime();
}
52 |
53 | mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
54 | {
55 | mm_reg1_t *r;
56 |
57 | // Py_BEGIN_ALLOW_THREADS
58 | if (seq2 == 0) {
59 | r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, seqname);
60 | } else {
61 | int _n_regs[2];
62 | mm_reg1_t *regs[2];
63 | char *seq[2];
64 | int i, len[2];
65 |
66 | len[0] = strlen(seq1);
67 | len[1] = strlen(seq2);
68 | seq[0] = (char*)seq1;
69 | seq[1] = strdup(seq2);
70 | for (i = 0; i < len[1]>>1; ++i) {
71 | int t = seq[1][len[1] - i - 1];
72 | seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
73 | seq[1][i] = seq_comp_table[t];
74 | }
75 | if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
76 | mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, seqname);
77 | for (i = 0; i < _n_regs[1]; ++i)
78 | regs[1][i].rev = !regs[1][i].rev;
79 | *n_regs = _n_regs[0] + _n_regs[1];
80 | regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs));
81 | memcpy(®s[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
82 | free(regs[1]);
83 | r = regs[0];
84 | }
85 | // Py_END_ALLOW_THREADS
86 |
87 | return r;
88 | }
89 |
90 | char *mappy_revcomp(int len, const uint8_t *seq)
91 | {
92 | int i;
93 | char *rev;
94 | rev = (char*)malloc(len + 1);
95 | for (i = 0; i < len; ++i)
96 | rev[len - i - 1] = seq_comp_table[seq[i]];
97 | rev[len] = 0;
98 | return rev;
99 | }
100 |
/*
 * Fetch part of the reference sequence called `name` from the index as a newly
 * malloc'ed, NUL-terminated string of the letters A/C/G/T/N.
 *
 * st, en : requested range (presumably half-open [st, en) - confirm against
 *          mm_idx_getseq); en beyond the sequence end is clamped to its length.
 * len    : receives the value returned by mm_idx_getseq(), or 0 on failure.
 *
 * Returns 0 when the name lookup fails, when st is outside the sequence, or
 * when st >= en.
 */
char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len)
{
	int i, rid;
	char *s;
	*len = 0; /* every early return below therefore reports a length of 0 */
	rid = mm_idx_name2id(mi, name);
	if (rid < 0) return 0; /* presumably: name not present in the index */
	/* the unsigned cast makes a negative st compare as a very large value, so it is rejected as well */
	if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
	/* NOTE(review): st < en already holds here, so en < 0 looks unreachable for non-negative st */
	if (en < 0 || (uint32_t)en > mi->seq[rid].len)
		en = mi->seq[rid].len;
	s = (char*)malloc(en - st + 1); /* +1 for the terminating NUL */
	*len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);
	/* translate the numeric codes written by mm_idx_getseq into letters */
	for (i = 0; i < *len; ++i)
		s[i] = "ACGTN"[(uint8_t)s[i]];
	s[*len] = 0;
	return s;
}
118 |
119 | mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
120 | {
121 | const char *fake_name = "N/A";
122 | char *s;
123 | mm_idx_t *mi;
124 | s = (char*)calloc(len + 1, 1);
125 | memcpy(s, seq, len);
126 | mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
127 | free(s);
128 | return mi;
129 | }
130 |
--------------------------------------------------------------------------------
/ext/cmappy/cmappy.h:
--------------------------------------------------------------------------------
1 | #ifndef CMAPPY_H
2 | #define CMAPPY_H
3 |
4 | #include <stdlib.h>
5 | #include <string.h>
6 | #include <zlib.h>
7 | #include "minimap.h"
8 | #include "kseq.h"
9 | KSEQ_DECLARE(gzFile)
10 |
11 | typedef struct {
12 | const char *ctg;
13 | int32_t ctg_start, ctg_end;
14 | int32_t qry_start, qry_end;
15 | int32_t blen, mlen, NM, ctg_len;
16 | uint8_t mapq, is_primary;
17 | int8_t strand, trans_strand;
18 | int32_t seg_id;
19 | int32_t n_cigar32;
20 | uint32_t *cigar32;
21 | } mm_hitpy_t;
22 |
23 | void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h);
24 |
25 | void mm_free_reg1(mm_reg1_t *r);
26 |
27 | kseq_t *mm_fastx_open(const char *fn);
28 |
29 | void mm_fastx_close(kseq_t *ks);
30 |
31 | int mm_verbose_level(int v);
32 |
33 | void mm_reset_timer(void);
34 |
35 | extern unsigned char seq_comp_table[256];
36 | mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char* seqname, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt);
37 |
38 | char *mappy_revcomp(int len, const uint8_t *seq);
39 |
40 | char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len);
41 |
42 | mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len);
43 |
44 | #endif
45 |
--------------------------------------------------------------------------------
/ext/minimap2.patch:
--------------------------------------------------------------------------------
1 | --- a/Makefile
2 | +++ b/Makefile
3 | @@ -1,9 +1,9 @@
4 | -CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
5 | +CFLAGS= -g -Wall -O2 -Wc++-compat -fPIC #-Wextra
6 | CPPFLAGS= -DHAVE_KALLOC
7 | INCLUDES=
8 | OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
9 | lchain.o align.o hit.o seed.o jump.o map.o format.o pe.o esterr.o splitidx.o \
10 | - ksw2_ll_sse.o
11 | + ksw2_ll_sse.o cmappy.o
12 | PROG= minimap2
13 | PROG_EXTRA= sdust minimap2-lite
14 | LIBS= -lm -lz -lpthread
15 | @@ -135,3 +135,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h
16 | seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
17 | sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
18 | splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
19 | +cmappy.o: cmappy.h
20 |
--------------------------------------------------------------------------------
/lib/minimap2.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | # dependencies
4 | require "ffi"
5 |
6 | # modules
7 | require_relative "minimap2/aligner"
8 | require_relative "minimap2/alignment"
9 | require_relative "minimap2/version"
10 |
11 | # Minimap2 mapper for long read sequences
12 | # https://github.com/lh3/minimap2
13 | # Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34:3094-3100.
14 | # doi:10.1093/bioinformatics/bty191
15 | module Minimap2
16 | class Error < StandardError; end
17 |
18 | class << self
19 | attr_accessor :ffi_lib
20 | end
21 |
22 | lib_name = ::FFI.map_library_name("minimap2")
23 | self.ffi_lib = if ENV["MINIMAPDIR"]
24 | File.expand_path(lib_name, ENV["MINIMAPDIR"])
25 | else
26 | File.expand_path("../vendor/#{lib_name}", __dir__)
27 | end
28 |
29 | # friendlier error message
30 | autoload :FFI, "minimap2/ffi"
31 |
32 | # methods from mappy
33 | class << self
34 | # Execute minimap2 command with given options.
35 | # @overload execute(arg0,arg1,...)
36 | # @param [String] arg minimap2 command option.
37 | # @example Get minimap2 version
38 | # Minimap2.execute('--version')
39 |
def execute(*rb_argv)
  # argv[0] is the program name; every argument is then broken into
  # whitespace-separated tokens. String#split without a pattern ignores
  # leading/trailing whitespace, so " -x sr" never produces an empty ""
  # argument (which split(/\s+/) would).
  tokens = ["minimap2"] + rb_argv.flat_map { |arg| arg.to_s.split }
  str_ptrs = tokens.map { |token| ::FFI::MemoryPointer.from_string(token) }
  # The argv array is terminated by a NULL pointer.
  str_ptrs << nil

  # Load all the pointers into a native memory block
  argv = ::FFI::MemoryPointer.new(:pointer, str_ptrs.length)
  str_ptrs.each_with_index do |ptr, i|
    argv[i].put_pointer(0, ptr)
  end

  # argc does not count the terminating NULL.
  FFI.main(str_ptrs.length - 1, argv)
end
59 |
60 | # Get verbosity level.
61 | # @return [Integer] verbosity level.
62 |
def verbose
  # A negative argument makes the C helper mm_verbose_level skip the
  # assignment and only return the current level.
  FFI.mm_verbose_level(-1)
end
66 |
67 | # Set verbosity level.
68 | # @param [Integer] verbosity level
69 | # @return [Integer] verbosity level.
70 |
def verbose=(level)
  # The C helper mm_verbose_level stores the level only when it is >= 0;
  # a negative level leaves the current setting untouched.
  FFI.mm_verbose_level(level)
end
74 |
75 | # Read fasta/fastq file.
76 | # @param [String] file_path
77 | # @param [Boolean] comment If True, the comment will be read.
78 | # @yield [name, seq, qual, comment]
79 | # @return [Enumerator] enum Return Enumerator if not block given.
80 | # Note: You can use BioRuby instead of this method.
81 |
def fastx_read(file_path, comment: false, &block)
  full_path = File.expand_path(file_path)

  # The existence check is done in Ruby because the pointer returned by
  # mm_fastx_open is not null even when the file does not exist.
  unless File.exist?(full_path)
    raise ArgumentError, "File not found: #{full_path}"
  end

  reader = FFI.mm_fastx_open(full_path)
  return fastx_each(reader, comment, &block) if block

  # Without a block, hand back an Enumerator over the same reader.
  # The reader cannot be rewound.
  Enumerator.new do |yielder|
    fastx_each(reader, comment) { |record| yielder << record }
  end
end
99 |
100 | # Reverse complement sequence.
101 | # @param [String] seq
102 | # @return [String] seq
103 |
def revcomp(seq)
  # Size the native buffer in bytes, not characters: put_bytes copies every
  # byte of the string, so a character count would be too small as soon as
  # the string contains a multibyte character.
  n_bytes = seq.bytesize
  bseq = ::FFI::MemoryPointer.new(:char, n_bytes)
  bseq.put_bytes(0, seq)
  FFI.mappy_revcomp(n_bytes, bseq)
end
110 |
111 | private
112 |
# Yields one record (see fastx_next) per successful kseq_read call, then
# closes the reader.
def fastx_each(ks, comment)
  # Keep reading for as long as kseq_read reports a non-negative value.
  yield fastx_next(ks, comment) while FFI.kseq_read(ks) >= 0
  # Only reached when the loop above runs to completion: if the block exits
  # early (break or exception), ks is not closed here.
  FFI.mm_fastx_close(ks)
end
117 |
# Builds the record for the entry currently held by the reader +ks+:
# [name, seq, qual], plus the comment as a 4th element when +read_comment+
# is truthy. qual and comment are nil when their length field is not positive.
def fastx_next(ks, read_comment)
  optional = ->(field) { ks[field][:l] > 0 ? ks[field][:s] : nil }

  record = [ks[:name][:s], ks[:seq][:s], optional.call(:qual)]
  record << optional.call(:comment) if read_comment
  record
end
129 | end
130 | end
131 |
--------------------------------------------------------------------------------
/lib/minimap2/aligner.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | module Minimap2
4 | class Aligner
5 | attr_reader :idx_opt, :map_opt, :index
6 |
7 | # Create a new aligner.
8 | #
9 | # @param fn_idx_in [String] index or sequence file name.
10 | # @param seq [String] a single sequence to index.
11 | # @param preset [String] minimap2 preset.
12 | # * map-pb : PacBio CLR genomic reads
13 | # * map-ont : Oxford Nanopore genomic reads
14 | # * map-hifi : PacBio HiFi/CCS genomic reads (v2.19 or later)
15 | # * asm20 : PacBio HiFi/CCS genomic reads (v2.18 or earlier)
16 | # * sr : short genomic paired-end reads
17 | # * splice : spliced long reads (strand unknown)
18 | # * splice:hq : Final PacBio Iso-seq or traditional cDNA
19 | # * asm5 : intra-species asm-to-asm alignment
20 | # * ava-pb : PacBio read overlap
21 | # * ava-ont : Nanopore read overlap
22 | # @param k [Integer] k-mer length, no larger than 28.
23 | # @param w [Integer] minimizer window size, no larger than 255.
24 | # @param min_cnt [Integer] minimum number of minimizers on a chain.
25 | # @param min_chain_score [Integer] minimum chain score.
26 | # @param min_dp_score
27 | # @param bw [Integer] chaining and alignment band width. (initial chaining and extension)
28 | # @param bw_long [Integer] chaining and alignment band width (RMQ-based rechaining and closing gaps)
29 | # @param best_n [Integer] max number of alignments to return.
30 | # @param n_threads [Integer] number of indexing threads.
31 | # @param fn_idx_out [String] name of file to which the index is written.
32 | # This parameter has no effect if seq is set.
33 | # @param max_frag_len [Integer]
34 | # @param extra_flags [Integer] additional flags defined in minimap.h.
35 | # @param scoring [Array] scoring system.
36 | # It is a tuple/list consisting of 4, 6 or 7 positive integers.
37 | # The first 4 elements specify match scoring, mismatch penalty, gap open and gap extension penalty.
38 | # The 5th and 6th elements, if present, set long-gap open and long-gap extension penalty.
39 | # The 7th sets a mismatch penalty involving ambiguous bases.
40 |
def initialize(
  fn_idx_in = nil,
  seq: nil,
  preset: nil,
  k: nil,
  w: nil,
  min_cnt: nil,
  min_chain_score: nil,
  min_dp_score: nil,
  bw: nil,
  bw_long: nil,
  best_n: nil,
  n_threads: 3,
  fn_idx_out: nil,
  max_frag_len: nil,
  extra_flags: nil,
  scoring: nil,
  sc_ambi: nil,
  max_chain_skip: nil
)
  @idx_opt = FFI::IdxOpt.new
  @map_opt = FFI::MapOpt.new

  # Fill both option structs for the preset; -1 is treated as "unknown preset".
  r = FFI.mm_set_opt(preset, idx_opt, map_opt)
  raise ArgumentError, "Unknown preset name: #{preset}" if r == -1

  # always perform alignment
  map_opt[:flag] |= 4
  # largest signed 64-bit value (presumably so the index is never split into batches - TODO confirm)
  idx_opt[:batch_size] = 0x7fffffffffffffff

  # override preset options
  # (each keyword is applied only when the caller passed a non-nil value)
  idx_opt[:k] = k if k
  idx_opt[:w] = w if w
  map_opt[:min_cnt] = min_cnt if min_cnt
  map_opt[:min_chain_score] = min_chain_score if min_chain_score
  map_opt[:min_dp_max] = min_dp_score if min_dp_score
  map_opt[:bw] = bw if bw
  map_opt[:bw_long] = bw_long if bw_long
  map_opt[:best_n] = best_n if best_n
  map_opt[:max_frag_len] = max_frag_len if max_frag_len
  map_opt[:flag] |= extra_flags if extra_flags
  # A scoring array with fewer than 4 elements is ignored.
  if scoring && scoring.size >= 4
    map_opt[:a] = scoring[0]
    map_opt[:b] = scoring[1]
    map_opt[:q] = scoring[2]
    map_opt[:e] = scoring[3]
    # By default the long-gap penalties mirror the regular gap penalties ...
    map_opt[:q2] = map_opt[:q]
    map_opt[:e2] = map_opt[:e]
    # ... unless elements 5 and 6 are given; element 7 sets sc_ambi.
    if scoring.size >= 6
      map_opt[:q2] = scoring[4]
      map_opt[:e2] = scoring[5]
      map_opt[:sc_ambi] = scoring[6] if scoring.size >= 7
    end
  end
  # Assigned after the scoring array, so the sc_ambi keyword wins over scoring[6].
  map_opt[:sc_ambi] = sc_ambi if sc_ambi
  map_opt[:max_chain_skip] = max_chain_skip if max_chain_skip

  # The index comes from a file (fn_idx_in) or from a single in-memory
  # sequence (seq). When neither is given, @index is never assigned.
  if fn_idx_in
    warn "Since fn_idx_in is specified, the seq argument will be ignored." if seq
    reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)

    # The Ruby version raises an error here
    # (a RuntimeError, since no exception class is given)
    raise "Cannot open : #{fn_idx_in}" if reader.null?

    @index = FFI.mm_idx_reader_read(reader, n_threads)
    FFI.mm_idx_reader_close(reader)
    FFI.mm_mapopt_update(map_opt, index)
    FFI.mm_idx_index_name(index)
  elsif seq
    # Bit 0 of the index flag is passed as the is_hpc argument of mappy_idx_seq.
    @index = FFI.mappy_idx_seq(
      idx_opt[:w], idx_opt[:k], idx_opt[:flag] & 1,
      idx_opt[:bucket_bits], seq, seq.size
    )
    FFI.mm_mapopt_update(map_opt, index)
    map_opt[:mid_occ] = 1000 # don't filter high-occ seeds
  end
end
118 |
119 | # Explicitly releases the memory of the index object.
120 |
121 | def free_index
122 | FFI.mm_idx_destroy(index) unless index.null?
123 | end
124 |
125 | # @param seq [String]
126 | # @param seq2 [String]
127 | # @param buf [FFI::TBuf]
128 | # @param cs [true, false]
129 | # @param md [true, false]
130 | # @param max_frag_len [Integer]
131 | # @param extra_flags [Integer]
132 | # @note Name change: map -> align
133 | # In the Ruby language, the name map means iterator.
134 | # The original name is map, but here I use the method name align.
135 | # @note The use of Enumerator is being considered. The method names may change again.
136 | # @return [Array] alignments
137 |
138 | def align(
139 | seq, seq2 = nil,
140 | name: nil,
141 | buf: nil,
142 | cs: false,
143 | md: false,
144 | max_frag_len: nil,
145 | extra_flags: nil
146 | )
147 | return if index.null?
148 | return if (map_opt[:flag] & 4).zero? && (index[:flag] & 2).zero?
149 |
150 | map_opt[:max_frag_len] = max_frag_len if max_frag_len
151 | map_opt[:flag] |= extra_flags if extra_flags
152 |
153 | buf ||= FFI::TBuf.new
154 | km = FFI.mm_tbuf_get_km(buf)
155 |
156 | n_regs_ptr = ::FFI::MemoryPointer.new :int
157 | regs_ptr = FFI.mm_map_aux(index, name, seq, seq2, n_regs_ptr, buf, map_opt)
158 | n_regs = n_regs_ptr.read_int
159 |
160 | regs = Array.new(n_regs) do |i|
161 | FFI::Reg1.new(regs_ptr + i * FFI::Reg1.size)
162 | end
163 |
164 | hit = FFI::Hit.new
165 |
166 | cs_str = ::FFI::MemoryPointer.new(::FFI::MemoryPointer.new(:string))
167 | m_cs_str = ::FFI::MemoryPointer.new :int
168 |
169 | alignments = []
170 |
171 | i = 0
172 | begin
173 | while i < n_regs
174 | FFI.mm_reg2hitpy(index, regs[i], hit)
175 |
176 | c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
177 | cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
178 |
179 | _cs = ""
180 | _md = ""
181 | if cs or md
182 | cur_seq = hit[:seg_id] > 0 && seq2 ? seq2 : seq
183 |
184 | if cs
185 | l_cs_str = FFI.mm_gen_cs(km, cs_str, m_cs_str, @index, regs[i], cur_seq, 1)
186 | _cs = cs_str.read_pointer.read_string(l_cs_str)
187 | end
188 |
189 | if md
190 | l_cs_str = FFI.mm_gen_md(km, cs_str, m_cs_str, @index, regs[i], cur_seq)
191 | _md = cs_str.read_pointer.read_string(l_cs_str)
192 | end
193 | end
194 |
195 | alignments << Alignment.new(hit, cigar, _cs, _md)
196 |
197 | FFI.mm_free_reg1(regs[i])
198 | i += 1
199 | end
200 | ensure
201 | while i < n_regs
202 | FFI.mm_free_reg1(regs[i])
203 | i += 1
204 | end
205 | end
206 | alignments
207 | end
208 |
209 | # Retrieve a subsequence from the index.
210 | # @param name
211 | # @param start
212 | # @param stop
213 |
214 | def seq(name, start = 0, stop = 0x7fffffff)
215 | return if index.null?
216 | return if (map_opt[:flag] & 4).zero? && (index[:flag] & 2).zero?
217 |
218 | lp = ::FFI::MemoryPointer.new(:int)
219 | s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
220 | l = lp.read_int
221 | return nil if l == 0
222 |
223 | s.read_string(l)
224 | end
225 |
226 | # k-mer length, no larger than 28
227 | # @return [Integer] value of the :k field of the index struct
228 | def k
229 | index[:k]
230 | end
231 |
232 | # minimizer window size, no larger than 255
233 | # @return [Integer] value of the :w field of the index struct
234 | def w
235 | index[:w]
236 | end
237 | # @return [Integer] number of reference sequences (the :n_seq field of the index struct)
238 | def n_seq
239 | index[:n_seq]
240 | end
241 |
242 | def seq_names
243 | ptr = index[:seq].to_ptr
244 | Array.new(index[:n_seq]) do |i|
245 | FFI::IdxSeq.new(ptr + i * FFI::IdxSeq.size)[:name]
246 | end
247 | end
248 | end
249 | end
250 |
--------------------------------------------------------------------------------
/lib/minimap2/alignment.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | module Minimap2
4 | # Alignment result.
5 | #
6 | # @!attribute ctg
7 | # @return [String] name of the reference sequence the query is mapped to.
8 | # @!attribute ctg_len
9 | # @return [Integer] total length of the reference sequence.
10 | # @!attribute r_st
11 | # @return [Integer] start positions on the reference.
12 | # @!attribute r_en
13 | # @return [Integer] end positions on the reference.
14 | # @!attribute strand
15 | # @return [Integer] +1 if on the forward strand; -1 if on the reverse strand.
16 | # @!attribute trans_strand
17 | # @return [Integer] transcript strand.
18 | # +1 if on the forward strand; -1 if on the reverse strand; 0 if unknown.
19 | # @!attribute blen
20 | # @return [Integer] length of the alignment, including both alignment matches and gaps
21 | # but excluding ambiguous bases.
22 | # @!attribute mlen
23 | # @return [Integer] length of the matching bases in the alignment,
24 | # excluding ambiguous base matches.
25 | # @!attribute nm
26 | # @return [Integer] number of mismatches, gaps and ambiguous positions in the alignment.
27 | # @!attribute primary
28 | # @return [Integer] if the alignment is primary (typically the best and the first to generate)
29 | # @!attribute q_st
30 | # @return [Integer] start positions on the query.
31 | # @!attribute q_en
32 | # @return [Integer] end positions on the query.
33 | # @!attribute mapq
34 | # @return [Integer] mapping quality.
35 | # @!attribute cigar
36 | # @return [Array] CIGAR returned as an array of shape (n_cigar,2).
37 | # The two numbers give the length and the operator of each CIGAR operation.
38 | # @!attribute read_num
39 | # @return [Integer] read number that the alignment corresponds to;
40 | # 1 for the first read and 2 for the second read.
41 | # @!attribute cs
42 | # @return [String] the cs tag.
43 | # @!attribute md
44 | # @return [String] the MD tag as in the SAM format.
45 | # It is an empty string unless the md argument is applied when calling Aligner#align.
46 | # @!attribute cigar_str
47 | # @return [String] CIGAR string.
48 |
49 | class Alignment
50 | def self.keys
51 | %i[ctg ctg_len r_st r_en strand trans_strand blen mlen nm primary
52 | q_st q_en mapq cigar read_num cs md cigar_str]
53 | end
54 |
55 | attr_reader(*keys)
56 |
57 | def initialize(h, cigar, cs = nil, md = nil)
58 | @ctg = h[:ctg]
59 | @ctg_len = h[:ctg_len]
60 | @r_st = h[:ctg_start]
61 | @r_en = h[:ctg_end]
62 | @strand = h[:strand]
63 | @trans_strand = h[:trans_strand]
64 | @blen = h[:blen]
65 | @mlen = h[:mlen]
66 | @nm = h[:NM]
67 | @primary = h[:is_primary]
68 | @q_st = h[:qry_start]
69 | @q_en = h[:qry_end]
70 | @mapq = h[:mapq]
71 | @cigar = cigar
72 | @read_num = h[:seg_id] + 1
73 | @cs = cs
74 | @md = md
75 |
76 | @cigar_str = cigar.map { |x| x[0].to_s + FFI::CIGAR_STR[x[1]] }.join
77 | end
78 |
79 | def primary?
80 | @primary == 1
81 | end
82 |
83 | # Convert Alignment to hash.
84 |
85 | def to_h
86 | self.class.keys.map { |k| [k, __send__(k)] }.to_h
87 | end
88 |
89 | # Convert to the PAF format without the QueryName and QueryLength columns.
90 |
91 | def to_s
92 | strand = if @strand > 0
93 | "+"
94 | elsif @strand < 0
95 | "-"
96 | else
97 | "?"
98 | end
99 | tp = @primary != 0 ? "tp:A:P" : "tp:A:S"
100 | ts = if @trans_strand > 0
101 | "ts:A:+"
102 | elsif @trans_strand < 0
103 | "ts:A:-"
104 | else
105 | "ts:A:."
106 | end
107 | a = [@q_st, @q_en, strand, @ctg, @ctg_len, @r_st, @r_en,
108 | @mlen, @blen, @mapq, tp, ts, "cg:Z:#{@cigar_str}"]
109 | a << "cs:Z:#{@cs}" if @cs
110 | a << "MD:Z:#{@md}" if @md
111 | a.join("\t")
112 | end
113 | end
114 | end
115 |
--------------------------------------------------------------------------------
/lib/minimap2/ffi.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | # bit fields
4 | require "ffi/bit_struct"
5 |
6 | module Minimap2
7 | # Native APIs: FFI bindings to the shared library named by Minimap2.ffi_lib.
8 | module FFI
9 | extend ::FFI::Library
10 | begin
11 | ffi_lib Minimap2.ffi_lib
12 | rescue LoadError => e
13 | raise LoadError, "Could not find #{Minimap2.ffi_lib} \n#{e}"
14 | end
15 | 
16 | # Continue even if some functions are not found: a missing symbol only emits a warning.
17 | def self.attach_function(*)
18 | super
19 | rescue ::FFI::NotFoundError => e
20 | warn e.message
21 | end
22 | end
23 | end
24 |
25 | require_relative "ffi/constants"
26 | require_relative "ffi/functions"
27 | require_relative "ffi/mappy"
28 |
--------------------------------------------------------------------------------
/lib/minimap2/ffi/constants.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | module Minimap2
4 | module FFI
5 | # flags
6 | NO_DIAG = 0x001 # no exact diagonal hit
7 | NO_DUAL = 0x002 # skip pairs where query name is lexicographically larger than target name
8 | CIGAR = 0x004
9 | OUT_SAM = 0x008
10 | NO_QUAL = 0x010
11 | OUT_CG = 0x020
12 | OUT_CS = 0x040
13 | SPLICE = 0x080 # splice mode
14 | SPLICE_FOR = 0x100 # match GT-AG
15 | SPLICE_REV = 0x200 # match CT-AC, the reverse complement of GT-AG
16 | NO_LJOIN = 0x400
17 | OUT_CS_LONG = 0x800
18 | SR = 0x1000
19 | FRAG_MODE = 0x2000
20 | NO_PRINT_2ND = 0x4000
21 | TWO_IO_THREADS = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
22 | LONG_CIGAR = 0x10000
23 | INDEPEND_SEG = 0x20000
24 | SPLICE_FLANK = 0x40000
25 | SOFTCLIP = 0x80000
26 | FOR_ONLY = 0x100000
27 | REV_ONLY = 0x200000
28 | HEAP_SORT = 0x400000
29 | ALL_CHAINS = 0x800000
30 | OUT_MD = 0x1000000
31 | COPY_COMMENT = 0x2000000
32 | EQX = 0x4000000 # use =/X instead of M
33 | PAF_NO_HIT = 0x8000000 # output unmapped reads to PAF
34 | NO_END_FLT = 0x10000000
35 | HARD_MLEVEL = 0x20000000
36 | SAM_HIT_ONLY = 0x40000000
37 | RMQ = 0x80000000
38 | QSTRAND = 0x100000000
39 | NO_INV = 0x200000000
40 | NO_HASH_NAME = 0x400000000
41 | SPLICE_OLD = 0x800000000
42 | SECONDARY_SEQ = 0x1000000000 # output SEQ field for secondary alignments using hard clipping
43 | OUT_DS = 0x2000000000
44 | WEAK_PAIRING = 0x4000000000
45 | SR_RNA = 0x8000000000
46 | OUT_JUNC = 0x10000000000
47 |
48 | HPC = 0x1
49 | NO_SEQ = 0x2
50 | NO_NAME = 0x4
51 |
52 | IDX_MAGIC = "MMI\2"
53 |
54 | MAX_SEG = 255
55 |
56 | CIGAR_MATCH = 0
57 | CIGAR_INS = 1
58 | CIGAR_DEL = 2
59 | CIGAR_N_SKIP = 3
60 | CIGAR_SOFTCLIP = 4
61 | CIGAR_HARDCLIP = 5
62 | CIGAR_PADDING = 6
63 | CIGAR_EQ_MATCH = 7
64 | CIGAR_X_MISMATCH = 8
65 |
66 | CIGAR_STR = "MIDNSHP=XB"
67 |
68 | # emulate 128-bit integers
69 | class MM128 < ::FFI::Struct
70 | layout \
71 | :x, :uint64_t,
72 | :y, :uint64_t
73 | end
74 |
75 | # emulate 128-bit arrays
76 | class MM128V < ::FFI::Struct
77 | layout \
78 | :n, :size_t,
79 | :m, :size_t,
80 | :a, MM128.ptr
81 | end
82 |
83 | # minimap2 index
84 | class IdxSeq < ::FFI::Struct
85 | layout \
86 | :name, :string, # name of the db sequence
87 | :offset, :uint64_t, # offset in mm_idx_t::S
88 | :len, :uint32, # length
89 | :is_alt, :uint32
90 | end
91 |
92 | class Idx < ::FFI::Struct
93 | layout \
94 | :b, :int32,
95 | :w, :int32,
96 | :k, :int32,
97 | :flag, :int32,
98 | :n_seq, :uint32, # number of reference sequences
99 | :index, :int32,
100 | :n_alt, :int32,
101 | :seq, IdxSeq.ptr, # sequence name, length and offset
102 | :S, :pointer, # 4-bit packed sequence
103 | :B, :pointer, # index (hidden)
104 | :I, :pointer, # intervals (hidden)
105 | :spsc, :pointer, # splice score (hidden)
106 | :J, :pointer, # junctions to create jumps (hidden)
107 | :km, :pointer,
108 | :h, :pointer
109 | end
110 |
111 | # minimap2 alignment
112 | class Extra < ::FFI::BitStruct
113 | layout \
114 | :capacity, :uint32, # the capacity of cigar[]
115 | :dp_score, :int32, # DP score
116 | :dp_max, :int32, # score of the max-scoring segment
117 | :dp_max2, :int32, # score of the best alternate mappings
118 | :dp_max0, :int32, # DP score before mm_update_dp_max() adjustment
119 | :n_ambi_trans_strand, :uint32,
120 | :n_cigar, :uint32
121 | # :cigar, :pointer # variable length array (see cigar method below)
122 | 
123 | bit_field :n_ambi_trans_strand,
124 | :n_ambi, 30, # number of ambiguous bases
125 | :trans_strand, 2 # transcript strand: 0 for unknown, 1 for +, 2 for -
126 | 
127 | # Variable length array: reads self[:n_cigar] uint32 values starting at byte offset `size` from the struct pointer.
128 | def cigar
129 | pointer.get_array_of_uint32(size, self[:n_cigar])
130 | end
131 | end
132 |
133 | class Reg1 < ::FFI::BitStruct
134 | layout \
135 | :id, :int32, # ID for internal uses (see also parent below)
136 | :cnt, :int32, # number of minimizers; if on the reverse strand
137 | :rid, :int32, # reference index; if this is an alignment from inversion rescue
138 | :score, :int32, # DP alignment score
139 | :qs, :int32, # query start
140 | :qe, :int32, # query end
141 | :rs, :int32, # reference start
142 | :re, :int32, # reference end
143 | :parent, :int32, # parent==id if primary
144 | :subsc, :int32, # best alternate mapping score
145 | :as, :int32, # offset in the a[] array (for internal uses only)
146 | :mlen, :int32, # seeded exact match length
147 | :blen, :int32, # seeded alignment block length
148 | :n_sub, :int32, # number of suboptimal mappings
149 | :score0, :int32, # initial chaining score (before chain merging/splitting)
150 | :fields, :uint32,
151 | :hash, :uint32,
152 | :div, :float,
153 | :p, Extra.ptr
154 | # The 32 bits of :fields are split into the following bit fields (widths in bits).
155 | bit_field :fields,
156 | :mapq, 8,
157 | :split, 2,
158 | :rev, 1,
159 | :inv, 1,
160 | :sam_pri, 1,
161 | :proper_frag, 1,
162 | :pe_thru, 1,
163 | :seg_split, 1,
164 | :seg_id, 8,
165 | :split_inv, 1,
166 | :is_alt, 1,
167 | :strand_retained, 1,
168 | :is_spliced, 1,
169 | :dummy, 4
170 | end
171 |
172 | # indexing option
173 | class IdxOpt < ::FFI::Struct
174 | layout \
175 | :k, :short,
176 | :w, :short,
177 | :flag, :short,
178 | :bucket_bits, :short,
179 | :mini_batch_size, :int64_t,
180 | :batch_size, :uint64_t
181 | end
182 |
183 | # mapping option
184 | class MapOpt < ::FFI::Struct
185 | layout \
186 | :flag, :int64_t, # see MM_F_* macros
187 | :seed, :int,
188 | :sdust_thres, :int, # score threshold for SDUST; 0 to disable
189 | :max_qlen, :int, # max query length
190 | :bw, :int, # bandwidth
191 | :bw_long, :int,
192 | :max_gap, :int, # break a chain if there are no minimizers in a max_gap window
193 | :max_gap_ref, :int,
194 | :max_frag_len, :int,
195 | :max_chain_skip, :int,
196 | :max_chain_iter, :int,
197 | :min_cnt, :int, # min number of minimizers on each chain
198 | :min_chain_score, :int, # min chaining score
199 | :chain_gap_scale, :float,
200 | :chain_skip_scale, :float,
201 | :rmq_size_cap, :int,
202 | :rmq_inner_dist, :int,
203 | :rmq_rescue_size, :int,
204 | :rmq_rescue_ratio, :float,
205 | :mask_level, :float,
206 | :mask_len, :int,
207 | :pri_ratio, :float,
208 | :best_n, :int, # top best_n chains are subjected to DP alignment
209 | :alt_drop, :float,
210 | :a, :int, # matching score
211 | :b, :int, # mismatch
212 | :q, :int, # gap-open
213 | :e, :int, # gap-ext
214 | :q2, :int, # gap-open
215 | :e2, :int, # gap-ext
216 | :transition, :int, # transition mismatch score (A:G, C:T)
217 | :sc_ambi, :int, # score when one or both bases are "N"
218 | :noncan, :int, # cost of non-canonical splicing sites
219 | :junc_pen, :int,
220 | :junc_bonus, :int,
221 | :zdrop, :int, # break alignment if alignment score drops too fast along the diagonal
222 | :zdrop_inv, :int,
223 | :end_bonus, :int,
224 | :min_dp_max, :int, # drop an alignment if the score of the max scoring segment is below this threshold
225 | :min_ksw_len, :int,
226 | :anchor_ext_len, :int,
227 | :anchor_ext_shift, :int,
228 | :max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
229 | :rank_min_len, :int,
230 | :rank_frac, :float,
231 | :pe_ori, :int,
232 | :pe_bonus, :int,
233 | :jump_min_match, :int32,
234 | :mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
235 | :q_occ_frac, :float,
236 | :min_mid_occ, :int32,
237 | :max_mid_occ, :int32,
238 | :mid_occ, :int32, # ignore seeds with occurrences above this threshold
239 | :max_occ, :int32,
240 | :max_max_occ, :int32,
241 | :occ_dist, :int32,
242 | :mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
243 | :max_sw_mat, :int64_t,
244 | :cap_kalloc, :int64_t,
245 | :split_prefix, :string
246 | end
247 |
248 | # index reader
249 | class IdxReader < ::FFI::Struct
250 | layout \
251 | :is_idx, :int,
252 | :n_parts, :int,
253 | :idx_size, :int64_t,
254 | :opt, IdxOpt,
255 | :fp_out, :pointer, # FILE
256 | :seq_or_idx, :pointer # FIXME: Union mm_bseq_files or FILE
257 | end
258 |
259 | # memory buffer for thread-local storage during mapping
260 | class TBuf < ::FFI::Struct
261 | layout \
262 | :km, :pointer,
263 | :rep_len, :int,
264 | :frag_gap, :int
265 | end
266 | end
267 | end
268 |
--------------------------------------------------------------------------------
/lib/minimap2/ffi/functions.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | module Minimap2
4 | module FFI
5 | attach_function \
6 | :main,
7 | %i[int pointer],
8 | :int
9 |
10 | # int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo);
11 | attach_function \
12 | :mm_set_opt_raw, :mm_set_opt,
13 | [:pointer, IdxOpt.by_ref, MapOpt.by_ref],
14 | :int
15 |
16 | private_class_method :mm_set_opt_raw
17 |
18 | def self.mm_set_opt(preset, io, mo)
19 | ptr = case preset
20 | when 0, nil
21 | ::FFI::Pointer.new(:int, 0)
22 | else
23 | ::FFI::MemoryPointer.from_string(preset.to_s)
24 | end
25 | mm_set_opt_raw(ptr, io, mo)
26 | end
27 |
28 | # int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo);
29 | attach_function \
30 | :mm_check_opt,
31 | [IdxOpt.by_ref, MapOpt.by_ref],
32 | :int
33 |
34 | # void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi);
35 | attach_function \
36 | :mm_mapopt_update,
37 | [MapOpt.by_ref, Idx.by_ref],
38 | :void
39 |
40 | # void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len);
41 | attach_function \
42 | :mm_mapopt_max_intron_len,
43 | [MapOpt.by_ref, :int],
44 | :void
45 |
46 | # mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out);
47 | attach_function \
48 | :mm_idx_reader_open,
49 | [:string, IdxOpt.by_ref, :string],
50 | IdxReader.by_ref
51 |
52 | # mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads);
53 | attach_function \
54 | :mm_idx_reader_read,
55 | [IdxReader.by_ref, :int],
56 | Idx.by_ref
57 |
58 | # void mm_idx_reader_close(mm_idx_reader_t *r);
59 | attach_function \
60 | :mm_idx_reader_close,
61 | [IdxReader.by_ref],
62 | :void
63 |
64 | # int mm_idx_reader_eof(const mm_idx_reader_t *r);
65 | attach_function \
66 | :mm_idx_reader_eof,
67 | [IdxReader.by_ref],
68 | :int
69 |
70 | # int64_t mm_idx_is_idx(const char *fn);
71 | attach_function \
72 | :mm_idx_is_idx,
73 | [:string],
74 | :int64_t
75 |
76 | # mm_idx_t *mm_idx_load(FILE *fp);
77 | attach_function \
78 | :mm_idx_load,
79 | [:pointer], # FILE pointer
80 | Idx.by_ref
81 |
82 | # void mm_idx_dump(FILE *fp, const mm_idx_t *mi);
83 | attach_function \
84 | :mm_idx_dump,
85 | [:pointer, Idx.by_ref], # FILE pointer
86 | :void
87 |
88 | # mm_idx_t *mm_idx_str(int w, int k, int is_hpc, int bucket_bits, int n, const char **seq, const char **name);
89 | attach_function \
90 | :mm_idx_str,
91 | %i[int int int int int pointer pointer],
92 | Idx.by_ref
93 |
94 | # void mm_idx_stat(const mm_idx_t *idx);
95 | attach_function \
96 | :mm_idx_stat,
97 | [Idx.by_ref],
98 | :void
99 |
100 | # void mm_idx_destroy(mm_idx_t *mi);
101 | attach_function \
102 | :mm_idx_destroy,
103 | [Idx.by_ref],
104 | :void
105 |
106 | # mm_tbuf_t *mm_tbuf_init(void);
107 | attach_function \
108 | :mm_tbuf_init,
109 | [],
110 | TBuf.by_ref
111 |
112 | # void mm_tbuf_destroy(mm_tbuf_t *b);
113 | attach_function \
114 | :mm_tbuf_destroy,
115 | [TBuf.by_ref],
116 | :void
117 |
118 | # void *mm_tbuf_get_km(mm_tbuf_t *b);
119 | attach_function \
120 | :mm_tbuf_get_km,
121 | [TBuf.by_ref],
122 | :pointer
123 |
124 | # mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name);
125 | attach_function \
126 | :mm_map,
127 | [Idx.by_ref, :int, :string, :pointer, TBuf.by_ref, MapOpt.by_ref, :string],
128 | Reg1.by_ref
129 |
130 | # void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname);
131 | attach_function \
132 | :mm_map_frag,
133 | [Idx.by_ref, :int, :pointer, :pointer, :pointer, TBuf.by_ref, MapOpt.by_ref, :string],
134 | :void
135 |
136 | # int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads);
137 | attach_function \
138 | :mm_map_file,
139 | [Idx.by_ref, :string, MapOpt.by_ref, :int],
140 | :int
141 |
142 | # int mm_map_file_frag(const mm_idx_t *idx, int n_segs, const char **fn, const mm_mapopt_t *opt, int n_threads);
143 | attach_function \
144 | :mm_map_file_frag,
145 | [Idx.by_ref, :int, :pointer, MapOpt.by_ref, :int],
146 | :int
147 |
148 | # int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden);
149 | attach_function \
150 | :mm_gen_cs,
151 | [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string, :int],
152 | :int
153 |
154 | # int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq);
155 | attach_function \
156 | :mm_gen_md, :mm_gen_MD, # Avoid uppercase letters in method names.
157 | [:pointer, :pointer, :pointer, Idx.by_ref, Reg1.by_ref, :string],
158 | :int
159 |
160 | # int mm_idx_index_name(mm_idx_t *mi);
161 | attach_function \
162 | :mm_idx_index_name,
163 | [Idx.by_ref],
164 | :int
165 |
166 | # int mm_idx_name2id(const mm_idx_t *mi, const char *name);
167 | attach_function \
168 | :mm_idx_name2id,
169 | [Idx.by_ref, :string],
170 | :int
171 |
172 | # int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
173 | attach_function \
174 | :mm_idx_getseq,
175 | [Idx.by_ref, :uint32, :uint32, :uint32, :pointer],
176 | :int
177 |
178 | # int mm_idx_alt_read(mm_idx_t *mi, const char *fn);
179 | attach_function \
180 | :mm_idx_alt_read,
181 | [Idx.by_ref, :string],
182 | :int
183 |
184 | # int mm_idx_bed_read(mm_idx_t *mi, const char *fn, int read_junc);
185 | attach_function \
186 | :mm_idx_bed_read,
187 | [Idx.by_ref, :string, :int],
188 | :int
189 |
190 | # int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uint8_t *s);
191 | attach_function \
192 | :mm_idx_bed_junc,
193 | [Idx.by_ref, :int32, :int32, :int32, :pointer],
194 | :int
195 |
196 | # int mm_max_spsc_bonus(const mm_mapopt_t *mo);
197 | attach_function \
198 | :mm_max_spsc_bonus,
199 | [MapOpt.by_ref],
200 | :int
201 |
202 | # int32_t mm_idx_spsc_read(mm_idx_t *idx, const char *fn, int32_t max_sc);
203 | attach_function \
204 | :mm_idx_spsc_read,
205 | [Idx.by_ref, :string, :int32],
206 | :int32
207 |
208 | # int64_t mm_idx_spsc_get(const mm_idx_t *db, int32_t cid, int64_t st0, int64_t en0, int32_t rev, uint8_t *sc);
209 | attach_function \
210 | :mm_idx_spsc_get,
211 | [Idx.by_ref, :int32, :int64, :int64, :int32, :pointer],
212 | :int64
213 |
214 | # void mm_mapopt_init(mm_mapopt_t *opt);
215 | attach_function \
216 | :mm_mapopt_init,
217 | [MapOpt.by_ref],
218 | :void
219 |
220 | # mm_idx_t *mm_idx_build(const char *fn, int w, int k, int flag, int n_threads);
221 | attach_function \
222 | :mm_idx_build,
223 | %i[string int int int int],
224 | Idx.by_ref
225 |
226 | # mmpriv.h
227 |
228 | attach_function \
229 | :mm_idxopt_init,
230 | [IdxOpt.by_ref],
231 | :void
232 | end
233 | end
234 |
--------------------------------------------------------------------------------
/lib/minimap2/ffi/mappy.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | # https://github.com/lh3/minimap2/blob/master/python/cmappy.h
4 |
5 | module Minimap2
6 | module FFI
7 | class Hit < ::FFI::Struct
8 | layout \
9 | :ctg, :string,
10 | :ctg_start, :int32,
11 | :ctg_end, :int32,
12 | :qry_start, :int32,
13 | :qry_end, :int32,
14 | :blen, :int32,
15 | :mlen, :int32,
16 | :NM, :int32,
17 | :ctg_len, :int32,
18 | :mapq, :uint8_t,
19 | :is_primary, :uint8_t,
20 | :strand, :int8_t,
21 | :trans_strand, :int8_t,
22 | :seg_id, :int32,
23 | :n_cigar32, :int32,
24 | :cigar32, :pointer
25 | end
26 |
27 | class KString < ::FFI::Struct
28 | layout \
29 | :l, :size_t,
30 | :m, :size_t,
31 | :s, :string
32 | end
33 |
34 | class KSeq < ::FFI::Struct
35 | layout \
36 | :name, KString,
37 | :comment, KString,
38 | :seq, KString,
39 | :qual, KString,
40 | :last_char, :int,
41 | :f, :pointer # KStream
42 | end
43 |
44 | attach_function \
45 | :mm_reg2hitpy,
46 | [Idx.by_ref, Reg1.by_ref, Hit.by_ref],
47 | :void
48 |
49 | attach_function \
50 | :mm_free_reg1,
51 | [Reg1.by_ref],
52 | :void
53 |
54 | attach_function \
55 | :mm_fastx_open,
56 | [:string],
57 | KSeq.by_ref
58 |
59 | attach_function \
60 | :mm_fastx_close,
61 | [KSeq.by_ref],
62 | :void
63 |
64 | attach_function \
65 | :mm_verbose_level,
66 | [:int],
67 | :int
68 |
69 | attach_function \
70 | :mm_reset_timer,
71 | [:void],
72 | :void
73 |
74 | attach_function \
75 | :mm_map_aux,
76 | [Idx.by_ref, :string, :string, :string, :pointer, TBuf.by_ref, MapOpt.by_ref],
77 | :pointer # Reg1
78 |
79 | attach_function \
80 | :mappy_revcomp,
81 | %i[int pointer],
82 | :string
83 |
84 | attach_function \
85 | :mappy_fetch_seq,
86 | [Idx.by_ref, :string, :int, :int, :pointer],
87 | :pointer # Use pointer instead of string to read with a specified length
88 |
89 | attach_function \
90 | :mappy_idx_seq,
91 | %i[int int int int string int],
92 | Idx.by_ref
93 |
94 | attach_function \
95 | :kseq_read,
96 | [KSeq.by_ref],
97 | :int
98 | end
99 | end
100 |
--------------------------------------------------------------------------------
/lib/minimap2/version.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | module Minimap2
4 | VERSION = "0.2.29.0"
5 | end
6 |
--------------------------------------------------------------------------------
/minimap2.gemspec:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require_relative "lib/minimap2/version"
4 |
5 | Gem::Specification.new do |spec|
6 | spec.name = "minimap2"
7 | spec.version = Minimap2::VERSION
8 | spec.authors = ["kojix2"]
9 | spec.email = ["2xijok@gmail.com"]
10 |
11 | spec.summary = "minimap2"
12 | spec.description = "Ruby bindings to the Minimap2 aligner."
13 | spec.homepage = "https://github.com/kojix2/ruby-minimap2"
14 | spec.license = "MIT"
15 | spec.required_ruby_version = ">= 2.5"
16 |
17 | # If you include the lib/simde code, the Gem size will be 1MB.
18 | # Build with lib/simde is currently not supported, so simde code is not included in the Gem.
19 | spec.files = (Dir["*.{md,txt}", "{lib,ext}/**/*", "vendor/libminimap2.{so,dylib,dll}"] -
20 | Dir["ext/minimap2/lib/**/*"])
21 | spec.require_paths = ["lib"]
22 |
23 | spec.extensions = %w[ext/Rakefile]
24 |
25 | spec.add_dependency "ffi"
26 | spec.add_dependency "ffi-bitfield"
27 | end
28 |
--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json",
3 | "extends": [
4 | "config:recommended"
5 | ]
6 | }
7 |
--------------------------------------------------------------------------------
/test/minimap2/aligner_test.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require_relative "../test_helper"
4 | class AlignerTest < Minitest::Test
5 | def fa_path
6 | File.expand_path("../../ext/minimap2/test/MT-human.fa", __dir__)
7 | end
8 |
9 | def setup
10 | @a = MM2::Aligner.new(fa_path)
11 | end
12 |
13 | def test_initialize
14 | assert_instance_of MM2::Aligner, @a
15 | end
16 |
17 | def test_initialize_preset_short
18 | assert_instance_of MM2::Aligner, MM2::Aligner.new(fa_path, preset: "short")
19 | assert_instance_of MM2::Aligner, MM2::Aligner.new(fa_path, preset: :short)
20 | end
21 |
22 | def test_initialize_preset_unknown
23 | assert_raises(ArgumentError) { MM2::Aligner.new(fa_path, preset: "sort") }
24 | end
25 |
26 | def test_initialize_with_seq
27 | assert_instance_of MM2::Aligner, MM2::Aligner.new(seq: "CACAGGTCGAAGGAGTAATTACCCAACAATGGGTCTCTAG")
28 | end
29 |
30 | def test_idx_opt
31 | assert_instance_of MM2::FFI::IdxOpt, @a.idx_opt
32 | end
33 |
34 | def test_map_opt
35 | assert_instance_of MM2::FFI::MapOpt, @a.map_opt
36 | end
37 |
38 | def test_index
39 | assert_instance_of MM2::FFI::Idx, @a.index
40 | end
41 |
42 | def test_align
43 | qseq = @a.seq("MT_human", 100, 200)
44 | @a.align(qseq) do |h|
45 | assert_instance_of MM2::Alignment, h
46 | end
47 | end
48 |
49 | def test_align2
50 | qseq = MM2.revcomp(@a.seq("MT_human", 300, 400))
51 | @a.align(qseq) do |h|
52 | assert_instance_of MM2::Alignment, h
53 | end
54 | end
55 |
56 | def test_align_seq
57 | qseq = @a.seq("MT_human", 100, 200)
58 | ref = @a.seq("MT_human", 0, 3000)
59 | a = MM2::Aligner.new(seq: ref)
60 | a.align(qseq) do |h|
61 | assert_instance_of MM2::Alignment, h
62 | end
63 | end
64 |
65 | def test_align2_seq
66 | qseq1 = @a.seq("MT_human", 100, 200)
67 | qseq2 = MM2.revcomp(@a.seq("MT_human", 300, 400))
68 | ref = @a.seq("MT_human", 0, 3000)
69 | a = MM2::Aligner.new(seq: ref)
70 | a.align(qseq1, qseq2) do |h|
71 | assert_instance_of MM2::Alignment, h
72 | end
73 | end
74 |
75 | def test_seq
76 | assert_nil @a.seq("MT_human", 0, 0)
77 | assert_equal "G", @a.seq("MT_human", 0, 1)
78 | assert_equal "GA", @a.seq("MT_human", 0, 2)
79 | assert_equal "CACAG", @a.seq("MT_human", 3, 8)
80 | assert_equal "ATCACGATG", @a.seq("MT_human", 16_560)
81 | end
82 |
83 | def test_k
84 | assert_equal 15, @a.k
85 | end
86 |
87 | def test_w
88 | assert_equal 10, @a.w
89 | end
90 |
91 | def test_n_seq
92 | assert_equal 1, @a.n_seq
93 | end
94 |
95 | def test_seq_names
96 | path = File.expand_path("../../ext/minimap2/test/q-inv.fa", __dir__)
97 | @a = MM2::Aligner.new(path)
98 | assert_equal %w[read1 read2], @a.seq_names
99 | end
100 | end
101 |
--------------------------------------------------------------------------------
/test/minimap2/alignment_test.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require_relative "../test_helper"
4 | class AlignmentTest < Minitest::Test
5 | def setup
6 | path = File.expand_path("../../ext/minimap2/test/MT-human.fa", __dir__)
7 | aligner = MM2::Aligner.new(path)
8 | seq = aligner.seq("MT_human", 100, 300)
9 | @a = aligner.align(seq, cs: true, md: true).first
10 | end
11 |
12 | def test_keys
13 | assert_instance_of Array, MM2::Alignment.keys
14 | end
15 |
16 | def test_initialize
17 | assert_instance_of MM2::Alignment, @a
18 | end
19 |
20 | def test_ctg
21 | assert_equal "MT_human", @a.ctg
22 | end
23 |
24 | def test_ctg_len
25 | assert_equal 16_569, @a.ctg_len
26 | end
27 |
28 | def test_r_st
29 | assert_equal 100, @a.r_st
30 | end
31 |
32 | def test_r_en
33 | assert_equal 300, @a.r_en
34 | end
35 |
36 | def test_strand
37 | assert_equal 1, @a.strand
38 | end
39 |
40 | def test_trans_strand
41 | assert_equal 0, @a.trans_strand
42 | end
43 |
44 | def test_blen
45 | assert_equal 200, @a.blen
46 | end
47 |
48 | def test_mlen
49 | assert_equal 200, @a.mlen
50 | end
51 |
52 | def test_nm
53 | assert_equal 0, @a.nm
54 | end
55 |
56 | def test_primary
57 | assert_equal 1, @a.primary
58 | end
59 |
60 | def test_q_st
61 | assert_equal 0, @a.q_st
62 | end
63 |
64 | def test_q_en
65 | assert_equal 200, @a.q_en
66 | end
67 |
68 | def test_mapq
69 | assert_equal 60, @a.mapq
70 | end
71 |
72 | def test_cigar
73 | assert_equal [[200, 0]], @a.cigar
74 | end
75 |
76 | def test_read_num
77 | assert_equal 1, @a.read_num
78 | end
79 |
80 | def test_cs
81 | assert_equal ":200", @a.cs
82 | end
83 |
84 | def test_md
85 | assert_equal "200", @a.md
86 | end
87 |
88 | def test_cigar_str
89 | assert_equal "200M", @a.cigar_str
90 | end
91 |
92 | def test_primary?
93 | assert_equal true, @a.primary?
94 | end
95 |
96 | def test_to_h
97 | hit = {
98 | ctg: "MT_human",
99 | ctg_len: 16_569,
100 | r_st: 100,
101 | r_en: 300,
102 | strand: 1,
103 | trans_strand: 0,
104 | blen: 200,
105 | mlen: 200,
106 | nm: 0,
107 | primary: 1,
108 | q_st: 0,
109 | q_en: 200,
110 | mapq: 60,
111 | cigar: [[200, 0]],
112 | read_num: 1,
113 | cs: ":200",
114 | md: "200",
115 | cigar_str: "200M"
116 | }
117 | assert_equal hit, @a.to_h
118 | end
119 |
120 | def test_to_s
121 | assert_equal "0\t200\t+\tMT_human\t16569\t100\t300\t200\t200\t60\ttp:A:P\tts:A:.\tcg:Z:200M\tcs:Z::200\tMD:Z:200",
122 | @a.to_s
123 | end
124 | end
125 |
--------------------------------------------------------------------------------
/test/minimap2/ffi_test.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require_relative "../test_helper"
4 |
5 | class FFITest < Minitest::Test
6 | def test_mm128
7 | obj = MM2::FFI::MM128.new
8 | assert_instance_of MM2::FFI::MM128, obj
9 | assert_equal 0, obj[:x]
10 | assert_equal 0, obj[:y]
11 | end
12 |
13 | def test_mm128v
14 | obj = MM2::FFI::MM128V.new
15 | assert_instance_of MM2::FFI::MM128V, obj
16 | assert_equal 0, obj[:n]
17 | assert_equal 0, obj[:m]
18 | assert_instance_of MM2::FFI::MM128, obj[:a]
19 | end
20 |
21 | def test_idxopt
22 | io = MM2::FFI::IdxOpt.new
23 | assert_instance_of MM2::FFI::IdxOpt, io
24 | assert_equal 0, io[:k]
25 | MM2::FFI.mm_idxopt_init(io)
26 | assert_equal 15, io[:k]
27 | assert_equal 10, io[:w]
28 | assert_equal 0, io[:flag]
29 | assert_equal 14, io[:bucket_bits]
30 | assert_equal 50_000_000, io[:mini_batch_size]
31 | assert_equal 8_000_000_000, io[:batch_size]
32 | end
33 |
34 | def test_mapopt
35 | mo = MM2::FFI::MapOpt.new
36 | assert_instance_of MM2::FFI::MapOpt, mo
37 | assert_equal 0, mo[:seed]
38 | MM2::FFI.mm_mapopt_init(mo)
39 | assert_equal 11, mo[:seed]
40 | assert_equal 0, mo[:flag]
41 | assert_equal 0, mo[:sdust_thres]
42 | assert_equal 0, mo[:max_qlen]
43 | assert_equal 500, mo[:bw]
44 | assert_equal 20_000, mo[:bw_long]
45 | assert_equal 5000, mo[:max_gap]
46 | assert_equal(-1, mo[:max_gap_ref])
47 | assert_equal 0, mo[:max_frag_len]
48 | assert_equal 25, mo[:max_chain_skip]
49 | assert_equal 5000, mo[:max_chain_iter]
50 | assert_equal 3, mo[:min_cnt]
51 | assert_equal 40, mo[:min_chain_score]
52 | assert_in_epsilon 0.8, mo[:chain_gap_scale]
53 | assert_in_epsilon 0, mo[:chain_skip_scale]
54 | assert_equal 100_000, mo[:rmq_size_cap]
55 | assert_equal 1000, mo[:rmq_inner_dist]
56 | assert_equal 1000, mo[:rmq_rescue_size]
57 | assert_in_epsilon 0.1, mo[:rmq_rescue_ratio]
58 | assert_in_epsilon 0.5, mo[:mask_level]
59 | # assert_equal INT_MAX, mo[:mask_len]
60 | assert_in_epsilon 0.8, mo[:pri_ratio]
61 | assert_equal 5, mo[:best_n]
62 | assert_in_epsilon 0.15, mo[:alt_drop]
63 | assert_equal 2, mo[:a]
64 | assert_equal 4, mo[:b]
65 | assert_equal 4, mo[:q]
66 | assert_equal 2, mo[:e]
67 | assert_equal 24, mo[:q2]
68 | assert_equal 1, mo[:e2]
69 | assert_equal 1, mo[:sc_ambi]
70 | assert_equal 0, mo[:noncan]
71 | assert_equal 0, mo[:junc_pen]
72 | assert_equal 0, mo[:junc_bonus]
73 | assert_equal 400, mo[:zdrop]
74 | assert_equal 200, mo[:zdrop_inv]
75 | assert_equal(-1, mo[:end_bonus])
76 | assert_equal (mo[:min_chain_score] * mo[:a]), mo[:min_dp_max]
77 | assert_equal 200, mo[:min_ksw_len]
78 | assert_equal 20, mo[:anchor_ext_len]
79 | assert_equal 6, mo[:anchor_ext_shift]
80 | assert_in_epsilon 1.0, mo[:max_clip_ratio]
81 | assert_equal 500, mo[:rank_min_len]
82 | assert_in_epsilon 0.9, mo[:rank_frac]
83 | assert_equal 0, mo[:pe_ori]
84 | assert_equal 33, mo[:pe_bonus]
85 | assert_equal 3, mo[:jump_min_match]
86 | assert_in_epsilon 0.0002, mo[:mid_occ_frac]
87 | assert_in_epsilon 0.01, mo[:q_occ_frac]
88 | assert_equal 10, mo[:min_mid_occ]
89 | assert_equal 1_000_000, mo[:max_mid_occ]
90 | assert_equal 0, mo[:mid_occ]
91 | assert_equal 0, mo[:max_occ]
92 | assert_equal 4095, mo[:max_max_occ]
93 | assert_equal 500, mo[:occ_dist]
94 | assert_equal 500_000_000, mo[:mini_batch_size]
95 | assert_equal 100_000_000, mo[:max_sw_mat]
96 | assert_equal 500_000_000, mo[:cap_kalloc]
97 | assert_nil mo[:split_prefix]
98 | end
99 |
100 | def test_idxseq
101 | obj = MM2::FFI::IdxSeq.new
102 | assert_instance_of MM2::FFI::IdxSeq, obj
103 | assert_nil obj[:name]
104 | assert_equal 0, obj[:offset]
105 | assert_equal 0, obj[:len]
106 | assert_equal 0, obj[:is_alt]
107 | end
108 |
109 | def test_idx
110 | obj = MM2::FFI::Idx.new
111 | assert_instance_of MM2::FFI::Idx, obj
112 | assert_equal 0, obj[:b]
113 | assert_equal 0, obj[:w]
114 | assert_equal 0, obj[:flag]
115 | assert_equal 0, obj[:n_seq]
116 | assert_equal 0, obj[:index]
117 | assert_equal 0, obj[:n_alt]
118 | assert_equal true, obj[:seq].null?
119 | assert_equal true, obj[:S].null?
120 | assert_equal true, obj[:B].null?
121 | assert_equal true, obj[:I].null?
122 | assert_equal true, obj[:spsc].null?
123 | assert_equal true, obj[:J].null?
124 | assert_equal true, obj[:km].null?
125 | assert_equal true, obj[:h].null?
126 | end
127 |
128 | def test_Reader
129 | obj = MM2::FFI::IdxReader.new
130 | assert_instance_of MM2::FFI::IdxReader, obj
131 | assert_equal 0, obj[:is_idx]
132 | assert_equal 0, obj[:n_parts]
133 | assert_equal 0, obj[:idx_size]
134 | assert_instance_of MM2::FFI::IdxOpt, obj[:opt]
135 | assert_equal true, obj[:fp_out].null?
136 | assert_equal true, obj[:seq_or_idx].null?
137 | end
138 |
139 | def test_extra
140 | cigar = [4, 5, 6]
141 | obj = MM2::FFI::Extra.new(::FFI::MemoryPointer.new(MM2::FFI::Extra.size + ::FFI.type_size(:uint32) * cigar.size))
142 | assert_instance_of MM2::FFI::Extra, obj
143 | assert_equal 0, obj[:capacity]
144 | assert_equal 0, obj[:dp_score]
145 | assert_equal 0, obj[:dp_max]
146 | assert_equal 0, obj[:dp_max2]
147 | # assert_equal 0, obj[:n_ambi_trans_strand]
148 | assert_equal 0, obj[:n_ambi]
149 | assert_equal 0, obj[:trans_strand]
150 | cigar = [4, 5, 6]
151 | obj[:n_cigar] = cigar.size
152 | obj.pointer.put_array_of_uint32(obj.size, cigar)
153 | assert_equal cigar, obj.cigar
154 | end
155 |
156 | def test_reg1
157 | obj = MM2::FFI::Reg1.new
158 | assert_instance_of MM2::FFI::Reg1, obj
159 | assert_equal 0, obj[:id]
160 | assert_equal 0, obj[:cnt]
161 | assert_equal 0, obj[:rid]
162 | assert_equal 0, obj[:score]
163 | assert_equal 0, obj[:qs]
164 | assert_equal 0, obj[:qe]
165 | assert_equal 0, obj[:rs]
166 | assert_equal 0, obj[:re]
167 | assert_equal 0, obj[:parent]
168 | assert_equal 0, obj[:subsc]
169 | assert_equal 0, obj[:as]
170 | assert_equal 0, obj[:mlen]
171 | assert_equal 0, obj[:blen]
172 | assert_equal 0, obj[:n_sub]
173 | assert_equal 0, obj[:score0]
174 | # assert_equal 0, obj[:fields]
175 | assert_equal 0, obj[:hash]
176 | assert_equal 0, obj[:div]
177 | assert_equal true, obj[:p].null?
178 |
179 | assert_equal 0, obj[:mapq]
180 | assert_equal 0, obj[:split]
181 | assert_equal 0, obj[:rev]
182 | assert_equal 0, obj[:inv]
183 | assert_equal 0, obj[:sam_pri]
184 | assert_equal 0, obj[:proper_frag]
185 | assert_equal 0, obj[:pe_thru]
186 | assert_equal 0, obj[:seg_split]
187 | assert_equal 0, obj[:seg_id]
188 | assert_equal 0, obj[:split_inv]
189 | assert_equal 0, obj[:is_alt]
190 | assert_equal 0, obj[:strand_retained]
191 | assert_equal 0, obj[:is_spliced]
192 | assert_equal 0, obj[:dummy]
193 | end
194 |
195 | def test_tbuf
196 | obj = MM2::FFI::TBuf.new
197 | assert_instance_of MM2::FFI::TBuf, obj
198 | assert_equal true, obj[:km].null?
199 | assert_equal 0, obj[:rep_len]
200 | assert_equal 0, obj[:frag_gap]
201 | end
202 |
203 | def test_mm_set_opt_0
204 | iopt = MM2::FFI::IdxOpt.new
205 | mopt = MM2::FFI::MapOpt.new
206 | MM2::FFI.mm_set_opt(nil, iopt, mopt)
207 | assert_equal [15, 10, 0, 14, 50_000_000, 8_000_000_000], iopt.values
208 | end
209 |
210 | def test_mm_set_opt_short
211 | iopt = MM2::FFI::IdxOpt.new
212 | mopt = MM2::FFI::MapOpt.new
213 | MM2::FFI.mm_set_opt("short", iopt, mopt)
214 | assert_equal [21, 11, 0, 0, 0, 0], iopt.values
215 | assert MM2::FFI.mm_set_opt(":asm10", iopt, mopt)
216 | end
217 | end
218 |
--------------------------------------------------------------------------------
/test/minimap2/version_test.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require_relative "../test_helper"
4 | class VersionTest < Minitest::Test
5 | def test_version
6 | refute_nil ::Minimap2::VERSION
7 | end
8 | end
9 |
--------------------------------------------------------------------------------
/test/minimap2_test.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | require "test_helper"
4 |
5 | MM2 = Minimap2
6 |
7 | class Minimap2Test < Minitest::Test
8 | def test_that_it_has_a_version_number
9 | refute_nil ::Minimap2::VERSION
10 | end
11 |
12 | # unique features of ruby bindings
13 |
14 | def test_execute_with_string_arg
15 | assert_equal 0, MM2.execute("--version")
16 | assert_equal 1, MM2.execute("--lh 3")
17 | # After executing the "--version" command, the verbosity is changed to 3.
18 | # To prevent test_get_verbose from failing, set it back to 1.
19 | MM2.verbose = 1
20 | end
21 |
22 | def test_if_minimap2_version_numbers_match
23 | begin
24 | out, err = capture_subprocess_io do
25 | pid = fork do
26 | MM2.execute("--version")
27 | end
28 | Process.waitpid(pid)
29 | end
30 | rescue NotImplementedError
31 | # Windows does not support fork.
32 | skip "Fork not supported on this platform"
33 | end
34 | assert_match(/^[\d.\-r]+\n/, out)
35 | # The version number of the gem should match the version number of the
36 | # Minimap2 shared library. Prevent version mismatch before release.
37 | assert_includes Minimap2::VERSION, out.split("-r")[0]
38 | assert_equal "", err
39 | end
40 |
41 | # mappy
42 |
43 | def test_fastx_read
44 | n1, s1, n2, s2 = File.readlines("ext/minimap2/test/q-inv.fa").map(&:chomp)
45 | names = [n1, n2].map { |n| n.sub(">", "") }
46 | seqs = [s1, s2]
47 | MM2.fastx_read("ext/minimap2/test/q-inv.fa") do |n, s|
48 | assert_equal names.shift, n
49 | assert_equal seqs.shift, s
50 | end
51 | # comment should be nil if there is no comment.
52 | MM2.fastx_read("ext/minimap2/test/q-inv.fa", comment: true) do |_n, _s, c|
53 | assert_nil c
54 | end
55 | end
56 |
57 | def test_fastx_read_comment
58 | require "tempfile"
59 | require "zlib"
60 | Tempfile.create("comment.fq.gz") do |fq|
61 | Zlib::GzipWriter.open(fq.path) do |gz|
62 | gz.write <<~FASTQ
63 | >chat katze
64 | CATCATCATCAT
65 | +
66 | GATOGATOGATO
67 | FASTQ
68 | end
69 | MM2.fastx_read(fq.path, comment: true) do |n, s, q, c|
70 | assert_equal "chat", n
71 | assert_equal "CATCATCATCAT", s
72 | assert_equal "GATOGATOGATO", q
73 | assert_equal "katze", c
74 | end
75 | end
76 | end
77 |
78 | def test_fastx_read_comment_enumerator
79 | require "tempfile"
80 | require "zlib"
81 | Tempfile.create("comment.fq.gz") do |fq|
82 | Zlib::GzipWriter.open(fq.path) do |gz|
83 | gz.write <<~FASTQ
84 | >chat katze
85 | CATCATCATCAT
86 | +
87 | GATOGATOGATO
88 | FASTQ
89 | end
90 | enum = MM2.fastx_read(fq.path, comment: true)
91 | arr = enum.to_a
92 | n, s, q, c = arr[0]
93 | assert_equal 1, arr.size
94 | assert_equal "chat", n
95 | assert_equal "CATCATCATCAT", s
96 | assert_equal "GATOGATOGATO", q
97 | assert_equal "katze", c
98 | end
99 | end
100 |
101 | def test_revcomp
102 | assert_equal "TCCCAAAGGGTTT", MM2.revcomp("AAACCCTTTGGGA")
103 | end
104 |
105 | def test_get_verbose
106 | assert_equal 1, MM2.verbose
107 | end
108 |
109 | def test_set_verbose
110 | assert_equal 3, MM2.verbose = 3
111 | assert_equal 3, MM2.verbose
112 | assert_equal 1, MM2.verbose = 1
113 | end
114 | end
115 |
--------------------------------------------------------------------------------
/test/test_helper.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 |
3 | $LOAD_PATH.unshift File.expand_path("../lib", __dir__)
4 | require "minimap2"
5 |
6 | require "minitest/autorun"
7 | require "minitest/pride"
8 |
--------------------------------------------------------------------------------