├── test
    ├── q2.fa
    ├── t2.fa
    └── t-inv.fa
├── pyproject.toml
├── tex
    ├── graphmap.eval
    ├── mm2.approx.eval
    ├── mm2.eval
    ├── blasr-mc.eval
    ├── Makefile
    ├── eval2roc.pl
    ├── ngmlr.eval
    ├── bowtie2-s3.sam.eval
    ├── minialign.eval
    ├── hs38-simu.sh
    ├── bwa.eval
    ├── bwa-s3.sam.eval
    ├── roc.gp
    ├── mm2-s3.sam.eval
    └── snap-s3.sam.eval
├── .gitmodules
├── .gitignore
├── MANIFEST.in
├── kthread.h
├── .github
    └── workflows
    │   └── ci.yaml
├── gpu
    ├── plrange.cuh
    ├── plscore.cuh
    ├── planalyze.cuh
    ├── plchain.h
    ├── orin32GB.json
    ├── a6000_config.json
    ├── mi210_over50k_config.json
    ├── gfx1030_config.json
    ├── mi210_below50k_config.json
    ├── gpu_config.json
    ├── gpu.mk
    ├── hipify.cuh
    ├── debug.h
    ├── plutils.h
    ├── plmem.cuh
    └── planalyze.cu
├── sdust.h
├── LICENSE.txt
├── code_of_conduct.md
├── python
    ├── minimap2.py
    ├── cmappy.pxd
    ├── cmappy.h
    └── README.rst
├── bseq.h
├── esterr.c
├── setup.py
├── FAQ.md
├── example.c
├── kalloc.h
├── splitidx.c
├── Makefile.simde
├── kvec.h
├── ketopt.h
├── seed.c
├── ksw2_dispatch.c
├── kdq.h
├── kthread.c
├── ksw2_ll_sse.c
├── misc.c
├── ksort.h
├── bseq.c
├── Makefile
├── sketch.c
├── pe.c
├── mmpriv.h
├── sdust.c
├── misc
    └── README.md
├── kalloc.c
└── ksw2.h


/test/q2.fa:
--------------------------------------------------------------------------------
1 | >q2
2 | GGACATCCCGATGGTGCAGTCCTACCTGTACGAAAGGAC
3 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel", "Cython"]
3 | 


--------------------------------------------------------------------------------
/test/t2.fa:
--------------------------------------------------------------------------------
1 | >t2
2 | GGACATCCCGATGGTGCAGgtGCTATTAAAGGTTCGTTTGTTCAACGATTAAagTCCTACCTGTACGAAAGGAC
3 | 


--------------------------------------------------------------------------------
/tex/graphmap.eval:
--------------------------------------------------------------------------------
1 | Q	40	31897	63	0.001975107
2 | Q	3	423	267	0.010210396
3 | Q	2	162	120	0.013853827
4 | Q	1	188	172	0.019038874
5 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/simde"]
2 | 	path = lib/simde
3 | 	url = https://github.com/nemequ/simde.git
4 | [submodule "cJSON"]
5 | 	path = cJSON
6 | 	url = https://github.com/DaveGamble/cJSON.git
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .cproject
 2 | .project
 3 | .*.swp
 4 | *.a
 5 | *.o
 6 | *.dSYM
 7 | minimap2
 8 | mappy.c
 9 | data
10 | .vscode/**
11 | test.sam
12 | *.sam
13 | Log/**
14 | debug/**
15 | verf
16 | trace
17 | ncu
18 | nsys
19 | *_output*
20 | workloads
21 | .cmake/**
22 | .depend


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include *.h
 2 | include Makefile
 3 | include ksw2_dispatch.c
 4 | include main.c
 5 | include README.md
 6 | include sse2neon/emmintrin.h
 7 | include python/cmappy.h
 8 | include python/cmappy.pxd
 9 | include python/mappy.pyx
10 | include python/README.rst
11 | 


--------------------------------------------------------------------------------
/kthread.h:
--------------------------------------------------------------------------------
 1 | #ifndef KTHREAD_H
 2 | #define KTHREAD_H
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n);
 9 | void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps);
10 | 
11 | #ifdef __cplusplus
12 | }
13 | #endif
14 | 
15 | #endif
16 | 


--------------------------------------------------------------------------------
/tex/mm2.approx.eval:
--------------------------------------------------------------------------------
 1 | Q	60	32084	0	0.000000000	32084
 2 | Q	24	318	2	0.000061725	32402
 3 | Q	11	98	2	0.000123077	32500
 4 | Q	8	37	2	0.000184405	32537
 5 | Q	7	37	3	0.000276294	32574
 6 | Q	6	40	3	0.000367940	32614
 7 | Q	5	34	2	0.000428816	32648
 8 | Q	4	37	5	0.000581306	32685
 9 | Q	3	28	6	0.000764222	32713
10 | Q	2	38	6	0.000946536	32751
11 | Q	1	50	21	0.001585318	32801
12 | Q	0	286	150	0.006105117	33087
13 | 


--------------------------------------------------------------------------------
/tex/mm2.eval:
--------------------------------------------------------------------------------
 1 | Q	60	32477	0	0.000000000	32477
 2 | Q	22	16	1	0.000030776	32493
 3 | Q	21	44	1	0.000061468	32537
 4 | Q	19	73	1	0.000091996	32610
 5 | Q	14	66	1	0.000122414	32676
 6 | Q	10	26	3	0.000214054	32702
 7 | Q	8	14	1	0.000244529	32716
 8 | Q	7	13	2	0.000305539	32729
 9 | Q	6	47	1	0.000335611	32776
10 | Q	3	10	1	0.000366010	32786
11 | Q	2	20	2	0.000426751	32806
12 | Q	1	248	94	0.003267381	33054
13 | Q	0	31	17	0.003778147	33085
14 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |     - master
 7 |   pull_request:
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       matrix:
14 |         compiler: [gcc, clang]
15 | 
16 |     steps:
17 |     - name: Checkout minimap2
18 |       uses: actions/checkout@v2
19 | 
20 |     - name: Compile with ${{ matrix.compiler }}
21 |       run:  make CC=${{ matrix.compiler }}
22 | 


--------------------------------------------------------------------------------
/tex/blasr-mc.eval:
--------------------------------------------------------------------------------
 1 | Q	60	32681	57	0.001744133
 2 | Q	39	3	1	0.001774569
 3 | Q	38	3	1	0.001804999
 4 | Q	35	5	1	0.001835311
 5 | Q	34	31	2	0.001894692
 6 | Q	20	11	2	0.001955154
 7 | Q	19	4	1	0.001985460
 8 | Q	15	29	5	0.002136296
 9 | Q	14	6	1	0.002166417
10 | Q	10	11	1	0.002196193
11 | Q	6	11	2	0.002256442
12 | Q	5	1	1	0.002286864
13 | Q	4	1	1	0.002317285
14 | Q	3	36	15	0.002771602
15 | Q	2	5	2	0.002832085
16 | Q	1	12	9	0.003105023
17 | Q	0	220	83	0.005594194
18 | 


--------------------------------------------------------------------------------
/tex/Makefile:
--------------------------------------------------------------------------------
 1 | .SUFFIXES: .gp .tex .eps .pdf .eps.gz
 2 | 
 3 | .eps.pdf:
 4 | 		epstopdf --outfile $@ $<
 5 | 
 6 | .eps.gz.pdf:
 7 | 		gzip -dc $< | epstopdf --filter > $@
 8 | 
 9 | .pdf.eps:
10 | 		pdftops -eps $< $@
11 | 
12 | all:minimap2.pdf
13 | 
14 | roc-color.eps:roc.gp
15 | 		gnuplot roc.gp
16 | 
17 | minimap2.pdf:minimap2.tex minimap2.bib roc-color.pdf
18 | 		pdflatex minimap2; bibtex minimap2; pdflatex minimap2; pdflatex minimap2;
19 | 
20 | clean:
21 | 		rm -fr *.toc *.aux *.bbl *.blg *.idx *.log *.out *~ minimap2.pdf
22 | 


--------------------------------------------------------------------------------
/gpu/plrange.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef _PLRANGE_CUH_
 2 | #define _PLRANGE_CUH_
 3 | 
 4 | #include "plmem.cuh"
 5 | #include <assert.h>
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | typedef __int32_t int32_t;
12 | 
13 | /* functions declaration */
14 | void plrange_upload_misc(Misc misc);
15 | void plrange_async_range_selection(deviceMemPtr* device_mem_ptr, cudaStream_t* stream);
16 | void plrange_sync_range_selection(deviceMemPtr* dev_mem, Misc misc);
17 | 
18 | extern range_kernel_config_t range_kernel_config;
19 | 
20 | 
21 | #ifdef __cplusplus
22 | }
23 | #endif
24 | 
25 | #endif  // _PLRANGE_CUH_
26 | 


--------------------------------------------------------------------------------
/gpu/plscore.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef _PLSCORE_CUH_
 2 | #define _PLSCORE_CUH_
 3 | 
 4 | #include "plmem.cuh"
 5 | #include "mmpriv.h"
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C"{
 9 | #endif
10 | 
11 | #define MM_QSPAN 15 
12 | 
13 | void plscore_upload_misc(Misc misc);
14 | void plscore_async_naive_forward_dp(deviceMemPtr* dev_mem, cudaStream_t* stream);
15 | void plscore_async_short_mid_forward_dp(deviceMemPtr* dev_mem,cudaStream_t* stream);
16 | void plscore_async_long_forward_dp(deviceMemPtr* dev_mem,cudaStream_t* stream);
17 | 
18 | extern score_kernel_config_t score_kernel_config;
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | #endif // _PLSCORE_CUH_


--------------------------------------------------------------------------------
/gpu/planalyze.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef __PLANALYZE_H__
 2 | #define __PLANALYZE_H__
 3 | 
 4 | /* Implement kernel performance analysis that requires extra device
 5 |  * synchronization. Disabled unless DEBUG_LEVEL is set to analyze.
 6 |  * Enable individual verbose prints in planalyze.cu
 7 |  */
 8 | 
 9 | #include "hipify.cuh"
10 | #include "plchain.h"
11 | #include "plutils.h"
12 | #include "plmem.cuh"
13 | #include "plscore.cuh"
14 | 
15 | 
16 | #ifdef DEBUG_CHECK
17 | void planalyze_short_kernel(stream_ptr_t stream, int uid, float throughput[]);
18 | void planalyze_long_kernel(stream_ptr_t stream, float* throughput);
19 | #endif // DEBUG_CHECK
20 | 
21 | #endif // __PLANALYZE_H__


--------------------------------------------------------------------------------
/sdust.h:
--------------------------------------------------------------------------------
 1 | #ifndef SDUST_H
 2 | #define SDUST_H
 3 | 
 4 | #include <stdint.h>
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | struct sdust_buf_s;
11 | typedef struct sdust_buf_s sdust_buf_t;
12 | 
13 | // the simple interface
14 | uint64_t *sdust(void *km, const uint8_t *seq, int l_seq, int T, int W, int *n);
15 | 
16 | // the following interface dramatically reduces heap allocations when sdust is called frequently.
17 | sdust_buf_t *sdust_buf_init(void *km);
18 | void sdust_buf_destroy(sdust_buf_t *buf);
19 | const uint64_t *sdust_core(const uint8_t *seq, int l_seq, int T, int W, int *n, sdust_buf_t *buf);
20 | 
21 | #ifdef __cplusplus
22 | }
23 | #endif
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/tex/eval2roc.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Turn per-MAPQ evaluation records into ROC points.
 4 | #
 5 | # Input: lines matching "Q<TAB>mapq<TAB>count<TAB>errors"; other lines are ignored.
 6 | # Output: "mapq<TAB>err/tot<TAB>tot/n", where tot and err are running sums of
 7 | # the count and error columns (each seeded with a 0.5 pseudo-count).
 8 | #
 9 | # Options:
10 | #   -n NUM   denominator of the third output column [33088]
11 | #   -s NUM   minimum growth of the running total between two output lines [100]
12 | 
13 | use strict;
14 | use warnings;
15 | use Getopt::Std;
16 | 
17 | my %opts = (n=>33088, s=>100);
18 | getopts('n:s:', \%opts); # -s must be declared here, otherwise the default above can never be overridden
19 | 
20 | my $pseudo = .5;
21 | my $tot = $pseudo;
22 | my $err = $pseudo;
23 | my $tot_last_out = -$opts{s}; # guarantees the first record is always printed
24 | my $state = 0; # 1 while the latest record has not been printed yet
25 | my $mapq = 0;
26 | while (<>) {
27 | 	chomp;
28 | 	if (/^Q\t(\d+)\t(\d+)\t(\d+)/) {
29 | 		$tot += $2;
30 | 		$err += $3;
31 | 		if ($tot - $tot_last_out >= $opts{s}) {
32 | 			print join("\t", $1, $err/$tot, $tot / $opts{n}), "\n";
33 | 			$tot_last_out = $tot;
34 | 			$state = 0;
35 | 		} else {
36 | 			$state = 1;
37 | 			$mapq = $1;
38 | 		}
39 | 	}
40 | }
41 | if ($state) { # flush the trailing records that did not reach the -s threshold
42 | 	print join("\t", $mapq, $err/$tot, $tot / $opts{n}), "\n";
43 | }
44 | 


--------------------------------------------------------------------------------
/gpu/plchain.h:
--------------------------------------------------------------------------------
 1 | #ifndef _PLCHAIN_H_
 2 | #define _PLCHAIN_H_
 3 | 
 4 | /* Range kernel configuration */
 5 | typedef struct range_kernel_config_t {
 6 |     int blockdim;           // number of threads in each block
 7 |     int cut_check_anchors;  // number of anchors to check around each cut
 8 |     int anchor_per_block;   // number of anchors assigned to one block = max_it * blockdim
 9 | } range_kernel_config_t;
10 | 
11 | /* Score generation kernel configuration */
12 | typedef struct score_kernel_config_t{
13 |     int micro_batch;        // NOTE(review): presumably the number of micro batches aggregated into one long kernel -- confirm
14 |     int short_blockdim;     // NOTE(review): presumably threads per block for the short kernel -- confirm
15 |     int long_blockdim;      // NOTE(review): presumably threads per block for the long kernel -- confirm
16 |     int mid_blockdim;       // NOTE(review): presumably threads per block for the mid kernel -- confirm
17 |     int short_griddim;      // NOTE(review): presumably number of blocks for the short kernel -- confirm
18 |     int long_griddim;       // NOTE(review): presumably number of blocks for the long kernel -- confirm
19 |     int mid_griddim;        // NOTE(review): presumably number of blocks for the mid kernel -- confirm
20 |     int cut_unit;           // NOTE(review): meaning is not evident from this header -- document at the point of use
21 |     int long_seg_cutoff;    // NOTE(review): presumably the threshold above which a segment is treated as long -- confirm units
22 |     int mid_seg_cutoff;     // NOTE(review): presumably the threshold above which a segment is treated as mid -- confirm units
23 | } score_kernel_config_t;
24 | 
25 | #endif // _PLCHAIN_H_


--------------------------------------------------------------------------------
/tex/ngmlr.eval:
--------------------------------------------------------------------------------
 1 | Q	60	23616	0	0.000000000
 2 | Q	45	3520	1	0.000036851
 3 | Q	41	1840	1	0.000069023
 4 | Q	37	328	2	0.000136500
 5 | Q	36	276	1	0.000169033
 6 | Q	35	480	1	0.000199601
 7 | Q	33	375	2	0.000262855
 8 | Q	31	178	2	0.000326659
 9 | Q	30	153	5	0.000487551
10 | Q	29	200	1	0.000516696
11 | Q	27	100	3	0.000611601
12 | Q	26	93	3	0.000706056
13 | Q	25	75	2	0.000768393
14 | Q	24	82	1	0.000798314
15 | Q	23	80	6	0.000987387
16 | Q	22	71	6	0.001175835
17 | Q	21	76	7	0.001394921
18 | Q	20	63	9	0.001676897
19 | Q	19	55	4	0.001800322
20 | Q	18	62	8	0.002048987
21 | Q	17	55	7	0.002265718
22 | Q	16	60	10	0.002575539
23 | Q	15	82	9	0.002850877
24 | Q	14	67	7	0.003063745
25 | Q	13	62	11	0.003401042
26 | Q	12	64	13	0.003799084
27 | Q	11	56	5	0.003947900
28 | Q	10	58	17	0.004468303
29 | Q	9	70	22	0.005139796
30 | Q	8	23	9	0.005414604
31 | Q	7	41	17	0.005933068
32 | Q	6	42	18	0.006480881
33 | Q	5	33	9	0.006751757
34 | Q	4	29	9	0.007022948
35 | Q	3	27	15	0.007478764
36 | Q	2	23	10	0.007781024
37 | Q	1	9	2	0.007840364
38 | Q	0	13	8	0.008083105
39 | 


--------------------------------------------------------------------------------
/tex/bowtie2-s3.sam.eval:
--------------------------------------------------------------------------------
 1 | Q	42	16872292	669	0.000039651	16872292
 2 | Q	40	835329	636	0.000073697	17707621
 3 | Q	31	6544	2	0.000073783	17714165
 4 | Q	30	8882	6	0.000074084	17723047
 5 | Q	27	68499	9	0.000074305	17791546
 6 | Q	26	132041	81	0.000078277	17923587
 7 | Q	25	129378	96	0.000083033	18052965
 8 | Q	24	92056	382	0.000103665	18145021
 9 | Q	23	14341	402	0.000125720	18159362
10 | Q	22	132838	146	0.000132789	18292200
11 | Q	21	122274	124	0.000138641	18414474
12 | Q	18	112183	103	0.000143361	18526657
13 | Q	17	126981	213	0.000153804	18653638
14 | Q	16	16356	208	0.000164810	18669994
15 | Q	15	42804	782	0.000206223	18712798
16 | Q	14	16026	318	0.000223025	18728824
17 | Q	12	170250	814	0.000264087	18899074
18 | Q	11	48351	1409	0.000337777	18947425
19 | Q	8	1843	311	0.000354156	18949268
20 | Q	7	62266	4435	0.000586276	19011534
21 | Q	6	413997	50057	0.003150647	19425531
22 | Q	5	404	58	0.003153568	19425935
23 | Q	4	704	154	0.003161381	19426639
24 | Q	3	1473	681	0.003196193	19428112
25 | Q	2	17541	16462	0.004039875	19445653
26 | Q	1	534344	354879	0.021693547	19979997
27 | Q	0	11939	9917	0.022176642	19991936
28 | U	8064
29 | 


--------------------------------------------------------------------------------
/gpu/orin32GB.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "//config is for": "orin32GB. Fits one micro-batch + 3% x 4 long buffer",
 3 |     "num_streams": 1,
 4 |     "min_n": 10, 
 5 |     "//min_n": "queries with less anchors will be handled on cpu",
 6 |     "long_seg_buffer_size": 10000000,
 7 |     "max_total_n": 5000000, 
 8 |     "max_read": 4000,
 9 |     "avg_read_n": 4000,
10 |     "//avg_read_n": "expect average number of anchors per read, not used if max_total_n and max_read are specified",
11 |     "range_kernel": {
12 |         "blockdim": 128,
13 |         "cut_check_anchors": 12,
14 |         "//cut_check_anchors": "Number of anchors to check to attempt a cut",
15 |         "anchor_per_block": 12800,
16 |         "//anchor_per_block": "Number of anchors each block handles. Must be int * blockdim"
17 |     },
18 |     "score_kernel": {
19 |         "//host Memory Warning: ": "make sure your host memory size is at least micro_batch * 48GB * 2 ",
20 |         "micro_batch": 8,
21 |         "mid_blockdim": 256,
22 |         "short_griddim": 128,
23 |         "long_griddim": 512,
24 |         "mid_griddim": 256,
25 |         "long_seg_cutoff": 20,
26 |         "mid_seg_cutoff": 3
27 |     }
28 | }
29 | 


--------------------------------------------------------------------------------
/gpu/a6000_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "//config is for": "a6000. Fits one micro-batch + 3% x 4 long buffer",
 3 |     "num_streams": 1,
 4 |     "min_n": 512, 
 5 |     "//min_n": "queries with less anchors will be handled on cpu",
 6 |     "long_seg_buffer_size": 500000000,
 7 |     "max_total_n": 1700000000, 
 8 |     "max_read": 170000,
 9 |     "avg_read_n": 10000,
10 |     "//avg_read_n": "expect average number of anchors per read, not used if max_total_n and max_read are specified",
11 |     "range_kernel": {
12 |         "blockdim": 512,
13 |         "cut_check_anchors": 10,
14 |         "//cut_check_anchors": "Number of anchors to check to attempt a cut",
15 |         "anchor_per_block": 32768,
16 |         "//anchor_per_block": "Number of anchors each block handles. Must be int * blockdim"
17 |     },
18 |     "score_kernel": {
19 |         "//host Memory Warning: ": "make sure your host memory size is at least micro_batch * 48GB * 2 ",
20 |         "micro_batch": 4,
21 |         "mid_blockdim": 512,
22 |         "short_griddim": 2688,
23 |         "long_griddim": 1024,
24 |         "mid_griddim": 2688,
25 |         "long_seg_cutoff": 20,
26 |         "mid_seg_cutoff": 3
27 |     }
28 | }


--------------------------------------------------------------------------------
/gpu/mi210_over50k_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "//config is for": "aac cloud. Fits one batch + 5% x 4 long buffer avg_read_n 10k",
 3 |     "num_streams": 1,
 4 |     "min_n": 512, 
 5 |     "//min_n": "queries with less anchors will be handled on cpu",
 6 |     "long_seg_buffer_size": 1117376000,
 7 |     "max_total_n": 2036880000, 
 8 |     "max_read": 2036880,
 9 |     "avg_read_n": 20000,
10 |     "//avg_read_n": "expect average number of anchors per read, not used if max_total_n and max_read are specified",
11 |     "range_kernel": {
12 |         "blockdim": 512,
13 |         "cut_check_anchors": 10,
14 |         "//cut_check_anchors": "Number of anchors to check to attempt a cut",
15 |         "anchor_per_block": 32768,
16 |         "//anchor_per_block": "Number of anchors each block handles. Must be int * blockdim"
17 |     },
18 |     "score_kernel": {
19 |         "micro_batch": 6,
20 |         "mid_blockdim": 512,
21 |         "short_griddim": 3328,
22 |         "mid_griddim": 3328,
23 |         "long_griddim": 104,
24 |         "//long reads benefit from less blocks": "long_griddim = num of CUs",
25 |         "long_seg_cutoff": 20,
26 |         "mid_seg_cutoff": 3
27 |     }
28 | }
29 | 


--------------------------------------------------------------------------------
/gpu/gfx1030_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "//config is for": "AMD Radeon RX 6800 XT on amdxfx. Fits one batch + 5% x 4 long buffer avg_read_n 10k",
 3 |     "num_streams": 1,
 4 |     "min_n": 512, 
 5 |     "//min_n": "queries with less anchors will be handled on cpu",
 6 |     "long_seg_buffer_size": 100000000,
 7 |     "max_total_n": 493440000, 
 8 |     "max_read": 493440,
 9 |     "avg_read_n": 20000,
10 |     "//avg_read_n": "expect average number of anchors per read, not used if max_total_n and max_read are specified",
11 |     "range_kernel": {
12 |         "blockdim": 512,
13 |         "cut_check_anchors": 10,
14 |         "//cut_check_anchors": "Number of anchors to check to attempt a cut",
15 |         "anchor_per_block": 32768,
16 |         "//anchor_per_block": "Number of anchors each block handles. Must be int * blockdim"
17 |     },
18 |     "score_kernel": {
19 |         "micro_batch": 4,
20 |         "mid_blockdim": 512,
21 |         "//blockdim config": "options are not used: static config specified at compile time (make ... LONG_BLOCK_SIZE=1024)",
22 |         "short_griddim": 2688,
23 |         "long_griddim": 144,
24 |         "mid_griddim": 2688,
25 |         "long_seg_cutoff": 20,
26 |         "mid_seg_cutoff": 3
27 |     }
28 | }


--------------------------------------------------------------------------------
/gpu/mi210_below50k_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "//config is for": "aac cloud. Fits one batch + 5% x 4 long buffer avg_read_n 10k",
 3 |     "num_streams": 1,
 4 |     "min_n": 512, 
 5 |     "//min_n": "queries with less anchors will be handled on cpu",
 6 |     "long_seg_buffer_size": 1117376000,
 7 |     "max_total_n": 2036880000, 
 8 |     "max_read": 2036880,
 9 |     "avg_read_n": 50000,
10 |     "//avg_read_n": "expect average number of anchors per read, not used if max_total_n and max_read are specified",
11 |     "range_kernel": {
12 |         "blockdim": 512,
13 |         "cut_check_anchors": 10,
14 |         "//cut_check_anchors": "Number of anchors to check to attempt a cut",
15 |         "anchor_per_block": 32768,
16 |         "//anchor_per_block": "Number of anchors each block handles. Must be int * blockdim"
17 |     },
18 |     "score_kernel": {
19 |         "micro_batch": 6,
20 |         "mid_blockdim": 512,
21 |         "//static options for mid_blockdim": "128/256/512/1024",
22 |         "short_griddim": 3328,
23 |         "mid_griddim": 3328,
24 |         "long_griddim": 208,
25 |         "//normal reads benefit from more blocks": "long_griddim = 2 * num of CUs",
26 |         "long_seg_cutoff": 20,
27 |         "mid_seg_cutoff": 3
28 |     }
29 | }
30 | 


--------------------------------------------------------------------------------
/tex/minialign.eval:
--------------------------------------------------------------------------------
 1 | Q	60	32070	190	0.005924540
 2 | Q	59	62	2	0.005975352
 3 | Q	58	37	5	0.006123908
 4 | Q	57	40	7	0.006333633
 5 | Q	56	39	6	0.006512032
 6 | Q	55	32	2	0.006567534
 7 | Q	54	54	2	0.006618420
 8 | Q	53	33	4	0.006735255
 9 | Q	52	39	2	0.006788866
10 | Q	51	48	3	0.006871264
11 | Q	50	34	2	0.006925634
12 | Q	49	32	3	0.007011070
13 | Q	48	35	2	0.007064967
14 | Q	47	36	4	0.007179896
15 | Q	46	23	1	0.007205495
16 | Q	45	25	1	0.007230614
17 | Q	44	17	3	0.007318716
18 | Q	43	17	2	0.007376121
19 | Q	42	31	5	0.007522016
20 | Q	41	25	4	0.007638486
21 | Q	40	26	4	0.007754541
22 | Q	39	35	2	0.007807258
23 | Q	37	18	4	0.007924896
24 | Q	36	13	3	0.008013162
25 | Q	35	15	2	0.008070411
26 | Q	34	20	3	0.008156805
27 | Q	33	11	1	0.008184501
28 | Q	32	15	3	0.008272003
29 | Q	31	25	1	0.008296107
30 | Q	29	8	1	0.008324472
31 | Q	28	7	2	0.008383452
32 | Q	27	9	2	0.008441894
33 | Q	26	30	2	0.008494888
34 | Q	23	2	1	0.008524710
35 | Q	22	11	3	0.008612846
36 | Q	20	23	3	0.008697760
37 | Q	19	6	1	0.008726479
38 | Q	18	8	1	0.008754658
39 | Q	16	6	1	0.008783354
40 | Q	13	2	1	0.008813108
41 | Q	12	4	2	0.008872604
42 | Q	11	7	2	0.008931275
43 | Q	10	4	3	0.009021009
44 | Q	9	6	4	0.009140436
45 | Q	8	6	3	0.009229559
46 | Q	7	5	1	0.009258419
47 | Q	6	8	3	0.009346925
48 | Q	4	8	5	0.009495872
49 | Q	3	17	8	0.009732801
50 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2018-     Dana-Farber Cancer Institute
 4 |               2017-2018 Broad Institute, Inc.
 5 |               2022      Advanced Micro Devices, Inc.
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining
 8 | a copy of this software and associated documentation files (the
 9 | "Software"), to deal in the Software without restriction, including
10 | without limitation the rights to use, copy, modify, merge, publish,
11 | distribute, sublicense, and/or sell copies of the Software, and to
12 | permit persons to whom the Software is furnished to do so, subject to
13 | the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be
16 | included in all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
23 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 | SOFTWARE.
26 | 


--------------------------------------------------------------------------------
/tex/hs38-simu.sh:
--------------------------------------------------------------------------------
 1 | ./pbsim --prefix pb-1 --depth 0.1 --sample-fastq m131017_060208_42213_c100579642550000001823095604021496_s1_p0.1.subreads.fastq --length-min 1000 --length-max 30000 --seed 11 hs38.fa
 2 | 
 3 | bin/mason_variator -ir hs38.fa -s 1 -ov hs38-s1.vcf --snp-rate 1e-3 --small-indel-rate 2e-4 --sv-indel-rate 0 --sv-inversion-rate 0 --sv-translocation-rate 0 --sv-duplication-rate 0 --max-small-indel-size 10
 4 | bin/mason_simulator -ir hs38.fa -iv hs38-s1.vcf -n 1000000 --seed 1 -o s1_1.fq -or s1_2.fq -oa s1.sam --illumina-prob-mismatch-scale 2.5
 5 | 
 6 | bin/mason_variator -ir hs38.fa -s 2 -ov hs38-s2.vcf --snp-rate 1e-3 --small-indel-rate 2e-4 --sv-indel-rate 0 --sv-inversion-rate 0 --sv-translocation-rate 0 --sv-duplication-rate 0 --max-small-indel-size 10
 7 | bin/mason_simulator -ir hs38.fa -iv hs38-s2.vcf -n 1000000 --seed 2 -o mason-s2_1.fq -or mason-s2_2.fq -oa mason-s2.sam --illumina-prob-mismatch-scale 2.5 --illumina-read-length 150
 8 | 
 9 | bin/mason_variator -ir hs38.fa -s 3 -ov hs38-s3.vcf --snp-rate 1e-3 --small-indel-rate 2e-4 --sv-indel-rate 0 --sv-inversion-rate 0 --sv-translocation-rate 0 --sv-duplication-rate 0 --max-small-indel-size 10
10 | bin/mason_simulator -ir hs38.fa -iv hs38-s3.vcf -n 10000000 --seed 3 -o mason-s3_1.fq -or mason-s3_2.fq -oa mason-s3.sam --illumina-prob-mismatch-scale 2.5 --illumina-read-length 150
11 | 


--------------------------------------------------------------------------------
/gpu/gpu_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "num_streams": 1,
 3 |     "//num_streams": "Must set to 1 for current implementation of mm2-gb",
 4 |     "min_n": 512, 
 5 |     "//min_n": "queries with less anchors will be handled on cpu",
 6 |     "long_seg_buffer_size": 100000000,
 7 |     "//long_seg_buffer_size": "maximum number of anchors to fit in the aggregated long segment buffer. ",
 8 |     "max_total_n": 500000000, 
 9 |     "max_read": 500000,
10 |      "//max_total_n, max_read": "maximum number of anchors / reads to fit in one micro batch. Make sure this fits in the device memory.",
11 |     "range_kernel": {
12 |         "blockdim": 512,
13 |         "cut_check_anchors": 10,
14 |         "//cut_check_anchors": "Number of anchors to check to attempt a cut",
15 |         "anchor_per_block": 32768,
16 |         "//anchor_per_block": "Number of anchors each block handles. Must be int * blockdim"
17 |     },
18 |     "score_kernel": {
19 |         "micro_batch": 4,
20 |         "//micro_batch": "Number of micro batches to aggregate into one long kernel. Make sure your host memory size is at least micro_batch * device mem size * 2",
21 |         "mid_blockdim": 512,
22 |         "short_griddim": 2688,
23 |         "long_griddim": 144,
24 |         "//long_griddim": "To achieve the best occupancy for the long kernel, long_griddim is usually optimal at 2* No. of CUs ",
25 |         "mid_griddim": 2688,
26 |         "long_seg_cutoff": 20,
27 |         "mid_seg_cutoff": 3
28 |     }
29 | }


--------------------------------------------------------------------------------
/tex/bwa.eval:
--------------------------------------------------------------------------------
 1 | Q	60	31721	27	0.000851171
 2 | Q	59	54	4	0.000975610
 3 | Q	58	29	5	0.001131933
 4 | Q	57	21	2	0.001194030
 5 | Q	56	14	4	0.001319137
 6 | Q	55	22	6	0.001506544
 7 | Q	54	12	4	0.001631475
 8 | Q	53	16	3	0.001724733
 9 | Q	51	10	1	0.001755541
10 | Q	50	10	1	0.001786330
11 | Q	49	11	3	0.001879699
12 | Q	47	8	2	0.001941869
13 | Q	46	17	1	0.001972140
14 | Q	44	8	3	0.002065534
15 | Q	43	10	1	0.002096174
16 | Q	42	13	1	0.002126595
17 | Q	41	14	3	0.002219444
18 | Q	40	13	2	0.002281036
19 | Q	38	17	4	0.002404747
20 | Q	37	15	4	0.002528484
21 | Q	36	12	1	0.002558742
22 | Q	35	19	3	0.002650783
23 | Q	34	12	3	0.002743313
24 | Q	33	7	1	0.002773882
25 | Q	32	21	3	0.002865508
26 | Q	31	11	2	0.002926799
27 | Q	30	14	3	0.003018891
28 | Q	29	17	1	0.003048401
29 | Q	28	11	2	0.003109549
30 | Q	27	20	5	0.003262998
31 | Q	26	11	1	0.003292948
32 | Q	25	14	4	0.003415725
33 | Q	24	16	5	0.003569212
34 | Q	23	43	6	0.003750426
35 | Q	21	15	1	0.003779664
36 | Q	20	29	7	0.003992943
37 | Q	19	22	2	0.004052089
38 | Q	18	28	4	0.004172204
39 | Q	16	25	5	0.004323390
40 | Q	15	24	5	0.004474480
41 | Q	14	25	5	0.004625204
42 | Q	13	23	3	0.004714365
43 | Q	12	22	1	0.004741963
44 | Q	11	32	11	0.005075674
45 | Q	10	35	7	0.005285315
46 | Q	9	32	12	0.005648503
47 | Q	8	33	8	0.005888126
48 | Q	7	39	7	0.006095506
49 | Q	6	42	14	0.006515953
50 | Q	5	38	15	0.006966725
51 | Q	4	37	12	0.007325113
52 | Q	3	49	18	0.007862737
53 | Q	2	63	21	0.008486434
54 | Q	1	55	27	0.009292156
55 | Q	0	153	77	0.011576593
56 | 


--------------------------------------------------------------------------------
/code_of_conduct.md:
--------------------------------------------------------------------------------
 1 | ## Contributor Code of Conduct
 2 | 
 3 | As contributors and maintainers of this project, we pledge to respect all
 4 | people who contribute through reporting issues, posting feature requests,
 5 | updating documentation, submitting pull requests or patches, and other
 6 | activities.
 7 | 
 8 | We are committed to making participation in this project a harassment-free
 9 | experience for everyone, regardless of level of experience, gender, gender
10 | identity and expression, sexual orientation, disability, personal appearance,
11 | body size, race, age, or religion.
12 | 
13 | Examples of unacceptable behavior by participants include the use of sexual
14 | language or imagery, derogatory comments or personal attacks, trolling, public
15 | or private harassment, insults, or other unprofessional conduct.
16 | 
17 | Project maintainers have the right and responsibility to remove, edit, or
18 | reject comments, commits, code, wiki edits, issues, and other contributions
19 | that are not aligned to this Code of Conduct. Project maintainers or
20 | contributors who do not follow the Code of Conduct may be removed from the
21 | project team.
22 | 
23 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
24 | reported by opening an issue or contacting the maintainer via email.
25 | 
26 | This Code of Conduct is adapted from the [Contributor Covenant][cc], [version
27 | 1.0.0][v1].
28 | 
29 | [cc]: http://contributor-covenant.org/
30 | [v1]: http://contributor-covenant.org/version/1/0/0/
31 | 


--------------------------------------------------------------------------------
/python/minimap2.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import sys
 4 | import getopt
 5 | import mappy as mp
 6 | 
 7 | def main(argv):
 8 | 	opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:c")
 9 | 	if len(args) < 2:
10 | 		print("Usage: minimap2.py [options] <ref.fa>|<ref.mmi> <query.fq>")
11 | 		print("Options:")
12 | 		print("  -x STR      preset: sr, map-pb, map-ont, asm5, asm10 or splice")
13 | 		print("  -n INT      mininum number of minimizers")
14 | 		print("  -m INT      mininum chaining score")
15 | 		print("  -k INT      k-mer length")
16 | 		print("  -w INT      minimizer window length")
17 | 		print("  -r INT      band width")
18 | 		print("  -c          output the cs tag")
19 | 		sys.exit(1)
20 | 
21 | 	preset = min_cnt = min_sc = k = w = bw = None
22 | 	out_cs = False
23 | 	for opt, arg in opts:
24 | 		if opt == '-x': preset = arg
25 | 		elif opt == '-n': min_cnt = int(arg)
26 | 		elif opt == '-m': min_chain_score = int(arg)
27 | 		elif opt == '-r': bw = int(arg)
28 | 		elif opt == '-k': k = int(arg)
29 | 		elif opt == '-w': w = int(arg)
30 | 		elif opt == '-c': out_cs = True
31 | 
32 | 	a = mp.Aligner(args[0], preset=preset, min_cnt=min_cnt, min_chain_score=min_sc, k=k, w=w, bw=bw)
33 | 	if not a: raise Exception("ERROR: failed to load/build index file '{}'".format(args[0]))
34 | 	for name, seq, qual in mp.fastx_read(args[1]): # read one sequence
35 | 		for h in a.map(seq, cs=out_cs): # traverse hits
36 | 			print('{}\t{}\t{}'.format(name, len(seq), h))
37 | 
38 | if __name__ == "__main__":
39 | 	main(sys.argv)
40 | 


--------------------------------------------------------------------------------
/tex/bwa-s3.sam.eval:
--------------------------------------------------------------------------------
 1 | Q	60	18784147	3	0.000000160	18784147
 2 | Q	52	19002	1	0.000000213	18803149
 3 | Q	50	7152	2	0.000000319	18810301
 4 | Q	49	6797	1	0.000000372	18817098
 5 | Q	48	52188	2	0.000000477	18869286
 6 | Q	47	48775	3	0.000000634	18918061
 7 | Q	46	19447	2	0.000000739	18937508
 8 | Q	45	25983	3	0.000000896	18963491
 9 | Q	44	13455	1	0.000000949	18976946
10 | Q	43	14573	2	0.000001053	18991519
11 | Q	42	8697	4	0.000001263	19000216
12 | Q	41	8645	2	0.000001368	19008861
13 | Q	40	176603	75	0.000005264	19185464
14 | Q	38	2503	2	0.000005368	19187967
15 | Q	37	4117	3	0.000005523	19192084
16 | Q	36	2924	16	0.000006356	19195008
17 | Q	35	2323	8	0.000006772	19197331
18 | Q	34	2344	10	0.000007292	19199675
19 | Q	33	4279	6	0.000007603	19203954
20 | Q	32	2092	4	0.000007810	19206046
21 | Q	31	2625	11	0.000008382	19208671
22 | Q	30	2828	13	0.000009057	19211499
23 | Q	29	1581	1	0.000009108	19213080
24 | Q	28	1543	6	0.000009420	19214623
25 | Q	27	70916	223	0.000020948	19285539
26 | Q	26	1288	16	0.000021777	19286827
27 | Q	25	25551	122	0.000028065	19312378
28 | Q	24	14345	84	0.000032390	19326723
29 | Q	23	7308	87	0.000036878	19334031
30 | Q	22	8358	125	0.000043325	19342389
31 | Q	21	4836	71	0.000046983	19347225
32 | Q	20	5888	123	0.000053325	19353113
33 | Q	19	4656	83	0.000057600	19357769
34 | Q	18	3948	87	0.000062081	19361717
35 | Q	17	4418	114	0.000067954	19366135
36 | Q	16	4226	131	0.000074702	19370361
37 | Q	15	5760	164	0.000083144	19376121
38 | Q	14	4697	257	0.000096384	19380818
39 | Q	13	5246	313	0.000112503	19386064
40 | Q	12	4170	241	0.000124908	19390234
41 | Q	11	4095	304	0.000140557	19394329
42 | Q	10	3857	360	0.000159087	19398186
43 | Q	9	5300	438	0.000181617	19403486
44 | Q	8	4206	572	0.000211050	19407692
45 | Q	7	4676	787	0.000251541	19412368
46 | Q	6	3923	688	0.000286924	19416291
47 | Q	5	3294	708	0.000323333	19419585
48 | Q	4	2936	693	0.000358965	19422521
49 | Q	3	3928	816	0.000400897	19426449
50 | Q	2	2613	810	0.000442533	19429062
51 | Q	1	3515	1188	0.000503587	19432577
52 | Q	0	567423	376636	0.019321100	20000000
53 | 


--------------------------------------------------------------------------------
/bseq.h:
--------------------------------------------------------------------------------
 1 | #ifndef MM_BSEQ_H
 2 | #define MM_BSEQ_H
 3 | 
 4 | #include <stdint.h>
 5 | #include <string.h>
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | struct mm_bseq_file_s;
12 | typedef struct mm_bseq_file_s mm_bseq_file_t;
13 | 
14 | typedef struct {
15 | 	int l_seq, rid;
16 | 	char *name, *seq, *qual, *comment;
17 | } mm_bseq1_t;
18 | 
19 | mm_bseq_file_t *mm_bseq_open(const char *fn);
20 | void mm_bseq_close(mm_bseq_file_t *fp);
21 | mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_);
22 | mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int frag_mode, int *n_);
23 | mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int *n_);
24 | mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_);
25 | mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int *n_);
26 | int mm_bseq_eof(mm_bseq_file_t *fp);
27 | 
28 | extern unsigned char seq_nt4_table[256];
29 | extern unsigned char seq_comp_table[256];
30 | 
31 | static inline int mm_qname_len(const char *s) // length of name s, not counting a trailing "/<digit>" suffix if one is present
32 | {
33 | 	int l;
34 | 	l = strlen(s);
35 | 	return l >= 3 && s[l-1] >= '0' && s[l-1] <= '9' && s[l-2] == '/'? l - 2 : l; // suffix is dropped only when the name has at least 3 characters
36 | }
37 | 
38 | static inline int mm_qname_same(const char *s1, const char *s2) // non-zero if s1 and s2 are equal once a trailing "/<digit>" suffix is ignored on each
39 | {
40 | 	int l1, l2;
41 | 	l1 = mm_qname_len(s1);
42 | 	l2 = mm_qname_len(s2);
43 | 	return (l1 == l2 && strncmp(s1, s2, l1) == 0); // equal trimmed lengths and identical bytes over that length
44 | }
45 | 
46 | static inline void mm_revcomp_bseq(mm_bseq1_t *s) // reverse-complement s->seq in place and, when s->qual is set, reverse s->qual to match
47 | {
48 | 	int i, t, l = s->l_seq;
49 | 	for (i = 0; i < l>>1; ++i) { // swap symmetric pairs, mapping each base through seq_comp_table
50 | 		t = s->seq[l - i - 1];
51 | 		s->seq[l - i - 1] = seq_comp_table[(uint8_t)s->seq[i]];
52 | 		s->seq[i] = seq_comp_table[(uint8_t)t]; // cast as on the line above: a plain char may be negative and must not index the 256-entry table
53 | 	}
54 | 	if (l&1) s->seq[l>>1] = seq_comp_table[(uint8_t)s->seq[l>>1]]; // odd length: the middle base is mapped in place
55 | 	if (s->qual) // qualities are only reversed, not mapped through the table
56 | 		for (i = 0; i < l>>1; ++i)
57 | 			t = s->qual[l - i - 1], s->qual[l - i - 1] = s->qual[i], s->qual[i] = t;
58 | }
59 | 
60 | #ifdef __cplusplus
61 | }
62 | #endif
63 | 
64 | #endif
65 | 


--------------------------------------------------------------------------------
/esterr.c:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdio.h>
 3 | #include <stdlib.h>
 4 | #include <assert.h>
 5 | #include "mmpriv.h"
 6 | 
 7 | static inline int32_t get_for_qpos(int32_t qlen, const mm128_t *a) // query position of anchor a, expressed on the forward strand of a query of length qlen
 8 | {
 9 | 	int32_t x = (int32_t)a->y; // low 32 bits of y: the position as stored in the anchor
10 | 	int32_t q_span = a->y>>32 & 0xff; // bits 32-39 of y (the query span, going by the variable name)
11 | 	if (a->x>>63) // top bit of x set; presumably marks a reverse-strand anchor — confirm against the mm128_t layout
12 | 		x = qlen - 1 - (x + 1 - q_span); // revert the position to the forward strand of query
13 | 	return x;
14 | }
15 | 
16 | static int get_mini_idx(int qlen, const mm128_t *a, int32_t n, const uint64_t *mini_pos) // index into mini_pos[0..n) whose low 32 bits equal the forward query position of a; -1 if none
17 | {
18 | 	int32_t x, L = 0, R = n - 1;
19 | 	x = get_for_qpos(qlen, a);
20 | 	while (L <= R) { // binary search; assumes mini_pos is sorted ascending by its low 32 bits — TODO confirm at the caller
21 | 		int32_t m = ((uint64_t)L + R) >> 1; // midpoint computed in 64 bits so that L + R cannot overflow
22 | 		int32_t y = (int32_t)mini_pos[m]; // only the low 32 bits of each entry take part in the comparison
23 | 		if (y < x) L = m + 1;
24 | 		else if (y > x) R = m - 1;
25 | 		else return m;
26 | 	}
27 | 	return -1; // no entry with that position
28 | }
29 | 
30 | void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos) // sets regs[i].div for each of the n_regs regions from how many of the query's n minimizers its anchors match
31 | {
32 | 	int i;
33 | 	uint64_t sum_k = 0;
34 | 	float avg_k;
35 | 
36 | 	if (n == 0) return; // no minimizers: regs[] is left untouched
37 | 	for (i = 0; i < n; ++i)
38 | 		sum_k += mini_pos[i] >> 32 & 0xff; // bits 32-39 of each mini_pos entry are summed
39 | 	avg_k = (float)sum_k / n; // mean of those per-minimizer values
40 | 
41 | 	for (i = 0; i < n_regs; ++i) {
42 | 		mm_reg1_t *r = &regs[i];
43 | 		int32_t st, en, j, k, n_match, n_tot, l_ref;
44 | 		r->div = -1.0f; // value kept when the region is skipped below
45 | 		if (r->cnt == 0) continue;
46 | 		st = en = get_mini_idx(qlen, r->rev? &a[r->as + r->cnt - 1] : &a[r->as], n, mini_pos); // locate the region's first anchor (the last one in a[] when r->rev) in mini_pos
47 | 		if (st < 0) {
48 | 			if (mm_verbose >= 2)
49 | 				fprintf(stderr, "[WARNING] logic inconsistency in mm_est_err(). Please contact the developer.\n");
50 | 			continue;
51 | 		}
52 | 		l_ref = mi->seq[r->rid].len;
53 | 		for (k = 1, j = st + 1, n_match = 1; j < n && k < r->cnt; ++j) { // walk mini_pos forward; k advances only when the k-th anchor sits at mini_pos[j]
54 | 			int32_t x;
55 | 			x = get_for_qpos(qlen, r->rev? &a[r->as + r->cnt - 1 - k] : &a[r->as + k]);
56 | 			if (x == (int32_t)mini_pos[j])
57 | 				++k, en = j, ++n_match;
58 | 		}
59 | 		n_tot = en - st + 1; // number of mini_pos entries between the first and last matched anchor, inclusive
60 | 		if (r->qs > avg_k && r->rs > avg_k) ++n_tot; // more than avg_k bases precede the region on both query and reference: count one extra entry
61 | 		if (qlen - r->qs > avg_k && l_ref - r->re > avg_k) ++n_tot; // same adjustment for the other end; NOTE(review): this tests r->qs, not r->qe — confirm that is intended
62 | 		r->div = n_match >= n_tot? 0.0f : (float)(1.0 - pow((double)n_match / n_tot, 1.0 / avg_k)); // 1 - (n_match/n_tot)^(1/avg_k), or 0 when every entry matched
63 | 	}
64 | }
65 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | try:
 2 | 	from setuptools import setup, Extension
 3 | 	from setuptools.command.build_ext import build_ext
 4 | except ImportError:
 5 | 	from distutils.core import setup
 6 | 	from distutils.extension import Extension
 7 | 	from distutils.command.build_ext import build_ext
 8 | 
 9 | import sys, platform, subprocess
10 | 
11 | 
12 | def readme(): # returns the full text of python/README.rst; the path is relative to the current working directory
13 | 	with open('python/README.rst') as f:
14 | 		return f.read()
15 | 
16 | 
17 | class LibMM2Build(build_ext): # build_ext command that runs `make libminimap2.a` before the regular extension build
18 | 	# Uses Makefile to build library, avoids duplicating logic
19 | 	# determining which objects to compile but does require
20 | 	# end users to have Make (since precompiled wheels are not
21 | 	# distributed on PyPI).
22 | 	def run(self):
23 | 		def compile_libminimap2(*args, **kwargs): # args are appended to the make command line; kwargs are accepted but unused
24 | 			cmd = ['make', 'libminimap2.a'] + list(args)
25 | 			subprocess.check_call(cmd) # raises CalledProcessError when make exits with a non-zero status
26 | 		options = []
27 | 		if platform.machine() in ["aarch64", "arm64"]: # 64-bit ARM hosts: pass the arm_neon/aarch64 switches to make
28 | 			options = ["arm_neon=1", "aarch64=1"]
29 | 		self.execute(
30 | 			compile_libminimap2, options,
31 | 			'Compiling libminimap2 using Makefile')
32 | 		build_ext.run(self) # the static library is built first, then the normal build_ext step runs
33 | 
34 | 
35 | setup(
36 | 	name = 'mappy',
37 | 	version = '2.24',
38 | 	url = 'https://github.com/lh3/minimap2',
39 | 	description = 'Minimap2 python binding',
40 | 	long_description = readme(),
41 | 	author = 'Heng Li',
42 | 	author_email = 'lh3@me.com',
43 | 	license = 'MIT',
44 | 	keywords = 'sequence-alignment',
45 | 	scripts = ['python/minimap2.py'],
46 | 	cmdclass = {'build_ext': LibMM2Build},
47 | 	ext_modules = [
48 | 		Extension(
49 | 			'mappy',
50 | 			sources = ['python/mappy.pyx'],
51 | 			depends = ['python/cmappy.h', 'python/cmappy.pxd'],
52 | 			include_dirs = ['.'],
53 | 			extra_objects = ['libminimap2.a'],
54 | 			libraries = ['z', 'm', 'pthread'])],
55 | 	classifiers = [
56 | 		'Development Status :: 5 - Production/Stable',
57 | 		'License :: OSI Approved :: MIT License',
58 | 		'Operating System :: POSIX',
59 | 		'Programming Language :: C',
60 | 		'Programming Language :: Cython',
61 | 		'Programming Language :: Python :: 2.7',
62 | 		'Programming Language :: Python :: 3',
63 | 		'Intended Audience :: Science/Research',
64 | 		'Topic :: Scientific/Engineering :: Bio-Informatics'],
65 | 	setup_requires=["cython"])
66 | 


--------------------------------------------------------------------------------
/FAQ.md:
--------------------------------------------------------------------------------
 1 | #### 1. Alignment different with option `-a` or `-c`?
 2 | 
 3 | Without `-a`, `-c` or `--cs`, minimap2 only finds *approximate* mapping
 4 | locations without detailed base alignment. In particular, the start and end
 5 | positions of the alignment are imprecise. With one of those options, minimap2
 6 | will perform base alignment, which is generally more accurate but is much
 7 | slower.
 8 | 
 9 | #### 2. How to map Illumina short reads to noisy long reads?
10 | 
11 | No good solutions. The better approach is to assemble short reads into contigs
12 | and then map noisy reads to contigs.
13 | 
14 | #### 3. The output SAM doesn't have a header.
15 | 
16 | By default, minimap2 indexes 4 billion reference bases (4Gb) in a batch and maps
17 | all reads against each reference batch. Given a reference longer than 4Gb,
18 | minimap2 is unable to see all the sequences and thus can't produce a correct
19 | SAM header. In this case, minimap2 doesn't output any SAM header. There are two
20 | solutions to this issue. First, you may increase option `-I` to, for example,
21 | `-I8g` to index more reference bases in a batch. This is preferred if your
22 | machine has enough memory. Second, if your machine doesn't have enough memory
23 | to hold the reference index, you can use the `--split-prefix` option in a
24 | command line like:
25 | ```sh
26 | minimap2 -ax map-ont --split-prefix=tmp ref.fa reads.fq
27 | ```
28 | This second approach uses less memory, but it is slower and requires temporary
29 | disk space.
30 | 
31 | #### 4. The output SAM is malformatted.
32 | 
33 | This typically happens when you use nohup to wrap a minimap2 command line.
34 | Nohup is discouraged as it breaks piping. If you have to use nohup, please
35 | specify an output file with option `-o`.
36 | 
37 | #### 5. How to output one alignment per read?
38 | 
39 | You can use `--secondary=no` to suppress secondary alignments (aka multiple
40 | mappings), but you can't suppress supplementary alignment (aka split or
41 | chimeric alignment) this way. You can use samtools to filter out these
42 | alignments:
43 | ```sh
44 | minimap2 -ax map-ont ref.fa reads.fq | samtools view -F0x900
45 | ```
46 | However, this is discouraged as supplementary alignment is informative.
47 | 


--------------------------------------------------------------------------------
/tex/roc.gp:
--------------------------------------------------------------------------------
 1 | set t po eps enh co so "Helvetica,26"
 2 | 
 3 | set style line 1 lt 1 pt 1 lc rgb "#e41a1c" lw 2;
 4 | set style line 2 lt 1 pt 2 lc rgb "#377eb8" lw 2;
 5 | set style line 3 lt 1 pt 3 lc rgb "#4daf4a" lw 2;
 6 | set style line 4 lt 1 pt 4 lc rgb "#984ea3" lw 2;
 7 | set style line 5 lt 1 pt 6 lc rgb "#ff7f00" lw 2;
 8 | set style line 6 lt 1 pt 8 lc rgb "#f781bf" lw 2;
 9 | 
10 | set out "roc-color.eps"
11 | 
12 | set pointsize 2.0
13 | set size 1.59,1.04
14 | set multiplot layout 1,2
15 | 
16 | set label "(a)" at graph -0.245,1.06 font "Helvetica-bold,40"
17 | set xlab "Error rate of mapped PacBio reads"
18 | set ylab "Fraction of mapped reads" off +1.8
19 | set ytics 0.02
20 | set yran [0.9:1]
21 | 
22 | set size 0.8,1
23 | set log x
24 | set format x "10^{%L}"
25 | set key bot right
26 | plot "<./eval2roc.pl blasr-mc.eval" u 2:3 t "blasr-mc" w lp ls 4, \
27 |      "<./eval2roc.pl bwa.eval" u 2:3 t "bwa-mem" w lp ls 2, \
28 |      "<./eval2roc.pl graphmap.eval" u 2:3 t "graphmap" w lp ls 3, \
29 |      "<./eval2roc.pl minialign.eval" u 2:3 t "minialign" w lp ls 1, \
30 |      "<./eval2roc.pl mm2.eval" u 2:3 t "minimap2" w lp ls 6, \
31 |      "<./eval2roc.pl ngmlr.eval" u 2:3 t "ngm-lr" w lp ls 5
32 | unset label
33 | 
34 | set origin 0.8,0
35 | set size 0.79,1
36 | set label "(b)" at graph -0.245,1.06 font "Helvetica-bold,40"
37 | set xlab "Error rate of mapped short reads"
38 | 
39 | set key top left
40 | plot "<./eval2roc.pl -n2e7 bowtie2-s3.sam.eval" u 2:3 t "bowtie2" w lp ls 5, \
41 | 	 "<./eval2roc.pl -n2e7 bwa-s3.sam.eval" u 2:3 t "bwa-mem" w lp ls 2, \
42 | 	 "<./eval2roc.pl -n2e7 mm2-s3.sam.eval" u 2:3 t "minimap2" w lp ls 6, \
43 | 	 "<./eval2roc.pl -n2e7 snap-s3.sam.eval" u 2:3 t "snap" w lp ls 3
44 | 
45 | #unset log
46 | #unset format
47 | #unset key
48 | #set log y
49 | #set ylab "Accumulative mapping error rate" off +0
50 | #set xlab "Mapping quality"
51 | #set yran [1e-5:0.1]
52 | #set ytics 1e-5,0.1
53 | #set format y "10^{%L}"
54 | #set xran [60:0] reverse
55 | #plot "<./eval2roc.pl blasr-mc.eval" u 1:2 w lp ls 4, \
56 | #     "<./eval2roc.pl bwa.eval" u 1:2 t "bwa-mem" w lp ls 2, \
57 | #     "<./eval2roc.pl graphmap.eval" u 1:2 t "graphmap" w lp ls 3, \
58 | #     "<./eval2roc.pl minialign.eval" u 1:2 t "minialign" w lp ls 1, \
59 | #     "<./eval2roc.pl mm2.eval" u 1:2 t "minimap2" w lp ls 6, \
60 | #     "<./eval2roc.pl ngmlr.eval" u 1:2 t "ngm-lr" w lp ls 5
61 | 


--------------------------------------------------------------------------------
/tex/mm2-s3.sam.eval:
--------------------------------------------------------------------------------
 1 | Q	60	18579866	27	0.000001453	18579866
 2 | Q	59	27087	4	0.000001666	18606953
 3 | Q	58	21435	1	0.000001718	18628388
 4 | Q	57	45663	3	0.000001874	18674051
 5 | Q	56	36031	2	0.000001978	18710082
 6 | Q	55	18499	2	0.000002082	18728581
 7 | Q	54	14754	2	0.000002187	18743335
 8 | Q	53	25541	2	0.000002291	18768876
 9 | Q	52	26397	5	0.000002554	18795273
10 | Q	51	15090	3	0.000002711	18810363
11 | Q	50	13425	11	0.000003294	18823788
12 | Q	49	15175	2	0.000003397	18838963
13 | Q	48	19407	4	0.000003606	18858370
14 | Q	47	11538	16	0.000004452	18869908
15 | Q	46	12558	17	0.000005349	18882466
16 | Q	45	40362	28	0.000006817	18922828
17 | Q	44	10465	13	0.000007500	18933293
18 | Q	43	10098	20	0.000008552	18943391
19 | Q	42	10682	19	0.000009549	18954073
20 | Q	41	9823	11	0.000010125	18963896
21 | Q	40	9685	16	0.000010963	18973581
22 | Q	39	10273	18	0.000011905	18983854
23 | Q	38	9515	18	0.000012847	18993369
24 | Q	37	9474	27	0.000014261	19002843
25 | Q	36	10430	25	0.000015568	19013273
26 | Q	35	9241	34	0.000017348	19022514
27 | Q	34	9162	31	0.000018968	19031676
28 | Q	33	10164	49	0.000021532	19041840
29 | Q	32	9152	55	0.000024408	19050992
30 | Q	31	9252	35	0.000026233	19060244
31 | Q	30	9872	55	0.000029103	19070116
32 | Q	29	8938	65	0.000032496	19079054
33 | Q	28	8951	73	0.000036306	19088005
34 | Q	27	9949	95	0.000041261	19097954
35 | Q	26	9784	97	0.000046316	19107738
36 | Q	25	10126	97	0.000051366	19117864
37 | Q	24	11260	123	0.000057765	19129124
38 | Q	23	10047	114	0.000063691	19139171
39 | Q	22	9661	123	0.000070083	19148832
40 | Q	21	10339	168	0.000078813	19159171
41 | Q	20	17928	193	0.000088804	19177099
42 | Q	19	9842	193	0.000098817	19186941
43 | Q	18	14737	247	0.000111605	19201678
44 | Q	17	10218	238	0.000123934	19211896
45 | Q	16	10271	242	0.000136457	19222167
46 | Q	15	12241	333	0.000153683	19234408
47 | Q	14	9189	336	0.000171070	19243597
48 | Q	13	9493	515	0.000197734	19253090
49 | Q	12	11502	743	0.000236185	19264592
50 | Q	11	8211	507	0.000262390	19272803
51 | Q	10	9133	606	0.000293695	19281936
52 | Q	9	10014	931	0.000341801	19291950
53 | Q	8	8436	698	0.000377816	19300386
54 | Q	7	8443	705	0.000414163	19308829
55 | Q	6	10203	944	0.000462808	19319032
56 | Q	5	6936	756	0.000501760	19325968
57 | Q	4	6732	843	0.000545190	19332700
58 | Q	3	8215	1104	0.000602040	19340915
59 | Q	2	21201	5440	0.000882342	19362116
60 | Q	1	82328	22186	0.002019600	19444444
61 | Q	0	553853	371953	0.020562901	19998297
62 | U	1703
63 | 


--------------------------------------------------------------------------------
/tex/snap-s3.sam.eval:
--------------------------------------------------------------------------------
 1 | Q	60	18993268	10320	0.000543350	18993268
 2 | Q	59	33156	216	0.000553756	19026424
 3 | Q	58	29982	295	0.000568365	19056406
 4 | Q	57	9412	278	0.000582666	19065818
 5 | Q	56	11012	228	0.000594281	19076830
 6 | Q	55	9968	235	0.000606283	19086798
 7 | Q	54	8602	292	0.000621301	19095400
 8 | Q	53	6094	259	0.000634662	19101494
 9 | Q	52	5026	257	0.000647946	19106520
10 | Q	51	4278	224	0.000659522	19110798
11 | Q	50	3682	178	0.000668708	19114480
12 | Q	49	2750	156	0.000676772	19117230
13 | Q	48	2314	112	0.000682548	19119544
14 | Q	47	2056	96	0.000687495	19121600
15 | Q	46	1658	62	0.000690677	19123258
16 | Q	45	1492	74	0.000694493	19124750
17 | Q	44	1150	56	0.000697379	19125900
18 | Q	43	1062	48	0.000699850	19126962
19 | Q	42	976	60	0.000702951	19127938
20 | Q	41	884	36	0.000704800	19128822
21 | Q	40	708	52	0.000707493	19129530
22 | Q	39	870	26	0.000708819	19130400
23 | Q	38	598	26	0.000710156	19130998
24 | Q	37	542	34	0.000711913	19131540
25 | Q	36	846	50	0.000714495	19132386
26 | Q	35	590	50	0.000717087	19132976
27 | Q	34	550	42	0.000719261	19133526
28 | Q	33	2174	66	0.000722628	19135700
29 | Q	32	876	86	0.000727089	19136576
30 | Q	31	638	104	0.000732500	19137214
31 | Q	30	1718	196	0.000742675	19138932
32 | Q	29	91022	968	0.000789497	19229954
33 | Q	28	12864	781	0.000829556	19242818
34 | Q	27	5806	427	0.000851489	19248624
35 | Q	26	25274	728	0.000888144	19273898
36 | Q	25	7418	680	0.000923070	19281316
37 | Q	24	11800	701	0.000958839	19293116
38 | Q	23	57328	3933	0.001159250	19350444
39 | Q	22	7662	846	0.001202494	19358106
40 | Q	21	5924	617	0.001233989	19364030
41 | Q	20	4623	574	0.001263330	19368653
42 | Q	19	4988	942	0.001311627	19373641
43 | Q	18	3968	793	0.001352282	19377609
44 | Q	17	3630	681	0.001387166	19381239
45 | Q	16	2921	513	0.001413422	19384160
46 | Q	15	2716	424	0.001435095	19386876
47 | Q	14	2366	365	0.001453744	19389242
48 | Q	13	2169	412	0.001474828	19391411
49 | Q	12	2077	360	0.001493233	19393488
50 | Q	11	2016	441	0.001515815	19395504
51 | Q	10	2292	738	0.001553682	19397796
52 | Q	9	4165	1832	0.001647772	19401961
53 | Q	8	3963	1862	0.001743385	19405924
54 | Q	7	3927	1793	0.001835408	19409851
55 | Q	6	3572	1639	0.001919497	19413423
56 | Q	5	3270	1533	0.001998126	19416693
57 | Q	4	3046	1610	0.002080718	19419739
58 | Q	3	251447	125550	0.008436553	19671186
59 | Q	2	24390	13537	0.009113417	19695576
60 | Q	1	124406	86780	0.013434624	19819982
61 | Q	0	171254	153874	0.021016609	19991236
62 | U	8764
63 | 


--------------------------------------------------------------------------------
/example.c:
--------------------------------------------------------------------------------
 1 | // To compile:
 2 | //   gcc -g -O2 example.c libminimap2.a -lz
 3 | 
 4 | #include <stdlib.h>
 5 | #include <assert.h>
 6 | #include <stdio.h>
 7 | #include <zlib.h>
 8 | #include "minimap.h"
 9 | #include "kseq.h"
10 | KSEQ_INIT(gzFile, gzread)
11 | 
12 | int main(int argc, char *argv[]) // maps every query in argv[2] against every part of the index built/loaded from argv[1]; prints one line per hit
13 | {
14 | 	mm_idxopt_t iopt;
15 | 	mm_mapopt_t mopt;
16 | 	int n_threads = 3;
17 | 
18 | 	mm_verbose = 2; // disable message output to stderr
19 | 	mm_set_opt(0, &iopt, &mopt);
20 | 	mopt.flag |= MM_F_CIGAR; // perform alignment
21 | 
22 | 	if (argc < 3) {
23 | 		fprintf(stderr, "Usage: minimap2-lite <target.fa> <query.fa>\n");
24 | 		return 1;
25 | 	}
26 | 
27 | 	// open query file for reading; you may use your favorite FASTA/Q parser
28 | 	gzFile f = gzopen(argv[2], "r");
29 | 	assert(f); // note: this check disappears when compiled with NDEBUG
30 | 	kseq_t *ks = kseq_init(f);
31 | 
32 | 	// open index reader
33 | 	mm_idx_reader_t *r = mm_idx_reader_open(argv[1], &iopt, 0); // NOTE(review): r is used below without a NULL check — confirm how mm_idx_reader_open() reports failure
34 | 	mm_idx_t *mi;
35 | 	while ((mi = mm_idx_reader_read(r, n_threads)) != 0) { // traverse each part of the index
36 | 		mm_mapopt_update(&mopt, mi); // this sets the maximum minimizer occurrence; TODO: set a better default in mm_mapopt_init()!
37 | 		mm_tbuf_t *tbuf = mm_tbuf_init(); // thread buffer; for multi-threading, allocate one tbuf for each thread
38 | 		gzrewind(f); // the query file is re-read from the start for every index part
39 | 		kseq_rewind(ks);
40 | 		while (kseq_read(ks) >= 0) { // each kseq_read() call reads one query sequence
41 | 			mm_reg1_t *reg;
42 | 			int j, i, n_reg;
43 | 			reg = mm_map(mi, ks->seq.l, ks->seq.s, &n_reg, tbuf, &mopt, 0); // get all hits for the query
44 | 			for (j = 0; j < n_reg; ++j) { // traverse hits and print them out
45 | 				mm_reg1_t *r = &reg[j]; // shadows the index reader `r` declared above for the rest of this loop body
46 | 				assert(r->p); // with MM_F_CIGAR, this should not be NULL
47 | 				printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]); // query name, length, start, end, strand
48 | 				printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, r->mlen, r->blen, r->mapq); // target name, length, start, end, then mlen, blen, mapq
49 | 				for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings!
50 | 					printf("%d%c", r->p->cigar[i]>>4, MM_CIGAR_STR[r->p->cigar[i]&0xf]); // each entry: length in the upper bits, operation code in the low 4 bits
51 | 				putchar('\n');
52 | 				free(r->p); // the caller releases each hit's alignment record...
53 | 			}
54 | 			free(reg); // ...and the hit array itself
55 | 		}
56 | 		mm_tbuf_destroy(tbuf);
57 | 		mm_idx_destroy(mi);
58 | 	}
59 | 	mm_idx_reader_close(r); // close the index reader
60 | 	kseq_destroy(ks); // close the query file
61 | 	gzclose(f);
62 | 	return 0;
63 | }
64 | 


--------------------------------------------------------------------------------
/kalloc.h:
--------------------------------------------------------------------------------
 1 | #ifndef _KALLOC_H_
 2 | #define _KALLOC_H_
 3 | 
 4 | #include <stddef.h> /* for size_t */
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | typedef struct {
11 | 	size_t capacity, available, n_blocks, n_cores, largest;
12 |     size_t meta_size;
13 | } km_stat_t;
14 | 
15 | void *kmalloc(void *km, size_t size);
16 | void *krealloc(void *km, void *ptr, size_t size);
17 | void *kcalloc(void *km, size_t count, size_t size);
18 | void kfree(void *km, void *ptr);
19 | 
20 | void *km_init(void);
21 | void *km_init2(void *km_par, size_t min_core_size);
22 | void km_destroy(void *km);
23 | void km_stat(const void *_km, km_stat_t *s);
24 | 
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | 
29 | #define KMALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kmalloc((km), (len) * sizeof(*(ptr))))
30 | #define KCALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kcalloc((km), (len), sizeof(*(ptr))))
31 | #define KREALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))krealloc((km), (ptr), (len) * sizeof(*(ptr))))
32 | 
33 | #define KEXPAND(km, a, m) do { /* grow capacity m (x1.5, or to 16 when m < 4), then krealloc array a to m elements */ \
34 | 		(m) = (m) >= 4? (m) + ((m)>>1) : 16; \
35 | 		KREALLOC((km), (a), (m)); /* the krealloc result is assigned to a without being checked */ \
36 | 	} while (0)
37 | 
38 | #ifndef klib_unused
39 | #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
40 | #define klib_unused __attribute__ ((__unused__))
41 | #else
42 | #define klib_unused
43 | #endif
44 | #endif /* klib_unused */
45 | 
46 | #define KALLOC_POOL_INIT2(SCOPE, name, kmptype_t) /* generates pool type kmp_<name>_t for kmptype_t objects plus its init/destroy/alloc/free functions */ \
47 | 	typedef struct { \
48 | 		size_t cnt, n, max; /* cnt: objects currently handed out; n: objects cached in buf; max: capacity of buf */ \
49 | 		kmptype_t **buf; \
50 | 		void *km; /* allocator handle passed to every kcalloc/krealloc/kfree call */ \
51 | 	} kmp_##name##_t; \
52 | 	SCOPE kmp_##name##_t *kmp_init_##name(void *km) { /* returns a zero-initialised pool bound to km */ \
53 | 		kmp_##name##_t *mp; \
54 | 		KCALLOC(km, mp, 1); \
55 | 		mp->km = km; \
56 | 		return mp; \
57 | 	} \
58 | 	SCOPE void kmp_destroy_##name(kmp_##name##_t *mp) { /* frees the cached objects, buf and the pool; objects still handed out are not touched */ \
59 | 		size_t k; \
60 | 		for (k = 0; k < mp->n; ++k) kfree(mp->km, mp->buf[k]); \
61 | 		kfree(mp->km, mp->buf); kfree(mp->km, mp); \
62 | 	} \
63 | 	SCOPE kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { /* reuses a cached object if any, else kcalloc()s a zeroed one */ \
64 | 		++mp->cnt; \
65 | 		if (mp->n == 0) return (kmptype_t*)kcalloc(mp->km, 1, sizeof(kmptype_t)); \
66 | 		return mp->buf[--mp->n]; /* a reused object is returned as-is, i.e. not re-zeroed */ \
67 | 	} \
68 | 	SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { /* returns p to the cache instead of freeing it */ \
69 | 		--mp->cnt; \
70 | 		if (mp->n == mp->max) KEXPAND(mp->km, mp->buf, mp->max); \
71 | 		mp->buf[mp->n++] = p; \
72 | 	}
73 | 
74 | #define KALLOC_POOL_INIT(name, kmptype_t) \
75 | 	KALLOC_POOL_INIT2(static inline klib_unused, name, kmptype_t)
76 | 
77 | #endif
78 | 


--------------------------------------------------------------------------------
/splitidx.c:
--------------------------------------------------------------------------------
 1 | #include <string.h>
 2 | #include <assert.h>
 3 | #include <stdlib.h>
 4 | #include <stdio.h>
 5 | #include <errno.h>
 6 | #include "mmpriv.h"
 7 | 
 8 | FILE *mm_split_init(const char *prefix, const mm_idx_t *mi) // creates "<prefix>.<index>.tmp", writes k and the sequence names/lengths of mi, returns the open stream
 9 | {
10 | 	char *fn;
11 | 	FILE *fp;
12 | 	uint32_t i, k = mi->k;
13 | 	fn = (char*)calloc(strlen(prefix) + 32, 1); // %.4d is only a minimum width: leave room for any int index, not just four digits
14 | 	snprintf(fn, strlen(prefix) + 32, "%s.%.4d.tmp", prefix, mi->index); // bounded write; same file name as before
15 | 	if ((fp = fopen(fn, "wb")) == NULL) { // failure to create the file is fatal: report (if verbose) and exit(1)
16 | 		if (mm_verbose >= 1)
17 | 			fprintf(stderr, "[ERROR]\033[1;31m failed to write to temporary file '%s'\033[0m: %s\n", fn, strerror(errno));
18 | 		exit(1);
19 | 	}
20 | 	mm_err_fwrite(&k, 4, 1, fp); // header: k, then the number of sequences
21 | 	mm_err_fwrite(&mi->n_seq, 4, 1, fp);
22 | 	for (i = 0; i < mi->n_seq; ++i) { // per sequence: 4-byte name length, name bytes (no terminator), 4-byte sequence length
23 | 		uint32_t l;
24 | 		l = strlen(mi->seq[i].name);
25 | 		mm_err_fwrite(&l, 1, 4, fp);
26 | 		mm_err_fwrite(mi->seq[i].name, 1, l, fp);
27 | 		mm_err_fwrite(&mi->seq[i].len, 4, 1, fp);
28 | 	}
29 | 	free(fn);
30 | 	return fp; // left open, positioned just after the header
31 | }
32 | 
33 | mm_idx_t *mm_split_merge_prep(const char *prefix, int n_splits, FILE **fp, uint32_t *n_seq_part) // opens the n_splits temporary files into fp[], reads their headers, returns an index holding only k and the merged sequence names/lengths (0 on failure)
34 | {
35 | 	mm_idx_t *mi = 0;
36 | 	char *fn;
37 | 	int i, j;
38 | 
39 | 	if (n_splits < 1) return 0;
40 | 	fn = CALLOC(char, strlen(prefix) + 32); // %.4d is only a minimum width: leave room for any int split number
41 | 	for (i = 0; i < n_splits; ++i) {
42 | 		snprintf(fn, strlen(prefix) + 32, "%s.%.4d.tmp", prefix, i); // bounded write; same file name as before
43 | 		if ((fp[i] = fopen(fn, "rb")) == 0) {
44 | 			if (mm_verbose >= 1)
45 | 				fprintf(stderr, "ERROR: failed to open temporary file '%s': %s\n", fn, strerror(errno));
46 | 			for (j = 0; j < i; ++j) // close the files opened so far before giving up
47 | 				fclose(fp[j]);
48 | 			free(fn);
49 | 			return 0;
50 | 		}
51 | 	}
52 | 	free(fn);
53 | 
54 | 	mi = CALLOC(mm_idx_t, 1);
55 | 	for (i = 0; i < n_splits; ++i) { // first pass: read k and the per-file sequence count
56 | 		mm_err_fread(&mi->k, 4, 1, fp[i]); // TODO: check if k is all the same
57 | 		mm_err_fread(&n_seq_part[i], 4, 1, fp[i]);
58 | 		mi->n_seq += n_seq_part[i];
59 | 	}
60 | 	mi->seq = CALLOC(mm_idx_seq_t, mi->n_seq);
61 | 	for (i = j = 0; i < n_splits; ++i) { // second pass: j runs over the merged sequence table
62 | 		uint32_t k;
63 | 		for (k = 0; k < n_seq_part[i]; ++k, ++j) {
64 | 			uint32_t l;
65 | 			mm_err_fread(&l, 1, 4, fp[i]);
66 | 			mi->seq[j].name = (char*)calloc(l + 1, 1); // zero-filled, so the name read below ends up NUL-terminated
67 | 			mm_err_fread(mi->seq[j].name, 1, l, fp[i]);
68 | 			mm_err_fread(&mi->seq[j].len, 4, 1, fp[i]);
69 | 		}
70 | 	}
71 | 	return mi; // every fp[i] is left open, positioned just after its header
72 | }
73 | 
74 | void mm_split_rm_tmp(const char *prefix, int n_splits) // removes the temporary files "<prefix>.<i>.tmp" for i in [0, n_splits)
75 | {
76 | 	int i;
77 | 	char *fn;
78 | 	fn = CALLOC(char, strlen(prefix) + 32); // %.4d is only a minimum width: leave room for any int split number
79 | 	for (i = 0; i < n_splits; ++i) {
80 | 		snprintf(fn, strlen(prefix) + 32, "%s.%.4d.tmp", prefix, i); // bounded write; same file name as before
81 | 		remove(fn); // best effort: the return value is deliberately not checked
82 | 	}
83 | 	free(fn);
84 | }
85 | 


--------------------------------------------------------------------------------
/gpu/gpu.mk:
--------------------------------------------------------------------------------
 1 | GPU				?= 		AMD
 2 | CONFIG			+= $(if $(MAX_MICRO_BATCH),-DMICRO_BATCH=\($(MAX_MICRO_BATCH)\))
 3 | 
 4 | ifeq ($(GPU), AMD)
 5 |     GPUARCH    ?= $(strip $(shell rocminfo |grep -m 1 -E gfx[^0]{1} | sed -e 's/ *Name: *//'))
 6 | else
 7 |     GPUARCH    ?= sm_86
 8 | endif
 9 | 
10 | ###################################################
11 | ############  	CPU Compile 	###################
12 | ###################################################
13 | CU_SRC			= $(wildcard gpu/*.cu)
14 | CU_OBJS			= $(CU_SRC:%.cu=%.o)
15 | CU_PTX			= $(CU_SRC:%.cu=%.ptx)
16 | C_SRC			= $(wildcard gpu/*.c)
17 | OBJS			+= $(C_SRC:%.c=%.o)
18 | INCLUDES		+= -I gpu
19 | 
20 | ###################################################
21 | ############  	CUDA Compile 	###################
22 | ###################################################
23 | COMPUTE_ARCH    = $(GPUARCH:sm_%=compute_%)
24 | NVCC 			= nvcc
25 | CUDAFLAGS		= -rdc=true -gencode arch=$(COMPUTE_ARCH),code=$(GPUARCH) -diag-suppress=177 -diag-suppress=1650 # suppress unused variable / func warning
26 | CUDANALYZEFLAG	= -Xptxas -v 
27 | CUDATESTFLAG	= -G 
28 | 
29 | ###################################################
30 | ############	HIP Compile		###################
31 | ###################################################
32 | HIPCC			= hipcc
33 | HIPFLAGS		= -DUSEHIP --offload-arch=$(GPUARCH)
34 | HIPANALYZEFLAG  = -Rpass-analysis=kernel-resource-usage
35 | HIPTESTFLAGS	= -G -ggdb
36 | HIPLIBS			= -L${ROCM_PATH}/lib -lroctx64 -lroctracer64
37 | 
38 | ###################################################
39 | ############	DEBUG Options	###################
40 | ###################################################
41 | ifeq ($(GPU), AMD)
42 | 	GPU_CC 		= $(HIPCC)
43 | 	GPU_FLAGS	= $(HIPFLAGS)
44 | 	GPU_TESTFL	= $(HIPTESTFLAGS)
45 | 	GPU_ANALYZE	= $(HIPANALYZEFLAG)
46 | 	LIBS		+= $(HIPLIBS)
47 | else
48 | 	GPU_CC 		= $(NVCC)
49 | 	GPU_FLAGS	= $(CUDAFLAGS)
50 | 	GPU_ANALYZE = $(CUDANALYZEFLAG)
51 | 	GPU_TESTFL	= $(CUDATESTFLAG)
52 | endif
53 | 
54 | ifeq ($(DEBUG),analyze)
55 | 	GPU_FLAGS	+= $(GPU_ANALYZE)
56 | endif
57 | ifeq ($(DEBUG),verbose)
58 | 	GPU_FLAGS	+= $(GPU_ANALYZE)
59 | 	GPU_FLAGS	+= $(GPU_TESTFL)
60 | endif
61 | 
62 | 
# Targets below do not name files they create; declare them phony so a stray
# file called `cleangpu` or `cudep` can never make them appear up to date.
.PHONY: cleangpu cudep

# Compile one GPU source into an object file with the selected toolchain.
%.o: %.cu
	$(GPU_CC) -c $(GPU_FLAGS) $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $(CONFIG) $< -o $@

# Emit PTX (with the source interleaved) for inspection.
%.ptx: %.cu
	$(GPU_CC) -ptx -src-in-ptx $(GPU_FLAGS) $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $(CONFIG) $< -o $@

# Dump an object file with cuobjdump.
%.as: %.o
	cuobjdump -all $< > $@

# Remove the GPU objects and PTX listings derived from $(CU_SRC).
cleangpu: 
	rm -f $(CU_OBJS) $(CU_PTX)

# profile:CFLAGS += -pg -g3
# profile:all
# 	perf record --call-graph=dwarf -e cycles:u time ./minimap2 -a test/MT-human.fa test/MT-orang.fa > test.sam

cudep: gpu/.depend

# Regenerate the header dependency list for all GPU sources.
# The output goes to a temporary file first and is renamed only on success;
# redirecting straight into $@ would leave a fresh, truncated gpu/.depend
# behind when the compiler fails, and make would then treat it as up to date.
gpu/.depend: $(CU_SRC)
	rm -f $@ $@.tmp
	$(GPU_CC) -c $(GPU_FLAGS) $(CFLAGS)  $(CPPFLAGS) $(INCLUDES) -MM $^ > $@.tmp
	mv -f $@.tmp $@

include gpu/.depend
86 | 


--------------------------------------------------------------------------------
/gpu/hipify.cuh:
--------------------------------------------------------------------------------
 1 | #ifndef __HIPIFY_CUH__
 2 | #define __HIPIFY_CUH__
 3 | 
 4 | #ifdef USEHIP
 5 | #include "hip/hip_runtime.h"
 6 | #include "roctracer/roctx.h"
 7 | #define     cudaDeviceProp                  hipDeviceProp_t
 8 | #define     cudaGetDeviceProperties         hipGetDeviceProperties
 9 | #define     cudaMalloc                      hipMalloc
10 | #define     cudaMallocAsync                 hipMallocAsync
11 | #define     cudaMemcpy                      hipMemcpy
12 | #define     cudaMemcpyAsync                 hipMemcpyAsync
13 | #define     cudaMemcpyToSymbolAsync         hipMemcpyToSymbolAsync
14 | #define     cudaMemcpyHostToDevice          hipMemcpyHostToDevice
15 | #define     cudaMemcpyDeviceToHost          hipMemcpyDeviceToHost
16 | #define     cudaDeviceSynchronize           hipDeviceSynchronize
17 | #define     cudaFree                        hipFree
18 | #define     cudaFreeAsync                   hipFreeAsync
19 | #define     cudaMemcpyToSymbol              hipMemcpyToSymbol
20 | #define     cudaMemset                      hipMemset
21 | #define     cudaMemsetAsync                 hipMemsetAsync
22 | #define     cudaStream_t                    hipStream_t
23 | #define     cudaStreamCreate                hipStreamCreate
24 | #define     cudaStreamSynchronize           hipStreamSynchronize
25 | #define     cudaStreamDestroy               hipStreamDestroy
26 | #define     cudaMallocHost                  hipHostMalloc
27 | #define     cudaFreeHost                    hipHostFree
28 | #define     cudaEvent_t                     hipEvent_t
29 | #define     cudaEventCreate                 hipEventCreate
30 | #define     cudaEventRecord                 hipEventRecord
31 | #define     cudaEventQuery                  hipEventQuery
32 | #define     cudaEventDestroy                hipEventDestroy
33 | #define     cudaEventElapsedTime            hipEventElapsedTime
34 | #define     cudaStreamWaitEvent             hipStreamWaitEvent
35 | #define     cudaMemGetInfo                  hipMemGetInfo
36 | #define cudaCheck() {                                                       \
37 |     hipError_t err = hipGetLastError();                                     \
38 |     if (hipSuccess != err) {                                                \
39 |         fprintf(stderr, "Error in %s:%i %s(): %s.\n", __FILE__, __LINE__,   \
40 |                 __func__, hipGetErrorString(err));                          \
41 |         fflush(stderr);                                                     \
42 |         exit(EXIT_FAILURE);                                                 \
43 |     }                                                                       \
44 | }
45 | #define     cudaWarpSize                    64
46 | #else
47 | #define cudaCheck() {                                                   \
48 |     cudaError_t err = cudaGetLastError();                               \
49 |     if (cudaSuccess != err) {                                           \
50 |         fprintf(stderr, "Error in %s:%i %s(): %s.\n", __FILE__, __LINE__,\
51 |                 __func__, cudaGetErrorString(err));                     \
52 |         fflush(stderr);                                                 \
53 |         exit(EXIT_FAILURE);                                             \
54 |     }                                                                   \
55 | }
56 | #include <cuda.h>
57 | 
58 | #endif
59 | 
60 | 
61 | #endif // __HIPIFY_CUH__
62 | 


--------------------------------------------------------------------------------
/Makefile.simde:
--------------------------------------------------------------------------------
# Build minimap2 with the SSE kernels compiled through SIMDe
# (-DUSE_SIMDE, headers taken from lib/simde).
#
# Optional command-line switches handled below (any non-empty value enables):
#   arm_neon=1   add NEON/signed-char flags (32-bit ARM unless aarch64 is set)
#   aarch64=1    with arm_neon: omit -mfpu=neon
#   asan=1       build and link with AddressSanitizer
#   tsan=1       build and link with ThreadSanitizer
CFLAGS=		-g -Wall -O2 -Wc++-compat #-Wextra
CPPFLAGS=	-DHAVE_KALLOC -DUSE_SIMDE -DSIMDE_ENABLE_NATIVE_ALIASES
INCLUDES=	-Ilib/simde
# Objects archived into libminimap2.a; the four ksw2_*_simde.o objects are
# built from the ksw2_*_sse.c sources by the explicit rules further down.
OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o lchain.o align.o hit.o map.o format.o pe.o seed.o esterr.o splitidx.o \
			ksw2_extz2_simde.o ksw2_extd2_simde.o ksw2_exts2_simde.o ksw2_ll_simde.o
PROG=		minimap2
PROG_EXTRA=	sdust minimap2-lite
LIBS=		-lm -lz -lpthread


ifneq ($(arm_neon),) # if arm_neon is defined
ifeq ($(aarch64),)   #if aarch64 is not defined
	CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
else                 #if aarch64 is defined
	CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char
endif
endif

# The sanitizer flag must reach both the compile and the link step.
ifneq ($(asan),)
	CFLAGS+=-fsanitize=address
	LIBS+=-fsanitize=address
endif

ifneq ($(tsan),)
	CFLAGS+=-fsanitize=thread
	LIBS+=-fsanitize=thread
endif

.PHONY:all extra clean depend
.SUFFIXES:.c .o

# Default compile rule for every object without an explicit rule below.
.c.o:
		$(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@

all:$(PROG)

extra:all $(PROG_EXTRA)

# Command-line program: main.o linked against the static library.
minimap2:main.o libminimap2.a
		$(CC) $(CFLAGS) main.o -o $@ -L. -lminimap2 $(LIBS)

# Minimal API example built from example.c.
minimap2-lite:example.o libminimap2.a
		$(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS)

libminimap2.a:$(OBJS)
		$(AR) -csru $@ $(OBJS)

# Standalone sdust tool: sdust.c compiled with -D_SDUST_MAIN.
sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
		$(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz

# SIMDe builds of the alignment kernels: same *_sse.c sources, different
# object names, each with its own -msse* option.
# NOTE(review): -msse2/-msse4.1 are passed unconditionally, including when
# arm_neon is set -- confirm the target compiler accepts them there.
ksw2_ll_simde.o:ksw2_ll_sse.c ksw2.h kalloc.h
		$(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@

ksw2_extz2_simde.o:ksw2_extz2_sse.c ksw2.h kalloc.h
		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) $(INCLUDES) $< -o $@

ksw2_extd2_simde.o:ksw2_extd2_sse.c ksw2.h kalloc.h
		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) $(INCLUDES) $< -o $@

ksw2_exts2_simde.o:ksw2_exts2_sse.c ksw2.h kalloc.h
		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) $(INCLUDES) $< -o $@

# other non-file targets

clean:
		rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg*

# Regenerate the dependency list that follows the "DO NOT DELETE" marker.
depend:
		(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c)
70 | 
71 | # DO NOT DELETE
72 | 
73 | align.o: minimap.h mmpriv.h bseq.h kseq.h ksw2.h kalloc.h
74 | bseq.o: bseq.h kvec.h kalloc.h kseq.h
75 | chain.o: minimap.h mmpriv.h bseq.h kseq.h kalloc.h
76 | esterr.o: mmpriv.h minimap.h bseq.h kseq.h
77 | example.o: minimap.h kseq.h
78 | format.o: kalloc.h mmpriv.h minimap.h bseq.h kseq.h
79 | hit.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h khash.h
80 | index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h kvec.h kalloc.h khash.h
81 | index.o: ksort.h
82 | kalloc.o: kalloc.h
83 | ksw2_extd2_sse.o: ksw2.h kalloc.h
84 | ksw2_exts2_sse.o: ksw2.h kalloc.h
85 | ksw2_extz2_sse.o: ksw2.h kalloc.h
86 | ksw2_ll_sse.o: ksw2.h kalloc.h
87 | kthread.o: kthread.h
88 | main.o: bseq.h minimap.h mmpriv.h kseq.h ketopt.h
89 | map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h kseq.h
90 | map.o: khash.h ksort.h
91 | misc.o: mmpriv.h minimap.h bseq.h kseq.h ksort.h
92 | options.o: mmpriv.h minimap.h bseq.h kseq.h
93 | pe.o: mmpriv.h minimap.h bseq.h kseq.h kvec.h kalloc.h ksort.h
94 | sdust.o: kalloc.h kdq.h kvec.h sdust.h
95 | self-chain.o: minimap.h kseq.h
96 | sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
97 | splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
98 | 


--------------------------------------------------------------------------------
/kvec.h:
--------------------------------------------------------------------------------
  1 | /* The MIT License
  2 | 
  3 |    Copyright (c) 2008, by Attractive Chaos <attractor@live.co.uk>
  4 | 
  5 |    Permission is hereby granted, free of charge, to any person obtaining
  6 |    a copy of this software and associated documentation files (the
  7 |    "Software"), to deal in the Software without restriction, including
  8 |    without limitation the rights to use, copy, modify, merge, publish,
  9 |    distribute, sublicense, and/or sell copies of the Software, and to
 10 |    permit persons to whom the Software is furnished to do so, subject to
 11 |    the following conditions:
 12 | 
 13 |    The above copyright notice and this permission notice shall be
 14 |    included in all copies or substantial portions of the Software.
 15 | 
 16 |    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 |    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 |    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 19 |    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 20 |    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 21 |    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 22 |    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 23 |    SOFTWARE.
 24 | */
 25 | 
/*
  An example (km is the kalloc handle forwarded to krealloc(); see kalloc.h):

#include "kvec.h"
int main() {
	kvec_t(int) array;
	kv_init(array);
	kv_push(int, 0, array, 10); // append
	kv_resize(int, 0, array, 21); // make room for index 20
	kv_A(array, 20) = 4; // direct access, no bounds check
	kv_destroy(array);
	return 0;
}
*/
 40 | 
 41 | /*
 42 |   2008-09-22 (0.1.0):
 43 | 
 44 | 	* The initial version.
 45 | 
 46 | */
 47 | 
 48 | #ifndef AC_KVEC_H
 49 | #define AC_KVEC_H
 50 | 
#include <stdlib.h>
#include <string.h>
#include "kalloc.h"
 53 | 
 54 | #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
 55 | 
 56 | #define kvec_t(type) struct { size_t n, m; type *a; }
 57 | #define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
 58 | #define kv_destroy(v) free((v).a)
 59 | #define kv_A(v, i) ((v).a[(i)])
 60 | #define kv_pop(v) ((v).a[--(v).n])
 61 | #define kv_size(v) ((v).n)
 62 | #define kv_max(v) ((v).m)
 63 | 
 64 | #define kv_resize(type, km, v, s) do { \
 65 | 		if ((v).m < (s)) { \
 66 | 			(v).m = (s); \
 67 | 			kv_roundup32((v).m); \
 68 | 			(v).a = (type*)krealloc((km), (v).a, sizeof(type) * (v).m); \
 69 | 		} \
 70 | 	} while (0)
 71 | 
 72 | #define kv_copy(type, km, v1, v0) do { \
 73 | 		if ((v1).m < (v0).n) kv_resize(type, (km), (v1), (v0).n); \
 74 | 		(v1).n = (v0).n; \
 75 | 		memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \
 76 | 	} while (0) \
 77 | 
 78 | #define kv_push(type, km, v, x) do { \
 79 | 		if ((v).n == (v).m) { \
 80 | 			(v).m = (v).m? (v).m<<1 : 2; \
 81 | 			(v).a = (type*)krealloc((km), (v).a, sizeof(type) * (v).m); \
 82 | 		} \
 83 | 		(v).a[(v).n++] = (x); \
 84 | 	} while (0)
 85 | 
 86 | #define kv_pushp(type, km, v, p) do { \
 87 | 		if ((v).n == (v).m) { \
 88 | 			(v).m = (v).m? (v).m<<1 : 2; \
 89 | 			(v).a = (type*)krealloc((km), (v).a, sizeof(type) * (v).m); \
 90 | 		} \
 91 | 		*(p) = &(v).a[(v).n++]; \
 92 | 	} while (0)
 93 | 
 94 | #define kv_reverse(type, v, start) do { \
 95 | 		if ((v).m > 0 && (v).n > (start)) { \
 96 | 			size_t __i, __end = (v).n - (start); \
 97 | 			type *__a = (v).a + (start); \
 98 | 			for (__i = 0; __i < __end>>1; ++__i) { \
 99 | 				type __t = __a[__end - 1 - __i]; \
100 | 				__a[__end - 1 - __i] = __a[__i]; __a[__i] = __t; \
101 | 			} \
102 | 		} \
103 | 	} while (0)
104 | 
105 | #endif
106 | 


--------------------------------------------------------------------------------
/gpu/debug.h:
--------------------------------------------------------------------------------
 1 | #ifndef __DEBUG_H__
 2 | #define __DEBUG_H__
 3 | #include "plutils.h"
 4 | #include "mmpriv.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | const char debug_folder[] = "debug";
11 | 
12 | // #define ITER_LIMIT 10000
13 | // #define MAX_READ_NUM 100000
14 | // #define MEM_CPU (96-6) // 96 - 6 GB for possible exceed read
15 | // #define MEM_GPU (16-4) // 16 - 4 GB as memory pool = 16760832(0xffc000) KB
16 | // #define SATURATE_FACTOR (0.7) // NOTE: how much portion of cpu memory shall be allocated, < 1
17 | 
18 | 
19 | // /* Input File Names: Set by command line arguments in main.c */
20 | // extern char input_filename[];   // plaintxt chaining inputs & score
21 | // extern char range_infile[];     // plaintxt range 
22 | // extern char binary_file[];      // binary chaining inputs & score
23 | // extern char binary_range[];     // binary range
24 | 
25 | #ifndef DEBUG_CHECK
26 | #define ASSERT(X) 
27 | // Chaining Debug Checker: checks chaining score against input. 
28 | #elif DEBUG_CHECK
29 | #define ASSERT(X) assert(X)
30 | // Read score from file for comparison
31 | int debug_read_score(const char input_filename[], chain_read_t *in, void *km);
32 | int debug_build_score(chain_read_t *in, void *km);
33 | 
34 | // Check score
35 | int debug_check_score(const int64_t *p, const int32_t *f, const int64_t *p_gold,
36 |                       const int32_t *f_gold, int64_t n, char* qname);
37 | void debug_check_range(const int32_t* range, size_t n);
38 | int debug_check_cut(const size_t *cut, const int32_t *range, size_t max_cut,
39 |                     size_t n, size_t offset);
40 | 
41 | #ifdef DEBUG_CHECK_FORCE
42 | mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip,
43 |                       int max_iter, int min_cnt, int min_sc, float chn_pen_gap,
44 |                       float chn_pen_skip, int is_cdna, int n_seg,
45 |                       int64_t n,   // NOTE: n is number of anchors
46 |                       mm128_t *a,  // NOTE: a is ptr to anchors.
47 |                       int *n_u_, uint64_t **_u, void *km, chain_read_t *input,
48 |                       int32_t *f_, int64_t *p_
49 | );
50 | #endif  // DEBUG_CHECK_FORCE
51 | 
52 | 
53 | 
54 | // Analyze Distribution
55 | 
56 | void debug_cal_long_seg_range_dis(size_t total_n, size_t num_cut, int32_t* range);
57 | void debug_cal_mid_range_dis(size_t total_n, size_t num_cut, int32_t *range);
58 | void debug_cal_range_dis(size_t total_n, size_t num_cut, int32_t *range);
59 | void debug_cal_sc_pair_density(size_t total_n, size_t num_cut, size_t* cut, int32_t* range);
60 | #endif  // DEBUG_CHECK
61 | 
62 | #ifdef DEBUG_VERBOSE
63 |     // Print Input Files
64 |     void debug_output_anchors(const char debug_folder[], chain_read_t *in);
65 | void debug_output_score(const char debug_folder[], chain_read_t *in);
66 | void debug_output_meta(const char debug_folder[], input_meta_t *meta);
67 | 
68 | void debug_print_successor_range(int32_t *range, int64_t n);
69 | int debug_print_cut(const size_t *cut, size_t max_cut, size_t n, size_t offset, char* qname);
70 | void debug_print_score(const int64_t *p, const int32_t *score, int64_t n);
71 | void debug_print_score_rel_p(const uint16_t *p, const int32_t *score, int64_t n);
72 | void debug_print_chain(mm128_t* a, uint64_t *u, int32_t n_u, char* qname);
73 | void debug_print_regs(mm_reg1_t *regs, int n_u, char *qname);
74 | void debug_print_segs(seg_t *segs, chain_read_t *reads, int num_segs, int num_reads);
75 | void debug_check_anchors(seg_t* segs, int num_segs, int32_t* ax_aggregated, int32_t* ax);
76 | #endif  // DEBUG_VERBOSE
77 | 
78 | #ifdef __cplusplus
79 | }
80 | #endif
81 | 
82 | #endif// __DEBUG_H__
83 | 


--------------------------------------------------------------------------------
/python/cmappy.pxd:
--------------------------------------------------------------------------------
  1 | from libc.stdint cimport int8_t, uint8_t, int32_t, int64_t, uint32_t, uint64_t
  2 | 
# Cython view of the C API declared in minimap.h. Only the struct fields and
# functions listed here can be referenced from Cython code; the declarations
# must agree with the C header they mirror.
cdef extern from "minimap.h":
	#
	# Options
	#
	# Indexing options.
	ctypedef struct mm_idxopt_t:
		short k, w, flag, bucket_bits
		int64_t mini_batch_size
		uint64_t batch_size

	# Mapping options.
	ctypedef struct mm_mapopt_t:
		int64_t flag
		int seed
		int sdust_thres

		int max_qlen

		int bw, bw_long
		int max_gap, max_gap_ref
		int max_frag_len
		int max_chain_skip, max_chain_iter
		int min_cnt
		int min_chain_score
		float chain_gap_scale
		float chain_skip_scale
		int rmq_size_cap, rmq_inner_dist
		int rmq_rescue_size
		float rmq_rescue_ratio

		float mask_level
		int mask_len
		float pri_ratio
		int best_n

		float alt_drop

		int a, b, q, e, q2, e2
		int sc_ambi
		int noncan
		int junc_bonus
		int zdrop, zdrop_inv
		int end_bonus
		int min_dp_max
		int min_ksw_len
		int anchor_ext_len, anchor_ext_shift
		float max_clip_ratio

		int rank_min_len
		float rank_frac

		int pe_ori, pe_bonus

		float mid_occ_frac
		float q_occ_frac
		int32_t min_mid_occ
		int32_t mid_occ
		int32_t max_occ
		int64_t mini_batch_size
		int64_t max_sw_mat
		int64_t cap_kalloc

		const char *split_prefix

	int mm_set_opt(char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
	# Global variable exported by the C library.
	int mm_verbose

	#
	# Indexing
	#
	# Per-sequence record of the index.
	ctypedef struct mm_idx_seq_t:
		char *name
		uint64_t offset
		uint32_t len

	# Opaque on the Cython side: no fields are exposed.
	ctypedef struct mm_idx_bucket_t:
		pass

	ctypedef struct mm_idx_t:
		int32_t b, w, k, flag
		uint32_t n_seq
		mm_idx_seq_t *seq
		uint32_t *S
		mm_idx_bucket_t *B
		void *km
		void *h

	# Opaque on the Cython side: no fields are exposed.
	ctypedef struct mm_idx_reader_t:
		pass

	mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out)
	mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads)
	void mm_idx_reader_close(mm_idx_reader_t *r)
	void mm_idx_destroy(mm_idx_t *mi)
	void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)

	int mm_idx_index_name(mm_idx_t *mi)

	#
	# Mapping (key struct defined in cmappy.h below)
	#
	# Opaque here; its contents are read through mm_reg2hitpy() in cmappy.h.
	ctypedef struct mm_reg1_t:
		pass

	# Opaque on the Cython side: no fields are exposed.
	ctypedef struct mm_tbuf_t:
		pass

	mm_tbuf_t *mm_tbuf_init()
	void mm_tbuf_destroy(mm_tbuf_t *b)
	void *mm_tbuf_get_km(mm_tbuf_t *b)
	int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden)
	int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq)
113 | 
114 | #
115 | # Helper header (because it is hard to expose mm_reg1_t with Cython)
116 | #
# Cython view of the helper functions and types provided by cmappy.h.
cdef extern from "cmappy.h":
	# Flat copy of one hit, filled in by mm_reg2hitpy().
	ctypedef struct mm_hitpy_t:
		const char *ctg
		int32_t ctg_start, ctg_end
		int32_t qry_start, qry_end
		int32_t blen, mlen, NM, ctg_len
		uint8_t mapq, is_primary
		int8_t strand, trans_strand
		int32_t seg_id
		int32_t n_cigar32
		uint32_t *cigar32

	void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
	void mm_free_reg1(mm_reg1_t *r)
	# seq2 may be NULL for a single-sequence query; the number of returned
	# records is stored in *n_regs.
	mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
	# Returns a malloc()'ed string (length in *l), or NULL.
	char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *l)
	mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int l)

	# kseq.h types; only the members needed by the wrapper are declared.
	ctypedef struct kstring_t:
		unsigned l, m
		char *s

	# Opaque on the Cython side: no fields are exposed.
	ctypedef struct kstream_t:
		pass

	ctypedef struct kseq_t:
		kstring_t name, comment, seq, qual
		int last_char
		kstream_t *f

	kseq_t *mm_fastx_open(const char *fn)
	void mm_fastx_close(kseq_t *ks)
	int kseq_read(kseq_t *seq)

	# Returns a malloc()'ed, NUL-terminated string of length l.
	char *mappy_revcomp(int l, const uint8_t *seq)
	# Sets the verbosity when v >= 0; always returns the current level.
	int mm_verbose_level(int v)
	void mm_reset_timer()
154 | 


--------------------------------------------------------------------------------
/python/cmappy.h:
--------------------------------------------------------------------------------
  1 | #ifndef CMAPPY_H
  2 | #define CMAPPY_H
  3 | 
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | #include <zlib.h>
  7 | #include "minimap.h"
  8 | #include "kseq.h"
  9 | KSEQ_DECLARE(gzFile)
 10 | 
/* Flat, Python-friendly copy of one hit; filled in by mm_reg2hitpy(). */
typedef struct {
	const char *ctg;             /* contig name; points into the index, not owned */
	int32_t ctg_start, ctg_end;  /* copied from mm_reg1_t::rs / ::re */
	int32_t qry_start, qry_end;  /* copied from mm_reg1_t::qs / ::qe */
	int32_t blen, mlen, NM, ctg_len; /* NM = blen - mlen + number of ambiguous bases */
	uint8_t mapq, is_primary;    /* is_primary is set when id == parent */
	int8_t strand, trans_strand; /* strand: -1 if reversed, else +1; trans_strand: +1, -1 or 0 */
	int32_t seg_id;
	int32_t n_cigar32;           /* number of entries in cigar32 */
	uint32_t *cigar32;           /* points at the CIGAR held by the hit's extra record; not a copy */
} mm_hitpy_t;
 22 | 
 23 | static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h)
 24 | {
 25 | 	h->ctg = mi->seq[r->rid].name;
 26 | 	h->ctg_len = mi->seq[r->rid].len;
 27 | 	h->ctg_start = r->rs, h->ctg_end = r->re;
 28 | 	h->qry_start = r->qs, h->qry_end = r->qe;
 29 | 	h->strand = r->rev? -1 : 1;
 30 | 	h->mapq = r->mapq;
 31 | 	h->mlen = r->mlen;
 32 | 	h->blen = r->blen;
 33 | 	h->NM = r->blen - r->mlen + r->p->n_ambi;
 34 | 	h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0;
 35 | 	h->is_primary = (r->id == r->parent);
 36 | 	h->seg_id = r->seg_id;
 37 | 	h->n_cigar32 = r->p->n_cigar;
 38 | 	h->cigar32 = r->p->cigar;
 39 | }
 40 | 
 41 | static inline void mm_free_reg1(mm_reg1_t *r)
 42 | {
 43 | 	free(r->p);
 44 | }
 45 | 
 46 | static inline kseq_t *mm_fastx_open(const char *fn)
 47 | {
 48 | 	gzFile fp;
 49 | 	fp = fn && strcmp(fn, "-") != 0? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
 50 | 	return kseq_init(fp);
 51 | }
 52 | 
 53 | static inline void mm_fastx_close(kseq_t *ks)
 54 | {
 55 | 	gzFile fp;
 56 | 	fp = ks->f->f;
 57 | 	kseq_destroy(ks);
 58 | 	gzclose(fp);
 59 | }
 60 | 
 61 | static inline int mm_verbose_level(int v)
 62 | {
 63 | 	if (v >= 0) mm_verbose = v;
 64 | 	return mm_verbose;
 65 | }
 66 | 
 67 | static inline void mm_reset_timer(void)
 68 | {
 69 | 	extern double realtime(void);
 70 | 	mm_realtime0 = realtime();
 71 | }
 72 | 
 73 | extern unsigned char seq_comp_table[256];
 74 | static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt)
 75 | {
 76 | 	mm_reg1_t *r;
 77 | 
 78 | 	Py_BEGIN_ALLOW_THREADS
 79 | 	if (seq2 == 0) {
 80 | 		r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL);
 81 | 	} else {
 82 | 		int _n_regs[2];
 83 | 		mm_reg1_t *regs[2];
 84 | 		char *seq[2];
 85 | 		int i, len[2];
 86 | 
 87 | 		len[0] = strlen(seq1);
 88 | 		len[1] = strlen(seq2);
 89 | 		seq[0] = (char*)seq1;
 90 | 		seq[1] = strdup(seq2);
 91 | 		for (i = 0; i < len[1]>>1; ++i) {
 92 | 			int t = seq[1][len[1] - i - 1];
 93 | 			seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]];
 94 | 			seq[1][i] = seq_comp_table[t];
 95 | 		}
 96 | 		if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]];
 97 | 		mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL);
 98 | 		for (i = 0; i < _n_regs[1]; ++i)
 99 | 			regs[1][i].rev = !regs[1][i].rev;
100 | 		*n_regs = _n_regs[0] + _n_regs[1];
101 | 		regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs));
102 | 		memcpy(&regs[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t));
103 | 		free(regs[1]);
104 | 		r = regs[0];
105 | 	}
106 | 	Py_END_ALLOW_THREADS
107 | 
108 | 	return r;
109 | }
110 | 
111 | static inline char *mappy_revcomp(int len, const uint8_t *seq)
112 | {
113 | 	int i;
114 | 	char *rev;
115 | 	rev = (char*)malloc(len + 1);
116 | 	for (i = 0; i < len; ++i)
117 | 		rev[len - i - 1] = seq_comp_table[seq[i]];
118 | 	rev[len] = 0;
119 | 	return rev;
120 | }
121 | 
122 | static char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len)
123 | {
124 | 	int i, rid;
125 | 	char *s;
126 | 	*len = 0;
127 | 	rid = mm_idx_name2id(mi, name);
128 | 	if (rid < 0) return 0;
129 | 	if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0;
130 | 	if (en < 0 || (uint32_t)en > mi->seq[rid].len)
131 | 		en = mi->seq[rid].len;
132 | 	s = (char*)malloc(en - st + 1);
133 | 	*len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s);
134 | 	for (i = 0; i < *len; ++i)
135 | 		s[i] = "ACGTN"[(uint8_t)s[i]];
136 | 	s[*len] = 0;
137 | 	return s;
138 | }
139 | 
140 | static mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len)
141 | {
142 | 	const char *fake_name = "N/A";
143 | 	char *s;
144 | 	mm_idx_t *mi;
145 | 	s = (char*)calloc(len + 1, 1);
146 | 	memcpy(s, seq, len);
147 | 	mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name);
148 | 	free(s);
149 | 	return mi;
150 | }
151 | 
152 | #endif
153 | 


--------------------------------------------------------------------------------
/ketopt.h:
--------------------------------------------------------------------------------
  1 | #ifndef KETOPT_H
  2 | #define KETOPT_H
  3 | 
  4 | #include <string.h> /* for strchr() and strncmp() */
  5 | 
  6 | #define ko_no_argument       0
  7 | #define ko_required_argument 1
  8 | #define ko_optional_argument 2
  9 | 
 10 | typedef struct {
 11 | 	int ind;   /* equivalent to optind */
 12 | 	int opt;   /* equivalent to optopt */
 13 | 	char *arg; /* equivalent to optarg */
 14 | 	int longidx; /* index of a long option; or -1 if short */
 15 | 	/* private variables not intended for external uses */
 16 | 	int i, pos, n_args;
 17 | } ketopt_t;
 18 | 
 19 | typedef struct {
 20 | 	char *name;
 21 | 	int has_arg;
 22 | 	int val;
 23 | } ko_longopt_t;
 24 | 
 25 | static ketopt_t KETOPT_INIT = { 1, 0, 0, -1, 1, 0, 0 };
 26 | 
 27 | static void ketopt_permute(char *argv[], int j, int n) /* move argv[j] over n elements to the left */
 28 | {
 29 | 	int k;
 30 | 	char *p = argv[j];
 31 | 	for (k = 0; k < n; ++k)
 32 | 		argv[j - k] = argv[j - k - 1];
 33 | 	argv[j - k] = p;
 34 | }
 35 | 
 36 | /**
 37 |  * Parse command-line options and arguments
 38 |  *
 39 |  * This fuction has a similar interface to GNU's getopt_long(). Each call
 40 |  * parses one option and returns the option name.  s->arg points to the option
 41 |  * argument if present. The function returns -1 when all command-line arguments
 42 |  * are parsed. In this case, s->ind is the index of the first non-option
 43 |  * argument.
 44 |  *
 45 |  * @param s         status; shall be initialized to KETOPT_INIT on the first call
 46 |  * @param argc      length of argv[]
 47 |  * @param argv      list of command-line arguments; argv[0] is ignored
 48 |  * @param permute   non-zero to move options ahead of non-option arguments
 49 |  * @param ostr      option string
 50 |  * @param longopts  long options
 51 |  *
 52 |  * @return ASCII for a short option; ko_longopt_t::val for a long option; -1 if
 53 |  *         argv[] is fully processed; '?' for an unknown option or an ambiguous
 54 |  *         long option; ':' if an option argument is missing
 55 |  */
 56 | static int ketopt(ketopt_t *s, int argc, char *argv[], int permute, const char *ostr, const ko_longopt_t *longopts)
 57 | {
 58 | 	int opt = -1, i0, j;
 59 | 	if (permute) {
 60 | 		while (s->i < argc && (argv[s->i][0] != '-' || argv[s->i][1] == '\0'))
 61 | 			++s->i, ++s->n_args;
 62 | 	}
 63 | 	s->arg = 0, s->longidx = -1, i0 = s->i;
 64 | 	if (s->i >= argc || argv[s->i][0] != '-' || argv[s->i][1] == '\0') {
 65 | 		s->ind = s->i - s->n_args;
 66 | 		return -1;
 67 | 	}
 68 | 	if (argv[s->i][0] == '-' && argv[s->i][1] == '-') { /* "--" or a long option */
 69 | 		if (argv[s->i][2] == '\0') { /* a bare "--" */
 70 | 			ketopt_permute(argv, s->i, s->n_args);
 71 | 			++s->i, s->ind = s->i - s->n_args;
 72 | 			return -1;
 73 | 		}
 74 | 		s->opt = 0, opt = '?', s->pos = -1;
 75 | 		if (longopts) { /* parse long options */
 76 | 			int k, n_exact = 0, n_partial = 0;
 77 | 			const ko_longopt_t *o = 0, *o_exact = 0, *o_partial = 0;
 78 | 			for (j = 2; argv[s->i][j] != '\0' && argv[s->i][j] != '='; ++j) {} /* find the end of the option name */
 79 | 			for (k = 0; longopts[k].name != 0; ++k)
 80 | 				if (strncmp(&argv[s->i][2], longopts[k].name, j - 2) == 0) {
 81 | 					if (longopts[k].name[j - 2] == 0) ++n_exact, o_exact = &longopts[k];
 82 | 					else ++n_partial, o_partial = &longopts[k];
 83 | 				}
 84 | 			if (n_exact > 1 || (n_exact == 0 && n_partial > 1)) return '?';
 85 | 			o = n_exact == 1? o_exact : n_partial == 1? o_partial : 0;
 86 | 			if (o) {
 87 | 				s->opt = opt = o->val, s->longidx = o - longopts;
 88 | 				if (argv[s->i][j] == '=') s->arg = &argv[s->i][j + 1];
 89 | 				if (o->has_arg == 1 && argv[s->i][j] == '\0') {
 90 | 					if (s->i < argc - 1) s->arg = argv[++s->i];
 91 | 					else opt = ':'; /* missing option argument */
 92 | 				}
 93 | 			}
 94 | 		}
 95 | 	} else { /* a short option */
 96 | 		char *p;
 97 | 		if (s->pos == 0) s->pos = 1;
 98 | 		opt = s->opt = argv[s->i][s->pos++];
 99 | 		p = strchr((char*)ostr, opt);
100 | 		if (p == 0) {
101 | 			opt = '?'; /* unknown option */
102 | 		} else if (p[1] == ':') {
103 | 			if (argv[s->i][s->pos] == 0) {
104 | 				if (s->i < argc - 1) s->arg = argv[++s->i];
105 | 				else opt = ':'; /* missing option argument */
106 | 			} else s->arg = &argv[s->i][s->pos];
107 | 			s->pos = -1;
108 | 		}
109 | 	}
110 | 	if (s->pos < 0 || argv[s->i][s->pos] == 0) {
111 | 		++s->i, s->pos = 0;
112 | 		if (s->n_args > 0) /* permute */
113 | 			for (j = i0; j < s->i; ++j)
114 | 				ketopt_permute(argv, j, s->n_args);
115 | 	}
116 | 	s->ind = s->i - s->n_args;
117 | 	return opt;
118 | }
119 | 
120 | #endif
121 | 


--------------------------------------------------------------------------------
/seed.c:
--------------------------------------------------------------------------------
  1 | #include "mmpriv.h"
  2 | #include "kalloc.h"
  3 | #include "ksort.h"
  4 | 
  5 | void mm_seed_mz_flt(void *km, mm128_v *mv, int32_t q_occ_max, float q_occ_frac)
  6 | {
  7 | 	mm128_t *a;
  8 | 	size_t i, j, st;
  9 | 	if (mv->n <= q_occ_max || q_occ_frac <= 0.0f || q_occ_max <= 0) return;
 10 | 	KMALLOC(km, a, mv->n);
 11 | 	for (i = 0; i < mv->n; ++i)
 12 | 		a[i].x = mv->a[i].x, a[i].y = i;
 13 | 	radix_sort_128x(a, a + mv->n);
 14 | 	for (st = 0, i = 1; i <= mv->n; ++i) {
 15 | 		if (i == mv->n || a[i].x != a[st].x) {
 16 | 			int32_t cnt = i - st;
 17 | 			if (cnt > q_occ_max && cnt > mv->n * q_occ_frac)
 18 | 				for (j = st; j < i; ++j)
 19 | 					mv->a[a[j].y].x = 0;
 20 | 			st = i;
 21 | 		}
 22 | 	}
 23 | 	kfree(km, a);
 24 | 	for (i = j = 0; i < mv->n; ++i)
 25 | 		if (mv->a[i].x != 0)
 26 | 			mv->a[j++] = mv->a[i];
 27 | 	mv->n = j;
 28 | }
 29 | 
 30 | mm_seed_t *mm_seed_collect_all(void *km, const mm_idx_t *mi, const mm128_v *mv, int32_t *n_m_)
 31 | {
 32 | 	mm_seed_t *m;
 33 | 	size_t i;
 34 | 	int32_t k;
 35 | 	m = (mm_seed_t*)kmalloc(km, mv->n * sizeof(mm_seed_t));
 36 | 	for (i = k = 0; i < mv->n; ++i) {
 37 | 		const uint64_t *cr;
 38 | 		mm_seed_t *q;
 39 | 		mm128_t *p = &mv->a[i];
 40 | 		uint32_t q_pos = (uint32_t)p->y, q_span = p->x & 0xff;
 41 | 		int t;
 42 | 		cr = mm_idx_get(mi, p->x>>8, &t);
 43 | 		if (t == 0) continue;
 44 | 		q = &m[k++];
 45 | 		q->q_pos = q_pos, q->q_span = q_span, q->cr = cr, q->n = t, q->seg_id = p->y >> 32;
 46 | 		q->is_tandem = q->flt = 0;
 47 | 		if (i > 0 && p->x>>8 == mv->a[i - 1].x>>8) q->is_tandem = 1;
 48 | 		if (i < mv->n - 1 && p->x>>8 == mv->a[i + 1].x>>8) q->is_tandem = 1;
 49 | 	}
 50 | 	*n_m_ = k;
 51 | 	return m;
 52 | }
 53 | 
 54 | #define MAX_MAX_HIGH_OCC 128
 55 | 
 56 | void mm_seed_select(int32_t n, mm_seed_t *a, int len, int max_occ, int max_max_occ, int dist)
 57 | { // for high-occ minimizers, choose up to max_high_occ in each high-occ streak
 58 | 	extern void ks_heapdown_uint64_t(size_t i, size_t n, uint64_t*);
 59 | 	extern void ks_heapmake_uint64_t(size_t n, uint64_t*);
 60 | 	int32_t i, last0, m;
 61 | 	uint64_t b[MAX_MAX_HIGH_OCC]; // this is to avoid a heap allocation
 62 | 
 63 | 	if (n == 0 || n == 1) return;
 64 | 	for (i = m = 0; i < n; ++i)
 65 | 		if (a[i].n > max_occ) ++m;
 66 | 	if (m == 0) return; // no high-frequency k-mers; do nothing
 67 | 	for (i = 0, last0 = -1; i <= n; ++i) {
 68 | 		if (i == n || a[i].n <= max_occ) {
 69 | 			if (i - last0 > 1) {
 70 | 				int32_t ps = last0 < 0? 0 : (uint32_t)a[last0].q_pos>>1;
 71 | 				int32_t pe = i == n? len : (uint32_t)a[i].q_pos>>1;
 72 | 				int32_t j, k, st = last0 + 1, en = i;
 73 | 				int32_t max_high_occ = (int32_t)((double)(pe - ps) / dist + .499);
 74 | 				if (max_high_occ > 0) {
 75 | 					if (max_high_occ > MAX_MAX_HIGH_OCC)
 76 | 						max_high_occ = MAX_MAX_HIGH_OCC;
 77 | 					for (j = st, k = 0; j < en && k < max_high_occ; ++j, ++k)
 78 | 						b[k] = (uint64_t)a[j].n<<32 | j;
 79 | 					ks_heapmake_uint64_t(k, b); // initialize the binomial heap
 80 | 					for (; j < en; ++j) { // if there are more, choose top max_high_occ
 81 | 						if (a[j].n < (int32_t)(b[0]>>32)) { // then update the heap
 82 | 							b[0] = (uint64_t)a[j].n<<32 | j;
 83 | 							ks_heapdown_uint64_t(0, k, b);
 84 | 						}
 85 | 					}
 86 | 					for (j = 0; j < k; ++j) a[(uint32_t)b[j]].flt = 1;
 87 | 				}
 88 | 				for (j = st; j < en; ++j) a[j].flt ^= 1;
 89 | 				for (j = st; j < en; ++j)
 90 | 					if (a[j].n > max_max_occ)
 91 | 						a[j].flt = 1;
 92 | 			}
 93 | 			last0 = i;
 94 | 		}
 95 | 	}
 96 | }
 97 | 
 98 | mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int max_max_occ, int dist, const mm_idx_t *mi, const mm128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, uint64_t **mini_pos)
 99 | {
100 | 	int rep_st = 0, rep_en = 0, n_m, n_m0;
101 | 	size_t i;
102 | 	mm_seed_t *m;
103 | 	*n_mini_pos = 0;
104 | 	*mini_pos = (uint64_t*)kmalloc(km, mv->n * sizeof(uint64_t));
105 | 	m = mm_seed_collect_all(km, mi, mv, &n_m0);
106 | 	if (dist > 0 && max_max_occ > max_occ) {
107 | 		mm_seed_select(n_m0, m, qlen, max_occ, max_max_occ, dist);
108 | 	} else {
109 | 		for (i = 0; i < n_m0; ++i)
110 | 			if (m[i].n > max_occ)
111 | 				m[i].flt = 1;
112 | 	}
113 | 	for (i = 0, n_m = 0, *rep_len = 0, *n_a = 0; i < n_m0; ++i) {
114 | 		mm_seed_t *q = &m[i];
115 | 		//fprintf(stderr, "X\t%d\t%d\t%d\n", q->q_pos>>1, q->n, q->flt);
116 | 		if (q->flt) {
117 | 			int en = (q->q_pos >> 1) + 1, st = en - q->q_span;
118 | 			if (st > rep_en) {
119 | 				*rep_len += rep_en - rep_st;
120 | 				rep_st = st, rep_en = en;
121 | 			} else rep_en = en;
122 | 		} else {
123 | 			*n_a += q->n;
124 | 			(*mini_pos)[(*n_mini_pos)++] = (uint64_t)q->q_span<<32 | q->q_pos>>1;
125 | 			m[n_m++] = *q;
126 | 		}
127 | 	}
128 | 	*rep_len += rep_en - rep_st;
129 | 	*_n_m = n_m;
130 | 	return m;
131 | }
132 | 


--------------------------------------------------------------------------------
/ksw2_dispatch.c:
--------------------------------------------------------------------------------
 1 | #ifdef KSW_CPU_DISPATCH
 2 | #include <stdlib.h>
 3 | #include "ksw2.h"
 4 | 
// Bit flags describing the SIMD instruction sets detected by x86_simd().
// Each flag is a distinct bit so that several can be OR-ed into one int.
#define SIMD_SSE     0x1
#define SIMD_SSE2    0x2
#define SIMD_SSE3    0x4
#define SIMD_SSSE3   0x8
#define SIMD_SSE4_1  0x10
#define SIMD_SSE4_2  0x20
#define SIMD_AVX     0x40
#define SIMD_AVX2    0x80
#define SIMD_AVX512F 0x100
14 | 
#ifndef _MSC_VER
// adapted from https://github.com/01org/linux-sgx/blob/master/common/inc/internal/linux/cpuid_gnu.h
// Execute the CPUID instruction with EAX=func_id and ECX=subfunc_id and store
// the resulting EAX, EBX, ECX, EDX in cpuid[0..3]. Only compiled when the
// compiler is not MSVC.
void __cpuidex(int cpuid[4], int func_id, int subfunc_id)
{
	int eax, ebx, ecx, edx;
#if defined(__x86_64__)
	__asm__ volatile ("cpuid"
			: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
			: "0" (func_id), "2" (subfunc_id));
#else // on 32bit, ebx can NOT be used as PIC code
	// swap ebx into a scratch register around cpuid so ebx itself is preserved
	__asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
			: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
			: "0" (func_id), "2" (subfunc_id));
#endif
	cpuid[0] = eax;
	cpuid[1] = ebx;
	cpuid[2] = ecx;
	cpuid[3] = edx;
}
#endif
30 | 
31 | static int ksw_simd = -1;
32 | 
33 | static int x86_simd(void)
34 | {
35 | 	int flag = 0, cpuid[4], max_id;
36 | 	__cpuidex(cpuid, 0, 0);
37 | 	max_id = cpuid[0];
38 | 	if (max_id == 0) return 0;
39 | 	__cpuidex(cpuid, 1, 0);
40 | 	if (cpuid[3]>>25&1) flag |= SIMD_SSE;
41 | 	if (cpuid[3]>>26&1) flag |= SIMD_SSE2;
42 | 	if (cpuid[2]>>0 &1) flag |= SIMD_SSE3;
43 | 	if (cpuid[2]>>9 &1) flag |= SIMD_SSSE3;
44 | 	if (cpuid[2]>>19&1) flag |= SIMD_SSE4_1;
45 | 	if (cpuid[2]>>20&1) flag |= SIMD_SSE4_2;
46 | 	if (cpuid[2]>>28&1) flag |= SIMD_AVX;
47 | 	if (max_id >= 7) {
48 | 		__cpuidex(cpuid, 7, 0);
49 | 		if (cpuid[1]>>5 &1) flag |= SIMD_AVX2;
50 | 		if (cpuid[1]>>16&1) flag |= SIMD_AVX512F;
51 | 	}
52 | 	return flag;
53 | }
54 | 
55 | void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
56 | {
57 | 	extern void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
58 | 	extern void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
59 | 	if (ksw_simd < 0) ksw_simd = x86_simd();
60 | 	if (ksw_simd & SIMD_SSE4_1)
61 | 		ksw_extz2_sse41(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, end_bonus, flag, ez);
62 | 	else if (ksw_simd & SIMD_SSE2)
63 | 		ksw_extz2_sse2(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, end_bonus, flag, ez);
64 | 	else abort();
65 | }
66 | 
67 | void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
68 | 				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
69 | {
70 | 	extern void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
71 | 				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
72 | 	extern void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
73 | 				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
74 | 	if (ksw_simd < 0) ksw_simd = x86_simd();
75 | 	if (ksw_simd & SIMD_SSE4_1)
76 | 		ksw_extd2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, end_bonus, flag, ez);
77 | 	else if (ksw_simd & SIMD_SSE2)
78 | 		ksw_extd2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, end_bonus, flag, ez);
79 | 	else abort();
80 | }
81 | 
82 | void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
83 | 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
84 | {
85 | 	extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
86 | 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
87 | 	extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
88 | 				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
89 | 	if (ksw_simd < 0) ksw_simd = x86_simd();
90 | 	if (ksw_simd & SIMD_SSE4_1)
91 | 		ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
92 | 	else if (ksw_simd & SIMD_SSE2)
93 | 		ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
94 | 	else abort();
95 | }
96 | #endif
97 | 


--------------------------------------------------------------------------------
/kdq.h:
--------------------------------------------------------------------------------
  1 | #ifndef __AC_KDQ_H
  2 | #define __AC_KDQ_H
  3 | 
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | #include <stdint.h>
  7 | #include "kalloc.h"
  8 | 
/*
 * Declare the queue struct kdq_<type>_t for element type `type`.
 *   front  index in a[] of the first element
 *   bits   log2 of the capacity of a[] (kdq_init sets it to 2)
 *   count  number of elements currently stored
 *   mask   (1<<bits) - 1, used to wrap indices
 *   a      element storage
 *   km     allocator handle passed to kmalloc/krealloc/kfree
 */
#define __KDQ_TYPE(type) \
	typedef struct { \
		uint64_t front:58, bits:6, count, mask; \
		type *a; \
		void *km; \
	} kdq_##type##_t;

/* Accessors. None of them checks that the queue is non-empty or that i < count. */
#define kdq_t(type) kdq_##type##_t
#define kdq_size(q) ((q)->count)
#define kdq_first(q) ((q)->a[(q)->front])
#define kdq_last(q) ((q)->a[((q)->front + (q)->count - 1) & (q)->mask])
#define kdq_at(q, i) ((q)->a[((q)->front + (i)) & (q)->mask])
 21 | 
 22 | #define __KDQ_IMPL(type, SCOPE) \
 23 | 	SCOPE kdq_##type##_t *kdq_init_##type(void *km) \
 24 | 	{ \
 25 | 		kdq_##type##_t *q; \
 26 | 		q = (kdq_##type##_t*)kcalloc(km, 1, sizeof(kdq_##type##_t)); \
 27 | 		q->bits = 2, q->mask = (1ULL<<q->bits) - 1; \
 28 | 		q->a = (type*)kmalloc(km, (1<<q->bits) * sizeof(type)); \
 29 | 		q->km = km; \
 30 | 		return q; \
 31 | 	} \
 32 | 	SCOPE void kdq_destroy_##type(kdq_##type##_t *q) \
 33 | 	{ \
 34 | 		if (q == 0) return; \
 35 | 		kfree(q->km, q->a); kfree(q->km, q); \
 36 | 	} \
 37 | 	SCOPE int kdq_resize_##type(kdq_##type##_t *q, int new_bits) \
 38 | 	{ \
 39 | 		size_t new_size = 1ULL<<new_bits, old_size = 1ULL<<q->bits; \
 40 | 		if (new_size < q->count) { /* not big enough */ \
 41 | 			int i; \
 42 | 			for (i = 0; i < 64; ++i) \
 43 | 				if (1ULL<<i > q->count) break; \
 44 | 			new_bits = i, new_size = 1ULL<<new_bits; \
 45 | 		} \
 46 | 		if (new_bits == q->bits) return q->bits; /* unchanged */ \
 47 | 		if (new_bits > q->bits) q->a = (type*)krealloc(q->km, q->a, (1ULL<<new_bits) * sizeof(type)); \
 48 | 		if (q->front + q->count <= old_size) { /* unwrapped */ \
 49 | 			if (q->front + q->count > new_size) /* only happens for shrinking */ \
 50 | 				memmove(q->a, q->a + new_size, (q->front + q->count - new_size) * sizeof(type)); \
 51 | 		} else { /* wrapped */ \
 52 | 			memmove(q->a + (new_size - (old_size - q->front)), q->a + q->front, (old_size - q->front) * sizeof(type)); \
 53 | 			q->front = new_size - (old_size - q->front); \
 54 | 		} \
 55 | 		q->bits = new_bits, q->mask = (1ULL<<q->bits) - 1; \
 56 | 		if (new_bits < q->bits) q->a = (type*)krealloc(q->km, q->a, (1ULL<<new_bits) * sizeof(type)); \
 57 | 		return q->bits; \
 58 | 	} \
 59 | 	SCOPE type *kdq_pushp_##type(kdq_##type##_t *q) \
 60 | 	{ \
 61 | 		if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
 62 | 		return &q->a[((q->count++) + q->front) & (q)->mask]; \
 63 | 	} \
 64 | 	SCOPE void kdq_push_##type(kdq_##type##_t *q, type v) \
 65 | 	{ \
 66 | 		if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
 67 | 		q->a[((q->count++) + q->front) & (q)->mask] = v; \
 68 | 	} \
 69 | 	SCOPE type *kdq_unshiftp_##type(kdq_##type##_t *q) \
 70 | 	{ \
 71 | 		if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
 72 | 		++q->count; \
 73 | 		q->front = q->front? q->front - 1 : (1ULL<<q->bits) - 1; \
 74 | 		return &q->a[q->front]; \
 75 | 	} \
 76 | 	SCOPE void kdq_unshift_##type(kdq_##type##_t *q, type v) \
 77 | 	{ \
 78 | 		type *p; \
 79 | 		p = kdq_unshiftp_##type(q); \
 80 | 		*p = v; \
 81 | 	} \
 82 | 	SCOPE type *kdq_pop_##type(kdq_##type##_t *q) \
 83 | 	{ \
 84 | 		return q->count? &q->a[((--q->count) + q->front) & q->mask] : 0; \
 85 | 	} \
 86 | 	SCOPE type *kdq_shift_##type(kdq_##type##_t *q) \
 87 | 	{ \
 88 | 		type *d = 0; \
 89 | 		if (q->count == 0) return 0; \
 90 | 		d = &q->a[q->front++]; \
 91 | 		q->front &= q->mask; \
 92 | 		--q->count; \
 93 | 		return d; \
 94 | 	}
 95 | 
/* Emit both the struct and the function definitions for `type` with linkage SCOPE. */
#define KDQ_INIT2(type, SCOPE) \
	__KDQ_TYPE(type) \
	__KDQ_IMPL(type, SCOPE)

/* klib_unused: marks a definition as possibly unused on clang >= 3 / GCC >= 3; empty elsewhere. */
#ifndef klib_unused
#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
#define klib_unused __attribute__ ((__unused__))
#else
#define klib_unused
#endif
#endif /* klib_unused */

/* Common case: file-local inline functions that may go unused. */
#define KDQ_INIT(type) KDQ_INIT2(type, static inline klib_unused)
109 | 
/*
 * Declare the struct and the prototypes of the queue functions for `type`,
 * for use when the definitions are emitted elsewhere with KDQ_INIT2().
 * kdq_init takes the allocator handle, matching its definition in __KDQ_IMPL.
 */
#define KDQ_DECLARE(type) \
	__KDQ_TYPE(type) \
	kdq_##type##_t *kdq_init_##type(void *km); \
	void kdq_destroy_##type(kdq_##type##_t *q); \
	int kdq_resize_##type(kdq_##type##_t *q, int new_bits); \
	type *kdq_pushp_##type(kdq_##type##_t *q); \
	void kdq_push_##type(kdq_##type##_t *q, type v); \
	type *kdq_unshiftp_##type(kdq_##type##_t *q); \
	void kdq_unshift_##type(kdq_##type##_t *q, type v); \
	type *kdq_pop_##type(kdq_##type##_t *q); \
	type *kdq_shift_##type(kdq_##type##_t *q);
121 | 
/* Type-generic spellings: kdq_xxx(type, ...) expands to kdq_xxx_<type>(...). */
#define kdq_init(type, km) kdq_init_##type(km)
#define kdq_destroy(type, q) kdq_destroy_##type(q)
#define kdq_resize(type, q, new_bits) kdq_resize_##type(q, new_bits)
#define kdq_pushp(type, q) kdq_pushp_##type(q)
#define kdq_push(type, q, v) kdq_push_##type(q, v)
#define kdq_pop(type, q) kdq_pop_##type(q)
#define kdq_unshiftp(type, q) kdq_unshiftp_##type(q)
#define kdq_unshift(type, q, v) kdq_unshift_##type(q, v)
#define kdq_shift(type, q) kdq_shift_##type(q)
131 | 
132 | #endif
133 | 


--------------------------------------------------------------------------------
/kthread.c:
--------------------------------------------------------------------------------
  1 | #include <pthread.h>
  2 | #include <stdlib.h>
  3 | #include <limits.h>
  4 | #include <stdint.h>
  5 | #include <stdio.h>
  6 | #include "kthread.h"
  7 | 
  8 | #if (defined(WIN32) || defined(_WIN32)) && defined(_MSC_VER)
  9 | #define __sync_fetch_and_add(ptr, addend)     _InterlockedExchangeAdd((void*)ptr, addend)
 10 | #endif
 11 | 
 12 | /************
 13 |  * kt_for() *
 14 |  ************/
 15 | 
struct kt_for_t;

// Per-thread state for kt_for().
typedef struct {
	struct kt_for_t *t; // the job this worker belongs to
	long i;             // next index this worker will claim; advanced by n_threads with an atomic add
} ktf_worker_t;

// Shared description of one kt_for() call.
typedef struct kt_for_t {
	int n_threads;                 // number of worker threads
	long n;                        // indices 0..n-1 are passed to func
	ktf_worker_t *w;               // array of n_threads workers
	void (*func)(void*,long,int);  // callback: (data, index, thread id)
	void *data;                    // opaque pointer forwarded to func
} kt_for_t;
 30 | 
 31 | static inline long steal_work(kt_for_t *t)
 32 | {
 33 | 	int i, min_i = -1;
 34 | 	long k, min = LONG_MAX;
 35 | 	for (i = 0; i < t->n_threads; ++i)
 36 | 		if (min > t->w[i].i) min = t->w[i].i, min_i = i;
 37 | 	k = __sync_fetch_and_add(&t->w[min_i].i, t->n_threads);
 38 | 	return k >= t->n? -1 : k;
 39 | }
 40 | 
 41 | static void *ktf_worker(void *data)
 42 | {
 43 | 	ktf_worker_t *w = (ktf_worker_t*)data;
 44 | 	long i;
 45 | 	for (;;) {
 46 | 		i = __sync_fetch_and_add(&w->i, w->t->n_threads);
 47 | 		if (i >= w->t->n) break;
 48 | 		w->t->func(w->t->data, i, w - w->t->w);
 49 | 	}
 50 | 	while ((i = steal_work(w->t)) >= 0)
 51 | 		w->t->func(w->t->data, i, w - w->t->w);
 52 | #if defined(__AMD_SPLIT_KERNELS__)
 53 | 	// call func one last time for this thread to signal end of all reads
 54 | 	w->t->func(w->t->data, -1, w - w->t->w);
 55 | #endif
 56 | 	pthread_exit(0);
 57 | }
 58 | 
 59 | void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n)
 60 | {
 61 | 	if (n_threads > 1) {
 62 | 		int i;
 63 | 		kt_for_t t;
 64 | 		pthread_t *tid;
 65 | 		t.func = func, t.data = data, t.n_threads = n_threads, t.n = n;
 66 | 		t.w = (ktf_worker_t*)calloc(n_threads, sizeof(ktf_worker_t));
 67 | 		tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t));
 68 | 		for (i = 0; i < n_threads; ++i)
 69 | 			t.w[i].t = &t, t.w[i].i = i;
 70 | 		for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktf_worker, &t.w[i]);
 71 | 		for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
 72 | 		free(tid); free(t.w);
 73 | 	} else {
 74 | 		long j;
 75 | 		for (j = 0; j < n; ++j) func(data, j, 0);
 76 | #if defined(__AMD_SPLIT_KERNELS__)
 77 | 		// call once at the end to signal end of all reads
 78 | 		func(data, -1, 0);
 79 | #endif
 80 | 
 81 | 	}
 82 | }
 83 | 
 84 | /*****************
 85 |  * kt_pipeline() *
 86 |  *****************/
 87 | 
struct ktp_t;

// Per-thread state for kt_pipeline().
typedef struct {
	struct ktp_t *pl; // owning pipeline
	int64_t index;    // sequence number of the item this worker is carrying
	int step;         // pipeline step to run next; n_steps means "finished"
	void *data;       // value returned by the previous step, fed to the next one
} ktp_worker_t;

// Shared state of one kt_pipeline() call.
typedef struct ktp_t {
	void *shared;                      // opaque pointer forwarded to func
	void *(*func)(void*, int, void*);  // callback: (shared, step, input) -> output
	int64_t index;                     // next sequence number to hand out
	int n_workers, n_steps;
	ktp_worker_t *workers;             // array of n_workers workers
	pthread_mutex_t mutex;             // protects step/index of all workers
	pthread_cond_t cv;                 // broadcast whenever a worker changes step
} ktp_t;
106 | 
107 | static void *ktp_worker(void *data)
108 | {
109 | 	ktp_worker_t *w = (ktp_worker_t*)data;
110 | 	ktp_t *p = w->pl;
111 | 	while (w->step < p->n_steps) {
112 | 		// test whether we can kick off the job with this worker
113 | 		pthread_mutex_lock(&p->mutex);
114 | 		for (;;) {
115 | 			int i;
116 | 			// test whether another worker is doing the same step
117 | 			for (i = 0; i < p->n_workers; ++i) {
118 | 				if (w == &p->workers[i]) continue; // ignore itself
119 | 				if (p->workers[i].step <= w->step && p->workers[i].index < w->index)
120 | 					break;
121 | 			}
122 | 			if (i == p->n_workers) break; // no workers with smaller indices are doing w->step or the previous steps
123 | 			pthread_cond_wait(&p->cv, &p->mutex);
124 | 		}
125 | 		pthread_mutex_unlock(&p->mutex);
126 | 
127 | 		// working on w->step
128 | 		w->data = p->func(p->shared, w->step, w->step? w->data : 0); // for the first step, input is NULL
129 | 
130 | 		// update step and let other workers know
131 | 		pthread_mutex_lock(&p->mutex);
132 | 		w->step = w->step == p->n_steps - 1 || w->data? (w->step + 1) % p->n_steps : p->n_steps;
133 | 		if (w->step == 0) w->index = p->index++;
134 | 		pthread_cond_broadcast(&p->cv);
135 | 		pthread_mutex_unlock(&p->mutex);
136 | 	}
137 | 	pthread_exit(0);
138 | }
139 | 
140 | void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps)
141 | {
142 | 	ktp_t aux;
143 | 	pthread_t *tid;
144 | 	int i;
145 | 
146 | 	if (n_threads < 1) n_threads = 1;
147 | 	aux.n_workers = n_threads;
148 | 	aux.n_steps = n_steps;
149 | 	aux.func = func;
150 | 	aux.shared = shared_data;
151 | 	aux.index = 0;
152 | 	pthread_mutex_init(&aux.mutex, 0);
153 | 	pthread_cond_init(&aux.cv, 0);
154 | 
155 | 	aux.workers = (ktp_worker_t*)calloc(n_threads, sizeof(ktp_worker_t));
156 | 	for (i = 0; i < n_threads; ++i) {
157 | 		ktp_worker_t *w = &aux.workers[i];
158 | 		w->step = 0; w->pl = &aux; w->data = 0;
159 | 		w->index = aux.index++;
160 | 	}
161 | 
162 | 	tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t));
163 | 	for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]);
164 | 	for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
165 | 	free(tid); free(aux.workers);
166 | 
167 | 	pthread_mutex_destroy(&aux.mutex);
168 | 	pthread_cond_destroy(&aux.cv);
169 | }
170 | 


--------------------------------------------------------------------------------
/gpu/plutils.h:
--------------------------------------------------------------------------------
  1 | #ifndef _PLUTILS_H_
  2 | #define _PLUTILS_H_
  3 | 
  4 | #include <assert.h>
  5 | #include <limits.h>
  6 | #include <stdbool.h>
  7 | #include <stdint.h>
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | #include <string.h>
 11 | 
 12 | #include "kalloc.h"
 13 | #include "minimap.h"
 14 | 
 15 | /* Chaining Options */
 16 | 
 17 | /* structure for metadata and hits */
// Sequence meta data
typedef struct {
    long i;          // read id
    int seg_id;      // seg id
    char name[200];  // name of the sequence (fixed-size buffer)
    uint32_t len;    // length of the sequence

    // mi data
    int n_alt;
    int is_alt;  // reference sequences only

    // sequence info
    int qlen_sum;
} mm_seq_meta_t;
 32 | 
// Chaining parameters bundled into one value (see build_misc()).
// NOTE(review): field names mirror chaining options; exact semantics are defined by the users of this struct.
typedef struct {
    int max_iter, max_dist_x, max_dist_y, max_skip, bw, min_cnt, min_score,
        is_cdna, n_seg;
    float chn_pen_gap, chn_pen_skip;
} Misc;

// Metadata for one input: an array of n_refs sequence records plus the chaining parameters.
typedef struct {
    mm_seq_meta_t *refs;
    int n_refs;
    Misc misc;
} input_meta_t;
 44 | 
// Per-read state carried from seeding through chaining.
typedef struct {
    mm_seq_meta_t seq;

    // minimap2 input data for reads
    const char **qseqs;  // sequences for each segment          <- allocated in worker_for, freed in free_read after seeding
    int *qlens;          // query length for each segment       <- allocated in worker_for, freed in free_read after seeding
    int n_seg;           // number of segs

//DEBUG: for SCORE CHECK after chaining
#if defined(DEBUG_CHECK) && 0
    int32_t *f;
    int64_t *p;
#endif  // DEBUG_CHECK
    int rep_len;
    int frag_gap;

    // seeding outputs
    uint64_t *mini_pos;  // minimizer positions (allocation site not stated in this header)
    int n_mini_pos;      // number of entries in mini_pos

    // seeding output, updated in chaining
    mm128_t *a;  // array of anchors
    int64_t n;   // number of anchors = n_a

    // chaining outputs
    uint64_t *u;      // scores for chains
    int n_u;          // number of chains formed from anchors == n_reg0

} chain_read_t;
 74 | 
// A segment described by a start and an end index.
// NOTE(review): whether end_idx is inclusive is not visible in this header — confirm at the use sites.
typedef struct seg_t {
    size_t start_idx;
    size_t end_idx;
//DEBUG: used for debug plchain_cal_long_seg_range_dis LONG_SEG_RANGE_DIS
#ifdef DEBUG_VERBOSE 
    size_t start_segid;
    size_t end_segid;
#endif // DEBUG_VERBOSE
} seg_t;
 84 | 
 85 | #ifdef __cplusplus
 86 | extern "C" {
 87 | #endif // __cplusplus
 88 | /* GPU chaining methods */
 89 | // // <lchain.h> backward, original chaining methods
 90 | // void chain_backword_cpu(const input_meta_t *meta, chain_read_t *read_arr,
 91 | //                         int n_read);
 92 | // // <fchain.c> forward chaining methods
 93 | // void chain_forward_cpu(const input_meta_t *meta, chain_read_t *read_arr,
 94 | //                        int n_read);
 95 | 
 96 | // <plchain.cu> gpu chaining methods
 97 | // initialization and cleanup
 98 | void init_stream_gpu(size_t *max_total_n, int *max_reads,
 99 |                      int *min_n, char gpu_config_file[],  Misc misc);  // for stream_gpu
100 | void finish_stream_gpu(const mm_idx_t *mi, const mm_mapopt_t *opt, chain_read_t **batches,
101 |                         int *num_reads, int num_batch, void *km);  // for stream_gpu
102 | void free_stream_gpu(int n_threads); // for stream_gpu free pinned memory
103 | // chaining method
104 | void chain_stream_gpu(const mm_idx_t *mi, const mm_mapopt_t *opt, chain_read_t **in_arr_ptr, int *n_read_ptr, int thread_id, void* km);
105 | 
106 | /* <lchain.c> Chaining backtracking methods */
107 | uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f,
108 |                              const int64_t *p, int32_t *v, int32_t *t,
109 |                              int32_t min_cnt, int32_t min_sc, int32_t max_drop,
110 |                              int32_t *n_u_, int32_t *n_v_);
111 | mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32_t *v, mm128_t *a);
112 | 
113 | 
114 | /* <map.c> Post Chaining helpers */
115 | Misc build_misc(const mm_idx_t *mi, const mm_mapopt_t *opt, const int64_t qlen_sum, const int n_seg);
116 | void post_chaining_helper(const mm_idx_t *mi, const mm_mapopt_t *opt,
117 |                           chain_read_t *read, Misc misc, void *km);
118 | 
119 | #ifdef __cplusplus
120 | }
121 | #endif  // __cplusplus
122 | 
123 | /////////////////////////////////////////////////////
124 | ///////////         Free Input Struct   /////////////
125 | /////////////////////////////////////////////////////
126 | // free input_iter pointers except a, because it is freed seperately.
127 | static inline void free_read(chain_read_t *in, void* km) {
128 |     if (in->qseqs) kfree(km, in->qseqs);
129 |     if (in->qlens) kfree(km, in->qlens);
130 | 
131 | //DEBUG: for SCORE CHECK after chaining
132 | #if defined(DEBUG_CHECK) && 0 
133 |     if (in->f) kfree(km, in->f);
134 |     if (in->p) kfree(km, in->p);
135 |     in->f = 0, in->p = 0;
136 | #endif
137 |     in->qseqs = 0, in->qlens = 0;
138 |     in->a = 0, in->u = 0;
139 | }
140 | 
141 | static inline void free_meta_struct(input_meta_t *meta, void *km) {
142 |     if (meta->refs) kfree(km, meta->refs);
143 | }
144 | #endif  // _PLUTILS_H_
145 | 


--------------------------------------------------------------------------------
/ksw2_ll_sse.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <stdint.h>
  3 | #include <string.h>
  4 | #include "ksw2.h"
  5 | 
  6 | #ifdef USE_SIMDE
  7 | #include <simde/x86/sse2.h>
  8 | #else
  9 | #include <emmintrin.h>
 10 | #endif
 11 | 
/* Branch-prediction hints: map to __builtin_expect under GCC-compatible compilers, no-ops elsewhere. */
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x),1)
#define UNLIKELY(x) __builtin_expect((x),0)
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif
 19 | 
/* Query profile built by ksw_ll_qinit(). All vector pointers point into the same allocation as the struct. */
typedef struct {
	int qlen, slen; /* query length; number of __m128i vectors per row */
	uint8_t shift, mdiff, max, size; /* score offset; max-min score difference; max score; bytes per score (1 or 2) */
	__m128i *qp, *H0, *H1, *E, *Hmax; /* query profile (m rows of slen vectors) and four work rows of slen vectors */
} kswq_t;
 25 | 
 26 | /**
 27 |  * Initialize the query data structure
 28 |  *
 29 |  * @param size   Number of bytes used to store a score; valid valures are 1 or 2
 30 |  * @param qlen   Length of the query sequence
 31 |  * @param query  Query sequence
 32 |  * @param m      Size of the alphabet
 33 |  * @param mat    Scoring matrix in a one-dimension array
 34 |  *
 35 |  * @return       Query data structure
 36 |  */
 37 | void *ksw_ll_qinit(void *km, int size, int qlen, const uint8_t *query, int m, const int8_t *mat)
 38 | {
 39 | 	kswq_t *q;
 40 | 	int slen, a, tmp, p;
 41 | 
 42 | 	size = size > 1? 2 : 1;
 43 | 	p = 8 * (3 - size); // # values per __m128i
 44 | 	slen = (qlen + p - 1) / p; // segmented length
 45 | 	q = (kswq_t*)kmalloc(km, sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory
 46 | 	q->qp = (__m128i*)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory
 47 | 	q->H0 = q->qp + slen * m;
 48 | 	q->H1 = q->H0 + slen;
 49 | 	q->E  = q->H1 + slen;
 50 | 	q->Hmax = q->E + slen;
 51 | 	q->slen = slen; q->qlen = qlen; q->size = size;
 52 | 	// compute shift
 53 | 	tmp = m * m;
 54 | 	for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score
 55 | 		if (mat[a] < (int8_t)q->shift) q->shift = mat[a];
 56 | 		if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a];
 57 | 	}
 58 | 	q->max = q->mdiff;
 59 | 	q->shift = 256 - q->shift; // NB: q->shift is uint8_t
 60 | 	q->mdiff += q->shift; // this is the difference between the min and max scores
 61 | 	// An example: p=8, qlen=19, slen=3 and segmentation:
 62 | 	//  {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}}
 63 | 	if (size == 1) {
 64 | 		int8_t *t = (int8_t*)q->qp;
 65 | 		for (a = 0; a < m; ++a) {
 66 | 			int i, k, nlen = slen * p;
 67 | 			const int8_t *ma = mat + a * m;
 68 | 			for (i = 0; i < slen; ++i)
 69 | 				for (k = i; k < nlen; k += slen) // p iterations
 70 | 					*t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift;
 71 | 		}
 72 | 	} else {
 73 | 		int16_t *t = (int16_t*)q->qp;
 74 | 		for (a = 0; a < m; ++a) {
 75 | 			int i, k, nlen = slen * p;
 76 | 			const int8_t *ma = mat + a * m;
 77 | 			for (i = 0; i < slen; ++i)
 78 | 				for (k = i; k < nlen; k += slen) // p iterations
 79 | 					*t++ = (k >= qlen? 0 : ma[query[k]]);
 80 | 		}
 81 | 	}
 82 | 	return q;
 83 | }
 84 | 
/*
 * Score a target against a query profile using 16-bit lanes.
 *
 * q_      query profile returned by ksw_ll_qinit() (cast to kswq_t*); the code
 *         reads q->qp as 16-bit scores — presumably it must have been built
 *         with size 2, confirm at the call sites
 * tlen    length of target
 * target  target sequence; target[i] selects a row of the query profile
 * _gapo   gap open penalty; _gape  gap extension penalty
 * qe, te  outputs: query and target end positions of the best score; both are
 *         first set to -1
 *
 * Returns the best score found (gmax, starting from 0).
 */
int ksw_ll_i16(void *q_, int tlen, const uint8_t *target, int _gapo, int _gape, int *qe, int *te)
{
	kswq_t *q = (kswq_t*)q_;
	int slen, i, gmax = 0, qlen8;
	__m128i zero, gapoe, gape, *H0, *H1, *E, *Hmax;
	uint16_t *H8;

	// horizontal maximum of the eight 16-bit lanes of xx, stored in ret (xx is clobbered)
#define __max_8(ret, xx) do { \
		(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \
		(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \
		(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \
		(ret) = _mm_extract_epi16((xx), 0); \
	} while (0)

	// initialization
	*qe = *te = -1;
	zero = _mm_set1_epi32(0);
	gapoe = _mm_set1_epi16(_gapo + _gape);
	gape = _mm_set1_epi16(_gape);
	H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
	slen = q->slen, qlen8 = slen * 8;
	memset(E,    0, slen * sizeof(__m128i));
	memset(H0,   0, slen * sizeof(__m128i));
	memset(Hmax, 0, slen * sizeof(__m128i));
	// the core loop
	for (i = 0; i < tlen; ++i) {
		int j, k, imax;
		__m128i e, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
		h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
		h = _mm_slli_si128(h, 2); // shift by one 16-bit lane
		// first pass over the segments: H1 = max(H0_diag + score, E, F); update E and F
		for (j = 0; LIKELY(j < slen); ++j) {
			h = _mm_adds_epi16(h, *S++);
			e = _mm_load_si128(E + j);
			h = _mm_max_epi16(h, e);
			h = _mm_max_epi16(h, f);
			max = _mm_max_epi16(max, h); // track the per-lane maximum of this row
			_mm_store_si128(H1 + j, h);
			h = _mm_subs_epu16(h, gapoe); // saturating subtract: cost of opening a gap from h
			e = _mm_subs_epu16(e, gape);
			e = _mm_max_epi16(e, h);
			_mm_store_si128(E + j, e);
			f = _mm_subs_epu16(f, gape);
			f = _mm_max_epi16(f, h);
			h = _mm_load_si128(H0 + j); // value from the previous row for the next segment
		}
		// propagate F across lanes (at most 8 shifts); stop early once F cannot improve H any more
		for (k = 0; LIKELY(k < 8); ++k) {
			f = _mm_slli_si128(f, 2);
			for (j = 0; LIKELY(j < slen); ++j) {
				h = _mm_load_si128(H1 + j);
				h = _mm_max_epi16(h, f);
				_mm_store_si128(H1 + j, h);
				h = _mm_subs_epu16(h, gapoe);
				f = _mm_subs_epu16(f, gape);
				if(UNLIKELY(!_mm_movemask_epi8(_mm_cmpgt_epi16(f, h)))) goto end_loop_i16;
			}
		}
end_loop_i16:
		__max_8(imax, max);
		if (imax >= gmax) { // ">=": on ties the later target position wins
			gmax = imax; *te = i;
			memcpy(Hmax, H1, slen * sizeof(__m128i)); // keep the row that produced the best score
		}
		S = H1; H1 = H0; H0 = S; // swap the current and previous rows
	}
	// locate the query end: map the vector slot back to a query position; the last matching slot wins
	for (i = 0, H8 = (uint16_t*)Hmax; i < qlen8; ++i)
		if ((int)H8[i] == gmax) *qe = i / 8 + i % 8 * slen;
	return gmax;
}
153 | 


--------------------------------------------------------------------------------
/misc.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include "mmpriv.h"
  3 | 
/* Process-wide settings and timing accumulators, defined here with their initial values. */
int mm_verbose = 1;   /* presumably the verbosity level — confirm against its users */
int mm_dbg_flag = 0;  /* presumably a set of debugging flags — confirm against its users */
double mm_realtime0;
/* NOTE(review): min/max/avg/sum timing statistics for the seed, chain and align stages, judging by the names; all start at 0 */
double mm_time_seed_min = 0;
double mm_time_seed_max = 0;
double mm_time_seed_avg = 0;
double mm_time_seed_sum = 0;
double mm_time_chain_min = 0;
double mm_time_chain_max = 0;
double mm_time_chain_avg = 0;
double mm_time_chain_sum = 0;
double mm_time_align_min = 0;
double mm_time_align_max = 0;
double mm_time_align_avg = 0;
double mm_time_align_sum = 0;
 19 | 
 20 | #if defined(WIN32) || defined(_WIN32)
 21 | #include <windows.h>
 22 | 
/* Windows-only stand-in for the POSIX struct timezone taken by gettimeofday(); this file's gettimeofday() never reads it. */
struct timezone
{
  __int32  tz_minuteswest; /* minutes W of Greenwich */
  int  tz_dsttime;     /* type of dst correction */
};
 28 | 
 29 | /*
 30 |  * gettimeofday.c
 31 |  *    Win32 gettimeofday() replacement
 32 |  *    taken from PostgreSQL, according to
 33 |  *    https://stackoverflow.com/questions/1676036/what-should-i-use-to-replace-gettimeofday-on-windows
 34 |  *
 35 |  * src/port/gettimeofday.c
 36 |  *
 37 |  * Copyright (c) 2003 SRA, Inc.
 38 |  * Copyright (c) 2003 SKC, Inc.
 39 |  *
 40 |  * Permission to use, copy, modify, and distribute this software and
 41 |  * its documentation for any purpose, without fee, and without a
 42 |  * written agreement is hereby granted, provided that the above
 43 |  * copyright notice and this paragraph and the following two
 44 |  * paragraphs appear in all copies.
 45 |  *
 46 |  * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
 47 |  * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
 48 |  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
 49 |  * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
 50 |  * OF THE POSSIBILITY OF SUCH DAMAGE.
 51 |  *
 52 |  * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
 53 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 54 |  * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
 55 |  * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
 56 |  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 57 |  */
 58 | 
/* FILETIME of Jan 1 1970 00:00:00, i.e. the Unix epoch expressed in FILETIME
 * ticks. gettimeofday() subtracts it and divides by 10,000,000 to obtain
 * seconds, so one tick is 100 ns. */
static const unsigned __int64 epoch = ((unsigned __int64) 116444736000000000ULL);
 61 | 
 62 | /*
 63 |  * timezone information is stored outside the kernel so tzp isn't used anymore.
 64 |  *
 65 |  * Note: this function is not for Win32 high precision timing purpose. See
 66 |  * elapsed_time().
 67 |  */
 68 | int gettimeofday(struct timeval * tp, struct timezone *tzp)
 69 | {
 70 |     FILETIME    file_time;
 71 |     SYSTEMTIME  system_time;
 72 |     ULARGE_INTEGER ularge;
 73 | 
 74 |     GetSystemTime(&system_time);
 75 |     SystemTimeToFileTime(&system_time, &file_time);
 76 |     ularge.LowPart = file_time.dwLowDateTime;
 77 |     ularge.HighPart = file_time.dwHighDateTime;
 78 | 
 79 |     tp->tv_sec = (long) ((ularge.QuadPart - epoch) / 10000000L);
 80 |     tp->tv_usec = (long) (system_time.wMilliseconds * 1000);
 81 | 
 82 |     return 0;
 83 | }
 84 | 
 85 | // taken from https://stackoverflow.com/questions/5272470/c-get-cpu-usage-on-linux-and-windows
 86 | double cputime()
 87 | {
 88 | 	HANDLE hProcess = GetCurrentProcess();
 89 | 	FILETIME ftCreation, ftExit, ftKernel, ftUser;
 90 | 	SYSTEMTIME stKernel;
 91 | 	SYSTEMTIME stUser;
 92 | 
 93 | 	GetProcessTimes(hProcess, &ftCreation, &ftExit, &ftKernel, &ftUser);
 94 | 	FileTimeToSystemTime(&ftKernel, &stKernel);
 95 | 	FileTimeToSystemTime(&ftUser, &stUser);
 96 | 
 97 | 	double kernelModeTime = ((stKernel.wHour * 60.) + stKernel.wMinute * 60.) + stKernel.wSecond * 1. + stKernel.wMilliseconds / 1000.;
 98 | 	double userModeTime = ((stUser.wHour * 60.) + stUser.wMinute * 60.) + stUser.wSecond * 1. + stUser.wMilliseconds / 1000.;
 99 | 
100 | 	return kernelModeTime + userModeTime;
101 | }
102 | 
/* Peak resident set size is not measured in the Windows build: always 0. */
long peakrss(void)
{
	return 0L;
}
104 | #else
105 | #include <sys/resource.h>
106 | #include <sys/time.h>
107 | 
/* User + system CPU time consumed by this process so far, in seconds. */
double cputime(void)
{
	struct rusage usage;
	double micro;

	getrusage(RUSAGE_SELF, &usage);
	/* microsecond remainders of both counters, scaled to seconds */
	micro = 1e-6 * (usage.ru_utime.tv_usec + usage.ru_stime.tv_usec);
	return usage.ru_utime.tv_sec + usage.ru_stime.tv_sec + micro;
}
114 | 
/* Peak resident set size of this process as reported by getrusage().
 * On Linux the raw ru_maxrss value is multiplied by 1024; on other platforms
 * it is returned unchanged. */
long peakrss(void)
{
	struct rusage usage;
	long scale = 1;
#ifdef __linux__
	scale = 1024;
#endif
	getrusage(RUSAGE_SELF, &usage);
	return usage.ru_maxrss * scale;
}
125 | 
126 | #endif /* WIN32 || _WIN32 */
127 | 
/* Current wall-clock time from gettimeofday(), as fractional seconds. */
double realtime(void)
{
	struct timeval now;
	double frac;

	gettimeofday(&now, NULL);
	frac = now.tv_usec * 1e-6; /* microseconds -> seconds */
	return now.tv_sec + frac;
}
134 | 
/* Write str plus a newline to stdout; on failure report the error and exit. */
void mm_err_puts(const char *str)
{
	if (puts(str) != EOF)
		return;
	perror("[ERROR] failed to write the results");
	exit(EXIT_FAILURE);
}
144 | 
/*
 * Write nitems objects of size bytes each from p to fp, aborting the program
 * if they cannot all be written.
 *
 * fwrite() returns the number of items written and never EOF, so the result
 * is compared against nitems; a short write (e.g. disk full) is fatal.
 * A zero-sized request is a no-op.
 */
void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp)
{
	size_t n_written;
	if (size == 0 || nitems == 0) return; /* nothing to write; fwrite() would return 0 */
	n_written = fwrite(p, size, nitems, fp);
	if (n_written != nitems) {
		perror("[ERROR] failed to write data");
		exit(EXIT_FAILURE);
	}
}
154 | 
/*
 * Read nitems objects of size bytes each from fp into p, aborting the program
 * if they cannot all be read.
 *
 * fread() returns the number of items read and never EOF, so the result is
 * compared against nitems. A stream error is reported through perror(); a
 * premature end of file has no errno and gets its own message.
 * A zero-sized request is a no-op.
 */
void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp)
{
	size_t n_read;
	if (size == 0 || nitems == 0) return; /* nothing to read; fread() would return 0 */
	n_read = fread(p, size, nitems, fp);
	if (n_read != nitems) {
		if (ferror(fp)) perror("[ERROR] failed to read data");
		else fprintf(stderr, "[ERROR] failed to read data: unexpected end of file\n");
		exit(EXIT_FAILURE);
	}
}
164 | 
165 | #include "ksort.h"
166 | 
/* Radix sort over mm128_t arrays keyed on the .x member, treated as an
 * 8-byte key: instantiates radix_sort_128x() and its helpers from ksort.h. */
#define sort_key_128x(a) ((a).x)
KRADIX_SORT_INIT(128x, mm128_t, sort_key_128x, 8) 

/* Radix sort over plain uint64_t arrays (the value is its own key):
 * instantiates radix_sort_64(). */
#define sort_key_64(x) (x)
KRADIX_SORT_INIT(64, uint64_t, sort_key_64, 8)

/* Heap and k-th-smallest helpers (ks_heapdown_*, ks_heapmake_*, ks_ksmall_*)
 * for uint32_t and uint64_t, compared with '<'. */
KSORT_INIT_GENERIC(uint32_t)
KSORT_INIT_GENERIC(uint64_t)
175 | 


--------------------------------------------------------------------------------
/ksort.h:
--------------------------------------------------------------------------------
  1 | /* The MIT License
  2 | 
  3 |    Copyright (c) 2008, 2011 Attractive Chaos <attractor@live.co.uk>
  4 | 
  5 |    Permission is hereby granted, free of charge, to any person obtaining
  6 |    a copy of this software and associated documentation files (the
  7 |    "Software"), to deal in the Software without restriction, including
  8 |    without limitation the rights to use, copy, modify, merge, publish,
  9 |    distribute, sublicense, and/or sell copies of the Software, and to
 10 |    permit persons to whom the Software is furnished to do so, subject to
 11 |    the following conditions:
 12 | 
 13 |    The above copyright notice and this permission notice shall be
 14 |    included in all copies or substantial portions of the Software.
 15 | 
 16 |    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 |    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 |    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 19 |    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 20 |    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 21 |    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 22 |    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 23 |    SOFTWARE.
 24 | */
 25 | 
 26 | // This is a simplified version of ksort.h
 27 | 
 28 | #ifndef AC_KSORT_H
 29 | #define AC_KSORT_H
 30 | 
 31 | #include <stdlib.h>
 32 | #include <string.h>
 33 | #include <assert.h>
 34 | 
/* Stack frame describing a [left, right] range and a depth. No macro in this
 * simplified header refers to it.
 * NOTE(review): presumably kept from the full ksort.h introsort -- confirm
 * before removing. */
typedef struct {
	void *left, *right;
	int depth;
} ks_isort_stack_t;
 39 | 
/* Swap two lvalues a and b of type type_t through a temporary. */
#define KSORT_SWAP(type_t, a, b) { type_t t=(a); (a)=(b); (b)=t; }

/*
 * KSORT_INIT(name, type_t, __sort_lt) defines three functions for arrays of
 * type_t ordered by the strict "less than" predicate __sort_lt(a, b):
 *
 *   ks_heapdown_<name>(i, n, l)  sift l[i] down in a binary heap of n elements
 *                                (children of k are 2k+1 and 2k+2); the larger
 *                                child is promoted, so the heap is a max-heap
 *                                under __sort_lt.
 *   ks_heapmake_<name>(lsize, l) turn l[0..lsize) into such a heap by sifting
 *                                down every index from (lsize>>1)-1 to 0. The
 *                                loop relies on size_t wrap-around to stop,
 *                                which also makes lsize 0 and 1 no-ops.
 *   ks_ksmall_<name>(n, arr, kk) return the element that would sit at index kk
 *                                if arr[0..n) were sorted. arr is partially
 *                                reordered in place: each round partitions the
 *                                current range around a median-of-three pivot
 *                                and keeps only the side containing kk.
 *                                The code performs no bounds check on kk or n.
 */
#define KSORT_INIT(name, type_t, __sort_lt) \
	void ks_heapdown_##name(size_t i, size_t n, type_t l[]) \
	{ \
		size_t k = i; \
		type_t tmp = l[i]; \
		while ((k = (k << 1) + 1) < n) { \
			if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; \
			if (__sort_lt(l[k], tmp)) break; \
			l[i] = l[k]; i = k; \
		} \
		l[i] = tmp; \
	} \
	void ks_heapmake_##name(size_t lsize, type_t l[]) \
	{ \
		size_t i; \
		for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i) \
			ks_heapdown_##name(i, lsize, l); \
	} \
	type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)			\
	{																	\
		type_t *low, *high, *k, *ll, *hh, *mid;							\
		low = arr; high = arr + n - 1; k = arr + kk;					\
		for (;;) {														\
			if (high <= low) return *k;									\
			if (high == low + 1) {										\
				if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
				return *k;												\
			}															\
			mid = low + (high - low) / 2;								\
			if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
			if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
			if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);	\
			KSORT_SWAP(type_t, *mid, *(low+1));							\
			ll = low + 1; hh = high;									\
			for (;;) {													\
				do ++ll; while (__sort_lt(*ll, *low));					\
				do --hh; while (__sort_lt(*low, *hh));					\
				if (hh < ll) break;										\
				KSORT_SWAP(type_t, *ll, *hh);							\
			}															\
			KSORT_SWAP(type_t, *low, *hh);								\
			if (hh <= k) low = ll;										\
			if (hh >= k) high = hh - 1;									\
		}																\
	}																	\
 87 | 
/* Convenience wrapper: call the ks_ksmall_<name>() instance by name. */
#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)

/* Ready-made "less than" predicates: operator < and strcmp() ordering. */
#define ks_lt_generic(a, b) ((a) < (b))
#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)

/* Element type used by the string instantiation below. */
typedef const char *ksstr_t;

/* Instantiate KSORT_INIT for a scalar type (named after the type itself)
 * or for C strings (named "str"). */
#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
 97 | 
/* Ranges of at most RS_MIN_SIZE elements are finished by insertion sort. */
#define RS_MIN_SIZE 64
/* Maximum digit width in bits; also sizes the on-stack bucket array (1<<8). */
#define RS_MAX_BITS 8

/*
 * KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) defines an in-place,
 * most-significant-digit-first radix sort for arrays of rstype_t. rskey(x)
 * must yield an unsigned integer key; sizeof_key is the key width in bytes.
 *
 *   rs_insertsort_<name>(beg, end)  insertion sort of [beg, end), ascending key.
 *   rs_sort_<name>(beg, end, n_bits, s)
 *       One radix pass on the n_bits-wide digit starting at bit s:
 *         1. count how many elements fall into each of the 1<<n_bits buckets;
 *         2. turn the counts into bucket [b, e) boundaries inside [beg, end);
 *         3. permute elements into their buckets in place by following
 *            displacement cycles (b advances as slots are filled);
 *         4. reset each bucket's b to its start;
 *         5. if s != 0, move to the next lower digit and recurse into buckets
 *            larger than RS_MIN_SIZE, insertion-sorting smaller ones.
 *       Buckets are not sorted further once the digit at bit 0 has been
 *       processed (s == 0).
 *   radix_sort_<name>(beg, end)     entry point: insertion sort for small
 *       inputs, otherwise start at the top digit, (sizeof_key-1)*RS_MAX_BITS.
 */
#define KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) \
	typedef struct { \
		rstype_t *b, *e; \
	} rsbucket_##name##_t; \
	void rs_insertsort_##name(rstype_t *beg, rstype_t *end) \
	{ \
		rstype_t *i; \
		for (i = beg + 1; i < end; ++i) \
			if (rskey(*i) < rskey(*(i - 1))) { \
				rstype_t *j, tmp = *i; \
				for (j = i; j > beg && rskey(tmp) < rskey(*(j-1)); --j) \
					*j = *(j - 1); \
				*j = tmp; \
			} \
	} \
	void rs_sort_##name(rstype_t *beg, rstype_t *end, int n_bits, int s) \
	{ \
		rstype_t *i; \
		int size = 1<<n_bits, m = size - 1; \
		rsbucket_##name##_t *k, b[1<<RS_MAX_BITS], *be = b + size; \
		assert(n_bits <= RS_MAX_BITS); \
		for (k = b; k != be; ++k) k->b = k->e = beg; \
		for (i = beg; i != end; ++i) ++b[rskey(*i)>>s&m].e; \
		for (k = b + 1; k != be; ++k) \
			k->e += (k-1)->e - beg, k->b = (k-1)->e; \
		for (k = b; k != be;) { \
			if (k->b != k->e) { \
				rsbucket_##name##_t *l; \
				if ((l = b + (rskey(*k->b)>>s&m)) != k) { \
					rstype_t tmp = *k->b, swap; \
					do { \
						swap = tmp; tmp = *l->b; *l->b++ = swap; \
						l = b + (rskey(tmp)>>s&m); \
					} while (l != k); \
					*k->b++ = tmp; \
				} else ++k->b; \
			} else ++k; \
		} \
		for (b->b = beg, k = b + 1; k != be; ++k) k->b = (k-1)->e; \
		if (s) { \
			s = s > n_bits? s - n_bits : 0; \
			for (k = b; k != be; ++k) \
				if (k->e - k->b > RS_MIN_SIZE) rs_sort_##name(k->b, k->e, n_bits, s); \
				else if (k->e - k->b > 1) rs_insertsort_##name(k->b, k->e); \
		} \
	} \
	void radix_sort_##name(rstype_t *beg, rstype_t *end) \
	{ \
		if (end - beg <= RS_MIN_SIZE) rs_insertsort_##name(beg, end); \
		else rs_sort_##name(beg, end, RS_MAX_BITS, (sizeof_key - 1) * RS_MAX_BITS); \
	}
152 | 
153 | #endif
154 | 


--------------------------------------------------------------------------------
/gpu/plmem.cuh:
--------------------------------------------------------------------------------
  1 | #ifndef _PLMEM_CUH_
  2 | #define _PLMEM_CUH_
  3 | #include "hipify.cuh"
  4 | #include "plchain.h"
  5 | #include "plutils.h"
  6 | 
// Number of micro-batch slots per stream; sizes the per-micro-batch arrays in
// stream_ptr_t. Defaults to 8 unless defined earlier (e.g. on the compiler
// command line).
#ifndef MAX_MICRO_BATCH
#define MAX_MICRO_BATCH 8
#endif // MAX_MICRO_BATCH

// Power-of-two constants: 2^10, 2^20 and 2^30 (plain int expressions).
#define OneK 1024
#define OneM (OneK*1024)
#define OneG (OneM*1024)
 14 | 
 15 | 
// Host-side buffers and bookkeeping for one micro batch.
typedef struct {
    int index;       // read index / batch index
    int griddim;     // grid for range selection kernel. 
    int size;        // number of reads in the batch
    size_t total_n;  // number of anchors in the batch
    size_t cut_num;  // number of cuts in the batch

    // array size: number of anchors in the batch
    int32_t *ax;  // (int32_t) a[].x
    int32_t *ay;  // (int32_t) a[].y
    int8_t* sid;  // a[].y >> 40 & 0xff
    int32_t *xrev; // a[].x >> 32
    // outputs
    int32_t *f;   // score
    uint16_t *p;  // predecessor

    // array size: number of cuts in the batch / long_seg_cut
    // total long segs number till this batch
    unsigned int *long_segs_num;

    // start index for each block in range selection
    /***** range selection block assignment
     * One block is assigned only one read or part of one read.
     *  start_idx:      idx of the first anchor assigned to each block
     *  read_end_idx:   idx of the last anchor OF THE READ assigned to each
     *                  block; if a read is divided into several blocks, all
     *                  the blocks take the last anchor index of the read
     *  cut_start_idx:  idx of the first cut this block needs to make
     */
    // array size: grid dimension
    size_t *start_idx;
    size_t *read_end_idx;
    size_t *cut_start_idx;
} hostMemPtr;
 50 | 
// Host-side buffers for the long segments collected from the micro batches.
typedef struct {
    // array size: number of cuts in the batch / long_seg_cut
    seg_t *long_segs_og_idx;                   // start & end idx of long segs in the original micro batch
    unsigned int *total_long_segs_num; // sum of mini batch long_segs_num
    size_t *total_long_segs_n; // number of anchors in all the long segs
    int32_t *f_long;   // score for long segs
    uint16_t *p_long;  // predecessor for long segs
} longMemPtr;
 59 | 
// Device-side counterpart of the host buffers: input anchors, range/score/
// predecessor outputs, range-selection indices, cut and segment bookkeeping,
// and the long-segment buffer.
typedef struct {
    int size;
    int griddim;
    size_t total_n;
    size_t num_cut;
    // device memory ptrs
    // data array
    int32_t *d_ax;
    int32_t *d_ay;
    int8_t *d_sid;  // a[].y >> 40 & 0xff
    int32_t *d_xrev; // a[].x >> 32
    int32_t *d_range;
    int32_t *d_f;   // score
    uint16_t *d_p;  // predecessor

    // range selection index
    size_t *d_start_idx;
    size_t *d_read_end_idx;
    size_t *d_cut_start_idx;

    // cut
    size_t *d_cut;  // cut
    unsigned int *d_long_seg_count; // total number of long segs (aggregated across micro batches)
    seg_t *d_long_seg;              // start & end idx of long segs in the long seg buffer (aggregated across micro batches)
    seg_t *d_long_seg_og;           // start & end idx of long segs in the micro batch (aggregated across micro batches)
    unsigned int *d_mid_seg_count;  // private to micro batch
    seg_t *d_mid_seg;               // private to micro batch

    // long segment buffer
    unsigned *d_map;
    int32_t *d_ax_long, *d_ay_long;
    int8_t *d_sid_long;
    int32_t *d_range_long;
    size_t *d_total_n_long;
    size_t buffer_size_long;
    int32_t *d_f_long;  // score, size: buffer_size_long * sizeof(int32_t)
    uint16_t *d_p_long;  // predecessor, size: buffer_size_long * sizeof(uint16_t)
} deviceMemPtr;
 98 | 
// Everything that belongs to one stream: the reads being processed, one host
// buffer set per micro batch, the shared long-segment host buffers, the
// device buffers, and the CUDA stream with its events.
typedef struct stream_ptr_t{
    chain_read_t *reads;
    size_t n_read;                          // number of entries in reads
    hostMemPtr host_mems[MAX_MICRO_BATCH];  // one slot per micro batch
    longMemPtr long_mem;
    deviceMemPtr dev_mem;
    cudaStream_t cudastream;
    cudaEvent_t stopevent, startevent, long_kernel_event;
    // one start/stop event pair per micro batch
    cudaEvent_t short_kernel_start_event[MAX_MICRO_BATCH];
    cudaEvent_t short_kernel_stop_event[MAX_MICRO_BATCH];
    bool busy = false;  // default member initializer: this header needs C++11
                        // NOTE(review): presumably marks a stream with work in flight -- confirm
} stream_ptr_t;
111 | 
// Global stream configuration: the array of streams plus the size limits
// they share. Note that the struct tag (gputSetup_t) differs from the
// typedef name (streamSetup_t).
typedef struct gputSetup_t {
    int num_stream;        // number of entries in streams
    stream_ptr_t *streams;
    // NOTE(review): the names suggest per-stream capacity limits (anchors,
    // cuts, long-segment buffer); units are not visible here.
    size_t max_anchors_stream, max_num_cut, long_seg_buffer_size_stream;
    int max_range_grid;
} streamSetup_t;
118 | 
119 | extern streamSetup_t stream_setup;
120 | 
121 | /* memory management methods */
122 | // initialization and cleanup
123 | void plmem_initialize(size_t *max_total_n, int *max_read, int *min_n);
124 | void plmem_stream_initialize(size_t *max_total_n, int *max_read, int *min_n, char* gpu_config_file);
125 | void plmem_stream_cleanup();
126 | 
127 | // alloc and free
128 | void plmem_malloc_host_mem(hostMemPtr *host_mem, size_t anchor_per_batch,
129 |                            int range_grid_size, size_t buffer_size_long);
130 | void plmem_malloc_long_mem(longMemPtr *long_mem, size_t buffer_size_long);
131 | void plmem_free_host_mem(hostMemPtr *host_mem);
132 | void plmem_free_long_mem(longMemPtr *long_mem);
133 | void plmem_malloc_device_mem(deviceMemPtr *dev_mem, size_t anchor_per_batch,
134 |                              int range_grid_size, int num_cut);
135 | void plmem_free_device_mem(deviceMemPtr *dev_mem);
136 | 
137 | // data movement
138 | void plmem_reorg_input_arr(chain_read_t *reads, int n_read,
139 |                            hostMemPtr *host_mem, range_kernel_config_t config);
140 | void plmem_async_h2d_memcpy(stream_ptr_t *stream_ptrs);
141 | void plmem_async_h2d_short_memcpy(stream_ptr_t *stream_ptrs, size_t uid);
142 | void plmem_sync_h2d_memcpy(hostMemPtr *host_mem, deviceMemPtr *dev_mem);
143 | void plmem_async_d2h_memcpy(stream_ptr_t *stream_ptrs);
144 | void plmem_async_d2h_short_memcpy(stream_ptr_t *stream_ptrs, size_t uid);
145 | void plmem_async_d2h_long_memcpy(stream_ptr_t *stream_ptrs);
146 | void plmem_sync_d2h_memcpy(hostMemPtr *host_mem, deviceMemPtr *dev_mem);
147 | #endif  // _PLMEM_CUH_


--------------------------------------------------------------------------------
/bseq.c:
--------------------------------------------------------------------------------
  1 | #include <zlib.h>
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <assert.h>
  5 | #define __STDC_LIMIT_MACROS
  6 | #include "bseq.h"
  7 | #include "kvec.h"
  8 | #include "kseq.h"
  9 | KSEQ_INIT2(, gzFile, gzread)
 10 | 
/*
 * Nucleotide complement lookup, indexed by byte value and preserving case.
 * A<->T, C<->G and the ambiguity pairs B<->V, D<->H, K<->M, R<->Y are
 * swapped; U/u map to A/a. Every other byte, including S, W, N and all
 * non-letters, maps to itself.
 */
unsigned char seq_comp_table[256] = {
	  0,   1,	2,	 3,	  4,   5,	6,	 7,	  8,   9,  10,	11,	 12,  13,  14,	15,
	 16,  17,  18,	19,	 20,  21,  22,	23,	 24,  25,  26,	27,	 28,  29,  30,	31,
	 32,  33,  34,	35,	 36,  37,  38,	39,	 40,  41,  42,	43,	 44,  45,  46,	47,
	 48,  49,  50,	51,	 52,  53,  54,	55,	 56,  57,  58,	59,	 60,  61,  62,	63,
	 64, 'T', 'V', 'G', 'H', 'E', 'F', 'C', 'D', 'I', 'J', 'M', 'L', 'K', 'N', 'O',
	'P', 'Q', 'Y', 'S', 'A', 'A', 'B', 'W', 'X', 'R', 'Z',	91,	 92,  93,  94,	95,
	 96, 't', 'v', 'g', 'h', 'e', 'f', 'c', 'd', 'i', 'j', 'm', 'l', 'k', 'n', 'o',
	'p', 'q', 'y', 's', 'a', 'a', 'b', 'w', 'x', 'r', 'z', 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
	144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
	176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
	192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
	208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
	224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
};
 29 | 
/* In fragment mode, mm_bseq_read3() only looks ahead for same-named records
 * when the last sequence of a chunk is shorter than this many bases. */
#define CHECK_PAIR_THRES 1000000
 31 | 
/* An open sequence file: the gzip handle, the kseq parser reading from it,
 * and one look-ahead record. */
struct mm_bseq_file_s {
	gzFile fp;    /* underlying (possibly gzip-compressed) stream */
	kseq_t *ks;   /* FASTA/FASTQ parser bound to fp */
	mm_bseq1_t s; /* record read past a chunk boundary; s.seq == 0 when empty */
};
 37 | 
 38 | mm_bseq_file_t *mm_bseq_open(const char *fn)
 39 | {
 40 | 	mm_bseq_file_t *fp;
 41 | 	gzFile f;
 42 | 	f = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(0, "r");
 43 | 	if (f == 0) return 0;
 44 | 	fp = (mm_bseq_file_t*)calloc(1, sizeof(mm_bseq_file_t));
 45 | 	fp->fp = f;
 46 | 	fp->ks = kseq_init(fp->fp);
 47 | 	return fp;
 48 | }
 49 | 
 50 | void mm_bseq_close(mm_bseq_file_t *fp)
 51 | {
 52 | 	kseq_destroy(fp->ks);
 53 | 	gzclose(fp->fp);
 54 | 	free(fp);
 55 | }
 56 | 
 57 | static inline char *kstrdup(const kstring_t *s)
 58 | {
 59 | 	char *t;
 60 | 	t = (char*)malloc(s->l + 1);
 61 | 	memcpy(t, s->s, s->l + 1);
 62 | 	return t;
 63 | }
 64 | 
 65 | static inline void kseq2bseq(kseq_t *ks, mm_bseq1_t *s, int with_qual, int with_comment)
 66 | {
 67 | 	int i;
 68 | 	if (ks->name.l == 0)
 69 | 		fprintf(stderr, "[WARNING]\033[1;31m empty sequence name in the input.\033[0m\n");
 70 | 	s->name = kstrdup(&ks->name);
 71 | 	s->seq = kstrdup(&ks->seq);
 72 | 	for (i = 0; i < (int)ks->seq.l; ++i) // convert U to T
 73 | 		if (s->seq[i] == 'u' || s->seq[i] == 'U')
 74 | 			--s->seq[i];
 75 | 	s->qual = with_qual && ks->qual.l? kstrdup(&ks->qual) : 0;
 76 | 	s->comment = with_comment && ks->comment.l? kstrdup(&ks->comment) : 0;
 77 | 	s->l_seq = ks->seq.l;
 78 | }
 79 | 
/**
 * Read the next chunk of records from one file.
 *
 * Records are appended until their total sequence length reaches chunk_size
 * or the input ends.
 *
 * @param fp            open sequence file
 * @param chunk_size    stop once this many bases have been collected
 * @param with_qual     keep quality strings
 * @param with_comment  keep comment strings
 * @param frag_mode     if non-zero, do not split same-named records across
 *                      chunks (see below)
 * @param n_            (out) number of records returned
 *
 * @return array of *n_ records; 0 when nothing was read
 */
mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_)
{
	int64_t size = 0;
	int ret;
	kvec_t(mm_bseq1_t) a = {0,0,0};
	kseq_t *ks = fp->ks;
	*n_ = 0;
	// a record read ahead by the previous call becomes the first record of this chunk
	if (fp->s.seq) {
		kv_resize(mm_bseq1_t, 0, a, 256);
		kv_push(mm_bseq1_t, 0, a, fp->s);
		size = fp->s.l_seq;
		memset(&fp->s, 0, sizeof(mm_bseq1_t)); // ownership moved into the array
	}
	while ((ret = kseq_read(ks)) >= 0) {
		mm_bseq1_t *s;
		assert(ks->seq.l <= INT32_MAX);
		if (a.m == 0) kv_resize(mm_bseq1_t, 0, a, 256);
		kv_pushp(mm_bseq1_t, 0, a, &s);
		kseq2bseq(ks, s, with_qual, with_comment);
		size += s->l_seq;
		if (size >= chunk_size) {
			// chunk is full; in fragment mode keep pulling records that
			// mm_qname_same() matches with the last one, unless that last
			// sequence is CHECK_PAIR_THRES bases or longer
			if (frag_mode && a.a[a.n-1].l_seq < CHECK_PAIR_THRES) {
				while ((ret = kseq_read(ks)) >= 0) {
					kseq2bseq(ks, &fp->s, with_qual, with_comment);
					if (mm_qname_same(fp->s.name, a.a[a.n-1].name)) {
						kv_push(mm_bseq1_t, 0, a, fp->s);
						memset(&fp->s, 0, sizeof(mm_bseq1_t));
					} else break; // different name: leave it in fp->s for the next call
				}
			}
			break;
		}
	}
	// kseq_read() results below -1 are reported as parse failures; what has been read so far is still returned
	if (ret < -1) {
		if (a.n) fprintf(stderr, "[WARNING]\033[1;31m failed to parse the FASTA/FASTQ record next to '%s'. Continue anyway.\033[0m\n", a.a[a.n-1].name);
		else fprintf(stderr, "[WARNING]\033[1;31m failed to parse the first FASTA/FASTQ record. Continue anyway.\033[0m\n");
	}
	*n_ = a.n;
	return a.a;
}
120 | 
121 | mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int frag_mode, int *n_)
122 | {
123 | 	return mm_bseq_read3(fp, chunk_size, with_qual, 0, frag_mode, n_);
124 | }
125 | 
126 | mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int *n_)
127 | {
128 | 	return mm_bseq_read2(fp, chunk_size, with_qual, 0, n_);
129 | }
130 | 
131 | mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_)
132 | {
133 | 	int i;
134 | 	int64_t size = 0;
135 | 	kvec_t(mm_bseq1_t) a = {0,0,0};
136 | 	*n_ = 0;
137 | 	if (n_fp < 1) return 0;
138 | 	while (1) {
139 | 		int n_read = 0;
140 | 		for (i = 0; i < n_fp; ++i)
141 | 			if (kseq_read(fp[i]->ks) >= 0)
142 | 				++n_read;
143 | 		if (n_read < n_fp) {
144 | 			if (n_read > 0)
145 | 				fprintf(stderr, "[W::%s]\033[1;31m query files have different number of records; extra records skipped.\033[0m\n", __func__);
146 | 			break; // some file reaches the end
147 | 		}
148 | 		if (a.m == 0) kv_resize(mm_bseq1_t, 0, a, 256);
149 | 		for (i = 0; i < n_fp; ++i) {
150 | 			mm_bseq1_t *s;
151 | 			kv_pushp(mm_bseq1_t, 0, a, &s);
152 | 			kseq2bseq(fp[i]->ks, s, with_qual, with_comment);
153 | 			size += s->l_seq;
154 | 		}
155 | 		if (size >= chunk_size) break;
156 | 	}
157 | 	*n_ = a.n;
158 | 	return a.a;
159 | }
160 | 
161 | mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int *n_)
162 | {
163 | 	return mm_bseq_read_frag2(n_fp, fp, chunk_size, with_qual, 0, n_);
164 | }
165 | 
166 | int mm_bseq_eof(mm_bseq_file_t *fp)
167 | {
168 | 	return (ks_eof(fp->ks->f) && fp->s.seq == 0);
169 | }
170 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | CFLAGS_NDEBUG = -DNDEBUG -O3 
  2 | CDEBUG_FLAGS= -g -O2 #-Wall -Wextra -Wno-unused-parameter -Wno-unused-variable -Wno-sign-compare -Wno-unused-function -Wno-c++17-extensions -Wno-\#warnings #-O0 -DNDEBUG
  3 | CPPFLAGS=	-DHAVE_KALLOC -D__AMD_SPLIT_KERNELS__ # -Wno-unused-but-set-variable -Wno-unused-variable
  4 | CPPFLAGS+= 	$(if $(MAX_MICRO_BATCH),-DMAX_MICRO_BATCH=\($(MAX_MICRO_BATCH)\))
  5 | INCLUDES=	-I .
  6 | OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \
  7 | 			lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \
  8 | 			ksw2_ll_sse.o
  9 | # PROG=		minimap2-zerobranch-debug
 10 | # PROG=		minimap2-nobalance-debug
 11 | PROG=		minimap2$(SUFFIX)
 12 | PROG_EXTRA=	sdust minimap2-lite
 13 | LIBS=		-lm -lz -lpthread 
 14 | 
 15 | ifeq ($(arm_neon),) # if arm_neon is not defined
 16 | ifeq ($(sse2only),) # if sse2only is not defined
 17 | 	OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
 18 | else                # if sse2only is defined
 19 | 	OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
 20 | endif
 21 | else				# if arm_neon is defined
 22 | 	OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
 23 |     INCLUDES+=-Isse2neon
 24 | ifeq ($(aarch64),)	#if aarch64 is not defined
 25 | 	CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
 26 | else				#if aarch64 is defined
 27 | 	CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char
 28 | endif
 29 | endif
 30 | 
 31 | ifneq ($(asan),)
 32 | 	CFLAGS+=-fsanitize=address
 33 | 	LIBS+=-fsanitize=address
 34 | endif
 35 | 
 36 | ifneq ($(tsan),)
 37 | 	CFLAGS+=-fsanitize=thread
 38 | 	LIBS+=-fsanitize=thread
 39 | endif
 40 | 
 41 | 
 42 | # turn on debug flags 
 43 | ifeq ($(DEBUG),info) 
 44 | 	CFLAGS = $(CFLAGS_NDEBUG)
 45 | 	CFLAGS += -DDEBUG_PRINT
 46 | else ifeq ($(DEBUG), analyze) 
 47 | 	CFLAGS = $(CFLAGS_NDEBUG) 
 48 | 	CFLAGS += -DDEBUG_CHECK -DDEBUG_PRINT
 49 | else ifeq ($(DEBUG), verbose)
 50 | 	CFLAGS = $(CDEBUG_FLAGS) 
 51 | 	CFLAGS += -DDEBUG_CHECK -DDEBUG_PRINT -DDEBUG_VERBOSE
 52 | else 
 53 | 	CFLAGS = $(CFLAGS_NDEBUG)
 54 | endif
 55 | 
 56 | .PHONY:all extra clean depend # profile
 57 | .SUFFIXES:.c .o
 58 | 
 59 | .c.o:
 60 | 		$(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@
 61 | 
 62 | all:$(PROG)
 63 | 
 64 | extra:all $(PROG_EXTRA)
 65 | 
 66 | # build cJSON
 67 | CJSON_OBJ= 	cJSON/cJSON.o
 68 | INCLUDES += -I cJSON
 69 | $(CJSON_OBJ): 
 70 | 	make -C cJSON
 71 | 
 72 | # build kernel objs
 73 | include gpu/gpu.mk
 74 | 
 75 | 
 76 | # compile with nvcc/hipcc
 77 | $(PROG):main.o libminimap2.a
 78 | 		$(GPU_CC) $(CFLAGS) $(GPU_FLAGS) main.o -o $@ -L. -lminimap2 $(LIBS)
 79 | 
 80 | minimap2-lite:example.o libminimap2.a
 81 | 		$(GPU_CC) $(CFLAGS)  $(GPU_FLAGS) $< -o $@ -L. -lminimap2 $(LIBS)
 82 | 
 83 | libminimap2.a:$(OBJS) $(CU_OBJS) $(CJSON_OBJ)
 84 | 		$(AR) -csru $@ $^
 85 | 
 86 | sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
 87 | 		$(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz
 88 | 
 89 | # SSE-specific targets on x86/x86_64
 90 | 
 91 | ifeq ($(arm_neon),)   # if arm_neon is defined, compile this target with the default setting (i.e. no -msse2)
 92 | ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h
 93 | 		$(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@
 94 | endif
 95 | 
 96 | ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
 97 | 		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
 98 | 
 99 | ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
100 | 		$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
101 | 
102 | ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
103 | 		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
104 | 
105 | ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
106 | 		$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
107 | 
108 | ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
109 | 		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
110 | 
111 | ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
112 | 		$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
113 | 
114 | ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
115 | 		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
116 | 
117 | # NEON-specific targets on ARM
118 | 
119 | ksw2_extz2_neon.o:ksw2_extz2_sse.c ksw2.h kalloc.h
120 | 		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
121 | 
122 | ksw2_extd2_neon.o:ksw2_extd2_sse.c ksw2.h kalloc.h
123 | 		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
124 | 
125 | ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
126 | 		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
127 | 
128 | # other non-file targets
129 | 
130 | clean: cleangpu
131 | 		rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg*
132 | 
133 | depend:
134 | 		(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c)
135 | 
136 | # DO NOT DELETE
137 | 
138 | align.o: minimap.h mmpriv.h bseq.h kseq.h ksw2.h kalloc.h
139 | bseq.o: bseq.h kvec.h kalloc.h kseq.h
140 | esterr.o: mmpriv.h minimap.h bseq.h kseq.h
141 | example.o: minimap.h kseq.h
142 | format.o: kalloc.h mmpriv.h minimap.h bseq.h kseq.h
143 | hit.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h khash.h
144 | index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h kvec.h kalloc.h khash.h
145 | index.o: ksort.h
146 | kalloc.o: kalloc.h
147 | ksw2_extd2_sse.o: ksw2.h kalloc.h
148 | ksw2_exts2_sse.o: ksw2.h kalloc.h
149 | ksw2_extz2_sse.o: ksw2.h kalloc.h
150 | ksw2_ll_sse.o: ksw2.h kalloc.h
151 | kthread.o: kthread.h
152 | lchain.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h krmq.h
153 | main.o: bseq.h minimap.h mmpriv.h kseq.h ketopt.h
154 | map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h kseq.h
155 | map.o: khash.h ksort.h
156 | misc.o: mmpriv.h minimap.h bseq.h kseq.h ksort.h
157 | options.o: mmpriv.h minimap.h bseq.h kseq.h
158 | pe.o: mmpriv.h minimap.h bseq.h kseq.h kvec.h kalloc.h ksort.h
159 | sdust.o: kalloc.h kdq.h kvec.h sdust.h
160 | seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
161 | sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
162 | splitidx.o: mmpriv.h minimap.h bseq.h kseq.h


--------------------------------------------------------------------------------
/sketch.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <assert.h>
  4 | #include <string.h>
  5 | #define __STDC_LIMIT_MACROS
  6 | #include "kvec.h"
  7 | #include "mmpriv.h"
  8 | 
/*
 * Nucleotide to 2-bit code lookup, indexed by byte value:
 *   A/a -> 0, C/c -> 1, G/g -> 2, T/t and U/u -> 3.
 * Byte values 0..3 map to themselves, so already-encoded input passes through.
 * Every other byte maps to 4.
 */
unsigned char seq_nt4_table[256] = {
	0, 1, 2, 3,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  3, 3, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  3, 3, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
};
 27 | 
 28 | static inline uint64_t hash64(uint64_t key, uint64_t mask)
 29 | {
 30 | 	key = (~key + (key << 21)) & mask; // key = (key << 21) - key - 1;
 31 | 	key = key ^ key >> 24;
 32 | 	key = ((key + (key << 3)) + (key << 8)) & mask; // key * 265
 33 | 	key = key ^ key >> 14;
 34 | 	key = ((key + (key << 2)) + (key << 4)) & mask; // key * 21
 35 | 	key = key ^ key >> 28;
 36 | 	key = (key + (key << 31)) & mask;
 37 | 	return key;
 38 | }
 39 | 
 40 | typedef struct { // a simplified version of kdq
 41 | 	int front, count;
 42 | 	int a[32];
 43 | } tiny_queue_t;
 44 | 
 45 | static inline void tq_push(tiny_queue_t *q, int x)
 46 | {
 47 | 	q->a[((q->count++) + q->front) & 0x1f] = x;
 48 | }
 49 | 
 50 | static inline int tq_shift(tiny_queue_t *q)
 51 | {
 52 | 	int x;
 53 | 	if (q->count == 0) return -1;
 54 | 	x = q->a[q->front++];
 55 | 	q->front &= 0x1f;
 56 | 	--q->count;
 57 | 	return x;
 58 | }
 59 | 
/**
 * Find symmetric (w,k)-minimizers on a DNA sequence
 *
 * @param km     thread-local memory pool; using NULL falls back to malloc()
 * @param str    DNA sequence
 * @param len    length of $str; must be positive (asserted)
 * @param w      find a minimizer for every $w consecutive k-mers; 0 < w < 256 (asserted)
 * @param k      k-mer size; 0 < k <= 28 (asserted)
 * @param rid    reference ID; will be copied to the output $p array
 * @param is_hpc homopolymer-compressed or not
 * @param p      minimizers
 *               p->a[i].x = hash64(kMer)<<8 | kmerSpan
 *               p->a[i].y = rid<<32 | lastPos<<1 | strand
 *               where kMer is the smaller of the forward and reverse-complement k-mers,
 *               lastPos is the position of the last base of the i-th minimizer,
 *               and strand is 0 if the forward k-mer is the smaller one and 1 otherwise.
 *               Callers may want to set "p->n = 0"; otherwise results are appended to p
 */
void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p)
{
	uint64_t shift1 = 2 * (k - 1), mask = (1ULL<<2*k) - 1, kmer[2] = {0,0};
	int i, j, l, buf_pos, min_pos, kmer_span = 0;
	// buf[] is a ring of the last $w k-mer records; min is the current window minimum.
	// A record whose x equals UINT64_MAX stands for "no valid k-mer here".
	mm128_t buf[256], min = { UINT64_MAX, UINT64_MAX };
	tiny_queue_t tq; // in HPC mode: the run lengths of the last (up to k) homopolymer runs

	assert(len > 0 && (w > 0 && w < 256) && (k > 0 && k <= 28)); // 56 bits for k-mer; could use long k-mers, but 28 enough in practice
	memset(buf, 0xff, w * 16); // marks the first $w slots empty; 16 is presumably sizeof(mm128_t)
	memset(&tq, 0, sizeof(tiny_queue_t));
	kv_resize(mm128_t, km, *p, p->n + len/w);

	// l counts the valid bases accumulated since the last ambiguous base
	for (i = l = buf_pos = min_pos = 0; i < len; ++i) {
		int c = seq_nt4_table[(uint8_t)str[i]];
		mm128_t info = { UINT64_MAX, UINT64_MAX };
		if (c < 4) { // not an ambiguous base
			int z;
			if (is_hpc) {
				int skip_len = 1;
				if (i + 1 < len && seq_nt4_table[(uint8_t)str[i + 1]] == c) {
					for (skip_len = 2; i + skip_len < len; ++skip_len)
						if (seq_nt4_table[(uint8_t)str[i + skip_len]] != c)
							break;
					i += skip_len - 1; // put $i at the end of the current homopolymer run
				}
				// kmer_span is the sum of the run lengths held in tq, i.e. the
				// uncompressed length covered by the current k-mer
				tq_push(&tq, skip_len);
				kmer_span += skip_len;
				if (tq.count > k) kmer_span -= tq_shift(&tq);
			} else kmer_span = l + 1 < k? l + 1 : k;
			kmer[0] = (kmer[0] << 2 | c) & mask;           // forward k-mer
			kmer[1] = (kmer[1] >> 2) | (3ULL^c) << shift1; // reverse k-mer
			// NB: this "continue" also skips the buf[] update and the buf_pos increment below
			if (kmer[0] == kmer[1]) continue; // skip "symmetric k-mers" as we don't know its strand
			z = kmer[0] < kmer[1]? 0 : 1; // strand
			++l;
			if (l >= k && kmer_span < 256) { // the span is stored in the low 8 bits of x, so it must fit
				info.x = hash64(kmer[z], mask) << 8 | kmer_span;
				info.y = (uint64_t)rid<<32 | (uint32_t)i<<1 | z;
			}
		} else l = 0, tq.count = tq.front = 0, kmer_span = 0; // an ambiguous base resets all k-mer state
		buf[buf_pos] = info; // need to do this here as appropriate buf_pos and buf[buf_pos] are needed below
		if (l == w + k - 1 && min.x != UINT64_MAX) { // special case for the first window - because identical k-mers are not stored yet
			for (j = buf_pos + 1; j < w; ++j)
				if (min.x == buf[j].x && buf[j].y != min.y) kv_push(mm128_t, km, *p, buf[j]);
			for (j = 0; j < buf_pos; ++j)
				if (min.x == buf[j].x && buf[j].y != min.y) kv_push(mm128_t, km, *p, buf[j]);
		}
		if (info.x <= min.x) { // a new minimum; then write the old min
			if (l >= w + k && min.x != UINT64_MAX) kv_push(mm128_t, km, *p, min);
			min = info, min_pos = buf_pos;
		} else if (buf_pos == min_pos) { // old min has moved outside the window
			if (l >= w + k - 1 && min.x != UINT64_MAX) kv_push(mm128_t, km, *p, min);
			// rescan the ring in oldest-to-newest order to find the new minimum
			for (j = buf_pos + 1, min.x = UINT64_MAX; j < w; ++j) // the two loops are necessary when there are identical k-mers
				if (min.x >= buf[j].x) min = buf[j], min_pos = j; // >= is important s.t. min is always the closest k-mer
			for (j = 0; j <= buf_pos; ++j)
				if (min.x >= buf[j].x) min = buf[j], min_pos = j;
			if (l >= w + k - 1 && min.x != UINT64_MAX) { // write identical k-mers
				for (j = buf_pos + 1; j < w; ++j) // these two loops make sure the output is sorted
					if (min.x == buf[j].x && min.y != buf[j].y) kv_push(mm128_t, km, *p, buf[j]);
				for (j = 0; j <= buf_pos; ++j)
					if (min.x == buf[j].x && min.y != buf[j].y) kv_push(mm128_t, km, *p, buf[j]);
			}
		}
		if (++buf_pos == w) buf_pos = 0; // advance the ring position
	}
	if (min.x != UINT64_MAX) // flush the minimum of the last window
		kv_push(mm128_t, km, *p, min);
}
144 | 


--------------------------------------------------------------------------------
/pe.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <math.h>
  3 | #include "mmpriv.h"
  4 | #include "kvec.h"
  5 | 
  6 | void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r)
  7 | {
  8 | 	if (pri_ratio > 0.0f && *n_ > 0) {
  9 | 		int i, k, n = *n_, n_2nd = 0;
 10 | 		int max_dist = n_segs == 2? qlens[0] + qlens[1] + max_gap_ref : 0;
 11 | 		for (i = k = 0; i < n; ++i) {
 12 | 			int to_keep = 0;
 13 | 			if (r[i].parent == i) { // primary
 14 | 				to_keep = 1;
 15 | 			} else if (r[i].score + min_diff >= r[r[i].parent].score) {
 16 | 				to_keep = 1;
 17 | 			} else {
 18 | 				mm_reg1_t *p = &r[r[i].parent], *q = &r[i];
 19 | 				if (p->rev == q->rev && p->rid == q->rid && q->re - p->rs < max_dist && p->re - q->rs < max_dist) { // child and parent are close on the ref
 20 | 					if (q->score >= p->score * pri1)
 21 | 						to_keep = 1;
 22 | 				} else {
 23 | 					int is_par_both = (n_segs == 2 && p->qs < qlens[0] && p->qe > qlens[0]);
 24 | 					int is_chi_both = (n_segs == 2 && q->qs < qlens[0] && q->qe > qlens[0]);
 25 | 					if (is_chi_both || is_chi_both == is_par_both) {
 26 | 						if (q->score >= p->score * pri_ratio)
 27 | 							to_keep = 1;
 28 | 					} else { // the remaining case: is_chi_both == 0 && is_par_both == 1
 29 | 						if (q->score >= p->score * pri2)
 30 | 							to_keep = 1;
 31 | 					}
 32 | 				}
 33 | 			}
 34 | 			if (to_keep && r[i].parent != i) {
 35 | 				if (n_2nd++ >= best_n) to_keep = 0; // don't keep if there are too many secondary hits
 36 | 			}
 37 | 			if (to_keep) r[k++] = r[i];
 38 | 			else if (r[i].p) free(r[i].p);
 39 | 		}
 40 | 		if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
 41 | 		*n_ = k;
 42 | 	}
 43 | }
 44 | 
 45 | void mm_set_pe_thru(const int *qlens, int *n_regs, mm_reg1_t **regs)
 46 | {
 47 | 	int s, i, n_pri[2], pri[2];
 48 | 	n_pri[0] = n_pri[1] = 0;
 49 | 	pri[0] = pri[1] = -1;
 50 | 	for (s = 0; s < 2; ++s)
 51 | 		for (i = 0; i < n_regs[s]; ++i)
 52 | 			if (regs[s][i].id == regs[s][i].parent)
 53 | 				++n_pri[s], pri[s] = i;
 54 | 	if (n_pri[0] == 1 && n_pri[1] == 1) {
 55 | 		mm_reg1_t *p = &regs[0][pri[0]];
 56 | 		mm_reg1_t *q = &regs[1][pri[1]];
 57 | 		if (p->rid == q->rid && p->rev == q->rev && abs(p->rs - q->rs) < 3 && abs(p->re - q->re) < 3
 58 | 			&& ((p->qs == 0 && qlens[1] - q->qe == 0) || (q->qs == 0 && qlens[0] - p->qe == 0)))
 59 | 		{
 60 | 			p->pe_thru = q->pe_thru = 1;
 61 | 		}
 62 | 	}
 63 | }
 64 | 
 65 | #include "ksort.h"
 66 | 
/*
 * One hit from either read end, flattened into a single array so that
 * mm_pair() can sort the hits of both ends together.
 *   s   - index of the end the hit belongs to (0 or 1)
 *   rev - copy of r->rev
 *   key - sort key built in mm_pair(): rid<<32 | rs<<1 | (s^rev)
 *   r   - pointer to the hit inside regs[s][]
 */
typedef struct {
	int s, rev;
	uint64_t key;
	mm_reg1_t *r;
} pair_arr_t;

// Instantiates radix_sort_pair(), which mm_pair() uses to sort pair_arr_t by .key.
// NOTE(review): the last argument (8) is presumably the key size in bytes - confirm in ksort.h.
#define sort_key_pair(a) ((a).key)
KRADIX_SORT_INIT(pair, pair_arr_t, sort_key_pair, 8)
 75 | 
 76 | void mm_pair(void *km, int max_gap_ref, int pe_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs)
 77 | {
 78 | 	int i, j, s, n, last[2], dp_thres, segs = 0, max_idx[2];
 79 | 	int64_t max;
 80 | 	pair_arr_t *a;
 81 | 	kvec_t(uint64_t) sc = {0,0,0};
 82 | 
 83 | 	a = (pair_arr_t*)kmalloc(km, (n_regs[0] + n_regs[1]) * sizeof(pair_arr_t));
 84 | 	for (s = n = 0, dp_thres = 0; s < 2; ++s) {
 85 | 		int max = 0;
 86 | 		for (i = 0; i < n_regs[s]; ++i) {
 87 | 			a[n].s = s;
 88 | 			a[n].r = &regs[s][i];
 89 | 			a[n].rev = a[n].r->rev;
 90 | 			a[n].key = (uint64_t)a[n].r->rid << 32 | a[n].r->rs<<1 | (s^a[n].rev);
 91 | 			max = max > a[n].r->p->dp_max? max : a[n].r->p->dp_max;
 92 | 			++n;
 93 | 			segs |= 1<<s;
 94 | 		}
 95 | 		dp_thres += max;
 96 | 	}
 97 | 	if (segs != 3) {
 98 | 		kfree(km, a); // only one end is mapped
 99 | 		return;
100 | 	}
101 | 	dp_thres -= pe_bonus;
102 | 	if (dp_thres < 0) dp_thres = 0;
103 | 	radix_sort_pair(a, a + n);
104 | 
105 | 	max = -1;
106 | 	max_idx[0] = max_idx[1] = -1;
107 | 	last[0] = last[1] = -1;
108 | 	kv_resize(uint64_t, km, sc, (size_t)n);
109 | 	for (i = 0; i < n; ++i) {
110 | 		if (a[i].key & 1) { // reverse first read or forward second read
111 | 			mm_reg1_t *q, *r;
112 | 			if (last[a[i].rev] < 0) continue;
113 | 			r = a[i].r;
114 | 			q = a[last[a[i].rev]].r;
115 | 			if (r->rid != q->rid || r->rs - q->re > max_gap_ref) continue;
116 | 			for (j = last[a[i].rev]; j >= 0; --j) {
117 | 				int64_t score;
118 | 				if (a[j].rev != a[i].rev || a[j].s == a[i].s) continue;
119 | 				q = a[j].r;
120 | 				if (r->rid != q->rid || r->rs - q->re > max_gap_ref) break;
121 | 				if (r->p->dp_max + q->p->dp_max < dp_thres) continue;
122 | 				score = (int64_t)(r->p->dp_max + q->p->dp_max) << 32 | (r->hash + q->hash);
123 | 				if (score > max)
124 | 					max = score, max_idx[a[j].s] = j, max_idx[a[i].s] = i;
125 | 				kv_push(uint64_t, km, sc, score);
126 | 			}
127 | 		} else { // forward first read or reverse second read
128 | 			last[a[i].rev] = i;
129 | 		}
130 | 	}
131 | 	if (sc.n > 1)
132 | 		radix_sort_64(sc.a, sc.a + sc.n);
133 | 
134 | 	if (sc.n > 0 && max > 0) { // found at least one pair
135 | 		int n_sub = 0, mapq_pe;
136 | 		mm_reg1_t *r[2];
137 | 		r[0] = a[max_idx[0]].r, r[1] = a[max_idx[1]].r;
138 | 		r[0]->proper_frag = r[1]->proper_frag = 1;
139 | 		for (s = 0; s < 2; ++s) {
140 | 			if (r[s]->id != r[s]->parent) { // then lift to primary and update parent
141 | 				mm_reg1_t *p = &regs[s][r[s]->parent];
142 | 				for (i = 0; i < n_regs[s]; ++i)
143 | 					if (regs[s][i].parent == p->id)
144 | 						regs[s][i].parent = r[s]->id;
145 | 				p->mapq = 0;
146 | 			}
147 | 			if (!r[s]->sam_pri) { // then sync sam_pri
148 | 				for (i = 0; i < n_regs[s]; ++i)
149 | 					regs[s][i].sam_pri = 0;
150 | 				r[s]->sam_pri = 1;
151 | 			}
152 | 		}
153 | 		mapq_pe = r[0]->mapq > r[1]->mapq? r[0]->mapq : r[1]->mapq;
154 | 		for (i = 0; i < (int)sc.n; ++i)
155 | 			if ((sc.a[i]>>32) + sub_diff >= (uint64_t)max>>32)
156 | 				++n_sub;
157 | 		if (sc.n > 1) {
158 | 			int mapq_pe_alt;
159 | 			mapq_pe_alt = (int)(6.02f * ((max>>32) - (sc.a[sc.n - 2]>>32)) / match_sc - 4.343f * logf(n_sub)); // n_sub > 0 because it counts the optimal, too
160 | 			mapq_pe = mapq_pe < mapq_pe_alt? mapq_pe : mapq_pe_alt;
161 | 		}
162 | 		if (r[0]->mapq < mapq_pe) r[0]->mapq = (int)(.2f * r[0]->mapq + .8f * mapq_pe + .499f);
163 | 		if (r[1]->mapq < mapq_pe) r[1]->mapq = (int)(.2f * r[1]->mapq + .8f * mapq_pe + .499f);
164 | 		if (sc.n == 1) {
165 | 			if (r[0]->mapq < 2) r[0]->mapq = 2;
166 | 			if (r[1]->mapq < 2) r[1]->mapq = 2;
167 | 		} else if ((uint64_t)max>>32 > sc.a[sc.n - 2]>>32) {
168 | 			if (r[0]->mapq < 1) r[0]->mapq = 1;
169 | 			if (r[1]->mapq < 1) r[1]->mapq = 1;
170 | 		}
171 | 	}
172 | 
173 | 	kfree(km, a);
174 | 	kfree(km, sc.a);
175 | 
176 | 	mm_set_pe_thru(qlens, n_regs, regs);
177 | }
178 | 


--------------------------------------------------------------------------------
/mmpriv.h:
--------------------------------------------------------------------------------
  1 | #ifndef MMPRIV2_H
  2 | #define MMPRIV2_H
  3 | 
  4 | #include <assert.h>
  5 | #include "minimap.h"
  6 | #include "bseq.h"
  7 | #include "kseq.h"
  8 | 
  9 | #define MM_PARENT_UNSET   (-1)
 10 | #define MM_PARENT_TMP_PRI (-2)
 11 | 
 12 | #define MM_DBG_NO_KALLOC     0x1
 13 | #define MM_DBG_PRINT_QNAME   0x2
 14 | #define MM_DBG_PRINT_SEED    0x4
 15 | #define MM_DBG_PRINT_ALN_SEQ 0x8
 16 | #define MM_DBG_PRINT_CHAIN   0x10
 17 | 
 18 | #define MM_SEED_LONG_JOIN  (1ULL<<40)
 19 | #define MM_SEED_IGNORE     (1ULL<<41)
 20 | #define MM_SEED_TANDEM     (1ULL<<42)
 21 | #define MM_SEED_SELF       (1ULL<<43)
 22 | 
 23 | #define MM_SEED_SEG_SHIFT  48
 24 | #define MM_SEED_SEG_MASK   (0xffULL<<(MM_SEED_SEG_SHIFT))
 25 | 
 26 | #ifndef kroundup32
 27 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
 28 | #endif
 29 | 
// Packed 4-bit codes, eight per array element: code $i lives in bits
// [4*(i&7), 4*(i&7)+4) of s[i>>3].
// mm_seq4_set() ORs the code in without clearing, so the target nibble must be zero beforehand.
#define mm_seq4_set(s, i, c) ((s)[(i)>>3] |= (uint32_t)(c) << (((i)&7)<<2))
#define mm_seq4_get(s, i)    ((s)[(i)>>3] >> (((i)&7)<<2) & 0xf)

// Typed wrappers around malloc()/calloc() for an array of $len elements of $type
#define MALLOC(type, len) ((type*)malloc((len) * sizeof(type)))
#define CALLOC(type, len) ((type*)calloc((len), sizeof(type)))
 35 | 
 36 | #ifdef __cplusplus
 37 | extern "C" {
 38 | #endif
 39 | 
// NOTE(review): nothing in this header shows how these fields are filled.
// Presumably this describes one query minimizer and its index hits as returned
// by mm_collect_matches(): $cr pointing at $n hit entries, $q_pos/$q_span
// locating the seed on the query. Confirm against the implementation.
typedef struct {
	uint32_t n;
	uint32_t q_pos;
	uint32_t q_span:31, flt:1; // 31-bit span plus a 1-bit flag
	uint32_t seg_id:31, is_tandem:1; // 31-bit segment id plus a 1-bit flag
	const uint64_t *cr;
} mm_seed_t;

// NOTE(review): presumably $u holds $n_u entries and $a holds $n_a entries for one
// segment; see mm_seg_gen()/mm_seg_free() declared below. Confirm against their implementation.
typedef struct {
	int n_u, n_a;
	uint64_t *u;
	mm128_t *a;
} mm_seg_t;
 53 | 
 54 | double cputime(void);
 55 | double realtime(void);
 56 | long peakrss(void);
 57 | 
 58 | void radix_sort_128x(mm128_t *beg, mm128_t *end);
 59 | void radix_sort_64(uint64_t *beg, uint64_t *end);
 60 | uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
 61 | 
 62 | void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p);
 63 | 
 64 | mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int max_max_occ, int dist, const mm_idx_t *mi, const mm128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, uint64_t **mini_pos);
 65 | void mm_seed_mz_flt(void *km, mm128_v *mv, int32_t q_occ_max, float q_occ_frac);
 66 | 
 67 | double mm_event_identity(const mm_reg1_t *r);
 68 | int mm_write_sam_hdr(const mm_idx_t *mi, const char *rg, const char *ver, int argc, char *argv[]);
 69 | void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int64_t opt_flag);
 70 | void mm_write_paf3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int64_t opt_flag, int rep_len);
 71 | void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs);
 72 | void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regs, const mm_reg1_t *const* regs, void *km, int64_t opt_flag);
 73 | void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regss, const mm_reg1_t *const* regss, void *km, int64_t opt_flag, int rep_len);
 74 | 
 75 | void mm_idxopt_init(mm_idxopt_t *opt);
 76 | const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
 77 | int32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f);
 78 | int mm_idx_getseq2(const mm_idx_t *mi, int is_rev, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
 79 | mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);
 80 | mm_reg1_t *mm_gen_regs(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, mm128_t *a, int is_qstrand);
 81 | 
 82 | mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float gap_scale,
 83 | 					 int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
 84 | mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
 85 | 					  int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
 86 | mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
 87 | 					   int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km);
 88 | 
 89 | void mm_mark_alt(const mm_idx_t *mi, int n, mm_reg1_t *r);
 90 | void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a, int is_qstrand);
 91 | void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
 92 | int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a);
 93 | int mm_set_sam_pri(int n, mm_reg1_t *r);
 94 | void mm_set_parent(void *km, float mask_level, int mask_len, int n, mm_reg1_t *r, int sub_diff, int hard_mask_level, float alt_diff_frac);
 95 | void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int check_strand, int min_strand_sc, int *n_, mm_reg1_t *r);
 96 | void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r);
 97 | int mm_filter_strand_retained(int n_regs, mm_reg1_t *r);
 98 | void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs);
 99 | void mm_hit_sort(void *km, int *n_regs, mm_reg1_t *r, float alt_diff_frac);
100 | void mm_set_mapq(void *km, int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);
101 | void mm_update_dp_max(int qlen, int n_regs, mm_reg1_t *regs, float frac, int a, int b);
102 | 
103 | void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos);
104 | 
105 | mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a);
106 | void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);
107 | void mm_pair(void *km, int max_gap_ref, int dp_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs);
108 | 
109 | FILE *mm_split_init(const char *prefix, const mm_idx_t *mi);
110 | mm_idx_t *mm_split_merge_prep(const char *prefix, int n_splits, FILE **fp, uint32_t *n_seq_part);
111 | int mm_split_merge(int n_segs, const char **fn, const mm_mapopt_t *opt, int n_split_idx);
112 | void mm_split_rm_tmp(const char *prefix, int n_splits);
113 | 
114 | void mm_err_puts(const char *str);
115 | void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp);
116 | void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp);
117 | 
/*
 * Fast approximation of log2(x): take the exponent straight from the IEEE-754
 * bit pattern and add a quadratic polynomial of the mantissa rescaled to [1,2).
 * NB: this doesn't work when x<2
 */
static inline float mg_log2(float x)
{
	union { float f; uint32_t i; } bits = { x };
	uint32_t biased_exp = (bits.i >> 23) & 255;
	float result = biased_exp - 128; // unsigned subtraction; wraps for exponents below 128, hence the x<2 restriction
	bits.i &= ~(255 << 23); // clear the exponent field...
	bits.i += 127 << 23;    // ...and set it to 127, so that bits.f falls in [1,2)
	result += (-0.34484843f * bits.f + 2.02466578f) * bits.f - 0.67487759f;
	return result;
}
127 | 
128 | #ifdef __cplusplus
129 | }
130 | #endif
131 | 
132 | #endif
133 | 


--------------------------------------------------------------------------------
/python/README.rst:
--------------------------------------------------------------------------------
  1 | ==============================
  2 | Mappy: Minimap2 Python Binding
  3 | ==============================
  4 | 
  5 | Mappy provides a convenient interface to `minimap2
  6 | <https://github.com/lh3/minimap2>`_, a fast and accurate C program to align
  7 | genomic and transcript nucleotide sequences.
  8 | 
  9 | Installation
 10 | ------------
 11 | 
 12 | Mappy depends on `zlib <http://zlib.net>`_. It can be installed with `pip
 13 | <https://en.wikipedia.org/wiki/Pip_(package_manager)>`_:
 14 | 
 15 | .. code:: shell
 16 | 
 17 | 	pip install --user mappy
 18 | 
 19 | or from the minimap2 github repo (`Cython <http://cython.org>`_ required):
 20 | 
 21 | .. code:: shell
 22 | 
 23 | 	git clone https://github.com/lh3/minimap2
 24 | 	cd minimap2
 25 | 	python setup.py install
 26 | 
 27 | Usage
 28 | -----
 29 | 
 30 | The following Python script demonstrates the key functionality of mappy:
 31 | 
 32 | .. code:: python
 33 | 
 34 | 	import mappy as mp
 35 | 	a = mp.Aligner("test/MT-human.fa")  # load or build index
 36 | 	if not a: raise Exception("ERROR: failed to load/build index")
 37 | 	s = a.seq("MT_human", 100, 200)     # retrieve a subsequence from the index
 38 | 	print(mp.revcomp(s))                # reverse complement
 39 | 	for name, seq, qual in mp.fastx_read("test/MT-orang.fa"): # read a fasta/q sequence
 40 | 		for hit in a.map(seq): # traverse alignments
 41 | 			print("{}\t{}\t{}\t{}".format(hit.ctg, hit.r_st, hit.r_en, hit.cigar_str))
 42 | 
 43 | APIs
 44 | ----
 45 | 
 46 | Mappy implements two classes and two global functions.
 47 | 
 48 | Class mappy.Aligner
 49 | ~~~~~~~~~~~~~~~~~~~
 50 | 
 51 | .. code:: python
 52 | 
 53 | 	mappy.Aligner(fn_idx_in=None, preset=None, ...)
 54 | 
 55 | This constructor accepts the following arguments:
 56 | 
 57 | * **fn_idx_in**: index or sequence file name. Minimap2 automatically tests the
 58 |   file type. If a sequence file is provided, minimap2 builds an index. The
 59 |   sequence file can be optionally gzip'd. This option has no effect if **seq**
 60 |   is set.
 61 | 
 62 | * **seq**: a single sequence to index. The sequence name will be set to
 63 |   :code:`N/A`.
 64 | 
 65 | * **preset**: minimap2 preset. Currently, minimap2 supports the following
 66 |   presets: **sr** for single-end short reads; **map-pb** for PacBio
 67 |   read-to-reference mapping; **map-ont** for Oxford Nanopore read mapping;
 68 |   **splice** for long-read spliced alignment; **asm5** for assembly-to-assembly
 69 |   alignment; **asm10** for full genome alignment of closely related species. Note
 70 |   that the Python module does not support all-vs-all read overlapping.
 71 | 
 72 | * **k**: k-mer length, no larger than 28
 73 | 
 74 | * **w**: minimizer window size, no larger than 255
 75 | 
 76 | * **min_cnt**: minimum number of minimizers on a chain
 77 | 
 78 | * **min_chain_score**: minimum chaining score
 79 | 
 80 | * **bw**: chaining and alignment band width
 81 | 
 82 | * **best_n**: max number of alignments to return
 83 | 
 84 | * **n_threads**: number of indexing threads; 3 by default
 85 | 
 86 | * **extra_flags**: additional flags defined in minimap.h
 87 | 
 88 | * **fn_idx_out**: name of file to which the index is written. This parameter
 89 |   has no effect if **seq** is set.
 90 | 
 91 | * **scoring**: scoring system. It is a tuple/list consisting of 4, 6 or 7
 92 |   positive integers. The first 4 elements specify match scoring, mismatch
 93 |   penalty, gap open and gap extension penalty. The 5th and 6th elements, if
 94 |   present, set long-gap open and long-gap extension penalty. The 7th sets a
 95 |   mismatch penalty involving ambiguous bases.
 96 | 
 97 | .. code:: python
 98 | 
 99 | 	mappy.Aligner.map(seq, seq2=None, cs=False, MD=False)
100 | 
101 | This method aligns :code:`seq` against the index. It is a generator, *yielding*
102 | a series of :code:`mappy.Alignment` objects. If :code:`seq2` is present, mappy
103 | performs paired-end alignment, assuming the two ends are in the FR orientation.
104 | Alignments of the two ends can be distinguished by the :code:`read_num` field
105 | (see Class mappy.Alignment below). Argument :code:`cs` asks mappy to generate
106 | the :code:`cs` tag; :code:`MD` is similar. These two arguments might slightly
107 | degrade performance and are not enabled by default.
108 | 
109 | .. code:: python
110 | 
111 | 	mappy.Aligner.seq(name, start=0, end=0x7fffffff)
112 | 
113 | This method retrieves a (sub)sequence from the index and returns it as a Python
114 | string. :code:`None` is returned if :code:`name` is not present in the index or
115 | the start/end coordinates are invalid.
116 | 
117 | .. code:: python
118 | 
119 | 	mappy.Aligner.seq_names
120 | 
121 | This property gives the array of sequence names in the index.
122 | 
123 | Class mappy.Alignment
124 | ~~~~~~~~~~~~~~~~~~~~~
125 | 
126 | This class describes an alignment. An object of this class has the following
127 | properties:
128 | 
129 | * **ctg**: name of the reference sequence the query is mapped to
130 | 
131 | * **ctg_len**: total length of the reference sequence
132 | 
133 | * **r_st** and **r_en**: start and end positions on the reference
134 | 
135 | * **q_st** and **q_en**: start and end positions on the query
136 | 
137 | * **strand**: +1 if on the forward strand; -1 if on the reverse strand
138 | 
139 | * **mapq**: mapping quality
140 | 
141 | * **blen**: length of the alignment, including both alignment matches and gaps
142 |   but excluding ambiguous bases.
143 | 
144 | * **mlen**: length of the matching bases in the alignment, excluding ambiguous
145 |   base matches.
146 | 
147 | * **NM**: number of mismatches, gaps and ambiguous positions in the alignment
148 | 
149 | * **trans_strand**: transcript strand. +1 if on the forward strand; -1 if on the
150 |   reverse strand; 0 if unknown
151 | 
152 | * **is_primary**: if the alignment is primary (typically the best and the first
153 |   to generate)
154 | 
155 | * **read_num**: read number that the alignment corresponds to; 1 for the first
156 |   read and 2 for the second read
157 | 
158 | * **cigar_str**: CIGAR string
159 | 
160 | * **cigar**: CIGAR returned as an array of shape :code:`(n_cigar,2)`. The two
161 |   numbers give the length and the operator of each CIGAR operation.
162 | 
163 | * **MD**: the :code:`MD` tag as in the SAM format. It is an empty string unless
164 |   the :code:`MD` argument is applied when calling :code:`mappy.Aligner.map()`.
165 | 
166 | * **cs**: the :code:`cs` tag.
167 | 
168 | An :code:`Alignment` object can be converted to a string with :code:`str()` in
169 | the following format:
170 | 
171 | ::
172 | 
173 | 	q_st  q_en  strand  ctg  ctg_len  r_st  r_en  mlen  blen  mapq  cg:Z:cigar_str
174 | 
175 | It is effectively the PAF format without the QueryName and QueryLength columns
176 | (the first two columns in PAF).
177 | 
178 | Miscellaneous Functions
179 | ~~~~~~~~~~~~~~~~~~~~~~~
180 | 
181 | .. code:: python
182 | 
183 | 	mappy.fastx_read(fn, read_comment=False)
184 | 
185 | This generator function opens a FASTA/FASTQ file and *yields* a
186 | :code:`(name,seq,qual)` tuple for each sequence entry. The input file may be
187 | optionally gzip'd. If :code:`read_comment` is True, this generator yields
188 | a :code:`(name,seq,qual,comment)` tuple instead.
189 | 
190 | .. code:: python
191 | 
192 | 	mappy.revcomp(seq)
193 | 
194 | Return the reverse complement of DNA string :code:`seq`. This function
195 | recognizes IUB code and preserves the letter cases. Uracil :code:`U` is
196 | complemented to :code:`A`.
197 | 


--------------------------------------------------------------------------------
/sdust.c:
--------------------------------------------------------------------------------
  1 | #include <string.h>
  2 | #include <stdint.h>
  3 | #include <stdio.h>
  4 | #include "kalloc.h"
  5 | #include "kdq.h"
  6 | #include "kvec.h"
  7 | #include "sdust.h"
  8 | 
#define SD_WLEN 3                     // word length in bases
#define SD_WTOT (1<<(SD_WLEN<<1))     // number of distinct words at 2 bits per base (64)
#define SD_WMSK (SD_WTOT - 1)         // mask that keeps the low 2*SD_WLEN bits of a word

// One candidate interval recorded by find_perfect():
//   start, finish - interval coordinates on the sequence
//   r, l          - the score and the length (in words) it was recorded with;
//                   compared as the ratio r/l
typedef struct {
	int start, finish;
	int r, l;
} perf_intv_t;

typedef kvec_t(perf_intv_t) perf_intv_v;
typedef kvec_t(uint64_t) uint64_v;
 20 | 
 21 | KDQ_INIT(int)
 22 | 
/*
 * Byte -> nucleotide code table (A/a=0, C/c=1, G/g=2, T/t=3, other=4; raw
 * bytes 0..3 map to themselves). It is defined here only when _NO_NT4_TBL or
 * _SDUST_MAIN is set; otherwise the table is expected from another
 * translation unit.
 * NOTE(review): this copy maps 'U'/'u' to 4, whereas the table in sketch.c
 * maps them to 3, so the two build modes treat uracil differently - confirm
 * whether that is intended.
 */
#if defined(_NO_NT4_TBL) || defined(_SDUST_MAIN)
unsigned char seq_nt4_table[256] = {
	0, 1, 2, 3,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
};
#else
extern unsigned char seq_nt4_table[256];
#endif
 45 | 
// Working state that sdust_core() reuses across calls; created by
// sdust_buf_init() and released by sdust_buf_destroy().
struct sdust_buf_s {
	kdq_t(int) *w; // queue of the words in the current window
	perf_intv_v P; // the list of perfect intervals for the current window, sorted by descending start and then by ascending finish
	uint64_v res;  // the result
	void *km;      // memory pool
};
 52 | 
 53 | sdust_buf_t *sdust_buf_init(void *km)
 54 | {
 55 | 	sdust_buf_t *buf;
 56 | 	buf = (sdust_buf_t*)kcalloc(km, 1, sizeof(sdust_buf_t));
 57 | 	buf->km = km;
 58 | 	buf->w = kdq_init(int, buf->km);
 59 | 	kdq_resize(int, buf->w, 8);
 60 | 	return buf;
 61 | }
 62 | 
 63 | void sdust_buf_destroy(sdust_buf_t *buf)
 64 | {
 65 | 	if (buf == 0) return;
 66 | 	kdq_destroy(int, buf->w);
 67 | 	kfree(buf->km, buf->P.a); kfree(buf->km, buf->res.a); kfree(buf->km, buf);
 68 | }
 69 | 
/*
 * Slide the window forward by one word.
 *
 * @param t       the new word to append
 * @param w       queue of the words currently in the window
 * @param T       score threshold
 * @param W       window length in bases
 * @param L       in/out: number of words at the tail of $w covered by cv[]/rv
 * @param rw, rv  in/out: running scores over the whole window and over the
 *                tracked tail, respectively
 * @param cw, cv  in/out: per-word occurrence counts for the whole window and
 *                for the tracked tail, respectively
 */
static inline void shift_window(int t, kdq_t(int) *w, int T, int W, int *L, int *rw, int *rv, int *cw, int *cv)
{
	int s;
	// the window is full: drop its oldest word before appending the new one
	if ((int)kdq_size(w) >= W - SD_WLEN + 1) { // TODO: is this right for SD_WLEN!=3?
		s = *kdq_shift(int, w);
		*rw -= --cw[s];
		if (*L > (int)kdq_size(w)) // the dropped word was also part of the tracked tail
			--*L, *rv -= --cv[s];
	}
	kdq_push(int, w, t);
	++*L;
	// adding the n-th occurrence of a word raises the score by the n-1 earlier occurrences
	*rw += cw[t]++;
	*rv += cv[t]++;
	if (cv[t] * 10 > T<<1) {
		// word t is now too frequent in the tail: shrink the tail from its
		// oldest end until one occurrence of t has been removed
		do {
			s = kdq_at(w, kdq_size(w) - *L);
			*rv -= --cv[s];
			--*L;
		} while (s != t);
	}
}
 91 | 
 92 | static inline void save_masked_regions(void *km, uint64_v *res, perf_intv_v *P, int start)
 93 | {
 94 | 	int i, saved = 0;
 95 | 	perf_intv_t *p;
 96 | 	if (P->n == 0 || P->a[P->n - 1].start >= start) return;
 97 | 	p = &P->a[P->n - 1];
 98 | 	if (res->n) {
 99 | 		int s = res->a[res->n - 1]>>32, f = (uint32_t)res->a[res->n - 1];
100 | 		if (p->start <= f) // if overlapping with or adjacent to the previous interval
101 | 			saved = 1, res->a[res->n - 1] = (uint64_t)s<<32 | (f > p->finish? f : p->finish);
102 | 	}
103 | 	if (!saved) kv_push(uint64_t, km, *res, (uint64_t)p->start<<32|p->finish);
104 | 	for (i = P->n - 1; i >= 0 && P->a[i].start < start; --i); // remove perfect intervals that have falled out of the window
105 | 	P->n = i + 1;
106 | }
107 | 
/*
 * Scan the window for high-scoring intervals and insert them into $P.
 *
 * Starting from the tracked tail of length $L (score $rv, word counts $cv),
 * the candidate is extended one word at a time towards the start of the
 * window. A candidate whose score/length ratio exceeds T/10 is inserted if
 * its ratio is at least as high as the best ratio seen so far among the
 * intervals of $P that start at or after it.
 *
 * @param km     memory pool used to grow $P
 * @param P      interval list, kept sorted by descending start
 * @param w      queue of the words in the current window
 * @param T      score threshold
 * @param start  sequence coordinate of the first word in the window
 * @param L      number of tail words already accounted for in $rv and $cv
 * @param rv     score of that tail
 * @param cv     per-word counts of that tail; copied, not modified
 */
static void find_perfect(void *km, perf_intv_v *P, const kdq_t(int) *w, int T, int start, int L, int rv, const int *cv)
{
	int c[SD_WTOT], r = rv, i, max_r = 0, max_l = 0;
	memcpy(c, cv, SD_WTOT * sizeof(int));
	for (i = (long)kdq_size(w) - L - 1; i >= 0; --i) {
		int j, t = kdq_at(w, i), new_r, new_l;
		r += c[t]++; // extend the candidate by one word to the left
		new_r = r, new_l = kdq_size(w) - i - 1;
		if (new_r * 10 > T * new_l) { // i.e. new_r/new_l > T/10
			// max_r/max_l carries over between iterations: it is the best ratio among the intervals passed so far
			for (j = 0; j < (int)P->n && P->a[j].start >= i + start; ++j) { // find insertion position
				perf_intv_t *p = &P->a[j];
				if (max_r == 0 || p->r * max_l > max_r * p->l)
					max_r = p->r, max_l = p->l;
			}
			if (max_r == 0 || new_r * max_l >= max_r * new_l) { // then insert
				max_r = new_r, max_l = new_l;
				if (P->n == P->m) kv_resize(perf_intv_t, km, *P, P->n + 1);
				memmove(&P->a[j+1], &P->a[j], (P->n - j) * sizeof(perf_intv_t)); // make room
				++P->n;
				P->a[j].start = i + start, P->a[j].finish = kdq_size(w) + (SD_WLEN - 1) + start;
				P->a[j].r = new_r, P->a[j].l = new_l;
			}
		}
	}
}
133 | 
134 | // Scan seq[0..l_seq) (strlen(seq) if l_seq<0) and return buf->res.a: *n intervals packed as start<<32|finish, still owned by _buf_.
135 | const uint64_t *sdust_core(const uint8_t *seq, int l_seq, int T, int W, int *n, sdust_buf_t *buf)
136 | {
137 | 	int rv = 0, rw = 0, L = 0, cv[SD_WTOT], cw[SD_WTOT];
138 | 	int i, start, l = 0; // _start_: start of the current window; _l_: length of a contiguous A/C/G/T (sub)sequence
139 | 	unsigned t = 0; // current word
140 | 
141 | 	buf->res.n = 0, buf->P.n = 0; // the buffer may be reused: forget intervals from a previous call
142 | 	buf->w->count = 0, buf->w->front = 0; // empty the word window
143 | 	memset(cw, 0, sizeof(cw));
144 | 	memset(cv, 0, sizeof(cv));
145 | 	if (l_seq < 0) l_seq = strlen((const char*)seq);
146 | 	for (i = 0; i <= l_seq; ++i) { // the extra step at i==l_seq flushes pending intervals
147 | 		int b = i < l_seq? seq_nt4_table[seq[i]] : 4;
148 | 		if (b >= 4) { // N or the end of sequence; N effectively breaks input into pieces of independent sequences
149 | 			start = (l - W + 1 > 0? l - W + 1 : 0) + (i + 1 - l);
150 | 			while (buf->P.n) save_masked_regions(buf->km, &buf->res, &buf->P, start++); // clear up unsaved perfect intervals
151 | 			l = t = 0;
152 | 			continue;
153 | 		}
154 | 		++l, t = (t<<2 | b) & SD_WMSK; // an A/C/G/T base
155 | 		if (l < SD_WLEN) continue; // we have not seen a whole word yet
156 | 		start = (l - W > 0? l - W : 0) + (i + 1 - l); // set the start of the current window
157 | 		save_masked_regions(buf->km, &buf->res, &buf->P, start); // save intervals falling out of the current window?
158 | 		shift_window(t, buf->w, T, W, &L, &rw, &rv, cw, cv);
159 | 		if (rw * 10 > L * T)
160 | 			find_perfect(buf->km, &buf->P, buf->w, T, start, L, rv, cv);
161 | 	}
162 | 	*n = buf->res.n;
163 | 	return buf->res.a;
164 | }
165 | 
166 | // One-shot interface: run sdust_core() with a temporary buffer created from _km_. The returned array is
167 | // detached from that buffer before it is destroyed, so the caller is responsible for releasing it.
168 | uint64_t *sdust(void *km, const uint8_t *seq, int l_seq, int T, int W, int *n)
169 | {
170 | 	sdust_buf_t *tmp = sdust_buf_init(km);
171 | 	uint64_t *intervals = (uint64_t*)sdust_core(seq, l_seq, T, W, n, tmp);
172 | 	tmp->res.a = 0; // detach: sdust_buf_destroy() would otherwise free the result array
173 | 	sdust_buf_destroy(tmp);
174 | 	return intervals;
175 | }
176 | 
177 | #ifdef _SDUST_MAIN
178 | #include <zlib.h>
179 | #include <stdio.h>
180 | #include "ketopt.h"
181 | #include "kseq.h"
182 | KSEQ_INIT(gzFile, gzread)
183 | 
184 | // Command-line driver: print "name<TAB>start<TAB>end" for each masked interval of every sequence in <in.fa> ("-" reads stdin); returns 1 on usage or open errors.
185 | int main(int argc, char *argv[])
186 | {
187 | 	gzFile fp;
188 | 	kseq_t *ks;
189 | 	int W = 64, T = 20, c, i, n;
190 | 	ketopt_t o = KETOPT_INIT;
191 | 	while ((c = ketopt(&o, argc, argv, 1, "w:t:", 0)) >= 0)
192 | 		if (c == 'w') W = atoi(o.arg);
193 | 		else if (c == 't') T = atoi(o.arg);
194 | 	if (o.ind == argc) {
195 | 		fprintf(stderr, "Usage: sdust [-w %d] [-t %d] <in.fa>\n", W, T);
196 | 		return 1;
197 | 	}
198 | 	fp = strcmp(argv[o.ind], "-")? gzopen(argv[o.ind], "r") : gzdopen(fileno(stdin), "r");
199 | 	if (fp == 0) { // gzopen()/gzdopen() return NULL on failure; never hand a NULL stream to kseq
200 | 		fprintf(stderr, "[ERROR] failed to open file '%s'\n", argv[o.ind]);
201 | 		return 1;
202 | 	}
203 | 	ks = kseq_init(fp);
204 | 	while (kseq_read(ks) >= 0) {
205 | 		uint64_t *r = sdust(0, (uint8_t*)ks->seq.s, -1, T, W, &n); // km==0: the array is owned by us
206 | 		for (i = 0; i < n; ++i) printf("%s\t%d\t%d\n", ks->name.s, (int)(r[i]>>32), (int)r[i]);
207 | 		free(r);
208 | 	}
209 | 	kseq_destroy(ks);
210 | 	gzclose(fp);
211 | 	return 0;
212 | }
213 | #endif
214 | 


--------------------------------------------------------------------------------
/test/t-inv.fa:
--------------------------------------------------------------------------------
  1 | >ref
  2 | TGCGGAGGCTGAAGCAACTCCATCTTGGAAGCTAATCTACCATGTTGGCTTCTGATTAAC
  3 | ATCAGTTCTGGGAAGGCTTGTAAGATTTCCTGTTTGTCTATTATTTCCTAGGTAAGAGCA
  4 | GATACTTACTGTAAATCCTGCCCCTAGATTAAACAACCTTGGTGTTATCGTACTTCCATT
  5 | GTCCTATACATCCCTTCGGAATCCCCCTTTCCCTATGGTCCTCAAGCCCTTGGTCTGGGG
  6 | AGTAACAGCATAGGGATCAACCATCTCGTCTTGCCACTGCCCGAAATACAGACATGGCTT
  7 | CTGTTCCTAAGTCCCTATTCAACTTTTCTTTCTAAGAAACTGGATTTGTCAGCCTCTTTC
  8 | TTCACCTCTCAGCTTCCTTGGACTTTGGGGGTAGGTTTGCGTAGACATGCTCACCACAGA
  9 | CACAATATCAGCTTCATTCTACAGATGAGGAAGGCAAGCCTTGGGGAGCTTAACCAACTT
 10 | GTCGAGACTCATGTATATACCAACACTGAAAAGCAGATATTCCAGACTCCCAGTCATGCC
 11 | ACAGGCACACCCCTCAGTGAGAGGTGGGGTTTGTAGTTGAGGCTATTTCCTGCCCAGGGA
 12 | GCAGGGAGGCACTCTAGCTTCCCTGAGCTAACGTGGTTCTGCTTGTGTCTGACTTCCAGG
 13 | TCTCTGCCCTTTCCAAGCTCACTAGGATGGGCTTCGGGTGTGTCAAATGCCTCAGACAGT
 14 | ACAGATCCACACAGAATGGGCATATGCAACCAATCAGTGTCATAAAAAAGAAGGAAATGA
 15 | CTCGGGCCCCCTGTGTGTTCAACATGTCGAAGGTATCTGTGCAGCAGAAGAAAGAGGGGC
 16 | AAAAGCCCCCAGTGCCACAGGCCAGAGGCAGCAGCTTGGGCCCATGTGGGAGGGTTTGCT
 17 | TTCCCCTGCCAAAGTGATGGGCTGCTGCAGCCTGGGGCTTGTGGGAATCCTTCCTGGGCC
 18 | TGTGTGGGAAGTGTAGGCAGGGAGAGTGCTGCTTTCCCAAGCTCATCCCAGCTACAGCTA
 19 | CCTTTGTGCTCTGGGATTCAGGACCCCCGAGGGGGCTGGCAGGAGAGTCTCTGTTCTCGG
 20 | ATGGGTTGTCACCAGGGCATACATGGGAAGTGGGCTCTCTGGAGTCACCCTCCAGGGGAC
 21 | AATGCCAATTCCAGACACATTTACTGGAACCCCTACACTGATGACCTTTTGTTGAGGGTT
 22 | GAATTATGTCCCCAAAAAAGATACATTGAAGTCCAAACCTCTGGTGTCTATAAATGTGAT
 23 | TTTATTTGAAAATGAGGTTTCTATGGACTAAATTGTGTCCCTCCCAAATTCATATTTTGA
 24 | AGCCCTAGCCCCCAGTGTGACTATACCTAGAGACAGAGATCTTTAGGAGGTAATTAAGGT
 25 | TCAATGAGGTCAGGTGGGTGGGGCCCTAAACCAACAGGAAGGACTGTGGCCTTACTAGAA
 26 | AAGGAAGAAAAAGCATTTCCTCTCTTCTAGTATAAAAGGACACAGAAAGAAGGCAGATAT
 27 | CTACAAGCCACGAAGAGAGACGTCACTGAGAACTGAATTTGTGTACATTGATCTGGAACT
 28 | TCCAGCCTCCAGAACTTGAGAAATACATTTCTGTTGTTTATTTTTTTTTCATGTAATCAA
 29 | TTCATTTATCATATATTTATTGAGTGCCTACTATGTGCCAGAGGATACAGCAGTAACAAA
 30 | ACTAGGCAAAAATTGTGCCTAAAAGAGGGAAGATGACTTTTCTTAAAGTGTGGAATAAAG
 31 | AAAAGTAAGATAGCGGATAGAAGCTTGAAGTGAAAGCAGGTTCACAGGAAGTTTCTTTGG
 32 | TCATTTGTTTTGTTTTTAAATAGTGGAAAGATGTATATGTTTATGGAGAAAGATTGCCTT
 33 | GAAGATGCAAGAGGAAGAGATGATCAAAATTCAAGAAGAAGCAGAAAGTGATAGAATAAA
 34 | GAGCACAAGTGGAGAATTAGTGTTAATGAAAAGAAGGATGCTTCCTTTGATATGAAGTGA
 35 | AGGAAGAGAGAATGAGTAAAGACCAAGACTTGAAGTCCCTAGTTTAATAGAGGGAGATTT
 36 | CTTCTTTTGATAGCAACAATGGTATTCTGAATTATTTGAAGACATGTCATATTTCTCTTG
 37 | TGCCATTTTCCTCCCAGTTTAAACATTCTCATAACCTCTATTCCTCACATGATGTTTTTC
 38 | CAGGTCCTTTATTCTTTGGCACTCTCTTCTCTGGACACATTGTATTCTGTCATTGGTCCT
 39 | AAAATTTAGATACCCACAATTGAACATACTCCTCTAGATATGGTCTAGCTAATGCAAAAG
 40 | AACTGCTGCCTTCCAACTTGTTCAGACATCATATGTTTGTTGTCAAACGCTAAGTTGAGT
 41 | TGTTATCTTTTAAGTTTTGTTTTTGTTTTTTTTTTTTTTTTTTAATTCCAAGAGGTGCCC
 42 | ACGTTGGCTAAGTACCAAACAGGGTACTAGGGAATTTTACTTCTGAGTTAAATGCCATTC
 43 | TAGTTGTTTTTTCTTCATCTCCAGTAAGGTTATCTTTATTCACCAGTTGTTACAATAGCT
 44 | GTGGGTCTTGCTTCTCACAGTTTTATGCTGTCTGTGCTATTTTCTCTACTGATCATCACC
 45 | ACAATCATTATTGCTTATCATAATTGTTATCTTTATTTTCTCCTTTAATCAAGAATCAGT
 46 | CTTCCTTTATCTCATTATTCTCTTTTGCAGGCTTCAGGATAATTATGGTTGGAGTGCACT
 47 | GGGGGAACCAGTGCAGCTAAGCTCTGACATCTTTGCATCCCTTTTCCATCTGCTGTTTTG
 48 | GCACTCTGGTAGAATAGATAACCTAAAAACGACTTTAAAACATCTAGAAATTTTGGATAA
 49 | AATATAACAAACATCCCTTTAAATGCACAACTGATCTTCCATGGAAGTCACAGAAATATA
 50 | TAACGCCAAAAAGAAGGGAAGCTGAAACCCAGGGCTGTAAACATGAACATCATCTTCTCT
 51 | CCCTTTTTCTTGTGACTTATCTTGTTTTTCTCAGCTTTGGTGCTACCAAGGCTTGACTTT
 52 | AATAGGCATTTCCAATCAATGAGAGAATTTCTTTTGCTTTCATCAACAATTCAGTTATTG
 53 | ATGTTAACATATATATCATTTGAGTACTTTTCTTTTTTTTATTATTATTATACTTTAAGT
 54 | TTTAGGGTCCATGTGCACAATGTGCAGGTTAGTTACGTATGTATACATGTGCCATGCTGG
 55 | TGTGCTGCACCCATTAACTCATCATTTAGCATTAGGTATATCTCCTAATGCTATCCCTTC
 56 | CCCCTCTCCCCACCCCACAACAGTCCCCAGAGTGTTCCCCTTCCTGTGTCCATGTGTTCT
 57 | CATTGTTCAATCCCCATCTATGAGTGAGAACATGCGGTGTTTGGTTTTTTGTCCTTGCAA
 58 | TAGTTTACTGAGAATGATGATTTCTAATTTCATCCATGTCCCTAAAGAGCTTCTGCACAG
 59 | CAAAAGAAACTACCATCAGAGTGAACAGGCAACCTACAAAATGGGAGAAAATTTTCACAA
 60 | CCTGCTCATCTGACAAAGGGCTAATATCCAGAATCTACAATGAACTCAAACAAATTTACA
 61 | AGAAAAAAACAAACAACCCCATCAAAAAGTGGGCAAAGGATATGAACAGACACTTCTCAA
 62 | AAGAAGACATTTATGCAGCCAAAAGACACATGAAAAAATGCTCATCATCACTGGCCATCA
 63 | GAGAAATGCAAACCAAAACCACAATGAGATACCATCTCACACCAGTTAAAATGGCAATCA
 64 | TTAAAAAGTCAGGAAACAACAGGTGCTGGAGAGGATGTGGAGAAACAGGAACACTTTTAC
 65 | ACTGTTGGTGGGACTGTAAACTAGTTCAACCATTGTGGAAGTCAGTGTGCTGATTCCTCA
 66 | GGGATCTAGAACTAGAAATACCATTTGACCCAGCCATCCCATTACTGGGTATATACCCAA
 67 | AGGACTATAAATCATGCTGCTATAAAGACACATGCACACGTATGTTTATTGCGGCACTAT
 68 | TCACAATAGCAAAGACTTGGAACCAACCCAAATGTCCAACAATGATAGACTGGATTAAGA
 69 | AAATGTGGCACATATACACCACGGAATACTGTGCAGCCATAAAAAATGATGAGTTCATGT
 70 | CCTTTGTAGGGACACGGATGAAATTGGAAATCATTTCTGTTGTTTAAACCACGAAGTCTA
 71 | TGGTATCTGGTTATGACAACCTGAGAATACTAACTCAAGGGTCTTTCGCAGATGTCATTA
 72 | AGTTGTTAAAGTGAGGTCATTATGGTGGGTCCTAATCCAAGAGAAGAGATGCATGGACAG
 73 | ACGTGCACAACGGGAGGACCAAGCCAAGACACACAGGGAGAATGGCCATGGGAAGATGGA
 74 | GGCAGAGATCAAAGTGAGGCACCCACAAGCCAAGAAATGGCAGGAGCTACCAGCAGCTGG
 75 | AAGATGCAGAGAAGCATTCCTTCTTAGAGGTTTCAGAGAGAGTATGGTGCTACTGACACC
 76 | TTGATTTTGAACTTCTAGTCTCCAGAACTATGAGAGAATAAATTTCTGTTGGTTAAGCCA
 77 | TCGAGTTTGTGTAAGTTTGTTATAAGAGCCCTAGGAAATAAACATATCCATTTATTCAGG
 78 | AAAGCCTGCTAGAGTGCAAATATTTGGAAAAGATACTACTATGCAAATGTTTGAAAAAGA
 79 | TATTGCTCTTGATTCTGCCTTATGGGTTTTTCATTTCTGTAAGCTATTCTCAAAGTTTTG
 80 | TTCTTGGACTACTATTGGTAATTAAGACTGCAACATGTTTGGCAACATCAGTTGAGAACT
 81 | GTTGCTCTGGGAACGTTTTCGGCAAGCCTCAGCCCTTCTTTTCCCTTGGCTTGCATTGAG
 82 | GAGTTAGGTGATACTCTGCTGCTCAGGCCCAGCACCTTTATGGACCGTATTCCCCTGGTG
 83 | GAATGACCATCTCTGCTTGCTCTGATTGGCTGTTGGGGTTTTCTAGCATGCCCTATTTAA
 84 | TATGTATGATTTATCTCTTACTTCAGTTGGAAGGTACAGTTGCTCTGTAGTTGGCATGCA
 85 | GTCATGGTGACTATGAAAATATAAAATAATGTTTTGGTTTACAGACACTTAGAAATAAGT
 86 | TGTGTCTCAAAATTGGGTGACTATTCTAGTTATCTGCTACTCAATATCCTTGTGCGAGCC
 87 | CTCTTTACCCAGAATCAAACTAAACCATGAGGGGCACTATAGAATGTCACCCCTGGGTCC
 88 | AGGATACTATGGGGACTCAGAAGCCAAGCTCCCACTGGGGGATCTAGGGCATGCCCCCAA
 89 | GGTAAGATTCCCACCTCTTTGTTCAGCAGGAAGCACCCATCACACAAGGAGGTAGGAATA
 90 | AACAAGCATTCGTCAAGAACAAAAGATACAGATGTTCTGCTGGAGCTTGGATACATAGCA
 91 | TAAGAGGGAACAGTTCTCACAGGTAAGAGTAAGTTTTCCTCTGGTGGTGACAGTGGGACC
 92 | TGTGGGGGAGAGAATTGGGAGTACTGACAGGAAGGCAGAGTGGCTGTCCAAATGAACGGA
 93 | TTGTTTGCACATGGCCTTTAGGGCACGTTGTGTTAGCCTTCCATTGCTGCTTATATTAGT
 94 | CTGTTTTCACACTGCCCATAAATGCATACCTGAGACTGGATAATTTATAAAGAAAAAGAG
 95 | CCTTAATGTACTCATAGTTGCATGTGGCTGGGGAGGCCTCACAATCATGGCAGAAGGTGA
 96 | AAGGCACATCTTACATGGAAGCAGACAAGAGAGAATTGAGGACCAAGTGAAAGGGGTTTC
 97 | CCCTTATAAAACCATCAGATCACATGAGACTTTTTCACCACCATGAGAACAGTAAGGGGA
 98 | AAACTATGCTCATGATTCAATTGTCTCCCACTGGATTCCTCCCACAACACATAGGAATTA
 99 | TGGGAGCTAAAATTCAAGATGAGATTTGGGTGAGGACACAGCCAAACCCTATCACTGCTG
100 | TAATCAATTCCCACCAACTTAGTGGCTCGAAACATCACAGATTTATGATCTTATGACGGT
101 | GGAGGTCCCCAAATGGATCTTCTAGGTCTAGAATCAAGGTATCAGCAGACCACTTCTTTT
102 | GGAGGCTCTGGTGGAGAAACCATTTCCTCGCCTTTTCCAGCTTCTAGAGGCTGCCCTTCT
103 | CATTCCTTGGTTCACGGCCACACTCATTTCCATCTCTGCTTCCACTGTGACAACTTCTCT
104 | GCCTCAGACCCTCCTGCTTTGCCTTTGTAAGGACCCTTGTGATGAGATCAGGCCCATCCA
105 | GGATTATCCCTCATCTCAAGACCTTTACCTTAATCACATTTGCAAGGTCTCTTCCACTGT
106 | GTCAGGTAACATTTTCACAGGTTCCAGGGATTAGGGTGTGGACATCTTGGGGAGCTGGAG
107 | GATATTATTTCATCTACCACACACATCTCTACCTTGTACAGGCAAGCACTTGCAAAGTGC
108 | AATGTGATCCTCTGGAGCCACTGTCCTCCCAGAGCTTATATATACTCTGAAAGTCAACTC
109 | TCAGACCACAGCCTCCTGTCCATGCACCACTCTCATCAACACCCCCACCCGAAACACTTT
110 | CACTCCACCCTCTTTGTCCCCTAACTCATGGAGAAGAAAATCTAATTAGTAGGAGTGGAA
111 | TTTGGCTTTCATCTTTACCAGTACTAGAAATATGGTGTGTGTCTTTTTGTAAAAATTCTC
112 | TCAACTAAATTGTTTTTATTAATTTCTGCAAAATGTGAACATCAACTCCCTTCATGTGAA
113 | TGTCAATAAGATTAAATGAGCTGTCTCAGCTCCTAGCCTGTGCAAGCTAACAGCTCAGGA
114 | GATGTTTATTTCTTTCCCTCTTCTTTCCTTAATGAAGCCCTCTCCTTTGACATCTTCAAT
115 | TCTGGAGCGCTTCTTTTCTGAGGCCTTGGCTCCCCCACATTGCCCACCCTTTTCCTGCTC
116 | GTCCACATTTCTGGCTTCTATTCTCTTGTCTTTACCATCTCCCTGAACAATGTTATCCGT
117 | TCCAATGACTTCAACAGTCTCTCCGCTTACATATGATGCCTCTCAAACTCTGATCTCCAA
118 | CTCTTCCAAAGAGCTCTGGACCTTTGTTCCAATTACCTGAAAAACATCTTCTTGGATGTC
119 | CCATTAGCACTGTTAAATCAAACAAGAATTTCCCTCCCTCCTGCCTTGCTGTAGTTCCCC
120 | TAGGGATTCGGTTGTGTGGGAAGATGTGTGGAGAGCTCTTAGTTGACTCCCTTCTCTGCA
121 | GTTCTACCTCTCTAGAGACTTGGAGGACCCACTGTTTCCGCCTCGCTTTTTCAGGCCTAG
122 | AGATTGCTCGCTCCTGGGCTGGCTGCTTCATAATTCCTTATTAGTAGTTTCCCAAGCTTA
123 | CATATCTGTAAATATTTACTTTAGTTAAATTCTCCCCAATTTCCACAATATGTTGGCTGC
124 | ACATGCTTTCTACTAGGAGTCACACAACTATGATAAGAACCAAGAAATATTAGTAAACGT
125 | TTTTTACCATTATTGGCCTATACCCTGGAATAGCCAACAATAACCTAGAACCTATGCAAC
126 | AAGAATATCCAACAAGAACCTAGAGACCTGTCAGTCTATAGGTGGGAACTACAGGATGAG
127 | A
128 | 


--------------------------------------------------------------------------------
/misc/README.md:
--------------------------------------------------------------------------------
  1 | ## <a name="started"></a>Getting Started
  2 | 
  3 | ```sh
  4 | # install minimap2
  5 | git clone https://github.com/lh3/minimap2
  6 | cd minimap2 && make
  7 | # install the k8 javascript shell
  8 | curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf -
  9 | cp k8-0.2.4/k8-`uname -s` k8              # or copy it to a directory on your $PATH
 10 | # export PATH="$PATH:`pwd`:`pwd`/misc"    # run this if k8, minimap2 or paftools.js not on your $PATH
 11 | minimap2 --cs test/MT-human.fa test/MT-orang.fa | paftools.js view -     # view alignment
 12 | minimap2 -c test/MT-human.fa test/MT-orang.fa | paftools.js stat -       # basic alignment statistics
 13 | minimap2 -c --cs test/MT-human.fa test/MT-orang.fa \
 14 |   | sort -k6,6 -k8,8n | paftools.js call -L15000 -     # calling variants from asm-to-ref alignment
 15 | minimap2 -c test/MT-human.fa test/MT-orang.fa \
 16 |   | paftools.js liftover -l10000 - <(echo -e "MT_orang\t2000\t5000")     # liftOver
 17 | # no test data for the following examples
 18 | paftools.js junceval -e anno.gtf splice.sam > out.txt  # compare splice junctions to annotations
 19 | paftools.js gff2bed anno.gtf > anno.bed                # convert GTF/GFF3 to BED12
 20 | ```
 21 | 
 22 | ## Table of Contents
 23 | 
 24 | - [Getting Started](#started)
 25 | - [Introduction](#intro)
 26 | - [Evaluation](#eval)
 27 |   - [Evaluating mapping accuracy with simulated reads](#mapeval)
 28 |   - [Evaluating read overlap sensitivity](#oveval)
 29 | - [Calling Variants from Haploid Assemblies](#asmvar)
 30 | 
 31 | ## <a name="intro"></a>Introduction
 32 | 
 33 | paftools.js is a script that processes alignments in the [PAF format][paf],
 34 | such as converting between formats, evaluating mapping accuracy, lifting over
 35 | BED files based on alignment, and calling variants from assembly-to-assembly
 36 | alignment. This script *requires* the [k8 Javascript shell][k8] to run. On
 37 | Linux or Mac, you can download the precompiled k8 binary with:
 38 | 
 39 | ```sh
 40 | curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf -
 41 | cp k8-0.2.4/k8-`uname -s` $HOME/bin/k8  # assuming $HOME/bin in your $PATH
 42 | ```
 43 | 
 44 | It is highly recommended to copy the executable `k8` to a directory on your
 45 | `$PATH` so that `/usr/bin/env` can find it. As with Python scripts, once you
 46 | install `k8`, you can launch paftools.js in one of two ways:
 47 | 
 48 | ```sh
 49 | path/to/paftools.js             # only if k8 is on your $PATH
 50 | k8 path/to/paftools.js
 51 | ```
 52 | 
 53 | In a nutshell, paftools.js has the following commands:
 54 | 
 55 | ```
 56 | Usage: paftools.js <command> [arguments]
 57 | Commands:
 58 |   view       convert PAF to BLAST-like (for eyeballing) or MAF
 59 |   splice2bed convert spliced alignment in PAF/SAM to BED12
 60 |   sam2paf    convert SAM to PAF
 61 |   delta2paf  convert MUMmer's delta to PAF
 62 |   gff2bed    convert GTF/GFF3 to BED12
 63 | 
 64 |   stat       collect basic mapping information in PAF/SAM
 65 |   liftover   simplistic liftOver
 66 |   call       call variants from asm-to-ref alignment with the cs tag
 67 |   bedcov     compute the number of bases covered
 68 | 
 69 |   mapeval    evaluate mapping accuracy using mason2/PBSIM-simulated FASTQ
 70 |   mason2fq   convert mason2-simulated SAM to FASTQ
 71 |   pbsim2fq   convert PBSIM-simulated MAF to FASTQ
 72 |   junceval   evaluate splice junction consistency with known annotations
 73 |   ov-eval    evaluate read overlap sensitivity using read-to-ref mapping
 74 | ```
 75 | 
 76 | paftools.js seamlessly reads both plain text files and gzip'd text files.
 77 | 
 78 | ## <a name="eval"></a>Evaluation
 79 | 
 80 | ### <a name="mapeval"></a>Evaluating mapping accuracy with simulated reads
 81 | 
 82 | The **pbsim2fq** command of paftools.js converts the MAF output of [pbsim][pbsim]
 83 | to FASTQ and encodes the true mapping position in the read name in a format like
 84 | `S1_33!chr1!225258409!225267761!-`. Similarly, the **mason2fq** command
 85 | converts [mason2][mason2] simulated SAM to FASTQ.
 86 | 
 87 | Command **mapeval** evaluates mapped SAM/PAF. Here is example output:
 88 | 
 89 | ```
 90 | Q       60      32478   0       0.000000000     32478
 91 | Q       22      16      1       0.000030775     32494
 92 | Q       21      43      1       0.000061468     32537
 93 | Q       19      73      1       0.000091996     32610
 94 | Q       14      66      1       0.000122414     32676
 95 | Q       10      27      3       0.000214048     32703
 96 | Q       8       14      1       0.000244521     32717
 97 | Q       7       13      2       0.000305530     32730
 98 | Q       6       46      1       0.000335611     32776
 99 | Q       3       10      1       0.000366010     32786
100 | Q       2       20      2       0.000426751     32806
101 | Q       1       248     94      0.003267381     33054
102 | Q       0       31      17      0.003778147     33085
103 | U       3
104 | ```
105 | 
106 | where each Q-line gives the quality threshold, the number of reads mapped with
107 | mapping quality at or above this threshold but below the threshold on the
108 | previous line, the number of wrong mappings among them, the cumulative mapping
109 | error rate and the cumulative number of mapped reads. The U-line, if present,
110 | gives the number of unmapped reads found in the SAM file.
111 | 
112 | Suppose the reported mapping interval overlaps with the true interval as
113 | follows:
114 | 
115 | ```
116 | truth:   --------------------
117 | mapper:           ----------------------
118 |          |<- l1 ->|<-- o -->|<-- l2 -->|
119 | ```
120 | 
121 | Let `r=o/(l1+o+l2)`. The reported mapping is considered correct if `r>0.1` by
122 | default.
123 | 
124 | ### <a name="oveval"></a>Evaluating read overlap sensitivity
125 | 
126 | Command **ov-eval** takes *sorted* read-to-reference alignment and read
127 | overlaps in PAF as input, and evaluates the sensitivity. For example:
128 | 
129 | ```sh
130 | minimap2 -cx map-pb ref.fa reads.fq.gz | sort -k6,6 -k8,8n > reads-to-ref.paf
131 | minimap2 -x ava-pb reads.fq.gz reads.fq.gz > ovlp.paf
132 | paftools.js ov-eval reads-to-ref.paf ovlp.paf
133 | ```
134 | 
135 | ## <a name="asmvar"></a>Calling Variants from Haploid Assemblies
136 | 
137 | The **call** command of paftools.js calls variants from coordinate-sorted
138 | assembly-to-reference alignment. It calls variants from the [cs tag][cs] and
139 | identifies confident/callable regions as those covered by exactly one contig.
140 | Here are example command lines:
141 | 
142 | ```sh
143 | minimap2 -cx asm5 -t8 --cs ref.fa asm.fa > asm.paf  # keeping this file is recommended; --cs required!
144 | sort -k6,6 -k8,8n asm.paf > asm.srt.paf             # sort by reference start coordinate
145 | k8 paftools.js call asm.srt.paf > asm.var.txt
146 | ```
147 | 
148 | Here is sample output:
149 | 
150 | ```
151 | V   chr1    2276040 2276041 1   60  c   g   LJII01000171.1  1217409 1217410 +
152 | V   chr1    2280409 2280410 1   60  a   g   LJII01000171.1  1221778 1221779 +
153 | V   chr1    2280504 2280505 1   60  a   g   LJII01000171.1  1221873 1221874 +
154 | R   chr1    2325140 2436340
155 | V   chr1    2325287 2325287 1   60  -   ct  LJII01000171.1  1272894 1272896 +
156 | V   chr1    2325642 2325644 1   60  tt  -   LJII01000171.1  1273251 1273251 +
157 | V   chr1    2326051 2326052 1   60  c   t   LJII01000171.1  1273658 1273659 +
158 | V   chr1    2326287 2326288 1   60  c   t   LJII01000171.1  1273894 1273895 +
159 | ```
160 | 
161 | where a line starting with `R` gives regions covered by one query contig, and a
162 | V-line encodes a variant in the following format: chr, start, end, query depth,
163 | mapping quality, REF allele, ALT allele, query name, query start, end and the
164 | query orientation. Generally, you should only look at variants where column 5
165 | is one.
166 | 
167 | By default, when calling variants, "paftools.js call" ignores alignments 50kb
168 | or shorter; when deriving callable regions, it ignores alignments 10kb or
169 | shorter.  It uses two thresholds to avoid edge effects. These defaults are
170 | designed for long-read assemblies. For short reads, both should be reduced.
171 | 
172 | 
173 | 
174 | [paf]: https://github.com/lh3/miniasm/blob/master/PAF.md
175 | [cs]: https://github.com/lh3/minimap2#cs
176 | [k8]: https://github.com/attractivechaos/k8
177 | [maf]: https://genome.ucsc.edu/FAQ/FAQformat#format5
178 | [pbsim]: https://github.com/pfaucon/PBSIM-PacBio-Simulator
179 | [mason2]: https://github.com/seqan/seqan/tree/master/apps/mason2
180 | 


--------------------------------------------------------------------------------
/kalloc.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include "kalloc.h"
  5 | 
  6 | /* In kalloc, a *core* is a large chunk of contiguous memory. Each core is
  7 |  * associated with a master header, which keeps the size of the current core
  8 |  * and the pointer to next core. Kalloc allocates small *blocks* of memory from
  9 |  * the cores and organizes free memory blocks in a circular single-linked list.
 10 |  *
 11 |  * In the following diagram, "@" stands for the header of a free block (of type
 12 |  * header_t), "#" for the header of an allocated block (of type size_t), "-"
 13 |  * for free memory, and "+" for allocated memory.
 14 |  *
 15 |  * master        This region is core 1.          master           This region is core 2.
 16 |  *      |                                             |
 17 |  *      *@-------#++++++#++++++++++++@--------        *@----------#++++++++++++#+++++++@------------
 18 |  *       |                           |                 |                               |
 19 |  *       p=p->ptr->ptr->ptr->ptr     p->ptr            p->ptr->ptr                     p->ptr->ptr->ptr
 20 |  */
 21 | typedef struct header_t {
 22 | 	size_t size; /* size of the block (or core) in units of sizeof(header_t), the header itself included */
 23 | 	struct header_t *ptr; /* next free block in the circular list; in a core header, the next core */
 24 | } header_t;
 25 | 
 26 | typedef struct {
 27 | 	void *par; /* parent allocator that supplies the cores and this struct; NULL falls back to malloc()/free() */
 28 | 	size_t min_core_size; /* cores are sized in multiples of this many header_t units */
 29 | 	header_t base, *loop_head, *core_head; /* base is a zero-sized block always kept in the loop; loop_head enters the free list, core_head the list of cores */
 30 | } kmem_t;
 31 | 
 32 | /* Print the message s followed by a newline to stderr, then terminate the process with abort(). */
 33 | static void panic(const char *s)
 34 | {
 35 | 	fprintf(stderr, "%s\n", s); abort();
 36 | }
 37 | 
 38 | /* Create an allocator whose cores come from km_par (NULL: libc); min_core_size==0 selects the default of 0x80000 units. Aborts via panic() if out of memory. */
 39 | void *km_init2(void *km_par, size_t min_core_size)
 40 | {
 41 | 	kmem_t *km = (kmem_t*)kcalloc(km_par, 1, sizeof(kmem_t)); /* zero-filled: both list heads start as NULL */
 42 | 	if (!km) panic("[km_init2] insufficient memory"); /* calloc() can fail when km_par is NULL; do not dereference NULL */
 43 | 	km->par = km_par, km->min_core_size = min_core_size > 0? min_core_size : 0x80000;
 44 | 	return (void*)km;
 45 | }
 46 | 
 47 | void *km_init(void) { return km_init2(NULL, 0); } /* libc-backed allocator with the default core size */
 48 | 
 49 | /* Hand every core of _km, and then the allocator struct itself, back to the parent allocator. NULL is a no-op. */
 50 | void km_destroy(void *_km)
 51 | {
 52 | 	kmem_t *km = (kmem_t*)_km;
 53 | 	header_t *core, *next;
 54 | 	void *parent;
 55 | 	if (!km) return;
 56 | 	parent = km->par;
 57 | 	for (core = km->core_head; core; core = next) {
 58 | 		next = core->ptr; /* read the link before the core is released */
 59 | 		kfree(parent, core);
 60 | 	}
 61 | 	kfree(parent, km);
 62 | }
 63 | 
 64 | /* Get a new core with room for at least nu units from the parent allocator, add it to the free list via kfree() and return the new loop head. */
 65 | static header_t *morecore(kmem_t *km, size_t nu)
 66 | {
 67 | 	header_t *core;
 68 | 	size_t *blk, step = km->min_core_size;
 69 | 	nu = (nu + 1 + (step - 1)) / step * step; /* the first +1 for core header */
 70 | 	core = (header_t*)kmalloc(km->par, nu * sizeof(header_t));
 71 | 	if (core == 0) panic("[morecore] insufficient memory");
 72 | 	core->ptr = km->core_head, core->size = nu, km->core_head = core; /* prepend to the list of cores */
 73 | 	blk = (size_t*)(core + 1);
 74 | 	*blk = nu - 1; /* the size of the free block; -1 because the first unit is used for the core header */
 75 | 	kfree(km, blk + 1); /* initialize the new "core"; NB: the core header is not looped. */
 76 | 	return km->loop_head;
 77 | }
 78 | 
 79 | void kfree(void *_km, void *ap) /* return block ap to allocator _km (free() if _km is NULL); kfree() also adds a new core to the circular list */
 80 | {
 81 | 	header_t *p, *q;
 82 | 	kmem_t *km = (kmem_t*)_km;
 83 | 	
 84 | 	if (!ap) return; /* freeing NULL is a no-op */
 85 | 	if (km == NULL) { /* no allocator given: hand the block to libc */
 86 | 		free(ap);
 87 | 		return;
 88 | 	}
 89 | 	p = (header_t*)((size_t*)ap - 1); /* the size_t header sits right before the user pointer */
 90 | 	p->size = *((size_t*)ap - 1); /* size is the first member of header_t, so this reads and writes the same word */
 91 | 	/* Find the pointer that points to the block to be freed. The following loop can stop on two conditions:
 92 | 	 *
 93 | 	 * a) "p>q && p<q->ptr": @------#++++++++#+++++++@-------    @---------------#+++++++@-------
 94 | 	 *    (can also be in    |      |                |        -> |                       |
 95 | 	 *     two cores)        q      p           q->ptr           q                  q->ptr
 96 | 	 *
 97 | 	 *                       @--------    #+++++++++@--------    @--------    @------------------
 98 | 	 *                       |            |         |         -> |            |
 99 | 	 *                       q            p    q->ptr            q       q->ptr
100 | 	 *
101 | 	 * b) "q>=q->ptr && (p>q || p<q->ptr)":  @-------#+++++   @--------#+++++++     @-------#+++++   @----------------
102 | 	 *                                       |                |        |         -> |                |
103 | 	 *                                  q->ptr                q        p       q->ptr                q
104 | 	 *
105 | 	 *                                       #+++++++@-----   #++++++++@-------     @-------------   #++++++++@-------
106 | 	 *                                       |       |                 |         -> |                         |
107 | 	 *                                       p  q->ptr                 q       q->ptr                         q
108 | 	 */
109 | 	for (q = km->loop_head; !(p > q && p < q->ptr); q = q->ptr) /* condition a): p lies between q and its successor */
110 | 		if (q >= q->ptr && (p > q || p < q->ptr)) break; /* condition b): q is where the list wraps around and p lies beyond either end */
111 | 	if (p + p->size == q->ptr) { /* two adjacent blocks, merge p and q->ptr (the 2nd and 4th cases) */
112 | 		p->size += q->ptr->size;
113 | 		p->ptr = q->ptr->ptr;
114 | 	} else if (p + p->size > q->ptr && q->ptr >= p) {
115 | 		panic("[kfree] The end of the allocated block enters a free block.");
116 | 	} else p->ptr = q->ptr; /* backup q->ptr */
117 | 
118 | 	if (q + q->size == p) { /* two adjacent blocks, merge q and p (the other two cases) */
119 | 		q->size += p->size;
120 | 		q->ptr = p->ptr;
121 | 		km->loop_head = q;
122 | 	} else if (q + q->size > p && p >= q) {
123 | 		panic("[kfree] The end of a free block enters the allocated block.");
124 | 	} else km->loop_head = p, q->ptr = p; /* in two cores, cannot be merged; create a new block in the list */
125 | }
126 | 
127 | /* Allocate n_bytes from _km, or with malloc() if _km is NULL. Returns NULL for n_bytes==0; a failure to obtain a new core ends in panic() inside morecore(). */
128 | void *kmalloc(void *_km, size_t n_bytes)
129 | {
130 | 	kmem_t *km = (kmem_t*)_km;
131 | 	size_t need;
132 | 	header_t *cur, *prev;
133 | 
134 | 	if (n_bytes == 0) return 0;
135 | 	if (km == NULL) return malloc(n_bytes);
136 | 	need = (n_bytes + sizeof(size_t) + sizeof(header_t) - 1) / sizeof(header_t); /* header+n_bytes requires at least this number of units */
137 | 	prev = km->loop_head;
138 | 	if (prev == NULL) /* the first time kmalloc() is called: start the loop with the zero-sized base block */
139 | 		prev = km->loop_head = km->base.ptr = &km->base;
140 | 	for (cur = prev->ptr;; prev = cur, cur = cur->ptr) { /* search for a suitable block */
141 | 		if (cur->size >= need) { /* cur->size is the size of the current block, which is large enough */
142 | 			if (cur->size > need) { /* split the block. NB: memory is allocated at the end of the block! */
143 | 				cur->size -= need; /* reduce the size of the free block */
144 | 				cur += cur->size; /* cur points to the allocated block */
145 | 				*(size_t*)cur = need; /* set the size */
146 | 			} else prev->ptr = cur->ptr; /* exact fit: no need to split, unlink the whole block */
147 | 			km->loop_head = prev; /* set the end of chain */
148 | 			return (size_t*)cur + 1;
149 | 		}
150 | 		if (cur == km->loop_head) { /* then ask for more "cores" */
151 | 			if ((cur = morecore(km, need)) == 0) return 0;
152 | 		}
153 | 	}
154 | }
155 | 
156 | /* calloc() counterpart: a zero-filled array of count*size bytes. Returns NULL for an empty request or when count*size does not fit in size_t. */
157 | void *kcalloc(void *_km, size_t count, size_t size)
158 | {
159 | 	void *p;
160 | 	if (size == 0 || count == 0) return 0;
161 | 	if (_km == NULL) return calloc(count, size);
162 | 	if (count > (size_t)-1 / size) return 0; /* count*size would wrap around and silently under-allocate */
163 | 	p = kmalloc(_km, count * size);
164 | 	return memset(p, 0, count * size);
165 | }
166 | 
167 | /* realloc() counterpart: n_bytes==0 frees ap; a block that is already large enough is returned unchanged; otherwise the content moves to a new block. */
168 | void *krealloc(void *_km, void *ap, size_t n_bytes) // TODO: this can be made more efficient in principle
169 | {
170 | 	kmem_t *km = (kmem_t*)_km;
171 | 	size_t old_cap;
172 | 	void *fresh;
173 | 	if (n_bytes == 0) {
174 | 		kfree(km, ap); return 0;
175 | 	}
176 | 	if (km == NULL) return realloc(ap, n_bytes);
177 | 	if (ap == NULL) return kmalloc(km, n_bytes);
178 | 	old_cap = *((size_t*)ap - 1) * sizeof(header_t) - sizeof(size_t); /* usable bytes: block units minus the size_t header */
179 | 	if (old_cap >= n_bytes) return ap; /* TODO: this prevents shrinking */
180 | 	fresh = kmalloc(km, n_bytes);
181 | 	memcpy(fresh, ap, old_cap);
182 | 	kfree(km, ap);
183 | 	return fresh;
184 | }
185 | 
186 | /* Fill *s with usage statistics of _km; every field stays zero if _km is NULL or nothing has been allocated from it yet. */
187 | void km_stat(const void *_km, km_stat_t *s)
188 | {
189 | 	kmem_t *km = (kmem_t*)_km;
190 | 	header_t *blk, *core;
191 | 	memset(s, 0, sizeof(km_stat_t));
192 | 	if (km == NULL || km->loop_head == NULL) return;
193 | 	blk = km->loop_head;
194 | 	do { /* walk the circular free list exactly once */
195 | 		s->available += blk->size * sizeof(header_t);
196 | 		if (blk->size != 0) ++s->n_blocks; /* the zero-sized kmem_t::base is always in the loop; do not count it */
197 | 		if (blk->ptr > blk && blk + blk->size > blk->ptr)
198 | 			panic("[km_stat] The end of a free block enters another free block.");
199 | 		blk = blk->ptr;
200 | 	} while (blk != km->loop_head);
201 | 	for (core = km->core_head; core != NULL; core = core->ptr) {
202 | 		size_t bytes = core->size * sizeof(header_t);
203 | 		++s->n_cores, s->capacity += bytes;
204 | 		if (bytes > s->largest) s->largest = bytes;
205 | 	}
206 | 	s->meta_size = s->n_cores * sizeof(header_t); /* one header_t unit per core is used by the core header */
207 | }
208 | 


--------------------------------------------------------------------------------
/gpu/planalyze.cu:
--------------------------------------------------------------------------------
  1 | /* Implement kernel performance analysis that requires extra device
  2 |  * synchronization. Disabled unless DEBUG_LEVEL is set to analyze.
  3 |  * Enable individual verbose prints in planalyze.cu
  4 |  */
  5 | #include "planalyze.cuh"
  6 | 
  7 | #ifdef DEBUG_CHECK
  8 | void planalyze_short_kernel(stream_ptr_t stream, int uid, float throughput[]){
  9 |     cudaStreamSynchronize(stream.cudastream);
 10 |     size_t total_n = stream.host_mems[uid].total_n;
 11 |     chain_read_t* reads = stream.reads;
 12 |     deviceMemPtr* dev_mem = &stream.dev_mem;
 13 |     hostMemPtr* host_mem = &stream.host_mems[uid];
 14 |     size_t cut_num = stream.host_mems[uid].cut_num;
 15 |     unsigned int num_mid_seg, num_long_seg;
 16 |     cudaMemcpy(&num_mid_seg, dev_mem->d_mid_seg_count, sizeof(unsigned int),
 17 |         cudaMemcpyDeviceToHost);
 18 |     num_long_seg = host_mem->long_segs_num[0] - (uid>0 ? stream.host_mems[uid-1].long_segs_num[0] : 0);
 19 |     cudaMemcpy(&num_long_seg, dev_mem->d_long_seg_count, sizeof(unsigned int),
 20 |         cudaMemcpyDeviceToHost);
 21 | #ifdef DEBUG_VERBOSE
 22 |     fprintf(stderr, "[DEBUG](MICROBATCH# %d) total segs: %lu, short:%lu mid: %u \n", uid, cut_num, cut_num - num_mid_seg - num_long_seg, num_mid_seg);
 23 | #endif // DEBUG_VERBOSE
 24 | 
 25 |     int32_t* range = (int32_t*)malloc(sizeof(int32_t) * total_n);
 26 |     cudaMemcpy(range, dev_mem->d_range, sizeof(int32_t) * total_n,
 27 |                 cudaMemcpyDeviceToHost);
 28 |     size_t* cut = (size_t*)malloc(sizeof(size_t) * cut_num);
 29 |     cudaMemcpy(cut, dev_mem->d_cut, sizeof(size_t) * cut_num,
 30 |                 cudaMemcpyDeviceToHost);
 31 | 
 32 |     seg_t* mid_segs = (seg_t*)malloc(sizeof(seg_t) * num_mid_seg);
 33 |     cudaMemcpy(mid_segs, dev_mem->d_mid_seg, sizeof(seg_t) * num_mid_seg,
 34 |                 cudaMemcpyDeviceToHost);
 35 | 
 36 | 
 37 |     longMemPtr* long_mem = &stream.long_mem;
 38 |     unsigned int num_aggregated_long_segs;
 39 |     cudaMemcpy(&num_aggregated_long_segs, dev_mem->d_long_seg_count, sizeof(unsigned int),
 40 |                 cudaMemcpyDeviceToHost);
 41 | #ifdef DEBUG_VERBOSE
 42 |     fprintf(stderr,
 43 |             "[DEBUG] aggreagated num of long segs %u, %u-%u belongs to this "
 44 |             "minibatch\n",
 45 |             num_aggregated_long_segs,
 46 |             uid > 0 ? stream.host_mems[uid-1].long_segs_num[0] : 0,
 47 |             num_aggregated_long_segs);
 48 | #endif // DEBUG_VERBOSE
 49 | 
 50 |     seg_t* long_segs = (seg_t*)malloc(sizeof(seg_t) * num_aggregated_long_segs);
 51 |     cudaMemcpy(long_segs, dev_mem->d_long_seg, sizeof(seg_t) * num_aggregated_long_segs,
 52 |                 cudaMemcpyDeviceToHost);
 53 | 
 54 |     size_t long_segs_total_n;
 55 |     cudaMemcpy(&long_segs_total_n, dev_mem->d_total_n_long, sizeof(size_t), cudaMemcpyDeviceToHost);
 56 |     int32_t* long_range = (int32_t*)malloc(sizeof(int32_t) * long_segs_total_n);
 57 |     cudaMemcpy(long_range, dev_mem->d_range_long, sizeof(int32_t) * long_segs_total_n, cudaMemcpyDeviceToHost);
 58 | 
 59 | // Calculate long segs total workload (sc pairs)
 60 |     size_t long_seg_sc_pairs = 0;
 61 |     for(unsigned int segid = (uid>0 ? stream.host_mems[uid-1].long_segs_num[0] : 0); segid < num_aggregated_long_segs; segid++){
 62 |         for (size_t i = long_segs[segid].start_idx; i < long_segs[segid].end_idx; i++)
 63 |             long_seg_sc_pairs += long_range[i];
 64 |     }
 65 | #ifdef DEBUG_VERBOSE
 66 |     fprintf(stderr, "[DEBUG] workload (sc pairs) in long segs: %lu\n", long_seg_sc_pairs);
 67 | #endif // DEBUG_VERBOSE
 68 | 
 69 | // Calculate total workload (sc pairs)
 70 |     size_t total_sc_pairs = 0;
 71 |     for (size_t i = 0; i < total_n; i++) {
 72 |         total_sc_pairs += range[i];
 73 |     }
 74 | 
 75 | #ifdef DEBUG_VERBOSE
 76 |     fprintf(stderr, "[DEBUG] Total workload (sc pairs) in batch: %lu. %.2f%% work are in long segs.\n", total_sc_pairs, (float)long_seg_sc_pairs/total_sc_pairs*100);
 77 | #endif // DEBUG_VERBOSE
 78 |     assert(long_seg_sc_pairs <= total_sc_pairs); 
 79 | 
 80 |     // calculate short kernel throughput
 81 |     float short_kernel_runtime_ms = 0;
 82 |     cudaEventElapsedTime(&short_kernel_runtime_ms, stream.short_kernel_start_event[uid], stream.short_kernel_stop_event[uid]);
 83 |     throughput[uid] = (total_sc_pairs - long_seg_sc_pairs) / short_kernel_runtime_ms / (float)1000;
 84 | #ifdef DEBUG_VERBOSE
 85 |     fprintf(stderr, "[DEBUG] Short Seg kernel #%d throughput: %.2f Mpairs/s\n", uid, throughput[uid]);
 86 | #endif // DEBUG_VERBOSE
 87 | 
 88 | // Check range w.r.t input (MAKE SURE INPUT RANGE EXISTS)
 89 | #if 0
 90 |     int64_t read_start = 0;
 91 |     for (int i = 0; i < dev_mem->size; i++) {
 92 | // DEBUG: print range
 93 | #if defined(DEBUG_VERBOSE) && 0
 94 |         debug_print_successor_range(range + read_start, reads[i].n);
 95 | #endif
 96 |         debug_check_range(range + read_start, input_arr[i].range, input_arr[i].n);
 97 |         read_start += reads[i].n;
 98 |     }
 99 | #endif
100 | 
101 | // DEBUG: Check voilation of cut
102 | #if defined(DEBUG_CHECK) && 0
103 |     for (int readid = 0, cid = 0, idx = 0; readid < dev_mem->size; readid++) {
104 | // DEBUG: Print cuts
105 | #if defined(DEBUG_VERBOSE) && 0
106 |     debug_print_cut(cut + cid, cut_num - cid, reads[readid].n, idx, reads[readid].seq.name);
107 | #endif
108 |     cid += debug_check_cut(cut + cid, range, cut_num - cid, reads[readid].n, idx);
109 |     idx += reads[readid].n;
110 |     }
111 | #endif
112 | 
113 | #if defined(DEBUG_VERBOSE) && 0
114 |     int32_t* ax = (int32_t*) malloc(sizeof(int32_t) * dev_mem->buffer_size_long);
115 |     cudaMemcpy(ax, dev_mem->d_ax_long, sizeof(int32_t) * dev_mem->buffer_size_long, cudaMemcpyDeviceToHost);
116 |     debug_print_segs(host_mem->long_segs, reads, host_mem->long_segs_num[0], stream.host_mems[uid].size);
117 |     debug_check_anchors(host_mem->long_segs, host_mem->long_segs_num[0], ax, host_mem->ax);
118 | #endif
119 | 
120 | //DEBUG: Calculate range distribution
121 | #if defined(DEBUG_VERBOSE) && 0
122 |         debug_cal_range_dis(total_n, cut_num, range);
123 | #endif // DEBUG_VERBOSE
124 | 
125 | // Calculate range distribution for mid segs
126 | #if defined(DEBUG_VERBOSE) && 0
127 |     for (int seg_id = 0; seg_id < num_mid_seg; seg_id++){
128 |         debug_cal_mid_range_dis(mid_segs[seg_id].end_idx - mid_segs[seg_id].start_idx, 1, range + mid_segs[seg_id].start_idx);
129 |     }
130 | #endif // DEBUG_VERBOSE
131 | 
132 | // DEBUG: Calculate workload distribution
133 | #if defined(DEBUG_VERBOSE) && 0
134 |     debug_cal_sc_pair_density(total_n, cut_num, cut, range);
135 | #endif // DEBUG_VERBOSE
136 | 
137 |     free(cut);
138 |     free(range);
139 | 
140 | }
141 | #endif 
142 | 
143 | 
144 | 
145 | 
146 | #ifdef DEBUG_CHECK
147 | 
/**
 * Analyze the long-segment kernel (debug builds only).
 *
 * Copies the long segment descriptors and their range array back to the
 * host, sums the ranges into the kernel's workload (sc pairs) and derives
 * the throughput in Mpairs/s from the elapsed time between
 * stream.long_kernel_event and stream.stopevent.
 *
 * @param stream      stream descriptor holding device/host buffers and events
 * @param throughput  (out) throughput array; the slot at index
 *                    score_kernel_config.micro_batch receives the result
 */
void planalyze_long_kernel(stream_ptr_t stream, float* throughput){
    longMemPtr* long_mem = &stream.long_mem;
    deviceMemPtr* dev_mem = &stream.dev_mem;

    unsigned int seg_cnt = long_mem->total_long_segs_num[0];
#ifdef DEBUG_VERBOSE
    fprintf(stderr, "[DEBUG]LONG Kernel: num of long segs %u\n", seg_cnt);
#endif // DEBUG_VERBOSE

    // Stage the segment descriptors on the host.
    size_t seg_bytes = sizeof(seg_t) * seg_cnt;
    seg_t* h_long_segs = (seg_t*)malloc(seg_bytes);
    cudaMemcpy(h_long_segs, dev_mem->d_long_seg, seg_bytes, cudaMemcpyDeviceToHost);

    // Stage the range value of every anchor that belongs to a long segment.
    size_t range_bytes = sizeof(int32_t) * *(long_mem->total_long_segs_n);
    int32_t* h_long_range = (int32_t*)malloc(range_bytes);
    cudaMemcpy(h_long_range, dev_mem->d_range_long, range_bytes, cudaMemcpyDeviceToHost);
#ifdef DEBUG_VERBOSE
    fprintf(stderr, "[DEBUG] Total n of anchors in long segs %lu\n", *long_mem->total_long_segs_n);
#endif // DEBUG_VERBOSE

    // Total workload (sc pairs) is the sum of all staged ranges.
    size_t sc_pair_total = 0;
    size_t idx = 0;
    while (idx < *long_mem->total_long_segs_n) {
        sc_pair_total += h_long_range[idx];
        ++idx;
    }
#ifdef DEBUG_VERBOSE
    fprintf(stderr, "[DEBUG] workload (sc pairs) in long kernel: %lu\n", sc_pair_total);
#endif // DEBUG_VERBOSE

    // pairs / ms / 1000 == Mpairs/s
    float elapsed_ms = 0;
    cudaEventElapsedTime(&elapsed_ms, stream.long_kernel_event, stream.stopevent);
    float mpairs_per_s = sc_pair_total / elapsed_ms / (float)1000;
#ifdef DEBUG_VERBOSE
    fprintf(stderr, "[DEBUG] Long Seg kernel throughput: %.2f Mpairs/s\n", mpairs_per_s);
#endif // DEBUG_VERBOSE
    throughput[score_kernel_config.micro_batch] = mpairs_per_s;

// Check range w.r.t input (MAKE SURE INPUT RANGE EXISTS)
#if defined(DEBUG_VERBOSE) && 0
    debug_print_successor_range(h_long_range, *long_mem->total_long_segs_n);
#endif

//DEBUG: Calculate range distribution
#if defined(DEBUG_VERBOSE) && 0
    debug_cal_long_seg_range_dis(*long_mem->total_long_segs_n, seg_cnt, h_long_range);
#endif // DEBUG_VERBOSE

    free(h_long_segs);
    free(h_long_range);

}
200 | 
201 | #endif // DEBUG_CHECK


--------------------------------------------------------------------------------
/ksw2.h:
--------------------------------------------------------------------------------
  1 | #ifndef KSW2_H_
  2 | #define KSW2_H_
  3 | 
  4 | #include <stdint.h>
  5 | 
// Sentinel "minus infinity" score (-2^30); ksw_reset_extz() initializes the
// score, mqe and mte fields of ksw_extz_t to this value.
#define KSW_NEG_INF -0x40000000

// Bit flags for the `flag` argument of the ksw_ext* functions; every value
// occupies a distinct bit.
#define KSW_EZ_SCORE_ONLY  0x01 // don't record alignment path/cigar
#define KSW_EZ_RIGHT       0x02 // right-align gaps
#define KSW_EZ_GENERIC_SC  0x04 // without this flag: match/mismatch only; last symbol is a wildcard
#define KSW_EZ_APPROX_MAX  0x08 // approximate max; this is faster with sse
#define KSW_EZ_APPROX_DROP 0x10 // approximate Z-drop; faster with sse
#define KSW_EZ_EXTZ_ONLY   0x40 // only perform extension
#define KSW_EZ_REV_CIGAR   0x80 // reverse CIGAR in the output
// NOTE(review): the three splice flags carry no description in this header;
// presumably they are consumed by ksw_exts2_sse() - confirm against its implementation.
#define KSW_EZ_SPLICE_FOR  0x100
#define KSW_EZ_SPLICE_REV  0x200
#define KSW_EZ_SPLICE_FLANK 0x400

// The subset of CIGAR operators used by ksw code.
// Use MM_CIGAR_* from minimap.h if you need the full list.
// The operator is stored in the low 4 bits of a CIGAR element (see ksw_push_cigar()).
#define KSW_CIGAR_MATCH  0
#define KSW_CIGAR_INS    1
#define KSW_CIGAR_DEL    2
#define KSW_CIGAR_N_SKIP 3
 25 | 
 26 | #ifdef __cplusplus
 27 | extern "C" {
 28 | #endif
 29 | 
// Result record filled by the extension routines (the `ez` out-parameter).
// ksw_reset_extz() puts it back into its initial state without touching
// m_cigar or cigar, so an already allocated CIGAR buffer stays attached.
typedef struct {
	uint32_t max:31, zdropped:1; // max: best score seen so far; zdropped: set to 1 by ksw_apply_zdrop() when the Z-drop test fires
	int max_q, max_t;      // max extension coordinate
	int mqe, mqe_t;        // max score when reaching the end of query
	int mte, mte_q;        // max score when reaching the end of target
	int score;             // max score reaching both ends; may be KSW_NEG_INF
	int m_cigar, n_cigar;  // m_cigar: max CIGAR length (capacity); n_cigar: number of CIGAR elements
	int reach_end;         // NOTE(review): reset to 0 by ksw_reset_extz(); presumably set when the extension reaches the end of the query - confirm
	uint32_t *cigar;       // CIGAR elements, each encoded as len<<4|op
} ksw_extz_t;
 40 | 
/**
 * NW-like extension
 *
 * @param km        memory pool, when used with kalloc
 * @param qlen      query length
 * @param query     query sequence with 0 <= query[i] < m
 * @param tlen      target length
 * @param target    target sequence with 0 <= target[i] < m
 * @param m         number of residue types
 * @param mat       m*m scoring matrix in one-dimension array
 * @param gapo      gap open penalty (named `q` in ksw_extz() and ksw_extz2_sse()); a gap of length l cost "-(gapo+l*gape)"
 * @param gape      gap extension penalty (named `e` in ksw_extz() and ksw_extz2_sse())
 * @param w         band width (<0 to disable)
 * @param zdrop     off-diagonal drop-off to stop extension (positive; <0 to disable)
 * @param flag      flag (see KSW_EZ_* macros)
 * @param ez        (out) scores and cigar
 */
void ksw_extz(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
			  int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);

// Same parameter list as ksw_extz() plus an `end_bonus` argument.
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
				   int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);

// Variant taking a second gap open/extension pair (gapo2, gape2).
void ksw_extd(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
			  int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int flag, ksw_extz_t *ez);

// Same parameter list as ksw_extd() plus an `end_bonus` argument.
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
				   int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);

// NOTE(review): presumably the splice-aware variant (takes `noncan`, `junc_bonus` and `junc`,
// and has no band width argument) - confirm against the implementation.
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
				   int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);

// Variant taking match/mismatch scores (mch, mis) instead of a scoring matrix, and an `xdrop` threshold.
void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez);

/**
 * Global alignment
 *
 * (the first 10 parameters, km through w, are the same as those documented for ksw_extz())
 * @param m_cigar   (modified) max CIGAR length; feed 0 if cigar==0
 * @param n_cigar   (out) number of CIGAR elements
 * @param cigar     (out) BAM-encoded CIGAR; caller needs to deallocate with kfree(km, )
 *
 * @return          score of the alignment
 */
int ksw_gg(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_);
int ksw_gg2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_);
int ksw_gg2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_);

// NOTE(review): ksw_ll_qinit() returns an opaque handle that ksw_ll_i16() takes as its first
// argument `q`; ownership and how to release it are not visible in this header.
void *ksw_ll_qinit(void *km, int size, int qlen, const uint8_t *query, int m, const int8_t *mat);
int ksw_ll_i16(void *q, int tlen, const uint8_t *target, int gapo, int gape, int *qe, int *te);
 91 | 
 92 | #ifdef __cplusplus
 93 | }
 94 | #endif
 95 | 
 96 | /************************************
 97 |  *** Private macros and functions ***
 98 |  ************************************/
 99 | 
// Allocation layer used by the inline helpers below. With HAVE_KALLOC the
// k* functions come from kalloc.h; otherwise they are mapped onto the C
// library allocator and the memory-pool argument `km` is ignored.
#ifdef HAVE_KALLOC
#include "kalloc.h"
#else
#include <stdlib.h>
#define kmalloc(km, size) malloc((size))
#define kcalloc(km, count, size) calloc((count), (size))
#define krealloc(km, ptr, size) realloc((ptr), (size))
#define kfree(km, ptr) free((ptr))
#endif
109 | 
/**
 * Append `len` copies of operator `op` to a CIGAR array.
 *
 * If the last element already carries the same operator, its length is
 * increased in place; otherwise a new element (len<<4 | op) is appended and
 * the array is grown (capacity doubled, starting at 4) when it is full.
 *
 * @param km       memory pool handed to krealloc()
 * @param n_cigar  (in/out) number of elements currently in use
 * @param m_cigar  (in/out) allocated capacity, in elements
 * @param cigar    current array; may be reallocated
 * @param op       operator, stored in the low 4 bits
 * @param len      run length to add
 *
 * @return         the (possibly moved) array; callers must use this pointer
 */
static inline uint32_t *ksw_push_cigar(void *km, int *n_cigar, int *m_cigar, uint32_t *cigar, uint32_t op, int len)
{
	const int used = *n_cigar;
	if (used != 0 && (cigar[used - 1]&0xf) == op) { // same operator as the tail: just extend it
		cigar[used - 1] += len<<4;
		return cigar;
	}
	if (used == *m_cigar) { // out of room: double the capacity (or start at 4)
		*m_cigar = *m_cigar? (*m_cigar)<<1 : 4;
		cigar = (uint32_t*)krealloc(km, cigar, (*m_cigar) << 2);
	}
	cigar[used] = len<<4 | op;
	*n_cigar = used + 1;
	return cigar;
}
121 | 
122 | // In the backtrack matrix, value p[] has the following structure:
123 | //   bit 0-2: which type gets the max - 0 for H, 1 for E, 2 for F, 3 for \tilde{E} and 4 for \tilde{F}
124 | //   bit 3/0x08: 1 if a continuation on the E state (bit 5/0x20 for a continuation on \tilde{E})
125 | //   bit 4/0x10: 1 if a continuation on the F state (bit 6/0x40 for a continuation on \tilde{F})
126 | static inline void ksw_backtrack(void *km, int is_rot, int is_rev, int min_intron_len, const uint8_t *p, const int *off, const int *off_end, int n_col, int i0, int j0,
127 | 								 int *m_cigar_, int *n_cigar_, uint32_t **cigar_)
128 | { // p[] - lower 3 bits: which type gets the max; bit
129 | 	int n_cigar = 0, m_cigar = *m_cigar_, i = i0, j = j0, r, state = 0;
130 | 	uint32_t *cigar = *cigar_, tmp;
131 | 	while (i >= 0 && j >= 0) { // at the beginning of the loop, _state_ tells us which state to check
132 | 		int force_state = -1;
133 | 		if (is_rot) {
134 | 			r = i + j;
135 | 			if (i < off[r]) force_state = 2;
136 | 			if (off_end && i > off_end[r]) force_state = 1;
137 | 			tmp = force_state < 0? p[(size_t)r * n_col + i - off[r]] : 0;
138 | 		} else {
139 | 			if (j < off[i]) force_state = 2;
140 | 			if (off_end && j > off_end[i]) force_state = 1;
141 | 			tmp = force_state < 0? p[(size_t)i * n_col + j - off[i]] : 0;
142 | 		}
143 | 		if (state == 0) state = tmp & 7; // if requesting the H state, find state one maximizes it.
144 | 		else if (!(tmp >> (state + 2) & 1)) state = 0; // if requesting other states, _state_ stays the same if it is a continuation; otherwise, set to H
145 | 		if (state == 0) state = tmp & 7; // TODO: probably this line can be merged into the "else if" line right above; not 100% sure
146 | 		if (force_state >= 0) state = force_state;
147 | 		if (state == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_MATCH, 1), --i, --j;
148 | 		else if (state == 1 || (state == 3 && min_intron_len <= 0)) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_DEL, 1), --i;
149 | 		else if (state == 3 && min_intron_len > 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_N_SKIP, 1), --i;
150 | 		else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_INS, 1), --j;
151 | 	}
152 | 	if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, min_intron_len > 0 && i >= min_intron_len? KSW_CIGAR_N_SKIP : KSW_CIGAR_DEL, i + 1); // first deletion
153 | 	if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_INS, j + 1); // first insertion
154 | 	if (!is_rev)
155 | 		for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
156 | 			tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp;
157 | 	*m_cigar_ = m_cigar, *n_cigar_ = n_cigar, *cigar_ = cigar;
158 | }
159 | 
160 | static inline void ksw_reset_extz(ksw_extz_t *ez)
161 | {
162 | 	ez->max_q = ez->max_t = ez->mqe_t = ez->mte_q = -1;
163 | 	ez->max = 0, ez->score = ez->mqe = ez->mte = KSW_NEG_INF;
164 | 	ez->n_cigar = 0, ez->zdropped = 0, ez->reach_end = 0;
165 | }
166 | 
167 | static inline int ksw_apply_zdrop(ksw_extz_t *ez, int is_rot, int32_t H, int a, int b, int zdrop, int8_t e)
168 | {
169 | 	int r, t;
170 | 	if (is_rot) r = a, t = b;
171 | 	else r = a + b, t = a;
172 | 	if (H > (int32_t)ez->max) {
173 | 		ez->max = H, ez->max_t = t, ez->max_q = r - t;
174 | 	} else if (t >= ez->max_t && r - t >= ez->max_q) {
175 | 		int tl = t - ez->max_t, ql = (r - t) - ez->max_q, l;
176 | 		l = tl > ql? tl - ql : ql - tl;
177 | 		if (zdrop >= 0 && ez->max - H > zdrop + l * e) {
178 | 			ez->zdropped = 1;
179 | 			return 1;
180 | 		}
181 | 	}
182 | 	return 0;
183 | }
184 | #endif
185 | 


--------------------------------------------------------------------------------