├── .ci └── install_dependencies.sh ├── .github └── workflows │ └── build.yaml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── Singularity.def ├── src ├── CLI11.hpp ├── Makefile ├── ext │ ├── gzstream │ │ ├── COPYING.LIB │ │ ├── Makefile │ │ ├── README │ │ ├── gzstream.C │ │ ├── gzstream.h │ │ ├── index.html │ │ ├── logo.gif │ │ ├── test_gunzip.C │ │ └── test_gzip.C │ └── minimap2-2.22 │ │ ├── FAQ.md │ │ ├── LICENSE.txt │ │ ├── MANIFEST.in │ │ ├── Makefile │ │ ├── NEWS.md │ │ ├── README.md │ │ ├── align.c │ │ ├── bseq.c │ │ ├── bseq.h │ │ ├── code_of_conduct.md │ │ ├── cookbook.md │ │ ├── esterr.c │ │ ├── example.c │ │ ├── format.c │ │ ├── hit.c │ │ ├── index.c │ │ ├── kalloc.c │ │ ├── kalloc.h │ │ ├── kdq.h │ │ ├── ketopt.h │ │ ├── khash.h │ │ ├── krmq.h │ │ ├── kseq.h │ │ ├── ksort.h │ │ ├── ksw2.h │ │ ├── ksw2_dispatch.c │ │ ├── ksw2_extd2_sse.c │ │ ├── ksw2_exts2_sse.c │ │ ├── ksw2_extz2_sse.c │ │ ├── ksw2_ll_sse.c │ │ ├── kthread.c │ │ ├── kthread.h │ │ ├── kvec.h │ │ ├── lchain.c │ │ ├── main.c │ │ ├── map.c │ │ ├── minimap.h │ │ ├── minimap2.1 │ │ ├── misc.c │ │ ├── misc │ │ ├── README.md │ │ └── paftools.js │ │ ├── mmpriv.h │ │ ├── options.c │ │ ├── pe.c │ │ ├── python │ │ ├── README.rst │ │ ├── cmappy.h │ │ ├── cmappy.pxd │ │ ├── mappy.pyx │ │ └── minimap2.py │ │ ├── sdust.c │ │ ├── sdust.h │ │ ├── seed.c │ │ ├── setup.py │ │ ├── sketch.c │ │ ├── splitidx.c │ │ ├── sse2neon │ │ └── emmintrin.h │ │ └── test │ │ ├── MT-human.fa │ │ ├── MT-orang.fa │ │ ├── q-inv.fa │ │ ├── q2.fa │ │ ├── t-inv.fa │ │ └── t2.fa └── readItAndKeep.cpp └── tests ├── MN908947.3.fa ├── MN908947.3.no_poly_A.fa └── test_all.py /.ci/install_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -vexu 3 | 4 | install_root=$1 5 | 6 | apt-get update 7 | apt-get install -y software-properties-common 8 | apt-add-repository universe 9 | apt-get update 10 | 11 | apt-get install -y \ 12 | build-essential \ 
13 | git \ 14 | liblzma-dev \ 15 | libbz2-dev \ 16 | python3 \ 17 | python3-pip \ 18 | python3-setuptools \ 19 | wget \ 20 | zlib1g-dev 21 | 22 | python3 -m pip install pytest pyfastaq 23 | 24 | if [ ! -d "$install_root" ]; then 25 | mkdir -p "$install_root" 26 | fi 27 | cd "$install_root" 28 | install_root=$PWD # absolute path, so the later cd calls also work when a relative path was given 29 | 30 | #_________________________ ART ____________________________# 31 | cd "$install_root" 32 | wget https://www.niehs.nih.gov/research/resources/assets/docs/artbinmountrainier2016.06.05linux64.tgz 33 | tar xf artbinmountrainier2016.06.05linux64.tgz 34 | rm artbinmountrainier2016.06.05linux64.tgz 35 | cp -s art_bin_MountRainier/art_illumina . 36 | 37 | 38 | #_________________________ badread ________________________# 39 | cd "$install_root" 40 | wget -q https://github.com/rrwick/Badread/archive/refs/tags/v0.2.0.tar.gz 41 | tar xf v0.2.0.tar.gz 42 | rm v0.2.0.tar.gz 43 | cd Badread-0.2.0 44 | python3 -m pip install . 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Build read-it-and-keep images 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' 7 | branches: 8 | - main 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | env: 14 | REGISTRY: ghcr.io 15 | IMAGE_NAME: ${{ github.repository }} 16 | 17 | jobs: 18 | build: 19 | name: Build linux/singularity 20 | runs-on: ubuntu-20.04 21 | steps: 22 | 23 | - name: Set up Go 1.16 24 | uses: actions/setup-go@v1 25 | with: 26 | go-version: 1.16 27 | id: go 28 | 29 | - name: Install Dependencies 30 | run: | 31 | sudo apt-get update && sudo apt-get install -y \ 32 | build-essential \ 33 | libssl-dev \ 34 | uuid-dev \ 35 | libgpgme11-dev \ 36 | squashfs-tools \ 37 | libseccomp-dev \ 38 | pkg-config \ 39 | debootstrap \ 40 | debian-keyring \ 41 | debian-archive-keyring \ 42 | rsync 43 | 44 | - name: Install Singularity 45 | env: 46 | SINGULARITY_VERSION: 3.5.3 47 | GOPATH: 
/tmp/go 48 | run: | 49 | mkdir -p $GOPATH 50 | sudo mkdir -p /usr/local/var/singularity/mnt 51 | mkdir -p $GOPATH/src/github.com/sylabs 52 | cd $GOPATH/src/github.com/sylabs 53 | wget https://github.com/hpcng/singularity/releases/download/v${SINGULARITY_VERSION}/singularity-${SINGULARITY_VERSION}.tar.gz 54 | tar -xzf singularity-${SINGULARITY_VERSION}.tar.gz 55 | cd singularity 56 | ./mconfig -v -p /usr/local 57 | make -j `nproc 2>/dev/null || echo 1` -C ./builddir all 58 | sudo make -C ./builddir install 59 | 60 | 61 | - name: Check out code for the container build 62 | uses: actions/checkout@v2 63 | 64 | - name: Set release version if is a release 65 | if: startsWith(github.event.ref, 'refs/tags/v') 66 | run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV 67 | 68 | - name: Set release version if not a release 69 | if: false == startsWith(github.event.ref, 'refs/tags/v') 70 | run: echo "RELEASE_VERSION=test" >> $GITHUB_ENV 71 | 72 | - name: Build Singularity container 73 | env: 74 | SINGULARITY_RECIPE: Singularity.def 75 | OUTPUT_CONTAINER: readItAndKeep_${{env.RELEASE_VERSION}}.img 76 | run: | 77 | ls 78 | if [ -f "${SINGULARITY_RECIPE}" ]; then 79 | sudo -E singularity build ${OUTPUT_CONTAINER} ${SINGULARITY_RECIPE} 80 | else 81 | echo "${SINGULARITY_RECIPE} is not found." 
82 | echo "Present working directory: $PWD"; ls 83 | exit 1 # fail the step: without the recipe no image can be built or released 84 | fi 85 | 86 | - name: Release 87 | if: startsWith(github.event.ref, 'refs/tags/v') 88 | uses: softprops/action-gh-release@v1 89 | with: 90 | files: readItAndKeep*.img 91 | env: 92 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 93 | 94 | - name: Log in to github container registry 95 | if: startsWith(github.event.ref, 'refs/tags/v') 96 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 97 | with: 98 | registry: ${{ env.REGISTRY }} 99 | username: ${{ github.actor }} 100 | password: ${{ secrets.GITHUB_TOKEN }} 101 | 102 | - name: Extract metadata (tags, labels) for Docker 103 | if: startsWith(github.event.ref, 'refs/tags/v') 104 | id: meta 105 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 106 | with: 107 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 108 | 109 | - name: Build and push Docker image 110 | if: startsWith(github.event.ref, 'refs/tags/v') 111 | uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc 112 | with: 113 | context: . 114 | push: true 115 | tags: ${{ steps.meta.outputs.tags }} 116 | labels: ${{ steps.meta.outputs.labels }} 117 | 118 | 119 | mac_build: 120 | name: Build mac os 121 | runs-on: macos-11 122 | steps: 123 | 124 | - name: Check out code for the build 125 | uses: actions/checkout@v2 126 | 127 | - name: Set up Python 128 | uses: actions/setup-python@v4 129 | with: 130 | python-version: '3.10' 131 | 132 | - name: Install dependencies 133 | run: | 134 | pip3 install pytest pyfastaq 135 | wget -q https://ftp.gnu.org/gnu/gsl/gsl-2.7.1.tar.gz 136 | tar xf gsl-2.7.1.tar.gz 137 | rm gsl-2.7.1.tar.gz 138 | cd gsl-2.7.1 139 | ./configure 140 | make 141 | make install 142 | cd .. 
143 | 144 | wget -q https://www.niehs.nih.gov/research/resources/assets/docs/artsrcmountrainier2016.06.05macos.tgz 145 | tar xf artsrcmountrainier2016.06.05macos.tgz 146 | rm artsrcmountrainier2016.06.05macos.tgz 147 | cd art_src_MountRainier_MacOS 148 | export CFLAGS="$CFLAGS -I/usr/local/include" CPPFLAGS="$CPPFLAGS -I/usr/local/include" LDFLAGS="$LDFLAGS -L/usr/local/lib" 149 | ./configure 150 | make 151 | cd .. 152 | pwd 153 | 154 | wget -q https://github.com/rrwick/Badread/archive/refs/tags/v0.2.0.tar.gz 155 | tar xf v0.2.0.tar.gz 156 | rm v0.2.0.tar.gz 157 | cd Badread-0.2.0 158 | pip3 install . 159 | 160 | - name: Build and run tests 161 | run: | 162 | export PATH=$PWD/art_src_MountRainier_MacOS:$PATH 163 | cd src 164 | make 165 | make test 166 | 167 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # executables made when running make 35 | minimap2 36 | readItAndKeep 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ENV PATH=/bioinf-tools/:$PATH 5 | ENV LANG=C.UTF-8 6 | 7 | ARG RIK_WF_DIR=/readItAndKeep 8 | RUN mkdir -p $RIK_WF_DIR/.ci/ 9 | COPY .ci/install_dependencies.sh $RIK_WF_DIR/.ci/install_dependencies.sh 10 | RUN $RIK_WF_DIR/.ci/install_dependencies.sh /bioinf-tools 11 | 12 | COPY . 
$RIK_WF_DIR 13 | WORKDIR $RIK_WF_DIR 14 | RUN pip3 install tox \ 15 | && cd /readItAndKeep/src \ 16 | && make \ 17 | && make test \ 18 | && cd /bioinf-tools \ 19 | && cp -s /readItAndKeep/src/readItAndKeep . 20 | 21 | ENTRYPOINT [ "readItAndKeep" ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 GPAS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Build Status](https://github.com/GlobalPathogenAnalysisService/read-it-and-keep/actions/workflows/build.yaml/badge.svg) 2 | [![DOI](https://img.shields.io/badge/DOI-10.1093/bioinformatics/btac311-blue.svg)](https://doi.org/10.1093/bioinformatics/btac311) 3 | # read-it-and-keep 4 | 5 | Read contamination removal. 6 | 7 | 8 | ## Installation 9 | ### From source 10 | Make the executable `src/readItAndKeep` by running: 11 | ``` 12 | cd src && make 13 | ``` 14 | 15 | ### Bioconda ![Platforms](https://anaconda.org/bioconda/read-it-and-keep/badges/platforms.svg) ![Versions](https://anaconda.org/bioconda/read-it-and-keep/badges/version.svg) ![Downloads](https://anaconda.org/bioconda/read-it-and-keep/badges/downloads.svg) 16 | 17 | From an existing environment: 18 | ``` 19 | conda install -c bioconda read-it-and-keep 20 | ``` 21 | Using a new environment (recommended): 22 | ``` 23 | conda create -n read-it-and-keep -c bioconda python=3 read-it-and-keep 24 | conda activate read-it-and-keep 25 | ``` 26 | 27 | ### Docker 28 | 29 | Get a Docker container of the latest release: 30 | ``` 31 | docker pull ghcr.io/globalpathogenanalysisservice/read-it-and-keep:latest 32 | ``` 33 | 34 | Alternatively, build a docker container by cloning this repository and running: 35 | ``` 36 | docker build -f Dockerfile -t read-it-and-keep . 37 | ``` 38 | 39 | ### Singularity 40 | [Releases](https://github.com/GlobalPathogenAnalysisService/read-it-and-keep/releases) 41 | include a Singularity image to download, called 42 | `readItAndKeep_vX.Y.Z.img`, where X.Y.Z is the release version. 
43 | 44 | 45 | Alternatively, build a singularity container by cloning this repository and running: 46 | ``` 47 | sudo singularity build readItAndKeep.sif Singularity.def 48 | ``` 49 | 50 | 51 | ## Running on SARS-CoV-2 data 52 | 53 | If you are using `readItAndKeep` for SARS-CoV-2 reads, then we recommend that 54 | you use the reference genome MN908947.3, but with the poly-A tail removed. 55 | This is explained in the publication 56 | https://doi.org/10.1093/bioinformatics/btac311. 57 | A FASTA file of MN908947.3 without the poly-A tail is available in 58 | this repository: `tests/MN908947.3.no_poly_A.fa`. 59 | 60 | 61 | ## Usage 62 | 63 | ReadItAndKeep works by keeping the reads that match the provided target genome. 64 | To run on paired Illumina reads, in two files `reads1.fq.gz` and `reads2.fq.gz`, keeping 65 | only reads that match the genome in `ref_genome.fasta`: 66 | 67 | ``` 68 | readItAndKeep --ref_fasta ref_genome.fasta --reads1 reads1.fq.gz --reads2 reads2.fq.gz -o out 69 | ``` 70 | 71 | This will output `out.reads_1.fastq.gz` and `out.reads_2.fastq.gz`. 72 | 73 | To run on one file of nanopore reads `reads.fq.gz`: 74 | 75 | ``` 76 | readItAndKeep --tech ont --ref_fasta ref_genome.fasta --reads1 reads.fq.gz -o out 77 | ``` 78 | 79 | This will output `out.reads.fastq.gz`. 80 | 81 | If the input reads files are in FASTA format, then it will output reads in FASTA format, calling the files `*.fasta.*` instead of `*.fastq.*`. 82 | 83 | It always writes the counts of input and output reads to `STDOUT` in tab-delimited format, for example: 84 | 85 | ``` 86 | Input reads file 1 1000 87 | Input reads file 2 1000 88 | Kept reads 1 950 89 | Kept reads 2 950 90 | ``` 91 | 92 | All logging messages are sent to `STDERR`. 93 | 94 | **Required arguments:** 95 | 96 | - `--ref_fasta`: reference genome in FASTA format. 97 | - `--reads1`: at least one reads file in FASTA[.GZ] or FASTQ[.GZ] format. 98 | - `-o,--outprefix`: prefix of output files. 
99 | 100 | Please note there is an option `--tech`, which defaults to `illumina`. Use `--tech ont` for nanopore reads. 101 | 102 | **Optional arguments:** 103 | 104 | - `--reads2`: name of second reads file, i.e. mates file for paired reads 105 | - `--enumerate_names`: rename the reads `1`,`2`,`3`,... (for paired reads, will also add `/1` or `/2` on the end of names) 106 | - `--debug`: debug mode. More verbose and writes debugging files 107 | - `--min_map_length`: minimum length of match required to keep a read in bp (default `50`) 108 | - `--min_map_length_pc`: minimum length of match required to keep a read, as a percent of the read length (default `50.0`) 109 | - `-V,--version`: show version and exit 110 | 111 | ### Docker 112 | Additional arguments need to be supplied to allow Docker to access input and output files. Below is a functional example: 113 | 114 | ``` 115 | docker run -v /path/to/read-it-and-keep/tests:/tests [-v /path/to/input:/input -v /path/to/output:/output] ghcr.io/globalpathogenanalysisservice/read-it-and-keep:latest --ref_fasta /tests/MN908947.3.fa --reads1 /input/SAMPLE_1.fastq.gz --reads2 /input/SAMPLE_2.fastq.gz --outprefix /output/SAMPLE 116 | ``` 117 | ## Tests 118 | 119 | These are under development. To run them you will need: 120 | 1. Python 3 121 | 2. Python package [pytest](https://docs.pytest.org/en/stable/) (`pip install pytest`) 122 | 3. Python package [pyfastaq](https://github.com/sanger-pathogens/Fastaq) (`pip install pyfastaq`) 123 | 4. [ART read simulator](https://www.niehs.nih.gov/research/resources/software/biostatistics/art/index.cfm) 124 | installed, so that `art_illumina` is in your `$PATH` 125 | 5. [badread](https://github.com/rrwick/Badread) for nanopore read simulation. 
126 | 127 | Run the tests after compiling the source code, ie: 128 | ``` 129 | cd src 130 | make 131 | make test 132 | ``` 133 | 134 | ## Acknowledgements 135 | 136 | This repository includes unedited copies of the code from: 137 | * [gzstream](https://www.cs.unc.edu/Research/compgeom/gzstream/), [LGPL 2.1 licence](https://github.com/GlobalPathogenAnalysisService/read-it-and-keep/blob/main/src/ext/gzstream/COPYING.LIB) 138 | * [minimap2](https://github.com/lh3/minimap2), [MIT licence](https://github.com/GlobalPathogenAnalysisService/read-it-and-keep/blob/main/src/ext/minimap2-2.22/LICENSE.txt) 139 | * [CLI11](https://github.com/CLIUtils/CLI11) header file, licence is at start of [cli11.hpp](https://github.com/GlobalPathogenAnalysisService/read-it-and-keep/blob/main/src/CLI11.hpp) 140 | -------------------------------------------------------------------------------- /Singularity.def: -------------------------------------------------------------------------------- 1 | BootStrap: debootstrap 2 | OSVersion: focal 3 | MirrorURL: http://us.archive.ubuntu.com/ubuntu/ 4 | 5 | %environment 6 | export PATH=/bioinf-tools:$PATH 7 | 8 | 9 | %setup 10 | mkdir $SINGULARITY_ROOTFS/readItAndKeep 11 | rsync -a .ci/install_dependencies.sh src tests $SINGULARITY_ROOTFS/readItAndKeep 12 | 13 | 14 | %post 15 | #_____________________ setup $PATH _______________________# 16 | export PATH=/bioinf-tools/:$PATH 17 | 18 | /readItAndKeep/install_dependencies.sh /bioinf-tools 19 | cd /readItAndKeep/src 20 | make 21 | make test 22 | cd /bioinf-tools 23 | cp -s /readItAndKeep/src/readItAndKeep . 
24 | 25 | %runscript 26 | readItAndKeep "$@" 27 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | EXT_DIR = ext 2 | MINIMAP2_VERSION = 2.22 3 | MINIMAP2_DIR = $(EXT_DIR)/minimap2-$(MINIMAP2_VERSION) 4 | GZSTREAM_DIR = $(EXT_DIR)/gzstream 5 | LIBGZSTREAM_A = $(GZSTREAM_DIR)/libgzstream.a 6 | LIBMINIMAP2_A = $(MINIMAP2_DIR)/libminimap2.a 7 | 8 | UNAME_P := $(shell uname -p) 9 | MINIMAP2_OPT = 10 | ifneq ($(filter arm%,$(UNAME_P)),) 11 | MINIMAP2_OPT = arm_neon=1 aarch64=1 12 | endif 13 | ifneq ($(filter aarch%,$(UNAME_P)),) 14 | MINIMAP2_OPT = arm_neon=1 aarch64=1 15 | endif 16 | # Relink whenever the source file or either static library changes 17 | readItAndKeep: readItAndKeep.cpp $(LIBMINIMAP2_A) $(LIBGZSTREAM_A) 18 | g++ -std=c++11 -Wall -O2 -I$(MINIMAP2_DIR)/ -I$(GZSTREAM_DIR) -pthread readItAndKeep.cpp $(LIBMINIMAP2_A) $(LIBGZSTREAM_A) -lz -lm -o readItAndKeep 19 | 20 | $(LIBGZSTREAM_A) : 21 | $(MAKE) -C $(GZSTREAM_DIR) 22 | 23 | $(LIBMINIMAP2_A) : 24 | $(MAKE) -C $(MINIMAP2_DIR) $(MINIMAP2_OPT) 25 | .PHONY: clean test 26 | clean: 27 | $(RM) readItAndKeep 28 | $(MAKE) -C $(MINIMAP2_DIR) clean 29 | $(RM) $(GZSTREAM_DIR)/*.o $(GZSTREAM_DIR)/*.a 30 | 31 | test: 32 | cd ../tests && pytest 33 | -------------------------------------------------------------------------------- /src/ext/gzstream/Makefile: -------------------------------------------------------------------------------- 1 | # ============================================================================ 2 | # gzstream, C++ iostream classes wrapping the zlib compression library. 3 | # Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner 4 | # 5 | # This library is free software; you can redistribute it and/or 6 | # modify it under the terms of the GNU Lesser General Public 7 | # License as published by the Free Software Foundation; either 8 | # version 2.1 of the License, or (at your option) any later version. 
9 | # 10 | # This library is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | # Lesser General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU Lesser General Public 16 | # License along with this library; if not, write to the Free Software 17 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | # ============================================================================ 19 | # 20 | # File : Makefile 21 | # Revision : $Revision: 1.3 $ 22 | # Revision_date : $Date: 2001/10/04 15:09:28 $ 23 | # Author(s) : Deepak Bandyopadhyay, Lutz Kettner 24 | # 25 | # ============================================================================ 26 | 27 | # ---------------------------------------------------------------------------- 28 | # adapt these settings to your need: 29 | # add '-DGZSTREAM_NAMESPACE=name' to CPPFLAGS to place the classes 30 | # in its own namespace. Note, this macro needs to be set while creating 31 | # the library as well while compiling applications based on it. 32 | # As an alternative, gzstream.C and gzstream.h can be edited. 33 | # ---------------------------------------------------------------------------- 34 | 35 | # CXX = CC -n32 -LANG:std # for SGI Irix 6.5, MIPSpro CC version 7.30 36 | CXX = g++ # for Linux RedHat 6.1, g++ version 2.95.2 37 | 38 | CPPFLAGS = -I. -O 39 | LDFLAGS = -L. -lgzstream -lz 40 | AR = ar cr 41 | 42 | # ---------------------------------------------------------------------------- 43 | # plain simple rules to make and cleanup the library: 44 | # make default; compiles the library 45 | # make test; compiles and executes test. O.K. message marks success. 
46 | # make clean; removes temporary files 47 | # make cleanall; removes temporary files, the library, and programs 48 | # ---------------------------------------------------------------------------- 49 | 50 | default: libgzstream.a 51 | 52 | test: test_gzip test_gunzip 53 | ./test_gzip COPYING.LIB gz.tmp.gz 54 | gunzip gz.tmp.gz 55 | diff COPYING.LIB gz.tmp 56 | gzip gz.tmp 57 | ./test_gunzip gz.tmp.gz gz.tmp 58 | diff COPYING.LIB gz.tmp 59 | rm gz.tmp.gz gz.tmp 60 | # *** O.K. Test finished successfully. *** 61 | 62 | gzstream.o : gzstream.C gzstream.h 63 | ${CXX} ${CPPFLAGS} -c -o gzstream.o gzstream.C 64 | 65 | test_gzip.o : test_gzip.C gzstream.h 66 | ${CXX} ${CPPFLAGS} -c -o test_gzip.o test_gzip.C 67 | 68 | test_gunzip.o : test_gunzip.C gzstream.h 69 | ${CXX} ${CPPFLAGS} -c -o test_gunzip.o test_gunzip.C 70 | 71 | libgzstream.a : gzstream.o 72 | ${AR} libgzstream.a gzstream.o 73 | 74 | test_gzip : test_gzip.o libgzstream.a 75 | ${CXX} -o test_gzip test_gzip.o ${LDFLAGS} 76 | 77 | test_gunzip : test_gunzip.o libgzstream.a 78 | ${CXX} -o test_gunzip test_gunzip.o ${LDFLAGS} 79 | 80 | clean : 81 | rm *.o 82 | 83 | cleanall : 84 | rm *.o libgzstream.a test_gzip test_gunzip 85 | 86 | # ============================================================================ 87 | # EOF 88 | 89 | -------------------------------------------------------------------------------- /src/ext/gzstream/README: -------------------------------------------------------------------------------- 1 | 2 | gzstream 3 | C++ iostream classes wrapping the zlib compression library. 4 | =========================================================================== 5 | 6 | See index.html for documentation and installation instructions. 
7 | -------------------------------------------------------------------------------- /src/ext/gzstream/gzstream.C: -------------------------------------------------------------------------------- 1 | // ============================================================================ 2 | // gzstream, C++ iostream classes wrapping the zlib compression library. 3 | // Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner 4 | // 5 | // This library is free software; you can redistribute it and/or 6 | // modify it under the terms of the GNU Lesser General Public 7 | // License as published by the Free Software Foundation; either 8 | // version 2.1 of the License, or (at your option) any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | // Lesser General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public 16 | // License along with this library; if not, write to the Free Software 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | // ============================================================================ 19 | // 20 | // File : gzstream.C 21 | // Revision : $Revision: 1.7 $ 22 | // Revision_date : $Date: 2003/01/08 14:41:27 $ 23 | // Author(s) : Deepak Bandyopadhyay, Lutz Kettner 24 | // 25 | // Standard streambuf implementation following Nicolai Josuttis, "The 26 | // Standard C++ Library". 27 | // ============================================================================ 28 | 29 | #include <gzstream.h> 30 | #include <iostream> 31 | #include <string.h> // for memcpy 32 | 33 | #ifdef GZSTREAM_NAMESPACE 34 | namespace GZSTREAM_NAMESPACE { 35 | #endif 36 | 37 | // ---------------------------------------------------------------------------- 38 | // Internal classes to implement gzstream. See header file for user classes. 
39 | // ---------------------------------------------------------------------------- 40 | 41 | // -------------------------------------- 42 | // class gzstreambuf: 43 | // -------------------------------------- 44 | 45 | gzstreambuf* gzstreambuf::open( const char* name, int open_mode) { 46 | if ( is_open()) 47 | return (gzstreambuf*)0; 48 | mode = open_mode; 49 | // no append nor read/write mode 50 | if ((mode & std::ios::ate) || (mode & std::ios::app) 51 | || ((mode & std::ios::in) && (mode & std::ios::out))) 52 | return (gzstreambuf*)0; 53 | char fmode[10]; 54 | char* fmodeptr = fmode; 55 | if ( mode & std::ios::in) 56 | *fmodeptr++ = 'r'; 57 | else if ( mode & std::ios::out) 58 | *fmodeptr++ = 'w'; 59 | *fmodeptr++ = 'b'; 60 | *fmodeptr = '\0'; 61 | file = gzopen( name, fmode); 62 | if (file == 0) 63 | return (gzstreambuf*)0; 64 | opened = 1; 65 | return this; 66 | } 67 | 68 | gzstreambuf * gzstreambuf::close() { 69 | if ( is_open()) { 70 | sync(); 71 | opened = 0; 72 | if ( gzclose( file) == Z_OK) 73 | return this; 74 | } 75 | return (gzstreambuf*)0; 76 | } 77 | 78 | int gzstreambuf::underflow() { // used for input buffer only 79 | if ( gptr() && ( gptr() < egptr())) 80 | return * reinterpret_cast( gptr()); 81 | 82 | if ( ! (mode & std::ios::in) || ! 
opened) 83 | return EOF; 84 | // Josuttis' implementation of inbuf 85 | int n_putback = gptr() - eback(); 86 | if ( n_putback > 4) 87 | n_putback = 4; 88 | memcpy( buffer + (4 - n_putback), gptr() - n_putback, n_putback); 89 | 90 | int num = gzread( file, buffer+4, bufferSize-4); 91 | if (num <= 0) // ERROR or EOF 92 | return EOF; 93 | 94 | // reset buffer pointers 95 | setg( buffer + (4 - n_putback), // beginning of putback area 96 | buffer + 4, // read position 97 | buffer + 4 + num); // end of buffer 98 | 99 | // return next character 100 | return * reinterpret_cast( gptr()); 101 | } 102 | 103 | int gzstreambuf::flush_buffer() { 104 | // Separate the writing of the buffer from overflow() and 105 | // sync() operation. 106 | int w = pptr() - pbase(); 107 | if ( gzwrite( file, pbase(), w) != w) 108 | return EOF; 109 | pbump( -w); 110 | return w; 111 | } 112 | 113 | int gzstreambuf::overflow( int c) { // used for output buffer only 114 | if ( ! ( mode & std::ios::out) || ! opened) 115 | return EOF; 116 | if (c != EOF) { 117 | *pptr() = c; 118 | pbump(1); 119 | } 120 | if ( flush_buffer() == EOF) 121 | return EOF; 122 | return c; 123 | } 124 | 125 | int gzstreambuf::sync() { 126 | // Changed to use flush_buffer() instead of overflow( EOF) 127 | // which caused improper behavior with std::endl and flush(), 128 | // bug reported by Vincent Ricard. 129 | if ( pptr() && pptr() > pbase()) { 130 | if ( flush_buffer() == EOF) 131 | return -1; 132 | } 133 | return 0; 134 | } 135 | 136 | // -------------------------------------- 137 | // class gzstreambase: 138 | // -------------------------------------- 139 | 140 | gzstreambase::gzstreambase( const char* name, int mode) { 141 | init( &buf); 142 | open( name, mode); 143 | } 144 | 145 | gzstreambase::~gzstreambase() { 146 | buf.close(); 147 | } 148 | 149 | void gzstreambase::open( const char* name, int open_mode) { 150 | if ( ! 
buf.open( name, open_mode)) 151 | clear( rdstate() | std::ios::badbit); 152 | } 153 | 154 | void gzstreambase::close() { 155 | if ( buf.is_open()) 156 | if ( ! buf.close()) 157 | clear( rdstate() | std::ios::badbit); 158 | } 159 | 160 | #ifdef GZSTREAM_NAMESPACE 161 | } // namespace GZSTREAM_NAMESPACE 162 | #endif 163 | 164 | // ============================================================================ 165 | // EOF // 166 | -------------------------------------------------------------------------------- /src/ext/gzstream/gzstream.h: -------------------------------------------------------------------------------- 1 | // ============================================================================ 2 | // gzstream, C++ iostream classes wrapping the zlib compression library. 3 | // Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner 4 | // 5 | // This library is free software; you can redistribute it and/or 6 | // modify it under the terms of the GNU Lesser General Public 7 | // License as published by the Free Software Foundation; either 8 | // version 2.1 of the License, or (at your option) any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | // Lesser General Public License for more details. 
14 | // 15 | // You should have received a copy of the GNU Lesser General Public 16 | // License along with this library; if not, write to the Free Software 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | // ============================================================================ 19 | // 20 | // File : gzstream.h 21 | // Revision : $Revision: 1.5 $ 22 | // Revision_date : $Date: 2002/04/26 23:30:15 $ 23 | // Author(s) : Deepak Bandyopadhyay, Lutz Kettner 24 | // 25 | // Standard streambuf implementation following Nicolai Josuttis, "The 26 | // Standard C++ Library". 27 | // ============================================================================ 28 | 29 | #ifndef GZSTREAM_H 30 | #define GZSTREAM_H 1 31 | 32 | // standard C++ with new header file names and std:: namespace 33 | #include <iostream> 34 | #include <fstream> 35 | #include <zlib.h> 36 | 37 | #ifdef GZSTREAM_NAMESPACE 38 | namespace GZSTREAM_NAMESPACE { 39 | #endif 40 | 41 | // ---------------------------------------------------------------------------- 42 | // Internal classes to implement gzstream. See below for user classes. 43 | // ---------------------------------------------------------------------------- 44 | 45 | class gzstreambuf : public std::streambuf { 46 | private: 47 | static const int bufferSize = 47+256; // size of data buff 48 | // totals 512 bytes under g++ for igzstream at the end. 
49 | 50 | gzFile file; // file handle for compressed file 51 | char buffer[bufferSize]; // data buffer 52 | char opened; // open/close state of stream 53 | int mode; // I/O mode 54 | 55 | int flush_buffer(); 56 | public: 57 | gzstreambuf() : opened(0) { 58 | setp( buffer, buffer + (bufferSize-1)); 59 | setg( buffer + 4, // beginning of putback area 60 | buffer + 4, // read position 61 | buffer + 4); // end position 62 | // ASSERT: both input & output capabilities will not be used together 63 | } 64 | int is_open() { return opened; } 65 | gzstreambuf* open( const char* name, int open_mode); 66 | gzstreambuf* close(); 67 | ~gzstreambuf() { close(); } 68 | 69 | virtual int overflow( int c = EOF); 70 | virtual int underflow(); 71 | virtual int sync(); 72 | }; 73 | 74 | class gzstreambase : virtual public std::ios { 75 | protected: 76 | gzstreambuf buf; 77 | public: 78 | gzstreambase() { init(&buf); } 79 | gzstreambase( const char* name, int open_mode); 80 | ~gzstreambase(); 81 | void open( const char* name, int open_mode); 82 | void close(); 83 | gzstreambuf* rdbuf() { return &buf; } 84 | }; 85 | 86 | // ---------------------------------------------------------------------------- 87 | // User classes. Use igzstream and ogzstream analogously to ifstream and 88 | // ofstream respectively. They read and write files based on the gz* 89 | // function interface of the zlib. Files are compatible with gzip compression. 
90 | // ---------------------------------------------------------------------------- 91 | 92 | class igzstream : public gzstreambase, public std::istream { 93 | public: 94 | igzstream() : std::istream( &buf) {} 95 | igzstream( const char* name, int open_mode = std::ios::in) 96 | : gzstreambase( name, open_mode), std::istream( &buf) {} 97 | gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } 98 | void open( const char* name, int open_mode = std::ios::in) { 99 | gzstreambase::open( name, open_mode); 100 | } 101 | }; 102 | 103 | class ogzstream : public gzstreambase, public std::ostream { 104 | public: 105 | ogzstream() : std::ostream( &buf) {} 106 | ogzstream( const char* name, int mode = std::ios::out) 107 | : gzstreambase( name, mode), std::ostream( &buf) {} 108 | gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } 109 | void open( const char* name, int open_mode = std::ios::out) { 110 | gzstreambase::open( name, open_mode); 111 | } 112 | }; 113 | 114 | #ifdef GZSTREAM_NAMESPACE 115 | } // namespace GZSTREAM_NAMESPACE 116 | #endif 117 | 118 | #endif // GZSTREAM_H 119 | // ============================================================================ 120 | // EOF // 121 | 122 | -------------------------------------------------------------------------------- /src/ext/gzstream/index.html: -------------------------------------------------------------------------------- 1 | 2 | Gzstream Library Home Page 3 | 4 | 5 | 6 |

Gzstream Library Home Page

7 | 8 |
9 |
10 | 11 | 12 | 21 |
22 | 23 | 24 |
25 |

Introduction

26 | 27 | Gzstream is a small C++ library, basically just a wrapper, 28 | that provides the functionality of the 29 | zlib C-library in a C++ iostream. 30 | It is freely available under the LGPL license.

31 | 32 | Gzstream has been written by 33 | Deepak Bandyopadhyay and 34 | Lutz Kettner at 35 | the Computational 36 | Geometry Group at UNC Chapel Hill.

37 | 38 | 39 |


40 |

Supported Systems

41 | 42 | Gzstream requires a standards-compliant C++ compiler (we use the new 43 | header file conventions and the new iostream in the std:: namespace) 44 | and, of course, zlib. We used zlib 1.1.3 so far, but see the zlib home page for why you should 46 | upgrade to zlib 1.1.4. So, in theory, the provided sources could run 47 | on many platforms. However, we used only the following few 48 | platforms.

49 |

50 | 51 |

    52 |
  • PC Linux, RedHat 6.1, g++ version 2.95.2 53 |
  • PC Linux, Debian, g++ version 2.95.2 and 3.1 54 |
  • SGI Irix 6.5, MIPSpro CC version 7.30 55 |

56 | 57 | 58 |


59 |

Installation

60 | 61 | Either compile gzstream.C by hand, place it in some library, 62 | and move gzstream.h into the include search path of your 63 | compiler. Or use the provided Makefile, adapt its 64 | variables, and follow the remarks in the Makefile. Two 65 | test programs are provided, test_gzip.C and test_gunzip.C. 66 | The Makefile contains a rule that performs a small test 67 | with these programs.

68 | 69 | 70 |


71 |

Documentation

72 | 73 | The library provides two classes, igzstream and ogzstream, 74 | that can be used analogously to ifstream and ofstream 75 | respectively.

76 | 77 | The classes are by default in the global name space. This can 78 | be changed by setting the macro GZSTREAM_NAMESPACE to 79 | the desired name space, e.g., by setting the option 80 | -DGZSTREAM_NAMESPACE=gz in the Makefile. 81 | However, this needs to be consistent for both, the library compilation 82 | and the application that uses the library.

83 | 84 | 85 |


86 |

What's Missing

87 | 88 |
    89 |
  • Seek. The zlib library provides the necessary functionality, 90 | but we have not realized that in the wrapper (yet? ;-). 91 |
  • Both streams are based on the same streambuffer. So, they 92 | cannot be used to derive an iogzstream class that would allow 93 | simultaneous reading and writing to the same file. 94 |

95 | 96 | 97 |


98 |

Download and Release Notes

99 | 100 |
    101 |
  • Gzstream library 1.5 (08 Apr 2003): 102 | gzstream.tgz
    103 | Fixed bug that did not set the state correctly on failure to open or 104 | close a file.
    105 | Fixed bug in the indexing of the write buffer that 106 | caused the write buffer to shrink continuously and finally caused 107 | wrong results when writing compressed files (only observed on some 108 | platforms).

    109 |

  • Gzstream library 1.4 (27 Apr 2002):
    110 | Fixed a bug that stopped stream output after calling flush() 111 | or using std::endl.

    112 |

  • Gzstream library 1.3 (06 Nov 2001):
    113 | Fixed unsigned char -- signed char bug. Increased buffer size 114 | for better performance.

    115 |

  • Gzstream library 1.2 (04 Oct 2001):
    116 | Initial release as gzstream, renamed from zipstream.

    117 |

  • Zipstream library 1.1 (09 Sep 2001):
    118 | Initial release. 119 |
120 | 121 |
122 |

Acknowledgements

123 | 124 | Credits for finding bugs and improving this software go to: 125 | Vincent Ricard, Peter Milley, Peter J. Torelli, and Ares Lagae. 126 |

127 | 128 |


129 |

Links

130 | 131 | 138 | 139 |
140 |
141 | The Computational Geometry Group at UNC Chapel Hill, Jan. 08, 2003. 142 |
143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /src/ext/gzstream/logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GlobalPathogenAnalysisService/read-it-and-keep/68066301f91045258a0384b96eef704745c9b6f2/src/ext/gzstream/logo.gif -------------------------------------------------------------------------------- /src/ext/gzstream/test_gunzip.C: -------------------------------------------------------------------------------- 1 | // ============================================================================ 2 | // gzstream, C++ iostream classes wrapping the zlib compression library. 3 | // Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner 4 | // 5 | // This library is free software; you can redistribute it and/or 6 | // modify it under the terms of the GNU Lesser General Public 7 | // License as published by the Free Software Foundation; either 8 | // version 2.1 of the License, or (at your option) any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | // Lesser General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public 16 | // License along with this library; if not, write to the Free Software 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | // ============================================================================ 19 | // 20 | // File : test_gunzip.C 21 | // Revision : $Revision: 1.3 $ 22 | // Revision_date : $Date: 2001/10/04 15:09:28 $ 23 | // Author(s) : Deepak Bandyopadhyay, Lutz Kettner 24 | // 25 | // Short test program reading a file, uncompressing it, and writing it. 
26 | // ============================================================================ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | int main( int argc, char*argv[]) { 34 | if ( argc != 3) { 35 | std::cerr << "Usage: " << argv[0] <<" \n"; 36 | return EXIT_FAILURE; 37 | } 38 | // check alternate way of opening file 39 | igzstream in2; 40 | in2.open( argv[1]); 41 | if ( ! in2.good()) { 42 | std::cerr << "ERROR: Opening file `" << argv[1] << "' failed.\n"; 43 | return EXIT_FAILURE; 44 | } 45 | in2.close(); 46 | if ( ! in2.good()) { 47 | std::cerr << "ERROR: Closing file `" << argv[1] << "' failed.\n"; 48 | return EXIT_FAILURE; 49 | } 50 | // now use the shorter way with the constructor to open the same file 51 | igzstream in( argv[1]); 52 | if ( ! in.good()) { 53 | std::cerr << "ERROR: Opening file `" << argv[1] << "' failed.\n"; 54 | return EXIT_FAILURE; 55 | } 56 | std::ofstream out( argv[2]); 57 | if ( ! out.good()) { 58 | std::cerr << "ERROR: Opening file `" << argv[2] << "' failed.\n"; 59 | return EXIT_FAILURE; 60 | } 61 | char c; 62 | while ( in.get(c)) 63 | out << c; 64 | in.close(); 65 | out.close(); 66 | if ( ! in.eof()) { 67 | std::cerr << "ERROR: Reading file `" << argv[1] << "' failed.\n"; 68 | return EXIT_FAILURE; 69 | } 70 | if ( ! out.good()) { 71 | std::cerr << "ERROR: Writing file `" << argv[2] << "' failed.\n"; 72 | return EXIT_FAILURE; 73 | } 74 | return EXIT_SUCCESS; 75 | } 76 | 77 | // ============================================================================ 78 | // EOF 79 | -------------------------------------------------------------------------------- /src/ext/gzstream/test_gzip.C: -------------------------------------------------------------------------------- 1 | // ============================================================================ 2 | // gzstream, C++ iostream classes wrapping the zlib compression library. 
3 | // Copyright (C) 2001 Deepak Bandyopadhyay, Lutz Kettner 4 | // 5 | // This library is free software; you can redistribute it and/or 6 | // modify it under the terms of the GNU Lesser General Public 7 | // License as published by the Free Software Foundation; either 8 | // version 2.1 of the License, or (at your option) any later version. 9 | // 10 | // This library is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | // Lesser General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU Lesser General Public 16 | // License along with this library; if not, write to the Free Software 17 | // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 | // ============================================================================ 19 | // 20 | // File : test_gzip.C 21 | // Revision : $Revision: 1.3 $ 22 | // Revision_date : $Date: 2001/10/04 15:09:28 $ 23 | // Author(s) : Deepak Bandyopadhyay, Lutz Kettner 24 | // 25 | // Short test program reading a file, compressing it, and writing it. 26 | // ============================================================================ 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | int main( int argc, char*argv[]) { 34 | if ( argc != 3) { 35 | std::cerr << "Usage: " << argv[0] <<" \n"; 36 | return EXIT_FAILURE; 37 | } 38 | // check alternate way of opening file 39 | ogzstream out2; 40 | out2.open( argv[2]); 41 | if ( ! out2.good()) { 42 | std::cerr << "ERROR: Opening file `" << argv[2] << "' failed.\n"; 43 | return EXIT_FAILURE; 44 | } 45 | out2.close(); 46 | if ( ! out2.good()) { 47 | std::cerr << "ERROR: Closing file `" << argv[2] << "' failed.\n"; 48 | return EXIT_FAILURE; 49 | } 50 | // now use the shorter way with the constructor to open the same file 51 | ogzstream out( argv[2]); 52 | if ( ! 
out.good()) { 53 | std::cerr << "ERROR: Opening file `" << argv[2] << "' failed.\n"; 54 | return EXIT_FAILURE; 55 | } 56 | std::ifstream in( argv[1]); 57 | if ( ! in.good()) { 58 | std::cerr << "ERROR: Opening file `" << argv[1] << "' failed.\n"; 59 | return EXIT_FAILURE; 60 | } 61 | char c; 62 | while ( in.get(c)) 63 | out << c; 64 | in.close(); 65 | out.close(); 66 | if ( ! in.eof()) { 67 | std::cerr << "ERROR: Reading file `" << argv[1] << "' failed.\n"; 68 | return EXIT_FAILURE; 69 | } 70 | if ( ! out.good()) { 71 | std::cerr << "ERROR: Writing file `" << argv[2] << "' failed.\n"; 72 | return EXIT_FAILURE; 73 | } 74 | return EXIT_SUCCESS; 75 | } 76 | 77 | // ============================================================================ 78 | // EOF 79 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/FAQ.md: -------------------------------------------------------------------------------- 1 | #### 1. Alignment different with option `-a` or `-c`? 2 | 3 | Without `-a`, `-c` or `--cs`, minimap2 only finds *approximate* mapping 4 | locations without detailed base alignment. In particular, the start and end 5 | positions of the alignment are impricise. With one of those options, minimap2 6 | will perform base alignment, which is generally more accurate but is much 7 | slower. 8 | 9 | #### 2. How to map Illumina short reads to noisy long reads? 10 | 11 | No good solutions. The better approach is to assemble short reads into contigs 12 | and then map noisy reads to contigs. 13 | 14 | #### 3. The output SAM doesn't have a header. 15 | 16 | By default, minimap2 indexes 4 billion reference bases (4Gb) in a batch and map 17 | all reads against each reference batch. Given a reference longer than 4Gb, 18 | minimap2 is unable to see all the sequences and thus can't produce a correct 19 | SAM header. In this case, minimap2 doesn't output any SAM header. There are two 20 | solutions to this issue. 
First, you may increase option `-I` to, for example, 21 | `-I8g` to index more reference bases in a batch. This is preferred if your 22 | machine has enough memory. Second, if your machine doesn't have enough memory 23 | to hold the reference index, you can use the `--split-prefix` option in a 24 | command line like: 25 | ```sh 26 | minimap2 -ax map-ont --split-prefix=tmp ref.fa reads.fq 27 | ``` 28 | This second approach uses less memory, but it is slower and requires temporary 29 | disk space. 30 | 31 | #### 4. The output SAM is malformed. 32 | 33 | This typically happens when you use nohup to wrap a minimap2 command line. 34 | Nohup is discouraged as it breaks piping. If you have to use nohup, please 35 | specify an output file with option `-o`. 36 | 37 | #### 5. How to output one alignment per read? 38 | 39 | You can use `--secondary=no` to suppress secondary alignments (aka multiple 40 | mappings), but you can't suppress supplementary alignment (aka split or 41 | chimeric alignment) this way. You can use samtools to filter out these 42 | alignments: 43 | ```sh 44 | minimap2 -ax map-ont ref.fa reads.fq | samtools view -F0x900 45 | ``` 46 | However, this is discouraged as supplementary alignment is informative. 47 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2018- Dana-Farber Cancer Institute 4 | 2017-2018 Broad Institute, Inc.
5 | 6 | Permission is hereby granted, free of charge, to any person obtaining 7 | a copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to 11 | permit persons to whom the Software is furnished to do so, subject to 12 | the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be 15 | included in all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 21 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 22 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 23 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 
25 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.h 2 | include Makefile 3 | include ksw2_dispatch.c 4 | include main.c 5 | include README.md 6 | include sse2neon/emmintrin.h 7 | include python/cmappy.h 8 | include python/cmappy.pxd 9 | include python/mappy.pyx 10 | include python/README.rst 11 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra 2 | CPPFLAGS= -DHAVE_KALLOC 3 | INCLUDES= 4 | OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o \ 5 | lchain.o align.o hit.o seed.o map.o format.o pe.o esterr.o splitidx.o \ 6 | ksw2_ll_sse.o 7 | PROG= minimap2 8 | PROG_EXTRA= sdust minimap2-lite 9 | LIBS= -lm -lz -lpthread 10 | 11 | ifeq ($(arm_neon),) # if arm_neon is not defined 12 | ifeq ($(sse2only),) # if sse2only is not defined 13 | OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o 14 | else # if sse2only is defined 15 | OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o 16 | endif 17 | else # if arm_neon is defined 18 | OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o 19 | INCLUDES+=-Isse2neon 20 | ifeq ($(aarch64),) #if aarch64 is not defined 21 | CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char 22 | else #if aarch64 is defined 23 | CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char 24 | endif 25 | endif 26 | 27 | ifneq ($(asan),) 28 | CFLAGS+=-fsanitize=address 29 | LIBS+=-fsanitize=address 30 | endif 31 | 32 | ifneq ($(tsan),) 33 | CFLAGS+=-fsanitize=thread 34 | LIBS+=-fsanitize=thread 35 | endif 36 | 37 | .PHONY:all extra clean depend 38 | .SUFFIXES:.c .o 39 | 40 | .c.o: 41 | $(CC) -c 
$(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@ 42 | 43 | all:$(PROG) 44 | 45 | extra:all $(PROG_EXTRA) 46 | 47 | minimap2:main.o libminimap2.a 48 | $(CC) $(CFLAGS) main.o -o $@ -L. -lminimap2 $(LIBS) 49 | 50 | minimap2-lite:example.o libminimap2.a 51 | $(CC) $(CFLAGS) $< -o $@ -L. -lminimap2 $(LIBS) 52 | 53 | libminimap2.a:$(OBJS) 54 | $(AR) -csru $@ $(OBJS) 55 | 56 | sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h 57 | $(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz 58 | 59 | # SSE-specific targets on x86/x86_64 60 | 61 | ifeq ($(arm_neon),) # if arm_neon is defined, compile this target with the default setting (i.e. no -msse2) 62 | ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h 63 | $(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@ 64 | endif 65 | 66 | ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h 67 | $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ 68 | 69 | ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h 70 | $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ 71 | 72 | ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h 73 | $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ 74 | 75 | ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h 76 | $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ 77 | 78 | ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h 79 | $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ 80 | 81 | ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h 82 | $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ 83 | 84 | ksw2_dispatch.o:ksw2_dispatch.c ksw2.h 85 | $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ 86 | 87 | # NEON-specific targets on ARM 88 | 89 | ksw2_extz2_neon.o:ksw2_extz2_sse.c ksw2.h kalloc.h 90 | $(CC) -c $(CFLAGS) 
$(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@ 91 | 92 | ksw2_extd2_neon.o:ksw2_extd2_sse.c ksw2.h kalloc.h 93 | $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@ 94 | 95 | ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h 96 | $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@ 97 | 98 | # other non-file targets 99 | 100 | clean: 101 | rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM build dist mappy*.so mappy.c python/mappy.c mappy.egg* 102 | 103 | depend: 104 | (LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c) 105 | 106 | # DO NOT DELETE 107 | 108 | align.o: minimap.h mmpriv.h bseq.h kseq.h ksw2.h kalloc.h 109 | bseq.o: bseq.h kvec.h kalloc.h kseq.h 110 | esterr.o: mmpriv.h minimap.h bseq.h kseq.h 111 | example.o: minimap.h kseq.h 112 | format.o: kalloc.h mmpriv.h minimap.h bseq.h kseq.h 113 | hit.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h khash.h 114 | index.o: kthread.h bseq.h minimap.h mmpriv.h kseq.h kvec.h kalloc.h khash.h 115 | index.o: ksort.h 116 | kalloc.o: kalloc.h 117 | ksw2_extd2_sse.o: ksw2.h kalloc.h 118 | ksw2_exts2_sse.o: ksw2.h kalloc.h 119 | ksw2_extz2_sse.o: ksw2.h kalloc.h 120 | ksw2_ll_sse.o: ksw2.h kalloc.h 121 | kthread.o: kthread.h 122 | lchain.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h krmq.h 123 | main.o: bseq.h minimap.h mmpriv.h kseq.h ketopt.h 124 | map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h kseq.h 125 | map.o: khash.h ksort.h 126 | misc.o: mmpriv.h minimap.h bseq.h kseq.h ksort.h 127 | options.o: mmpriv.h minimap.h bseq.h kseq.h 128 | pe.o: mmpriv.h minimap.h bseq.h kseq.h kvec.h kalloc.h ksort.h 129 | sdust.o: kalloc.h kdq.h kvec.h sdust.h 130 | seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h 131 | sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h 132 | splitidx.o: mmpriv.h minimap.h bseq.h kseq.h 133 | 
-------------------------------------------------------------------------------- /src/ext/minimap2-2.22/bseq.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #define __STDC_LIMIT_MACROS 6 | #include "bseq.h" 7 | #include "kvec.h" 8 | #include "kseq.h" 9 | KSEQ_INIT2(, gzFile, gzread) 10 | 11 | unsigned char seq_comp_table[256] = { 12 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 13 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 14 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 15 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 16 | 64, 'T', 'V', 'G', 'H', 'E', 'F', 'C', 'D', 'I', 'J', 'M', 'L', 'K', 'N', 'O', 17 | 'P', 'Q', 'Y', 'S', 'A', 'A', 'B', 'W', 'X', 'R', 'Z', 91, 92, 93, 94, 95, 18 | 96, 't', 'v', 'g', 'h', 'e', 'f', 'c', 'd', 'i', 'j', 'm', 'l', 'k', 'n', 'o', 19 | 'p', 'q', 'y', 's', 'a', 'a', 'b', 'w', 'x', 'r', 'z', 123, 124, 125, 126, 127, 20 | 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 21 | 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 22 | 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 23 | 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 24 | 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 25 | 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 26 | 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 27 | 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 28 | }; 29 | 30 | #define CHECK_PAIR_THRES 1000000 31 | 32 | struct mm_bseq_file_s { 33 | gzFile fp; 34 | kseq_t *ks; 35 | mm_bseq1_t s; 36 | }; 37 | 38 | mm_bseq_file_t *mm_bseq_open(const char *fn) 39 | { 40 | mm_bseq_file_t *fp; 41 | gzFile f; 42 | f = fn && strcmp(fn, "-")? 
gzopen(fn, "r") : gzdopen(0, "r"); 43 | if (f == 0) return 0; 44 | fp = (mm_bseq_file_t*)calloc(1, sizeof(mm_bseq_file_t)); 45 | fp->fp = f; 46 | fp->ks = kseq_init(fp->fp); 47 | return fp; 48 | } 49 | 50 | void mm_bseq_close(mm_bseq_file_t *fp) 51 | { 52 | kseq_destroy(fp->ks); 53 | gzclose(fp->fp); 54 | free(fp); 55 | } 56 | 57 | static inline char *kstrdup(const kstring_t *s) 58 | { 59 | char *t; 60 | t = (char*)malloc(s->l + 1); 61 | memcpy(t, s->s, s->l + 1); 62 | return t; 63 | } 64 | 65 | static inline void kseq2bseq(kseq_t *ks, mm_bseq1_t *s, int with_qual, int with_comment) 66 | { 67 | int i; 68 | if (ks->name.l == 0) 69 | fprintf(stderr, "[WARNING]\033[1;31m empty sequence name in the input.\033[0m\n"); 70 | s->name = kstrdup(&ks->name); 71 | s->seq = kstrdup(&ks->seq); 72 | for (i = 0; i < (int)ks->seq.l; ++i) // convert U to T 73 | if (s->seq[i] == 'u' || s->seq[i] == 'U') 74 | --s->seq[i]; 75 | s->qual = with_qual && ks->qual.l? kstrdup(&ks->qual) : 0; 76 | s->comment = with_comment && ks->comment.l? 
kstrdup(&ks->comment) : 0; 77 | s->l_seq = ks->seq.l; 78 | } 79 | 80 | mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_) 81 | { 82 | int64_t size = 0; 83 | int ret; 84 | kvec_t(mm_bseq1_t) a = {0,0,0}; 85 | kseq_t *ks = fp->ks; 86 | *n_ = 0; 87 | if (fp->s.seq) { 88 | kv_resize(mm_bseq1_t, 0, a, 256); 89 | kv_push(mm_bseq1_t, 0, a, fp->s); 90 | size = fp->s.l_seq; 91 | memset(&fp->s, 0, sizeof(mm_bseq1_t)); 92 | } 93 | while ((ret = kseq_read(ks)) >= 0) { 94 | mm_bseq1_t *s; 95 | assert(ks->seq.l <= INT32_MAX); 96 | if (a.m == 0) kv_resize(mm_bseq1_t, 0, a, 256); 97 | kv_pushp(mm_bseq1_t, 0, a, &s); 98 | kseq2bseq(ks, s, with_qual, with_comment); 99 | size += s->l_seq; 100 | if (size >= chunk_size) { 101 | if (frag_mode && a.a[a.n-1].l_seq < CHECK_PAIR_THRES) { 102 | while ((ret = kseq_read(ks)) >= 0) { 103 | kseq2bseq(ks, &fp->s, with_qual, with_comment); 104 | if (mm_qname_same(fp->s.name, a.a[a.n-1].name)) { 105 | kv_push(mm_bseq1_t, 0, a, fp->s); 106 | memset(&fp->s, 0, sizeof(mm_bseq1_t)); 107 | } else break; 108 | } 109 | } 110 | break; 111 | } 112 | } 113 | if (ret < -1) { 114 | if (a.n) fprintf(stderr, "[WARNING]\033[1;31m failed to parse the FASTA/FASTQ record next to '%s'. Continue anyway.\033[0m\n", a.a[a.n-1].name); 115 | else fprintf(stderr, "[WARNING]\033[1;31m failed to parse the first FASTA/FASTQ record. 
Continue anyway.\033[0m\n"); 116 | } 117 | *n_ = a.n; 118 | return a.a; 119 | } 120 | 121 | mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int frag_mode, int *n_) 122 | { 123 | return mm_bseq_read3(fp, chunk_size, with_qual, 0, frag_mode, n_); 124 | } 125 | 126 | mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int *n_) 127 | { 128 | return mm_bseq_read2(fp, chunk_size, with_qual, 0, n_); 129 | } 130 | 131 | mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_) 132 | { 133 | int i; 134 | int64_t size = 0; 135 | kvec_t(mm_bseq1_t) a = {0,0,0}; 136 | *n_ = 0; 137 | if (n_fp < 1) return 0; 138 | while (1) { 139 | int n_read = 0; 140 | for (i = 0; i < n_fp; ++i) 141 | if (kseq_read(fp[i]->ks) >= 0) 142 | ++n_read; 143 | if (n_read < n_fp) { 144 | if (n_read > 0) 145 | fprintf(stderr, "[W::%s]\033[1;31m query files have different number of records; extra records skipped.\033[0m\n", __func__); 146 | break; // some file reaches the end 147 | } 148 | if (a.m == 0) kv_resize(mm_bseq1_t, 0, a, 256); 149 | for (i = 0; i < n_fp; ++i) { 150 | mm_bseq1_t *s; 151 | kv_pushp(mm_bseq1_t, 0, a, &s); 152 | kseq2bseq(fp[i]->ks, s, with_qual, with_comment); 153 | size += s->l_seq; 154 | } 155 | if (size >= chunk_size) break; 156 | } 157 | *n_ = a.n; 158 | return a.a; 159 | } 160 | 161 | mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int *n_) 162 | { 163 | return mm_bseq_read_frag2(n_fp, fp, chunk_size, with_qual, 0, n_); 164 | } 165 | 166 | int mm_bseq_eof(mm_bseq_file_t *fp) 167 | { 168 | return (ks_eof(fp->ks->f) && fp->s.seq == 0); 169 | } 170 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/bseq.h: -------------------------------------------------------------------------------- 1 | #ifndef MM_BSEQ_H 2 | #define MM_BSEQ_H 3 | 4 | #include 5 | 
#include 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | struct mm_bseq_file_s; 12 | typedef struct mm_bseq_file_s mm_bseq_file_t; 13 | 14 | typedef struct { 15 | int l_seq, rid; 16 | char *name, *seq, *qual, *comment; 17 | } mm_bseq1_t; 18 | 19 | mm_bseq_file_t *mm_bseq_open(const char *fn); 20 | void mm_bseq_close(mm_bseq_file_t *fp); 21 | mm_bseq1_t *mm_bseq_read3(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int with_comment, int frag_mode, int *n_); 22 | mm_bseq1_t *mm_bseq_read2(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int frag_mode, int *n_); 23 | mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int64_t chunk_size, int with_qual, int *n_); 24 | mm_bseq1_t *mm_bseq_read_frag2(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int with_comment, int *n_); 25 | mm_bseq1_t *mm_bseq_read_frag(int n_fp, mm_bseq_file_t **fp, int64_t chunk_size, int with_qual, int *n_); 26 | int mm_bseq_eof(mm_bseq_file_t *fp); 27 | 28 | extern unsigned char seq_nt4_table[256]; 29 | extern unsigned char seq_comp_table[256]; 30 | 31 | static inline int mm_qname_len(const char *s) 32 | { 33 | int l; 34 | l = strlen(s); 35 | return l >= 3 && s[l-1] >= '0' && s[l-1] <= '9' && s[l-2] == '/'? 
l - 2 : l; 36 | } 37 | 38 | static inline int mm_qname_same(const char *s1, const char *s2) 39 | { 40 | int l1, l2; 41 | l1 = mm_qname_len(s1); 42 | l2 = mm_qname_len(s2); 43 | return (l1 == l2 && strncmp(s1, s2, l1) == 0); 44 | } 45 | 46 | static inline void mm_revcomp_bseq(mm_bseq1_t *s) 47 | { 48 | int i, t, l = s->l_seq; 49 | for (i = 0; i < l>>1; ++i) { 50 | t = s->seq[l - i - 1]; 51 | s->seq[l - i - 1] = seq_comp_table[(uint8_t)s->seq[i]]; 52 | s->seq[i] = seq_comp_table[t]; 53 | } 54 | if (l&1) s->seq[l>>1] = seq_comp_table[(uint8_t)s->seq[l>>1]]; 55 | if (s->qual) 56 | for (i = 0; i < l>>1; ++i) 57 | t = s->qual[l - i - 1], s->qual[l - i - 1] = s->qual[i], s->qual[i] = t; 58 | } 59 | 60 | #ifdef __cplusplus 61 | } 62 | #endif 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/code_of_conduct.md: -------------------------------------------------------------------------------- 1 | ## Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all 4 | people who contribute through reporting issues, posting feature requests, 5 | updating documentation, submitting pull requests or patches, and other 6 | activities. 7 | 8 | We are committed to making participation in this project a harassment-free 9 | experience for everyone, regardless of level of experience, gender, gender 10 | identity and expression, sexual orientation, disability, personal appearance, 11 | body size, race, age, or religion. 12 | 13 | Examples of unacceptable behavior by participants include the use of sexual 14 | language or imagery, derogatory comments or personal attacks, trolling, public 15 | or private harassment, insults, or other unprofessional conduct. 16 | 17 | Project maintainers have the right and responsibility to remove, edit, or 18 | reject comments, commits, code, wiki edits, issues, and other contributions 19 | that are not aligned to this Code of Conduct. 
Project maintainers or 20 | contributors who do not follow the Code of Conduct may be removed from the 21 | project team. 22 | 23 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 24 | reported by opening an issue or contacting the maintainer via email. 25 | 26 | This Code of Conduct is adapted from the [Contributor Covenant][cc], [version 27 | 1.0.0][v1]. 28 | 29 | [cc]: http://contributor-covenant.org/ 30 | [v1]: http://contributor-covenant.org/version/1/0/0/ 31 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/esterr.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "mmpriv.h" 6 | 7 | static inline int32_t get_for_qpos(int32_t qlen, const mm128_t *a) 8 | { 9 | int32_t x = (int32_t)a->y; 10 | int32_t q_span = a->y>>32 & 0xff; 11 | if (a->x>>63) 12 | x = qlen - 1 - (x + 1 - q_span); // revert the position to the forward strand of query 13 | return x; 14 | } 15 | 16 | static int get_mini_idx(int qlen, const mm128_t *a, int32_t n, const uint64_t *mini_pos) 17 | { 18 | int32_t x, L = 0, R = n - 1; 19 | x = get_for_qpos(qlen, a); 20 | while (L <= R) { // binary search 21 | int32_t m = ((uint64_t)L + R) >> 1; 22 | int32_t y = (int32_t)mini_pos[m]; 23 | if (y < x) L = m + 1; 24 | else if (y > x) R = m - 1; 25 | else return m; 26 | } 27 | return -1; 28 | } 29 | 30 | void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos) 31 | { 32 | int i; 33 | uint64_t sum_k = 0; 34 | float avg_k; 35 | 36 | if (n == 0) return; 37 | for (i = 0; i < n; ++i) 38 | sum_k += mini_pos[i] >> 32 & 0xff; 39 | avg_k = (float)sum_k / n; 40 | 41 | for (i = 0; i < n_regs; ++i) { 42 | mm_reg1_t *r = &regs[i]; 43 | int32_t st, en, j, k, n_match, n_tot, l_ref; 44 | r->div = -1.0f; 45 | if (r->cnt == 0) continue; 46 | st = en = get_mini_idx(qlen, r->rev? 
&a[r->as + r->cnt - 1] : &a[r->as], n, mini_pos); 47 | if (st < 0) { 48 | if (mm_verbose >= 2) 49 | fprintf(stderr, "[WARNING] logic inconsistency in mm_est_err(). Please contact the developer.\n"); 50 | continue; 51 | } 52 | l_ref = mi->seq[r->rid].len; 53 | for (k = 1, j = st + 1, n_match = 1; j < n && k < r->cnt; ++j) { 54 | int32_t x; 55 | x = get_for_qpos(qlen, r->rev? &a[r->as + r->cnt - 1 - k] : &a[r->as + k]); 56 | if (x == (int32_t)mini_pos[j]) 57 | ++k, en = j, ++n_match; 58 | } 59 | n_tot = en - st + 1; 60 | if (r->qs > avg_k && r->rs > avg_k) ++n_tot; 61 | if (qlen - r->qs > avg_k && l_ref - r->re > avg_k) ++n_tot; 62 | r->div = n_match >= n_tot? 0.0f : (float)(1.0 - pow((double)n_match / n_tot, 1.0 / avg_k)); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/example.c: -------------------------------------------------------------------------------- 1 | // To compile: 2 | // gcc -g -O2 example.c libminimap2.a -lz 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "minimap.h" 9 | #include "kseq.h" 10 | KSEQ_INIT(gzFile, gzread) 11 | 12 | int main(int argc, char *argv[]) 13 | { 14 | mm_idxopt_t iopt; 15 | mm_mapopt_t mopt; 16 | int n_threads = 3; 17 | 18 | mm_verbose = 2; // disable message output to stderr 19 | mm_set_opt(0, &iopt, &mopt); 20 | mopt.flag |= MM_F_CIGAR; // perform alignment 21 | 22 | if (argc < 3) { 23 | fprintf(stderr, "Usage: minimap2-lite \n"); 24 | return 1; 25 | } 26 | 27 | // open query file for reading; you may use your favorite FASTA/Q parser 28 | gzFile f = gzopen(argv[2], "r"); 29 | assert(f); 30 | kseq_t *ks = kseq_init(f); 31 | 32 | // open index reader 33 | mm_idx_reader_t *r = mm_idx_reader_open(argv[1], &iopt, 0); 34 | mm_idx_t *mi; 35 | while ((mi = mm_idx_reader_read(r, n_threads)) != 0) { // traverse each part of the index 36 | mm_mapopt_update(&mopt, mi); // this sets the maximum minimizer occurrence; TODO: set a better default in 
mm_mapopt_init()! 37 | mm_tbuf_t *tbuf = mm_tbuf_init(); // thread buffer; for multi-threading, allocate one tbuf for each thread 38 | gzrewind(f); 39 | kseq_rewind(ks); 40 | while (kseq_read(ks) >= 0) { // each kseq_read() call reads one query sequence 41 | mm_reg1_t *reg; 42 | int j, i, n_reg; 43 | reg = mm_map(mi, ks->seq.l, ks->seq.s, &n_reg, tbuf, &mopt, 0); // get all hits for the query 44 | for (j = 0; j < n_reg; ++j) { // traverse hits and print them out 45 | mm_reg1_t *r = &reg[j]; 46 | assert(r->p); // with MM_F_CIGAR, this should not be NULL 47 | printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]); 48 | printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\tcg:Z:", mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re, r->mlen, r->blen, r->mapq); 49 | for (i = 0; i < r->p->n_cigar; ++i) // IMPORTANT: this gives the CIGAR in the aligned regions. NO soft/hard clippings! 50 | printf("%d%c", r->p->cigar[i]>>4, MM_CIGAR_STR[r->p->cigar[i]&0xf]); 51 | putchar('\n'); 52 | free(r->p); 53 | } 54 | free(reg); 55 | } 56 | mm_tbuf_destroy(tbuf); 57 | mm_idx_destroy(mi); 58 | } 59 | mm_idx_reader_close(r); // close the index reader 60 | kseq_destroy(ks); // close the query file 61 | gzclose(f); 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/kalloc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "kalloc.h" 5 | 6 | /* In kalloc, a *core* is a large chunk of contiguous memory. Each core is 7 | * associated with a master header, which keeps the size of the current core 8 | * and the pointer to next core. Kalloc allocates small *blocks* of memory from 9 | * the cores and organizes free memory blocks in a circular single-linked list. 
10 | * 11 | * In the following diagram, "@" stands for the header of a free block (of type 12 | * header_t), "#" for the header of an allocated block (of type size_t), "-" 13 | * for free memory, and "+" for allocated memory. 14 | * 15 | * master This region is core 1. master This region is core 2. 16 | * | | 17 | * *@-------#++++++#++++++++++++@-------- *@----------#++++++++++++#+++++++@------------ 18 | * | | | | 19 | * p=p->ptr->ptr->ptr->ptr p->ptr p->ptr->ptr p->ptr->ptr->ptr 20 | */ 21 | typedef struct header_t { 22 | size_t size; 23 | struct header_t *ptr; 24 | } header_t; 25 | 26 | typedef struct { 27 | void *par; 28 | size_t min_core_size; 29 | header_t base, *loop_head, *core_head; /* base is a zero-sized block always kept in the loop */ 30 | } kmem_t; 31 | 32 | static void panic(const char *s) 33 | { 34 | fprintf(stderr, "%s\n", s); 35 | abort(); 36 | } 37 | 38 | void *km_init2(void *km_par, size_t min_core_size) 39 | { 40 | kmem_t *km; 41 | km = (kmem_t*)kcalloc(km_par, 1, sizeof(kmem_t)); 42 | km->par = km_par; 43 | km->min_core_size = min_core_size > 0? 
min_core_size : 0x80000; 44 | return (void*)km; 45 | } 46 | 47 | void *km_init(void) { return km_init2(0, 0); } 48 | 49 | void km_destroy(void *_km) 50 | { 51 | kmem_t *km = (kmem_t*)_km; 52 | void *km_par; 53 | header_t *p, *q; 54 | if (km == NULL) return; 55 | km_par = km->par; 56 | for (p = km->core_head; p != NULL;) { 57 | q = p->ptr; 58 | kfree(km_par, p); 59 | p = q; 60 | } 61 | kfree(km_par, km); 62 | } 63 | 64 | static header_t *morecore(kmem_t *km, size_t nu) 65 | { 66 | header_t *q; 67 | size_t bytes, *p; 68 | nu = (nu + 1 + (km->min_core_size - 1)) / km->min_core_size * km->min_core_size; /* the first +1 for core header */ 69 | bytes = nu * sizeof(header_t); 70 | q = (header_t*)kmalloc(km->par, bytes); 71 | if (!q) panic("[morecore] insufficient memory"); 72 | q->ptr = km->core_head, q->size = nu, km->core_head = q; 73 | p = (size_t*)(q + 1); 74 | *p = nu - 1; /* the size of the free block; -1 because the first unit is used for the core header */ 75 | kfree(km, p + 1); /* initialize the new "core"; NB: the core header is not looped. */ 76 | return km->loop_head; 77 | } 78 | 79 | void kfree(void *_km, void *ap) /* kfree() also adds a new core to the circular list */ 80 | { 81 | header_t *p, *q; 82 | kmem_t *km = (kmem_t*)_km; 83 | 84 | if (!ap) return; 85 | if (km == NULL) { 86 | free(ap); 87 | return; 88 | } 89 | p = (header_t*)((size_t*)ap - 1); 90 | p->size = *((size_t*)ap - 1); 91 | /* Find the pointer that points to the block to be freed. 
The following loop can stop on two conditions: 92 | * 93 | * a) "p>q && p<q->ptr": @------#++++++++#+++++++@------- @---------------#+++++++@------- 94 | * (can also be in | | | -> | | 95 | * two cores) q p q->ptr q q->ptr 96 | * 97 | * @-------- #+++++++++@-------- @-------- @------------------ 98 | * | | | -> | | 99 | * q p q->ptr q q->ptr 100 | * 101 | * b) "q>=q->ptr && (p>q || p<q->ptr)": @-------#+++++ @--------#+++++++ @-------#+++++ @---------------- 102 | * | | | -> | | 103 | * q->ptr q p q->ptr q 104 | * 105 | * #+++++++@----- #++++++++@------- @------------- #++++++++@------- 106 | * | | | -> | | 107 | * p q->ptr q q->ptr q 108 | */ 109 | for (q = km->loop_head; !(p > q && p < q->ptr); q = q->ptr) 110 | if (q >= q->ptr && (p > q || p < q->ptr)) break; 111 | if (p + p->size == q->ptr) { /* two adjacent blocks, merge p and q->ptr (the 2nd and 4th cases) */ 112 | p->size += q->ptr->size; 113 | p->ptr = q->ptr->ptr; 114 | } else if (p + p->size > q->ptr && q->ptr >= p) { 115 | panic("[kfree] The end of the allocated block enters a free block."); 116 | } else p->ptr = q->ptr; /* backup q->ptr */ 117 | 118 | if (q + q->size == p) { /* two adjacent blocks, merge q and p (the other two cases) */ 119 | q->size += p->size; 120 | q->ptr = p->ptr; 121 | km->loop_head = q; 122 | } else if (q + q->size > p && p >= q) { 123 | panic("[kfree] The end of a free block enters the allocated block."); 124 | } else km->loop_head = p, q->ptr = p; /* in two cores, cannot be merged; create a new block in the list */ 125 | } 126 | 127 | void *kmalloc(void *_km, size_t n_bytes) 128 | { 129 | kmem_t *km = (kmem_t*)_km; 130 | size_t n_units; 131 | header_t *p, *q; 132 | 133 | if (n_bytes == 0) return 0; 134 | if (km == NULL) return malloc(n_bytes); 135 | n_units = (n_bytes + sizeof(size_t) + sizeof(header_t) - 1) / sizeof(header_t); /* header+n_bytes requires at least this number of units */ 136 | 137 | if (!(q = km->loop_head)) /* the first time when kmalloc() is called, initialize it */ 138 
| q = km->loop_head = km->base.ptr = &km->base; 139 | for (p = q->ptr;; q = p, p = p->ptr) { /* search for a suitable block */ 140 | if (p->size >= n_units) { /* p->size if the size of current block. This line means the current block is large enough. */ 141 | if (p->size == n_units) q->ptr = p->ptr; /* no need to split the block */ 142 | else { /* split the block. NB: memory is allocated at the end of the block! */ 143 | p->size -= n_units; /* reduce the size of the free block */ 144 | p += p->size; /* p points to the allocated block */ 145 | *(size_t*)p = n_units; /* set the size */ 146 | } 147 | km->loop_head = q; /* set the end of chain */ 148 | return (size_t*)p + 1; 149 | } 150 | if (p == km->loop_head) { /* then ask for more "cores" */ 151 | if ((p = morecore(km, n_units)) == 0) return 0; 152 | } 153 | } 154 | } 155 | 156 | void *kcalloc(void *_km, size_t count, size_t size) 157 | { 158 | kmem_t *km = (kmem_t*)_km; 159 | void *p; 160 | if (size == 0 || count == 0) return 0; 161 | if (km == NULL) return calloc(count, size); 162 | p = kmalloc(km, count * size); 163 | memset(p, 0, count * size); 164 | return p; 165 | } 166 | 167 | void *krealloc(void *_km, void *ap, size_t n_bytes) // TODO: this can be made more efficient in principle 168 | { 169 | kmem_t *km = (kmem_t*)_km; 170 | size_t cap, *p, *q; 171 | 172 | if (n_bytes == 0) { 173 | kfree(km, ap); return 0; 174 | } 175 | if (km == NULL) return realloc(ap, n_bytes); 176 | if (ap == NULL) return kmalloc(km, n_bytes); 177 | p = (size_t*)ap - 1; 178 | cap = (*p) * sizeof(header_t) - sizeof(size_t); 179 | if (cap >= n_bytes) return ap; /* TODO: this prevents shrinking */ 180 | q = (size_t*)kmalloc(km, n_bytes); 181 | memcpy(q, ap, cap); 182 | kfree(km, ap); 183 | return q; 184 | } 185 | 186 | void km_stat(const void *_km, km_stat_t *s) 187 | { 188 | kmem_t *km = (kmem_t*)_km; 189 | header_t *p; 190 | memset(s, 0, sizeof(km_stat_t)); 191 | if (km == NULL || km->loop_head == NULL) return; 192 | for (p = 
km->loop_head;; p = p->ptr) { 193 | s->available += p->size * sizeof(header_t); 194 | if (p->size != 0) ++s->n_blocks; /* &kmem_t::base is always one of the cores. It is zero-sized. */ 195 | if (p->ptr > p && p + p->size > p->ptr) 196 | panic("[km_stat] The end of a free block enters another free block."); 197 | if (p->ptr == km->loop_head) break; 198 | } 199 | for (p = km->core_head; p != NULL; p = p->ptr) { 200 | size_t size = p->size * sizeof(header_t); 201 | ++s->n_cores; 202 | s->capacity += size; 203 | s->largest = s->largest > size? s->largest : size; 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/kalloc.h: -------------------------------------------------------------------------------- 1 | #ifndef _KALLOC_H_ 2 | #define _KALLOC_H_ 3 | 4 | #include /* for size_t */ 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct { 11 | size_t capacity, available, n_blocks, n_cores, largest; 12 | } km_stat_t; 13 | 14 | void *kmalloc(void *km, size_t size); 15 | void *krealloc(void *km, void *ptr, size_t size); 16 | void *kcalloc(void *km, size_t count, size_t size); 17 | void kfree(void *km, void *ptr); 18 | 19 | void *km_init(void); 20 | void *km_init2(void *km_par, size_t min_core_size); 21 | void km_destroy(void *km); 22 | void km_stat(const void *_km, km_stat_t *s); 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | #define KMALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kmalloc((km), (len) * sizeof(*(ptr)))) 29 | #define KCALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kcalloc((km), (len), sizeof(*(ptr)))) 30 | #define KREALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))krealloc((km), (ptr), (len) * sizeof(*(ptr)))) 31 | 32 | #define KEXPAND(km, a, m) do { \ 33 | (m) = (m) >= 4? 
(m) + ((m)>>1) : 16; \ 34 | KREALLOC((km), (a), (m)); \ 35 | } while (0) 36 | 37 | #ifndef klib_unused 38 | #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) 39 | #define klib_unused __attribute__ ((__unused__)) 40 | #else 41 | #define klib_unused 42 | #endif 43 | #endif /* klib_unused */ 44 | 45 | #define KALLOC_POOL_INIT2(SCOPE, name, kmptype_t) \ 46 | typedef struct { \ 47 | size_t cnt, n, max; \ 48 | kmptype_t **buf; \ 49 | void *km; \ 50 | } kmp_##name##_t; \ 51 | SCOPE kmp_##name##_t *kmp_init_##name(void *km) { \ 52 | kmp_##name##_t *mp; \ 53 | KCALLOC(km, mp, 1); \ 54 | mp->km = km; \ 55 | return mp; \ 56 | } \ 57 | SCOPE void kmp_destroy_##name(kmp_##name##_t *mp) { \ 58 | size_t k; \ 59 | for (k = 0; k < mp->n; ++k) kfree(mp->km, mp->buf[k]); \ 60 | kfree(mp->km, mp->buf); kfree(mp->km, mp); \ 61 | } \ 62 | SCOPE kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \ 63 | ++mp->cnt; \ 64 | if (mp->n == 0) return (kmptype_t*)kcalloc(mp->km, 1, sizeof(kmptype_t)); \ 65 | return mp->buf[--mp->n]; \ 66 | } \ 67 | SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \ 68 | --mp->cnt; \ 69 | if (mp->n == mp->max) KEXPAND(mp->km, mp->buf, mp->max); \ 70 | mp->buf[mp->n++] = p; \ 71 | } 72 | 73 | #define KALLOC_POOL_INIT(name, kmptype_t) \ 74 | KALLOC_POOL_INIT2(static inline klib_unused, name, kmptype_t) 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/kdq.h: -------------------------------------------------------------------------------- 1 | #ifndef __AC_KDQ_H 2 | #define __AC_KDQ_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "kalloc.h" 8 | 9 | #define __KDQ_TYPE(type) \ 10 | typedef struct { \ 11 | uint64_t front:58, bits:6, count, mask; \ 12 | type *a; \ 13 | void *km; \ 14 | } kdq_##type##_t; 15 | 16 | #define kdq_t(type) kdq_##type##_t 17 | #define kdq_size(q) ((q)->count) 18 | #define kdq_first(q) ((q)->a[(q)->front]) 19 | 
#define kdq_last(q) ((q)->a[((q)->front + (q)->count - 1) & (q)->mask]) 20 | #define kdq_at(q, i) ((q)->a[((q)->front + (i)) & (q)->mask]) 21 | 22 | #define __KDQ_IMPL(type, SCOPE) \ 23 | SCOPE kdq_##type##_t *kdq_init_##type(void *km) \ 24 | { \ 25 | kdq_##type##_t *q; \ 26 | q = (kdq_##type##_t*)kcalloc(km, 1, sizeof(kdq_##type##_t)); \ 27 | q->bits = 2, q->mask = (1ULL<bits) - 1; \ 28 | q->a = (type*)kmalloc(km, (1<bits) * sizeof(type)); \ 29 | q->km = km; \ 30 | return q; \ 31 | } \ 32 | SCOPE void kdq_destroy_##type(kdq_##type##_t *q) \ 33 | { \ 34 | if (q == 0) return; \ 35 | kfree(q->km, q->a); kfree(q->km, q); \ 36 | } \ 37 | SCOPE int kdq_resize_##type(kdq_##type##_t *q, int new_bits) \ 38 | { \ 39 | size_t new_size = 1ULL<bits; \ 40 | if (new_size < q->count) { /* not big enough */ \ 41 | int i; \ 42 | for (i = 0; i < 64; ++i) \ 43 | if (1ULL< q->count) break; \ 44 | new_bits = i, new_size = 1ULL<bits) return q->bits; /* unchanged */ \ 47 | if (new_bits > q->bits) q->a = (type*)krealloc(q->km, q->a, (1ULL<front + q->count <= old_size) { /* unwrapped */ \ 49 | if (q->front + q->count > new_size) /* only happens for shrinking */ \ 50 | memmove(q->a, q->a + new_size, (q->front + q->count - new_size) * sizeof(type)); \ 51 | } else { /* wrapped */ \ 52 | memmove(q->a + (new_size - (old_size - q->front)), q->a + q->front, (old_size - q->front) * sizeof(type)); \ 53 | q->front = new_size - (old_size - q->front); \ 54 | } \ 55 | q->bits = new_bits, q->mask = (1ULL<bits) - 1; \ 56 | if (new_bits < q->bits) q->a = (type*)krealloc(q->km, q->a, (1ULL<bits; \ 58 | } \ 59 | SCOPE type *kdq_pushp_##type(kdq_##type##_t *q) \ 60 | { \ 61 | if (q->count == 1ULL<bits) kdq_resize_##type(q, q->bits + 1); \ 62 | return &q->a[((q->count++) + q->front) & (q)->mask]; \ 63 | } \ 64 | SCOPE void kdq_push_##type(kdq_##type##_t *q, type v) \ 65 | { \ 66 | if (q->count == 1ULL<bits) kdq_resize_##type(q, q->bits + 1); \ 67 | q->a[((q->count++) + q->front) & (q)->mask] = v; \ 68 | } \ 
69 | SCOPE type *kdq_unshiftp_##type(kdq_##type##_t *q) \ 70 | { \ 71 | if (q->count == 1ULL<bits) kdq_resize_##type(q, q->bits + 1); \ 72 | ++q->count; \ 73 | q->front = q->front? q->front - 1 : (1ULL<bits) - 1; \ 74 | return &q->a[q->front]; \ 75 | } \ 76 | SCOPE void kdq_unshift_##type(kdq_##type##_t *q, type v) \ 77 | { \ 78 | type *p; \ 79 | p = kdq_unshiftp_##type(q); \ 80 | *p = v; \ 81 | } \ 82 | SCOPE type *kdq_pop_##type(kdq_##type##_t *q) \ 83 | { \ 84 | return q->count? &q->a[((--q->count) + q->front) & q->mask] : 0; \ 85 | } \ 86 | SCOPE type *kdq_shift_##type(kdq_##type##_t *q) \ 87 | { \ 88 | type *d = 0; \ 89 | if (q->count == 0) return 0; \ 90 | d = &q->a[q->front++]; \ 91 | q->front &= q->mask; \ 92 | --q->count; \ 93 | return d; \ 94 | } 95 | 96 | #define KDQ_INIT2(type, SCOPE) \ 97 | __KDQ_TYPE(type) \ 98 | __KDQ_IMPL(type, SCOPE) 99 | 100 | #ifndef klib_unused 101 | #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) 102 | #define klib_unused __attribute__ ((__unused__)) 103 | #else 104 | #define klib_unused 105 | #endif 106 | #endif /* klib_unused */ 107 | 108 | #define KDQ_INIT(type) KDQ_INIT2(type, static inline klib_unused) 109 | 110 | #define KDQ_DECLARE(type) \ 111 | __KDQ_TYPE(type) \ 112 | kdq_##type##_t *kdq_init_##type(); \ 113 | void kdq_destroy_##type(kdq_##type##_t *q); \ 114 | int kdq_resize_##type(kdq_##type##_t *q, int new_bits); \ 115 | type *kdq_pushp_##type(kdq_##type##_t *q); \ 116 | void kdq_push_##type(kdq_##type##_t *q, type v); \ 117 | type *kdq_unshiftp_##type(kdq_##type##_t *q); \ 118 | void kdq_unshift_##type(kdq_##type##_t *q, type v); \ 119 | type *kdq_pop_##type(kdq_##type##_t *q); \ 120 | type *kdq_shift_##type(kdq_##type##_t *q); 121 | 122 | #define kdq_init(type, km) kdq_init_##type(km) 123 | #define kdq_destroy(type, q) kdq_destroy_##type(q) 124 | #define kdq_resize(type, q, new_bits) kdq_resize_##type(q, new_bits) 125 | #define kdq_pushp(type, q) kdq_pushp_##type(q) 126 | 
#define kdq_push(type, q, v) kdq_push_##type(q, v) 127 | #define kdq_pop(type, q) kdq_pop_##type(q) 128 | #define kdq_unshiftp(type, q) kdq_unshiftp_##type(q) 129 | #define kdq_unshift(type, q, v) kdq_unshift_##type(q, v) 130 | #define kdq_shift(type, q) kdq_shift_##type(q) 131 | 132 | #endif 133 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/ketopt.h: -------------------------------------------------------------------------------- 1 | #ifndef KETOPT_H 2 | #define KETOPT_H 3 | 4 | #include /* for strchr() and strncmp() */ 5 | 6 | #define ko_no_argument 0 7 | #define ko_required_argument 1 8 | #define ko_optional_argument 2 9 | 10 | typedef struct { 11 | int ind; /* equivalent to optind */ 12 | int opt; /* equivalent to optopt */ 13 | char *arg; /* equivalent to optarg */ 14 | int longidx; /* index of a long option; or -1 if short */ 15 | /* private variables not intended for external uses */ 16 | int i, pos, n_args; 17 | } ketopt_t; 18 | 19 | typedef struct { 20 | char *name; 21 | int has_arg; 22 | int val; 23 | } ko_longopt_t; 24 | 25 | static ketopt_t KETOPT_INIT = { 1, 0, 0, -1, 1, 0, 0 }; 26 | 27 | static void ketopt_permute(char *argv[], int j, int n) /* move argv[j] over n elements to the left */ 28 | { 29 | int k; 30 | char *p = argv[j]; 31 | for (k = 0; k < n; ++k) 32 | argv[j - k] = argv[j - k - 1]; 33 | argv[j - k] = p; 34 | } 35 | 36 | /** 37 | * Parse command-line options and arguments 38 | * 39 | * This function has a similar interface to GNU's getopt_long(). Each call 40 | * parses one option and returns the option name. s->arg points to the option 41 | * argument if present. The function returns -1 when all command-line arguments 42 | * are parsed. In this case, s->ind is the index of the first non-option 43 | * argument. 
44 | * 45 | * @param s status; shall be initialized to KETOPT_INIT on the first call 46 | * @param argc length of argv[] 47 | * @param argv list of command-line arguments; argv[0] is ignored 48 | * @param permute non-zero to move options ahead of non-option arguments 49 | * @param ostr option string 50 | * @param longopts long options 51 | * 52 | * @return ASCII for a short option; ko_longopt_t::val for a long option; -1 if 53 | * argv[] is fully processed; '?' for an unknown option or an ambiguous 54 | * long option; ':' if an option argument is missing 55 | */ 56 | static int ketopt(ketopt_t *s, int argc, char *argv[], int permute, const char *ostr, const ko_longopt_t *longopts) 57 | { 58 | int opt = -1, i0, j; 59 | if (permute) { 60 | while (s->i < argc && (argv[s->i][0] != '-' || argv[s->i][1] == '\0')) 61 | ++s->i, ++s->n_args; 62 | } 63 | s->arg = 0, s->longidx = -1, i0 = s->i; 64 | if (s->i >= argc || argv[s->i][0] != '-' || argv[s->i][1] == '\0') { 65 | s->ind = s->i - s->n_args; 66 | return -1; 67 | } 68 | if (argv[s->i][0] == '-' && argv[s->i][1] == '-') { /* "--" or a long option */ 69 | if (argv[s->i][2] == '\0') { /* a bare "--" */ 70 | ketopt_permute(argv, s->i, s->n_args); 71 | ++s->i, s->ind = s->i - s->n_args; 72 | return -1; 73 | } 74 | s->opt = 0, opt = '?', s->pos = -1; 75 | if (longopts) { /* parse long options */ 76 | int k, n_exact = 0, n_partial = 0; 77 | const ko_longopt_t *o = 0, *o_exact = 0, *o_partial = 0; 78 | for (j = 2; argv[s->i][j] != '\0' && argv[s->i][j] != '='; ++j) {} /* find the end of the option name */ 79 | for (k = 0; longopts[k].name != 0; ++k) 80 | if (strncmp(&argv[s->i][2], longopts[k].name, j - 2) == 0) { 81 | if (longopts[k].name[j - 2] == 0) ++n_exact, o_exact = &longopts[k]; 82 | else ++n_partial, o_partial = &longopts[k]; 83 | } 84 | if (n_exact > 1 || (n_exact == 0 && n_partial > 1)) return '?'; 85 | o = n_exact == 1? o_exact : n_partial == 1? 
o_partial : 0; 86 | if (o) { 87 | s->opt = opt = o->val, s->longidx = o - longopts; 88 | if (argv[s->i][j] == '=') s->arg = &argv[s->i][j + 1]; 89 | if (o->has_arg == 1 && argv[s->i][j] == '\0') { 90 | if (s->i < argc - 1) s->arg = argv[++s->i]; 91 | else opt = ':'; /* missing option argument */ 92 | } 93 | } 94 | } 95 | } else { /* a short option */ 96 | char *p; 97 | if (s->pos == 0) s->pos = 1; 98 | opt = s->opt = argv[s->i][s->pos++]; 99 | p = strchr((char*)ostr, opt); 100 | if (p == 0) { 101 | opt = '?'; /* unknown option */ 102 | } else if (p[1] == ':') { 103 | if (argv[s->i][s->pos] == 0) { 104 | if (s->i < argc - 1) s->arg = argv[++s->i]; 105 | else opt = ':'; /* missing option argument */ 106 | } else s->arg = &argv[s->i][s->pos]; 107 | s->pos = -1; 108 | } 109 | } 110 | if (s->pos < 0 || argv[s->i][s->pos] == 0) { 111 | ++s->i, s->pos = 0; 112 | if (s->n_args > 0) /* permute */ 113 | for (j = i0; j < s->i; ++j) 114 | ketopt_permute(argv, j, s->n_args); 115 | } 116 | s->ind = s->i - s->n_args; 117 | return opt; 118 | } 119 | 120 | #endif 121 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/kseq.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008, 2009, 2011 Attractive Chaos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | /* Last Modified: 05MAR2012 */ 27 | 28 | #ifndef AC_KSEQ_H 29 | #define AC_KSEQ_H 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r 36 | #define KS_SEP_TAB 1 // isspace() && !' ' 37 | #define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows) 38 | #define KS_SEP_MAX 2 39 | 40 | #ifndef klib_unused 41 | #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) 42 | #define klib_unused __attribute__ ((__unused__)) 43 | #else 44 | #define klib_unused 45 | #endif 46 | #endif /* klib_unused */ 47 | 48 | #define __KS_TYPE(type_t) \ 49 | typedef struct __kstream_t { \ 50 | int begin, end; \ 51 | int is_eof:2, bufsize:30; \ 52 | type_t f; \ 53 | unsigned char *buf; \ 54 | } kstream_t; 55 | 56 | #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end) 57 | #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0) 58 | 59 | #define __KS_BASIC(SCOPE, type_t, __bufsize) \ 60 | SCOPE kstream_t *ks_init(type_t f) \ 61 | { \ 62 | kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ 63 | ks->f = f; ks->bufsize = __bufsize; \ 64 | ks->buf = (unsigned char*)malloc(__bufsize); \ 65 | return ks; \ 66 | } \ 67 | SCOPE void ks_destroy(kstream_t *ks) \ 68 | { \ 69 | if (!ks) return; \ 70 | free(ks->buf); \ 71 | free(ks); \ 72 | } 73 | 74 | #define __KS_INLINED(__read) \ 75 | static inline klib_unused int ks_getc(kstream_t *ks) \ 76 | { \ 77 | if 
(ks->is_eof && ks->begin >= ks->end) return -1; \ 78 | if (ks->begin >= ks->end) { \ 79 | ks->begin = 0; \ 80 | ks->end = __read(ks->f, ks->buf, ks->bufsize); \ 81 | if (ks->end < ks->bufsize) ks->is_eof = 1; \ 82 | if (ks->end == 0) return -1; \ 83 | } \ 84 | return (int)ks->buf[ks->begin++]; \ 85 | } \ 86 | static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \ 87 | { return ks_getuntil2(ks, delimiter, str, dret, 0); } 88 | 89 | #ifndef KSTRING_T 90 | #define KSTRING_T kstring_t 91 | typedef struct __kstring_t { 92 | size_t l, m; 93 | char *s; 94 | } kstring_t; 95 | #endif 96 | 97 | #ifndef kroundup32 98 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 99 | #endif 100 | 101 | #define __KS_GETUNTIL(SCOPE, __read) \ 102 | SCOPE int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \ 103 | { \ 104 | if (dret) *dret = 0; \ 105 | str->l = append? str->l : 0; \ 106 | if (ks->begin >= ks->end && ks->is_eof) return -1; \ 107 | for (;;) { \ 108 | int i; \ 109 | if (ks->begin >= ks->end) { \ 110 | if (!ks->is_eof) { \ 111 | ks->begin = 0; \ 112 | ks->end = __read(ks->f, ks->buf, ks->bufsize); \ 113 | if (ks->end < ks->bufsize) ks->is_eof = 1; \ 114 | if (ks->end == 0) break; \ 115 | } else break; \ 116 | } \ 117 | if (delimiter == KS_SEP_LINE) { \ 118 | for (i = ks->begin; i < ks->end; ++i) \ 119 | if (ks->buf[i] == '\n') break; \ 120 | } else if (delimiter > KS_SEP_MAX) { \ 121 | for (i = ks->begin; i < ks->end; ++i) \ 122 | if (ks->buf[i] == delimiter) break; \ 123 | } else if (delimiter == KS_SEP_SPACE) { \ 124 | for (i = ks->begin; i < ks->end; ++i) \ 125 | if (isspace(ks->buf[i])) break; \ 126 | } else if (delimiter == KS_SEP_TAB) { \ 127 | for (i = ks->begin; i < ks->end; ++i) \ 128 | if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \ 129 | } else i = 0; /* never come to here! 
*/ \ 130 | if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \ 131 | str->m = str->l + (i - ks->begin) + 1; \ 132 | kroundup32(str->m); \ 133 | str->s = (char*)realloc(str->s, str->m); \ 134 | } \ 135 | memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \ 136 | str->l = str->l + (i - ks->begin); \ 137 | ks->begin = i + 1; \ 138 | if (i < ks->end) { \ 139 | if (dret) *dret = ks->buf[i]; \ 140 | break; \ 141 | } \ 142 | } \ 143 | if (str->s == 0) { \ 144 | str->m = 1; \ 145 | str->s = (char*)calloc(1, 1); \ 146 | } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \ 147 | str->s[str->l] = '\0'; \ 148 | return str->l; \ 149 | } 150 | 151 | #define KSTREAM_INIT2(SCOPE, type_t, __read, __bufsize) \ 152 | __KS_TYPE(type_t) \ 153 | __KS_BASIC(SCOPE, type_t, __bufsize) \ 154 | __KS_GETUNTIL(SCOPE, __read) \ 155 | __KS_INLINED(__read) 156 | 157 | #define KSTREAM_INIT(type_t, __read, __bufsize) KSTREAM_INIT2(static, type_t, __read, __bufsize) 158 | 159 | #define KSTREAM_DECLARE(type_t, __read) \ 160 | __KS_TYPE(type_t) \ 161 | extern int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append); \ 162 | extern kstream_t *ks_init(type_t f); \ 163 | extern void ks_destroy(kstream_t *ks); \ 164 | __KS_INLINED(__read) 165 | 166 | /****************** 167 | * FASTA/Q parser * 168 | ******************/ 169 | 170 | #define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0) 171 | 172 | #define __KSEQ_BASIC(SCOPE, type_t) \ 173 | SCOPE kseq_t *kseq_init(type_t fd) \ 174 | { \ 175 | kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ 176 | s->f = ks_init(fd); \ 177 | return s; \ 178 | } \ 179 | SCOPE void kseq_destroy(kseq_t *ks) \ 180 | { \ 181 | if (!ks) return; \ 182 | free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \ 183 | ks_destroy(ks->f); \ 184 | free(ks); \ 185 | } 186 | 187 | /* Return value: 188 | >=0 length of the sequence (normal) 189 | -1 
end-of-file 190 | -2 truncated quality string 191 | */ 192 | #define __KSEQ_READ(SCOPE) \ 193 | SCOPE int kseq_read(kseq_t *seq) \ 194 | { \ 195 | int c; \ 196 | kstream_t *ks = seq->f; \ 197 | if (seq->last_char == 0) { /* then jump to the next header line */ \ 198 | while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \ 199 | if (c == -1) return -1; /* end of file */ \ 200 | seq->last_char = c; \ 201 | } /* else: the first header char has been read in the previous call */ \ 202 | seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \ 203 | if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \ 204 | if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \ 205 | if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \ 206 | seq->seq.m = 256; \ 207 | seq->seq.s = (char*)malloc(seq->seq.m); \ 208 | } \ 209 | while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \ 210 | if (c == '\n') continue; /* skip empty lines */ \ 211 | seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \ 212 | ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \ 213 | } \ 214 | if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \ 215 | if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \ 216 | seq->seq.m = seq->seq.l + 2; \ 217 | kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \ 218 | seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \ 219 | } \ 220 | seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \ 221 | if (c != '+') return seq->seq.l; /* FASTA */ \ 222 | if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \ 223 | seq->qual.m = seq->seq.m; \ 224 | seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \ 225 | } \ 226 | while ((c = ks_getc(ks)) != -1 && c != '\n'); /* 
skip the rest of '+' line */ \ 227 | if (c == -1) return -2; /* error: no quality string */ \ 228 | while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \ 229 | seq->last_char = 0; /* we have not come to the next header line */ \ 230 | if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \ 231 | return seq->seq.l; \ 232 | } 233 | 234 | #define __KSEQ_TYPE(type_t) \ 235 | typedef struct { \ 236 | kstring_t name, comment, seq, qual; \ 237 | int last_char; \ 238 | kstream_t *f; \ 239 | } kseq_t; 240 | 241 | #define KSEQ_INIT2(SCOPE, type_t, __read) \ 242 | KSTREAM_INIT2(SCOPE, type_t, __read, 16384) \ 243 | __KSEQ_TYPE(type_t) \ 244 | __KSEQ_BASIC(SCOPE, type_t) \ 245 | __KSEQ_READ(SCOPE) 246 | 247 | #define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read) 248 | 249 | #define KSEQ_DECLARE(type_t) \ 250 | __KS_TYPE(type_t) \ 251 | __KSEQ_TYPE(type_t) \ 252 | extern kseq_t *kseq_init(type_t fd); \ 253 | void kseq_destroy(kseq_t *ks); \ 254 | int kseq_read(kseq_t *seq); 255 | 256 | #endif 257 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/ksort.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008, 2011 Attractive Chaos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | // This is a simplified version of ksort.h 27 | 28 | #ifndef AC_KSORT_H 29 | #define AC_KSORT_H 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | typedef struct { 36 | void *left, *right; 37 | int depth; 38 | } ks_isort_stack_t; 39 | 40 | #define KSORT_SWAP(type_t, a, b) { type_t t=(a); (a)=(b); (b)=t; } 41 | 42 | #define KSORT_INIT(name, type_t, __sort_lt) \ 43 | void ks_heapdown_##name(size_t i, size_t n, type_t l[]) \ 44 | { \ 45 | size_t k = i; \ 46 | type_t tmp = l[i]; \ 47 | while ((k = (k << 1) + 1) < n) { \ 48 | if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; \ 49 | if (__sort_lt(l[k], tmp)) break; \ 50 | l[i] = l[k]; i = k; \ 51 | } \ 52 | l[i] = tmp; \ 53 | } \ 54 | void ks_heapmake_##name(size_t lsize, type_t l[]) \ 55 | { \ 56 | size_t i; \ 57 | for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i) \ 58 | ks_heapdown_##name(i, lsize, l); \ 59 | } \ 60 | type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \ 61 | { \ 62 | type_t *low, *high, *k, *ll, *hh, *mid; \ 63 | low = arr; high = arr + n - 1; k = arr + kk; \ 64 | for (;;) { \ 65 | if (high <= low) return *k; \ 66 | if (high == low + 1) { \ 67 | if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ 68 | return *k; \ 69 | } \ 70 | mid = low + (high - low) / 2; \ 71 | if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \ 72 | if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ 73 | if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \ 74 | 
KSORT_SWAP(type_t, *mid, *(low+1)); \ 75 | ll = low + 1; hh = high; \ 76 | for (;;) { \ 77 | do ++ll; while (__sort_lt(*ll, *low)); \ 78 | do --hh; while (__sort_lt(*low, *hh)); \ 79 | if (hh < ll) break; \ 80 | KSORT_SWAP(type_t, *ll, *hh); \ 81 | } \ 82 | KSORT_SWAP(type_t, *low, *hh); \ 83 | if (hh <= k) low = ll; \ 84 | if (hh >= k) high = hh - 1; \ 85 | } \ 86 | } \ 87 | 88 | #define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k) 89 | 90 | #define ks_lt_generic(a, b) ((a) < (b)) 91 | #define ks_lt_str(a, b) (strcmp((a), (b)) < 0) 92 | 93 | typedef const char *ksstr_t; 94 | 95 | #define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic) 96 | #define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str) 97 | 98 | #define RS_MIN_SIZE 64 99 | #define RS_MAX_BITS 8 100 | 101 | #define KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) \ 102 | typedef struct { \ 103 | rstype_t *b, *e; \ 104 | } rsbucket_##name##_t; \ 105 | void rs_insertsort_##name(rstype_t *beg, rstype_t *end) \ 106 | { \ 107 | rstype_t *i; \ 108 | for (i = beg + 1; i < end; ++i) \ 109 | if (rskey(*i) < rskey(*(i - 1))) { \ 110 | rstype_t *j, tmp = *i; \ 111 | for (j = i; j > beg && rskey(tmp) < rskey(*(j-1)); --j) \ 112 | *j = *(j - 1); \ 113 | *j = tmp; \ 114 | } \ 115 | } \ 116 | void rs_sort_##name(rstype_t *beg, rstype_t *end, int n_bits, int s) \ 117 | { \ 118 | rstype_t *i; \ 119 | int size = 1<b = k->e = beg; \ 123 | for (i = beg; i != end; ++i) ++b[rskey(*i)>>s&m].e; \ 124 | for (k = b + 1; k != be; ++k) \ 125 | k->e += (k-1)->e - beg, k->b = (k-1)->e; \ 126 | for (k = b; k != be;) { \ 127 | if (k->b != k->e) { \ 128 | rsbucket_##name##_t *l; \ 129 | if ((l = b + (rskey(*k->b)>>s&m)) != k) { \ 130 | rstype_t tmp = *k->b, swap; \ 131 | do { \ 132 | swap = tmp; tmp = *l->b; *l->b++ = swap; \ 133 | l = b + (rskey(tmp)>>s&m); \ 134 | } while (l != k); \ 135 | *k->b++ = tmp; \ 136 | } else ++k->b; \ 137 | } else ++k; \ 138 | } \ 139 | for (b->b = beg, k = b + 1; k != be; 
++k) k->b = (k-1)->e; \ 140 | if (s) { \ 141 | s = s > n_bits? s - n_bits : 0; \ 142 | for (k = b; k != be; ++k) \ 143 | if (k->e - k->b > RS_MIN_SIZE) rs_sort_##name(k->b, k->e, n_bits, s); \ 144 | else if (k->e - k->b > 1) rs_insertsort_##name(k->b, k->e); \ 145 | } \ 146 | } \ 147 | void radix_sort_##name(rstype_t *beg, rstype_t *end) \ 148 | { \ 149 | if (end - beg <= RS_MIN_SIZE) rs_insertsort_##name(beg, end); \ 150 | else rs_sort_##name(beg, end, RS_MAX_BITS, (sizeof_key - 1) * RS_MAX_BITS); \ 151 | } 152 | 153 | #endif 154 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/ksw2.h: -------------------------------------------------------------------------------- 1 | #ifndef KSW2_H_ 2 | #define KSW2_H_ 3 | 4 | #include 5 | 6 | #define KSW_NEG_INF -0x40000000 7 | 8 | #define KSW_EZ_SCORE_ONLY 0x01 // don't record alignment path/cigar 9 | #define KSW_EZ_RIGHT 0x02 // right-align gaps 10 | #define KSW_EZ_GENERIC_SC 0x04 // without this flag: match/mismatch only; last symbol is a wildcard 11 | #define KSW_EZ_APPROX_MAX 0x08 // approximate max; this is faster with sse 12 | #define KSW_EZ_APPROX_DROP 0x10 // approximate Z-drop; faster with sse 13 | #define KSW_EZ_EXTZ_ONLY 0x40 // only perform extension 14 | #define KSW_EZ_REV_CIGAR 0x80 // reverse CIGAR in the output 15 | #define KSW_EZ_SPLICE_FOR 0x100 16 | #define KSW_EZ_SPLICE_REV 0x200 17 | #define KSW_EZ_SPLICE_FLANK 0x400 18 | 19 | // The subset of CIGAR operators used by ksw code. 20 | // Use MM_CIGAR_* from minimap.h if you need the full list. 
21 | #define KSW_CIGAR_MATCH 0 22 | #define KSW_CIGAR_INS 1 23 | #define KSW_CIGAR_DEL 2 24 | #define KSW_CIGAR_N_SKIP 3 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | typedef struct { 31 | uint32_t max:31, zdropped:1; 32 | int max_q, max_t; // max extension coordinate 33 | int mqe, mqe_t; // max score when reaching the end of query 34 | int mte, mte_q; // max score when reaching the end of target 35 | int score; // max score reaching both ends; may be KSW_NEG_INF 36 | int m_cigar, n_cigar; 37 | int reach_end; 38 | uint32_t *cigar; 39 | } ksw_extz_t; 40 | 41 | /** 42 | * NW-like extension 43 | * 44 | * @param km memory pool, when used with kalloc 45 | * @param qlen query length 46 | * @param query query sequence with 0 <= query[i] < m 47 | * @param tlen target length 48 | * @param target target sequence with 0 <= target[i] < m 49 | * @param m number of residue types 50 | * @param mat m*m scoring matrix in a one-dimensional array 51 | * @param gapo gap open penalty; a gap of length l cost "-(gapo+l*gape)" 52 | * @param gape gap extension penalty 53 | * @param w band width (<0 to disable) 54 | * @param zdrop off-diagonal drop-off to stop extension (positive; <0 to disable) 55 | * @param flag flag (see KSW_EZ_* macros) 56 | * @param ez (out) scores and cigar 57 | */ 58 | void ksw_extz(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, 59 | int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez); 60 | 61 | void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, 62 | int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez); 63 | 64 | void ksw_extd(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, 65 | int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int flag, ksw_extz_t *ez); 66 | 67 | void ksw_extd2_sse(void *km, int qlen, 
const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, 68 | int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez); 69 | 70 | void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, 71 | int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez); 72 | 73 | void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez); 74 | 75 | /** 76 | * Global alignment 77 | * 78 | * (first 10 parameters identical to ksw_extz_sse()) 79 | * @param m_cigar (modified) max CIGAR length; feed 0 if cigar==0 80 | * @param n_cigar (out) number of CIGAR elements 81 | * @param cigar (out) BAM-encoded CIGAR; caller need to deallocate with kfree(km, ) 82 | * 83 | * @return score of the alignment 84 | */ 85 | int ksw_gg(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_); 86 | int ksw_gg2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_); 87 | int ksw_gg2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_); 88 | 89 | void *ksw_ll_qinit(void *km, int size, int qlen, const uint8_t *query, int m, const int8_t *mat); 90 | int ksw_ll_i16(void *q, int tlen, const uint8_t *target, int gapo, int gape, int *qe, int *te); 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | 96 | /************************************ 97 | *** Private macros and functions *** 98 | 
************************************/ 99 | 100 | #ifdef HAVE_KALLOC 101 | #include "kalloc.h" 102 | #else 103 | #include 104 | #define kmalloc(km, size) malloc((size)) 105 | #define kcalloc(km, count, size) calloc((count), (size)) 106 | #define krealloc(km, ptr, size) realloc((ptr), (size)) 107 | #define kfree(km, ptr) free((ptr)) 108 | #endif 109 | 110 | static inline uint32_t *ksw_push_cigar(void *km, int *n_cigar, int *m_cigar, uint32_t *cigar, uint32_t op, int len) 111 | { 112 | if (*n_cigar == 0 || op != (cigar[(*n_cigar) - 1]&0xf)) { 113 | if (*n_cigar == *m_cigar) { 114 | *m_cigar = *m_cigar? (*m_cigar)<<1 : 4; 115 | cigar = (uint32_t*)krealloc(km, cigar, (*m_cigar) << 2); 116 | } 117 | cigar[(*n_cigar)++] = len<<4 | op; 118 | } else cigar[(*n_cigar)-1] += len<<4; 119 | return cigar; 120 | } 121 | 122 | // In the backtrack matrix, value p[] has the following structure: 123 | // bit 0-2: which type gets the max - 0 for H, 1 for E, 2 for F, 3 for \tilde{E} and 4 for \tilde{F} 124 | // bit 3/0x08: 1 if a continuation on the E state (bit 5/0x20 for a continuation on \tilde{E}) 125 | // bit 4/0x10: 1 if a continuation on the F state (bit 6/0x40 for a continuation on \tilde{F}) 126 | static inline void ksw_backtrack(void *km, int is_rot, int is_rev, int min_intron_len, const uint8_t *p, const int *off, const int *off_end, int n_col, int i0, int j0, 127 | int *m_cigar_, int *n_cigar_, uint32_t **cigar_) 128 | { // p[] - lower 3 bits: which type gets the max; bit 129 | int n_cigar = 0, m_cigar = *m_cigar_, i = i0, j = j0, r, state = 0; 130 | uint32_t *cigar = *cigar_, tmp; 131 | while (i >= 0 && j >= 0) { // at the beginning of the loop, _state_ tells us which state to check 132 | int force_state = -1; 133 | if (is_rot) { 134 | r = i + j; 135 | if (i < off[r]) force_state = 2; 136 | if (off_end && i > off_end[r]) force_state = 1; 137 | tmp = force_state < 0? 
p[(size_t)r * n_col + i - off[r]] : 0; 138 | } else { 139 | if (j < off[i]) force_state = 2; 140 | if (off_end && j > off_end[i]) force_state = 1; 141 | tmp = force_state < 0? p[(size_t)i * n_col + j - off[i]] : 0; 142 | } 143 | if (state == 0) state = tmp & 7; // if requesting the H state, find state one maximizes it. 144 | else if (!(tmp >> (state + 2) & 1)) state = 0; // if requesting other states, _state_ stays the same if it is a continuation; otherwise, set to H 145 | if (state == 0) state = tmp & 7; // TODO: probably this line can be merged into the "else if" line right above; not 100% sure 146 | if (force_state >= 0) state = force_state; 147 | if (state == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_MATCH, 1), --i, --j; 148 | else if (state == 1 || (state == 3 && min_intron_len <= 0)) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_DEL, 1), --i; 149 | else if (state == 3 && min_intron_len > 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_N_SKIP, 1), --i; 150 | else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_INS, 1), --j; 151 | } 152 | if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, min_intron_len > 0 && i >= min_intron_len? 
KSW_CIGAR_N_SKIP : KSW_CIGAR_DEL, i + 1); // first deletion 153 | if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, KSW_CIGAR_INS, j + 1); // first insertion 154 | if (!is_rev) 155 | for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR 156 | tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp; 157 | *m_cigar_ = m_cigar, *n_cigar_ = n_cigar, *cigar_ = cigar; 158 | } 159 | 160 | static inline void ksw_reset_extz(ksw_extz_t *ez) 161 | { 162 | ez->max_q = ez->max_t = ez->mqe_t = ez->mte_q = -1; 163 | ez->max = 0, ez->score = ez->mqe = ez->mte = KSW_NEG_INF; 164 | ez->n_cigar = 0, ez->zdropped = 0, ez->reach_end = 0; 165 | } 166 | 167 | static inline int ksw_apply_zdrop(ksw_extz_t *ez, int is_rot, int32_t H, int a, int b, int zdrop, int8_t e) 168 | { 169 | int r, t; 170 | if (is_rot) r = a, t = b; 171 | else r = a + b, t = a; 172 | if (H > (int32_t)ez->max) { 173 | ez->max = H, ez->max_t = t, ez->max_q = r - t; 174 | } else if (t >= ez->max_t && r - t >= ez->max_q) { 175 | int tl = t - ez->max_t, ql = (r - t) - ez->max_q, l; 176 | l = tl > ql? 
#ifdef KSW_CPU_DISPATCH
#include <stdlib.h>
#include "ksw2.h"

/* Bit flags returned by x86_simd(), one per instruction-set extension. */
#define SIMD_SSE     0x1
#define SIMD_SSE2    0x2
#define SIMD_SSE3    0x4
#define SIMD_SSSE3   0x8
#define SIMD_SSE4_1  0x10
#define SIMD_SSE4_2  0x20
#define SIMD_AVX     0x40
#define SIMD_AVX2    0x80
#define SIMD_AVX512F 0x100

#ifndef _MSC_VER
// adapted from https://github.com/01org/linux-sgx/blob/master/common/inc/internal/linux/cpuid_gnu.h
/* Execute CPUID for (func_id, subfunc_id); cpuid[] receives EAX, EBX, ECX, EDX in that order. */
void __cpuidex(int cpuid[4], int func_id, int subfunc_id)
{
#if defined(__x86_64__)
	__asm__ volatile ("cpuid"
			: "=a" (cpuid[0]), "=b" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
			: "0" (func_id), "2" (subfunc_id));
#else // on 32bit, ebx can NOT be used as PIC code
	__asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
			: "=a" (cpuid[0]), "=r" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
			: "0" (func_id), "2" (subfunc_id));
#endif
}
#endif

/* Cached result of x86_simd(); negative until the first dispatch call. */
static int ksw_simd = -1;

/* One CPUID feature bit: which output register, which bit, and the SIMD_* flag it maps to. */
typedef struct {
	int reg, bit, flag;
} simd_bit_t;

/* Query CPUID leaves 1 and (if available) 7 and return the OR of the SIMD_* flags found. */
static int x86_simd(void)
{
	static const simd_bit_t leaf1[] = {
		{ 3, 25, SIMD_SSE    }, { 3, 26, SIMD_SSE2   },
		{ 2,  0, SIMD_SSE3   }, { 2,  9, SIMD_SSSE3  },
		{ 2, 19, SIMD_SSE4_1 }, { 2, 20, SIMD_SSE4_2 },
		{ 2, 28, SIMD_AVX    }
	};
	static const simd_bit_t leaf7[] = {
		{ 1,  5, SIMD_AVX2   }, { 1, 16, SIMD_AVX512F }
	};
	int regs[4], max_leaf, detected = 0;
	size_t k;

	__cpuidex(regs, 0, 0);
	max_leaf = regs[0];
	if (max_leaf == 0) return 0;

	__cpuidex(regs, 1, 0);
	for (k = 0; k < sizeof(leaf1) / sizeof(leaf1[0]); ++k)
		if (regs[leaf1[k].reg] >> leaf1[k].bit & 1)
			detected |= leaf1[k].flag;

	if (max_leaf >= 7) {
		__cpuidex(regs, 7, 0);
		for (k = 0; k < sizeof(leaf7) / sizeof(leaf7[0]); ++k)
			if (regs[leaf7[k].reg] >> leaf7[k].bit & 1)
				detected |= leaf7[k].flag;
	}
	return detected;
}

/* Return the cached feature flags, probing the CPU on first use. */
static int ksw_simd_flags(void)
{
	if (ksw_simd < 0) ksw_simd = x86_simd();
	return ksw_simd;
}

/* Kernels compiled separately for each instruction set. */
extern void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
extern void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
extern void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
						   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
extern void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
							int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
						   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);
extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
							int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez);

/* Run-time dispatcher: prefer the SSE4.1 kernel, fall back to SSE2, abort if neither exists. */
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
{
	int simd = ksw_simd_flags();
	if (simd & SIMD_SSE4_1) {
		ksw_extz2_sse41(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, end_bonus, flag, ez);
		return;
	}
	if (simd & SIMD_SSE2) {
		ksw_extz2_sse2(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, end_bonus, flag, ez);
		return;
	}
	abort();
}

/* Run-time dispatcher for the dual-gap-cost kernel (same selection order as above). */
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
{
	int simd = ksw_simd_flags();
	if (simd & SIMD_SSE4_1) {
		ksw_extd2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, end_bonus, flag, ez);
		return;
	}
	if (simd & SIMD_SSE2) {
		ksw_extd2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, end_bonus, flag, ez);
		return;
	}
	abort();
}

/* Run-time dispatcher for the splice-aware kernel (same selection order as above). */
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int8_t junc_bonus, int flag, const uint8_t *junc, ksw_extz_t *ez)
{
	int simd = ksw_simd_flags();
	if (simd & SIMD_SSE4_1) {
		ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
		return;
	}
	if (simd & SIMD_SSE2) {
		ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, junc_bonus, flag, junc, ez);
		return;
	}
	abort();
}
#endif
/** 27 | * Initialize the query data structure 28 | * 29 | * @param size Number of bytes used to store a score; valid valures are 1 or 2 30 | * @param qlen Length of the query sequence 31 | * @param query Query sequence 32 | * @param m Size of the alphabet 33 | * @param mat Scoring matrix in a one-dimension array 34 | * 35 | * @return Query data structure 36 | */ 37 | void *ksw_ll_qinit(void *km, int size, int qlen, const uint8_t *query, int m, const int8_t *mat) 38 | { 39 | kswq_t *q; 40 | int slen, a, tmp, p; 41 | 42 | size = size > 1? 2 : 1; 43 | p = 8 * (3 - size); // # values per __m128i 44 | slen = (qlen + p - 1) / p; // segmented length 45 | q = (kswq_t*)kmalloc(km, sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory 46 | q->qp = (__m128i*)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory 47 | q->H0 = q->qp + slen * m; 48 | q->H1 = q->H0 + slen; 49 | q->E = q->H1 + slen; 50 | q->Hmax = q->E + slen; 51 | q->slen = slen; q->qlen = qlen; q->size = size; 52 | // compute shift 53 | tmp = m * m; 54 | for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score 55 | if (mat[a] < (int8_t)q->shift) q->shift = mat[a]; 56 | if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a]; 57 | } 58 | q->max = q->mdiff; 59 | q->shift = 256 - q->shift; // NB: q->shift is uint8_t 60 | q->mdiff += q->shift; // this is the difference between the min and max scores 61 | // An example: p=8, qlen=19, slen=3 and segmentation: 62 | // {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}} 63 | if (size == 1) { 64 | int8_t *t = (int8_t*)q->qp; 65 | for (a = 0; a < m; ++a) { 66 | int i, k, nlen = slen * p; 67 | const int8_t *ma = mat + a * m; 68 | for (i = 0; i < slen; ++i) 69 | for (k = i; k < nlen; k += slen) // p iterations 70 | *t++ = (k >= qlen? 
0 : ma[query[k]]) + q->shift; 71 | } 72 | } else { 73 | int16_t *t = (int16_t*)q->qp; 74 | for (a = 0; a < m; ++a) { 75 | int i, k, nlen = slen * p; 76 | const int8_t *ma = mat + a * m; 77 | for (i = 0; i < slen; ++i) 78 | for (k = i; k < nlen; k += slen) // p iterations 79 | *t++ = (k >= qlen? 0 : ma[query[k]]); 80 | } 81 | } 82 | return q; 83 | } 84 | 85 | int ksw_ll_i16(void *q_, int tlen, const uint8_t *target, int _gapo, int _gape, int *qe, int *te) 86 | { 87 | kswq_t *q = (kswq_t*)q_; 88 | int slen, i, gmax = 0, qlen8; 89 | __m128i zero, gapoe, gape, *H0, *H1, *E, *Hmax; 90 | uint16_t *H8; 91 | 92 | #define __max_8(ret, xx) do { \ 93 | (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \ 94 | (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \ 95 | (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \ 96 | (ret) = _mm_extract_epi16((xx), 0); \ 97 | } while (0) 98 | 99 | // initialization 100 | *qe = *te = -1; 101 | zero = _mm_set1_epi32(0); 102 | gapoe = _mm_set1_epi16(_gapo + _gape); 103 | gape = _mm_set1_epi16(_gape); 104 | H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax; 105 | slen = q->slen, qlen8 = slen * 8; 106 | memset(E, 0, slen * sizeof(__m128i)); 107 | memset(H0, 0, slen * sizeof(__m128i)); 108 | memset(Hmax, 0, slen * sizeof(__m128i)); 109 | // the core loop 110 | for (i = 0; i < tlen; ++i) { 111 | int j, k, imax; 112 | __m128i e, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector 113 | h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example 114 | h = _mm_slli_si128(h, 2); 115 | for (j = 0; LIKELY(j < slen); ++j) { 116 | h = _mm_adds_epi16(h, *S++); 117 | e = _mm_load_si128(E + j); 118 | h = _mm_max_epi16(h, e); 119 | h = _mm_max_epi16(h, f); 120 | max = _mm_max_epi16(max, h); 121 | _mm_store_si128(H1 + j, h); 122 | h = _mm_subs_epu16(h, gapoe); 123 | e = _mm_subs_epu16(e, gape); 124 | e = _mm_max_epi16(e, h); 125 | _mm_store_si128(E + j, e); 126 | f = _mm_subs_epu16(f, gape); 127 
#if (defined(WIN32) || defined(_WIN32)) && defined(_MSC_VER)
#define __sync_fetch_and_add(ptr, addend)     _InterlockedExchangeAdd((void*)ptr, addend)
#endif

/************
 * kt_for() *
 ************/

struct kt_for_t;

/* Per-thread cursor: the next loop index this worker will claim. */
typedef struct {
	struct kt_for_t *t;
	long i;
} ktf_worker_t;

/* Shared state of one kt_for() call. */
typedef struct kt_for_t {
	int n_threads;
	long n;
	ktf_worker_t *w;
	void (*func)(void*,long,int);
	void *data;
} kt_for_t;

/*
 * Claim one index from the worker that is furthest behind.
 * Returns the claimed index, or -1 when every cursor has reached n.
 */
static inline long steal_work(kt_for_t *t)
{
	int i, min_i = -1;
	long k, min = LONG_MAX;
	for (i = 0; i < t->n_threads; ++i)
		if (min > t->w[i].i) min = t->w[i].i, min_i = i;
	k = __sync_fetch_and_add(&t->w[min_i].i, t->n_threads);
	return k >= t->n? -1 : k;
}

/* Thread body: process own indices (i, i+n_threads, ...), then steal from others. */
static void *ktf_worker(void *data)
{
	ktf_worker_t *w = (ktf_worker_t*)data;
	long i;
	for (;;) {
		i = __sync_fetch_and_add(&w->i, w->t->n_threads);
		if (i >= w->t->n) break;
		w->t->func(w->t->data, i, w - w->t->w);
	}
	while ((i = steal_work(w->t)) >= 0)
		w->t->func(w->t->data, i, w - w->t->w);
	return 0;
}

/**
 * Parallel for-loop: call func(data, i, tid) once for every i in [0, n).
 *
 * @param n_threads number of worker threads; values <= 1 run the loop in the caller
 * @param func      callback; tid is the index of the worker that runs it (0 in serial mode)
 * @param data      opaque pointer passed to every call
 * @param n         number of iterations
 *
 * If memory or threads cannot be obtained, the loop still completes: workers
 * that did start pick up the indices of those that did not (via steal_work),
 * and if none started the loop runs serially in the calling thread.
 */
void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n)
{
	long j;
	if (n_threads > 1) {
		int i, n_started = 0;
		kt_for_t t;
		pthread_t *tid;
		t.func = func, t.data = data, t.n_threads = n_threads, t.n = n;
		t.w = (ktf_worker_t*)calloc(n_threads, sizeof(ktf_worker_t));
		tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t));
		if (t.w != 0 && tid != 0) {
			// initialise every cursor before the first thread starts: workers read all of them
			for (i = 0; i < n_threads; ++i)
				t.w[i].t = &t, t.w[i].i = i;
			for (i = 0; i < n_threads; ++i) {
				if (pthread_create(&tid[i], 0, ktf_worker, &t.w[i]) != 0) break;
				++n_started;
			}
			// join only threads that exist; tid[] beyond n_started was never written
			for (i = 0; i < n_started; ++i) pthread_join(tid[i], 0);
		}
		free(tid); free(t.w);
		if (n_started > 0) return; // started workers have drained every cursor
	}
	for (j = 0; j < n; ++j) func(data, j, 0);
}

/*****************
 * kt_pipeline() *
 *****************/

struct ktp_t;

/* One pipeline worker: the batch it carries (index), the step it is at, and the step's data. */
typedef struct {
	struct ktp_t *pl;
	int64_t index;
	int step;
	void *data;
} ktp_worker_t;

/* Shared state of one kt_pipeline() call. */
typedef struct ktp_t {
	void *shared;
	void *(*func)(void*, int, void*);
	int64_t index;
	int n_workers, n_steps;
	ktp_worker_t *workers;
	pthread_mutex_t mutex;
	pthread_cond_t cv;
} ktp_t;

/*
 * Drive one worker through the pipeline until it is retired (step == n_steps).
 * A worker may run step s only when no worker holding an earlier batch is
 * still at step s or before, so each step sees batches in order.
 */
static void ktp_run(ktp_worker_t *w)
{
	ktp_t *p = w->pl;
	while (w->step < p->n_steps) {
		// test whether we can kick off the job with this worker
		pthread_mutex_lock(&p->mutex);
		for (;;) {
			int i;
			// test whether another worker is doing the same step
			for (i = 0; i < p->n_workers; ++i) {
				if (w == &p->workers[i]) continue; // ignore itself
				if (p->workers[i].step <= w->step && p->workers[i].index < w->index)
					break;
			}
			if (i == p->n_workers) break; // no workers with smaller indices are doing w->step or the previous steps
			pthread_cond_wait(&p->cv, &p->mutex);
		}
		pthread_mutex_unlock(&p->mutex);

		// working on w->step
		w->data = p->func(p->shared, w->step, w->step? w->data : 0); // for the first step, input is NULL

		// update step and let other workers know
		pthread_mutex_lock(&p->mutex);
		// a NULL result before the last step retires the worker; otherwise advance (wrapping to step 0)
		w->step = w->step == p->n_steps - 1 || w->data? (w->step + 1) % p->n_steps : p->n_steps;
		if (w->step == 0) w->index = p->index++; // starting over: take the next batch number
		pthread_cond_broadcast(&p->cv);
		pthread_mutex_unlock(&p->mutex);
	}
}

/* pthread entry point for a pipeline worker. */
static void *ktp_worker(void *data)
{
	ktp_run((ktp_worker_t*)data);
	return 0;
}

/**
 * Run an n_steps-stage pipeline with n_threads workers.
 *
 * func(shared_data, step, in) is called for step 0 with in == NULL and for
 * later steps with the previous step's return value. Returning NULL from any
 * step but the last retires the calling worker.
 *
 * @param n_threads   number of workers; values < 1 are treated as 1
 * @param func        step callback
 * @param shared_data opaque pointer passed to every call
 * @param n_steps     number of pipeline steps
 *
 * If some threads cannot be created, the pipeline runs with those that were;
 * if none can (or memory is exhausted), it runs in the calling thread.
 */
void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps)
{
	ktp_t aux;
	ktp_worker_t solo; // stack worker used only when the heap allocation fails
	pthread_t *tid;
	int i, n_started = 0;

	if (n_threads < 1) n_threads = 1;
	aux.n_steps = n_steps;
	aux.func = func;
	aux.shared = shared_data;
	aux.index = 0;
	pthread_mutex_init(&aux.mutex, 0);
	pthread_cond_init(&aux.cv, 0);

	aux.workers = (ktp_worker_t*)calloc(n_threads, sizeof(ktp_worker_t));
	tid = (pthread_t*)calloc(n_threads, sizeof(pthread_t));
	if (aux.workers == 0 || tid == 0) { // out of memory: degrade to one in-thread worker
		free(aux.workers); free(tid);
		aux.workers = &solo, tid = 0, n_threads = 1;
	}
	aux.n_workers = n_threads;
	for (i = 0; i < n_threads; ++i) {
		ktp_worker_t *w = &aux.workers[i];
		w->step = 0; w->pl = &aux; w->data = 0;
		w->index = aux.index++;
	}

	if (tid != 0) {
		for (i = 0; i < n_threads; ++i) {
			if (pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]) != 0) break;
			++n_started;
		}
	}
	if (n_started < n_threads) {
		// Workers that never started would sit at step 0 forever and block
		// everyone behind them, so remove them from the set being consulted.
		pthread_mutex_lock(&aux.mutex);
		aux.n_workers = n_started > 0? n_started : 1;
		pthread_cond_broadcast(&aux.cv);
		pthread_mutex_unlock(&aux.mutex);
		if (n_started == 0) ktp_run(&aux.workers[0]);
	}
	for (i = 0; i < n_started; ++i) pthread_join(tid[i], 0);
	free(tid);
	if (aux.workers != &solo) free(aux.workers);

	pthread_mutex_destroy(&aux.mutex);
	pthread_cond_destroy(&aux.cv);
}
-------------------------------------------------------------------------------- 1 | #ifndef KTHREAD_H 2 | #define KTHREAD_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n); 9 | void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps); 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/kvec.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008, by Attractive Chaos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | */ 25 | 26 | /* 27 | An example: 28 | 29 | #include "kvec.h" 30 | int main() { 31 | kvec_t(int) array; 32 | kv_init(array); 33 | kv_push(int, array, 10); // append 34 | kv_a(int, array, 20) = 5; // dynamic 35 | kv_A(array, 20) = 4; // static 36 | kv_destroy(array); 37 | return 0; 38 | } 39 | */ 40 | 41 | /* 42 | 2008-09-22 (0.1.0): 43 | 44 | * The initial version. 45 | 46 | */ 47 | 48 | #ifndef AC_KVEC_H 49 | #define AC_KVEC_H 50 | 51 | #include 52 | #include "kalloc.h" 53 | 54 | #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 55 | 56 | #define kvec_t(type) struct { size_t n, m; type *a; } 57 | #define kv_init(v) ((v).n = (v).m = 0, (v).a = 0) 58 | #define kv_destroy(v) free((v).a) 59 | #define kv_A(v, i) ((v).a[(i)]) 60 | #define kv_pop(v) ((v).a[--(v).n]) 61 | #define kv_size(v) ((v).n) 62 | #define kv_max(v) ((v).m) 63 | 64 | #define kv_resize(type, km, v, s) do { \ 65 | if ((v).m < (s)) { \ 66 | (v).m = (s); \ 67 | kv_roundup32((v).m); \ 68 | (v).a = (type*)krealloc((km), (v).a, sizeof(type) * (v).m); \ 69 | } \ 70 | } while (0) 71 | 72 | #define kv_copy(type, km, v1, v0) do { \ 73 | if ((v1).m < (v0).n) kv_resize(type, (km), (v1), (v0).n); \ 74 | (v1).n = (v0).n; \ 75 | memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \ 76 | } while (0) \ 77 | 78 | #define kv_push(type, km, v, x) do { \ 79 | if ((v).n == (v).m) { \ 80 | (v).m = (v).m? (v).m<<1 : 2; \ 81 | (v).a = (type*)krealloc((km), (v).a, sizeof(type) * (v).m); \ 82 | } \ 83 | (v).a[(v).n++] = (x); \ 84 | } while (0) 85 | 86 | #define kv_pushp(type, km, v, p) do { \ 87 | if ((v).n == (v).m) { \ 88 | (v).m = (v).m? 
(v).m<<1 : 2; \ 89 | (v).a = (type*)krealloc((km), (v).a, sizeof(type) * (v).m); \ 90 | } \ 91 | *(p) = &(v).a[(v).n++]; \ 92 | } while (0) 93 | 94 | #define kv_reverse(type, v, start) do { \ 95 | if ((v).m > 0 && (v).n > (start)) { \ 96 | size_t __i, __end = (v).n - (start); \ 97 | type *__a = (v).a + (start); \ 98 | for (__i = 0; __i < __end>>1; ++__i) { \ 99 | type __t = __a[__end - 1 - __i]; \ 100 | __a[__end - 1 - __i] = __a[__i]; __a[__i] = __t; \ 101 | } \ 102 | } \ 103 | } while (0) 104 | 105 | #endif 106 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/misc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mmpriv.h" 3 | 4 | int mm_verbose = 1; 5 | int mm_dbg_flag = 0; 6 | double mm_realtime0; 7 | 8 | #if defined(WIN32) || defined(_WIN32) 9 | #include 10 | 11 | struct timezone 12 | { 13 | __int32 tz_minuteswest; /* minutes W of Greenwich */ 14 | int tz_dsttime; /* type of dst correction */ 15 | }; 16 | 17 | /* 18 | * gettimeofday.c 19 | * Win32 gettimeofday() replacement 20 | * taken from PostgreSQL, according to 21 | * https://stackoverflow.com/questions/1676036/what-should-i-use-to-replace-gettimeofday-on-windows 22 | * 23 | * src/port/gettimeofday.c 24 | * 25 | * Copyright (c) 2003 SRA, Inc. 26 | * Copyright (c) 2003 SKC, Inc. 27 | * 28 | * Permission to use, copy, modify, and distribute this software and 29 | * its documentation for any purpose, without fee, and without a 30 | * written agreement is hereby granted, provided that the above 31 | * copyright notice and this paragraph and the following two 32 | * paragraphs appear in all copies. 
33 | * 34 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, 35 | * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING 36 | * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS 37 | * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED 38 | * OF THE POSSIBILITY OF SUCH DAMAGE. 39 | * 40 | * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT 41 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 42 | * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS 43 | * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, 44 | * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 45 | */ 46 | 47 | /* FILETIME of Jan 1 1970 00:00:00. */ 48 | static const unsigned __int64 epoch = ((unsigned __int64) 116444736000000000ULL); 49 | 50 | /* 51 | * timezone information is stored outside the kernel so tzp isn't used anymore. 52 | * 53 | * Note: this function is not for Win32 high precision timing purpose. See 54 | * elapsed_time(). 
#if defined(WIN32) || defined(_WIN32)
/*
 * Win32 replacement for gettimeofday(). tzp is ignored.
 * Seconds come from the system FILETIME rebased to the Unix epoch;
 * microseconds have millisecond resolution only.
 */
int gettimeofday(struct timeval * tp, struct timezone *tzp)
{
	FILETIME    file_time;
	SYSTEMTIME  system_time;
	ULARGE_INTEGER ularge;

	GetSystemTime(&system_time);
	SystemTimeToFileTime(&system_time, &file_time);
	ularge.LowPart = file_time.dwLowDateTime;
	ularge.HighPart = file_time.dwHighDateTime;

	tp->tv_sec = (long) ((ularge.QuadPart - epoch) / 10000000L);
	tp->tv_usec = (long) (system_time.wMilliseconds * 1000);

	return 0;
}

/*
 * CPU time (kernel + user) consumed by this process, in seconds.
 * GetProcessTimes() reports both as durations in 100-nanosecond units, so
 * they are converted directly. The previous SYSTEMTIME round trip computed
 * hours*60 + minutes*60 and discarded anything beyond 24 hours.
 * Returns 0 if the times cannot be obtained.
 */
double cputime(void)
{
	FILETIME ftCreation, ftExit, ftKernel, ftUser;
	ULARGE_INTEGER kernel, user;

	if (!GetProcessTimes(GetCurrentProcess(), &ftCreation, &ftExit, &ftKernel, &ftUser))
		return 0.0;
	kernel.LowPart = ftKernel.dwLowDateTime;
	kernel.HighPart = ftKernel.dwHighDateTime;
	user.LowPart = ftUser.dwLowDateTime;
	user.HighPart = ftUser.dwHighDateTime;
	return (double)(kernel.QuadPart + user.QuadPart) * 1e-7;
}

/* Peak memory is not measured on Windows. */
long peakrss(void) { return 0; }
#else
#include <sys/resource.h>
#include <sys/time.h>

/* CPU time (user + system) consumed by this process, in seconds. */
double cputime(void)
{
	struct rusage r;
	getrusage(RUSAGE_SELF, &r);
	return r.ru_utime.tv_sec + r.ru_stime.tv_sec + 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec);
}

/* Peak resident set size as reported by getrusage(); scaled by 1024 on Linux. */
long peakrss(void)
{
	struct rusage r;
	getrusage(RUSAGE_SELF, &r);
#ifdef __linux__
	return r.ru_maxrss * 1024;
#else
	return r.ru_maxrss;
#endif
}

#endif /* WIN32 || _WIN32 */

/* Wall-clock time in seconds, with microsecond resolution. */
double realtime(void)
{
	struct timeval tp;
	gettimeofday(&tp, NULL);
	return tp.tv_sec + tp.tv_usec * 1e-6;
}

/* puts() to stdout; on failure print the reason and terminate the process. */
void mm_err_puts(const char *str)
{
	int ret;
	ret = puts(str);
	if (ret == EOF) {
		perror("[ERROR] failed to write the results");
		exit(EXIT_FAILURE);
	}
}

/*
 * fwrite() that terminates the process if not all items were written.
 * fwrite() returns the number of items written, never EOF, so the count is
 * compared with nitems. Zero-sized requests are a no-op.
 */
void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp)
{
	if (size == 0 || nitems == 0) return;
	if (fwrite(p, size, nitems, fp) != nitems) {
		perror("[ERROR] failed to write data");
		exit(EXIT_FAILURE);
	}
}

/*
 * fread() that terminates the process if not all items were read, so callers
 * never continue with a partially filled buffer. Zero-sized requests are a no-op.
 */
void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp)
{
	if (size == 0 || nitems == 0) return;
	if (fread(p, size, nitems, fp) != nitems) {
		if (ferror(fp)) perror("[ERROR] failed to read data");
		else fputs("[ERROR] failed to read data: unexpected end of file\n", stderr); // errno is not set at EOF
		exit(EXIT_FAILURE);
	}
}
-------------------------------------------------------------------------------- 1 | ## Getting Started 2 | 3 | ```sh 4 | # install minimap2 5 | git clone https://github.com/lh3/minimap2 6 | cd minimap2 && make 7 | # install the k8 javascript shell 8 | curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf - 9 | cp k8-0.2.4/k8-`uname -s` k8 # or copy it to a directory on your $PATH 10 | # export PATH="$PATH:`pwd`:`pwd`/misc" # run this if k8, minimap2 or paftools.js not on your $PATH 11 | minimap2 --cs test/MT-human.fa test/MT-orang.fa | paftools.js view - # view alignment 12 | minimap2 -c test/MT-human.fa test/MT-orang.fa | paftools.js stat - # basic alignment statistics 13 | minimap2 -c --cs test/MT-human.fa test/MT-orang.fa \ 14 | | sort -k6,6 -k8,8n | paftools.js call -L15000 - # calling variants from asm-to-ref alignment 15 | minimap2 -c test/MT-human.fa test/MT-orang.fa \ 16 | | paftools.js liftover -l10000 - <(echo -e "MT_orang\t2000\t5000") # liftOver 17 | # no test data for the following examples 18 | paftools.js junceval -e anno.gtf splice.sam > out.txt # compare splice junctions to annotations 19 | paftools.js splice2bed anno.gtf > anno.bed # convert GTF/GFF3 to BED12 20 | ``` 21 | 22 | ## Table of Contents 23 | 24 | - [Getting Started](#started) 25 | - [Introduction](#intro) 26 | - [Evaluation](#eval) 27 | - [Evaluating mapping accuracy with simulated reads](#mapeval) 28 | - [Evaluating read overlap sensitivity](#oveval) 29 | - [Calling Variants from Assemblies](#asmvar) 30 | 31 | ## Introduction 32 | 33 | paftools.js is a script that processes alignments in the [PAF format][paf], 34 | such as converting between formats, evaluating mapping accuracy, lifting over 35 | BED files based on alignment, and calling variants from assembly-to-assembly 36 | alignment. This script *requires* the [k8 Javascript shell][k8] to run. 
On 37 | Linux or Mac, you can download the precompiled k8 binary with: 38 | 39 | ```sh 40 | curl -L https://github.com/attractivechaos/k8/releases/download/v0.2.4/k8-0.2.4.tar.bz2 | tar -jxf - 41 | cp k8-0.2.4/k8-`uname -s` $HOME/bin/k8 # assuming $HOME/bin in your $PATH 42 | ``` 43 | 44 | It is highly recommended to copy the executable `k8` to a directory on your 45 | `$PATH` such that `/usr/bin/env` can find it. Like python scripts, once you 46 | install `k8`, you can launch paftools.js in one of the two ways: 47 | 48 | ```sh 49 | path/to/paftools.js # only if k8 is on your $PATH 50 | k8 path/to/paftools.js 51 | ``` 52 | 53 | In a nutshell, paftools.js has the following commands: 54 | 55 | ``` 56 | Usage: paftools.js <command> [arguments] 57 | Commands: 58 | view convert PAF to BLAST-like (for eyeballing) or MAF 59 | splice2bed convert spliced alignment in PAF/SAM to BED12 60 | sam2paf convert SAM to PAF 61 | delta2paf convert MUMmer's delta to PAF 62 | gff2bed convert GTF/GFF3 to BED12 63 | 64 | stat collect basic mapping information in PAF/SAM 65 | liftover simplistic liftOver 66 | call call variants from asm-to-ref alignment with the cs tag 67 | bedcov compute the number of bases covered 68 | 69 | mapeval evaluate mapping accuracy using mason2/PBSIM-simulated FASTQ 70 | mason2fq convert mason2-simulated SAM to FASTQ 71 | pbsim2fq convert PBSIM-simulated MAF to FASTQ 72 | junceval evaluate splice junction consistency with known annotations 73 | ov-eval evaluate read overlap sensitivity using read-to-ref mapping 74 | ``` 75 | 76 | paftools.js seamlessly reads both plain text files and gzip'd text files. 77 | 78 | ## Evaluation 79 | 80 | ### Evaluating mapping accuracy with simulated reads 81 | 82 | The **pbsim2fq** command of paftools.js converts the MAF output of [pbsim][pbsim] 83 | to FASTQ and encodes the true mapping position in the read name in a format like 84 | `S1_33!chr1!225258409!225267761!-`. 
Similarly, the **mason2fq** command 85 | converts [mason2][mason2] simulated SAM to FASTQ. 86 | 87 | Command **mapeval** evaluates mapped SAM/PAF. Here is example output: 88 | 89 | ``` 90 | Q 60 32478 0 0.000000000 32478 91 | Q 22 16 1 0.000030775 32494 92 | Q 21 43 1 0.000061468 32537 93 | Q 19 73 1 0.000091996 32610 94 | Q 14 66 1 0.000122414 32676 95 | Q 10 27 3 0.000214048 32703 96 | Q 8 14 1 0.000244521 32717 97 | Q 7 13 2 0.000305530 32730 98 | Q 6 46 1 0.000335611 32776 99 | Q 3 10 1 0.000366010 32786 100 | Q 2 20 2 0.000426751 32806 101 | Q 1 248 94 0.003267381 33054 102 | Q 0 31 17 0.003778147 33085 103 | U 3 104 | ``` 105 | 106 | where each Q-line gives the quality threshold, the number of reads mapped with 107 | mapping quality equal to or greater than the threshold, number of wrong 108 | mappings, accumulative mapping error rate and the accumulative number of 109 | mapped reads. The U-line, if present, gives the number of unmapped reads if 110 | they are present in the SAM file. 111 | 112 | Suppose the reported mapping coordinate overlaps with the true coordinate like 113 | the following: 114 | 115 | ``` 116 | truth: -------------------- 117 | mapper: ---------------------- 118 | |<- l1 ->|<-- o -->|<-- l2 -->| 119 | ``` 120 | 121 | Let `r=o/(l1+o+l2)`. The reported mapping is considered correct if `r>0.1` by 122 | default. 123 | 124 | ### Evaluating read overlap sensitivity 125 | 126 | Command **ov-eval** takes *sorted* read-to-reference alignment and read 127 | overlaps in PAF as input, and evaluates the sensitivity. For example: 128 | 129 | ```sh 130 | minimap2 -cx map-pb ref.fa reads.fq.gz | sort -k6,6 -k8,8n > reads-to-ref.paf 131 | minimap2 -x ava-pb reads.fq.gz reads.fq.gz > ovlp.paf 132 | k8 ov-eval.js reads-to-ref.paf ovlp.paf 133 | ``` 134 | 135 | ## Calling Variants from Haploid Assemblies 136 | 137 | The **call** command of paftools.js calls variants from coordinate-sorted 138 | assembly-to-reference alignment. 
It calls variants from the [cs tag][cs] and 139 | identifies confident/callable regions as those covered by exactly one contig. 140 | Here are example command lines: 141 | 142 | ```sh 143 | minimap2 -cx asm5 -t8 --cs ref.fa asm.fa > asm.paf # keeping this file is recommended; --cs required! 144 | sort -k6,6 -k8,8n asm.paf > asm.srt.paf # sort by reference start coordinate 145 | k8 paftools.js call asm.srt.paf > asm.var.txt 146 | ``` 147 | 148 | Here is sample output: 149 | 150 | ``` 151 | V chr1 2276040 2276041 1 60 c g LJII01000171.1 1217409 1217410 + 152 | V chr1 2280409 2280410 1 60 a g LJII01000171.1 1221778 1221779 + 153 | V chr1 2280504 2280505 1 60 a g LJII01000171.1 1221873 1221874 + 154 | R chr1 2325140 2436340 155 | V chr1 2325287 2325287 1 60 - ct LJII01000171.1 1272894 1272896 + 156 | V chr1 2325642 2325644 1 60 tt - LJII01000171.1 1273251 1273251 + 157 | V chr1 2326051 2326052 1 60 c t LJII01000171.1 1273658 1273659 + 158 | V chr1 2326287 2326288 1 60 c t LJII01000171.1 1273894 1273895 + 159 | ``` 160 | 161 | where a line starting with `R` gives regions covered by one query contig, and a 162 | V-line encodes a variant in the following format: chr, start, end, query depth, 163 | mapping quality, REF allele, ALT allele, query name, query start, end and the 164 | query orientation. Generally, you should only look at variants where column 5 165 | is one. 166 | 167 | By default, when calling variants, "paftools.js call" ignores alignments 50kb 168 | or shorter; when deriving callable regions, it ignores alignments 10kb or 169 | shorter. It uses two thresholds to avoid edge effects. These defaults are 170 | designed for long-read assemblies. For short reads, both should be reduced. 
171 | 172 | 173 | 174 | [paf]: https://github.com/lh3/miniasm/blob/master/PAF.md 175 | [cs]: https://github.com/lh3/minimap2#cs 176 | [k8]: https://github.com/attractivechaos/k8 177 | [maf]: https://genome.ucsc.edu/FAQ/FAQformat#format5 178 | [pbsim]: https://github.com/pfaucon/PBSIM-PacBio-Simulator 179 | [mason2]: https://github.com/seqan/seqan/tree/master/apps/mason2 180 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/mmpriv.h: -------------------------------------------------------------------------------- 1 | #ifndef MMPRIV2_H 2 | #define MMPRIV2_H 3 | 4 | #include 5 | #include "minimap.h" 6 | #include "bseq.h" 7 | #include "kseq.h" 8 | 9 | #define MM_PARENT_UNSET (-1) 10 | #define MM_PARENT_TMP_PRI (-2) 11 | 12 | #define MM_DBG_NO_KALLOC 0x1 13 | #define MM_DBG_PRINT_QNAME 0x2 14 | #define MM_DBG_PRINT_SEED 0x4 15 | #define MM_DBG_PRINT_ALN_SEQ 0x8 16 | 17 | #define MM_SEED_LONG_JOIN (1ULL<<40) 18 | #define MM_SEED_IGNORE (1ULL<<41) 19 | #define MM_SEED_TANDEM (1ULL<<42) 20 | #define MM_SEED_SELF (1ULL<<43) 21 | 22 | #define MM_SEED_SEG_SHIFT 48 23 | #define MM_SEED_SEG_MASK (0xffULL<<(MM_SEED_SEG_SHIFT)) 24 | 25 | #ifndef kroundup32 26 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 27 | #endif 28 | 29 | #define mm_seq4_set(s, i, c) ((s)[(i)>>3] |= (uint32_t)(c) << (((i)&7)<<2)) 30 | #define mm_seq4_get(s, i) ((s)[(i)>>3] >> (((i)&7)<<2) & 0xf) 31 | 32 | #define MALLOC(type, len) ((type*)malloc((len) * sizeof(type))) 33 | #define CALLOC(type, len) ((type*)calloc((len), sizeof(type))) 34 | 35 | #ifdef __cplusplus 36 | extern "C" { 37 | #endif 38 | 39 | typedef struct { 40 | uint32_t n; 41 | uint32_t q_pos; 42 | uint32_t q_span:31, flt:1; 43 | uint32_t seg_id:31, is_tandem:1; 44 | const uint64_t *cr; 45 | } mm_seed_t; 46 | 47 | typedef struct { 48 | int n_u, n_a; 49 | uint64_t *u; 50 | mm128_t *a; 51 | } mm_seg_t; 52 | 53 | double cputime(void); 
54 | double realtime(void); 55 | long peakrss(void); 56 | 57 | void radix_sort_128x(mm128_t *beg, mm128_t *end); 58 | void radix_sort_64(uint64_t *beg, uint64_t *end); 59 | uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk); 60 | 61 | void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p); 62 | 63 | mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int max_max_occ, int dist, const mm_idx_t *mi, const mm128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, uint64_t **mini_pos); 64 | 65 | double mm_event_identity(const mm_reg1_t *r); 66 | int mm_write_sam_hdr(const mm_idx_t *mi, const char *rg, const char *ver, int argc, char *argv[]); 67 | void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int64_t opt_flag); 68 | void mm_write_paf3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int64_t opt_flag, int rep_len); 69 | void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs); 70 | void mm_write_sam2(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regs, const mm_reg1_t *const* regs, void *km, int64_t opt_flag); 71 | void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int seg_idx, int reg_idx, int n_seg, const int *n_regss, const mm_reg1_t *const* regss, void *km, int64_t opt_flag, int rep_len); 72 | 73 | void mm_idxopt_init(mm_idxopt_t *opt); 74 | const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n); 75 | int32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f); 76 | int mm_idx_getseq2(const mm_idx_t *mi, int is_rev, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq); 77 | mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a); 78 | 
mm_reg1_t *mm_gen_regs(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, mm128_t *a, int is_qstrand); 79 | 80 | mm128_t *mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float gap_scale, 81 | int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km); 82 | mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip, 83 | int is_cdna, int n_segs, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km); 84 | mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip, 85 | int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km); 86 | 87 | void mm_mark_alt(const mm_idx_t *mi, int n, mm_reg1_t *r); 88 | void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a, int is_qstrand); 89 | void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs); 90 | int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a); 91 | int mm_set_sam_pri(int n, mm_reg1_t *r); 92 | void mm_set_parent(void *km, float mask_level, int mask_len, int n, mm_reg1_t *r, int sub_diff, int hard_mask_level, float alt_diff_frac); 93 | void mm_select_sub(void *km, float pri_ratio, int min_diff, int best_n, int *n_, mm_reg1_t *r); 94 | void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r); 95 | void mm_filter_regs(const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs); 96 | void mm_hit_sort(void *km, int *n_regs, mm_reg1_t *r, float alt_diff_frac); 97 | void mm_set_mapq(void *km, int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr); 98 | void mm_update_dp_max(int qlen, int n_regs, mm_reg1_t *regs, float frac, int a, int b); 99 | 100 | void 
mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos); 101 | 102 | mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a); 103 | void mm_seg_free(void *km, int n_segs, mm_seg_t *segs); 104 | void mm_pair(void *km, int max_gap_ref, int dp_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs); 105 | 106 | FILE *mm_split_init(const char *prefix, const mm_idx_t *mi); 107 | mm_idx_t *mm_split_merge_prep(const char *prefix, int n_splits, FILE **fp, uint32_t *n_seq_part); 108 | int mm_split_merge(int n_segs, const char **fn, const mm_mapopt_t *opt, int n_split_idx); 109 | void mm_split_rm_tmp(const char *prefix, int n_splits); 110 | 111 | void mm_err_puts(const char *str); 112 | void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp); 113 | void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp); 114 | 115 | static inline float mg_log2(float x) // NB: this doesn't work when x<2 116 | { 117 | union { float f; uint32_t i; } z = { x }; 118 | float log_2 = ((z.i >> 23) & 255) - 128; 119 | z.i &= ~(255 << 23); 120 | z.i += 127 << 23; 121 | log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f; 122 | return log_2; 123 | } 124 | 125 | #ifdef __cplusplus 126 | } 127 | #endif 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/options.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "mmpriv.h" 4 | 5 | void mm_idxopt_init(mm_idxopt_t *opt) 6 | { 7 | memset(opt, 0, sizeof(mm_idxopt_t)); 8 | opt->k = 15, opt->w = 10, opt->flag = 0; 9 | opt->bucket_bits = 14; 10 | opt->mini_batch_size = 50000000; 11 | opt->batch_size = 4000000000ULL; 12 | } 13 | 14 | void mm_mapopt_init(mm_mapopt_t *opt) 15 | { 16 
| memset(opt, 0, sizeof(mm_mapopt_t)); 17 | opt->seed = 11; 18 | opt->mid_occ_frac = 2e-4f; 19 | opt->min_mid_occ = 10; 20 | opt->max_mid_occ = 1000000; 21 | opt->sdust_thres = 0; // no SDUST masking 22 | 23 | opt->min_cnt = 3; 24 | opt->min_chain_score = 40; 25 | opt->bw = 500, opt->bw_long = 20000; 26 | opt->max_gap = 5000; 27 | opt->max_gap_ref = -1; 28 | opt->max_chain_skip = 25; 29 | opt->max_chain_iter = 5000; 30 | opt->rmq_inner_dist = 1000; 31 | opt->rmq_size_cap = 100000; 32 | opt->rmq_rescue_size = 1000; 33 | opt->rmq_rescue_ratio = 0.1f; 34 | opt->chain_gap_scale = 0.8f; 35 | opt->max_max_occ = 4095; 36 | opt->occ_dist = 500; 37 | 38 | opt->mask_level = 0.5f; 39 | opt->mask_len = INT_MAX; 40 | opt->pri_ratio = 0.8f; 41 | opt->best_n = 5; 42 | 43 | opt->alt_drop = 0.15f; 44 | 45 | opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1; 46 | opt->sc_ambi = 1; 47 | opt->zdrop = 400, opt->zdrop_inv = 200; 48 | opt->end_bonus = -1; 49 | opt->min_dp_max = opt->min_chain_score * opt->a; 50 | opt->min_ksw_len = 200; 51 | opt->anchor_ext_len = 20, opt->anchor_ext_shift = 6; 52 | opt->max_clip_ratio = 1.0f; 53 | opt->mini_batch_size = 500000000; 54 | opt->max_sw_mat = 100000000; 55 | 56 | opt->rank_min_len = 500; 57 | opt->rank_frac = 0.9f; 58 | 59 | opt->pe_ori = 0; // FF 60 | opt->pe_bonus = 33; 61 | } 62 | 63 | void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi) 64 | { 65 | if ((opt->flag & MM_F_SPLICE_FOR) || (opt->flag & MM_F_SPLICE_REV)) 66 | opt->flag |= MM_F_SPLICE; 67 | if (opt->mid_occ <= 0) { 68 | opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac); 69 | if (opt->mid_occ < opt->min_mid_occ) 70 | opt->mid_occ = opt->min_mid_occ; 71 | if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ) 72 | opt->mid_occ = opt->max_mid_occ; 73 | } 74 | if (mm_verbose >= 3) 75 | fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), 
opt->mid_occ); 76 | } 77 | 78 | void mm_mapopt_max_intron_len(mm_mapopt_t *opt, int max_intron_len) 79 | { 80 | if ((opt->flag & MM_F_SPLICE) && max_intron_len > 0) 81 | opt->max_gap_ref = opt->bw = opt->bw_long = max_intron_len; 82 | } 83 | 84 | int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) 85 | { 86 | if (preset == 0) { 87 | mm_idxopt_init(io); 88 | mm_mapopt_init(mo); 89 | } else if (strcmp(preset, "map-ont") == 0) { // this is the same as the default 90 | } else if (strcmp(preset, "ava-ont") == 0) { 91 | io->flag = 0, io->k = 15, io->w = 5; 92 | mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN; 93 | mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_chain_skip = 25; 94 | mo->bw = mo->bw_long = 2000; 95 | mo->occ_dist = 0; 96 | } else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) { 97 | io->flag |= MM_I_HPC, io->k = 19; 98 | } else if (strcmp(preset, "ava-pb") == 0) { 99 | io->flag |= MM_I_HPC, io->k = 19, io->w = 5; 100 | mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN; 101 | mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_chain_skip = 25; 102 | mo->bw_long = mo->bw; 103 | mo->occ_dist = 0; 104 | } else if (strcmp(preset, "map-hifi") == 0 || strcmp(preset, "map-ccs") == 0) { 105 | io->flag = 0, io->k = 19, io->w = 19; 106 | mo->max_gap = 10000; 107 | mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1; 108 | mo->occ_dist = 500; 109 | mo->min_mid_occ = 50, mo->max_mid_occ = 500; 110 | mo->min_dp_max = 200; 111 | } else if (strncmp(preset, "asm", 3) == 0) { 112 | io->flag = 0, io->k = 19, io->w = 19; 113 | mo->bw = mo->bw_long = 100000; 114 | mo->max_gap = 10000; 115 | mo->flag |= MM_F_RMQ; 116 | mo->min_mid_occ = 50, mo->max_mid_occ = 500; 117 | mo->min_dp_max = 200; 118 | mo->best_n = 50; 119 | if (strcmp(preset, "asm5") == 0) { 120 | mo->a = 1, mo->b = 19, mo->q = 39, mo->q2 = 81, mo->e = 3, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200; 
121 | } else if (strcmp(preset, "asm10") == 0) { 122 | mo->a = 1, mo->b = 9, mo->q = 16, mo->q2 = 41, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200; 123 | } else if (strcmp(preset, "asm20") == 0) { 124 | mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1, mo->zdrop = mo->zdrop_inv = 200; 125 | io->w = 10; 126 | } else return -1; 127 | } else if (strcmp(preset, "short") == 0 || strcmp(preset, "sr") == 0) { 128 | io->flag = 0, io->k = 21, io->w = 11; 129 | mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND | MM_F_2_IO_THREADS | MM_F_HEAP_SORT; 130 | mo->pe_ori = 0<<1|1; // FR 131 | mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 24, mo->e2 = 1; 132 | mo->zdrop = mo->zdrop_inv = 100; 133 | mo->end_bonus = 10; 134 | mo->max_frag_len = 800; 135 | mo->max_gap = 100; 136 | mo->bw = mo->bw_long = 100; 137 | mo->pri_ratio = 0.5f; 138 | mo->min_cnt = 2; 139 | mo->min_chain_score = 25; 140 | mo->min_dp_max = 40; 141 | mo->best_n = 20; 142 | mo->mid_occ = 1000; 143 | mo->max_occ = 5000; 144 | mo->mini_batch_size = 50000000; 145 | } else if (strncmp(preset, "splice", 6) == 0 || strcmp(preset, "cdna") == 0) { 146 | io->flag = 0, io->k = 15, io->w = 5; 147 | mo->flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV | MM_F_SPLICE_FLANK; 148 | mo->max_sw_mat = 0; 149 | mo->max_gap = 2000, mo->max_gap_ref = mo->bw = mo->bw_long = 200000; 150 | mo->a = 1, mo->b = 2, mo->q = 2, mo->e = 1, mo->q2 = 32, mo->e2 = 0; 151 | mo->noncan = 9; 152 | mo->junc_bonus = 9; 153 | mo->zdrop = 200, mo->zdrop_inv = 100; // because mo->a is halved 154 | if (strcmp(preset, "splice:hq") == 0) 155 | mo->junc_bonus = 5, mo->b = 4, mo->q = 6, mo->q2 = 24; 156 | } else return -1; 157 | return 0; 158 | } 159 | 160 | int mm_check_opt(const mm_idxopt_t *io, const mm_mapopt_t *mo) 161 | { 162 | if (mo->bw > mo->bw_long) { 163 | if (mm_verbose >= 1) 164 | fprintf(stderr, "[ERROR]\033[1;31m with '-rNUM1,NUM2', NUM1 (%d) can't be larger than NUM2 (%d)\033[0m\n", mo->bw, 
mo->bw_long); 165 | return -8; 166 | } 167 | if ((mo->flag & MM_F_RMQ) && (mo->flag & (MM_F_SR|MM_F_SPLICE))) { 168 | if (mm_verbose >= 1) 169 | fprintf(stderr, "[ERROR]\033[1;31m --rmq doesn't work with --sr or --splice\033[0m\n"); 170 | return -7; 171 | } 172 | if (mo->split_prefix && (mo->flag & (MM_F_OUT_CS|MM_F_OUT_MD))) { 173 | if (mm_verbose >= 1) 174 | fprintf(stderr, "[ERROR]\033[1;31m --cs or --MD doesn't work with --split-prefix\033[0m\n"); 175 | return -6; 176 | } 177 | if (io->k <= 0 || io->w <= 0) { 178 | if (mm_verbose >= 1) 179 | fprintf(stderr, "[ERROR]\033[1;31m -k and -w must be positive\033[0m\n"); 180 | return -5; 181 | } 182 | if (mo->best_n < 0) { 183 | if (mm_verbose >= 1) 184 | fprintf(stderr, "[ERROR]\033[1;31m -N must be no less than 0\033[0m\n"); 185 | return -4; 186 | } 187 | if (mo->best_n == 0 && mm_verbose >= 2) 188 | fprintf(stderr, "[WARNING]\033[1;31m '-N 0' reduces mapping accuracy. Please use '--secondary=no' instead.\033[0m\n"); 189 | if (mo->pri_ratio < 0.0f || mo->pri_ratio > 1.0f) { 190 | if (mm_verbose >= 1) 191 | fprintf(stderr, "[ERROR]\033[1;31m -p must be within 0 and 1 (including 0 and 1)\033[0m\n"); 192 | return -4; 193 | } 194 | if ((mo->flag & MM_F_FOR_ONLY) && (mo->flag & MM_F_REV_ONLY)) { 195 | if (mm_verbose >= 1) 196 | fprintf(stderr, "[ERROR]\033[1;31m --for-only and --rev-only can't be applied at the same time\033[0m\n"); 197 | return -3; 198 | } 199 | if (mo->e <= 0 || mo->q <= 0) { 200 | if (mm_verbose >= 1) 201 | fprintf(stderr, "[ERROR]\033[1;31m -O and -E must be positive\033[0m\n"); 202 | return -1; 203 | } 204 | if ((mo->q != mo->q2 || mo->e != mo->e2) && !(mo->e > mo->e2 && mo->q + mo->e < mo->q2 + mo->e2)) { 205 | if (mm_verbose >= 1) 206 | fprintf(stderr, "[ERROR]\033[1;31m dual gap penalties violating E1>E2 and O1+E1q + mo->e) + (mo->q2 + mo->e2) > 127) { 210 | if (mm_verbose >= 1) 211 | fprintf(stderr, "[ERROR]\033[1;31m scoring system violating ({-O}+{-E})+({-O2}+{-E2}) <= 127\033[0m\n"); 212 | 
return -1; 213 | } 214 | if (mo->zdrop < mo->zdrop_inv) { 215 | if (mm_verbose >= 1) 216 | fprintf(stderr, "[ERROR]\033[1;31m Z-drop should not be less than inversion-Z-drop\033[0m\n"); 217 | return -5; 218 | } 219 | if ((mo->flag & MM_F_NO_PRINT_2ND) && (mo->flag & MM_F_ALL_CHAINS)) { 220 | if (mm_verbose >= 1) 221 | fprintf(stderr, "[ERROR]\033[1;31m -X/-P and --secondary=no can't be applied at the same time\033[0m\n"); 222 | return -5; 223 | } 224 | if ((mo->flag & MM_F_QSTRAND) && ((mo->flag & (MM_F_OUT_SAM|MM_F_SPLICE|MM_F_FRAG_MODE)) || (io->flag & MM_I_HPC))) { 225 | if (mm_verbose >= 1) 226 | fprintf(stderr, "[ERROR]\033[1;31m --qstrand doesn't work with -a, -H, --frag or --splice\033[0m\n"); 227 | return -5; 228 | } 229 | return 0; 230 | } 231 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/pe.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "mmpriv.h" 4 | #include "kvec.h" 5 | 6 | void mm_select_sub_multi(void *km, float pri_ratio, float pri1, float pri2, int max_gap_ref, int min_diff, int best_n, int n_segs, const int *qlens, int *n_, mm_reg1_t *r) 7 | { 8 | if (pri_ratio > 0.0f && *n_ > 0) { 9 | int i, k, n = *n_, n_2nd = 0; 10 | int max_dist = n_segs == 2? 
qlens[0] + qlens[1] + max_gap_ref : 0; 11 | for (i = k = 0; i < n; ++i) { 12 | int to_keep = 0; 13 | if (r[i].parent == i) { // primary 14 | to_keep = 1; 15 | } else if (r[i].score + min_diff >= r[r[i].parent].score) { 16 | to_keep = 1; 17 | } else { 18 | mm_reg1_t *p = &r[r[i].parent], *q = &r[i]; 19 | if (p->rev == q->rev && p->rid == q->rid && q->re - p->rs < max_dist && p->re - q->rs < max_dist) { // child and parent are close on the ref 20 | if (q->score >= p->score * pri1) 21 | to_keep = 1; 22 | } else { 23 | int is_par_both = (n_segs == 2 && p->qs < qlens[0] && p->qe > qlens[0]); 24 | int is_chi_both = (n_segs == 2 && q->qs < qlens[0] && q->qe > qlens[0]); 25 | if (is_chi_both || is_chi_both == is_par_both) { 26 | if (q->score >= p->score * pri_ratio) 27 | to_keep = 1; 28 | } else { // the remaining case: is_chi_both == 0 && is_par_both == 1 29 | if (q->score >= p->score * pri2) 30 | to_keep = 1; 31 | } 32 | } 33 | } 34 | if (to_keep && r[i].parent != i) { 35 | if (n_2nd++ >= best_n) to_keep = 0; // don't keep if there are too many secondary hits 36 | } 37 | if (to_keep) r[k++] = r[i]; 38 | else if (r[i].p) free(r[i].p); 39 | } 40 | if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync() 41 | *n_ = k; 42 | } 43 | } 44 | 45 | /* Set pe_thru on both hits when each end has exactly one hit with id == parent, the two share rid and rev, their rs and re each differ by less than 3, and one hit starts at query position 0 while the other ends at its read length. */ void mm_set_pe_thru(const int *qlens, int *n_regs, mm_reg1_t **regs) 46 | { 47 | int s, i, n_pri[2], pri[2]; 48 | n_pri[0] = n_pri[1] = 0; 49 | pri[0] = pri[1] = -1; 50 | for (s = 0; s < 2; ++s) 51 | for (i = 0; i < n_regs[s]; ++i) 52 | if (regs[s][i].id == regs[s][i].parent) 53 | ++n_pri[s], pri[s] = i; 54 | if (n_pri[0] == 1 && n_pri[1] == 1) { 55 | mm_reg1_t *p = &regs[0][pri[0]]; 56 | mm_reg1_t *q = &regs[1][pri[1]]; 57 | if (p->rid == q->rid && p->rev == q->rev && abs(p->rs - q->rs) < 3 && abs(p->re - q->re) < 3 58 | && ((p->qs == 0 && qlens[1] - q->qe == 0) || (q->qs == 0 && qlens[0] - p->qe == 0))) 59 | { 60 | p->pe_thru = q->pe_thru = 1; 61 | } 62 | } 63 | } 64 | 65 | #include "ksort.h" 66 | 67 | typedef struct { 68 | int 
s, rev; 69 | uint64_t key; 70 | mm_reg1_t *r; 71 | } pair_arr_t; 72 | 73 | #define sort_key_pair(a) ((a).key) 74 | KRADIX_SORT_INIT(pair, pair_arr_t, sort_key_pair, 8) 75 | 76 | /* Pair hits of the two read ends: candidate pairs are scored by their summed dp_max, and the best-scoring pair is flagged proper_frag. */ void mm_pair(void *km, int max_gap_ref, int pe_bonus, int sub_diff, int match_sc, const int *qlens, int *n_regs, mm_reg1_t **regs) 77 | { 78 | int i, j, s, n, last[2], dp_thres, segs = 0, max_idx[2]; 79 | int64_t max; 80 | pair_arr_t *a; 81 | kvec_t(uint64_t) sc = {0,0,0}; 82 | 83 | a = (pair_arr_t*)kmalloc(km, (n_regs[0] + n_regs[1]) * sizeof(pair_arr_t)); 84 | for (s = n = 0, dp_thres = 0; s < 2; ++s) { 85 | int max = 0; 86 | for (i = 0; i < n_regs[s]; ++i) { 87 | a[n].s = s; 88 | a[n].r = &regs[s][i]; 89 | a[n].rev = a[n].r->rev; 90 | a[n].key = (uint64_t)a[n].r->rid << 32 | a[n].r->rs<<1 | (s^a[n].rev); 91 | max = max > a[n].r->p->dp_max? max : a[n].r->p->dp_max; 92 | ++n; 93 | segs |= 1<<s; /* NOTE(review): lines 93-115 were stripped in extraction and are restored from memory of upstream pe.c; verify */ 94 | } 95 | dp_thres += max; 96 | } 97 | if (segs != 3) { 98 | kfree(km, a); // only one end is mapped 99 | return; 100 | } 101 | dp_thres -= pe_bonus; 102 | if (dp_thres < 0) dp_thres = 0; 103 | radix_sort_pair(a, a + n); 104 | 105 | max = -1; 106 | max_idx[0] = max_idx[1] = -1; 107 | last[0] = last[1] = -1; 108 | kv_resize(uint64_t, km, sc, (size_t)n); 109 | for (i = 0; i < n; ++i) { 110 | if (a[i].key & 1) { // reverse first read or forward second read 111 | mm_reg1_t *q, *r; 112 | if (last[a[i].rev] < 0) continue; 113 | r = a[i].r; 114 | q = a[last[a[i].rev]].r; 115 | if (r->rid != q->rid || r->rs - q->re > max_gap_ref) continue; 116 | for (j = last[a[i].rev]; j >= 0; --j) { 117 | int64_t score; 118 | if (a[j].rev != a[i].rev || a[j].s == a[i].s) continue; 119 | q = a[j].r; 120 | if (r->rid != q->rid || r->rs - q->re > max_gap_ref) break; 121 | if (r->p->dp_max + q->p->dp_max < dp_thres) continue; 122 | score = (int64_t)(r->p->dp_max + q->p->dp_max) << 32 | (r->hash + q->hash); 123 | if (score > max) 124 | max = score, max_idx[a[j].s] = j, max_idx[a[i].s] = i; 125 | kv_push(uint64_t, km, sc, score); 126 | } 127 | } else { // forward first read or reverse second read 128 | last[a[i].rev] = i; 129 | } 130 | } 131 | if (sc.n > 1) 132 | radix_sort_64(sc.a, sc.a + sc.n); 133 | 134 | if (sc.n > 0 && max > 0) { // found at least one pair 135 | int n_sub = 0, mapq_pe; 136 | mm_reg1_t *r[2]; 137 | r[0] = a[max_idx[0]].r, r[1] = a[max_idx[1]].r; 138 | r[0]->proper_frag = r[1]->proper_frag = 1; 139 | for (s = 0; s < 2; ++s) { 140 | if (r[s]->id != r[s]->parent) { // then lift to primary and update parent 141 | mm_reg1_t *p = &regs[s][r[s]->parent]; 142 | for (i = 0; i < n_regs[s]; ++i) 143 
| if (regs[s][i].parent == p->id) 144 | regs[s][i].parent = r[s]->id; 145 | p->mapq = 0; 146 | } 147 | if (!r[s]->sam_pri) { // then sync sam_pri 148 | for (i = 0; i < n_regs[s]; ++i) 149 | regs[s][i].sam_pri = 0; 150 | r[s]->sam_pri = 1; 151 | } 152 | } 153 | mapq_pe = r[0]->mapq > r[1]->mapq? r[0]->mapq : r[1]->mapq; 154 | for (i = 0; i < (int)sc.n; ++i) 155 | if ((sc.a[i]>>32) + sub_diff >= (uint64_t)max>>32) 156 | ++n_sub; 157 | if (sc.n > 1) { 158 | int mapq_pe_alt; 159 | mapq_pe_alt = (int)(6.02f * ((max>>32) - (sc.a[sc.n - 2]>>32)) / match_sc - 4.343f * logf(n_sub)); // n_sub > 0 because it counts the optimal, too 160 | mapq_pe = mapq_pe < mapq_pe_alt? mapq_pe : mapq_pe_alt; 161 | } 162 | if (r[0]->mapq < mapq_pe) r[0]->mapq = (int)(.2f * r[0]->mapq + .8f * mapq_pe + .499f); 163 | if (r[1]->mapq < mapq_pe) r[1]->mapq = (int)(.2f * r[1]->mapq + .8f * mapq_pe + .499f); 164 | if (sc.n == 1) { 165 | if (r[0]->mapq < 2) r[0]->mapq = 2; 166 | if (r[1]->mapq < 2) r[1]->mapq = 2; 167 | } else if ((uint64_t)max>>32 > sc.a[sc.n - 2]>>32) { 168 | if (r[0]->mapq < 1) r[0]->mapq = 1; 169 | if (r[1]->mapq < 1) r[1]->mapq = 1; 170 | } 171 | } 172 | 173 | kfree(km, a); 174 | kfree(km, sc.a); 175 | 176 | mm_set_pe_thru(qlens, n_regs, regs); 177 | } 178 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/python/README.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | Mappy: Minimap2 Python Binding 3 | ============================== 4 | 5 | Mappy provides a convenient interface to `minimap2 6 | `_, a fast and accurate C program to align 7 | genomic and transcribe nucleotide sequences. 8 | 9 | Installation 10 | ------------ 11 | 12 | Mappy depends on `zlib `_. It can be installed with `pip 13 | `_: 14 | 15 | .. code:: shell 16 | 17 | pip install --user mappy 18 | 19 | or from the minimap2 github repo (`Cython `_ required): 20 | 21 | .. 
code:: shell 22 | 23 | git clone https://github.com/lh3/minimap2 24 | cd minimap2 25 | python setup.py install 26 | 27 | Usage 28 | ----- 29 | 30 | The following Python script demonstrates the key functionality of mappy: 31 | 32 | .. code:: python 33 | 34 | import mappy as mp 35 | a = mp.Aligner("test/MT-human.fa") # load or build index 36 | if not a: raise Exception("ERROR: failed to load/build index") 37 | s = a.seq("MT_human", 100, 200) # retrieve a subsequence from the index 38 | print(mp.revcomp(s)) # reverse complement 39 | for name, seq, qual in mp.fastx_read("test/MT-orang.fa"): # read a fasta/q sequence 40 | for hit in a.map(seq): # traverse alignments 41 | print("{}\t{}\t{}\t{}".format(hit.ctg, hit.r_st, hit.r_en, hit.cigar_str)) 42 | 43 | APIs 44 | ---- 45 | 46 | Mappy implements two classes and two global functions. 47 | 48 | Class mappy.Aligner 49 | ~~~~~~~~~~~~~~~~~~~ 50 | 51 | .. code:: python 52 | 53 | mappy.Aligner(fn_idx_in=None, preset=None, ...) 54 | 55 | This constructor accepts the following arguments: 56 | 57 | * **fn_idx_in**: index or sequence file name. Minimap2 automatically tests the 58 | file type. If a sequence file is provided, minimap2 builds an index. The 59 | sequence file can be optionally gzip'd. This option has no effect if **seq** 60 | is set. 61 | 62 | * **seq**: a single sequence to index. The sequence name will be set to 63 | :code:`N/A`. 64 | 65 | * **preset**: minimap2 preset. Currently, minimap2 supports the following 66 | presets: **sr** for single-end short reads; **map-pb** for PacBio 67 | read-to-reference mapping; **map-ont** for Oxford Nanopore read mapping; 68 | **splice** for long-read spliced alignment; **asm5** for assembly-to-assembly 69 | alignment; **asm10** for full genome alignment of closely related species. Note 70 | that the Python module does not support all-vs-all read overlapping. 
71 | 72 | * **k**: k-mer length, no larger than 28 73 | 74 | * **w**: minimizer window size, no larger than 255 75 | 76 | * **min_cnt**: minimum number of minimizers on a chain 77 | 78 | * **min_chain_score**: minimum chaining score 79 | 80 | * **bw**: chaining and alignment band width 81 | 82 | * **best_n**: max number of alignments to return 83 | 84 | * **n_threads**: number of indexing threads; 3 by default 85 | 86 | * **extra_flags**: additional flags defined in minimap.h 87 | 88 | * **fn_idx_out**: name of file to which the index is written. This parameter 89 | has no effect if **seq** is set. 90 | 91 | * **scoring**: scoring system. It is a tuple/list consisting of 4, 6 or 7 92 | positive integers. The first 4 elements specify match scoring, mismatch 93 | penalty, gap open and gap extension penalty. The 5th and 6th elements, if 94 | present, set long-gap open and long-gap extension penalty. The 7th sets a 95 | mismatch penalty involving ambiguous bases. 96 | 97 | .. code:: python 98 | 99 | mappy.Aligner.map(seq, seq2=None, cs=False, MD=False) 100 | 101 | This method aligns :code:`seq` against the index. It is a generator, *yielding* 102 | a series of :code:`mappy.Alignment` objects. If :code:`seq2` is present, mappy 103 | performs paired-end alignment, assuming the two ends are in the FR orientation. 104 | Alignments of the two ends can be distinguished by the :code:`read_num` field 105 | (see Class mappy.Alignment below). Argument :code:`cs` asks mappy to generate 106 | the :code:`cs` tag; :code:`MD` is similar. These two arguments might slightly 107 | degrade performance and are not enabled by default. 108 | 109 | .. code:: python 110 | 111 | mappy.Aligner.seq(name, start=0, end=0x7fffffff) 112 | 113 | This method retrieves a (sub)sequence from the index and returns it as a Python 114 | string. :code:`None` is returned if :code:`name` is not present in the index or 115 | the start/end coordinates are invalid. 116 | 117 | .. 
code:: python 118 | 119 | mappy.Aligner.seq_names 120 | 121 | This property gives the array of sequence names in the index. 122 | 123 | Class mappy.Alignment 124 | ~~~~~~~~~~~~~~~~~~~~~ 125 | 126 | This class describes an alignment. An object of this class has the following 127 | properties: 128 | 129 | * **ctg**: name of the reference sequence the query is mapped to 130 | 131 | * **ctg_len**: total length of the reference sequence 132 | 133 | * **r_st** and **r_en**: start and end positions on the reference 134 | 135 | * **q_st** and **q_en**: start and end positions on the query 136 | 137 | * **strand**: +1 if on the forward strand; -1 if on the reverse strand 138 | 139 | * **mapq**: mapping quality 140 | 141 | * **blen**: length of the alignment, including both alignment matches and gaps 142 | but excluding ambiguous bases. 143 | 144 | * **mlen**: length of the matching bases in the alignment, excluding ambiguous 145 | base matches. 146 | 147 | * **NM**: number of mismatches, gaps and ambiguous positions in the alignment 148 | 149 | * **trans_strand**: transcript strand. +1 if on the forward strand; -1 if on the 150 | reverse strand; 0 if unknown 151 | 152 | * **is_primary**: if the alignment is primary (typically the best and the first 153 | to generate) 154 | 155 | * **read_num**: read number that the alignment corresponds to; 1 for the first 156 | read and 2 for the second read 157 | 158 | * **cigar_str**: CIGAR string 159 | 160 | * **cigar**: CIGAR returned as an array of shape :code:`(n_cigar,2)`. The two 161 | numbers give the length and the operator of each CIGAR operation. 162 | 163 | * **MD**: the :code:`MD` tag as in the SAM format. It is an empty string unless 164 | the :code:`MD` argument is applied when calling :code:`mappy.Aligner.map()`. 165 | 166 | * **cs**: the :code:`cs` tag. 
167 | 168 | An :code:`Alignment` object can be converted to a string with :code:`str()` in 169 | the following format: 170 | 171 | :: 172 | 173 | q_st q_en strand ctg ctg_len r_st r_en mlen blen mapq tp:A:{P,S} ts:A:{+,-,.} cg:Z:cigar_str 174 | 175 | It is effectively the PAF format without the QueryName and QueryLength columns 176 | (the first two columns in PAF). A :code:`cs:Z:` tag is appended if the :code:`cs` tag was generated. 177 | 178 | Miscellaneous Functions 179 | ~~~~~~~~~~~~~~~~~~~~~~~ 180 | 181 | .. code:: python 182 | 183 | mappy.fastx_read(fn, read_comment=False) 184 | 185 | This generator function opens a FASTA/FASTQ file and *yields* a 186 | :code:`(name,seq,qual)` tuple for each sequence entry. The input file may be 187 | optionally gzip'd. If :code:`read_comment` is True, this generator yields 188 | a :code:`(name,seq,qual,comment)` tuple instead. 189 | 190 | .. code:: python 191 | 192 | mappy.revcomp(seq) 193 | 194 | Return the reverse complement of DNA string :code:`seq`. This function 195 | recognizes IUB code and preserves the letter cases. Uracil :code:`U` is 196 | complemented to :code:`A`. 
197 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/python/cmappy.h: -------------------------------------------------------------------------------- 1 | #ifndef CMAPPY_H 2 | #define CMAPPY_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "minimap.h" 8 | #include "kseq.h" 9 | KSEQ_DECLARE(gzFile) 10 | 11 | typedef struct { 12 | const char *ctg; 13 | int32_t ctg_start, ctg_end; 14 | int32_t qry_start, qry_end; 15 | int32_t blen, mlen, NM, ctg_len; 16 | uint8_t mapq, is_primary; 17 | int8_t strand, trans_strand; 18 | int32_t seg_id; 19 | int32_t n_cigar32; 20 | uint32_t *cigar32; 21 | } mm_hitpy_t; 22 | 23 | static inline void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h) 24 | { 25 | h->ctg = mi->seq[r->rid].name; 26 | h->ctg_len = mi->seq[r->rid].len; 27 | h->ctg_start = r->rs, h->ctg_end = r->re; 28 | h->qry_start = r->qs, h->qry_end = r->qe; 29 | h->strand = r->rev? -1 : 1; 30 | h->mapq = r->mapq; 31 | h->mlen = r->mlen; 32 | h->blen = r->blen; 33 | h->NM = r->blen - r->mlen + r->p->n_ambi; 34 | h->trans_strand = r->p->trans_strand == 1? 1 : r->p->trans_strand == 2? -1 : 0; 35 | h->is_primary = (r->id == r->parent); 36 | h->seg_id = r->seg_id; 37 | h->n_cigar32 = r->p->n_cigar; 38 | h->cigar32 = r->p->cigar; 39 | } 40 | 41 | static inline void mm_free_reg1(mm_reg1_t *r) 42 | { 43 | free(r->p); 44 | } 45 | 46 | static inline kseq_t *mm_fastx_open(const char *fn) 47 | { 48 | gzFile fp; 49 | fp = fn && strcmp(fn, "-") != 0? 
gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); 50 | return kseq_init(fp); 51 | } 52 | 53 | static inline void mm_fastx_close(kseq_t *ks) 54 | { 55 | gzFile fp; 56 | fp = ks->f->f; 57 | kseq_destroy(ks); 58 | gzclose(fp); 59 | } 60 | 61 | static inline int mm_verbose_level(int v) 62 | { 63 | if (v >= 0) mm_verbose = v; 64 | return mm_verbose; 65 | } 66 | 67 | static inline void mm_reset_timer(void) 68 | { 69 | extern double realtime(void); 70 | mm_realtime0 = realtime(); 71 | } 72 | /* mm_map_aux(): map seq1 alone (seq2 == NULL, via mm_map) or seq1 together with a reverse-complemented copy of seq2 (via mm_map_frag). Returns one array holding the hits of both reads, with *n_regs set to the total number of hits. The mapping runs between Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS. */ 73 | extern unsigned char seq_comp_table[256]; 74 | static inline mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt) 75 | { 76 | mm_reg1_t *r; 77 | 78 | Py_BEGIN_ALLOW_THREADS 79 | if (seq2 == 0) { 80 | r = mm_map(mi, strlen(seq1), seq1, n_regs, b, opt, NULL); 81 | } else { 82 | int _n_regs[2]; 83 | mm_reg1_t *regs[2]; 84 | char *seq[2]; 85 | int i, len[2]; 86 | 87 | len[0] = strlen(seq1); 88 | len[1] = strlen(seq2); 89 | seq[0] = (char*)seq1; 90 | seq[1] = strdup(seq2); 91 | for (i = 0; i < len[1]>>1; ++i) { 92 | int t = (uint8_t)seq[1][len[1] - i - 1]; /* cast keeps the table index non-negative where char is signed */ 93 | seq[1][len[1] - i - 1] = seq_comp_table[(uint8_t)seq[1][i]]; 94 | seq[1][i] = seq_comp_table[t]; 95 | } 96 | if (len[1]&1) seq[1][len[1]>>1] = seq_comp_table[(uint8_t)seq[1][len[1]>>1]]; 97 | mm_map_frag(mi, 2, len, (const char**)seq, _n_regs, regs, b, opt, NULL); 98 | for (i = 0; i < _n_regs[1]; ++i) 99 | regs[1][i].rev = !regs[1][i].rev; 100 | *n_regs = _n_regs[0] + _n_regs[1]; 101 | regs[0] = (mm_reg1_t*)realloc(regs[0], sizeof(mm_reg1_t) * (*n_regs)); 102 | memcpy(&regs[0][_n_regs[0]], regs[1], _n_regs[1] * sizeof(mm_reg1_t)); 103 | free(regs[1]); free(seq[1]); /* seq[1] is the strdup()'d copy made above; it was previously leaked */ 104 | r = regs[0]; 105 | } 106 | Py_END_ALLOW_THREADS 107 | 108 | return r; 109 | } 110 | 111 | static inline char *mappy_revcomp(int len, const uint8_t *seq) 112 | { 113 | int i; 114 | char *rev; 115 | rev = (char*)malloc(len + 1); 116 | for (i = 0; i < len; ++i) 117 | rev[len - i - 1] = seq_comp_table[seq[i]]; 118 | 
rev[len] = 0; 119 | return rev; 120 | } 121 | 122 | static char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *len) 123 | { 124 | int i, rid; 125 | char *s; 126 | *len = 0; 127 | rid = mm_idx_name2id(mi, name); 128 | if (rid < 0) return 0; 129 | if ((uint32_t)st >= mi->seq[rid].len || st >= en) return 0; 130 | if (en < 0 || (uint32_t)en > mi->seq[rid].len) 131 | en = mi->seq[rid].len; 132 | s = (char*)malloc(en - st + 1); 133 | *len = mm_idx_getseq(mi, rid, st, en, (uint8_t*)s); 134 | for (i = 0; i < *len; ++i) 135 | s[i] = "ACGTN"[(uint8_t)s[i]]; 136 | s[*len] = 0; 137 | return s; 138 | } 139 | 140 | static mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int len) 141 | { 142 | const char *fake_name = "N/A"; 143 | char *s; 144 | mm_idx_t *mi; 145 | s = (char*)calloc(len + 1, 1); 146 | memcpy(s, seq, len); 147 | mi = mm_idx_str(w, k, is_hpc, bucket_bits, 1, (const char**)&s, (const char**)&fake_name); 148 | free(s); 149 | return mi; 150 | } 151 | 152 | #endif 153 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/python/cmappy.pxd: -------------------------------------------------------------------------------- 1 | from libc.stdint cimport int8_t, uint8_t, int32_t, int64_t, uint32_t, uint64_t 2 | 3 | cdef extern from "minimap.h": 4 | # 5 | # Options 6 | # 7 | ctypedef struct mm_idxopt_t: 8 | short k, w, flag, bucket_bits 9 | int64_t mini_batch_size 10 | uint64_t batch_size 11 | 12 | ctypedef struct mm_mapopt_t: 13 | int64_t flag 14 | int seed 15 | int sdust_thres 16 | 17 | int max_qlen 18 | 19 | int bw, bw_long 20 | int max_gap, max_gap_ref 21 | int max_frag_len 22 | int max_chain_skip, max_chain_iter 23 | int min_cnt 24 | int min_chain_score 25 | float chain_gap_scale 26 | int rmq_size_cap, rmq_inner_dist 27 | int rmq_rescue_size 28 | float rmq_rescue_ratio 29 | 30 | float mask_level 31 | int mask_len 32 | float pri_ratio 33 | int best_n 34 | 35 | 
float alt_drop 36 | 37 | int a, b, q, e, q2, e2 38 | int sc_ambi 39 | int noncan 40 | int junc_bonus 41 | int zdrop, zdrop_inv 42 | int end_bonus 43 | int min_dp_max 44 | int min_ksw_len 45 | int anchor_ext_len, anchor_ext_shift 46 | float max_clip_ratio 47 | 48 | int rank_min_len 49 | float rank_frac 50 | 51 | int pe_ori, pe_bonus 52 | 53 | float mid_occ_frac 54 | int32_t min_mid_occ 55 | int32_t mid_occ 56 | int32_t max_occ 57 | int64_t mini_batch_size 58 | int64_t max_sw_mat 59 | int64_t cap_kalloc 60 | 61 | const char *split_prefix 62 | 63 | int mm_set_opt(char *preset, mm_idxopt_t *io, mm_mapopt_t *mo) 64 | int mm_verbose 65 | 66 | # 67 | # Indexing 68 | # 69 | ctypedef struct mm_idx_seq_t: 70 | char *name 71 | uint64_t offset 72 | uint32_t len 73 | 74 | ctypedef struct mm_idx_bucket_t: 75 | pass 76 | 77 | ctypedef struct mm_idx_t: 78 | int32_t b, w, k, flag 79 | uint32_t n_seq 80 | mm_idx_seq_t *seq 81 | uint32_t *S 82 | mm_idx_bucket_t *B 83 | void *km 84 | void *h 85 | 86 | ctypedef struct mm_idx_reader_t: 87 | pass 88 | 89 | mm_idx_reader_t *mm_idx_reader_open(const char *fn, const mm_idxopt_t *opt, const char *fn_out) 90 | mm_idx_t *mm_idx_reader_read(mm_idx_reader_t *r, int n_threads) 91 | void mm_idx_reader_close(mm_idx_reader_t *r) 92 | void mm_idx_destroy(mm_idx_t *mi) 93 | void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi) 94 | 95 | int mm_idx_index_name(mm_idx_t *mi) 96 | 97 | # 98 | # Mapping (key struct defined in cmappy.h below) 99 | # 100 | ctypedef struct mm_reg1_t: 101 | pass 102 | 103 | ctypedef struct mm_tbuf_t: 104 | pass 105 | 106 | mm_tbuf_t *mm_tbuf_init() 107 | void mm_tbuf_destroy(mm_tbuf_t *b) 108 | void *mm_tbuf_get_km(mm_tbuf_t *b) 109 | int mm_gen_cs(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq, int no_iden) 110 | int mm_gen_MD(void *km, char **buf, int *max_len, const mm_idx_t *mi, const mm_reg1_t *r, const char *seq) 111 | 112 | # 113 | # Helper header (because it is hard to 
expose mm_reg1_t with Cython) 114 | # 115 | cdef extern from "cmappy.h": 116 | ctypedef struct mm_hitpy_t: 117 | const char *ctg 118 | int32_t ctg_start, ctg_end 119 | int32_t qry_start, qry_end 120 | int32_t blen, mlen, NM, ctg_len 121 | uint8_t mapq, is_primary 122 | int8_t strand, trans_strand 123 | int32_t seg_id 124 | int32_t n_cigar32 125 | uint32_t *cigar32 126 | 127 | void mm_reg2hitpy(const mm_idx_t *mi, mm_reg1_t *r, mm_hitpy_t *h) 128 | void mm_free_reg1(mm_reg1_t *r) 129 | mm_reg1_t *mm_map_aux(const mm_idx_t *mi, const char *seq1, const char *seq2, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt) 130 | char *mappy_fetch_seq(const mm_idx_t *mi, const char *name, int st, int en, int *l) 131 | mm_idx_t *mappy_idx_seq(int w, int k, int is_hpc, int bucket_bits, const char *seq, int l) 132 | 133 | ctypedef struct kstring_t: 134 | unsigned l, m 135 | char *s 136 | 137 | ctypedef struct kstream_t: 138 | pass 139 | 140 | ctypedef struct kseq_t: 141 | kstring_t name, comment, seq, qual 142 | int last_char 143 | kstream_t *f 144 | 145 | kseq_t *mm_fastx_open(const char *fn) 146 | void mm_fastx_close(kseq_t *ks) 147 | int kseq_read(kseq_t *seq) 148 | 149 | char *mappy_revcomp(int l, const uint8_t *seq) 150 | int mm_verbose_level(int v) 151 | void mm_reset_timer() 152 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/python/mappy.pyx: -------------------------------------------------------------------------------- 1 | from libc.stdint cimport uint8_t, int8_t 2 | from libc.stdlib cimport free 3 | cimport cmappy 4 | import sys 5 | 6 | __version__ = '2.22' 7 | 8 | cmappy.mm_reset_timer() 9 | 10 | cdef class Alignment: 11 | cdef int _ctg_len, _r_st, _r_en 12 | cdef int _q_st, _q_en 13 | cdef int _NM, _mlen, _blen 14 | cdef int8_t _strand, _trans_strand 15 | cdef uint8_t _mapq, _is_primary 16 | cdef int _seg_id 17 | cdef _ctg, _cigar, _cs, _MD # these are python objects 18 | 19 | def __cinit__(self, ctg, cl, cs, 
ce, strand, qs, qe, mapq, cigar, is_primary, mlen, blen, NM, trans_strand, seg_id, cs_str, MD_str): 20 | self._ctg = ctg if isinstance(ctg, str) else ctg.decode() 21 | self._ctg_len, self._r_st, self._r_en = cl, cs, ce 22 | self._strand, self._q_st, self._q_en = strand, qs, qe 23 | self._NM, self._mlen, self._blen = NM, mlen, blen 24 | self._mapq = mapq 25 | self._cigar = cigar 26 | self._is_primary = is_primary 27 | self._trans_strand = trans_strand 28 | self._seg_id = seg_id 29 | self._cs = cs_str 30 | self._MD = MD_str 31 | 32 | @property 33 | def ctg(self): return self._ctg 34 | 35 | @property 36 | def ctg_len(self): return self._ctg_len 37 | 38 | @property 39 | def r_st(self): return self._r_st 40 | 41 | @property 42 | def r_en(self): return self._r_en 43 | 44 | @property 45 | def strand(self): return self._strand 46 | 47 | @property 48 | def trans_strand(self): return self._trans_strand 49 | 50 | @property 51 | def blen(self): return self._blen 52 | 53 | @property 54 | def mlen(self): return self._mlen 55 | 56 | @property 57 | def NM(self): return self._NM 58 | 59 | @property 60 | def is_primary(self): return (self._is_primary != 0) 61 | 62 | @property 63 | def q_st(self): return self._q_st 64 | 65 | @property 66 | def q_en(self): return self._q_en 67 | 68 | @property 69 | def mapq(self): return self._mapq 70 | 71 | @property 72 | def cigar(self): return self._cigar 73 | 74 | @property 75 | def read_num(self): return self._seg_id + 1 76 | 77 | @property 78 | def cs(self): return self._cs 79 | 80 | @property 81 | def MD(self): return self._MD 82 | 83 | @property 84 | def cigar_str(self): 85 | return "".join(map(lambda x: str(x[0]) + 'MIDNSHP=XB'[x[1]], self._cigar)) 86 | 87 | def __str__(self): 88 | if self._strand > 0: strand = '+' 89 | elif self._strand < 0: strand = '-' 90 | else: strand = '?' 
91 | if self._is_primary != 0: tp = 'tp:A:P' 92 | else: tp = 'tp:A:S' 93 | if self._trans_strand > 0: ts = 'ts:A:+' 94 | elif self._trans_strand < 0: ts = 'ts:A:-' 95 | else: ts = 'ts:A:.' 96 | a = [str(self._q_st), str(self._q_en), strand, self._ctg, str(self._ctg_len), str(self._r_st), str(self._r_en), 97 | str(self._mlen), str(self._blen), str(self._mapq), tp, ts, "cg:Z:" + self.cigar_str] 98 | if self._cs != "": a.append("cs:Z:" + self._cs) 99 | return "\t".join(a) 100 | 101 | cdef class ThreadBuffer: 102 | cdef cmappy.mm_tbuf_t *_b 103 | 104 | def __cinit__(self): 105 | self._b = cmappy.mm_tbuf_init() 106 | 107 | def __dealloc__(self): 108 | cmappy.mm_tbuf_destroy(self._b) 109 | 110 | cdef class Aligner: 111 | cdef cmappy.mm_idx_t *_idx 112 | cdef cmappy.mm_idxopt_t idx_opt 113 | cdef cmappy.mm_mapopt_t map_opt 114 | 115 | def __cinit__(self, fn_idx_in=None, preset=None, k=None, w=None, min_cnt=None, min_chain_score=None, min_dp_score=None, bw=None, best_n=None, n_threads=3, fn_idx_out=None, max_frag_len=None, extra_flags=None, seq=None, scoring=None): 116 | self._idx = NULL 117 | cmappy.mm_set_opt(NULL, &self.idx_opt, &self.map_opt) # set the default options 118 | if preset is not None: 119 | cmappy.mm_set_opt(str.encode(preset), &self.idx_opt, &self.map_opt) # apply preset 120 | self.map_opt.flag |= 4 # always perform alignment 121 | self.idx_opt.batch_size = 0x7fffffffffffffffL # always build a uni-part index 122 | if k is not None: self.idx_opt.k = k 123 | if w is not None: self.idx_opt.w = w 124 | if min_cnt is not None: self.map_opt.min_cnt = min_cnt 125 | if min_chain_score is not None: self.map_opt.min_chain_score = min_chain_score 126 | if min_dp_score is not None: self.map_opt.min_dp_max = min_dp_score 127 | if bw is not None: self.map_opt.bw = bw 128 | if best_n is not None: self.map_opt.best_n = best_n 129 | if max_frag_len is not None: self.map_opt.max_frag_len = max_frag_len 130 | if extra_flags is not None: self.map_opt.flag |= extra_flags 131 | 
if scoring is not None and len(scoring) >= 4: 132 | self.map_opt.a, self.map_opt.b = scoring[0], scoring[1] 133 | self.map_opt.q, self.map_opt.e = scoring[2], scoring[3] 134 | self.map_opt.q2, self.map_opt.e2 = self.map_opt.q, self.map_opt.e 135 | if len(scoring) >= 6: 136 | self.map_opt.q2, self.map_opt.e2 = scoring[4], scoring[5] 137 | if len(scoring) >= 7: 138 | self.map_opt.sc_ambi = scoring[6] 139 | 140 | cdef cmappy.mm_idx_reader_t *r; 141 | 142 | if seq is None: 143 | if fn_idx_out is None: 144 | r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, NULL) 145 | else: 146 | r = cmappy.mm_idx_reader_open(str.encode(fn_idx_in), &self.idx_opt, str.encode(fn_idx_out)) 147 | if r is not NULL: 148 | self._idx = cmappy.mm_idx_reader_read(r, n_threads) # NB: ONLY read the first part 149 | cmappy.mm_idx_reader_close(r) 150 | cmappy.mm_mapopt_update(&self.map_opt, self._idx) 151 | cmappy.mm_idx_index_name(self._idx) 152 | else: 153 | self._idx = cmappy.mappy_idx_seq(self.idx_opt.w, self.idx_opt.k, self.idx_opt.flag&1, self.idx_opt.bucket_bits, str.encode(seq), len(seq)) 154 | cmappy.mm_mapopt_update(&self.map_opt, self._idx) 155 | self.map_opt.mid_occ = 1000 # don't filter high-occ seeds 156 | 157 | def __dealloc__(self): 158 | if self._idx is not NULL: 159 | cmappy.mm_idx_destroy(self._idx) 160 | 161 | def __bool__(self): 162 | return (self._idx != NULL) 163 | 164 | def map(self, seq, seq2=None, buf=None, cs=False, MD=False, max_frag_len=None, extra_flags=None): 165 | cdef cmappy.mm_reg1_t *regs 166 | cdef cmappy.mm_hitpy_t h 167 | cdef ThreadBuffer b 168 | cdef int n_regs 169 | cdef char *cs_str = NULL 170 | cdef int l_cs_str, m_cs_str = 0 171 | cdef void *km 172 | cdef cmappy.mm_mapopt_t map_opt 173 | 174 | if self._idx == NULL: return 175 | map_opt = self.map_opt 176 | if max_frag_len is not None: map_opt.max_frag_len = max_frag_len 177 | if extra_flags is not None: map_opt.flag |= extra_flags 178 | 179 | if self._idx is NULL: return None 180 | if buf 
is None: b = ThreadBuffer() 181 | else: b = buf 182 | km = cmappy.mm_tbuf_get_km(b._b) 183 | 184 | _seq = seq if isinstance(seq, bytes) else seq.encode() 185 | if seq2 is None: 186 | regs = cmappy.mm_map_aux(self._idx, _seq, NULL, &n_regs, b._b, &map_opt) 187 | else: 188 | _seq2 = seq2 if isinstance(seq2, bytes) else seq2.encode() 189 | regs = cmappy.mm_map_aux(self._idx, _seq, _seq2, &n_regs, b._b, &map_opt) 190 | 191 | try: 192 | i = 0 193 | while i < n_regs: 194 | cmappy.mm_reg2hitpy(self._idx, &regs[i], &h) 195 | cigar, _cs, _MD = [], '', '' 196 | for k in range(h.n_cigar32): # convert the 32-bit CIGAR encoding to Python array 197 | c = h.cigar32[k] 198 | cigar.append([c>>4, c&0xf]) 199 | if cs or MD: # generate the cs and/or the MD tag, if requested 200 | if cs: 201 | l_cs_str = cmappy.mm_gen_cs(km, &cs_str, &m_cs_str, self._idx, &regs[i], _seq, 1) 202 | _cs = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode() 203 | if MD: 204 | l_cs_str = cmappy.mm_gen_MD(km, &cs_str, &m_cs_str, self._idx, &regs[i], _seq) 205 | _MD = cs_str[:l_cs_str] if isinstance(cs_str, str) else cs_str[:l_cs_str].decode() 206 | yield Alignment(h.ctg, h.ctg_len, h.ctg_start, h.ctg_end, h.strand, h.qry_start, h.qry_end, h.mapq, cigar, h.is_primary, h.mlen, h.blen, h.NM, h.trans_strand, h.seg_id, _cs, _MD) 207 | cmappy.mm_free_reg1(&regs[i]) 208 | i += 1 209 | finally: 210 | while i < n_regs: 211 | cmappy.mm_free_reg1(&regs[i]) 212 | i += 1 213 | free(regs) 214 | free(cs_str) 215 | # seq(): return the (sub)sequence fetched by mappy_fetch_seq() as a str, or None when there is no index or nothing was fetched 216 | def seq(self, str name, int start=0, int end=0x7fffffff): 217 | cdef int l 218 | cdef char *s 219 | if self._idx == NULL: return 220 | s = cmappy.mappy_fetch_seq(self._idx, name.encode(), start, end, &l) 221 | if l == 0: free(s); return None # s is NULL or an unused buffer here; free() accepts both 222 | r = s[:l] if isinstance(s, str) else s[:l].decode() 223 | free(s) 224 | return r 225 | 226 | @property 227 | def k(self): return self._idx.k 228 | 229 | @property 230 | def w(self): return self._idx.w 231 | 232 | @property 233 | def n_seq(self): return 
self._idx.n_seq 234 | 235 | @property 236 | def seq_names(self): 237 | cdef char *p 238 | if self._idx == NULL: return 239 | sn = [] 240 | for i in range(self._idx.n_seq): 241 | p = self._idx.seq[i].name 242 | s = p if isinstance(p, str) else p.decode() 243 | sn.append(s) 244 | return sn 245 | 246 | def fastx_read(fn, read_comment=False): 247 | cdef cmappy.kseq_t *ks 248 | ks = cmappy.mm_fastx_open(str.encode(fn)) 249 | if ks is NULL: return None 250 | while cmappy.kseq_read(ks) >= 0: 251 | if ks.qual.l > 0: qual = ks.qual.s if isinstance(ks.qual.s, str) else ks.qual.s.decode() 252 | else: qual = None 253 | name = ks.name.s if isinstance(ks.name.s, str) else ks.name.s.decode() 254 | seq = ks.seq.s if isinstance(ks.seq.s, str) else ks.seq.s.decode() 255 | if read_comment: 256 | if ks.comment.l > 0: comment = ks.comment.s if isinstance(ks.comment.s, str) else ks.comment.s.decode() 257 | else: comment = None 258 | yield name, seq, qual, comment 259 | else: 260 | yield name, seq, qual 261 | cmappy.mm_fastx_close(ks) 262 | 263 | def revcomp(seq): 264 | l = len(seq) 265 | bseq = seq if isinstance(seq, bytes) else seq.encode() 266 | cdef char *s = cmappy.mappy_revcomp(l, bseq) 267 | r = s[:l] if isinstance(s, str) else s[:l].decode() 268 | free(s) 269 | return r 270 | 271 | def verbose(v=None): 272 | if v is None: v = -1 273 | return cmappy.mm_verbose_level(v) 274 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/python/minimap2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import getopt 5 | import mappy as mp 6 | 7 | def main(argv): 8 | opts, args = getopt.getopt(argv[1:], "x:n:m:k:w:r:c") 9 | if len(args) < 2: 10 | print("Usage: minimap2.py [options] | ") 11 | print("Options:") 12 | print(" -x STR preset: sr, map-pb, map-ont, asm5, asm10 or splice") 13 | print(" -n INT mininum number of minimizers") 14 | print(" -m INT mininum 
chaining score") 15 | print(" -k INT k-mer length") 16 | print(" -w INT minimizer window length") 17 | print(" -r INT band width") 18 | print(" -c output the cs tag") 19 | sys.exit(1) 20 | 21 | preset = min_cnt = min_sc = k = w = bw = None 22 | out_cs = False 23 | for opt, arg in opts: 24 | if opt == '-x': preset = arg 25 | elif opt == '-n': min_cnt = int(arg) 26 | elif opt == '-m': min_chain_score = int(arg) 27 | elif opt == '-r': bw = int(arg) 28 | elif opt == '-k': k = int(arg) 29 | elif opt == '-w': w = int(arg) 30 | elif opt == '-c': out_cs = True 31 | 32 | a = mp.Aligner(args[0], preset=preset, min_cnt=min_cnt, min_chain_score=min_sc, k=k, w=w, bw=bw) 33 | if not a: raise Exception("ERROR: failed to load/build index file '{}'".format(args[0])) 34 | for name, seq, qual in mp.fastx_read(args[1]): # read one sequence 35 | for h in a.map(seq, cs=out_cs): # traverse hits 36 | print('{}\t{}\t{}'.format(name, len(seq), h)) 37 | 38 | if __name__ == "__main__": 39 | main(sys.argv) 40 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/sdust.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "kalloc.h" 5 | #include "kdq.h" 6 | #include "kvec.h" 7 | #include "sdust.h" 8 | 9 | #define SD_WLEN 3 10 | #define SD_WTOT (1<<(SD_WLEN<<1)) 11 | #define SD_WMSK (SD_WTOT - 1) 12 | 13 | typedef struct { 14 | int start, finish; 15 | int r, l; 16 | } perf_intv_t; 17 | 18 | typedef kvec_t(perf_intv_t) perf_intv_v; 19 | typedef kvec_t(uint64_t) uint64_v; 20 | 21 | KDQ_INIT(int) 22 | 23 | #if defined(_NO_NT4_TBL) || defined(_SDUST_MAIN) 24 | unsigned char seq_nt4_table[256] = { 25 | 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 26 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 27 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 28 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29 | 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 30 | 4, 4, 
4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 31 | 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 32 | 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 33 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 34 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 35 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 36 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 37 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 38 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 39 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 40 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 41 | }; 42 | #else 43 | extern unsigned char seq_nt4_table[256]; 44 | #endif 45 | 46 | struct sdust_buf_s { 47 | kdq_t(int) *w; 48 | perf_intv_v P; // the list of perfect intervals for the current window, sorted by descending start and then by ascending finish 49 | uint64_v res; // the result 50 | void *km; // memory pool 51 | }; 52 | 53 | sdust_buf_t *sdust_buf_init(void *km) 54 | { 55 | sdust_buf_t *buf; 56 | buf = (sdust_buf_t*)kcalloc(km, 1, sizeof(sdust_buf_t)); 57 | buf->km = km; 58 | buf->w = kdq_init(int, buf->km); 59 | kdq_resize(int, buf->w, 8); 60 | return buf; 61 | } 62 | 63 | void sdust_buf_destroy(sdust_buf_t *buf) 64 | { 65 | if (buf == 0) return; 66 | kdq_destroy(int, buf->w); 67 | kfree(buf->km, buf->P.a); kfree(buf->km, buf->res.a); kfree(buf->km, buf); 68 | } 69 | 70 | static inline void shift_window(int t, kdq_t(int) *w, int T, int W, int *L, int *rw, int *rv, int *cw, int *cv) 71 | { 72 | int s; 73 | if ((int)kdq_size(w) >= W - SD_WLEN + 1) { // TODO: is this right for SD_WLEN!=3? 
74 | s = *kdq_shift(int, w); 75 | *rw -= --cw[s]; 76 | if (*L > (int)kdq_size(w)) 77 | --*L, *rv -= --cv[s]; 78 | } 79 | kdq_push(int, w, t); 80 | ++*L; 81 | *rw += cw[t]++; 82 | *rv += cv[t]++; 83 | if (cv[t] * 10 > T<<1) { 84 | do { 85 | s = kdq_at(w, kdq_size(w) - *L); 86 | *rv -= --cv[s]; 87 | --*L; 88 | } while (s != t); 89 | } 90 | } 91 | 92 | static inline void save_masked_regions(void *km, uint64_v *res, perf_intv_v *P, int start) 93 | { 94 | int i, saved = 0; 95 | perf_intv_t *p; 96 | if (P->n == 0 || P->a[P->n - 1].start >= start) return; 97 | p = &P->a[P->n - 1]; 98 | if (res->n) { 99 | int s = res->a[res->n - 1]>>32, f = (uint32_t)res->a[res->n - 1]; 100 | if (p->start <= f) // if overlapping with or adjacent to the previous interval 101 | saved = 1, res->a[res->n - 1] = (uint64_t)s<<32 | (f > p->finish? f : p->finish); 102 | } 103 | if (!saved) kv_push(uint64_t, km, *res, (uint64_t)p->start<<32|p->finish); 104 | for (i = P->n - 1; i >= 0 && P->a[i].start < start; --i); // remove perfect intervals that have falled out of the window 105 | P->n = i + 1; 106 | } 107 | 108 | static void find_perfect(void *km, perf_intv_v *P, const kdq_t(int) *w, int T, int start, int L, int rv, const int *cv) 109 | { 110 | int c[SD_WTOT], r = rv, i, max_r = 0, max_l = 0; 111 | memcpy(c, cv, SD_WTOT * sizeof(int)); 112 | for (i = (long)kdq_size(w) - L - 1; i >= 0; --i) { 113 | int j, t = kdq_at(w, i), new_r, new_l; 114 | r += c[t]++; 115 | new_r = r, new_l = kdq_size(w) - i - 1; 116 | if (new_r * 10 > T * new_l) { 117 | for (j = 0; j < (int)P->n && P->a[j].start >= i + start; ++j) { // find insertion position 118 | perf_intv_t *p = &P->a[j]; 119 | if (max_r == 0 || p->r * max_l > max_r * p->l) 120 | max_r = p->r, max_l = p->l; 121 | } 122 | if (max_r == 0 || new_r * max_l >= max_r * new_l) { // then insert 123 | max_r = new_r, max_l = new_l; 124 | if (P->n == P->m) kv_resize(perf_intv_t, km, *P, P->n + 1); 125 | memmove(&P->a[j+1], &P->a[j], (P->n - j) * 
sizeof(perf_intv_t)); // make room 126 | ++P->n; 127 | P->a[j].start = i + start, P->a[j].finish = kdq_size(w) + (SD_WLEN - 1) + start; 128 | P->a[j].r = new_r, P->a[j].l = new_l; 129 | } 130 | } 131 | } 132 | } 133 | 134 | const uint64_t *sdust_core(const uint8_t *seq, int l_seq, int T, int W, int *n, sdust_buf_t *buf) 135 | { 136 | int rv = 0, rw = 0, L = 0, cv[SD_WTOT], cw[SD_WTOT]; 137 | int i, start, l; // _start_: start of the current window; _l_: length of a contiguous A/C/G/T (sub)sequence 138 | unsigned t; // current word 139 | 140 | buf->P.n = buf->res.n = 0; 141 | buf->w->front = buf->w->count = 0; 142 | memset(cv, 0, SD_WTOT * sizeof(int)); 143 | memset(cw, 0, SD_WTOT * sizeof(int)); 144 | if (l_seq < 0) l_seq = strlen((const char*)seq); 145 | for (i = l = t = 0; i <= l_seq; ++i) { 146 | int b = i < l_seq? seq_nt4_table[seq[i]] : 4; 147 | if (b < 4) { // an A/C/G/T base 148 | ++l, t = (t<<2 | b) & SD_WMSK; 149 | if (l >= SD_WLEN) { // we have seen a word 150 | start = (l - W > 0? l - W : 0) + (i + 1 - l); // set the start of the current window 151 | save_masked_regions(buf->km, &buf->res, &buf->P, start); // save intervals falling out of the current window? 152 | shift_window(t, buf->w, T, W, &L, &rw, &rv, cw, cv); 153 | if (rw * 10 > L * T) 154 | find_perfect(buf->km, &buf->P, buf->w, T, start, L, rv, cv); 155 | } 156 | } else { // N or the end of sequence; N effectively breaks input into pieces of independent sequences 157 | start = (l - W + 1 > 0? 
l - W + 1 : 0) + (i + 1 - l); 158 | while (buf->P.n) save_masked_regions(buf->km, &buf->res, &buf->P, start++); // clear up unsaved perfect intervals 159 | l = t = 0; 160 | } 161 | } 162 | *n = buf->res.n; 163 | return buf->res.a; 164 | } 165 | 166 | uint64_t *sdust(void *km, const uint8_t *seq, int l_seq, int T, int W, int *n) 167 | { 168 | uint64_t *ret; 169 | sdust_buf_t *buf; 170 | buf = sdust_buf_init(km); 171 | ret = (uint64_t*)sdust_core(seq, l_seq, T, W, n, buf); 172 | buf->res.a = 0; 173 | sdust_buf_destroy(buf); 174 | return ret; 175 | } 176 | 177 | #ifdef _SDUST_MAIN 178 | #include 179 | #include 180 | #include "ketopt.h" 181 | #include "kseq.h" 182 | KSEQ_INIT(gzFile, gzread) 183 | 184 | int main(int argc, char *argv[]) 185 | { 186 | gzFile fp; 187 | kseq_t *ks; 188 | int W = 64, T = 20, c; 189 | ketopt_t o = KETOPT_INIT; 190 | 191 | while ((c = ketopt(&o, argc, argv, 1, "w:t:", 0)) >= 0) { 192 | if (c == 'w') W = atoi(o.arg); 193 | else if (c == 't') T = atoi(o.arg); 194 | } 195 | if (o.ind == argc) { 196 | fprintf(stderr, "Usage: sdust [-w %d] [-t %d] \n", W, T); 197 | return 1; 198 | } 199 | fp = strcmp(argv[o.ind], "-")? 
gzopen(argv[o.ind], "r") : gzdopen(fileno(stdin), "r"); 200 | ks = kseq_init(fp); 201 | while (kseq_read(ks) >= 0) { 202 | uint64_t *r; 203 | int i, n; 204 | r = sdust(0, (uint8_t*)ks->seq.s, -1, T, W, &n); 205 | for (i = 0; i < n; ++i) 206 | printf("%s\t%d\t%d\n", ks->name.s, (int)(r[i]>>32), (int)r[i]); 207 | free(r); 208 | } 209 | kseq_destroy(ks); 210 | gzclose(fp); 211 | return 0; 212 | } 213 | #endif 214 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/sdust.h: -------------------------------------------------------------------------------- 1 | #ifndef SDUST_H 2 | #define SDUST_H 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | struct sdust_buf_s; 11 | typedef struct sdust_buf_s sdust_buf_t; 12 | 13 | // the simple interface 14 | uint64_t *sdust(void *km, const uint8_t *seq, int l_seq, int T, int W, int *n); 15 | 16 | // the following interface dramatically reduce heap allocations when sdust is frequently called. 
17 | sdust_buf_t *sdust_buf_init(void *km); 18 | void sdust_buf_destroy(sdust_buf_t *buf); 19 | const uint64_t *sdust_core(const uint8_t *seq, int l_seq, int T, int W, int *n, sdust_buf_t *buf); 20 | 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/seed.c: -------------------------------------------------------------------------------- 1 | #include "mmpriv.h" 2 | #include "kalloc.h" 3 | #include "ksort.h" 4 | 5 | mm_seed_t *mm_seed_collect_all(void *km, const mm_idx_t *mi, const mm128_v *mv, int32_t *n_m_) 6 | { 7 | mm_seed_t *m; 8 | size_t i; 9 | int32_t k; 10 | m = (mm_seed_t*)kmalloc(km, mv->n * sizeof(mm_seed_t)); 11 | for (i = k = 0; i < mv->n; ++i) { 12 | const uint64_t *cr; 13 | mm_seed_t *q; 14 | mm128_t *p = &mv->a[i]; 15 | uint32_t q_pos = (uint32_t)p->y, q_span = p->x & 0xff; 16 | int t; 17 | cr = mm_idx_get(mi, p->x>>8, &t); 18 | if (t == 0) continue; 19 | q = &m[k++]; 20 | q->q_pos = q_pos, q->q_span = q_span, q->cr = cr, q->n = t, q->seg_id = p->y >> 32; 21 | q->is_tandem = q->flt = 0; 22 | if (i > 0 && p->x>>8 == mv->a[i - 1].x>>8) q->is_tandem = 1; 23 | if (i < mv->n - 1 && p->x>>8 == mv->a[i + 1].x>>8) q->is_tandem = 1; 24 | } 25 | *n_m_ = k; 26 | return m; 27 | } 28 | 29 | #define MAX_MAX_HIGH_OCC 128 30 | 31 | void mm_seed_select(int32_t n, mm_seed_t *a, int len, int max_occ, int max_max_occ, int dist) 32 | { // for high-occ minimizers, choose up to max_high_occ in each high-occ streak 33 | extern void ks_heapdown_uint64_t(size_t i, size_t n, uint64_t*); 34 | extern void ks_heapmake_uint64_t(size_t n, uint64_t*); 35 | int32_t i, last0, m; 36 | uint64_t b[MAX_MAX_HIGH_OCC]; // this is to avoid a heap allocation 37 | 38 | if (n == 0 || n == 1) return; 39 | for (i = m = 0; i < n; ++i) 40 | if (a[i].n > max_occ) ++m; 41 | if (m == 0) return; // no high-frequency k-mers; do nothing 42 | for (i = 0, last0 = -1; i <= n; ++i) { 43 | if 
(i == n || a[i].n <= max_occ) { 44 | if (i - last0 > 1) { 45 | int32_t ps = last0 < 0? 0 : (uint32_t)a[last0].q_pos>>1; 46 | int32_t pe = i == n? len : (uint32_t)a[i].q_pos>>1; 47 | int32_t j, k, st = last0 + 1, en = i; 48 | int32_t max_high_occ = (int32_t)((double)(pe - ps) / dist + .499); 49 | if (max_high_occ > 0) { 50 | if (max_high_occ > MAX_MAX_HIGH_OCC) 51 | max_high_occ = MAX_MAX_HIGH_OCC; 52 | for (j = st, k = 0; j < en && k < max_high_occ; ++j, ++k) 53 | b[k] = (uint64_t)a[j].n<<32 | j; 54 | ks_heapmake_uint64_t(k, b); // initialize the binomial heap 55 | for (; j < en; ++j) { // if there are more, choose top max_high_occ 56 | if (a[j].n < (int32_t)(b[0]>>32)) { // then update the heap 57 | b[0] = (uint64_t)a[j].n<<32 | j; 58 | ks_heapdown_uint64_t(0, k, b); 59 | } 60 | } 61 | for (j = 0; j < k; ++j) a[(uint32_t)b[j]].flt = 1; 62 | } 63 | for (j = st; j < en; ++j) a[j].flt ^= 1; 64 | for (j = st; j < en; ++j) 65 | if (a[j].n > max_max_occ) 66 | a[j].flt = 1; 67 | } 68 | last0 = i; 69 | } 70 | } 71 | } 72 | 73 | mm_seed_t *mm_collect_matches(void *km, int *_n_m, int qlen, int max_occ, int max_max_occ, int dist, const mm_idx_t *mi, const mm128_v *mv, int64_t *n_a, int *rep_len, int *n_mini_pos, uint64_t **mini_pos) 74 | { 75 | int rep_st = 0, rep_en = 0, n_m, n_m0; 76 | size_t i; 77 | mm_seed_t *m; 78 | *n_mini_pos = 0; 79 | *mini_pos = (uint64_t*)kmalloc(km, mv->n * sizeof(uint64_t)); 80 | m = mm_seed_collect_all(km, mi, mv, &n_m0); 81 | if (dist > 0 && max_max_occ > max_occ) { 82 | mm_seed_select(n_m0, m, qlen, max_occ, max_max_occ, dist); 83 | } else { 84 | for (i = 0; i < n_m0; ++i) 85 | if (m[i].n > max_occ) 86 | m[i].flt = 1; 87 | } 88 | for (i = 0, n_m = 0, *rep_len = 0, *n_a = 0; i < n_m0; ++i) { 89 | mm_seed_t *q = &m[i]; 90 | //fprintf(stderr, "X\t%d\t%d\t%d\n", q->q_pos>>1, q->n, q->flt); 91 | if (q->flt) { 92 | int en = (q->q_pos >> 1) + 1, st = en - q->q_span; 93 | if (st > rep_en) { 94 | *rep_len += rep_en - rep_st; 95 | rep_st = st, rep_en = 
en; 96 | } else rep_en = en; 97 | } else { 98 | *n_a += q->n; 99 | (*mini_pos)[(*n_mini_pos)++] = (uint64_t)q->q_span<<32 | q->q_pos>>1; 100 | m[n_m++] = *q; 101 | } 102 | } 103 | *rep_len += rep_en - rep_st; 104 | *_n_m = n_m; 105 | return m; 106 | } 107 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/setup.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools import setup, Extension 3 | except ImportError: 4 | from distutils.core import setup 5 | from distutils.extension import Extension 6 | 7 | import sys, platform 8 | 9 | sys.path.append('python') 10 | 11 | extra_compile_args = ['-DHAVE_KALLOC'] 12 | include_dirs = ["."] 13 | 14 | if platform.machine() in ["aarch64", "arm64"]: 15 | include_dirs.append("sse2neon/") 16 | extra_compile_args.extend(['-ftree-vectorize', '-DKSW_SSE2_ONLY', '-D__SSE2__']) 17 | else: 18 | extra_compile_args.append('-msse4.1') # WARNING: ancient x86_64 CPUs don't have SSE4 19 | 20 | def readme(): 21 | with open('python/README.rst') as f: 22 | return f.read() 23 | 24 | setup( 25 | name = 'mappy', 26 | version = '2.22', 27 | url = 'https://github.com/lh3/minimap2', 28 | description = 'Minimap2 python binding', 29 | long_description = readme(), 30 | author = 'Heng Li', 31 | author_email = 'lh3@me.com', 32 | license = 'MIT', 33 | keywords = 'sequence-alignment', 34 | scripts = ['python/minimap2.py'], 35 | ext_modules = [Extension('mappy', 36 | sources = ['python/mappy.pyx', 'align.c', 'bseq.c', 'lchain.c', 'seed.c', 'format.c', 'hit.c', 'index.c', 'pe.c', 'options.c', 37 | 'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c', 38 | 'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c', 'esterr.c', 'splitidx.c'], 39 | depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h', 40 | 'ksw2.h', 'kthread.h', 'kvec.h', 'mmpriv.h', 'sdust.h', 41 | 'python/cmappy.h', 
'python/cmappy.pxd'], 42 | extra_compile_args = extra_compile_args, 43 | include_dirs = include_dirs, 44 | libraries = ['z', 'm', 'pthread'])], 45 | classifiers = [ 46 | 'Development Status :: 5 - Production/Stable', 47 | 'License :: OSI Approved :: MIT License', 48 | 'Operating System :: POSIX', 49 | 'Programming Language :: C', 50 | 'Programming Language :: Cython', 51 | 'Programming Language :: Python :: 2.7', 52 | 'Programming Language :: Python :: 3', 53 | 'Intended Audience :: Science/Research', 54 | 'Topic :: Scientific/Engineering :: Bio-Informatics'], 55 | setup_requires=["cython"]) 56 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/sketch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #define __STDC_LIMIT_MACROS 6 | #include "kvec.h" 7 | #include "mmpriv.h" 8 | 9 | unsigned char seq_nt4_table[256] = { 10 | 0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 11 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 12 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 13 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 14 | 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 15 | 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 16 | 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 17 | 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 18 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 19 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 21 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 22 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 23 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 24 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 25 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 26 | }; 27 | 28 | static inline uint64_t hash64(uint64_t key, uint64_t mask) 29 | { 30 | key = (~key + (key << 21)) & mask; // key = (key << 21) - key - 1; 31 | key = key ^ key 
>> 24; 32 | key = ((key + (key << 3)) + (key << 8)) & mask; // key * 265 33 | key = key ^ key >> 14; 34 | key = ((key + (key << 2)) + (key << 4)) & mask; // key * 21 35 | key = key ^ key >> 28; 36 | key = (key + (key << 31)) & mask; 37 | return key; 38 | } 39 | 40 | typedef struct { // a simplified version of kdq 41 | int front, count; 42 | int a[32]; 43 | } tiny_queue_t; 44 | 45 | static inline void tq_push(tiny_queue_t *q, int x) 46 | { 47 | q->a[((q->count++) + q->front) & 0x1f] = x; 48 | } 49 | 50 | static inline int tq_shift(tiny_queue_t *q) 51 | { 52 | int x; 53 | if (q->count == 0) return -1; 54 | x = q->a[q->front++]; 55 | q->front &= 0x1f; 56 | --q->count; 57 | return x; 58 | } 59 | 60 | /** 61 | * Find symmetric (w,k)-minimizers on a DNA sequence 62 | * 63 | * @param km thread-local memory pool; using NULL falls back to malloc() 64 | * @param str DNA sequence 65 | * @param len length of $str 66 | * @param w find a minimizer for every $w consecutive k-mers 67 | * @param k k-mer size 68 | * @param rid reference ID; will be copied to the output $p array 69 | * @param is_hpc homopolymer-compressed or not 70 | * @param p minimizers 71 | * p->a[i].x = kMer<<8 | kmerSpan 72 | * p->a[i].y = rid<<32 | lastPos<<1 | strand 73 | * where lastPos is the position of the last base of the i-th minimizer, 74 | * and strand indicates whether the minimizer comes from the top or the bottom strand. 
75 | * Callers may want to set "p->n = 0"; otherwise results are appended to p 76 | */ 77 | void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p) 78 | { 79 | uint64_t shift1 = 2 * (k - 1), mask = (1ULL<<2*k) - 1, kmer[2] = {0,0}; 80 | int i, j, l, buf_pos, min_pos, kmer_span = 0; 81 | mm128_t buf[256], min = { UINT64_MAX, UINT64_MAX }; 82 | tiny_queue_t tq; 83 | 84 | assert(len > 0 && (w > 0 && w < 256) && (k > 0 && k <= 28)); // 56 bits for k-mer; could use long k-mers, but 28 enough in practice 85 | memset(buf, 0xff, w * 16); 86 | memset(&tq, 0, sizeof(tiny_queue_t)); 87 | kv_resize(mm128_t, km, *p, p->n + len/w); 88 | 89 | for (i = l = buf_pos = min_pos = 0; i < len; ++i) { 90 | int c = seq_nt4_table[(uint8_t)str[i]]; 91 | mm128_t info = { UINT64_MAX, UINT64_MAX }; 92 | if (c < 4) { // not an ambiguous base 93 | int z; 94 | if (is_hpc) { 95 | int skip_len = 1; 96 | if (i + 1 < len && seq_nt4_table[(uint8_t)str[i + 1]] == c) { 97 | for (skip_len = 2; i + skip_len < len; ++skip_len) 98 | if (seq_nt4_table[(uint8_t)str[i + skip_len]] != c) 99 | break; 100 | i += skip_len - 1; // put $i at the end of the current homopolymer run 101 | } 102 | tq_push(&tq, skip_len); 103 | kmer_span += skip_len; 104 | if (tq.count > k) kmer_span -= tq_shift(&tq); 105 | } else kmer_span = l + 1 < k? l + 1 : k; 106 | kmer[0] = (kmer[0] << 2 | c) & mask; // forward k-mer 107 | kmer[1] = (kmer[1] >> 2) | (3ULL^c) << shift1; // reverse k-mer 108 | if (kmer[0] == kmer[1]) continue; // skip "symmetric k-mers" as we don't know it strand 109 | z = kmer[0] < kmer[1]? 
0 : 1; // strand 110 | ++l; 111 | if (l >= k && kmer_span < 256) { 112 | info.x = hash64(kmer[z], mask) << 8 | kmer_span; 113 | info.y = (uint64_t)rid<<32 | (uint32_t)i<<1 | z; 114 | } 115 | } else l = 0, tq.count = tq.front = 0, kmer_span = 0; 116 | buf[buf_pos] = info; // need to do this here as appropriate buf_pos and buf[buf_pos] are needed below 117 | if (l == w + k - 1 && min.x != UINT64_MAX) { // special case for the first window - because identical k-mers are not stored yet 118 | for (j = buf_pos + 1; j < w; ++j) 119 | if (min.x == buf[j].x && buf[j].y != min.y) kv_push(mm128_t, km, *p, buf[j]); 120 | for (j = 0; j < buf_pos; ++j) 121 | if (min.x == buf[j].x && buf[j].y != min.y) kv_push(mm128_t, km, *p, buf[j]); 122 | } 123 | if (info.x <= min.x) { // a new minimum; then write the old min 124 | if (l >= w + k && min.x != UINT64_MAX) kv_push(mm128_t, km, *p, min); 125 | min = info, min_pos = buf_pos; 126 | } else if (buf_pos == min_pos) { // old min has moved outside the window 127 | if (l >= w + k - 1 && min.x != UINT64_MAX) kv_push(mm128_t, km, *p, min); 128 | for (j = buf_pos + 1, min.x = UINT64_MAX; j < w; ++j) // the two loops are necessary when there are identical k-mers 129 | if (min.x >= buf[j].x) min = buf[j], min_pos = j; // >= is important s.t. 
min is always the closest k-mer 130 | for (j = 0; j <= buf_pos; ++j) 131 | if (min.x >= buf[j].x) min = buf[j], min_pos = j; 132 | if (l >= w + k - 1 && min.x != UINT64_MAX) { // write identical k-mers 133 | for (j = buf_pos + 1; j < w; ++j) // these two loops make sure the output is sorted 134 | if (min.x == buf[j].x && min.y != buf[j].y) kv_push(mm128_t, km, *p, buf[j]); 135 | for (j = 0; j <= buf_pos; ++j) 136 | if (min.x == buf[j].x && min.y != buf[j].y) kv_push(mm128_t, km, *p, buf[j]); 137 | } 138 | } 139 | if (++buf_pos == w) buf_pos = 0; 140 | } 141 | if (min.x != UINT64_MAX) 142 | kv_push(mm128_t, km, *p, min); 143 | } 144 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/splitidx.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "mmpriv.h" 7 | 8 | FILE *mm_split_init(const char *prefix, const mm_idx_t *mi) 9 | { 10 | char *fn; 11 | FILE *fp; 12 | uint32_t i, k = mi->k; 13 | fn = (char*)calloc(strlen(prefix) + 10, 1); 14 | sprintf(fn, "%s.%.4d.tmp", prefix, mi->index); 15 | if ((fp = fopen(fn, "wb")) == NULL) { 16 | if (mm_verbose >= 1) 17 | fprintf(stderr, "[ERROR]\033[1;31m failed to write to temporary file '%s'\033[0m: %s\n", fn, strerror(errno)); 18 | exit(1); 19 | } 20 | mm_err_fwrite(&k, 4, 1, fp); 21 | mm_err_fwrite(&mi->n_seq, 4, 1, fp); 22 | for (i = 0; i < mi->n_seq; ++i) { 23 | uint32_t l; 24 | l = strlen(mi->seq[i].name); 25 | mm_err_fwrite(&l, 1, 4, fp); 26 | mm_err_fwrite(mi->seq[i].name, 1, l, fp); 27 | mm_err_fwrite(&mi->seq[i].len, 4, 1, fp); 28 | } 29 | free(fn); 30 | return fp; 31 | } 32 | 33 | mm_idx_t *mm_split_merge_prep(const char *prefix, int n_splits, FILE **fp, uint32_t *n_seq_part) 34 | { 35 | mm_idx_t *mi = 0; 36 | char *fn; 37 | int i, j; 38 | 39 | if (n_splits < 1) return 0; 40 | fn = CALLOC(char, strlen(prefix) + 10); 41 | for (i = 0; i < n_splits; ++i) { 42 | 
sprintf(fn, "%s.%.4d.tmp", prefix, i); 43 | if ((fp[i] = fopen(fn, "rb")) == 0) { 44 | if (mm_verbose >= 1) 45 | fprintf(stderr, "ERROR: failed to open temporary file '%s': %s\n", fn, strerror(errno)); 46 | for (j = 0; j < i; ++j) 47 | fclose(fp[j]); 48 | free(fn); 49 | return 0; 50 | } 51 | } 52 | free(fn); 53 | 54 | mi = CALLOC(mm_idx_t, 1); 55 | for (i = 0; i < n_splits; ++i) { 56 | mm_err_fread(&mi->k, 4, 1, fp[i]); // TODO: check if k is all the same 57 | mm_err_fread(&n_seq_part[i], 4, 1, fp[i]); 58 | mi->n_seq += n_seq_part[i]; 59 | } 60 | mi->seq = CALLOC(mm_idx_seq_t, mi->n_seq); 61 | for (i = j = 0; i < n_splits; ++i) { 62 | uint32_t k; 63 | for (k = 0; k < n_seq_part[i]; ++k, ++j) { 64 | uint32_t l; 65 | mm_err_fread(&l, 1, 4, fp[i]); 66 | mi->seq[j].name = (char*)calloc(l + 1, 1); 67 | mm_err_fread(mi->seq[j].name, 1, l, fp[i]); 68 | mm_err_fread(&mi->seq[j].len, 4, 1, fp[i]); 69 | } 70 | } 71 | return mi; 72 | } 73 | 74 | void mm_split_rm_tmp(const char *prefix, int n_splits) 75 | { 76 | int i; 77 | char *fn; 78 | fn = CALLOC(char, strlen(prefix) + 10); 79 | for (i = 0; i < n_splits; ++i) { 80 | sprintf(fn, "%s.%.4d.tmp", prefix, i); 81 | remove(fn); 82 | } 83 | free(fn); 84 | } 85 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/test/q2.fa: -------------------------------------------------------------------------------- 1 | >q2 2 | GGACATCCCGATGGTGCAGTCCTACCTGTACGAAAGGAC 3 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/test/t-inv.fa: -------------------------------------------------------------------------------- 1 | >ref 2 | TGCGGAGGCTGAAGCAACTCCATCTTGGAAGCTAATCTACCATGTTGGCTTCTGATTAAC 3 | ATCAGTTCTGGGAAGGCTTGTAAGATTTCCTGTTTGTCTATTATTTCCTAGGTAAGAGCA 4 | GATACTTACTGTAAATCCTGCCCCTAGATTAAACAACCTTGGTGTTATCGTACTTCCATT 5 | GTCCTATACATCCCTTCGGAATCCCCCTTTCCCTATGGTCCTCAAGCCCTTGGTCTGGGG 6 | 
AGTAACAGCATAGGGATCAACCATCTCGTCTTGCCACTGCCCGAAATACAGACATGGCTT 7 | CTGTTCCTAAGTCCCTATTCAACTTTTCTTTCTAAGAAACTGGATTTGTCAGCCTCTTTC 8 | TTCACCTCTCAGCTTCCTTGGACTTTGGGGGTAGGTTTGCGTAGACATGCTCACCACAGA 9 | CACAATATCAGCTTCATTCTACAGATGAGGAAGGCAAGCCTTGGGGAGCTTAACCAACTT 10 | GTCGAGACTCATGTATATACCAACACTGAAAAGCAGATATTCCAGACTCCCAGTCATGCC 11 | ACAGGCACACCCCTCAGTGAGAGGTGGGGTTTGTAGTTGAGGCTATTTCCTGCCCAGGGA 12 | GCAGGGAGGCACTCTAGCTTCCCTGAGCTAACGTGGTTCTGCTTGTGTCTGACTTCCAGG 13 | TCTCTGCCCTTTCCAAGCTCACTAGGATGGGCTTCGGGTGTGTCAAATGCCTCAGACAGT 14 | ACAGATCCACACAGAATGGGCATATGCAACCAATCAGTGTCATAAAAAAGAAGGAAATGA 15 | CTCGGGCCCCCTGTGTGTTCAACATGTCGAAGGTATCTGTGCAGCAGAAGAAAGAGGGGC 16 | AAAAGCCCCCAGTGCCACAGGCCAGAGGCAGCAGCTTGGGCCCATGTGGGAGGGTTTGCT 17 | TTCCCCTGCCAAAGTGATGGGCTGCTGCAGCCTGGGGCTTGTGGGAATCCTTCCTGGGCC 18 | TGTGTGGGAAGTGTAGGCAGGGAGAGTGCTGCTTTCCCAAGCTCATCCCAGCTACAGCTA 19 | CCTTTGTGCTCTGGGATTCAGGACCCCCGAGGGGGCTGGCAGGAGAGTCTCTGTTCTCGG 20 | ATGGGTTGTCACCAGGGCATACATGGGAAGTGGGCTCTCTGGAGTCACCCTCCAGGGGAC 21 | AATGCCAATTCCAGACACATTTACTGGAACCCCTACACTGATGACCTTTTGTTGAGGGTT 22 | GAATTATGTCCCCAAAAAAGATACATTGAAGTCCAAACCTCTGGTGTCTATAAATGTGAT 23 | TTTATTTGAAAATGAGGTTTCTATGGACTAAATTGTGTCCCTCCCAAATTCATATTTTGA 24 | AGCCCTAGCCCCCAGTGTGACTATACCTAGAGACAGAGATCTTTAGGAGGTAATTAAGGT 25 | TCAATGAGGTCAGGTGGGTGGGGCCCTAAACCAACAGGAAGGACTGTGGCCTTACTAGAA 26 | AAGGAAGAAAAAGCATTTCCTCTCTTCTAGTATAAAAGGACACAGAAAGAAGGCAGATAT 27 | CTACAAGCCACGAAGAGAGACGTCACTGAGAACTGAATTTGTGTACATTGATCTGGAACT 28 | TCCAGCCTCCAGAACTTGAGAAATACATTTCTGTTGTTTATTTTTTTTTCATGTAATCAA 29 | TTCATTTATCATATATTTATTGAGTGCCTACTATGTGCCAGAGGATACAGCAGTAACAAA 30 | ACTAGGCAAAAATTGTGCCTAAAAGAGGGAAGATGACTTTTCTTAAAGTGTGGAATAAAG 31 | AAAAGTAAGATAGCGGATAGAAGCTTGAAGTGAAAGCAGGTTCACAGGAAGTTTCTTTGG 32 | TCATTTGTTTTGTTTTTAAATAGTGGAAAGATGTATATGTTTATGGAGAAAGATTGCCTT 33 | GAAGATGCAAGAGGAAGAGATGATCAAAATTCAAGAAGAAGCAGAAAGTGATAGAATAAA 34 | GAGCACAAGTGGAGAATTAGTGTTAATGAAAAGAAGGATGCTTCCTTTGATATGAAGTGA 35 | AGGAAGAGAGAATGAGTAAAGACCAAGACTTGAAGTCCCTAGTTTAATAGAGGGAGATTT 36 | 
CTTCTTTTGATAGCAACAATGGTATTCTGAATTATTTGAAGACATGTCATATTTCTCTTG 37 | TGCCATTTTCCTCCCAGTTTAAACATTCTCATAACCTCTATTCCTCACATGATGTTTTTC 38 | CAGGTCCTTTATTCTTTGGCACTCTCTTCTCTGGACACATTGTATTCTGTCATTGGTCCT 39 | AAAATTTAGATACCCACAATTGAACATACTCCTCTAGATATGGTCTAGCTAATGCAAAAG 40 | AACTGCTGCCTTCCAACTTGTTCAGACATCATATGTTTGTTGTCAAACGCTAAGTTGAGT 41 | TGTTATCTTTTAAGTTTTGTTTTTGTTTTTTTTTTTTTTTTTTAATTCCAAGAGGTGCCC 42 | ACGTTGGCTAAGTACCAAACAGGGTACTAGGGAATTTTACTTCTGAGTTAAATGCCATTC 43 | TAGTTGTTTTTTCTTCATCTCCAGTAAGGTTATCTTTATTCACCAGTTGTTACAATAGCT 44 | GTGGGTCTTGCTTCTCACAGTTTTATGCTGTCTGTGCTATTTTCTCTACTGATCATCACC 45 | ACAATCATTATTGCTTATCATAATTGTTATCTTTATTTTCTCCTTTAATCAAGAATCAGT 46 | CTTCCTTTATCTCATTATTCTCTTTTGCAGGCTTCAGGATAATTATGGTTGGAGTGCACT 47 | GGGGGAACCAGTGCAGCTAAGCTCTGACATCTTTGCATCCCTTTTCCATCTGCTGTTTTG 48 | GCACTCTGGTAGAATAGATAACCTAAAAACGACTTTAAAACATCTAGAAATTTTGGATAA 49 | AATATAACAAACATCCCTTTAAATGCACAACTGATCTTCCATGGAAGTCACAGAAATATA 50 | TAACGCCAAAAAGAAGGGAAGCTGAAACCCAGGGCTGTAAACATGAACATCATCTTCTCT 51 | CCCTTTTTCTTGTGACTTATCTTGTTTTTCTCAGCTTTGGTGCTACCAAGGCTTGACTTT 52 | AATAGGCATTTCCAATCAATGAGAGAATTTCTTTTGCTTTCATCAACAATTCAGTTATTG 53 | ATGTTAACATATATATCATTTGAGTACTTTTCTTTTTTTTATTATTATTATACTTTAAGT 54 | TTTAGGGTCCATGTGCACAATGTGCAGGTTAGTTACGTATGTATACATGTGCCATGCTGG 55 | TGTGCTGCACCCATTAACTCATCATTTAGCATTAGGTATATCTCCTAATGCTATCCCTTC 56 | CCCCTCTCCCCACCCCACAACAGTCCCCAGAGTGTTCCCCTTCCTGTGTCCATGTGTTCT 57 | CATTGTTCAATCCCCATCTATGAGTGAGAACATGCGGTGTTTGGTTTTTTGTCCTTGCAA 58 | TAGTTTACTGAGAATGATGATTTCTAATTTCATCCATGTCCCTAAAGAGCTTCTGCACAG 59 | CAAAAGAAACTACCATCAGAGTGAACAGGCAACCTACAAAATGGGAGAAAATTTTCACAA 60 | CCTGCTCATCTGACAAAGGGCTAATATCCAGAATCTACAATGAACTCAAACAAATTTACA 61 | AGAAAAAAACAAACAACCCCATCAAAAAGTGGGCAAAGGATATGAACAGACACTTCTCAA 62 | AAGAAGACATTTATGCAGCCAAAAGACACATGAAAAAATGCTCATCATCACTGGCCATCA 63 | GAGAAATGCAAACCAAAACCACAATGAGATACCATCTCACACCAGTTAAAATGGCAATCA 64 | TTAAAAAGTCAGGAAACAACAGGTGCTGGAGAGGATGTGGAGAAACAGGAACACTTTTAC 65 | ACTGTTGGTGGGACTGTAAACTAGTTCAACCATTGTGGAAGTCAGTGTGCTGATTCCTCA 66 | 
GGGATCTAGAACTAGAAATACCATTTGACCCAGCCATCCCATTACTGGGTATATACCCAA 67 | AGGACTATAAATCATGCTGCTATAAAGACACATGCACACGTATGTTTATTGCGGCACTAT 68 | TCACAATAGCAAAGACTTGGAACCAACCCAAATGTCCAACAATGATAGACTGGATTAAGA 69 | AAATGTGGCACATATACACCACGGAATACTGTGCAGCCATAAAAAATGATGAGTTCATGT 70 | CCTTTGTAGGGACACGGATGAAATTGGAAATCATTTCTGTTGTTTAAACCACGAAGTCTA 71 | TGGTATCTGGTTATGACAACCTGAGAATACTAACTCAAGGGTCTTTCGCAGATGTCATTA 72 | AGTTGTTAAAGTGAGGTCATTATGGTGGGTCCTAATCCAAGAGAAGAGATGCATGGACAG 73 | ACGTGCACAACGGGAGGACCAAGCCAAGACACACAGGGAGAATGGCCATGGGAAGATGGA 74 | GGCAGAGATCAAAGTGAGGCACCCACAAGCCAAGAAATGGCAGGAGCTACCAGCAGCTGG 75 | AAGATGCAGAGAAGCATTCCTTCTTAGAGGTTTCAGAGAGAGTATGGTGCTACTGACACC 76 | TTGATTTTGAACTTCTAGTCTCCAGAACTATGAGAGAATAAATTTCTGTTGGTTAAGCCA 77 | TCGAGTTTGTGTAAGTTTGTTATAAGAGCCCTAGGAAATAAACATATCCATTTATTCAGG 78 | AAAGCCTGCTAGAGTGCAAATATTTGGAAAAGATACTACTATGCAAATGTTTGAAAAAGA 79 | TATTGCTCTTGATTCTGCCTTATGGGTTTTTCATTTCTGTAAGCTATTCTCAAAGTTTTG 80 | TTCTTGGACTACTATTGGTAATTAAGACTGCAACATGTTTGGCAACATCAGTTGAGAACT 81 | GTTGCTCTGGGAACGTTTTCGGCAAGCCTCAGCCCTTCTTTTCCCTTGGCTTGCATTGAG 82 | GAGTTAGGTGATACTCTGCTGCTCAGGCCCAGCACCTTTATGGACCGTATTCCCCTGGTG 83 | GAATGACCATCTCTGCTTGCTCTGATTGGCTGTTGGGGTTTTCTAGCATGCCCTATTTAA 84 | TATGTATGATTTATCTCTTACTTCAGTTGGAAGGTACAGTTGCTCTGTAGTTGGCATGCA 85 | GTCATGGTGACTATGAAAATATAAAATAATGTTTTGGTTTACAGACACTTAGAAATAAGT 86 | TGTGTCTCAAAATTGGGTGACTATTCTAGTTATCTGCTACTCAATATCCTTGTGCGAGCC 87 | CTCTTTACCCAGAATCAAACTAAACCATGAGGGGCACTATAGAATGTCACCCCTGGGTCC 88 | AGGATACTATGGGGACTCAGAAGCCAAGCTCCCACTGGGGGATCTAGGGCATGCCCCCAA 89 | GGTAAGATTCCCACCTCTTTGTTCAGCAGGAAGCACCCATCACACAAGGAGGTAGGAATA 90 | AACAAGCATTCGTCAAGAACAAAAGATACAGATGTTCTGCTGGAGCTTGGATACATAGCA 91 | TAAGAGGGAACAGTTCTCACAGGTAAGAGTAAGTTTTCCTCTGGTGGTGACAGTGGGACC 92 | TGTGGGGGAGAGAATTGGGAGTACTGACAGGAAGGCAGAGTGGCTGTCCAAATGAACGGA 93 | TTGTTTGCACATGGCCTTTAGGGCACGTTGTGTTAGCCTTCCATTGCTGCTTATATTAGT 94 | CTGTTTTCACACTGCCCATAAATGCATACCTGAGACTGGATAATTTATAAAGAAAAAGAG 95 | CCTTAATGTACTCATAGTTGCATGTGGCTGGGGAGGCCTCACAATCATGGCAGAAGGTGA 96 | 
AAGGCACATCTTACATGGAAGCAGACAAGAGAGAATTGAGGACCAAGTGAAAGGGGTTTC 97 | CCCTTATAAAACCATCAGATCACATGAGACTTTTTCACCACCATGAGAACAGTAAGGGGA 98 | AAACTATGCTCATGATTCAATTGTCTCCCACTGGATTCCTCCCACAACACATAGGAATTA 99 | TGGGAGCTAAAATTCAAGATGAGATTTGGGTGAGGACACAGCCAAACCCTATCACTGCTG 100 | TAATCAATTCCCACCAACTTAGTGGCTCGAAACATCACAGATTTATGATCTTATGACGGT 101 | GGAGGTCCCCAAATGGATCTTCTAGGTCTAGAATCAAGGTATCAGCAGACCACTTCTTTT 102 | GGAGGCTCTGGTGGAGAAACCATTTCCTCGCCTTTTCCAGCTTCTAGAGGCTGCCCTTCT 103 | CATTCCTTGGTTCACGGCCACACTCATTTCCATCTCTGCTTCCACTGTGACAACTTCTCT 104 | GCCTCAGACCCTCCTGCTTTGCCTTTGTAAGGACCCTTGTGATGAGATCAGGCCCATCCA 105 | GGATTATCCCTCATCTCAAGACCTTTACCTTAATCACATTTGCAAGGTCTCTTCCACTGT 106 | GTCAGGTAACATTTTCACAGGTTCCAGGGATTAGGGTGTGGACATCTTGGGGAGCTGGAG 107 | GATATTATTTCATCTACCACACACATCTCTACCTTGTACAGGCAAGCACTTGCAAAGTGC 108 | AATGTGATCCTCTGGAGCCACTGTCCTCCCAGAGCTTATATATACTCTGAAAGTCAACTC 109 | TCAGACCACAGCCTCCTGTCCATGCACCACTCTCATCAACACCCCCACCCGAAACACTTT 110 | CACTCCACCCTCTTTGTCCCCTAACTCATGGAGAAGAAAATCTAATTAGTAGGAGTGGAA 111 | TTTGGCTTTCATCTTTACCAGTACTAGAAATATGGTGTGTGTCTTTTTGTAAAAATTCTC 112 | TCAACTAAATTGTTTTTATTAATTTCTGCAAAATGTGAACATCAACTCCCTTCATGTGAA 113 | TGTCAATAAGATTAAATGAGCTGTCTCAGCTCCTAGCCTGTGCAAGCTAACAGCTCAGGA 114 | GATGTTTATTTCTTTCCCTCTTCTTTCCTTAATGAAGCCCTCTCCTTTGACATCTTCAAT 115 | TCTGGAGCGCTTCTTTTCTGAGGCCTTGGCTCCCCCACATTGCCCACCCTTTTCCTGCTC 116 | GTCCACATTTCTGGCTTCTATTCTCTTGTCTTTACCATCTCCCTGAACAATGTTATCCGT 117 | TCCAATGACTTCAACAGTCTCTCCGCTTACATATGATGCCTCTCAAACTCTGATCTCCAA 118 | CTCTTCCAAAGAGCTCTGGACCTTTGTTCCAATTACCTGAAAAACATCTTCTTGGATGTC 119 | CCATTAGCACTGTTAAATCAAACAAGAATTTCCCTCCCTCCTGCCTTGCTGTAGTTCCCC 120 | TAGGGATTCGGTTGTGTGGGAAGATGTGTGGAGAGCTCTTAGTTGACTCCCTTCTCTGCA 121 | GTTCTACCTCTCTAGAGACTTGGAGGACCCACTGTTTCCGCCTCGCTTTTTCAGGCCTAG 122 | AGATTGCTCGCTCCTGGGCTGGCTGCTTCATAATTCCTTATTAGTAGTTTCCCAAGCTTA 123 | CATATCTGTAAATATTTACTTTAGTTAAATTCTCCCCAATTTCCACAATATGTTGGCTGC 124 | ACATGCTTTCTACTAGGAGTCACACAACTATGATAAGAACCAAGAAATATTAGTAAACGT 125 | 
TTTTTACCATTATTGGCCTATACCCTGGAATAGCCAACAATAACCTAGAACCTATGCAAC 126 | AAGAATATCCAACAAGAACCTAGAGACCTGTCAGTCTATAGGTGGGAACTACAGGATGAG 127 | A 128 | -------------------------------------------------------------------------------- /src/ext/minimap2-2.22/test/t2.fa: -------------------------------------------------------------------------------- 1 | >t2 2 | GGACATCCCGATGGTGCAGgtGCTATTAAAGGTTCGTTTGTTCAACGATTAAagTCCTACCTGTACGAAAGGAC 3 | --------------------------------------------------------------------------------