├── .gitattributes ├── .gitignore ├── .gitmodules ├── Makefile ├── README.md ├── benchmarks ├── abea │ ├── LICENSE │ ├── Makefile │ ├── README │ ├── README.md │ └── src │ │ ├── align.c │ │ ├── align.cu │ │ ├── config.h │ │ ├── error.h │ │ ├── eventalign.c │ │ ├── events.c │ │ ├── f5c.c │ │ ├── f5c.cu │ │ ├── f5c.h │ │ ├── f5cmisc.cuh │ │ ├── f5cmisc.h │ │ ├── fast5lite.h │ │ ├── freq.c │ │ ├── freq_merge.c │ │ ├── hmm.c │ │ ├── khash.h │ │ ├── ksort.h │ │ ├── logsum.h │ │ ├── main.c │ │ ├── matrix.h │ │ ├── meth.c │ │ ├── meth_main.c │ │ ├── model.c │ │ ├── model.h │ │ ├── nanopolish_fast5_io.c │ │ ├── nanopolish_index.c │ │ ├── nanopolish_read_db.c │ │ ├── nanopolish_read_db.h │ │ └── profiles.h ├── bsw │ ├── Makefile │ ├── README.md │ ├── bandedSWA.cpp │ ├── bandedSWA.h │ ├── macro.h │ ├── main_banded.cpp │ └── utils.h ├── chain │ ├── Makefile │ ├── README.md │ └── src │ │ ├── common.cpp │ │ ├── common.h │ │ ├── host_data.h │ │ ├── host_data_io.cpp │ │ ├── host_data_io.h │ │ ├── host_kernel.cpp │ │ ├── host_kernel.h │ │ └── main.cpp ├── dbg │ ├── Makefile │ ├── README.md │ ├── common.cpp │ ├── common.h │ └── debruijn.cpp ├── fmi │ ├── Makefile │ ├── README.md │ └── fmi.cpp ├── kmer-cnt │ ├── Makefile │ ├── README.md │ ├── config.h │ ├── kmer.h │ ├── kmer_cnt.cpp │ ├── libcuckoo │ │ ├── LICENSE │ │ ├── README.md │ │ ├── cuckoohash_config.hh │ │ ├── cuckoohash_map.hh │ │ ├── cuckoohash_util.hh │ │ └── libcuckoo_bucket_container.hh │ ├── logger.h │ ├── memory_info.h │ ├── parallel.h │ ├── progress_bar.h │ ├── sequence.cpp │ ├── sequence.h │ ├── sequence_container.cpp │ ├── sequence_container.h │ ├── utils.h │ ├── vertex_index.cpp │ └── vertex_index.h ├── nn-base │ ├── README.md │ ├── bonito │ │ └── basecall.py │ ├── models │ │ └── bonito_dna_r941 │ │ │ ├── config.toml │ │ │ └── weights_0.tar │ └── run_bonito.sh ├── nn-variant │ ├── README.md │ ├── clair │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── model.cpython-37.pyc │ │ │ └── 
selu.cpython-37.pyc │ │ ├── callVarBam.py │ │ ├── callVarBamParallel.py │ │ ├── call_var.py │ │ ├── evaluate.py │ │ ├── learning_rate_finder.py │ │ ├── model.py │ │ ├── plot_tensor.py │ │ ├── post_processing │ │ │ ├── __init__.py │ │ │ ├── ensemble.py │ │ │ └── overlap_variant.py │ │ ├── selu.py │ │ ├── task │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── genotype.cpython-37.pyc │ │ │ │ ├── gt21.cpython-37.pyc │ │ │ │ ├── main.cpython-37.pyc │ │ │ │ └── variant_length.cpython-37.pyc │ │ │ ├── genotype.py │ │ │ ├── gt21.py │ │ │ ├── main.py │ │ │ └── variant_length.py │ │ ├── train.py │ │ ├── train_clr.py │ │ └── utils.py │ ├── prediction.py │ └── shared │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── param.cpython-37.pyc │ │ ├── command_options.py │ │ ├── interval_tree.py │ │ ├── param.py │ │ └── utils.py ├── phmm │ ├── Makefile │ ├── PairHMMUnitTest.cpp │ ├── PairHMMUnitTest.h │ ├── README.md │ ├── pairhmm_common.h │ └── shacc_pairhmm.h ├── pileup │ ├── Makefile │ ├── README.md │ ├── khash.h │ ├── kvec.h │ ├── medaka_bamiter.c │ ├── medaka_bamiter.h │ ├── medaka_common.c │ ├── medaka_common.h │ ├── medaka_counts.c │ └── medaka_counts.h └── poa │ ├── Makefile │ ├── README │ ├── README.md │ └── msa_spoa_omp.cpp ├── debian.prerequisites ├── img ├── GenomicsBenchLogo-Colored.png └── GenomicsBenchLogo.png ├── requirements.txt ├── rhel.prerequisites └── scripts ├── run-cpu.sh ├── run-gpu.sh ├── vtune.pc.sh └── vtune.uarch.sh /.gitattributes: -------------------------------------------------------------------------------- 1 | long-reads/basecalling/data/10/reads.fast5 filter=lfs diff=lfs merge=lfs -text 2 | long-reads/basecalling/data/100/reads.fast5 filter=lfs diff=lfs merge=lfs -text 3 | long-reads/basecalling/data/1000/reads.fast5 filter=lfs diff=lfs merge=lfs -text 4 | long-reads/basecalling/data/500/reads.fast5 filter=lfs diff=lfs merge=lfs -text 5 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv3 2 | long-reads/basecalling/output 3 | *.nfs* 4 | *.out 5 | *.txt 6 | *.o 7 | *.log 8 | *.out 9 | *.csv 10 | *.conf 11 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "tools/bwa"] 2 | path = tools/bwa 3 | url = https://github.com/arun-sub/bwa.git 4 | branch = master 5 | [submodule "tools/GKL"] 6 | path = tools/GKL 7 | url = https://github.com/arun-sub/GKL.git 8 | branch = pv_c_interface 9 | [submodule "tools/bwa-mem2"] 10 | path = tools/bwa-mem2 11 | url = https://github.com/bwa-mem2/bwa-mem2.git 12 | [submodule "tools/minimap2"] 13 | path = tools/minimap2 14 | url = https://github.com/arun-sub/minimap2.git 15 | [submodule "tools/minimap2-acceleration"] 16 | path = tools/minimap2-acceleration 17 | url = https://github.com/arun-sub/minimap2-acceleration.git 18 | [submodule "tools/spoa"] 19 | path = tools/spoa 20 | url = https://github.com/arun-sub/spoa.git 21 | [submodule "tools/abPOA"] 22 | path = tools/abPOA 23 | url = https://github.com/arun-sub/abPOA.git 24 | [submodule "tools/racon"] 25 | path = tools/racon 26 | url = https://github.com/arun-sub/racon.git 27 | [submodule "tools/Clair"] 28 | path = tools/Clair 29 | url = https://github.com/Yufeng98/Clair.git 30 | [submodule "tools/medaka"] 31 | path = tools/medaka 32 | url = https://github.com/arun-sub/medaka.git 33 | [submodule "tools/Platypus"] 34 | path = tools/Platypus 35 | url = https://github.com/arun-sub/Platypus.git 36 | [submodule "tools/htslib"] 37 | path = tools/htslib 38 | url = https://github.com/arun-sub/htslib.git 39 | [submodule "tools/bonito"] 40 | path = tools/bonito 41 | url = https://github.com/TimD1/bonito.git 42 | [submodule "tools/Flye"] 43 | path = tools/Flye 44 | 
url = https://github.com/arun-sub/Flye.git 45 | [submodule "benchmarks/grm"] 46 | path = benchmarks/grm 47 | url = https://github.com/arun-sub/plink-ng.git 48 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CXX=g++ 2 | CC=gcc 3 | ARCH=avx2 4 | #VTUNE_HOME=/opt/intel/oneapi/vtune/2021.1.1 5 | MKLROOT=/opt/intel/oneapi/mkl/2021.1.1 6 | MKL_IOMP5_DIR=/opt/intel/oneapi/compiler/2021.1.2/linux/compiler/lib/intel64_lin 7 | CUDA_LIB=/usr/local/cuda 8 | 9 | .PHONY: clean 10 | 11 | all: 12 | $(info Starting build..this may take a while..) 13 | cd tools/htslib && autoreconf -i && ./configure && $(MAKE) 14 | cd tools/bwa-mem2; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) 15 | cd benchmarks/fmi; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) 16 | cd benchmarks/bsw; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) 17 | cd benchmarks/dbg; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) 18 | cd tools/GKL; ./gradlew test 19 | cd benchmarks/phmm; $(MAKE) CC=$(CC) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) 20 | cd tools/minimap2; $(MAKE) 21 | cd benchmarks/chain; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) 22 | cd tools/spoa; mkdir build; cd build; cmake -DCMAKE_BUILD_TYPE=Release ..; $(MAKE) 23 | cd benchmarks/poa; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) 24 | cd benchmarks/pileup; $(MAKE) CC=$(CC) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) 25 | cd benchmarks/kmer-cnt; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) 26 | cd benchmarks/grm/2.0/build_dynamic; $(MAKE) CC=$(CC) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) MKLROOT=$(MKLROOT) MKL_IOMP5_DIR=$(MKL_IOMP5_DIR) #needs MKL 27 | 28 | gpu: 29 | cd benchmarks/abea; $(MAKE) CUDA_LIB=$(CUDA_LIB) 30 | 31 | clean: 32 | cd tools/bwa-mem2; $(MAKE) clean 33 | cd benchmarks/fmi; $(MAKE) clean 34 | cd benchmarks/bsw; $(MAKE) clean 35 | cd 
benchmarks/dbg; $(MAKE) clean 36 | cd tools/GKL; ./gradlew clean 37 | cd benchmarks/phmm; $(MAKE) clean 38 | cd tools/minimap2; $(MAKE) clean 39 | cd benchmarks/chain; $(MAKE) clean 40 | cd benchmarks/poa; $(MAKE) clean 41 | cd benchmarks/pileup; $(MAKE) clean 42 | cd benchmarks/kmer-cnt; $(MAKE) clean 43 | cd benchmarks/grm/2.0/build_dynamic; $(MAKE) clean 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | # About 4 | 5 | A benchmark suite covering the major steps in short and long-read genome sequence analysis pipelines such as basecalling, sequence mapping, de-novo assembly, variant calling and polishing. 6 | 7 | ## Download 8 | 9 | * Latest source code 10 | 11 | ```bash 12 | git clone --recursive https://github.com/arun-sub/genomicsbench.git 13 | ``` 14 | 15 | * Input datasets 16 | 17 | ```bash 18 | wget https://genomicsbench.eecs.umich.edu/input-datasets.tar.gz 19 | ``` 20 | 21 | ## Prerequisites 22 | 23 | * RHEL/Fedora system prerequisites 24 | 25 | ```bash 26 | sudo yum -y install $(cat rhel.prerequisites) 27 | ``` 28 | * Debian system prerequisites 29 | 30 | ```bash 31 | sudo apt-get install $(cat debian.prerequisites) 32 | ``` 33 | 34 | ## Python setup (optional: only needed for GPU benchmarks) 35 | 36 | To run Python-based benchmarks nn-base, nn-variant and abea, follow the steps below: 37 | 38 | * Download and install miniconda from [this](https://docs.conda.io/projects/continuumio-conda/en/latest/user-guide/install/download.html) link. 39 | 40 | * Follow the steps below to set up a conda environment: 41 | 42 | ```bash 43 | # make sure channels are added in conda 44 | conda config --add channels defaults 45 | conda config --add channels bioconda 46 | conda config --add channels conda-forge 47 | 48 | # create conda environment named "genomicsbench" 49 | conda create -n genomicsbench -c bioconda clair python==3.6.8 50 | conda activate genomicsbench 51 | conda install deepdish 52 | 53 | pip install --upgrade pip 54 | pip install -r requirements.txt 55 | pypy3 -m ensurepip 56 | pypy3 -m pip install --no-cache-dir intervaltree==3.0.2 57 | ``` 58 | 59 | ## Compile 60 | 61 | Note that the benchmarks have only been tested on gcc/g++-9 because of the dependency of related kernels. 
If there are multiple gcc/g++ versions, please refer to [update-alternatives](https://linuxconfig.org/how-to-switch-between-multiple-gcc-and-g-compiler-versions-on-ubuntu-20-04-lts-focal-fossa) to configure gcc/g++-9. 62 | 63 | * CPU benchmarks 64 | * MKLROOT and MKL_IOMP5_DIR variables need to be set in Makefile to run `grm`. If you don't want to run `grm`, please comment out the `grm`-related commands in Makefile 65 | * VTUNE_HOME variable needs to be set if you want to run any VTune based analyses 66 | 67 | ```bash 68 | make -j 69 | ``` 70 | 71 | * GPU benchmarks 72 | * Set CUDA_LIB=/usr/local/cuda or to the path of the local CUDA installation in Makefile. 73 | * Also ensure environment variables PATH and LD_LIBRARY_PATH include the path to CUDA binaries and libraries. 74 | 75 | ```bash 76 | make -j gpu 77 | ``` 78 | 79 | ## Running 80 | 81 | * CPU benchmarks 82 | 83 | ```bash 84 | cd scripts 85 | chmod +x ./run-cpu.sh 86 | ./run-cpu.sh 87 | ``` 88 | 89 | * GPU benchmarks 90 | 91 | ```bash 92 | cd scripts 93 | chmod +x ./run-gpu.sh 94 | ./run-gpu.sh 95 | ``` 96 | 97 | ## Citation 98 | 99 | If you use GenomicsBench or find GenomicsBench useful, please cite this work: 100 | 101 | > **Arun Subramaniyan, Yufeng Gu, Timothy Dunn, Somnath Paul, Md. Vasimuddin, Sanchit Misra, David Blaauw, Satish Narayanasamy, Reetuparna Das. *GenomicsBench: A Benchmark Suite for Genomics*, In IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), 2021 (to appear)** 102 | 103 | ``` 104 | @inproceedings{genomicsbench, 105 | title={GenomicsBench: A Benchmark Suite for Genomics}, 106 | author={Subramaniyan, Arun and Gu, Yufeng and Dunn, Timothy and Paul, Somnath and Vasimuddin, Md. 
and Misra, Sanchit and Blaauw, David and Narayanasamy, Satish and Das, Reetuparna}, 107 | booktitle={Proceedings of the IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)}, 108 | year={2021} 109 | } 110 | ``` 111 | 112 | ## Issues and bug reporting 113 | 114 | GenomicsBench is under active development and we appreciate any feedback and suggestions from the community. Feel free to raise an issue or submit a pull request on Github. For assistance in using GenomicsBench, please contact: Arun Subramaniyan (arunsub@umich.edu), Yufeng Gu (yufenggu@umich.edu), Timothy Dunn (timdunn@umich.edu) 115 | 116 | ## Licensing 117 | 118 | Each benchmark is individually licensed according to the tool it is extracted from. 119 | 120 | ## Acknowledgement 121 | 122 | This work was supported in part by Precision Health at the University of Michigan, by the Kahn foundation, by the NSF under the CAREER-1652294 award and the Applications Driving Architectures (ADA) Research Center, a JUMP Center co-sponsored by SRC and DARPA. 123 | 124 | -------------------------------------------------------------------------------- /benchmarks/abea/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Hasindu Gamaarachchi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /benchmarks/abea/Makefile: -------------------------------------------------------------------------------- 1 | $(info $(shell mkdir -p build)) 2 | 3 | HDF5 ?= install 4 | HTS ?= install 5 | 6 | HTS_VERSION = 1.9 7 | HDF5_VERSION = 1.10.4 8 | HDF5_MAJOR_MINOR = `echo $(HDF5_VERSION) | sed -E 's/\.[0-9]+$$//'` 9 | 10 | ifdef ENABLE_PROFILE 11 | CFLAGS += -p 12 | endif 13 | 14 | ifeq ($(HDF5), install) 15 | HDF5_LIB = $(BUILD_DIR)/lib/libhdf5.a 16 | HDF5_INC = -I$(BUILD_DIR)/include 17 | LDFLAGS += $(HDF5_LIB) -ldl 18 | else 19 | ifneq ($(HDF5), autoconf) 20 | HDF5_LIB = 21 | HDF5_SYS_LIB = `pkg-config --libs hdf5` 22 | HDF5_INC = `pkg-config --cflags-only-I hdf5` 23 | endif 24 | endif 25 | 26 | ifeq ($(HTS), install) 27 | HTS_LIB = $(BUILD_DIR)/lib/libhts.a 28 | HTS_INC = -I$(BUILD_DIR)/include 29 | LDFLAGS += $(HTS_LIB) 30 | else 31 | ifneq ($(HTS), autoconf) 32 | HTS_LIB = 33 | HTS_SYS_LIB = `pkg-config --libs htslib` 34 | HTS_INC = `pkg-config --cflags-only-I htslib` 35 | endif 36 | endif 37 | 38 | CPPFLAGS += $(HDF5_INC) $(HTS_INC) 39 | LDFLAGS += $(HTS_SYS_LIB) $(HDF5_SYS_LIB) 40 | 41 | CC = gcc 42 | CXX = g++ 43 | LANG = -x c++ 44 | CFLAGS += -g -Wall -O2 -std=c++11 45 | LDFLAGS += $(LIBS) -lpthread -lz -rdynamic 46 | BUILD_DIR = build 47 | 48 | BINARY = f5c 49 | OBJ = $(BUILD_DIR)/main.o \ 50 | $(BUILD_DIR)/meth_main.o \ 51 | $(BUILD_DIR)/f5c.o \ 52 | $(BUILD_DIR)/events.o \ 53 | 
$(BUILD_DIR)/nanopolish_read_db.o \ 54 | $(BUILD_DIR)/nanopolish_index.o \ 55 | $(BUILD_DIR)/nanopolish_fast5_io.o \ 56 | $(BUILD_DIR)/model.o \ 57 | $(BUILD_DIR)/align.o \ 58 | $(BUILD_DIR)/meth.o \ 59 | $(BUILD_DIR)/hmm.o \ 60 | $(BUILD_DIR)/freq.o \ 61 | $(BUILD_DIR)/eventalign.o \ 62 | $(BUILD_DIR)/freq_merge.o 63 | 64 | PREFIX = /usr/local 65 | VERSION = `git describe --tags` 66 | 67 | CUDA_ROOT = /usr/local/cuda 68 | CUDA_LIB ?= $(CUDA_ROOT)/lib64 69 | CUDA_OBJ = $(BUILD_DIR)/f5c_cuda.o $(BUILD_DIR)/align_cuda.o 70 | NVCC ?= nvcc 71 | CUDA_CFLAGS += -g -O2 -std=c++11 -lineinfo $(CUDA_ARCH) -Xcompiler -Wall 72 | CUDA_LDFLAGS = -L$(CUDA_LIB) -lcudart_static -lrt -ldl 73 | OBJ += $(BUILD_DIR)/gpucode.o $(CUDA_OBJ) 74 | CPPFLAGS += -DHAVE_CUDA=1 75 | 76 | .PHONY: clean distclean 77 | 78 | $(BINARY): src/config.h $(HTS_LIB) $(HDF5_LIB) $(OBJ) 79 | $(CXX) $(CFLAGS) $(OBJ) $(LDFLAGS) $(CUDA_LDFLAGS) -o $@ 80 | 81 | $(BUILD_DIR)/main.o: src/main.c src/f5cmisc.h src/error.h 82 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 83 | 84 | $(BUILD_DIR)/meth_main.o: src/meth_main.c src/f5c.h src/fast5lite.h src/f5cmisc.h src/logsum.h 85 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 86 | 87 | $(BUILD_DIR)/f5c.o: src/f5c.c src/f5c.h src/fast5lite.h src/f5cmisc.h 88 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 89 | 90 | $(BUILD_DIR)/events.o: src/events.c src/f5c.h src/fast5lite.h src/f5cmisc.h src/fast5lite.h src/nanopolish_read_db.h src/ksort.h 91 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 92 | 93 | $(BUILD_DIR)/nanopolish_read_db.o: src/nanopolish_read_db.c src/nanopolish_read_db.h 94 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 95 | 96 | $(BUILD_DIR)/nanopolish_index.o: src/nanopolish_index.c src/nanopolish_read_db.h src/fast5lite.h 97 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 98 | 99 | $(BUILD_DIR)/nanopolish_fast5_io.o: src/nanopolish_fast5_io.c src/fast5lite.h 100 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 101 | 102 | 
$(BUILD_DIR)/model.o: src/model.c src/model.h src/f5c.h src/fast5lite.h src/f5cmisc.h 103 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 104 | 105 | $(BUILD_DIR)/align.o: src/align.c src/f5c.h src/fast5lite.h 106 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 107 | 108 | $(BUILD_DIR)/meth.o: src/meth.c src/f5c.h src/fast5lite.h src/f5cmisc.h 109 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 110 | 111 | $(BUILD_DIR)/hmm.o: src/hmm.c src/f5c.h src/fast5lite.h src/f5cmisc.h src/matrix.h src/logsum.h 112 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 113 | 114 | $(BUILD_DIR)/freq.o: src/freq.c src/khash.h 115 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 116 | 117 | $(BUILD_DIR)/eventalign.o: src/eventalign.c 118 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 119 | 120 | $(BUILD_DIR)/freq_merge.o: src/freq_merge.c 121 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@ 122 | 123 | # cuda stuff 124 | $(BUILD_DIR)/gpucode.o: $(CUDA_OBJ) 125 | $(NVCC) $(CUDA_CFLAGS) -dlink $^ -o $@ 126 | 127 | $(BUILD_DIR)/f5c_cuda.o: src/f5c.cu src/error.h src/f5c.h src/fast5lite.h src/f5cmisc.cuh src/f5cmisc.h 128 | $(NVCC) -x cu $(CUDA_CFLAGS) $(CPPFLAGS) -rdc=true -c $< -o $@ 129 | 130 | $(BUILD_DIR)/align_cuda.o: src/align.cu src/f5c.h src/fast5lite.h src/f5cmisc.cuh 131 | $(NVCC) -x cu $(CUDA_CFLAGS) $(CPPFLAGS) -rdc=true -c $< -o $@ 132 | 133 | src/config.h: 134 | echo "/* Default config.h generated by Makefile */" >> $@ 135 | echo "#define HAVE_HDF5_H 1" >> $@ 136 | 137 | $(BUILD_DIR)/lib/libhts.a: 138 | @if command -v curl; then \ 139 | curl -o $(BUILD_DIR)/htslib.tar.bz2 -L https://github.com/samtools/htslib/releases/download/$(HTS_VERSION)/htslib-$(HTS_VERSION).tar.bz2; \ 140 | else \ 141 | wget -O $(BUILD_DIR)/htslib.tar.bz2 https://github.com/samtools/htslib/releases/download/$(HTS_VERSION)/htslib-$(HTS_VERSION).tar.bz2; \ 142 | fi 143 | tar -xf $(BUILD_DIR)/htslib.tar.bz2 -C $(BUILD_DIR) 144 | mv $(BUILD_DIR)/htslib-$(HTS_VERSION) $(BUILD_DIR)/htslib 
145 | rm -f $(BUILD_DIR)/htslib.tar.bz2 146 | cd $(BUILD_DIR)/htslib && \ 147 | ./configure --prefix=`pwd`/../ --enable-bz2=no --enable-lzma=no --with-libdeflate=no --enable-libcurl=no --enable-gcs=no --enable-s3=no && \ 148 | make -j8 && \ 149 | make install 150 | 151 | $(BUILD_DIR)/lib/libhdf5.a: 152 | if command -v curl; then \ 153 | curl -o $(BUILD_DIR)/hdf5.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-$(HDF5_MAJOR_MINOR)/hdf5-$(HDF5_VERSION)/src/hdf5-$(HDF5_VERSION).tar.bz2; \ 154 | else \ 155 | wget -O $(BUILD_DIR)/hdf5.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-$(HDF5_MAJOR_MINOR)/hdf5-$(HDF5_VERSION)/src/hdf5-$(HDF5_VERSION).tar.bz2; \ 156 | fi 157 | tar -xf $(BUILD_DIR)/hdf5.tar.bz2 -C $(BUILD_DIR) 158 | mv $(BUILD_DIR)/hdf5-$(HDF5_VERSION) $(BUILD_DIR)/hdf5 159 | rm -f $(BUILD_DIR)/hdf5.tar.bz2 160 | cd $(BUILD_DIR)/hdf5 && \ 161 | ./configure --prefix=`pwd`/../ && \ 162 | make -j8 && \ 163 | make install 164 | 165 | clean: 166 | rm -rf $(BINARY) $(BUILD_DIR) 167 | -------------------------------------------------------------------------------- /benchmarks/abea/README: -------------------------------------------------------------------------------- 1 | Build: 'make CUDA_LIB=' or just 'make' if /usr/local/cuda points to the install location 2 | Command line: './f5c eventalign -b -g -r ' 3 | -------------------------------------------------------------------------------- /benchmarks/abea/README.md: -------------------------------------------------------------------------------- 1 | `abea` uses the same license as [f5c](https://github.com/hasindu2008/f5c). 
2 | 3 | If you find `abea` useful, please cite: 4 | 5 | ``` 6 | @article{gamaarachchi2020gpu, 7 | title={GPU accelerated adaptive banded event alignment for rapid comparative nanopore signal analysis}, 8 | author={Gamaarachchi, Hasindu and Lam, Chun Wai and Jayatilaka, Gihan and Samarakoon, Hiruna and Simpson, Jared T and Smith, Martin A and Parameswaran, Sri}, 9 | journal={BMC bioinformatics}, 10 | volume={21}, 11 | number={1}, 12 | pages={1--13}, 13 | year={2020}, 14 | publisher={BioMed Central} 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /benchmarks/abea/src/config.h: -------------------------------------------------------------------------------- 1 | /* Default config.h generated by Makefile */ 2 | #define HAVE_HDF5_H 1 3 | -------------------------------------------------------------------------------- /benchmarks/abea/src/error.h: -------------------------------------------------------------------------------- 1 | #ifndef ERROR_H 2 | #define ERROR_H 3 | 4 | #include 5 | 6 | #define WARN "[%s::WARNING]\033[1;33m " 7 | #define ERR "[%s::ERROR]\033[1;31m " 8 | #define CEND "\033[0m\n" 9 | 10 | #define STDERR(arg, ...) \ 11 | fprintf(stderr, "[%s] " arg "\n", __func__, \ 12 | __VA_ARGS__) 13 | #define WARNING(arg, ...) \ 14 | fprintf(stderr, "[%s::WARNING]\033[1;33m " arg "\033[0m\n", __func__, \ 15 | __VA_ARGS__) 16 | #define ERROR(arg, ...) \ 17 | fprintf(stderr, "[%s::ERROR]\033[1;31m " arg "\033[0m\n", __func__, \ 18 | __VA_ARGS__) 19 | #define INFO(arg, ...) \ 20 | fprintf(stderr, "[%s::INFO]\033[1;34m " arg "\033[0m\n", __func__, \ 21 | __VA_ARGS__) 22 | #define SUCCESS(arg, ...) \ 23 | fprintf(stderr, "[%s::SUCCESS]\033[1;32m " arg "\033[0m\n", __func__, \ 24 | __VA_ARGS__) 25 | #define DEBUG(arg, ...) \ 26 | fprintf(stderr, \ 27 | "[%s::DEBUG]\033[1;35m Error occured at %s:%d. 
" arg "\033[0m\n", \ 28 | __func__, __FILE__, __LINE__ - 2, __VA_ARGS__) 29 | 30 | #define MALLOC_CHK(ret) malloc_chk((void*)ret, __func__, __FILE__, __LINE__ - 1) 31 | #define F_CHK(ret, filename) \ 32 | f_chk((void*)ret, __func__, __FILE__, __LINE__ - 1, filename); 33 | #define NULL_CHK(ret) null_chk((void*)ret, __func__, __FILE__, __LINE__ - 1) 34 | #define NEG_CHK(ret) neg_chk(ret, __func__, __FILE__, __LINE__ - 1) 35 | 36 | static inline void malloc_chk(void* ret, const char* func, const char* file, 37 | int line) { 38 | if (ret != NULL) 39 | return; 40 | fprintf( 41 | stderr, 42 | "[%s::ERROR]\033[1;31m Failed to allocate memory : " 43 | "%s.\033[0m\n[%s::DEBUG]\033[1;35m Error occured at %s:%d. Try with a small batchsize (-K and/or -B options) to reduce the peak memory\033[0m\n\n", 44 | func, strerror(errno), func, file, line); 45 | exit(EXIT_FAILURE); 46 | } 47 | 48 | static inline void f_chk(void* ret, const char* func, const char* file, 49 | int line, const char* fopen_f) { 50 | if (ret != NULL) 51 | return; 52 | fprintf( 53 | stderr, 54 | "[%s::ERROR]\033[1;31m Failed to open %s : " 55 | "%s.\033[0m\n[%s::DEBUG]\033[1;35m Error occured at %s:%d.\033[0m\n\n", 56 | func, fopen_f, strerror(errno), func, file, line); 57 | exit(EXIT_FAILURE); 58 | } 59 | 60 | // Die on error. Print the error and exit if the return value of the previous function NULL 61 | static inline void null_chk(void* ret, const char* func, const char* file, 62 | int line) { 63 | if (ret != NULL) 64 | return; 65 | fprintf(stderr, 66 | "[%s::ERROR]\033[1;31m %s.\033[0m\n[%s::DEBUG]\033[1;35m Error " 67 | "occured at %s:%d.\033[0m\n\n", 68 | func, strerror(errno), func, file, line); 69 | exit(EXIT_FAILURE); 70 | } 71 | 72 | // Die on error. 
Print the error and exit if the return value of the previous function is -1 73 | static inline void neg_chk(int ret, const char* func, const char* file, 74 | int line) { 75 | if (ret >= 0) 76 | return; 77 | fprintf(stderr, 78 | "[%s::ERROR]\033[1;31m %s.\033[0m\n[%s::DEBUG]\033[1;35m Error " 79 | "occured at %s:%d.\033[0m\n\n", 80 | func, strerror(errno), func, file, line); 81 | exit(EXIT_FAILURE); 82 | } 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /benchmarks/abea/src/f5cmisc.h: -------------------------------------------------------------------------------- 1 | #ifndef F5CMISC_H 2 | #define F5CMISC_H 3 | 4 | #include "error.h" 5 | #include "f5c.h" 6 | #include 7 | #include 8 | 9 | #define MIN_CALIBRATION_VAR 2.5 10 | #define MAX_EVENT_TO_BP_RATIO 20 11 | 12 | // Flags to modify the behaviour of the HMM 13 | enum HMMAlignmentFlags 14 | { 15 | HAF_ALLOW_PRE_CLIP = 1, // allow events to go unmatched before the aligning region 16 | HAF_ALLOW_POST_CLIP = 2 // allow events to go unmatched after the aligning region 17 | }; 18 | 19 | event_table getevents(size_t nsample, float* rawptr); 20 | void read_model(model_t* model, const char* file); 21 | void set_model(model_t* model); 22 | void set_cpgmodel(model_t* model); 23 | scalings_t estimate_scalings_using_mom(char* sequence, int32_t sequence_len, 24 | model_t* pore_model, event_table et); 25 | int32_t align(AlignedPair* out_2, char* sequence, int32_t sequence_len, 26 | event_table events, model_t* models, scalings_t scaling, 27 | float sample_rate); 28 | int32_t postalign(event_alignment_t* alignment, index_pair_t* base_to_event_map, double* events_per_base, 29 | char* sequence, int32_t n_kmers, AlignedPair* event_alignment, 30 | int32_t n_events); 31 | bool recalibrate_model(model_t* pore_model, event_table et, 32 | scalings_t* scallings, 33 | const event_alignment_t* alignment_output, 34 | int32_t num_alignments, bool scale_var); 35 | 36 | float 
profile_hmm_score(const char *m_seq,const char *m_rc_seq, event_t* event, scalings_t scaling, model_t* cpgmodel, uint32_t event_start_idx, 37 | uint32_t event_stop_idx, 38 | uint8_t strand, 39 | int8_t event_stride, 40 | uint8_t rc,double events_per_base,uint32_t hmm_flags 41 | ); 42 | void calculate_methylation_for_read(std::map* site_score_map, char* ref, bam1_t* record, int32_t read_length, event_t* event, index_pair_t* base_to_event_map, 43 | scalings_t scaling, model_t* cpgmodel,double events_per_base); 44 | 45 | void emit_event_alignment_tsv(FILE* fp, 46 | uint32_t strand_idx, 47 | const event_table* et, model_t* model, scalings_t scalings, 48 | const std::vector& alignments, 49 | int8_t print_read_names, int8_t scale_events, int8_t write_samples, 50 | int64_t read_index, char* read_name, char *ref_name, float sample_rate); 51 | 52 | void emit_event_alignment_tsv_header(FILE* fp, int8_t print_read_names, int8_t write_samples); 53 | 54 | void emit_sam_header(samFile* fp, const bam_hdr_t* hdr); 55 | 56 | void emit_event_alignment_sam(htsFile* fp, 57 | char* read_name, 58 | bam_hdr_t* base_hdr, 59 | bam1_t* base_record, 60 | const std::vector& alignments 61 | ); 62 | 63 | void realign_read(std::vector* event_alignment_result, EventalignSummary *summary, FILE *summary_fp,char* ref, 64 | const bam_hdr_t* hdr, 65 | const bam1_t* record, int32_t read_length, 66 | size_t read_idx, 67 | int region_start, 68 | int region_end, 69 | event_table* events, model_t* model,index_pair_t* base_to_event_map,scalings_t scaling,double events_per_base, float sample_rate); 70 | 71 | //basically the functions in nanopolish_profile_hmm_r9.* 72 | float profile_hmm_score_r9(const char *m_seq, 73 | const char *m_rc_seq, 74 | event_t* event, 75 | scalings_t scaling, 76 | model_t* cpgmodel, 77 | uint32_t event_start_idx, 78 | uint32_t event_stop_idx, 79 | uint8_t strand, 80 | int8_t event_stride, 81 | uint8_t rc, 82 | double events_per_base, 83 | uint32_t hmm_flags); 84 | 85 | 86 | 87 | 88 
| 89 | 90 | 91 | 92 | 93 | #ifdef HAVE_CUDA 94 | void align_cuda(core_t* core, db_t* db); 95 | #endif 96 | 97 | // taken from minimap2/misc 98 | static inline double realtime(void) { 99 | struct timeval tp; 100 | struct timezone tzp; 101 | gettimeofday(&tp, &tzp); 102 | return tp.tv_sec + tp.tv_usec * 1e-6; 103 | } 104 | 105 | // taken from minimap2/misc 106 | static inline double cputime(void) { 107 | struct rusage r; 108 | getrusage(RUSAGE_SELF, &r); 109 | return r.ru_utime.tv_sec + r.ru_stime.tv_sec + 110 | 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec); 111 | } 112 | 113 | //taken from minimap2 114 | static inline long peakrss(void) 115 | { 116 | struct rusage r; 117 | getrusage(RUSAGE_SELF, &r); 118 | #ifdef __linux__ 119 | return r.ru_maxrss * 1024; 120 | #else 121 | return r.ru_maxrss; 122 | #endif 123 | } 124 | 125 | // Prints to stderr a label followed by a human-readable number of bytes (B, KB, MB, GB, etc) 126 | //from https://www.mbeckler.org/blog/?p=114 127 | static inline void print_size(const char* name, uint64_t bytes) 128 | { 129 | const char* suffixes[7]; 130 | suffixes[0] = "B"; 131 | suffixes[1] = "KB"; 132 | suffixes[2] = "MB"; 133 | suffixes[3] = "GB"; 134 | suffixes[4] = "TB"; 135 | suffixes[5] = "PB"; 136 | suffixes[6] = "EB"; 137 | uint64_t s = 0; // which suffix to use 138 | double count = bytes; 139 | while (count >= 1024 && s < 6) // 6 is the last valid index of suffixes[] 140 | { 141 | s++; 142 | count /= 1024; 143 | } 144 | if (count - floor(count) == 0.0) 145 | fprintf(stderr, "[%s] %s : %d %s\n", __func__ , name, (int)count, suffixes[s]); 146 | else 147 | fprintf(stderr, "[%s] %s : %.1f %s\n", __func__, name, count, suffixes[s]); 148 | } 149 | 150 | 151 | #endif 152 | -------------------------------------------------------------------------------- /benchmarks/abea/src/logsum.h: -------------------------------------------------------------------------------- 1 | // 2 | // logsum -- a port of Sean Eddy's fast table-driven log sum 3 | // This code was originally part of HMMER. 
This version is used with 4 | // Sean Eddy's permission as public domain code. 5 | // 6 | 7 | #ifndef LOGSUM_H 8 | #define LOGSUM_H 9 | 10 | /* p7_LOGSUM_SCALE defines the precision of the calculation; the 11 | * default of 1000.0 means rounding differences to the nearest 0.001 12 | * nat. p7_LOGSUM_TBL defines the size of the lookup table; the 13 | * default of 16000 means entries are calculated for differences of 0 14 | * to 16.000 nats (when p7_LOGSUM_SCALE is 1000.0). e^{-p7_LOGSUM_TBL / 15 | * p7_LOGSUM_SCALE} should be on the order of the machine FLT_EPSILON, 16 | * typically 1.2e-7. 17 | */ 18 | #define p7_LOGSUM_TBL 16000 19 | #define p7_LOGSUM_SCALE 1000.f 20 | #define ESL_MAX(a,b) (((a)>(b))?(a):(b)) 21 | #define ESL_MIN(a,b) (((a)<(b))?(a):(b)) 22 | #define eslINFINITY INFINITY 23 | 24 | /* Synopsis: Initialize the p7_Logsum() function. 25 | * 26 | * Purpose: Initialize the lookup table for . 27 | * This function must be called once before any 28 | * call to . 29 | * 30 | * The precision of the lookup table is determined 31 | * by the compile-time constant. 32 | * 33 | * Returns: on success. 34 | */ 35 | static inline int p7_FLogsumInit(void) 36 | { 37 | 38 | // static int firsttime = TRUE; 39 | // if (!firsttime) return 1; 40 | // firsttime = FALSE; 41 | 42 | extern float flogsum_lookup[p7_LOGSUM_TBL]; 43 | int i; 44 | for (i = 0; i < p7_LOGSUM_TBL; i++) { 45 | flogsum_lookup[i] = log(1. + exp((double) -i / p7_LOGSUM_SCALE)); 46 | } 47 | return 1; 48 | } 49 | 50 | /* Function: p7_FLogsum() 51 | * Synopsis: Approximate $\log(e^a + e^b)$. 52 | * 53 | * Purpose: Returns a fast table-driven approximation to 54 | * $\log(e^a + e^b)$. 55 | * 56 | * Either or (or both) may be $-\infty$, 57 | * but neither may be $+\infty$ or . 
58 | * 59 | * Note: This function is a critical optimization target, because 60 | * it's in the inner loop of generic Forward() algorithms.*/ 61 | static inline float p7_FLogsum(float a, float b){ 62 | 63 | extern float flogsum_lookup[p7_LOGSUM_TBL]; /* p7_LOGSUM_TBL=16000: (A-B) = 0..16 nats, steps of 0.001 */ 64 | 65 | const float max = ESL_MAX(a, b); 66 | const float min = ESL_MIN(a, b); 67 | 68 | //return (min == -eslINFINITY || (max-min) >= 15.7f) ? max : max + log(1.0 + exp(min-max)); /* SRE: While debugging SSE impl. Remember to remove! */ 69 | 70 | return (min == -eslINFINITY || (max-min) >= 15.7f) ? max : max + flogsum_lookup[(int)((max-min)*p7_LOGSUM_SCALE)]; 71 | } 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /benchmarks/abea/src/main.c: -------------------------------------------------------------------------------- 1 | /* @f5c 2 | ** 3 | ** main 4 | ** @author: Hasindu Gamaarachchi (hasindu@unsw.edu.au) 5 | ** @@ 6 | ******************************************************************************/ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "f5cmisc.h" 14 | #include "error.h" 15 | 16 | #ifdef HAVE_EXECINFO_H 17 | #include 18 | #endif 19 | 20 | //make the segmentation faults a bit cool 21 | void sig_handler(int sig) { 22 | #ifdef HAVE_EXECINFO_H 23 | void* array[100]; 24 | size_t size = backtrace(array, 100); 25 | ERROR("I regret to inform that a segmentation fault occurred. But at least " 26 | "it is better than a wrong answer%s", 27 | "."); 28 | fprintf(stderr, 29 | "[%s::DEBUG]\033[1;35m Here is the backtrace in case it is of any " 30 | "use:\n", 31 | __func__); 32 | backtrace_symbols_fd(&array[2], size - 1, STDERR_FILENO); 33 | fprintf(stderr, "\033[0m\n"); 34 | #else 35 | ERROR("I regret to inform that a segmentation fault occurred. 
But at least " 36 | "it is better than a wrong answer%s", 37 | "."); 38 | #endif 39 | exit(EXIT_FAILURE); 40 | } 41 | 42 | int meth_main(int argc, char* argv[], int8_t mode); 43 | int index_main(int argc, char** argv); 44 | int freq_main(int argc, char **argv); 45 | int freq_merge_main(int argc, char **argv); 46 | 47 | int print_usage(FILE *fp_help){ 48 | 49 | fprintf(fp_help,"Usage: f5c [options]\n\n"); 50 | fprintf(fp_help,"command:\n"); 51 | fprintf(fp_help," index Build an index mapping from basecalled reads to the signals measured by the sequencer (same as nanopolish index)\n"); 52 | fprintf(fp_help," call-methylation Classify nucleotides as methylated or not (optimised nanopolish call-methylation)\n"); 53 | fprintf(fp_help," meth-freq Calculate methylation frequency at genomic CpG sites (optimised nanopolish calculate_methylation_frequency.py)\n"); 54 | fprintf(fp_help," eventalign Align nanopore events to reference k-mers (optimised nanopolish eventalign)\n"); 55 | fprintf(fp_help," freq-merge Merge calculated methylation frequency tsv files\n\n"); 56 | if(fp_help==stderr){ 57 | exit(EXIT_FAILURE); 58 | } 59 | else if(fp_help==stdout){ 60 | exit(EXIT_SUCCESS); 61 | } 62 | else{ 63 | assert(0); 64 | } 65 | 66 | 67 | } 68 | 69 | 70 | int main(int argc, char* argv[]){ 71 | 72 | double realtime0 = realtime(); 73 | signal(SIGSEGV, sig_handler); 74 | 75 | int ret=1; 76 | 77 | if(argc<2){ 78 | return print_usage(stderr); 79 | } 80 | if(strcmp(argv[1],"index")==0){ 81 | ret=index_main(argc-1, argv+1); 82 | } 83 | else if(strcmp(argv[1],"call-methylation")==0){ 84 | ret=meth_main(argc-1, argv+1,0); 85 | } 86 | else if(strcmp(argv[1],"eventalign")==0){ 87 | ret=meth_main(argc-1, argv+1,1); 88 | } 89 | else if(strcmp(argv[1],"meth-freq")==0){ 90 | ret=freq_main(argc-1, argv+1); 91 | } 92 | else if(strcmp(argv[1],"freq-merge")==0){ 93 | ret=freq_merge_main(argc-1, argv+1); 94 | } 95 | else if(strcmp(argv[1],"--version")==0 || strcmp(argv[1],"-V")==0){ 96 | 
fprintf(stdout,"F5C %s\n",F5C_VERSION); 97 | exit(EXIT_SUCCESS); 98 | } 99 | else if(strcmp(argv[1],"--help")==0 || strcmp(argv[1],"-h")==0){ 100 | print_usage(stdout); 101 | } 102 | else{ 103 | fprintf(stderr,"[f5c] Unrecognised command %s\n",argv[1]); 104 | print_usage(stderr); 105 | } 106 | 107 | fprintf(stderr,"[%s] Version: %s\n", __func__,F5C_VERSION); 108 | fprintf(stderr, "[%s] CMD:", __func__); 109 | for (int i = 0; i < argc; ++i) { 110 | fprintf(stderr, " %s", argv[i]); 111 | } 112 | 113 | fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU time: %.3f sec; Peak RAM: %.3f GB\n\n", 114 | __func__, realtime() - realtime0, cputime(),peakrss() / 1024.0 / 1024.0 / 1024.0); 115 | 116 | return ret; 117 | } 118 | -------------------------------------------------------------------------------- /benchmarks/abea/src/matrix.h: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------- 2 | // Copyright 2015 Ontario Institute for Cancer Research 3 | // Written by Jared Simpson (jared.simpson@oicr.on.ca) 4 | //--------------------------------------------------------- 5 | // 6 | // nanopolish_matrix -- matrix manipulation functions 7 | // 8 | #ifndef MATRIX_H 9 | #define MATRIX_H 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | // 18 | // Template Matrix for POD types 19 | // 20 | template 21 | struct Matrix 22 | { 23 | T* cells; 24 | uint32_t n_rows; 25 | uint32_t n_cols; 26 | }; 27 | 28 | typedef Matrix DoubleMatrix; 29 | typedef Matrix FloatMatrix; 30 | typedef Matrix UInt32Matrix; 31 | typedef Matrix UInt8Matrix; 32 | 33 | // 34 | template 35 | void allocate_matrix(Matrix& matrix, uint32_t n_rows, uint32_t n_cols) 36 | { 37 | matrix.n_rows = n_rows; 38 | matrix.n_cols = n_cols; 39 | 40 | uint32_t N = matrix.n_rows * matrix.n_cols; 41 | matrix.cells = (T*)malloc(N * sizeof(T)); 42 | memset(matrix.cells, 0, N * sizeof(T)); 43 | } 44 | 45 | // 46 | 
template 47 | void free_matrix(Matrix& matrix) 48 | { 49 | assert(matrix.cells != NULL); 50 | free(matrix.cells); 51 | matrix.cells = NULL; 52 | } 53 | 54 | // Copy a matrix and its contents 55 | template 56 | void copy_matrix(Matrix& new_matrix, const Matrix& old_matrix) 57 | { 58 | allocate_matrix(new_matrix, old_matrix.n_rows, old_matrix.n_cols); 59 | uint32_t bytes = sizeof(T) * new_matrix.n_rows * new_matrix.n_cols; 60 | memcpy(new_matrix.cells, old_matrix.cells, bytes); 61 | } 62 | 63 | template 64 | inline uint32_t cell(const Matrix& matrix, uint32_t row, uint32_t col) 65 | { 66 | return row * matrix.n_cols + col; 67 | } 68 | 69 | template 70 | inline void set(Matrix& matrix, uint32_t row, uint32_t col, U v) 71 | { 72 | uint32_t c = cell(matrix, row, col); 73 | matrix.cells[c] = v; 74 | } 75 | 76 | template 77 | inline T get(const Matrix& matrix, uint32_t row, uint32_t col) 78 | { 79 | uint32_t c = cell(matrix, row, col); 80 | return matrix.cells[c]; 81 | } 82 | 83 | // 84 | inline void print_matrix(const DoubleMatrix& matrix, bool do_exp = false) 85 | { 86 | for(uint32_t i = 0; i < matrix.n_rows; ++i) { 87 | for(uint32_t j = 0; j < matrix.n_cols; ++j) { 88 | uint32_t c = cell(matrix, i, j); 89 | double v = matrix.cells[c]; 90 | if(do_exp) 91 | v = exp(v); 92 | printf("%.3lf\t", v); 93 | } 94 | printf("\n"); 95 | } 96 | } 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /benchmarks/abea/src/model.c: -------------------------------------------------------------------------------- 1 | 2 | #include "model.h" 3 | #include "f5c.h" 4 | #include "f5cmisc.h" 5 | #include 6 | #include 7 | #include 8 | 9 | //#define DEBUG_MODEL_PRINT 1 10 | 11 | void read_model(model_t* model, const char* file) { 12 | FILE* fp = fopen(file, "r"); 13 | F_CHK(fp, file); 14 | 15 | //these two are discarded from the model. 
hollow vars 16 | char kmer[10]; 17 | float weight; 18 | 19 | //buffers for geline 20 | char* buffer = 21 | (char*)malloc(sizeof(char) * (100)); //READ+newline+nullcharacter 22 | MALLOC_CHK(buffer); 23 | size_t bufferSize = 100; 24 | ssize_t readlinebytes = 0; 25 | 26 | uint32_t num_k = 0; 27 | uint32_t i = 0; 28 | 29 | while ((readlinebytes = getline(&buffer, &bufferSize, fp)) != -1) { 30 | if (buffer[0] == '#' || 31 | strcmp( 32 | buffer, 33 | "kmer\tlevel_mean\tlevel_stdv\tsd_mean\tsd_stdv\tweight\n") == 34 | 0 || 35 | buffer[0] == '\n' || buffer[0] == '\r') { //comments and header 36 | //todo : (make generic) 37 | //fprintf(stderr, "%s\n", buffer); 38 | continue; 39 | } else { 40 | //as sd_mean and sd_stdv seems not to be used just read to the summy weight 41 | #ifdef LOAD_SD_MEANSSTDV 42 | int32_t ret = 43 | sscanf(buffer, "%s\t%f\t%f\t%f\t%f\t%f", kmer, 44 | &model[num_k].level_mean, &model[num_k].level_stdv, 45 | &model[num_k].sd_mean, &model[num_k].sd_stdv, &weight); 46 | #else 47 | int32_t ret = 48 | sscanf(buffer, "%s\t%f\t%f\t%f\t%f\t%f", kmer, 49 | &model[num_k].level_mean, &model[num_k].level_stdv, 50 | &weight, &weight, &weight); 51 | #endif 52 | #ifdef CACHED_LOG 53 | model[num_k].level_log_stdv=log(model[num_k].level_stdv); 54 | #endif 55 | num_k++; 56 | if (ret != 6) { 57 | ERROR("File %s is corrupted at line %d", file, i); 58 | } 59 | if (num_k > NUM_KMER) { 60 | ERROR("File %s has too many entries. Expected %d kmers in the " 61 | "model, but file had more than that", 62 | file, NUM_KMER); 63 | exit(EXIT_FAILURE); 64 | } 65 | } 66 | i++; 67 | } 68 | 69 | if (num_k != NUM_KMER) { 70 | ERROR("File %s prematurely ended. 
Expected %d kmers in the model, but " 71 | "file had only%d", 72 | file, NUM_KMER, num_k); 73 | exit(EXIT_FAILURE); 74 | } 75 | 76 | #ifdef DEBUG_MODEL_PRINT 77 | i = 0; 78 | fprintf(stderr, "level_mean\tlevel_stdv\tsd_mean\tsd_stdv\n"); 79 | for (i = 0; i < NUM_KMER; i++) { 80 | fprintf(stderr, "%f\t%f\t%f\t%f\n", model[i].level_mean, 81 | model[i].level_stdv, model[i].sd_mean, model[i].sd_stdv); 82 | } 83 | #endif 84 | 85 | free(buffer); 86 | fclose(fp); 87 | } 88 | 89 | //this function can be made more efficient by setting the address to the global variable 90 | void set_model(model_t* model) { 91 | uint32_t i = 0; 92 | for (i = 0; i < NUM_KMER; i++) { 93 | model[i].level_mean = 94 | r9_4_450bps_nucleotide_6mer_template_model_builtin_data[i * 4 + 0]; 95 | model[i].level_stdv = 96 | r9_4_450bps_nucleotide_6mer_template_model_builtin_data[i * 4 + 1]; 97 | #ifdef LOAD_SD_MEANSSTDV 98 | model[i].sd_mean = 99 | r9_4_450bps_nucleotide_6mer_template_model_builtin_data[i * 4 + 2]; 100 | model[i].sd_stdv = 101 | r9_4_450bps_nucleotide_6mer_template_model_builtin_data[i * 4 + 3]; 102 | #endif 103 | #ifdef CACHED_LOG 104 | model[i].level_log_stdv=log(model[i].level_stdv); 105 | #endif 106 | } 107 | #ifdef DEBUG_MODEL_PRINT 108 | i = 0; 109 | fprintf(stderr, "level_mean\tlevel_stdv\tsd_mean\tsd_stdv\n"); 110 | for (i = 0; i < NUM_KMER; i++) { 111 | fprintf(stderr, "%f\t%f\t%f\t%f\n", model[i].level_mean, 112 | model[i].level_stdv, model[i].sd_mean, model[i].sd_stdv); 113 | } 114 | #endif 115 | } 116 | 117 | //todo : this function can be made more efficient by setting the address to the global variable 118 | //todo : duplicate function can be removed 119 | void set_cpgmodel(model_t* model) { 120 | uint32_t i = 0; 121 | for (i = 0; i < NUM_KMER_METH; i++) { 122 | model[i].level_mean = 123 | r9_4_450bps_cpg_6mer_template_model_builtin_data[i * 4 + 0]; 124 | model[i].level_stdv = 125 | r9_4_450bps_cpg_6mer_template_model_builtin_data[i * 4 + 1]; 126 | #ifdef LOAD_SD_MEANSSTDV 
127 | model[i].sd_mean = 128 | r9_4_450bps_cpg_6mer_template_model_builtin_data[i * 4 + 2]; 129 | model[i].sd_stdv = 130 | r9_4_450bps_cpg_6mer_template_model_builtin_data[i * 4 + 3]; 131 | #endif 132 | #ifdef CACHED_LOG 133 | model[i].level_log_stdv=log(model[i].level_stdv); 134 | #endif 135 | } 136 | #ifdef DEBUG_MODEL_PRINT 137 | i = 0; 138 | fprintf(stderr, "level_mean\tlevel_stdv\tsd_mean\tsd_stdv\n"); 139 | for (i = 0; i < NUM_KMER; i++) { 140 | fprintf(stderr, "%f\t%f\t%f\t%f\n", model[i].level_mean, 141 | model[i].level_stdv, model[i].sd_mean, model[i].sd_stdv); 142 | } 143 | #endif 144 | } 145 | -------------------------------------------------------------------------------- /benchmarks/abea/src/nanopolish_read_db.h: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------- 2 | // Copyright 2017 Ontario Institute for Cancer Research 3 | // Written by Jared Simpson (jared.simpson@oicr.on.ca) 4 | //--------------------------------------------------------- 5 | // 6 | // nanopolish_read_db -- database of reads and their 7 | // associated signal data 8 | // 9 | #ifndef NANOPOLISH_READ_DB 10 | #define NANOPOLISH_READ_DB 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | struct ReadDBData { 18 | // path to the signal-level data for this read 19 | std::string signal_data_path; 20 | }; 21 | 22 | class ReadDB { 23 | public: 24 | ReadDB(); 25 | ~ReadDB(); 26 | 27 | // 28 | // I/O 29 | // 30 | 31 | // construct the database from an input reads file 32 | void build(const std::string& reads_filename); 33 | 34 | // save the database to disk 35 | void save() const; 36 | 37 | // restore the database from disk 38 | void load(const std::string& reads_filename); 39 | 40 | // 41 | // Data Access 42 | // 43 | 44 | // set the signal path for the given read 45 | void add_signal_path(const std::string& read_id, const std::string& path); 46 | 47 | // returns the path to the signal data 
for the given read 48 | std::string get_signal_path(const std::string& read_id) const; 49 | 50 | // returns true if a read with this ID is in the DB 51 | bool has_read(const std::string& read_id) const; 52 | 53 | // returns the basecalled sequence for the given read 54 | std::string get_read_sequence(const std::string& read_id) const; 55 | 56 | // returns the number of reads in the database 57 | size_t get_num_reads() const { return m_data.size(); } 58 | 59 | // 60 | // Summaries and sanity checks 61 | // 62 | 63 | // return the number of reads with a fast5 file 64 | size_t get_num_reads_with_path() const; 65 | 66 | // returns true if all reads in the database have paths to their 67 | // signal-level data 68 | bool check_signal_paths() const; 69 | 70 | // print some summary stats about the database 71 | void print_stats() const; 72 | 73 | private: 74 | // 75 | void import_reads(const std::string& input_filename, 76 | const std::string& output_fasta_filename); 77 | 78 | // the filename of the indexed data, after converting to fasta 79 | std::string m_indexed_reads_filename; 80 | 81 | // 82 | std::map m_data; 83 | 84 | // 85 | faidx_t* m_fai; 86 | }; 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /benchmarks/abea/src/profiles.h: -------------------------------------------------------------------------------- 1 | #ifndef PROFILES_H 2 | #define PROFILES_H 3 | 4 | #include 5 | 6 | typedef struct{ 7 | float cuda_max_readlen; // max-lf 8 | float cuda_avg_events_per_kmer; // avg-epk 9 | float cuda_max_events_per_kmer; // max-epk 10 | int32_t batch_size; // K 11 | int64_t batch_size_bases; // B 12 | int32_t num_thread; // t 13 | int64_t ultra_thresh; // ultra-thresh 14 | } parameters; 15 | 16 | parameters JetsonTx2 = { 17 | .cuda_max_readlen = 3.0, 18 | .cuda_avg_events_per_kmer = 2.0, 19 | .cuda_max_events_per_kmer = 5.0, 20 | .batch_size = 512, 21 | .batch_size_bases = 2350000, 22 | .num_thread = 6, 23 | 
.ultra_thresh = 100000 24 | }; 25 | 26 | parameters Nanojet = { 27 | .cuda_max_readlen = 3.0, 28 | .cuda_avg_events_per_kmer = 2.0, 29 | .cuda_max_events_per_kmer = 5.0, 30 | .batch_size = 200, 31 | .batch_size_bases = 1400000, 32 | .num_thread = 4, 33 | .ultra_thresh = 100000 34 | }; 35 | 36 | parameters Xavier = { 37 | .cuda_max_readlen = 3.0, 38 | .cuda_avg_events_per_kmer = 2.0, 39 | .cuda_max_events_per_kmer = 6.25, 40 | .batch_size = 1024, 41 | .batch_size_bases = 4700000, 42 | .num_thread = 8, 43 | .ultra_thresh = 100000 44 | }; 45 | 46 | void set_opt_profile(opt_t *opt, parameters machine); 47 | #endif 48 | -------------------------------------------------------------------------------- /benchmarks/bsw/Makefile: -------------------------------------------------------------------------------- 1 | ##/************************************************************************************* 2 | ## The MIT License 3 | ## 4 | ## BWA-MEM2 (Sequence alignment using Burrows-Wheeler Transform), 5 | ## Copyright (C) 2019 Vasimuddin Md, Sanchit Misra, Intel Corporation, Heng Li. 6 | ## 7 | ## Permission is hereby granted, free of charge, to any person obtaining 8 | ## a copy of this software and associated documentation files (the 9 | ## "Software"), to deal in the Software without restriction, including 10 | ## without limitation the rights to use, copy, modify, merge, publish, 11 | ## distribute, sublicense, and/or sell copies of the Software, and to 12 | ## permit persons to whom the Software is furnished to do so, subject to 13 | ## the following conditions: 14 | ## 15 | ## The above copyright notice and this permission notice shall be 16 | ## included in all copies or substantial portions of the Software. 17 | ## 18 | ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | ## EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | ## MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | ## NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | ## BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | ## ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | ## CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | ## SOFTWARE. 26 | ## 27 | ##Contacts: Vasimuddin Md ; Sanchit Misra ; 28 | ## Heng Li 29 | ##*****************************************************************************************/ 30 | 31 | 32 | EXE= bsw 33 | CXX= g++ 34 | #CXX= icpc 35 | 36 | ARCH_FLAGS= -msse4.1 37 | 38 | ifeq ($(arch),sse41) 39 | ARCH_FLAGS=-msse4.1 40 | else ifeq ($(arch),avx2) 41 | ifeq ($(CXX), icpc) 42 | ARCH_FLAGS=-march=core-avx2 #-xCORE-AVX2 43 | else 44 | ARCH_FLAGS=-mavx2 45 | endif 46 | else ifeq ($(arch),avx512) 47 | ifeq ($(CXX), icpc) 48 | ARCH_FLAGS=-xCORE-AVX512 49 | else 50 | ARCH_FLAGS=-mavx512bw 51 | endif 52 | else ifeq ($(arch),native) 53 | ARCH_FLAGS=-march=native 54 | else ifneq ($(arch),) 55 | ## To provide a different architecture flag like -march=core-avx2. 
56 | ARCH_FLAGS=$(arch) 57 | endif 58 | 59 | CXXFLAGS= -DSORT_PAIRS -DENABLE_PREFETCH -DBWA_OTHER_ELE=0 -O3 -std=c++11 -fopenmp $(ARCH_FLAGS) #-mtune=native -march=native 60 | #VTUNE_HOME= /opt/intel/oneapi/vtune/2021.1.1 61 | INCLUDES= 62 | LIBS= -fopenmp -lz -ldl 63 | 64 | ifneq ($(VTUNE_HOME),) 65 | CXXFLAGS+= -DVTUNE_ANALYSIS=1 66 | INCLUDES+= -I${VTUNE_HOME}/include 67 | LIBS+=-L${VTUNE_HOME}/lib64 -littnotify 68 | endif 69 | 70 | .PHONY:all clean depend 71 | .SUFFIXES:.cpp .o 72 | 73 | .cpp.o: 74 | $(CXX) -c $(CXXFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@ 75 | 76 | all:$(EXE) 77 | 78 | bsw:main_banded.o bandedSWA.o 79 | $(CXX) -o $@ $^ $(LIBS) 80 | 81 | clean: 82 | rm -fr *.o $(EXE) 83 | 84 | 85 | # DO NOT DELETE 86 | main_banded.o: bandedSWA.h macro.h utils.h 87 | bandedSWA.o: bandedSWA.h macro.h utils.h 88 | -------------------------------------------------------------------------------- /benchmarks/bsw/README.md: -------------------------------------------------------------------------------- 1 | `bsw` uses the same license as [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2). 2 | 3 | If you use `bsw`, please cite: 4 | 5 | ``` 6 | @inproceedings{DBLP:conf/ipps/VasimuddinMLA19, 7 | author = {Md. 
Vasimuddin and 8 | Sanchit Misra and 9 | Heng Li and 10 | Srinivas Aluru}, 11 | title = {Efficient Architecture-Aware Acceleration of {BWA-MEM} for Multicore 12 | Systems}, 13 | booktitle = {2019 {IEEE} International Parallel and Distributed Processing Symposium, 14 | {IPDPS} 2019, Rio de Janeiro, Brazil, May 20-24, 2019}, 15 | pages = {314--324}, 16 | publisher = {{IEEE}}, 17 | year = {2019}, 18 | url = {https://doi.org/10.1109/IPDPS.2019.00041}, 19 | doi = {10.1109/IPDPS.2019.00041}, 20 | timestamp = {Wed, 16 Oct 2019 14:14:51 +0200}, 21 | biburl = {https://dblp.org/rec/conf/ipps/VasimuddinMLA19.bib}, 22 | bibsource = {dblp computer science bibliography, https://dblp.org} 23 | } 24 | ``` 25 | 26 | 27 | -------------------------------------------------------------------------------- /benchmarks/bsw/macro.h: -------------------------------------------------------------------------------- 1 | /************************************************************************************* 2 | The MIT License 3 | 4 | BWA-MEM2 (Sequence alignment using Burrows-Wheeler Transform), 5 | Copyright (C) 2019 Intel Corporation, Heng Li. 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining 8 | a copy of this software and associated documentation files (the 9 | "Software"), to deal in the Software without restriction, including 10 | without limitation the rights to use, copy, modify, merge, publish, 11 | distribute, sublicense, and/or sell copies of the Software, and to 12 | permit persons to whom the Software is furnished to do so, subject to 13 | the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | 27 | Authors: Vasimuddin Md ; Sanchit Misra 28 | *****************************************************************************************/ 29 | 30 | #ifndef _MACRO_HPP 31 | #define _MACRO_HPP 32 | 33 | #include 34 | 35 | #define VER 0 36 | #define printf_(x,y...) \ 37 | { \ 38 | if(x) \ 39 | fprintf(stderr, y); \ 40 | } 41 | 42 | /* Note: BSW-specific macros are in src/bandedSWA.h file */ 43 | 44 | #define H0_ -99 45 | #define SEEDS_PER_READ 500 /* Avg seeds per read */ 46 | #define MAX_SEEDS_PER_READ 500 /* Max seeds per read */ 47 | #define AVG_SEEDS_PER_READ 64 /* Used for storing seeds in chains*/ 48 | #define BATCH_SIZE 512 /* Block of reads alloacted to a thread for processing*/ 49 | #define BATCH_MUL 20 50 | #define SEEDS_PER_CHAIN 1 51 | 52 | #define READ_LEN 151 53 | 54 | #define SEQ_LEN8 128 // redundant?? 
55 | 56 | #define MAX_LINE_LEN 256 57 | #define CACHE_LINE 16 // 16 INT32 58 | #define ALIGN_OFF 1 59 | 60 | #define MAX_THREADS 256 61 | #define LIM_R 128 62 | #define LIM_C 128 63 | 64 | #define SA_COMPRESSION 1 65 | #define SA_COMPX 03 // (= power of 2) 66 | #define SA_COMPX_MASK 0x7 // 0x7 or 0x3 or 0x1 67 | 68 | /*** Runtime profiling macros ***/ 69 | #define INDEX 0 70 | #define MEM 1 71 | #define MEM2 2 72 | #define MEM3 4 73 | #define SAM1 5 74 | #define SAM2 3 75 | #define SAM3 7 76 | #define MPI_TIME 8 77 | #define MEM_PROCESS10 9 78 | #define MEM_PROCESS2 10 79 | #define READ_IO 11 80 | #define PROCESS 12 81 | #define REF_IO 13 82 | #define PREPROCESS 14 83 | #define CONVERT 15 84 | #define MPI_TIME_SUM 16 85 | #define OUTPUT 17 86 | #define MPI_TIME_MIN 18 87 | #define POST_SWA 19 88 | #define MPI_TIME_MAX 20 89 | #define SAM_IO 21 90 | #define ALIGN1 22 91 | 92 | #define KT_FOR 24 93 | #define KTF_WORKER 26 94 | #define WORKER20 28 95 | #define WORKER21 30 96 | #define WORKER10 32 97 | #define WORKER11 34 98 | #define MEM_ALN 36 99 | #define MEM_CHAIN 38 100 | #define MEM_COLLECT 40 101 | #define BWT_REVERSE 41 102 | #define BWA_BUILD 42 103 | #define PACKED 43 104 | #define SA 44 105 | #define BWT_REVERSE_A 45 106 | #define BWT_REVERSE_B 46 107 | #define MEM_SA 47 108 | #define MEM_ALN2 48 109 | #define MEM_ALN2_A 49 110 | #define MEM_ALN2_B 50 111 | #define MEM_ALN2_C 51 112 | #define EXTEND 52 113 | #define FORWARD 53 114 | #define MEM_CHAIN1 54 115 | #define MEM_CHAIN2 55 116 | #define SMEM1 56 117 | #define SMEM2 57 118 | #define SMEM3 58 119 | #define BWT_FORWARD_A 59 120 | #define STR 60 121 | #define MISC 61 122 | #define MEM_ALN2_UP 62 123 | #define BWT_FORWARD_B 63 124 | #define CLEFT 64 125 | #define CRIGHT 65 126 | #define MEM_ALN_M1 66 127 | #define MEM_ALN_M2 67 128 | #define MEM_SA_BLOCK 68 129 | #define SEQ_FETCH 69 130 | #define MEM_ALN2_PRE 70 131 | #define QLEN 71 132 | #define TLEN 72 133 | #define CNT 73 134 | #define WAVG 74 135 | 
#define WCNT 75 136 | #define WMAX 76 137 | #define WMIN 77 138 | #define KSW 78 139 | #define PE 79 140 | #define PESW 80 141 | #define PESORT 81 142 | #define INTROSORT 82 143 | #define PE1 83 144 | #define PE3 84 145 | #define PE2 85 146 | #define PE4 86 147 | #define PE5 87 148 | #define PE6 88 149 | #define PE7 89 150 | #define PE8 90 151 | #define PE11 91 152 | #define PE12 92 153 | #define PE13 93 154 | #define PE14 94 155 | #define PE15 95 156 | #define PE16 96 157 | #define PE17 97 158 | #define PE18 98 159 | #define PE19 99 160 | #define PE20 100 161 | #define PE21 101 162 | #define PE22 102 163 | #define PE23 103 164 | #define MEM_ALN2_DOWN 104 165 | #define MEM_ALN2_DOWN1 105 166 | #define SORT 106 167 | #define FMI 107 168 | #define MEM_ALN2_D 108 169 | #define MEM_ALN2_E 109 170 | #define PE24 110 171 | #define PE25 111 172 | #define PE26 112 173 | 174 | 175 | #endif 176 | -------------------------------------------------------------------------------- /benchmarks/bsw/utils.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008 Genome Research Ltd (GRL). 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | /* Contact: Heng Li */ 27 | 28 | #ifndef LH3_UTILS_H 29 | #define LH3_UTILS_H 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | #ifdef __GNUC__ 36 | // Tell GCC to validate printf format string and args 37 | #define ATTRIBUTE(list) __attribute__ (list) 38 | #else 39 | #define ATTRIBUTE(list) 40 | #endif 41 | 42 | #define err_fatal_simple(msg) _err_fatal_simple(__func__, msg) 43 | #define err_fatal_simple_core(msg) _err_fatal_simple_core(__func__, msg) 44 | 45 | #define xopen(fn, mode) err_xopen_core(__func__, fn, mode) 46 | #define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp) 47 | #define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode) 48 | 49 | #define xassert(cond, msg) if ((cond) == 0) _err_fatal_simple_core(__func__, msg) 50 | 51 | #if defined(__GNUC__) && __GNUC__ < 11 && !defined(__clang__) 52 | #if defined(__i386__) 53 | static inline unsigned long long __rdtsc(void) 54 | { 55 | unsigned long long int x; 56 | __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); 57 | return x; 58 | } 59 | #elif defined(__x86_64__) 60 | static inline unsigned long long __rdtsc(void) 61 | { 62 | unsigned hi, lo; 63 | __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 64 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 65 | } 66 | #endif 67 | #endif 68 | 69 | typedef struct { 70 | uint64_t x, y; 71 | } pair64_t; 72 | 73 | typedef struct { size_t n, m; uint64_t *a; } uint64_v; 74 | typedef struct { size_t n, m; pair64_t *a; } pair64_v; 75 | 76 | #ifdef __cplusplus 77 | extern "C" { 78 | #endif 79 | 80 | void err_fatal(const char *header, const char *fmt, ...) 
ATTRIBUTE((noreturn)); 81 | void err_fatal_core(const char *header, const char *fmt, ...) ATTRIBUTE((noreturn)); 82 | void _err_fatal_simple(const char *func, const char *msg) ATTRIBUTE((noreturn)); 83 | void _err_fatal_simple_core(const char *func, const char *msg) ATTRIBUTE((noreturn)); 84 | FILE *err_xopen_core(const char *func, const char *fn, const char *mode); 85 | FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp); 86 | gzFile err_xzopen_core(const char *func, const char *fn, const char *mode); 87 | size_t err_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); 88 | size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream); 89 | 90 | int err_gzread(gzFile file, void *ptr, unsigned int len); 91 | int err_fseek(FILE *stream, long offset, int whence); 92 | #define err_rewind(FP) err_fseek((FP), 0, SEEK_SET) 93 | long err_ftell(FILE *stream); 94 | int err_fprintf(FILE *stream, const char *format, ...) 95 | ATTRIBUTE((format(printf, 2, 3))); 96 | int err_printf(const char *format, ...) 
97 | ATTRIBUTE((format(printf, 1, 2))); 98 | int err_fputc(int c, FILE *stream); 99 | #define err_putchar(C) err_fputc((C), stdout) 100 | char* err_fgets(char *str, int size, FILE *stream); 101 | int err_fputs(const char *s, FILE *stream); 102 | int err_puts(const char *s); 103 | int err_fflush(FILE *stream); 104 | int err_fclose(FILE *stream); 105 | int err_gzclose(gzFile file); 106 | 107 | double cputime(); 108 | double realtime(); 109 | 110 | void ks_introsort_64 (size_t n, uint64_t *a); 111 | void ks_introsort_128(size_t n, pair64_t *a); 112 | 113 | #ifdef __cplusplus 114 | } 115 | #endif 116 | 117 | static inline uint64_t hash_64(uint64_t key) 118 | { 119 | key += ~(key << 32); 120 | key ^= (key >> 22); 121 | key += ~(key << 13); 122 | key ^= (key >> 8); 123 | key += (key << 3); 124 | key ^= (key >> 15); 125 | key += ~(key << 27); 126 | key ^= (key >> 31); 127 | return key; 128 | } 129 | 130 | #endif 131 | -------------------------------------------------------------------------------- /benchmarks/chain/Makefile: -------------------------------------------------------------------------------- 1 | #CXX = icpc 2 | CXX = g++ 3 | 4 | # path # 5 | SRC_PATH = src 6 | BUILD_PATH = build 7 | BIN_PATH = $(BUILD_PATH)/bin 8 | 9 | # executable # 10 | BIN_NAME = chain 11 | 12 | # extensions # 13 | SRC_EXT = cpp 14 | 15 | # code lists # 16 | # Find all source files in the source directory, sorted by 17 | # most recently modified 18 | SOURCES = $(shell find $(SRC_PATH) -name '*.$(SRC_EXT)' | sort -k 1nr | cut -f2-) 19 | # Set the object file names, with the source directory stripped 20 | # from the path, and the build path prepended in its place 21 | OBJECTS = $(SOURCES:$(SRC_PATH)/%.$(SRC_EXT)=$(BUILD_PATH)/%.o) 22 | # Set the dependency files that will be used to add header dependencies 23 | DEPS = $(OBJECTS:.o=.d) 24 | 25 | 26 | ARCH_FLAGS= -msse4.1 27 | 28 | ifeq ($(arch),sse41) 29 | ARCH_FLAGS=-msse4.1 30 | else ifeq ($(arch),avx2) 31 | ifeq ($(CXX), icpc) 32 | 
ARCH_FLAGS=-march=core-avx2 #-xCORE-AVX2 33 | else 34 | ARCH_FLAGS=-mavx2 35 | endif 36 | else ifeq ($(arch),avx512) 37 | ifeq ($(CXX), icpc) 38 | ARCH_FLAGS=-xCORE-AVX512 39 | else 40 | ARCH_FLAGS=-mavx512bw 41 | endif 42 | else ifeq ($(arch),native) 43 | ARCH_FLAGS=-march=native 44 | else ifneq ($(arch),) 45 | ## To provide a different architecture flag like -march=core-avx2. 46 | ARCH_FLAGS=$(arch) 47 | endif 48 | 49 | # flags # 50 | #COMPILE_FLAGS = -std=c++11 -Wall -Wextra -g -O3 -fopenmp -xAVX2 -axAVX2 51 | #VTUNE_HOME= /opt/intel/vtune_profiler 52 | COMPILE_FLAGS = -std=c++11 -Wall -Wextra -g -O3 -fopenmp $(ARCH_FLAGS) 53 | INCLUDES = -I../../tools/minimap2 54 | # Space-separated pkg-config libraries used by this project 55 | LIBS = -L../../tools/minimap2 -lminimap2 -ldl 56 | 57 | 58 | ifneq ($(VTUNE_HOME),) 59 | COMPILE_FLAGS += -DVTUNE_ANALYSIS=1 60 | INCLUDES += -I${VTUNE_HOME}/include 61 | LIBS += -L${VTUNE_HOME}/lib64 -littnotify 62 | endif 63 | 64 | 65 | .PHONY: default_target 66 | default_target: release 67 | 68 | .PHONY: release 69 | release: export CXXFLAGS := $(CXXFLAGS) $(COMPILE_FLAGS) 70 | release: dirs 71 | @$(MAKE) all 72 | 73 | .PHONY: dirs 74 | dirs: 75 | @echo "Creating directories" 76 | @mkdir -p $(dir $(OBJECTS)) 77 | @mkdir -p $(BIN_PATH) 78 | 79 | .PHONY: clean 80 | clean: 81 | @echo "Deleting $(BIN_NAME) symlink" 82 | @$(RM) $(BIN_NAME) 83 | @echo "Deleting directories" 84 | @$(RM) -r $(BUILD_PATH) 85 | @$(RM) -r $(BIN_PATH) 86 | 87 | # checks the executable and symlinks to the output 88 | .PHONY: all 89 | all: $(BIN_PATH)/$(BIN_NAME) 90 | @echo "Making symlink: $(BIN_NAME) -> $<" 91 | @$(RM) $(BIN_NAME) 92 | @ln -s $(BIN_PATH)/$(BIN_NAME) $(BIN_NAME) 93 | 94 | # Creation of the executable 95 | $(BIN_PATH)/$(BIN_NAME): $(OBJECTS) 96 | @echo "Linking: $@" 97 | #$(CXX) -O3 -fopenmp -xAVX2 -axAVX2 $(OBJECTS) -o $@ 98 | $(CXX) -O3 -fopenmp $(ARCH_FLAGS) $(OBJECTS) $(INCLUDES) $(LIBS) -o $@ 99 | 100 | # Add dependency files, if they exist 
101 | -include $(DEPS) 102 | 103 | # Source file rules 104 | # After the first compilation they will be joined with the rules from the 105 | # dependency files to provide header dependencies 106 | $(BUILD_PATH)/%.o: $(SRC_PATH)/%.$(SRC_EXT) 107 | @echo "Compiling: $< -> $@" 108 | $(CXX) $(CXXFLAGS) $(INCLUDES) $(LIBS) -MP -MMD -c $< -o $@ 109 | -------------------------------------------------------------------------------- /benchmarks/chain/README.md: -------------------------------------------------------------------------------- 1 | `chain` uses the same license as [Minimap2](https://github.com/lh3/minimap2). 2 | 3 | If you find `chain` useful, please cite: 4 | 5 | ``` 6 | @article{li2018minimap2, 7 | title={Minimap2: pairwise alignment for nucleotide sequences}, 8 | author={Li, Heng}, 9 | journal={Bioinformatics}, 10 | volume={34}, 11 | number={18}, 12 | pages={3094--3100}, 13 | year={2018}, 14 | publisher={Oxford University Press} 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /benchmarks/chain/src/common.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | const score_t NEG_INF_SCORE = -0x3FFFFFFF; 4 | 5 | -------------------------------------------------------------------------------- /benchmarks/chain/src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | 4 | #include 5 | #include "host_data.h" 6 | 7 | #define BACK_SEARCH_COUNT 65 8 | extern const score_t NEG_INF_SCORE; 9 | 10 | 11 | #endif // COMMON_H 12 | -------------------------------------------------------------------------------- /benchmarks/chain/src/host_data.h: -------------------------------------------------------------------------------- 1 | #ifndef HOST_INPUT_H 2 | #define HOST_INPUT_H 3 | 4 | #include 5 | #include 6 | 7 | typedef int64_t anchor_idx_t; 8 | typedef uint32_t tag_t; 9 | typedef 
/*
 * Consume characters from `fp` until the record terminator has been seen.
 *
 * The characters 'E', 'O', 'R' are matched in order; other characters between
 * them are skipped without restarting the match. On return the stream is
 * positioned just after the 'R'.
 *
 * If the stream ends (or a read error occurs) before the terminator is
 * complete, the function returns instead of looping forever: fgetc() then
 * yields EOF, which can never equal any of the marker characters.
 */
void skip_to_EOR(FILE *fp) {
    const char *loc = "EOR";
    while (*loc != '\0') {
        int c = fgetc(fp);
        if (c == EOF) {
            return; // truncated input: nothing more to skip
        }
        if (c == *loc) {
            loc++;
        }
    }
}
| return call; 51 | } 52 | 53 | void print_return(FILE *fp, const return_t &data) 54 | { 55 | fprintf(fp, "%lld\n", (long long)data.n); 56 | for (anchor_idx_t i = 0; i < data.n; i++) { 57 | fprintf(fp, "%d\t%d\n", (int)data.scores[i], (int)data.parents[i]); 58 | } 59 | fprintf(fp, "EOR\n"); 60 | } 61 | -------------------------------------------------------------------------------- /benchmarks/chain/src/host_data_io.h: -------------------------------------------------------------------------------- 1 | #ifndef HOST_KERNEL_IO_H 2 | #define HOST_KERNEL_IO_H 3 | 4 | #include 5 | #include "host_data.h" 6 | 7 | call_t read_call(FILE *fp); 8 | void print_return(FILE *fp, const return_t &data); 9 | 10 | #endif // HOST_KERNEL_IO_H 11 | -------------------------------------------------------------------------------- /benchmarks/chain/src/host_kernel.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "omp.h" 6 | #include "host_kernel.h" 7 | #include "common.h" 8 | #include "minimap.h" 9 | #include "mmpriv.h" 10 | #include "kalloc.h" 11 | 12 | static const char LogTable256[256] = { 13 | #define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n 14 | -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 15 | LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6), 16 | LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7) 17 | }; 18 | 19 | static inline int ilog2_32(uint32_t v) 20 | { 21 | uint32_t t, tt; 22 | if ((tt = v>>16)) return (t = tt>>8) ? 24 + LogTable256[t] : 16 + LogTable256[tt]; 23 | return (t = v>>8) ? 
// LogTable256[v] is floor(log2(v)) for v in 1..255, and -1 for v == 0.
// The element type is explicitly signed: plain `char` is unsigned on some
// targets, where the -1 entry is a narrowing error in C++11 or becomes 255.
static const signed char LogTable256[256] = {
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
    -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
    LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6),
    LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
};

/*
 * Integer base-2 logarithm of a 32-bit value: the index of the highest set
 * bit (0..31). Returns -1 for v == 0.
 *
 * The highest non-zero byte of v is located first, then looked up in
 * LogTable256 and offset by that byte's bit position.
 */
static inline int ilog2_32(uint32_t v)
{
    uint32_t t, tt;
    if ((tt = v>>16)) return (t = tt>>8) ? 24 + LogTable256[t] : 16 + LogTable256[tt];
    return (t = v>>8) ? 8 + LogTable256[t] : LogTable256[v];
}
ilog2_32(dd) : 0; 70 | gap_cost = 0; 71 | if (is_cdna || sidi != sidj) { 72 | int c_log, c_lin; 73 | c_lin = (int)(dd * .01 * avg_qspan); 74 | c_log = log_dd; 75 | if (sidi != sidj && dr == 0) ++sc; // possibly due to overlapping paired ends; give a minor bonus 76 | else if (dr > dq || sidi != sidj) gap_cost = c_lin < c_log? c_lin : c_log; 77 | else gap_cost = c_lin + (c_log>>1); 78 | } else gap_cost = (int)(dd * .01 * avg_qspan) + (log_dd>>1); 79 | sc -= (int)((double)gap_cost * gap_scale + .499); 80 | sc += ret->scores[j]; 81 | if (sc > max_f) { 82 | max_f = sc, max_j = j; 83 | if (n_skip > 0) --n_skip; 84 | } else if (ret->targets[j] == i) { 85 | if (++n_skip > max_skip) { 86 | break; 87 | } 88 | } 89 | if (ret->parents[j] >= 0) ret->targets[ret->parents[j]] = i; 90 | } 91 | ret->scores[i] = max_f, ret->parents[i] = max_j; 92 | ret->peak_scores[i] = max_j >= 0 && ret->peak_scores[max_j] > max_f ? ret->peak_scores[max_j] : max_f; 93 | } 94 | } 95 | 96 | void host_chain_kernel(std::vector &args, std::vector &rets, int numThreads) 97 | { 98 | #pragma omp parallel num_threads(numThreads) 99 | { 100 | #pragma omp for schedule(dynamic) 101 | for (size_t batch = 0; batch < args.size(); batch++) { 102 | call_t* arg = &args[batch]; 103 | return_t* ret = &rets[batch]; 104 | // fprintf(stderr, "%lld\t%f\t%d\t%d\t%d\t%d\n", arg->n, arg->avg_qspan, arg->max_dist_x, arg->max_dist_y, arg->bw, arg->n_segs); 105 | chain_dp(arg, ret); 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /benchmarks/chain/src/host_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef HOST_KERNEL_H 2 | #define HOST_KERNEL_H 3 | 4 | #include "host_data.h" 5 | 6 | void host_chain_kernel(std::vector &arg, std::vector &ret, int numThreads); 7 | 8 | #endif // HOST_KERNEL_H 9 | -------------------------------------------------------------------------------- /benchmarks/chain/src/main.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "omp.h" 9 | #include "host_data_io.h" 10 | #include "host_data.h" 11 | #include "host_kernel.h" 12 | 13 | // #define PRINT_OUTPUT 1 14 | 15 | // #define VTUNE_ANALYSIS 1 16 | 17 | #ifdef VTUNE_ANALYSIS 18 | #include 19 | #endif 20 | 21 | void help() { 22 | std::cout << 23 | "\n" 24 | "usage: ./chain [options ...]\n" 25 | "\n" 26 | " options:\n" 27 | " -i \n" 28 | " default: NULL\n" 29 | " the input anchor set\n" 30 | " -o \n" 31 | " default: NULL\n" 32 | " the output scores, best predecessor set\n" 33 | " -t \n" 34 | " default: 1\n" 35 | " number of CPU threads\n" 36 | " -h \n" 37 | " prints the usage\n"; 38 | } 39 | 40 | 41 | int main(int argc, char **argv) { 42 | #ifdef VTUNE_ANALYSIS 43 | __itt_pause(); 44 | #endif 45 | FILE *in, *out; 46 | std::string inputFileName, outputFileName; 47 | 48 | int opt, numThreads = 1; 49 | while ((opt = getopt(argc, argv, ":i:o:t:h")) != -1) { 50 | switch (opt) { 51 | case 'i': inputFileName = optarg; break; 52 | case 'o': outputFileName = optarg; break; 53 | case 't': numThreads = atoi(optarg); break; 54 | case 'h': help(); return 0; 55 | default: help(); return 1; 56 | } 57 | } 58 | 59 | if (argc == 1 || argc != optind) { 60 | help(); 61 | exit(EXIT_FAILURE); 62 | } 63 | 64 | fprintf(stderr, "Input file: %s\n", inputFileName.c_str()); 65 | fprintf(stderr, "Output file: %s\n", outputFileName.c_str()); 66 | 67 | in = fopen(inputFileName.c_str(), "r"); 68 | out = fopen(outputFileName.c_str(), "w"); 69 | 70 | std::vector calls; 71 | std::vector rets; 72 | 73 | for (call_t call = read_call(in); 74 | call.n != ANCHOR_NULL; 75 | call = read_call(in)) { 76 | calls.push_back(call); 77 | } 78 | 79 | rets.resize(calls.size()); 80 | 81 | #pragma omp parallel num_threads(numThreads) 82 | { 83 | int tid = omp_get_thread_num(); 84 | if (tid == 0) { 85 | 
fprintf(stderr, "Running with threads: %d\n", numThreads); 86 | } 87 | } 88 | 89 | struct timeval start_time, end_time; 90 | double runtime = 0; 91 | 92 | gettimeofday(&start_time, NULL); 93 | #ifdef VTUNE_ANALYSIS 94 | __itt_resume(); 95 | #endif 96 | host_chain_kernel(calls, rets, numThreads); 97 | #ifdef VTUNE_ANALYSIS 98 | __itt_pause(); 99 | #endif 100 | gettimeofday(&end_time, NULL); 101 | 102 | runtime += (end_time.tv_sec - start_time.tv_sec) * 1e6 + (end_time.tv_usec - start_time.tv_usec); 103 | 104 | #ifdef PRINT_OUTPUT 105 | for (auto it = rets.begin(); it != rets.end(); it++) { 106 | print_return(out, *it); 107 | } 108 | #endif 109 | 110 | fprintf(stderr, "Time in kernel: %.2f sec\n", runtime * 1e-6); 111 | 112 | fclose(in); 113 | fclose(out); 114 | 115 | return 0; 116 | } 117 | -------------------------------------------------------------------------------- /benchmarks/dbg/Makefile: -------------------------------------------------------------------------------- 1 | CXX = g++ 2 | #CXX = icpc 3 | #VTUNE_HOME= /opt/intel/vtune_profiler 4 | LIBS = -L../../tools/htslib -lcurl -lcrypto -lz -lm -lbz2 -llzma -lpthread -ldl -fopenmp 5 | INC = -I../../tools/htslib 6 | CXXFLAGS = -g -Wall -O2 -fopenmp 7 | 8 | ifneq ($(VTUNE_HOME),) 9 | CXXFLAGS+= -DVTUNE_ANALYSIS=1 10 | INC+= -I${VTUNE_HOME}/include 11 | LIBS+=-L${VTUNE_HOME}/lib64 -littnotify 12 | endif 13 | 14 | all: debruijn.cpp common.cpp common.h 15 | $(CXX) debruijn.cpp common.cpp ../../tools/htslib/libhts.a $(CXXFLAGS) $(INC) $(LIBS) -o dbg 16 | 17 | clean: 18 | rm -f dbg 19 | -------------------------------------------------------------------------------- /benchmarks/dbg/README.md: -------------------------------------------------------------------------------- 1 | `dbg` uses the same license as [Platypus](https://github.com/andyrimmer/Platypus). 
/* Die on error. If `ret` compares equal to NULL, print the source location
   and strerror(errno) to stderr, then exit with EXIT_FAILURE.
   The argument is parenthesised so that expressions such as `c ? p : q`
   are compared as a whole. Uses a GNU statement expression. */
#define errorCheckNULL(ret) ({\
    if ((ret)==NULL){ \
        fprintf(stderr,"Error at File %s line number %d : %s\n",__FILE__, __LINE__,strerror(errno));\
        exit(EXIT_FAILURE);\
    }\
    })
/* Die on error. If `ret` is negative, print the source location and
   strerror(errno) to stderr, then exit with EXIT_FAILURE.
   The argument is parenthesised: without that, `errorCheck(c ? 5 : -1)`
   parses as `c ? 5 : (-1<0)` and exits on a successful value.
   Uses a GNU statement expression. */
#define errorCheck(ret) ({\
    if ((ret)<0){ \
        fprintf(stderr,"Error at File %s line number %d : %s\n",__FILE__, __LINE__,strerror(errno));\
        exit(EXIT_FAILURE);\
    }\
    })
The chromosome name that maps to this ID must be found through the BAM header, See printRead function in common.c to see how to do this) 38 | uint32_t matePos; //Position of the mate/next read (0-based) 39 | uint32_t tlen; //observed Template LENgth (I just later set this to 0) 40 | char seq[MAX_READ_LEN]; //segment SEQuence (the actual read sequence) 41 | uint8_t qual[MAX_READ_LEN]; //quality string 42 | 43 | //some other dtuff 44 | uint32_t cigarLen; 45 | uint32_t rlen; //Length of SEQuence 46 | uint32_t end; 47 | uint32_t insertSize; 48 | 49 | //need to get some other fields in the BAM 50 | 51 | }; 52 | 53 | 54 | 55 | struct ReadArray{ 56 | struct alignedRead* array; 57 | struct alignedRead* windowStart; 58 | struct alignedRead* windowEnd; 59 | int __size; 60 | int __capacity; 61 | int __longestRead; 62 | }; 63 | 64 | 65 | struct bamReadBuffer{ 66 | // char* chrom; 67 | // int chromID; 68 | // int* filteredReadCountsByType; 69 | // int isSorted; 70 | // int startBase; 71 | // int endBase; 72 | // int windowStartBase; 73 | // int windowEndBase; 74 | // int maxReads; 75 | // int minMapQual; 76 | // int minBaseQual; 77 | // int minFlank; 78 | // int trimReadFlank; 79 | // int verbosity; 80 | // int minGoodBases; 81 | // int trimOverlapping; 82 | // int trimAdapter; 83 | // int trimSoftClipped; 84 | // struct alignedRead* lastRead; 85 | // char sample; 86 | struct ReadArray reads; 87 | struct ReadArray badReads; 88 | struct ReadArray brokenMates; 89 | 90 | }; 91 | 92 | 93 | /* The function that gets a read to the alignedRead structure 94 | Arguments : The destination structure for the read to be stored, and the bam1_t pointer from htslib (See sequentialaccess.c for example usage) 95 | Return value : The same pointer of the input argument theRead */ 96 | struct alignedRead* getRead(struct alignedRead* theRead, bam1_t *b/*, int storeRgID, char** rgID*/); 97 | 98 | 99 | /*A function that prints a read to the stdout. 
// Fallback definition used only when no earlier header provides the flag.
// An object-like macro takes no '=': with one, every use expands to "= 512".
#ifndef BAM_FQCFAIL
#define BAM_FQCFAIL 512 // QC failure
#endif
17 | ## 18 | ## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | ## EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | ## MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | ## NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | ## BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 23 | ## ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | ## CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | ## SOFTWARE. 26 | ## 27 | ##Contacts: Vasimuddin Md ; Sanchit Misra ; 28 | ## Heng Li 29 | ##*****************************************************************************************/ 30 | 31 | 32 | EXE= fmi 33 | #CXX= icpx 34 | CXX= g++ 35 | #VTUNE_HOME= /opt/intel/oneapi/vtune/2021.1.1 36 | ARCH_FLAGS= -msse4.1 37 | 38 | ifeq ($(arch),sse41) 39 | ARCH_FLAGS=-msse4.1 40 | else ifeq ($(arch),avx2) 41 | ifeq ($(CXX), icpc) 42 | ARCH_FLAGS=-march=core-avx2 #-xCORE-AVX2 43 | else 44 | ARCH_FLAGS=-mavx2 45 | endif 46 | else ifeq ($(arch),avx512) 47 | ifeq ($(CXX), icpc) 48 | ARCH_FLAGS=-xCORE-AVX512 49 | else 50 | ARCH_FLAGS=-mavx512bw 51 | endif 52 | else ifeq ($(arch),native) 53 | ARCH_FLAGS=-march=native 54 | else ifneq ($(arch),) 55 | ## To provide a different architecture flag like -march=core-avx2. 
56 | ARCH_FLAGS=$(arch) 57 | endif 58 | 59 | CXXFLAGS= -std=c++11 -fopenmp $(ARCH_FLAGS) #-mtune=native -march=native 60 | #CPPFLAGS= -DPRINT_OUTPUT -DENABLE_PREFETCH -DBWA_OTHER_ELE=0 61 | CPPFLAGS= -DENABLE_PREFETCH -DBWA_OTHER_ELE=0 62 | BWAMEM2_PATH=../../tools/bwa-mem2 63 | INCLUDES= -I$(BWAMEM2_PATH)/src -I$(BWAMEM2_PATH)/ext/safestringlib/include 64 | LIBS= -L$(BWAMEM2_PATH) -L$(BWAMEM2_PATH)/ext/safestringlib -lsafestring -fopenmp -lz -lbwa -ldl 65 | 66 | ifneq ($(VTUNE_HOME),) 67 | CPPFLAGS+= -DVTUNE_ANALYSIS=1 68 | INCLUDES+= -I${VTUNE_HOME}/include 69 | LIBS+=-L${VTUNE_HOME}/lib64 -littnotify 70 | endif 71 | 72 | .PHONY:all clean depend 73 | .SUFFIXES:.cpp .o 74 | 75 | .cpp.o: 76 | $(CXX) -c $(CXXFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@ 77 | 78 | all:$(EXE) 79 | 80 | fmi:fmi.o $(BWAMEM2_PATH)/libbwa.a 81 | $(CXX) -o $@ $^ $(LIBS) 82 | 83 | clean: 84 | rm -fr *.o $(EXE) 85 | 86 | 87 | # DO NOT DELETE 88 | 89 | fmi.o: $(BWAMEM2_PATH)/src/FMI_search.h $(BWAMEM2_PATH)/src/bntseq.h $(BWAMEM2_PATH)/src/read_index_ele.h 90 | fmi.o: $(BWAMEM2_PATH)/src/bwa.h $(BWAMEM2_PATH)/src/bwt.h $(BWAMEM2_PATH)/src/utils.h $(BWAMEM2_PATH)/src/macro.h 91 | -------------------------------------------------------------------------------- /benchmarks/fmi/README.md: -------------------------------------------------------------------------------- 1 | `fmi` uses the same license as [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2). 2 | 3 | If you use `fmi`, please cite: 4 | 5 | ``` 6 | @inproceedings{DBLP:conf/ipps/VasimuddinMLA19, 7 | author = {Md. 
CXX = g++
#CXX = icpc
#VTUNE_HOME= /opt/intel/vtune_profiler
LIBS= -lz -lm -lbz2 -llzma -ldl -fopenmp
CXXFLAGS = -O3 -fopenmp
INC = -Ilibcuckoo

# Optional VTune instrumentation: enabled when VTUNE_HOME is set.
ifneq ($(VTUNE_HOME),)
CXXFLAGS+= -DVTUNE_ANALYSIS=1
LIBS += -L${VTUNE_HOME}/lib64 -littnotify
INC += -I${VTUNE_HOME}/include
endif

BIN = kmer-cnt
SRCS = sequence_container.cpp sequence.cpp vertex_index.cpp kmer_cnt.cpp

.PHONY: all clean

all: $(BIN)

# All sources are compiled and linked in one step; the binary is a real
# target, so it is rebuilt only when a source (or a header listed below)
# is newer than it.
$(BIN): $(SRCS)
	$(CXX) $(CXXFLAGS) $(SRCS) $(INC) $(LIBS) -o $(BIN)

clean:
	rm -f $(BIN)

# Header dependencies. These rules have no recipe: a newer header marks the
# source as updated, which in turn triggers a rebuild of $(BIN).
sequence_container.cpp: sequence_container.h sequence.h logger.h
sequence.cpp: sequence.h
vertex_index.cpp: vertex_index.h parallel.h memory_info.h logger.h config.h sequence_container.h kmer.h
kmer_cnt.cpp: sequence_container.h sequence.h vertex_index.h memory_info.h logger.h utils.h config.h parallel.h
namespace
{
	// Returns a copy of `str` with leading and trailing whitespace removed.
	// An empty or all-whitespace input yields an empty string.
	std::string trimString(const std::string& str)
	{
		// std::isspace is only defined for values representable as
		// unsigned char (or EOF), hence the casts.
		size_t left = 0;
		while (left < str.size() &&
			   std::isspace(static_cast<unsigned char>(str[left]))) ++left;

		// `right` is one past the last kept character, so it never has to
		// go below zero (the old index-based form underflowed on "").
		size_t right = str.size();
		while (right > left &&
			   std::isspace(static_cast<unsigned char>(str[right - 1]))) --right;

		return str.substr(left, right - left);
	}
}
51 | { 52 | std::getline(fin, buffer); 53 | if (buffer.empty() || buffer.front() == '#') continue; 54 | 55 | if (!buffer.compare(0, 8, std::string("\%include"))) 56 | { 57 | auto tokens = splitString(buffer, ' '); 58 | Config::load(dirname + tokens[1]); 59 | continue; 60 | } 61 | 62 | auto tokens = splitString(buffer, '='); 63 | if (tokens.size() != 2) 64 | { 65 | throw std::runtime_error("Error parsing config file"); 66 | } 67 | std::string key = trimString(tokens[0]); 68 | std::string value = trimString(tokens[1]); 69 | Config::instance()._parameters[key] = std::atof(value.c_str()); 70 | Logger::get().debug() << "\t" << key << "=" << value; 71 | } 72 | } 73 | 74 | static float get(const std::string& key) 75 | { 76 | auto itVal = Config::instance()._parameters.find(key); 77 | if (itVal == Config::instance()._parameters.end()) 78 | { 79 | throw std::runtime_error("No such parameter: " + key); 80 | } 81 | return itVal->second; 82 | } 83 | 84 | static void addParameters(const std::string& paramsString) 85 | { 86 | Logger::get().debug() << "Extrta parameters:"; 87 | auto params = splitString(paramsString, ','); 88 | for (auto param : params) 89 | { 90 | auto keyVal = splitString(param, '='); 91 | std::string key = trimString(keyVal[0]); 92 | std::string value = trimString(keyVal[1]); 93 | Config::instance()._parameters[key] = std::atof(value.c_str()); 94 | Logger::get().debug() << "\t" << key << "=" << value; 95 | } 96 | } 97 | 98 | private: 99 | Config(){} 100 | std::unordered_map _parameters; 101 | }; 102 | 103 | struct Parameters 104 | { 105 | static Parameters& get() 106 | { 107 | static Parameters param; 108 | return param; 109 | } 110 | 111 | int minimumOverlap; 112 | size_t kmerSize; 113 | size_t numThreads; 114 | bool unevenCoverage; 115 | }; 116 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/kmer.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by 
Authors 2 | //This file is a part of ABruijn program. 3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "sequence_container.h" 12 | #include "config.h" 13 | 14 | static_assert(sizeof(size_t) == 8, "32-bit architectures are not supported"); 15 | 16 | class Kmer 17 | { 18 | public: 19 | typedef size_t KmerRepr; 20 | 21 | explicit Kmer(KmerRepr repr=0): _representation(repr) {} 22 | 23 | Kmer(const DnaSequence& dnaString, 24 | size_t start, size_t length): 25 | _representation(0) 26 | { 27 | if (length != Parameters::get().kmerSize) 28 | { 29 | throw std::runtime_error("Kmer length inconsistency"); 30 | } 31 | 32 | for (size_t i = start; i < start + length; ++i) 33 | { 34 | _representation <<= 2; 35 | _representation += dnaString.atRaw(i); 36 | } 37 | } 38 | 39 | Kmer reverseComplement() 40 | { 41 | KmerRepr tmpRepr = _representation; 42 | Kmer newKmer; 43 | 44 | for (unsigned int i = 0; i < Parameters::get().kmerSize; ++i) 45 | { 46 | newKmer._representation <<= 2; 47 | newKmer._representation += ~tmpRepr & 3; 48 | tmpRepr >>= 2; 49 | } 50 | 51 | return newKmer; 52 | } 53 | 54 | bool standardForm() 55 | { 56 | Kmer complKmer = this->reverseComplement(); 57 | if (complKmer._representation < _representation) 58 | { 59 | _representation = complKmer._representation; 60 | return true; 61 | } 62 | return false; 63 | } 64 | 65 | void appendRight(DnaSequence::NuclType dnaSymbol) 66 | { 67 | _representation <<= 2; 68 | _representation += dnaSymbol; 69 | 70 | KmerRepr kmerSize = Parameters::get().kmerSize; 71 | KmerRepr kmerMask = ((KmerRepr)1 << kmerSize * 2) - 1; 72 | _representation &= kmerMask; 73 | } 74 | 75 | void appendLeft(DnaSequence::NuclType dnaSymbol) 76 | { 77 | _representation >>= 2; 78 | 79 | KmerRepr kmerSize = Parameters::get().kmerSize; 80 | KmerRepr shift = kmerSize * 2 - 2; 81 | _representation += dnaSymbol << shift; 82 | } 83 | 84 | 85 | bool operator == (const Kmer& 
other) const 86 | {return this->_representation == other._representation;} 87 | 88 | bool operator != (const Kmer& other) const 89 | {return !(*this == other);} 90 | 91 | size_t hash() const 92 | { 93 | size_t x = _representation; 94 | size_t z = (x += 0x9E3779B97F4A7C15ULL); 95 | z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL; 96 | z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL; 97 | return z ^ (z >> 31); 98 | } 99 | 100 | bool operator< (const Kmer& other) 101 | { 102 | return _representation < other._representation; 103 | } 104 | 105 | size_t numRepr() {return _representation;} 106 | 107 | private: 108 | KmerRepr _representation; 109 | }; 110 | 111 | namespace std 112 | { 113 | template <> 114 | struct hash 115 | { 116 | std::size_t operator()(const Kmer& kmer) const 117 | { 118 | return kmer.hash(); 119 | } 120 | }; 121 | } 122 | 123 | struct KmerPosition 124 | { 125 | KmerPosition(Kmer kmer, int32_t position): 126 | kmer(kmer), position(position) {} 127 | Kmer kmer; 128 | int32_t position; 129 | }; 130 | 131 | class KmerIterator 132 | { 133 | public: 134 | typedef std::forward_iterator_tag iterator_category; 135 | 136 | KmerIterator(const DnaSequence* readSeq, size_t position): 137 | _readSeq(readSeq), 138 | _position(position) 139 | { 140 | if (position != readSeq->length() - Parameters::get().kmerSize) 141 | { 142 | //_kmer = Kmer(readSeq->substr(0, Parameters::get().kmerSize)); 143 | _kmer = Kmer(*readSeq, 0, Parameters::get().kmerSize); 144 | } 145 | } 146 | 147 | bool operator==(const KmerIterator& other) const 148 | { 149 | return _readSeq == other._readSeq && _position == other._position; 150 | } 151 | 152 | bool operator!=(const KmerIterator& other) const 153 | { 154 | return !(*this == other); 155 | } 156 | 157 | KmerPosition operator*() const 158 | { 159 | return KmerPosition(_kmer, _position); 160 | } 161 | 162 | KmerIterator& operator++() 163 | { 164 | size_t appendPos = _position + Parameters::get().kmerSize; 165 | 
_kmer.appendRight(_readSeq->atRaw(appendPos)); 166 | ++_position; 167 | return *this; 168 | } 169 | 170 | protected: 171 | const DnaSequence* _readSeq; 172 | size_t _position; 173 | Kmer _kmer; 174 | }; 175 | 176 | 177 | class IterKmers 178 | { 179 | public: 180 | IterKmers(const DnaSequence& sequence, size_t start = 0, 181 | size_t length = std::string::npos): 182 | _sequence(sequence), _start(start), _length(length) 183 | {} 184 | 185 | KmerIterator begin() 186 | { 187 | if (_sequence.length() < Parameters::get().kmerSize + _start) 188 | return this->end(); 189 | 190 | return KmerIterator(&_sequence, _start); 191 | } 192 | 193 | KmerIterator end() 194 | { 195 | size_t end = _length == std::string::npos ? 196 | _sequence.length() : _length + _start; 197 | return KmerIterator(&_sequence, end - Parameters::get().kmerSize); 198 | } 199 | 200 | private: 201 | const DnaSequence& _sequence; 202 | const size_t _start; 203 | const size_t _length; 204 | }; 205 | 206 | inline std::vector yieldMinimizers(const DnaSequence& sequence, int window) 207 | { 208 | if (window < 1) throw std::runtime_error("wrong minimizer length"); 209 | 210 | struct KmerAndHash 211 | { 212 | KmerPosition kp; 213 | size_t hash; 214 | }; 215 | thread_local std::deque miniQueue; 216 | miniQueue.clear(); 217 | 218 | std::vector minimizers; 219 | const size_t expectedSize = sequence.length() / window * 2; 220 | minimizers.reserve(1.5 * expectedSize); 221 | 222 | if (window == 1) 223 | { 224 | for (auto kmerPos : IterKmers(sequence)) 225 | { 226 | minimizers.push_back(kmerPos); 227 | } 228 | return minimizers; 229 | } 230 | 231 | for (auto kmerPos : IterKmers(sequence)) 232 | { 233 | auto stdKmer = kmerPos.kmer; 234 | stdKmer.standardForm(); 235 | size_t curHash = stdKmer.hash(); 236 | 237 | while (!miniQueue.empty() && miniQueue.back().hash > curHash) 238 | { 239 | miniQueue.pop_back(); 240 | } 241 | miniQueue.push_back({kmerPos, curHash}); 242 | if (miniQueue.front().kp.position <= kmerPos.position - 
window) 243 | { 244 | while (miniQueue.front().kp.position <= kmerPos.position - window) 245 | { 246 | miniQueue.pop_front(); 247 | } 248 | while (miniQueue.size() >= 2 && miniQueue[0].hash == miniQueue[1].hash) 249 | { 250 | miniQueue.pop_front(); 251 | } 252 | } 253 | if (minimizers.empty() || minimizers.back().position != 254 | miniQueue.front().kp.position) 255 | { 256 | minimizers.push_back(miniQueue.front().kp); 257 | } 258 | } 259 | 260 | //Logger::get().debug() << _seqContainer.seqLen(seqId) << " " << minimizers.size(); 261 | return minimizers; 262 | } 263 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/libcuckoo/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2013, Carnegie Mellon University and Intel Corporation 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | 15 | --------------------------- 16 | 17 | CityHash (lib/city.h, lib/city.cc) is Copyright (c) Google, Inc. and 18 | has its own license, as detailed in the source files. 
19 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/libcuckoo/README.md: -------------------------------------------------------------------------------- 1 | Note to existing users: the iterator implementation has changed significantly 2 | since we introduced the `locked_table` in [this 3 | commit](https://github.com/efficient/libcuckoo/commit/2bedb3d0c811cd8b3adb3e78e2d2a28c66ba1d1d). 4 | Please see the [`locked_table` 5 | documentation](http://efficient.github.io/libcuckoo/classcuckoohash__map_1_1locked__table.html) 6 | and [examples 7 | directory](https://github.com/efficient/libcuckoo/tree/master/examples) for 8 | information and examples of how to use iterators. 9 | 10 | libcuckoo 11 | ========= 12 | 13 | libcuckoo provides a high-performance, compact hash table that allows 14 | multiple concurrent reader and writer threads. 15 | 16 | The Doxygen-generated documentation is available at the 17 | [project page](http://efficient.github.io/libcuckoo/). 18 | 19 | Authors: Manu Goyal, Bin Fan, Xiaozhou Li, David G. Andersen, and Michael Kaminsky 20 | 21 | For details about this algorithm and citations, please refer to 22 | our papers in [NSDI 2013][1] and [EuroSys 2014][2]. Some of the details of the hashing 23 | algorithm have been improved since that work (e.g., the previous algorithm 24 | in [1] serializes all writer threads, while our current 25 | implementation supports multiple concurrent writers), however, and this source 26 | code is now the definitive reference. 27 | 28 | [1]: http://www.cs.cmu.edu/~dga/papers/memc3-nsdi2013.pdf "MemC3: Compact and Concurrent Memcache with Dumber Caching and Smarter Hashing" 29 | [2]: http://www.cs.princeton.edu/~mfreed/docs/cuckoo-eurosys14.pdf "Algorithmic Improvements for Fast Concurrent Cuckoo Hashing" 30 | 31 | Requirements 32 | ================ 33 | 34 | This library has been tested on Mac OSX >= 10.8 and Ubuntu >= 12.04. 
35 | 36 | It compiles with clang++ >= 3.1 and g++ >= 4.7, however we strongly suggest 37 | using the latest versions of both compilers, as they have greatly improved 38 | support for atomic operations. Building the library requires the 39 | autotools. Install them on Ubuntu 40 | 41 | $ sudo apt-get update && sudo apt-get install build-essential autoconf libtool 42 | 43 | Building 44 | ========== 45 | 46 | $ autoreconf -fis 47 | $ ./configure 48 | $ make 49 | $ make install 50 | 51 | Usage 52 | ========== 53 | 54 | To build a program with the hash table, include 55 | `libcuckoo/cuckoohash_map.hh` into your source file. If you want to 56 | use CityHash, which we recommend, we have provided a wrapper 57 | compatible with the `std::hash` type around it in the 58 | `libcuckoo/city_hasher.hh` file. If compiling with CityHash, add the 59 | `-lcityhash` flag. You must also enable C++11 features on your 60 | compiler. Compiling the file `examples/count_freq.cpp` with g++ 61 | might look like this: 62 | 63 | $ g++ -std=c++11 examples/count_freq.cpp -lcityhash 64 | 65 | The 66 | [examples directory](https://github.com/efficient/libcuckoo/tree/master/examples) 67 | contains some simple demonstrations of some of the basic features of the hash 68 | table. 69 | 70 | Tests 71 | ========== 72 | 73 | The [tests directory](https://github.com/efficient/libcuckoo/tree/master/tests) 74 | contains a number of tests and benchmarks of the hash table, which 75 | also can serve as useful examples of how to use the table's various features. 76 | After running `make all`, the entire test suite can be run with the `make check` 77 | command. This will not run the benchmarks, which must be run individually. The 78 | test executables, which have the suffix `.out`, can be run individually as well. 79 | 80 | Issue Report 81 | ============ 82 | 83 | To let us know your questions or issues, we recommend you 84 | [report an issue](https://github.com/efficient/libcuckoo/issues) on 85 | GitHub. 
You can also email us at 86 | [libcuckoo-dev@googlegroups.com](mailto:libcuckoo-dev@googlegroups.com). 87 | 88 | Licence 89 | =========== 90 | Copyright (C) 2013, Carnegie Mellon University and Intel Corporation 91 | 92 | Licensed under the Apache License, Version 2.0 (the "License"); 93 | you may not use this file except in compliance with the License. 94 | You may obtain a copy of the License at 95 | 96 | http://www.apache.org/licenses/LICENSE-2.0 97 | 98 | Unless required by applicable law or agreed to in writing, software 99 | distributed under the License is distributed on an "AS IS" BASIS, 100 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 101 | See the License for the specific language governing permissions and 102 | limitations under the License. 103 | 104 | --------------------------- 105 | 106 | CityHash (lib/city.h, lib/city.cc) is Copyright (c) Google, Inc. and 107 | has its own license, as detailed in the source files. -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/libcuckoo/cuckoohash_config.hh: -------------------------------------------------------------------------------- 1 | /** \file */ 2 | 3 | #ifndef _CUCKOOHASH_CONFIG_HH 4 | #define _CUCKOOHASH_CONFIG_HH 5 | 6 | #include 7 | #include 8 | 9 | //! The default maximum number of keys per bucket 10 | constexpr size_t LIBCUCKOO_DEFAULT_SLOT_PER_BUCKET = 4; 11 | 12 | //! The default number of elements in an empty hash table 13 | constexpr size_t LIBCUCKOO_DEFAULT_SIZE = 14 | (1U << 16) * LIBCUCKOO_DEFAULT_SLOT_PER_BUCKET; 15 | 16 | //! The default minimum load factor that the table allows for automatic 17 | //! expansion. It must be a number between 0.0 and 1.0. The table will throw 18 | //! libcuckoo_load_factor_too_low if the load factor falls below this value 19 | //! during an automatic expansion. 20 | constexpr double LIBCUCKOO_DEFAULT_MINIMUM_LOAD_FACTOR = 0.05; 21 | 22 | //! 
An alias for the value that sets no limit on the maximum hashpower. If this 23 | //! value is set as the maximum hashpower limit, there will be no limit. This 24 | //! is also the default initial value for the maximum hashpower in a table. 25 | constexpr size_t LIBCUCKOO_NO_MAXIMUM_HASHPOWER = 26 | std::numeric_limits::max(); 27 | 28 | //! set LIBCUCKOO_DEBUG to 1 to enable debug output 29 | #define LIBCUCKOO_DEBUG 0 30 | 31 | #endif // _CUCKOOHASH_CONFIG_HH 32 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/libcuckoo/cuckoohash_util.hh: -------------------------------------------------------------------------------- 1 | /** \file */ 2 | 3 | #ifndef _CUCKOOHASH_UTIL_HH 4 | #define _CUCKOOHASH_UTIL_HH 5 | 6 | #include "cuckoohash_config.hh" // for LIBCUCKOO_DEBUG 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #if LIBCUCKOO_DEBUG 13 | //! When \ref LIBCUCKOO_DEBUG is 0, LIBCUCKOO_DBG will printing out status 14 | //! messages in various situations 15 | #define LIBCUCKOO_DBG(fmt, ...) \ 16 | fprintf(stderr, "\x1b[32m" \ 17 | "[libcuckoo:%s:%d:%lu] " fmt "" \ 18 | "\x1b[0m", \ 19 | __FILE__, __LINE__, \ 20 | std::hash()(std::this_thread::get_id()), \ 21 | __VA_ARGS__) 22 | #else 23 | //! When \ref LIBCUCKOO_DEBUG is 0, LIBCUCKOO_DBG does nothing 24 | #define LIBCUCKOO_DBG(fmt, ...) \ 25 | do { \ 26 | } while (0) 27 | #endif 28 | 29 | /** 30 | * alignas() requires GCC >= 4.9, so we stick with the alignment attribute for 31 | * GCC. 32 | */ 33 | #ifdef __GNUC__ 34 | #define LIBCUCKOO_ALIGNAS(x) __attribute__((aligned(x))) 35 | #else 36 | #define LIBCUCKOO_ALIGNAS(x) alignas(x) 37 | #endif 38 | 39 | /** 40 | * At higher warning levels, MSVC produces an annoying warning that alignment 41 | * may cause wasted space: "structure was padded due to __declspec(align())". 
42 | */ 43 | #ifdef _MSC_VER 44 | #define LIBCUCKOO_SQUELCH_PADDING_WARNING __pragma(warning(suppress : 4324)) 45 | #else 46 | #define LIBCUCKOO_SQUELCH_PADDING_WARNING 47 | #endif 48 | 49 | /** 50 | * At higher warning levels, MSVC may issue a deadcode warning which depends on 51 | * the template arguments given. For certain other template arguments, the code 52 | * is not really "dead". 53 | */ 54 | #ifdef _MSC_VER 55 | #define LIBCUCKOO_SQUELCH_DEADCODE_WARNING_BEGIN \ 56 | do { \ 57 | __pragma(warning(push)); \ 58 | __pragma(warning(disable : 4702)) \ 59 | } while (0) 60 | #define LIBCUCKOO_SQUELCH_DEADCODE_WARNING_END __pragma(warning(pop)) 61 | #else 62 | #define LIBCUCKOO_SQUELCH_DEADCODE_WARNING_BEGIN 63 | #define LIBCUCKOO_SQUELCH_DEADCODE_WARNING_END 64 | #endif 65 | 66 | /** 67 | * Thrown when an automatic expansion is triggered, but the load factor of the 68 | * table is below a minimum threshold, which can be set by the \ref 69 | * cuckoohash_map::minimum_load_factor method. This can happen if the hash 70 | * function does not properly distribute keys, or for certain adversarial 71 | * workloads. 
72 | */ 73 | class libcuckoo_load_factor_too_low : public std::exception { 74 | public: 75 | /** 76 | * Constructor 77 | * 78 | * @param lf the load factor of the table when the exception was thrown 79 | */ 80 | libcuckoo_load_factor_too_low(const double lf) : load_factor_(lf) {} 81 | 82 | /** 83 | * @return a descriptive error message 84 | */ 85 | virtual const char *what() const noexcept override { 86 | return "Automatic expansion triggered when load factor was below " 87 | "minimum threshold"; 88 | } 89 | 90 | /** 91 | * @return the load factor of the table when the exception was thrown 92 | */ 93 | double load_factor() const { return load_factor_; } 94 | 95 | private: 96 | const double load_factor_; 97 | }; 98 | 99 | /** 100 | * Thrown when an expansion is triggered, but the hashpower specified is greater 101 | * than the maximum, which can be set with the \ref 102 | * cuckoohash_map::maximum_hashpower method. 103 | */ 104 | class libcuckoo_maximum_hashpower_exceeded : public std::exception { 105 | public: 106 | /** 107 | * Constructor 108 | * 109 | * @param hp the hash power we were trying to expand to 110 | */ 111 | libcuckoo_maximum_hashpower_exceeded(const size_t hp) : hashpower_(hp) {} 112 | 113 | /** 114 | * @return a descriptive error message 115 | */ 116 | virtual const char *what() const noexcept override { 117 | return "Expansion beyond maximum hashpower"; 118 | } 119 | 120 | /** 121 | * @return the hashpower we were trying to expand to 122 | */ 123 | size_t hashpower() const { return hashpower_; } 124 | 125 | private: 126 | const size_t hashpower_; 127 | }; 128 | 129 | #endif // _CUCKOOHASH_UTIL_HH 130 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/logger.h: -------------------------------------------------------------------------------- 1 | //(c) 2013-2016 by Authors 2 | //This file is a part of Ragout program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | class Logger 13 | { 14 | public: 15 | static Logger& get() 16 | { 17 | static Logger instance; 18 | return instance; 19 | } 20 | 21 | void setOutputFile(const std::string& filename) 22 | { 23 | _logFile.open(filename, std::ofstream::out | std::ofstream::app); 24 | _logFileSet = true; 25 | if (!_logFile.is_open()) 26 | { 27 | throw std::runtime_error("Can't open log file"); 28 | } 29 | } 30 | 31 | void setDebugging(bool debug) {_debug = debug;} 32 | 33 | class StreamWriter 34 | { 35 | public: 36 | StreamWriter(const std::string& level, 37 | std::ostream* consoleStream = nullptr, 38 | std::ostream* fileStream = nullptr): 39 | _fileStream(fileStream), _consoleStream(consoleStream) 40 | { 41 | if (_fileStream) *_fileStream << timestamp() << " " << level << " "; 42 | if (_consoleStream) *_consoleStream << timestamp() 43 | << " " << level << " "; 44 | } 45 | ~StreamWriter() 46 | { 47 | if (_fileStream) *_fileStream << std::endl; 48 | if (_consoleStream) *_consoleStream << std::endl; 49 | } 50 | 51 | template 52 | Logger::StreamWriter& operator<< (const T& val) 53 | { 54 | if (_fileStream) *_fileStream << val; 55 | if (_consoleStream) *_consoleStream << val; 56 | return *this; 57 | } 58 | 59 | private: 60 | std::ostream* _fileStream; 61 | std::ostream* _consoleStream; 62 | }; 63 | 64 | StreamWriter info() 65 | { 66 | std::ostream* logPtr = _logFileSet ? &_logFile : nullptr; 67 | return StreamWriter("INFO:", &std::cerr, logPtr); 68 | } 69 | 70 | StreamWriter warning() 71 | { 72 | std::ostream* logPtr = _logFileSet ? &_logFile : nullptr; 73 | return StreamWriter("WARNING:", &std::cerr, logPtr); 74 | } 75 | 76 | StreamWriter error() 77 | { 78 | std::ostream* logPtr = _logFileSet ? 
&_logFile : nullptr; 79 | return StreamWriter("ERROR:", &std::cerr, logPtr); 80 | } 81 | 82 | StreamWriter debug() 83 | { 84 | std::ostream* logPtr = _logFileSet ? &_logFile : nullptr; 85 | std::ostream* consolePtr = _debug ? &std::cerr : nullptr; 86 | return StreamWriter("DEBUG:", consolePtr, logPtr); 87 | } 88 | 89 | private: 90 | static std::string timestamp(const char* format = "[%Y-%m-%d %H:%M:%S]") 91 | { 92 | std::time_t t = std::time(0); 93 | char cstr[256]; 94 | std::strftime(cstr, sizeof(cstr), format, std::localtime(&t)); 95 | return cstr; 96 | } 97 | 98 | Logger(): 99 | _debug(false), _logFileSet(false) 100 | {} 101 | ~Logger() 102 | { 103 | if (_logFileSet) 104 | { 105 | _logFile << "-----------End assembly log------------\n"; 106 | } 107 | } 108 | 109 | bool _debug; 110 | bool _logFileSet; 111 | std::ofstream _logFile; 112 | }; 113 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/parallel.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "progress_bar.h" 11 | 12 | //simple thread pool implementation 13 | //updateFun should be thread-safe! 
14 | template 15 | void processInParallel(const std::vector& scheduledTasks, 16 | std::function updateFun, 17 | size_t maxThreads, bool progressBar) 18 | { 19 | if (scheduledTasks.empty()) return; 20 | 21 | std::atomic jobId(0); 22 | ProgressPercent progress(scheduledTasks.size()); 23 | if (progressBar) progress.advance(0); 24 | 25 | auto threadWorker = [&jobId, &scheduledTasks, &updateFun, 26 | &progress, progressBar]() 27 | { 28 | while (true) 29 | { 30 | size_t expected = 0; 31 | while(true) 32 | { 33 | expected = jobId; 34 | if (jobId == scheduledTasks.size()) 35 | { 36 | return; 37 | } 38 | if (jobId.compare_exchange_weak(expected, expected + 1)) 39 | { 40 | break; 41 | } 42 | } 43 | updateFun(scheduledTasks[expected]); 44 | if (progressBar) progress.advance(); 45 | } 46 | }; 47 | 48 | std::vector threads(std::min(maxThreads, 49 | scheduledTasks.size())); 50 | for (size_t i = 0; i < threads.size(); ++i) 51 | { 52 | threads[i] = std::thread(threadWorker); 53 | } 54 | for (size_t i = 0; i < threads.size(); ++i) 55 | { 56 | threads[i].join(); 57 | } 58 | } 59 | 60 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/progress_bar.h: -------------------------------------------------------------------------------- 1 | //(c) 2013-2016 by Authors 2 | //This file is a part of Ragout program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | class ProgressPercent 11 | { 12 | public: 13 | ProgressPercent(size_t finalCount = 0): 14 | _finalCount(finalCount), _curCount(0), _prevPercent(-1), 15 | _stopped(false) 16 | {} 17 | 18 | void setFinalCount(size_t finalCount) {_finalCount = finalCount;} 19 | void setValue(size_t value) 20 | { 21 | this->advance(value - _curCount); 22 | } 23 | void setDone() 24 | { 25 | this->setValue(_finalCount); 26 | } 27 | void advance(size_t step = 1) 28 | { 29 | if (_stopped) return; 30 | 31 | _curCount += step; 32 | int percent = 10UL * _curCount / _finalCount; 33 | 34 | if (percent > _prevPercent) 35 | { 36 | int expected = _prevPercent; 37 | if (_prevPercent.compare_exchange_weak(expected, percent)) 38 | { 39 | std::cerr << percent * 10 << "% "; 40 | if (percent >= 10) 41 | { 42 | std::cerr << std::endl; 43 | _stopped = true; 44 | } 45 | } 46 | } 47 | } 48 | 49 | private: 50 | size_t _finalCount; 51 | std::atomic _curCount; 52 | std::atomic _prevPercent; 53 | bool _stopped; 54 | }; 55 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/sequence.cpp: -------------------------------------------------------------------------------- 1 | #include "sequence.h" 2 | 3 | std::vector DnaSequence::_dnaTable; 4 | DnaSequence::TableFiller DnaSequence::_filler; 5 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/sequence.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 
//Released under the BSD license (see LICENSE file)

#pragma once

//NOTE(review): the six original #include targets were lost in extraction;
//the list below is reconstructed from what this header uses.
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

//Immutable dna sequence class
//
//Nucleotides are stored two bits each in a reference-counted buffer that is
//shared between copies. A complemented view shares the same buffer and only
//flips a flag; indices are mirrored and symbols complemented on access.
//The reference count is a plain integer (not atomic).
class DnaSequence
{
public:
    typedef size_t NuclType;

private:
    static const int NUCL_BITS = 2;
    static const int NUCL_IN_CHUNK = sizeof(NuclType) * 8 / NUCL_BITS;

    struct SharedBuffer
    {
        SharedBuffer(): useCount(0), length(0) {}
        size_t useCount;
        size_t length;
        std::vector<NuclType> chunks;
    };

public:
    DnaSequence():
        _data(new SharedBuffer),
        _complement(false)
    {
        ++_data->useCount;
    }

    ~DnaSequence()
    {
        release();
    }

    //Packs the characters of `string`; each one is translated by dnaToId().
    explicit DnaSequence(const std::string& string):
        _data(new SharedBuffer),
        _complement(false)
    {
        ++_data->useCount;

        if (string.empty()) return;

        _data->length = string.length();
        _data->chunks.assign((_data->length - 1) / NUCL_IN_CHUNK + 1, 0);
        for (size_t i = 0; i < string.length(); ++i)
        {
            size_t chunkId = i / NUCL_IN_CHUNK;
            _data->chunks[chunkId] |= dnaToId(string[i]) << (i % NUCL_IN_CHUNK) * 2;
        }
    }

    DnaSequence(const DnaSequence& other):
        _data(other._data),
        _complement(other._complement)
    {
        //`other` may be a moved-from object that no longer owns a buffer
        if (_data != nullptr) ++_data->useCount;
    }

    DnaSequence(DnaSequence&& other) noexcept:
        _data(other._data),
        _complement(other._complement)
    {
        other._data = nullptr;
    }

    DnaSequence& operator=(const DnaSequence& other)
    {
        //Take the new reference before dropping the old one, so that
        //self-assignment (or assignment between two handles on the same
        //buffer) can never free the buffer that is about to be used
        SharedBuffer* incoming = other._data;
        if (incoming != nullptr) ++incoming->useCount;

        release();
        _data = incoming;
        _complement = other._complement;
        return *this;
    }

    DnaSequence& operator=(DnaSequence&& other) noexcept
    {
        if (this != &other)
        {
            //release() copes with *this being a moved-from object, which
            //is what algorithms such as std::sort or std::swap assign into
            release();
            _data = other._data;
            _complement = other._complement;
            other._data = nullptr;
        }
        return *this;
    }

    size_t length() const {return _data->length;}

    //Returns the nucleotide at `index` as a character (A, C, G or T).
    char at(size_t index) const
    {
        if (_complement)
        {
            index = _data->length - index - 1;
        }
        size_t id = (_data->chunks[index / NUCL_IN_CHUNK] >>
                     (index % NUCL_IN_CHUNK) * 2 ) & 3;
        return idToDna(!_complement ? id : ~id & 3);
    }

    //Returns the nucleotide at `index` as its 2-bit code.
    NuclType atRaw(size_t index) const
    {
        if (_complement)
        {
            index = _data->length - index - 1;
        }
        size_t id = (_data->chunks[index / NUCL_IN_CHUNK] >>
                     (index % NUCL_IN_CHUNK) * 2 ) & 3;
        return !_complement ? id : ~id & 3;
    }

    //TODO: use the same shared buffer

    //Returns the reverse-complement view of this sequence. The flag is
    //toggled rather than set, so complementing twice gives back the
    //original orientation.
    DnaSequence complement() const
    {
        DnaSequence complSequence(*this);
        complSequence._complement = !_complement;
        return complSequence;
    }

    DnaSequence substr(size_t start, size_t length) const;
    std::string str() const;

    //Maps a character to its 2-bit code; characters other than
    //A/C/G/T (either case) map to (size_t)-1.
    static size_t dnaToId(char c)
    {
        //char may be signed: go through unsigned char so that bytes >= 0x80
        //index inside the 256-entry table instead of far outside of it
        return _dnaTable[(unsigned char)c];
    }

    static char idToDna(size_t id)
    {
        static char table[] = {'A', 'C', 'G', 'T'};
        return table[id];
    }

private:
    //Drops this object's reference and frees the buffer with the last one.
    //Safe to call on a moved-from object.
    void release()
    {
        if (_data != nullptr)
        {
            --_data->useCount;
            if (_data->useCount == 0)
            {
                delete _data;
            }
            _data = nullptr;
        }
    }

    static std::vector<size_t> _dnaTable;

    //Fills _dnaTable once, when the static _filler object is constructed.
    struct TableFiller
    {
        TableFiller()
        {
            static bool tableFilled = false;
            if (!tableFilled)
            {
                tableFilled = true;
                _dnaTable.assign(256, -1);    //256 chars
                _dnaTable[(size_t)'A'] = 0;
                _dnaTable[(size_t)'a'] = 0;
                _dnaTable[(size_t)'C'] = 1;
                _dnaTable[(size_t)'c'] = 1;
                _dnaTable[(size_t)'G'] = 2;
                _dnaTable[(size_t)'g'] = 2;
                _dnaTable[(size_t)'T'] = 3;
                _dnaTable[(size_t)'t'] = 3;
            }
        }
    };
    static TableFiller _filler;

    SharedBuffer* _data;
    bool _complement;
};

//Unpacks the sequence into a string of A/C/G/T characters.
inline std::string DnaSequence::str() const
{
    std::string result;
    result.reserve(this->length());
    for (size_t i = 0; i < this->length(); ++i)
    {
        result.push_back(this->at(i));
    }
    return result;
}

//Returns a copy of up to `length` nucleotides starting at `start`, in the
//orientation of this view; the result owns a fresh buffer.
//Throws std::runtime_error for a zero length or a start past the end;
//a length running past the end is truncated.
inline DnaSequence DnaSequence::substr(size_t start, size_t length) const
{
    if (length == 0) throw std::runtime_error("Zero length subtring");
    if (start >= _data->length) throw std::runtime_error("Incorrect substring start");

    if (start + length > _data->length)
    {
        length = _data->length - start;
    }

    DnaSequence newSequence;
    newSequence._data->length = length;
    newSequence._data->chunks.assign((length - 1) / NUCL_IN_CHUNK + 1, 0);

    for (size_t i = 0; i < length; ++i)
    {
        size_t nucId = this->atRaw(start + i);
        size_t newChunkId = i / NUCL_IN_CHUNK;
        newSequence._data->chunks[newChunkId] |= nucId << (i % NUCL_IN_CHUNK) * 2;
    }

    return newSequence;
}

// --------------------------------------------------------------------------------
// /benchmarks/kmer-cnt/sequence_container.h:
// --------------------------------------------------------------------------------
//(c) 2016 by Authors
//This file is a part of ABruijn program.
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "sequence.h" 13 | 14 | struct FastaRecord 15 | { 16 | class Id 17 | { 18 | public: 19 | Id(): _id(std::numeric_limits::max()) {} 20 | 21 | explicit Id(uint32_t id): _id(id) {} 22 | 23 | bool operator==(const Id& other) const 24 | {return _id == other._id;} 25 | 26 | bool operator!=(const Id& other) const 27 | {return !(*this == other);} 28 | 29 | Id rc() const //reverse complement 30 | {return Id(_id + 1 - (_id % 2) * 2);} 31 | 32 | bool strand() const //true = positive, false = negative 33 | {return !(_id % 2);} 34 | 35 | size_t hash() const 36 | { 37 | size_t x = _id; 38 | size_t z = (x += 0x9E3779B97F4A7C15ULL); 39 | z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ULL; 40 | z = (z ^ (z >> 27)) * 0x94D049BB133111EBULL; 41 | return z ^ (z >> 31); 42 | } 43 | 44 | int signedId() const 45 | {return (_id % 2) ? -((int)_id + 1) / 2 : (int)_id / 2 + 1;} 46 | 47 | friend std::ostream& operator << (std::ostream& stream, const Id& id) 48 | { 49 | stream << std::to_string(id._id); 50 | return stream; 51 | } 52 | 53 | friend std::istream& operator >> (std::istream& stream, Id& id) 54 | { 55 | std::string buffer; 56 | stream >> buffer; 57 | id._id = std::stoi(buffer); 58 | return stream; 59 | } 60 | 61 | bool operator < (const FastaRecord::Id& other) const 62 | { 63 | return _id < other._id; 64 | } 65 | 66 | friend class SequenceContainer; 67 | 68 | private: 69 | uint32_t _id; 70 | }; 71 | static const Id ID_NONE; 72 | typedef std::tuple IdPair; 73 | 74 | FastaRecord(): id(ID_NONE) {} 75 | FastaRecord(const DnaSequence& sequence, const std::string& description, 76 | Id id): 77 | id(id), sequence(sequence), description(description) 78 | { 79 | } 80 | 81 | FastaRecord(const FastaRecord& other): 82 | id(other.id), sequence(other.sequence), 83 | description(other.description) {} 84 | 85 | FastaRecord(FastaRecord&& other): 86 | id 
(other.id) 87 | { 88 | *this = std::move(other); 89 | } 90 | 91 | FastaRecord& operator=(const FastaRecord& other) 92 | { 93 | id = other.id; 94 | sequence = other.sequence; 95 | description = other.description; 96 | return *this; 97 | } 98 | 99 | FastaRecord& operator=(FastaRecord&& other) 100 | { 101 | id = other.id; 102 | sequence = std::move(other.sequence); 103 | description = std::move(other.description); 104 | return *this; 105 | } 106 | 107 | Id id; 108 | DnaSequence sequence; 109 | std::string description; 110 | }; 111 | 112 | namespace std 113 | { 114 | template <> 115 | struct hash 116 | { 117 | size_t operator() (const FastaRecord::Id& h) const throw() 118 | { 119 | return h.hash(); 120 | } 121 | }; 122 | 123 | template <> 124 | struct hash 125 | { 126 | size_t operator()(const FastaRecord::IdPair& k) const 127 | { 128 | size_t lhs = std::get<0>(k).hash(); 129 | size_t rhs = std::get<1>(k).hash(); 130 | lhs ^= rhs + 0x9ddfea08eb382d69ULL + (lhs << 6) + (lhs >> 2); 131 | return lhs; 132 | } 133 | }; 134 | } 135 | 136 | class SequenceContainer 137 | { 138 | public: 139 | class ParseException : public std::runtime_error 140 | { 141 | public: 142 | ParseException(const std::string & what): 143 | std::runtime_error(what) 144 | {} 145 | }; 146 | 147 | typedef std::vector SequenceIndex; 148 | 149 | SequenceContainer(): 150 | _offsetInitialized(false) {} 151 | 152 | void loadFromFile(const std::string& filename, int minReadLength = 0); 153 | 154 | static void writeFasta(const std::vector& records, 155 | const std::string& fileName, 156 | bool onlyPositiveStrand = false); 157 | 158 | static size_t getMaxSeqId() {return g_nextSeqId;} 159 | 160 | const FastaRecord& addSequence(const DnaSequence& sequence, 161 | const std::string& description); 162 | 163 | const SequenceIndex& iterSeqs() const 164 | { 165 | return _seqIndex; 166 | } 167 | 168 | const FastaRecord& getRecord(FastaRecord::Id seqId) const 169 | { 170 | assert(seqId._id - _seqIdOffest < 
_seqIndex.size()); 171 | assert(_seqIndex[seqId._id - _seqIdOffest].id == seqId); 172 | return _seqIndex[seqId._id - _seqIdOffest]; 173 | } 174 | 175 | const DnaSequence& getSeq(FastaRecord::Id readId) const 176 | { 177 | assert(readId._id - _seqIdOffest < _seqIndex.size()); 178 | assert(_seqIndex[readId._id - _seqIdOffest].id == readId); 179 | return _seqIndex[readId._id - _seqIdOffest].sequence; 180 | } 181 | 182 | int32_t seqLen(FastaRecord::Id readId) const 183 | { 184 | assert(readId._id - _seqIdOffest < _seqIndex.size()); 185 | assert(_seqIndex[readId._id - _seqIdOffest].id == readId); 186 | return _seqIndex[readId._id - _seqIdOffest].sequence.length(); 187 | } 188 | 189 | std::string seqName(FastaRecord::Id readId) const 190 | { 191 | assert(readId._id - _seqIdOffest < _seqIndex.size()); 192 | assert(_seqIndex[readId._id - _seqIdOffest].id == readId); 193 | return _seqIndex[readId._id - _seqIdOffest].description; 194 | } 195 | 196 | int computeNxStat(float fraction) const; 197 | 198 | void buildPositionIndex(); 199 | 200 | size_t globalPosition(FastaRecord::Id seqId, int32_t position) const 201 | { 202 | assert(position >= 0 && position < this->seqLen(seqId)); 203 | assert(seqId._id - _seqIdOffest < _seqIndex.size()); 204 | #ifndef NDEBUG 205 | auto checkGlob = _sequenceOffsets[seqId._id - _seqIdOffest].offset + position; 206 | FastaRecord::Id checkId; 207 | int32_t checkPos; 208 | int32_t outLen; 209 | this->seqPosition(checkGlob, checkId, checkPos, outLen); 210 | assert(checkId == seqId && checkPos == position); 211 | #endif 212 | return _sequenceOffsets[seqId._id - _seqIdOffest].offset + position; 213 | } 214 | 215 | const FastaRecord& recordByName(const std::string& name) const 216 | { 217 | return this->getRecord(_nameIndex.at(name)); 218 | } 219 | 220 | void seqPosition(size_t globPos, FastaRecord::Id& outSeqId, 221 | int32_t& outPosition, int32_t& outLen) const 222 | { 223 | assert(globPos < _sequenceOffsets.back().offset); 224 | 225 | size_t hint = 
_offsetsHint[globPos / CHUNK]; 226 | while (_sequenceOffsets[hint + 1].offset <= globPos) ++hint; 227 | 228 | outSeqId = FastaRecord::Id(_seqIdOffest + hint); 229 | outPosition = globPos - _sequenceOffsets[hint].offset; 230 | outLen = (int32_t)_sequenceOffsets[hint].length; 231 | 232 | assert(outSeqId._id - _seqIdOffest < _seqIndex.size()); 233 | assert(outPosition >= 0 && outPosition < outLen); 234 | //assert(this->globalPosition(outSeqId, outPosition) == globPos); 235 | } 236 | static size_t g_nextSeqId; 237 | 238 | private: 239 | struct OffsetPair 240 | { 241 | size_t offset; 242 | size_t length; 243 | }; 244 | 245 | FastaRecord::Id addSequence(const FastaRecord& sequence); 246 | 247 | size_t readFasta(std::vector& record, 248 | const std::string& fileName); 249 | 250 | size_t readFastq(std::vector& record, 251 | const std::string& fileName); 252 | 253 | bool isFasta(const std::string& fileName); 254 | 255 | void validateSequence(std::string& sequence); 256 | 257 | void validateHeader(std::string& header); 258 | 259 | SequenceIndex _seqIndex; 260 | size_t _seqIdOffest; 261 | bool _offsetInitialized; 262 | std::unordered_map _nameIndex; 264 | 265 | //global/local position convertions 266 | const size_t MAX_SEQUENCE = 1ULL << (8 * 5); 267 | const size_t CHUNK = 1000; 268 | std::vector _sequenceOffsets; 269 | std::vector _offsetsHint; 270 | }; 271 | 272 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/utils.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "logger.h" 13 | 14 | template 15 | void vecRemove(std::vector& v, T val) 16 | { 17 | v.erase(std::remove(v.begin(), v.end(), val), v.end()); 18 | } 19 | 20 | struct pairhash 21 | { 22 | public: 23 | template 24 | std::size_t operator()(const std::pair &x) const 25 | { 26 | return std::hash()(x.first) ^ std::hash()(x.second); 27 | } 28 | }; 29 | 30 | 31 | template 32 | T quantile(const std::vector& vec, int percent) 33 | { 34 | if (vec.empty()) return 0; 35 | //NOTE: there's a bug in libstdc++ nth_element, 36 | //that sometimes leads to a segfault. This is why 37 | //we have this inefficient impleemntation here 38 | //std::nth_element(vec.begin(), vec.begin() + vec.size() / 2, 39 | // vec.end()); 40 | auto sortedVec = vec; 41 | std::sort(sortedVec.begin(), sortedVec.end()); 42 | size_t targetId = std::min(vec.size() * (size_t)percent / 100, 43 | vec.size() - 1); 44 | return sortedVec[targetId]; 45 | } 46 | 47 | template 48 | T median(const std::vector& vec) 49 | { 50 | return quantile(vec, 50); 51 | } 52 | 53 | inline std::vector 54 | splitString(const std::string &s, char delim) 55 | { 56 | std::vector elems; 57 | std::stringstream ss(s); 58 | std::string item; 59 | while (std::getline(ss, item, delim)) elems.push_back(item); 60 | return elems; 61 | } 62 | 63 | inline bool fileExists(const std::string& path) 64 | { 65 | std::ifstream fin(path); 66 | return fin.good(); 67 | } 68 | 69 | inline void segfaultHandler(int signal __attribute__((unused))) 70 | { 71 | void *stackArray[20]; 72 | size_t size = backtrace(stackArray, 10); 73 | Logger::get().error() << "Segmentation fault! 
Backtrace:"; 74 | char** backtrace = backtrace_symbols(stackArray, size); 75 | for (size_t i = 0; i < size; ++i) 76 | { 77 | Logger::get().error() << "\t" << backtrace[i]; 78 | } 79 | abort(); 80 | } 81 | 82 | inline void exceptionHandler() 83 | { 84 | static bool triedThrow = false; 85 | try 86 | { 87 | if (!triedThrow) 88 | { 89 | triedThrow = true; 90 | throw; 91 | } 92 | } 93 | catch (const std::exception &e) 94 | { 95 | Logger::get().error() << "Caught unhandled exception: " << e.what(); 96 | } 97 | catch (...) {} 98 | 99 | void *stackArray[20]; 100 | size_t size = backtrace(stackArray, 10); 101 | char** backtrace = backtrace_symbols(stackArray, size); 102 | for (size_t i = 0; i < size; ++i) 103 | { 104 | Logger::get().error() << "\t" << backtrace[i]; 105 | } 106 | abort(); 107 | } 108 | 109 | -------------------------------------------------------------------------------- /benchmarks/kmer-cnt/vertex_index.h: -------------------------------------------------------------------------------- 1 | //(c) 2016 by Authors 2 | //This file is a part of ABruijn program. 
3 | //Released under the BSD license (see LICENSE file) 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include "kmer.h" 18 | #include "sequence_container.h" 19 | #include "config.h" 20 | #include "logger.h" 21 | 22 | 23 | typedef std::map KmerDistribution; 24 | 25 | class KmerCounter 26 | { 27 | public: 28 | KmerCounter(const SequenceContainer& seqContainer): 29 | _seqContainer(seqContainer), 30 | _flatCounter(nullptr), _numKmers(0) 31 | {} 32 | 33 | ~KmerCounter() 34 | { 35 | if (_flatCounter) 36 | { 37 | delete[] _flatCounter; 38 | _flatCounter = nullptr; 39 | } 40 | } 41 | 42 | const KmerDistribution& getKmerHist() const 43 | { 44 | return _kmerDistribution; 45 | } 46 | 47 | void count(bool useFlatCounter); 48 | size_t getFreq(Kmer kmer) const; 49 | size_t getKmerNum() const; 50 | void clear(); 51 | void setOutputProgress(bool progress) {_outputProgress = progress;} 52 | 53 | private: 54 | const SequenceContainer& _seqContainer; 55 | bool _outputProgress; 56 | bool _useFlatCounter; 57 | 58 | std::atomic* _flatCounter; 59 | //std::vector> _flatCounter; 60 | cuckoohash_map _hashCounter; 61 | KmerDistribution _kmerDistribution; 62 | 63 | std::atomic _numKmers; 64 | }; 65 | 66 | class VertexIndex 67 | { 68 | public: 69 | ~VertexIndex() 70 | { 71 | this->clear(); 72 | } 73 | VertexIndex(const SequenceContainer& seqContainer, float sampleRate): 74 | _seqContainer(seqContainer), _outputProgress(false), 75 | _sampleRate(sampleRate), _repetitiveFrequency(0), 76 | _kmerCounter(seqContainer) 77 | //_solidMultiplier(1) 78 | //_flankRepeatSize(flankRepeatSize) 79 | {} 80 | 81 | VertexIndex(const VertexIndex&) = delete; 82 | void operator=(const VertexIndex&) = delete; 83 | 84 | private: 85 | struct IndexChunk 86 | { 87 | IndexChunk(): 88 | hi(0), low(0) {} 89 | IndexChunk(const IndexChunk& other): 90 | hi(other.hi), low(other.low) {} 91 | IndexChunk(IndexChunk&& other): 92 
| hi(other.hi), low(other.low) {} 93 | IndexChunk& operator=(const IndexChunk& other) 94 | { 95 | hi = other.hi; 96 | low = other.low; 97 | return *this; 98 | } 99 | 100 | size_t get() const 101 | { 102 | return ((size_t)hi << 32) + (size_t)low; 103 | } 104 | void set(size_t val) 105 | { 106 | low = val & ((1ULL << 32) - 1); 107 | hi = val >> 32; 108 | } 109 | 110 | uint8_t hi; 111 | uint32_t low; 112 | } __attribute__((packed)); 113 | static_assert(sizeof(IndexChunk) == 5, 114 | "Unexpected size of IndexChunk structure"); 115 | 116 | //static const size_t MAX_INDEX = 1ULL << (sizeof(IndexChunk) * 8); 117 | 118 | struct ReadPosition 119 | { 120 | ReadPosition(FastaRecord::Id readId = FastaRecord::ID_NONE, 121 | int32_t position = 0): 122 | readId(readId), position(position) {} 123 | FastaRecord::Id readId; 124 | int32_t position; 125 | }; 126 | 127 | struct ReadVector 128 | { 129 | ReadVector(uint32_t capacity = 0, uint32_t size = 0): 130 | capacity(capacity), size(size), data(nullptr) {} 131 | uint32_t capacity; 132 | uint32_t size; 133 | IndexChunk* data; 134 | }; 135 | 136 | public: 137 | typedef std::map KmerDistribution; 138 | 139 | class KmerPosIterator 140 | { 141 | public: 142 | KmerPosIterator(ReadVector rv, size_t index, bool revComp, 143 | const SequenceContainer& seqContainer): 144 | rv(rv), index(index), revComp(revComp), 145 | seqContainer(seqContainer), kmerSize(Parameters::get().kmerSize) 146 | {} 147 | 148 | bool operator==(const KmerPosIterator& other) const 149 | { 150 | return index == other.index && rv.data == other.rv.data; 151 | } 152 | bool operator!=(const KmerPosIterator& other) const 153 | { 154 | return !(*this == other); 155 | } 156 | 157 | //__attribute__((always_inline)) 158 | ReadPosition operator*() const 159 | { 160 | size_t globPos = rv.data[index].get(); 161 | FastaRecord::Id seqId; 162 | int32_t position; 163 | int32_t seqLen; 164 | seqContainer.seqPosition(globPos, seqId, position, seqLen); 165 | 166 | if (!revComp) 167 | { 168 
| return ReadPosition(seqId, position); 169 | } 170 | else 171 | { 172 | return ReadPosition(seqId.rc(), seqLen - position - kmerSize); 173 | } 174 | } 175 | 176 | KmerPosIterator& operator++() 177 | { 178 | ++index; 179 | return *this; 180 | } 181 | 182 | private: 183 | ReadVector rv; 184 | size_t index; 185 | bool revComp; 186 | const SequenceContainer& seqContainer; 187 | size_t kmerSize; 188 | }; 189 | 190 | class IterHelper 191 | { 192 | public: 193 | IterHelper(ReadVector rv, bool revComp, 194 | const SequenceContainer& seqContainer): 195 | rv(rv), revComp(revComp), seqContainer(seqContainer) {} 196 | 197 | KmerPosIterator begin() 198 | { 199 | return KmerPosIterator(rv, 0, revComp, seqContainer); 200 | } 201 | 202 | KmerPosIterator end() 203 | { 204 | return KmerPosIterator(rv, rv.size, revComp, seqContainer); 205 | } 206 | 207 | private: 208 | ReadVector rv; 209 | bool revComp; 210 | const SequenceContainer& seqContainer; 211 | }; 212 | 213 | void countKmers(); 214 | void buildIndex(int minCoverage); 215 | void buildIndexUnevenCoverage(int minCoverage, float selectRate, 216 | int tandemFreq); 217 | void buildIndexMinimizers(int minCoverage, int wndLen); 218 | void clear(); 219 | 220 | IterHelper iterKmerPos(Kmer kmer) const 221 | { 222 | bool revComp = kmer.standardForm(); 223 | return IterHelper(_kmerIndex.find(kmer), revComp, 224 | _seqContainer); 225 | } 226 | 227 | //__attribute__((always_inline)) 228 | /*bool isSolid(Kmer kmer) const 229 | { 230 | kmer.standardForm(); 231 | return _kmerIndex.contains(kmer); 232 | }*/ 233 | 234 | bool isRepetitive(Kmer kmer) const 235 | { 236 | kmer.standardForm(); 237 | return _repetitiveKmers.contains(kmer); 238 | } 239 | 240 | size_t kmerFreq(Kmer kmer) const 241 | { 242 | kmer.standardForm(); 243 | ReadVector rv; 244 | _kmerIndex.find(kmer, rv); 245 | return rv.size; 246 | } 247 | 248 | void outputProgress(bool set) 249 | { 250 | _outputProgress = set; 251 | _kmerCounter.setOutputProgress(set); 252 | } 253 | 254 | 
const KmerDistribution& getKmerHist() const 255 | { 256 | return _kmerCounter.getKmerHist(); 257 | //return _kmerDistribution; 258 | } 259 | 260 | float getSampleRate() const {return _sampleRate;} 261 | 262 | private: 263 | //void setRepeatCutoff(int minCoverage); 264 | 265 | 266 | struct KmerFreq 267 | { 268 | Kmer kmer; 269 | int32_t position; 270 | size_t freq; 271 | }; 272 | std::vector 273 | yieldFrequentKmers(const FastaRecord::Id& seqId, 274 | float selctRate, int tandemFreq); 275 | 276 | void allocateIndexMemory(); 277 | void filterFrequentKmers(int minCoverage, float rate); 278 | 279 | const SequenceContainer& _seqContainer; 280 | //KmerDistribution _kmerDistribution; 281 | bool _outputProgress; 282 | float _sampleRate; 283 | size_t _repetitiveFrequency; 284 | //int32_t _solidMultiplier; 285 | 286 | const size_t MEM_CHUNK = 32 * 1024 * 1024 / sizeof(IndexChunk); 287 | std::vector _memoryChunks; 288 | 289 | cuckoohash_map _kmerIndex; 290 | //cuckoohash_map _kmerCounts; 291 | cuckoohash_map _repetitiveKmers; 292 | 293 | KmerCounter _kmerCounter; 294 | }; 295 | -------------------------------------------------------------------------------- /benchmarks/nn-base/README.md: -------------------------------------------------------------------------------- 1 | `nn-base` uses the same license as [Bonito](https://github.com/nanoporetech/bonito). 
2 | -------------------------------------------------------------------------------- /benchmarks/nn-base/models/bonito_dna_r941/config.toml: -------------------------------------------------------------------------------- 1 | model = "dna_r9.4.1" 2 | 3 | [labels] 4 | labels = [ "N", "A", "C", "G", "T",] 5 | 6 | [input] 7 | features = 1 8 | 9 | [qscore] 10 | bias = -0.4 11 | scale = 1.3 12 | 13 | [encoder] 14 | activation = "swish" 15 | 16 | [[block]] 17 | filters = 344 18 | repeat = 1 19 | kernel = [ 9,] 20 | stride = [ 3,] 21 | dilation = [ 1,] 22 | dropout = 0.05 23 | residual = false 24 | separable = false 25 | 26 | [[block]] 27 | filters = 424 28 | repeat = 2 29 | kernel = [ 115,] 30 | stride = [ 1,] 31 | dilation = [ 1,] 32 | dropout = 0.05 33 | residual = true 34 | separable = true 35 | 36 | [[block]] 37 | filters = 464 38 | repeat = 7 39 | kernel = [ 5,] 40 | stride = [ 1,] 41 | dilation = [ 1,] 42 | dropout = 0.05 43 | residual = true 44 | separable = true 45 | 46 | [[block]] 47 | filters = 456 48 | repeat = 4 49 | kernel = [ 123,] 50 | stride = [ 1,] 51 | dilation = [ 1,] 52 | dropout = 0.05 53 | residual = true 54 | separable = true 55 | 56 | [[block]] 57 | filters = 440 58 | repeat = 9 59 | kernel = [ 9,] 60 | stride = [ 1,] 61 | dilation = [ 1,] 62 | dropout = 0.05 63 | residual = true 64 | separable = true 65 | 66 | [[block]] 67 | filters = 280 68 | repeat = 6 69 | kernel = [ 31,] 70 | stride = [ 1,] 71 | dilation = [ 1,] 72 | dropout = 0.05 73 | residual = true 74 | separable = true 75 | 76 | [[block]] 77 | filters = 384 78 | repeat = 1 79 | kernel = [ 67,] 80 | stride = [ 1,] 81 | dilation = [ 1,] 82 | dropout = 0.05 83 | residual = false 84 | separable = true 85 | 86 | [[block]] 87 | filters = 48 88 | repeat = 1 89 | kernel = [ 15,] 90 | stride = [ 1,] 91 | dilation = [ 1,] 92 | dropout = 0.05 93 | residual = false 94 | separable = false 95 | 96 | [training] 97 | config = "config/dna_r9.4.1.toml" 98 | command = "train" 99 | device = "cuda" 100 | lr 
= 0.001 101 | seed = 25 102 | epochs = 400 103 | batch = 512 104 | chunks = 3000000 105 | validation_split = 0.97 106 | amp = true 107 | multi_gpu = true 108 | force = true 109 | 110 | -------------------------------------------------------------------------------- /benchmarks/nn-base/models/bonito_dna_r941/weights_0.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-base/models/bonito_dna_r941/weights_0.tar -------------------------------------------------------------------------------- /benchmarks/nn-base/run_bonito.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | reads=10 4 | 5 | mkdir -p ./output 6 | /usr/local/cuda/bin/nvprof \ 7 | --metrics all \ 8 | python3 bonito/basecall.py \ 9 | models/bonito_dna_r941 \ 10 | data/$reads \ 11 | --half \ 12 | --fastq \ 13 | > output/bonito.fastq 14 | 15 | # python3 bonito/basecall.py \ 16 | # models/bonito_dna_r941 \ 17 | # data/$reads \ 18 | # --half \ 19 | # --fastq \ 20 | # --chunksize 3000 \ 21 | # --cudart \ 22 | # > output/bonito.fastq 23 | -------------------------------------------------------------------------------- /benchmarks/nn-variant/README.md: -------------------------------------------------------------------------------- 1 | `nn-variant` uses the same license as [Clair](https://github.com/HKU-BAL/Clair). 
2 | 3 | If you find `nn-variant` useful, please cite: 4 | 5 | ``` 6 | @article{luo2020exploring, 7 | title={Exploring the limit of using a deep neural network on pileup data for germline variant calling}, 8 | author={Luo, Ruibang and Wong, Chak-Lim and Wong, Yat-Sing and Tang, Chi-Ian and Liu, Chi-Man and Leung, Chi-Ming and Lam, Tak-Wah}, 9 | journal={Nature Machine Intelligence}, 10 | volume={2}, 11 | number={4}, 12 | pages={220--227}, 13 | year={2020}, 14 | publisher={Nature Publishing Group} 15 | } 16 | ``` 17 | 18 | -------------------------------------------------------------------------------- /benchmarks/nn-variant/clair/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/__init__.py -------------------------------------------------------------------------------- /benchmarks/nn-variant/clair/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /benchmarks/nn-variant/clair/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /benchmarks/nn-variant/clair/__pycache__/selu.cpython-37.pyc: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/__pycache__/selu.cpython-37.pyc
# --------------------------------------------------------------------------------
# /benchmarks/nn-variant/clair/plot_tensor.py:
# --------------------------------------------------------------------------------
import sys
import os
import numpy as np
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from argparse import ArgumentParser

from clair.utils import setup_environment


def plot_tensor(ofn, XArray):
    """Render the four channels of the first tensor in XArray to one PNG.

    ofn    -- output image path
    XArray -- array indexed as [sample, position, strand/base, channel];
              only sample 0 is plotted, one subplot per channel 0..3
    """
    plot = plt.figure(figsize=(15, 8))

    plot_min = -30
    plot_max = 30
    plot_arr = ["A+", "C+", "G+", "T+", "A-", "C-", "G-", "T-"]

    # Channel 0 is drawn on a 0..plot_max "hot" scale; the other three
    # channels share a symmetric plot_min..plot_max diverging scale.
    channel_styles = [
        (0, plt.cm.hot),
        (plot_min, plt.cm.bwr),
        (plot_min, plt.cm.bwr),
        (plot_min, plt.cm.bwr),
    ]
    for channel, (lower_bound, colormap) in enumerate(channel_styles):
        plt.subplot(4, 1, channel + 1)
        plt.xticks(np.arange(0, 33, 1))
        plt.yticks(np.arange(0, 8, 1), plot_arr)
        plt.imshow(XArray[0, :, :, channel].transpose(), vmin=lower_bound, vmax=plot_max,
                   interpolation="nearest", cmap=colormap)
        plt.colorbar()

    plot.savefig(ofn, dpi=300, transparent=True, bbox_inches='tight')
    plt.close(plot)


def create_png(args):
    """Read comma-separated tensor values and plot them to <name>/tensor.png.

    args.array_fn -- text file of comma-separated numbers; line breaks are
                     ignored and the values are reshaped to (-1, 33, 8, 4)
    args.name     -- output directory, created if missing

    Raises ValueError when args.name is not set or the file has no values.
    """
    varName = args.name
    if not varName:
        # --name defaults to None, which used to surface later as an
        # opaque TypeError from os.path.exists() / string concatenation
        raise ValueError("an output name is required (--name)")

    # the context manager closes the file even if read() raises
    with open(args.array_fn, 'r') as f:
        array = [line for line in f.read().split("\n") if line]
    print(array)

    splitted_array = []
    for line in array:
        splitted_array += line.split(",")

    print("splitted array length")
    print(len(splitted_array))
    if not splitted_array:
        # indexing [0] below would otherwise fail with a bare IndexError
        raise ValueError("no tensor values found in %s" % args.array_fn)
    print(splitted_array[0])

    XArray = np.array(splitted_array, dtype=np.float32).reshape((-1, 33, 8, 4))
    # channels 1..3 of the first tensor are shown relative to channel 0
    XArray[0, :, :, 1] -= XArray[0, :, :, 0]
    XArray[0, :, :, 2] -= XArray[0, :, :, 0]
    XArray[0, :, :, 3] -= XArray[0, :, :, 0]

    print("Plotting %s..." % (varName), file=sys.stderr)

    # Create folder; exist_ok avoids the exists()/makedirs() race
    os.makedirs(varName, exist_ok=True)

    # Plot tensors
    plot_tensor(varName+"/tensor.png", XArray)


def ParseArgs():
    """Parse the command line; print help and exit(1) when no argument is given."""
    parser = ArgumentParser(
        description="Visualize tensors and hidden layers in PNG")

    parser.add_argument('--array_fn', type=str, default="vartensors",
                        help="Array input")

    parser.add_argument('--name', type=str, default=None,
                        help="output name")

    args = parser.parse_args()

    if len(sys.argv[1:]) == 0:
        parser.print_help()
        sys.exit(1)

    return args


def main():
    """Command-line entry point."""
    args = ParseArgs()
    setup_environment()
    create_png(args)


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /benchmarks/nn-variant/clair/post_processing/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/post_processing/__init__.py
# --------------------------------------------------------------------------------
# /benchmarks/nn-variant/clair/post_processing/ensemble.py:
# --------------------------------------------------------------------------------
from sys import stdin, stderr, argv, exit
from collections import namedtuple, defaultdict
from argparse import ArgumentParser

EnsembleConfig = namedtuple('EnsembleConfig', [
    'minimum_count_to_output',
])

# Column layout of one tab-separated input row:
#   0: chromosome, 1: position, 2: sequence,
#   3 .. 3 + 33*8*4: flattened tensor (integers),
#   remaining columns: probabilities (floats)
_TENSOR_START = 3
_TENSOR_END = _TENSOR_START + 33*8*4


def dicts_from_stdin():
    """Aggregate the rows read from stdin by (chromosome, position).

    Returns (counter, sequence_dict, tensor_dict, probabilities_dict), all
    keyed by (chromosome, position):
      counter            -- number of rows seen for the key
      sequence_dict      -- sequence column of the first row for the key
      tensor_dict        -- tensor columns of the first row, as ints
      probabilities_dict -- element-wise sum of the probability columns
                            over all rows for the key

    Blank rows are skipped. A later row with fewer probability columns than
    the first row for its key raises IndexError; extra columns are ignored.
    """
    counter = defaultdict(int)

    sequence_dict = {}
    tensor_dict = {}
    probabilities_dict = {}

    # iterate the stream lazily instead of loading it all with readlines()
    for row in stdin:
        if not row.strip():
            # a blank row used to fail with IndexError on columns[1]
            continue

        # drop the line terminator so it never ends up in the last column
        columns = row.rstrip("\n").split("\t")

        chromosome, position, sequence = columns[0], columns[1], columns[2]
        key = (chromosome, position)
        counter[key] += 1

        probabilities_from_input = [float(no) for no in columns[_TENSOR_END:]]

        if key not in probabilities_dict:
            # first row for this key: remember everything
            sequence_dict[key] = sequence
            tensor_dict[key] = [int(str_value) for str_value in columns[_TENSOR_START:_TENSOR_END]]
            probabilities_dict[key] = probabilities_from_input
        else:
            # later rows only contribute to the running probability sums
            probabilities_dict[key] = [
                probability + probabilities_from_input[index]
                for index, probability in enumerate(probabilities_dict[key])
            ]

    return counter, sequence_dict, tensor_dict, probabilities_dict


def output_with(
    counter,
    sequence_dict,
    tensor_dict,
    probabilities_dict,
    ensemble_config,
):
    """Print one tab-separated row per key seen at least
    ensemble_config.minimum_count_to_output times, with each summed
    probability divided by the key's row count (6 decimal places).
    """
    minimum_count_to_output = ensemble_config.minimum_count_to_output

    for key, count in counter.items():
        if count < minimum_count_to_output:
            continue

        chromosome, position = key
        sequence = sequence_dict[key]
        tensor = tensor_dict[key]
        probabilities = probabilities_dict[key]

        tensor_str = "\t".join([str(int_value) for int_value in tensor])
        probabilities_str = "\t".join(["{:.6f}".format(probability / count) for probability in probabilities])

        print("\t".join([
            chromosome,
            position,
            sequence,
            tensor_str,
            probabilities_str,
        ]))


def run_pipeline(ensemble_config):
    """Read all rows from stdin, then print the averaged rows to stdout."""
    counter, sequence_dict, tensor_dict, probabilities_dict = dicts_from_stdin()

    output_with(
        counter,
        sequence_dict,
        tensor_dict,
        probabilities_dict,
        ensemble_config,
    )


def main():
    """Command-line entry point; prints help and exits(1) when no argument is given."""
    # the previous description was copied from the variant-calling tool and
    # did not describe what this script does
    parser = ArgumentParser(
        description="Average the probabilities of rows read from stdin that share a chromosome and position")

    parser.add_argument('--minimum_count_to_output', type=int, default=0,
                        help="minimum # of calls to output the probabilities")

    args = parser.parse_args()

    if len(argv[1:]) == 0:
        parser.print_help()
        exit(1)

    ensemble_config = EnsembleConfig(
        minimum_count_to_output=args.minimum_count_to_output
    )
    run_pipeline(ensemble_config=ensemble_config)


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /benchmarks/nn-variant/clair/selu.py:
# --------------------------------------------------------------------------------
'''
Tensorflow Implementation of the Scaled ELU function and Dropout
'''
import warnings
# NOTE(review): the original indentation was lost in extraction, so where
# the catch_warnings() block ends is a guess - confirm against upstream.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=DeprecationWarning)
    warnings.filterwarnings("ignore", category=FutureWarning)
    from tensorflow.python.util import deprecation
    deprecation._PRINT_DEPRECATION_WARNINGS = False
    import tensorflow as tf
    from tensorflow.contrib import layers
    from tensorflow.python.framework import ops
    from tensorflow.python.framework import tensor_shape
    from tensorflow.python.framework import tensor_util
    from tensorflow.python.ops import math_ops
    from tensorflow.python.ops import random_ops
    from tensorflow.python.ops import array_ops
    from tensorflow.contrib.layers.python.layers import utils
import numbers


# (1) scale inputs to zero mean and unit variance


# (2) use SELUs
def selu(x):
    """Scaled ELU: scale * x where x >= 0, scale * alpha * elu(x) elsewhere."""
    with ops.name_scope('elu') as scope:
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x))


# (3) initialize weights with stddev sqrt(1/n)
# e.g. use:
initializer = layers.variance_scaling_initializer(factor=1.0, mode='FAN_IN')


# (4) use this dropout
def dropout_selu(x, rate, alpha= -1.7580993408473766, fixedPointMean=0.0, fixedPointVar=1.0,
                 noise_shape=None, seed=None, name=None, training=False):
    """Dropout to a value with rescaling.

    When `training` is true, each element of x is kept with probability
    1 - rate and otherwise replaced by `alpha`; the result is then mapped
    through a * ret + b, with a and b computed from fixedPointMean and
    fixedPointVar. When `training` is false, x is returned unchanged.
    """

    def dropout_selu_impl(x, rate, alpha, noise_shape, seed, name):
        keep_prob = 1.0 - rate
        x = ops.convert_to_tensor(x, name="x")
        if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
            raise ValueError("keep_prob must be a scalar tensor or a float in the "
                             "range (0, 1], got %g" % keep_prob)
        keep_prob = ops.convert_to_tensor(keep_prob, dtype=x.dtype, name="keep_prob")
        keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        alpha = ops.convert_to_tensor(alpha, dtype=x.dtype, name="alpha")
        alpha.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        # nothing is dropped when everything is kept
        if tensor_util.constant_value(keep_prob) == 1:
            return x

        noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x)
        # keep_prob + uniform[0, 1) floors to 1 with probability keep_prob
        random_tensor = keep_prob
        random_tensor += random_ops.random_uniform(noise_shape, seed=seed, dtype=x.dtype)
        binary_tensor = math_ops.floor(random_tensor)
        ret = x * binary_tensor + alpha * (1-binary_tensor)

        a = math_ops.sqrt(fixedPointVar / (keep_prob *((1-keep_prob) * math_ops.pow(alpha-fixedPointMean,2) + fixedPointVar)))

        b = fixedPointMean - a * (keep_prob * fixedPointMean + (1 - keep_prob) * alpha)
        ret = a * ret + b
        ret.set_shape(x.get_shape())
        return ret

    with ops.name_scope(name, "dropout", [x]) as name:
        return utils.smart_cond(training,
                                lambda: dropout_selu_impl(x, rate, alpha, noise_shape, seed, name),
                                lambda: array_ops.identity(x))

# --------------------------------------------------------------------------------
# /benchmarks/nn-variant/clair/task/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/task/__init__.py
# --------------------------------------------------------------------------------
# /benchmarks/nn-variant/clair/task/__pycache__/__init__.cpython-37.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/task/__pycache__/__init__.cpython-37.pyc
# --------------------------------------------------------------------------------
# /benchmarks/nn-variant/clair/task/__pycache__/genotype.cpython-37.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/task/__pycache__/genotype.cpython-37.pyc
# --------------------------------------------------------------------------------
# /benchmarks/nn-variant/clair/task/__pycache__/gt21.cpython-37.pyc:
# --------------------------------------------------------------------------------
https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/task/__pycache__/gt21.cpython-37.pyc -------------------------------------------------------------------------------- /benchmarks/nn-variant/clair/task/__pycache__/main.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/task/__pycache__/main.cpython-37.pyc -------------------------------------------------------------------------------- /benchmarks/nn-variant/clair/task/__pycache__/variant_length.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/benchmarks/nn-variant/clair/task/__pycache__/variant_length.cpython-37.pyc -------------------------------------------------------------------------------- /benchmarks/nn-variant/clair/task/genotype.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | 3 | GENOTYPES = ["0/0", "1/1", "0/1", "1/2"] 4 | 5 | 6 | class Genotype(IntEnum): 7 | homo_reference = 0 # 0/0 8 | homo_variant = 1 # 1/1 9 | hetero_variant = 2 # 0/1 OR (1/2 for genotype task) 10 | hetero_variant_multi = 3 # 1/2 11 | 12 | 13 | def genotype_string_from(genotype_enum): 14 | try: 15 | return GENOTYPES[genotype_enum] 16 | except: 17 | return "" 18 | 19 | 20 | def genotype_enum_from(genotype_1, genotype_2): 21 | if genotype_1 == 0 and genotype_2 == 0: 22 | return Genotype.homo_reference 23 | if genotype_1 == genotype_2: 24 | return Genotype.homo_variant 25 | if genotype_1 != 0 and genotype_2 != 0: 26 | return Genotype.hetero_variant_multi 27 | return Genotype.hetero_variant 28 | 29 | 30 | def genotype_enum_for_task(genotype): 31 | if genotype == 
from enum import IntEnum

# The 21 genotype labels, in index order:
#   0-9   unordered pairs of bases (AA .. TT)
#   10-14 deletion combinations (DelDel, ADel .. TDel)
#   15-19 insertion combinations (InsIns, AIns .. TIns)
#   20    one insertion together with one deletion (InsDel)
GT21_LABELS = [
    'AA',
    'AC',
    'AG',
    'AT',
    'CC',
    'CG',
    'CT',
    'GG',
    'GT',
    'TT',
    'DelDel',
    'ADel',
    'CDel',
    'GDel',
    'TDel',
    'InsIns',
    'AIns',
    'CIns',
    'GIns',
    'TIns',
    'InsDel'
]
# label -> index; derived from the list itself so the two can never drift apart.
GT21_LABELS_MAP = {label: index for index, label in enumerate(GT21_LABELS)}


class GT21_Type(IntEnum):
    """Integer index of every GT21 label; values match positions in GT21_LABELS."""
    AA = 0
    AC = 1
    AG = 2
    AT = 3
    CC = 4
    CG = 5
    CT = 6
    GG = 7
    GT = 8
    TT = 9
    DelDel = 10
    ADel = 11
    CDel = 12
    GDel = 13
    TDel = 14
    InsIns = 15
    AIns = 16
    CIns = 17
    GIns = 18
    TIns = 19
    InsDel = 20


def gt21_label_from(gt21_enum):
    """Return the label stored at index `gt21_enum`, or "" if the index is invalid.

    Invalid means: out of range, negative, or not usable as a list index.
    """
    try:
        # A negative index would silently wrap around (-1 -> 'InsDel'); reject it.
        if gt21_enum < 0:
            return ""
        return GT21_LABELS[gt21_enum]
    except (IndexError, TypeError, ValueError):
        return ""


def gt21_enum_from_label(gt21_label):
    """Return the index of `gt21_label`; raises KeyError for an unknown label."""
    return GT21_LABELS_MAP[gt21_label]


def partial_label_from(ref, alt):
    """Classify one allele against the reference.

    Returns "Del" when `alt` is shorter than `ref`, "Ins" when it is longer,
    otherwise the first character of `alt`.
    """
    if len(ref) > len(alt):
        return "Del"
    elif len(ref) < len(alt):
        return "Ins"
    return alt[0]


def mix_two_partial_labels(label1, label2):
    """Combine two partial labels (a base, "Ins" or "Del") into one GT21 label."""
    # AA, AC, AG, AT, CC, CG, CT, GG, GT, TT -- two bases, smaller one first
    if len(label1) == 1 and len(label2) == 1:
        return label1 + label2 if label1 <= label2 else label2 + label1

    # ADel, CDel, GDel, TDel, AIns, CIns, GIns, TIns -- the base always comes first
    tmp_label1, tmp_label2 = label1, label2
    if len(label1) > 1 and len(label2) == 1:
        tmp_label1, tmp_label2 = label2, label1
    if len(tmp_label2) > 1 and len(tmp_label1) == 1:
        return tmp_label1 + tmp_label2

    # InsIns, DelDel
    if len(label1) > 0 and len(label2) > 0 and label1 == label2:
        return label1 + label2

    # InsDel
    return gt21_label_from(GT21_Type.InsDel)


def gt21_enum_from(reference, alternate, genotype_1, genotype_2, alternate_arr=None):
    """Return the GT21 index for a variant.

    reference     -- reference allele string
    alternate     -- comma separated alternate allele(s); ignored when
                     `alternate_arr` is supplied
    genotype_1/2  -- allele indices; a 0 in either one means the reference
                     allele is one of the two alleles
    alternate_arr -- optional pre-built list holding the two alleles to combine

    Raises IndexError if fewer than two alleles are available and KeyError if
    the combined label is not a GT21 label.
    """
    if alternate_arr is None:
        alternate_arr = alternate.split(',')
        if len(alternate_arr) == 1:
            # Single ALT: pair it with the reference (heterozygous) or with itself.
            is_reference_present = genotype_1 == 0 or genotype_2 == 0
            first_allele = reference if is_reference_present else alternate_arr[0]
            alternate_arr = [first_allele] + alternate_arr

    partial_labels = [partial_label_from(reference, allele) for allele in alternate_arr]
    gt21_label = mix_two_partial_labels(partial_labels[0], partial_labels[1])
    return gt21_enum_from_label(gt21_label)


HOMO_SNP_GT21 = [GT21_Type.AA, GT21_Type.CC, GT21_Type.GG, GT21_Type.TT]
HOMO_SNP_LABELS = [gt21_label_from(gt21_enum) for gt21_enum in HOMO_SNP_GT21]

HETERO_SNP_GT21 = [GT21_Type.AC, GT21_Type.AG, GT21_Type.AT, GT21_Type.CG, GT21_Type.CT, GT21_Type.GT]
HETERO_SNP_LABELS = [gt21_label_from(gt21_enum) for gt21_enum in HETERO_SNP_GT21]
def min_max(value, minimum, maximum):
    """Clamp `value` into [minimum, maximum]; the lower bound is applied last."""
    capped = maximum if maximum < value else value
    return minimum if minimum > capped else capped


def _one_hot(size, hot_index):
    """Return a list of `size` zeros with a single 1 at `hot_index`."""
    vector = [0] * size
    vector[hot_index] = 1
    return vector


def output_labels_from_reference(reference_base):
    """Build the label vector for a homozygous-reference site.

    The result is the concatenation of four one-hot vectors: GT21
    (reference_base twice), genotype (homo_reference) and two variant-length
    vectors that both encode length 0.
    """
    gt21_vec = _one_hot(
        GT21.output_label_count,
        gt21_enum_from_label(reference_base + reference_base),
    )
    genotype_vec = _one_hot(GENOTYPE.output_label_count, Genotype.homo_reference)

    zero_length_index = 0 + VariantLength.index_offset
    variant_length_vec_1 = _one_hot(VARIANT_LENGTH_1.output_label_count, zero_length_index)
    variant_length_vec_2 = _one_hot(VARIANT_LENGTH_2.output_label_count, zero_length_index)

    return gt21_vec + genotype_vec + variant_length_vec_1 + variant_length_vec_2


def output_labels_from_vcf_columns(columns):
    """Build the label vector for one variant record.

    Reads columns[2] (reference), columns[3] (comma separated alternates) and
    columns[4], columns[5] (the two genotype indices). Returns the GT21,
    genotype and two variant-length one-hot vectors concatenated in that order.
    """
    reference = columns[2]
    alternate = columns[3]
    genotype_1 = int(columns[4])
    genotype_2 = int(columns[5])

    alternate_arr = alternate.split(',')
    if len(alternate_arr) == 1:
        # Single ALT: pair it with the reference when either genotype index is 0,
        # otherwise with itself.
        if genotype_1 == 0 or genotype_2 == 0:
            alternate_arr = [reference] + alternate_arr
        else:
            alternate_arr = [alternate_arr[0]] + alternate_arr

    gt21 = gt21_enum_from(reference, alternate, genotype_1, genotype_2, alternate_arr)
    gt21_vec = _one_hot(GT21.output_label_count, gt21)

    genotype_for_task = genotype_enum_for_task(genotype_enum_from(genotype_1, genotype_2))
    genotype_vec = _one_hot(GENOTYPE.output_label_count, genotype_for_task)

    # Length difference of each allele vs. the reference, clamped and sorted
    # so that the smaller value always goes into the first vector.
    variant_lengths = sorted(
        min_max(len(allele) - len(reference), VariantLength.min, VariantLength.max)
        for allele in alternate_arr
    )
    variant_length_vec_1 = _one_hot(
        VARIANT_LENGTH_1.output_label_count,
        variant_lengths[0] + VariantLength.index_offset,
    )
    variant_length_vec_2 = _one_hot(
        VARIANT_LENGTH_2.output_label_count,
        variant_lengths[1] + VariantLength.index_offset,
    )

    return gt21_vec + genotype_vec + variant_length_vec_1 + variant_length_vec_2
def Run(args):
    """Set up the environment, restore the Clair model and run `prediction`.

    args -- parsed command line namespace; reads `threads`, `tensor_fn` and
            `chkpnt_fn` (everything else is consumed by `prediction`).

    Exits with an error message when no checkpoint was given.
    """
    # Force the thread-count variables of the numeric libraries to "1".
    for variable in ("OMP_NUM_THREADS", "OPENBLAS_NUM_THREADS",
                     "MKL_NUM_THREADS", "NUMEXPR_NUM_THREADS"):
        os.environ[variable] = "1"

    if args.threads is None:
        if args.tensor_fn == "PIPE":
            param.NUM_THREADS = 4
    else:
        # One less than requested, but never below 1.
        param.NUM_THREADS = max(args.threads - 1, 1)

    # os.path.abspath(None) would raise an opaque TypeError after the model has
    # already been built; fail early with a message the user can act on.
    if args.chkpnt_fn is None:
        sys.exit("[ERROR] --chkpnt_fn is required: no checkpoint to restore the model from")

    m = Clair()
    m.init()
    m.restore_parameters(os.path.abspath(args.chkpnt_fn))

    prediction(args, m)


def main():
    """Parse the command line and hand over to `Run`.

    Prints the help text and exits with status 1 when called without arguments.
    """
    parser = ArgumentParser(description="Call variants using a trained model and tensors of candidate variants")

    parser.add_argument('--input_fn', type=str, default="prediction_input.h5",
                        help="input file")

    parser.add_argument('--output_fn', type=str, default="prediction_output.h5",
                        help="output file")

    parser.add_argument('--tensor_fn', type=str, default="PIPE",
                        help="Tensor input, use PIPE for standard input")

    parser.add_argument('--chkpnt_fn', type=str, default=None,
                        help="Input a checkpoint for testing")

    parser.add_argument('--call_fn', type=str, default=None,
                        help="Output variant predictions")

    parser.add_argument('--bam_fn', type=str, default="bam.bam",
                        help="BAM file input, default: %(default)s")

    parser.add_argument('--qual', type=int, default=None,
                        help="If set, variant with equal or higher quality will be marked PASS, or LowQual otherwise, optional")

    parser.add_argument('--sampleName', type=str, default="SAMPLE",
                        help="Define the sample name to be shown in the VCF file")

    parser.add_argument('--showRef', action='store_true',
                        help="Show reference calls, optional")

    parser.add_argument('--debug', action='store_true',
                        help="Debug mode, optional")

    parser.add_argument('--ref_fn', type=str, default=None,
                        help="Reference fasta file input, optional, print contig tags in the VCF header if set")

    parser.add_argument('--threads', type=int, default=None,
                        help="Number of threads, optional")

    args = parser.parse_args()

    if len(sys.argv[1:]) == 0:
        parser.print_help()
        sys.exit(1)

    Run(args)


if __name__ == "__main__":
    main()
from collections import namedtuple

# "--option value", bare "--option" flag, and "binary script" command pieces.
CommandOption = namedtuple('CommandOption', ['option', 'value'])
CommandOptionWithNoValue = namedtuple('CommandOptionWithNoValue', ['option'])
ExecuteCommand = namedtuple('ExecuteCommand', ['bin', 'bin_value'])


def command_option_string_from(command):
    """Render one command piece as text.

    CommandOption            -> '--option "value"', or None when value is None
    CommandOptionWithNoValue -> '--option'
    ExecuteCommand           -> 'bin bin_value'
    anything else            -> returned unchanged
    """
    if isinstance(command, CommandOption):
        if command.value is None:
            return None
        return "--{} \"{}\"".format(command.option, command.value)

    if isinstance(command, CommandOptionWithNoValue):
        return "--{}".format(command.option)

    if isinstance(command, ExecuteCommand):
        return " ".join([command.bin, command.bin_value])

    return command


def command_string_from(command_options):
    """Join the rendered pieces with single spaces, dropping those that render to None."""
    rendered = []
    for piece in command_options:
        text = command_option_string_from(piece)
        if text is not None:
            rendered.append(text)
    return " ".join(rendered)


def command_option_from(args_value, option_name, option_value=None):
    """Turn a parsed argument into a command piece.

    Returns None when `args_value` is None, a value-less flag when `args_value`
    is True and no `option_value` was given, otherwise a CommandOption carrying
    `option_value`.
    """
    if args_value is None:
        return None

    is_plain_flag = args_value is True and option_value is None
    if is_plain_flag:
        return CommandOptionWithNoValue(option_name)

    return CommandOption(option_name, option_value)
def bed_tree_from(bed_file_path):
    """Load a (possibly gzip-compressed) BED file into per-contig interval trees.

    0-based interval tree [start, end)

    bed_file_path -- path handed to `gzip -fdc`; None yields an empty dict.

    Returns a dict mapping contig name -> IntervalTree. A record whose start
    equals its end is stored as a one-base interval. Blank lines are skipped;
    any other line with fewer than three columns or non-integer coordinates
    raises (IndexError / ValueError).
    """
    tree = {}
    if bed_file_path is None:
        return tree

    # Pass the arguments as a list: formatting the path into a string and
    # re-splitting it breaks on paths that contain whitespace.
    unzip_process = subprocess_popen(["gzip", "-fdc", str(bed_file_path)])
    try:
        for row in unzip_process.stdout:
            columns = row.strip().split()
            if not columns:
                continue  # blank line

            ctg_name = columns[0]
            if ctg_name not in tree:
                tree[ctg_name] = IntervalTree()

            ctg_start, ctg_end = int(columns[1]), int(columns[2])
            if ctg_start == ctg_end:
                ctg_end += 1

            tree[ctg_name].addi(ctg_start, ctg_end)
    finally:
        # Always release the pipe and reap the child, even when a line fails to parse.
        unzip_process.stdout.close()
        unzip_process.wait()

    return tree


def is_region_in(tree, contig_name, region_start=None, region_end=None):
    """Tell whether a position or region hits any interval stored for a contig.

    tree         -- dict as returned by `bed_tree_from`
    contig_name  -- contig to look up; None or an unknown contig gives False
    region_start -- position to test, or start of the region
    region_end   -- end of the region; when None only `region_start` is tested
                    (on trees that offer `at`)

    Returns True when at least one interval is found.
    """
    if (contig_name is None) or (contig_name not in tree):
        return False

    interval_tree = tree[contig_name]

    # Trees that offer `at` also offer `overlap` (treated as intervaltree
    # version 3 here); otherwise fall back to the older `search` call.
    if hasattr(interval_tree, 'at'):
        if region_end is None:
            hits = interval_tree.at(region_start)
        else:
            hits = interval_tree.overlap(begin=region_start, end=region_end)
        return len(hits) > 0

    # interval tree version 2
    return len(interval_tree.search(begin=region_start, end=region_end, strict=False)) > 0
# Default thread count; Run() in prediction.py overwrites it from --threads.
NUM_THREADS = 12
parameterOutputPlaceHolder = 6
expandReferenceRegion = 1000000
# 2316 == 0x4 | 0x8 | 0x100 | 0x800.
# NOTE(review): presumably SAM flag bits passed to `samtools view -F` -- confirm against the caller.
SAMTOOLS_VIEW_FILTER_FLAG = 2316

# Tensor related parameters, please use the same values for creating tensor, model training and variant calling
flankingBaseNum = 16
matrixRow = 8
matrixNum = 4
bloscBlockSize = 500

# Model hyperparameters
trainBatchSize = 10000
predictBatchSize = 1000
initialLearningRate = 1e-3
learningRateDecay = 0.1
maxLearningRateSwitch = 3
trainingDatasetPercentage = 0.9

# other hyperparameters
l2RegularizationLambda = 0.005
l2RegularizationLambdaDecay = 1
dropoutRateFC4 = 0.5
dropoutRateFC5 = 0.0
dropoutRate = 0.05
default_optimizer = "Adam"  # Adam / SGDM
default_loss_function = "FocalLoss"  # CrossEntropy / FocalLoss

# Cyclical learning rate param(s)
clr_max_lr = 3e-2
clr_min_lr = 1e-4
stepsizeConstant = 1
clrGamma = 0.95
momentum = 0.9
maxEpoch = 30

# Cyclical learning rate finder param(s)
min_lr = 1e-6
max_lr = 1e-1
lr_finder_max_epoch = 1

# random seed (None to make it random for every run)
# set to None because cuDNN may introduce additional sources of randomness
# https://machinelearningmastery.com/reproducible-results-neural-networks-keras/
RANDOM_SEED = None
OPERATION_SEED = None


def get_model_parameters():
    """Return flankingBaseNum, matrixNum and expandReferenceRegion as a dict.

    The values are read from the module globals at call time, so later
    reassignments of those globals are reflected in the result.
    """
    return dict(
        flankingBaseNum=flankingBaseNum,
        matrixNum=matrixNum,
        expandReferenceRegion=expandReferenceRegion,
    )
# IUPAC code -> single base used in its place (always the first base the code
# can stand for, in A, C, G, T order; U is read as T; N becomes A):
#   A->A  C->C  G->G  T or U->T
#   R (A or G)->A        Y (C or T)->C        S (G or C)->C
#   W (A or T)->A        K (G or T)->G        M (A or C)->A
#   B (C or G or T)->C   D (A or G or T)->A   H (A or C or T)->A
#   V (A or C or G)->A   N->A
IUPAC_base_to_ACGT_base_dict = dict(zip(
    "ACGTURYSWKMBDHVN",
    ("A", "C", "G", "T", "T", "A", "C", "C", "A", "G", "A", "C", "A", "A", "A", "A")
))

# Same mapping, expressed as the index of the replacement base (A=0, C=1, G=2, T=3).
IUPAC_base_to_num_dict = dict(zip(
    "ACGTURYSWKMBDHVN",
    (0, 1, 2, 3, 3, 0, 1, 1, 0, 2, 0, 1, 0, 0, 0, 0)
))

BASIC_BASES = set("ACGTU")


def is_file_exists(file_name, suffix=""):
    """Return True when `file_name + suffix` is an existing regular file.

    Non-string arguments give False instead of raising.
    """
    if not isinstance(file_name, str) or not isinstance(suffix, str):
        return False
    return isfile(file_name + suffix)


def file_path_from(file_name, suffix="", exit_on_not_found=False):
    """Return the absolute path of `file_name` if `file_name + suffix` exists.

    Note that the existence check includes `suffix` but the returned path does
    not. When the file is missing the function returns None, or terminates the
    program with an error message if `exit_on_not_found` is set.
    """
    if is_file_exists(file_name, suffix):
        return abspath(file_name)
    if exit_on_not_found:
        exit("[ERROR] file %s not found" % (file_name + suffix))
    return None


def is_command_exists(command):
    """Return True when `command` resolves to an executable.

    Uses shutil.which instead of spawning `which` through a shell: no child
    process, no noise on stderr, and a value such as "x; echo y" can neither
    run anything nor be reported as an existing command.
    """
    from shutil import which

    if not isinstance(command, str):
        return False
    return which(command) is not None


def executable_command_string_from(command_to_execute, exit_on_not_found=False):
    """Return `command_to_execute` if it is an available executable.

    Otherwise return None, or terminate the program with an error message if
    `exit_on_not_found` is set.
    """
    if is_command_exists(command_to_execute):
        return command_to_execute
    if exit_on_not_found:
        exit("[ERROR] %s executable not found" % (command_to_execute))
    return None


def subprocess_popen(args, stdin=None, stdout=PIPE, stderr=stderr, bufsize=8388608):
    """Start `args` with Popen in text mode (universal_newlines=True).

    By default stdout is a pipe, stderr is this process' stderr and the buffer
    size is 8388608 bytes. Returns the Popen object.
    """
    return Popen(args, stdin=stdin, stdout=stdout, stderr=stderr, bufsize=bufsize, universal_newlines=True)
($(arch),avx2) 8 | ifeq ($(CXX), icpc) 9 | ARCH_FLAGS=-march=core-avx2 #-xCORE-AVX2 10 | else 11 | ARCH_FLAGS=-mavx2 12 | endif 13 | else ifeq ($(arch),avx512) 14 | ifeq ($(CXX), icpc) 15 | ARCH_FLAGS=-xCORE-AVX512 16 | else 17 | ARCH_FLAGS=-mavx512bw 18 | endif 19 | else ifeq ($(arch),native) 20 | ARCH_FLAGS=-march=native 21 | else ifneq ($(arch),) 22 | ## To provide a different architecture flag like -march=core-avx2. 23 | ARCH_FLAGS=$(arch) 24 | endif 25 | CFLAGS= -g -O2 -std=c++11 $(ARCH_FLAGS) 26 | 27 | #VTUNE_HOME= /opt/intel/vtune_profiler 28 | GKLPATH= ../../tools/GKL 29 | INC= 30 | SHARED_LIBRARIES = -fopenmp -L$(GKLPATH)/build/native -lgkl_pairhmm_c -lstdc++ -ldl 31 | 32 | ifneq ($(VTUNE_HOME),) 33 | CFLAGS+= -DVTUNE_ANALYSIS=1 34 | INC+= -I${VTUNE_HOME}/include 35 | SHARED_LIBRARIES+= -L${VTUNE_HOME}/lib64 -littnotify 36 | endif 37 | 38 | all: phmm 39 | 40 | phmm: PairHMMUnitTest.o 41 | $(CC) -o $@ $^ $(CFLAGS) ${INC} $(SHARED_LIBRARIES) 42 | 43 | PairHMMUnitTest.o: PairHMMUnitTest.cpp PairHMMUnitTest.h 44 | $(CC) -c $(CFLAGS) ${INC} -fopenmp $< 45 | 46 | .PHONY: clean 47 | 48 | clean: 49 | rm -f phmm *.o 50 | -------------------------------------------------------------------------------- /benchmarks/phmm/PairHMMUnitTest.h: -------------------------------------------------------------------------------- 1 | // *************************************************************************** 2 | // Copyright (c) 2016-2017, Intel Corporation 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are met: 6 | // 7 | // * Redistributions of source code must retain the above copyright notice, 8 | // this list of conditions and the following disclaimer. 
9 | // * Redistributions in binary form must reproduce the above copyright notice, 10 | // this list of conditions and the following disclaimer in the documentation 11 | // and/or other materials provided with the distribution. 12 | // * Neither the name of Intel Corporation nor the names of its contributors 13 | // may be used to endorse or promote products derived from this software 14 | // without specific prior written permission. 15 | // 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | // POSSIBILITY OF SUCH DAMAGE. 27 | // *************************************************************************** 28 | 29 | #ifndef PAIRHMM_H 30 | #define PAIRHMM_H 31 | 32 | #define QUOTE(x) #x 33 | #define STR(x) QUOTE(x) 34 | 35 | #define MAX_READ_LENGTH 32*1024 36 | #define MAX_HAP_LENGTH 32*1024 37 | 38 | #define MAX_NUM_RESULTS 64*1024*1024 39 | 40 | #ifndef ROWS 41 | #define ROWS 26 42 | #endif 43 | 44 | #ifndef COLS 45 | #define COLS 8 46 | #endif 47 | 48 | 49 | #ifndef DBG 50 | #ifdef DEBUG 51 | #define DBG(M, ...) printf("[AOC-DEBUG] (%s:%d) " M "\n", __FUNCTION__, __LINE__, ##__VA_ARGS__) 52 | #else 53 | #define DBG(M, ...) 
// One haplotype element.
// NOTE(review): field meanings below are inferred from names only -- confirm.
typedef struct {
  char base;
  char position;   // bit flags; print_pe_data() tests this against FIRST and LAST
  short hap_num;
  float y_init;
} HapData;

// One read element together with its transition / emission terms.
typedef struct {
  char base;
  char position;   // bit flags; print_pe_data() tests this against FIRST and LAST
  short read_num;
  float mx; // Pr(match to insert gap)
  float my; // Pr(match to delete gap)
  float gg; // Pr(gap to gap)
  float mm_1m_qual; // Pr(match to match) * (1 - read_qual)
  float mm_qual_div3; // Pr(match to match) * read_qual / 3
  float gm_1m_qual; // Pr(gap to match) * (1 - read_qual)
  float gm_qual_div3; // Pr(gap to match) * read_qual / 3
} ReadData;

// The three values of one cell; printed as m, x, y by print_pe_data().
typedef struct {
  float m, x, y;
} PeData;

typedef struct {
  float m, x;
  bool final_result;
  bool first_hap;
  int read_num;
  int hap_num;
} RowResults;

// Array extents come from the ROWS / COLS macros defined earlier in this header
// (26 and 8 unless overridden).
typedef struct {
  HapData hap_row[COLS+1];
  HapData hap_col[ROWS+2];
  ReadData reads[COLS];
  PeData pe_data[2][COLS];
  float result;
  bool first_col;
  bool last_row;
} PairHmmInputData;

typedef struct {
  unsigned int read_length;
  unsigned int hap_length;
  float y_init;
  unsigned int num_rows;
} PairHmmGlobalControlData;

// Same pe_data / hap_row extents as PairHmmInputData.
typedef struct {
  PeData pe_data[2][COLS];
  HapData hap_row[COLS+1];
  float result;
} PairHmmOutputData;

// One result value tagged with the read and haplotype numbers it belongs to.
typedef struct {
  int result_read_num;
  int result_hap_num;
  float result;
} PairHmmResultData;

typedef struct {
  char id[64];
  char version[64];
  int rows;
  int cols;
} PairHmmAttributes;
hap.base : '.'; 130 | DBG("PE %d %d %c %c %.6e %.6e %.6e %d %d %d %d %d", 131 | r, c, 132 | read.base, hap.base, // 0, 1 133 | out.m, out.x, out.y, // 2, 3, 4 134 | read.position & FIRST, // 5 135 | read.position & LAST, // 6 136 | hap.position & FIRST, // 7 137 | hap.position & LAST, // 8 138 | read.position & LAST && hap.position & LAST // 9 139 | ); 140 | } 141 | 142 | 143 | #endif 144 | -------------------------------------------------------------------------------- /benchmarks/phmm/README.md: -------------------------------------------------------------------------------- 1 | `phmm` code from GATK in [Intel's Genomic Kernel Library](https://github.com/Intel-HLS/GKL) is licensed under the BSD 3-Clause License. 2 | -------------------------------------------------------------------------------- /benchmarks/phmm/pairhmm_common.h: -------------------------------------------------------------------------------- 1 | #ifndef PAIRHMM_COMMON_H 2 | #define PAIRHMM_COMMON_H 3 | 4 | #if defined(_MSC_VER) 5 | #include // SIMD intrinsics for Windows 6 | #else 7 | #include // SIMD intrinsics for GCC 8 | #endif 9 | 10 | #include 11 | #include 12 | 13 | #define CAT(X,Y) X##Y 14 | #define CONCAT(X,Y) CAT(X,Y) 15 | 16 | #define MIN_ACCEPTED 1e-28f 17 | #define NUM_DISTINCT_CHARS 5 18 | #define AMBIG_CHAR 4 19 | 20 | typedef struct { 21 | int rslen, haplen; 22 | const char *q, *i, *d, *c; 23 | const char *hap, *rs; 24 | } testcase; 25 | 26 | class ConvertChar { 27 | static uint8_t conversionTable[255] ; 28 | 29 | public: 30 | static void init() { 31 | assert (NUM_DISTINCT_CHARS == 5) ; 32 | assert (AMBIG_CHAR == 4) ; 33 | 34 | conversionTable['A'] = 0 ; 35 | conversionTable['C'] = 1 ; 36 | conversionTable['T'] = 2 ; 37 | conversionTable['G'] = 3 ; 38 | conversionTable['N'] = 4 ; 39 | } 40 | 41 | static inline uint8_t get(uint8_t input) { 42 | return conversionTable[input] ; 43 | } 44 | }; 45 | 46 | #endif // PAIRHMM_COMMON_H 47 | 
#ifndef SHACC_PAIRHMM_H
#define SHACC_PAIRHMM_H

// WEAK marks `calculate` as a weak symbol; Apple toolchains spell the
// attribute differently.
#if defined(__APPLE__)
  #define WEAK __attribute__((weak_import))
#else
  #define WEAK __attribute__((weak))
#endif

namespace shacc_pairhmm {

  // A read: `length` plus five character arrays.
  // NOTE(review): q / i / d / c are presumably per-base quality arrays -- confirm.
  struct Read {
    int length;
    const char* bases;
    const char* q;
    const char* i;
    const char* d;
    const char* c;
  };

  // A haplotype: `length` plus its bases.
  struct Haplotype {
    int length;
    const char* bases;
  };

  // A unit of work: reads, haplotypes and the array receiving the results.
  struct Batch {
    int id;
    int num_reads;
    int num_haps;
    long num_cells;
    Read* reads;
    Haplotype* haps;
    double* results;

    // Batches order by their number of cells.
    bool operator < (const Batch& other) const
    {
      return num_cells < other.num_cells;
    }
  };

  // Comparator: ascending num_cells (same ordering as Batch::operator<).
  struct SortByCells
  {
    bool operator()( const Batch& lhs, const Batch& rhs ) const {
      return lhs < rhs;
    }
  };

  // Comparator: ascending id.
  struct SortById
  {
    bool operator()( const Batch& lhs, const Batch& rhs ) const {
      return lhs.id < rhs.id;
    }
  };

  // Weak symbol: declared here, defined outside this header.
  extern WEAK bool calculate(Batch& batch);
}

#endif
medaka_common.c medaka_counts.c medaka_bamiter.c ../../tools/htslib/libhts.a $(CFLAGS) $(INC) $(LIBS) -o pileup 16 | 17 | .PHONY: clean 18 | 19 | clean: 20 | #cd ../../tools/htslib && $(MAKE) clean 21 | rm -f pileup 22 | 23 | medaka_common.c: medaka_common.h 24 | medaka_counts.c: medaka_counts.h medaka_common.h medaka_bamiter.h 25 | medaka_bamiter.c: medaka_bamiter.h 26 | -------------------------------------------------------------------------------- /benchmarks/pileup/README.md: -------------------------------------------------------------------------------- 1 | `pileup` uses the same license as [Medaka](https://github.com/nanoporetech/medaka). 2 | -------------------------------------------------------------------------------- /benchmarks/pileup/kvec.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008, by Attractive Chaos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | /* 27 | An example: 28 | 29 | #include "kvec.h" 30 | int main() { 31 | kvec_t(int) array; 32 | kv_init(array); 33 | kv_push(int, array, 10); // append 34 | kv_a(int, array, 20) = 5; // dynamic 35 | kv_A(array, 20) = 4; // static 36 | kv_destroy(array); 37 | return 0; 38 | } 39 | */ 40 | 41 | /* 42 | 2008-09-22 (0.1.0): 43 | 44 | * The initial version. 45 | 46 | */ 47 | 48 | #ifndef AC_KVEC_H 49 | #define AC_KVEC_H 50 | 51 | #include 52 | 53 | #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 54 | 55 | #define kvec_t(type) struct { size_t n, m; type *a; } 56 | #define kv_init(v) ((v).n = (v).m = 0, (v).a = 0) 57 | #define kv_destroy(v) free((v).a) 58 | #define kv_A(v, i) ((v).a[(i)]) 59 | #define kv_pop(v) ((v).a[--(v).n]) 60 | #define kv_size(v) ((v).n) 61 | #define kv_max(v) ((v).m) 62 | 63 | #define kv_resize(type, v, s) ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m)) 64 | 65 | #define kv_copy(type, v1, v0) do { \ 66 | if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n); \ 67 | (v1).n = (v0).n; \ 68 | memcpy((v1).a, (v0).a, sizeof(type) * (v0).n); \ 69 | } while (0) \ 70 | 71 | #define kv_push(type, v, x) do { \ 72 | if ((v).n == (v).m) { \ 73 | (v).m = (v).m? (v).m<<1 : 2; \ 74 | (v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \ 75 | } \ 76 | (v).a[(v).n++] = (x); \ 77 | } while (0) 78 | 79 | #define kv_pushp(type, v) (((v).n == (v).m)? \ 80 | ((v).m = ((v).m? (v).m<<1 : 2), \ 81 | (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ 82 | : 0), ((v).a + ((v).n++)) 83 | 84 | #define kv_a(type, v, i) (((v).m <= (size_t)(i)? 
\ 85 | ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \ 86 | (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \ 87 | : (v).n <= (size_t)(i)? (v).n = (i) + 1 \ 88 | : 0), (v).a[(i)]) 89 | 90 | #endif 91 | -------------------------------------------------------------------------------- /benchmarks/pileup/medaka_bamiter.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "medaka_bamiter.h" 5 | 6 | // iterator for reading bam 7 | int read_bam(void *data, bam1_t *b) { 8 | mplp_data *aux = (mplp_data*) data; 9 | uint8_t *tag; 10 | bool check_tag = (strcmp(aux->tag_name, "") != 0); 11 | bool have_rg = (aux->read_group != NULL); 12 | uint8_t *rg; 13 | char *rg_val; 14 | int ret; 15 | while (1) { 16 | ret = aux->iter ? sam_itr_next(aux->fp, aux->iter, b) : sam_read1(aux->fp, aux->hdr, b); 17 | if (ret<0) break; 18 | // only take primary alignments 19 | if (b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FSUPPLEMENTARY | BAM_FQCFAIL | BAM_FDUP)) continue; 20 | // filter by mapping quality 21 | if ((int)b->core.qual < aux->min_mapQ) continue; 22 | // filter by tag 23 | if (check_tag) { 24 | tag = bam_aux_get((const bam1_t*) b, aux->tag_name); 25 | if (tag == NULL){ // tag isn't present or is currupt 26 | if (aux->keep_missing) { 27 | break; 28 | } else { 29 | continue; 30 | } 31 | } 32 | int tag_value = bam_aux2i(tag); 33 | if (errno == EINVAL) continue; // tag was not integer 34 | if (tag_value != aux->tag_value) continue; 35 | } 36 | // filter by RG (read group): 37 | if (have_rg) { 38 | rg = bam_aux_get((const bam1_t*) b, "RG"); 39 | if (rg == NULL) continue; // missing 40 | rg_val = bam_aux2Z(rg); 41 | if (errno == EINVAL) continue; // bad parse 42 | if (strcmp(aux->read_group, rg_val) != 0) continue; // not wanted 43 | } 44 | break; 45 | } 46 | return ret; 47 | } 48 | -------------------------------------------------------------------------------- /benchmarks/pileup/medaka_bamiter.h: 
-------------------------------------------------------------------------------- 1 | #ifndef _MEDAKA_BAMITER_H 2 | #define _MEDAKA_BAMITER_H 3 | 4 | #include 5 | #include "htslib/sam.h" 6 | 7 | // parameters for bam iteration 8 | typedef struct { 9 | htsFile *fp; 10 | sam_hdr_t *hdr; 11 | hts_itr_t *iter; 12 | int min_mapQ; 13 | char tag_name[2]; 14 | int tag_value; 15 | bool keep_missing; 16 | const char *read_group; 17 | } mplp_data; 18 | 19 | // iterator for reading bam 20 | int read_bam(void *data, bam1_t *b); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /benchmarks/pileup/medaka_common.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "medaka_common.h" 8 | 9 | 10 | /** Allocates zero-initialised memory with a message on failure. 11 | * 12 | * @param num number of elements to allocate. 13 | * @param size size of each element. 14 | * @param msg message to describe allocation on failure. 15 | * @returns pointer to allocated memory 16 | * 17 | */ 18 | void *xalloc(size_t num, size_t size, char* msg){ 19 | void *res = calloc(num, size); 20 | if (res == NULL){ 21 | fprintf(stderr, "Failed to allocate mem for %s\n", msg); 22 | exit(1); 23 | } 24 | return res; 25 | } 26 | 27 | 28 | /** Reallocates memory with a message on failure. 29 | * 30 | * @param ptr pointer to realloc. 31 | * @param size size of each element. 32 | * @param msg message to describe allocation on failure. 33 | * @returns pointer to allocated memory 34 | * 35 | */ 36 | void *xrealloc(void *ptr, size_t size, char* msg){ 37 | void *res = realloc(ptr, size); 38 | if (res == NULL){ 39 | fprintf(stderr, "Failed to reallocate mem for %s\n", msg); 40 | exit(1); 41 | } 42 | return res; 43 | } 44 | 45 | 46 | /** Retrieves a substring. 47 | * 48 | * @param string input string. 49 | * @param postion start position of substring. 
50 | * @param length length of substring required. 51 | * @returns newly malloc'd, NUL-terminated string; the caller must free it. 52 | * Negative lengths are treated as 0; exits with a message on allocation failure. 53 | */ 54 | char *substring(char *string, int position, int length) { 55 | char *ptr; 56 | 57 | if (length < 0) length = 0; // a negative length would wrap to a huge size_t below 58 | ptr = malloc((size_t)length + 1); 59 | if (ptr == NULL) { // report and exit, as xalloc/xrealloc do 60 | fprintf(stderr, "Failed to allocate mem for %s\n", "substring"); 61 | exit(1); 62 | } 63 | 64 | memcpy(ptr, string + position, (size_t)length); // bytes string[position .. position+length) 65 | ptr[length] = '\0'; 66 | return ptr; 67 | } 68 | 69 | 70 | /** Format a uint8_t to a string 71 | * 72 | * @param value to format. 73 | * @param dst destination char buffer (at least 4 bytes: up to 3 digits plus NUL). 74 | * @returns length of string. 75 | * 76 | */ 77 | size_t uint8_to_str(uint8_t value, char *dst) { 78 | static char* digits[] = { 79 | "0","1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20", 80 | "21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40", 81 | "41","42","43","44","45","46","47","48","49","50","51","52","53","54","55","56","57","58","59","60", 82 | "61","62","63","64","65","66","67","68","69","70","71","72","73","74","75","76","77","78","79","80", 83 | "81","82","83","84","85","86","87","88","89","90","91","92","93","94","95","96","97","98","99","100", 84 | "101","102","103","104","105","106","107","108","109","110","111","112","113","114","115","116","117","118","119","120", 85 | "121","122","123","124","125","126","127","128","129","130","131","132","133","134","135","136","137","138","139","140", 86 | "141","142","143","144","145","146","147","148","149","150","151","152","153","154","155","156","157","158","159","160", 87 | "161","162","163","164","165","166","167","168","169","170","171","172","173","174","175","176","177","178","179","180", 88 | "181","182","183","184","185","186","187","188","189","190","191","192","193","194","195","196","197","198","199","200", 89 | "201","202","203","204","205","206","207","208","209","210","211","212","213","214","215","216","217","218","219","220", 90 | 
"221","222","223","224","225","226","227","228","229","230","231","232","233","234","235","236","237","238","239","240", 91 | "241","242","243","244","245","246","247","248","249","250","251","252","253","254","255"}; 92 | static const uint8_t TEN = 10; 93 | static const uint8_t HUNDRED = 100; 94 | strcpy(dst, digits[value]); 95 | if (value < TEN) return 1; 96 | if (value < HUNDRED) return 2; 97 | else return 3; 98 | } 99 | 100 | -------------------------------------------------------------------------------- /benchmarks/pileup/medaka_common.h: -------------------------------------------------------------------------------- 1 | #ifndef _MEDAKA_COMMON_H 2 | #define _MEDAKA_COMMON_H 3 | 4 | #include 5 | 6 | 7 | /** Simple integer min/max 8 | * @param a 9 | * @param b 10 | * 11 | * @returns the min/max of a and b 12 | * 13 | */ 14 | static inline int max ( int a, int b ) { return a > b ? a : b; } 15 | static inline int min ( int a, int b ) { return a < b ? a : b; } 16 | 17 | 18 | /** Allocates zero-initialised memory with a message on failure. 19 | * 20 | * @param num number of elements to allocate. 21 | * @param size size of each element. 22 | * @param msg message to describe allocation on failure. 23 | * @returns pointer to allocated memory 24 | * 25 | */ 26 | void *xalloc(size_t num, size_t size, char* msg); 27 | 28 | 29 | /** Reallocates memory with a message on failure. 30 | * 31 | * @param ptr pointer to realloc. 32 | * @param size size of each element. 33 | * @param msg message to describe allocation on failure. 34 | * @returns pointer to allocated memory 35 | * 36 | */ 37 | void *xrealloc(void *ptr, size_t size, char* msg); 38 | 39 | 40 | /** Retrieves a substring. 41 | * 42 | * @param string input string. 43 | * @param postion start position of substring. 44 | * @param length length of substring required. 45 | * @returns string pointer. 
46 | * 47 | */ 48 | char *substring(char *string, int position, int length); 49 | 50 | 51 | /** Format a uint32_t to a string 52 | * 53 | * @param value to format. 54 | * @param dst destination char. 55 | * @returns length of string. 56 | * 57 | */ 58 | size_t uint8_to_str(uint8_t value, char *dst); 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /benchmarks/pileup/medaka_counts.h: -------------------------------------------------------------------------------- 1 | #ifndef _MEDAKA_COUNTS_H 2 | #define _MEDAKA_COUNTS_H 3 | 4 | // medaka-style feature data 5 | typedef struct _plp_data { 6 | size_t buffer_cols; 7 | size_t num_dtypes; 8 | size_t num_homop; 9 | size_t n_cols; 10 | size_t *matrix; 11 | size_t *major; 12 | size_t *minor; 13 | } _plp_data; 14 | typedef _plp_data *plp_data; 15 | 16 | typedef struct { 17 | char region_string[1024]; 18 | plp_data pileup; 19 | } Batch; 20 | 21 | 22 | /** Format an array values as a comma seperate string 23 | * 24 | * @param values integer input array 25 | * @param length size of input array 26 | * @param result output char buffer of size 4 * length * sizeof char 27 | * @returns void 28 | * 29 | * The output buffer size comes from: 30 | * a single value is max 3 chars 31 | * + 1 for comma (or \0 at end) 32 | */ 33 | void format_uint8_array(uint8_t* values, size_t length, char* result); 34 | 35 | 36 | // Simple container for strings 37 | typedef struct string_set { 38 | size_t n; 39 | char **strings; 40 | } string_set; 41 | 42 | 43 | /** Destroys a string set 44 | * 45 | * @param data the object to cleanup. 46 | * @returns void. 47 | * 48 | */ 49 | void destroy_string_set(string_set strings); 50 | 51 | 52 | /** Retrieves contents of key-value tab delimited file. 53 | * 54 | * @param fname input file path. 55 | * @returns a string_set 56 | * 57 | * The return value can be free'd with destroy_string_set. 
58 | * key-value pairs are stored sequentially in the string set 59 | * 60 | */ 61 | string_set read_key_value(char * fname); 62 | 63 | 64 | // medaka-style base encoding 65 | static const char plp_bases[] = "acgtACGTdD"; 66 | static const size_t featlen = 10; // len of the above 67 | static const size_t fwd_del = 9; // position of D 68 | static const size_t rev_del = 8; // position of d 69 | 70 | // bam tag used for datatypes 71 | static const char datatype_tag[] = "DT"; 72 | 73 | // convert 16bit IUPAC (+16 for strand) to plp_bases index 74 | static const int num2countbase[32] = { 75 | -1, 4, 5, -1, 6, -1, -1, -1, 76 | 7, -1, -1, -1, -1, -1, -1, -1, 77 | -1, 0, 1, -1, 2, -1, -1, -1, 78 | 3, -1, -1, -1, -1, -1, -1, -1, 79 | }; 80 | 81 | 82 | /** Constructs a pileup data structure. 83 | * 84 | * @param n_cols number of pileup columns. 85 | * @param buffer_cols number of pileup columns. 86 | * @param num_dtypes number of datatypes in pileup. 87 | * @param num_homop maximum homopolymer length to consider. 88 | * @param fixed_size if not zero data matrix is allocated as fixed_size * n_cols, ignoring other arguments 89 | * @see destroy_plp_data 90 | * @returns a plp_data pointer. 91 | * 92 | * The return value can be freed with destroy_plp_data. 93 | * 94 | */ 95 | plp_data create_plp_data(size_t n_cols, size_t buffer_cols, size_t num_dtypes, size_t num_homop, size_t fixed_size); 96 | 97 | 98 | /** Enlarge the internal buffers of a pileup data structure. 99 | * 100 | * @param pileup a plp_data pointer. 101 | * @param buffer_cols number of pileup columns for which to allocate memory 102 | * 103 | */ 104 | void enlarge_plp_data(plp_data pileup, size_t buffer_cols); 105 | 106 | 107 | /** Destroys a pileup data structure. 108 | * 109 | * @param data the object to cleanup. 110 | * @returns void. 111 | * 112 | */ 113 | void destroy_plp_data(plp_data data); 114 | 115 | 116 | /** Prints a pileup data structure. 117 | * 118 | * @param pileup a pileup counts structure. 
119 | * @param num_dtypes number of datatypes in the pileup. 120 | * @param dtypes datatype prefix strings. 121 | * @param num_homop maximum homopolymer length to consider. 122 | * @returns void 123 | * 124 | */ 125 | void print_pileup_data(plp_data pileup, size_t num_dtypes, char *dtypes[], size_t num_homop); 126 | 127 | 128 | /** Generates medaka-style feature data in a region of a bam. 129 | * 130 | * @param region 1-based region string. 131 | * @param bam_file input aligment file. 132 | * @param num_dtypes number of datatypes in bam. 133 | * @param dtypes prefixes on query names indicating datatype. 134 | * @param num_homop maximum homopolymer length to consider. 135 | * @param tag_name by which to filter alignments 136 | * @param tag_value by which to filter data 137 | * @param keep_missing alignments which do not have tag 138 | * @param weibull_summation use predefined bam tags to perform homopolymer partial counts. 139 | * @returns a pileup counts data pointer. 140 | * 141 | * The return value can be freed with destroy_plp_data. 142 | * 143 | * If num_dtypes is 1, dtypes should be NULL; all reads in the bam will be 144 | * treated equally. If num_dtypes is not 1, dtypes should be an array of 145 | * strings, these strings being prefixes of query names of reads within the 146 | * bam file. Any read not matching the prefixes will cause exit(1). 147 | * 148 | * If tag_name is not NULL alignments are filtered by the (integer) tag value. 149 | * When tag_name is given the behaviour for alignments without the tag is 150 | * determined by keep_missing. 
151 | * 152 | */ 153 | plp_data calculate_pileup( 154 | const char *region, const char *bam_file, size_t num_dtypes, char *dtypes[], 155 | size_t num_homop, const char tag_name[2], const int tag_value, const _Bool keep_missing, 156 | bool weibull_summation, const char *read_group); 157 | 158 | 159 | #endif 160 | -------------------------------------------------------------------------------- /benchmarks/poa/Makefile: -------------------------------------------------------------------------------- 1 | CXX=g++ 2 | #CXX=icpc 3 | 4 | 5 | ifeq ($(arch),sse41) 6 | ARCH_FLAGS=-msse4.1 7 | else ifeq ($(arch),avx2) 8 | ifeq ($(CXX), icpc) 9 | ARCH_FLAGS=-march=core-avx2 #-xCORE-AVX2 10 | else 11 | ARCH_FLAGS=-mavx2 12 | endif 13 | else ifeq ($(arch),avx512) 14 | ifeq ($(CXX), icpc) 15 | ARCH_FLAGS=-xCORE-AVX512 16 | else 17 | ARCH_FLAGS=-mavx512bw 18 | endif 19 | else ifeq ($(arch),native) 20 | ARCH_FLAGS=-march=native 21 | else ifneq ($(arch),) 22 | ## To provide a different architecture flag like -march=core-avx2. 23 | ARCH_FLAGS=$(arch) 24 | endif 25 | 26 | CXXFLAGS=-O3 -fopenmp $(ARCH_FLAGS) 27 | #VTUNE_HOME= /opt/intel/vtune_profiler 28 | INCLUDES=-I../../tools/spoa/include 29 | LIBS=-L../../tools/spoa/build/lib/ -lspoa -ldl -fopenmp 30 | 31 | ifneq ($(VTUNE_HOME),) 32 | CXXFLAGS+= -DVTUNE_ANALYSIS=1 33 | INCLUDES+= -I${VTUNE_HOME}/include 34 | LIBS+=-L${VTUNE_HOME}/lib64 -littnotify 35 | endif 36 | 37 | all: msa_spoa_omp.cpp 38 | $(CXX) $(CXXFLAGS) msa_spoa_omp.cpp ${INCLUDES} ${LIBS} -o poa 39 | 40 | .PHONY: clean 41 | 42 | clean: 43 | rm -f poa 44 | -------------------------------------------------------------------------------- /benchmarks/poa/README: -------------------------------------------------------------------------------- 1 | Pre-requisites: 2 | gcc-8 3 | scl enable devtoolset-8 bash 4 | Compile SPOA library: cd ../../tools/spoa; mkdir build; cd build; cmake -DCMAKE_BUILD_TYPE=Release .. 
5 | 6 | ==== 7 | Compile kernel: make 8 | Usage: ./poa -s <input.fasta> -t <num_threads> > cons.fasta 9 | 10 | E.g.: small: ./poa -s input-1000.fasta -t 1 > cons-1000.fasta 11 | large: ./poa -s input.fasta -t 1 > cons.fasta 12 | -------------------------------------------------------------------------------- /benchmarks/poa/README.md: -------------------------------------------------------------------------------- 1 | `poa` uses the same license as [SIMD partial order alignment library](https://github.com/rvaser/spoa). 2 | 3 | If you find `poa` useful, please cite: 4 | 5 | ``` 6 | @article{vaser2017fast, 7 | title={Fast and accurate de novo genome assembly from long uncorrected reads}, 8 | author={Vaser, Robert and Sovi{\'c}, Ivan and Nagarajan, Niranjan and {\v{S}}iki{\'c}, Mile}, 9 | journal={Genome research}, 10 | volume={27}, 11 | number={5}, 12 | pages={737--746}, 13 | year={2017}, 14 | publisher={Cold Spring Harbor Lab} 15 | } 16 | ``` 17 | -------------------------------------------------------------------------------- /debian.prerequisites: -------------------------------------------------------------------------------- 1 | autoconf 2 | automake 3 | make 4 | gcc-9 5 | g++-9 6 | perl 7 | zlib1g-dev 8 | libbz2-dev 9 | liblzma-dev 10 | libcurl4-openssl-dev 11 | libssl-dev 12 | git 13 | cmake 14 | patch 15 | libtool 16 | yasm 17 | openjdk-8-jdk 18 | -------------------------------------------------------------------------------- /img/GenomicsBenchLogo-Colored.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/img/GenomicsBenchLogo-Colored.png -------------------------------------------------------------------------------- /img/GenomicsBenchLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arun-sub/genomicsbench/f1a0fa0cc3981a1d93d904ac5d023bef69db38d7/img/GenomicsBenchLogo.png
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | toml==0.10.0 2 | tqdm==4.31.1 3 | torch==1.4.0 4 | parasail==1.2 5 | requests==2.22.0 6 | ont-fast5-api==3.1.6 7 | fast-ctc-decode==0.2.5 8 | bonito-cuda-runtime==0.0.2a2 9 | scipy==1.5.2 10 | tensorflow-gpu==1.13.2 11 | -------------------------------------------------------------------------------- /rhel.prerequisites: -------------------------------------------------------------------------------- 1 | autoconf 2 | automake 3 | make 4 | gcc 5 | perl-Data-Dumper 6 | zlib-devel 7 | bzip2-devel 8 | xz-devel 9 | curl-devel 10 | openssl-devel 11 | java-1.8.0-openjdk-devel 12 | git 13 | cmake 14 | patch 15 | libtool 16 | yasm 17 | centos-release-scl 18 | devtoolset-8-gcc-c++ 19 | -------------------------------------------------------------------------------- /scripts/run-cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | usage() { 6 | echo -e "\n Usage $0 \n\n Example: $0 [../input-datasets] [small | large]\n" 7 | } 8 | # test the first argument ($1); $# is only the argument count and never equals "--help" 9 | if [[ ( "$1" == "--help" ) || ( "$1" == "-h" ) ]] 10 | then 11 | usage 12 | exit 0 13 | fi 14 | 15 | if [[ $# -lt 1 ]] 16 | then 17 | usage 18 | exit 1 19 | fi 20 | 21 | INPUTS_DIR=$1 22 | INPUTS_SIZE=$2 23 | 24 | if [[ ( $INPUTS_SIZE == "large" ) ]] 25 | then 26 | echo "Running fmi" 27 | ../benchmarks/fmi/fmi $INPUTS_DIR/fmi/broad $INPUTS_DIR/fmi/large/SRR7733443_10m_1.fastq 512 19 1 28 | 29 | echo "Running bsw" 30 | ../benchmarks/bsw/bsw -pairs $INPUTS_DIR/bsw/large/bandedSWA_SRR7733443_1m_input.txt -t 1 -b 512 31 | 32 | echo "Running phmm" 33 | export LD_LIBRARY_PATH=../tools/GKL/build/native:$LD_LIBRARY_PATH 34 | ../benchmarks/phmm/phmm -f $INPUTS_DIR/phmm/large/large.in -t 1 35 | 36 | echo "Running dbg" 37 | ../benchmarks/dbg/dbg $INPUTS_DIR/dbg/large/ERR194147-mem2-chr22.bam
chr22:0-50818468 $INPUTS_DIR/dbg/large/Homo_sapiens_assembly38.fasta 1 38 | 39 | echo "Running chain" 40 | ../benchmarks/chain/chain -i $INPUTS_DIR/chain/large/c_elegans_40x.10k.in -o $INPUTS_DIR/chain/large/c_elegans_40x.10k.out 41 | 42 | echo "Running poa" 43 | ../benchmarks/poa/poa -s $INPUTS_DIR/poa/large/input.fasta -t 1 44 | 45 | echo "Running kmer-cnt" 46 | ../benchmarks/kmer-cnt/kmer-cnt --reads $INPUTS_DIR/kmer-cnt/large/Loman_E.coli_MAP006-1_2D_50x.fasta --config ../tools/Flye/flye/config/bin_cfg/asm_raw_reads.cfg --threads 1 --debug 47 | 48 | echo "Running pileup" 49 | ../benchmarks/pileup/pileup $INPUTS_DIR/pileup/large/HG002_prom_R941_guppy360_2_GRCh38_ch20.bam chr20:1-64444167 1 > $INPUTS_DIR/pileup/large/pileup.txt 50 | 51 | echo "Running grm" 52 | export LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/2021.1.1/lib/intel64:/opt/intel/oneapi/compiler/2021.1.2/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH 53 | ../benchmarks/grm/2.0/build_dynamic/plink2 --maf 0.01 --pgen $INPUTS_DIR/grm/large/chr1_phase3.pgen --pvar $INPUTS_DIR/grm/large/chr1_phase3.pvar --psam $INPUTS_DIR/grm/large/phase3_corrected.psam --make-grm-bin --out $INPUTS_DIR/grm/large/grm --threads 1 54 | 55 | else 56 | 57 | echo "Running fmi" 58 | ../benchmarks/fmi/fmi $INPUTS_DIR/fmi/broad $INPUTS_DIR/fmi/small/SRR7733443_1m_1.fastq 512 19 1 59 | 60 | echo "Running bsw" 61 | ../benchmarks/bsw/bsw -pairs $INPUTS_DIR/bsw/small/bandedSWA_SRR7733443_100k_input.txt -t 1 -b 512 62 | 63 | echo "Running phmm" 64 | export LD_LIBRARY_PATH=../tools/GKL/build/native:$LD_LIBRARY_PATH 65 | ../benchmarks/phmm/phmm -f $INPUTS_DIR/phmm/small/5m.in -t 1 66 | 67 | echo "Running dbg" 68 | ../benchmarks/dbg/dbg $INPUTS_DIR/dbg/small/ERR194147-mem2-chr22.bam chr22:16000000-16500000 $INPUTS_DIR/dbg/large/Homo_sapiens_assembly38.fasta 1 69 | 70 | echo "Running chain" 71 | ../benchmarks/chain/chain -i $INPUTS_DIR/chain/small/in-1k.txt -o $INPUTS_DIR/chain/small/out-1k.txt 72 | 73 | echo "Running poa" 74 | 
../benchmarks/poa/poa -s $INPUTS_DIR/poa/small/input-1000.fasta -t 1 75 | 76 | echo "Running kmer-cnt" 77 | ../benchmarks/kmer-cnt/kmer-cnt --reads $INPUTS_DIR/kmer-cnt/small/Loman_E.coli_MAP006-1_2D_50x_1000.fasta --config ../tools/Flye/flye/config/bin_cfg/asm_raw_reads.cfg --threads 1 --debug 78 | 79 | echo "Running pileup" 80 | ../benchmarks/pileup/pileup $INPUTS_DIR/pileup/small/saureus.bam tig00000061:1-1499707 1 > $INPUTS_DIR/pileup/small/pileup.txt 81 | 82 | echo "Running grm" 83 | export LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/2021.1.1/lib/intel64:/opt/intel/oneapi/compiler/2021.1.2/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH 84 | ../benchmarks/grm/2.0/build_dynamic/plink2 --maf 0.01 --pgen $INPUTS_DIR/grm/small/chr22_phase3.pgen --pvar $INPUTS_DIR/grm/small/chr22_phase3.pvar --psam $INPUTS_DIR/grm/small/phase3_corrected.psam --make-grm-bin --out $INPUTS_DIR/grm/small/grm --threads 1 85 | 86 | fi 87 | -------------------------------------------------------------------------------- /scripts/run-gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | usage() { 6 | echo -e "\n Usage $0 \n\n Example: $0 [../input-datasets] [small | large]\n" 7 | } 8 | # test the first argument ($1); $# is only the argument count and never equals "--help" 9 | if [[ ( "$1" == "--help" ) || ( "$1" == "-h" ) ]] 10 | then 11 | usage 12 | exit 0 13 | fi 14 | 15 | if [[ $# -lt 1 ]] 16 | then 17 | usage 18 | exit 1 19 | fi 20 | 21 | INPUTS_DIR=$1 22 | INPUTS_SIZE=$2 23 | 24 | if [[ ( $INPUTS_SIZE == "small" ) ]] 25 | then 26 | 27 | echo "Running nn-base" 28 | python ../benchmarks/nn-base/bonito/basecall.py ../benchmarks/nn-base/models/bonito_dna_r941 $INPUTS_DIR/nn-base/small --device cuda:0 --fastq > $INPUTS_DIR/nn-base/small/out-small.fastq 29 | 30 | echo "Running nn-variant" 31 | python ../benchmarks/nn-variant/prediction.py --chkpnt_fn $INPUTS_DIR/nn-variant/model --sampleName chr20 --threads 1 --qual 100 --input_fn $INPUTS_DIR/nn-variant/small/prediction_input.h5 --output_fn
$INPUTS_DIR/nn-variant/small/prediction_output.h5 32 | 33 | echo "Running abea" 34 | ../benchmarks/abea/f5c eventalign -b $INPUTS_DIR/abea/small/1000reads.bam -g $INPUTS_DIR/abea/humangenome.fa -r $INPUTS_DIR/abea/1000reads.fastq -B 3.7M > $INPUTS_DIR/abea/small/events.tsv 35 | 36 | else 37 | 38 | echo "Running nn-base" 39 | python ../benchmarks/nn-base/bonito/basecall.py ../benchmarks/nn-base/models/bonito_dna_r941 $INPUTS_DIR/nn-base/large --device cuda:0 --fastq > $INPUTS_DIR/nn-base/large/out-large.fastq 40 | 41 | echo "Running nn-variant" 42 | python ../benchmarks/nn-variant/prediction.py --chkpnt_fn $INPUTS_DIR/nn-variant/model --sampleName chr20 --threads 1 --qual 100 --input_fn $INPUTS_DIR/nn-variant/large/prediction_input.h5 --output_fn $INPUTS_DIR/nn-variant/large/prediction_output.h5 43 | 44 | echo "Running abea" 45 | ../benchmarks/abea/f5c eventalign -b $INPUTS_DIR/abea/large/10000reads.bam -g $INPUTS_DIR/abea/humangenome.fa -r $INPUTS_DIR/abea/10000reads.fastq -B 3.7M > $INPUTS_DIR/abea/large/events.tsv 46 | 47 | fi 48 | -------------------------------------------------------------------------------- /scripts/vtune.pc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | usage() { 6 | echo -e "\n Usage $0 \n" 7 | } 8 | 9 | 10 | vtune_pc() { 11 | vtune -collect-with runsa -start-paused -r $1 -knob 
event-config=INST_RETIRED.ANY,CPU_CLK_UNHALTED.THREAD,MEM_INST_RETIRED.ALL_LOADS:sa=100000,MEM_LOAD_RETIRED.L1_HIT:sa=100000,MEM_LOAD_RETIRED.L2_HIT:sa=100,MEM_LOAD_RETIRED.L3_HIT:sa=100,MEM_LOAD_RETIRED.L1_MISS:sa=100,MEM_LOAD_RETIRED.L2_MISS:sa=100,MEM_LOAD_RETIRED.L3_MISS:sa=100,OFFCORE_RESPONSE:request=DEMAND_DATA_RD:response=L3_MISS.SNOOP_MISS:sa=1000,OFFCORE_RESPONSE:request=DEMAND_DATA_RD:response=L3_MISS.SNOOP_HIT_NO_FWD:sa=1000,OFFCORE_RESPONSE:request=DEMAND_CODE_RD:response=L3_MISS.SNOOP_MISS:sa=1000,OFFCORE_RESPONSE:request=DEMAND_CODE_RD:response=L3_MISS.SNOOP_HIT_NO_FWD:sa=1000,OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD:sa=1000,OFFCORE_REQUESTS.DEMAND_DATA_RD:sa=1000,OFFCORE_REQUESTS:ALL_DATA_RD:sa=1000,OFFCORE_RESPONSE:request=OTHER:response=L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD:sa=1000,OFFCORE_RESPONSE:request=OTHER:response=L3_MISS_LOCAL_DRAM.SNOOP_MISS:sa=1000,LOAD_HIT_PRE.SW_PF:sa=100,BR_INST_RETIRED.ALL_BRANCHES,BR_MISP_RETIRED.ALL_BRANCHES,DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK,CYCLE_ACTIVITY.STALLS_L3_MISS,CYCLE_ACTIVITY.STALLS_MEM_ANY -- $2 12 | } 13 | # test the first argument ($1); $# is only the argument count and never equals "--help" 14 | if [[ ( "$1" == "--help" ) || ( "$1" == "-h" ) ]] 15 | then 16 | usage 17 | exit 0 18 | fi 19 | 20 | if [[ $# -lt 2 ]] 21 | then 22 | usage 23 | exit 1 24 | fi 25 | 26 | INPUTS_DIR=$1 27 | OUTPUTS_DIR=$2 28 | 29 | echo "Running fmi" 30 | vtune_pc $OUTPUTS_DIR/fmi_pc "../benchmarks/fmi/fmi $INPUTS_DIR/fmi/broad $INPUTS_DIR/fmi/large/SRR7733443_10m_1.fastq 512 19 1" 31 | # bsw input file name kept identical to the one used by scripts/run-cpu.sh 32 | echo "Running bsw" 33 | vtune_pc $OUTPUTS_DIR/bsw_pc "../benchmarks/bsw/bsw -pairs $INPUTS_DIR/bsw/large/bandedSWA_SRR7733443_1m_input.txt -t 1 -b 512" 34 | 35 | echo "Running phmm" 36 | export LD_LIBRARY_PATH=../tools/GKL/build/native:$LD_LIBRARY_PATH 37 | vtune_pc $OUTPUTS_DIR/phmm_pc "../benchmarks/phmm/phmm -f $INPUTS_DIR/phmm/large/large.in -t 1" 38 | 39 | echo "Running dbg" 40 | vtune_pc $OUTPUTS_DIR/dbg_pc "../benchmarks/dbg/dbg $INPUTS_DIR/dbg/large/ERR194147-mem2-chr22.bam chr22:0-50818468
$INPUTS_DIR/dbg/large/Homo_sapiens_assembly38.fasta 1" 41 | 42 | echo "Running chain" 43 | vtune_pc $OUTPUTS_DIR/chain_pc "../benchmarks/chain/chain -i $INPUTS_DIR/chain/large/c_elegans_40x.10k.in -o $INPUTS_DIR/chain/large/c_elegans_40x.10k.out" 44 | 45 | echo "Running poa" 46 | vtune_pc $OUTPUTS_DIR/poa_pc "../benchmarks/poa/poa -s $INPUTS_DIR/poa/large/input.fasta -t 1" 47 | 48 | echo "Running kmer-cnt" 49 | vtune_pc $OUTPUTS_DIR/kmer-cnt_pc "../benchmarks/kmer-cnt/kmer-cnt --reads $INPUTS_DIR/kmer-cnt/large/Loman_E.coli_MAP006-1_2D_50x.fasta --config ../tools/Flye/flye/config/bin_cfg/asm_raw_reads.cfg --threads 1 --debug" 50 | 51 | echo "Running pileup" 52 | vtune_pc $OUTPUTS_DIR/pileup_pc "../benchmarks/pileup/pileup $INPUTS_DIR/pileup/large/HG002_prom_R941_guppy360_2_GRCh38_ch20.bam chr20:1-64444167 1" 53 | 54 | echo "Running grm" 55 | export LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/2021.1.1/lib/intel64:/opt/intel/oneapi/compiler/2021.1.2/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH 56 | vtune_pc $OUTPUTS_DIR/grm_pc "../benchmarks/grm/2.0/build_dynamic/plink2 --maf 0.01 --pgen $INPUTS_DIR/grm/large/chr1_phase3.pgen --pvar $INPUTS_DIR/grm/large/chr1_phase3.pvar --psam $INPUTS_DIR/grm/large/phase3_corrected.psam --make-grm-bin --out $INPUTS_DIR/grm/large/grm --threads 1" 57 | 58 | -------------------------------------------------------------------------------- /scripts/vtune.uarch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | usage() { 6 | echo -e "\n Usage $0 \n" 7 | } 8 | 9 | 10 | vtune_uarch() { 11 | vtune -collect uarch-exploration -r $1 -- $2 12 | } 13 | # test the first argument ($1); $# is only the argument count and never equals "--help" 14 | if [[ ( "$1" == "--help" ) || ( "$1" == "-h" ) ]] 15 | then 16 | usage 17 | exit 0 18 | fi 19 | 20 | if [[ $# -lt 2 ]] 21 | then 22 | usage 23 | exit 1 24 | fi 25 | 26 | INPUTS_DIR=$1 27 | OUTPUTS_DIR=$2 28 | 29 | echo "Running fmi" 30 | vtune_uarch $OUTPUTS_DIR/fmi_uarch "../benchmarks/fmi/fmi $INPUTS_DIR/fmi/broad
$INPUTS_DIR/fmi/large/SRR7733443_10m_1.fastq 512 19 1" 31 | # bsw input file name kept identical to the one used by scripts/run-cpu.sh 32 | echo "Running bsw" 33 | vtune_uarch $OUTPUTS_DIR/bsw_uarch "../benchmarks/bsw/bsw -pairs $INPUTS_DIR/bsw/large/bandedSWA_SRR7733443_1m_input.txt -t 1 -b 512" 34 | 35 | echo "Running phmm" 36 | export LD_LIBRARY_PATH=../tools/GKL/build/native:$LD_LIBRARY_PATH 37 | vtune_uarch $OUTPUTS_DIR/phmm_uarch "../benchmarks/phmm/phmm -f $INPUTS_DIR/phmm/large/large.in -t 1" 38 | 39 | echo "Running dbg" 40 | vtune_uarch $OUTPUTS_DIR/dbg_uarch "../benchmarks/dbg/dbg $INPUTS_DIR/dbg/large/ERR194147-mem2-chr22.bam chr22:0-50818468 $INPUTS_DIR/dbg/large/Homo_sapiens_assembly38.fasta 1" 41 | 42 | echo "Running chain" 43 | vtune_uarch $OUTPUTS_DIR/chain_uarch "../benchmarks/chain/chain -i $INPUTS_DIR/chain/large/c_elegans_40x.10k.in -o $INPUTS_DIR/chain/large/c_elegans_40x.10k.out" 44 | 45 | echo "Running poa" 46 | vtune_uarch $OUTPUTS_DIR/poa_uarch "../benchmarks/poa/poa -s $INPUTS_DIR/poa/large/input.fasta -t 1" 47 | 48 | echo "Running kmer-cnt" 49 | vtune_uarch $OUTPUTS_DIR/kmer-cnt_uarch "../benchmarks/kmer-cnt/kmer-cnt --reads $INPUTS_DIR/kmer-cnt/large/Loman_E.coli_MAP006-1_2D_50x.fasta --config ../tools/Flye/flye/config/bin_cfg/asm_raw_reads.cfg --threads 1 --debug" 50 | 51 | echo "Running pileup" 52 | vtune_uarch $OUTPUTS_DIR/pileup_uarch "../benchmarks/pileup/pileup $INPUTS_DIR/pileup/large/HG002_prom_R941_guppy360_2_GRCh38_ch20.bam chr20:1-64444167 1" 53 | 54 | echo "Running grm" 55 | export LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/2021.1.1/lib/intel64:/opt/intel/oneapi/compiler/2021.1.2/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH 56 | vtune_uarch $OUTPUTS_DIR/grm_uarch "../benchmarks/grm/2.0/build_dynamic/plink2 --maf 0.01 --pgen $INPUTS_DIR/grm/large/chr1_phase3.pgen --pvar $INPUTS_DIR/grm/large/chr1_phase3.pvar --psam $INPUTS_DIR/grm/large/phase3_corrected.psam --make-grm-bin --out $INPUTS_DIR/grm/large/grm --threads 1" 57 | 58 | --------------------------------------------------------------------------------