├── .gitattributes
├── .gitignore
├── .gitmodules
├── Makefile
├── README.md
├── benchmarks
├── abea
│ ├── LICENSE
│ ├── Makefile
│ ├── README
│ ├── README.md
│ └── src
│ │ ├── align.c
│ │ ├── align.cu
│ │ ├── config.h
│ │ ├── error.h
│ │ ├── eventalign.c
│ │ ├── events.c
│ │ ├── f5c.c
│ │ ├── f5c.cu
│ │ ├── f5c.h
│ │ ├── f5cmisc.cuh
│ │ ├── f5cmisc.h
│ │ ├── fast5lite.h
│ │ ├── freq.c
│ │ ├── freq_merge.c
│ │ ├── hmm.c
│ │ ├── khash.h
│ │ ├── ksort.h
│ │ ├── logsum.h
│ │ ├── main.c
│ │ ├── matrix.h
│ │ ├── meth.c
│ │ ├── meth_main.c
│ │ ├── model.c
│ │ ├── model.h
│ │ ├── nanopolish_fast5_io.c
│ │ ├── nanopolish_index.c
│ │ ├── nanopolish_read_db.c
│ │ ├── nanopolish_read_db.h
│ │ └── profiles.h
├── bsw
│ ├── Makefile
│ ├── README.md
│ ├── bandedSWA.cpp
│ ├── bandedSWA.h
│ ├── macro.h
│ ├── main_banded.cpp
│ └── utils.h
├── chain
│ ├── Makefile
│ ├── README.md
│ └── src
│ │ ├── common.cpp
│ │ ├── common.h
│ │ ├── host_data.h
│ │ ├── host_data_io.cpp
│ │ ├── host_data_io.h
│ │ ├── host_kernel.cpp
│ │ ├── host_kernel.h
│ │ └── main.cpp
├── dbg
│ ├── Makefile
│ ├── README.md
│ ├── common.cpp
│ ├── common.h
│ └── debruijn.cpp
├── fmi
│ ├── Makefile
│ ├── README.md
│ └── fmi.cpp
├── kmer-cnt
│ ├── Makefile
│ ├── README.md
│ ├── config.h
│ ├── kmer.h
│ ├── kmer_cnt.cpp
│ ├── libcuckoo
│ │ ├── LICENSE
│ │ ├── README.md
│ │ ├── cuckoohash_config.hh
│ │ ├── cuckoohash_map.hh
│ │ ├── cuckoohash_util.hh
│ │ └── libcuckoo_bucket_container.hh
│ ├── logger.h
│ ├── memory_info.h
│ ├── parallel.h
│ ├── progress_bar.h
│ ├── sequence.cpp
│ ├── sequence.h
│ ├── sequence_container.cpp
│ ├── sequence_container.h
│ ├── utils.h
│ ├── vertex_index.cpp
│ └── vertex_index.h
├── nn-base
│ ├── README.md
│ ├── bonito
│ │ └── basecall.py
│ ├── models
│ │ └── bonito_dna_r941
│ │ │ ├── config.toml
│ │ │ └── weights_0.tar
│ └── run_bonito.sh
├── nn-variant
│ ├── README.md
│ ├── clair
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-37.pyc
│ │ │ ├── model.cpython-37.pyc
│ │ │ └── selu.cpython-37.pyc
│ │ ├── callVarBam.py
│ │ ├── callVarBamParallel.py
│ │ ├── call_var.py
│ │ ├── evaluate.py
│ │ ├── learning_rate_finder.py
│ │ ├── model.py
│ │ ├── plot_tensor.py
│ │ ├── post_processing
│ │ │ ├── __init__.py
│ │ │ ├── ensemble.py
│ │ │ └── overlap_variant.py
│ │ ├── selu.py
│ │ ├── task
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-37.pyc
│ │ │ │ ├── genotype.cpython-37.pyc
│ │ │ │ ├── gt21.cpython-37.pyc
│ │ │ │ ├── main.cpython-37.pyc
│ │ │ │ └── variant_length.cpython-37.pyc
│ │ │ ├── genotype.py
│ │ │ ├── gt21.py
│ │ │ ├── main.py
│ │ │ └── variant_length.py
│ │ ├── train.py
│ │ ├── train_clr.py
│ │ └── utils.py
│ ├── prediction.py
│ └── shared
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ └── param.cpython-37.pyc
│ │ ├── command_options.py
│ │ ├── interval_tree.py
│ │ ├── param.py
│ │ └── utils.py
├── phmm
│ ├── Makefile
│ ├── PairHMMUnitTest.cpp
│ ├── PairHMMUnitTest.h
│ ├── README.md
│ ├── pairhmm_common.h
│ └── shacc_pairhmm.h
├── pileup
│ ├── Makefile
│ ├── README.md
│ ├── khash.h
│ ├── kvec.h
│ ├── medaka_bamiter.c
│ ├── medaka_bamiter.h
│ ├── medaka_common.c
│ ├── medaka_common.h
│ ├── medaka_counts.c
│ └── medaka_counts.h
└── poa
│ ├── Makefile
│ ├── README
│ ├── README.md
│ └── msa_spoa_omp.cpp
├── debian.prerequisites
├── img
├── GenomicsBenchLogo-Colored.png
└── GenomicsBenchLogo.png
├── requirements.txt
├── rhel.prerequisites
└── scripts
├── run-cpu.sh
├── run-gpu.sh
├── vtune.pc.sh
└── vtune.uarch.sh
/.gitattributes:
--------------------------------------------------------------------------------
1 | long-reads/basecalling/data/10/reads.fast5 filter=lfs diff=lfs merge=lfs -text
2 | long-reads/basecalling/data/100/reads.fast5 filter=lfs diff=lfs merge=lfs -text
3 | long-reads/basecalling/data/1000/reads.fast5 filter=lfs diff=lfs merge=lfs -text
4 | long-reads/basecalling/data/500/reads.fast5 filter=lfs diff=lfs merge=lfs -text
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | venv3
2 | long-reads/basecalling/output
3 | *.nfs*
4 | *.out
5 | *.txt
6 | *.o
7 | *.log
8 | *.out
9 | *.csv
10 | *.conf
11 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "tools/bwa"]
2 | path = tools/bwa
3 | url = https://github.com/arun-sub/bwa.git
4 | branch = master
5 | [submodule "tools/GKL"]
6 | path = tools/GKL
7 | url = https://github.com/arun-sub/GKL.git
8 | branch = pv_c_interface
9 | [submodule "tools/bwa-mem2"]
10 | path = tools/bwa-mem2
11 | url = https://github.com/bwa-mem2/bwa-mem2.git
12 | [submodule "tools/minimap2"]
13 | path = tools/minimap2
14 | url = https://github.com/arun-sub/minimap2.git
15 | [submodule "tools/minimap2-acceleration"]
16 | path = tools/minimap2-acceleration
17 | url = https://github.com/arun-sub/minimap2-acceleration.git
18 | [submodule "tools/spoa"]
19 | path = tools/spoa
20 | url = https://github.com/arun-sub/spoa.git
21 | [submodule "tools/abPOA"]
22 | path = tools/abPOA
23 | url = https://github.com/arun-sub/abPOA.git
24 | [submodule "tools/racon"]
25 | path = tools/racon
26 | url = https://github.com/arun-sub/racon.git
27 | [submodule "tools/Clair"]
28 | path = tools/Clair
29 | url = https://github.com/Yufeng98/Clair.git
30 | [submodule "tools/medaka"]
31 | path = tools/medaka
32 | url = https://github.com/arun-sub/medaka.git
33 | [submodule "tools/Platypus"]
34 | path = tools/Platypus
35 | url = https://github.com/arun-sub/Platypus.git
36 | [submodule "tools/htslib"]
37 | path = tools/htslib
38 | url = https://github.com/arun-sub/htslib.git
39 | [submodule "tools/bonito"]
40 | path = tools/bonito
41 | url = https://github.com/TimD1/bonito.git
42 | [submodule "tools/Flye"]
43 | path = tools/Flye
44 | url = https://github.com/arun-sub/Flye.git
45 | [submodule "benchmarks/grm"]
46 | path = benchmarks/grm
47 | url = https://github.com/arun-sub/plink-ng.git
48 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | CXX=g++
2 | CC=gcc
3 | ARCH=avx2
4 | #VTUNE_HOME=/opt/intel/oneapi/vtune/2021.1.1
5 | MKLROOT=/opt/intel/oneapi/mkl/2021.1.1
6 | MKL_IOMP5_DIR=/opt/intel/oneapi/compiler/2021.1.2/linux/compiler/lib/intel64_lin
7 | CUDA_LIB=/usr/local/cuda
8 |
9 | .PHONY: all gpu clean
10 |
11 | all:
12 | $(info Starting build..this may take a while..)
13 | cd tools/htslib && autoreconf -i && ./configure && $(MAKE)
14 | cd tools/bwa-mem2; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME)
15 | cd benchmarks/fmi; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME)
16 | cd benchmarks/bsw; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME)
17 | cd benchmarks/dbg; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME)
18 | cd tools/GKL; ./gradlew test
19 | cd benchmarks/phmm; $(MAKE) CC=$(CC) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME)
20 | cd tools/minimap2; $(MAKE)
21 | cd benchmarks/chain; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME)
22 | cd tools/spoa; mkdir build; cd build; cmake -DCMAKE_BUILD_TYPE=Release ..; $(MAKE)
23 | cd benchmarks/poa; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME)
24 | cd benchmarks/pileup; $(MAKE) CC=$(CC) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME)
25 | cd benchmarks/kmer-cnt; $(MAKE) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME)
26 | cd benchmarks/grm/2.0/build_dynamic; $(MAKE) CC=$(CC) CXX=$(CXX) arch=$(ARCH) VTUNE_HOME=$(VTUNE_HOME) MKLROOT=$(MKLROOT) MKL_IOMP5_DIR=$(MKL_IOMP5_DIR) #needs MKL
27 |
28 | gpu:
29 | cd benchmarks/abea; $(MAKE) CUDA_LIB=$(CUDA_LIB)/lib64
30 |
31 | clean:
32 | cd tools/bwa-mem2; $(MAKE) clean
33 | cd benchmarks/fmi; $(MAKE) clean
34 | cd benchmarks/bsw; $(MAKE) clean
35 | cd benchmarks/dbg; $(MAKE) clean
36 | cd tools/GKL; ./gradlew clean
37 | cd benchmarks/phmm; $(MAKE) clean
38 | cd tools/minimap2; $(MAKE) clean
39 | cd benchmarks/chain; $(MAKE) clean
40 | cd benchmarks/poa; $(MAKE) clean
41 | cd benchmarks/pileup; $(MAKE) clean
42 | cd benchmarks/kmer-cnt; $(MAKE) clean
43 | cd benchmarks/grm/2.0/build_dynamic; $(MAKE) clean
44 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |

2 |
3 | # About
4 |
5 | A benchmark suite covering the major steps in short and long-read genome sequence analysis pipelines such as basecalling, sequence mapping, de-novo assembly, variant calling and polishing.
6 |
7 | ## Download
8 |
9 | * Latest source code
10 |
11 | ```bash
12 | git clone --recursive https://github.com/arun-sub/genomicsbench.git
13 | ```
14 |
15 | * Input datasets
16 |
17 | ```bash
18 | wget https://genomicsbench.eecs.umich.edu/input-datasets.tar.gz
19 | ```
20 |
21 | ## Prerequisites
22 |
23 | * RHEL/Fedora system prerequisites
24 |
25 | ```bash
26 | sudo yum -y install $(cat rhel.prerequisites)
27 | ```
28 | * Debian system prerequisites
29 |
30 | ```bash
31 | sudo apt-get install $(cat debian.prerequisites)
32 | ```
33 |
34 | ## Python setup (optional: only needed for GPU benchmarks)
35 |
36 | To run Python-based benchmarks nn-base, nn-variant and abea, follow the steps below:
37 |
38 | * Download and install miniconda from [this](https://docs.conda.io/projects/continuumio-conda/en/latest/user-guide/install/download.html) link.
39 |
40 | * Follow the steps below to set up a conda environment:
41 |
42 | ```bash
43 | # make sure channels are added in conda
44 | conda config --add channels defaults
45 | conda config --add channels bioconda
46 | conda config --add channels conda-forge
47 |
48 | # create conda environment named "genomicsbench"
49 | conda create -n genomicsbench -c bioconda clair python==3.6.8
50 | conda activate genomicsbench
51 | conda install deepdish
52 |
53 | pip install --upgrade pip
54 | pip install -r requirements.txt
55 | pypy3 -m ensurepip
56 | pypy3 -m pip install --no-cache-dir intervaltree==3.0.2
57 | ```
58 |
59 | ## Compile
60 |
61 | Note that the benchmarks have only been tested on gcc/g++-9 because of the dependency of related kernels. If there are multiple gcc/g++ versions, please refer to [update-alternative](https://linuxconfig.org/how-to-switch-between-multiple-gcc-and-g-compiler-versions-on-ubuntu-20-04-lts-focal-fossa) to configure gcc/g++-9.
62 |
63 | * CPU benchmarks
64 | * MKLROOT and MKL_IOMP5_DIR variables need to be set in Makefile to run `grm`. If you don't want to run `grm`, please comment out the `grm`-related commands in Makefile
65 | * VTUNE_HOME variable needs to be set if you want to run any VTune based analyses
66 |
67 | ```bash
68 | make -j
69 | ```
70 |
71 | * GPU benchmarks
72 | * Set CUDA_LIB=/usr/local/cuda or to the path of the local CUDA installation in Makefile.
73 | * Also ensure environment variables PATH and LD_LIBRARY_PATH include the path to CUDA binaries and libraries.
74 |
75 | ```bash
76 | make -j gpu
77 | ```
78 |
79 | ## Running
80 |
81 | * CPU benchmarks
82 |
83 | ```bash
84 | cd scripts
85 | chmod +x ./run-cpu.sh
86 | ./run-cpu.sh
87 | ```
88 |
89 | * GPU benchmarks
90 |
91 | ```bash
92 | cd scripts
93 | chmod +x ./run-gpu.sh
94 | ./run-gpu.sh
95 | ```
96 |
97 | ## Citation
98 |
99 | If you use GenomicsBench or find GenomicsBench useful, please cite this work:
100 |
101 | > **Arun Subramaniyan, Yufeng Gu, Timothy Dunn, Somnath Paul, Md. Vasimuddin, Sanchit Misra, David Blaauw, Satish Narayanasamy, Reetuparna Das. *GenomicsBench: A Benchmark Suite for Genomics*, In IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS), 2021 (to appear)**
102 |
103 | ```
104 | @inproceedings{genomicsbench,
105 | title={GenomicsBench: A Benchmark Suite for Genomics},
106 | author={Subramaniyan, Arun and Gu, Yufeng and Dunn, Timothy and Paul, Somnath and Vasimuddin, Md. and Misra, Sanchit and Blaauw, David and Narayanasamy, Satish and Das, Reetuparna},
107 | booktitle={Proceedings of the IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
108 | year={2021}
109 | }
110 | ```
111 |
112 | ## Issues and bug reporting
113 |
114 | GenomicsBench is under active development and we appreciate any feedback and suggestions from the community. Feel free to raise an issue or submit a pull request on Github. For assistance in using GenomicsBench, please contact: Arun Subramaniyan (arunsub@umich.edu), Yufeng Gu (yufenggu@umich.edu), Timothy Dunn (timdunn@umich.edu)
115 |
116 | ## Licensing
117 |
118 | Each benchmark is individually licensed according to the tool it is extracted from.
119 |
120 | ## Acknowledgement
121 |
122 | This work was supported in part by Precision Health at the University of Michigan, by the Kahn foundation, by the NSF under the CAREER-1652294 award and the Applications Driving Architectures (ADA) Research Center, a JUMP Center co-sponsored by SRC and DARPA.
123 |
124 |
--------------------------------------------------------------------------------
/benchmarks/abea/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Hasindu Gamaarachchi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/benchmarks/abea/Makefile:
--------------------------------------------------------------------------------
1 | $(info $(shell mkdir -p build))
2 |
3 | HDF5 ?= install
4 | HTS ?= install
5 |
6 | HTS_VERSION = 1.9
7 | HDF5_VERSION = 1.10.4
8 | HDF5_MAJOR_MINOR = `echo $(HDF5_VERSION) | sed -E 's/\.[0-9]+$$//'`
9 |
10 | ifdef ENABLE_PROFILE
11 | CFLAGS += -p
12 | endif
13 |
14 | ifeq ($(HDF5), install)
15 | HDF5_LIB = $(BUILD_DIR)/lib/libhdf5.a
16 | HDF5_INC = -I$(BUILD_DIR)/include
17 | LDFLAGS += $(HDF5_LIB) -ldl
18 | else
19 | ifneq ($(HDF5), autoconf)
20 | HDF5_LIB =
21 | HDF5_SYS_LIB = `pkg-config --libs hdf5`
22 | HDF5_INC = `pkg-config --cflags-only-I hdf5`
23 | endif
24 | endif
25 |
26 | ifeq ($(HTS), install)
27 | HTS_LIB = $(BUILD_DIR)/lib/libhts.a
28 | HTS_INC = -I$(BUILD_DIR)/include
29 | LDFLAGS += $(HTS_LIB)
30 | else
31 | ifneq ($(HTS), autoconf)
32 | HTS_LIB =
33 | HTS_SYS_LIB = `pkg-config --libs htslib`
34 | HTS_INC = `pkg-config --cflags-only-I htslib`
35 | endif
36 | endif
37 |
38 | CPPFLAGS += $(HDF5_INC) $(HTS_INC)
39 | LDFLAGS += $(HTS_SYS_LIB) $(HDF5_SYS_LIB)
40 |
41 | CC = gcc
42 | CXX = g++
43 | LANG = -x c++
44 | CFLAGS += -g -Wall -O2 -std=c++11
45 | LDFLAGS += $(LIBS) -lpthread -lz -rdynamic
46 | BUILD_DIR = build
47 |
48 | BINARY = f5c
49 | OBJ = $(BUILD_DIR)/main.o \
50 | $(BUILD_DIR)/meth_main.o \
51 | $(BUILD_DIR)/f5c.o \
52 | $(BUILD_DIR)/events.o \
53 | $(BUILD_DIR)/nanopolish_read_db.o \
54 | $(BUILD_DIR)/nanopolish_index.o \
55 | $(BUILD_DIR)/nanopolish_fast5_io.o \
56 | $(BUILD_DIR)/model.o \
57 | $(BUILD_DIR)/align.o \
58 | $(BUILD_DIR)/meth.o \
59 | $(BUILD_DIR)/hmm.o \
60 | $(BUILD_DIR)/freq.o \
61 | $(BUILD_DIR)/eventalign.o \
62 | $(BUILD_DIR)/freq_merge.o
63 |
64 | PREFIX = /usr/local
65 | VERSION = `git describe --tags`
66 |
67 | CUDA_ROOT = /usr/local/cuda
68 | CUDA_LIB ?= $(CUDA_ROOT)/lib64
69 | CUDA_OBJ = $(BUILD_DIR)/f5c_cuda.o $(BUILD_DIR)/align_cuda.o
70 | NVCC ?= nvcc
71 | CUDA_CFLAGS += -g -O2 -std=c++11 -lineinfo $(CUDA_ARCH) -Xcompiler -Wall
72 | CUDA_LDFLAGS = -L$(CUDA_LIB) -lcudart_static -lrt -ldl
73 | OBJ += $(BUILD_DIR)/gpucode.o $(CUDA_OBJ)
74 | CPPFLAGS += -DHAVE_CUDA=1
75 |
76 | .PHONY: clean distclean
77 |
78 | $(BINARY): src/config.h $(HTS_LIB) $(HDF5_LIB) $(OBJ)
79 | $(CXX) $(CFLAGS) $(OBJ) $(LDFLAGS) $(CUDA_LDFLAGS) -o $@
80 |
81 | $(BUILD_DIR)/main.o: src/main.c src/f5cmisc.h src/error.h
82 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
83 |
84 | $(BUILD_DIR)/meth_main.o: src/meth_main.c src/f5c.h src/fast5lite.h src/f5cmisc.h src/logsum.h
85 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
86 |
87 | $(BUILD_DIR)/f5c.o: src/f5c.c src/f5c.h src/fast5lite.h src/f5cmisc.h
88 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
89 |
90 | $(BUILD_DIR)/events.o: src/events.c src/f5c.h src/fast5lite.h src/f5cmisc.h src/fast5lite.h src/nanopolish_read_db.h src/ksort.h
91 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
92 |
93 | $(BUILD_DIR)/nanopolish_read_db.o: src/nanopolish_read_db.c src/nanopolish_read_db.h
94 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
95 |
96 | $(BUILD_DIR)/nanopolish_index.o: src/nanopolish_index.c src/nanopolish_read_db.h src/fast5lite.h
97 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
98 |
99 | $(BUILD_DIR)/nanopolish_fast5_io.o: src/nanopolish_fast5_io.c src/fast5lite.h
100 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
101 |
102 | $(BUILD_DIR)/model.o: src/model.c src/model.h src/f5c.h src/fast5lite.h src/f5cmisc.h
103 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
104 |
105 | $(BUILD_DIR)/align.o: src/align.c src/f5c.h src/fast5lite.h
106 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
107 |
108 | $(BUILD_DIR)/meth.o: src/meth.c src/f5c.h src/fast5lite.h src/f5cmisc.h
109 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
110 |
111 | $(BUILD_DIR)/hmm.o: src/hmm.c src/f5c.h src/fast5lite.h src/f5cmisc.h src/matrix.h src/logsum.h
112 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
113 |
114 | $(BUILD_DIR)/freq.o: src/freq.c src/khash.h
115 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
116 |
117 | $(BUILD_DIR)/eventalign.o: src/eventalign.c
118 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
119 |
120 | $(BUILD_DIR)/freq_merge.o: src/freq_merge.c
121 | $(CXX) $(CFLAGS) $(CPPFLAGS) $(LANG) $< -c -o $@
122 |
123 | # cuda stuff
124 | $(BUILD_DIR)/gpucode.o: $(CUDA_OBJ)
125 | $(NVCC) $(CUDA_CFLAGS) -dlink $^ -o $@
126 |
127 | $(BUILD_DIR)/f5c_cuda.o: src/f5c.cu src/error.h src/f5c.h src/fast5lite.h src/f5cmisc.cuh src/f5cmisc.h
128 | $(NVCC) -x cu $(CUDA_CFLAGS) $(CPPFLAGS) -rdc=true -c $< -o $@
129 |
130 | $(BUILD_DIR)/align_cuda.o: src/align.cu src/f5c.h src/fast5lite.h src/f5cmisc.cuh
131 | $(NVCC) -x cu $(CUDA_CFLAGS) $(CPPFLAGS) -rdc=true -c $< -o $@
132 |
133 | src/config.h:
134 | echo "/* Default config.h generated by Makefile */" >> $@
135 | echo "#define HAVE_HDF5_H 1" >> $@
136 |
137 | $(BUILD_DIR)/lib/libhts.a:
138 | @if command -v curl; then \
139 | curl -o $(BUILD_DIR)/htslib.tar.bz2 -L https://github.com/samtools/htslib/releases/download/$(HTS_VERSION)/htslib-$(HTS_VERSION).tar.bz2; \
140 | else \
141 | wget -O $(BUILD_DIR)/htslib.tar.bz2 https://github.com/samtools/htslib/releases/download/$(HTS_VERSION)/htslib-$(HTS_VERSION).tar.bz2; \
142 | fi
143 | tar -xf $(BUILD_DIR)/htslib.tar.bz2 -C $(BUILD_DIR)
144 | mv $(BUILD_DIR)/htslib-$(HTS_VERSION) $(BUILD_DIR)/htslib
145 | rm -f $(BUILD_DIR)/htslib.tar.bz2
146 | cd $(BUILD_DIR)/htslib && \
147 | ./configure --prefix=`pwd`/../ --enable-bz2=no --enable-lzma=no --with-libdeflate=no --enable-libcurl=no --enable-gcs=no --enable-s3=no && \
148 | make -j8 && \
149 | make install
150 |
151 | $(BUILD_DIR)/lib/libhdf5.a:
152 | if command -v curl; then \
153 | curl -o $(BUILD_DIR)/hdf5.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-$(HDF5_MAJOR_MINOR)/hdf5-$(HDF5_VERSION)/src/hdf5-$(HDF5_VERSION).tar.bz2; \
154 | else \
155 | wget -O $(BUILD_DIR)/hdf5.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-$(HDF5_MAJOR_MINOR)/hdf5-$(HDF5_VERSION)/src/hdf5-$(HDF5_VERSION).tar.bz2; \
156 | fi
157 | tar -xf $(BUILD_DIR)/hdf5.tar.bz2 -C $(BUILD_DIR)
158 | mv $(BUILD_DIR)/hdf5-$(HDF5_VERSION) $(BUILD_DIR)/hdf5
159 | rm -f $(BUILD_DIR)/hdf5.tar.bz2
160 | cd $(BUILD_DIR)/hdf5 && \
161 | ./configure --prefix=`pwd`/../ && \
162 | make -j8 && \
163 | make install
164 |
165 | clean:
166 | rm -rf $(BINARY) $(BUILD_DIR)
167 |
--------------------------------------------------------------------------------
/benchmarks/abea/README:
--------------------------------------------------------------------------------
1 | Build: 'make CUDA_LIB=<cuda-library-dir>' or just 'make' if /usr/local/cuda points to the install location
2 | Command line: './f5c eventalign -b <alignments.bam> -g <genome.fa> -r <reads.fastq>'
3 |
--------------------------------------------------------------------------------
/benchmarks/abea/README.md:
--------------------------------------------------------------------------------
1 | `abea` uses the same license as [f5c](https://github.com/hasindu2008/f5c).
2 |
3 | If you find `abea` useful, please cite:
4 |
5 | ```
6 | @article{gamaarachchi2020gpu,
7 | title={GPU accelerated adaptive banded event alignment for rapid comparative nanopore signal analysis},
8 | author={Gamaarachchi, Hasindu and Lam, Chun Wai and Jayatilaka, Gihan and Samarakoon, Hiruna and Simpson, Jared T and Smith, Martin A and Parameswaran, Sri},
9 | journal={BMC bioinformatics},
10 | volume={21},
11 | number={1},
12 | pages={1--13},
13 | year={2020},
14 | publisher={BioMed Central}
15 | }
16 | ```
17 |
--------------------------------------------------------------------------------
/benchmarks/abea/src/config.h:
--------------------------------------------------------------------------------
1 | /* Default config.h generated by Makefile */
2 | #define HAVE_HDF5_H 1
3 |
--------------------------------------------------------------------------------
/benchmarks/abea/src/error.h:
--------------------------------------------------------------------------------
1 | #ifndef ERROR_H
2 | #define ERROR_H
3 |
4 | #include
5 |
6 | #define WARN "[%s::WARNING]\033[1;33m "
7 | #define ERR "[%s::ERROR]\033[1;31m "
8 | #define CEND "\033[0m\n"
9 |
10 | #define STDERR(arg, ...) \
11 | fprintf(stderr, "[%s] " arg "\n", __func__, \
12 | __VA_ARGS__)
13 | #define WARNING(arg, ...) \
14 | fprintf(stderr, "[%s::WARNING]\033[1;33m " arg "\033[0m\n", __func__, \
15 | __VA_ARGS__)
16 | #define ERROR(arg, ...) \
17 | fprintf(stderr, "[%s::ERROR]\033[1;31m " arg "\033[0m\n", __func__, \
18 | __VA_ARGS__)
19 | #define INFO(arg, ...) \
20 | fprintf(stderr, "[%s::INFO]\033[1;34m " arg "\033[0m\n", __func__, \
21 | __VA_ARGS__)
22 | #define SUCCESS(arg, ...) \
23 | fprintf(stderr, "[%s::SUCCESS]\033[1;32m " arg "\033[0m\n", __func__, \
24 | __VA_ARGS__)
25 | #define DEBUG(arg, ...) \
26 | fprintf(stderr, \
27 | "[%s::DEBUG]\033[1;35m Error occured at %s:%d. " arg "\033[0m\n", \
28 | __func__, __FILE__, __LINE__ - 2, __VA_ARGS__)
29 |
30 | #define MALLOC_CHK(ret) malloc_chk((void*)ret, __func__, __FILE__, __LINE__ - 1)
31 | #define F_CHK(ret, filename) \
32 | f_chk((void*)ret, __func__, __FILE__, __LINE__ - 1, filename);
33 | #define NULL_CHK(ret) null_chk((void*)ret, __func__, __FILE__, __LINE__ - 1)
34 | #define NEG_CHK(ret) neg_chk(ret, __func__, __FILE__, __LINE__ - 1)
35 |
36 | static inline void malloc_chk(void* ret, const char* func, const char* file,
37 | int line) { // reports a failed allocation (ret == NULL) on stderr and exits; func/file/line locate the caller
38 | if (ret != NULL) // non-NULL pointer: nothing to report
39 | return;
40 | fprintf(
41 | stderr,
42 | "[%s::ERROR]\033[1;31m Failed to allocate memory : "
43 | "%s.\033[0m\n[%s::DEBUG]\033[1;35m Error occured at %s:%d. Try with a small batchsize (-K and/or -B options) to reduce the peak memory\033[0m\n\n",
44 | func, strerror(errno), func, file, line); // strerror(errno) supplies the system error text
45 | exit(EXIT_FAILURE); // die on error: the process is terminated
46 | }
47 |
48 | static inline void f_chk(void* ret, const char* func, const char* file,
49 | int line, const char* fopen_f) { // reports a NULL handle for the file named fopen_f on stderr and exits
50 | if (ret != NULL) // non-NULL handle: nothing to report
51 | return;
52 | fprintf(
53 | stderr,
54 | "[%s::ERROR]\033[1;31m Failed to open %s : "
55 | "%s.\033[0m\n[%s::DEBUG]\033[1;35m Error occured at %s:%d.\033[0m\n\n",
56 | func, fopen_f, strerror(errno), func, file, line); // strerror(errno) supplies the system error text
57 | exit(EXIT_FAILURE); // die on error: the process is terminated
58 | }
59 |
60 | // Die on error. Print strerror(errno) with the caller's func/file/line and exit if ret (the previous call's return value) is NULL
61 | static inline void null_chk(void* ret, const char* func, const char* file,
62 | int line) {
63 | if (ret != NULL)
64 | return;
65 | fprintf(stderr,
66 | "[%s::ERROR]\033[1;31m %s.\033[0m\n[%s::DEBUG]\033[1;35m Error "
67 | "occured at %s:%d.\033[0m\n\n",
68 | func, strerror(errno), func, file, line);
69 | exit(EXIT_FAILURE);
70 | }
71 |
72 | // Die on error. Print strerror(errno) with the caller's func/file/line and exit if ret (the previous call's return value) is negative (any value < 0, not only -1)
73 | static inline void neg_chk(int ret, const char* func, const char* file,
74 | int line) {
75 | if (ret >= 0)
76 | return;
77 | fprintf(stderr,
78 | "[%s::ERROR]\033[1;31m %s.\033[0m\n[%s::DEBUG]\033[1;35m Error "
79 | "occured at %s:%d.\033[0m\n\n",
80 | func, strerror(errno), func, file, line);
81 | exit(EXIT_FAILURE);
82 | }
83 |
84 | #endif
85 |
--------------------------------------------------------------------------------
/benchmarks/abea/src/f5cmisc.h:
--------------------------------------------------------------------------------
1 | #ifndef F5CMISC_H
2 | #define F5CMISC_H
3 |
4 | #include "error.h"
5 | #include "f5c.h"
6 | #include
7 | #include
8 |
9 | #define MIN_CALIBRATION_VAR 2.5
10 | #define MAX_EVENT_TO_BP_RATIO 20
11 |
12 | // Flags to modify the behaviour of the HMM
13 | enum HMMAlignmentFlags
14 | {
15 | HAF_ALLOW_PRE_CLIP = 1, // allow events to go unmatched before the aligning region
16 | HAF_ALLOW_POST_CLIP = 2 // allow events to go unmatched after the aligning region
17 | };
18 |
19 | event_table getevents(size_t nsample, float* rawptr);
20 | void read_model(model_t* model, const char* file);
21 | void set_model(model_t* model);
22 | void set_cpgmodel(model_t* model);
23 | scalings_t estimate_scalings_using_mom(char* sequence, int32_t sequence_len,
24 | model_t* pore_model, event_table et);
25 | int32_t align(AlignedPair* out_2, char* sequence, int32_t sequence_len,
26 | event_table events, model_t* models, scalings_t scaling,
27 | float sample_rate);
28 | int32_t postalign(event_alignment_t* alignment, index_pair_t* base_to_event_map, double* events_per_base,
29 | char* sequence, int32_t n_kmers, AlignedPair* event_alignment,
30 | int32_t n_events);
31 | bool recalibrate_model(model_t* pore_model, event_table et,
32 | scalings_t* scallings,
33 | const event_alignment_t* alignment_output,
34 | int32_t num_alignments, bool scale_var);
35 |
36 | float profile_hmm_score(const char *m_seq,const char *m_rc_seq, event_t* event, scalings_t scaling, model_t* cpgmodel, uint32_t event_start_idx,
37 | uint32_t event_stop_idx,
38 | uint8_t strand,
39 | int8_t event_stride,
40 | uint8_t rc,double events_per_base,uint32_t hmm_flags
41 | );
42 | void calculate_methylation_for_read(std::map* site_score_map, char* ref, bam1_t* record, int32_t read_length, event_t* event, index_pair_t* base_to_event_map,
43 | scalings_t scaling, model_t* cpgmodel,double events_per_base);
44 |
45 | void emit_event_alignment_tsv(FILE* fp,
46 | uint32_t strand_idx,
47 | const event_table* et, model_t* model, scalings_t scalings,
48 | const std::vector& alignments,
49 | int8_t print_read_names, int8_t scale_events, int8_t write_samples,
50 | int64_t read_index, char* read_name, char *ref_name, float sample_rate);
51 |
52 | void emit_event_alignment_tsv_header(FILE* fp, int8_t print_read_names, int8_t write_samples);
53 |
54 | void emit_sam_header(samFile* fp, const bam_hdr_t* hdr);
55 |
56 | void emit_event_alignment_sam(htsFile* fp,
57 | char* read_name,
58 | bam_hdr_t* base_hdr,
59 | bam1_t* base_record,
60 | const std::vector& alignments
61 | );
62 |
63 | void realign_read(std::vector* event_alignment_result, EventalignSummary *summary, FILE *summary_fp,char* ref,
64 | const bam_hdr_t* hdr,
65 | const bam1_t* record, int32_t read_length,
66 | size_t read_idx,
67 | int region_start,
68 | int region_end,
69 | event_table* events, model_t* model,index_pair_t* base_to_event_map,scalings_t scaling,double events_per_base, float sample_rate);
70 |
71 | //basically the functions in nanopolish_profile_hmm_r9.*
72 | float profile_hmm_score_r9(const char *m_seq,
73 | const char *m_rc_seq,
74 | event_t* event,
75 | scalings_t scaling,
76 | model_t* cpgmodel,
77 | uint32_t event_start_idx,
78 | uint32_t event_stop_idx,
79 | uint8_t strand,
80 | int8_t event_stride,
81 | uint8_t rc,
82 | double events_per_base,
83 | uint32_t hmm_flags);
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | #ifdef HAVE_CUDA
94 | void align_cuda(core_t* core, db_t* db);
95 | #endif
96 |
97 | // Returns the current gettimeofday() time as seconds (tv_sec plus tv_usec scaled by 1e-6); taken from minimap2/misc
98 | static inline double realtime(void) {
99 | struct timeval tp;
100 | struct timezone tzp;
101 | gettimeofday(&tp, &tzp);
102 | return tp.tv_sec + tp.tv_usec * 1e-6;
103 | }
104 |
105 | // Returns user + system CPU time of this process in seconds, read via getrusage(RUSAGE_SELF); taken from minimap2/misc
106 | static inline double cputime(void) {
107 | struct rusage r;
108 | getrusage(RUSAGE_SELF, &r);
109 | return r.ru_utime.tv_sec + r.ru_stime.tv_sec +
110 | 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec);
111 | }
112 |
113 | // Returns ru_maxrss of this process from getrusage(RUSAGE_SELF), multiplied by 1024 on Linux and unscaled elsewhere; taken from minimap2
114 | static inline long peakrss(void)
115 | {
116 | struct rusage r;
117 | getrusage(RUSAGE_SELF, &r);
118 | #ifdef __linux__
119 | return r.ru_maxrss * 1024;
120 | #else
121 | return r.ru_maxrss;
122 | #endif
123 | }
124 |
125 | // Prints to stderr the value of bytes, labelled with name, scaled by powers of 1024 to a readable unit (B, KB, MB, GB, ...)
126 | //from https://www.mbeckler.org/blog/?p=114
127 | static inline void print_size(const char* name, uint64_t bytes)
128 | {
129 | const char* suffixes[7];
130 | suffixes[0] = "B";
131 | suffixes[1] = "KB";
132 | suffixes[2] = "MB";
133 | suffixes[3] = "GB";
134 | suffixes[4] = "TB";
135 | suffixes[5] = "PB";
136 | suffixes[6] = "EB";
137 | uint64_t s = 0; // which suffix to use
138 | double count = bytes;
139 | while (count >= 1024 && s < 6) // s must never exceed 6, the last valid index of suffixes[]
140 | {
141 | s++;
142 | count /= 1024;
143 | }
144 | if (count - floor(count) == 0.0) // whole number: print without a fractional part
145 | fprintf(stderr, "[%s] %s : %d %s\n", __func__ , name, (int)count, suffixes[s]);
146 | else
147 | fprintf(stderr, "[%s] %s : %.1f %s\n", __func__, name, count, suffixes[s]);
148 | }
149 |
150 |
151 | #endif
152 |
--------------------------------------------------------------------------------
/benchmarks/abea/src/logsum.h:
--------------------------------------------------------------------------------
1 | //
2 | // logsum -- a port of Sean Eddy's fast table-driven log sum
3 | // This code was originally part of HMMER. This version is used with
4 | // Sean Eddy's permission as public domain code.
5 | //
6 |
7 | #ifndef LOGSUM_H
8 | #define LOGSUM_H
9 |
10 | /* p7_LOGSUM_SCALE defines the precision of the calculation; the
11 | * default of 1000.0 means rounding differences to the nearest 0.001
12 | * nat. p7_LOGSUM_TBL defines the size of the lookup table; the
13 | * default of 16000 means entries are calculated for differences of 0
14 | * to 16.000 nats (when p7_LOGSUM_SCALE is 1000.0). e^{-p7_LOGSUM_TBL /
15 | * p7_LOGSUM_SCALE} should be on the order of the machine FLT_EPSILON,
16 | * typically 1.2e-7.
17 | */
18 | #define p7_LOGSUM_TBL 16000
19 | #define p7_LOGSUM_SCALE 1000.f
20 | #define ESL_MAX(a,b) (((a)>(b))?(a):(b))
21 | #define ESL_MIN(a,b) (((a)<(b))?(a):(b))
22 | #define eslINFINITY INFINITY
23 |
24 | /* Synopsis: Initialize the p7_FLogsum() function.
25 | *
26 | * Purpose: Initialize the lookup table for p7_FLogsum().
27 | * This function must be called once before any
28 | * call to p7_FLogsum().
29 | *
30 | * The precision of the lookup table is determined
31 | * by the compile-time p7_LOGSUM_SCALE constant (its size by p7_LOGSUM_TBL).
32 | *
33 | * Returns: 1 on success.
34 | */
35 | static inline int p7_FLogsumInit(void)
36 | {
37 |
38 | // static int firsttime = TRUE;
39 | // if (!firsttime) return 1;
40 | // firsttime = FALSE;
41 |
42 | extern float flogsum_lookup[p7_LOGSUM_TBL];
43 | int i;
44 | for (i = 0; i < p7_LOGSUM_TBL; i++) {
45 | flogsum_lookup[i] = log(1. + exp((double) -i / p7_LOGSUM_SCALE));
46 | }
47 | return 1;
48 | }
49 |
50 | /* Function: p7_FLogsum()
51 | * Synopsis: Approximate $\log(e^a + e^b)$.
52 | *
53 | * Purpose: Returns a fast table-driven approximation to
54 | * $\log(e^a + e^b)$.
55 | *
56 | * Either or (or both) may be $-\infty$,
57 | * but neither may be $+\infty$ or .
58 | *
59 | * Note: This function is a critical optimization target, because
60 | * it's in the inner loop of generic Forward() algorithms.*/
61 | static inline float p7_FLogsum(float a, float b){
62 |
63 | extern float flogsum_lookup[p7_LOGSUM_TBL]; /* p7_LOGSUM_TBL=16000: (A-B) = 0..16 nats, steps of 0.001 */
64 |
65 | const float max = ESL_MAX(a, b);
66 | const float min = ESL_MIN(a, b);
67 |
68 | //return (min == -eslINFINITY || (max-min) >= 15.7f) ? max : max + log(1.0 + exp(min-max)); /* SRE: While debugging SSE impl. Remember to remove! */
69 |
70 | return (min == -eslINFINITY || (max-min) >= 15.7f) ? max : max + flogsum_lookup[(int)((max-min)*p7_LOGSUM_SCALE)];
71 | }
72 |
73 | #endif
74 |
--------------------------------------------------------------------------------
/benchmarks/abea/src/main.c:
--------------------------------------------------------------------------------
1 | /* @f5c
2 | **
3 | ** main
4 | ** @author: Hasindu Gamaarachchi (hasindu@unsw.edu.au)
5 | ** @@
6 | ******************************************************************************/
7 |
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include "f5cmisc.h"
14 | #include "error.h"
15 |
16 | #ifdef HAVE_EXECINFO_H
17 | #include
18 | #endif
19 |
//make the segmentation faults a bit cool
// Signal handler (installed for SIGSEGV by main): prints an apology and, when
// <execinfo.h> is available, a backtrace to stderr, then exits with
// EXIT_FAILURE.
//
// sig : signal number (unused)
//
// NOTE(review): fprintf() and exit() are not async-signal-safe; this is kept
// as-is because the handler is a best-effort diagnostic on a fatal error.
void sig_handler(int sig) {
    (void)sig;
#ifdef HAVE_EXECINFO_H
    void* array[100];
    int size = backtrace(array, 100);
    ERROR("I regret to inform that a segmentation fault occurred. But at least "
          "it is better than a wrong answer%s",
          ".");
    fprintf(stderr,
            "[%s::DEBUG]\033[1;35m Here is the backtrace in case it is of any "
            "use:\n",
            __func__);
    // The two innermost frames are skipped (presumably this handler and the
    // signal trampoline), so only size - 2 captured entries remain valid.
    if (size > 2) {
        backtrace_symbols_fd(&array[2], size - 2, STDERR_FILENO);
    }
    fprintf(stderr, "\033[0m\n");
#else
    ERROR("I regret to inform that a segmentation fault occurred. But at least "
          "it is better than a wrong answer%s",
          ".");
#endif
    exit(EXIT_FAILURE);
}
41 |
42 | int meth_main(int argc, char* argv[], int8_t mode);
43 | int index_main(int argc, char** argv);
44 | int freq_main(int argc, char **argv);
45 | int freq_merge_main(int argc, char **argv);
46 |
// Prints the top-level command summary to fp_help and terminates the process.
//
// fp_help : stderr -> exit(EXIT_FAILURE) after printing (usage error)
//           stdout -> exit(EXIT_SUCCESS) after printing (help was requested)
//
// Any other stream is a programming error: it trips the assert, and when
// asserts are compiled out the function returns EXIT_FAILURE instead of
// falling off the end of a non-void function.
int print_usage(FILE *fp_help){

    fprintf(fp_help,"Usage: f5c <command> [options]\n\n");
    fprintf(fp_help,"command:\n");
    fprintf(fp_help," index Build an index mapping from basecalled reads to the signals measured by the sequencer (same as nanopolish index)\n");
    fprintf(fp_help," call-methylation Classify nucleotides as methylated or not (optimised nanopolish call-methylation)\n");
    fprintf(fp_help," meth-freq Calculate methylation frequency at genomic CpG sites (optimised nanopolish calculate_methylation_frequency.py)\n");
    fprintf(fp_help," eventalign Align nanopore events to reference k-mers (optimised nanopolish eventalign)\n");
    fprintf(fp_help," freq-merge Merge calculated methylation frequency tsv files\n\n");
    if(fp_help==stderr){
        exit(EXIT_FAILURE);
    }
    else if(fp_help==stdout){
        exit(EXIT_SUCCESS);
    }

    assert(0);
    return EXIT_FAILURE;
}
68 |
69 |
70 | int main(int argc, char* argv[]){
71 |
72 | double realtime0 = realtime();
73 | signal(SIGSEGV, sig_handler);
74 |
75 | int ret=1;
76 |
77 | if(argc<2){
78 | return print_usage(stderr);
79 | }
80 | if(strcmp(argv[1],"index")==0){
81 | ret=index_main(argc-1, argv+1);
82 | }
83 | else if(strcmp(argv[1],"call-methylation")==0){
84 | ret=meth_main(argc-1, argv+1,0);
85 | }
86 | else if(strcmp(argv[1],"eventalign")==0){
87 | ret=meth_main(argc-1, argv+1,1);
88 | }
89 | else if(strcmp(argv[1],"meth-freq")==0){
90 | ret=freq_main(argc-1, argv+1);
91 | }
92 | else if(strcmp(argv[1],"freq-merge")==0){
93 | ret=freq_merge_main(argc-1, argv+1);
94 | }
95 | else if(strcmp(argv[1],"--version")==0 || strcmp(argv[1],"-V")==0){
96 | fprintf(stdout,"F5C %s\n",F5C_VERSION);
97 | exit(EXIT_SUCCESS);
98 | }
99 | else if(strcmp(argv[1],"--help")==0 || strcmp(argv[1],"-h")==0){
100 | print_usage(stdout);
101 | }
102 | else{
103 | fprintf(stderr,"[f5c] Unrecognised command %s\n",argv[1]);
104 | print_usage(stderr);
105 | }
106 |
107 | fprintf(stderr,"[%s] Version: %s\n", __func__,F5C_VERSION);
108 | fprintf(stderr, "[%s] CMD:", __func__);
109 | for (int i = 0; i < argc; ++i) {
110 | fprintf(stderr, " %s", argv[i]);
111 | }
112 |
113 | fprintf(stderr, "\n[%s] Real time: %.3f sec; CPU time: %.3f sec; Peak RAM: %.3f GB\n\n",
114 | __func__, realtime() - realtime0, cputime(),peakrss() / 1024.0 / 1024.0 / 1024.0);
115 |
116 | return ret;
117 | }
118 |
--------------------------------------------------------------------------------
/benchmarks/abea/src/matrix.h:
--------------------------------------------------------------------------------
1 | //---------------------------------------------------------
2 | // Copyright 2015 Ontario Institute for Cancer Research
3 | // Written by Jared Simpson (jared.simpson@oicr.on.ca)
4 | //---------------------------------------------------------
5 | //
6 | // nanopolish_matrix -- matrix manipulation functions
7 | //
8 | #ifndef MATRIX_H
9 | #define MATRIX_H
10 |
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 |
//
// Template Matrix for POD types
//
// cells  : flat buffer of n_rows * n_cols elements, addressed in row-major
//          order (index = row * n_cols + col)
// n_rows : number of rows
// n_cols : number of columns
//
template <typename T>
struct Matrix
{
    T* cells;
    uint32_t n_rows;
    uint32_t n_cols;
};

typedef Matrix<double> DoubleMatrix;
typedef Matrix<float> FloatMatrix;
typedef Matrix<uint32_t> UInt32Matrix;
typedef Matrix<uint8_t> UInt8Matrix;
32 |
33 | //
34 | template
35 | void allocate_matrix(Matrix& matrix, uint32_t n_rows, uint32_t n_cols)
36 | {
37 | matrix.n_rows = n_rows;
38 | matrix.n_cols = n_cols;
39 |
40 | uint32_t N = matrix.n_rows * matrix.n_cols;
41 | matrix.cells = (T*)malloc(N * sizeof(T));
42 | memset(matrix.cells, 0, N * sizeof(T));
43 | }
44 |
45 | //
46 | template
47 | void free_matrix(Matrix& matrix)
48 | {
49 | assert(matrix.cells != NULL);
50 | free(matrix.cells);
51 | matrix.cells = NULL;
52 | }
53 |
54 | // Copy a matrix and its contents
55 | template
56 | void copy_matrix(Matrix& new_matrix, const Matrix& old_matrix)
57 | {
58 | allocate_matrix(new_matrix, old_matrix.n_rows, old_matrix.n_cols);
59 | uint32_t bytes = sizeof(T) * new_matrix.n_rows * new_matrix.n_cols;
60 | memcpy(new_matrix.cells, old_matrix.cells, bytes);
61 | }
62 |
63 | template
64 | inline uint32_t cell(const Matrix& matrix, uint32_t row, uint32_t col)
65 | {
66 | return row * matrix.n_cols + col;
67 | }
68 |
69 | template
70 | inline void set(Matrix& matrix, uint32_t row, uint32_t col, U v)
71 | {
72 | uint32_t c = cell(matrix, row, col);
73 | matrix.cells[c] = v;
74 | }
75 |
76 | template
77 | inline T get(const Matrix& matrix, uint32_t row, uint32_t col)
78 | {
79 | uint32_t c = cell(matrix, row, col);
80 | return matrix.cells[c];
81 | }
82 |
83 | //
84 | inline void print_matrix(const DoubleMatrix& matrix, bool do_exp = false)
85 | {
86 | for(uint32_t i = 0; i < matrix.n_rows; ++i) {
87 | for(uint32_t j = 0; j < matrix.n_cols; ++j) {
88 | uint32_t c = cell(matrix, i, j);
89 | double v = matrix.cells[c];
90 | if(do_exp)
91 | v = exp(v);
92 | printf("%.3lf\t", v);
93 | }
94 | printf("\n");
95 | }
96 | }
97 |
98 | #endif
99 |
--------------------------------------------------------------------------------
/benchmarks/abea/src/model.c:
--------------------------------------------------------------------------------
1 |
2 | #include "model.h"
3 | #include "f5c.h"
4 | #include "f5cmisc.h"
5 | #include
6 | #include
7 | #include
8 |
9 | //#define DEBUG_MODEL_PRINT 1
10 |
11 | void read_model(model_t* model, const char* file) {
12 | FILE* fp = fopen(file, "r");
13 | F_CHK(fp, file);
14 |
15 | //these two are discarded from the model. hollow vars
16 | char kmer[10];
17 | float weight;
18 |
19 | //buffers for geline
20 | char* buffer =
21 | (char*)malloc(sizeof(char) * (100)); //READ+newline+nullcharacter
22 | MALLOC_CHK(buffer);
23 | size_t bufferSize = 100;
24 | ssize_t readlinebytes = 0;
25 |
26 | uint32_t num_k = 0;
27 | uint32_t i = 0;
28 |
29 | while ((readlinebytes = getline(&buffer, &bufferSize, fp)) != -1) {
30 | if (buffer[0] == '#' ||
31 | strcmp(
32 | buffer,
33 | "kmer\tlevel_mean\tlevel_stdv\tsd_mean\tsd_stdv\tweight\n") ==
34 | 0 ||
35 | buffer[0] == '\n' || buffer[0] == '\r') { //comments and header
36 | //todo : (make generic)
37 | //fprintf(stderr, "%s\n", buffer);
38 | continue;
39 | } else {
40 | //as sd_mean and sd_stdv seems not to be used just read to the summy weight
41 | #ifdef LOAD_SD_MEANSSTDV
42 | int32_t ret =
43 | sscanf(buffer, "%s\t%f\t%f\t%f\t%f\t%f", kmer,
44 | &model[num_k].level_mean, &model[num_k].level_stdv,
45 | &model[num_k].sd_mean, &model[num_k].sd_stdv, &weight);
46 | #else
47 | int32_t ret =
48 | sscanf(buffer, "%s\t%f\t%f\t%f\t%f\t%f", kmer,
49 | &model[num_k].level_mean, &model[num_k].level_stdv,
50 | &weight, &weight, &weight);
51 | #endif
52 | #ifdef CACHED_LOG
53 | model[num_k].level_log_stdv=log(model[num_k].level_stdv);
54 | #endif
55 | num_k++;
56 | if (ret != 6) {
57 | ERROR("File %s is corrupted at line %d", file, i);
58 | }
59 | if (num_k > NUM_KMER) {
60 | ERROR("File %s has too many entries. Expected %d kmers in the "
61 | "model, but file had more than that",
62 | file, NUM_KMER);
63 | exit(EXIT_FAILURE);
64 | }
65 | }
66 | i++;
67 | }
68 |
69 | if (num_k != NUM_KMER) {
70 | ERROR("File %s prematurely ended. Expected %d kmers in the model, but "
71 | "file had only%d",
72 | file, NUM_KMER, num_k);
73 | exit(EXIT_FAILURE);
74 | }
75 |
76 | #ifdef DEBUG_MODEL_PRINT
77 | i = 0;
78 | fprintf(stderr, "level_mean\tlevel_stdv\tsd_mean\tsd_stdv\n");
79 | for (i = 0; i < NUM_KMER; i++) {
80 | fprintf(stderr, "%f\t%f\t%f\t%f\n", model[i].level_mean,
81 | model[i].level_stdv, model[i].sd_mean, model[i].sd_stdv);
82 | }
83 | #endif
84 |
85 | free(buffer);
86 | fclose(fp);
87 | }
88 |
89 | //this function can be made more efficient by setting the address to the global variable
90 | void set_model(model_t* model) {
91 | uint32_t i = 0;
92 | for (i = 0; i < NUM_KMER; i++) {
93 | model[i].level_mean =
94 | r9_4_450bps_nucleotide_6mer_template_model_builtin_data[i * 4 + 0];
95 | model[i].level_stdv =
96 | r9_4_450bps_nucleotide_6mer_template_model_builtin_data[i * 4 + 1];
97 | #ifdef LOAD_SD_MEANSSTDV
98 | model[i].sd_mean =
99 | r9_4_450bps_nucleotide_6mer_template_model_builtin_data[i * 4 + 2];
100 | model[i].sd_stdv =
101 | r9_4_450bps_nucleotide_6mer_template_model_builtin_data[i * 4 + 3];
102 | #endif
103 | #ifdef CACHED_LOG
104 | model[i].level_log_stdv=log(model[i].level_stdv);
105 | #endif
106 | }
107 | #ifdef DEBUG_MODEL_PRINT
108 | i = 0;
109 | fprintf(stderr, "level_mean\tlevel_stdv\tsd_mean\tsd_stdv\n");
110 | for (i = 0; i < NUM_KMER; i++) {
111 | fprintf(stderr, "%f\t%f\t%f\t%f\n", model[i].level_mean,
112 | model[i].level_stdv, model[i].sd_mean, model[i].sd_stdv);
113 | }
114 | #endif
115 | }
116 |
117 | //todo : this function can be made more efficient by setting the address to the global variable
118 | //todo : duplicate function can be removed
119 | void set_cpgmodel(model_t* model) {
120 | uint32_t i = 0;
121 | for (i = 0; i < NUM_KMER_METH; i++) {
122 | model[i].level_mean =
123 | r9_4_450bps_cpg_6mer_template_model_builtin_data[i * 4 + 0];
124 | model[i].level_stdv =
125 | r9_4_450bps_cpg_6mer_template_model_builtin_data[i * 4 + 1];
126 | #ifdef LOAD_SD_MEANSSTDV
127 | model[i].sd_mean =
128 | r9_4_450bps_cpg_6mer_template_model_builtin_data[i * 4 + 2];
129 | model[i].sd_stdv =
130 | r9_4_450bps_cpg_6mer_template_model_builtin_data[i * 4 + 3];
131 | #endif
132 | #ifdef CACHED_LOG
133 | model[i].level_log_stdv=log(model[i].level_stdv);
134 | #endif
135 | }
136 | #ifdef DEBUG_MODEL_PRINT
137 | i = 0;
138 | fprintf(stderr, "level_mean\tlevel_stdv\tsd_mean\tsd_stdv\n");
139 | for (i = 0; i < NUM_KMER; i++) {
140 | fprintf(stderr, "%f\t%f\t%f\t%f\n", model[i].level_mean,
141 | model[i].level_stdv, model[i].sd_mean, model[i].sd_stdv);
142 | }
143 | #endif
144 | }
145 |
--------------------------------------------------------------------------------
/benchmarks/abea/src/nanopolish_read_db.h:
--------------------------------------------------------------------------------
1 | //---------------------------------------------------------
2 | // Copyright 2017 Ontario Institute for Cancer Research
3 | // Written by Jared Simpson (jared.simpson@oicr.on.ca)
4 | //---------------------------------------------------------
5 | //
6 | // nanopolish_read_db -- database of reads and their
7 | // associated signal data
8 | //
9 | #ifndef NANOPOLISH_READ_DB
10 | #define NANOPOLISH_READ_DB
11 |
12 | #include ]