├── .gitignore
├── ci
    ├── gpu
    │   ├── prebuild.sh
    │   ├── build.sh
    │   └── cuda_test.sh
    ├── cpu
    │   ├── prebuild.sh
    │   └── build.sh
    ├── checks
    │   ├── changelog.sh
    │   └── style.sh
    └── common
    │   └── build.sh
├── meson_options.txt
├── test
    ├── data
    │   ├── sample_reads.fasta.gz
    │   ├── sample_reads.fastq.gz
    │   ├── sample_layout.fasta.gz
    │   ├── sample_overlaps.paf.gz
    │   ├── sample_overlaps.sam.gz
    │   ├── sample_ava_overlaps.paf.gz
    │   ├── sample_reference.fasta.gz
    │   └── sample_ava_overlaps.mhap.gz
    ├── racon_test_config.h.in
    ├── meson.build
    └── racon_test.cpp
├── subprojects
    ├── zlib.wrap
    └── gtest.wrap
├── src
    ├── cuda
    │   ├── cudautils.hpp
    │   ├── cudapolisher.hpp
    │   ├── cudaaligner.hpp
    │   ├── cudaaligner.cpp
    │   ├── cudabatch.hpp
    │   ├── cudabatch.cpp
    │   └── cudapolisher.cpp
    ├── meson.build
    ├── version.hpp.in
    ├── logger.hpp
    ├── logger.cpp
    ├── sequence.hpp
    ├── window.hpp
    ├── sequence.cpp
    ├── polisher.hpp
    ├── overlap.hpp
    ├── window.cpp
    ├── main.cpp
    ├── overlap.cpp
    └── polisher.cpp
├── .gitmodules
├── Makefile
├── LICENSE
├── .travis.yml
├── vendor
    └── meson.build
├── scripts
    ├── racon_preprocess.py
    └── racon_wrapper.py
├── meson.build
├── CMakeLists.txt
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files
2 | build
3 | subprojects
4 | !subprojects/*.wrap
5 | build-meson
6 | 


--------------------------------------------------------------------------------
/ci/gpu/prebuild.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | export BUILD_FOR_GPU=1
4 | export TEST_ON_GPU=1
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/ci/cpu/prebuild.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | export BUILD_FOR_GPU=0
4 | export TEST_ON_CPU=1
5 | export TEST_ON_GPU=0
6 | 


--------------------------------------------------------------------------------
/meson_options.txt:
--------------------------------------------------------------------------------
1 | option('tests', type : 'boolean', value : true, description : 'Enable dependencies required for testing')
2 | 


--------------------------------------------------------------------------------
/test/data/sample_reads.fasta.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA-Genomics-Research/racon-gpu/HEAD/test/data/sample_reads.fasta.gz


--------------------------------------------------------------------------------
/test/data/sample_reads.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA-Genomics-Research/racon-gpu/HEAD/test/data/sample_reads.fastq.gz


--------------------------------------------------------------------------------
/test/data/sample_layout.fasta.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA-Genomics-Research/racon-gpu/HEAD/test/data/sample_layout.fasta.gz


--------------------------------------------------------------------------------
/test/data/sample_overlaps.paf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA-Genomics-Research/racon-gpu/HEAD/test/data/sample_overlaps.paf.gz


--------------------------------------------------------------------------------
/test/data/sample_overlaps.sam.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA-Genomics-Research/racon-gpu/HEAD/test/data/sample_overlaps.sam.gz


--------------------------------------------------------------------------------
/test/data/sample_ava_overlaps.paf.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA-Genomics-Research/racon-gpu/HEAD/test/data/sample_ava_overlaps.paf.gz


--------------------------------------------------------------------------------
/test/data/sample_reference.fasta.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA-Genomics-Research/racon-gpu/HEAD/test/data/sample_reference.fasta.gz


--------------------------------------------------------------------------------
/test/data/sample_ava_overlaps.mhap.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA-Genomics-Research/racon-gpu/HEAD/test/data/sample_ava_overlaps.mhap.gz


--------------------------------------------------------------------------------
/test/racon_test_config.h.in:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * @file racon_test_config.h.in
 3 |  *
 4 |  * @brief Racon test configuration file
 5 |  */
 6 | 
 7 | #include <string>
 8 | 
 9 | const std::string racon_test_data_path = "@racon_test_data_path@";
10 | 


--------------------------------------------------------------------------------
/subprojects/zlib.wrap:
--------------------------------------------------------------------------------
 1 | [wrap-file]
 2 | directory = zlib-1.2.11
 3 | 
 4 | source_url = http://zlib.net/fossils/zlib-1.2.11.tar.gz
 5 | source_filename = zlib-1.2.11.tar.gz
 6 | source_hash = c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1
 7 | 
 8 | patch_url = https://wrapdb.mesonbuild.com/v1/projects/zlib/1.2.11/3/get_zip
 9 | patch_filename = zlib-1.2.11-3-wrap.zip
10 | patch_hash = f07dc491ab3d05daf00632a0591e2ae61b470615b5b73bcf9b3f061fff65cff0
11 | 


--------------------------------------------------------------------------------
/subprojects/gtest.wrap:
--------------------------------------------------------------------------------
 1 | [wrap-file]
 2 | directory = googletest-release-1.8.0
 3 | 
 4 | source_url = https://github.com/google/googletest/archive/release-1.8.0.zip
 5 | source_filename = gtest-1.8.0.zip
 6 | source_hash = f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf
 7 | 
 8 | patch_url = https://wrapdb.mesonbuild.com/v1/projects/gtest/1.8.0/5/get_zip
 9 | patch_filename = gtest-1.8.0-5-wrap.zip
10 | patch_hash = 7eeaede4aa2610a403313b74e04baf91ccfbaef03203d8f56312e22df1834ec5
11 | 


--------------------------------------------------------------------------------
/src/cuda/cudautils.hpp:
--------------------------------------------------------------------------------
 1 | // Implementation file for CUDA POA utilities.
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <stdlib.h>
 6 | #include <cuda_runtime_api.h>
 7 | 
 8 | namespace racon {
 9 | 
10 | void cudaCheckError(std::string &msg)
11 | {
12 |     cudaError_t error = cudaGetLastError();
13 |     if (error != cudaSuccess)
14 |     {
15 |         fprintf(stderr, "%s (CUDA error %s)\n", msg.c_str(), cudaGetErrorString(error));
16 |         exit(-1);
17 |     }
18 | }
19 | 
20 | } // namespace racon
21 | 


--------------------------------------------------------------------------------
/test/meson.build:
--------------------------------------------------------------------------------
 1 | racon_test_cpp_sources = files([
 2 |   'racon_test.cpp'
 3 | ])
 4 | 
 5 | racon_test_include_directories = [include_directories('.')]
 6 | 
 7 | racon_test_extra_flags = []
 8 | 
 9 | racon_test_config_h_vars = configuration_data()
10 | racon_test_config_h_vars.set('racon_test_data_path', meson.source_root() + '/test/data/')
11 | racon_test_config_h = configure_file(
12 |   input : files('racon_test_config.h.in'),
13 |   output : 'racon_test_config.h',
14 |   configuration : racon_test_config_h_vars)
15 | 


--------------------------------------------------------------------------------
/ci/checks/changelog.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright (c) 2018, NVIDIA CORPORATION.
 3 | #########################
 4 | # cuDF CHANGELOG Tester #
 5 | #########################
 6 | 
 7 | # Checkout master for comparison
 8 | git checkout --quiet master
 9 | 
10 | # Switch back to tip of PR branch
11 | git checkout --quiet current-pr-branch
12 | 
13 | # Ignore errors during searching
14 | set +e
15 | 
16 | # Get list of modified files between master and PR branch
17 | CHANGELOG=`git diff --name-only master...current-pr-branch | grep CHANGELOG.md`
18 | RETVAL=0
19 | 
20 | exit $RETVAL
21 | 


--------------------------------------------------------------------------------
/ci/cpu/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright (c) 2018, NVIDIA CORPORATION.
 3 | ######################################
 4 | # cuDF CPU conda build script for CI #
 5 | ######################################
 6 | set -e
 7 | 
 8 | # Logger function for build status output
 9 | function logger() {
10 |   echo -e "\n>>>> $@\n"
11 | }
12 | 
13 | cd ${WORKSPACE}
14 | 
15 | LOCAL_BUILD_DIR=${WORKSPACE}/build
16 | 
17 | . ci/common/build.sh ${LOCAL_BUILD_DIR}
18 | 
19 | if [ "${TEST_ON_CPU}" == '1' ]; then
20 |   logger "Running CPU-based test..."
21 |   cd ${LOCAL_BUILD_DIR}/bin
22 | 
23 |   logger "Test results..."
24 |   ./racon_test
25 | fi
26 | 
27 | 


--------------------------------------------------------------------------------
/src/meson.build:
--------------------------------------------------------------------------------
 1 | racon_cpp_sources = files([
 2 |   'logger.cpp',
 3 |   'overlap.cpp',
 4 |   'polisher.cpp',
 5 |   'sequence.cpp',
 6 |   'window.cpp'
 7 | ])
 8 | 
 9 | racon_extra_flags = []
10 | 
11 | racon_lib_install = (not meson.is_subproject()) or (get_option('default_library') == 'shared')
12 | 
13 | racon_lib = library(
14 |   'racon',
15 |   racon_cpp_sources,
16 |   soversion : 0,
17 |   version : meson.project_version(),
18 |   install : racon_lib_install,
19 |   link_with : vendor_lib,
20 |   dependencies : [racon_thread_dep, racon_zlib_dep],
21 |   include_directories : racon_include_directories + vendor_include_directories,
22 |   cpp_args : [racon_extra_flags, racon_warning_flags, racon_cpp_flags])
23 | 


--------------------------------------------------------------------------------
/ci/checks/style.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright (c) 2018, NVIDIA CORPORATION.
 3 | #####################
 4 | # cuDF Style Tester #
 5 | #####################
 6 | 
 7 | # Ignore errors and set path
 8 | set +e
 9 | PATH=/conda/bin:$PATH
10 | 
11 | # Activate common conda env
12 | source activate gdf
13 | 
14 | # Run flake8 and get results/return code
15 | #FLAKE=`flake8 python`
16 | #RETVAL=$?
17 | RETVAL=0
18 | 
19 | # Output results if failure otherwise show pass
20 | #if [ "$FLAKE" != "" ]; then
21 | #  echo -e "\n\n>>>> FAILED: flake8 style check; begin output\n\n"
22 | #  echo -e "$FLAKE"
23 | #  echo -e "\n\n>>>> FAILED: flake8 style check; end output\n\n"
24 | #else
25 | #  echo -e "\n\n>>>> PASSED: flake8 style check\n\n"
26 | #fi
27 | 
28 | exit $RETVAL
29 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "vendor/bioparser"]
 2 | 	path = vendor/bioparser
 3 | 	url = https://github.com/rvaser/bioparser
 4 | [submodule "vendor/spoa"]
 5 | 	path = vendor/spoa
 6 | 	url = https://github.com/rvaser/spoa
 7 | [submodule "vendor/thread_pool"]
 8 | 	path = vendor/thread_pool
 9 | 	url = https://github.com/rvaser/thread_pool
10 | [submodule "vendor/edlib"]
11 | 	path = vendor/edlib
12 | 	url = https://github.com/martinsos/edlib
13 | [submodule "vendor/googletest"]
14 | 	path = vendor/googletest
15 | 	url = https://github.com/google/googletest
16 | [submodule "vendor/rampler"]
17 | 	path = vendor/rampler
18 | 	url = https://github.com/rvaser/rampler
19 | [submodule "vendor/GenomeWorks"]
20 | 	path = vendor/GenomeWorks
21 | 	url = https://github.com/clara-parabricks/GenomeWorks.git
22 | 	branch = master
23 | 


--------------------------------------------------------------------------------
/src/version.hpp.in:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * @file version.hpp
 3 |  *
 4 |  * @brief Version information for the entire project.
 5 |  *
 6 |  * This is a template: the placeholders delimited by at-signs below are
 7 |  * presumably filled in by the build system's configure step.
 8 |  */
 9 | 
10 | #pragma once
11 | 
12 | #include <cstdint>
13 | #include <string>
14 | 
15 | // Numeric version components and the commit identifier.
16 | static const std::int32_t RACON_VERSION_MAJOR = @RACON_VERSION_MAJOR@;
17 | static const std::int32_t RACON_VERSION_MINOR = @RACON_VERSION_MINOR@;
18 | static const std::int32_t RACON_VERSION_PATCH = @RACON_VERSION_PATCH@;
19 | static const std::string RACON_VERSION_COMMIT("@RACON_VERSION_COMMIT@");
20 | 
21 | // Full version in the form "<major>.<minor>.<patch>-<commit>".
22 | static const std::string RACON_VERSION_STRING =
23 |     std::to_string(RACON_VERSION_MAJOR) + "." +
24 |     std::to_string(RACON_VERSION_MINOR) + "." +
25 |     std::to_string(RACON_VERSION_PATCH) + "-" +
26 |     RACON_VERSION_COMMIT;
27 | 
28 | // Date and time at which the including translation unit was compiled.
29 | static const std::string COMPILE_DATE = (std::string(__DATE__) + std::string(" at ") + std::string(__TIME__));
30 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: all clean meson rebuild cmake debug dist modules
 2 | 
 3 | all: meson
 4 | 
 5 | # Remove the build directories created by the cmake, meson and debug targets.
 6 | clean:
 7 | 	rm -rf build build-meson build-debug
 8 | 
 9 | # Configure and build a release with Meson in build-meson.
10 | meson: modules
11 | 	@echo "[Invoking Meson]"
12 | 	@mkdir -p build-meson && cd build-meson && meson --buildtype=release -Dc_args=-O3 -Dtests=true && ninja
13 | 
14 | # Re-run Ninja in an already configured build-meson directory.
15 | rebuild: modules
16 | 	@echo "[Running Ninja only]"
17 | 	@ninja -C build-meson
18 | 
19 | # Configure and build a release with CMake in build.
20 | cmake: modules
21 | 	@echo "[Invoking CMake]"
22 | 	@mkdir -p build && cd build && cmake -DCMAKE_BUILD_TYPE=Release -Dracon_build_tests=ON .. && make
23 | 
24 | # Configure and build with debug info and AddressSanitizer in build-debug.
25 | debug: modules
26 | 	@echo "[Invoking Meson]"
27 | 	@mkdir -p build-debug && cd build-debug && (meson --buildtype=debugoptimized -Db_sanitize=address -Dtests=true) && ninja
28 | 
29 | # Create the distribution archive. Runs in build-meson because the `dist`
30 | # target is provided by Meson's Ninja backend, so the Meson build must
31 | # exist first.
32 | dist: meson
33 | 	cd build-meson && ninja dist
34 | 
35 | # Initialise and update the git submodules.
36 | modules:
37 | 	@echo "[Fetching submodules]"
38 | 	@git submodule update --init
39 | 


--------------------------------------------------------------------------------
/ci/gpu/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright (c) 2018, NVIDIA CORPORATION.
 3 | ######################################
 4 | # cuDF GPU conda build script for CI #
 5 | ######################################
 6 | set -e
 7 | 
 8 | # Logger function for build status output
 9 | function logger() {
10 |   echo -e "\n>>>> $@\n"
11 | }
12 | 
13 | cd ${WORKSPACE}
14 | 
15 | LOCAL_BUILD_DIR=${WORKSPACE}/build
16 | 
17 | . ci/common/build.sh ${LOCAL_BUILD_DIR}
18 | 
19 | # GPU test stage: runs only when TEST_ON_GPU is set to 1.
20 | if [ "${TEST_ON_GPU}" == '1' ]; then
21 |   logger "GPU config..."
22 |   nvidia-smi
23 | 
24 |   logger "Running GPU-based test..."
25 | 
26 |   logger "Pulling GPU test data..."
27 |   cd "${WORKSPACE}"
28 |   # Download the archive only when neither the extracted directory nor a
29 |   # previously downloaded tarball is present. The file name is a literal:
30 |   # wrapping it in ${...} would be parsed as a parameter expansion with a
31 |   # default value and test for the wrong file.
32 |   if [ ! -d "ont-racon-data" ]; then
33 |     if [ ! -f "ont-racon-data.tar.gz" ]; then
34 |       wget -q -L https://s3.us-east-2.amazonaws.com/racon-data/ont-racon-data.tar.gz
35 |     fi
36 |     tar xvzf ont-racon-data.tar.gz
37 |   fi
38 |   ci/gpu/cuda_test.sh
39 | 
40 |   logger "Unit test results..."
41 |   cd "${LOCAL_BUILD_DIR}/bin"
42 |   # Quote the filter so the shell cannot glob-expand it against local files.
43 |   ./racon_test --gtest_filter='*CUDA*'
44 | fi
45 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Ivan Sović, Robert Vaser, Mile Šikić
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: trusty
 2 | 
 3 | language: cpp
 4 | 
 5 | compiler:
 6 |     - clang
 7 |     - gcc
 8 | 
 9 | before_install:
10 |     # cmake 3.2
11 |     - sudo add-apt-repository ppa:george-edison55/cmake-3.x -y
12 | 
13 |     # g++4.8.1
14 |     - if [ "$CXX" == "g++" ]; then sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test; fi
15 | 
16 |     # clang 3.4
17 |     - if [ "$CXX" == "clang++" ]; then sudo add-apt-repository -y ppa:h-rayflood/llvm; fi
18 | 
19 |     - sudo apt-get update -qq
20 | 
21 | install:
22 |     # cmake 3.2
23 |     - sudo apt-get install cmake cmake-data
24 | 
25 |     # g++4.8.1
26 |     - if [ "$CXX" == "g++" ]; then sudo apt-get install -qq g++-4.8; fi
27 |     - if [ "$CXX" == "g++" ]; then export CXX="g++-4.8"; fi
28 | 
29 |     # clang 3.4
30 |     - if [ "$CXX" == "clang++" ]; then sudo apt-get install --allow-unauthenticated -qq clang-3.4; fi
31 |     - if [ "$CXX" == "clang++" ]; then export CXX="clang++-3.4"; fi
32 | 
33 | script:
34 |     - mkdir build
35 |     - cd build
36 |     - cmake -Dracon_build_tests=ON -DCMAKE_BUILD_TYPE=Release ..
37 |     - make
38 |     - ./bin/racon_test
39 | 
40 | notifications:
41 |     email:
42 |         on_success: change
43 |         on_failure: always
44 | 


--------------------------------------------------------------------------------
/ci/gpu/cuda_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Runs racon on the ONT sample data under ${HOME}/ont-racon-data/nvidia and
 4 | # compares its output with golden-output.txt located next to this script.
 5 | #
 6 | # Usage: cuda_test.sh [BATCHES]
 7 | #   BATCHES  value passed to racon's -c option (default: 2)
 8 | #
 9 | # Exit status: 1 when the bin directory or golden file is missing,
10 | # otherwise the exit status of diff (0 when the outputs match).
11 | 
12 | SCRIPT_DIRECTORY=$(dirname "$0")
13 | BIN_DIRECTORY="${SCRIPT_DIRECTORY}/../../build/bin"
14 | DATA="${HOME}/ont-racon-data/nvidia"
15 | RESULT_FILE="test-output.txt"
16 | GOLDEN_FILE="${SCRIPT_DIRECTORY}/golden-output.txt"
17 | 
18 | if [ $# -eq 0 ]; then
19 |     BATCHES=2
20 | else
21 |     BATCHES=$1
22 | fi
23 | 
24 | if [ ! -d "$BIN_DIRECTORY" ]; then
25 |     echo "Could not find bin directory for racon binary"
26 |     exit 1
27 | fi
28 | 
29 | # Start from a clean slate so a stale result is never compared.
30 | if [ -f "$RESULT_FILE" ]; then
31 |     rm "$RESULT_FILE"
32 | fi
33 | 
34 | if [ ! -f "$GOLDEN_FILE" ]; then
35 |     echo "Could not find golden value file at $GOLDEN_FILE"
36 |     exit 1
37 | fi
38 | 
39 | # Keep the command as an array so paths containing spaces stay intact.
40 | CMD=("$BIN_DIRECTORY/racon" --cudaaligner-batches 0 -c "${BATCHES}" -m 8 -x -6 -g -8 -w 500 -t 24 -q -1
41 |      "$DATA/iterated_racon/reads.fa.gz" "$DATA/iterated_racon/reads2contigs_1_1.paf.gz" "$DATA/canu.contigs.fasta")
42 | echo "Running command:"
43 | echo "${CMD[@]}"
44 | "${CMD[@]}" > "${RESULT_FILE}"
45 | diff "${RESULT_FILE}" "${GOLDEN_FILE}" > /dev/null 2>&1
46 | RES=$?
47 | 
48 | if [ "$RES" -eq 0 ]; then
49 |     echo "Test passed."
50 |     rm "$RESULT_FILE"
51 | else
52 |     # The result file is kept on failure so it can be inspected.
53 |     echo "Test failed."
54 |     echo "Result in ${RESULT_FILE}, golden values in ${GOLDEN_FILE}"
55 | fi
56 | 
57 | exit $RES
58 | 


--------------------------------------------------------------------------------
/vendor/meson.build:
--------------------------------------------------------------------------------
 1 | vendor_cpp_sources = files([
 2 |   'edlib/edlib/src/edlib.cpp',
 3 |   'rampler/src/sampler.cpp',
 4 |   'rampler/src/sequence.cpp',
 5 |   'spoa/src/alignment_engine.cpp',
 6 |   'spoa/src/graph.cpp',
 7 |   'spoa/src/sequence.cpp',
 8 |   'spoa/src/simd_alignment_engine.cpp',
 9 |   'spoa/src/sisd_alignment_engine.cpp',
10 |   'thread_pool/src/thread_pool.cpp'
11 | ])
12 | 
13 | vendor_include_directories = [
14 |                 include_directories('bioparser/include'),
15 |                 include_directories('edlib/edlib/include'),
16 |                 include_directories('rampler/src'),
17 |                 include_directories('spoa/include'),
18 |                 include_directories('thread_pool/include')
19 |                 ]
20 | 
21 | vendor_extra_flags = []
22 | 
23 | vendor_lib_install = (not meson.is_subproject()) or (get_option('default_library') == 'shared')
24 | 
25 | vendor_lib = library(
26 |   'vendor',
27 |   vendor_cpp_sources,
28 |   soversion : 0,
29 |   version : meson.project_version(),
30 |   install : vendor_lib_install,
31 |   link_with : [],
32 |   dependencies : [racon_thread_dep, racon_zlib_dep],
33 |   include_directories : vendor_include_directories,
34 |   cpp_args : [vendor_extra_flags, racon_warning_flags, racon_cpp_flags])
35 | 


--------------------------------------------------------------------------------
/src/logger.hpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * @file logger.hpp
 3 |  *
 4 |  * @brief Logger header file
 5 |  */
 6 | 
 7 | #pragma once
 8 | 
 9 | #include <cstdint>
10 | #include <chrono>
11 | #include <string>
12 | 
13 | namespace racon {
14 | 
15 | static const std::string version = "v1.0.0";
16 | 
17 | class Logger {
18 | public:
19 |     Logger();
20 | 
21 |     Logger(const Logger&) = default;
22 |     Logger& operator=(const Logger&) = default;
23 | 
24 |     Logger(Logger&&) = default;
25 |     Logger& operator=(Logger&&) = default;
26 | 
27 |     ~Logger();
28 | 
29 |     /*!
30 |      * @brief Resets the time point, adding the time elapsed since the previous one (if any) to the running total
31 |      */
32 |     void log();
33 | 
34 |     /*!
35 |      * @brief Prints the elapsed time from last time point to stderr
36 |      */
37 |     void log(const std::string& msg) const;
38 | 
39 |     /*!
40 |      * @brief Prints a progress bar and the elapsed time from last time to
41 |      * stderr (the progress bar resets after 20 calls)
42 |      */
43 |     void bar(const std::string& msg);
44 | 
45 |     /*!
46 |      * @brief Prints the total elapsed time from the first log() call
47 |      */
48 |     void total(const std::string& msg) const;
49 | 
50 | private:
51 |     double time_;
52 |     std::uint32_t bar_;
53 |     std::chrono::time_point<std::chrono::steady_clock> time_point_;
54 | };
55 | 
56 | }
57 | 


--------------------------------------------------------------------------------
/src/logger.cpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * @file logger.cpp
 3 |  *
 4 |  * @brief Logger source file
 5 |  */
 6 | 
 7 | #include <iostream>
 8 | 
 9 | #include "logger.hpp"
10 | 
11 | namespace racon {
12 | 
13 | Logger::Logger()
14 |         : time_(0.), bar_(0), time_point_() {
15 | }
16 | 
17 | Logger::~Logger() {
18 | }
19 | 
20 | void Logger::log() {
21 |     const std::chrono::steady_clock::time_point current = std::chrono::steady_clock::now();
22 |     if (time_point_ != std::chrono::steady_clock::time_point()) {  // an interval was started earlier
23 |         time_ += std::chrono::duration<double>(current - time_point_).count();
24 |     }
25 |     time_point_ = current;
26 | }
27 | 
28 | void Logger::log(const std::string& msg) const {
29 |     // Seconds elapsed since the stored time point.
30 |     const std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - time_point_;
31 |     std::cerr << msg << " " << std::fixed << elapsed.count() << " s" << std::endl;
32 | }
33 | 
34 | void Logger::bar(const std::string& msg) {
35 |     // One more step is done; a full bar (20 steps) has no '>' head or padding.
36 |     ++bar_;
37 |     const std::string done(bar_, '=');
38 |     const std::string todo = bar_ == 20 ? "" : ">" + std::string(19 - bar_, ' ');
39 |     const std::string progress_bar = "[" + done + todo + "]";
40 | 
41 |     const std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - time_point_;
42 |     std::cerr << msg << " " << progress_bar << " " << std::fixed << elapsed.count() << " s";
43 | 
44 |     // Wrap the step counter: end the line after a full bar, otherwise return
45 |     // the cursor to the start of the line so the next call overwrites it.
46 |     bar_ %= 20;
47 |     if (bar_ != 0) { std::cerr << "\r"; } else { std::cerr << std::endl; }
48 | }
49 | 
50 | void Logger::total(const std::string& msg) const {
51 |     // Previously accumulated time plus the interval that is still running.
52 |     const std::chrono::duration<double> running = std::chrono::steady_clock::now() - time_point_;
53 |     std::cerr << msg << " " << std::fixed << time_ + running.count() << " s" << std::endl;
54 | }
55 | 
56 | }
57 | 


--------------------------------------------------------------------------------
/ci/common/build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Copyright (c) 2018, NVIDIA CORPORATION.
 3 | ######################################
 4 | # racon-gpu CPU/GPU conda build script for CI #
 5 | ######################################
 6 | set -e
 7 | 
 8 | # Get commandline arguments
 9 | LOCAL_BUILD_DIR=$1
10 | 
11 | # Logger function for build status output
12 | function logger() {
13 |   echo -e "\n>>>> $@\n"
14 | }
15 | 
16 | # Set path and build parallel level
17 | export PATH=/conda/bin:/usr/local/cuda/bin:$PATH
18 | export PARALLEL_LEVEL=4
19 | 
20 | # Set home to the job's workspace
21 | export HOME=$WORKSPACE
22 | 
23 | # Switch to project root; also root of repo checkout
24 | cd $WORKSPACE
25 | 
26 | ################################################################################
27 | # SETUP - Check environment
28 | ################################################################################
29 | 
30 | logger "Get env..."
31 | env
32 | 
33 | logger "Check versions..."
34 | gcc --version
35 | g++ --version
36 | 
37 | # FIX Added to deal with Anaconda SSL verification issues during conda builds
38 | conda config --set ssl_verify False
39 | 
40 | conda install \
41 |     -c conda-forge \
42 |     -c sarcasm \
43 |     -c bioconda \
44 |     doxygen \
45 |     ninja \
46 |     cmake
47 | 
48 | CUDA_REL=${CUDA:0:3}
49 | if [ "${CUDA:0:2}" == '10' ]; then
50 |   # CUDA 10 release
51 |   CUDA_REL=${CUDA:0:4}
52 | fi
53 | 
54 | git clean -xdf
55 | 
56 | CMAKE_COMMON_VARIABLES="-DCMAKE_BUILD_TYPE=Release -Dracon_build_tests=ON"
57 | 
58 | if [ "${BUILD_FOR_GPU}" == '1' ]; then
59 |   CMAKE_BUILD_GPU="-Dracon_enable_cuda=ON"
60 | else
61 |   CMAKE_BUILD_GPU="-Dracon_enable_cuda=OFF"
62 | fi
63 | 
64 | # Use CMake-based build procedure
65 | mkdir --parents ${LOCAL_BUILD_DIR}
66 | cd ${LOCAL_BUILD_DIR}
67 | 
68 | # configure
69 | cmake $CMAKE_COMMON_VARIABLES ${CMAKE_BUILD_GPU} ..
70 | # build
71 | make -j${PARALLEL_LEVEL} VERBOSE=1 all
72 | 


--------------------------------------------------------------------------------
/src/sequence.hpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * @file sequence.hpp
 3 |  *
 4 |  * @brief Sequence class header file
 5 |  */
 6 | 
 7 | #pragma once
 8 | 
 9 | #include <stdint.h>
10 | #include <memory>
11 | #include <vector>
12 | #include <string>
13 | 
14 | namespace bioparser {
15 |     template<class T>
16 |     class FastaParser;
17 | 
18 |     template<class T>
19 |     class FastqParser;
20 | }
21 | 
22 | namespace racon {
23 | 
24 | class Sequence;
25 | std::unique_ptr<Sequence> createSequence(const std::string& name,
26 |     const std::string& data);
27 | 
28 | class Sequence {
29 | public:
30 |     ~Sequence() = default;
31 | 
32 |     const std::string& name() const {
33 |         return name_;
34 |     }
35 | 
36 |     const std::string& data() const {
37 |         return data_;
38 |     }
39 | 
40 |     const std::string& reverse_complement() const {
41 |         return reverse_complement_;
42 |     }
43 | 
44 |     const std::string& quality() const {
45 |         return quality_;
46 |     }
47 | 
48 |     const std::string& reverse_quality() const {
49 |         return reverse_quality_;
50 |     }
51 | 
52 |     void create_reverse_complement();
53 | 
54 |     void transmute(bool has_name, bool has_data, bool has_reverse_data);
55 | 
56 |     friend bioparser::FastaParser<Sequence>;
57 |     friend bioparser::FastqParser<Sequence>;
58 |     friend std::unique_ptr<Sequence> createSequence(const std::string& name,
59 |         const std::string& data);
60 | private:
61 |     Sequence(const char* name, uint32_t name_length, const char* data,
62 |         uint32_t data_length);
63 |     Sequence(const char* name, uint32_t name_length, const char* data,
64 |         uint32_t data_length, const char* quality, uint32_t quality_length);
65 |     Sequence(const std::string& name, const std::string& data);
66 |     Sequence(const Sequence&) = delete;
67 |     const Sequence& operator=(const Sequence&) = delete;
68 | 
69 |     std::string name_;
70 |     std::string data_;
71 |     std::string reverse_complement_;
72 |     std::string quality_;
73 |     std::string reverse_quality_;
74 | };
75 | 
76 | }
77 | 


--------------------------------------------------------------------------------
/src/window.hpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * @file window.hpp
 3 |  *
 4 |  * @brief Window class header file
 5 |  */
 6 | 
 7 | #pragma once
 8 | 
 9 | #include <stdint.h>
10 | #include <stdlib.h>
11 | #include <vector>
12 | #include <memory>
13 | #include <string>
14 | #include <utility>
15 | 
16 | namespace spoa {
17 |     class AlignmentEngine;
18 | }
19 | 
20 | namespace racon {
21 | 
22 | enum class WindowType {
23 |     kNGS, // Next Generation Sequencing
24 |     kTGS // Third Generation Sequencing
25 | };
26 | 
27 | class Window;
28 | 
29 | /*!
30 |  * @brief Factory for Window objects; the Window constructor is private,
31 |  * so this friend function is the way to obtain an instance
32 |  */
33 | std::shared_ptr<Window> createWindow(uint64_t id, uint32_t rank, WindowType type,
34 |     const char* backbone, uint32_t backbone_length, const char* quality,
35 |     uint32_t quality_length);
36 | 
37 | /*!
38 |  * @brief A window identified by id and rank. It holds (pointer, length)
39 |  * entries for sequences and qualities, a list of position pairs and the
40 |  * consensus string returned by consensus(). Instances are non-copyable.
41 |  */
42 | class Window {
43 | 
44 | public:
45 |     ~Window();
46 | 
47 |     /*!
48 |      * @brief Returns the id passed at construction
49 |      */
50 |     uint64_t id() const {
51 |         return id_;
52 |     }
53 | 
54 |     /*!
55 |      * @brief Returns the rank passed at construction
56 |      */
57 |     uint32_t rank() const {
58 |         return rank_;
59 |     }
60 | 
61 |     /*!
62 |      * @brief Returns the stored consensus string
63 |      */
64 |     const std::string& consensus() const {
65 |         return consensus_;
66 |     }
67 | 
68 |     /*!
69 |      * @brief Generates the consensus with the given alignment engine
70 |      * NOTE(review): defined outside this header; the meaning of the
71 |      * returned bool and of trim should be confirmed against the definition
72 |      */
73 |     bool generate_consensus(std::shared_ptr<spoa::AlignmentEngine> alignment_engine,
74 |         bool trim);
75 | 
76 |     /*!
77 |      * @brief Adds a layer described by a sequence buffer, a quality buffer
78 |      * and a begin/end pair
79 |      * NOTE(review): defined outside this header; presumably begin/end are
80 |      * positions inside the window - confirm against the definition
81 |      */
82 |     void add_layer(const char* sequence, uint32_t sequence_length,
83 |         const char* quality, uint32_t quality_length, uint32_t begin,
84 |         uint32_t end);
85 | 
86 |     friend std::shared_ptr<Window> createWindow(uint64_t id, uint32_t rank,
87 |         WindowType type, const char* backbone, uint32_t backbone_length,
88 |         const char* quality, uint32_t quality_length);
89 | 
90 | #ifdef CUDA_ENABLED
91 |     // Grants the CUDA batch processor access to the private members.
92 |     friend class CUDABatchProcessor;
93 | #endif
94 | private:
95 |     Window(uint64_t id, uint32_t rank, WindowType type, const char* backbone,
96 |         uint32_t backbone_length, const char* quality, uint32_t quality_length);
97 |     Window(const Window&) = delete;
98 |     const Window& operator=(const Window&) = delete;
99 | 
100 |     uint64_t id_;
101 |     uint32_t rank_;
102 |     WindowType type_;
103 |     std::string consensus_;
104 |     // Non-owning (pointer, length) pairs; the pointed-to buffers must
105 |     // outlive the window (presumably owned by the caller - confirm).
106 |     std::vector<std::pair<const char*, uint32_t>> sequences_;
107 |     std::vector<std::pair<const char*, uint32_t>> qualities_;
108 |     std::vector<std::pair<uint32_t, uint32_t>> positions_;
109 | };
110 | 
111 | }
112 | 


--------------------------------------------------------------------------------
/src/cuda/cudapolisher.hpp:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * @file cudapolisher.hpp
 3 |  *
 4 |  * @brief CUDA Polisher class header file
 5 |  */
 6 | 
 7 | #pragma once
 8 | 
 9 | #include <mutex>
10 | 
11 | #include "polisher.hpp"
12 | #include "cudabatch.hpp"
13 | #include "cudaaligner.hpp"
14 | #include "thread_pool/thread_pool.hpp"
15 | 
16 | 
17 | namespace racon {
18 | 
19 | class CUDAPolisher : public Polisher {
20 | public:
21 |     ~CUDAPolisher();
22 |     // Overrides Polisher::polish.
23 |     virtual void polish(std::vector<std::unique_ptr<Sequence>>& dst,
24 |         bool drop_unpolished_sequences) override;
25 |     // The factory is a friend so that it can reach the protected constructor.
26 |     friend std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
27 |         const std::string& overlaps_path, const std::string& target_path,
28 |         PolisherType type, uint32_t window_length, double quality_threshold,
29 |         double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
30 |         uint32_t num_threads, uint32_t cudapoa_batches, bool cuda_banded_alignment,
31 |         uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width);
32 | 
33 | protected:
34 |     CUDAPolisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
35 |         std::unique_ptr<bioparser::Parser<Overlap>> oparser,
36 |         std::unique_ptr<bioparser::Parser<Sequence>> tparser,
37 |         PolisherType type, uint32_t window_length, double quality_threshold,
38 |         double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
39 |         uint32_t num_threads, uint32_t cudapoa_batches, bool cuda_banded_alignment,
40 |         uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width);
41 |     CUDAPolisher(const CUDAPolisher&) = delete; // copying is disabled
42 |     const CUDAPolisher& operator=(const CUDAPolisher&) = delete;
43 |     virtual void find_overlap_breaking_points(std::vector<std::unique_ptr<Overlap>>& overlaps) override;
44 |     // NOTE(review): presumably distributes cudapoa_batches over the given number of GPUs - confirm in cudapolisher.cpp.
45 |     static std::vector<uint32_t> calculate_batches_per_gpu(uint32_t cudapoa_batches, uint32_t gpus);
46 | 
47 |     // Vector of POA batches.
48 |     std::vector<std::unique_ptr<CUDABatchProcessor>> batch_processors_;
49 | 
50 |     // Vector of aligner batches.
51 |     std::vector<std::unique_ptr<CUDABatchAligner>> batch_aligners_;
52 | 
53 |     // Vector of bool indicating consensus generation status for each window.
54 |     std::vector<bool> window_consensus_status_;
55 | 
56 |     // Number of batches for POA processing.
57 |     uint32_t cudapoa_batches_;
58 | 
59 |     // Number of batches for alignment processing.
60 |     uint32_t cudaaligner_batches_;
61 | 
62 |     // Number of GPU devices to run with.
63 |     int32_t num_devices_;
64 | 
65 |     // State transition scores.
66 |     int8_t gap_;
67 |     int8_t mismatch_;
68 |     int8_t match_;
69 | 
70 |     // Use banded POA alignment.
71 |     bool cuda_banded_alignment_;
72 | 
73 |     // Band parameter for pairwise alignment.
74 |     uint32_t cudaaligner_band_width_;
75 | };
76 | 
77 | }
78 | 


--------------------------------------------------------------------------------
/scripts/racon_preprocess.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from __future__ import print_function
 4 | import os, sys, argparse
 5 | 
 6 | def eprint(*args, **kwargs):  # same call signature as print(), but the output goes to stderr
 7 |     print(*args, file=sys.stderr, **kwargs)
 8 | 
 9 | #*******************************************************************************
10 | 
11 | def parse_file(file_name, read_set):  # reads FASTQ records from file_name and prints them to stdout with suffixed headers
12 |     line_id = 0  # parser state: 0 = expecting a header line, 1 = sequence lines, 2 = quality lines
13 |     name = ''
14 |     data = ''
15 |     qual = ''
16 |     valid = False  # True once a complete record is buffered and still has to be printed
17 |     with (open(file_name)) as f:
18 |         for line in f:
19 |             if (line_id == 0):
20 |                 if (valid):  # print the previously buffered record before starting the next one
21 |                     if (len(name) == 0 or len(data) == 0 or len(data) != len(qual)):
22 |                         eprint('File is not in FASTQ format')
23 |                         sys.exit(1)
24 |                     valid = False
25 |                     if (name in read_set):  # header already seen (also across files sharing read_set): suffix '2'
26 |                         print(name + '2')
27 |                     else:  # first occurrence: remember the header and suffix '1'
28 |                         read_set.add(name)
29 |                         print(name + '1')
30 |                     print(data)
31 |                     print('+')
32 |                     print(qual)
33 |                 name = line.rstrip().split(' ')[0]  # keep the header only up to the first space
34 |                 data = ''
35 |                 qual = ''
36 |                 line_id = 1
37 |             elif (line_id == 1):
38 |                 if (line[0] == '+'):  # separator line: switch to reading qualities
39 |                     line_id = 2
40 |                 else:  # sequence may span several lines; concatenate them
41 |                     data += line.rstrip()
42 |             elif (line_id == 2):
43 |                 qual += line.rstrip()
44 |                 if (len(qual) >= len(data)):  # quality may span several lines; done once it covers the sequence
45 |                     valid = True
46 |                     line_id = 0
47 | 
48 |     if (valid):  # print the last buffered record; an incomplete trailing record (valid still False) is not printed
49 |         if (len(name) == 0 or len(data) == 0 or len(data) != len(qual)):
50 |             eprint(len(name), len(data), len(qual))
51 |             eprint('File is not in FASTQ format')
52 |             sys.exit(1)
53 |         if (name in read_set):
54 |            print(name + '2')
55 |         else:
56 |            read_set.add(name)
57 |            print(name + '1')
58 |         print(data)
59 |         print('+')
60 |         print(qual)
61 | 
62 | #*******************************************************************************
63 | 
64 | if __name__ == '__main__':
65 |     # Command-line entry point: one mandatory and one optional FASTQ path.
66 |     parser = argparse.ArgumentParser(description='''Script for preprocessing
67 |         Illumina paired-end reads for usage in Racon. Each read will get unique
68 |         header up to the first white space to distinguish those forming a pair.''')
69 |     parser.add_argument('first', help='''File containing the first read of a pair
70 |         or both.''')
71 |     parser.add_argument('second', nargs='?', help='''Optional file containing
72 |         read pairs of the same paired-end sequencing run.''')
73 | 
74 |     args = parser.parse_args()
75 | 
76 |     read_set = set()  # shared by both parse_file calls, so a header repeated in the second file gets suffix '2'
77 |     parse_file(args.first, read_set)
78 |     if (args.second is not None):  # the second file is optional (nargs='?')
79 |         parse_file(args.second, read_set)
80 | 


--------------------------------------------------------------------------------
/src/sequence.cpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file sequence.cpp
  3 |  *
  4 |  * @brief Sequence class source file
  5 |  */
  6 | 
  7 | #include <ctype.h>
  8 | 
  9 | #include "sequence.hpp"
 10 | 
 11 | namespace racon {
 12 | 
 13 | std::unique_ptr<Sequence> createSequence(const std::string& name,
 14 |     const std::string& data) {
 15 |     // Factory wrapper around the (name, data) string constructor.
 16 |     return std::unique_ptr<Sequence>(new Sequence(name, data));
 17 | }
 18 | 
 19 | Sequence::Sequence(const char* name, uint32_t name_length, const char* data,
 20 |     uint32_t data_length)
 21 |         : name_(name, name_length), data_(), reverse_complement_(), quality_(),
 22 |         reverse_quality_() {
 23 |     // Store the bases upper-cased. toupper() is undefined for negative char values, so convert via unsigned char.
 24 |     data_.reserve(data_length);
 25 |     for (uint32_t i = 0; i < data_length; ++i) {
 26 |         data_ += static_cast<char>(toupper(static_cast<unsigned char>(data[i])));
 27 |     }
 28 | }
 29 | 
 30 | Sequence::Sequence(const char* name, uint32_t name_length, const char* data,
 31 |     uint32_t data_length, const char* quality, uint32_t quality_length)
 32 |         : Sequence(name, name_length, data, data_length) {
 33 |     // Sum the quality offsets above '!'; a zero sum means every symbol is '!' and quality_ is left empty.
 34 |     uint32_t quality_sum = 0;
 35 |     for (uint32_t i = 0; i < quality_length; ++i) {
 36 |         quality_sum += quality[i] - '!';
 37 |     }
 38 | 
 39 |     if (quality_sum > 0) { // keep the quality string only when it carries information
 40 |         quality_.assign(quality, quality_length);
 41 |     }
 42 | }
 43 | 
 44 | Sequence::Sequence(const std::string& name, const std::string& data) // copies name/data as given; no upper-casing here
 45 |     : name_(name), data_(data), reverse_complement_(), quality_(),
 46 |     reverse_quality_() {
 47 | }
 48 | 
 49 | void Sequence::create_reverse_complement() {
 50 |     // Builds reverse_complement_ and reverse_quality_; returns early when the reverse complement already exists.
 51 |     if (!reverse_complement_.empty()) {
 52 |         return;
 53 |     }
 54 | 
 55 |     reverse_complement_.clear();
 56 |     reverse_complement_.reserve(data_.size());
 57 |     // Reverse iterators replace the int32_t index, which became negative (loop skipped) for sizes above INT32_MAX.
 58 |     for (auto it = data_.rbegin(); it != data_.rend(); ++it) {
 59 |         switch (*it) {
 60 |             case 'A':
 61 |                 reverse_complement_ += 'T';
 62 |                 break;
 63 |             case 'T':
 64 |                 reverse_complement_ += 'A';
 65 |                 break;
 66 |             case 'C':
 67 |                 reverse_complement_ += 'G';
 68 |                 break;
 69 |             case 'G':
 70 |                 reverse_complement_ += 'C';
 71 |                 break;
 72 |             default: // any other symbol is copied unchanged
 73 |                 reverse_complement_ += *it;
 74 |                 break;
 75 |         }
 76 |     }
 77 | 
 78 |     reverse_quality_.clear();
 79 |     reverse_quality_.reserve(quality_.size());
 80 |     // Qualities are only reversed; there is nothing to complement.
 81 |     for (auto it = quality_.rbegin(); it != quality_.rend(); ++it) {
 82 |         reverse_quality_ += *it;
 83 |     }
 84 | }
 85 | 
 86 | void Sequence::transmute(bool has_name, bool has_data, bool has_reverse_data) {
 87 |     // Drops the members that are flagged as not needed; swapping with an empty temporary releases their storage.
 88 |     if (!has_name) {
 89 |         std::string().swap(name_);
 90 |     }
 91 |     // Runs before data_ may be dropped below, because the reverse complement is built from data_ and quality_.
 92 |     if (has_reverse_data) {
 93 |         create_reverse_complement();
 94 |     }
 95 | 
 96 |     if (!has_data) { // forward data and forward quality are released together
 97 |         std::string().swap(data_);
 98 |         std::string().swap(quality_);
 99 |     }
100 | }
101 | 
102 | }
103 | 


--------------------------------------------------------------------------------
/src/cuda/cudaaligner.hpp:
--------------------------------------------------------------------------------
 1 | /*!
  2 |  * @file cudaaligner.hpp
 3 |  *
 4 |  * @brief CUDA aligner class header file
 5 |  */
 6 | #include <claraparabricks/genomeworks/cudaaligner/cudaaligner.hpp>
 7 | #include <claraparabricks/genomeworks/cudaaligner/aligner.hpp>
 8 | #include <claraparabricks/genomeworks/cudaaligner/alignment.hpp>
 9 | 
10 | #include "overlap.hpp"
11 | #include "sequence.hpp"
12 | 
13 | #include <vector>
14 | #include <atomic>
15 | 
16 | namespace racon {
17 | 
18 | class CUDABatchAligner;
19 | std::unique_ptr<CUDABatchAligner> createCUDABatchAligner(uint32_t max_bandwidth, uint32_t device_id, int64_t max_gpu_memory);
20 | 
 21 | class CUDABatchAligner
 22 | {
 23 |     public:
 24 |         virtual ~CUDABatchAligner();
 25 | 
 26 |         /**
 27 |          * @brief Add a new overlap to the batch.
 28 |          *
 29 |          * @param[in] overlap  : The overlap to add to the batch.
 30 |          * @param[in] sequences: Reference to a database of sequences.
 31 |          *
 32 |          * @return True if overlap could be added to the batch.
 33 |          */
 34 |         virtual bool addOverlap(Overlap* overlap, std::vector<std::unique_ptr<Sequence>>& sequences);
 35 | 
 36 |         /**
 37 |          * @brief Checks if batch has any overlaps to process.
 38 |          *
 39 |          * @return True if there are overlaps in the batch.
 40 |          */
 41 |         virtual bool hasOverlaps() const {
 42 |             return overlaps_.size() > 0;
 43 |         };
 44 | 
 45 |         /**
 46 |          * @brief Runs batched alignment of overlaps on GPU.
 47 |          *
 48 |          */
 49 |         virtual void alignAll();
 50 | 
 51 |         /**
 52 |          * @brief Generate cigar strings for overlaps that were successfully
 53 |          *        computed on the GPU.
 54 |          *
 55 |          */
 56 |         virtual void generate_cigar_strings();
 57 | 
 58 |         /**
 59 |          * @brief Resets the state of the object, which includes
 60 |          *        resetting buffer states and counters.
 61 |          */
 62 |         virtual void reset();
 63 | 
 64 |         /**
 65 |          * @brief Get batch ID.
 66 |          */
 67 |         uint32_t getBatchID() const { return bid_; }
 68 | 
 69 |         // Builder function to create a new CUDABatchAligner object.
 70 |         friend std::unique_ptr<CUDABatchAligner>
 71 |         createCUDABatchAligner(uint32_t max_bandwidth, uint32_t device_id, int64_t max_gpu_memory);
 72 | 
 73 |     protected:
 74 |         CUDABatchAligner(uint32_t max_bandwidth, uint32_t device_id, int64_t max_gpu_memory);
 75 |         CUDABatchAligner(const CUDABatchAligner&) = delete;
 76 |         const CUDABatchAligner& operator=(const CUDABatchAligner&) = delete;
 77 |         // GenomeWorks aligner object that this class forwards its work to.
 78 |         std::unique_ptr<claraparabricks::genomeworks::cudaaligner::Aligner> aligner_;
 79 |         // Overlaps accepted into the current batch; raw pointers, presumably non-owning - confirm against caller.
 80 |         std::vector<Overlap*> overlaps_;
 81 |         // NOTE(review): purpose not evident from this header - confirm whether it is still used.
 82 |         std::vector<std::pair<std::string, std::string>> cpu_overlap_data_;
 83 | 
 84 |         // Static batch count used to generate batch IDs.
 85 |         static std::atomic<uint32_t> batches;
 86 | 
 87 |         // Batch ID.
 88 |         uint32_t bid_ = 0;
 89 | 
 90 |         // CUDA stream for batch.
 91 |         cudaStream_t stream_;
 92 | };
93 | 
94 | }
95 | 


--------------------------------------------------------------------------------
/src/polisher.hpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file polisher.hpp
  3 |  *
  4 |  * @brief Polisher class header file
  5 |  */
  6 | 
  7 | #pragma once
  8 | 
  9 | #include <stdlib.h>
 10 | #include <vector>
 11 | #include <memory>
 12 | #include <unordered_map>
 13 | #include <thread>
 14 | 
 15 | namespace bioparser {
 16 |     template<class T>
 17 |     class Parser;
 18 | }
 19 | 
 20 | namespace thread_pool {
 21 |     class ThreadPool;
 22 | }
 23 | 
 24 | namespace spoa {
 25 |     class AlignmentEngine;
 26 | }
 27 | 
 28 | 
 29 | namespace racon {
 30 | 
 31 | class Sequence;
 32 | class Overlap;
 33 | class Window;
 34 | class Logger;
 35 | 
 36 | enum class PolisherType { // polishing mode selector; passed to createPolisher and kept in Polisher::type_
 37 |     kC, // Contig polishing
 38 |     kF // Fragment error correction
 39 | };
 40 | 
 41 | class Polisher;
 42 | std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
 43 |     const std::string& overlaps_path, const std::string& target_path,
 44 |     PolisherType type, uint32_t window_length, double quality_threshold,
 45 |     double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
 46 |     uint32_t num_threads, uint32_t cuda_batches = 0,
 47 |     bool cuda_banded_alignment = false, uint32_t cudaaligner_batches = 0,
 48 |     uint32_t cudaaligner_band_width = 0);
 49 | 
 50 | class Polisher { // base polisher; holds the parsers, sequences, windows, thread pool and logger declared below
 51 | public:
 52 |     virtual ~Polisher();
 53 | 
 54 |     virtual void initialize();
 55 |     // Virtual so that derived polishers can replace the polishing step.
 56 |     virtual void polish(std::vector<std::unique_ptr<Sequence>>& dst,
 57 |         bool drop_unpolished_sequences);
 58 |     // The factory is a friend because the constructor below is protected.
 59 |     friend std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
 60 |         const std::string& overlaps_path, const std::string& target_path,
 61 |         PolisherType type, uint32_t window_length, double quality_threshold,
 62 |         double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
 63 |         uint32_t num_threads, uint32_t cuda_batches, bool cuda_banded_alignment,
 64 |         uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width);
 65 | 
 66 | protected:
 67 |     Polisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
 68 |         std::unique_ptr<bioparser::Parser<Overlap>> oparser,
 69 |         std::unique_ptr<bioparser::Parser<Sequence>> tparser,
 70 |         PolisherType type, uint32_t window_length, double quality_threshold,
 71 |         double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
 72 |         uint32_t num_threads);
 73 |     Polisher(const Polisher&) = delete; // copying is disabled
 74 |     const Polisher& operator=(const Polisher&) = delete;
 75 |     virtual void find_overlap_breaking_points(std::vector<std::unique_ptr<Overlap>>& overlaps);
 76 |     // NOTE(review): presumably the parsers for sequences, overlaps and targets, in that order - confirm in polisher.cpp.
 77 |     std::unique_ptr<bioparser::Parser<Sequence>> sparser_;
 78 |     std::unique_ptr<bioparser::Parser<Overlap>> oparser_;
 79 |     std::unique_ptr<bioparser::Parser<Sequence>> tparser_;
 80 | 
 81 |     PolisherType type_;
 82 |     double quality_threshold_;
 83 |     double error_threshold_;
 84 |     bool trim_;
 85 |     std::vector<std::shared_ptr<spoa::AlignmentEngine>> alignment_engines_;
 86 | 
 87 |     std::vector<std::unique_ptr<Sequence>> sequences_;
 88 |     std::vector<uint32_t> targets_coverages_;
 89 |     std::string dummy_quality_;
 90 | 
 91 |     uint32_t window_length_;
 92 |     std::vector<std::shared_ptr<Window>> windows_;
 93 | 
 94 |     std::unique_ptr<thread_pool::ThreadPool> thread_pool_;
 95 |     std::unordered_map<std::thread::id, uint32_t> thread_to_id_; // maps a thread id to a small integer index
 96 | 
 97 |     std::unique_ptr<Logger> logger_;
 98 | };
 99 | 
100 | }
101 | 


--------------------------------------------------------------------------------
/src/overlap.hpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file overlap.hpp
  3 |  *
  4 |  * @brief Overlap class header file
  5 |  */
  6 | 
  7 | #pragma once
  8 | 
  9 | #include <stdlib.h>
 10 | #include <stdint.h>
 11 | #include <memory>
 12 | #include <vector>
 13 | #include <string>
 14 | #include <utility>
 15 | #include <unordered_map>
 16 | 
 17 | namespace bioparser {
 18 |     template<class T>
 19 |     class MhapParser;
 20 | 
 21 |     template<class T>
 22 |     class PafParser;
 23 | 
 24 |     template<class T>
 25 |     class SamParser;
 26 | }
 27 | 
 28 | namespace racon {
 29 | 
 30 | class Sequence;
 31 | 
 32 | class Overlap { // one query/target overlap record; constructors are private and the bioparser parsers are friends
 33 | public:
 34 |     ~Overlap() = default; // NOTE(review): non-virtual although the class declares virtual member functions
 35 | 
 36 |     uint32_t q_id() const { // NOTE(review): q_id_ is uint64_t, so the value is narrowed to uint32_t here
 37 |         return q_id_;
 38 |     }
 39 | 
 40 |     uint32_t t_id() const { // NOTE(review): t_id_ is uint64_t, so the value is narrowed to uint32_t here
 41 |         return t_id_;
 42 |     }
 43 | 
 44 |     uint32_t strand() const {
 45 |         return strand_;
 46 |     }
 47 | 
 48 |     bool is_valid() const {
 49 |         return is_valid_;
 50 |     }
 51 |     // Declared only; NOTE(review): presumably resolves names/ids against the loaded sequences - confirm in overlap.cpp.
 52 |     void transmute(const std::vector<std::unique_ptr<Sequence>>& sequences,
 53 |         const std::unordered_map<std::string, uint64_t>& name_to_id,
 54 |         const std::unordered_map<uint64_t, uint64_t>& id_to_id);
 55 | 
 56 |     uint32_t length() const {
 57 |         return length_;
 58 |     }
 59 | 
 60 |     double error() const {
 61 |         return error_;
 62 |     }
 63 | 
 64 |     const std::string& cigar() const {
 65 |         return cigar_;
 66 |     }
 67 | 
 68 |     const std::vector<std::pair<uint32_t, uint32_t>>& breaking_points() const {
 69 |         return breaking_points_;
 70 |     }
 71 |     // Declared only; NOTE(review): presumably fills breaking_points_ per window_length - confirm in overlap.cpp.
 72 |     void find_breaking_points(const std::vector<std::unique_ptr<Sequence>>& sequences,
 73 |         uint32_t window_length);
 74 |     // The parsers construct Overlap objects through the private constructors below.
 75 |     friend bioparser::MhapParser<Overlap>;
 76 |     friend bioparser::PafParser<Overlap>;
 77 |     friend bioparser::SamParser<Overlap>;
 78 | 
 79 | #ifdef CUDA_ENABLED
 80 |     friend class CUDABatchAligner; // CUDA builds only: grants the batch aligner access to private members
 81 | #endif
 82 | private:
 83 |     Overlap(uint64_t a_id, uint64_t b_id, double accuracy, uint32_t minmers, // presumably the MHAP record fields - confirm
 84 |         uint32_t a_rc, uint32_t a_begin, uint32_t a_end, uint32_t a_length,
 85 |         uint32_t b_rc, uint32_t b_begin, uint32_t b_end, uint32_t b_length);
 86 |     Overlap(const char* q_name, uint32_t q_name_length, uint32_t q_length, // presumably the PAF record fields - confirm
 87 |         uint32_t q_begin, uint32_t q_end, char orientation, const char* t_name,
 88 |         uint32_t t_name_length, uint32_t t_length, uint32_t t_begin,
 89 |         uint32_t t_end, uint32_t matching_bases, uint32_t overlap_length,
 90 |         uint32_t maping_quality);
 91 |     Overlap(const char* q_name, uint32_t q_name_length, uint32_t flag, // presumably the SAM record fields - confirm
 92 |         const char* t_name, uint32_t t_name_length, uint32_t t_begin,
 93 |         uint32_t mapping_quality, const char* cigar, uint32_t cigar_length,
 94 |         const char* t_next_name, uint32_t t_next_name_length,
 95 |         uint32_t t_next_begin, uint32_t template_length, const char* sequence,
 96 |         uint32_t sequence_length, const char* quality, uint32_t quality_length);
 97 |     Overlap();
 98 |     Overlap(const Overlap&) = delete; // copying is disabled
 99 |     const Overlap& operator=(const Overlap&) = delete;
100 |     virtual void find_breaking_points_from_cigar(uint32_t window_length);
101 |     virtual void align_overlaps(const char* q, uint32_t q_len, const char* t, uint32_t t_len);
102 |     // Query side of the overlap.
103 |     std::string q_name_;
104 |     uint64_t q_id_;
105 |     uint32_t q_begin_;
106 |     uint32_t q_end_;
107 |     uint32_t q_length_;
108 |     // Target side of the overlap.
109 |     std::string t_name_;
110 |     uint64_t t_id_;
111 |     uint32_t t_begin_;
112 |     uint32_t t_end_;
113 |     uint32_t t_length_;
114 | 
115 |     uint32_t strand_;
116 |     uint32_t length_;
117 |     double error_;
118 |     std::string cigar_;
119 | 
120 |     bool is_valid_;
121 |     bool is_transmuted_;
122 |     std::vector<std::pair<uint32_t, uint32_t>> breaking_points_;
123 |     std::vector<std::pair<uint32_t, uint32_t>> dual_breaking_points_;
124 | };
125 | 
126 | }
127 | 


--------------------------------------------------------------------------------
/src/cuda/cudaaligner.cpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file cudaaligner.cpp
  3 |  *
  4 |  * @brief CUDABatchAligner class source file
  5 |  */
  6 | 
  7 | #include <claraparabricks/genomeworks/utils/cudautils.hpp>
  8 | 
  9 | #include "cudaaligner.hpp"
 10 | 
 11 | namespace racon {
 12 | 
 13 | using namespace claraparabricks::genomeworks::cudaaligner;
 14 | 
 15 | std::atomic<uint32_t> CUDABatchAligner::batches;
 16 | 
 17 | std::unique_ptr<CUDABatchAligner> createCUDABatchAligner(uint32_t max_bandwidth,
 18 |                                                          uint32_t device_id,
 19 |                                                          int64_t max_gpu_memory)
 20 | { // Factory: forwards all three arguments unchanged to the CUDABatchAligner constructor.
 21 |     return std::unique_ptr<CUDABatchAligner>(new CUDABatchAligner(max_bandwidth,
 22 |                                                                   device_id,
 23 |                                                                   max_gpu_memory));
 24 | }
 25 | 
 26 | CUDABatchAligner::CUDABatchAligner(uint32_t max_bandwidth,
 27 |                                    uint32_t device_id,
 28 |                                    int64_t max_gpu_memory)
 29 |     : overlaps_()
 30 |     , stream_(0)
 31 | {
 32 |     bid_ = CUDABatchAligner::batches++; // batch ID taken from the shared atomic counter
 33 |     // Select the device before the stream and the aligner are created.
 34 |     GW_CU_CHECK_ERR(cudaSetDevice(device_id));
 35 |     // Stream that is handed to the aligner below.
 36 |     GW_CU_CHECK_ERR(cudaStreamCreate(&stream_));
 37 |     // Global-alignment aligner bound to stream_ and device_id.
 38 |     aligner_ = create_aligner(AlignmentType::global_alignment,
 39 |                               max_bandwidth,
 40 |                               stream_,
 41 |                               device_id,
 42 |                               max_gpu_memory);
 43 | }
 44 | 
 45 | CUDABatchAligner::~CUDABatchAligner()
 46 | {
 47 |     aligner_.reset(); // release the aligner first: it was created with stream_
 48 |     GW_CU_CHECK_ERR(cudaStreamDestroy(stream_));
 49 | }
 50 | 
 51 | bool CUDABatchAligner::addOverlap(Overlap* overlap, std::vector<std::unique_ptr<Sequence>>& sequences)
 52 | { // Returns false only when the aligner reports exceeded_max_alignments; every other status returns true.
 53 |     const char* q = !overlap->strand_ ? &(sequences[overlap->q_id_]->data()[overlap->q_begin_]) :
 54 |         &(sequences[overlap->q_id_]->reverse_complement()[overlap->q_length_ - overlap->q_end_]);
 55 |     int32_t q_len = overlap->q_end_ - overlap->q_begin_;
 56 |     const char* t = &(sequences[overlap->t_id_]->data()[overlap->t_begin_]);
 57 |     int32_t t_len = overlap->t_end_ - overlap->t_begin_;
 58 | 
 59 |     // NOTE: The cudaaligner API for adding alignments is the opposite of edlib. Hence, what is
 60 |     // treated as target in edlib is query in cudaaligner and vice versa.
 61 |     StatusType s = aligner_->add_alignment(t, t_len,
 62 |                                                                        q, q_len);
 63 |     if (s == StatusType::exceeded_max_alignments) // batch is full: caller gets false
 64 |     {
 65 |         return false;
 66 |     }
 67 |     else if (s == StatusType::exceeded_max_alignment_difference
 68 |              || s == StatusType::exceeded_max_length)
 69 |     {
 70 |         // Do nothing as this case will be handled by CPU aligner.
 71 |     }
 72 |     else if (s != StatusType::success) // any other failure is reported but the overlap is not queued
 73 |     {
 74 |         fprintf(stderr, "Unknown error in cuda aligner!\n");
 75 |     }
 76 |     else // success: remember the overlap so generate_cigar_strings() can write its cigar
 77 |     {
 78 |         overlaps_.push_back(overlap);
 79 |     }
 80 |     return true;
 81 | }
 82 | 
 83 | void CUDABatchAligner::alignAll()
 84 | { // Thin wrapper: delegates to the underlying aligner's align_all().
 85 |     aligner_->align_all();
 86 | }
 87 | 
 88 | void CUDABatchAligner::generate_cigar_strings()
 89 | { // Writes one cigar string per queued overlap; throws std::runtime_error when the counts differ.
 90 |     aligner_->sync_alignments();
 91 | 
 92 |     const std::vector<std::shared_ptr<Alignment>>& alignments = aligner_->get_alignments();
 93 |     // Number of alignments should be the same as number of overlaps.
 94 |     if (overlaps_.size() != alignments.size())
 95 |     {
 96 |         throw std::runtime_error("Number of alignments doesn't match number of overlaps in cudaaligner.");
 97 |     }
 98 |     for(std::size_t a = 0; a < alignments.size(); a++) // alignment a is paired with overlap a by index
 99 |     {
100 |         overlaps_[a]->cigar_ = alignments[a]->convert_to_cigar();
101 |     }
102 | }
103 | 
104 | void CUDABatchAligner::reset()
105 | { // Clears the queued overlaps and the CPU-side data, then resets the underlying aligner.
106 |     overlaps_.clear();
107 |     cpu_overlap_data_.clear();
108 |     aligner_->reset();
109 | }
110 | 
111 | }
112 | 


--------------------------------------------------------------------------------
/src/cuda/cudabatch.hpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file cudabatch.hpp
  3 |  *
  4 |  * @brief CUDA batch class header file
  5 |  */
  6 | 
  7 | #pragma once
  8 | 
  9 | #include <memory>
 10 | #include <cuda_runtime_api.h>
 11 | #include <atomic>
 12 | 
 13 | #include "window.hpp"
 14 | #include <claraparabricks/genomeworks/cudapoa/batch.hpp>
 15 | 
 16 | namespace spoa {
 17 |     class AlignmentEngine;
 18 | }
 19 | 
 20 | namespace racon {
 21 | 
 22 | class Window;
 23 | 
 24 | class CUDABatchProcessor;
 25 | std::unique_ptr<CUDABatchProcessor> createCUDABatch(uint32_t max_window_depth, uint32_t device, size_t avail_mem, int8_t gap, int8_t mismatch, int8_t match, bool cuda_banded_alignment);
 26 | 
 27 | class CUDABatchProcessor
 28 | {
 29 | public:
 30 |     ~CUDABatchProcessor();
 31 | 
 32 |     /**
 33 |      * @brief Add a new window to the batch.
 34 |      *
 35 |      * @param[in] window : The window to add to the batch.
 36 |      *
 37 |      * @return True if window could be added to the batch.
 38 |      */
 39 |     bool addWindow(std::shared_ptr<Window> window);
 40 | 
 41 |     /**
 42 |      * @brief Checks if batch has any windows to process.
 43 |      */
 44 |     bool hasWindows() const;
 45 | 
 46 |     /**
 47 |      * @brief Runs the core computation to generate consensus for
 48 |      *        all windows in the batch.
 49 |      *
 50 |      * @return Vector of bool indicating successful generation of consensus
 51 |      *         for each window in the batch.
 52 |      */
 53 |     const std::vector<bool>& generateConsensus();
 54 | 
 55 |     /**
 56 |      * @brief Resets the state of the object, which includes
 57 |      *        resetting buffer states and counters.
 58 |      */
 59 |     void reset();
 60 | 
 61 |     /**
 62 |      * @brief Get batch ID.
 63 |      */
 64 |     uint32_t getBatchID() const { return bid_; }
 65 | 
 66 |     // Builder function to create a new CUDABatchProcessor object.
 67 |     friend std::unique_ptr<CUDABatchProcessor>
 68 |     createCUDABatch(uint32_t max_window_depth, uint32_t device, size_t avail_mem, int8_t gap, int8_t mismatch, int8_t match, bool cuda_banded_alignment);
 69 | 
 70 | protected:
 71 |     /**
 72 |      * @brief Constructor for CUDABatchProcessor class.
 73 |      *
 74 |      * @param[in] max_window_depth : Maximum number of sequences per window
 75 |      * @param[in] cuda_banded_alignment : Use banded POA alignment
 76 |      */
 77 |     CUDABatchProcessor(uint32_t max_window_depth, uint32_t device, size_t avail_mem, int8_t gap, int8_t mismatch, int8_t match, bool cuda_banded_alignment);
 78 |     CUDABatchProcessor(const CUDABatchProcessor&) = delete;
 79 |     const CUDABatchProcessor& operator=(const CUDABatchProcessor&) = delete;
 80 | 
 81 |     /**
 82 |      * @brief Run the CUDA kernel for generating POA on the batch.
 83 |      *        This call is asynchronous.
 84 |      */
 85 |     void generatePOA();
 86 | 
 87 |     /**
 88 |      * @brief Wait for execution to complete and grab the output
 89 |      *        consensus from the device.
 90 |      */
 91 |     void getConsensus();
 92 | 
 93 |     /**
 94 |      * @brief Convert PHRED quality scores to weights.
 95 |      *
 96 |      */
 97 |     void convertPhredQualityToWeights(const char* qual,
 98 |                                       uint32_t qual_length,
 99 |                                       std::vector<int8_t>& weights);
100 | 
101 | protected:
102 |     // Static batch count used to generate batch IDs.
103 |     static std::atomic<uint32_t> batches;
104 | 
105 |     // Batch ID.
106 |     uint32_t bid_ = 0;
107 | 
108 |     // CUDA-POA library object that manages POA batch.
109 |     std::unique_ptr<claraparabricks::genomeworks::cudapoa::Batch> cudapoa_batch_;
110 | 
111 |     // Stream for running POA batch.
112 |     cudaStream_t stream_;
113 |     // Windows belonging to the batch.
114 |     std::vector<std::shared_ptr<Window>> windows_;
115 | 
116 |     // Consensus generation status for each window.
117 |     std::vector<bool> window_consensus_status_;
118 | 
119 |     // Number of sequences actually added per window.
120 |     std::vector<uint32_t> seqs_added_per_window_;
121 | 
122 | };
123 | 
124 | } // namespace racon
125 | 


--------------------------------------------------------------------------------
/meson.build:
--------------------------------------------------------------------------------
  1 | project(
  2 |   'Racon',
  3 |   'cpp',
  4 |   version : '1.4.13',
  5 |   default_options : [
  6 |     'buildtype=release',
  7 |     'warning_level=3',
  8 |     'cpp_std=c++11'],
  9 |   license : 'MIT',
 10 |   meson_version : '>= 0.48')
 11 | 
 12 | cpp = meson.get_compiler('cpp')
 13 | 
 14 | opt_compile_with_tests = get_option('tests')
 15 | 
 16 | ############
 17 | # CXXFLAGS #
 18 | ############
 19 | 
 20 | racon_warning_flags = []
 21 | racon_cpp_flags = []
 22 | 
 23 | ################
 24 | # Dependencies #
 25 | ################
 26 | 
 27 | # Threads.
 28 | racon_thread_dep = dependency('threads', required : true)
 29 | 
 30 | # Zlib.
 31 | racon_zlib_dep = dependency('zlib', required: true, version : '>= 1.2.11', fallback : ['zlib', 'zlib_dep'])
 32 | 
 33 | # Google test.
 34 | if (not meson.is_subproject()) and opt_compile_with_tests  # tests are only configured for a top-level build
 35 |     gtest_dep = dependency('gtest', main : true, required : false)  # try a system-provided gtest first
 36 |     if not gtest_dep.found()  # otherwise build gtest from the bundled subproject
 37 |         gtest_proj = subproject('gtest')
 38 |         gtest_inc = gtest_proj.get_variable('gtest_incdir')
 39 |         gtest_lib = static_library('gtest', gtest_proj.get_variable('gtest_libsources'),
 40 |                                     gtest_proj.get_variable('gtest_mainsources'),
 41 |                                     include_directories : gtest_inc)
 42 | 
 43 |         gtest_dep = declare_dependency(include_directories : gtest_inc,
 44 |                                    link_with : gtest_lib, dependencies: racon_thread_dep)
 45 |     endif
 46 | endif
 47 | 
 48 | #######################
 49 | # Configuring headers #
 50 | #######################
 51 | racon_version_commit = 'unknown'  # fallback when git is missing or the git call fails
 52 | git_command = find_program('git', required: false)
 53 | if git_command.found()
 54 |     git_run = run_command('git', ['log', '-1', '--pretty=%h'])  # abbreviated hash of the latest commit
 55 |     if git_run.returncode() == 0
 56 |         racon_version_commit = git_run.stdout().strip()
 57 |     endif
 58 | endif
 59 | 
 60 | racon_version_h_config = configuration_data()
 61 | racon_version = meson.project_version()
 62 | racon_version_split = meson.project_version().split('.')  # 'major.minor.patch' -> three strings
 63 | racon_version_h_config.set('RACON_VERSION_MAJOR', racon_version_split[0])
 64 | racon_version_h_config.set('RACON_VERSION_MINOR', racon_version_split[1])
 65 | racon_version_h_config.set('RACON_VERSION_PATCH', racon_version_split[2])
 66 | racon_version_h_config.set('RACON_VERSION_COMMIT', racon_version_commit)
 67 | 
 68 | racon_version_h = configure_file(
 69 |   input : files('src/version.hpp.in'),
 70 |   output : 'version.hpp',
 71 |   configuration : racon_version_h_config)
 72 | 
 73 | racon_cpp_flags += ['-DRACON_VERSION="' + meson.project_version() + '-' + racon_version_commit + '"']  # '<version>-<commit>' as a string macro
 74 | 
 75 | ###########
 76 | # Headers #
 77 | ###########
 78 | 
 79 | racon_include_directories = [include_directories('src'), include_directories('test')]
 80 | 
 81 | ######################
 82 | # Sources + codebase #
 83 | ######################
 84 | 
 85 | subdir('vendor')
 86 | subdir('src')
 87 | 
 88 | if (not meson.is_subproject()) and opt_compile_with_tests
 89 |   subdir('test')
 90 | endif
 91 | 
 92 | 
 93 | all_sources = racon_cpp_sources + vendor_cpp_sources
 94 | 
 95 | ######################
 96 | # The Racon exe.    #
 97 | ######################
 98 | 
 99 | racon_dep = declare_dependency(
100 |     include_directories: vendor_include_directories + racon_include_directories,
101 |     link_with: [racon_lib, vendor_lib],
102 |     dependencies: [racon_thread_dep, racon_zlib_dep],
103 |     version: meson.project_version(),
104 |     compile_args: racon_warning_flags + racon_cpp_flags)
105 | 
106 | if not meson.is_subproject()
107 |   racon_bin = executable(
108 |     'racon',
109 |     ['src/main.cpp'],
110 |     install : true,
111 |     dependencies : [racon_thread_dep, racon_zlib_dep],
112 |     include_directories : vendor_include_directories + racon_include_directories,
113 |     link_with : [racon_lib],
114 |     cpp_args : [racon_warning_flags, racon_cpp_flags])
115 | 
116 |   ######################
117 |   # Tests              #
118 |   ######################
119 |   if opt_compile_with_tests
120 |       if gtest_dep.found()
121 |           tests_bin = executable(
122 |               'racon_test',
123 |               racon_test_cpp_sources,
124 |               dependencies : [racon_thread_dep, racon_zlib_dep, gtest_dep],
125 |               include_directories : racon_include_directories + vendor_include_directories + racon_test_include_directories,
126 |               link_with : [racon_lib, vendor_lib],
127 |               cpp_args : [racon_warning_flags, racon_cpp_flags, racon_test_extra_flags])
128 |       endif
129 |   endif
130 | 
131 | endif
132 | 


--------------------------------------------------------------------------------
/src/window.cpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file window.cpp
  3 |  *
  4 |  * @brief Window class source file
  5 |  */
  6 | 
  7 | #include <algorithm>
  8 | 
  9 | #include "window.hpp"
 10 | 
 11 | #include "spoa/spoa.hpp"
 12 | 
 13 | namespace racon {
 14 | 
 15 | std::shared_ptr<Window> createWindow(uint64_t id, uint32_t rank, WindowType type,
 16 |     const char* backbone, uint32_t backbone_length, const char* quality,
 17 |     uint32_t quality_length) {
 18 |     // Factory for Window: validates the backbone/quality lengths before constructing.
 19 |     if (backbone_length == 0 || backbone_length != quality_length) {
 20 |         fprintf(stderr, "[racon::createWindow] error: "
 21 |             "empty backbone sequence/unequal quality length!\n");
 22 |         exit(1);  // invalid input terminates the whole process
 23 |     }
 24 |     // Lengths are consistent: forward all arguments unchanged to the constructor.
 25 |     return std::shared_ptr<Window>(new Window(id, rank, type, backbone,
 26 |         backbone_length, quality, quality_length));
 27 | }
 28 | 
 29 | Window::Window(uint64_t id, uint32_t rank, WindowType type, const char* backbone,
 30 |     uint32_t backbone_length, const char* quality, uint32_t quality_length)
 31 |         : id_(id), rank_(rank), type_(type), consensus_(), sequences_(),
 32 |         qualities_(), positions_() {
 33 |     // The backbone becomes layer 0 (presumably stored as pointer/length pairs without copying; confirm in window.hpp).
 34 |     sequences_.emplace_back(backbone, backbone_length);
 35 |     qualities_.emplace_back(quality, quality_length);
 36 |     positions_.emplace_back(0, 0);  // layer 0 gets a (0, 0) position entry
 37 | }
 38 | 
 39 | Window::~Window() {  // empty body: no explicit cleanup is performed here
 40 | }
 41 | 
 42 | void Window::add_layer(const char* sequence, uint32_t sequence_length,
 43 |     const char* quality, uint32_t quality_length, uint32_t begin, uint32_t end) {
 44 |     // Appends one layer: a sequence, an optional quality string and its begin/end positions.
 45 |     if (sequence_length == 0 || begin == end) {
 46 |         return;  // empty sequences and zero-width ranges are ignored silently
 47 |     }
 48 |     // A null quality pointer is accepted; otherwise its length must equal the sequence length.
 49 |     if (quality != nullptr && sequence_length != quality_length) {
 50 |         fprintf(stderr, "[racon::Window::add_layer] error: "
 51 |             "unequal quality size!\n");
 52 |         exit(1);
 53 |     }
 54 |     if (begin >= end || begin > sequences_.front().second || end > sequences_.front().second) {  // positions are checked against the backbone length
 55 |         fprintf(stderr, "[racon::Window::add_layer] error: "
 56 |             "layer begin and end positions are invalid!\n");
 57 |         exit(1);
 58 |     }
 59 |     // Input is valid: record the layer in the three parallel containers.
 60 |     sequences_.emplace_back(sequence, sequence_length);
 61 |     qualities_.emplace_back(quality, quality_length);
 62 |     positions_.emplace_back(begin, end);
 63 | }
 64 | 
 65 | bool Window::generate_consensus(std::shared_ptr<spoa::AlignmentEngine> alignment_engine,
 66 |     bool trim) {
 67 |     // Builds consensus_ from the stored layers; returns false when the backbone is copied unchanged.
 68 |     if (sequences_.size() < 3) {
 69 |         consensus_ = std::string(sequences_.front().first, sequences_.front().second);
 70 |         return false;
 71 |     }
 72 |     // Seed the graph with the backbone (layer 0) and its qualities.
 73 |     auto graph = spoa::createGraph();
 74 |     graph->add_alignment(spoa::Alignment(), sequences_.front().first,
 75 |         sequences_.front().second, qualities_.front().first,
 76 |         qualities_.front().second);
 77 |     // rank holds the layer indices; index 0 (the backbone) stays in front.
 78 |     std::vector<uint32_t> rank;
 79 |     rank.reserve(sequences_.size());
 80 |     for (uint32_t i = 0; i < sequences_.size(); ++i) {
 81 |         rank.emplace_back(i);
 82 |     }
 83 |     // Order the non-backbone layers by ascending begin position.
 84 |     std::sort(rank.begin() + 1, rank.end(), [&](uint32_t lhs, uint32_t rhs) {
 85 |         return positions_[lhs].first < positions_[rhs].first; });
 86 |     // offset is 1% of the backbone length; layers reaching within it at both ends use the full graph.
 87 |     uint32_t offset = 0.01 * sequences_.front().second;
 88 |     for (uint32_t j = 1; j < sequences_.size(); ++j) {
 89 |         uint32_t i = rank[j];
 90 | 
 91 |         spoa::Alignment alignment;
 92 |         if (positions_[i].first < offset && positions_[i].second >
 93 |             sequences_.front().second - offset) {
 94 |             alignment = alignment_engine->align(sequences_[i].first,
 95 |                 sequences_[i].second, graph);
 96 |         } else {
 97 |             std::vector<int32_t> mapping;
 98 |             auto subgraph = graph->subgraph(positions_[i].first,
 99 |                 positions_[i].second, mapping);
100 |             alignment = alignment_engine->align( sequences_[i].first,
101 |                 sequences_[i].second, subgraph);
102 |             subgraph->update_alignment(alignment, mapping);
103 |         }
104 |         // Add the aligned layer, passing qualities only when the layer has them.
105 |         if (qualities_[i].first == nullptr) {
106 |             graph->add_alignment(alignment, sequences_[i].first,
107 |                 sequences_[i].second);
108 |         } else {
109 |             graph->add_alignment(alignment, sequences_[i].first,
110 |                 sequences_[i].second, qualities_[i].first,
111 |                 qualities_[i].second);
112 |         }
113 |     }
114 | 
115 |     std::vector<uint32_t> coverages;
116 |     consensus_ = graph->generate_consensus(coverages);
117 |     // Trimming applies to kTGS windows only: cut both ends back to the first base with enough coverage.
118 |     if (type_ == WindowType::kTGS && trim) {
119 |         uint32_t average_coverage = (sequences_.size() - 1) / 2;
120 |         // Threshold is half the number of non-backbone layers; scan inward from each end.
121 |         int32_t begin = 0, end = consensus_.size() - 1;
122 |         for (; begin < static_cast<int32_t>(consensus_.size()); ++begin) {
123 |             if (coverages[begin] >= average_coverage) {
124 |                 break;
125 |             }
126 |         }
127 |         for (; end >= 0; --end) {
128 |             if (coverages[end] >= average_coverage) {
129 |                 break;
130 |             }
131 |         }
132 |         // No usable span: keep the untrimmed consensus and only warn (%llu + cast keeps the format portable).
133 |         if (begin >= end) {
134 |             fprintf(stderr, "[racon::Window::generate_consensus] warning: "
135 |                 "contig %llu might be chimeric in window %u!\n", static_cast<unsigned long long>(id_), rank_);
136 |         } else {
137 |             consensus_ = consensus_.substr(begin, end - begin + 1);
138 |         }
139 |     }
140 | 
141 |     return true;
142 | }
143 | 
144 | }
145 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required(VERSION 3.2)
  2 | project(racon)
  3 | set(racon_version 1.4.17)
  4 | 
  5 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
  6 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
  7 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
  8 | 
  9 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic")
 10 | set(CMAKE_CXX_STANDARD 11)
 11 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 12 | set(CMAKE_CXX_EXTENSIONS OFF)
 13 | 
 14 | option(racon_build_tests "Build racon unit tests" OFF)
 15 | option(racon_build_wrapper "Build racon wrapper" OFF)
 16 | option(racon_enable_cuda "Build racon with NVIDIA CUDA support" OFF)
 17 | 
 18 | # Check CUDA compatibility.
 19 | if(racon_enable_cuda)
 20 |     find_package(CUDA 9.0 QUIET REQUIRED)
 21 |     if(NOT ${CUDA_FOUND})
 22 |         message(FATAL_ERROR "CUDA not detected on system. Please install")
 23 |     else()
 24 |         message(STATUS "Using CUDA ${CUDA_VERSION} from ${CUDA_TOOLKIT_ROOT_DIR}")
 25 |         set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -lineinfo")
 26 |     endif()
 27 | endif()
 28 | 
 29 | include_directories(${PROJECT_SOURCE_DIR}/src)
 30 | 
 31 | set(racon_sources
 32 |     src/main.cpp
 33 |     src/logger.cpp
 34 |     src/polisher.cpp
 35 |     src/overlap.cpp
 36 |     src/sequence.cpp
 37 |     src/window.cpp)
 38 | 
 39 | if(racon_enable_cuda)
 40 |     list(APPEND racon_sources src/cuda/cudapolisher.cpp src/cuda/cudabatch.cpp src/cuda/cudaaligner.cpp)
 41 |     cuda_add_executable(racon ${racon_sources})
 42 |     target_compile_definitions(racon PRIVATE CUDA_ENABLED)
 43 | else()
 44 |     add_executable(racon ${racon_sources})
 45 | endif()
 46 | 
 47 | # Add version information to binary.
 48 | target_compile_definitions(racon PRIVATE RACON_VERSION="v${racon_version}")
 49 | 
 50 | if (NOT TARGET bioparser)
 51 |     add_subdirectory(vendor/bioparser EXCLUDE_FROM_ALL)
 52 | endif()
 53 | if (NOT TARGET spoa)
 54 |     add_subdirectory(vendor/spoa EXCLUDE_FROM_ALL)
 55 | endif()
 56 | if (NOT TARGET thread_pool)
 57 |     add_subdirectory(vendor/thread_pool EXCLUDE_FROM_ALL)
 58 | endif()
 59 | if (NOT TARGET edlib)
 60 |     add_subdirectory(vendor/edlib EXCLUDE_FROM_ALL)
 61 | endif()
 62 | if (racon_enable_cuda)
 63 |     if (DEFINED CLARAGENOMICSANALYSIS_SDK_PATH)
 64 |         list(APPEND CMAKE_PREFIX_PATH "${CLARAGENOMICSANALYSIS_SDK_PATH}/cmake")
 65 |         find_package(cudapoa REQUIRED)
 66 |         find_package(cudaaligner REQUIRED)
 67 |     elseif (DEFINED CLARAGENOMICSANALYSIS_SRC_PATH)
 68 |         if (NOT TARGET cudapoa)
 69 |             add_subdirectory(${CLARAGENOMICSANALYSIS_SRC_PATH} ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
 70 |         endif()
 71 |         if (NOT TARGET cudaaligner)
 72 |             add_subdirectory(${CLARAGENOMICSANALYSIS_SRC_PATH} ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
 73 |         endif()
 74 |     elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/vendor/GenomeWorks)
 75 |         if (NOT TARGET cudapoa)
 76 |             add_subdirectory(vendor/GenomeWorks ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
 77 |         endif()
 78 |         if (NOT TARGET cudaaligner)
 79 |             add_subdirectory(vendor/GenomeWorks ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
 80 |         endif()
 81 |     else()
 82 |         if (NOT TARGET cudapoa)
 83 |             add_subdirectory(../GenomeWorks ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
 84 |         endif()
 85 |         if (NOT TARGET cudaaligner)
 86 |             add_subdirectory(../GenomeWorks ${CMAKE_CURRENT_BINARY_DIR}/GenomeWorks EXCLUDE_FROM_ALL)
 87 |         endif()
 88 |     endif()
 89 | endif()
 90 | 
 91 | target_link_libraries(racon bioparser spoa thread_pool edlib_static)
 92 | if (racon_enable_cuda)
 93 |     target_link_libraries(racon cudapoa cudaaligner)
 94 | endif()
 95 | 
 96 | install(TARGETS racon DESTINATION bin)
 97 | 
 98 | if (racon_build_tests)
 99 |     set(racon_test_data_path ${PROJECT_SOURCE_DIR}/test/data/)
100 |     configure_file("${PROJECT_SOURCE_DIR}/test/racon_test_config.h.in"
101 |         "${PROJECT_BINARY_DIR}/config/racon_test_config.h")
102 |     include_directories(${PROJECT_BINARY_DIR}/config)
103 |     include_directories(${PROJECT_SOURCE_DIR}/src)
104 | 
105 |     set(racon_test_sources
106 |         test/racon_test.cpp
107 |         src/logger.cpp
108 |         src/polisher.cpp
109 |         src/overlap.cpp
110 |         src/sequence.cpp
111 |         src/window.cpp)
112 | 
113 |     if (racon_enable_cuda)
114 |         list(APPEND racon_test_sources src/cuda/cudapolisher.cpp src/cuda/cudabatch.cpp src/cuda/cudaaligner.cpp)
115 |         cuda_add_executable(racon_test ${racon_test_sources})
116 |         target_compile_definitions(racon_test PRIVATE CUDA_ENABLED)
117 |     else()
118 |         add_executable(racon_test ${racon_test_sources})
119 |     endif()
120 | 
121 |     if (NOT TARGET gtest_main)
122 |         add_subdirectory(vendor/googletest/googletest EXCLUDE_FROM_ALL)
123 |     endif()
124 | 
125 |     target_link_libraries(racon_test bioparser spoa thread_pool edlib_static gtest_main)
126 |     if (racon_enable_cuda)
127 |         target_link_libraries(racon_test cudapoa cudaaligner)
128 |     endif()
129 | endif()
130 | 
131 | if (racon_build_wrapper)
132 |     set(racon_path ${PROJECT_BINARY_DIR}/bin/racon)
133 |     set(rampler_path ${PROJECT_BINARY_DIR}/vendor/rampler/bin/rampler)
134 |     configure_file(${PROJECT_SOURCE_DIR}/scripts/racon_wrapper.py
135 |         ${PROJECT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/racon_wrapper)
136 |     file(COPY ${PROJECT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/racon_wrapper
137 |         DESTINATION ${PROJECT_BINARY_DIR}/bin
138 |         FILE_PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE
139 |         WORLD_READ WORLD_EXECUTE)
140 | 
141 |     if (NOT TARGET rampler)
142 |         add_subdirectory(vendor/rampler)
143 |     endif()
144 | endif()
145 | 
146 | # Add Debian packaging
147 | SET(CPACK_GENERATOR "DEB")
148 | SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "Robert Vaser")
149 | set(CPACK_PACKAGE_VERSION "${racon_version}")
150 | include(CPack)
151 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Racon
  2 | 
  3 | [![Latest GitHub release](https://img.shields.io/github/release/lbcb-sci/racon.svg)](https://github.com/lbcb-sci/racon/releases/latest)
  4 | [![Build status for gcc/clang](https://travis-ci.org/lbcb-sci/racon.svg?branch=master)](https://travis-ci.org/lbcb-sci/racon)
  5 | [![Published in Genome Research](https://img.shields.io/badge/published%20in-Genome%20Research-blue.svg)](https://doi.org/10.1101/gr.214270.116)
  6 | 
  7 | Consensus module for raw de novo DNA assembly of long uncorrected reads.
  8 | 
  9 | ## Description
 10 | Racon is intended as a standalone consensus module to correct raw contigs generated by rapid assembly methods which do not include a consensus step. The goal of Racon is to generate genomic consensus which is of similar or better quality compared to the output generated by assembly methods which employ both error correction and consensus steps, while providing a speedup of several times compared to those methods. It supports data produced by both Pacific Biosciences and Oxford Nanopore Technologies.
 11 | 
 12 | Racon can be used as a polishing tool after the assembly with **either Illumina data or data produced by third generation of sequencing**. The type of input data is detected automatically.
 13 | 
 14 | Racon takes as input only three files: contigs in FASTA/FASTQ format, reads in FASTA/FASTQ format and overlaps/alignments between the reads and the contigs in MHAP/PAF/SAM format. Output is a set of polished contigs in FASTA format printed to stdout. All input files **can be compressed with gzip** (which will have an impact on parsing time).
 15 | 
 16 | Racon can also be used as a read error-correction tool. In this scenario, the MHAP/PAF/SAM file needs to contain pairwise overlaps between reads **including dual overlaps**.
 17 | 
 18 | A **wrapper script** is also available to enable easier usage to the end-user for large datasets. It has the same interface as racon but adds two additional features from the outside. Sequences can be **subsampled** to decrease the total execution time (accuracy might be lower) while target sequences can be **split** into smaller chunks and run sequentially to decrease memory consumption. Both features can be run at the same time as well.
 19 | 
 20 | ## Dependencies
 21 | 1. gcc 4.8+ or clang 3.4+
 22 | 2. cmake 3.2+
 23 | 
 24 | ### CUDA Support
 25 | 1. gcc 5.0+
 26 | 2. cmake 3.10+
 27 | 3. CUDA 9.0+
 28 | 
 29 | ## Installation
 30 | To install Racon run the following commands:
 31 | 
 32 | ```bash
 33 | git clone --recursive https://github.com/clara-parabricks/racon-gpu.git
 34 | cd racon-gpu
 35 | mkdir build
 36 | cd build
 37 | cmake -DCMAKE_BUILD_TYPE=Release ..
 38 | make
 39 | ```
 40 | 
 41 | After successful installation, an executable named `racon` will appear in `build/bin`.
 42 | 
 43 | Optionally, you can run `sudo make install` to install racon executable to your machine.
 44 | 
 45 | ***Note***: if you omitted `--recursive` from `git clone`, run `git submodule update --init --recursive` before proceeding with compilation.
 46 | 
 47 | To build unit tests add `-Dracon_build_tests=ON` while running `cmake`. After installation, an executable named `racon_test` will be created in `build/bin`.
 48 | 
 49 | To build the wrapper script add `-Dracon_build_wrapper=ON` while running `cmake`. After installation, an executable named `racon_wrapper` (python script) will be created in `build/bin`.
 50 | 
 51 | ### CUDA Support
 52 | Racon makes use of [NVIDIA's GenomeWorks SDK](https://github.com/clara-parabricks/GenomeWorks) for CUDA accelerated polishing and alignment.
 53 | 
 54 | To build `racon` with CUDA support, add `-Dracon_enable_cuda=ON` while running `cmake`. If CUDA support is unavailable, the `cmake` step will error out.
 55 | Note that the CUDA support flag does not produce a new binary target. Instead it augments the existing `racon` binary itself.
 56 | 
 57 | ```bash
 58 | cd build
 59 | cmake -DCMAKE_BUILD_TYPE=Release -Dracon_enable_cuda=ON ..
 60 | make
 61 | ```
 62 | 
 63 | ***Note***: Short read polishing with CUDA is still in development!
 64 | 
 65 | ### Packaging
 66 | To generate a Debian package for `racon`, run the following command from the build folder -
 67 | 
 68 | ```bash
 69 | make package
 70 | ```
 71 | 
 72 | ## Usage
 73 | Usage of `racon` is as follows:
 74 | 
 75 |     racon [options ...] <sequences> <overlaps> <target sequences>
 76 | 
 77 |         # default output is stdout
 78 |         <sequences>
 79 |             input file in FASTA/FASTQ format (can be compressed with gzip)
 80 |             containing sequences used for correction
 81 |         <overlaps>
 82 |             input file in MHAP/PAF/SAM format (can be compressed with gzip)
 83 |             containing overlaps between sequences and target sequences
 84 |         <target sequences>
 85 |             input file in FASTA/FASTQ format (can be compressed with gzip)
 86 |             containing sequences which will be corrected
 87 | 
 88 |     options:
 89 |         -u, --include-unpolished
 90 |             output unpolished target sequences
 91 |         -f, --fragment-correction
 92 |             perform fragment correction instead of contig polishing (overlaps
 93 |             file should contain dual/self overlaps!)
 94 |         -w, --window-length <int>
 95 |             default: 500
 96 |             size of window on which POA is performed
 97 |         -q, --quality-threshold <float>
 98 |             default: 10.0
 99 |             threshold for average base quality of windows used in POA
100 |         -e, --error-threshold <float>
101 |             default: 0.3
102 |             maximum allowed error rate used for filtering overlaps
103 |         --no-trimming
104 |             disables consensus trimming at window ends
105 |         -m, --match <int>
106 |             default: 3
107 |             score for matching bases
108 |         -x, --mismatch <int>
109 |             default: -5
110 |             score for mismatching bases
111 |         -g, --gap <int>
112 |             default: -4
113 |             gap penalty (must be negative)
114 |         -t, --threads <int>
115 |             default: 1
116 |             number of threads
117 |         --version
118 |             prints the version number
119 |         -h, --help
120 |             prints the usage
121 | 
122 |     only available when built with CUDA:
123 |         -c, --cudapoa-batches <int>
124 |             default: 0
125 |             number of batches for CUDA accelerated polishing per GPU
126 |         -b, --cuda-banded-alignment
127 |             use banding approximation for polishing on GPU. Only applicable when -c is used.
128 |         --cudaaligner-batches <int>
129 |             default: 0
130 |             number of batches for CUDA accelerated alignment per GPU
131 |         --cudaaligner-band-width <int>
132 |             default: 0
133 |             Band width for cuda alignment. Must be >= 0. Non-zero allows user defined
134 |             band width, whereas 0 implies auto band width determination.
135 | 
136 | `racon_test` is run without any parameters.
137 | 
138 | Usage of `racon_wrapper` equals the one of `racon` with two additional parameters:
139 | 
140 |     ...
141 |     options:
142 |         --split <int>
143 |             split target sequences into chunks of desired size in bytes
144 |         --subsample <int> <int>
145 |             subsample sequences to desired coverage (2nd argument) given the
146 |             reference length (1st argument)
147 |         ...
148 | 
149 | ## Contact information
150 | 
151 | For additional information, help and bug reports please send an email to one of the following: ivan.sovic@irb.hr, robert.vaser@fer.hr, mile.sikic@fer.hr, nagarajann@gis.a-star.edu.sg
152 | 
153 | ## Acknowledgment
154 | 
155 | This work has been supported in part by Croatian Science Foundation under the project UIP-11-2013-7353. IS is supported in part by the Croatian Academy of Sciences and Arts under the project "Methods for alignment and assembly of DNA sequences using nanopore sequencing data". NN is supported by funding from A*STAR, Singapore.
156 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <stdlib.h>
  3 | #include <stdint.h>
  4 | #include <getopt.h>
  5 | 
  6 | #include <string>
  7 | #include <vector>
  8 | 
  9 | #include "sequence.hpp"
 10 | #include "polisher.hpp"
 11 | #ifdef CUDA_ENABLED
 12 | #include "cuda/cudapolisher.hpp"
 13 | #endif
 14 | 
 15 | #ifndef RACON_VERSION
 16 | #error "Undefined version for Racon. Please pass version using -DRACON_VERSION macro."
 17 | #endif
 18 | 
 19 | static const char* version = RACON_VERSION;
 20 | static const int32_t CUDAALIGNER_INPUT_CODE = 10000;
 21 | static const int32_t CUDAALIGNER_BAND_WIDTH_INPUT_CODE = 10001;
 22 | 
 23 | static struct option options[] = {  // long-option table passed to getopt_long() in main()
 24 |     {"include-unpolished", no_argument, 0, 'u'},
 25 |     {"fragment-correction", no_argument, 0, 'f'},
 26 |     {"window-length", required_argument, 0, 'w'},
 27 |     {"quality-threshold", required_argument, 0, 'q'},
 28 |     {"error-threshold", required_argument, 0, 'e'},
 29 |     {"no-trimming", no_argument, 0, 'T'},  // 'T' is not in main()'s optstring, so only the long form is accepted
 30 |     {"match", required_argument, 0, 'm'},
 31 |     {"mismatch", required_argument, 0, 'x'},
 32 |     {"gap", required_argument, 0, 'g'},
 33 |     {"threads", required_argument, 0, 't'},
 34 |     {"version", no_argument, 0, 'v'},  // likewise long-only: 'v' is not in optstring
 35 |     {"help", no_argument, 0, 'h'},
 36 | #ifdef CUDA_ENABLED
 37 |     {"cudapoa-batches", optional_argument, 0, 'c'},
 38 |     {"cuda-banded-alignment", no_argument, 0, 'b'},
 39 |     {"cudaaligner-batches", required_argument, 0, CUDAALIGNER_INPUT_CODE},  // long-only, identified by an integer code
 40 |     {"cudaaligner-band-width", required_argument, 0, CUDAALIGNER_BAND_WIDTH_INPUT_CODE},  // long-only, identified by an integer code
 41 | #endif
 42 |     {0, 0, 0, 0}  // all-zero entry terminates the table
 43 | };
 44 | 
 45 | void help();
 46 | 
 47 | int main(int argc, char** argv) {
 48 |     // Entry point: parse options, create and run a polisher, print the results as FASTA to stdout.
 49 |     std::vector<std::string> input_paths;
 50 |     // The defaults below match the values printed by help().
 51 |     uint32_t window_length = 500;
 52 |     double quality_threshold = 10.0;
 53 |     double error_threshold = 0.3;
 54 |     bool trim = true;
 55 |     // Alignment scores; type selects the polisher kind (0: kC, anything else: kF).
 56 |     int8_t match = 3;
 57 |     int8_t mismatch = -5;
 58 |     int8_t gap = -4;
 59 |     uint32_t type = 0;
 60 | 
 61 |     bool drop_unpolished_sequences = true;
 62 |     uint32_t num_threads = 1;
 63 |     // CUDA settings: only modified by options that exist when CUDA_ENABLED is defined.
 64 |     uint32_t cudapoa_batches = 0;
 65 |     uint32_t cudaaligner_batches = 0;
 66 |     uint32_t cudaaligner_band_width = 0;
 67 |     bool cuda_banded_alignment = false;
 68 |     // Short options; 'b' and 'c' (with an optional argument) are appended for CUDA builds.
 69 |     std::string optstring = "ufw:q:e:m:x:g:t:h";
 70 | #ifdef CUDA_ENABLED
 71 |     optstring += "bc::";
 72 | #endif
 73 | 
 74 |     int32_t argument;
 75 |     while ((argument = getopt_long(argc, argv, optstring.c_str(), options, nullptr)) != -1) {
 76 |         switch (argument) {
 77 |             case 'u':
 78 |                 drop_unpolished_sequences = false;
 79 |                 break;
 80 |             case 'f':
 81 |                 type = 1;
 82 |                 break;
 83 |             case 'w':
 84 |                 window_length = atoi(optarg);
 85 |                 break;
 86 |             case 'q':
 87 |                 quality_threshold = atof(optarg);
 88 |                 break;
 89 |             case 'e':
 90 |                 error_threshold = atof(optarg);
 91 |                 break;
 92 |             case 'T':
 93 |                 trim = false;
 94 |                 break;
 95 |             case 'm':
 96 |                 match = atoi(optarg);
 97 |                 break;
 98 |             case 'x':
 99 |                 mismatch = atoi(optarg);
100 |                 break;
101 |             case 'g':
102 |                 gap = atoi(optarg);
103 |                 break;
104 |             case 't':
105 |                 num_threads = atoi(optarg);
106 |                 break;
107 |             case 'v':
108 |                 printf("%s\n", version);
109 |                 exit(0);
110 |             case 'h':
111 |                 help();
112 |                 exit(0);
113 | #ifdef CUDA_ENABLED
114 |             case 'c':
115 |                 // if option c is encountered, cudapoa_batches is initialized with a default value of 1
116 |                 cudapoa_batches = 1;
117 |                 // next text entry is not an option, assuming it's the arg for option 'c'
118 |                 if (optarg == NULL && argv[optind] != NULL
119 |                     && argv[optind][0] != '-') {
120 |                     cudapoa_batches = atoi(argv[optind++]);
121 |                 }
122 |                 // optional argument provided in the usual way
123 |                 if (optarg != NULL) {
124 |                     cudapoa_batches = atoi(optarg);
125 |                 }
126 |                 break;
127 |             case 'b':
128 |                 cuda_banded_alignment = true;
129 |                 break;
130 |             case CUDAALIGNER_INPUT_CODE: // cudaaligner-batches
131 |                 cudaaligner_batches = atoi(optarg);
132 |                 break;
133 |             case CUDAALIGNER_BAND_WIDTH_INPUT_CODE: // cudaaligner-band-width
134 |                 cudaaligner_band_width = atoi(optarg);
135 |                 break;
136 | #endif
137 |             default:
138 |                 exit(1);
139 |         }
140 |     }
141 |     // Every remaining argument is collected as an input path.
142 |     for (int32_t i = optind; i < argc; ++i) {
143 |         input_paths.emplace_back(argv[i]);
144 |     }
145 |     // Three paths are required: sequences, overlaps and target sequences (extra ones are ignored).
146 |     if (input_paths.size() < 3) {
147 |         fprintf(stderr, "[racon::] error: missing input file(s)!\n");
148 |         help();
149 |         exit(1);
150 |     }
151 | 
152 |     auto polisher = racon::createPolisher(input_paths[0], input_paths[1],
153 |         input_paths[2], type == 0 ? racon::PolisherType::kC :
154 |         racon::PolisherType::kF, window_length, quality_threshold,
155 |         error_threshold, trim, match, mismatch, gap, num_threads,
156 |         cudapoa_batches, cuda_banded_alignment, cudaaligner_batches,
157 |         cudaaligner_band_width);
158 | 
159 |     polisher->initialize();
160 | 
161 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
162 |     polisher->polish(polished_sequences, drop_unpolished_sequences);
163 |     // Emit each result as a two-line FASTA record: ">name" followed by the sequence data.
164 |     for (const auto& it: polished_sequences) {
165 |         fprintf(stdout, ">%s\n%s\n", it->name().c_str(), it->data().c_str());
166 |     }
167 | 
168 |     return 0;
169 | }
170 | 
171 | void help() {
172 |     printf(
173 |         "usage: racon [options ...] <sequences> <overlaps> <target sequences>\n"
174 |         "\n"
175 |         "    #default output is stdout\n"
176 |         "    <sequences>\n"
177 |         "        input file in FASTA/FASTQ format (can be compressed with gzip)\n"
178 |         "        containing sequences used for correction\n"
179 |         "    <overlaps>\n"
180 |         "        input file in MHAP/PAF/SAM format (can be compressed with gzip)\n"
181 |         "        containing overlaps between sequences and target sequences\n"
182 |         "    <target sequences>\n"
183 |         "        input file in FASTA/FASTQ format (can be compressed with gzip)\n"
184 |         "        containing sequences which will be corrected\n"
185 |         "\n"
186 |         "    options:\n"
187 |         "        -u, --include-unpolished\n"
188 |         "            output unpolished target sequences\n"
189 |         "        -f, --fragment-correction\n"
190 |         "            perform fragment correction instead of contig polishing\n"
191 |         "            (overlaps file should contain dual/self overlaps!)\n"
192 |         "        -w, --window-length <int>\n"
193 |         "            default: 500\n"
194 |         "            size of window on which POA is performed\n"
195 |         "        -q, --quality-threshold <float>\n"
196 |         "            default: 10.0\n"
197 |         "            threshold for average base quality of windows used in POA\n"
198 |         "        -e, --error-threshold <float>\n"
199 |         "            default: 0.3\n"
200 |         "            maximum allowed error rate used for filtering overlaps\n"
201 |         "        --no-trimming\n"
202 |         "            disables consensus trimming at window ends\n"
203 |         "        -m, --match <int>\n"
204 |         "            default: 3\n"
205 |         "            score for matching bases\n"
206 |         "        -x, --mismatch <int>\n"
207 |         "            default: -5\n"
208 |         "            score for mismatching bases\n"
209 |         "        -g, --gap <int>\n"
210 |         "            default: -4\n"
211 |         "            gap penalty (must be negative)\n"
212 |         "        -t, --threads <int>\n"
213 |         "            default: 1\n"
214 |         "            number of threads\n"
215 |         "        --version\n"
216 |         "            prints the version number\n"
217 |         "        -h, --help\n"
218 |         "            prints the usage\n"
219 | #ifdef CUDA_ENABLED
220 |         "        -c, --cudapoa-batches <int>\n"
221 |         "            default: 0\n"
222 |         "            number of batches for CUDA accelerated polishing per GPU\n"
223 |         "        -b, --cuda-banded-alignment\n"
224 |         "            use banding approximation for alignment on GPU\n"
225 |         "        --cudaaligner-batches <int>\n"
226 |         "            default: 0\n"
227 |         "            number of batches for CUDA accelerated alignment per GPU\n"
228 |         "        --cudaaligner-band-width <int>\n"
229 |         "            default: 0\n"
230 |         "            Band width for cuda alignment. Must be >= 0. Non-zero allows user defined \n"
231 |         "            band width, whereas 0 implies auto band width determination.\n"
232 | #endif
233 |     );
234 | }
235 | 


--------------------------------------------------------------------------------
/scripts/racon_wrapper.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | from __future__ import print_function
  4 | import os, sys, time, shutil, argparse, subprocess
  5 | 
  6 | def eprint(*args, **kwargs):
  7 |     print(*args, file=sys.stderr, **kwargs, flush=True)
  8 | 
  9 | #*******************************************************************************
 10 | 
 11 | class RaconWrapper:
 12 | 
 13 |     __racon = '@racon_path@'
 14 |     __rampler = '@rampler_path@'
 15 | 
 16 |     def __init__(self, sequences, overlaps, target_sequences, split, subsample,
 17 |         include_unpolished, fragment_correction, window_length, quality_threshold,
 18 |         error_threshold, match, mismatch, gap, threads,
 19 |         cudaaligner_batches, cudapoa_batches, cuda_banded_alignment):
 20 | 
 21 |         self.sequences = os.path.abspath(sequences)
 22 |         self.subsampled_sequences = None
 23 |         self.overlaps = os.path.abspath(overlaps)
 24 |         self.target_sequences = os.path.abspath(target_sequences)
 25 |         self.split_target_sequences = []
 26 |         self.chunk_size = split
 27 |         self.reference_length, self.coverage = subsample if subsample is not None\
 28 |             else (None, None)
 29 |         self.include_unpolished = include_unpolished
 30 |         self.fragment_correction = fragment_correction
 31 |         self.window_length = window_length
 32 |         self.quality_threshold = quality_threshold
 33 |         self.error_threshold = error_threshold
 34 |         self.match = match
 35 |         self.mismatch = mismatch
 36 |         self.gap = gap
 37 |         self.threads = threads
 38 |         # self.cudaaligner_batches = cudaaligner_batches
 39 |         # self.cudapoa_batches = cudapoa_batches
 40 |         # self.cuda_banded_alignment = cuda_banded_alignment
 41 |         self.work_directory = os.getcwd() + '/racon_work_directory_' + str(time.time())
 42 | 
 43 |     def __enter__(self):
 44 |         try:
 45 |             os.makedirs(self.work_directory)
 46 |         except OSError:
 47 |             if (not os.path.isdir(self.work_directory)):
 48 |                 eprint('[RaconWrapper::__enter__] error: unable to create work directory!')
 49 |                 sys.exit(1)
 50 | 
 51 |     def __exit__(self, exception_type, exception_value, traceback):
 52 |         try:
 53 |             shutil.rmtree(self.work_directory)
 54 |         except OSError:
 55 |             eprint('[RaconWrapper::__exit__] warning: unable to clean work directory!')
 56 | 
 57 |     def run(self):
 58 |         # run preprocess
 59 |         eprint('[RaconWrapper::run] preparing data with rampler')
 60 |         if (self.reference_length is not None and self.coverage is not None):
 61 |             try:
 62 |                 p = subprocess.Popen([RaconWrapper.__rampler, '-o', self.work_directory,
 63 |                     'subsample', self.sequences, self.reference_length, self.coverage])
 64 |             except OSError:
 65 |                 eprint('[RaconWrapper::run] error: unable to run rampler!')
 66 |                 sys.exit(1)
 67 |             p.communicate()
 68 |             if (p.returncode != 0):
 69 |                 sys.exit(1)
 70 | 
 71 |             base_name = os.path.basename(self.sequences).split('.')[0]
 72 |             extension = '.fasta' if (self.sequences.endswith('.fasta') or\
 73 |                 self.sequences.endswith('.fasta.gz') or\
 74 |                 self.sequences.endswith('.fa') or\
 75 |                 self.sequences.endswith('.fa.gz')) else\
 76 |                 '.fastq'
 77 |             self.subsampled_sequences = os.path.join(self.work_directory, base_name) +\
 78 |                 '_' + self.coverage + 'x' + extension
 79 |             if (not os.path.isfile(self.subsampled_sequences)):
 80 |                 eprint('[RaconWrapper::run] error: unable to find subsampled sequences!')
 81 |                 sys.exit(1)
 82 |         else:
 83 |             self.subsampled_sequences = self.sequences
 84 | 
 85 |         if (self.chunk_size is not None):
 86 |             try:
 87 |                 p = subprocess.Popen([RaconWrapper.__rampler, '-o', self.work_directory,
 88 |                     'split', self.target_sequences, self.chunk_size])
 89 |             except OSError:
 90 |                 eprint('[RaconWrapper::run] error: unable to run rampler!')
 91 |                 sys.exit(1)
 92 |             p.communicate()
 93 |             if (p.returncode != 0):
 94 |                 sys.exit(1)
 95 | 
 96 |             base_name = os.path.basename(self.target_sequences).split('.')[0]
 97 |             extension = '.fasta' if (self.target_sequences.endswith('.fasta') or\
 98 |                 self.target_sequences.endswith('.fasta.gz') or\
 99 |                 self.target_sequences.endswith('.fa') or\
100 |                 self.target_sequences.endswith('.fa.gz')) else\
101 |                 '.fastq'
102 | 
103 |             i = 0
104 |             while (True):
105 |                 target_sequences_part = os.path.join(self.work_directory, base_name) +\
106 |                     '_' + str(i) + extension
107 |                 if (not os.path.isfile(target_sequences_part)):
108 |                     break
109 |                 self.split_target_sequences.append(target_sequences_part)
110 |                 i += 1
111 |             eprint('[RaconWrapper::run] total number of splits: ' + str(i))
112 | 
113 |             if (len(self.split_target_sequences) == 0):
114 |                 eprint('[RaconWrapper::run] error: unable to find split target sequences!')
115 |                 sys.exit(1)
116 |         else:
117 |             self.split_target_sequences.append(self.target_sequences)
118 | 
119 |         racon_params = [RaconWrapper.__racon]
120 |         if (self.include_unpolished == True): racon_params.append('-u')
121 |         if (self.fragment_correction == True): racon_params.append('-f')
122 |         # if (self.cuda_banded_alignment == True): racon_params.append('-b')
123 |         racon_params.extend(['-w', str(self.window_length),
124 |             '-q', str(self.quality_threshold),
125 |             '-e', str(self.error_threshold),
126 |             '-m', str(self.match),
127 |             '-x', str(self.mismatch),
128 |             '-g', str(self.gap),
129 |             '-t', str(self.threads),
130 |             # '--cudaaligner-batches', str(self.cudaaligner_batches),
131 |             # '-c', str(self.cudapoa_batches),
132 |             self.subsampled_sequences, self.overlaps, ""])
133 | 
134 |         for target_sequences_part in self.split_target_sequences:
135 |             eprint('[RaconWrapper::run] processing data with racon')
136 |             racon_params[-1] = target_sequences_part
137 |             try:
138 |                 p = subprocess.Popen(racon_params)
139 |             except OSError:
140 |                 eprint('[RaconWrapper::run] error: unable to run racon!')
141 |                 sys.exit(1)
142 |             p.communicate()
143 |             if (p.returncode != 0):
144 |                 sys.exit(1)
145 | 
146 |         self.subsampled_sequences = None
147 |         self.split_target_sequences = []
148 | 
149 | #*******************************************************************************
150 | 
if __name__ == '__main__':

    # Command line front end: parses racon's options plus --split/--subsample
    # and runs the wrapper inside its work-directory context.
    parser = argparse.ArgumentParser(description='''Racon_wrapper encapsulates
        racon and adds two additional features from the outside to enable easier
        usage to the end-user. Sequences can now be subsampled to decrease the
        total execution time (accuracy might be lower) while target
        sequences can be split into smaller chunks and run sequentially to
        decrease memory consumption. Both features can be run at the same time
        as well! The usage equals the one of racon.''',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('sequences', help='''input file in FASTA/FASTQ format
        (can be compressed with gzip) containing sequences used for correction''')
    parser.add_argument('overlaps', help='''input file in MHAP/PAF/SAM format
        (can be compressed with gzip) containing overlaps between sequences and
        target sequences''')
    parser.add_argument('target_sequences', help='''input file in FASTA/FASTQ
        format (can be compressed with gzip) containing sequences which will be
        corrected''')
    parser.add_argument('--split', help='''split target sequences into chunks of
        desired size in bytes''')
    parser.add_argument('--subsample', nargs=2, help='''subsample sequences to
        desired coverage (2nd argument) given the reference length (1st argument)''')
    parser.add_argument('-u', '--include-unpolished', action='store_true',
        help='''output unpolished target sequences''')
    parser.add_argument('-f', '--fragment-correction', action='store_true',
        help='''perform fragment correction instead of contig polishing
        (overlaps file should contain dual/self overlaps!)''')
    parser.add_argument('-w', '--window-length', default=500, help='''size of
        window on which POA is performed''')
    parser.add_argument('-q', '--quality-threshold', default=10.0,
        help='''threshold for average base quality of windows used in POA''')
    parser.add_argument('-e', '--error-threshold', default=0.3, help='''maximum
        allowed error rate used for filtering overlaps''')
    # The scoring defaults mirror the ones racon documents in its own usage
    # text (match 3, mismatch -5, gap -4). The wrapper always passes these
    # options explicitly, so differing defaults here would silently override
    # racon's.
    parser.add_argument('-m', '--match', default=3, help='''score for matching
        bases''')
    parser.add_argument('-x', '--mismatch', default=-5, help='''score for
        mismatching bases''')
    parser.add_argument('-g', '--gap', default=-4, help='''gap penalty (must be
        negative)''')
    parser.add_argument('-t', '--threads', default=1, help='''number of threads''')
    # NOTE: the CUDA options are parsed and handed to RaconWrapper, which
    # currently does not forward them to racon.
    parser.add_argument('--cudaaligner-batches', default=0, help='''number of batches for CUDA accelerated alignment''')
    parser.add_argument('-c', '--cudapoa-batches', default=0, help='''number of batches for CUDA accelerated polishing''')
    parser.add_argument('-b', '--cuda-banded-alignment', action='store_true', help='''use banding approximation for polishing on GPU. Only applicable when -c is used.''')

    args = parser.parse_args()

    racon = RaconWrapper(args.sequences, args.overlaps, args.target_sequences,
        args.split, args.subsample, args.include_unpolished,
        args.fragment_correction, args.window_length, args.quality_threshold,
        args.error_threshold, args.match, args.mismatch, args.gap, args.threads,
        args.cudaaligner_batches, args.cudapoa_batches, args.cuda_banded_alignment)

    with racon:
        racon.run()
205 | 


--------------------------------------------------------------------------------
/src/cuda/cudabatch.cpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file cudabatch.cpp
  3 |  *
  4 |  * @brief CUDABatch class source file
  5 |  */
  6 | 
  7 | #include <string>
  8 | #include <iostream>
  9 | #include <cstring>
 10 | #include <algorithm>
 11 | 
 12 | #include "cudabatch.hpp"
 13 | #include "cudautils.hpp"
 14 | 
 15 | #include "spoa/spoa.hpp"
 16 | #include <claraparabricks/genomeworks/utils/cudautils.hpp>
 17 | 
 18 | namespace racon {
 19 | 
 20 | using namespace claraparabricks::genomeworks::cudapoa;
 21 | 
// Counter shared by all CUDABatchProcessor instances; the constructor
// post-increments it to obtain the id (bid_) of each new instance.
std::atomic<uint32_t> CUDABatchProcessor::batches;
 23 | 
 24 | std::unique_ptr<CUDABatchProcessor> createCUDABatch(uint32_t max_window_depth,
 25 |                                                     uint32_t device,
 26 |                                                     size_t avail_mem,
 27 |                                                     int8_t gap,
 28 |                                                     int8_t mismatch,
 29 |                                                     int8_t match,
 30 |                                                     bool cuda_banded_alignment)
 31 | {
 32 |     return std::unique_ptr<CUDABatchProcessor>(new CUDABatchProcessor(max_window_depth,
 33 |                                                                       device,
 34 |                                                                       avail_mem,
 35 |                                                                       gap,
 36 |                                                                       mismatch,
 37 |                                                                       match,
 38 |                                                                       cuda_banded_alignment));
 39 | }
 40 | 
 41 | CUDABatchProcessor::CUDABatchProcessor(uint32_t max_window_depth,
 42 |                                        uint32_t device,
 43 |                                        size_t avail_mem,
 44 |                                        int8_t gap,
 45 |                                        int8_t mismatch,
 46 |                                        int8_t match,
 47 |                                        bool cuda_banded_alignment)
 48 |     : windows_()
 49 |     , seqs_added_per_window_()
 50 | {
 51 |     bid_ = CUDABatchProcessor::batches++;
 52 |     
 53 |     // Create new CUDA stream.
 54 |     GW_CU_CHECK_ERR(cudaStreamCreate(&stream_));
 55 | 
 56 |     BatchConfig batch_config(1023,
 57 |                              max_window_depth,
 58 |                              256,
 59 |                              cuda_banded_alignment ? BandMode::static_band : BandMode::full_band);
 60 | 
 61 |     cudapoa_batch_ = create_batch(device,
 62 |                                   stream_,
 63 |                                   avail_mem,
 64 |                                   OutputType::consensus,
 65 |                                   batch_config,
 66 |                                   gap,
 67 |                                   mismatch,
 68 |                                   match);
 69 | }
 70 | 
// Releases the CUDA stream that the constructor created for this processor.
CUDABatchProcessor::~CUDABatchProcessor()
{
    // Destroy CUDA stream.
    GW_CU_CHECK_ERR(cudaStreamDestroy(stream_));
}
 76 | 
 77 | bool CUDABatchProcessor::addWindow(std::shared_ptr<Window> window)
 78 | {
 79 |     Group poa_group;
 80 |     uint32_t num_seqs = window->sequences_.size();
 81 |     std::vector<std::vector<int8_t>> all_read_weights(num_seqs, std::vector<int8_t>());
 82 | 
 83 |     // Add first sequence as backbone to graph.
 84 |     std::pair<const char*, uint32_t> seq = window->sequences_.front();
 85 |     std::pair<const char*, uint32_t> qualities = window->qualities_.front();
 86 |     std::vector<int8_t> backbone_weights;
 87 |     convertPhredQualityToWeights(qualities.first, qualities.second, all_read_weights[0]);
 88 |     Entry e = {
 89 |         seq.first,
 90 |         all_read_weights[0].data(),
 91 |         static_cast<int32_t>(seq.second)
 92 |     };
 93 |     poa_group.push_back(e);
 94 | 
 95 |     // Add the rest of the sequences in sorted order of starting positions.
 96 |     std::vector<uint32_t> rank;
 97 |     rank.reserve(window->sequences_.size());
 98 | 
 99 |     for (uint32_t i = 0; i < num_seqs; ++i) {
100 |         rank.emplace_back(i);
101 |     }
102 | 
103 |     std::sort(rank.begin() + 1, rank.end(), [&](uint32_t lhs, uint32_t rhs) {
104 |             return window->positions_[lhs].first < window->positions_[rhs].first; });
105 | 
106 |     // Start from index 1 since first sequence has already been added as backbone.
107 |     uint32_t long_seq = 0;
108 |     uint32_t skipped_seq = 0;
109 |     for(uint32_t j = 1; j < num_seqs; j++)
110 |     {
111 |         uint32_t i = rank.at(j);
112 |         seq = window->sequences_.at(i);
113 |         qualities = window->qualities_.at(i);
114 |         convertPhredQualityToWeights(qualities.first, qualities.second, all_read_weights[i]);
115 | 
116 |         Entry p = {
117 |             seq.first,
118 |             all_read_weights[i].data(),
119 |             static_cast<int32_t>(seq.second)
120 |         };
121 |         poa_group.push_back(p);
122 |     }
123 | 
124 |     // Add group to CUDAPOA batch object.
125 |     std::vector<StatusType> entry_status;
126 |     StatusType status = cudapoa_batch_->add_poa_group(entry_status, poa_group);
127 | 
128 |     // If group was added, then push window in accepted windows list.
129 |     if (status != StatusType::success)
130 |     {
131 |         return false;
132 |     }
133 |     else
134 |     {
135 |         windows_.push_back(window);
136 |     }
137 | 
138 |     // Keep track of how many sequences were actually processed for this
139 |     // group. This acts as the effective coverage for that window.
140 |     int32_t seq_added = 0;
141 |     for(uint32_t i = 1; i < entry_status.size(); i++)
142 |     {
143 |         if (entry_status[i] == StatusType::exceeded_maximum_sequence_size)
144 |         {
145 |             long_seq++;
146 |             continue;
147 |         }
148 |         else if (entry_status[i] == StatusType::exceeded_maximum_sequences_per_poa)
149 |         {
150 |             skipped_seq++;
151 |             continue;
152 |         }
153 |         else if (entry_status[i] != StatusType::success)
154 |         {
155 |             fprintf(stderr, "Could not add sequence to POA in batch %d.\n",
156 |                     cudapoa_batch_->batch_id());
157 |             exit(1);
158 |         }
159 |         seq_added++;
160 |     }
161 |     seqs_added_per_window_.push_back(seq_added);
162 | 
163 | #ifndef NDEBUG
164 |     if (long_seq > 0)
165 |     {
166 |         fprintf(stderr, "Too long (%d / %d)\n", long_seq, num_seqs);
167 |     }
168 |     if (skipped_seq > 0)
169 |     {
170 |         fprintf(stderr, "Skipped (%d / %d)\n", skipped_seq, num_seqs);
171 |     }
172 | #endif
173 | 
174 |     return true;
175 | }
176 | 
// Returns true when the underlying cudapoa batch reports at least one POA.
bool CUDABatchProcessor::hasWindows() const
{
    return (cudapoa_batch_->get_total_poas() > 0);
}
181 | 
182 | void CUDABatchProcessor::convertPhredQualityToWeights(const char* qual,
183 |                                                       uint32_t qual_length,
184 |                                                       std::vector<int8_t>& weights)
185 | {
186 |     weights.clear();
187 |     for(uint32_t i = 0; i < qual_length; i++)
188 |     {
189 |         weights.push_back(static_cast<uint8_t>(qual[i]) - 33); // PHRED quality
190 |     }
191 | }
192 | 
// Runs POA generation on the cudapoa batch for all groups added so far.
void CUDABatchProcessor::generatePOA()
{
    // call generate poa function
    cudapoa_batch_->generate_poa();
}
198 | 
199 | void CUDABatchProcessor::getConsensus()
200 | {
201 |     std::vector<std::string> consensuses;
202 |     std::vector<std::vector<uint16_t>> coverages;
203 |     std::vector<StatusType> output_status;
204 |     cudapoa_batch_->get_consensus(consensuses, coverages, output_status);
205 | 
206 |     for(uint32_t i = 0; i < windows_.size(); i++)
207 |     {
208 |         auto window = windows_.at(i);
209 |         if (output_status.at(i) != StatusType::success)
210 |         {
211 |             // leave the failure cases to CPU polisher
212 |             window_consensus_status_.emplace_back(false);
213 |         }
214 |         else
215 |         {
216 |             // This is a special case borrowed from the CPU version.
217 |             // TODO: We still run this case through the GPU, but could take it out.
218 |             bool consensus_status = false;
219 |             if (window->sequences_.size() < 3)
220 |             {
221 |                 window->consensus_ = std::string(window->sequences_.front().first,
222 |                         window->sequences_.front().second);
223 | 
224 |                 // This status is borrowed from the CPU version which considers this
225 |                 // a failed consensus. All other cases are true.
226 |                 consensus_status = false;
227 |             }
228 |             else
229 |             {
230 |                 window->consensus_ = consensuses[i];
231 |                 if (window->type_ ==  WindowType::kTGS)
232 |                 {
233 |                     uint32_t num_seqs_in_window = seqs_added_per_window_[i];
234 |                     uint32_t average_coverage = num_seqs_in_window / 2;
235 | 
236 |                     int32_t begin = 0, end =  window->consensus_.size() - 1;
237 |                     for (; begin < static_cast<int32_t>( window->consensus_.size()); ++begin) {
238 |                         if (coverages[i][begin] >= average_coverage) {
239 |                             break;
240 |                         }
241 |                     }
242 |                     for (; end >= 0; --end) {
243 |                         if (coverages[i][end] >= average_coverage) {
244 |                             break;
245 |                         }
246 |                     }
247 | 
248 |                     if (begin >= end) {
249 |                         fprintf(stderr, "[CUDABatchProcessor] warning: "
250 |                                 "contig might be chimeric in window %lu!\n", window->id_);
251 |                         consensus_status = false;
252 |                     } else {
253 |                         window->consensus_ =  window->consensus_.substr(begin, end - begin + 1);
254 |                         consensus_status = true;
255 |                     }
256 |                 }
257 |             }
258 |             window_consensus_status_.emplace_back(consensus_status);
259 |         }
260 |     }
261 | }
262 | 
// Runs POA generation and consensus retrieval for the batch and returns the
// per-window status flags filled in by getConsensus(). The returned
// reference refers to a member that reset() clears.
const std::vector<bool>& CUDABatchProcessor::generateConsensus()
{
    // Generate consensus for all windows in the batch
    generatePOA();
    getConsensus();

    return window_consensus_status_;
}
271 | 
// Clears the tracked windows, their status flags and per-window sequence
// counts, and resets the underlying cudapoa batch.
void CUDABatchProcessor::reset()
{
    windows_.clear();
    window_consensus_status_.clear();
    seqs_added_per_window_.clear();
    cudapoa_batch_->reset();
}
279 | 
280 | } // namespace racon
281 | 


--------------------------------------------------------------------------------
/src/overlap.cpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file overlap.cpp
  3 |  *
  4 |  * @brief Overlap class source file
  5 |  */
  6 | 
  7 | #include <algorithm>
  8 | 
  9 | #include "sequence.hpp"
 10 | #include "overlap.hpp"
 11 | #include "edlib.h"
 12 | 
 13 | namespace racon {
 14 | 
 15 | Overlap::Overlap(uint64_t a_id, uint64_t b_id, double, uint32_t,
 16 |     uint32_t a_rc, uint32_t a_begin, uint32_t a_end, uint32_t a_length,
 17 |     uint32_t b_rc, uint32_t b_begin, uint32_t b_end, uint32_t b_length)
 18 |         : q_name_(), q_id_(a_id - 1), q_begin_(a_begin), q_end_(a_end),
 19 |         q_length_(a_length), t_name_(), t_id_(b_id - 1), t_begin_(b_begin),
 20 |         t_end_(b_end), t_length_(b_length), strand_(a_rc ^ b_rc), length_(),
 21 |         error_(), cigar_(), is_valid_(true), is_transmuted_(false),
 22 |         breaking_points_() {
 23 | 
 24 |     length_ = std::max(q_end_ - q_begin_, t_end_ - t_begin_);
 25 |     error_ = 1 - std::min(q_end_ - q_begin_, t_end_ - t_begin_) /
 26 |         static_cast<double>(length_);
 27 | }
 28 | 
 29 | Overlap::Overlap(const char* q_name, uint32_t q_name_length, uint32_t q_length,
 30 |     uint32_t q_begin, uint32_t q_end, char orientation, const char* t_name,
 31 |     uint32_t t_name_length, uint32_t t_length, uint32_t t_begin,
 32 |     uint32_t t_end, uint32_t, uint32_t, uint32_t)
 33 |         : q_name_(q_name, q_name_length), q_id_(), q_begin_(q_begin),
 34 |         q_end_(q_end), q_length_(q_length), t_name_(t_name, t_name_length),
 35 |         t_id_(), t_begin_(t_begin), t_end_(t_end), t_length_(t_length),
 36 |         strand_(orientation == '-'), length_(), error_(), cigar_(),
 37 |         is_valid_(true), is_transmuted_(false), breaking_points_() {
 38 | 
 39 |     length_ = std::max(q_end_ - q_begin_, t_end_ - t_begin_);
 40 |     error_ = 1 - std::min(q_end_ - q_begin_, t_end_ - t_begin_) /
 41 |         static_cast<double>(length_);
 42 | }
 43 | 
 44 | Overlap::Overlap(const char* q_name, uint32_t q_name_length, uint32_t flag,
 45 |     const char* t_name, uint32_t t_name_length, uint32_t t_begin,
 46 |     uint32_t, const char* cigar, uint32_t cigar_length, const char*,
 47 |     uint32_t, uint32_t, uint32_t, const char*, uint32_t, const char*,
 48 |     uint32_t)
 49 |         : q_name_(q_name, q_name_length), q_id_(), q_begin_(0), q_end_(),
 50 |         q_length_(0), t_name_(t_name, t_name_length), t_id_(), t_begin_(t_begin - 1),
 51 |         t_end_(), t_length_(0), strand_(flag & 0x10), length_(), error_(),
 52 |         cigar_(cigar, cigar_length), is_valid_(!(flag & 0x4)),
 53 |         is_transmuted_(false), breaking_points_() {
 54 | 
 55 |     if (cigar_.size() < 2 && is_valid_) {
 56 |         fprintf(stderr, "[Racon::Overlap::Overlap] error: "
 57 |             "missing alignment from SAM object!\n");
 58 |         exit(1);
 59 |     } else {
 60 |         for (uint32_t i = 0; i < cigar_.size(); ++i) {
 61 |             if (cigar_[i] == 'S' || cigar_[i] == 'H') {
 62 |                 q_begin_ = atoi(&cigar_[0]);
 63 |                 break;
 64 |             } else if (cigar_[i] == 'M' || cigar_[i] == '=' || cigar_[i] == 'I' ||
 65 |                 cigar_[i] == 'D' || cigar_[i] == 'N' || cigar_[i] == 'P' ||
 66 |                 cigar_[i] == 'X') {
 67 |                 break;
 68 |             }
 69 |         }
 70 | 
 71 |         uint32_t q_alignment_length = 0, q_clip_length = 0, t_alignment_length = 0;
 72 |         for (uint32_t i = 0, j = 0; i < cigar_.size(); ++i) {
 73 |             if (cigar_[i] == 'M' || cigar_[i] == '=' || cigar_[i] == 'X') {
 74 |                 auto num_bases = atoi(&cigar_[j]);
 75 |                 j = i + 1;
 76 |                 q_alignment_length += num_bases;
 77 |                 t_alignment_length += num_bases;
 78 |             } else if (cigar_[i] == 'I') {
 79 |                 auto num_bases = atoi(&cigar_[j]);
 80 |                 j = i + 1;
 81 |                 q_alignment_length += num_bases;
 82 |             } else if (cigar_[i] == 'D' || cigar_[i] == 'N') {
 83 |                 auto num_bases = atoi(&cigar_[j]);
 84 |                 j = i + 1;
 85 |                 t_alignment_length += num_bases;
 86 |             } else if (cigar_[i] == 'S' || cigar_[i] == 'H') {
 87 |                 q_clip_length += atoi(&cigar_[j]);
 88 |                 j = i + 1;
 89 |             } else if (cigar_[i] == 'P') {
 90 |                 j = i + 1;
 91 |             }
 92 |         }
 93 | 
 94 |         q_end_ = q_begin_ + q_alignment_length;
 95 |         q_length_ = q_clip_length + q_alignment_length;
 96 |         if (strand_) {
 97 |             uint32_t tmp = q_begin_;
 98 |             q_begin_ = q_length_ - q_end_;
 99 |             q_end_ = q_length_ - tmp;
100 |         }
101 | 
102 |         t_end_ = t_begin_ + t_alignment_length;
103 | 
104 |         length_ = std::max(q_alignment_length, t_alignment_length);
105 |         error_ = 1 - std::min(q_alignment_length, t_alignment_length) /
106 |             static_cast<double>(length_);
107 |     }
108 | }
109 | 
// Default constructor: value-initializes every member, except that the
// overlap is marked both valid and already transmuted.
Overlap::Overlap()
        : q_name_(), q_id_(), q_begin_(), q_end_(), q_length_(), t_name_(),
        t_id_(), t_begin_(), t_end_(), t_length_(), strand_(), length_(),
        error_(), cigar_(), is_valid_(true), is_transmuted_(true),
        breaking_points_(), dual_breaking_points_() {
}
116 | 
// Looks up t in t_to_id. On a hit the mapped value is written to id and true
// is returned; on a miss id is left untouched and false is returned.
template<typename T>
bool transmuteId(const std::unordered_map<T, uint64_t>& t_to_id, const T& t,
    uint64_t& id) {

    const auto entry = t_to_id.find(t);
    const bool found = entry != t_to_id.end();
    if (found) {
        id = entry->second;
    }
    return found;
}
128 | 
129 | void Overlap::transmute(const std::vector<std::unique_ptr<Sequence>>& sequences,
130 |     const std::unordered_map<std::string, uint64_t>& name_to_id,
131 |     const std::unordered_map<uint64_t, uint64_t>& id_to_id) {
132 | 
133 |     if (!is_valid_ || is_transmuted_) {
134 |         return;
135 |     }
136 | 
137 |     if (!q_name_.empty()) {
138 |         if (!transmuteId(name_to_id, q_name_ + "q", q_id_)) {
139 |             is_valid_ = false;
140 |             return;
141 |         }
142 |         std::string().swap(q_name_);
143 |     } else if (!transmuteId(id_to_id, q_id_ << 1 | 0, q_id_)) {
144 |         is_valid_ = false;
145 |         return;
146 |     }
147 | 
148 |     if (q_length_ != sequences[q_id_]->data().size()) {
149 |         fprintf(stderr, "[racon::Overlap::transmute] error: "
150 |             "unequal lengths in sequence and overlap file for sequence %s!\n",
151 |             sequences[q_id_]->name().c_str());
152 |         exit(1);
153 |     }
154 | 
155 |     if (!t_name_.empty()) {
156 |         if (!transmuteId(name_to_id, t_name_ + "t", t_id_)) {
157 |             is_valid_ = false;
158 |             return;
159 |         }
160 |         std::string().swap(t_name_);
161 |     } else if (!transmuteId(id_to_id, t_id_ << 1 | 1, t_id_)) {
162 |         is_valid_ = false;
163 |         return;
164 |     }
165 | 
166 |     if (t_length_ != 0 && t_length_ != sequences[t_id_]->data().size()) {
167 |         fprintf(stderr, "[racon::Overlap::transmute] error: "
168 |             "unequal lengths in target and overlap file for target %s!\n",
169 |             sequences[t_id_]->name().c_str());
170 |         exit(1);
171 |     }
172 | 
173 |     // for SAM input
174 |     t_length_ = sequences[t_id_]->data().size();
175 | 
176 |     is_transmuted_ = true;
177 | }
178 | 
179 | void Overlap::find_breaking_points(const std::vector<std::unique_ptr<Sequence>>& sequences,
180 |     uint32_t window_length) {
181 |     // Fills breaking_points_ for this overlap, aligning first when no CIGAR is stored.
182 |     if (!is_transmuted_) {
183 |         fprintf(stderr, "[racon::Overlap::find_breaking_points] error: "
184 |             "overlap is not transmuted!\n");
185 |         exit(1);
186 |     }
187 |     if (!breaking_points_.empty()) {
188 |         return; // breaking points are already present
189 |     }
190 | 
191 |     if (cigar_.empty()) {
192 |         const auto& query = sequences[q_id_];
193 |         // With strand_ set the query is read from its reverse complement.
194 |         const char* query_start = strand_ ?
195 |             &(query->reverse_complement()[q_length_ - q_end_]) :
196 |             &(query->data()[q_begin_]);
197 |         const char* target_start = &(sequences[t_id_]->data()[t_begin_]);
198 |         align_overlaps(query_start, q_end_ - q_begin_, target_start, t_end_ - t_begin_);
199 |     }
200 | 
201 |     find_breaking_points_from_cigar(window_length);
202 |     std::string().swap(cigar_); // swap with a temporary to drop the CIGAR storage
203 | }
204 | 
205 | void Overlap::align_overlaps(const char* q, uint32_t q_length, const char* t, uint32_t t_length)
206 | {
207 |     // Aligns q against t with edlib (NW mode, path task, k = -1) and stores the CIGAR in cigar_.
208 |     const auto config = edlibNewAlignConfig(-1, EDLIB_MODE_NW,
209 |             EDLIB_TASK_PATH, nullptr, 0);
210 |     EdlibAlignResult result = edlibAlign(q, q_length, t, t_length, config);
211 | 
212 |     if (result.status != EDLIB_STATUS_OK) {
213 |         fprintf(stderr, "[racon::Overlap::find_breaking_points] error: "
214 |                 "edlib unable to align pair (%zu x %zu)!\n", q_id_, t_id_);
215 |         exit(1);
216 |     }
217 | 
218 |     // The CIGAR comes back as a C string that is copied into cigar_ and then freed here.
219 |     char* cigar = edlibAlignmentToCigar(result.alignment,
220 |             result.alignmentLength, EDLIB_CIGAR_STANDARD);
221 |     cigar_ = cigar;
222 |     free(cigar);
223 |     edlibFreeAlignResult(result);
224 | }
225 | 
226 | void Overlap::find_breaking_points_from_cigar(uint32_t window_length)
227 | {
228 |     // Walks cigar_ along the target and records, per window of window_length target
229 |     // bases, the first and one-past-last matched (target, query) positions.
230 | 
231 |     std::vector<int32_t> window_ends; // last target index of each window
232 |     for (uint32_t i = 0; i < t_end_; i += window_length) {
233 |         if (i > t_begin_) {
234 |             window_ends.emplace_back(i - 1);
235 |         }
236 |     }
237 |     window_ends.emplace_back(t_end_ - 1);
238 | 
239 |     uint32_t w = 0;
240 |     bool found_first_match = false;
241 |     std::pair<uint32_t, uint32_t> first_match = {0, 0}, last_match = {0, 0};
242 | 
243 |     // Both cursors start one position before the aligned region.
244 |     int32_t q_ptr = (strand_ ? (q_length_ - q_end_) : q_begin_) - 1;
245 |     int32_t t_ptr = t_begin_ - 1;
246 | 
247 |     // Emits the current window's pair when t_ptr reaches its end. The bounds check
248 |     // stops a CIGAR that overruns t_end_ from reading past window_ends.
249 |     auto close_window_if_reached = [&]() {
250 |         if (w < window_ends.size() && t_ptr == window_ends[w]) {
251 |             if (found_first_match) {
252 |                 breaking_points_.emplace_back(first_match);
253 |                 breaking_points_.emplace_back(last_match);
254 |             }
255 |             found_first_match = false;
256 |             ++w;
257 |         }
258 |     };
259 | 
260 |     // j indexes the first digit of the length belonging to the operation at i.
261 |     for (uint32_t i = 0, j = 0; i < cigar_.size(); ++i) {
262 |         const char op = cigar_[i];
263 |         if (op == 'M' || op == '=' || op == 'X') { // advances query and target
264 |             const uint32_t num_bases = atoi(&cigar_[j]);
265 |             j = i + 1;
266 |             for (uint32_t k = 0; k < num_bases; ++k) {
267 |                 ++q_ptr;
268 |                 ++t_ptr;
269 |                 if (!found_first_match) {
270 |                     found_first_match = true;
271 |                     first_match.first = t_ptr;
272 |                     first_match.second = q_ptr;
273 |                 }
274 |                 last_match.first = t_ptr + 1;
275 |                 last_match.second = q_ptr + 1;
276 |                 close_window_if_reached();
277 |             }
278 |         } else if (op == 'I') { // advances the query only
279 |             q_ptr += atoi(&cigar_[j]);
280 |             j = i + 1;
281 |         } else if (op == 'D' || op == 'N') { // advances the target only
282 |             const uint32_t num_bases = atoi(&cigar_[j]);
283 |             j = i + 1;
284 |             for (uint32_t k = 0; k < num_bases; ++k) {
285 |                 ++t_ptr;
286 |                 close_window_if_reached();
287 |             }
288 |         } else if (op == 'S' || op == 'H' || op == 'P') {
289 |             j = i + 1; // length is skipped, neither cursor moves
290 |         }
291 |     }
292 | }
293 | 
294 | }
295 | 


--------------------------------------------------------------------------------
/src/cuda/cudapolisher.cpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file cudapolisher.cpp
  3 |  *
  4 |  * @brief CUDA Polisher class source file
  5 |  */
  6 | 
  7 | #include <future>
  8 | #include <iostream>
  9 | #include <chrono>
 10 | #include <cuda_profiler_api.h>
 11 | 
 12 | #include "sequence.hpp"
 13 | #include "logger.hpp"
 14 | #include "cudapolisher.hpp"
 15 | #include <claraparabricks/genomeworks/utils/cudautils.hpp>
 16 | #include <algorithm>
 17 | 
 18 | #include "bioparser/bioparser.hpp"
 19 | 
 20 | namespace racon {
 21 | 
 22 | // Number of progress-bar bins. The logger used by racon has a
 23 | // fixed 20 bins for its progress bar, so all progress updates
 24 | // in this file are broken into 20 steps.
 25 | const uint32_t RACON_LOGGER_BIN_SIZE = 20;
 26 | 
 27 | CUDAPolisher::CUDAPolisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
 28 |     std::unique_ptr<bioparser::Parser<Overlap>> oparser,
 29 |     std::unique_ptr<bioparser::Parser<Sequence>> tparser,
 30 |     PolisherType type, uint32_t window_length, double quality_threshold,
 31 |     double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
 32 |     uint32_t num_threads, uint32_t cudapoa_batches, bool cuda_banded_alignment,
 33 |     uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width)
 34 |         : Polisher(std::move(sparser), std::move(oparser), std::move(tparser),
 35 |                 type, window_length, quality_threshold, error_threshold, trim,
 36 |                 match, mismatch, gap, num_threads)
 37 |         , cudapoa_batches_(cudapoa_batches)
 38 |         , cudaaligner_batches_(cudaaligner_batches)
 39 |         , gap_(gap)
 40 |         , mismatch_(mismatch)
 41 |         , match_(match)
 42 |         , cuda_banded_alignment_(cuda_banded_alignment)
 43 |         , cudaaligner_band_width_(cudaaligner_band_width)
 44 | { // Forwards the common options to Polisher, keeps the CUDA options, then touches every GPU.
 45 |     claraparabricks::genomeworks::cudapoa::Init();
 46 |     claraparabricks::genomeworks::cudaaligner::Init();
 47 |     // Count the CUDA devices; at least one is required.
 48 |     GW_CU_CHECK_ERR(cudaGetDeviceCount(&num_devices_));
 49 | 
 50 |     if (num_devices_ < 1)
 51 |     {
 52 |         throw std::runtime_error("No GPU devices found.");
 53 |     }
 54 | 
 55 |     std::cerr << "Using " << num_devices_ << " GPU(s) to perform polishing" << std::endl;
 56 | 
 57 |     // Run dummy call on each device to initialize CUDA context.
 58 |     for(int32_t dev_id = 0; dev_id < num_devices_; dev_id++)
 59 |     {
 60 |         std::cerr << "Initialize device " << dev_id << std::endl;
 61 |         GW_CU_CHECK_ERR(cudaSetDevice(dev_id));
 62 |         GW_CU_CHECK_ERR(cudaFree(0)); // the dummy call: frees a null pointer
 63 |     }
 64 |     // The last device selected in the loop above stays selected for this thread.
 65 |     std::cerr << "[CUDAPolisher] Constructed." << std::endl;
 66 | }
 67 | 
 68 | CUDAPolisher::~CUDAPolisher()
 69 | { // Synchronizes with the device and stops the CUDA profiler; both return codes are ignored.
 70 |     cudaDeviceSynchronize(); // NOTE(review): presumably only covers the currently selected device — confirm
 71 |     cudaProfilerStop();
 72 | }
 73 | 
 74 | void CUDAPolisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Overlap>>& overlaps)
 75 | {
 76 |     // Aligns as many overlaps as possible on the GPUs, then hands everything to
 77 |     // Polisher::find_overlap_breaking_points for the breaking point detection.
 78 |     // The GPU stage is skipped when disabled or when there is nothing to align.
 79 |     if (cudaaligner_batches_ >= 1 && !overlaps.empty())
 80 |     {
 81 |         logger_->log();
 82 |         std::mutex mutex_overlaps;
 83 |         uint32_t next_overlap_index = 0;
 84 |         // Lambda expression for filling up next batch of alignments.
 85 |         auto fill_next_batch = [&mutex_overlaps, &next_overlap_index, &overlaps, this](CUDABatchAligner* batch) -> std::pair<uint32_t, uint32_t> {
 86 |             batch->reset();
 87 |             // Use mutex to read the vector containing overlaps in a threadsafe manner.
 88 |             std::lock_guard<std::mutex> guard(mutex_overlaps);
 89 |             uint32_t initial_count = next_overlap_index;
 90 |             uint32_t count = overlaps.size();
 91 |             while(next_overlap_index < count)
 92 |             {
 93 |                 if (batch->addOverlap(overlaps[next_overlap_index].get(), sequences_))
 94 |                 {
 95 |                     next_overlap_index++;
 96 |                 }
 97 |                 else
 98 |                 {
 99 |                     break;
100 |                 }
101 |             }
102 |             return {initial_count, next_overlap_index};
103 |         };
104 | 
105 |         // Variables for keeping track of logger progress bar.
106 |         uint32_t logger_step = overlaps.size() / RACON_LOGGER_BIN_SIZE;
107 |         int32_t log_bar_idx = 0, log_bar_idx_prev = -1;
108 |         uint32_t window_idx = 0;
109 |         std::mutex mutex_log_bar_idx;
110 | 
111 |         // Lambda expression for processing a batch of alignments.
112 |         auto process_batch = [&fill_next_batch, &logger_step, &log_bar_idx, &log_bar_idx_prev, &window_idx, &mutex_log_bar_idx, this](CUDABatchAligner* batch) -> void {
113 |             while(true)
114 |             {
115 |                 auto range = fill_next_batch(batch);
116 |                 if (batch->hasOverlaps())
117 |                 {
118 |                     // Launch workload.
119 |                     batch->alignAll();
120 | 
121 |                     // Generate CIGAR strings for successful alignments. The actual breaking points
122 |                     // will be calculated by the overlap object.
123 |                     batch->generate_cigar_strings();
124 |                     // Progress bar. logger_step is 0 when there are fewer overlaps than bins;
125 |                     // the update is then skipped instead of dividing by zero.
126 |                     {
127 |                         std::lock_guard<std::mutex> guard(mutex_log_bar_idx);
128 |                         window_idx += range.second - range.first;
129 |                         if (logger_step != 0)
130 |                         {
131 |                             log_bar_idx = window_idx / logger_step;
132 |                             if (log_bar_idx != log_bar_idx_prev && log_bar_idx < static_cast<int32_t>(RACON_LOGGER_BIN_SIZE))
133 |                             {
134 |                                 logger_->bar("[racon::CUDAPolisher::initialize] aligning overlaps");
135 |                                 log_bar_idx_prev = log_bar_idx;
136 |                             }
137 |                         }
138 |                     }
139 |                 }
140 |                 else
141 |                 {
142 |                     break;
143 |                 }
144 |             }
145 |         };
146 | 
147 |         // Calculate the mean overlap length and use it to derive the
148 |         // cudaaligner band width when none was requested.
149 | 
150 |         // Calculate average length (overlaps is non-empty here)
151 |         int64_t len_sum = 0;
152 |         for(uint32_t i = 0; i < overlaps.size(); i++)
153 |         {
154 |             len_sum += overlaps[i]->length();
155 |         }
156 |         int64_t mean = len_sum / overlaps.size();
157 | 
158 |         // Calculate band width automatically if set to 0
159 |         if (cudaaligner_band_width_ == 0)
160 |         {
161 |             // Use 10% of mean overlap length as band width
162 |             cudaaligner_band_width_ = static_cast<uint32_t>(mean * 0.1f);
163 |         }
164 | 
165 |         for(int32_t device = 0; device < num_devices_; device++)
166 |         {
167 |             GW_CU_CHECK_ERR(cudaSetDevice(device));
168 | 
169 |             // Split the usable free memory evenly between this device's aligners.
170 |             size_t free = 0, total = 0;
171 |             GW_CU_CHECK_ERR(cudaMemGetInfo(&free, &total));
172 |             const size_t free_usable_memory = static_cast<float>(free) * 90 / 100; // Using 90% of available memory
173 |             const int64_t usable_memory_per_aligner = free_usable_memory / cudaaligner_batches_;
174 |             const int32_t max_bandwidth = cudaaligner_band_width_ & ~0x1; // Band width needs to be even
175 |             std::cerr << "GPU " << device << ": Aligning with band width " << max_bandwidth << std::endl;
176 |             for(uint32_t batch = 0; batch < cudaaligner_batches_; batch++)
177 |             {
178 |                 batch_aligners_.emplace_back(createCUDABatchAligner(max_bandwidth, device, usable_memory_per_aligner));
179 |             }
180 |         }
181 | 
182 |         logger_->log("[racon::CUDAPolisher::initialize] allocated memory on GPUs for alignment");
183 | 
184 |         // Run batched alignment.
185 |         std::vector<std::future<void>> thread_futures;
186 |         for(auto& aligner : batch_aligners_)
187 |         {
188 |             thread_futures.emplace_back(
189 |                     thread_pool_->submit(
190 |                         process_batch,
191 |                         aligner.get()
192 |                         )
193 |                     );
194 |         }
195 | 
196 |         // Wait for all batch threads to finish.
197 |         for (const auto& future : thread_futures) {
198 |             future.wait();
199 |         }
200 | 
201 |         batch_aligners_.clear();
202 | 
203 |         // Determine overlaps missed by GPU which will fall back to CPU.
204 |         int64_t missing_overlaps = std::count_if(begin(overlaps), end(overlaps),[](std::unique_ptr<Overlap> const& o){ return o->cigar().empty();});
205 | 
206 |         std::cerr << "Alignment skipped by GPU: " << missing_overlaps << " / " << overlaps.size() << std::endl;
207 |     }
208 | 
209 |     // This call runs the breaking point detection code for all alignments.
210 |     // Any overlaps that couldn't be processed by the GPU are also handled here
211 |     // by the CPU aligner.
212 |     logger_->log();
213 |     Polisher::find_overlap_breaking_points(overlaps);
214 | }
215 | 
216 | void CUDAPolisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
217 |     bool drop_unpolished_sequences)
218 | {
219 |     // Appends the polished sequences to dst. With cudapoa_batches_ < 1 this is plain
220 |     // Polisher::polish; otherwise windows go to the GPUs first and the CPU redoes failures.
221 |     if (cudapoa_batches_ < 1)
222 |     {
223 |         Polisher::polish(dst, drop_unpolished_sequences);
224 |     }
225 |     else
226 |     {
227 |         // Creation and use of batches.
228 |         const uint32_t MAX_DEPTH_PER_WINDOW = 200;
229 |         for(int32_t device = 0; device < num_devices_; device++)
230 |         {
231 |             size_t total = 0, free = 0;
232 |             GW_CU_CHECK_ERR(cudaSetDevice(device));
233 |             GW_CU_CHECK_ERR(cudaMemGetInfo(&free, &total));
234 |             // Using 90% of available memory as heuristic since not all available memory can be used
235 |             // due to fragmentation.
236 |             size_t mem_per_batch = 0.9 * free / cudapoa_batches_;
237 |             for(uint32_t batch = 0; batch < cudapoa_batches_; batch++)
238 |             {
239 |                 batch_processors_.emplace_back(createCUDABatch(MAX_DEPTH_PER_WINDOW, device, mem_per_batch, gap_, mismatch_, match_, cuda_banded_alignment_));
240 |             }
241 |         }
242 |         logger_->log("[racon::CUDAPolisher::polish] allocated memory on GPUs for polishing");
243 | 
244 |         // Mutex for accessing the vector of windows.
245 |         std::mutex mutex_windows;
246 | 
247 |         // Initialize window consensus statuses.
248 |         window_consensus_status_.resize(windows_.size(), false);
249 | 
250 |         // Index of next window to be added to a batch.
251 |         uint32_t next_window_index = 0;
252 | 
253 |         // Lambda function for adding windows to batches.
254 |         auto fill_next_batch = [&mutex_windows, &next_window_index, this](CUDABatchProcessor* batch) -> std::pair<uint32_t, uint32_t> {
255 |             batch->reset();
256 | 
257 |             // Use mutex to read the vector containing windows in a threadsafe manner.
258 |             std::lock_guard<std::mutex> guard(mutex_windows);
259 | 
260 |             // Add consecutive windows until the batch rejects one or none are left.
261 |             uint32_t initial_count = next_window_index;
262 |             uint32_t count = windows_.size();
263 |             while(next_window_index < count)
264 |             {
265 |                 if (batch->addWindow(windows_.at(next_window_index)))
266 |                 {
267 |                     next_window_index++;
268 |                 }
269 |                 else
270 |                 {
271 |                     break;
272 |                 }
273 |             }
274 | 
275 |             return {initial_count, next_window_index};
276 |         };
277 | 
278 |         // Variables for keeping track of logger progress bar.
279 |         uint32_t logger_step = windows_.size() / RACON_LOGGER_BIN_SIZE;
280 |         int32_t log_bar_idx = 0, log_bar_idx_prev = -1;
281 |         uint32_t window_idx = 0;
282 |         std::mutex mutex_log_bar_idx;
283 |         logger_->log();
284 | 
285 |         // Lambda function for processing each batch.
286 |         auto process_batch = [&fill_next_batch, &logger_step, &log_bar_idx, &mutex_log_bar_idx, &window_idx, &log_bar_idx_prev, this](CUDABatchProcessor* batch) -> void {
287 |             while(true)
288 |             {
289 |                 std::pair<uint32_t, uint32_t> range = fill_next_batch(batch);
290 |                 if (batch->hasWindows())
291 |                 {
292 |                     // Launch workload.
293 |                     const std::vector<bool>& results = batch->generateConsensus();
294 | 
295 |                     // Check that the number of results matches the range of
296 |                     // windows that were added to the batch.
297 |                     if (results.size() != (range.second - range.first))
298 |                     {
299 |                         throw std::runtime_error("Windows processed doesn't match "
300 |                                 "range of windows passed to batch\n");
301 |                     }
302 |                     // NOTE(review): if window_consensus_status_ is a std::vector<bool>, these unlocked writes may race — confirm.
303 |                     // Copy over the results from the batch into the per window
304 |                     // result vector of the CUDAPolisher.
305 |                     for(uint32_t i = 0; i < results.size(); i++)
306 |                     {
307 |                         window_consensus_status_.at(range.first + i) = results.at(i);
308 |                     }
309 |                     // Progress bar. logger_step is 0 when there are fewer windows than bins;
310 |                     // the update is then skipped instead of dividing by zero.
311 |                     {
312 |                         std::lock_guard<std::mutex> guard(mutex_log_bar_idx);
313 |                         window_idx += results.size();
314 |                         if (logger_step != 0)
315 |                         {
316 |                             log_bar_idx = window_idx / logger_step;
317 |                             if (log_bar_idx != log_bar_idx_prev && log_bar_idx < static_cast<int32_t>(RACON_LOGGER_BIN_SIZE))
318 |                             {
319 |                                 while(log_bar_idx_prev <= log_bar_idx)
320 |                                 {
321 |                                     logger_->bar("[racon::CUDAPolisher::polish] generating consensus");
322 |                                     log_bar_idx_prev++;
323 |                                 }
324 |                             }
325 |                         }
326 |                     }
327 |                 }
328 |                 else
329 |                 {
330 |                     break;
331 |                 }
332 |             }
333 |         };
334 | 
335 |         // Process each of the batches in a separate thread.
336 |         std::vector<std::future<void>> thread_futures;
337 |         for(auto& batch_processor : batch_processors_)
338 |         {
339 |             thread_futures.emplace_back(
340 |                     thread_pool_->submit(
341 |                         process_batch,
342 |                         batch_processor.get()
343 |                         )
344 |                     );
345 |         }
346 | 
347 |         // Wait for all batch threads to finish.
348 |         for (const auto& future : thread_futures) {
349 |             future.wait();
350 |         }
351 | 
352 |         logger_->log("[racon::CUDAPolisher::polish] polished windows on GPU");
353 | 
354 |         // Start timing CPU time for failed windows on GPU
355 |         logger_->log();
356 |         // Process each failed windows in parallel on CPU
357 |         std::vector<std::future<bool>> thread_failed_windows;
358 |         for (uint64_t i = 0; i < windows_.size(); ++i) {
359 |             if (window_consensus_status_.at(i) == false)
360 |             {
361 |                 thread_failed_windows.emplace_back(thread_pool_->submit(
362 |                             [&](uint64_t j) -> bool {
363 |                             auto it = thread_to_id_.find(std::this_thread::get_id());
364 |                             if (it == thread_to_id_.end()) {
365 |                             fprintf(stderr, "[racon::CUDAPolisher::polish] error: "
366 |                                     "thread identifier not present!\n");
367 |                             exit(1);
368 |                             }
369 |                             return window_consensus_status_.at(j) = windows_[j]->generate_consensus(
370 |                                     alignment_engines_[it->second], trim_);
371 |                             }, i));
372 |             }
373 |         }
374 | 
375 |         // Wait for the CPU fallback tasks to finish.
376 |         for (const auto& t : thread_failed_windows) {
377 |             t.wait();
378 |         }
379 |         if (thread_failed_windows.size() > 0)
380 |         {
381 |             logger_->log("[racon::CUDAPolisher::polish] polished remaining windows on CPU");
382 |             logger_->log();
383 |         }
384 | 
385 |         // Collect results from all windows into final output.
386 |         std::string polished_data = "";
387 |         uint32_t num_polished_windows = 0;
388 | 
389 |         for (uint64_t i = 0; i < windows_.size(); ++i) {
390 | 
391 |             num_polished_windows += window_consensus_status_.at(i) == true ? 1 : 0;
392 |             polished_data += windows_[i]->consensus();
393 | 
394 |             if (i == windows_.size() - 1 || windows_[i + 1]->rank() == 0) {
395 |                 double polished_ratio = num_polished_windows /
396 |                     static_cast<double>(windows_[i]->rank() + 1);
397 | 
398 |                 if (!drop_unpolished_sequences || polished_ratio > 0) {
399 |                     std::string tags = type_ == PolisherType::kF ? "r" : "";
400 |                     tags += " LN:i:" + std::to_string(polished_data.size());
401 |                     tags += " RC:i:" + std::to_string(targets_coverages_[windows_[i]->id()]);
402 |                     tags += " XC:f:" + std::to_string(polished_ratio);
403 |                     dst.emplace_back(createSequence(sequences_[windows_[i]->id()]->name() +
404 |                                 tags, polished_data));
405 |                 }
406 | 
407 |                 num_polished_windows = 0;
408 |                 polished_data.clear();
409 |             }
410 |             windows_[i].reset();
411 |         }
412 | 
413 |         logger_->log("[racon::CUDAPolisher::polish] generated consensus");
414 | 
415 |         // Clear POA processors.
416 |         batch_processors_.clear();
417 |     }
418 | }
419 | 
420 | }
421 | 


--------------------------------------------------------------------------------
/test/racon_test.cpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file racon_test.cpp
  3 |  *
  4 |  * @brief Racon unit test source file
  5 |  */
  6 | 
  7 | #include "racon_test_config.h"
  8 | 
  9 | #include "sequence.hpp"
 10 | #include "polisher.hpp"
 11 | 
 12 | #include "edlib.h"
 13 | #include "bioparser/bioparser.hpp"
 14 | #include "gtest/gtest.h"
 15 | 
 16 | uint32_t calculateEditDistance(const std::string& query, const std::string& target) {
 17 |     // Edit distance reported by edlib for query vs. target under its default configuration.
 18 |     EdlibAlignResult alignment = edlibAlign(query.c_str(), query.size(),
 19 |         target.c_str(), target.size(), edlibDefaultAlignConfig());
 20 |     // Copy the value out before the result is released.
 21 |     const uint32_t distance = alignment.editDistance;
 22 |     edlibFreeAlignResult(alignment);
 23 | 
 24 |     return distance;
 25 | }
 26 | 
 27 | // Fixture owning the racon::Polisher that each polishing test drives.
 28 | class RaconPolishingTest: public ::testing::Test {
 29 | public:
 30 |     // Creates the polisher; passes `true` and `4` for what are presumably trim and thread count.
 31 |     void SetUp(const std::string& sequences_path, const std::string& overlaps_path,
 32 |         const std::string& target_path, racon::PolisherType type,
 33 |         uint32_t window_length, double quality_threshold, double error_threshold,
 34 |         int8_t match, int8_t mismatch, int8_t gap, uint32_t cuda_batches = 0,
 35 |         bool cuda_banded_alignment = false, uint32_t cudaaligner_batches = 0) {
 36 |         polisher = racon::createPolisher(sequences_path, overlaps_path, target_path,
 37 |             type, window_length, quality_threshold, error_threshold, true, match,
 38 |             mismatch, gap, 4, cuda_batches, cuda_banded_alignment, cudaaligner_batches);
 39 |     }
 40 | 
 41 |     void TearDown() {}
 42 | 
 43 |     // Forwards to the polisher's initialize().
 44 |     void initialize() { polisher->initialize(); }
 45 | 
 46 |     // Forwards to the polisher's polish(); results are placed in dst.
 47 |     void polish(std::vector<std::unique_ptr<racon::Sequence>>& dst,
 48 |         bool drop_unpolished_sequences) {
 49 |         polisher->polish(dst, drop_unpolished_sequences);
 50 |     }
 51 | 
 52 |     std::unique_ptr<racon::Polisher> polisher;
 53 | };
 54 | 
 55 | TEST(RaconInitializeTest, PolisherTypeError) { // 3 is presumably not a valid PolisherType; creation must die
 56 |     EXPECT_DEATH((racon::createPolisher("", "", "", static_cast<racon::PolisherType>(3),
 57 |         0, 0, 0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: invalid polisher"
 58 |         " type!");
 59 | }
 60 | // A window length of 0 must be rejected; '.' in the patterns presumably matches the message's brackets.
 61 | TEST(RaconInitializeTest, WindowLengthError) {
 62 |     EXPECT_DEATH((racon::createPolisher("", "", "", racon::PolisherType::kC, 0,
 63 |         0, 0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: invalid window length!");
 64 | }
 65 | // An empty sequences path has no supported extension and must be rejected.
 66 | TEST(RaconInitializeTest, SequencesPathExtensionError) {
 67 |     EXPECT_DEATH((racon::createPolisher("", "", "", racon::PolisherType::kC, 500,
 68 |         0, 0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: file  has unsupported "
 69 |         "format extension .valid extensions: .fasta, .fasta.gz, .fna, .fna.gz, "
 70 |         ".fa, .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz.!");
 71 | }
 72 | // With a sequences file given, an empty overlaps path must be rejected.
 73 | TEST(RaconInitializeTest, OverlapsPathExtensionError) {
 74 |     EXPECT_DEATH((racon::createPolisher(racon_test_data_path + "sample_reads.fastq.gz",
 75 |         "", "", racon::PolisherType::kC, 500, 0, 0, 0, 0, 0, 0, 0)),
 76 |         ".racon::createPolisher. error: file  has unsupported format extension "
 77 |         ".valid extensions: .mhap, .mhap.gz, .paf, .paf.gz, .sam, .sam.gz.!");
 78 | }
 79 | // With sequences and overlaps files given, an empty target path must be rejected.
 80 | TEST(RaconInitializeTest, TargetPathExtensionError) {
 81 |     EXPECT_DEATH((racon::createPolisher(racon_test_data_path + "sample_reads.fastq.gz",
 82 |         racon_test_data_path + "sample_overlaps.paf.gz", "", racon::PolisherType::kC,
 83 |         500, 0, 0, 0, 0, 0, 0, 0)), ".racon::createPolisher. error: file  has "
 84 |         "unsupported format extension .valid extensions: .fasta, .fasta.gz, .fna, "
 85 |         ".fna.gz, .fa, .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz.!");
 86 | }
 87 | 
 88 | TEST_F(RaconPolishingTest, ConsensusWithQualities) {
 89 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
 90 |         "sample_overlaps.paf.gz", racon_test_data_path + "sample_layout.fasta.gz",
 91 |         racon::PolisherType::kC, 500, 10, 0.3, 5, -4, -8);
 92 |     // FASTQ reads + PAF overlaps, window length 500, scores 5/-4/-8.
 93 |     initialize();
 94 | 
 95 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
 96 |     polish(polished_sequences, true);
 97 |     // Fatal assertion: the element accesses below are invalid if the size is wrong.
 98 |     ASSERT_EQ(polished_sequences.size(), 1u);
 99 |     polished_sequences[0]->create_reverse_complement();
100 | 
101 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
102 |         racon_test_data_path + "sample_reference.fasta.gz");
103 |     parser->parse(polished_sequences, -1);
104 |     ASSERT_EQ(polished_sequences.size(), 2u);
105 |     // Element 1 is the parsed reference; the expected distance is a pinned value.
106 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
107 |         polished_sequences[1]->data()), 1312);
108 | }
109 | 
110 | TEST_F(RaconPolishingTest, ConsensusWithoutQualities) {
111 |     SetUp(racon_test_data_path + "sample_reads.fasta.gz", racon_test_data_path +
112 |         "sample_overlaps.paf.gz", racon_test_data_path + "sample_layout.fasta.gz",
113 |         racon::PolisherType::kC, 500, 10, 0.3, 5, -4, -8);
114 |     // FASTA reads + PAF overlaps, window length 500, scores 5/-4/-8.
115 |     initialize();
116 | 
117 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
118 |     polish(polished_sequences, true);
119 |     // Fatal assertion: the element accesses below are invalid if the size is wrong.
120 |     ASSERT_EQ(polished_sequences.size(), 1u);
121 |     polished_sequences[0]->create_reverse_complement();
122 | 
123 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
124 |         racon_test_data_path + "sample_reference.fasta.gz");
125 |     parser->parse(polished_sequences, -1);
126 |     ASSERT_EQ(polished_sequences.size(), 2u);
127 |     // Element 1 is the parsed reference; the expected distance is a pinned value.
128 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
129 |         polished_sequences[1]->data()), 1566);
130 | }
131 | 
132 | TEST_F(RaconPolishingTest, ConsensusWithQualitiesAndAlignments) {
133 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
134 |         "sample_overlaps.sam.gz", racon_test_data_path + "sample_layout.fasta.gz",
135 |         racon::PolisherType::kC, 500, 10, 0.3, 5, -4, -8);
136 |     // FASTQ reads + SAM overlaps, window length 500, scores 5/-4/-8.
137 |     initialize();
138 | 
139 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
140 |     polish(polished_sequences, true);
141 |     // Fatal assertion: the element accesses below are invalid if the size is wrong.
142 |     ASSERT_EQ(polished_sequences.size(), 1u);
143 |     polished_sequences[0]->create_reverse_complement();
144 | 
145 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
146 |         racon_test_data_path + "sample_reference.fasta.gz");
147 |     parser->parse(polished_sequences, -1);
148 |     ASSERT_EQ(polished_sequences.size(), 2u);
149 |     // Element 1 is the parsed reference; the expected distance is a pinned value.
150 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
151 |         polished_sequences[1]->data()), 1317);
152 | }
153 | 
154 | TEST_F(RaconPolishingTest, ConsensusWithoutQualitiesAndWithAlignments) {
155 |     SetUp(racon_test_data_path + "sample_reads.fasta.gz", racon_test_data_path +
156 |         "sample_overlaps.sam.gz", racon_test_data_path + "sample_layout.fasta.gz",
157 |         racon::PolisherType::kC, 500, 10, 0.3, 5, -4, -8);
158 |     // FASTA reads + SAM overlaps, window length 500, scores 5/-4/-8.
159 |     initialize();
160 | 
161 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
162 |     polish(polished_sequences, true);
163 |     // Fatal assertion: the element accesses below are invalid if the size is wrong.
164 |     ASSERT_EQ(polished_sequences.size(), 1u);
165 |     polished_sequences[0]->create_reverse_complement();
166 | 
167 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
168 |         racon_test_data_path + "sample_reference.fasta.gz");
169 |     parser->parse(polished_sequences, -1);
170 |     ASSERT_EQ(polished_sequences.size(), 2u);
171 |     // Element 1 is the parsed reference; the expected distance is a pinned value.
172 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
173 |         polished_sequences[1]->data()), 1770);
174 | }
175 | 
176 | TEST_F(RaconPolishingTest, ConsensusWithQualitiesLargerWindow) {
177 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
178 |         "sample_overlaps.paf.gz", racon_test_data_path + "sample_layout.fasta.gz",
179 |         racon::PolisherType::kC, 1000, 10, 0.3, 5, -4, -8);
180 |     // FASTQ reads + PAF overlaps, window length 1000, scores 5/-4/-8.
181 |     initialize();
182 | 
183 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
184 |     polish(polished_sequences, true);
185 |     // Fatal assertion: the element accesses below are invalid if the size is wrong.
186 |     ASSERT_EQ(polished_sequences.size(), 1u);
187 |     polished_sequences[0]->create_reverse_complement();
188 | 
189 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
190 |         racon_test_data_path + "sample_reference.fasta.gz");
191 |     parser->parse(polished_sequences, -1);
192 |     ASSERT_EQ(polished_sequences.size(), 2u);
193 |     // Element 1 is the parsed reference; the expected distance is a pinned value.
194 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
195 |         polished_sequences[1]->data()), 1289);
196 | }
197 | 
198 | TEST_F(RaconPolishingTest, ConsensusWithQualitiesEditDistance) {
199 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
200 |         "sample_overlaps.paf.gz", racon_test_data_path + "sample_layout.fasta.gz",
201 |         racon::PolisherType::kC, 500, 10, 0.3, 1, -1, -1);
202 | 
203 |     initialize();
204 | 
205 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
206 |     polish(polished_sequences, true);
207 |     EXPECT_EQ(polished_sequences.size(), 1);
208 | 
209 |     polished_sequences[0]->create_reverse_complement();
210 | 
211 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
212 |         racon_test_data_path + "sample_reference.fasta.gz");
213 |     parser->parse(polished_sequences, -1);
214 |     EXPECT_EQ(polished_sequences.size(), 2);
215 | 
216 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
217 |         polished_sequences[1]->data()), 1321);
218 | }
219 | 
220 | TEST_F(RaconPolishingTest, FragmentCorrectionWithQualities) {
221 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
222 |         "sample_ava_overlaps.paf.gz", racon_test_data_path + "sample_reads.fastq.gz",
223 |         racon::PolisherType::kC, 500, 10, 0.3, 1, -1, -1);
224 | 
225 |     initialize();
226 | 
227 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
228 |     polish(polished_sequences, true);
229 |     EXPECT_EQ(polished_sequences.size(), 39);
230 | 
231 |     uint32_t total_length = 0;
232 |     for (const auto& it: polished_sequences) {
233 |         total_length += it->data().size();
234 |     }
235 |     EXPECT_EQ(total_length, 389394);
236 | }
237 | 
238 | TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFull) {
239 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
240 |         "sample_ava_overlaps.paf.gz", racon_test_data_path + "sample_reads.fastq.gz",
241 |         racon::PolisherType::kF, 500, 10, 0.3, 1, -1, -1);
242 | 
243 |     initialize();
244 | 
245 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
246 |     polish(polished_sequences, false);
247 |     EXPECT_EQ(polished_sequences.size(), 236);
248 | 
249 |     uint32_t total_length = 0;
250 |     for (const auto& it: polished_sequences) {
251 |         total_length += it->data().size();
252 |     }
253 |     EXPECT_EQ(total_length, 1658216);
254 | }
255 | 
256 | TEST_F(RaconPolishingTest, FragmentCorrectionWithoutQualitiesFull) {
257 |     SetUp(racon_test_data_path + "sample_reads.fasta.gz", racon_test_data_path +
258 |         "sample_ava_overlaps.paf.gz", racon_test_data_path + "sample_reads.fasta.gz",
259 |         racon::PolisherType::kF, 500, 10, 0.3, 1, -1, -1);
260 | 
261 |     initialize();
262 | 
263 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
264 |     polish(polished_sequences, false);
265 |     EXPECT_EQ(polished_sequences.size(), 236);
266 | 
267 |     uint32_t total_length = 0;
268 |     for (const auto& it: polished_sequences) {
269 |         total_length += it->data().size();
270 |     }
271 |     EXPECT_EQ(total_length, 1663982);
272 | }
273 | 
274 | TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFullMhap) {
275 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
276 |         "sample_ava_overlaps.mhap.gz", racon_test_data_path + "sample_reads.fastq.gz",
277 |         racon::PolisherType::kF, 500, 10, 0.3, 1, -1, -1);
278 | 
279 |     initialize();
280 | 
281 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
282 |     polish(polished_sequences, false);
283 |     EXPECT_EQ(polished_sequences.size(), 236);
284 | 
285 |     uint32_t total_length = 0;
286 |     for (const auto& it: polished_sequences) {
287 |         total_length += it->data().size();
288 |     }
289 |     EXPECT_EQ(total_length, 1658216);
290 | }
291 | 
292 | #ifdef CUDA_ENABLED
293 | TEST_F(RaconPolishingTest, ConsensusWithQualitiesCUDA) {
294 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
295 |         "sample_overlaps.paf.gz", racon_test_data_path + "sample_layout.fasta.gz",
296 |         racon::PolisherType::kC, 500, 10, 0.3, 5, -4, -8, 1);
297 | 
298 |     initialize();
299 | 
300 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
301 |     polish(polished_sequences, true);
302 |     EXPECT_EQ(polished_sequences.size(), 1);
303 | 
304 |     polished_sequences[0]->create_reverse_complement();
305 | 
306 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
307 |         racon_test_data_path + "sample_reference.fasta.gz");
308 |     parser->parse(polished_sequences, -1);
309 |     EXPECT_EQ(polished_sequences.size(), 2);
310 | 
311 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
312 |         polished_sequences[1]->data()), 1385); // CPU 1312
313 | }
314 | 
315 | TEST_F(RaconPolishingTest, ConsensusWithoutQualitiesCUDA) {
316 |     SetUp(racon_test_data_path + "sample_reads.fasta.gz", racon_test_data_path +
317 |         "sample_overlaps.paf.gz", racon_test_data_path + "sample_layout.fasta.gz",
318 |         racon::PolisherType::kC, 500, 10, 0.3, 5, -4, -8, 1);
319 | 
320 |     initialize();
321 | 
322 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
323 |     polish(polished_sequences, true);
324 |     EXPECT_EQ(polished_sequences.size(), 1);
325 | 
326 |     polished_sequences[0]->create_reverse_complement();
327 | 
328 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
329 |         racon_test_data_path + "sample_reference.fasta.gz");
330 |     parser->parse(polished_sequences, -1);
331 |     EXPECT_EQ(polished_sequences.size(), 2);
332 | 
333 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
334 |         polished_sequences[1]->data()), 1607); // CPU 1566
335 | }
336 | 
337 | TEST_F(RaconPolishingTest, ConsensusWithQualitiesAndAlignmentsCUDA) {
338 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
339 |         "sample_overlaps.sam.gz", racon_test_data_path + "sample_layout.fasta.gz",
340 |         racon::PolisherType::kC, 500, 10, 0.3, 5, -4, -8, 1);
341 | 
342 |     initialize();
343 | 
344 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
345 |     polish(polished_sequences, true);
346 |     EXPECT_EQ(polished_sequences.size(), 1);
347 | 
348 |     polished_sequences[0]->create_reverse_complement();
349 | 
350 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
351 |         racon_test_data_path + "sample_reference.fasta.gz");
352 |     parser->parse(polished_sequences, -1);
353 |     EXPECT_EQ(polished_sequences.size(), 2);
354 | 
355 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
356 |         polished_sequences[1]->data()), 1541); // CPU 1317
357 | }
358 | 
359 | TEST_F(RaconPolishingTest, ConsensusWithoutQualitiesAndWithAlignmentsCUDA) {
360 |     SetUp(racon_test_data_path + "sample_reads.fasta.gz", racon_test_data_path +
361 |         "sample_overlaps.sam.gz", racon_test_data_path + "sample_layout.fasta.gz",
362 |         racon::PolisherType::kC, 500, 10, 0.3, 5, -4, -8, 1);
363 | 
364 |     initialize();
365 | 
366 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
367 |     polish(polished_sequences, true);
368 |     EXPECT_EQ(polished_sequences.size(), 1);
369 | 
370 |     polished_sequences[0]->create_reverse_complement();
371 | 
372 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
373 |         racon_test_data_path + "sample_reference.fasta.gz");
374 |     parser->parse(polished_sequences, -1);
375 |     EXPECT_EQ(polished_sequences.size(), 2);
376 | 
377 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
378 |         polished_sequences[1]->data()), 1661); // CPU 1770
379 | }
380 | 
381 | TEST_F(RaconPolishingTest, ConsensusWithQualitiesLargerWindowCUDA) {
382 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
383 |         "sample_overlaps.paf.gz", racon_test_data_path + "sample_layout.fasta.gz",
384 |         racon::PolisherType::kC, 1000, 10, 0.3, 5, -4, -8, 1);
385 | 
386 |     initialize();
387 | 
388 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
389 |     polish(polished_sequences, true);
390 |     EXPECT_EQ(polished_sequences.size(), 1);
391 | 
392 |     polished_sequences[0]->create_reverse_complement();
393 | 
394 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
395 |         racon_test_data_path + "sample_reference.fasta.gz");
396 |     parser->parse(polished_sequences, -1);
397 |     EXPECT_EQ(polished_sequences.size(), 2);
398 | 
399 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
400 |         polished_sequences[1]->data()), 4168); // CPU 1289
401 | }
402 | 
403 | TEST_F(RaconPolishingTest, ConsensusWithQualitiesEditDistanceCUDA) {
404 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
405 |         "sample_overlaps.paf.gz", racon_test_data_path + "sample_layout.fasta.gz",
406 |         racon::PolisherType::kC, 500, 10, 0.3, 1, -1, -1, 1);
407 | 
408 |     initialize();
409 | 
410 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
411 |     polish(polished_sequences, true);
412 |     EXPECT_EQ(polished_sequences.size(), 1);
413 | 
414 |     polished_sequences[0]->create_reverse_complement();
415 | 
416 |     auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
417 |         racon_test_data_path + "sample_reference.fasta.gz");
418 |     parser->parse(polished_sequences, -1);
419 |     EXPECT_EQ(polished_sequences.size(), 2);
420 | 
421 |     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
422 |         polished_sequences[1]->data()), 1361); // CPU 1321
423 | }
424 | 
425 | TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesCUDA) {
426 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
427 |         "sample_ava_overlaps.paf.gz", racon_test_data_path + "sample_reads.fastq.gz",
428 |         racon::PolisherType::kC, 500, 10, 0.3, 1, -1, -1, 1);
429 | 
430 |     initialize();
431 | 
432 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
433 |     polish(polished_sequences, true);
434 |     EXPECT_EQ(polished_sequences.size(), 39);
435 | 
436 |     uint32_t total_length = 0;
437 |     for (const auto& it: polished_sequences) {
438 |         total_length += it->data().size();
439 |     }
440 |     EXPECT_EQ(total_length, 385543); // CPU 389394
441 | }
442 | 
443 | TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFullCUDA) {
444 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
445 |         "sample_ava_overlaps.paf.gz", racon_test_data_path + "sample_reads.fastq.gz",
446 |         racon::PolisherType::kF, 500, 10, 0.3, 1, -1, -1, 1);
447 | 
448 |     initialize();
449 | 
450 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
451 |     polish(polished_sequences, false);
452 |     EXPECT_EQ(polished_sequences.size(), 236);
453 | 
454 |     uint32_t total_length = 0;
455 |     for (const auto& it: polished_sequences) {
456 |         total_length += it->data().size();
457 |     }
458 |     EXPECT_EQ(total_length, 1655505); // CPU 1658216
459 | }
460 | 
461 | TEST_F(RaconPolishingTest, FragmentCorrectionWithoutQualitiesFullCUDA) {
462 |     SetUp(racon_test_data_path + "sample_reads.fasta.gz", racon_test_data_path +
463 |         "sample_ava_overlaps.paf.gz", racon_test_data_path + "sample_reads.fasta.gz",
464 |         racon::PolisherType::kF, 500, 10, 0.3, 1, -1, -1, 1);
465 | 
466 |     initialize();
467 | 
468 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
469 |     polish(polished_sequences, false);
470 |     EXPECT_EQ(polished_sequences.size(), 236);
471 | 
472 |     uint32_t total_length = 0;
473 |     for (const auto& it: polished_sequences) {
474 |         total_length += it->data().size();
475 |     }
476 |     EXPECT_EQ(total_length, 1663732); // CPU 1663982
477 | }
478 | 
479 | TEST_F(RaconPolishingTest, FragmentCorrectionWithQualitiesFullMhapCUDA) {
480 |     SetUp(racon_test_data_path + "sample_reads.fastq.gz", racon_test_data_path +
481 |         "sample_ava_overlaps.mhap.gz", racon_test_data_path + "sample_reads.fastq.gz",
482 |         racon::PolisherType::kF, 500, 10, 0.3, 1, -1, -1, 1);
483 | 
484 |     initialize();
485 | 
486 |     std::vector<std::unique_ptr<racon::Sequence>> polished_sequences;
487 |     polish(polished_sequences, false);
488 |     EXPECT_EQ(polished_sequences.size(), 236);
489 | 
490 |     uint32_t total_length = 0;
491 |     for (const auto& it: polished_sequences) {
492 |         total_length += it->data().size();
493 |     }
494 |     EXPECT_EQ(total_length, 1655505); // CPU 1658216
495 | }
496 | #endif
497 | 


--------------------------------------------------------------------------------
/src/polisher.cpp:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * @file polisher.cpp
  3 |  *
  4 |  * @brief Polisher class source file
  5 |  */
  6 | 
  7 | #include <algorithm>
  8 | #include <unordered_set>
  9 | #include <iostream>
 10 | 
 11 | #include "overlap.hpp"
 12 | #include "sequence.hpp"
 13 | #include "window.hpp"
 14 | #include "logger.hpp"
 15 | #include "polisher.hpp"
 16 | #ifdef CUDA_ENABLED
 17 | #include "cuda/cudapolisher.hpp"
 18 | #endif
 19 | 
 20 | #include "bioparser/bioparser.hpp"
 21 | #include "thread_pool/thread_pool.hpp"
 22 | #include "spoa/spoa.hpp"
 23 | 
 24 | namespace racon {
 25 | 
// Chunk-size argument handed to the sequence and overlap parsers' parse()
// calls in Polisher::initialize (presumably a byte budget per call - confirm
// against bioparser).
constexpr uint32_t kChunkSize = 1024 * 1024 * 1024; // ~ 1GB
 27 | 
 28 | template<class T>
 29 | uint64_t shrinkToFit(std::vector<std::unique_ptr<T>>& src, uint64_t begin) {
 30 | 
 31 |     uint64_t i = begin;
 32 |     for (uint64_t j = begin; i < src.size(); ++i) {
 33 |         if (src[i] != nullptr) {
 34 |             continue;
 35 |         }
 36 | 
 37 |         j = std::max(j, i);
 38 |         while (j < src.size() && src[j] == nullptr) {
 39 |             ++j;
 40 |         }
 41 | 
 42 |         if (j >= src.size()) {
 43 |             break;
 44 |         } else if (i != j) {
 45 |             src[i].swap(src[j]);
 46 |         }
 47 |     }
 48 |     uint64_t num_deletions = src.size() - i;
 49 |     if (i < src.size()) {
 50 |         src.resize(i);
 51 |     }
 52 |     return num_deletions;
 53 | }
 54 | 
 55 | std::unique_ptr<Polisher> createPolisher(const std::string& sequences_path,
 56 |     const std::string& overlaps_path, const std::string& target_path,
 57 |     PolisherType type, uint32_t window_length, double quality_threshold,
 58 |     double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
 59 |     uint32_t num_threads, uint32_t cudapoa_batches, bool cuda_banded_alignment,
 60 |     uint32_t cudaaligner_batches, uint32_t cudaaligner_band_width) {
 61 | 
 62 |     if (type != PolisherType::kC && type != PolisherType::kF) {
 63 |         fprintf(stderr, "[racon::createPolisher] error: invalid polisher type!\n");
 64 |         exit(1);
 65 |     }
 66 | 
 67 |     if (window_length == 0) {
 68 |         fprintf(stderr, "[racon::createPolisher] error: invalid window length!\n");
 69 |         exit(1);
 70 |     }
 71 | 
 72 |     std::unique_ptr<bioparser::Parser<Sequence>> sparser = nullptr,
 73 |         tparser = nullptr;
 74 |     std::unique_ptr<bioparser::Parser<Overlap>> oparser = nullptr;
 75 | 
 76 |     auto is_suffix = [](const std::string& src, const std::string& suffix) -> bool {
 77 |         if (src.size() < suffix.size()) {
 78 |             return false;
 79 |         }
 80 |         return src.compare(src.size() - suffix.size(), suffix.size(), suffix) == 0;
 81 |     };
 82 | 
 83 |     if (is_suffix(sequences_path, ".fasta") || is_suffix(sequences_path, ".fasta.gz") ||
 84 |         is_suffix(sequences_path, ".fna") || is_suffix(sequences_path, ".fna.gz") ||
 85 |         is_suffix(sequences_path, ".fa") || is_suffix(sequences_path, ".fa.gz")) {
 86 |         sparser = bioparser::createParser<bioparser::FastaParser, Sequence>(
 87 |             sequences_path);
 88 |     } else if (is_suffix(sequences_path, ".fastq") || is_suffix(sequences_path, ".fastq.gz") ||
 89 |         is_suffix(sequences_path, ".fq") || is_suffix(sequences_path, ".fq.gz")) {
 90 |         sparser = bioparser::createParser<bioparser::FastqParser, Sequence>(
 91 |             sequences_path);
 92 |     } else {
 93 |         fprintf(stderr, "[racon::createPolisher] error: "
 94 |             "file %s has unsupported format extension (valid extensions: "
 95 |             ".fasta, .fasta.gz, .fna, .fna.gz, .fa, .fa.gz, .fastq, .fastq.gz, "
 96 |             ".fq, .fq.gz)!\n",
 97 |             sequences_path.c_str());
 98 |         exit(1);
 99 |     }
100 | 
101 |     if (is_suffix(overlaps_path, ".mhap") || is_suffix(overlaps_path, ".mhap.gz")) {
102 |         oparser = bioparser::createParser<bioparser::MhapParser, Overlap>(
103 |             overlaps_path);
104 |     } else if (is_suffix(overlaps_path, ".paf") || is_suffix(overlaps_path, ".paf.gz")) {
105 |         oparser = bioparser::createParser<bioparser::PafParser, Overlap>(
106 |             overlaps_path);
107 |     } else if (is_suffix(overlaps_path, ".sam") || is_suffix(overlaps_path, ".sam.gz")) {
108 |         oparser = bioparser::createParser<bioparser::SamParser, Overlap>(
109 |             overlaps_path);
110 |     } else {
111 |         fprintf(stderr, "[racon::createPolisher] error: "
112 |             "file %s has unsupported format extension (valid extensions: "
113 |             ".mhap, .mhap.gz, .paf, .paf.gz, .sam, .sam.gz)!\n", overlaps_path.c_str());
114 |         exit(1);
115 |     }
116 | 
117 |     if (is_suffix(target_path, ".fasta") || is_suffix(target_path, ".fasta.gz") ||
118 |         is_suffix(target_path, ".fna") || is_suffix(target_path, ".fna.gz") ||
119 |         is_suffix(target_path, ".fa") || is_suffix(target_path, ".fa.gz")) {
120 |         tparser = bioparser::createParser<bioparser::FastaParser, Sequence>(
121 |             target_path);
122 |     } else if (is_suffix(target_path, ".fastq") || is_suffix(target_path, ".fastq.gz") ||
123 |         is_suffix(target_path, ".fq") || is_suffix(target_path, ".fq.gz")) {
124 |         tparser = bioparser::createParser<bioparser::FastqParser, Sequence>(
125 |             target_path);
126 |     } else {
127 |         fprintf(stderr, "[racon::createPolisher] error: "
128 |             "file %s has unsupported format extension (valid extensions: "
129 |             ".fasta, .fasta.gz, .fna, .fna.gz, .fa, .fa.gz, .fastq, .fastq.gz, "
130 |             ".fq, .fq.gz)!\n",
131 |             target_path.c_str());
132 |         exit(1);
133 |     }
134 | 
135 |     if (cudapoa_batches > 0 || cudaaligner_batches > 0)
136 |     {
137 | #ifdef CUDA_ENABLED
138 |         // If CUDA is enabled, return an instance of the CUDAPolisher object.
139 |         return std::unique_ptr<Polisher>(new CUDAPolisher(std::move(sparser),
140 |                     std::move(oparser), std::move(tparser), type, window_length,
141 |                     quality_threshold, error_threshold, trim, match, mismatch, gap,
142 |                     num_threads, cudapoa_batches, cuda_banded_alignment, cudaaligner_batches,
143 |                     cudaaligner_band_width));
144 | #else
145 |         fprintf(stderr, "[racon::createPolisher] error: "
146 |                 "Attemping to use CUDA when CUDA support is not available.\n"
147 |                 "Please check logic in %s:%s\n",
148 |                 __FILE__, __func__);
149 |         exit(1);
150 | #endif
151 |     }
152 |     else
153 |     {
154 |         (void) cuda_banded_alignment;
155 |         return std::unique_ptr<Polisher>(new Polisher(std::move(sparser),
156 |                     std::move(oparser), std::move(tparser), type, window_length,
157 |                     quality_threshold, error_threshold, trim, match, mismatch, gap,
158 |                     num_threads));
159 |     }
160 | }
161 | 
162 | Polisher::Polisher(std::unique_ptr<bioparser::Parser<Sequence>> sparser,
163 |     std::unique_ptr<bioparser::Parser<Overlap>> oparser,
164 |     std::unique_ptr<bioparser::Parser<Sequence>> tparser,
165 |     PolisherType type, uint32_t window_length, double quality_threshold,
166 |     double error_threshold, bool trim, int8_t match, int8_t mismatch, int8_t gap,
167 |     uint32_t num_threads)
168 |         : sparser_(std::move(sparser)), oparser_(std::move(oparser)),
169 |         tparser_(std::move(tparser)), type_(type), quality_threshold_(
170 |         quality_threshold), error_threshold_(error_threshold), trim_(trim),
171 |         alignment_engines_(), sequences_(), dummy_quality_(window_length, '!'),
172 |         window_length_(window_length), windows_(),
173 |         thread_pool_(thread_pool::createThreadPool(num_threads)),
174 |         thread_to_id_(), logger_(new Logger()) {
175 | 
176 |     uint32_t id = 0;
177 |     for (const auto& it: thread_pool_->thread_identifiers()) {
178 |         thread_to_id_[it] = id++;
179 |     }
180 | 
181 |     for (uint32_t i = 0; i < num_threads; ++i) {
182 |         alignment_engines_.emplace_back(spoa::createAlignmentEngine(
183 |             spoa::AlignmentType::kNW, match, mismatch, gap));
184 |         alignment_engines_.back()->prealloc(window_length_, 5);
185 |     }
186 | }
187 | 
// Asks the logger to emit its total() line, labelled for the polisher, when
// the object is destroyed.
Polisher::~Polisher() {
    logger_->total("[racon::Polisher::] total =");
}
191 | 
/*!
 * @brief Loads targets, sequences and overlaps, filters the overlaps, splits
 * every target into windows of window_length_ and adds the overlapping
 * sequence fragments to those windows as layers.
 *
 * Does nothing (apart from a warning) when windows_ is already populated.
 * Terminates the process with exit(1) when the target set, the sequence set
 * or the resulting overlap set is empty, or when a sequence shares its name
 * with a target but differs in data or quality length.
 */
void Polisher::initialize() {

    if (!windows_.empty()) {
        fprintf(stderr, "[racon::Polisher::initialize] warning: "
            "object already initialized!\n");
        return;
    }

    logger_->log();

    // Targets are parsed first, so they occupy indices [0, targets_size) of
    // sequences_.
    tparser_->reset();
    tparser_->parse(sequences_, -1);

    uint64_t targets_size = sequences_.size();
    if (targets_size == 0) {
        fprintf(stderr, "[racon::Polisher::initialize] error: "
            "empty target sequences set!\n");
        exit(1);
    }

    // Lookup tables consumed by Overlap::transmute. Names carry a "t" suffix
    // for targets and a "q" suffix for sequences; numeric keys are the
    // ordinal shifted left by one with the low bit set for targets and
    // cleared for sequences. Values are indices into sequences_.
    std::unordered_map<std::string, uint64_t> name_to_id;
    std::unordered_map<uint64_t, uint64_t> id_to_id;
    for (uint64_t i = 0; i < targets_size; ++i) {
        name_to_id[sequences_[i]->name() + "t"] = i;
        id_to_id[i << 1 | 1] = i;
    }

    // Per-sequence flags handed to Sequence::transmute further below.
    // Targets start with name and data set; entries for the remaining
    // sequences are appended (all false) once loading is done.
    std::vector<bool> has_name(targets_size, true);
    std::vector<bool> has_data(targets_size, true);
    std::vector<bool> has_reverse_data(targets_size, false);

    logger_->log("[racon::Polisher::initialize] loaded target sequences");
    logger_->log();

    // sequences_size counts every parsed sequence, including the ones that
    // are dropped below because they duplicate a target.
    uint64_t sequences_size = 0, total_sequences_length = 0;

    sparser_->reset();
    while (true) {
        // l is the index of the first sequence appended by this chunk.
        uint64_t l = sequences_.size();
        auto status = sparser_->parse(sequences_, kChunkSize);

        // n counts the duplicates dropped so far in this chunk, so that
        // i - n is the index a kept sequence will have after compaction.
        uint64_t n = 0;
        for (uint64_t i = l; i < sequences_.size(); ++i, ++sequences_size) {
            total_sequences_length += sequences_[i]->data().size();

            auto it = name_to_id.find(sequences_[i]->name() + "t");
            if (it != name_to_id.end()) {
                // Same name as a target: only the data and quality lengths
                // are compared before the sequence is treated as a duplicate.
                if (sequences_[i]->data().size() != sequences_[it->second]->data().size() ||
                    sequences_[i]->quality().size() != sequences_[it->second]->quality().size()) {

                    fprintf(stderr, "[racon::Polisher::initialize] error: "
                        "duplicate sequence %s with unequal data\n",
                        sequences_[i]->name().c_str());
                    exit(1);
                }

                // Map the sequence onto the already loaded target and free
                // the duplicate.
                name_to_id[sequences_[i]->name() + "q"] = it->second;
                id_to_id[sequences_size << 1 | 0] = it->second;

                sequences_[i].reset();
                ++n;
            } else {
                name_to_id[sequences_[i]->name() + "q"] = i - n;
                id_to_id[sequences_size << 1 | 0] = i - n;
            }
        }

        // Remove the slots emptied by the duplicates of this chunk.
        shrinkToFit(sequences_, l);

        if (!status) {
            break;
        }
    }

    if (sequences_size == 0) {
        fprintf(stderr, "[racon::Polisher::initialize] error: "
            "empty sequences set!\n");
        exit(1);
    }

    has_name.resize(sequences_.size(), false);
    has_data.resize(sequences_.size(), false);
    has_reverse_data.resize(sequences_.size(), false);

    // An average sequence length of at most 1000 selects kNGS windows,
    // anything longer selects kTGS.
    WindowType window_type = static_cast<double>(total_sequences_length) /
        sequences_size <= 1000 ? WindowType::kNGS : WindowType::kTGS;

    logger_->log("[racon::Polisher::initialize] loaded sequences");
    logger_->log();

    std::vector<std::unique_ptr<Overlap>> overlaps;

    // Filters overlaps in [begin, end): drops those whose error exceeds
    // error_threshold_ and those whose query and target ids are equal. For
    // PolisherType::kC, overlaps in the range are additionally compared by
    // length and the shorter one is dropped (on equal lengths the earlier
    // one), so at most one of them remains.
    auto remove_invalid_overlaps = [&](uint64_t begin, uint64_t end) -> void {
        for (uint64_t i = begin; i < end; ++i) {
            if (overlaps[i] == nullptr) {
                continue;
            }
            if (overlaps[i]->error() > error_threshold_ ||
                overlaps[i]->q_id() == overlaps[i]->t_id()) {
                overlaps[i].reset();
                continue;
            }
            if (type_ == PolisherType::kC) {
                for (uint64_t j = i + 1; j < end; ++j) {
                    if (overlaps[j] == nullptr) {
                        continue;
                    }
                    if (overlaps[i]->length() > overlaps[j]->length()) {
                        overlaps[j].reset();
                    } else {
                        overlaps[i].reset();
                        break;
                    }
                }
            }
        }
    };

    oparser_->reset();
    // l is the index from which overlaps still have to be processed.
    uint64_t l = 0;
    while (true) {
        auto status = oparser_->parse(overlaps, kChunkSize);

        // c tracks the first overlap of the current run of overlaps sharing
        // a q_id; a run is filtered as soon as an overlap with a different
        // q_id shows up.
        uint64_t c = l;
        for (uint64_t i = l; i < overlaps.size(); ++i) {
            overlaps[i]->transmute(sequences_, name_to_id, id_to_id);

            if (!overlaps[i]->is_valid()) {
                overlaps[i].reset();
                continue;
            }

            // Skip slots emptied above; stops at i at the latest because
            // overlaps[i] is non-null here.
            while (overlaps[c] == nullptr) {
                ++c;
            }
            if (overlaps[c]->q_id() != overlaps[i]->q_id()) {
                remove_invalid_overlaps(c, i);
                c = i;
            }
        }
        // On the final chunk no further overlaps can extend the last run,
        // so it is filtered as well.
        if (!status) {
            remove_invalid_overlaps(c, overlaps.size());
            c = overlaps.size();
        }

        // Record, for every surviving overlap of a finished run, which
        // strand of the query sequence is needed.
        for (uint64_t i = l; i < c; ++i) {
            if (overlaps[i] == nullptr) {
                continue;
            }

            if (overlaps[i]->strand()) {
                has_reverse_data[overlaps[i]->q_id()] = true;
            } else {
                has_data[overlaps[i]->q_id()] = true;
            }
        }

        // Compact the vector and continue after the finished runs, shifted
        // by the number of removed slots.
        uint64_t n = shrinkToFit(overlaps, l);
        l = c - n;

        if (!status) {
            break;
        }
    }

    // Swap with empty temporaries to release the memory of the lookup tables.
    std::unordered_map<std::string, uint64_t>().swap(name_to_id);
    std::unordered_map<uint64_t, uint64_t>().swap(id_to_id);

    if (overlaps.empty()) {
        fprintf(stderr, "[racon::Polisher::initialize] error: "
            "empty overlap set!\n");
        exit(1);
    }

    logger_->log("[racon::Polisher::initialize] loaded overlaps");
    logger_->log();

    // Run Sequence::transmute for every sequence on the thread pool, passing
    // the flags gathered above, and wait for all tasks to finish.
    std::vector<std::future<void>> thread_futures;
    for (uint64_t i = 0; i < sequences_.size(); ++i) {
        thread_futures.emplace_back(thread_pool_->submit(
            [&](uint64_t j) -> void {
                sequences_[j]->transmute(has_name[j], has_data[j], has_reverse_data[j]);
            }, i));
    }
    for (const auto& it: thread_futures) {
        it.wait();
    }

    find_overlap_breaking_points(overlaps);

    logger_->log();

    // Split every target into consecutive windows of window_length_ (the
    // last one may be shorter). id_to_first_window_id[i] is the index in
    // windows_ of the first window of target i.
    std::vector<uint64_t> id_to_first_window_id(targets_size + 1, 0);
    for (uint64_t i = 0; i < targets_size; ++i) {
        uint32_t k = 0;
        for (uint32_t j = 0; j < sequences_[i]->data().size(); j += window_length_, ++k) {

            uint32_t length = std::min(j + window_length_,
                static_cast<uint32_t>(sequences_[i]->data().size())) - j;

            // Targets without qualities use the shared dummy_quality_ buffer,
            // which holds window_length_ '!' characters.
            windows_.emplace_back(createWindow(i, k, window_type,
                &(sequences_[i]->data()[j]), length,
                sequences_[i]->quality().empty() ? &(dummy_quality_[0]) :
                &(sequences_[i]->quality()[j]), length));
        }

        id_to_first_window_id[i + 1] = id_to_first_window_id[i] + k;
    }

    targets_coverages_.resize(targets_size, 0);

    for (uint64_t i = 0; i < overlaps.size(); ++i) {

        ++targets_coverages_[overlaps[i]->t_id()];

        const auto& sequence = sequences_[overlaps[i]->q_id()];
        const auto& breaking_points = overlaps[i]->breaking_points();

        // Breaking points are consumed in pairs (j, j + 1). As used below,
        // .first is a position on the target and .second a position in the
        // query data.
        for (uint32_t j = 0; j < breaking_points.size(); j += 2) {
            // Skip query fragments shorter than 2% of the window length.
            if (breaking_points[j + 1].second - breaking_points[j].second < 0.02 * window_length_) {
                continue;
            }

            if (!sequence->quality().empty() ||
                !sequence->reverse_quality().empty()) {

                // Average the quality characters of the fragment after
                // subtracting 33 (presumably the Phred+33 offset) and skip
                // the fragment when it falls below quality_threshold_.
                const auto& quality = overlaps[i]->strand() ?
                    sequence->reverse_quality() : sequence->quality();
                double average_quality = 0;
                for (uint32_t k = breaking_points[j].second; k < breaking_points[j + 1].second; ++k) {
                    average_quality += static_cast<uint32_t>(quality[k]) - 33;
                }
                average_quality /= breaking_points[j + 1].second - breaking_points[j].second;

                if (average_quality < quality_threshold_) {
                    continue;
                }
            }

            // Window of the target that contains the first breaking point,
            // and the target position at which that window starts.
            uint64_t window_id = id_to_first_window_id[overlaps[i]->t_id()] +
                breaking_points[j].first / window_length_;
            uint32_t window_start = (breaking_points[j].first / window_length_) *
                window_length_;

            // Strand selects between the reverse complement and the forward
            // data (and likewise between the two quality strings).
            const char* data = overlaps[i]->strand() ?
                &(sequence->reverse_complement()[breaking_points[j].second]) :
                &(sequence->data()[breaking_points[j].second]);
            uint32_t data_length = breaking_points[j + 1].second -
                breaking_points[j].second;

            const char* quality = overlaps[i]->strand() ?
                (sequence->reverse_quality().empty() ?
                    nullptr : &(sequence->reverse_quality()[breaking_points[j].second]))
                :
                (sequence->quality().empty() ?
                    nullptr : &(sequence->quality()[breaking_points[j].second]));
            uint32_t quality_length = quality == nullptr ? 0 : data_length;

            // The last two arguments are the fragment's begin and (inclusive)
            // end positions relative to the start of the window.
            windows_[window_id]->add_layer(data, data_length,
                quality, quality_length,
                breaking_points[j].first - window_start,
                breaking_points[j + 1].first - window_start - 1);
        }

        // The overlap is no longer needed once its layers have been added.
        overlaps[i].reset();
    }

    logger_->log("[racon::Polisher::initialize] transformed data into windows");
}
461 | 
462 | void Polisher::find_overlap_breaking_points(std::vector<std::unique_ptr<Overlap>>& overlaps)
463 | {
464 |     std::vector<std::future<void>> thread_futures;
465 |     for (uint64_t i = 0; i < overlaps.size(); ++i) {
466 |         thread_futures.emplace_back(thread_pool_->submit(
467 |             [&](uint64_t j) -> void {
468 |                 overlaps[j]->find_breaking_points(sequences_, window_length_);
469 |             }, i));
470 |     }
471 | 
472 |     uint32_t logger_step = thread_futures.size() / 20;
473 |     for (uint64_t i = 0; i < thread_futures.size(); ++i) {
474 |         thread_futures[i].wait();
475 |         if (logger_step != 0 && (i + 1) % logger_step == 0 && (i + 1) / logger_step < 20) {
476 |             logger_->bar("[racon::Polisher::initialize] aligning overlaps");
477 |         }
478 |     }
479 |     if (logger_step != 0) {
480 |         logger_->bar("[racon::Polisher::initialize] aligning overlaps");
481 |     } else {
482 |         logger_->log("[racon::Polisher::initialize] aligned overlaps");
483 |     }
484 | }
485 | 
486 | void Polisher::polish(std::vector<std::unique_ptr<Sequence>>& dst,
487 |     bool drop_unpolished_sequences) {
488 | 
489 |     logger_->log();
490 | 
491 |     std::vector<std::future<bool>> thread_futures;
492 |     for (uint64_t i = 0; i < windows_.size(); ++i) {
493 |         thread_futures.emplace_back(thread_pool_->submit(
494 |             [&](uint64_t j) -> bool {
495 |                 auto it = thread_to_id_.find(std::this_thread::get_id());
496 |                 if (it == thread_to_id_.end()) {
497 |                     fprintf(stderr, "[racon::Polisher::polish] error: "
498 |                         "thread identifier not present!\n");
499 |                     exit(1);
500 |                 }
501 |                 return windows_[j]->generate_consensus(
502 |                     alignment_engines_[it->second], trim_);
503 |             }, i));
504 |     }
505 | 
506 |     std::string polished_data = "";
507 |     uint32_t num_polished_windows = 0;
508 | 
509 |     uint64_t logger_step = thread_futures.size() / 20;
510 | 
511 |     for (uint64_t i = 0; i < thread_futures.size(); ++i) {
512 |         thread_futures[i].wait();
513 | 
514 |         num_polished_windows += thread_futures[i].get() == true ? 1 : 0;
515 |         polished_data += windows_[i]->consensus();
516 | 
517 |         if (i == windows_.size() - 1 || windows_[i + 1]->rank() == 0) {
518 |             double polished_ratio = num_polished_windows /
519 |                 static_cast<double>(windows_[i]->rank() + 1);
520 | 
521 |             if (!drop_unpolished_sequences || polished_ratio > 0) {
522 |                 std::string tags = type_ == PolisherType::kF ? "r" : "";
523 |                 tags += " LN:i:" + std::to_string(polished_data.size());
524 |                 tags += " RC:i:" + std::to_string(targets_coverages_[windows_[i]->id()]);
525 |                 tags += " XC:f:" + std::to_string(polished_ratio);
526 |                 dst.emplace_back(createSequence(sequences_[windows_[i]->id()]->name() +
527 |                     tags, polished_data));
528 |             }
529 | 
530 |             num_polished_windows = 0;
531 |             polished_data.clear();
532 |         }
533 |         windows_[i].reset();
534 | 
535 |         if (logger_step != 0 && (i + 1) % logger_step == 0 && (i + 1) / logger_step < 20) {
536 |             logger_->bar("[racon::Polisher::polish] generating consensus");
537 |         }
538 |     }
539 | 
540 |     if (logger_step != 0) {
541 |         logger_->bar("[racon::Polisher::polish] generating consensus");
542 |     } else {
543 |         logger_->log("[racon::Polisher::polish] generated consensus");
544 |     }
545 | 
546 |     std::vector<std::shared_ptr<Window>>().swap(windows_);
547 |     std::vector<std::unique_ptr<Sequence>>().swap(sequences_);
548 | }
549 | 
550 | }
551 | 


--------------------------------------------------------------------------------