├── .clang-format
├── .clang-tidy
├── .github
    └── workflows
    │   ├── ci_linux.yaml
    │   └── clang-format.yaml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── README.md
├── benchmark
    └── run_benchmark.sh
├── doc
    └── Doxyfile.in
├── include
    ├── popvcf
    └── popvcf.hpp
├── src
    ├── CMakeLists.txt
    ├── decode.cpp
    ├── decode.hpp
    ├── encode.cpp
    ├── encode.hpp
    ├── in.constants.hpp
    ├── io.hpp
    ├── main.cpp
    ├── sequence_utils.cpp
    └── sequence_utils.hpp
└── test
    └── create_test_data.sh


/.clang-format:
--------------------------------------------------------------------------------
 1 | # run on whole project with:
 2 | #  find src/ test/ -name "*.hpp" -o -name "*.cpp" | xargs clang-format -i --style=file
 3 | ---
 4 | Language:        Cpp
 5 | BasedOnStyle:  LLVM
 6 | AlignOperands:   AlignAfterOperator
 7 | AllowAllArgumentsOnNextLine: false
 8 | AllowShortEnumsOnASingleLine: false
 9 | AllowShortFunctionsOnASingleLine: None
10 | AlwaysBreakAfterDefinitionReturnType: None
11 | AlwaysBreakAfterReturnType: None
12 | AlwaysBreakBeforeMultilineStrings: true
13 | AlwaysBreakTemplateDeclarations: Yes
14 | BinPackArguments: false
15 | BinPackParameters: false
16 | BreakBeforeBraces: Allman
17 | BraceWrapping:
18 |   AfterCaseLabel:  true
19 |   AfterClass:      true
20 |   AfterControlStatement: Always
21 |   AfterEnum:       true
22 |   AfterFunction:   true
23 |   AfterNamespace:  true
24 |   AfterObjCDeclaration: true
25 |   AfterStruct:     true
26 |   AfterUnion:      true
27 |   AfterExternBlock: true
28 |   BeforeCatch:     true
29 |   BeforeElse:      true
30 |   BeforeLambdaBody: true
31 |   BeforeWhile:     true
32 |   IndentBraces:    false
33 |   SplitEmptyFunction: false
34 |   SplitEmptyRecord: false
35 |   SplitEmptyNamespace: false
36 | BreakBeforeInheritanceComma: true
37 | BreakConstructorInitializers: AfterColon
38 | BreakInheritanceList: AfterColon
39 | ColumnLimit:     120
40 | CommentPragmas:  \/\/!
41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
42 | ConstructorInitializerIndentWidth: 2
43 | ContinuationIndentWidth: 2
44 | DeriveLineEnding: false
45 | IncludeBlocks: Regroup
46 | IncludeCategories:
47 |   - Regex:           '^<(sys/)?[a-z_]+(\.h)?>$'
48 |     Priority:        1
49 |   - Regex:           '^<parallel_hashmap/'
50 |     Priority:        4
51 |   - Regex:           '^<paw/'
52 |     Priority:        6
53 |   - Regex:           '^<weaver/'
54 |     Priority:        10
55 |   - Regex:           '^"[a-z_]+\.hpp"$'
56 |     Priority:        12
57 |   - Regex:           '.*'
58 |     Priority:        100
59 | IndentExternBlock: false
60 | IndentGotoLabels: false
61 | IndentPPDirectives: AfterHash
62 | IndentWidth:     2
63 | KeepEmptyLinesAtTheStartOfBlocks: false
64 | MaxEmptyLinesToKeep: 1
65 | PenaltyReturnTypeOnItsOwnLine: 200
66 | PointerAlignment: Middle
67 | ReflowComments: true
68 | Standard: c++17
69 | TabWidth: 2
70 | 


--------------------------------------------------------------------------------
/.clang-tidy:
--------------------------------------------------------------------------------
 1 | ---
 2 | Checks: >
 3 |   -*,
 4 |   performance-*,
 5 |   bugprone-*,
 6 |     -bugprone-narrowing-conversions,
 7 | #  readability-*,
 8 | #    -readability-magic-numbers,
 9 | #    -readability-qualified-auto,
10 | #    -readability-braces-around-statements,
11 | #    -readability-uppercase-literal-suffix,
12 | #    -readability-avoid-const-params-in-decls,
13 | #    -readability-function-size,
14 | #    -readability-function-cognitive-complexity,
15 | #    -readability-container-size-empty
16 | #  clang-analyzer-*,
17 | #    -clang-analyzer-osx*,
18 | #  modernize-*,
19 | #    -modernize-use-trailing-return-type,
20 | 
21 | WarningsAsErrors: ''
22 | #HeaderFilterRegex: 'graphtyper/'
23 | AnalyzeTemporaryDtors: false
24 | FormatStyle:     none
25 | CheckOptions:
26 |  - { key: readability-identifier-naming.NamespaceCase, value: lower_case }
27 | ...
28 | 


--------------------------------------------------------------------------------
/.github/workflows/ci_linux.yaml:
--------------------------------------------------------------------------------
  1 | name: CI on Linux
  2 | 
  3 | on:
  4 |   # Trigger workflow on pull requests of any branch
  5 |   pull_request:
  6 |   # Trigger workflow on pushes to following branches
  7 |   push:
  8 |     branches:
  9 |       - master
 10 |       - dev
 11 | 
 12 | env:
 13 |   TZ: Atlantic/Reykjavik
 14 | 
 15 | defaults:
 16 |   run:
 17 |     shell: bash -ex {0}
 18 | 
 19 | jobs:
 20 |   build:
 21 |     name: ${{ matrix.name }}
 22 |     runs-on: ubuntu-18.04
 23 |     timeout-minutes: 120
 24 |     strategy:
 25 |       fail-fast: false
 26 |       matrix:
 27 |         include:
 28 |           - name: "GCC8 Release"
 29 |             pkg: "g++-8"
 30 |             cxx: "g++-8"
 31 |             cc: "gcc-8"
 32 |             build_type: Release
 33 | 
 34 |           - name: "GCC9 Debug"
 35 |             pkg: "g++-9"
 36 |             cxx: "g++-9"
 37 |             cc: "gcc-9"
 38 |             build_type: Debug
 39 | 
 40 |           - name: "GCC10 Debug C++20"
 41 |             pkg: "g++-10"
 42 |             cxx: "g++-10"
 43 |             cc: "gcc-10"
 44 |             cxx_flags: "-std=c++20"
 45 |             build_type: Debug
 46 | 
 47 |           - name: "GCC11 Release C++20"
 48 |             pkg: "g++-11"
 49 |             cxx: "g++-11"
 50 |             cc: "gcc-11"
 51 |             cxx_flags: "-std=c++20"
 52 |             build_type: Release
 53 | 
 54 |           - name: "Clang10 Debug C++20"
 55 |             pkg: "clang-10"
 56 |             cxx: "clang++-10"
 57 |             cc: "clang-10"
 58 |             cxx_flags: "-std=c++20"
 59 |             build_type: Debug
 60 | 
 61 |           - name: "Clang12 Release C++20"
 62 |             pkg: "clang-12"
 63 |             cxx: "clang++-12"
 64 |             cc: "clang-12"
 65 |             cxx_flags: "-std=c++20"
 66 |             build_type: Release
 67 |     steps:
 68 |       - name: Checkout
 69 |         uses: actions/checkout@v2
 70 |         with:
 71 |           path: src
 72 |           fetch-depth: 2
 73 |           submodules: recursive
 74 | 
 75 |       - name: Add package source
 76 |         run: |
 77 |           echo 'APT::Acquire::Retries "5";' | sudo tee -a /etc/apt/apt.conf.d/80-retries > /dev/null
 78 |           sudo add-apt-repository --no-update --yes ppa:ubuntu-toolchain-r/ppa
 79 |           sudo add-apt-repository --no-update --yes ppa:ubuntu-toolchain-r/test
 80 |           wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
 81 |           sudo add-apt-repository --no-update --yes "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic main"
 82 |           sudo add-apt-repository --no-update --yes "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main"
 83 |           sudo add-apt-repository --no-update --yes "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-12 main"
 84 |           sudo apt-get update
 85 | 
 86 |       - name: Install Build dependencies
 87 |         run: sudo apt-get install --yes cmake ccache
 88 | 
 89 |       - name: Install compiler ${{ matrix.name }}
 90 |         run: sudo apt-get install --yes ${{ matrix.pkg }}
 91 | 
 92 |       - name: Install zstd
 93 |         run: sudo apt-get install --yes zstd libzstd-dev
 94 | 
 95 |       - name: Load ccache
 96 |         uses: actions/cache@v2
 97 |         with:
 98 |           path: .ccache
 99 |           key: ${{ runner.os }}-${{ matrix.name }}-ccache-${{ github.ref }}-${{ github.run_number }}
100 |           # Restoring: From current branch, otherwise from base branch, otherwise from any branch.
101 |           restore-keys: |
102 |             ${{ runner.os }}-${{ matrix.name }}-ccache-${{ github.ref }}
103 |             ${{ runner.os }}-${{ matrix.name }}-ccache-${{ github.base_ref }}
104 |             ${{ runner.os }}-${{ matrix.name }}-ccache-
105 | 
106 |       - name: Tool versions
107 |         run: |
108 |           env cmake --version
109 |           env ${{ matrix.cxx }} --version
110 | 
111 |       - name: Configure tests  # compilers are launched through ccache so the restored .ccache directory is actually used
112 |         env:
113 |           CXX: ${{ matrix.cxx }}
114 |           CC: ${{ matrix.cc }}
115 |         run: |
116 |           mkdir build
117 |           cd build
118 |           cmake ../src -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_CXX_FLAGS="${{ matrix.cxx_flags }}" -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
119 | 
120 |       - name: Build tests
121 |         env:
122 |           CCACHE_BASEDIR: ${{ github.workspace }}
123 |           CCACHE_DIR: ${{ github.workspace }}/.ccache
124 |           CCACHE_COMPRESS: true
125 |           CCACHE_COMPRESSLEVEL: 6
126 |           CCACHE_MAXSIZE: 500M
127 |         run: |
128 |           ccache -p || true
129 |           cd build
130 |           make -k -j2
131 |           ccache -s || true
132 | 
133 |       - name: Run tests
134 |         run: |
135 |           cd build
136 |           make test
137 | 


--------------------------------------------------------------------------------
/.github/workflows/clang-format.yaml:
--------------------------------------------------------------------------------
 1 | name: Format-check
 2 | 
 3 | on:
 4 |   # Trigger workflow on pull requests of any branch
 5 |   pull_request:
 6 |   # Trigger workflow on pushes to following branches
 7 |   push:
 8 |     branches:
 9 |       - master
10 |       - dev
11 | 
12 | env:
13 |   TZ: Atlantic/Reykjavik
14 | 
15 | defaults:
16 |   run:
17 |     shell: bash -ex {0}
18 | 
19 | jobs:
20 |   build:
21 |     name: ${{ matrix.name }}
22 |     runs-on: ubuntu-18.04
23 |     timeout-minutes: 120
24 |     strategy:
25 |       fail-fast: true
26 |       matrix:
27 |         include:
28 |           - name: "Clang12 Release C++20"
29 |             pkg: "clang-12"
30 |             format: "clang-format-12"
31 |             cxx: "clang++-12"
32 |             cc: "clang-12"
33 |             cxx_flags: "-std=c++20"
34 |             build_type: Release
35 |     steps:
36 |       - name: Checkout
37 |         uses: actions/checkout@v2
38 |         with:
39 |           path: src
40 |           fetch-depth: 2
41 |           submodules: recursive
42 | 
43 |       - name: Add package source
44 |         run: |
45 |           echo 'APT::Acquire::Retries "5";' | sudo tee -a /etc/apt/apt.conf.d/80-retries > /dev/null
46 |           sudo add-apt-repository --no-update --yes ppa:ubuntu-toolchain-r/ppa
47 |           sudo add-apt-repository --no-update --yes ppa:ubuntu-toolchain-r/test
48 |           wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
49 |           sudo add-apt-repository --no-update --yes "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-12 main"
50 |           sudo apt-get update
51 | 
52 |       - name: Install CMake
53 |         run: sudo apt-get install --yes cmake
54 | 
55 |       - name: Install clang-format
56 |         run: sudo apt-get install --yes ${{ matrix.pkg }} ${{ matrix.format }}
57 | 
58 |       - name: Install zstd
59 |         run: sudo apt-get install --yes zstd libzstd-dev
60 | 
61 |       - name: Tool versions
62 |         run: |
63 |           env cmake --version
64 |           env ${{ matrix.cxx }} --version
65 |           env ${{ matrix.format }} --version
66 |       - name: Configure tests
67 |         env:
68 |           CXX: ${{ matrix.cxx }}
69 |           CC: ${{ matrix.cc }}
70 |         run: |
71 |           mkdir build
72 |           cd build
73 |           cmake ../src -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_CXX_FLAGS="${{ matrix.cxx_flags }}"
74 |       - name: Run tests
75 |         run: |
76 |           cd build
77 |           make check_format
78 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /.*
 2 | /.git*
 3 | /build*/
 4 | *vcf
 5 | *.gz
 6 | *.popvcf
 7 | *.popvcf.gz
 8 | /benchmark/.*
 9 | *.tbi
10 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "submodules/parallel-hashmap"]
 2 | 	path = submodules/parallel-hashmap
 3 | 	url = https://github.com/greg7mdp/parallel-hashmap.git
 4 | 	ignore = dirty
 5 | [submodule "submodules/paw"]
 6 | 	path = submodules/paw
 7 | 	url = https://github.com/hannespetur/paw.git
 8 | 	ignore = dirty
 9 | [submodule "submodules/htslib"]
10 | 	path = submodules/htslib
11 | 	url = https://github.com/samtools/htslib.git
12 | 	ignore = dirty
13 | [submodule "submodules/libdeflate"]
14 | 	path = submodules/libdeflate
15 | 	url = https://github.com/ebiggers/libdeflate.git
16 | 	ignore = dirty
17 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | cmake_minimum_required (VERSION 3.8)
  2 | project (popvcf LANGUAGES C CXX)
  3 | 
  4 | include(ExternalProject)
  5 | 
  6 | # Build popvcf in release by default
  7 | if(NOT CMAKE_BUILD_TYPE)
  8 |   set(CMAKE_BUILD_TYPE "RELEASE" CACHE STRING "Choose the type of build." FORCE)
  9 | endif()
 10 | 
 11 | # Build paw (dependency) in release by default
 12 | if(NOT PAW_CMAKE_BUILD_TYPE)
 13 |   set(PAW_CMAKE_BUILD_TYPE "RELEASE" CACHE STRING "Choose the type of build for paw." FORCE)
 14 | endif()
 15 | 
 16 | message (STATUS "Build type: ${CMAKE_BUILD_TYPE}")
 17 | 
 18 | set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/")
 19 | set (STATIC_DIR "" CACHE STRING "Build in 'static' mode and include libraries in this directory.")
 20 | 
 21 | ############
 22 | ## popvcf ##
 23 | ############
 24 | set (popvcf_VERSION_MAJOR 1)
 25 | set (popvcf_VERSION_MINOR 1)
 26 | set (popvcf_VERSION_PATCH 1)
 27 | set (popvcf_VERSION ${popvcf_VERSION_MAJOR}.${popvcf_VERSION_MINOR}.${popvcf_VERSION_PATCH})
 28 | 
 29 | add_subdirectory(src) # Exposes "popvcf_sources", which contains all source files of popvcf
 30 | add_library(popvcf_objects OBJECT ${popvcf_sources})
 31 | 
 32 | target_compile_features(popvcf_objects PUBLIC cxx_std_17)
 33 | target_compile_options(popvcf_objects PUBLIC -Wall -Wextra -Wfatal-errors -pedantic -Wno-variadic-macros -march=x86-64 -mtune=generic)
 34 | add_dependencies(popvcf_objects project_paw) # popvcf depends on building paw
 35 | 
 36 | target_include_directories(popvcf_objects PUBLIC
 37 |   ${CMAKE_CURRENT_SOURCE_DIR}/src
 38 |   ${CMAKE_CURRENT_BINARY_DIR}/include/popvcf)
 39 | 
 40 | # Add popvcf executable
 41 | add_executable(popvcf src/main.cpp $<TARGET_OBJECTS:popvcf_objects>)
 42 | 
 43 | # Inherit stuff from popvcf_objects
 44 | target_include_directories(popvcf PRIVATE $<TARGET_PROPERTY:popvcf_objects,INTERFACE_INCLUDE_DIRECTORIES>)
 45 | target_compile_features(popvcf PRIVATE $<TARGET_PROPERTY:popvcf_objects,COMPILE_FEATURES>)
 46 | target_compile_options(popvcf PRIVATE $<TARGET_PROPERTY:popvcf_objects,COMPILE_OPTIONS>)
 47 | 
 48 | # configure a header file to pass some of the CMake settings to the source code
 49 | configure_file (
 50 |   ${PROJECT_SOURCE_DIR}/src/in.constants.hpp
 51 |   ${PROJECT_BINARY_DIR}/include/popvcf/constants.hpp)
 52 | 
 53 | target_include_directories(popvcf_objects PUBLIC ${PROJECT_BINARY_DIR}/include)
 54 | 
 55 | ########################
 56 | ## Build dependencies ##
 57 | ########################
 58 | ### paw ###
 59 | if (CMAKE_BUILD_TYPE STREQUAL "DEBUG")
 60 |   message (STATUS "paw in debug mode")
 61 | 
 62 |   ExternalProject_Add(
 63 |   project_paw
 64 |   SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/submodules/paw
 65 |   BUILD_IN_SOURCE 0
 66 |   PREFIX ${CMAKE_CURRENT_BINARY_DIR}/paw
 67 |   CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -H${CMAKE_CURRENT_SOURCE_DIR}/submodules/paw -B${CMAKE_CURRENT_BINARY_DIR}/paw -DCMAKE_BUILD_TYPE=${PAW_CMAKE_BUILD_TYPE} -DFORCE_AVX_FAST=1
 68 |   BUILD_COMMAND $(MAKE) -C ${CMAKE_CURRENT_BINARY_DIR}/paw static
 69 |   INSTALL_COMMAND "")
 70 | else()
 71 |   message (STATUS "paw in non-debug mode")
 72 | 
 73 |   ExternalProject_Add(
 74 |   project_paw
 75 |   SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/submodules/paw
 76 |   BUILD_IN_SOURCE 0
 77 |   PREFIX ${CMAKE_CURRENT_BINARY_DIR}/paw
 78 |   CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -H${CMAKE_CURRENT_SOURCE_DIR}/submodules/paw -B${CMAKE_CURRENT_BINARY_DIR}/paw -DCMAKE_BUILD_TYPE=${PAW_CMAKE_BUILD_TYPE} -DFORCE_AVX_FAST=1
 79 |   BUILD_COMMAND $(MAKE) -C ${CMAKE_CURRENT_BINARY_DIR}/paw static
 80 |   INSTALL_COMMAND "")
 81 | endif()
 82 | 
 83 | add_library(paw STATIC IMPORTED)
 84 | target_include_directories(popvcf_objects SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/submodules/paw/include)
 85 | target_include_directories(popvcf_objects SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/paw/include)
 86 | set(paw_location ${CMAKE_CURRENT_BINARY_DIR}/paw/lib/libpaw.a)
 87 | message(STATUS "paw target location is ${paw_location}")
 88 | set_property(TARGET paw PROPERTY IMPORTED_LOCATION ${paw_location})
 89 | add_dependencies(paw project_paw)
 90 | add_dependencies(popvcf_objects paw)
 91 | target_link_libraries(popvcf PUBLIC paw)
 92 | 
 93 | ### htslib
 94 | if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/submodules/htslib/Makefile)
 95 |     message(FATAL_ERROR "htslib not found. Run 'git submodule update --init' to fetch all submodules.")
 96 | endif()
 97 | 
 98 | ExternalProject_Add(
 99 |     project_htslib
100 |     BUILD_IN_SOURCE 1
101 |     SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/htslib
102 |     PREFIX ${CMAKE_CURRENT_BINARY_DIR}/htslib
103 |     CONFIGURE_COMMAND cp -a ${CMAKE_CURRENT_SOURCE_DIR}/submodules/htslib ${CMAKE_CURRENT_BINARY_DIR}/ COMMAND autoheader COMMAND autoconf COMMAND ${CMAKE_CURRENT_BINARY_DIR}/htslib/configure --disable-libcurl --disable-gcs --disable-lzma --disable-bz2 --with-libdeflate
104 |         "CFLAGS=${MYCFLAGS} -g -Wall -O3 ${CMAKE_C_FLAGS} -I${CMAKE_CURRENT_BINARY_DIR}/libdeflate"
105 |         "LDFLAGS=${MYLDFLAGS} -L${CMAKE_CURRENT_BINARY_DIR}/libdeflate"
106 |         "CC=${CMAKE_C_COMPILER}"
107 |     BUILD_COMMAND $(MAKE) -C ${CMAKE_CURRENT_BINARY_DIR}/htslib libhts.a
108 |     INSTALL_COMMAND ""
109 | )
110 | 
111 | add_library(htslib STATIC IMPORTED)
112 | 
113 | target_include_directories(popvcf_objects SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/htslib)
114 | 
115 | set(htslib_location ${CMAKE_CURRENT_BINARY_DIR}/htslib/libhts.a)
116 | message(STATUS "htslib target location is ${htslib_location}")
117 | set_property(TARGET htslib PROPERTY IMPORTED_LOCATION ${htslib_location})
118 | add_dependencies(htslib project_htslib)
119 | add_dependencies(project_htslib libdeflate)
120 | add_dependencies(popvcf_objects htslib)
121 | target_link_libraries(popvcf PUBLIC ${htslib_location})
122 | 
123 | ### libdeflate
124 | if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/submodules/libdeflate/Makefile)
125 |     message(FATAL_ERROR "libdeflate not found. Run 'git submodule update --init' to fetch all submodules.")
126 | endif ()
127 | 
128 | ExternalProject_Add(
129 |     project_libdeflate
130 |     BUILD_IN_SOURCE 1
131 |     SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/libdeflate
132 |     PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libdeflate
133 |     CONFIGURE_COMMAND cp -a ${CMAKE_CURRENT_SOURCE_DIR}/submodules/libdeflate ${CMAKE_CURRENT_BINARY_DIR}/
134 |     BUILD_COMMAND $(MAKE) -C ${CMAKE_CURRENT_BINARY_DIR}/libdeflate
135 |         PREFIX="${CMAKE_CURRENT_BINARY_DIR}/libdeflate"
136 |         "CC=${CMAKE_C_COMPILER}"
137 |         "CFLAGS=-fPIC -O3" libdeflate.a
138 |     INSTALL_COMMAND ""
139 | )
140 | 
141 | add_library(libdeflate STATIC IMPORTED)
142 | set(libdeflate_location ${CMAKE_CURRENT_BINARY_DIR}/libdeflate/libdeflate.a)
143 | message(STATUS "libdeflate target location is ${libdeflate_location}")
144 | 
145 | set_property(TARGET libdeflate PROPERTY IMPORTED_LOCATION ${libdeflate_location})
146 | add_dependencies(libdeflate project_libdeflate)
147 | target_link_libraries(popvcf PUBLIC libdeflate)
148 | 
149 | ### parallel_hashmap ###
150 | target_include_directories(popvcf_objects SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/submodules/parallel-hashmap)
151 | 
152 | ### threads ###
153 | if (STATIC_DIR STREQUAL "")
154 |     find_package(Threads)
155 |     target_link_libraries(popvcf PUBLIC ${CMAKE_THREAD_LIBS_INIT})
156 | else()
157 |     target_link_libraries(popvcf PUBLIC "${STATIC_DIR}/libpthread.a")
158 | endif()
159 | 
160 | ### rt and filesystem ###
161 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
162 |     message(STATUS "Using GCC")
163 | 
164 |     if (STATIC_DIR STREQUAL "")
165 |         target_link_libraries(popvcf PUBLIC "rt")
166 |         # target_link_libraries(popvcf PUBLIC "stdc++fs")
167 |     else()
168 |         target_link_libraries(popvcf PUBLIC "${STATIC_DIR}/librt.a")
169 |     endif()
170 | elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
171 |     message(STATUS "Using Clang")
172 | else()
173 |     message(WARNING "Unsupported compiler")
174 | endif ()
175 | 
176 | ### zlib ###
177 | message (STATUS "Checking for zlib")
178 | find_package(ZLIB REQUIRED)
179 | target_include_directories(popvcf_objects SYSTEM PUBLIC ${ZLIB_INCLUDE_DIRS})
180 | 
181 | if (STATIC_DIR STREQUAL "")
182 |     target_link_libraries(popvcf PUBLIC ${ZLIB_LIBRARIES})
183 | else()
184 |     target_link_libraries(popvcf PUBLIC "${STATIC_DIR}/libz.a")
185 | endif()
186 | 
187 | ### GCC ###
188 | 
189 | # LOCAL binaries have static GCC, PREBUILT are all static
190 | if (STATIC_DIR STREQUAL "")
191 |   set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
192 | else ()
193 |   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static")
194 | endif ()
195 | 
196 | ################
197 | # clang-format #
198 | ################
199 | # Add clang-format check target
200 | find_program(CLANG_FORMAT "clang-format-12")
201 | 
202 | if (CLANG_FORMAT STREQUAL "" OR CLANG_FORMAT STREQUAL "CLANG_FORMAT-NOTFOUND")
203 |   find_program(CLANG_FORMAT "clang-format")
204 | endif()
205 | 
206 | if (NOT CLANG_FORMAT STREQUAL "" AND NOT CLANG_FORMAT STREQUAL "CLANG_FORMAT-NOTFOUND")
207 |   add_custom_target (check_format COMMAND find "${CMAKE_CURRENT_SOURCE_DIR}/src" "(" -name "*.hpp" -o -name "*.cpp" ")" -exec "${CLANG_FORMAT}" --style=file --dry-run --Werror "{}" + COMMENT "Checking all files format with clang-format." VERBATIM) # the ( ) group makes -exec apply to both *.hpp and *.cpp (without it only *.cpp was checked); VERBATIM passes the globs and parentheses to find unchanged
208 | endif()
209 | 
210 | #################
211 | # Documentation #
212 | #################
213 | ## Adds a target to generate API documentation with Doxygen
214 | find_package(Doxygen)
215 | option(BUILD_DOCUMENTATION "Create and install the HTML based API documentation (requires Doxygen)" ${DOXYGEN_FOUND})
216 | 
217 | if(BUILD_DOCUMENTATION)
218 |   if(NOT DOXYGEN_FOUND)
219 |     message(FATAL_ERROR "Doxygen is needed to build the documentation.")
220 |   endif()
221 | 
222 |   set(doxyfile_in ${CMAKE_CURRENT_SOURCE_DIR}/doc/Doxyfile.in)
223 |   set(doxyfile ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)
224 | 
225 |   configure_file(${doxyfile_in} ${doxyfile} @ONLY)
226 | 
227 |   add_custom_target(doc
228 |     COMMAND ${DOXYGEN_EXECUTABLE} ${doxyfile}
229 |     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
230 |     COMMENT "Generating API documentation with Doxygen"
231 |     VERBATIM)
232 | 
233 |   #install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/html DESTINATION share/doc)
234 | endif()
235 | 
236 | ###########
237 | # ARCHIVE #
238 | ###########
239 | add_custom_target(archive
240 |     COMMAND sh -c "bash .git-archive-all.sh --format tar.gz --prefix popvcf-v${popvcf_VERSION}/ ${CMAKE_CURRENT_BINARY_DIR}/popvcf-v${popvcf_VERSION}.tar.gz"
241 |     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
242 |     COMMENT "Generating an archive which includes submodules."
243 |     VERBATIM)
244 | 
245 | ###########
246 | # Testing #
247 | ###########
248 | 
249 | enable_testing(true)
250 | add_test(NAME test_popvcf COMMAND sh -c "set -e; sh ${CMAKE_CURRENT_SOURCE_DIR}/test/create_test_data.sh > test.vcf ; ${CMAKE_CURRENT_BINARY_DIR}/popvcf encode test.vcf -Oz > test.popvcf.gz ; ${CMAKE_CURRENT_BINARY_DIR}/popvcf decode test.popvcf.gz > test.new.vcf ; diff test.vcf test.new.vcf")
251 | 
252 | # TODO add tabix to workflow or make popvcf build the index
253 | # tabix -p vcf test.popvcf.gz ; ${CMAKE_CURRENT_BINARY_DIR}/popvcf decode test.popvcf.gz --region=chr2:10000-10200 | grep -v ^# | wc -l | grep -q -w -F 2
254 | 
255 | set_tests_properties(test_popvcf PROPERTIES DEPENDS popvcf)
256 | 
257 | ###########
258 | ## Other ##
259 | ###########
260 | # Get the current working git branch
261 | execute_process(
262 |   COMMAND git rev-parse --abbrev-ref HEAD
263 |   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
264 |   OUTPUT_VARIABLE GIT_BRANCH
265 |   OUTPUT_STRIP_TRAILING_WHITESPACE)
266 | 
267 | # Get the latest abbreviated SHA commit hash of the working branch
268 | execute_process(
269 |   COMMAND git log -1 --format=%h
270 |   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
271 |   OUTPUT_VARIABLE GIT_COMMIT_SHORT_HASH
272 |   OUTPUT_STRIP_TRAILING_WHITESPACE)
273 | 
274 | # Get the latest SHA commit hash of the working branch
275 | execute_process(
276 |   COMMAND git rev-parse HEAD
277 |   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
278 |   OUTPUT_VARIABLE GIT_COMMIT_LONG_HASH
279 |   OUTPUT_STRIP_TRAILING_WHITESPACE)
280 | 
281 | # Count the lines of uncommitted changes in the working tree (git diff | wc -l)
282 | execute_process(
283 |   COMMAND git diff COMMAND wc -l
284 |   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
285 |   OUTPUT_VARIABLE GIT_NUM_DIRTY_LINES
286 |   OUTPUT_STRIP_TRAILING_WHITESPACE)
287 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## popVCF
 2 | 
 3 | popVCF losslessly encodes a multi-sample VCF to reduce its disk footprint. VCF fields are encoded by pointing to other exactly identical fields in the same row or in the row above. The compression gain is small on a single-sample VCF, but the compression ratio can go up to 40+ on large population VCFs, or 5x more compressed than standard bgzip compression. The compression ratio varies a lot between data sets; see below for benchmarks on several different data sets.
 4 | 
 5 | Files are encoded with the "popvcf encode" command, and by encoding with the "-Oz" flag you can directly write the output in bgzip format. You can then decode the file back to VCF using the "popvcf decode" command. The decode subcommand can also query a region using option "--region=chrN:A-B".
 6 | 
 7 | On 64-bit Linux, you can get the latest static binary from the [Release page](https://github.com/DecodeGenetics/popvcf/releases).
 8 | 
 9 | ### Benchmarks
10 | 
11 | We have benchmarked popVCF against a few other compression methods on some large population VCF data. In all experiments, we report wall clock time measured with /usr/bin/time and used a single CPU thread. The VCF data was read from and written to an SSD. spVCF was run with the "--no-squeeze" option to prevent any lossy compression. The benchmark script is in the benchmark/ directory. In the WGS benchmarks, we had to exclude genozip and VCFShark as they were unable to compress the data because of repeated runtime errors.
12 | 
13 | Benchmarked versions: popVCF v1.1.0, spVCF v1.2.0-0-gbecb461, htslib+bcftools v1.14 (with libdeflate), Genozip 13.0.11, VCFShark v1.1.
14 | 
15 | #### GraphTyper UK biobank WGS-487k individual data
16 | 
17 | | Method/format | Compression ratio | Compared to bgzip |
18 | | ------------- | ----------------- | ----------------- |
19 | | popVCF+bgzip  |             37.6x |              4.4x |
20 | | spVCF+bgzip   |             17.2x |              2.0x |
21 | | BCF           |             10.5x |              1.2x |
22 | | bgzip (VCF)   |              8.6x |              1.0x |
23 | 
24 | #### Deep Variant/GLnexus WES-200k individual data
25 | 
26 | | Method/format | Compression ratio | Compared to bgzip | Compression speed (MB/s) | Decompression speed (MB/s) |
27 | | ------------- | ----------------- | ----------------- | ------------------------ | -------------------------- |
28 | | popVCF+bgzip  |            102.9x |              6.9x |                    194.0 |                      490.7 |
29 | | spVCF+bgzip   |             43.8x |              2.9x |                    129.7 |                      281.5 |
30 | | Genozip       |             35.0x |              2.3x |                     18.0 |                       17.3 |
31 | | VCFShark      |             28.3x |              1.9x |                     22.8 |                       21.7 |
32 | | BCF           |             14.0x |             0.94x |                     62.4 |                      175.2 |
33 | | bgzip (VCF)   |             14.9x |              1.0x |                     91.6 |                      521.3 |
34 | 
35 | #### GATK UK biobank WGS-150k individual data
36 | 
37 | | Method/format | Compression ratio | Compared to bgzip | Compression speed (MB/s) | Decompression speed (MB/s) |
38 | | ------------- | ----------------- | ----------------- | ------------------------ | -------------------------- |
39 | | popVCF+bgzip  |             20.1x |              2.8x |                    102.2 |                      295.0 |
40 | | spVCF+bgzip   |             10.0x |              1.4x |                     58.8 |                      165.7 |
41 | | BCF           |              6.7x |             0.94x |                     55.6 |                      174.2 |
42 | | bgzip (VCF)   |              7.1x |              1.0x |                     58.5 |                      474.7 |
43 | 
44 | ### Usage
45 | 
46 | ```sh
47 | popvcf encode my.vcf > my.popvcf
48 | popvcf decode my.popvcf > my.new.vcf
49 | diff my.vcf my.new.vcf # Should be the same
50 | 
51 | # It is also possible to bgzip, tabix index and query
52 | popvcf encode my.vcf -Oz > my.popvcf.gz
53 | tabix my.popvcf.gz
54 | popvcf decode my.popvcf.gz > my.new2.vcf
55 | popvcf decode my.popvcf.gz --region=chrN:A-B > my.region.vcf # Random access a region using the tabix index
56 | ```
57 | 
58 | ### Building
59 | A feature-complete C++17 compiler is required for building popVCF, i.e. GCC 8/Clang 10 or newer.
60 | 
61 | ```sh
62 | git clone --recursive <url> popvcf # Clone the repository
63 | cd popvcf
64 | mkdir build-release
65 | cd build-release
66 | cmake ..
67 | make -j3 popvcf
68 | ```
69 | 
70 | ### Known limitations
71 | 
72 |  * Each VCF genotype field is assumed to be no larger than the popVCF buffer size (256kb). Site data may exceed this limit though (i.e. the INFO field).
73 |  * Each VCF genotype field is assumed to start on a number (0-9), a period (.), or a dash (-). Any VCF record with a GT field fulfills this requirement. Subsequent characters can contain any other printable characters.
74 | 
75 | ### License
76 | MIT


--------------------------------------------------------------------------------
/benchmark/run_benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e
 4 | #set -o xtrace
 5 | 
 6 | popvcf=$1    # path to the popvcf binary being benchmarked
 7 | vcf=$2       # input VCF; it is copied to ./test.vcf, which every step below operates on
 8 | level="$3"   # optional compression-level flag forwarded to bgzip and popvcf (may be empty)
 9 | #level="-l9"
10 | 
11 | if [[ -z $vcf ]]; then
12 |   echo "Usage: $0 <popvcf_binary> <vcf> [level]" >&2
13 |   echo " i.e. $0 ./popvcf test.vcf \"-l9\"" >&2
14 |   exit 1
15 | fi
16 | 
17 | if [[ ! ${vcf} -ef test.vcf ]]; then  # skip the copy when the input already is ./test.vcf (cp would fail on the same file)
18 |   cp -- "$vcf" test.vcf
19 | fi
20 | 
21 | #echo "cat test.vcf | bgzip -c > test.pipe.vcf.gz"
22 | #time cat test.vcf | bgzip -c > test.pipe.vcf.gz
23 | 
24 | run ()  # run <command-string>: print the command, then execute it under `time`
25 | (  # parenthesized body: the function runs in a subshell
26 |   echo "$1"
27 |   eval time "$1"  # eval so redirections and pipes inside the string are honored
28 | )
29 | 
30 | #echo "bgzip -k -f test.vcf"
31 | #time bgzip -k -f test.vcf
32 | 
33 | 
34 | echo "== Compression times ==" # bgzip baseline, popvcf (plain and -Oz), spvcf (plain and piped into bgzip)
35 | run "bgzip --stdout --force --threads 1 test.vcf ${level} > test.vcf.gz"
36 | run "${popvcf} encode test.vcf -o test.vcf.popvcf"
37 | run "${popvcf} encode test.vcf ${level} -Oz -o test.vcf.popvcf.gz"
38 | run "spvcf encode --quiet --no-squeeze test.vcf > test.vcf.spvcf"
39 | run "spvcf encode --quiet --no-squeeze test.vcf | bgzip -c ${level} > test.vcf.spvcf.gz"
40 | 
41 | echo "== Decompression times ==" # each decoded file's md5sum is printed after its run, then the file is removed
42 | md5sum test.vcf # checksum of the input, printed first for comparison with the ones below
43 | run "${popvcf} decode test.vcf.popvcf > test.vcf.popvcf.vcf"
44 | md5sum test.vcf.popvcf.vcf ; rm -f test.vcf.popvcf.vcf
45 | run "${popvcf} decode test.vcf.popvcf.gz > test.vcf.popvcf.gz.vcf"
46 | md5sum test.vcf.popvcf.gz.vcf ; rm -f test.vcf.popvcf.gz.vcf
47 | run "bgzip -dc test.vcf.spvcf.gz | spvcf decode --quiet > test.vcf.spvcf.gz.vcf"
48 | md5sum test.vcf.spvcf.gz.vcf ; rm -f test.vcf.spvcf.gz.vcf
49 | run "bgzip -dc test.vcf.gz > test.vcf.gz.vcf"
50 | md5sum test.vcf.gz.vcf ; rm -f test.vcf.gz.vcf
51 | 
52 | echo "== Index construction times ==" # tabix index for each of the three .gz outputs written above
53 | run "tabix -p vcf -f test.vcf.gz"
54 | run "tabix -p vcf -f test.vcf.popvcf.gz"
55 | run "tabix -p vcf -f test.vcf.spvcf.gz"
56 | 
57 | echo "== Query times ==" # one region query per tabix-indexed file
58 | region=$(grep -v '^#' test.vcf | cut -f1,2 | head -n 20 | tail -n 1 | awk '{print $1":"$2"-"$2+100}') # columns 1-2 of the 20th non-header line (or the last, if fewer) -> "col1:col2-(col2+100)"
59 | run "tabix test.vcf.gz ${region} > /dev/null"
60 | run "${popvcf} decode test.vcf.popvcf.gz --region=${region} > /dev/null"
61 | run "spvcf tabix test.vcf.spvcf.gz ${region} | spvcf decode - > /dev/null"
62 | 
63 | ls -lh test.*gz
64 | ls -l test.*gz
65 | # Print name, size in bytes and original_size/size per test.*gz; -maxdepth 1 ignores same-named files in subdirectories.
66 | original_size=$(find -L . -maxdepth 1 -name "test.vcf" -printf "%s\n")
67 | find . -maxdepth 1 -name "test.*gz" -printf "%f\t%s\n" | awk -v os="${original_size}" '{print $1"\t"$2"\t"os/$2}'
68 | 
69 | # cleanup
70 | #echo test.* | tr ' ' '\n' | grep -vP "^test.vcf$" | grep -vP "^test.vcf.gz$" | xargs rm
71 | 


--------------------------------------------------------------------------------
/doc/Doxyfile.in:
--------------------------------------------------------------------------------
   1 | # Doxyfile 1.6.1
   2 | 
   3 | # This file describes the settings to be used by the documentation system
   4 | # doxygen (www.doxygen.org) for a project
   5 | #
   6 | # All text after a hash (#) is considered a comment and will be ignored
   7 | # The format is:
   8 | #       TAG = value [value, ...]
   9 | # For lists items can also be appended using:
  10 | #       TAG += value [value, ...]
  11 | # Values that contain spaces should be placed between quotes (" ")
  12 | 
  13 | #---------------------------------------------------------------------------
  14 | # Project related configuration options
  15 | #---------------------------------------------------------------------------
  16 | 
  17 | # This tag specifies the encoding used for all characters in the config file
  18 | # that follow. The default is UTF-8 which is also the encoding used for all
  19 | # text before the first occurrence of this tag. Doxygen uses libiconv (or the
  20 | # iconv built into libc) for the transcoding. See
  21 | # http://www.gnu.org/software/libiconv for the list of possible encodings.
  22 | 
  23 | DOXYFILE_ENCODING      = UTF-8
  24 | 
  25 | # The PROJECT_NAME tag is a single word (or a sequence of words surrounded
  26 | # by quotes) that should identify the project.
  27 | 
  28 | PROJECT_NAME           = "@CMAKE_PROJECT_NAME@"
  29 | 
  30 | # The PROJECT_NUMBER tag can be used to enter a project or revision number.
  31 | # This could be handy for archiving the generated documentation or
  32 | # if some version control system is used.
  33 | 
  34 | PROJECT_NUMBER         = @VERSION@
  35 | 
  36 | # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
  37 | # base path where the generated documentation will be put.
  38 | # If a relative path is entered, it will be relative to the location
  39 | # where doxygen was started. If left blank the current directory will be used.
  40 | 
  41 | OUTPUT_DIRECTORY       = doc
  42 | 
  43 | # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
  44 | # 4096 sub-directories (in 2 levels) under the output directory of each output
  45 | # format and will distribute the generated files over these directories.
  46 | # Enabling this option can be useful when feeding doxygen a huge amount of
  47 | # source files, where putting all generated files in the same directory would
  48 | # otherwise cause performance problems for the file system.
  49 | 
  50 | CREATE_SUBDIRS         = NO
  51 | 
  52 | # The OUTPUT_LANGUAGE tag is used to specify the language in which all
  53 | # documentation generated by doxygen is written. Doxygen will use this
  54 | # information to generate all constant output in the proper language.
  55 | # The default language is English, other supported languages are:
  56 | # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
  57 | # Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
  58 | # Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
  59 | # messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
  60 | # Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
  61 | # Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
  62 | 
  63 | OUTPUT_LANGUAGE        = English
  64 | 
  65 | # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
  66 | # include brief member descriptions after the members that are listed in
  67 | # the file and class documentation (similar to JavaDoc).
  68 | # Set to NO to disable this.
  69 | 
  70 | BRIEF_MEMBER_DESC      = YES
  71 | 
  72 | # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
  73 | # the brief description of a member or function before the detailed description.
  74 | # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
  75 | # brief descriptions will be completely suppressed.
  76 | 
  77 | REPEAT_BRIEF           = YES
  78 | 
  79 | # This tag implements a quasi-intelligent brief description abbreviator
  80 | # that is used to form the text in various listings. Each string
  81 | # in this list, if found as the leading text of the brief description, will be
  82 | # stripped from the text and the result after processing the whole list, is
  83 | # used as the annotated text. Otherwise, the brief description is used as-is.
  84 | # If left blank, the following values are used ("$name" is automatically
  85 | # replaced with the name of the entity): "The $name class" "The $name widget"
  86 | # "The $name file" "is" "provides" "specifies" "contains"
  87 | # "represents" "a" "an" "the"
  88 | 
  89 | ABBREVIATE_BRIEF       =
  90 | 
  91 | # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
  92 | # Doxygen will generate a detailed section even if there is only a brief
  93 | # description.
  94 | 
  95 | ALWAYS_DETAILED_SEC    = NO
  96 | 
  97 | # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
  98 | # inherited members of a class in the documentation of that class as if those
  99 | # members were ordinary class members. Constructors, destructors and assignment
 100 | # operators of the base classes will not be shown.
 101 | 
 102 | INLINE_INHERITED_MEMB  = NO
 103 | 
 104 | # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
 105 | # path before files name in the file list and in the header files. If set
 106 | # to NO the shortest path that makes the file name unique will be used.
 107 | 
 108 | FULL_PATH_NAMES        = YES
 109 | 
 110 | # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
 111 | # can be used to strip a user-defined part of the path. Stripping is
 112 | # only done if one of the specified strings matches the left-hand part of
 113 | # the path. The tag can be used to show relative paths in the file list.
 114 | # If left blank the directory from which doxygen is run is used as the
 115 | # path to strip.
 116 | 
 117 | STRIP_FROM_PATH        = .
 118 | 
 119 | # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
 120 | # the path mentioned in the documentation of a class, which tells
 121 | # the reader which header file to include in order to use a class.
 122 | # If left blank only the name of the header file containing the class
 123 | # definition is used. Otherwise one should specify the include paths that
 124 | # are normally passed to the compiler using the -I flag.
 125 | 
 126 | STRIP_FROM_INC_PATH    =
 127 | 
 128 | # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
 129 | # (but less readable) file names. This can be useful is your file systems
 130 | # doesn't support long names like on DOS, Mac, or CD-ROM.
 131 | 
 132 | SHORT_NAMES            = NO
 133 | 
 134 | # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
 135 | # will interpret the first line (until the first dot) of a JavaDoc-style
 136 | # comment as the brief description. If set to NO, the JavaDoc
 137 | # comments will behave just like regular Qt-style comments
 138 | # (thus requiring an explicit @brief command for a brief description.)
 139 | 
 140 | JAVADOC_AUTOBRIEF      = NO
 141 | 
 142 | # If the QT_AUTOBRIEF tag is set to YES then Doxygen will
 143 | # interpret the first line (until the first dot) of a Qt-style
 144 | # comment as the brief description. If set to NO, the comments
 145 | # will behave just like regular Qt-style comments (thus requiring
 146 | # an explicit \brief command for a brief description.)
 147 | 
 148 | QT_AUTOBRIEF           = NO
 149 | 
 150 | # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
 151 | # treat a multi-line C++ special comment block (i.e. a block of //! or ///
 152 | # comments) as a brief description. This used to be the default behaviour.
 153 | # The new default is to treat a multi-line C++ comment block as a detailed
 154 | # description. Set this tag to YES if you prefer the old behaviour instead.
 155 | 
 156 | MULTILINE_CPP_IS_BRIEF = NO
 157 | 
 158 | # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
 159 | # member inherits the documentation from any documented member that it
 160 | # re-implements.
 161 | 
 162 | INHERIT_DOCS           = YES
 163 | 
 164 | # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
 165 | # a new page for each member. If set to NO, the documentation of a member will
 166 | # be part of the file/class/namespace that contains it.
 167 | 
 168 | SEPARATE_MEMBER_PAGES  = NO
 169 | 
 170 | # The TAB_SIZE tag can be used to set the number of spaces in a tab.
 171 | # Doxygen uses this value to replace tabs by spaces in code fragments.
 172 | 
 173 | TAB_SIZE               = 2
 174 | 
 175 | # This tag can be used to specify a number of aliases that acts
 176 | # as commands in the documentation. An alias has the form "name=value".
 177 | # For example adding "sideeffect=\par Side Effects:\n" will allow you to
 178 | # put the command \sideeffect (or @sideeffect) in the documentation, which
 179 | # will result in a user-defined paragraph with heading "Side Effects:".
 180 | # You can put \n's in the value part of an alias to insert newlines.
 181 | 
 182 | ALIASES                =
 183 | 
 184 | # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
 185 | # sources only. Doxygen will then generate output that is more tailored for C.
 186 | # For instance, some of the names that are used will be different. The list
 187 | # of all members will be omitted, etc.
 188 | # popVCF is a C++ project (*.cpp / *.hpp sources), so the C-only output mode stays off.
 189 | OPTIMIZE_OUTPUT_FOR_C  = NO
 190 | 
 191 | # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
 192 | # sources only. Doxygen will then generate output that is more tailored for
 193 | # Java. For instance, namespaces will be presented as packages, qualified
 194 | # scopes will look different, etc.
 195 | 
 196 | OPTIMIZE_OUTPUT_JAVA   = NO
 197 | 
 198 | # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
 199 | # sources only. Doxygen will then generate output that is more tailored for
 200 | # Fortran.
 201 | 
 202 | OPTIMIZE_FOR_FORTRAN   = NO
 203 | 
 204 | # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
 205 | # sources. Doxygen will then generate output that is tailored for
 206 | # VHDL.
 207 | 
 208 | OPTIMIZE_OUTPUT_VHDL   = NO
 209 | 
 210 | # Doxygen selects the parser to use depending on the extension of the files it parses.
 211 | # With this tag you can assign which parser to use for a given extension.
 212 | # Doxygen has a built-in mapping, but you can override or extend it using this tag.
 213 | # The format is ext=language, where ext is a file extension, and language is one of
 214 | # the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP,
 215 | # Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat
 216 | # .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran),
 217 | # use: inc=Fortran f=C. Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
 218 | 
 219 | EXTENSION_MAPPING      =
 220 | 
 221 | # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
 222 | # to include (a tag file for) the STL sources as input, then you should
 223 | # set this tag to YES in order to let doxygen match functions declarations and
 224 | # definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
 225 | # func(std::string) {}). This also makes the inheritance and collaboration
 226 | # diagrams that involve STL classes more complete and accurate.
 227 | 
 228 | BUILTIN_STL_SUPPORT    = NO
 229 | 
 230 | # If you use Microsoft's C++/CLI language, you should set this option to YES to
 231 | # enable parsing support.
 232 | 
 233 | CPP_CLI_SUPPORT        = NO
 234 | 
 235 | # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
 236 | # Doxygen will parse them like normal C++ but will assume all classes use public
 237 | # instead of private inheritance when no explicit protection keyword is present.
 238 | 
 239 | SIP_SUPPORT            = NO
 240 | 
 241 | # For Microsoft's IDL there are propget and propput attributes to indicate getter
 242 | # and setter methods for a property. Setting this option to YES (the default)
 243 | # will make doxygen replace the get and set methods by a property in the
 244 | # documentation. This will only work if the methods are indeed getting or
 245 | # setting a simple type. If this is not the case, or you want to show the
 246 | # methods anyway, you should set this option to NO.
 247 | 
 248 | IDL_PROPERTY_SUPPORT   = YES
 249 | 
 250 | # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
 251 | # tag is set to YES, then doxygen will reuse the documentation of the first
 252 | # member in the group (if any) for the other members of the group. By default
 253 | # all members of a group must be documented explicitly.
 254 | 
 255 | DISTRIBUTE_GROUP_DOC   = NO
 256 | 
 257 | # Set the SUBGROUPING tag to YES (the default) to allow class member groups of
 258 | # the same type (for instance a group of public functions) to be put as a
 259 | # subgroup of that type (e.g. under the Public Functions section). Set it to
 260 | # NO to prevent subgrouping. Alternatively, this can be done per class using
 261 | # the \nosubgrouping command.
 262 | 
 263 | SUBGROUPING            = YES
 264 | 
 265 | # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
 266 | # is documented as struct, union, or enum with the name of the typedef. So
 267 | # typedef struct TypeS {} TypeT, will appear in the documentation as a struct
 268 | # with name TypeT. When disabled the typedef will appear as a member of a file,
 269 | # namespace, or class. And the struct will be named TypeS. This can typically
 270 | # be useful for C code in case the coding convention dictates that all compound
 271 | # types are typedef'ed and only the typedef is referenced, never the tag name.
 272 | 
 273 | TYPEDEF_HIDES_STRUCT   = NO
 274 | 
 275 | # The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
 276 | # determine which symbols to keep in memory and which to flush to disk.
 277 | # When the cache is full, less often used symbols will be written to disk.
 278 | # For small to medium size projects (<1000 input files) the default value is
 279 | # probably good enough. For larger projects a too small cache size can cause
 280 | # doxygen to be busy swapping symbols to and from disk most of the time
 281 | # causing a significant performance penalty.
 282 | # If the system has enough physical memory increasing the cache will improve the
 283 | # performance by keeping more symbols in memory. Note that the value works on
 284 | # a logarithmic scale so increasing the size by one will roughly double the
 285 | # memory usage. The cache size is given by this formula:
 286 | # 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
 287 | # corresponding to a cache size of 2^16 = 65536 symbols
 288 | 
 289 | # SYMBOL_CACHE_SIZE      = 0
 290 | 
 291 | #---------------------------------------------------------------------------
 292 | # Build related configuration options
 293 | #---------------------------------------------------------------------------
 294 | 
 295 | # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
 296 | # documentation are documented, even if no documentation was available.
 297 | # Private class members and static file members will be hidden unless
 298 | # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
 299 | 
 300 | EXTRACT_ALL            = YES
 301 | 
 302 | # If the EXTRACT_PRIVATE tag is set to YES all private members of a class
 303 | # will be included in the documentation.
 304 | 
 305 | EXTRACT_PRIVATE        = NO
 306 | 
 307 | # If the EXTRACT_STATIC tag is set to YES all static members of a file
 308 | # will be included in the documentation.
 309 | 
 310 | EXTRACT_STATIC         = YES
 311 | 
 312 | # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
 313 | # defined locally in source files will be included in the documentation.
 314 | # If set to NO only classes defined in header files are included.
 315 | 
 316 | EXTRACT_LOCAL_CLASSES  = YES
 317 | 
 318 | # This flag is only useful for Objective-C code. When set to YES local
 319 | # methods, which are defined in the implementation section but not in
 320 | # the interface are included in the documentation.
 321 | # If set to NO (the default) only methods in the interface are included.
 322 | 
 323 | EXTRACT_LOCAL_METHODS  = NO
 324 | 
 325 | # If this flag is set to YES, the members of anonymous namespaces will be
 326 | # extracted and appear in the documentation as a namespace called
 327 | # 'anonymous_namespace{file}', where file will be replaced with the base
 328 | # name of the file that contains the anonymous namespace. By default
 329 | # anonymous namespaces are hidden.
 330 | 
 331 | EXTRACT_ANON_NSPACES   = NO
 332 | 
 333 | # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
 334 | # undocumented members of documented classes, files or namespaces.
 335 | # If set to NO (the default) these members will be included in the
 336 | # various overviews, but no documentation section is generated.
 337 | # This option has no effect if EXTRACT_ALL is enabled.
 338 | 
 339 | HIDE_UNDOC_MEMBERS     = NO
 340 | 
 341 | # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
 342 | # undocumented classes that are normally visible in the class hierarchy.
 343 | # If set to NO (the default) these classes will be included in the various
 344 | # overviews. This option has no effect if EXTRACT_ALL is enabled.
 345 | 
 346 | HIDE_UNDOC_CLASSES     = NO
 347 | 
 348 | # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
 349 | # friend (class|struct|union) declarations.
 350 | # If set to NO (the default) these declarations will be included in the
 351 | # documentation.
 352 | 
 353 | HIDE_FRIEND_COMPOUNDS  = NO
 354 | 
 355 | # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
 356 | # documentation blocks found inside the body of a function.
 357 | # If set to NO (the default) these blocks will be appended to the
 358 | # function's detailed documentation block.
 359 | 
 360 | HIDE_IN_BODY_DOCS      = NO
 361 | 
 362 | # The INTERNAL_DOCS tag determines if documentation
 363 | # that is typed after a \internal command is included. If the tag is set
 364 | # to NO (the default) then the documentation will be excluded.
 365 | # Set it to YES to include the internal documentation.
 366 | 
 367 | INTERNAL_DOCS          = NO
 368 | 
 369 | # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
 370 | # file names in lower-case letters. If set to YES upper-case letters are also
 371 | # allowed. This is useful if you have classes or files whose names only differ
 372 | # in case and if your file system supports case sensitive file names. Windows
 373 | # and Mac users are advised to set this option to NO.
 374 | 
 375 | CASE_SENSE_NAMES       = YES
 376 | 
 377 | # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
 378 | # will show members with their full class and namespace scopes in the
 379 | # documentation. If set to YES the scope will be hidden.
 380 | 
 381 | HIDE_SCOPE_NAMES       = NO
 382 | 
 383 | # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
 384 | # will put a list of the files that are included by a file in the documentation
 385 | # of that file.
 386 | 
 387 | SHOW_INCLUDE_FILES     = YES
 388 | 
 389 | # If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
 390 | # is inserted in the documentation for inline members.
 391 | 
 392 | INLINE_INFO            = YES
 393 | 
 394 | # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
 395 | # will sort the (detailed) documentation of file and class members
 396 | # alphabetically by member name. If set to NO the members will appear in
 397 | # declaration order.
 398 | 
 399 | SORT_MEMBER_DOCS       = YES
 400 | 
 401 | # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
 402 | # brief documentation of file, namespace and class members alphabetically
 403 | # by member name. If set to NO (the default) the members will appear in
 404 | # declaration order.
 405 | 
 406 | SORT_BRIEF_DOCS        = NO
 407 | 
 408 | # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
 409 | 
 410 | SORT_MEMBERS_CTORS_1ST = NO
 411 | 
 412 | # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
 413 | # hierarchy of group names into alphabetical order. If set to NO (the default)
 414 | # the group names will appear in their defined order.
 415 | 
 416 | SORT_GROUP_NAMES       = NO
 417 | 
 418 | # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
 419 | # sorted by fully-qualified names, including namespaces. If set to
 420 | # NO (the default), the class list will be sorted only by class name,
 421 | # not including the namespace part.
 422 | # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
 423 | # Note: This option applies only to the class list, not to the
 424 | # alphabetical list.
 425 | 
 426 | SORT_BY_SCOPE_NAME     = NO
 427 | 
 428 | # The GENERATE_TODOLIST tag can be used to enable (YES) or
 429 | # disable (NO) the todo list. This list is created by putting \todo
 430 | # commands in the documentation.
 431 | 
 432 | GENERATE_TODOLIST      = YES
 433 | 
 434 | # The GENERATE_TESTLIST tag can be used to enable (YES) or
 435 | # disable (NO) the test list. This list is created by putting \test
 436 | # commands in the documentation.
 437 | 
 438 | GENERATE_TESTLIST      = YES
 439 | 
 440 | # The GENERATE_BUGLIST tag can be used to enable (YES) or
 441 | # disable (NO) the bug list. This list is created by putting \bug
 442 | # commands in the documentation.
 443 | 
 444 | GENERATE_BUGLIST       = YES
 445 | 
 446 | # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
 447 | # disable (NO) the deprecated list. This list is created by putting
 448 | # \deprecated commands in the documentation.
 449 | 
 450 | GENERATE_DEPRECATEDLIST= YES
 451 | 
 452 | # The ENABLED_SECTIONS tag can be used to enable conditional
 453 | # documentation sections, marked by \if sectionname ... \endif.
 454 | 
 455 | ENABLED_SECTIONS       =
 456 | 
 457 | # The MAX_INITIALIZER_LINES tag determines the maximum number of lines
 458 | # the initial value of a variable or define consists of for it to appear in
 459 | # the documentation. If the initializer consists of more lines than specified
 460 | # here it will be hidden. Use a value of 0 to hide initializers completely.
 461 | # The appearance of the initializer of individual variables and defines in the
 462 | # documentation can be controlled using \showinitializer or \hideinitializer
 463 | # command in the documentation regardless of this setting.
 464 | 
 465 | MAX_INITIALIZER_LINES  = 30
 466 | 
 467 | # Set the SHOW_USED_FILES tag to NO to disable the list of files generated
 468 | # at the bottom of the documentation of classes and structs. If set to YES the
 469 | # list will mention the files that were used to generate the documentation.
 470 | 
 471 | SHOW_USED_FILES        = YES
 472 | 
 473 | # If the sources in your project are distributed over multiple directories
 474 | # then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
 475 | # in the documentation. The default is NO.
 476 | 
 477 | # Obsolete # SHOW_DIRECTORIES       = NO
 478 | 
 479 | # Set the SHOW_FILES tag to NO to disable the generation of the Files page.
 480 | # This will remove the Files entry from the Quick Index and from the
 481 | # Folder Tree View (if specified). The default is YES.
 482 | 
 483 | SHOW_FILES             = YES
 484 | 
 485 | # Set the SHOW_NAMESPACES tag to NO to disable the generation of the
 486 | # Namespaces page.
 487 | # This will remove the Namespaces entry from the Quick Index
 488 | # and from the Folder Tree View (if specified). The default is YES.
 489 | 
 490 | SHOW_NAMESPACES        = YES
 491 | 
 492 | # The FILE_VERSION_FILTER tag can be used to specify a program or script that
 493 | # doxygen should invoke to get the current version for each file (typically from
 494 | # the version control system). Doxygen will invoke the program by executing (via
 495 | # popen()) the command <command> <input-file>, where <command> is the value of
 496 | # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
 497 | # provided by doxygen. Whatever the program writes to standard output
 498 | # is used as the file version. See the manual for examples.
 499 | 
 500 | FILE_VERSION_FILTER    =
 501 | 
 502 | # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by
 503 | # doxygen. The layout file controls the global structure of the generated output files
 504 | # in an output format independent way. To create the layout file that represents
 505 | # doxygen's defaults, run doxygen with the -l option. You can optionally specify a
 506 | # file name after the option, if omitted DoxygenLayout.xml will be used as the name
 507 | # of the layout file.
 508 | 
 509 | LAYOUT_FILE            =
 510 | 
 511 | #---------------------------------------------------------------------------
 512 | # configuration options related to warning and progress messages
 513 | #---------------------------------------------------------------------------
 514 | 
 515 | # The QUIET tag can be used to turn on/off the messages that are generated
 516 | # by doxygen. Possible values are YES and NO. If left blank NO is used.
 517 | 
 518 | QUIET                  = NO
 519 | 
 520 | # The WARNINGS tag can be used to turn on/off the warning messages that are
 521 | # generated by doxygen. Possible values are YES and NO. If left blank
 522 | # NO is used.
 523 | 
 524 | WARNINGS               = YES
 525 | 
 526 | # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
 527 | # for undocumented members. If EXTRACT_ALL is set to YES then this flag will
 528 | # automatically be disabled.
 529 | 
 530 | WARN_IF_UNDOCUMENTED   = YES
 531 | 
 532 | # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
 533 | # potential errors in the documentation, such as not documenting some
 534 | # parameters in a documented function, or documenting parameters that
 535 | # don't exist or using markup commands wrongly.
 536 | 
 537 | WARN_IF_DOC_ERROR      = YES
 538 | 
 539 | # This WARN_NO_PARAMDOC option can be enabled to get warnings for
 540 | # functions that are documented, but have no documentation for their parameters
 541 | # or return value. If set to NO (the default) doxygen will only warn about
 542 | # wrong or incomplete parameter documentation, but not about the absence of
 543 | # documentation.
 544 | 
 545 | WARN_NO_PARAMDOC       = NO
 546 | 
 547 | # The WARN_FORMAT tag determines the format of the warning messages that
 548 | # doxygen can produce. The string should contain the $file, $line, and $text
 549 | # tags, which will be replaced by the file and line number from which the
 550 | # warning originated and the warning text. Optionally the format may contain
 551 | # $version, which will be replaced by the version of the file (if it could
 552 | # be obtained via FILE_VERSION_FILTER)
 553 | 
 554 | WARN_FORMAT            =
 555 | 
 556 | # The WARN_LOGFILE tag can be used to specify a file to which warning
 557 | # and error messages should be written. If left blank the output is written
 558 | # to stderr.
 559 | 
 560 | WARN_LOGFILE           =
 561 | 
 562 | #---------------------------------------------------------------------------
 563 | # configuration options related to the input files
 564 | #---------------------------------------------------------------------------
 565 | 
 566 | # The INPUT tag can be used to specify the files and/or directories that contain
 567 | # documented source files. You may enter file names like "myfile.cpp" or
 568 | # directories like "/usr/src/myproject". Separate the files or directories
 569 | # with spaces.
 570 | # popVCF: headers live under include/ (subdirectories are searched because RECURSIVE = YES).
 571 | INPUT                  = @PROJECT_SOURCE_DIR@/include \
 572 |                          @PROJECT_SOURCE_DIR@/doc \
 573 |                          @PROJECT_SOURCE_DIR@/test
 574 | 
 575 | # This tag can be used to specify the character encoding of the source files
 576 | # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
 577 | # also the default input encoding. Doxygen uses libiconv (or the iconv built
 578 | # into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
 579 | # the list of possible encodings.
 580 | 
 581 | INPUT_ENCODING         = UTF-8
 582 | 
 583 | # If the value of the INPUT tag contains directories, you can use the
 584 | # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
 585 | # and *.h) to filter out the source-files in the directories. If left
 586 | # blank the following patterns are tested:
 587 | # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
 588 | # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
 589 | 
 590 | FILE_PATTERNS          = *.cpp *.hpp *.dox
 591 | 
 592 | # The RECURSIVE tag can be used to specify whether or not subdirectories
 593 | # should be searched for input files as well. Possible values are YES and NO.
 594 | # If left blank NO is used.
 595 | 
 596 | RECURSIVE              = YES
 597 | 
 598 | # The EXCLUDE tag can be used to specify files and/or directories that should
 599 | # be excluded from the INPUT source files. This way you can easily exclude a
 600 | # subdirectory from a directory tree whose root is specified with the INPUT tag.
 601 | 
 602 | EXCLUDE                =
 603 | 
 604 | # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 605 | # directories that are symbolic links (a Unix filesystem feature) are excluded
 606 | # from the input.
 607 | 
 608 | EXCLUDE_SYMLINKS       = NO
 609 | 
 610 | # If the value of the INPUT tag contains directories, you can use the
 611 | # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
 612 | # certain files from those directories. Note that the wildcards are matched
 613 | # against the file with absolute path, so to exclude all test directories
 614 | # for example use the pattern */test/*
 615 | 
 616 | EXCLUDE_PATTERNS       =
 617 | 
 618 | # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
 619 | # (namespaces, classes, functions, etc.) that should be excluded from the
 620 | # output. The symbol name can be a fully qualified name, a word, or if the
 621 | # wildcard * is used, a substring. Examples: ANamespace, AClass,
 622 | # AClass::ANamespace, ANamespace::*Test
 623 | 
 624 | EXCLUDE_SYMBOLS        =
 625 | 
 626 | # The EXAMPLE_PATH tag can be used to specify one or more files or
 627 | # directories that contain example code fragments that are included (see
 628 | # the \include command).
 629 | 
 630 | EXAMPLE_PATH           =
 631 | 
 632 | # If the value of the EXAMPLE_PATH tag contains directories, you can use the
 633 | # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
 634 | # and *.h) to filter out the source-files in the directories. If left
 635 | # blank all files are included.
 636 | 
 637 | EXAMPLE_PATTERNS       =
 638 | 
 639 | # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
 640 | # searched for input files to be used with the \include or \dontinclude
 641 | # commands irrespective of the value of the RECURSIVE tag.
 642 | # Possible values are YES and NO. If left blank NO is used.
 643 | 
 644 | EXAMPLE_RECURSIVE      = NO
 645 | 
 646 | # The IMAGE_PATH tag can be used to specify one or more files or
 647 | # directories that contain images that are included in the documentation (see
 648 | # the \image command).
 649 | 
 650 | IMAGE_PATH             =
 651 | 
 652 | # The INPUT_FILTER tag can be used to specify a program that doxygen should
 653 | # invoke to filter for each input file. Doxygen will invoke the filter program
 654 | # by executing (via popen()) the command <filter> <input-file>, where <filter>
 655 | # is the value of the INPUT_FILTER tag, and <input-file> is the name of an
 656 | # input file. Doxygen will then use the output that the filter program writes
 657 | # to standard output.
 658 | # If FILTER_PATTERNS is specified, this tag will be
 659 | # ignored.
 660 | 
 661 | INPUT_FILTER           =
 662 | 
 663 | # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
 664 | # basis.
 665 | # Doxygen will compare the file name with each pattern and apply the
 666 | # filter if there is a match.
 667 | # The filters are a list of the form:
 668 | # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
 669 | # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
 670 | # is applied to all files.
 671 | 
 672 | FILTER_PATTERNS        =
 673 | 
 674 | # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
 675 | # INPUT_FILTER) will be used to filter the input files when producing source
 676 | # files to browse (i.e. when SOURCE_BROWSER is set to YES).
 677 | 
 678 | FILTER_SOURCE_FILES    = NO
 679 | 
 680 | #---------------------------------------------------------------------------
 681 | # configuration options related to source browsing
 682 | #---------------------------------------------------------------------------
 683 | 
 684 | # If the SOURCE_BROWSER tag is set to YES then a list of source files will
 685 | # be generated. Documented entities will be cross-referenced with these sources.
 686 | # Note: To get rid of all source code in the generated output, make sure also
 687 | # VERBATIM_HEADERS is set to NO.
 688 | 
 689 | SOURCE_BROWSER         = YES
 690 | 
 691 | # Setting the INLINE_SOURCES tag to YES will include the body
 692 | # of functions and classes directly in the documentation.
 693 | 
 694 | INLINE_SOURCES         = NO
 695 | 
 696 | # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
 697 | # doxygen to hide any special comment blocks from generated source code
 698 | # fragments. Normal C and C++ comments will always remain visible.
 699 | 
 700 | STRIP_CODE_COMMENTS    = YES
 701 | 
 702 | # If the REFERENCED_BY_RELATION tag is set to YES
 703 | # then for each documented function all documented
 704 | # functions referencing it will be listed.
 705 | 
 706 | REFERENCED_BY_RELATION = YES
 707 | 
 708 | # If the REFERENCES_RELATION tag is set to YES
 709 | # then for each documented function all documented entities
 710 | # called/used by that function will be listed.
 711 | 
 712 | REFERENCES_RELATION    = YES
 713 | 
 714 | # If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
 715 | # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
 716 | # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
 717 | # link to the source code.
 718 | # Otherwise they will link to the documentation.
 719 | 
 720 | REFERENCES_LINK_SOURCE = YES
 721 | 
 722 | # If the USE_HTAGS tag is set to YES then the references to source code
 723 | # will point to the HTML generated by the htags(1) tool instead of doxygen
 724 | # built-in source browser. The htags tool is part of GNU's global source
 725 | # tagging system (see http://www.gnu.org/software/global/global.html). You
 726 | # will need version 4.8.6 or higher.
 727 | 
 728 | USE_HTAGS              = NO
 729 | 
 730 | # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
 731 | # will generate a verbatim copy of the header file for each class for
 732 | # which an include is specified. Set to NO to disable this.
 733 | 
 734 | VERBATIM_HEADERS       = YES
 735 | 
 736 | #---------------------------------------------------------------------------
 737 | # configuration options related to the alphabetical class index
 738 | #---------------------------------------------------------------------------
 739 | 
 740 | # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
 741 | # of all compounds will be generated. Enable this if the project
 742 | # contains a lot of classes, structs, unions or interfaces.
 743 | 
 744 | ALPHABETICAL_INDEX     = YES
 745 | 
 746 | # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
 747 | # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
 748 | # in which this list will be split (can be a number in the range [1..20])
 749 | 
 750 | COLS_IN_ALPHA_INDEX    = 5
 751 | 
 752 | # In case all classes in a project start with a common prefix, all
 753 | # classes will be put under the same header in the alphabetical index.
 754 | # The IGNORE_PREFIX tag can be used to specify one or more prefixes that
 755 | # should be ignored while generating the index headers.
 756 | 
 757 | IGNORE_PREFIX          =
 758 | 
 759 | #---------------------------------------------------------------------------
 760 | # configuration options related to the HTML output
 761 | #---------------------------------------------------------------------------
 762 | 
 763 | # If the GENERATE_HTML tag is set to YES (the default) Doxygen will
 764 | # generate HTML output.
 765 | 
 766 | GENERATE_HTML          = YES
 767 | 
 768 | # The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
 769 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
 770 | # put in front of it. If left blank `html' will be used as the default path.
 771 | 
 772 | HTML_OUTPUT            =
 773 | 
 774 | # The HTML_FILE_EXTENSION tag can be used to specify the file extension for
 775 | # each generated HTML page (for example: .htm,.php,.asp). If it is left blank
 776 | # doxygen will generate files with .html extension.
 777 | 
 778 | HTML_FILE_EXTENSION    = .html
 779 | 
 780 | # The HTML_HEADER tag can be used to specify a personal HTML header for
 781 | # each generated HTML page. If it is left blank doxygen will generate a
 782 | # standard header.
 783 | 
 784 | HTML_HEADER            =
 785 | 
 786 | # The HTML_FOOTER tag can be used to specify a personal HTML footer for
 787 | # each generated HTML page. If it is left blank doxygen will generate a
 788 | # standard footer.
 789 | 
 790 | HTML_FOOTER            =
 791 | 
 792 | # If the HTML_TIMESTAMP tag is set to YES then the generated HTML
 793 | # documentation will contain the timestamp.
 794 | 
 795 | HTML_TIMESTAMP         = NO
 796 | 
 797 | # The HTML_STYLESHEET tag can be used to specify a user-defined cascading
 798 | # style sheet that is used by each HTML page. It can be used to
 799 | # fine-tune the look of the HTML output. If the tag is left blank doxygen
 800 | # will generate a default style sheet. Note that doxygen will try to copy
 801 | # the style sheet file to the HTML output directory, so don't put your own
 802 | # stylesheet in the HTML output directory as well, or it will be erased!
 803 | 
 804 | HTML_STYLESHEET        =
 805 | 
 806 | # If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
 807 | # files or namespaces will be aligned in HTML using tables. If set to
 808 | # NO a bullet list will be used.
 809 | 
 810 | # Obsolete # HTML_ALIGN_MEMBERS     = YES
 811 | 
 812 | # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
 813 | # documentation will contain sections that can be hidden and shown after the
 814 | # page has loaded. For this to work a browser that supports
 815 | # JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
 816 | # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
 817 | 
 818 | HTML_DYNAMIC_SECTIONS  = NO
 819 | 
 820 | # If the GENERATE_DOCSET tag is set to YES, additional index files
 821 | # will be generated that can be used as input for Apple's Xcode 3
 822 | # integrated development environment, introduced with OSX 10.5 (Leopard).
 823 | # To create a documentation set, doxygen will generate a Makefile in the
 824 | # HTML output directory. Running make will produce the docset in that
 825 | # directory and running "make install" will install the docset in
 826 | # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
 827 | # it at startup.
 828 | # See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
 829 | 
 830 | GENERATE_DOCSET        = NO
 831 | 
 832 | # When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
 833 | # feed. A documentation feed provides an umbrella under which multiple
 834 | # documentation sets from a single provider (such as a company or product suite)
 835 | # can be grouped.
 836 | 
 837 | DOCSET_FEEDNAME        = "Doxygen generated docs"
 838 | 
 839 | # When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
 840 | # should uniquely identify the documentation set bundle. This should be a
 841 | # reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
 842 | # will append .docset to the name.
 843 | 
 844 | DOCSET_BUNDLE_ID       = org.doxygen.Project
 845 | 
 846 | # If the GENERATE_HTMLHELP tag is set to YES, additional index files
 847 | # will be generated that can be used as input for tools like the
 848 | # Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
 849 | # of the generated HTML documentation.
 850 | 
 851 | GENERATE_HTMLHELP      = NO
 852 | 
 853 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
 854 | # be used to specify the file name of the resulting .chm file. You
 855 | # can add a path in front of the file if the result should not be
 856 | # written to the html output directory.
 857 | 
 858 | CHM_FILE               =
 859 | 
 860 | # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
 861 | # be used to specify the location (absolute path including file name) of
 862 | # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
 863 | # the HTML help compiler on the generated index.hhp.
 864 | 
 865 | HHC_LOCATION           =
 866 | 
 867 | # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
 868 | # controls if a separate .chi index file is generated (YES) or that
 869 | # it should be included in the master .chm file (NO).
 870 | 
 871 | GENERATE_CHI           = NO
 872 | 
 873 | # If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
 874 | # is used to encode HtmlHelp index (hhk), content (hhc) and project file
 875 | # content.
 876 | 
 877 | CHM_INDEX_ENCODING     =
 878 | 
 879 | # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
 880 | # controls whether a binary table of contents is generated (YES) or a
 881 | # normal table of contents (NO) in the .chm file.
 882 | 
 883 | BINARY_TOC             = NO
 884 | 
 885 | # The TOC_EXPAND flag can be set to YES to add extra items for group members
 886 | # to the contents of the HTML help documentation and to the tree view.
 887 | 
 888 | TOC_EXPAND             = NO
 889 | 
 890 | # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER
 891 | # are set, an additional index file will be generated that can be used as input for
 892 | # Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated
 893 | # HTML documentation.
 894 | 
 895 | GENERATE_QHP           = NO
 896 | 
 897 | # If the QHG_LOCATION tag is specified, the QCH_FILE tag can
 898 | # be used to specify the file name of the resulting .qch file.
 899 | # The path specified is relative to the HTML output folder.
 900 | 
 901 | QCH_FILE               =
 902 | 
 903 | # The QHP_NAMESPACE tag specifies the namespace to use when generating
 904 | # Qt Help Project output. For more information please see
 905 | # http://doc.trolltech.com/qthelpproject.html#namespace
 906 | 
 907 | QHP_NAMESPACE          =
 908 | 
 909 | # The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating
 910 | # Qt Help Project output. For more information please see
 911 | # http://doc.trolltech.com/qthelpproject.html#virtual-folders
 912 | 
 913 | QHP_VIRTUAL_FOLDER     = doc
 914 | 
 915 | # If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
 916 | # For more information please see
 917 | # http://doc.trolltech.com/qthelpproject.html#custom-filters
 918 | 
 919 | QHP_CUST_FILTER_NAME   =
 920 | 
 921 | # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see
 922 | # <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
 923 | 
 924 | QHP_CUST_FILTER_ATTRS  =
 925 | 
 926 | # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
 927 | # filter section matches.
 928 | # <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
 929 | 
 930 | QHP_SECT_FILTER_ATTRS  =
 931 | 
 932 | # If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
 933 | # be used to specify the location of Qt's qhelpgenerator.
 934 | # If non-empty doxygen will try to run qhelpgenerator on the generated
 935 | # .qhp file.
 936 | 
 937 | QHG_LOCATION           =
 938 | 
 939 | # The DISABLE_INDEX tag can be used to turn on/off the condensed index at
 940 | # top of each HTML page. The value NO (the default) enables the index and
 941 | # the value YES disables it.
 942 | 
 943 | DISABLE_INDEX          = NO
 944 | 
 945 | # This tag can be used to set the number of enum values (range [1..20])
 946 | # that doxygen will group on one line in the generated HTML documentation.
 947 | 
 948 | ENUM_VALUES_PER_LINE   = 4
 949 | 
 950 | # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
 951 | # structure should be generated to display hierarchical information.
 952 | # If the tag value is set to YES, a side panel will be generated
 953 | # containing a tree-like index structure (just like the one that
 954 | # is generated for HTML Help). For this to work a browser that supports
 955 | # JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
 956 | # Windows users are probably better off using the HTML help feature.
 957 | 
 958 | GENERATE_TREEVIEW      = YES
 959 | 
 960 | # By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
 961 | # and Class Hierarchy pages using a tree view instead of an ordered list.
 962 | 
 963 | # Obsolete # USE_INLINE_TREES       = NO
 964 | 
 965 | # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
 966 | # used to set the initial width (in pixels) of the frame in which the tree
 967 | # is shown.
 968 | 
 969 | TREEVIEW_WIDTH         = 250
 970 | 
 971 | # Use this tag to change the font size of Latex formulas included
 972 | # as images in the HTML documentation. The default is 10. Note that
 973 | # when you change the font size after a successful doxygen run you need
 974 | # to manually remove any form_*.png images from the HTML output directory
 975 | # to force them to be regenerated.
 976 | 
 977 | FORMULA_FONTSIZE       = 10
 978 | 
 979 | # When the SEARCHENGINE tag is enabled, doxygen will generate a search box for the HTML output. The underlying search engine uses javascript
 980 | # and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP)
 981 | # there is already a search function so this one should typically
 982 | # be disabled.
 983 | 
 984 | SEARCHENGINE           = YES
 985 | 
 986 | #---------------------------------------------------------------------------
 987 | # configuration options related to the LaTeX output
 988 | #---------------------------------------------------------------------------
 989 | 
 990 | # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
 991 | # generate Latex output.
 992 | 
 993 | GENERATE_LATEX         = NO
 994 | 
 995 | # The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
 996 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
 997 | # put in front of it. If left blank `latex' will be used as the default path.
 998 | 
 999 | LATEX_OUTPUT           =
1000 | 
1001 | # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
1002 | # invoked. If left blank `latex' will be used as the default command name.
1003 | 
1004 | LATEX_CMD_NAME         = latex
1005 | 
1006 | # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
1007 | # generate index for LaTeX. If left blank `makeindex' will be used as the
1008 | # default command name.
1009 | 
1010 | MAKEINDEX_CMD_NAME     = makeindex
1011 | 
1012 | # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
1013 | # LaTeX documents. This may be useful for small projects and may help to
1014 | # save some trees in general.
1015 | 
1016 | COMPACT_LATEX          = NO
1017 | 
1018 | # The PAPER_TYPE tag can be used to set the paper type that is used
1019 | # by the printer. Possible values are: a4, a4wide, letter, legal and
1020 | # executive. If left blank a4wide will be used.
1021 | 
1022 | PAPER_TYPE             = a4wide
1023 | 
1024 | # The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
1025 | # packages that should be included in the LaTeX output.
1026 | 
1027 | EXTRA_PACKAGES         =
1028 | 
1029 | # The LATEX_HEADER tag can be used to specify a personal LaTeX header for
1030 | # the generated latex document. The header should contain everything until
1031 | # the first chapter. If it is left blank doxygen will generate a
1032 | # standard header. Notice: only use this tag if you know what you are doing!
1033 | 
1034 | LATEX_HEADER           =
1035 | 
1036 | # If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
1037 | # is prepared for conversion to pdf (using ps2pdf). The pdf file will
1038 | # contain links (just like the HTML output) instead of page references.
1039 | # This makes the output suitable for online browsing using a pdf viewer.
1040 | 
1041 | PDF_HYPERLINKS         = NO
1042 | 
1043 | # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
1044 | # plain latex in the generated Makefile. Set this option to YES to get a
1045 | # higher quality PDF documentation.
1046 | 
1047 | USE_PDFLATEX           = NO
1048 | 
1049 | # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
1050 | # command to the generated LaTeX files. This will instruct LaTeX to keep
1051 | # running if errors occur, instead of asking the user for help.
1052 | # This option is also used when generating formulas in HTML.
1053 | 
1054 | LATEX_BATCHMODE        = NO
1055 | 
1056 | # If LATEX_HIDE_INDICES is set to YES then doxygen will not
1057 | # include the index chapters (such as File Index, Compound Index, etc.)
1058 | # in the output.
1059 | 
1060 | LATEX_HIDE_INDICES     = NO
1061 | 
1062 | # If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER.
1063 | 
1064 | LATEX_SOURCE_CODE      = NO
1065 | 
1066 | #---------------------------------------------------------------------------
1067 | # configuration options related to the RTF output
1068 | #---------------------------------------------------------------------------
1069 | 
1070 | # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
1071 | # The RTF output is optimized for Word 97 and may not look very pretty with
1072 | # other RTF readers or editors.
1073 | 
1074 | GENERATE_RTF           = NO
1075 | 
1076 | # The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
1077 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1078 | # put in front of it. If left blank `rtf' will be used as the default path.
1079 | 
1080 | RTF_OUTPUT             =
1081 | 
1082 | # If the COMPACT_RTF tag is set to YES Doxygen generates more compact
1083 | # RTF documents. This may be useful for small projects and may help to
1084 | # save some trees in general.
1085 | 
1086 | COMPACT_RTF            = NO
1087 | 
1088 | # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
1089 | # will contain hyperlink fields. The RTF file will
1090 | # contain links (just like the HTML output) instead of page references.
1091 | # This makes the output suitable for online browsing using WORD or other
1092 | # programs which support those fields.
1093 | # Note: wordpad (write) and others do not support links.
1094 | 
1095 | RTF_HYPERLINKS         = NO
1096 | 
1097 | # Load stylesheet definitions from file. Syntax is similar to doxygen's
1098 | # config file, i.e. a series of assignments. You only have to provide
1099 | # replacements, missing definitions are set to their default value.
1100 | 
1101 | RTF_STYLESHEET_FILE    =
1102 | 
1103 | # Set optional variables used in the generation of an rtf document.
1104 | # Syntax is similar to doxygen's config file.
1105 | 
1106 | RTF_EXTENSIONS_FILE    =
1107 | 
1108 | #---------------------------------------------------------------------------
1109 | # configuration options related to the man page output
1110 | #---------------------------------------------------------------------------
1111 | 
1112 | # If the GENERATE_MAN tag is set to YES (the default) Doxygen will
1113 | # generate man pages
1114 | 
1115 | GENERATE_MAN           = YES
1116 | 
1117 | # The MAN_OUTPUT tag is used to specify where the man pages will be put.
1118 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1119 | # put in front of it. If left blank `man' will be used as the default path.
1120 | 
1121 | MAN_OUTPUT             =
1122 | 
1123 | # The MAN_EXTENSION tag determines the extension that is added to
1124 | # the generated man pages (default is the subroutine's section .3)
1125 | 
1126 | MAN_EXTENSION          =
1127 | 
1128 | # If the MAN_LINKS tag is set to YES and Doxygen generates man output,
1129 | # then it will generate one additional man file for each entity
1130 | # documented in the real man page(s). These additional files
1131 | # only source the real man page, but without them the man command
1132 | # would be unable to find the correct page. The default is NO.
1133 | 
1134 | MAN_LINKS              = NO
1135 | 
1136 | #---------------------------------------------------------------------------
1137 | # configuration options related to the XML output
1138 | #---------------------------------------------------------------------------
1139 | 
1140 | # If the GENERATE_XML tag is set to YES Doxygen will
1141 | # generate an XML file that captures the structure of
1142 | # the code including all documentation.
1143 | 
1144 | GENERATE_XML           = NO
1145 | 
1146 | # The XML_OUTPUT tag is used to specify where the XML pages will be put.
1147 | # If a relative path is entered the value of OUTPUT_DIRECTORY will be
1148 | # put in front of it. If left blank `xml' will be used as the default path.
1149 | 
1150 | XML_OUTPUT             = xml
1151 | 
1152 | # The XML_SCHEMA tag can be used to specify an XML schema,
1153 | # which can be used by a validating XML parser to check the
1154 | # syntax of the XML files.
1155 | 
1156 | # XML_SCHEMA             =
1157 | 
1158 | # The XML_DTD tag can be used to specify an XML DTD,
1159 | # which can be used by a validating XML parser to check the
1160 | # syntax of the XML files.
1161 | 
1162 | # XML_DTD                =
1163 | 
1164 | # If the XML_PROGRAMLISTING tag is set to YES Doxygen will
1165 | # dump the program listings (including syntax highlighting
1166 | # and cross-referencing information) to the XML output. Note that
1167 | # enabling this will significantly increase the size of the XML output.
1168 | 
1169 | XML_PROGRAMLISTING     = YES
1170 | 
1171 | #---------------------------------------------------------------------------
1172 | # configuration options for the AutoGen Definitions output
1173 | #---------------------------------------------------------------------------
1174 | 
1175 | # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
1176 | # generate an AutoGen Definitions (see autogen.sf.net) file
1177 | # that captures the structure of the code including all
1178 | # documentation. Note that this feature is still experimental
1179 | # and incomplete at the moment.
1180 | 
1181 | GENERATE_AUTOGEN_DEF   = NO
1182 | 
1183 | #---------------------------------------------------------------------------
1184 | # configuration options related to the Perl module output
1185 | #---------------------------------------------------------------------------
1186 | 
1187 | # If the GENERATE_PERLMOD tag is set to YES Doxygen will
1188 | # generate a Perl module file that captures the structure of
1189 | # the code including all documentation. Note that this
1190 | # feature is still experimental and incomplete at the
1191 | # moment.
1192 | 
1193 | GENERATE_PERLMOD       = NO
1194 | 
1195 | # If the PERLMOD_LATEX tag is set to YES Doxygen will generate
1196 | # the necessary Makefile rules, Perl scripts and LaTeX code to be able
1197 | # to generate PDF and DVI output from the Perl module output.
1198 | 
1199 | PERLMOD_LATEX          = NO
1200 | 
1201 | # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
1202 | # nicely formatted so it can be parsed by a human reader.
1203 | # This is useful
1204 | # if you want to understand what is going on.
1205 | # On the other hand, if this
1206 | # tag is set to NO the size of the Perl module output will be much smaller
1207 | # and Perl will parse it just the same.
1208 | 
1209 | PERLMOD_PRETTY         = YES
1210 | 
1211 | # The names of the make variables in the generated doxyrules.make file
1212 | # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
1213 | # This is useful so different doxyrules.make files included by the same
1214 | # Makefile don't overwrite each other's variables.
1215 | 
1216 | PERLMOD_MAKEVAR_PREFIX =
1217 | 
1218 | #---------------------------------------------------------------------------
1219 | # Configuration options related to the preprocessor
1220 | #---------------------------------------------------------------------------
1221 | 
1222 | # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
1223 | # evaluate all C-preprocessor directives found in the sources and include
1224 | # files.
1225 | 
1226 | ENABLE_PREPROCESSING   = YES
1227 | 
1228 | # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
1229 | # names in the source code. If set to NO (the default) only conditional
1230 | # compilation will be performed. Macro expansion can be done in a controlled
1231 | # way by setting EXPAND_ONLY_PREDEF to YES.
1232 | 
1233 | MACRO_EXPANSION        = NO
1234 | 
1235 | # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
1236 | # then the macro expansion is limited to the macros specified with the
1237 | # PREDEFINED and EXPAND_AS_DEFINED tags.
1238 | 
1239 | EXPAND_ONLY_PREDEF     = NO
1240 | 
1241 | # If the SEARCH_INCLUDES tag is set to YES (the default) the include files
1242 | # in the INCLUDE_PATH (see below) will be searched if a #include is found.
1243 | 
1244 | SEARCH_INCLUDES        = YES
1245 | 
1246 | # The INCLUDE_PATH tag can be used to specify one or more directories that
1247 | # contain include files that are not input files but should be processed by
1248 | # the preprocessor.
1249 | 
1250 | INCLUDE_PATH           =
1251 | 
1252 | # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
1253 | # patterns (like *.h and *.hpp) to filter out the header-files in the
1254 | # directories. If left blank, the patterns specified with FILE_PATTERNS will
1255 | # be used.
1256 | 
1257 | INCLUDE_FILE_PATTERNS  =
1258 | 
1259 | # The PREDEFINED tag can be used to specify one or more macro names that
1260 | # are defined before the preprocessor is started (similar to the -D option of
1261 | # gcc). The argument of the tag is a list of macros of the form: name
1262 | # or name=definition (no spaces). If the definition and the = are
1263 | # omitted =1 is assumed. To prevent a macro definition from being
1264 | # undefined via #undef or recursively expanded use the := operator
1265 | # instead of the = operator.
1266 | 
1267 | PREDEFINED             =
1268 | 
1269 | # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
1270 | # this tag can be used to specify a list of macro names that should be expanded.
1271 | # The macro definition that is found in the sources will be used.
1272 | # Use the PREDEFINED tag if you want to use a different macro definition.
1273 | 
1274 | EXPAND_AS_DEFINED      =
1275 | 
1276 | # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
1277 | # doxygen's preprocessor will remove all function-like macros that are alone
1278 | # on a line, have an all uppercase name, and do not end with a semicolon. Such
1279 | # function macros are typically used for boiler-plate code, and will confuse
1280 | # the parser if not removed.
1281 | 
1282 | SKIP_FUNCTION_MACROS   = YES
1283 | 
1284 | #---------------------------------------------------------------------------
1285 | # Configuration::additions related to external references
1286 | #---------------------------------------------------------------------------
1287 | 
1288 | # The TAGFILES option can be used to specify one or more tagfiles.
1289 | # Optionally an initial location of the external documentation
1290 | # can be added for each tagfile. The format of a tag file without
1291 | # this location is as follows:
1292 | #
1293 | # TAGFILES = file1 file2 ...
1294 | # Adding location for the tag files is done as follows:
1295 | #
1296 | # TAGFILES = file1=loc1 "file2 = loc2" ...
1297 | # where "loc1" and "loc2" can be relative or absolute paths or
1298 | # URLs. If a location is present for each tag, the installdox tool
1299 | # does not have to be run to correct the links.
1300 | # Note that each tag file must have a unique name
1301 | # (where the name does NOT include the path)
1302 | # If a tag file is not located in the directory in which doxygen
1303 | # is run, you must also specify the path to the tagfile here.
1304 | 
1305 | TAGFILES               =
1306 | 
1307 | # When a file name is specified after GENERATE_TAGFILE, doxygen will create
1308 | # a tag file that is based on the input files it reads.
1309 | 
1310 | GENERATE_TAGFILE       =
1311 | 
1312 | # If the ALLEXTERNALS tag is set to YES all external classes will be listed
1313 | # in the class index. If set to NO only the inherited external classes
1314 | # will be listed.
1315 | 
1316 | ALLEXTERNALS           = NO
1317 | 
1318 | # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
1319 | # in the modules index. If set to NO, only the current project's groups will
1320 | # be listed.
1321 | 
1322 | EXTERNAL_GROUPS        = YES
1323 | 
1324 | # The PERL_PATH should be the absolute path and name of the perl script
1325 | # interpreter (i.e. the result of `which perl').
1326 | 
1327 | PERL_PATH              =
1328 | 
1329 | #---------------------------------------------------------------------------
1330 | # Configuration options related to the dot tool
1331 | #---------------------------------------------------------------------------
1332 | 
1333 | # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
1334 | # generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
1335 | # or super classes. Setting the tag to NO turns the diagrams off. Note that
1336 | # this option is superseded by the HAVE_DOT option below. This is only a
1337 | # fallback. It is recommended to install and use dot, since it yields more
1338 | # powerful graphs.
1339 | 
1340 | CLASS_DIAGRAMS         = NO
1341 | 
1342 | # You can define message sequence charts within doxygen comments using the \msc
1343 | # command. Doxygen will then run the mscgen tool (see
1344 | # http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
1345 | # documentation. The MSCGEN_PATH tag allows you to specify the directory where
1346 | # the mscgen tool resides. If left empty the tool is assumed to be found in the
1347 | # default search path.
1348 | 
1349 | MSCGEN_PATH            =
1350 | 
1351 | # If set to YES, the inheritance and collaboration graphs will hide
1352 | # inheritance and usage relations if the target is undocumented
1353 | # or is not a class.
1354 | 
1355 | HIDE_UNDOC_RELATIONS   = YES
1356 | 
1357 | # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
1358 | # available from the path. This tool is part of Graphviz, a graph visualization
1359 | # toolkit from AT&T and Lucent Bell Labs. The other options in this section
1360 | # have no effect if this option is set to NO (the default)
1361 | 
1362 | HAVE_DOT               = YES
1363 | 
1364 | # By default doxygen will write a font called FreeSans.ttf to the output
1365 | # directory and reference it in all dot files that doxygen generates. This
1366 | # font does not include all possible unicode characters however, so when you need
1367 | # these (or just want a differently looking font) you can specify the font name
1368 | # using DOT_FONTNAME. You need to make sure dot is able to find the font,
1369 | # which can be done by putting it in a standard location or by setting the
1370 | # DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
1371 | # containing the font.
1372 | 
1373 | #DOT_FONTNAME           = FreeSans
1374 | 
1375 | # The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
1376 | # The default size is 10pt.
1377 | 
1378 | DOT_FONTSIZE           = 10
1379 | 
1380 | # By default doxygen will tell dot to use the output directory to look for the
1381 | # FreeSans.ttf font (which doxygen will put there itself). If you specify a
1382 | # different font using DOT_FONTNAME you can set the path where dot
1383 | # can find it using this tag.
1384 | 
1385 | DOT_FONTPATH           =
1386 | 
1387 | # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
1388 | # will generate a graph for each documented class showing the direct and
1389 | # indirect inheritance relations. Setting this tag to YES will force the
1390 | # CLASS_DIAGRAMS tag to NO.
1391 | 
1392 | CLASS_GRAPH            = YES
1393 | 
1394 | # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
1395 | # will generate a graph for each documented class showing the direct and
1396 | # indirect implementation dependencies (inheritance, containment, and
1397 | # class references variables) of the class with other documented classes.
1398 | 
1399 | COLLABORATION_GRAPH    = YES
1400 | 
1401 | # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
1402 | # will generate a graph for groups, showing the direct groups dependencies
1403 | 
1404 | GROUP_GRAPHS           = YES
1405 | 
1406 | # If the UML_LOOK tag is set to YES doxygen will generate inheritance and
1407 | # collaboration diagrams in a style similar to the OMG's Unified Modeling
1408 | # Language.
1409 | 
1410 | UML_LOOK               = NO
1411 | 
1412 | # If set to YES, the inheritance and collaboration graphs will show the
1413 | # relations between templates and their instances.
1414 | 
1415 | TEMPLATE_RELATIONS     = NO
1416 | 
1417 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
1418 | # tags are set to YES then doxygen will generate a graph for each documented
1419 | # file showing the direct and indirect include dependencies of the file with
1420 | # other documented files.
1421 | 
1422 | INCLUDE_GRAPH          = YES
1423 | 
1424 | # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
1425 | # HAVE_DOT tags are set to YES then doxygen will generate a graph for each
1426 | # documented header file showing the documented files that directly or
1427 | # indirectly include this file.
1428 | 
1429 | INCLUDED_BY_GRAPH      = YES
1430 | 
1431 | # If the CALL_GRAPH and HAVE_DOT options are set to YES then
1432 | # doxygen will generate a call dependency graph for every global function
1433 | # or class method. Note that enabling this option will significantly increase
1434 | # the time of a run. So in most cases it will be better to enable call graphs
1435 | # for selected functions only using the \callgraph command.
1436 | 
1437 | CALL_GRAPH             = NO
1438 | 
1439 | # If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
1440 | # doxygen will generate a caller dependency graph for every global function
1441 | # or class method. Note that enabling this option will significantly increase
1442 | # the time of a run. So in most cases it will be better to enable caller
1443 | # graphs for selected functions only using the \callergraph command.
1444 | 
1445 | CALLER_GRAPH           = NO
1446 | 
1447 | # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
1448 | # will generate a graphical hierarchy of all classes instead of a textual one.
1449 | 
1450 | GRAPHICAL_HIERARCHY    = YES
1451 | 
1452 | # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
1453 | # then doxygen will show the dependencies a directory has on other directories
1454 | # in a graphical way. The dependency relations are determined by the #include
1455 | # relations between the files in the directories.
1456 | 
1457 | DIRECTORY_GRAPH        = YES
1458 | 
1459 | # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
1460 | # generated by dot. Possible values are png, jpg, or gif
1461 | # If left blank png will be used.
1462 | 
1463 | DOT_IMAGE_FORMAT       = png
1464 | 
1465 | # The tag DOT_PATH can be used to specify the path where the dot tool can be
1466 | # found. If left blank, it is assumed the dot tool can be found in the path.
1467 | 
1468 | DOT_PATH               =
1469 | 
1470 | # The DOTFILE_DIRS tag can be used to specify one or more directories that
1471 | # contain dot files that are included in the documentation (see the
1472 | # \dotfile command).
1473 | 
1474 | DOTFILE_DIRS           =
1475 | 
1476 | # The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
1477 | # nodes that will be shown in the graph. If the number of nodes in a graph
1478 | # becomes larger than this value, doxygen will truncate the graph, which is
1479 | # visualized by representing a node as a red box. Note that if the
1480 | # number of direct children of the root node in a graph is already larger than
1481 | # DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
1482 | # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
1483 | 
1484 | DOT_GRAPH_MAX_NODES    = 100
1485 | 
1486 | # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
1487 | # graphs generated by dot. A depth value of 3 means that only nodes reachable
1488 | # from the root by following a path via at most 3 edges will be shown. Nodes
1489 | # that lay further from the root node will be omitted. Note that setting this
1490 | # option to 1 or 2 may greatly reduce the computation time needed for large
1491 | # code bases. Also note that the size of a graph can be further restricted by
1492 | # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
1493 | 
1494 | MAX_DOT_GRAPH_DEPTH    = 0
1495 | 
1496 | # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
1497 | # background. This is disabled by default, because dot on Windows does not
1498 | # seem to support this out of the box. Warning: Depending on the platform used,
1499 | # enabling this option may lead to badly anti-aliased labels on the edges of
1500 | # a graph (i.e. they become hard to read).
1501 | 
1502 | DOT_TRANSPARENT        = NO
1503 | 
1504 | # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
1505 | # files in one run (i.e. multiple -o and -T options on the command line). This
1506 | # makes dot run faster, but since only newer versions of dot (>1.8.10)
1507 | # support this, this feature is disabled by default.
1508 | 
1509 | DOT_MULTI_TARGETS      = YES
1510 | 
1511 | # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
1512 | # generate a legend page explaining the meaning of the various boxes and
1513 | # arrows in the dot generated graphs.
1514 | 
1515 | GENERATE_LEGEND        = YES
1516 | 
1517 | # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
1518 | # remove the intermediate dot files that are used to generate
1519 | # the various graphs.
1520 | 
1521 | DOT_CLEANUP            = NO
1522 | 


--------------------------------------------------------------------------------
/include/popvcf:
--------------------------------------------------------------------------------
1 | ../src


--------------------------------------------------------------------------------
/include/popvcf.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include "../src/decode.hpp"
4 | #include "../src/encode.hpp"
5 | #include "../src/sequence_utils.hpp"
6 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required (VERSION 3.8)
 2 | 
 3 | # Update with "find src -name "*.?pp" | sort | awk '$1 !~ /main.cpp/{print "  "$1}'" in project root directory
 4 | set(popvcf_sources
 5 |   src/encode.cpp
 6 |   src/encode.hpp
 7 |   src/decode.cpp
 8 |   src/decode.hpp
 9 |   src/sequence_utils.cpp
10 |   src/sequence_utils.hpp
11 |   PARENT_SCOPE)
12 | 


--------------------------------------------------------------------------------
/src/decode.cpp:
--------------------------------------------------------------------------------
  1 | #include "decode.hpp"
  2 | 
  3 | #include <algorithm> //std::copy
  4 | #include <array>     // std::array
  5 | #include <charconv>
  6 | #include <cstdio>   // std::stdin
  7 | #include <cstring>  // std::memmove
  8 | #include <iostream> // std::cerr
  9 | #include <memory>
 10 | #include <stdexcept>
 11 | #include <string> // std::string
 12 | #include <vector> // std::vector
 13 | 
 14 | #include "io.hpp"
 15 | #include "sequence_utils.hpp" // ascii_cstring_to_int
 16 | 
 17 | #include "htslib/bgzf.h"
 18 | #include "htslib/hts.h"
 19 | #include "htslib/kseq.h"
 20 | #include "htslib/tbx.h"
 21 | 
 22 | namespace popvcf
 23 | {
 24 | void decode_file(std::string const & input_fn, bool const is_bgzf_input)
 25 | {
 26 |   Tdec_array_buf buffer_in;     // input buffer
 27 |   std::vector<char> buffer_out; // output buffer
 28 |   DecodeData dd;                // data used to keep track of buffers while decoding
 29 | 
 30 |   /// Input streams
 31 |   popvcf::bgzf_ptr in_bgzf(nullptr, popvcf::close_bgzf);
 32 |   popvcf::file_ptr in_vcf(nullptr, popvcf::close_vcf_nop);
 33 | 
 34 |   /// Open input file based on options
 35 |   if (is_bgzf_input)
 36 |     in_bgzf = popvcf::open_bgzf(input_fn, "r");
 37 |   else
 38 |     in_vcf = popvcf::open_vcf(input_fn, "r");
 39 | 
 40 |   buffer_out.reserve(16 * DEC_BUFFER_SIZE);
 41 | 
 42 |   /// Read first batch of data
 43 |   if (is_bgzf_input)
 44 |     dd.in_size = bgzf_read(in_bgzf.get(), buffer_in.data(), DEC_BUFFER_SIZE);
 45 |   else
 46 |     dd.in_size = fread(buffer_in.data(), 1, DEC_BUFFER_SIZE, in_vcf.get());
 47 | 
 48 |   long new_bytes = dd.in_size;
 49 | 
 50 |   /// Outer loop - loop while there is some data to decode from the input stream
 51 |   while (new_bytes != 0)
 52 |   {
 53 |     decode_buffer</*in_region=*/false>(buffer_out, buffer_in, dd);
 54 | 
 55 |     /// Write buffer_out to stdout
 56 |     fwrite(buffer_out.data(), 1, buffer_out.size(), stdout);
 57 |     buffer_out.resize(0); // Clears output buffer, but does not deallocate
 58 |     new_bytes = -static_cast<long>(dd.in_size);
 59 | 
 60 |     /// Read more data
 61 |     if (is_bgzf_input)
 62 |       dd.in_size += bgzf_read(in_bgzf.get(), buffer_in.data() + dd.in_size, DEC_BUFFER_SIZE - dd.in_size);
 63 |     else
 64 |       dd.in_size += fread(buffer_in.data() + dd.in_size, 1, DEC_BUFFER_SIZE - dd.in_size, in_vcf.get());
 65 | 
 66 |     new_bytes += dd.in_size;
 67 |   } /// ends outer loop
 68 | 
 69 |   assert(buffer_out.size() == 0);
 70 | 
 71 |   if (dd.in_size != 0)
 72 |   {
 73 |     std::cerr << "[popvcf] WARNING: Unexpected ending of the VCF data, possibly the file is truncated.\n";
 74 | 
 75 |     // write output buffer
 76 |     fwrite(buffer_in.data(), 1, dd.in_size, stdout); // write output buffer
 77 |   }
 78 | }
 79 | 
 80 | void decode_region(std::string const & popvcf_fn, std::string const & region)
 81 | {
 82 |   assert(region.size() > 0);
 83 |   std::vector<char> buffer_in; // input buffer
 84 |   buffer_in.reserve(DEC_BUFFER_SIZE);
 85 |   std::vector<char> buffer_out; // output buffer
 86 |   DecodeData dd;                // data used to keep track of buffers while decoding
 87 | 
 88 |   /// parse region
 89 |   std::string chrom;
 90 |   long begin{-1};
 91 |   long end{std::numeric_limits<long>::max()};
 92 | 
 93 |   if (auto colon = region.find(':'); colon == std::string::npos)
 94 |   {
 95 |     chrom = region;
 96 |   }
 97 |   else
 98 |   {
 99 |     chrom = region.substr(0, colon);
100 | 
101 |     if (auto dash = region.find('-', colon + 1); dash == std::string::npos)
102 |     {
103 |       auto ret = std::from_chars(region.data() + colon + 1, region.data() + region.size(), begin);
104 | 
105 |       if (ret.ec != std::errc())
106 |         throw std::runtime_error("Could not parse region: " + region);
107 | 
108 |       end = begin;
109 |     }
110 |     else
111 |     {
112 |       auto ret_begin = std::from_chars(region.data() + colon + 1, region.data() + dash, begin);
113 | 
114 |       if (ret_begin.ec != std::errc())
115 |         throw std::runtime_error("Could not parse region: " + region);
116 | 
117 |       auto ret_end = std::from_chars(region.data() + dash + 1, region.data() + region.size(), end);
118 | 
119 |       if (ret_end.ec != std::errc())
120 |         throw std::runtime_error("Could not parse region: " + region);
121 |     }
122 | 
123 |     dd.begin = begin;
124 |     dd.end = end;
125 |   }
126 | 
127 |   /// Determine the region to query
128 |   std::string safe_region = chrom;
129 |   long safe_begin;
130 | 
131 |   if (begin >= 0)
132 |   {
133 |     safe_begin = std::max(1l, (begin / 10000l) * 10000l);
134 |     safe_region.push_back(':');
135 |     safe_region.append(std::to_string(std::max(1l, safe_begin)));
136 |     safe_region.push_back('-');
137 |     safe_region.append(std::to_string(end));
138 |   }
139 |   else
140 |   {
141 |     safe_begin = 0;
142 |   }
143 | 
144 |   /// Input streams
145 |   popvcf::hts_file_ptr in_bgzf = popvcf::open_hts_file(popvcf_fn.c_str(), "r");            // open popvcf.gz
146 |   popvcf::tbx_t_ptr in_tbx = popvcf::open_tbx_t(popvcf_fn.c_str());                        // open popvcf.gz.tbi
147 |   popvcf::hts_itr_t_ptr in_it = popvcf::open_hts_itr_t(in_tbx.get(), safe_region.c_str()); // query region
148 | 
149 |   /// Write the header lines
150 |   kstring_t str = {0, 0, 0};
151 | 
152 |   while (hts_getline(in_bgzf.get(), KS_SEP_LINE, &str) >= 0)
153 |   {
154 |     if (!str.l || str.s[0] != in_tbx->conf.meta_char)
155 |       break;
156 | 
157 |     fwrite(str.s, 1, str.l, stdout);
158 |     fputs("\n", stdout);
159 |   }
160 | 
161 |   // return here, after writing header, if there are no records in the region
162 |   if (in_it == nullptr)
163 |   {
164 |     free(str.s);
165 |     return;
166 |   }
167 | 
168 |   int ret = tbx_itr_next(in_bgzf.get(), in_tbx.get(), in_it.get(), &str);
169 | 
170 |   while (ret > 0)
171 |   {
172 |     long vcf_pos = get_vcf_pos(str.s, str.s + str.l);
173 | 
174 |     if (vcf_pos >= safe_begin)
175 |     {
176 |       buffer_in.insert(buffer_in.end(), str.s, str.s + str.l);
177 |       buffer_in.push_back('\n');
178 | 
179 |       decode_buffer</*in_region=*/true>(buffer_out, buffer_in, dd);
180 | 
181 |       /// Write buffer_out to stdout
182 |       fwrite(buffer_out.data(), 1, buffer_out.size(), stdout);
183 | 
184 |       /// Clears output buffer, but does not deallocate
185 |       buffer_out.resize(0);
186 | 
187 |       /// Check if end position has been passed
188 |       if (vcf_pos > dd.end)
189 |         break;
190 |     }
191 | 
192 |     /// Read more data
193 |     ret = tbx_itr_next(in_bgzf.get(), in_tbx.get(), in_it.get(), &str);
194 |   }
195 | 
196 |   free(str.s);
197 | }
198 | 
199 | } // namespace popvcf
200 | 


--------------------------------------------------------------------------------
/src/decode.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <algorithm>
  4 | #include <array>
  5 | #include <cassert>
  6 | #include <charconv>
  7 | #include <cstdint>
  8 | #include <iostream>
  9 | #include <limits>
 10 | #include <string>
 11 | #include <vector>
 12 | 
 13 | #include <parallel_hashmap/phmap.h>
 14 | 
 15 | #include "sequence_utils.hpp"
 16 | 
 17 | #include <popvcf/constants.hpp>
 18 | 
 19 | namespace popvcf
 20 | {
 21 | class DecodeData
 22 | {
 23 | public:
 24 |   std::size_t field{0};   //!< Current vcf field
 25 |   std::size_t in_size{0}; //!< Size of input buffer.
 26 |   std::size_t b{0};       //!< Field begin index in input buffer.
 27 |   std::size_t i{b};       //!< Curent index in input buffer
 28 |   bool header_line{true}; //!< True iff in header line
 29 |   bool in_region{true};   //!< True iff in region
 30 | 
 31 |   int64_t begin{-1};
 32 |   int64_t end{std::numeric_limits<int64_t>::max()};
 33 | 
 34 |   std::vector<uint32_t> prev_field2uid{};
 35 |   std::vector<std::string> prev_unique_fields{};
 36 |   phmap::flat_hash_map<std::string, uint32_t> prev_map_to_unique_fields{};
 37 | 
 38 |   int32_t stored_alt{0};
 39 |   int32_t n_alt{-1};
 40 |   std::string next_contig{};
 41 |   std::vector<uint32_t> field2uid{};
 42 |   std::vector<std::string> unique_fields{};
 43 |   phmap::flat_hash_map<std::string, uint32_t> map_to_unique_fields{};
 44 | 
 45 |   inline void clear_line(int32_t next_n_alt)
 46 |   {
 47 |     next_n_alt += stored_alt;
 48 |     stored_alt = 0;
 49 | 
 50 |     if (next_n_alt == n_alt)
 51 |     {
 52 |       std::swap(prev_field2uid, field2uid);
 53 |       std::swap(prev_unique_fields, unique_fields);
 54 |       std::swap(prev_map_to_unique_fields, map_to_unique_fields);
 55 |     }
 56 | 
 57 |     n_alt = next_n_alt;
 58 |     field2uid.resize(0);
 59 |     unique_fields.resize(0);
 60 |     map_to_unique_fields.clear();
 61 |   }
 62 | };
 63 | 
 64 | template <typename Tbuffer_in>
 65 | inline void set_input_size(Tbuffer_in & buffer_in, DecodeData & dd)
 66 | {
 67 |   dd.in_size = buffer_in.size();
 68 | }
 69 | 
 70 | template <>
 71 | inline void set_input_size(Tdec_array_buf & /*buffer_in*/, DecodeData & /*dd*/)
 72 | {
 73 |   // Do nothing.
 74 |   // NOTE: dd.in_size must be set prior to calling decode_buffer in arrays
 75 | }
 76 | 
 77 | //! Decodes an input buffer. Output is written in \a buffer_out .
 78 | template <bool is_region, typename Tbuffer_out, typename Tbuffer_in>
 79 | inline void decode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, DecodeData & dd)
 80 | {
 81 |   set_input_size(buffer_in, dd);
 82 |   std::size_t constexpr N_FIELDS_SITE_DATA{9};
 83 | 
 84 |   // inner loop - Loops over each character in the input buffer
 85 |   while (dd.i < dd.in_size)
 86 |   {
 87 |     char const b_in = buffer_in[dd.i];
 88 | 
 89 |     if (b_in != '\t' && b_in != '\n')
 90 |     {
 91 |       ++dd.i; // we are in a vcf field
 92 |       continue;
 93 |     }
 94 | 
 95 |     if (dd.field == 0)
 96 |     {
 97 |       dd.header_line = buffer_in[dd.b] == '#'; // check if in header line
 98 | 
 99 |       if (not dd.header_line)
100 |       {
101 |         ++dd.i; // include '\t'
102 |         dd.next_contig.assign(&buffer_in[dd.b], dd.i - dd.b);
103 | 
104 |         /// Do not print this line until we know if we are inside the region or not
105 |         dd.b = dd.i;
106 |         ++dd.field;
107 |         continue;
108 |       }
109 |     }
110 |     else if (not dd.header_line)
111 |     {
112 |       if (dd.field == 1) /*POS field */
113 |       {
114 |         long pos{};
115 |         std::from_chars(&buffer_in[dd.b], &buffer_in[dd.i], pos); // get pos
116 |         dd.in_region = pos >= dd.begin && pos <= dd.end;
117 | 
118 |         if (!is_region || dd.in_region) /*print contig if we are inside the region*/
119 |           buffer_out.insert(buffer_out.end(), dd.next_contig.begin(), dd.next_contig.end());
120 |       }
121 |       else if (dd.field == 4) /* ALT field */
122 |       {
123 |         int32_t next_n_alt = std::count(&buffer_in[dd.b], &buffer_in[dd.i], ',');
124 |         dd.clear_line(next_n_alt);
125 |       }
126 |     }
127 | 
128 |     if (dd.header_line || dd.field < N_FIELDS_SITE_DATA)
129 |     {
130 |       // write field without any encoding
131 |       ++dd.i; // adds '\t' or '\n'
132 | 
133 |       if (!is_region || dd.in_region)
134 |         buffer_out.insert(buffer_out.end(), &buffer_in[dd.b], &buffer_in[dd.i]);
135 |     }
136 |     else
137 |     {
138 |       long field_idx = dd.field - N_FIELDS_SITE_DATA;
139 |       assert(field_idx == static_cast<long>(dd.field2uid.size()));
140 | 
141 |       while (buffer_in[dd.b] == '$' || buffer_in[dd.b] == '&')
142 |       {
143 |         assert(dd.b < dd.i);
144 |         assert(field_idx < static_cast<long>(dd.prev_field2uid.size()));
145 |         assert(dd.prev_field2uid[field_idx] < static_cast<long>(dd.prev_unique_fields.size()));
146 | 
147 |         std::string const & prior_field = dd.prev_unique_fields[dd.prev_field2uid[field_idx]];
148 | 
149 |         if (buffer_in[dd.b] == '$')
150 |         {
151 |           /* Unique field in this line. Same as field above. */
152 |           dd.map_to_unique_fields.insert(std::pair<std::string, uint32_t>(prior_field, dd.unique_fields.size()));
153 |           dd.field2uid.push_back(dd.unique_fields.size());
154 |           dd.unique_fields.push_back(prior_field);
155 |         }
156 |         else
157 |         {
158 |           /* Duplicate field in this line. Same as field above. */
159 |           assert(buffer_in[dd.b] == '&');
160 |           auto find_it = dd.map_to_unique_fields.find(prior_field);
161 |           assert(find_it != dd.map_to_unique_fields.end());
162 |           dd.field2uid.push_back(find_it->second);
163 |         }
164 | 
165 |         ++dd.b;
166 |         ++dd.field;
167 |         ++field_idx;
168 | 
169 |         if (!is_region || dd.in_region)
170 |         {
171 |           buffer_out.insert(buffer_out.end(), prior_field.begin(), prior_field.end());
172 | 
173 |           if (dd.b < dd.i)
174 |             buffer_out.push_back('\t');
175 |         }
176 |       }
177 | 
178 |       if (buffer_in[dd.b] == '\n')
179 |       {
180 |         if (!is_region || dd.in_region)
181 |           buffer_out.push_back('\n');
182 | 
183 |         ++dd.i;
184 |       }
185 |       else if (buffer_in[dd.b] == '%')
186 |       {
187 |         // Unique field within the line but was seen in the previous line
188 |         ++dd.b; // Get over '%'
189 |         uint32_t const prev_unique_index = ascii_cstring_to_int(&buffer_in[dd.b], &buffer_in[dd.i++]);
190 |         assert(prev_unique_index < dd.prev_unique_fields.size());
191 |         std::string const & prior_field = dd.prev_unique_fields[prev_unique_index];
192 | 
193 |         dd.map_to_unique_fields.insert(std::pair<std::string, uint32_t>(prior_field, dd.unique_fields.size()));
194 |         dd.field2uid.push_back(dd.unique_fields.size());
195 |         dd.unique_fields.push_back(prior_field);
196 | 
197 |         if (!is_region || dd.in_region)
198 |         {
199 |           buffer_out.insert(buffer_out.end(), prior_field.begin(), prior_field.end());
200 |           buffer_out.push_back(b_in);
201 |         }
202 |       }
203 |       else if (buffer_in[dd.b] >= ':')
204 |       {
205 |         // same as earler field in the same line
206 |         uint32_t const unique_index = ascii_cstring_to_int(&buffer_in[dd.b], &buffer_in[dd.i++]);
207 |         assert(unique_index < dd.unique_fields.size());
208 |         dd.field2uid.push_back(unique_index);
209 |         std::string const & prior_field = dd.unique_fields[unique_index];
210 | 
211 |         if (!is_region || dd.in_region)
212 |         {
213 |           buffer_out.insert(buffer_out.end(), prior_field.begin(), prior_field.end());
214 |           buffer_out.push_back(b_in);
215 |         }
216 |       }
217 |       else
218 |       {
219 |         // add a new unique field and write field without any encoding
220 |         auto insert_it = dd.map_to_unique_fields.insert(
221 |           std::pair<std::string, uint32_t>(std::piecewise_construct,
222 |                                            std::forward_as_tuple(&buffer_in[dd.b], dd.i - dd.b),
223 |                                            std::forward_as_tuple(dd.unique_fields.size())));
224 | 
225 |         assert(insert_it.second == true);
226 |         dd.field2uid.push_back(dd.unique_fields.size());
227 |         dd.unique_fields.push_back(insert_it.first->first);
228 |         ++dd.i;
229 | 
230 |         if (!is_region || dd.in_region)
231 |           buffer_out.insert(buffer_out.end(), &buffer_in[dd.b], &buffer_in[dd.i]);
232 |       }
233 | 
234 |       // assert((field_idx + 1) == static_cast<long>(dd.field2uid.size()));
235 |     }
236 | 
237 |     assert(b_in == buffer_in[dd.i - 1]);
238 |     dd.b = dd.i;
239 | 
240 |     if (b_in == '\n')
241 |       dd.field = 0;
242 |     else
243 |       ++dd.field;
244 |   } // ends inner loop
245 | 
246 |   if (dd.field >= 3 && dd.field < N_FIELDS_SITE_DATA)
247 |   {
248 |     // write field without updating the field index
249 |     if (!is_region || dd.in_region)
250 |       buffer_out.insert(buffer_out.end(), &buffer_in[dd.b], &buffer_in[dd.i]);
251 | 
252 |     if (dd.field == 4) /*store the number of ALT alleles if we are in the ALT field*/
253 |       dd.stored_alt = std::count(&buffer_in[dd.b], &buffer_in[dd.i], ',');
254 | 
255 |     dd.i = 0;
256 |   }
257 |   else
258 |   {
259 |     // write data to the beginning of the input buffer
260 |     std::copy(&buffer_in[dd.b], &buffer_in[dd.i], &buffer_in[0]);
261 |     dd.i = dd.i - dd.b;
262 |   }
263 | 
264 |   dd.b = 0;
265 |   dd.in_size = dd.i;
266 |   resize_input_buffer(buffer_in, dd.i);
267 | }
268 | 
269 | //! Decode an encoded popVCF
270 | void decode_file(std::string const & popvcf_fn, bool const is_bgzf_input);
271 | 
272 | //! Decode a region with a bgzf file and tabix index.
273 | void decode_region(std::string const & popvcf_fn, std::string const & region);
274 | 
275 | } // namespace popvcf
276 | 


--------------------------------------------------------------------------------
/src/encode.cpp:
--------------------------------------------------------------------------------
  1 | #include "encode.hpp"
  2 | 
  3 | #include <array> // std::array
  4 | #include <charconv>
  5 | #include <iostream> // std::cerr
  6 | #include <string>   // std::string
  7 | #include <zlib.h>
  8 | 
  9 | #include <parallel_hashmap/phmap.h> // phmap::flat_hash_map
 10 | 
 11 | #include "io.hpp"
 12 | #include "sequence_utils.hpp" // int_to_ascii
 13 | 
 14 | #include "htslib/bgzf.h"
 15 | 
 16 | class BGZF;
 17 | 
 18 | namespace popvcf
 19 | {
 20 | void encode_file(std::string const & input_fn,
 21 |                  bool const is_bgzf_input,
 22 |                  std::string const & output_fn,
 23 |                  std::string const & output_mode,
 24 |                  bool const is_bgzf_output,
 25 |                  int const compression_threads)
 26 | {
 27 |   Tenc_array_buf buffer_in;     // input buffer
 28 |   std::vector<char> buffer_out; // output buffer
 29 |   EncodeData ed;                // encode data struct
 30 | 
 31 |   /// Open input file streams
 32 |   popvcf::bgzf_ptr in_bgzf(nullptr, popvcf::close_bgzf);   // bgzf input stream
 33 |   popvcf::file_ptr in_vcf(nullptr, popvcf::close_vcf_nop); // vcf input stream
 34 | 
 35 |   if (is_bgzf_input)
 36 |     in_bgzf = popvcf::open_bgzf(input_fn, "r");
 37 |   else
 38 |     in_vcf = popvcf::open_vcf(input_fn, "r");
 39 | 
 40 |   /// Open output file streams
 41 |   popvcf::bgzf_ptr out_bgzf(nullptr, popvcf::close_bgzf);   // bgzf output stream
 42 |   popvcf::file_ptr out_vcf(nullptr, popvcf::close_vcf_nop); // vcf output stream
 43 | 
 44 |   if (is_bgzf_output)
 45 |   {
 46 |     out_bgzf = popvcf::open_bgzf(output_fn.c_str(), output_mode.c_str());
 47 | 
 48 |     if (compression_threads > 1)
 49 |       bgzf_mt(out_bgzf.get(), compression_threads, 256);
 50 |   }
 51 |   else
 52 |   {
 53 |     out_vcf = popvcf::open_vcf(output_fn, output_mode);
 54 |   }
 55 | 
 56 |   /// Read first buffer of input data
 57 |   if (is_bgzf_input)
 58 |     ed.in_size = bgzf_read(in_bgzf.get(), buffer_in.data(), ENC_BUFFER_SIZE);
 59 |   else
 60 |     ed.in_size = fread(buffer_in.data(), 1, ENC_BUFFER_SIZE, in_vcf.get());
 61 | 
 62 |   long new_bytes = ed.in_size;
 63 | 
 64 |   // loop until all data has been read
 65 |   while (new_bytes != 0)
 66 |   {
 67 |     // encode the input buffer and write to output buffer
 68 |     encode_buffer(buffer_out, buffer_in, ed);
 69 | 
 70 |     // write output buffer
 71 |     if (out_bgzf != nullptr)
 72 |       popvcf::write_bgzf(out_bgzf.get(), buffer_out.data(), buffer_out.size());
 73 |     else
 74 |       fwrite(buffer_out.data(), 1, buffer_out.size(), out_vcf.get()); // write output buffer
 75 | 
 76 |     buffer_out.resize(0);
 77 |     new_bytes = -static_cast<long>(ed.in_size);
 78 | 
 79 |     // attempt to read more data from input
 80 |     if (is_bgzf_input)
 81 |       ed.in_size += bgzf_read(in_bgzf.get(), buffer_in.data() + ed.in_size, ENC_BUFFER_SIZE - ed.in_size);
 82 |     else
 83 |       ed.in_size += fread(buffer_in.data() + ed.in_size, 1, ENC_BUFFER_SIZE - ed.in_size, in_vcf.get());
 84 | 
 85 |     new_bytes += ed.in_size;
 86 |   }
 87 | 
 88 |   if (ed.in_size != 0)
 89 |   {
 90 |     std::cerr << "[popvcf] WARNING: Unexpected ending of the VCF data, possibly the file is truncated.\n";
 91 | 
 92 |     // write output buffer
 93 |     if (out_bgzf != nullptr)
 94 |       popvcf::write_bgzf(out_bgzf.get(), buffer_in.data(), ed.in_size);
 95 |     else
 96 |       fwrite(buffer_in.data(), 1, ed.in_size, out_vcf.get()); // write output buffer
 97 |   }
 98 | }
 99 | 
100 | } // namespace popvcf
101 | 


--------------------------------------------------------------------------------
/src/encode.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <charconv>
  4 | #include <cstdint>
  5 | #include <iostream>
  6 | #include <string>
  7 | #include <vector>
  8 | 
  9 | #include <parallel_hashmap/phmap.h>
 10 | 
 11 | #include "sequence_utils.hpp"
 12 | 
 13 | namespace popvcf
 14 | {
 15 | class EncodeData
 16 | {
 17 | public:
 18 |   std::size_t field{0};   //!< current vcf field.
 19 |   std::size_t in_size{0}; //!< Size of inut buffer.
 20 |   std::size_t b{0};       //!< begin index in buffer_in
 21 |   std::size_t i{b};       //!< index in buffer_in
 22 |   bool header_line{true}; //!< True iff in header line
 23 | 
 24 |   /* Data fields from previous line. */
 25 |   std::vector<std::string> prev_unique_fields{};
 26 |   std::vector<uint32_t> prev_field2uid{};
 27 |   phmap::flat_hash_map<std::string, uint32_t> prev_map_to_unique_fields{};
 28 | 
 29 |   /* Data fields from current line. */
 30 |   std::string contig{};
 31 |   int64_t pos{0};
 32 |   int32_t stored_alt{0};
 33 |   int32_t n_alt{-1};
 34 |   std::vector<std::string> unique_fields{};
 35 |   std::vector<uint32_t> field2uid{};
 36 |   phmap::flat_hash_map<std::string, uint32_t> map_to_unique_fields{};
 37 | 
 38 |   /* Data fields for the next line. */
 39 |   std::string next_contig{};
 40 |   int64_t next_pos{0};
 41 | 
 42 |   inline void clear_line(int64_t next_pos, int32_t next_n_alt)
 43 |   {
 44 |     next_n_alt += stored_alt;
 45 |     stored_alt = 0;
 46 | 
 47 |     if (next_contig != contig || (next_pos / 10000) != (pos / 10000))
 48 |     {
 49 |       /// Previous line is not available, clear values
 50 |       prev_unique_fields.resize(0);
 51 |       prev_field2uid.resize(0);
 52 |       prev_map_to_unique_fields.clear();
 53 |     }
 54 |     else if (next_n_alt == n_alt)
 55 |     {
 56 |       /// Only swap out from this line if we have the same amount of alts
 57 |       std::swap(prev_unique_fields, unique_fields);
 58 |       std::swap(prev_field2uid, field2uid);
 59 |       std::swap(prev_map_to_unique_fields, map_to_unique_fields);
 60 |     }
 61 | 
 62 |     /// Clear data from this line for the next
 63 |     contig = next_contig;
 64 |     pos = next_pos;
 65 |     n_alt = next_n_alt;
 66 |     unique_fields.resize(0);
 67 |     field2uid.resize(0);
 68 |     map_to_unique_fields.clear();
 69 |   }
 70 | };
 71 | 
//! Sets the input size of \a ed to the size of \a buffer_in.
template <typename Tbuffer_in>
inline void set_input_size(Tbuffer_in & buffer_in, EncodeData & ed)
{
  ed.in_size = buffer_in.size();
}

//! Specialization for the encoding array buffer, which leaves the input size untouched.
template <>
inline void set_input_size(Tenc_array_buf & /*buffer_in*/, EncodeData & /*ed*/)
{
  // Do nothing.
  // NOTE: ed.in_size must be set prior to calling encode_buffer with arrays
}
 84 | 
 85 | //! Encodes an input buffer. Output is written in \a buffer_out.
 86 | template <typename Tbuffer_out, typename Tbuffer_in>
 87 | inline void encode_buffer(Tbuffer_out & buffer_out, Tbuffer_in & buffer_in, EncodeData & ed)
 88 | {
 89 |   set_input_size(buffer_in, ed);
 90 |   buffer_out.reserve(ENC_BUFFER_SIZE);
 91 |   std::size_t constexpr N_FIELDS_SITE_DATA{9}; // how many fields of the VCF contains site data
 92 |   int64_t next_pos{0};
 93 | 
 94 |   while (ed.i < ed.in_size)
 95 |   {
 96 |     char const b_in = buffer_in[ed.i];
 97 | 
 98 |     if (b_in != '\t' && b_in != '\n')
 99 |     {
100 |       ++ed.i;
101 |       continue; // we are in a vcf field
102 |     }
103 | 
104 |     if (ed.field == 0) /*CHROM field*/
105 |     {
106 |       // check if in header line and store contig
107 |       ed.header_line = buffer_in[ed.b] == '#'; // check if in header line
108 | 
109 |       if (not ed.header_line)
110 |         ed.next_contig.assign(&buffer_in[ed.b], ed.i - ed.b);
111 |     }
112 |     else if (not ed.header_line)
113 |     {
114 |       if (ed.field == 1) /*POS field*/
115 |       {
116 |         std::from_chars(&buffer_in[ed.b], &buffer_in[ed.i], next_pos);
117 |       }
118 |       else if (ed.field == 4) /*ALT field*/
119 |       {
120 |         int32_t next_n_alt = std::count(&buffer_in[ed.b], &buffer_in[ed.i], ',');
121 |         ed.clear_line(next_pos, next_n_alt);
122 |       }
123 |     }
124 | 
125 |     if (ed.header_line || ed.field < N_FIELDS_SITE_DATA)
126 |     {
127 |       ++ed.i; // adds '\t' or '\n' and then insert the field to the output buffer
128 |       buffer_out.insert(buffer_out.end(), &buffer_in[ed.b], &buffer_in[ed.i]);
129 |     }
130 |     else
131 |     {
132 |       assert(buffer_in[ed.b] >= '!');
133 |       assert(buffer_in[ed.b] <= '9');
134 | 
135 |       // check if it is in the current line
136 |       auto insert_it = ed.map_to_unique_fields.insert(
137 |         std::pair<std::string, uint32_t>(std::piecewise_construct,
138 |                                          std::forward_as_tuple(&buffer_in[ed.b], ed.i - ed.b),
139 |                                          std::forward_as_tuple(ed.unique_fields.size())));
140 | 
141 |       long const field_idx = ed.field - N_FIELDS_SITE_DATA;
142 |       assert(field_idx == static_cast<long>(ed.field2uid.size()));
143 | 
144 |       if (insert_it.second == true)
145 |       {
146 |         ed.field2uid.push_back(ed.unique_fields.size());
147 |         ed.unique_fields.emplace_back(&buffer_in[ed.b], ed.i - ed.b);
148 | 
149 |         if (field_idx < static_cast<long>(ed.prev_field2uid.size()) &&
150 |             ed.prev_unique_fields[ed.prev_field2uid[field_idx]] == ed.unique_fields[insert_it.first->second])
151 |         {
152 |           /* Case 0: unique and same as above. */
153 |           buffer_out.push_back('$');
154 | 
155 |           if (b_in == '\n') /* never skip newline */
156 |             buffer_out.push_back('\n');
157 | 
158 |           ++ed.i;
159 |         }
160 |         else
161 |         {
162 |           // check if it is in the previous line
163 |           auto prev_find_it = ed.prev_map_to_unique_fields.find(insert_it.first->first);
164 | 
165 |           if (prev_find_it == ed.prev_map_to_unique_fields.end())
166 |           {
167 |             /* Case 1: Field is unique in the current line and is not in the previous line. */
168 |             ++ed.i; // adds '\t' or '\n'
169 |             buffer_out.insert(buffer_out.end(), &buffer_in[ed.b], &buffer_in[ed.i]);
170 |           }
171 |           else
172 |           {
173 |             /* Case 2: Field is unique in the current line but identical to a field in the previous line. */
174 |             buffer_out.push_back('%');
175 |             popvcf::to_chars(prev_find_it->second, buffer_out);
176 |             buffer_out.push_back(buffer_in[ed.i]); // write '\t' or '\n'
177 |             ++ed.i;
178 |           }
179 |         }
180 |       }
181 |       else
182 |       {
183 |         ed.field2uid.push_back(insert_it.first->second);
184 | 
185 |         if (field_idx < static_cast<long>(ed.prev_field2uid.size()) &&
186 |             ed.prev_unique_fields[ed.prev_field2uid[field_idx]] == ed.unique_fields[insert_it.first->second])
187 |         {
188 |           /* Case 3: Field is not unique and same has the field above. */
189 |           buffer_out.push_back('&');
190 | 
191 |           if (b_in == '\n') /* never skip newline */
192 |             buffer_out.push_back('\n');
193 | 
194 |           ++ed.i;
195 |         }
196 |         else
197 |         {
198 |           /* Case 4: Field is a duplicate in the current line. */
199 |           popvcf::to_chars(insert_it.first->second, buffer_out);
200 |           buffer_out.push_back(buffer_in[ed.i]); // write '\t' or '\n'
201 |           ++ed.i;
202 |         }
203 |       }
204 | 
205 |       assert((field_idx + 1) == static_cast<long>(ed.field2uid.size()));
206 |       assert(ed.field2uid[0] == 0);
207 |     }
208 | 
209 |     assert(b_in == buffer_in[ed.i - 1]); // i should have been already incremented here
210 |     ed.b = ed.i;                         // set begin index of next field
211 | 
212 |     // check if we need to clear line or increment field
213 |     if (b_in == '\n')
214 |       ed.field = 0; // reset field index
215 |     else
216 |       ++ed.field;
217 |   } // ends inner loop
218 | 
219 |   if (ed.field >= 3 && ed.field < N_FIELDS_SITE_DATA)
220 |   {
221 |     // write the data even if the field is not complete
222 |     buffer_out.insert(buffer_out.end(), &buffer_in[ed.b], &buffer_in[ed.i]);
223 | 
224 |     if (ed.field == 4) /*ALT field*/
225 |       ed.stored_alt = std::count(&buffer_in[ed.b], &buffer_in[ed.i], ',');
226 | 
227 |     ed.i = 0;
228 |   }
229 |   else
230 |   {
231 |     // copy the remaining data to the beginning of the input buffer
232 |     std::copy(&buffer_in[ed.b], &buffer_in[ed.i], &buffer_in[0]);
233 |     ed.i = ed.i - ed.b;
234 |   }
235 | 
236 |   ed.b = 0;
237 |   ed.in_size = ed.i;
238 |   resize_input_buffer(buffer_in, ed.i);
239 | }
240 | 
241 | //! Encode a VCF (uncompressed or bgzf) and write the popVCF to a file or to stdout
242 | void encode_file(std::string const & input_fn,
243 |                  bool const is_bgzf_input,
244 |                  std::string const & output_fn,
245 |                  std::string const & output_mode,
246 |                  bool const is_bgzf_output,
247 |                  int const compression_threads);
248 | 
249 | } // namespace popvcf
250 | 


--------------------------------------------------------------------------------
/src/in.constants.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | /*!
 3 |  * \file in.constants.hpp
 4 |  * \brief Global constants, macros and configurations set by CMake.
 5 |  */
 6 | 
 7 | #include <array>
 8 | #include <string.h> // strrchr
 9 | #include <string>
10 | #include <vector>
11 | 
12 | // clang-format off
13 | // CMake variables
14 | #define popvcf_VERSION_MAJOR @popvcf_VERSION_MAJOR@
15 | #define popvcf_VERSION_MINOR @popvcf_VERSION_MINOR@
16 | #define popvcf_VERSION_PATCH @popvcf_VERSION_PATCH@
17 | #define popvcf_SOURCE_DIRECTORY "@PROJECT_SOURCE_DIR@"
18 | #define popvcf_BINARY_DIRECTORY "@PROJECT_BINARY_DIR@"
19 | #define GIT_BRANCH "@GIT_BRANCH@"
20 | #define GIT_COMMIT_SHORT_HASH "@GIT_COMMIT_SHORT_HASH@"
21 | #define GIT_COMMIT_LONG_HASH "@GIT_COMMIT_LONG_HASH@"
22 | #define GIT_NUM_DIRTY_LINES "@GIT_NUM_DIRTY_LINES@"
23 | // clang-format on
24 | 
namespace popvcf
{
// Macros
// NOTE: preprocessor macros are not scoped by the enclosing namespace.

// Turns the argument into a string literal exactly as written, without expanding it first.
#define S1_popvcf_internal__(x) #x
// Expands the argument before it is turned into a string literal, e.g. __LINE__ becomes the line number.
#define S2_popvcf_internal__(x) S1_popvcf_internal__(x)
// C string with the current file and line as "<file>:<line>", where everything up to and including the last '/'
// of __FILE__ is skipped. The leading "/" guarantees that strrchr finds a '/' even if __FILE__ has none.
#define _HERE_ (strrchr("/" __FILE__ ":" S2_popvcf_internal__(__LINE__), '/') + 1)

} // namespace popvcf
33 | 


--------------------------------------------------------------------------------
/src/io.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <cstdio>
  4 | #include <iostream>
  5 | #include <memory>
  6 | #include <string>
  7 | 
  8 | #include "htslib/bgzf.h"
  9 | #include "htslib/hts.h"
 10 | #include "htslib/kseq.h"
 11 | #include "htslib/tbx.h"
 12 | 
 13 | class BGZF;
 14 | 
 15 | namespace popvcf
 16 | {
 17 | using file_ptr = std::unique_ptr<FILE, void (*)(FILE *)>;           //!< Type definition for a smart FILE pointer.
 18 | using bgzf_ptr = std::unique_ptr<BGZF, void (*)(BGZF *)>;           //!< Type definition for a smart BGZF pointer.
 19 | using hts_file_ptr = std::unique_ptr<htsFile, void (*)(htsFile *)>; //!< Type definition for a smart htsFile pointer.
 20 | using tbx_t_ptr = std::unique_ptr<tbx_t, void (*)(tbx_t *)>;        //!< Type definition for a smart tbx_t pointer.
 21 | using hts_itr_t_ptr = std::unique_ptr<hts_itr_t, void (*)(hts_itr_t *)>; //!< Type definition for a hts_itr_t pointer.
 22 | 
//! Deleter which does nothing. Used by open_vcf() for stdin/stdout so these streams are left open.
inline void close_vcf_nop(FILE *)
{
}
 27 | 
 28 | //! Closes VCF file
 29 | inline void close_vcf(FILE * f)
 30 | {
 31 |   if (f != nullptr)
 32 |   {
 33 |     fclose(f);
 34 |   }
 35 | }
 36 | 
 37 | //! Opens a VCF file either from filename or stdout/stdin
 38 | inline file_ptr open_vcf(std::string const & fn, std::string const & filemode)
 39 | {
 40 |   if (fn == "-")
 41 |   {
 42 |     if (filemode == "r")
 43 |       return file_ptr(stdin, popvcf::close_vcf_nop);
 44 |     else
 45 |       return file_ptr(stdout, popvcf::close_vcf_nop);
 46 |   }
 47 | 
 48 |   file_ptr in_vcf(fopen(fn.c_str(), filemode.c_str()), popvcf::close_vcf);
 49 | 
 50 |   if (in_vcf == nullptr)
 51 |   {
 52 |     std::cerr << "[popvcf] ERROR: Opening VCF file " << fn << std::endl;
 53 |     std::exit(1);
 54 |   }
 55 | 
 56 |   return in_vcf;
 57 | }
 58 | 
 59 | inline void write_bgzf(BGZF * bgzf, const char * data, std::size_t const size)
 60 | {
 61 |   assert(bgzf != nullptr);
 62 |   std::size_t const written_bytes = bgzf_write(bgzf, data, size);
 63 | 
 64 |   if (written_bytes != size)
 65 |   {
 66 |     std::cerr << "[popvcf] WARNING: Problem writing bgzf data. " << written_bytes << " bytes written but expected "
 67 |               << size << " bytes.\n";
 68 |     std::exit(1);
 69 |   }
 70 | }
 71 | 
 72 | inline void close_bgzf(BGZF * bgzf)
 73 | {
 74 |   if (bgzf != nullptr)
 75 |   {
 76 |     if (bgzf_close(bgzf) != 0)
 77 |     {
 78 |       std::cerr << "[popvcf] ERROR: Failed closing bgzf file." << std::endl;
 79 |       std::exit(1);
 80 |     }
 81 |   }
 82 | }
 83 | 
 84 | inline bgzf_ptr open_bgzf(std::string const & fn, std::string const & filemode)
 85 | {
 86 |   bgzf_ptr in_bgzf(bgzf_open(fn.c_str(), filemode.c_str()), popvcf::close_bgzf);
 87 | 
 88 |   if (in_bgzf == nullptr)
 89 |   {
 90 |     std::cerr << "[popvcf] ERROR: Opening bgzf file " << fn << std::endl;
 91 |     std::exit(1);
 92 |   }
 93 | 
 94 |   return in_bgzf;
 95 | }
 96 | 
 97 | inline void close_hts_file(htsFile * f)
 98 | {
 99 |   if (f != nullptr)
100 |   {
101 |     if (hts_close(f) != 0)
102 |     {
103 |       std::cerr << "[popvcf] ERROR: Failed closing hts file." << std::endl;
104 |       std::exit(1);
105 |     }
106 |   }
107 | }
108 | 
109 | inline hts_file_ptr open_hts_file(const char * fn, const char * fm)
110 | {
111 |   hts_file_ptr ptr(hts_open(fn, fm), popvcf::close_hts_file);
112 | 
113 |   if (ptr == nullptr)
114 |   {
115 |     std::cerr << "ERROR: Could not open file " << fn << std::endl;
116 |     std::exit(1);
117 |   }
118 | 
119 |   return ptr;
120 | }
121 | 
122 | inline void close_tbx_t(tbx_t * f)
123 | {
124 |   if (f != nullptr)
125 |     tbx_destroy(f);
126 | }
127 | 
128 | inline tbx_t_ptr open_tbx_t(const char * fn)
129 | {
130 |   tbx_t_ptr ptr(tbx_index_load(fn), popvcf::close_tbx_t);
131 | 
132 |   if (ptr == nullptr)
133 |   {
134 |     std::cerr << "[popvcf] ERROR: Could not open file " << fn << std::endl;
135 |     std::exit(1);
136 |   }
137 | 
138 |   return ptr;
139 | }
140 | 
141 | inline void close_hts_itr_t(hts_itr_t * f)
142 | {
143 |   if (f != nullptr)
144 |     tbx_itr_destroy(f);
145 | }
146 | 
147 | inline hts_itr_t_ptr open_hts_itr_t(tbx_t * tbx, const char * region)
148 | {
149 |   hts_itr_t_ptr ptr(tbx_itr_querys(tbx, region), popvcf::close_hts_itr_t);
150 | 
151 |   if (ptr == nullptr)
152 |     std::cerr << "[popvcf] WARNING: No records found in region " << region << "\n";
153 | 
154 |   return ptr;
155 | }
156 | 
157 | inline void free_kstring_t(kstring_t * str)
158 | {
159 |   if (str->s != NULL)
160 |     free(str->s);
161 | }
162 | } // namespace popvcf
163 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | #include <cstdio>
  2 | #include <cstdlib>
  3 | #include <iostream>
  4 | 
  5 | #include <paw/parser.hpp>
  6 | 
  7 | #include "decode.hpp"
  8 | #include "encode.hpp"
  9 | 
 10 | #include <popvcf/constants.hpp>
 11 | 
 12 | namespace popvcf
 13 | {
 14 | int subcmd_encode(paw::Parser & parser)
 15 | {
 16 |   std::string vcf_fn{"-"};
 17 |   std::string input_type{"g"};
 18 |   std::string output_fn{"-"};
 19 |   std::string output_mode{"w"};
 20 |   std::string output_type{"v"};
 21 |   int output_compress_level{-1};
 22 |   int compression_threads{1};
 23 | 
 24 |   try
 25 |   {
 26 |     parser.parse_positional_argument(vcf_fn,
 27 |                                      "VCF",
 28 |                                      "Encode this VCF (or VCF.gz). If not set, read VCF from standard input.");
 29 | 
 30 |     parser.parse_option(compression_threads,
 31 |                         '@',
 32 |                         "threads",
 33 |                         "Number of output file compression threads (only used if output type is \"z\").",
 34 |                         "NUM");
 35 | 
 36 |     parser.parse_option(input_type,
 37 |                         'I',
 38 |                         "input-type",
 39 |                         "Input type. v uncompressed VCF, z bgzipped VCF, g guess based on filename.",
 40 |                         "v|z|g");
 41 | 
 42 |     parser.parse_option(output_fn,
 43 |                         'o',
 44 |                         "output",
 45 |                         "Output will be written to this path. If '-', then write instead to standard output.",
 46 |                         "output.vcf[.gz]");
 47 | 
 48 |     parser.parse_option(output_compress_level, 'l', "output-compress-level", "Output file compression level.", "LEVEL");
 49 | 
 50 |     parser.parse_option(output_type, 'O', "output-type", "Output type. v uncompressed VCF, z bgzipped VCF.", "v|z");
 51 |     parser.finalize();
 52 |   }
 53 |   catch (paw::exception::missing_positional_argument &)
 54 |   {
 55 |     output_fn = "-";
 56 |   }
 57 | 
 58 |   if (output_compress_level >= 0)
 59 |     output_mode += std::to_string(std::min(9, output_compress_level));
 60 | 
 61 |   long const n = vcf_fn.size();
 62 | 
 63 |   if (n > 3 && vcf_fn[n - 2] == 'g' && vcf_fn[n - 1] == 'z')
 64 |     input_type = "z";
 65 | 
 66 |   encode_file(vcf_fn, input_type == "z", output_fn, output_mode, output_type == "z", compression_threads);
 67 |   return 0;
 68 | }
 69 | 
 70 | int subcmd_decode(paw::Parser & parser)
 71 | {
 72 |   std::string popvcf_fn{};
 73 |   std::string input_type{"g"};
 74 |   std::string region{};
 75 | 
 76 |   try
 77 |   {
 78 |     parser.parse_option(input_type,
 79 |                         'I',
 80 |                         "input-type",
 81 |                         "Input type. v uncompressed VCF, z bgzipped VCF, g guess based on filename.",
 82 |                         "v|z|g");
 83 |     parser.parse_option(region, 'r', "region", "Fetch region/interval to decode. Requires .tbi index.", "chrN:A-B");
 84 |     parser.parse_positional_argument(popvcf_fn, "popVCF", "Decode this popVCF. Use '-' for standard input.");
 85 |     parser.finalize();
 86 |   }
 87 |   catch (paw::exception::missing_positional_argument &)
 88 |   {
 89 |     popvcf_fn = "-";
 90 |   }
 91 | 
 92 |   long const n = popvcf_fn.size();
 93 | 
 94 |   if (input_type == "g" && n > 3 && popvcf_fn[n - 2] == 'g' && popvcf_fn[n - 1] == 'z')
 95 |     input_type = "z";
 96 | 
 97 |   if (region.empty())
 98 |     decode_file(popvcf_fn, input_type == "z");
 99 |   else
100 |     decode_region(popvcf_fn, region);
101 | 
102 |   return 0;
103 | }
104 | 
105 | } // namespace popvcf
106 | 
107 | int main(int argc, char ** argv)
108 | {
109 | #ifndef NDEBUG
110 |   std::ios_base::sync_with_stdio(false);
111 | #endif // NDEBUG
112 |   paw::Parser parser(argc, argv);
113 |   parser.set_name("popVCF");
114 |   parser.set_version(popvcf_VERSION_MAJOR, popvcf_VERSION_MINOR, popvcf_VERSION_PATCH);
115 |   int ret{0};
116 | 
117 |   try
118 |   {
119 |     std::string subcmd{};
120 | 
121 |     parser.add_subcommand("encode", "Encode a VCF into a popVCF.");
122 |     parser.add_subcommand("decode", "Decode a popVCF into a VCF.");
123 | 
124 |     parser.parse_subcommand(subcmd);
125 | 
126 |     if (subcmd == "encode")
127 |     {
128 |       ret = popvcf::subcmd_encode(parser);
129 |     }
130 |     else if (subcmd == "decode")
131 |     {
132 |       ret = popvcf::subcmd_decode(parser);
133 |     }
134 |     else if (subcmd.size() == 0)
135 |     {
136 |       parser.finalize();
137 |       ret = 0;
138 |     }
139 |     else
140 |     {
141 |       parser.finalize();
142 |       ret = 1;
143 |     }
144 |   }
145 |   catch (paw::exception::help const & e)
146 |   {
147 |     std::cout << e.what();
148 |     return 0;
149 |   }
150 |   catch (std::exception const & e)
151 |   {
152 |     std::cerr << e.what();
153 |     return 1;
154 |   }
155 | 
156 |   return ret;
157 | }
158 | 


--------------------------------------------------------------------------------
/src/sequence_utils.cpp:
--------------------------------------------------------------------------------
 1 | #include "sequence_utils.hpp"
 2 | 
 3 | #include <algorithm>   // std::find
 4 | #include <cstdint>     // int32_t
 5 | #include <string>      // std::string
 6 | #include <string_view> // std::string_view
 7 | #include <vector>      // std::vector
 8 | 
 9 | namespace popvcf
10 | {
//! Splits \a str on \a delimiter and returns views of the tokens.
/*!
 * Empty tokens, e.g. between two adjacent delimiters, are not included in the output. The returned views refer to
 * the characters of \a str and are only valid for as long as those are.
 */
template <typename Tstring>
std::vector<std::string_view> split_string(Tstring const & str, char const delimiter)
{
  std::string_view const view(str);
  std::vector<std::string_view> tokens;
  std::size_t token_begin{0};

  while (token_begin < view.size())
  {
    std::size_t token_end = view.find(delimiter, token_begin);

    if (token_end == std::string_view::npos)
      token_end = view.size(); // last token runs to the end of the string

    if (token_end != token_begin)
      tokens.emplace_back(view.substr(token_begin, token_end - token_begin));

    token_begin = token_end + 1; // continue after the delimiter
  }

  return tokens;
}

template std::vector<std::string_view> split_string(std::string const & str, char const delimiter);
template std::vector<std::string_view> split_string(std::string_view const & str, char const delimiter);
39 | 
40 | } // namespace popvcf
41 | 


--------------------------------------------------------------------------------
/src/sequence_utils.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <algorithm>
  4 | #include <array>
  5 | #include <cassert>
  6 | #include <charconv>
  7 | #include <string>
  8 | #include <string_view>
  9 | #include <vector>
 10 | 
 11 | namespace popvcf
 12 | {
 13 | uint32_t constexpr CHAR_SET_SIZE = 69;
 14 | uint32_t constexpr CHAR_SET_SIZE_2BYTES = CHAR_SET_SIZE * CHAR_SET_SIZE;
 15 | char constexpr CHAR_SET_MIN = ':';
 16 | 
 17 | long constexpr ENC_BUFFER_SIZE{4 * 65536}; //!< Buffer size of arrays when encoding
 18 | long constexpr DEC_BUFFER_SIZE{8 * 65536}; //!< Buffer size of arrays when decoding
 19 | 
 20 | //! Data type of an encoding array buffer
 21 | using Tenc_array_buf = std::array<char, ENC_BUFFER_SIZE>;
 22 | 
 23 | //! Data type of a decoding array buffer
 24 | using Tdec_array_buf = std::array<char, DEC_BUFFER_SIZE>;
 25 | 
 26 | inline char int_to_ascii(uint32_t in)
 27 | {
 28 |   assert(in < CHAR_SET_SIZE);
 29 | 
 30 |   return CHAR_SET_MIN + in;
 31 | }
 32 | 
 33 | inline uint32_t ascii_to_int(char in)
 34 | {
 35 |   assert(in >= CHAR_SET_MIN);
 36 | 
 37 |   return static_cast<uint32_t>(in) - CHAR_SET_MIN;
 38 | }
 39 | 
 40 | inline std::string int_to_ascii_string(uint32_t in)
 41 | {
 42 |   std::string str;
 43 | 
 44 |   while (in >= CHAR_SET_SIZE)
 45 |   {
 46 |     uint32_t rem = in % CHAR_SET_SIZE;
 47 |     in = in / CHAR_SET_SIZE;
 48 |     str.push_back(int_to_ascii(rem));
 49 |   }
 50 | 
 51 |   assert(in < CHAR_SET_SIZE);
 52 |   str.push_back(int_to_ascii(in));
 53 |   return str;
 54 | }
 55 | 
 56 | inline uint32_t ascii_string_view_to_int(std::string_view in)
 57 | {
 58 |   uint32_t const in_size = in.size();
 59 |   assert(in_size > 0);
 60 |   uint32_t out{0};
 61 |   uint32_t pow{1};
 62 | 
 63 |   for (uint32_t c{0}; c < in_size; ++c)
 64 |   {
 65 |     out += pow * ascii_to_int(in[c]);
 66 |     pow *= CHAR_SET_SIZE;
 67 |   }
 68 | 
 69 |   return out;
 70 | }
 71 | 
 72 | inline uint32_t ascii_cstring_to_int(char const * b, char const * e)
 73 | {
 74 |   uint32_t out{ascii_to_int(*b)};
 75 |   ++b;
 76 |   uint32_t pow{CHAR_SET_SIZE};
 77 | 
 78 |   while (b != e)
 79 |   {
 80 |     out += pow * ascii_to_int(*b);
 81 |     pow *= CHAR_SET_SIZE;
 82 |     ++b;
 83 |   }
 84 | 
 85 |   return out;
 86 | }
 87 | 
 88 | template <typename Tint, typename Tbuffer_out>
 89 | inline void to_chars(Tint char_val, Tbuffer_out & buffer_out)
 90 | {
 91 |   std::size_t constexpr ARR_SIZE{6};
 92 |   std::array<char, ARR_SIZE> a;
 93 |   std::size_t i{0};
 94 | 
 95 |   while (char_val >= CHAR_SET_SIZE)
 96 |   {
 97 |     Tint rem = char_val % CHAR_SET_SIZE;
 98 |     char_val = char_val / CHAR_SET_SIZE;
 99 |     assert(i < ARR_SIZE);
100 |     a[i++] = int_to_ascii(rem);
101 |   }
102 | 
103 |   assert(char_val < CHAR_SET_SIZE);
104 |   assert(i < ARR_SIZE);
105 |   a[i++] = int_to_ascii(char_val);
106 |   buffer_out.insert(buffer_out.end(), a.data(), a.data() + i);
107 | }
108 | 
109 | template <typename Tstring>
110 | std::vector<std::string_view> split_string(Tstring const & str, char const delimiter);
111 | 
//! Parses the second tab-separated field (POS) of the VCF record in the range [begin, end).
/*!
 * \param begin Start of the record.
 * \param end End of the record.
 * \returns The parsed position, or 0 if the record has no second field or it does not start with a number.
 */
template <typename Tit>
long get_vcf_pos(Tit begin, Tit end)
{
  long vcf_pos{0};
  auto const chrom_end = std::find(begin, end, '\t');

  // without a tab there is no POS field, and stepping over the missing tab would leave the range
  if (chrom_end == end)
    return vcf_pos;

  auto const pos_begin = chrom_end + 1;
  auto const pos_end = std::find(pos_begin, end, '\t');
  std::from_chars(pos_begin, pos_end, vcf_pos);
  return vcf_pos;
}
121 | 
//! Resizes \a buffer_in to \a new_size by calling its resize() member function.
template <typename Tbuffer_in>
inline void resize_input_buffer(Tbuffer_in & buffer_in, std::size_t const new_size)
{
  buffer_in.resize(new_size);
}

//! Specialization for the decoding array buffer, which is left as it is.
template <>
inline void resize_input_buffer(Tdec_array_buf & /*buffer_in*/, std::size_t const /*new_size*/)
{
  // Do nothing. Arrays are not resized
}

//! Specialization for the encoding array buffer, which is left as it is.
template <>
inline void resize_input_buffer(Tenc_array_buf & /*buffer_in*/, std::size_t const /*new_size*/)
{
  // Do nothing. Arrays are not resized
}
139 | 
140 | } // namespace popvcf
141 | 


--------------------------------------------------------------------------------
/test/create_test_data.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | n=100000
  4 | echo "##fileformat=VCFv4.2"
  5 | echo "##contig=<ID=chr1>"
  6 | echo "##contig=<ID=chr2>"
  7 | echo "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype.\">"
  8 | echo "##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths.\">"
  9 | echo "##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"PHRED-scaled genotype likelihoods.\">"
 10 | echo -e -n "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT"
 11 | 
 12 | awk -v n=${n} 'BEGIN{
 13 |   for (i = 1; i <= n; i++){
 14 |     printf "\t%08d", i
 15 |   }
 16 | }'
 17 | 
 18 | awk -v n=${n} -v n_alts=3 'BEGIN{
 19 |   alt="AC"
 20 |   printf "\nchr1\t"n+2
 21 |   printf "\t.\tA\tA"
 22 |   for (a = 1; a <= n_alts; a++){
 23 |     alt=alt"C"
 24 |     printf ","alt
 25 |   }
 26 |   printf "\t0\t.\t.\tGT:AD:PL"
 27 |   for (i = 1; i <= n; i++){
 28 |     printf "\t0/0:"n_alts*10
 29 |     for (a = 1; a <= n_alts; a++){
 30 |       printf ","a % 10
 31 |     }
 32 |     printf ":0"
 33 |     for (a = 2; a <= n_alts + 1; a++){
 34 |       for (b = 1; b <= a; b++){
 35 |         printf ","a+b-2
 36 |       }
 37 |     }
 38 |   }
 39 |   printf "\n"}'
 40 | 
 41 | awk -v n=${n} -v n_alts=3 'BEGIN{
 42 |   alt="AC"
 43 |   printf "chr1\t"n+2
 44 |   printf "\t.\tA\tA"
 45 |   for (a = 1; a <= n_alts; a++){
 46 |     alt=alt"C"
 47 |     printf ","alt
 48 |   }
 49 |   printf "\t0\t.\t.\tGT:AD:PL"
 50 |   for (i = 1; i <= n; i++){
 51 |     printf "\t0/0:"n_alts*10
 52 |     for (a = 1; a <= n_alts; a++){
 53 |       printf ","a % 10
 54 |     }
 55 |     printf ":0"
 56 |     for (a = 2; a <= n_alts + 1; a++){
 57 |       for (b = 1; b <= a; b++){
 58 |         printf ","a+b-2
 59 |       }
 60 |     }
 61 |   }
 62 |   printf "\n"}'
 63 | 
 64 | awk -v n=${n} -v n_alts=3 'BEGIN{
 65 |   alt="AC"
 66 |   printf "chr1\t"n+2
 67 |   printf "\t.\tA\tA"
 68 |   for (a = 1; a <= n_alts; a++){
 69 |     alt=alt"C"
 70 |     printf ","alt
 71 |   }
 72 |   printf "\t0\t.\t.\tGT:AD:PL"
 73 |   for (i = 1; i <= n; i++){
 74 |     printf "\t0/0:"n_alts*10
 75 |     for (a = 1; a <= n_alts; a++){
 76 |       printf ","a % 10
 77 |     }
 78 |     printf ":0"
 79 |     for (a = 2; a <= n_alts + 1; a++){
 80 |       for (b = 1; b <= a; b++){
 81 |         printf ","a+b-2
 82 |       }
 83 |     }
 84 |   }
 85 |   printf "\n"}'
 86 | 
 87 | 
 88 | awk -v n=${n} -v n_alts=7 'BEGIN{
 89 |   alt="GT"
 90 |   printf "chr1\t"n+3
 91 |   printf "\t.\tG\tG"
 92 |   for (a = 1; a <= n_alts; a++){
 93 |     alt=alt"T"
 94 |     printf ","alt
 95 |   }
 96 |   printf "\t0\t.\t.\tGT:AD:PL"
 97 |   for (i = 1; i <= n; i++){
 98 |     printf "\t0/0:"n_alts*10
 99 |     for (a = 1; a <= n_alts; a++){
100 |       printf ","(a+i) % 10
101 |     }
102 |     printf ":0"
103 |     for (a = 2; a <= n_alts + 1; a++){
104 |       for (b = 1; b <= a; b++){
105 |         printf ","a+i+b-2
106 |       }
107 |     }
108 |   }
109 |   printf "\n"}'
110 | 
111 | awk -v n=${n} 'BEGIN{
112 |   printf "chr2\t9999\t.\tGTTTTTTT\tG\t0\t.\t.\tGT"
113 |   for (i = 1; i <= n; i++){
114 |     printf "\t0/0"
115 |   }
116 |   printf "\n"}'
117 | 
118 | awk -v n=${n} 'BEGIN{
119 |   printf "chr2\t10000\t.\tGTTTTTTT\tG\t0\t.\t.\tGT"
120 |   for (i = 1; i <= n; i++){
121 |     printf "\t0/0"
122 |   }
123 |   printf "\n"}'
124 | 
125 | awk -v n=${n} 'BEGIN{
126 |   printf "chr2\t10001\t.\tGTTTTTTT\tG\t0\t.\t.\tGT"
127 |   for (i = 1; i <= n; i++){
128 |     printf "\t0/0"
129 |   }
130 |   printf "\n"}'
131 | 
132 | awk -v n=${n} 'BEGIN{
133 |   printf "chr2\t1000000\t.\tGTTTTTTT\tG\t0\t.\t.\tGT"
134 |   for (i = 1; i <= n; i++){
135 |     printf "\t0/0"
136 |   }
137 |   printf "\n"}'
138 | 


--------------------------------------------------------------------------------