├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── cmake ├── Modules │ ├── FindJellyfish.cmake │ ├── FindJemalloc.cmake │ ├── FindTBB.cmake │ └── FindTcmalloc.cmake ├── PostInstall.cmake ├── SimpleTest.cmake ├── TestSalmonFMD.cmake ├── TestSalmonQuasi.cmake └── UnitTests.cmake ├── include ├── FastxParser.hpp ├── args.hpp ├── blockingconcurrentqueue.h ├── concurrentqueue.h ├── kseq.h ├── sparsepp │ ├── spp.h │ ├── spp_alloc.h │ ├── spp_bitset.h │ ├── spp_btree.h │ ├── spp_config.h │ ├── spp_memory.h │ ├── spp_smartptr.h │ ├── spp_stdint.h │ ├── spp_timer.h │ ├── spp_traits.h │ └── spp_utils.h ├── strict_fstream.hpp └── zstr.hpp ├── sample_data ├── reads_1.fastq ├── reads_2.fastq └── unmapped_random.txt ├── scripts ├── check_shasum.sh └── generateDecoyTranscriptome.sh └── src ├── CMakeLists.txt ├── ExtractUnmapped.cpp ├── FastxParser.cpp └── SalmonTools.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | *.smod 19 | 20 | # Compiled Static libraries 21 | *.lai 22 | *.la 23 | *.a 24 | *.lib 25 | 26 | # Executables 27 | *.exe 28 | *.out 29 | *.app 30 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.0) 2 | 3 | enable_testing() 4 | 5 | project (SalmonTools) 6 | 7 | set(CPACK_PACKAGE_VERSION "0.1.0") 8 | set(CPACK_PACKAGE_VERSION_MAJOR "0") 9 | set(CPACK_PACKAGE_VERSION_MINOR "1") 10 | set(CPACK_PACKAGE_VERSION_PATCH "0") 11 | set(CPACK_GENERATOR "TGZ") 12 | set(CPACK_SOURCE_GENERATOR "TGZ") 13 | set(CPACK_PACKAGE_VENDOR "Stony Brook University") 14 | set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "SalmonTools - Tools for 
working with Salmon output.") 15 | set(CPACK_PACKAGE_NAME 16 | "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") 17 | set(CPACK_SOURCE_PACKAGE_FILE_NAME 18 | "${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}-Source") 19 | 20 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") 21 | 22 | if (APPLE) 23 | set (WARNING_IGNORE_FLAGS "-Wno-deprecated-register") 24 | else() 25 | set (WARNING_IGNORE_FLAGS "") 26 | endif() 27 | 28 | set (BOOST_CXX_FLAGS "${WARNING_IGNORE_FLAGS} -std=c++11") 29 | ## Prefer static to dynamic libraries 30 | SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) 31 | 32 | ## Set the standard required compile flags 33 | # Nov 18th --- removed -DHAVE_CONFIG_H 34 | set (CMAKE_CXX_FLAGS "-ftree-vectorize -funroll-loops -fPIC -fomit-frame-pointer -O3 -DRAPMAP_SALMON_SUPPORT -DHAVE_ANSI_TERM -DHAVE_SSTREAM -Wall -Wno-unknown-pragmas -Wno-reorder -Wno-unused-variable -std=c++11 -Wreturn-type -pthread") 35 | 36 | ## 37 | # OSX is strange (some might say, stupid in this regard). Deal with its quirkiness here. 38 | ## 39 | if (APPLE) 40 | # To allow ourselves to build a dynamic library, we have to tell the compiler 41 | # that, yes, the symbols will be around at runtime. 
42 | # set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -undefined dynamic_lookup") 43 | set (LIBSALMONTOOLS_LINKER_FLAGS "-all_load") 44 | # In order to "think different", we also have to use non-standard suffixes 45 | # for our shared libraries 46 | set(SHARED_LIB_EXTENSION "dylib") 47 | else() 48 | # We're in sane linux world 49 | set (SHARED_LIB_EXTENSION "so") 50 | set (LIBSALMONTOOLS_LINKER_FLAGS "") # same variable the APPLE branch sets 51 | endif() 52 | 53 | ## this gets set differently below if we 54 | ## are on clang & apple 55 | set (NON_APPLECLANG_LIBS gomp rt) 56 | 57 | if(UNIX AND NOT APPLE) 58 | set(LIBRT rt) 59 | endif() 60 | 61 | set (PTHREAD_LIB) 62 | 63 | ## 64 | # Let us check the sha sum of our packages if we have the right tools 65 | ## 66 | set(SHASUM ${CMAKE_CURRENT_SOURCE_DIR}/scripts/check_shasum.sh) 67 | 68 | ## 69 | # Compiler-specific C++11 activation. 70 | # http://stackoverflow.com/questions/10984442/how-to-detect-c11-support-of-a-compiler-with-cmake 71 | ## 72 | ## 73 | # First take care of what to do if we have gcc 74 | ## 75 | if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") 76 | execute_process( 77 | COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) # drop the trailing newline printed by -dumpversion 78 | # If we're on OSX 79 | if (APPLE AND NOT (GCC_VERSION VERSION_GREATER 4.8.2 OR GCC_VERSION VERSION_EQUAL 4.8.2)) 80 | message(FATAL_ERROR "When building under OSX, ${PROJECT_NAME} requires " 81 | "either clang or g++ >= 4.8.2") 82 | elseif (NOT (GCC_VERSION VERSION_GREATER 4.7 OR GCC_VERSION VERSION_EQUAL 4.7)) 83 | message(FATAL_ERROR "${PROJECT_NAME} requires g++ 4.7 or greater.") 84 | endif () 85 | 86 | set (GCC TRUE) 87 | 88 | # Put complete static linking on hold for the time-being 89 | # If we're not on OSX, make an attempt to compile everything statically 90 | #if (NOT APPLE) 91 | #set (CMAKE_CXX_FLAGS "-static ${CMAKE_CXX_FLAGS}") 92 | #set (CMAKE_EXE_LINK_FLAGS "-static") 93 | set (PTHREAD_LIB "pthread") 94 | #endif() 95 | 96 | # If we're on Linux (i.e. 
not OSX) and we're using 97 | # gcc, then set the -static-libstdc++ flag 98 | if (NOT APPLE) 99 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libstdc++") 100 | endif() 101 | 102 | set (WARNING_IGNORE_FLAGS "${WARNING_IGNORE_FLAGS} -Wno-unused-local-typedefs") 103 | # Clang 104 | elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") 105 | set(CLANG TRUE) 106 | # If we have libc++, then try and use it 107 | include(CheckCXXCompilerFlag) 108 | check_cxx_compiler_flag(-stdlib=libc++ HAVE_LIBCPP) 109 | if (HAVE_LIBCPP) 110 | message ("It appears that you're compiling with clang and that libc++ is available, so I'll use that") 111 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") 112 | set (BOOST_TOOLSET "clang") 113 | set (BOOST_CONFIGURE_TOOLSET "--with-toolset=clang") 114 | set (BCXX_FLAGS "-stdlib=libc++ -DBOOST_HAS_INT128") 115 | set (BOOST_EXTRA_FLAGS toolset=clang cxxflags=${BCXX_FLAGS} linkflags="-stdlib=libc++") 116 | set (JELLYFISH_CXX_FLAGS "-stdlib=libc++") 117 | # Otherwise, use libstdc++ (and make it static) 118 | else() 119 | #set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libstdc++") 120 | endif() 121 | # There's currently a bug with clang-3.4 & Boost 1.55 -- this hack fixes it 122 | # but we should do something better (does this break things if CPU doesn't 123 | # have 128-bit support)? 
124 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_HAS_INT128") 125 | 126 | if (APPLE) 127 | message("Building on a Mac") 128 | set (NON_APPLECLANG_LIBS "") 129 | else() 130 | set (PTHREAD_LIB "pthread") 131 | endif() 132 | else () 133 | message(FATAL_ERROR "Your C++ compiler does not support C++11.") 134 | endif () 135 | 136 | if (DO_QUIET_MAKE) 137 | set( QUIET_MAKE "--silent" ) 138 | else() 139 | set( QUIET_MAKE "") 140 | endif() 141 | 142 | include(ExternalProject) 143 | 144 | find_package (ZLIB) 145 | if (NOT ZLIB_FOUND) 146 | message (FATAL_ERROR "zlib must be installed before configuration & building can proceed") 147 | endif() 148 | 149 | ## 150 | # Update the CXX flags according to the system and compiler 151 | ## 152 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WARNING_IGNORE_FLAGS}") 153 | 154 | if (CMAKE_BUILD_TYPE MATCHES Debug) 155 | message ("Making Debug build") 156 | set (CMAKE_CXX_FLAGS_DEBUG "-g ${CMAKE_CXX_FLAGS}") 157 | elseif (CMAKE_BUILD_TYPE MATCHES Release) 158 | message ("Making Release build") 159 | set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS}") 160 | else () 161 | message ("Making Default build type") 162 | endif () 163 | 164 | ## 165 | # Record this top-level path 166 | ## 167 | set (GAT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) 168 | 169 | # Have CMake tell us what it's doing 170 | # set (CMAKE_VERBOSE_MAKEFILE true) 171 | 172 | ### 173 | # 174 | # Grab RapMap sources for quasi-mapping code --- DURING CONFIGURE TIME! 175 | # 176 | #### 177 | #if(NOT FETCHED_RAPMAP) 178 | # exec_program(${CMAKE_CURRENT_SOURCE_DIR}/scripts/fetchRapMap.sh) 179 | # set(FETCHED_RAPMAP TRUE CACHE BOOL "Has RapMap been fetched?" 
FORCE) 180 | #endif() 181 | 182 | 183 | #message("Build system will fetch and build the Cereal serialization library") 184 | #message("==================================================================") 185 | #include(ExternalProject) 186 | #ExternalProject_Add(libcereal 187 | # DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external 188 | # DOWNLOAD_COMMAND curl -k -L https://github.com/USCiLab/cereal/archive/v1.2.2.tar.gz -o cereal-v1.2.2.tar.gz && 189 | # ${SHASUM} 1921f26d2e1daf9132da3c432e2fd02093ecaedf846e65d7679ddf868c7289c4 cereal-v1.2.2.tar.gz && 190 | # tar -xzvf cereal-v1.2.2.tar.gz 191 | # 192 | # ## 193 | # #URL https://github.com/USCiLab/cereal/archive/v1.2.2.tar.gz 194 | # #DOWNLOAD_NAME cereal-v1.2.2.tar.gz 195 | # #TLS_VERIFY FALSE 196 | # #URL_HASH SHA1=ffddf5fc5313cfbb893e07823ca8c473084eebca 197 | # ## 198 | # SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.2.2 199 | # INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install 200 | # #UPDATE_COMMAND sh -c "mkdir -p /build" 201 | # BINARY_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/cereal-1.2.2/build 202 | # CONFIGURE_COMMAND "" 203 | # BUILD_COMMAND "" 204 | # INSTALL_COMMAND sh -c "mkdir -p /include && cp -r /include/cereal /include" 205 | #) 206 | #ExternalProject_Add_Step(libcereal makedir 207 | # COMMAND mkdir -p /build 208 | # COMMENT "Make build directory" 209 | # DEPENDEES download 210 | # DEPENDERS configure) 211 | 212 | 213 | message("Build system will fetch SPDLOG") 214 | message("==================================================================") 215 | ExternalProject_Add(libspdlog 216 | DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external 217 | DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/spdlog/archive/v0.12.0.tar.gz -o spdlog-v0.12.0.tar.gz && 218 | ${SHASUM} 5cfd6a0b3182a88e1eb35bcb65a7ef9035140d7c73b16ba6095939dbf07325b9 spdlog-v0.12.0.tar.gz && 219 | tar -xzf spdlog-v0.12.0.tar.gz 220 | ## 221 | #URL https://github.com/COMBINE-lab/spdlog/archive/v0.12.0.tar.gz 222 | 
#DOWNLOAD_NAME spdlog-v0.12.0.tar.gz 223 | #TLS_VERIFY FALSE 224 | #URL_HASH SHA1=c868b4d1a27c4d9fb3b748f0566a8f0390d83ffb 225 | ## 226 | SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/spdlog-0.12.0 227 | INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install 228 | CONFIGURE_COMMAND "" 229 | BUILD_COMMAND "" 230 | BUILD_IN_SOURCE 1 231 | INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory <SOURCE_DIR>/include/spdlog <INSTALL_DIR>/include/spdlog # nothing is configured or built; only the headers are staged 232 | ) 233 | 234 | ### 235 | # 236 | # Done building external dependencies. 237 | # 238 | ### 239 | set (CPACK_SOURCE_IGNORE_FILES 240 | "/build/" 241 | "/bin/" 242 | "/lib/" 243 | "/external/" 244 | ".git/") 245 | 246 | message("CPACK_SOURCE_IGNORE_FILES = ${CPACK_SOURCE_IGNORE_FILES}") 247 | 248 | # Recurse into SalmonTools source directory 249 | add_subdirectory ( src ) 250 | 251 | # build a CPack driven installer package 252 | include (CPack) 253 | # project() declares no VERSION, so PROJECT_VERSION is empty; name the archive after the CPack version 254 | set(ARCHIVE_NAME ${CMAKE_PROJECT_NAME}-${CPACK_PACKAGE_VERSION}) 255 | add_custom_target(dist 256 | COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD 257 | | gzip > ${CMAKE_BINARY_DIR}/${ARCHIVE_NAME}.tar.gz 258 | WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) 259 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SalmonTools 2 | 3 | This repository contains (or will contain) a suite of tools that are useful for working with Salmon output. This is the ideal repository for tools that don't quite belong in the Salmon repository itself, but which are too small to warrant their own separate project. It's nice to have such things collected in one place. Contributions and pull-requests are welcome! 4 | 5 | # Tools 6 | 7 | `salmontools` is the main command-line interface for interacting with the tools. Like `samtools`, it uses separate commands to execute separate functionality. 
The available commands are: 8 | 9 | * extract-unmapped — Takes an `unmapped_names.txt` file from a run of Salmon, as well as the original FASTA/FASTQ files from which the unmapped names were generated, and extracts the corresponding reads from the FASTA/FASTQ file. The results (the read names and sequences) are written to a user-provided output file. 10 | * generateDecoyTranscriptome.sh — Located in the `scripts/` directory, this is a preprocessing script for creating an augmented hybrid FASTA file for `salmon index`. It consumes a genome FASTA (one file given through -g), a transcriptome FASTA (-t) and the annotation (GTF file given through -a) to create a new hybrid FASTA file which contains the decoy sequences from the genome, concatenated with the transcriptome (`gentrome.fa`). It runs [mashmap](https://github.com/marbl/MashMap) (path to binary given through -m) to align the transcriptome to an exon-masked genome with 80% homology, and extracts the mapped genomic intervals. It uses `awk` and [bedtools](https://bedtools.readthedocs.io/en/latest/index.html) (path to binary given through -b) to merge the contiguously mapped intervals and extract the decoy sequences from the genome. It also writes a `decoys.txt` file which contains the names/ids of the decoy sequences. Both `gentrome.fa` and `decoys.txt` can be used with `salmon index` with `salmon` >=0.14.0. 11 | **NOTE:** Salmon version [v1.0](https://github.com/COMBINE-lab/salmon/releases/tag/v1.0.0) can directly index the genome and transcriptome and does not require running the `generateDecoyTranscriptome` script; however, it is still backward compatible. Please check out [this](https://combine-lab.github.io/alevin-tutorial/2019/selective-alignment/) tutorial on how to run salmon with full genome + transcriptome without the annotation. 
12 | 13 | # Salmon in Alignment mode w/ decoy BAM 14 | 15 | By default, when given a decoy-aware index and the `--writeMappings` flag, Salmon also writes out the reads that align to decoys with a better alignment score than to any transcriptomic target. In the atypical situation where such a decoy-tagged BAM has to be requantified with salmon in alignment mode, salmon can fail. The general recommendation for this scenario is to filter all decoy alignments out of the BAM file before requantifying with salmon. The following command removes both the decoy targets and the decoy alignments from the decoy-tagged BAM, making it compatible with salmon's alignment mode. 16 | ``` 17 | samtools view -h input.bam | grep -v 'XT:A:D\|DS:D' | samtools view -bS > output.bam 18 | ``` 19 | -------------------------------------------------------------------------------- /cmake/Modules/FindJellyfish.cmake: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Find Jellyfish 3 | # 4 | # This sets the following variables: 5 | # JELLYFISH_FOUND - True if Jellyfish was found. 6 | # JELLYFISH_INCLUDE_DIRS - Directories containing the Jellyfish include files. 7 | # JELLYFISH_DEFINITIONS - Compiler flags for Jellyfish. 
8 | 9 | find_path(JELLYFISH_INCLUDE_DIR jellyfish 10 | HINTS "${JELLYFISH_ROOT}/include" "$ENV{JELLYFISH_ROOT}/include" "/usr/include" "$ENV{PROGRAMFILES}/jellyfish/include") 11 | 12 | set(JELLYFISH_INCLUDE_DIRS ${JELLYFISH_INCLUDE_DIR}) 13 | 14 | include(FindPackageHandleStandardArgs) 15 | #message("Required Jellyfish version ${Jellyfish_FIND_VERSION}") 16 | find_package_handle_standard_args(Jellyfish 17 | # Keyword signature only: a leading DEFAULT_MSG would select the legacy positional form and turn the keywords below into required variable names. 18 | FOUND_VAR JELLYFISH_FOUND 19 | REQUIRED_VARS JELLYFISH_INCLUDE_DIR 20 | VERSION_VAR Jellyfish_FOUND_VERSION) 21 | 22 | mark_as_advanced(JELLYFISH_INCLUDE_DIR) 23 | 24 | if(JELLYFISH_FOUND) 25 | message(STATUS "Jellyfish found (include: ${JELLYFISH_INCLUDE_DIRS})") 26 | endif() 27 | -------------------------------------------------------------------------------- /cmake/Modules/FindJemalloc.cmake: -------------------------------------------------------------------------------- 1 | # From: https://raw.githubusercontent.com/STEllAR-GROUP/hpx/master/cmake/FindJemalloc.cmake 2 | # Copyright (c) 2014 Thomas Heller 3 | # Copyright (c) 2007-2012 Hartmut Kaiser 4 | # Copyright (c) 2010-2011 Matt Anderson 5 | # Copyright (c) 2011 Bryce Lelbach 6 | # 7 | # Distributed under the Boost Software License, Version 1.0. 
(See accompanying 8 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 9 | 10 | find_package(PkgConfig) 11 | pkg_check_modules(PC_JEMALLOC QUIET libjemalloc) 12 | 13 | find_path(JEMALLOC_INCLUDE_DIR jemalloc/jemalloc.h 14 | HINTS 15 | ${JEMALLOC_ROOT} ENV JEMALLOC_ROOT 16 | ${PC_JEMALLOC_MINIMAL_INCLUDEDIR} 17 | ${PC_JEMALLOC_MINIMAL_INCLUDE_DIRS} 18 | ${PC_JEMALLOC_INCLUDEDIR} 19 | ${PC_JEMALLOC_INCLUDE_DIRS} 20 | PATH_SUFFIXES include) 21 | 22 | find_library(JEMALLOC_LIBRARY NAMES jemalloc libjemalloc 23 | HINTS 24 | ${JEMALLOC_ROOT} ENV JEMALLOC_ROOT 25 | ${PC_JEMALLOC_MINIMAL_LIBDIR} 26 | ${PC_JEMALLOC_MINIMAL_LIBRARY_DIRS} 27 | ${PC_JEMALLOC_LIBDIR} 28 | ${PC_JEMALLOC_LIBRARY_DIRS} 29 | PATH_SUFFIXES lib lib64) 30 | 31 | set(JEMALLOC_LIBRARIES ${JEMALLOC_LIBRARY}) 32 | set(JEMALLOC_INCLUDE_DIRS ${JEMALLOC_INCLUDE_DIR}) 33 | include(FindPackageHandleStandardArgs) # include explicitly instead of relying on FindPkgConfig pulling it in 34 | find_package_handle_standard_args(Jemalloc DEFAULT_MSG 35 | JEMALLOC_LIBRARY JEMALLOC_INCLUDE_DIR) 36 | 37 | get_property(_type CACHE JEMALLOC_ROOT PROPERTY TYPE) 38 | if(_type) 39 | set_property(CACHE JEMALLOC_ROOT PROPERTY ADVANCED 1) 40 | if("x${_type}" STREQUAL "xUNINITIALIZED") 41 | set_property(CACHE JEMALLOC_ROOT PROPERTY TYPE PATH) 42 | endif() 43 | endif() 44 | 45 | mark_as_advanced(JEMALLOC_ROOT JEMALLOC_LIBRARY JEMALLOC_INCLUDE_DIR) 46 | 47 | -------------------------------------------------------------------------------- /cmake/Modules/FindTBB.cmake: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | # 3 | # Copyright (c) 2015 Justus Calvin 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the 
Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | 23 | # 24 | # FindTBB 25 | # ------- 26 | # 27 | # Find TBB include directories and libraries. 28 | # 29 | # Usage: 30 | # 31 | # find_package(TBB [major[.minor]] [EXACT] 32 | # [QUIET] [REQUIRED] 33 | # [[COMPONENTS] [components...]] 34 | # [OPTIONAL_COMPONENTS components...]) 35 | # 36 | # where the allowed components are tbbmalloc and tbb_preview. Users may modify 37 | # the behavior of this module with the following variables: 38 | # 39 | # * TBB_ROOT_DIR - The base directory of the TBB installation. 40 | # * TBB_INCLUDE_DIR - The directory that contains the TBB header files. 41 | # * TBB_LIBRARY - The directory that contains the TBB library files. 42 | # * TBB_<library>_LIBRARY - The path of the corresponding TBB library. 43 | # These libraries, if specified, override the 44 | # corresponding library search results, where <library> 45 | # may be tbb, tbb_debug, tbbmalloc, tbbmalloc_debug, 46 | # tbb_preview, or tbb_preview_debug. 47 | # * TBB_USE_DEBUG_BUILD - The debug version of tbb libraries, if present, will 48 | # be used instead of the release version. 
49 | # 50 | # Users may modify the behavior of this module with the following environment 51 | # variables: 52 | # 53 | # * TBB_INSTALL_DIR 54 | # * TBBROOT 55 | # * LIBRARY_PATH 56 | # 57 | # This module will set the following variables: 58 | # 59 | # * TBB_FOUND - Set to false, or undefined, if we haven’t found, or 60 | # don’t want to use TBB. 61 | # * TBB_<component>_FOUND - If False, optional <component> part of TBB system is 62 | # not available. 63 | # * TBB_VERSION - The full version string 64 | # * TBB_VERSION_MAJOR - The major version 65 | # * TBB_VERSION_MINOR - The minor version 66 | # * TBB_INTERFACE_VERSION - The interface version number defined in 67 | # tbb/tbb_stddef.h. 68 | # * TBB_<library>_LIBRARY_RELEASE - The path of the TBB release version of 69 | # <library>, where <library> may be tbb, tbb_debug, 70 | # tbbmalloc, tbbmalloc_debug, tbb_preview, or 71 | # tbb_preview_debug. 72 | # * TBB_<library>_LIBRARY_DEBUG - The path of the TBB debug version of 73 | # <library>, where <library> may be tbb, tbb_debug, 74 | # tbbmalloc, tbbmalloc_debug, tbb_preview, or 75 | # tbb_preview_debug. 76 | # 77 | # The following variables should be used to build and link with TBB: 78 | # 79 | # * TBB_INCLUDE_DIRS - The include directory for TBB. 80 | # * TBB_LIBRARIES - The libraries to link against to use TBB. 81 | # * TBB_LIBRARIES_RELEASE - The release libraries to link against to use TBB. 82 | # * TBB_LIBRARIES_DEBUG - The debug libraries to link against to use TBB. 83 | # * TBB_DEFINITIONS - Definitions to use when compiling code that uses 84 | # TBB. 85 | # * TBB_DEFINITIONS_RELEASE - Definitions to use when compiling release code that 86 | # uses TBB. 87 | # * TBB_DEFINITIONS_DEBUG - Definitions to use when compiling debug code that 88 | # uses TBB. 89 | # 90 | # This module will also create the "tbb" target that may be used when building 91 | # executables and libraries. 
92 | 93 | include(FindPackageHandleStandardArgs) 94 | 95 | if(NOT TBB_FOUND) 96 | 97 | ################################## 98 | # Check the build type 99 | ################################## 100 | 101 | if(NOT DEFINED TBB_USE_DEBUG_BUILD) 102 | if(CMAKE_BUILD_TYPE MATCHES "(Debug|DEBUG|debug|RelWithDebInfo|RELWITHDEBINFO|relwithdebinfo)") 103 | set(TBB_BUILD_TYPE DEBUG) 104 | else() 105 | set(TBB_BUILD_TYPE RELEASE) 106 | endif() 107 | elseif(TBB_USE_DEBUG_BUILD) 108 | set(TBB_BUILD_TYPE DEBUG) 109 | else() 110 | set(TBB_BUILD_TYPE RELEASE) 111 | endif() 112 | 113 | ################################## 114 | # Set the TBB search directories 115 | ################################## 116 | 117 | # Define search paths based on user input and environment variables 118 | set(TBB_SEARCH_DIR ${TBB_ROOT_DIR} $ENV{TBB_INSTALL_DIR} $ENV{TBBROOT}) 119 | 120 | # Define the search directories based on the current platform 121 | if(CMAKE_SYSTEM_NAME STREQUAL "Windows") 122 | set(TBB_DEFAULT_SEARCH_DIR "C:/Program Files/Intel/TBB" 123 | "C:/Program Files (x86)/Intel/TBB") 124 | 125 | # Set the target architecture 126 | if(CMAKE_SIZEOF_VOID_P EQUAL 8) 127 | set(TBB_ARCHITECTURE "intel64") 128 | else() 129 | set(TBB_ARCHITECTURE "ia32") 130 | endif() 131 | 132 | # Set the TBB search library path search suffix based on the version of VC 133 | if(WINDOWS_STORE) 134 | set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc11_ui") 135 | elseif(MSVC14) 136 | set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc14") 137 | elseif(MSVC12) 138 | set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc12") 139 | elseif(MSVC11) 140 | set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc11") 141 | elseif(MSVC10) 142 | set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc10") 143 | endif() 144 | 145 | # Add the library path search suffix for the VC independent version of TBB 146 | list(APPEND TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc_mt") 147 | 148 | elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") 149 | # OS X 
150 | set(TBB_DEFAULT_SEARCH_DIR "/opt/intel/tbb") 151 | 152 | # TODO: Check to see which C++ library is being used by the compiler. 153 | if(NOT ${CMAKE_SYSTEM_VERSION} VERSION_LESS 13.0) 154 | # The default C++ library on OS X 10.9 and later is libc++ 155 | set(TBB_LIB_PATH_SUFFIX "lib/libc++" "lib") 156 | else() 157 | set(TBB_LIB_PATH_SUFFIX "lib") 158 | endif() 159 | elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") 160 | # Linux 161 | set(TBB_DEFAULT_SEARCH_DIR "/opt/intel/tbb") 162 | 163 | # TODO: Check compiler version to see the suffix should be /gcc4.1 or 164 | # /gcc4.1. For now, assume that the compiler is more recent than 165 | # gcc 4.4.x or later. 166 | if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") 167 | set(TBB_LIB_PATH_SUFFIX "lib/intel64/gcc4.4") 168 | elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$") 169 | set(TBB_LIB_PATH_SUFFIX "lib/ia32/gcc4.4") 170 | endif() 171 | endif() 172 | 173 | ################################## 174 | # Find the TBB include dir 175 | ################################## 176 | 177 | find_path(TBB_INCLUDE_DIRS tbb/tbb.h 178 | HINTS ${TBB_INCLUDE_DIR} ${TBB_SEARCH_DIR} 179 | PATHS ${TBB_DEFAULT_SEARCH_DIR} 180 | PATH_SUFFIXES include) 181 | 182 | ################################## 183 | # Set version strings 184 | ################################## 185 | 186 | if(TBB_INCLUDE_DIRS) 187 | file(READ "${TBB_INCLUDE_DIRS}/tbb/tbb_stddef.h" _tbb_version_file) 188 | string(REGEX REPLACE ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1" 189 | TBB_VERSION_MAJOR "${_tbb_version_file}") 190 | string(REGEX REPLACE ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1" 191 | TBB_VERSION_MINOR "${_tbb_version_file}") 192 | string(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" 193 | TBB_INTERFACE_VERSION "${_tbb_version_file}") 194 | set(TBB_VERSION "${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR}") 195 | endif() 196 | 197 | ################################## 198 | # Find TBB components 199 | ################################## 200 | 201 | 
if(TBB_VERSION VERSION_LESS 4.3) 202 | set(TBB_SEARCH_COMPOMPONENTS tbb_preview tbbmalloc tbb) 203 | else() 204 | set(TBB_SEARCH_COMPOMPONENTS tbb_preview tbbmalloc_proxy tbbmalloc tbb) 205 | endif() 206 | 207 | # Find each component 208 | foreach(_comp ${TBB_SEARCH_COMPOMPONENTS}) 209 | if(";${TBB_FIND_COMPONENTS};tbb;" MATCHES ";${_comp};") 210 | 211 | # Search for the libraries 212 | find_library(TBB_${_comp}_LIBRARY_RELEASE ${_comp} 213 | HINTS ${TBB_LIBRARY} ${TBB_SEARCH_DIR} 214 | PATHS ${TBB_DEFAULT_SEARCH_DIR} ENV LIBRARY_PATH 215 | PATH_SUFFIXES ${TBB_LIB_PATH_SUFFIX}) 216 | 217 | find_library(TBB_${_comp}_LIBRARY_DEBUG ${_comp}_debug 218 | HINTS ${TBB_LIBRARY} ${TBB_SEARCH_DIR} 219 | PATHS ${TBB_DEFAULT_SEARCH_DIR} ENV LIBRARY_PATH 220 | PATH_SUFFIXES ${TBB_LIB_PATH_SUFFIX}) 221 | 222 | if(TBB_${_comp}_LIBRARY_DEBUG) 223 | list(APPEND TBB_LIBRARIES_DEBUG "${TBB_${_comp}_LIBRARY_DEBUG}") 224 | endif() 225 | if(TBB_${_comp}_LIBRARY_RELEASE) 226 | list(APPEND TBB_LIBRARIES_RELEASE "${TBB_${_comp}_LIBRARY_RELEASE}") 227 | endif() 228 | if(TBB_${_comp}_LIBRARY_${TBB_BUILD_TYPE} AND NOT TBB_${_comp}_LIBRARY) 229 | set(TBB_${_comp}_LIBRARY "${TBB_${_comp}_LIBRARY_${TBB_BUILD_TYPE}}") 230 | endif() 231 | 232 | if(TBB_${_comp}_LIBRARY AND EXISTS "${TBB_${_comp}_LIBRARY}") 233 | set(TBB_${_comp}_FOUND TRUE) 234 | else() 235 | set(TBB_${_comp}_FOUND FALSE) 236 | endif() 237 | 238 | # Mark internal variables as advanced 239 | mark_as_advanced(TBB_${_comp}_LIBRARY_RELEASE) 240 | mark_as_advanced(TBB_${_comp}_LIBRARY_DEBUG) 241 | mark_as_advanced(TBB_${_comp}_LIBRARY) 242 | 243 | endif() 244 | endforeach() 245 | 246 | ################################## 247 | # Set compile flags and libraries 248 | ################################## 249 | 250 | set(TBB_DEFINITIONS_RELEASE "") 251 | set(TBB_DEFINITIONS_DEBUG "-DTBB_USE_DEBUG=1") 252 | 253 | if(TBB_LIBRARIES_${TBB_BUILD_TYPE}) 254 | set(TBB_DEFINITIONS "${TBB_DEFINITIONS_${TBB_BUILD_TYPE}}") 255 | set(TBB_LIBRARIES 
"${TBB_LIBRARIES_${TBB_BUILD_TYPE}}") 256 | elseif(TBB_LIBRARIES_RELEASE) 257 | set(TBB_DEFINITIONS "${TBB_DEFINITIONS_RELEASE}") 258 | set(TBB_LIBRARIES "${TBB_LIBRARIES_RELEASE}") 259 | elseif(TBB_LIBRARIES_DEBUG) 260 | set(TBB_DEFINITIONS "${TBB_DEFINITIONS_DEBUG}") 261 | set(TBB_LIBRARIES "${TBB_LIBRARIES_DEBUG}") 262 | endif() 263 | 264 | find_package_handle_standard_args(TBB 265 | REQUIRED_VARS TBB_INCLUDE_DIRS TBB_LIBRARIES 266 | HANDLE_COMPONENTS 267 | VERSION_VAR TBB_VERSION) 268 | 269 | ################################## 270 | # Create targets 271 | ################################## 272 | # Imported "tbb" target; the Debug and RelWithDebInfo configurations additionally define TBB_USE_DEBUG=1. 273 | if(NOT CMAKE_VERSION VERSION_LESS 3.0 AND TBB_FOUND) 274 | add_library(tbb SHARED IMPORTED) 275 | set_target_properties(tbb PROPERTIES 276 | INTERFACE_INCLUDE_DIRECTORIES ${TBB_INCLUDE_DIRS} 277 | IMPORTED_LOCATION ${TBB_LIBRARIES}) 278 | if(TBB_LIBRARIES_RELEASE AND TBB_LIBRARIES_DEBUG) 279 | set_target_properties(tbb PROPERTIES 280 | INTERFACE_COMPILE_DEFINITIONS "$<$<OR:$<CONFIG:Debug>,$<CONFIG:RelWithDebInfo>>:TBB_USE_DEBUG=1>" 281 | IMPORTED_LOCATION_DEBUG ${TBB_LIBRARIES_DEBUG} 282 | IMPORTED_LOCATION_RELWITHDEBINFO ${TBB_LIBRARIES_DEBUG} 283 | IMPORTED_LOCATION_RELEASE ${TBB_LIBRARIES_RELEASE} 284 | IMPORTED_LOCATION_MINSIZEREL ${TBB_LIBRARIES_RELEASE} 285 | ) 286 | elseif(TBB_LIBRARIES_RELEASE) 287 | set_target_properties(tbb PROPERTIES IMPORTED_LOCATION ${TBB_LIBRARIES_RELEASE}) 288 | else() 289 | set_target_properties(tbb PROPERTIES 290 | INTERFACE_COMPILE_DEFINITIONS "${TBB_DEFINITIONS_DEBUG}" 291 | IMPORTED_LOCATION ${TBB_LIBRARIES_DEBUG} 292 | ) 293 | endif() 294 | endif() 295 | 296 | mark_as_advanced(TBB_INCLUDE_DIRS TBB_LIBRARIES) 297 | 298 | unset(TBB_ARCHITECTURE) 299 | unset(TBB_BUILD_TYPE) 300 | unset(TBB_LIB_PATH_SUFFIX) 301 | unset(TBB_DEFAULT_SEARCH_DIR) 302 | 303 | endif() 304 | -------------------------------------------------------------------------------- /cmake/Modules/FindTcmalloc.cmake: -------------------------------------------------------------------------------- 1 | # - 
Find Tcmalloc 2 | # Find the native Tcmalloc includes and library 3 | # 4 | # Tcmalloc_INCLUDE_DIR - where to find Tcmalloc.h, etc. 5 | # Tcmalloc_LIBRARIES - List of libraries when using Tcmalloc. 6 | # Tcmalloc_FOUND - True if Tcmalloc found. 7 | 8 | find_path(Tcmalloc_INCLUDE_DIR google/tcmalloc.h NO_DEFAULT_PATH PATHS 9 | ${HT_DEPENDENCY_INCLUDE_DIR} 10 | /usr/include 11 | /opt/local/include 12 | /usr/local/include 13 | ) 14 | 15 | if (USE_TCMALLOC) 16 | set(Tcmalloc_NAMES tcmalloc) 17 | else () 18 | set(Tcmalloc_NAMES tcmalloc_minimal tcmalloc) 19 | endif () 20 | 21 | find_library(Tcmalloc_LIBRARY NO_DEFAULT_PATH 22 | NAMES ${Tcmalloc_NAMES} 23 | PATHS ${HT_DEPENDENCY_LIB_DIR} /lib /usr/lib /usr/local/lib /opt/local/lib 24 | ) 25 | 26 | if (Tcmalloc_INCLUDE_DIR AND Tcmalloc_LIBRARY) 27 | set(Tcmalloc_FOUND TRUE) 28 | set( Tcmalloc_LIBRARIES ${Tcmalloc_LIBRARY} ) 29 | else () 30 | set(Tcmalloc_FOUND FALSE) 31 | set( Tcmalloc_LIBRARIES ) 32 | endif () 33 | 34 | if (Tcmalloc_FOUND) 35 | message(STATUS "Found Tcmalloc: ${Tcmalloc_LIBRARY}") 36 | else () 37 | message(STATUS "Not Found Tcmalloc: ${Tcmalloc_LIBRARY}") 38 | if (Tcmalloc_FIND_REQUIRED) 39 | message(STATUS "Looked for Tcmalloc libraries named ${Tcmalloc_NAMES}.") 40 | message(FATAL_ERROR "Could NOT find Tcmalloc library") 41 | endif () 42 | endif () 43 | 44 | mark_as_advanced( 45 | Tcmalloc_LIBRARY 46 | Tcmalloc_INCLUDE_DIR 47 | ) 48 | -------------------------------------------------------------------------------- /cmake/PostInstall.cmake: -------------------------------------------------------------------------------- 1 | ## 2 | # Print some post install messages for the user 3 | ## 4 | message("\n\n") 5 | message("Installation complete. 
Please ensure the following paths are set properly.") 6 | message("==========================================================================") 7 | #if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") 8 | # message("Fixing library names with install_name_tool") 9 | # execute_process(COMMAND install_name_tool -add_rpath ${CMAKE_INSTALL_PREFIX}/bin ${CMAKE_INSTALL_PREFIX}/bin/salmon) 10 | # execute_process(COMMAND install_name_tool -add_rpath ${CMAKE_INSTALL_PREFIX}/lib ${CMAKE_INSTALL_PREFIX}/bin/salmon) 11 | # execute_process(COMMAND install_name_tool -add_rpath @executable_path ${CMAKE_INSTALL_PREFIX}/bin/salmon) 12 | #endif() 13 | message("Please add ${CMAKE_INSTALL_PREFIX}/bin to your PATH") 14 | if ("${CMAKE_SYSTEM_NAME}" MATCHES "Darwin") 15 | message("Please add ${CMAKE_INSTALL_PREFIX}/lib to your DYLD_FALLBACK_LIBRARY_PATH") 16 | else() 17 | message("Please add ${CMAKE_INSTALL_PREFIX}/lib to your LD_LIBRARY_PATH") 18 | endif() 19 | message("==========================================================================") 20 | -------------------------------------------------------------------------------- /cmake/SimpleTest.cmake: -------------------------------------------------------------------------------- 1 | execute_process(COMMAND tar xzvf sample_data.tgz 2 | WORKING_DIRECTORY ${TOPLEVEL_DIR} 3 | RESULT_VARIABLE TAR_RESULT 4 | ) 5 | 6 | if (TAR_RESULT) 7 | message(FATAL_ERROR "Error untarring sample_data.tgz") 8 | endif() 9 | 10 | set(INDEX_CMD ${TOPLEVEL_DIR}/build/src/sailfish index -t transcripts.fasta -k 20 -o sample_index --force) 11 | execute_process(COMMAND ${INDEX_CMD} 12 | WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data 13 | RESULT_VARIABLE INDEX_RESULT 14 | ) 15 | 16 | if (INDEX_RESULT) 17 | message(FATAL_ERROR "Error running ${INDEX_CMD}") 18 | endif() 19 | 20 | set(QUANT_COMMAND ${TOPLEVEL_DIR}/build/src/sailfish quant -i sample_index --noBiasCorrect -l IU -1 reads_1.fastq -2 reads_2.fastq -o sample_quant) 21 | execute_process(COMMAND ${QUANT_COMMAND}
22 | WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data 23 | RESULT_VARIABLE QUANT_RESULT 24 | ) 25 | if (QUANT_RESULT) 26 | message(FATAL_ERROR "Error running ${QUANT_COMMAND} (exit status: ${QUANT_RESULT})") 27 | endif() 28 | 29 | if (EXISTS ${TOPLEVEL_DIR}/sample_data/sample_quant/quant.sf) 30 | message("Sailfish ran successfully") 31 | else() 32 | message(FATAL_ERROR "Sailfish failed to produce output") 33 | endif() 34 | 35 | 36 | -------------------------------------------------------------------------------- /cmake/TestSalmonFMD.cmake: -------------------------------------------------------------------------------- 1 | execute_process(COMMAND tar xzvf sample_data.tgz 2 | WORKING_DIRECTORY ${TOPLEVEL_DIR} 3 | RESULT_VARIABLE TAR_RESULT 4 | ) 5 | 6 | if (TAR_RESULT) 7 | message(FATAL_ERROR "Error untarring sample_data.tgz") 8 | endif() 9 | 10 | set(SALMON_FMD_INDEX_CMD ${CMAKE_BINARY_DIR}/salmon index -t transcripts.fasta -i sample_salmon_fmd_index --type fmd) 11 | execute_process(COMMAND ${SALMON_FMD_INDEX_CMD} 12 | WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data 13 | RESULT_VARIABLE SALMON_FMD_INDEX_RESULT 14 | ) 15 | 16 | if (SALMON_FMD_INDEX_RESULT) 17 | message(FATAL_ERROR "Error running ${SALMON_FMD_INDEX_CMD}") 18 | endif() 19 | 20 | set(SALMON_QUANT_COMMAND ${CMAKE_BINARY_DIR}/salmon quant -i sample_salmon_fmd_index -l IU -1 reads_1.fastq -2 reads_2.fastq -o sample_salmon_fmd_quant) 21 | execute_process(COMMAND ${SALMON_QUANT_COMMAND} 22 | WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data 23 | RESULT_VARIABLE SALMON_FMD_QUANT_RESULT 24 | ) 25 | if (SALMON_FMD_QUANT_RESULT) 26 | message(FATAL_ERROR "Error running ${SALMON_QUANT_COMMAND} (exit status: ${SALMON_FMD_QUANT_RESULT})") 27 | endif() 28 | 29 | if (EXISTS ${TOPLEVEL_DIR}/sample_data/sample_salmon_fmd_quant/quant.sf) 30 | message("Salmon (read) ran successfully") 31 | else() 32 | message(FATAL_ERROR "Salmon (read --- fmd-index) failed to produce output") 33 | endif() 34 | --------------------------------------------------------------------------------
/cmake/TestSalmonQuasi.cmake: -------------------------------------------------------------------------------- 1 | set(SALMON_QUASI_INDEX_CMD ${CMAKE_BINARY_DIR}/salmon index -t transcripts.fasta -i sample_salmon_quasi_index --type quasi) 2 | execute_process(COMMAND ${SALMON_QUASI_INDEX_CMD} 3 | WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data 4 | RESULT_VARIABLE SALMON_QUASI_INDEX_RESULT 5 | ) 6 | 7 | if (SALMON_QUASI_INDEX_RESULT) 8 | message(FATAL_ERROR "Error running ${SALMON_QUASI_INDEX_CMD}") 9 | endif() 10 | 11 | set(SALMON_QUANT_COMMAND ${CMAKE_BINARY_DIR}/salmon quant -i sample_salmon_quasi_index -l IU -1 reads_1.fastq -2 reads_2.fastq -o sample_salmon_quasi_quant) 12 | execute_process(COMMAND ${SALMON_QUANT_COMMAND} 13 | WORKING_DIRECTORY ${TOPLEVEL_DIR}/sample_data 14 | RESULT_VARIABLE SALMON_QUASI_QUANT_RESULT 15 | ) 16 | if (SALMON_QUASI_QUANT_RESULT) 17 | message(FATAL_ERROR "Error running ${SALMON_QUANT_COMMAND} (exit status: ${SALMON_QUASI_QUANT_RESULT})") 18 | endif() 19 | 20 | if (EXISTS ${TOPLEVEL_DIR}/sample_data/sample_salmon_quasi_quant/quant.sf) 21 | message("Salmon (read) ran successfully") 22 | else() 23 | message(FATAL_ERROR "Salmon (read --- quasi-index) failed to produce output") 24 | endif() 25 | -------------------------------------------------------------------------------- /cmake/UnitTests.cmake: -------------------------------------------------------------------------------- 1 | set(TEST_COMMAND ${TOPLEVEL_DIR}/tests/unitTests) 2 | execute_process(COMMAND ${TEST_COMMAND} 3 | WORKING_DIRECTORY ${TOPLEVEL_DIR}/tests 4 | RESULT_VARIABLE UNIT_TEST_RESULT 5 | ) 6 | if (UNIT_TEST_RESULT) 7 | message(FATAL_ERROR "Error running ${TEST_COMMAND} (exit status: ${UNIT_TEST_RESULT})") 8 | endif() 9 | 10 | 11 | -------------------------------------------------------------------------------- /include/FastxParser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __FASTX_PARSER__ 2 | #define __FASTX_PARSER__ 3 | 4 | #include "fcntl.h" 5 | #include "unistd.h" 6 |
#include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | extern "C" { 14 | #include "kseq.h" 15 | } 16 | 17 | #include "concurrentqueue.h" 18 | 19 | #ifndef __FASTX_PARSER_PRECXX14_MAKE_UNIQUE__ 20 | #define __FASTX_PARSER_PRECXX14_MAKE_UNIQUE__ 21 | 22 | #if __cplusplus >= 201402L 23 | #include <memory> 24 | using std::make_unique; 25 | #else 26 | 27 | #include <cstddef> 28 | #include <memory> 29 | #include <type_traits> 30 | #include <utility> 31 | 32 | template <class T> struct _Unique_if { 33 | using _Single_object = std::unique_ptr<T>; 34 | }; 35 | 36 | template <class T> struct _Unique_if<T[]> { 37 | using _Unknown_bound = std::unique_ptr<T[]>; 38 | }; 39 | 40 | template <class T, size_t N> struct _Unique_if<T[N]> { 41 | using _Known_bound = void; 42 | }; 43 | 44 | template <class T, class... Args> 45 | typename _Unique_if<T>::_Single_object make_unique(Args&&... args) { 46 | return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); 47 | } 48 | 49 | template <class T> 50 | typename _Unique_if<T>::_Unknown_bound make_unique(size_t n) { 51 | using U = typename std::remove_extent<T>::type; 52 | return std::unique_ptr<T>(new U[n]()); 53 | } 54 | 55 | template <class T, class... Args> 56 | typename _Unique_if<T>::_Known_bound make_unique(Args&&...)
= delete; 57 | 58 | #endif // C++11 59 | #endif //__FASTX_PARSER_PRECXX14_MAKE_UNIQUE__ 60 | 61 | namespace fastx_parser { 62 | struct ReadSeq { 63 | std::string seq; 64 | std::string name; 65 | ~ReadSeq() {} 66 | }; 67 | 68 | struct ReadPair { 69 | ReadSeq first; 70 | ReadSeq second; 71 | }; 72 | 73 | template class ReadChunk { 74 | public: 75 | ReadChunk(size_t want) : group_(want), want_(want), have_(want) {} 76 | inline void have(size_t num) { have_ = num; } 77 | inline size_t size() { return have_; } 78 | inline size_t want() const { return want_; } 79 | T& operator[](size_t i) { return group_[i]; } 80 | typename std::vector::iterator begin() { return group_.begin(); } 81 | typename std::vector::iterator end() { return group_.begin() + have_; } 82 | 83 | private: 84 | std::vector group_; 85 | size_t want_; 86 | size_t have_; 87 | }; 88 | 89 | template class ReadGroup { 90 | public: 91 | ReadGroup(moodycamel::ProducerToken&& pt, moodycamel::ConsumerToken&& ct) 92 | : pt_(std::move(pt)), ct_(std::move(ct)) {} 93 | moodycamel::ConsumerToken& consumerToken() { return ct_; } 94 | moodycamel::ProducerToken& producerToken() { return pt_; } 95 | // get a reference to the chunk this ReadGroup owns 96 | std::unique_ptr>& chunkPtr() { return chunk_; } 97 | // get a *moveable* reference to the chunk this ReadGroup owns 98 | std::unique_ptr>&& takeChunkPtr() { return std::move(chunk_); } 99 | inline void have(size_t num) { chunk_->have(num); } 100 | inline size_t size() { return chunk_->size(); } 101 | inline size_t want() const { return chunk_->want(); } 102 | T& operator[](size_t i) { return (*chunk_)[i]; } 103 | typename std::vector::iterator begin() { return chunk_->begin(); } 104 | typename std::vector::iterator end() { 105 | return chunk_->begin() + chunk_->size(); 106 | } 107 | void setChunkEmpty() { chunk_.release(); } 108 | bool empty() const { return chunk_.get() == nullptr; } 109 | 110 | private: 111 | std::unique_ptr> chunk_{nullptr}; 112 | 
moodycamel::ProducerToken pt_; 113 | moodycamel::ConsumerToken ct_; 114 | }; 115 | 116 | template class FastxParser { 117 | public: 118 | FastxParser(std::vector files, uint32_t numConsumers, 119 | uint32_t numParsers = 1, uint32_t chunkSize = 1000); 120 | 121 | FastxParser(std::vector files, std::vector files2, 122 | uint32_t numConsumers, uint32_t numParsers = 1, 123 | uint32_t chunkSize = 1000); 124 | ~FastxParser(); 125 | bool start(); 126 | ReadGroup getReadGroup(); 127 | bool refill(ReadGroup& rg); 128 | void finishedWithGroup(ReadGroup& s); 129 | 130 | private: 131 | moodycamel::ProducerToken getProducerToken_(); 132 | moodycamel::ConsumerToken getConsumerToken_(); 133 | 134 | std::vector inputStreams_; 135 | std::vector inputStreams2_; 136 | uint32_t numParsers_; 137 | std::atomic numParsing_; 138 | std::vector> parsingThreads_; 139 | size_t blockSize_; 140 | moodycamel::ConcurrentQueue>> readQueue_, 141 | seqContainerQueue_; 142 | 143 | // holds the indices of files (file-pairs) to be processed 144 | moodycamel::ConcurrentQueue workQueue_; 145 | 146 | std::vector> produceReads_; 147 | std::vector> consumeContainers_; 148 | }; 149 | } 150 | #endif // __FASTX_PARSER__ 151 | -------------------------------------------------------------------------------- /include/kseq.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008, 2009, 2011 Attractive Chaos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 
14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | /* Last Modified: 2017-02-11 */ 27 | 28 | #ifndef AC_KSEQ_H 29 | #define AC_KSEQ_H 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r 36 | #define KS_SEP_TAB 1 // isspace() && !' ' 37 | #define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows) 38 | #define KS_SEP_MAX 2 39 | 40 | #define __KS_TYPE(type_t) \ 41 | typedef struct __kstream_t { \ 42 | unsigned char *buf; \ 43 | int begin, end, is_eof; \ 44 | type_t f; \ 45 | } kstream_t; 46 | 47 | #define ks_err(ks) ((ks)->end < 0) 48 | #define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end) 49 | #define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0) 50 | 51 | #define __KS_BASIC(type_t, __bufsize) \ 52 | static inline kstream_t *ks_init(type_t f) \ 53 | { \ 54 | kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ 55 | ks->f = f; \ 56 | ks->buf = (unsigned char*)malloc(__bufsize); \ 57 | return ks; \ 58 | } \ 59 | static inline void ks_destroy(kstream_t *ks) \ 60 | { \ 61 | if (ks) { \ 62 | free(ks->buf); \ 63 | free(ks); \ 64 | } \ 65 | } 66 | 67 | #define __KS_GETC(__read, __bufsize) \ 68 | static inline int ks_getc(kstream_t *ks) \ 69 | { \ 70 | if (ks_err(ks)) return -3; \ 71 | if (ks_eof(ks)) return -1; \ 72 | if (ks->begin >= ks->end) { \ 73 | ks->begin = 0; \ 74 | ks->end = __read(ks->f, ks->buf, __bufsize); \ 75 | if (ks->end == 
0) { ks->is_eof = 1; return -1; } \ 76 | else if (ks->end < 0) { ks->is_eof = 1; return -3; } \ 77 | } \ 78 | return (int)ks->buf[ks->begin++]; \ 79 | } 80 | 81 | #ifndef KSTRING_T 82 | #define KSTRING_T kstring_t 83 | typedef struct __kstring_t { 84 | size_t l, m; 85 | char *s; 86 | } kstring_t; 87 | #endif 88 | 89 | #ifndef kroundup32 90 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 91 | #endif 92 | 93 | #define __KS_GETUNTIL(__read, __bufsize) \ 94 | static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \ 95 | { \ 96 | int gotany = 0; \ 97 | if (dret) *dret = 0; \ 98 | str->l = append? str->l : 0; \ 99 | for (;;) { \ 100 | int i; \ 101 | if (ks_err(ks)) return -3; \ 102 | if (ks->begin >= ks->end) { \ 103 | if (!ks->is_eof) { \ 104 | ks->begin = 0; \ 105 | ks->end = __read(ks->f, ks->buf, __bufsize); \ 106 | if (ks->end == 0) { ks->is_eof = 1; break; } \ 107 | if (ks->end == -1) { ks->is_eof = 1; return -3; } \ 108 | } else break; \ 109 | } \ 110 | if (delimiter == KS_SEP_LINE) { \ 111 | for (i = ks->begin; i < ks->end; ++i) \ 112 | if (ks->buf[i] == '\n') break; \ 113 | } else if (delimiter > KS_SEP_MAX) { \ 114 | for (i = ks->begin; i < ks->end; ++i) \ 115 | if (ks->buf[i] == delimiter) break; \ 116 | } else if (delimiter == KS_SEP_SPACE) { \ 117 | for (i = ks->begin; i < ks->end; ++i) \ 118 | if (isspace(ks->buf[i])) break; \ 119 | } else if (delimiter == KS_SEP_TAB) { \ 120 | for (i = ks->begin; i < ks->end; ++i) \ 121 | if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \ 122 | } else i = 0; /* never come to here! 
*/ \ 123 | if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \ 124 | str->m = str->l + (i - ks->begin) + 1; \ 125 | kroundup32(str->m); \ 126 | str->s = (char*)realloc(str->s, str->m); \ 127 | } \ 128 | gotany = 1; \ 129 | memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \ 130 | str->l = str->l + (i - ks->begin); \ 131 | ks->begin = i + 1; \ 132 | if (i < ks->end) { \ 133 | if (dret) *dret = ks->buf[i]; \ 134 | break; \ 135 | } \ 136 | } \ 137 | if (!gotany && ks_eof(ks)) return -1; \ 138 | if (str->s == 0) { \ 139 | str->m = 1; \ 140 | str->s = (char*)calloc(1, 1); \ 141 | } else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \ 142 | str->s[str->l] = '\0'; \ 143 | return str->l; \ 144 | } \ 145 | static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \ 146 | { return ks_getuntil2(ks, delimiter, str, dret, 0); } 147 | 148 | #define KSTREAM_INIT(type_t, __read, __bufsize) \ 149 | __KS_TYPE(type_t) \ 150 | __KS_BASIC(type_t, __bufsize) \ 151 | __KS_GETC(__read, __bufsize) \ 152 | __KS_GETUNTIL(__read, __bufsize) 153 | 154 | #define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0) 155 | 156 | #define __KSEQ_BASIC(SCOPE, type_t) \ 157 | SCOPE kseq_t *kseq_init(type_t fd) \ 158 | { \ 159 | kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ 160 | s->f = ks_init(fd); \ 161 | return s; \ 162 | } \ 163 | SCOPE void kseq_destroy(kseq_t *ks) \ 164 | { \ 165 | if (!ks) return; \ 166 | free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \ 167 | ks_destroy(ks->f); \ 168 | free(ks); \ 169 | } 170 | 171 | /* Return value: 172 | >=0 length of the sequence (normal) 173 | -1 end-of-file 174 | -2 truncated quality string 175 | -3 error reading stream 176 | */ 177 | #define __KSEQ_READ(SCOPE) \ 178 | SCOPE int kseq_read(kseq_t *seq) \ 179 | { \ 180 | int c,r; \ 181 | kstream_t *ks = seq->f; \ 182 | if (seq->last_char == 0) { /* then jump to the 
next header line */ \ 183 | while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '@'); \ 184 | if (c < 0) return c; /* end of file or error*/ \ 185 | seq->last_char = c; \ 186 | } /* else: the first header char has been read in the previous call */ \ 187 | seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \ 188 | if ((r=ks_getuntil(ks, 0, &seq->name, &c)) < 0) return r; /* normal exit: EOF or error */ \ 189 | if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \ 190 | if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \ 191 | seq->seq.m = 256; \ 192 | seq->seq.s = (char*)malloc(seq->seq.m); \ 193 | } \ 194 | while ((c = ks_getc(ks)) >= 0 && c != '>' && c != '+' && c != '@') { \ 195 | if (c == '\n') continue; /* skip empty lines */ \ 196 | seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \ 197 | ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \ 198 | } \ 199 | if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \ 200 | if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \ 201 | seq->seq.m = seq->seq.l + 2; \ 202 | kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \ 203 | seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \ 204 | } \ 205 | seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \ 206 | seq->is_fastq = (c == '+'); \ 207 | if (!seq->is_fastq) return seq->seq.l; /* FASTA */ \ 208 | if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \ 209 | seq->qual.m = seq->seq.m; \ 210 | seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \ 211 | } \ 212 | while ((c = ks_getc(ks)) >= 0 && c != '\n'); /* skip the rest of '+' line */ \ 213 | if (c == -1) return -2; /* error: no quality string */ \ 214 | while ((c = ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < 
seq->seq.l)); \ 215 | if (c == -3) return -3; /* stream error */ \ 216 | seq->last_char = 0; /* we have not come to the next header line */ \ 217 | if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \ 218 | return seq->seq.l; \ 219 | } 220 | 221 | #define __KSEQ_TYPE(type_t) \ 222 | typedef struct { \ 223 | kstring_t name, comment, seq, qual; \ 224 | int last_char, is_fastq; \ 225 | kstream_t *f; \ 226 | } kseq_t; 227 | 228 | #define KSEQ_INIT2(SCOPE, type_t, __read) \ 229 | KSTREAM_INIT(type_t, __read, 16384) \ 230 | __KSEQ_TYPE(type_t) \ 231 | __KSEQ_BASIC(SCOPE, type_t) \ 232 | __KSEQ_READ(SCOPE) 233 | 234 | #define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read) 235 | 236 | #define KSEQ_DECLARE(type_t) \ 237 | __KS_TYPE(type_t) \ 238 | __KSEQ_TYPE(type_t) \ 239 | extern kseq_t *kseq_init(type_t fd); \ 240 | void kseq_destroy(kseq_t *ks); \ 241 | int kseq_read(kseq_t *seq); 242 | 243 | #endif 244 | -------------------------------------------------------------------------------- /include/sparsepp/spp_alloc.h: -------------------------------------------------------------------------------- 1 | #if !defined(spp_alloc_h_guard) 2 | #define spp_alloc_h_guard 3 | 4 | 5 | /* ----------------------------------------------------------------------------------------------- 6 | * - use Segment tree with nodes containg info of pages - each page bm_sz objects with bitmap 7 | * - _items pointers are aligned mod bm_sz*sizeof(T), with index in segment tree after T items 8 | * - when hash_table created with specified size (or resized), prealloc pages for allocator so 9 | * we don't have to sort them one by one. 
10 | * - use prefix tree structure match pointer 'hint" to page, so we can realloc or allocate in 11 | * nearby memory 12 | * ----------------------------------------------------------------------------------------------- 13 | */ 14 | 15 | #include // includes spp_config.h 16 | #include 17 | #include 18 | 19 | #define USE_BTREE 1 20 | 21 | #if USE_BTREE 22 | #include 23 | #define BTREE_SET spp_::btree_set 24 | #elif USE_CPP_BTREE 25 | #ifdef _WIN32 26 | typedef int64_t ssize_t; // for cpp-btree 27 | #endif 28 | 29 | #include 30 | #define BTREE_SET btree::btree_set 31 | #endif 32 | 33 | #include 34 | #include 35 | 36 | 37 | namespace spp_ 38 | { 39 | 40 | #if USE_BTREE 41 | 42 | // ----------------------------------------------------------- 43 | // btree is almost as fast as the flat map for small sizes, 44 | // but maintain good insert/delete performance for large 45 | // sizes. 46 | // ----------------------------------------------------------- 47 | template 48 | class PageContainer : public BTREE_SET 49 | { 50 | public: 51 | 52 | }; 53 | 54 | #else 55 | 56 | // ----------------------------------------------------------- 57 | // using a sorted vector (a lind of flat map). Cache friendly, 58 | // fast lookups, but slow insert/delete when the size gets 59 | // large (i.e. when we have a lot of pages). 60 | // ----------------------------------------------------------- 61 | template 62 | class PageContainer : public std::vector 63 | { 64 | public: 65 | typedef typename std::vector super; 66 | 67 | typename super::iterator upper_bound(const T& value) 68 | { 69 | return std::upper_bound(this->begin(), this->end(), value); 70 | } 71 | 72 | void insert(const T& value) 73 | { 74 | // we don't check for unicity - not needed in our case. 
75 | typename super::iterator it = std::upper_bound(this->begin(), this->end(), value); 76 | this->std::vector::insert(it, value); 77 | } 78 | 79 | void erase(const T& value) 80 | { 81 | typename super::iterator it = std::lower_bound(this->begin(), this->end(), value); 82 | if (*it == value) 83 | this->std::vector::erase(it); 84 | } 85 | }; 86 | 87 | #endif 88 | 89 | 90 | // ---------------------------------------------------------------- 91 | // ---------------------------------------------------------------- 92 | template 93 | class spp_allocator 94 | { 95 | public: 96 | typedef T value_type; 97 | typedef size_t size_type; 98 | typedef ptrdiff_t difference_type; 99 | 100 | typedef T* pointer; 101 | typedef const T* const_pointer; 102 | typedef T& reference; 103 | typedef const T& const_reference; 104 | 105 | private: 106 | typedef uint32_t offset_type; 107 | 108 | // ------------------- Page ---------------------------------------- 109 | // pages are not stored in segments, as we want to keep segment tree 110 | // compact for better cache hit. Also that way we can double the 111 | // segment tree size when we need more memory, while still allocating 112 | // item blocks as needed. 113 | // ----------------------------------------------------------------- 114 | template 115 | class Page 116 | { 117 | public: 118 | Page() : _num_free(bm_sz), _start_idx(0), _lzs_start((size_t)-1) {} 119 | 120 | ~Page() { assert(_num_free == bm_sz && _bs.none(0, bm_sz)); } 121 | 122 | T *allocate(size_type n, offset_type &lf, intptr_t &diff) 123 | { 124 | size_t start = _bs.find_next_n(n, _start_idx); 125 | 126 | assert(start != Bitset::npos); 127 | assert(_bs.none(start, start + n)); 128 | 129 | _start_idx = start + n; 130 | _bs.set(start, start + n); 131 | 132 | if (0 && lf == _num_free) 133 | { 134 | lf -= (offset_type)n; 135 | diff = - (intptr_t)n; 136 | // _lzs_start = ?? 
137 | } 138 | else 139 | _update_longest_free(lf, diff); 140 | 141 | _num_free -= n; 142 | 143 | assert(lf >= max_lf || lf <= _num_free); 144 | assert(_num_free <= bm_sz); 145 | 146 | return (T *)&_items[start]; 147 | } 148 | 149 | T *extend(size_type start, size_type old_sz, size_type new_sz, bool request_space_after, 150 | offset_type &lf, intptr_t &diff) 151 | { 152 | assert(new_sz > old_sz); 153 | assert(_bs.all(start, start + old_sz)); 154 | assert(_lzs_start != start); 155 | 156 | bool have_space_after = (start + new_sz <= page_size) && 157 | _bs.none(start + old_sz, start + new_sz); 158 | 159 | size_type add = new_sz - old_sz; 160 | 161 | if (request_space_after && have_space_after) 162 | { 163 | _bs.set(start + old_sz, start + new_sz); 164 | _num_free -= add; 165 | if (_lzs_start == (size_t)-1 || lf >= max_lf || _lzs_start == start + old_sz) 166 | _update_longest_free(lf, diff); 167 | 168 | assert(lf >= max_lf || lf <= _num_free); 169 | return (T *)&_items[start]; 170 | } 171 | 172 | bool have_space_before = start >= add && _bs.none(start - add, start); 173 | if (have_space_before && (!request_space_after || !have_space_after)) 174 | { 175 | _bs.set(start - add, start); 176 | _num_free -= add; 177 | if (_lzs_start == (size_t)-1 || lf >= max_lf || _lzs_start + lf == start) 178 | _update_longest_free(lf, diff); 179 | assert(lf >= max_lf || lf <= _num_free); 180 | return (T *)&_items[start - add]; 181 | } 182 | 183 | if (have_space_after) 184 | { 185 | _bs.set(start + old_sz, start + new_sz); 186 | _num_free -= add; 187 | if (_lzs_start == (size_t)-1 || lf >= max_lf || _lzs_start == start + old_sz) 188 | _update_longest_free(lf, diff); 189 | assert(lf >= max_lf || lf <= _num_free); 190 | return (T *)&_items[start]; 191 | } 192 | 193 | return 0; 194 | } 195 | 196 | T *shrink(size_type start, size_type old_sz, size_type new_sz, 197 | offset_type &lf, intptr_t &diff) 198 | { 199 | assert(new_sz < old_sz); 200 | assert(_bs.all(start, start + old_sz)); 201 | 202 | 
_bs.reset(start + new_sz, start + old_sz); 203 | _num_free += old_sz - new_sz; 204 | if (lf < max_lf && _lzs_start == start + old_sz) 205 | _update_longest_free(lf, diff); 206 | assert(lf >= max_lf || lf <= _num_free); 207 | return (T *)&_items[start]; 208 | } 209 | 210 | bool free(size_type start, size_type n, offset_type &lf, intptr_t &diff) 211 | { 212 | assert(_bs.all(start, start + n)); 213 | _bs.reset(start, start + n); 214 | _num_free += n; 215 | if (_num_free < bm_sz && lf < max_lf) 216 | { 217 | #if 1 218 | size_t start_pos; 219 | offset_type new_lf = (offset_type)_bs.zero_sequence_size_around(start, start+n, start_pos); 220 | assert(start_pos == (size_t)-1 || start_pos < bm_sz); 221 | if (new_lf > lf) 222 | { 223 | diff = (intptr_t)new_lf - (intptr_t)lf; 224 | lf = new_lf; 225 | _lzs_start = start_pos; 226 | } 227 | #else 228 | _update_longest_free(lf, diff); 229 | #endif 230 | } 231 | else 232 | diff = 0; 233 | assert(lf >= max_lf || lf <= _num_free); 234 | return true; 235 | } 236 | 237 | size_t longest_free() const 238 | { 239 | _lzs_start = (size_t)-1; 240 | if (_num_free <= 1) 241 | return _num_free; 242 | 243 | // 64 free entries is enough for sparsepp -> don't waste time in longest_zero_sequence() 244 | return _bs.has_zero_word() ? 
max_lf : 245 | _bs.longest_zero_sequence(max_lf, _lzs_start); 246 | //_bs.longest_zero_sequence(); 247 | } 248 | 249 | size_t num_allocated() const 250 | { 251 | size_t res = _bs.count(); 252 | assert(res + _num_free == bm_sz); 253 | return res; 254 | } 255 | 256 | size_t num_free() const 257 | { 258 | return _num_free; 259 | } 260 | 261 | const T *base() const 262 | { 263 | return (const T *)&_items[0]; 264 | } 265 | 266 | private: 267 | void _update_longest_free(offset_type &lf, intptr_t &diff) 268 | { 269 | offset_type new_lf = (offset_type)longest_free(); 270 | assert(_lzs_start == (size_t)-1 || _lzs_start < bm_sz); 271 | if (new_lf == lf) 272 | diff = 0; 273 | else 274 | { 275 | diff = (intptr_t)new_lf - (intptr_t)lf; 276 | lf = new_lf; 277 | } 278 | } 279 | 280 | typedef char TProxy[sizeof(T)]; 281 | typedef spp_bitset Bitset; 282 | 283 | // 64 free entries is enough for sparsepp -> don't waste time in longest_zero_sequence() 284 | static const offset_type max_lf = SPP_GROUP_SIZE; 285 | 286 | size_t _num_free; // within this page 287 | size_t _start_idx; // within this page 288 | mutable size_t _lzs_start; // start index of longest zero sequence - or -1 if not set 289 | Bitset _bs; // 0 == free, 1 == busy 290 | TProxy _items[bm_sz]; // memory returned to user 291 | }; 292 | 293 | // ---------------------- Segment ----------------------------------------------- 294 | template 295 | class Segment 296 | { 297 | public: 298 | Segment() : _page(0), _longest_free(bm_sz) {} 299 | 300 | Segment(const Segment &o): 301 | _page(0), 302 | _longest_free(bm_sz) 303 | { 304 | assert(o._page == 0); 305 | (void)(o._page); // silence warning 306 | } 307 | 308 | ~Segment() 309 | { 310 | //assert(_longest_free == bm_sz); 311 | if (_page) 312 | _free_page(); 313 | } 314 | 315 | Segment& operator=(const Segment &o) 316 | { 317 | assert(o._page == 0); 318 | _page = o._page; 319 | _longest_free = o._longest_free; 320 | return *this; 321 | } 322 | 323 | #ifndef 
SPP_NO_CXX11_RVALUE_REFERENCES 324 | Segment(Segment &&o) : 325 | _page(o._page), 326 | _longest_free(o._longest_free) 327 | { 328 | o._page = 0; 329 | o._longest_free = bm_sz; 330 | } 331 | 332 | Segment& operator=(Segment &&o) 333 | { 334 | _page = o._page; 335 | _longest_free = o._longest_free; 336 | o._page = 0; 337 | o._longest_free = bm_sz; 338 | return *this; 339 | } 340 | #endif 341 | 342 | void swap(Segment &o) 343 | { 344 | using std::swap; 345 | 346 | swap(_page, o._page); 347 | swap(_longest_free, o._longest_free); 348 | } 349 | 350 | pointer allocate(size_type n, intptr_t &diff) 351 | { 352 | if (n > _longest_free) 353 | return 0; 354 | 355 | pointer res = 0; 356 | 357 | if (!_page) 358 | _allocate_page(); 359 | 360 | if (_page) 361 | res = _page->allocate(n, _longest_free, diff); 362 | 363 | return res; 364 | } 365 | 366 | pointer extend(size_type start, size_type old_sz, size_type new_sz, 367 | bool space_after, intptr_t &diff) 368 | { 369 | assert(_page); 370 | return _page->extend(start, old_sz, new_sz, space_after, _longest_free, diff); 371 | } 372 | 373 | pointer shrink(size_type start, size_type old_sz, size_type new_sz, intptr_t &diff) 374 | { 375 | assert(_page); 376 | return _page->shrink(start, old_sz, new_sz, _longest_free, diff); 377 | } 378 | 379 | bool free(size_type start, size_type n, intptr_t &diff) 380 | { 381 | assert(_page); 382 | if (_page->free(start, n, _longest_free, diff)) 383 | { 384 | if (_page->num_free() == bm_sz) 385 | _free_page(); 386 | return true; 387 | } 388 | return false; 389 | } 390 | 391 | size_type num_allocated() const 392 | { 393 | return _page ? _page->num_allocated() : 0; 394 | } 395 | 396 | offset_type& longest_free() { return _longest_free; } 397 | 398 | const T *page() const { return _page ? 
_page->base() : 0; } 399 | 400 | private: 401 | typedef Page _Page; 402 | 403 | void _allocate_page() 404 | { 405 | _page = (_Page *)malloc(sizeof(_Page)); 406 | new (_page) _Page(); // construct 407 | } 408 | 409 | void _free_page() 410 | { 411 | if (_page) 412 | { 413 | _page->~_Page(); // destruct 414 | ::free(_page); 415 | _longest_free = bm_sz; 416 | _page = 0; 417 | } 418 | } 419 | 420 | _Page *_page; // either actual page ptr 421 | offset_type _longest_free; 422 | }; 423 | 424 | 425 | // ------------------- Segment Tree --------------------------------------------- 426 | // http://codeforces.com/blog/entry/18051?mobile=true 427 | // ------------------------------------------------------------------------------ 428 | template 429 | class SegTree : public spp_rc 430 | { 431 | public: 432 | SegTree() : 433 | _num_allocated(0), 434 | _num_seg(2), 435 | _num_extend_tries(0), 436 | _num_extend_successes(0) 437 | { 438 | } 439 | 440 | pointer allocate(size_type n, const_pointer hint = 0) 441 | { 442 | assert(n <= bm_sz); 443 | pointer res = 0; 444 | 445 | if (_seg.empty()) 446 | _seg.resize(_num_seg * 2); // default value _longest_free OK 447 | 448 | // check the hint first 449 | if (hint) 450 | { 451 | const_pageiter it = _seg_pages.upper_bound(_PageIndex(hint)); 452 | if (it != _seg_pages.end()) 453 | { 454 | --it; 455 | if (hint - it->_page_ptr < (intptr_t)bm_sz && ((res = _alloc(it->_page_idx, n, hint)))) 456 | return res; 457 | } 458 | } 459 | 460 | // allocate using segment tree 461 | // --------------------------- 462 | 463 | // first check whether we need to allocate new pages 464 | if (_seg[1].longest_free() < n) 465 | { 466 | // need to grow the segment tree 467 | std::vector<_Segment> new_seg(4 * _num_seg); 468 | for (size_t i=_num_seg; i>0; i /= 2) 469 | { 470 | for (size_t j=0; j= n); 506 | 507 | // and allocate from that page 508 | res = _alloc(i, n, 0); 509 | return res; 510 | } 511 | 512 | void deallocate(pointer p, size_type n) 513 | { 514 | 
const_pageiter it = _find_page(p); 515 | _free(it->_page_idx, (size_type)(p - it->_page_ptr), n); 516 | } 517 | 518 | // tries to extend the current buffer if possible *without* moving the content. If 519 | // space_after == true, tries to add space after preferably, o/w before. 520 | // returns null if buffer couldn't be extended. 521 | // -------------------------------------------------------------------------------- 522 | pointer extend(pointer p, size_type old_size, size_type new_size, bool space_after) 523 | { 524 | assert(new_size > old_size); 525 | if (new_size <= old_size) 526 | { 527 | if (new_size == old_size) 528 | return p; 529 | return 0; 530 | } 531 | 532 | const_pageiter it = _find_page(p); 533 | return _extend(it->_page_idx, (size_type)(p - it->_page_ptr), 534 | old_size, new_size, space_after); 535 | } 536 | 537 | pointer shrink(pointer p, size_type old_size, size_type new_size) 538 | { 539 | assert(new_size && new_size < old_size); 540 | const_pageiter it = _find_page(p); 541 | return _shrink(it->_page_idx, (size_type)(p - it->_page_ptr), 542 | old_size, new_size); 543 | } 544 | 545 | bool validate() const 546 | { 547 | size_t actual = 0; 548 | for (size_t i=0; i<_seg.size(); ++i) 549 | actual += _seg[i].num_allocated(); 550 | assert(actual == _num_allocated); 551 | return actual == _num_allocated; 552 | } 553 | 554 | void swap(SegTree &o) 555 | { 556 | using std::swap; 557 | 558 | swap(_num_allocated, o._num_allocated); 559 | swap(_num_seg, o._num_seg); 560 | swap(_num_extend_tries, o._num_extend_tries); 561 | swap(_num_extend_successes, o._num_extend_successes); 562 | swap(_seg, o._seg); 563 | _seg_pages.swap(o._seg_pages); 564 | } 565 | 566 | private: 567 | // ---------------------------------------------------------------------- 568 | class _PageIndex 569 | { 570 | public: 571 | _PageIndex(const T *ptr = 0, size_t idx = 0) : _page_ptr(ptr), _page_idx(idx) {} 572 | bool operator<(const _PageIndex &o) const { return _page_ptr < o._page_ptr; } 
573 | bool operator==(const _PageIndex &o) const { return _page_ptr == o._page_ptr; } 574 | 575 | const T *_page_ptr; 576 | size_t _page_idx; 577 | }; 578 | 579 | typedef Segment _Segment; 580 | typedef PageContainer<_PageIndex> SegPages; // page_ptr -> segment index 581 | typedef typename SegPages::const_iterator const_pageiter; 582 | 583 | const_pageiter _find_page(pointer p) const 584 | { 585 | const_pageiter it = _seg_pages.upper_bound(_PageIndex(p)); 586 | --it; 587 | assert(p - it->_page_ptr < (intptr_t)bm_sz); 588 | return it; 589 | } 590 | 591 | pointer _alloc(size_t seg_idx, size_type n, const_pointer /* hint */ = 0) 592 | { 593 | intptr_t diff = 0; 594 | _Segment &segment = _seg[seg_idx]; 595 | const T *page = segment.page(); 596 | pointer res = segment.allocate(n, diff); 597 | if (res) 598 | { 599 | _num_allocated += n; 600 | _update_segment_tree(seg_idx, diff); 601 | if (!page && segment.page()) 602 | { 603 | _PageIndex pi(segment.page(), seg_idx); 604 | _seg_pages.insert(pi); 605 | } 606 | } 607 | return res; 608 | } 609 | 610 | pointer _extend(size_t seg_idx, size_type n, size_type old_size, size_type new_size, 611 | bool space_after) 612 | { 613 | intptr_t diff = 0; 614 | _num_extend_tries++; 615 | _Segment &segment = _seg[seg_idx]; 616 | pointer res = segment.extend(n, old_size, new_size, space_after, diff); 617 | if (res) 618 | { 619 | _num_allocated += new_size - old_size; 620 | _update_segment_tree(seg_idx, diff); 621 | _num_extend_successes++; 622 | } 623 | return res; 624 | } 625 | 626 | pointer _shrink(size_t seg_idx, size_type n, size_type old_size, size_type new_size) 627 | { 628 | intptr_t diff = 0; 629 | _Segment &segment = _seg[seg_idx]; 630 | pointer res = segment.shrink(n, old_size, new_size, diff); 631 | if (res) 632 | { 633 | _num_allocated -= old_size - new_size; 634 | _update_segment_tree(seg_idx, diff); 635 | } 636 | return res; 637 | } 638 | 639 | void _free(size_t seg_idx, size_type start, size_type n) 640 | { 641 | intptr_t diff 
= 0; 642 | _Segment &segment = _seg[seg_idx]; 643 | const T *page = segment.page(); 644 | 645 | if (segment.free(start, n, diff)) 646 | { 647 | _num_allocated -= n; 648 | _update_segment_tree(seg_idx, diff); 649 | if (page && !segment.page()) 650 | { 651 | _seg_pages.erase(_PageIndex(page)); 652 | if (_seg_pages.empty()) 653 | { 654 | SegPages().swap(_seg_pages); 655 | std::vector<_Segment>().swap(_seg); 656 | } 657 | } 658 | } 659 | } 660 | 661 | template Ti _mymax(Ti a, Ti b) { return a > b ? a : b; } 662 | 663 | void _update_segment_tree(size_t seg_idx, intptr_t diff) 664 | { 665 | if (diff == 0) 666 | return; 667 | assert(seg_idx >= _num_seg); 668 | if (diff > 0) 669 | { 670 | // longest_free for seg_idx is larger - propagate up if needed 671 | while (seg_idx > 1 && _seg[seg_idx/2].longest_free() < _seg[seg_idx].longest_free()) 672 | { 673 | _seg[seg_idx/2].longest_free() = _seg[seg_idx].longest_free(); 674 | seg_idx /= 2; 675 | } 676 | } 677 | else 678 | { 679 | // longest_free for seg_idx is smaller - propagate up if needed 680 | while (seg_idx > 1) 681 | { 682 | seg_idx &= ~1; 683 | offset_type cur_max = _mymax(_seg[seg_idx].longest_free(), 684 | _seg[seg_idx + 1].longest_free()); 685 | seg_idx /= 2; 686 | if (cur_max < _seg[seg_idx].longest_free()) 687 | _seg[seg_idx].longest_free() = cur_max; 688 | else 689 | break; 690 | } 691 | } 692 | } 693 | 694 | // ---------------------------------------------------------------------- 695 | size_t _num_allocated; 696 | size_t _num_seg; // number of segments used for allocation 697 | size_t _num_extend_tries; 698 | size_t _num_extend_successes; 699 | std::vector<_Segment> _seg; 700 | SegPages _seg_pages; // find segments when deallocating/realloc 701 | }; 702 | 703 | public: 704 | 705 | spp_allocator() : _st(new SegTree) {} 706 | ~spp_allocator() {} 707 | spp_allocator(const spp_allocator& o) : _st(o._st) { } 708 | spp_allocator& operator=(const spp_allocator &o) { _st = o._st; return *this; } 709 | 710 | #if 
!defined(SPP_NO_CXX11_RVALUE_REFERENCES) 711 | spp_allocator(spp_allocator&& o) 712 | { 713 | o.swap(*this); 714 | } 715 | #endif 716 | 717 | void swap(spp_allocator &o) 718 | { 719 | _st.swap(o._st); 720 | } 721 | 722 | pointer address(reference r) const { return &r; } 723 | 724 | const_pointer address(const_reference r) const { return &r; } 725 | 726 | pointer allocate(size_type n, const_pointer hint = 0) 727 | { 728 | return _st->allocate(n, hint); 729 | } 730 | 731 | void deallocate(pointer p, size_type n) 732 | { 733 | _st->deallocate(p, n); 734 | } 735 | 736 | // tries to extend the current buffer if possible *without* moving the content. If 737 | // space_after == true, tries to add space after preferably, o/w before. 738 | // returns null if buffer couldn't be extended. 739 | // ------------------------------------------------------------------------------- 740 | pointer extend(pointer p, size_type old_size, size_type new_size, bool space_after) 741 | { 742 | return _st->extend(p, old_size, new_size, space_after); 743 | } 744 | 745 | pointer shrink(pointer p, size_type old_size, size_type new_size) 746 | { 747 | assert(new_size <= old_size); 748 | 749 | if (new_size == 0) 750 | { 751 | _st->deallocate(p, old_size); 752 | return 0; 753 | } 754 | 755 | if (new_size == old_size) 756 | return p; 757 | 758 | return _st->shrink(p, old_size, new_size); 759 | } 760 | 761 | pointer reallocate(pointer p, size_type old_size, size_type new_size) 762 | { 763 | if (!p) 764 | return allocate(new_size); 765 | 766 | if (new_size <= old_size) 767 | return shrink(p, old_size, new_size); // we assume that shrink returns p 768 | 769 | pointer res = extend(p, old_size, new_size, true); 770 | if (res) 771 | { 772 | if (res < p) 773 | memmove(res, p, old_size * sizeof(T)); 774 | return res; 775 | } 776 | 777 | res = allocate(new_size, p); 778 | if (res) 779 | memcpy(res, p, old_size * sizeof(T)); 780 | deallocate(p, old_size); 781 | return res; 782 | } 783 | 784 | size_type 
max_size() const 785 | { 786 | return static_cast(-1); 787 | } 788 | 789 | void construct(pointer p, const value_type& val) { new(p) value_type(val); } 790 | 791 | void destroy(pointer p) { p->~value_type(); } 792 | 793 | // sparsepp uses the rebind type for the group_allocator. Make sure this allocator 794 | // uses malloc/free, as the spp_allocator has a max_size() of page_size. 795 | // ------------------------------------------------------------------------------- 796 | template 797 | struct rebind 798 | { 799 | typedef spp_::libc_allocator other; 800 | }; 801 | 802 | private: 803 | spp_sptr > _st; 804 | }; 805 | 806 | } // spp_ namespace 807 | 808 | 809 | template 810 | inline bool operator==(const spp_::spp_allocator &a, const spp_::spp_allocator &b) 811 | { 812 | return &a == &b; 813 | } 814 | 815 | template 816 | inline bool operator!=(const spp_::spp_allocator &a, const spp_::spp_allocator &b) 817 | { 818 | return &a != &b; 819 | } 820 | 821 | namespace std 822 | { 823 | template 824 | inline void swap(spp_::spp_allocator &a, spp_::spp_allocator &b) 825 | { 826 | a.swap(b); 827 | } 828 | } 829 | 830 | #endif // spp_alloc_h_guard 831 | -------------------------------------------------------------------------------- /include/sparsepp/spp_bitset.h: -------------------------------------------------------------------------------- 1 | #if !defined(spp_bitset_h_guard) 2 | #define spp_bitset_h_guard 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include // includes spp_config.h 10 | #include 11 | 12 | namespace spp_ 13 | { 14 | 15 | static inline uint32_t count_trailing_zeroes_naive(size_t v) SPP_NOEXCEPT 16 | { 17 | if (v == 0) 18 | return sizeof(v) * 8; 19 | 20 | uint32_t count = 0; 21 | while (v % 2 == 0) 22 | { 23 | count++; 24 | v >>= 1; 25 | } 26 | return count; 27 | } 28 | 29 | static inline uint32_t count_leading_zeros(size_t v) SPP_NOEXCEPT 30 | { 31 | v = v | (v >> 1); 32 | v = v | (v >> 2); 33 | v = v | (v >> 4); 34 | v = v | (v >> 8); 
35 | v = v | (v >>16); 36 | if (sizeof(size_t) == 8) 37 | v = v | (v >> 32); 38 | return s_popcount(~v); 39 | } 40 | 41 | // ---------------------------------------------------------------- 42 | // Bitset whose size is always a multiple of 64 bits 43 | // ---------------------------------------------------------------- 44 | template 45 | class spp_bitset 46 | { 47 | public: 48 | spp_bitset() SPP_NOEXCEPT 49 | { 50 | assert(N > 0 && N % 64 == 0); 51 | std::fill_n(_bits, num_words, size_t(0)); 52 | } 53 | 54 | bool operator[](size_t pos) const SPP_NOEXCEPT { return test(pos); } 55 | 56 | bool test(size_t pos) const 57 | { 58 | return !!(_bits[_idx(pos)] & (_mask(pos))); 59 | } 60 | 61 | void set(size_t pos) SPP_NOEXCEPT { _bits[_idx(pos)] |= _mask(pos); } 62 | void reset(size_t pos) SPP_NOEXCEPT { _bits[_idx(pos)] &= ~_mask(pos); } 63 | void flip(size_t pos) SPP_NOEXCEPT { _bits[_idx(pos)] ^= _mask(pos); } // toggles only bit pos; XOR with the inverted mask would flip every other bit of the word instead 64 | 65 | void set(size_t from, size_t to) SPP_NOEXCEPT 66 | { 67 | size_t first = _num_words(from); // first full word after from 68 | size_t last = _idx(to); // word containing to 69 | 70 | if (first <= last) 71 | { 72 | if (from % bits_per_word) 73 | _bits[first-1] |= ~(_mask(from) - 1); 74 | 75 | size_t i; 76 | for (i=first; iany(from, to); 189 | } 190 | 191 | bool all() const SPP_NOEXCEPT 192 | { 193 | for (size_t i=0; iany(); } 224 | 225 | bool operator==(const spp_bitset &rhs) const SPP_NOEXCEPT 226 | { 227 | return std::equal(_bits, _bits + num_words, rhs._bits); 228 | } 229 | 230 | bool operator!=(const spp_bitset &rhs) const SPP_NOEXCEPT 231 | { 232 | return !(*this == rhs); 233 | } 234 | 235 | spp_bitset& operator&=(const spp_bitset& rhs) SPP_NOEXCEPT 236 | { 237 | for (size_t i=0; i= N) 259 | { 260 | reset(); 261 | return *this; 262 | } 263 | 264 | if (n > 0) 265 | { 266 | size_t last = num_words - 1; 267 | size_t div = n / bits_per_word; 268 | size_t r = n % bits_per_word; 269 | 270 | if (r) 271 | { 272 | size_t c = bits_per_word - r; 273 | 274 | for 
(size_t i = last-div; i > 0; --i) 275 | _bits[i + div] = (_bits[i] << r) | (_bits[i-1] >> c); 276 | _bits[div] = _bits[0] << r; 277 | } 278 | else 279 | { 280 | for (size_t i = last-div; i > 0; --i) 281 | _bits[i + div] = _bits[i]; 282 | _bits[div] = _bits[0]; 283 | } 284 | 285 | std::fill_n(&_bits[0], div, size_t(0)); 286 | } 287 | 288 | return *this; 289 | } 290 | 291 | spp_bitset& operator>>=(size_t n) SPP_NOEXCEPT 292 | { 293 | if (n >= N) 294 | { 295 | reset(); 296 | return *this; 297 | } 298 | 299 | if (n > 0) 300 | { 301 | size_t last = num_words - 1; 302 | size_t div = n / bits_per_word; 303 | size_t r = n % bits_per_word; 304 | 305 | if (r) 306 | { 307 | size_t c = bits_per_word - r; 308 | 309 | for (size_t i = div; i < last; ++i) 310 | _bits[i-div] = (_bits[i] >> r) | (_bits[i+1] << c); 311 | _bits[last - div] = _bits[last] >> r; 312 | } 313 | else 314 | { 315 | for (size_t i = div; i <= last; ++i) 316 | _bits[i - div] = _bits[i]; 317 | } 318 | 319 | std::fill_n(&_bits[num_words - div], div, size_t(0)); 320 | } 321 | 322 | return *this; 323 | } 324 | 325 | spp_bitset& set() SPP_NOEXCEPT 326 | { 327 | for (size_t i=0; i>(size_t n) const SPP_NOEXCEPT 369 | { 370 | spp_bitset res(*this); 371 | res >>= n; 372 | return res; 373 | } 374 | 375 | spp_bitset operator|(const spp_bitset &o) const SPP_NOEXCEPT 376 | { 377 | spp_bitset res(*this); 378 | res |= o; 379 | return res; 380 | } 381 | 382 | spp_bitset operator&(const spp_bitset &o) const SPP_NOEXCEPT 383 | { 384 | spp_bitset res(*this); 385 | res &= o; 386 | return res; 387 | } 388 | 389 | spp_bitset operator~() const SPP_NOEXCEPT 390 | { 391 | spp_bitset res(*this); 392 | res.flip(); 393 | return res; 394 | } 395 | 396 | // returns length of longuest sequence of consecutive zeros 397 | // thanks to Michal Forisek for the algorithm 398 | // -------------------------------------------------------- 399 | size_t longest_zero_sequence() const SPP_NOEXCEPT 400 | { 401 | if (none()) 402 | return size(); 403 | 404 
| if (all()) 405 | return size_t(0); 406 | 407 | spp_bitset state = (*this) | (*this) << 1; 408 | state.set(0); 409 | 410 | if (state.all()) 411 | return 1; 412 | 413 | size_t steps = 1; 414 | while (true) 415 | { 416 | spp_bitset new_state = state | state << steps; 417 | if (new_state.all()) 418 | break; 419 | state = new_state; 420 | steps *= 2; 421 | } 422 | 423 | size_t lo = steps, hi = 2 * steps; 424 | while (hi - lo > 1) 425 | { 426 | size_t med = lo + (hi - lo) / 2; 427 | spp_bitset med_state = state | state << (med - steps); 428 | if (med_state.all()) 429 | hi = med; 430 | else 431 | lo = med; 432 | } 433 | return hi; 434 | } 435 | 436 | // returns length of longuest sequence of consecutive zeros 437 | // thanks to Michal Forisek for the algorithm 438 | // -------------------------------------------------------- 439 | size_t longest_zero_sequence(size_t ceiling, // max value needed 440 | size_t &start_pos) // start of sequence 441 | const SPP_NOEXCEPT 442 | { 443 | if (none()) 444 | return size(); 445 | 446 | if (all()) 447 | return size_t(0); 448 | 449 | spp_bitset state = (*this) | (*this) << 1; 450 | state.set(0); 451 | 452 | if (state.all()) 453 | return 1; 454 | 455 | size_t steps = 1; 456 | while (true) 457 | { 458 | spp_bitset new_state = state | state << steps; 459 | if (new_state.all(start_pos)) 460 | break; 461 | 462 | if (steps >= ceiling) 463 | { 464 | start_pos = (size_t)-1; 465 | return ceiling; // must be ceiling, not steps 466 | } 467 | 468 | state = new_state; 469 | steps *= 2; 470 | } 471 | 472 | size_t lo = steps, hi = 2*steps; 473 | while (hi-lo > 1) 474 | { 475 | size_t med = lo+(hi-lo)/2; 476 | spp_bitset med_state = state | state << (med-steps); 477 | if (med_state.all(start_pos)) 478 | hi = med; 479 | else 480 | lo = med; 481 | } 482 | 483 | if (hi >= ceiling) 484 | { 485 | start_pos = (size_t)-1; 486 | return ceiling; // must be ceiling, not hi 487 | } 488 | 489 | if (start_pos != (size_t)-1) 490 | start_pos -= hi - 1; 491 | return 
hi; 492 | } 493 | 494 | // returns the length of the zero sequence around [start, end] 495 | size_t zero_sequence_size_around(size_t start, size_t end, size_t &start_pos) 496 | { 497 | size_t lg = end - start; 498 | 499 | size_t cur = start; 500 | while (cur > 0 && !test(--cur)) 501 | ++lg; 502 | 503 | start_pos = end - lg; 504 | 505 | cur = end; 506 | while (cur < N && !test(cur++)) 507 | ++lg; 508 | 509 | return lg; 510 | } 511 | 512 | 513 | #ifdef SPP_TEST 514 | // slow implementation - just for testing 515 | // -------------------------------------- 516 | size_t longest_zero_sequence_naive() const SPP_NOEXCEPT 517 | { 518 | size_t longest = 0; 519 | size_t lg = 0; 520 | for (size_t cur = 0; cur < N; ++cur) 521 | { 522 | if (!test(cur)) 523 | { 524 | if (++lg > longest) 525 | longest = lg; 526 | } 527 | else 528 | lg = 0; 529 | } 530 | return longest; 531 | } 532 | 533 | size_t longest_zero_sequence_naive(size_t ceiling, // max value needed 534 | size_t &start_pos) const SPP_NOEXCEPT 535 | { 536 | size_t longest = 0; 537 | size_t lg = 0; 538 | size_t end_pos = 0; 539 | for (size_t cur = 0; cur < N; ++cur) 540 | { 541 | if (!test(cur)) 542 | { 543 | if (++lg > longest) 544 | { 545 | longest = lg; 546 | end_pos = cur; 547 | if (longest >= ceiling) 548 | { 549 | start_pos = (size_t)-1; 550 | return ceiling; // must be ceiling, not steps 551 | } 552 | } 553 | } 554 | else 555 | lg = 0; 556 | } 557 | start_pos = end_pos - (longest - 1); 558 | return longest; 559 | } 560 | 561 | void set_naive(size_t from, size_t to) 562 | { 563 | for (size_t cur = from; cur < to; ++cur) 564 | set(cur); 565 | } 566 | 567 | void reset_naive(size_t from, size_t to) 568 | { 569 | for (size_t cur = from; cur < to; ++cur) 570 | reset(cur); 571 | } 572 | 573 | bool all_naive(size_t from, size_t to) const SPP_NOEXCEPT 574 | { 575 | for (size_t cur = from; cur < to; ++cur) 576 | if (!test(cur)) 577 | return false; 578 | return true; 579 | } 580 | 581 | bool all_naive(size_t &start_idx) const 
SPP_NOEXCEPT 582 | { 583 | for (size_t cur = 0; cur < N; ++cur) 584 | if (!test(cur)) 585 | { 586 | start_idx = cur; 587 | return false; 588 | } 589 | return true; 590 | } 591 | 592 | bool any_naive(size_t from, size_t to) const SPP_NOEXCEPT 593 | { 594 | for (size_t cur = from; cur < to; ++cur) 595 | if (test(cur)) 596 | return true; 597 | return false; 598 | } 599 | 600 | #endif 601 | 602 | 603 | size_t find_first_n(size_t num_zeros) 604 | { 605 | if (num_zeros == 0) 606 | return npos; 607 | return _find_next_n(num_zeros, 0, N); 608 | } 609 | 610 | size_t find_next_n(size_t num_zeros, size_t start_pos = 0) 611 | { 612 | if (start_pos > N || num_zeros == 0) 613 | return npos; 614 | 615 | size_t res = _find_next_n(num_zeros, start_pos, N); 616 | 617 | if (res == npos && start_pos) 618 | res = _find_next_n(num_zeros, 0, start_pos + num_zeros); // + num_zeros needed! 619 | 620 | return res; 621 | } 622 | 623 | bool has_zero_word() const // true if at least one storage word has no bit set 624 | { 625 | for (int i=(int)num_words-1; i>=0;--i) // scan every word, including word 0 626 | if (!_bits[i]) 627 | return true; 628 | return false; 629 | } 630 | 631 | 632 | static const size_t npos = size_t(-1); 633 | static const unsigned bits_per_word = sizeof(size_t) * 8; 634 | static const unsigned num_words = N / bits_per_word; 635 | 636 | 637 | private: 638 | // find first sequence of num_zeros zeroes starting at position start_pos 639 | // slow implementation! 
640 | // see http://www.perlmonks.org/?node_id=1037467 641 | // ---------------------------------------------------------------------- 642 | size_t _find_next_n(size_t num_zeros, size_t start_pos, size_t end_pos) const SPP_NOEXCEPT 643 | { 644 | assert(num_zeros <= N && start_pos <= end_pos); 645 | if (end_pos < start_pos + num_zeros) 646 | return npos; 647 | 648 | if (this->none(start_pos, start_pos + num_zeros)) 649 | return start_pos; 650 | 651 | if (end_pos > N) 652 | end_pos = N; 653 | 654 | size_t lg = 0; 655 | for (size_t cur = start_pos; cur < end_pos; ++cur) 656 | { 657 | if (!test(cur)) 658 | { 659 | if (++lg == num_zeros) 660 | { 661 | assert(this->none(cur - num_zeros + 1, cur + 1)); 662 | return cur - num_zeros + 1; 663 | } 664 | } 665 | else 666 | { 667 | if (cur % bits_per_word == 0) 668 | { 669 | // can we skip the whole word? 670 | size_t x = _bits[_idx(cur)]; 671 | if (x == (size_t)-1) 672 | { 673 | cur += bits_per_word - 1; 674 | if (cur >= end_pos) 675 | return npos; 676 | } 677 | else if (bits_per_word - s_popcount(x) < num_zeros) 678 | { 679 | cur += bits_per_word - count_leading_zeros(x) - 1; 680 | if (cur >= end_pos) 681 | return npos; 682 | } 683 | } 684 | lg = 0; 685 | } 686 | } 687 | return npos; 688 | } 689 | 690 | static size_t _num_words(size_t num_bits) { return (num_bits + bits_per_word - 1) / bits_per_word; } 691 | static size_t _idx(size_t pos) { return pos / bits_per_word; } 692 | static size_t _mask(size_t pos) { return size_t(1) << (pos % bits_per_word); } 693 | 694 | size_t _bits[num_words]; 695 | }; 696 | 697 | } 698 | 699 | 700 | #endif // spp_bitset_h_guard 701 | -------------------------------------------------------------------------------- /include/sparsepp/spp_config.h: -------------------------------------------------------------------------------- 1 | #if !defined(spp_config_h_guard) 2 | #define spp_config_h_guard 3 | 4 | // -------------------------------------------------- 5 | // Sparsepp config macros 6 | // some 
can be overridden on the command line 7 | // -------------------------------------------------- 8 | #ifndef SPP_NAMESPACE 9 | #define SPP_NAMESPACE spp 10 | #endif 11 | 12 | #define spp_ SPP_NAMESPACE 13 | 14 | #ifndef SPP_DEFAULT_ALLOCATOR 15 | #ifdef SPP_USE_SPP_ALLOC 16 | #define SPP_DEFAULT_ALLOCATOR spp_::spp_allocator 17 | #define SPP_INCLUDE_SPP_ALLOC 18 | #else 19 | #define SPP_DEFAULT_ALLOCATOR spp_::libc_allocator 20 | #endif 21 | #endif 22 | 23 | #define SPP_ALLOC_PAGE_SIZE 2048 24 | 25 | #ifndef SPP_GROUP_SIZE 26 | // must be 32 or 64 27 | #define SPP_GROUP_SIZE 32 28 | #endif 29 | 30 | #ifndef SPP_ALLOC_SZ 31 | // must be power of 2 (0 = aggressive alloc, 1 = smallest memory usage, 2 = good compromise) 32 | #define SPP_ALLOC_SZ 0 33 | #endif 34 | 35 | #ifndef SPP_STORE_NUM_ITEMS 36 | // 1 uses a little bit more memory, but faster!! 37 | #define SPP_STORE_NUM_ITEMS 1 38 | #endif 39 | 40 | 41 | // --------------------------------------------------------------------------- 42 | // Compiler detection code (SPP_ preprocessor macros) derived from Boost 43 | // libraries. Therefore Boost software licence reproduced below. 
44 | // --------------------------------------------------------------------------- 45 | // Boost Software License - Version 1.0 - August 17th, 2003 46 | // 47 | // Permission is hereby granted, free of charge, to any person or organization 48 | // obtaining a copy of the software and accompanying documentation covered by 49 | // this license (the "Software") to use, reproduce, display, distribute, 50 | // execute, and transmit the Software, and to prepare derivative works of the 51 | // Software, and to permit third-parties to whom the Software is furnished to 52 | // do so, all subject to the following: 53 | // 54 | // The copyright notices in the Software and this entire statement, including 55 | // the above license grant, this restriction and the following disclaimer, 56 | // must be included in all copies of the Software, in whole or in part, and 57 | // all derivative works of the Software, unless such copies or derivative 58 | // works are solely in the form of machine-executable object code generated by 59 | // a source language processor. 60 | // 61 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 62 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 63 | // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 64 | // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 65 | // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 66 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 67 | // DEALINGS IN THE SOFTWARE. 
68 | // --------------------------------------------------------------------------- 69 | 70 | // Boost like configuration 71 | // ------------------------ 72 | #if defined __clang__ 73 | 74 | #if defined(i386) 75 | #include 76 | inline void spp_cpuid(int info[4], int InfoType) { 77 | __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); 78 | } 79 | #endif 80 | 81 | #define SPP_POPCNT __builtin_popcount 82 | #define SPP_POPCNT64 __builtin_popcountll 83 | 84 | #define SPP_HAS_CSTDINT 85 | 86 | #ifndef __has_extension 87 | #define __has_extension __has_feature 88 | #endif 89 | 90 | #if !__has_feature(cxx_exceptions) && !defined(SPP_NO_EXCEPTIONS) 91 | #define SPP_NO_EXCEPTIONS 92 | #endif 93 | 94 | #if !__has_feature(cxx_rtti) && !defined(SPP_NO_RTTI) 95 | #define SPP_NO_RTTI 96 | #endif 97 | 98 | #if !__has_feature(cxx_rtti) && !defined(SPP_NO_TYPEID) 99 | #define SPP_NO_TYPEID 100 | #endif 101 | 102 | #if defined(__int64) && !defined(__GNUC__) 103 | #define SPP_HAS_MS_INT64 104 | #endif 105 | 106 | #define SPP_HAS_NRVO 107 | 108 | // Branch prediction hints 109 | #if defined(__has_builtin) 110 | #if __has_builtin(__builtin_expect) 111 | #define SPP_LIKELY(x) __builtin_expect(x, 1) 112 | #define SPP_UNLIKELY(x) __builtin_expect(x, 0) 113 | #endif 114 | #endif 115 | 116 | // Clang supports "long long" in all compilation modes. 
117 | #define SPP_HAS_LONG_LONG 118 | 119 | #if !__has_feature(cxx_constexpr) 120 | #define SPP_NO_CXX11_CONSTEXPR 121 | #endif 122 | 123 | #if !__has_feature(cxx_decltype) 124 | #define SPP_NO_CXX11_DECLTYPE 125 | #endif 126 | 127 | #if !__has_feature(cxx_decltype_incomplete_return_types) 128 | #define SPP_NO_CXX11_DECLTYPE_N3276 129 | #endif 130 | 131 | #if !__has_feature(cxx_defaulted_functions) 132 | #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS 133 | #endif 134 | 135 | #if !__has_feature(cxx_deleted_functions) 136 | #define SPP_NO_CXX11_DELETED_FUNCTIONS 137 | #endif 138 | 139 | #if !__has_feature(cxx_explicit_conversions) 140 | #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS 141 | #endif 142 | 143 | #if !__has_feature(cxx_default_function_template_args) 144 | #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS 145 | #endif 146 | 147 | #if !__has_feature(cxx_generalized_initializers) 148 | #define SPP_NO_CXX11_HDR_INITIALIZER_LIST 149 | #endif 150 | 151 | #if !__has_feature(cxx_lambdas) 152 | #define SPP_NO_CXX11_LAMBDAS 153 | #endif 154 | 155 | #if !__has_feature(cxx_local_type_template_args) 156 | #define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS 157 | #endif 158 | 159 | #if !__has_feature(cxx_raw_string_literals) 160 | #define SPP_NO_CXX11_RAW_LITERALS 161 | #endif 162 | 163 | #if !__has_feature(cxx_reference_qualified_functions) 164 | #define SPP_NO_CXX11_REF_QUALIFIERS 165 | #endif 166 | 167 | #if !__has_feature(cxx_generalized_initializers) 168 | #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX 169 | #endif 170 | 171 | #if !__has_feature(cxx_rvalue_references) 172 | #define SPP_NO_CXX11_RVALUE_REFERENCES 173 | #endif 174 | 175 | #if !__has_feature(cxx_static_assert) 176 | #define SPP_NO_CXX11_STATIC_ASSERT 177 | #endif 178 | 179 | #if !__has_feature(cxx_alias_templates) 180 | #define SPP_NO_CXX11_TEMPLATE_ALIASES 181 | #endif 182 | 183 | #if !__has_feature(cxx_variadic_templates) 184 | #define SPP_NO_CXX11_VARIADIC_TEMPLATES 185 | #endif 186 | 187 | 
#if !__has_feature(cxx_user_literals) 188 | #define SPP_NO_CXX11_USER_DEFINED_LITERALS 189 | #endif 190 | 191 | #if !__has_feature(cxx_alignas) 192 | #define SPP_NO_CXX11_ALIGNAS 193 | #endif 194 | 195 | #if !__has_feature(cxx_trailing_return) 196 | #define SPP_NO_CXX11_TRAILING_RESULT_TYPES 197 | #endif 198 | 199 | #if !__has_feature(cxx_inline_namespaces) 200 | #define SPP_NO_CXX11_INLINE_NAMESPACES 201 | #endif 202 | 203 | #if !__has_feature(cxx_override_control) 204 | #define SPP_NO_CXX11_FINAL 205 | #endif 206 | 207 | #if !(__has_feature(__cxx_binary_literals__) || __has_extension(__cxx_binary_literals__)) 208 | #define SPP_NO_CXX14_BINARY_LITERALS 209 | #endif 210 | 211 | #if !__has_feature(__cxx_decltype_auto__) 212 | #define SPP_NO_CXX14_DECLTYPE_AUTO 213 | #endif 214 | 215 | #if !__has_feature(__cxx_init_captures__) 216 | #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES 217 | #endif 218 | 219 | #if !__has_feature(__cxx_generic_lambdas__) 220 | #define SPP_NO_CXX14_GENERIC_LAMBDAS 221 | #endif 222 | 223 | 224 | #if !__has_feature(__cxx_generic_lambdas__) || !__has_feature(__cxx_relaxed_constexpr__) 225 | #define SPP_NO_CXX14_CONSTEXPR 226 | #endif 227 | 228 | #if !__has_feature(__cxx_return_type_deduction__) 229 | #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION 230 | #endif 231 | 232 | #if !__has_feature(__cxx_variable_templates__) 233 | #define SPP_NO_CXX14_VARIABLE_TEMPLATES 234 | #endif 235 | 236 | #if __cplusplus < 201400 237 | #define SPP_NO_CXX14_DIGIT_SEPARATORS 238 | #endif 239 | 240 | #if defined(__has_builtin) && __has_builtin(__builtin_unreachable) 241 | #define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable(); 242 | #endif 243 | 244 | #define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__)) 245 | 246 | #ifndef SPP_COMPILER 247 | #define SPP_COMPILER "Clang version " __clang_version__ 248 | #endif 249 | 250 | #define SPP_CLANG 1 251 | 252 | 253 | #elif defined __GNUC__ 254 | 255 | #define SPP_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + 
__GNUC_PATCHLEVEL__) 256 | 257 | // definition to expand macro then apply to pragma message 258 | // #define VALUE_TO_STRING(x) #x 259 | // #define VALUE(x) VALUE_TO_STRING(x) 260 | // #define VAR_NAME_VALUE(var) #var "=" VALUE(var) 261 | // #pragma message(VAR_NAME_VALUE(SPP_GCC_VERSION)) 262 | 263 | #if defined(i386) 264 | #include 265 | inline void spp_cpuid(int info[4], int InfoType) { 266 | __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); 267 | } 268 | #endif 269 | 270 | // __POPCNT__ is defined when compiled with popcount support 271 | // (-mpopcnt compiler option is given for example) 272 | #ifdef __POPCNT__ 273 | // slower unless compiled with -mpopcnt 274 | #define SPP_POPCNT __builtin_popcount 275 | #define SPP_POPCNT64 __builtin_popcountll 276 | #endif 277 | 278 | #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L) 279 | #define SPP_GCC_CXX11 280 | #endif 281 | 282 | #if __GNUC__ == 3 283 | #if defined (__PATHSCALE__) 284 | #define SPP_NO_TWO_PHASE_NAME_LOOKUP 285 | #define SPP_NO_IS_ABSTRACT 286 | #endif 287 | 288 | #if __GNUC_MINOR__ < 4 289 | #define SPP_NO_IS_ABSTRACT 290 | #endif 291 | 292 | #define SPP_NO_CXX11_EXTERN_TEMPLATE 293 | #endif 294 | 295 | #if __GNUC__ < 4 296 | // 297 | // All problems to gcc-3.x and earlier here: 298 | // 299 | #define SPP_NO_TWO_PHASE_NAME_LOOKUP 300 | #ifdef __OPEN64__ 301 | #define SPP_NO_IS_ABSTRACT 302 | #endif 303 | #endif 304 | 305 | // GCC prior to 3.4 had #pragma once too but it didn't work well with filesystem links 306 | #if SPP_GCC_VERSION >= 30400 307 | #define SPP_HAS_PRAGMA_ONCE 308 | #endif 309 | 310 | #if SPP_GCC_VERSION < 40400 311 | // Previous versions of GCC did not completely implement value-initialization: 312 | // GCC Bug 30111, "Value-initialization of POD base class doesn't initialize 313 | // members", reported by Jonathan Wakely in 2006, 314 | // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=30111 (fixed for GCC 4.4) 315 | // GCC Bug 33916, "Default 
constructor fails to initialize array members", 316 | // reported by Michael Elizabeth Chastain in 2007, 317 | // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33916 (fixed for GCC 4.2.4) 318 | // See also: http://www.boost.org/libs/utility/value_init.htm #compiler_issues 319 | #define SPP_NO_COMPLETE_VALUE_INITIALIZATION 320 | #endif 321 | 322 | #if !defined(__EXCEPTIONS) && !defined(SPP_NO_EXCEPTIONS) 323 | #define SPP_NO_EXCEPTIONS 324 | #endif 325 | 326 | // 327 | // Threading support: Turn this on unconditionally here (except for 328 | // those platforms where we can know for sure). It will get turned off again 329 | // later if no threading API is detected. 330 | // 331 | #if !defined(__MINGW32__) && !defined(linux) && !defined(__linux) && !defined(__linux__) 332 | #define SPP_HAS_THREADS 333 | #endif 334 | 335 | // 336 | // gcc has "long long" 337 | // Except on Darwin with standard compliance enabled (-pedantic) 338 | // Apple gcc helpfully defines this macro we can query 339 | // 340 | #if !defined(__DARWIN_NO_LONG_LONG) 341 | #define SPP_HAS_LONG_LONG 342 | #endif 343 | 344 | // 345 | // gcc implements the named return value optimization since version 3.1 346 | // 347 | #define SPP_HAS_NRVO 348 | 349 | // Branch prediction hints 350 | #define SPP_LIKELY(x) __builtin_expect(x, 1) 351 | #define SPP_UNLIKELY(x) __builtin_expect(x, 0) 352 | 353 | // 354 | // Dynamic shared object (DSO) and dynamic-link library (DLL) support 355 | // 356 | #if __GNUC__ >= 4 357 | #if (defined(_WIN32) || defined(__WIN32__) || defined(WIN32)) && !defined(__CYGWIN__) 358 | // All Win32 development environments, including 64-bit Windows and MinGW, define 359 | // _WIN32 or one of its variant spellings. Note that Cygwin is a POSIX environment, 360 | // so does not define _WIN32 or its variants. 
361 | #define SPP_HAS_DECLSPEC 362 | #define SPP_SYMBOL_EXPORT __attribute__((__dllexport__)) 363 | #define SPP_SYMBOL_IMPORT __attribute__((__dllimport__)) 364 | #else 365 | #define SPP_SYMBOL_EXPORT __attribute__((__visibility__("default"))) 366 | #define SPP_SYMBOL_IMPORT 367 | #endif 368 | 369 | #define SPP_SYMBOL_VISIBLE __attribute__((__visibility__("default"))) 370 | #else 371 | // config/platform/win32.hpp will define SPP_SYMBOL_EXPORT, etc., unless already defined 372 | #define SPP_SYMBOL_EXPORT 373 | #endif 374 | 375 | // 376 | // RTTI and typeinfo detection is possible post gcc-4.3: 377 | // 378 | #if SPP_GCC_VERSION > 40300 379 | #ifndef __GXX_RTTI 380 | #ifndef SPP_NO_TYPEID 381 | #define SPP_NO_TYPEID 382 | #endif 383 | #ifndef SPP_NO_RTTI 384 | #define SPP_NO_RTTI 385 | #endif 386 | #endif 387 | #endif 388 | 389 | // 390 | // Recent GCC versions have __int128 when in 64-bit mode. 391 | // 392 | // We disable this if the compiler is really nvcc with C++03 as it 393 | // doesn't actually support __int128 as of CUDA_VERSION=7500 394 | // even though it defines __SIZEOF_INT128__. 395 | // See https://svn.boost.org/trac/boost/ticket/8048 396 | // https://svn.boost.org/trac/boost/ticket/11852 397 | // Only re-enable this for nvcc if you're absolutely sure 398 | // of the circumstances under which it's supported: 399 | // 400 | #if defined(__CUDACC__) 401 | #if defined(SPP_GCC_CXX11) 402 | #define SPP_NVCC_CXX11 403 | #else 404 | #define SPP_NVCC_CXX03 405 | #endif 406 | #endif 407 | 408 | #if defined(__SIZEOF_INT128__) && !defined(SPP_NVCC_CXX03) 409 | #define SPP_HAS_INT128 410 | #endif 411 | // 412 | // Recent GCC versions have a __float128 native type, we need to 413 | // include a std lib header to detect this - not ideal, but we'll 414 | // be including later anyway when we select the std lib. 415 | // 416 | // Nevertheless, as of CUDA 7.5, using __float128 with the host 417 | // compiler in pre-C++11 mode is still not supported. 
418 | // See https://svn.boost.org/trac/boost/ticket/11852 419 | // 420 | #ifdef __cplusplus 421 | #include 422 | #else 423 | #include 424 | #endif 425 | 426 | #if defined(_GLIBCXX_USE_FLOAT128) && !defined(__STRICT_ANSI__) && !defined(SPP_NVCC_CXX03) 427 | #define SPP_HAS_FLOAT128 428 | #endif 429 | 430 | // C++0x features in 4.3.n and later 431 | // 432 | #if (SPP_GCC_VERSION >= 40300) && defined(SPP_GCC_CXX11) 433 | // C++0x features are only enabled when -std=c++0x or -std=gnu++0x are 434 | // passed on the command line, which in turn defines 435 | // __GXX_EXPERIMENTAL_CXX0X__. 436 | #define SPP_HAS_DECLTYPE 437 | #define SPP_HAS_RVALUE_REFS 438 | #define SPP_HAS_STATIC_ASSERT 439 | #define SPP_HAS_VARIADIC_TMPL 440 | #define SPP_HAS_CSTDINT 441 | #else 442 | #define SPP_NO_CXX11_DECLTYPE 443 | #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS 444 | #define SPP_NO_CXX11_RVALUE_REFERENCES 445 | #define SPP_NO_CXX11_STATIC_ASSERT 446 | #endif 447 | 448 | // C++0x features in 4.4.n and later 449 | // 450 | #if (SPP_GCC_VERSION < 40400) || !defined(SPP_GCC_CXX11) 451 | #define SPP_NO_CXX11_AUTO_DECLARATIONS 452 | #define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS 453 | #define SPP_NO_CXX11_CHAR16_T 454 | #define SPP_NO_CXX11_CHAR32_T 455 | #define SPP_NO_CXX11_HDR_INITIALIZER_LIST 456 | #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS 457 | #define SPP_NO_CXX11_DELETED_FUNCTIONS 458 | #define SPP_NO_CXX11_TRAILING_RESULT_TYPES 459 | #define SPP_NO_CXX11_INLINE_NAMESPACES 460 | #define SPP_NO_CXX11_VARIADIC_TEMPLATES 461 | #endif 462 | 463 | #if SPP_GCC_VERSION < 40500 464 | #define SPP_NO_SFINAE_EXPR 465 | #endif 466 | 467 | // GCC 4.5 forbids declaration of defaulted functions in private or protected sections 468 | #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 5) || !defined(SPP_GCC_CXX11) 469 | #define SPP_NO_CXX11_NON_PUBLIC_DEFAULTED_FUNCTIONS 470 | #endif 471 | 472 | // C++0x features in 4.5.0 and later 473 | // 474 | #if (SPP_GCC_VERSION < 40500) || 
!defined(SPP_GCC_CXX11) 475 | #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS 476 | #define SPP_NO_CXX11_LAMBDAS 477 | #define SPP_NO_CXX11_LOCAL_CLASS_TEMPLATE_PARAMETERS 478 | #define SPP_NO_CXX11_RAW_LITERALS 479 | #endif 480 | 481 | // C++0x features in 4.6.n and later 482 | // 483 | #if (SPP_GCC_VERSION < 40600) || !defined(SPP_GCC_CXX11) 484 | #define SPP_NO_CXX11_CONSTEXPR 485 | #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX 486 | #endif 487 | 488 | // C++0x features in 4.7.n and later 489 | // 490 | #if (SPP_GCC_VERSION < 40700) || !defined(SPP_GCC_CXX11) 491 | #define SPP_NO_CXX11_FINAL 492 | #define SPP_NO_CXX11_TEMPLATE_ALIASES 493 | #define SPP_NO_CXX11_USER_DEFINED_LITERALS 494 | #define SPP_NO_CXX11_FIXED_LENGTH_VARIADIC_TEMPLATE_EXPANSION_PACKS 495 | #endif 496 | 497 | // C++0x features in 4.8.n and later 498 | // 499 | #if (SPP_GCC_VERSION < 40800) || !defined(SPP_GCC_CXX11) 500 | #define SPP_NO_CXX11_ALIGNAS 501 | #endif 502 | 503 | // C++0x features in 4.8.1 and later 504 | // 505 | #if (SPP_GCC_VERSION < 40801) || !defined(SPP_GCC_CXX11) 506 | #define SPP_NO_CXX11_DECLTYPE_N3276 507 | #define SPP_NO_CXX11_REF_QUALIFIERS 508 | #define SPP_NO_CXX14_BINARY_LITERALS 509 | #endif 510 | 511 | // C++14 features in 4.9.0 and later 512 | // 513 | #if (SPP_GCC_VERSION < 40900) || (__cplusplus < 201300) 514 | #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION 515 | #define SPP_NO_CXX14_GENERIC_LAMBDAS 516 | #define SPP_NO_CXX14_DIGIT_SEPARATORS 517 | #define SPP_NO_CXX14_DECLTYPE_AUTO 518 | #if !((SPP_GCC_VERSION >= 40801) && (SPP_GCC_VERSION < 40900) && defined(SPP_GCC_CXX11)) 519 | #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES 520 | #endif 521 | #endif 522 | 523 | 524 | // C++ 14: 525 | #if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304) 526 | #define SPP_NO_CXX14_CONSTEXPR 527 | #endif 528 | #if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) 529 | #define SPP_NO_CXX14_VARIABLE_TEMPLATES 530 | #endif 531 | 532 | // 533 
| // Unused attribute: 534 | #if __GNUC__ >= 4 535 | #define SPP_ATTRIBUTE_UNUSED __attribute__((__unused__)) 536 | #endif 537 | // 538 | // __builtin_unreachable: 539 | #if SPP_GCC_VERSION >= 40800 540 | #define SPP_UNREACHABLE_RETURN(x) __builtin_unreachable(); 541 | #endif 542 | 543 | #ifndef SPP_COMPILER 544 | #define SPP_COMPILER "GNU C++ version " __VERSION__ 545 | #endif 546 | 547 | // ConceptGCC compiler: 548 | // http://www.generic-programming.org/software/ConceptGCC/ 549 | #ifdef __GXX_CONCEPTS__ 550 | #define SPP_HAS_CONCEPTS 551 | #define SPP_COMPILER "ConceptGCC version " __VERSION__ 552 | #endif 553 | 554 | #elif defined _MSC_VER 555 | 556 | #include // for __popcnt() 557 | 558 | #define SPP_POPCNT_CHECK // slower when defined, but we have to check! 559 | #define spp_cpuid(info, x) __cpuid(info, x) 560 | 561 | #define SPP_POPCNT __popcnt 562 | #if (SPP_GROUP_SIZE == 64 && INTPTR_MAX == INT64_MAX) 563 | #define SPP_POPCNT64 __popcnt64 564 | #endif 565 | 566 | // Attempt to suppress VC6 warnings about the length of decorated names (obsolete): 567 | #pragma warning( disable : 4503 ) // warning: decorated name length exceeded 568 | 569 | #define SPP_HAS_PRAGMA_ONCE 570 | #define SPP_HAS_CSTDINT 571 | 572 | // 573 | // versions check: 574 | // we don't support Visual C++ prior to version 7.1: 575 | #if _MSC_VER < 1310 576 | #error "Antique compiler not supported" 577 | #endif 578 | 579 | #if _MSC_FULL_VER < 180020827 580 | #define SPP_NO_FENV_H 581 | #endif 582 | 583 | #if _MSC_VER < 1400 584 | // although a conforming signature for swprint exists in VC7.1 585 | // it appears not to actually work: 586 | #define SPP_NO_SWPRINTF 587 | 588 | // Our extern template tests also fail for this compiler: 589 | #define SPP_NO_CXX11_EXTERN_TEMPLATE 590 | 591 | // Variadic macros do not exist for VC7.1 and lower 592 | #define SPP_NO_CXX11_VARIADIC_MACROS 593 | #endif 594 | 595 | #if _MSC_VER < 1500 // 140X == VC++ 8.0 596 | #undef SPP_HAS_CSTDINT 597 | #define 
SPP_NO_MEMBER_TEMPLATE_FRIENDS 598 | #endif 599 | 600 | #if _MSC_VER < 1600 // 150X == VC++ 9.0 601 | // A bug in VC9: 602 | #define SPP_NO_ADL_BARRIER 603 | #endif 604 | 605 | 606 | // MSVC (including the latest checked version) has not yet completely 607 | // implemented value-initialization, as is reported: 608 | // "VC++ does not value-initialize members of derived classes without 609 | // user-declared constructor", reported in 2009 by Sylvester Hesp: 610 | // https: //connect.microsoft.com/VisualStudio/feedback/details/484295 611 | // "Presence of copy constructor breaks member class initialization", 612 | // reported in 2009 by Alex Vakulenko: 613 | // https: //connect.microsoft.com/VisualStudio/feedback/details/499606 614 | // "Value-initialization in new-expression", reported in 2005 by 615 | // Pavel Kuznetsov (MetaCommunications Engineering): 616 | // https: //connect.microsoft.com/VisualStudio/feedback/details/100744 617 | // See also: http: //www.boost.org/libs/utility/value_init.htm #compiler_issues 618 | // (Niels Dekker, LKEB, May 2010) 619 | #define SPP_NO_COMPLETE_VALUE_INITIALIZATION 620 | 621 | #ifndef _NATIVE_WCHAR_T_DEFINED 622 | #define SPP_NO_INTRINSIC_WCHAR_T 623 | #endif 624 | 625 | // 626 | // check for exception handling support: 627 | #if !defined(_CPPUNWIND) && !defined(SPP_NO_EXCEPTIONS) 628 | #define SPP_NO_EXCEPTIONS 629 | #endif 630 | 631 | // 632 | // __int64 support: 633 | // 634 | #define SPP_HAS_MS_INT64 635 | #if defined(_MSC_EXTENSIONS) || (_MSC_VER >= 1400) 636 | #define SPP_HAS_LONG_LONG 637 | #else 638 | #define SPP_NO_LONG_LONG 639 | #endif 640 | 641 | #if (_MSC_VER >= 1400) && !defined(_DEBUG) 642 | #define SPP_HAS_NRVO 643 | #endif 644 | 645 | #if _MSC_VER >= 1500 // 150X == VC++ 9.0 646 | #define SPP_HAS_PRAGMA_DETECT_MISMATCH 647 | #endif 648 | 649 | // 650 | // disable Win32 API's if compiler extensions are 651 | // turned off: 652 | // 653 | #if !defined(_MSC_EXTENSIONS) && !defined(SPP_DISABLE_WIN32) 654 | #define 
SPP_DISABLE_WIN32 655 | #endif 656 | 657 | #if !defined(_CPPRTTI) && !defined(SPP_NO_RTTI) 658 | #define SPP_NO_RTTI 659 | #endif 660 | 661 | // 662 | // TR1 features: 663 | // 664 | #if _MSC_VER >= 1700 665 | // #define SPP_HAS_TR1_HASH // don't know if this is true yet. 666 | // #define SPP_HAS_TR1_TYPE_TRAITS // don't know if this is true yet. 667 | #define SPP_HAS_TR1_UNORDERED_MAP 668 | #define SPP_HAS_TR1_UNORDERED_SET 669 | #endif 670 | 671 | // 672 | // C++0x features 673 | // 674 | // See above for SPP_NO_LONG_LONG 675 | 676 | // C++ features supported by VC++ 10 (aka 2010) 677 | // 678 | #if _MSC_VER < 1600 679 | #define SPP_NO_CXX11_AUTO_DECLARATIONS 680 | #define SPP_NO_CXX11_AUTO_MULTIDECLARATIONS 681 | #define SPP_NO_CXX11_LAMBDAS 682 | #define SPP_NO_CXX11_RVALUE_REFERENCES 683 | #define SPP_NO_CXX11_STATIC_ASSERT 684 | #define SPP_NO_CXX11_DECLTYPE 685 | #endif // _MSC_VER < 1600 686 | 687 | #if _MSC_VER >= 1600 688 | #define SPP_HAS_STDINT_H 689 | #endif 690 | 691 | // C++11 features supported by VC++ 11 (aka 2012) 692 | // 693 | #if _MSC_VER < 1700 694 | #define SPP_NO_CXX11_FINAL 695 | #endif // _MSC_VER < 1700 696 | 697 | // C++11 features supported by VC++ 12 (aka 2013). 
698 | // 699 | #if _MSC_FULL_VER < 180020827 700 | #define SPP_NO_CXX11_DEFAULTED_FUNCTIONS 701 | #define SPP_NO_CXX11_DELETED_FUNCTIONS 702 | #define SPP_NO_CXX11_EXPLICIT_CONVERSION_OPERATORS 703 | #define SPP_NO_CXX11_FUNCTION_TEMPLATE_DEFAULT_ARGS 704 | #define SPP_NO_CXX11_RAW_LITERALS 705 | #define SPP_NO_CXX11_TEMPLATE_ALIASES 706 | #define SPP_NO_CXX11_TRAILING_RESULT_TYPES 707 | #define SPP_NO_CXX11_VARIADIC_TEMPLATES 708 | #define SPP_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX 709 | #define SPP_NO_CXX11_DECLTYPE_N3276 710 | #endif 711 | 712 | // C++11 features supported by VC++ 14 (aka 2014) CTP1 713 | #if (_MSC_FULL_VER < 190021730) 714 | #define SPP_NO_CXX11_REF_QUALIFIERS 715 | #define SPP_NO_CXX11_USER_DEFINED_LITERALS 716 | #define SPP_NO_CXX11_ALIGNAS 717 | #define SPP_NO_CXX11_INLINE_NAMESPACES 718 | #define SPP_NO_CXX14_DECLTYPE_AUTO 719 | #define SPP_NO_CXX14_INITIALIZED_LAMBDA_CAPTURES 720 | #define SPP_NO_CXX14_RETURN_TYPE_DEDUCTION 721 | #define SPP_NO_CXX11_HDR_INITIALIZER_LIST 722 | #endif 723 | 724 | // C++11 features not supported by any versions 725 | #define SPP_NO_CXX11_CHAR16_T 726 | #define SPP_NO_CXX11_CHAR32_T 727 | #define SPP_NO_CXX11_CONSTEXPR 728 | #define SPP_NO_SFINAE_EXPR 729 | #define SPP_NO_TWO_PHASE_NAME_LOOKUP 730 | 731 | // C++ 14: 732 | #if !defined(__cpp_binary_literals) || (__cpp_binary_literals < 201304) 733 | #define SPP_NO_CXX14_BINARY_LITERALS 734 | #endif 735 | 736 | #if !defined(__cpp_constexpr) || (__cpp_constexpr < 201304) 737 | #define SPP_NO_CXX14_CONSTEXPR 738 | #endif 739 | 740 | #if (__cplusplus < 201304) // There's no SD6 check for this.... 
741 | #define SPP_NO_CXX14_DIGIT_SEPARATORS 742 | #endif 743 | 744 | #if !defined(__cpp_generic_lambdas) || (__cpp_generic_lambdas < 201304) 745 | #define SPP_NO_CXX14_GENERIC_LAMBDAS 746 | #endif 747 | 748 | #if !defined(__cpp_variable_templates) || (__cpp_variable_templates < 201304) 749 | #define SPP_NO_CXX14_VARIABLE_TEMPLATES 750 | #endif 751 | 752 | #endif 753 | 754 | // from boost/config/suffix.hpp 755 | // ---------------------------- 756 | #ifndef SPP_ATTRIBUTE_UNUSED 757 | #define SPP_ATTRIBUTE_UNUSED 758 | #endif 759 | 760 | #endif // spp_config_h_guard 761 | -------------------------------------------------------------------------------- /include/sparsepp/spp_memory.h: -------------------------------------------------------------------------------- 1 | #if !defined(spp_memory_h_guard) 2 | #define spp_memory_h_guard 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #if defined(_WIN32) || defined( __CYGWIN__) 9 | #define SPP_WIN 10 | #endif 11 | 12 | #ifdef SPP_WIN 13 | #include 14 | #include 15 | #undef min 16 | #undef max 17 | #else 18 | #include 19 | #include 20 | #endif 21 | 22 | namespace spp 23 | { 24 | uint64_t GetSystemMemory() 25 | { 26 | #ifdef SPP_WIN 27 | MEMORYSTATUSEX memInfo; 28 | memInfo.dwLength = sizeof(MEMORYSTATUSEX); 29 | GlobalMemoryStatusEx(&memInfo); 30 | return static_cast(memInfo.ullTotalPageFile); 31 | #else 32 | struct sysinfo memInfo; 33 | sysinfo (&memInfo); 34 | auto totalVirtualMem = memInfo.totalram; 35 | 36 | totalVirtualMem += memInfo.totalswap; 37 | totalVirtualMem *= memInfo.mem_unit; 38 | return static_cast(totalVirtualMem); 39 | #endif 40 | } 41 | 42 | uint64_t GetTotalMemoryUsed() 43 | { 44 | #ifdef SPP_WIN 45 | MEMORYSTATUSEX memInfo; 46 | memInfo.dwLength = sizeof(MEMORYSTATUSEX); 47 | GlobalMemoryStatusEx(&memInfo); 48 | return static_cast(memInfo.ullTotalPageFile - memInfo.ullAvailPageFile); 49 | #else 50 | struct sysinfo memInfo; 51 | sysinfo(&memInfo); 52 | auto virtualMemUsed = memInfo.totalram - 
memInfo.freeram; 53 | 54 | virtualMemUsed += memInfo.totalswap - memInfo.freeswap; 55 | virtualMemUsed *= memInfo.mem_unit; 56 | 57 | return static_cast(virtualMemUsed); 58 | #endif 59 | } 60 | 61 | uint64_t GetProcessMemoryUsed() 62 | { 63 | #ifdef SPP_WIN 64 | PROCESS_MEMORY_COUNTERS_EX pmc; 65 | GetProcessMemoryInfo(GetCurrentProcess(), reinterpret_cast(&pmc), sizeof(pmc)); 66 | return static_cast(pmc.PrivateUsage); 67 | #else 68 | auto parseLine = 69 | [](char* line)->int 70 | { 71 | auto i = strlen(line); 72 | 73 | while(*line < '0' || *line > '9') 74 | { 75 | line++; 76 | } 77 | 78 | line[i-3] = '\0'; 79 | i = atoi(line); 80 | return i; 81 | }; 82 | 83 | auto file = fopen("/proc/self/status", "r"); 84 | auto result = -1; 85 | char line[128]; 86 | 87 | while(fgets(line, 128, file) != nullptr) 88 | { 89 | if(strncmp(line, "VmSize:", 7) == 0) 90 | { 91 | result = parseLine(line); 92 | break; 93 | } 94 | } 95 | 96 | fclose(file); 97 | return static_cast(result) * 1024; 98 | #endif 99 | } 100 | 101 | uint64_t GetPhysicalMemory() 102 | { 103 | #ifdef SPP_WIN 104 | MEMORYSTATUSEX memInfo; 105 | memInfo.dwLength = sizeof(MEMORYSTATUSEX); 106 | GlobalMemoryStatusEx(&memInfo); 107 | return static_cast(memInfo.ullTotalPhys); 108 | #else 109 | struct sysinfo memInfo; 110 | sysinfo(&memInfo); 111 | 112 | auto totalPhysMem = memInfo.totalram; 113 | 114 | totalPhysMem *= memInfo.mem_unit; 115 | return static_cast(totalPhysMem); 116 | #endif 117 | } 118 | 119 | } 120 | 121 | #endif // spp_memory_h_guard 122 | -------------------------------------------------------------------------------- /include/sparsepp/spp_smartptr.h: -------------------------------------------------------------------------------- 1 | #if !defined(spp_smartptr_h_guard) 2 | #define spp_smartptr_h_guard 3 | 4 | 5 | /* ----------------------------------------------------------------------------------------------- 6 | * quick version of intrusive_ptr 7 | * 
----------------------------------------------------------------------------------------------- 8 | */ 9 | 10 | #include 11 | 12 | // ------------------------------------------------------------------------ 13 | class spp_rc 14 | { 15 | public: 16 | spp_rc() : _cnt(0) {} 17 | spp_rc(const spp_rc &) : _cnt(0) {} 18 | void increment() const { ++_cnt; } 19 | void decrement() const { assert(_cnt); if (--_cnt == 0) delete this; } 20 | unsigned count() const { assert(_cnt); return _cnt; } 21 | 22 | protected: 23 | virtual ~spp_rc() {} 24 | 25 | private: 26 | mutable unsigned _cnt; 27 | }; 28 | 29 | // ------------------------------------------------------------------------ 30 | template 31 | class spp_sptr 32 | { 33 | public: 34 | spp_sptr() : _p(0) {} 35 | spp_sptr(T *p) : _p(p) { if (_p) _p->increment(); } 36 | spp_sptr(const spp_sptr &o) : _p(o._p) { if (_p) _p->increment(); } 37 | ~spp_sptr() { if (_p) _p->decrement(); } 38 | spp_sptr& operator=(const spp_sptr &o) { reset(o._p); return *this; } 39 | T* get() const { return _p; } 40 | void swap(spp_sptr &o) { T *tmp = _p; _p = o._p; o._p = tmp; } 41 | void reset(const T *p = 0) 42 | { 43 | if (p == _p) 44 | return; 45 | if (_p) _p->decrement(); 46 | _p = (T *)p; 47 | if (_p) _p->increment(); 48 | } 49 | T* operator->() const { return const_cast(_p); } 50 | 51 | private: 52 | T *_p; 53 | }; 54 | 55 | 56 | #endif // spp_smartptr_h_guard 57 | -------------------------------------------------------------------------------- /include/sparsepp/spp_stdint.h: -------------------------------------------------------------------------------- 1 | #if !defined(spp_stdint_h_guard) 2 | #define spp_stdint_h_guard 3 | 4 | #include 5 | 6 | #if defined(SPP_HAS_CSTDINT) && (__cplusplus >= 201103) 7 | #include 8 | #else 9 | #if defined(__FreeBSD__) || defined(__IBMCPP__) || defined(_AIX) 10 | #include 11 | #else 12 | #include 13 | #endif 14 | #endif 15 | 16 | #endif // spp_stdint_h_guard 17 | 
-------------------------------------------------------------------------------- /include/sparsepp/spp_timer.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright (c) 2016 Mariano Gonzalez 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | SOFTWARE. 
21 | */ 22 | 23 | #ifndef spp_timer_h_guard 24 | #define spp_timer_h_guard 25 | 26 | #include 27 | 28 | namespace spp 29 | { 30 | template 31 | class Timer 32 | { 33 | public: 34 | Timer() { reset(); } 35 | void reset() { _start = _snap = clock::now(); } 36 | void snap() { _snap = clock::now(); } 37 | 38 | float get_total() const { return get_diff(_start, clock::now()); } 39 | float get_delta() const { return get_diff(_snap, clock::now()); } 40 | 41 | private: 42 | using clock = std::chrono::high_resolution_clock; 43 | using point = std::chrono::time_point; 44 | 45 | template 46 | static T get_diff(const point& start, const point& end) 47 | { 48 | using duration_t = std::chrono::duration; 49 | 50 | return std::chrono::duration_cast(end - start).count(); 51 | } 52 | 53 | point _start; 54 | point _snap; 55 | }; 56 | } 57 | 58 | #endif // spp_timer_h_guard 59 | -------------------------------------------------------------------------------- /include/sparsepp/spp_traits.h: -------------------------------------------------------------------------------- 1 | #if !defined(spp_traits_h_guard) 2 | #define spp_traits_h_guard 3 | 4 | #include 5 | 6 | template class HashObject; // for Google's benchmark, not in spp namespace! 
7 | 8 | namespace spp_ 9 | { 10 | 11 | // --------------------------------------------------------------------------- 12 | // type_traits we need 13 | // --------------------------------------------------------------------------- 14 | template 15 | struct integral_constant { static const T value = v; }; 16 | 17 | template const T integral_constant::value; 18 | 19 | typedef integral_constant true_type; 20 | typedef integral_constant false_type; 21 | 22 | typedef integral_constant zero_type; 23 | typedef integral_constant one_type; 24 | typedef integral_constant two_type; 25 | typedef integral_constant three_type; 26 | 27 | template struct is_same : public false_type { }; 28 | template struct is_same : public true_type { }; 29 | 30 | template struct remove_const { typedef T type; }; 31 | template struct remove_const { typedef T type; }; 32 | 33 | template struct remove_volatile { typedef T type; }; 34 | template struct remove_volatile { typedef T type; }; 35 | 36 | template struct remove_cv 37 | { 38 | typedef typename remove_const::type>::type type; 39 | }; 40 | 41 | // ---------------- is_integral ---------------------------------------- 42 | template struct is_integral; 43 | template struct is_integral : false_type { }; 44 | template<> struct is_integral : true_type { }; 45 | template<> struct is_integral : true_type { }; 46 | template<> struct is_integral : true_type { }; 47 | template<> struct is_integral : true_type { }; 48 | template<> struct is_integral : true_type { }; 49 | template<> struct is_integral : true_type { }; 50 | template<> struct is_integral : true_type { }; 51 | template<> struct is_integral : true_type { }; 52 | template<> struct is_integral : true_type { }; 53 | template<> struct is_integral : true_type { }; 54 | #ifdef SPP_HAS_LONG_LONG 55 | template<> struct is_integral : true_type { }; 56 | template<> struct is_integral : true_type { }; 57 | #endif 58 | template struct is_integral : is_integral { }; 59 | template struct is_integral : 
is_integral { }; 60 | template struct is_integral : is_integral { }; 61 | 62 | // ---------------- is_floating_point ---------------------------------------- 63 | template struct is_floating_point; 64 | template struct is_floating_point : false_type { }; 65 | template<> struct is_floating_point : true_type { }; 66 | template<> struct is_floating_point : true_type { }; 67 | template<> struct is_floating_point : true_type { }; 68 | template struct is_floating_point : is_floating_point { }; 69 | template struct is_floating_point : is_floating_point { }; 70 | template struct is_floating_point : is_floating_point { }; 71 | 72 | // ---------------- is_pointer ---------------------------------------- 73 | template struct is_pointer; 74 | template struct is_pointer : false_type { }; 75 | template struct is_pointer : true_type { }; 76 | template struct is_pointer : is_pointer { }; 77 | template struct is_pointer : is_pointer { }; 78 | template struct is_pointer : is_pointer { }; 79 | 80 | // ---------------- is_reference ---------------------------------------- 81 | template struct is_reference; 82 | template struct is_reference : false_type {}; 83 | template struct is_reference : true_type {}; 84 | 85 | // ---------------- is_relocatable ---------------------------------------- 86 | // relocatable values can be moved around in memory using memcpy and remain 87 | // correct. Most types are relocatable, an example of a type who is not would 88 | // be a struct which contains a pointer to a buffer inside itself - this is the 89 | // case for std::string in gcc 5. 
90 | // ------------------------------------------------------------------------ 91 | template struct is_relocatable; 92 | template struct is_relocatable : 93 | integral_constant::value || is_floating_point::value)> 94 | { }; 95 | 96 | template struct is_relocatable > : true_type { }; 97 | 98 | template struct is_relocatable : is_relocatable { }; 99 | template struct is_relocatable : is_relocatable { }; 100 | template struct is_relocatable : is_relocatable { }; 101 | template struct is_relocatable : is_relocatable { }; 102 | template struct is_relocatable > : 103 | integral_constant::value && is_relocatable::value)> 104 | { }; 105 | 106 | // A template helper used to select A or B based on a condition. 107 | // ------------------------------------------------------------ 108 | template 109 | struct if_ 110 | { 111 | typedef A type; 112 | }; 113 | 114 | template 115 | struct if_ 116 | { 117 | typedef B type; 118 | }; 119 | 120 | } // spp_ namespace 121 | 122 | #endif // spp_traits_h_guard 123 | -------------------------------------------------------------------------------- /include/sparsepp/spp_utils.h: -------------------------------------------------------------------------------- 1 | // ---------------------------------------------------------------------- 2 | // Copyright (c) 2016, Steven Gregory Popovitch - greg7mdp@gmail.com 3 | // All rights reserved. 4 | // 5 | // Code derived derived from Boost libraries. 6 | // Boost software licence reproduced below. 7 | // 8 | // Redistribution and use in source and binary forms, with or without 9 | // modification, are permitted provided that the following conditions are 10 | // met: 11 | // 12 | // * Redistributions of source code must retain the above copyright 13 | // notice, this list of conditions and the following disclaimer. 
14 | // * Redistributions in binary form must reproduce the above 15 | // copyright notice, this list of conditions and the following disclaimer 16 | // in the documentation and/or other materials provided with the 17 | // distribution. 18 | // * The name of Steven Gregory Popovitch may not be used to 19 | // endorse or promote products derived from this software without 20 | // specific prior written permission. 21 | // 22 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 | // ---------------------------------------------------------------------- 34 | 35 | // --------------------------------------------------------------------------- 36 | // Boost Software License - Version 1.0 - August 17th, 2003 37 | // 38 | // Permission is hereby granted, free of charge, to any person or organization 39 | // obtaining a copy of the software and accompanying documentation covered by 40 | // this license (the "Software") to use, reproduce, display, distribute, 41 | // execute, and transmit the Software, and to prepare derivative works of the 42 | // Software, and to permit third-parties to whom the Software is furnished to 43 | // do so, all subject to the following: 44 | // 45 | // The copyright notices in the Software and this entire statement, including 46 | // the above license grant, this restriction and the following disclaimer, 47 | // must be included in all copies of the Software, in whole or in part, and 48 | // all derivative works of the Software, unless such copies or derivative 49 | // works are solely in the form of machine-executable object code generated by 50 | // a source language processor. 51 | // 52 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 53 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 54 | // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 55 | // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 56 | // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 57 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 58 | // DEALINGS IN THE SOFTWARE. 
59 | // --------------------------------------------------------------------------- 60 | 61 | // ---------------------------------------------------------------------- 62 | // H A S H F U N C T I O N S 63 | // ---------------------------- 64 | // 65 | // Implements spp::spp_hash() and spp::hash_combine() 66 | // ---------------------------------------------------------------------- 67 | 68 | #if !defined(spp_utils_h_guard_) 69 | #define spp_utils_h_guard_ 70 | 71 | #if defined(_MSC_VER) 72 | #if (_MSC_VER >= 1600 ) // vs2010 (1900 is vs2015) 73 | #include 74 | #define SPP_HASH_CLASS std::hash 75 | #else 76 | #include 77 | #define SPP_HASH_CLASS stdext::hash_compare 78 | #endif 79 | #if (_MSC_FULL_VER < 190021730) 80 | #define SPP_NO_CXX11_NOEXCEPT 81 | #endif 82 | #elif defined __clang__ 83 | #if __has_feature(cxx_noexcept) // what to use here? 84 | #include 85 | #define SPP_HASH_CLASS std::hash 86 | #else 87 | #include 88 | #define SPP_HASH_CLASS std::tr1::hash 89 | #endif 90 | 91 | #if !__has_feature(cxx_noexcept) 92 | #define SPP_NO_CXX11_NOEXCEPT 93 | #endif 94 | #elif defined(__GNUC__) 95 | #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (__cplusplus >= 201103L) 96 | #include 97 | #define SPP_HASH_CLASS std::hash 98 | 99 | #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) < 40600 100 | #define SPP_NO_CXX11_NOEXCEPT 101 | #endif 102 | #else 103 | #include 104 | #define SPP_HASH_CLASS std::tr1::hash 105 | #define SPP_NO_CXX11_NOEXCEPT 106 | #endif 107 | #else 108 | #include 109 | #define SPP_HASH_CLASS std::hash 110 | #endif 111 | 112 | #ifdef SPP_NO_CXX11_NOEXCEPT 113 | #define SPP_NOEXCEPT 114 | #else 115 | #define SPP_NOEXCEPT noexcept 116 | #endif 117 | 118 | #ifdef SPP_NO_CXX11_CONSTEXPR 119 | #define SPP_CONSTEXPR 120 | #else 121 | #define SPP_CONSTEXPR constexpr 122 | #endif 123 | 124 | #define SPP_INLINE 125 | 126 | #ifndef spp_ 127 | #define spp_ spp 128 | #endif 129 | 130 | namespace spp_ 131 | { 132 | 133 | template T spp_min(T a, T b) { return a < b ? 
a : b; } 134 | template T spp_max(T a, T b) { return a >= b ? a : b; } 135 | 136 | template 137 | struct spp_hash 138 | { 139 | SPP_INLINE size_t operator()(const T &__v) const SPP_NOEXCEPT 140 | { 141 | SPP_HASH_CLASS hasher; 142 | return hasher(__v); 143 | } 144 | }; 145 | 146 | template 147 | struct spp_hash 148 | { 149 | static size_t spp_log2 (size_t val) SPP_NOEXCEPT 150 | { 151 | size_t res = 0; 152 | while (val > 1) 153 | { 154 | val >>= 1; 155 | res++; 156 | } 157 | return res; 158 | } 159 | 160 | SPP_INLINE size_t operator()(const T *__v) const SPP_NOEXCEPT 161 | { 162 | static const size_t shift = 3; // spp_log2(1 + sizeof(T)); // T might be incomplete! 163 | const uintptr_t i = (const uintptr_t)__v; 164 | return static_cast(i >> shift); 165 | } 166 | }; 167 | 168 | // from http://burtleburtle.net/bob/hash/integer.html 169 | // fast and efficient for power of two table sizes where we always 170 | // consider the last bits. 171 | // --------------------------------------------------------------- 172 | inline size_t spp_mix_32(uint32_t a) 173 | { 174 | a = a ^ (a >> 4); 175 | a = (a ^ 0xdeadbeef) + (a << 5); 176 | a = a ^ (a >> 11); 177 | return static_cast(a); 178 | } 179 | 180 | // Maybe we should do a more thorough scrambling as described in 181 | // https://gist.github.com/badboy/6267743 182 | // ------------------------------------------------------------- 183 | inline size_t spp_mix_64(uint64_t a) 184 | { 185 | a = a ^ (a >> 4); 186 | a = (a ^ 0xdeadbeef) + (a << 5); 187 | a = a ^ (a >> 11); 188 | return (size_t)a; 189 | } 190 | 191 | template <> 192 | struct spp_hash : public std::unary_function 193 | { 194 | SPP_INLINE size_t operator()(bool __v) const SPP_NOEXCEPT 195 | { return static_cast(__v); } 196 | }; 197 | 198 | template <> 199 | struct spp_hash : public std::unary_function 200 | { 201 | SPP_INLINE size_t operator()(char __v) const SPP_NOEXCEPT 202 | { return static_cast(__v); } 203 | }; 204 | 205 | template <> 206 | struct spp_hash : 
public std::unary_function 207 | { 208 | SPP_INLINE size_t operator()(signed char __v) const SPP_NOEXCEPT 209 | { return static_cast(__v); } 210 | }; 211 | 212 | template <> 213 | struct spp_hash : public std::unary_function 214 | { 215 | SPP_INLINE size_t operator()(unsigned char __v) const SPP_NOEXCEPT 216 | { return static_cast(__v); } 217 | }; 218 | 219 | template <> 220 | struct spp_hash : public std::unary_function 221 | { 222 | SPP_INLINE size_t operator()(wchar_t __v) const SPP_NOEXCEPT 223 | { return static_cast(__v); } 224 | }; 225 | 226 | template <> 227 | struct spp_hash : public std::unary_function 228 | { 229 | SPP_INLINE size_t operator()(int16_t __v) const SPP_NOEXCEPT 230 | { return spp_mix_32(static_cast(__v)); } 231 | }; 232 | 233 | template <> 234 | struct spp_hash : public std::unary_function 235 | { 236 | SPP_INLINE size_t operator()(uint16_t __v) const SPP_NOEXCEPT 237 | { return spp_mix_32(static_cast(__v)); } 238 | }; 239 | 240 | template <> 241 | struct spp_hash : public std::unary_function 242 | { 243 | SPP_INLINE size_t operator()(int32_t __v) const SPP_NOEXCEPT 244 | { return spp_mix_32(static_cast(__v)); } 245 | }; 246 | 247 | template <> 248 | struct spp_hash : public std::unary_function 249 | { 250 | SPP_INLINE size_t operator()(uint32_t __v) const SPP_NOEXCEPT 251 | { return spp_mix_32(static_cast(__v)); } 252 | }; 253 | 254 | template <> 255 | struct spp_hash : public std::unary_function 256 | { 257 | SPP_INLINE size_t operator()(int64_t __v) const SPP_NOEXCEPT 258 | { return spp_mix_64(static_cast(__v)); } 259 | }; 260 | 261 | template <> 262 | struct spp_hash : public std::unary_function 263 | { 264 | SPP_INLINE size_t operator()(uint64_t __v) const SPP_NOEXCEPT 265 | { return spp_mix_64(static_cast(__v)); } 266 | }; 267 | 268 | template <> 269 | struct spp_hash : public std::unary_function 270 | { 271 | SPP_INLINE size_t operator()(float __v) const SPP_NOEXCEPT 272 | { 273 | // -0.0 and 0.0 should return same hash 274 | uint32_t 
*as_int = reinterpret_cast(&__v); 275 | return (__v == 0) ? static_cast(0) : spp_mix_32(*as_int); 276 | } 277 | }; 278 | 279 | template <> 280 | struct spp_hash : public std::unary_function 281 | { 282 | SPP_INLINE size_t operator()(double __v) const SPP_NOEXCEPT 283 | { 284 | // -0.0 and 0.0 should return same hash 285 | uint64_t *as_int = reinterpret_cast(&__v); 286 | return (__v == 0) ? static_cast(0) : spp_mix_64(*as_int); 287 | } 288 | }; 289 | 290 | template struct Combiner 291 | { 292 | inline void operator()(T& seed, T value); 293 | }; 294 | 295 | template struct Combiner 296 | { 297 | inline void operator()(T& seed, T value) 298 | { 299 | seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2); 300 | } 301 | }; 302 | 303 | template struct Combiner 304 | { 305 | inline void operator()(T& seed, T value) 306 | { 307 | seed ^= value + T(0xc6a4a7935bd1e995) + (seed << 6) + (seed >> 2); 308 | } 309 | }; 310 | 311 | template 312 | inline void hash_combine(std::size_t& seed, T const& v) 313 | { 314 | spp_::spp_hash hasher; 315 | Combiner combiner; 316 | 317 | combiner(seed, hasher(v)); 318 | } 319 | 320 | static inline uint32_t s_spp_popcount_default(uint32_t i) SPP_NOEXCEPT 321 | { 322 | i = i - ((i >> 1) & 0x55555555); 323 | i = (i & 0x33333333) + ((i >> 2) & 0x33333333); 324 | return (((i + (i >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; 325 | } 326 | 327 | static inline uint32_t s_spp_popcount_default(uint64_t x) SPP_NOEXCEPT 328 | { 329 | const uint64_t m1 = uint64_t(0x5555555555555555); // binary: 0101... 330 | const uint64_t m2 = uint64_t(0x3333333333333333); // binary: 00110011.. 331 | const uint64_t m4 = uint64_t(0x0f0f0f0f0f0f0f0f); // binary: 4 zeros, 4 ones ... 332 | const uint64_t h01 = uint64_t(0x0101010101010101); // the sum of 256 to the power of 0,1,2,3... 
333 | 334 | x -= (x >> 1) & m1; // put count of each 2 bits into those 2 bits 335 | x = (x & m2) + ((x >> 2) & m2); // put count of each 4 bits into those 4 bits 336 | x = (x + (x >> 4)) & m4; // put count of each 8 bits into those 8 bits 337 | return (x * h01)>>56; // returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24)+... 338 | } 339 | 340 | #ifdef __APPLE__ 341 | static inline uint32_t count_trailing_zeroes(size_t v) SPP_NOEXCEPT 342 | { 343 | size_t x = (v & -v) - 1; 344 | // sadly sizeof() required to build on macos 345 | return sizeof(size_t) == 8 ? s_spp_popcount_default((uint64_t)x) : s_spp_popcount_default((uint32_t)x); 346 | } 347 | 348 | static inline uint32_t s_popcount(size_t v) SPP_NOEXCEPT 349 | { 350 | // sadly sizeof() required to build on macos 351 | return sizeof(size_t) == 8 ? s_spp_popcount_default((uint64_t)v) : s_spp_popcount_default((uint32_t)v); 352 | } 353 | #else 354 | static inline uint32_t count_trailing_zeroes(size_t v) SPP_NOEXCEPT 355 | { 356 | return s_spp_popcount_default((v & -v) - 1); 357 | } 358 | 359 | static inline uint32_t s_popcount(size_t v) SPP_NOEXCEPT 360 | { 361 | return s_spp_popcount_default(v); 362 | } 363 | #endif 364 | 365 | // ----------------------------------------------------------- 366 | // ----------------------------------------------------------- 367 | template 368 | class libc_allocator 369 | { 370 | public: 371 | typedef T value_type; 372 | typedef T* pointer; 373 | typedef ptrdiff_t difference_type; 374 | typedef const T* const_pointer; 375 | typedef size_t size_type; 376 | 377 | libc_allocator() {} 378 | libc_allocator(const libc_allocator &) {} 379 | 380 | pointer allocate(size_t n, const_pointer /* unused */= 0) 381 | { 382 | return static_cast(malloc(n * sizeof(T))); 383 | } 384 | 385 | void deallocate(pointer p, size_t /* unused */) 386 | { 387 | free(p); 388 | } 389 | 390 | pointer reallocate(pointer p, size_t new_size) 391 | { 392 | return static_cast(realloc(p, new_size * sizeof(T))); 393 | } 
394 | 395 | // extra API to match spp_allocator interface 396 | pointer reallocate(pointer p, size_t /* old_size */, size_t new_size) 397 | { 398 | return static_cast(realloc(p, new_size * sizeof(T))); 399 | } 400 | 401 | size_type max_size() const 402 | { 403 | return static_cast(-1) / sizeof(value_type); 404 | } 405 | 406 | void construct(pointer p, const value_type& val) 407 | { 408 | new(p) value_type(val); 409 | } 410 | 411 | void destroy(pointer p) { p->~value_type(); } 412 | 413 | template 414 | struct rebind 415 | { 416 | typedef spp_::libc_allocator other; 417 | }; 418 | 419 | }; 420 | 421 | // forward declaration 422 | // ------------------- 423 | template 424 | class spp_allocator; 425 | 426 | } 427 | 428 | template 429 | inline bool operator==(const spp_::libc_allocator &, const spp_::libc_allocator &) 430 | { 431 | return true; 432 | } 433 | 434 | template 435 | inline bool operator!=(const spp_::libc_allocator &, const spp_::libc_allocator &) 436 | { 437 | return false; 438 | } 439 | 440 | #endif // spp_utils_h_guard_ 441 | 442 | -------------------------------------------------------------------------------- /include/strict_fstream.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __STRICT_FSTREAM_HPP 2 | #define __STRICT_FSTREAM_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /** 10 | * This namespace defines wrappers for std::ifstream, std::ofstream, and 11 | * std::fstream objects. The wrappers perform the following steps: 12 | * - check the open modes make sense 13 | * - check that the call to open() is successful 14 | * - (for input streams) check that the opened file is peek-able 15 | * - turn on the badbit in the exception mask 16 | */ 17 | namespace strict_fstream 18 | { 19 | 20 | /// Overload of error-reporting function, to enable use with VS. 
21 | /// Ref: http://stackoverflow.com/a/901316/717706 22 | static std::string strerror() 23 | { 24 | std::string buff(80, '\0'); 25 | #ifdef _WIN32 26 | if (strerror_s(&buff[0], buff.size(), errno) != 0) 27 | { 28 | buff = "Unknown error"; 29 | } 30 | #elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && ! _GNU_SOURCE 31 | // XSI-compliant strerror_r() 32 | if (strerror_r(errno, &buff[0], buff.size()) != 0) 33 | { 34 | buff = "Unknown error"; 35 | } 36 | #else 37 | // GNU-specific strerror_r() 38 | auto p = strerror_r(errno, &buff[0], buff.size()); 39 | std::string tmp(p, std::strlen(p)); 40 | std::swap(buff, tmp); 41 | #endif 42 | buff.resize(buff.find('\0')); 43 | return buff; 44 | } 45 | 46 | /// Exception class thrown by failed operations. 47 | class Exception 48 | : public std::exception 49 | { 50 | public: 51 | Exception(const std::string& msg) : _msg(msg) {} 52 | const char * what() const noexcept { return _msg.c_str(); } 53 | private: 54 | std::string _msg; 55 | }; // class Exception 56 | 57 | namespace detail 58 | { 59 | 60 | struct static_method_holder 61 | { 62 | static std::string mode_to_string(std::ios_base::openmode mode) 63 | { 64 | static const int n_modes = 6; 65 | static const std::ios_base::openmode mode_val_v[n_modes] = 66 | { 67 | std::ios_base::in, 68 | std::ios_base::out, 69 | std::ios_base::app, 70 | std::ios_base::ate, 71 | std::ios_base::trunc, 72 | std::ios_base::binary 73 | }; 74 | 75 | static const char * mode_name_v[n_modes] = 76 | { 77 | "in", 78 | "out", 79 | "app", 80 | "ate", 81 | "trunc", 82 | "binary" 83 | }; 84 | std::string res; 85 | for (int i = 0; i < n_modes; ++i) 86 | { 87 | if (mode & mode_val_v[i]) 88 | { 89 | res += (! res.empty()? "|" : ""); 90 | res += mode_name_v[i]; 91 | } 92 | } 93 | if (res.empty()) res = "none"; 94 | return res; 95 | } 96 | static void check_mode(const std::string& filename, std::ios_base::openmode mode) 97 | { 98 | if ((mode & std::ios_base::trunc) && ! 
(mode & std::ios_base::out)) 99 | { 100 | throw Exception(std::string("strict_fstream: open('") + filename + "'): mode error: trunc and not out"); 101 | } 102 | else if ((mode & std::ios_base::app) && ! (mode & std::ios_base::out)) 103 | { 104 | throw Exception(std::string("strict_fstream: open('") + filename + "'): mode error: app and not out"); 105 | } 106 | else if ((mode & std::ios_base::trunc) && (mode & std::ios_base::app)) 107 | { 108 | throw Exception(std::string("strict_fstream: open('") + filename + "'): mode error: trunc and app"); 109 | } 110 | } 111 | static void check_open(std::ios * s_p, const std::string& filename, std::ios_base::openmode mode) 112 | { 113 | if (s_p->fail()) 114 | { 115 | throw Exception(std::string("strict_fstream: open('") 116 | + filename + "'," + mode_to_string(mode) + "): open failed: " 117 | + strerror()); 118 | } 119 | } 120 | static void check_peek(std::istream * is_p, const std::string& filename, std::ios_base::openmode mode) 121 | { 122 | bool peek_failed = true; 123 | try 124 | { 125 | is_p->peek(); 126 | peek_failed = is_p->fail(); 127 | } 128 | catch (std::ios_base::failure e) {} 129 | if (peek_failed) 130 | { 131 | throw Exception(std::string("strict_fstream: open('") 132 | + filename + "'," + mode_to_string(mode) + "): peek failed: " 133 | + strerror()); 134 | } 135 | is_p->clear(); 136 | } 137 | }; // struct static_method_holder 138 | 139 | } // namespace detail 140 | 141 | class ifstream 142 | : public std::ifstream 143 | { 144 | public: 145 | ifstream() = default; 146 | ifstream(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in) 147 | { 148 | open(filename, mode); 149 | } 150 | void open(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in) 151 | { 152 | mode |= std::ios_base::in; 153 | exceptions(std::ios_base::badbit); 154 | detail::static_method_holder::check_mode(filename, mode); 155 | std::ifstream::open(filename, mode); 156 | 
detail::static_method_holder::check_open(this, filename, mode); 157 | detail::static_method_holder::check_peek(this, filename, mode); 158 | } 159 | }; // class ifstream 160 | 161 | class ofstream 162 | : public std::ofstream 163 | { 164 | public: 165 | ofstream() = default; 166 | ofstream(const std::string& filename, std::ios_base::openmode mode = std::ios_base::out) 167 | { 168 | open(filename, mode); 169 | } 170 | void open(const std::string& filename, std::ios_base::openmode mode = std::ios_base::out) 171 | { 172 | mode |= std::ios_base::out; 173 | exceptions(std::ios_base::badbit); 174 | detail::static_method_holder::check_mode(filename, mode); 175 | std::ofstream::open(filename, mode); 176 | detail::static_method_holder::check_open(this, filename, mode); 177 | } 178 | }; // class ofstream 179 | 180 | class fstream 181 | : public std::fstream 182 | { 183 | public: 184 | fstream() = default; 185 | fstream(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in) 186 | { 187 | open(filename, mode); 188 | } 189 | void open(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in) 190 | { 191 | if (! 
(mode & std::ios_base::out)) mode |= std::ios_base::in; 192 | exceptions(std::ios_base::badbit); 193 | detail::static_method_holder::check_mode(filename, mode); 194 | std::fstream::open(filename, mode); 195 | detail::static_method_holder::check_open(this, filename, mode); 196 | detail::static_method_holder::check_peek(this, filename, mode); 197 | } 198 | }; // class fstream 199 | 200 | } // namespace strict_fstream 201 | 202 | #endif 203 | -------------------------------------------------------------------------------- /include/zstr.hpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------- 2 | // Copyright 2015 Ontario Institute for Cancer Research 3 | // Written by Matei David (matei@cs.toronto.edu) 4 | //--------------------------------------------------------- 5 | 6 | // Reference: 7 | // http://stackoverflow.com/questions/14086417/how-to-write-custom-input-stream-in-c 8 | 9 | #ifndef __ZSTR_HPP 10 | #define __ZSTR_HPP 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "strict_fstream.hpp" 17 | 18 | namespace zstr 19 | { 20 | 21 | /// Exception class thrown by failed zlib operations. 
22 | class Exception 23 | : public std::exception 24 | { 25 | public: 26 | Exception(z_stream * zstrm_p, int ret) 27 | : _msg("zlib: ") 28 | { 29 | switch (ret) 30 | { 31 | case Z_STREAM_ERROR: 32 | _msg += "Z_STREAM_ERROR: "; 33 | break; 34 | case Z_DATA_ERROR: 35 | _msg += "Z_DATA_ERROR: "; 36 | break; 37 | case Z_MEM_ERROR: 38 | _msg += "Z_MEM_ERROR: "; 39 | break; 40 | case Z_VERSION_ERROR: 41 | _msg += "Z_VERSION_ERROR: "; 42 | break; 43 | case Z_BUF_ERROR: 44 | _msg += "Z_BUF_ERROR: "; 45 | break; 46 | default: 47 | std::ostringstream oss; 48 | oss << ret; 49 | _msg += "[" + oss.str() + "]: "; 50 | break; 51 | } 52 | _msg += zstrm_p->msg; 53 | } 54 | Exception(const std::string msg) : _msg(msg) {} 55 | const char * what() const noexcept { return _msg.c_str(); } 56 | private: 57 | std::string _msg; 58 | }; // class Exception 59 | 60 | namespace detail 61 | { 62 | 63 | class z_stream_wrapper 64 | : public z_stream 65 | { 66 | public: 67 | z_stream_wrapper(bool _is_input = true, int _level = Z_DEFAULT_COMPRESSION) 68 | : is_input(_is_input) 69 | { 70 | this->zalloc = Z_NULL; 71 | this->zfree = Z_NULL; 72 | this->opaque = Z_NULL; 73 | int ret; 74 | if (is_input) 75 | { 76 | this->avail_in = 0; 77 | this->next_in = Z_NULL; 78 | ret = inflateInit2(this, 15+32); 79 | } 80 | else 81 | { 82 | ret = deflateInit2(this, _level, Z_DEFLATED, 15+16, 8, Z_DEFAULT_STRATEGY); 83 | } 84 | if (ret != Z_OK) throw Exception(this, ret); 85 | } 86 | ~z_stream_wrapper() 87 | { 88 | if (is_input) 89 | { 90 | inflateEnd(this); 91 | } 92 | else 93 | { 94 | deflateEnd(this); 95 | } 96 | } 97 | private: 98 | bool is_input; 99 | }; // class z_stream_wrapper 100 | 101 | } // namespace detail 102 | 103 | class istreambuf 104 | : public std::streambuf 105 | { 106 | public: 107 | istreambuf(std::streambuf * _sbuf_p, 108 | std::size_t _buff_size = default_buff_size, bool _auto_detect = true) 109 | : sbuf_p(_sbuf_p), 110 | zstrm_p(nullptr), 111 | buff_size(_buff_size), 112 | 
auto_detect(_auto_detect), 113 | auto_detect_run(false), 114 | is_text(false) 115 | { 116 | assert(sbuf_p); 117 | in_buff = new char [buff_size]; 118 | in_buff_start = in_buff; 119 | in_buff_end = in_buff; 120 | out_buff = new char [buff_size]; 121 | setg(out_buff, out_buff, out_buff); 122 | } 123 | 124 | istreambuf(const istreambuf &) = delete; 125 | istreambuf(istreambuf &&) = default; 126 | istreambuf & operator = (const istreambuf &) = delete; 127 | istreambuf & operator = (istreambuf &&) = default; 128 | 129 | virtual ~istreambuf() 130 | { 131 | delete [] in_buff; 132 | delete [] out_buff; 133 | if (zstrm_p) delete zstrm_p; 134 | } 135 | 136 | virtual std::streambuf::int_type underflow() 137 | { 138 | if (this->gptr() == this->egptr()) 139 | { 140 | // pointers for free region in output buffer 141 | char * out_buff_free_start = out_buff; 142 | do 143 | { 144 | // read more input if none available 145 | if (in_buff_start == in_buff_end) 146 | { 147 | // empty input buffer: refill from the start 148 | in_buff_start = in_buff; 149 | std::streamsize sz = sbuf_p->sgetn(in_buff, buff_size); 150 | in_buff_end = in_buff + sz; 151 | if (in_buff_end == in_buff_start) break; // end of input 152 | } 153 | // auto detect if the stream contains text or deflate data 154 | if (auto_detect && ! auto_detect_run) 155 | { 156 | auto_detect_run = true; 157 | unsigned char b0 = *reinterpret_cast< unsigned char * >(in_buff_start); 158 | unsigned char b1 = *reinterpret_cast< unsigned char * >(in_buff_start + 1); 159 | // Ref: 160 | // http://en.wikipedia.org/wiki/Gzip 161 | // http://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like 162 | is_text = ! 
(in_buff_start + 2 <= in_buff_end 163 | && ((b0 == 0x1F && b1 == 0x8B) // gzip header 164 | || (b0 == 0x78 && (b1 == 0x01 // zlib header 165 | || b1 == 0x9C 166 | || b1 == 0xDA)))); 167 | } 168 | if (is_text) 169 | { 170 | // simply swap in_buff and out_buff, and adjust pointers 171 | assert(in_buff_start == in_buff); 172 | std::swap(in_buff, out_buff); 173 | out_buff_free_start = in_buff_end; 174 | in_buff_start = in_buff; 175 | in_buff_end = in_buff; 176 | } 177 | else 178 | { 179 | // run inflate() on input 180 | if (! zstrm_p) zstrm_p = new detail::z_stream_wrapper(true); 181 | zstrm_p->next_in = reinterpret_cast< decltype(zstrm_p->next_in) >(in_buff_start); 182 | zstrm_p->avail_in = in_buff_end - in_buff_start; 183 | zstrm_p->next_out = reinterpret_cast< decltype(zstrm_p->next_out) >(out_buff_free_start); 184 | zstrm_p->avail_out = (out_buff + buff_size) - out_buff_free_start; 185 | int ret = inflate(zstrm_p, Z_NO_FLUSH); 186 | // process return code 187 | if (ret != Z_OK && ret != Z_STREAM_END) throw Exception(zstrm_p, ret); 188 | // update in&out pointers following inflate() 189 | in_buff_start = reinterpret_cast< decltype(in_buff_start) >(zstrm_p->next_in); 190 | in_buff_end = in_buff_start + zstrm_p->avail_in; 191 | out_buff_free_start = reinterpret_cast< decltype(out_buff_free_start) >(zstrm_p->next_out); 192 | assert(out_buff_free_start + zstrm_p->avail_out == out_buff + buff_size); 193 | // if stream ended, deallocate inflator 194 | if (ret == Z_STREAM_END) 195 | { 196 | delete zstrm_p; 197 | zstrm_p = nullptr; 198 | } 199 | } 200 | } while (out_buff_free_start == out_buff); 201 | // 2 exit conditions: 202 | // - end of input: there might or might not be output available 203 | // - out_buff_free_start != out_buff: output available 204 | this->setg(out_buff, out_buff, out_buff_free_start); 205 | } 206 | return this->gptr() == this->egptr() 207 | ? 
traits_type::eof() 208 | : traits_type::to_int_type(*this->gptr()); 209 | } 210 | private: 211 | std::streambuf * sbuf_p; 212 | char * in_buff; 213 | char * in_buff_start; 214 | char * in_buff_end; 215 | char * out_buff; 216 | detail::z_stream_wrapper * zstrm_p; 217 | std::size_t buff_size; 218 | bool auto_detect; 219 | bool auto_detect_run; 220 | bool is_text; 221 | 222 | static const std::size_t default_buff_size = (std::size_t)1 << 20; 223 | }; // class istreambuf 224 | 225 | class ostreambuf 226 | : public std::streambuf 227 | { 228 | public: 229 | ostreambuf(std::streambuf * _sbuf_p, 230 | std::size_t _buff_size = default_buff_size, int _level = Z_DEFAULT_COMPRESSION) 231 | : sbuf_p(_sbuf_p), 232 | zstrm_p(new detail::z_stream_wrapper(false, _level)), 233 | buff_size(_buff_size) 234 | { 235 | assert(sbuf_p); 236 | in_buff = new char [buff_size]; 237 | out_buff = new char [buff_size]; 238 | setp(in_buff, in_buff + buff_size); 239 | } 240 | 241 | ostreambuf(const ostreambuf &) = delete; 242 | ostreambuf(ostreambuf &&) = default; 243 | ostreambuf & operator = (const ostreambuf &) = delete; 244 | ostreambuf & operator = (ostreambuf &&) = default; 245 | 246 | int deflate_loop(int flush) 247 | { 248 | while (true) 249 | { 250 | zstrm_p->next_out = reinterpret_cast< decltype(zstrm_p->next_out) >(out_buff); 251 | zstrm_p->avail_out = buff_size; 252 | int ret = deflate(zstrm_p, flush); 253 | if (ret != Z_OK && ret != Z_STREAM_END && ret != Z_BUF_ERROR) throw Exception(zstrm_p, ret); 254 | std::streamsize sz = sbuf_p->sputn(out_buff, reinterpret_cast< decltype(out_buff) >(zstrm_p->next_out) - out_buff); 255 | if (sz != reinterpret_cast< decltype(out_buff) >(zstrm_p->next_out) - out_buff) 256 | { 257 | // there was an error in the sink stream 258 | return -1; 259 | } 260 | if (ret == Z_STREAM_END || ret == Z_BUF_ERROR || sz == 0) 261 | { 262 | break; 263 | } 264 | } 265 | return 0; 266 | } 267 | 268 | virtual ~ostreambuf() 269 | { 270 | // flush the zlib stream 271 | // 
272 | // NOTE: Errors here (sync() return value not 0) are ignored, because we 273 | // cannot throw in a destructor. This mirrors the behaviour of 274 | // std::basic_filebuf::~basic_filebuf(). To see an exception on error, 275 | // close the ofstream with an explicit call to close(), and do not rely 276 | // on the implicit call in the destructor. 277 | // 278 | sync(); 279 | delete [] in_buff; 280 | delete [] out_buff; 281 | delete zstrm_p; 282 | } 283 | virtual std::streambuf::int_type overflow(std::streambuf::int_type c = traits_type::eof()) 284 | { 285 | zstrm_p->next_in = reinterpret_cast< decltype(zstrm_p->next_in) >(pbase()); 286 | zstrm_p->avail_in = pptr() - pbase(); 287 | while (zstrm_p->avail_in > 0) 288 | { 289 | int r = deflate_loop(Z_NO_FLUSH); 290 | if (r != 0) 291 | { 292 | setp(nullptr, nullptr); 293 | return traits_type::eof(); 294 | } 295 | } 296 | setp(in_buff, in_buff + buff_size); 297 | return traits_type::eq_int_type(c, traits_type::eof()) ? traits_type::eof() : sputc(c); 298 | } 299 | virtual int sync() 300 | { 301 | // first, call overflow to clear in_buff 302 | overflow(); 303 | if (! 
pptr()) return -1; 304 | // then, call deflate asking to finish the zlib stream 305 | zstrm_p->next_in = nullptr; 306 | zstrm_p->avail_in = 0; 307 | if (deflate_loop(Z_FINISH) != 0) return -1; 308 | deflateReset(zstrm_p); 309 | return 0; 310 | } 311 | private: 312 | std::streambuf * sbuf_p; 313 | char * in_buff; 314 | char * out_buff; 315 | detail::z_stream_wrapper * zstrm_p; 316 | std::size_t buff_size; 317 | 318 | static const std::size_t default_buff_size = (std::size_t)1 << 20; 319 | }; // class ostreambuf 320 | 321 | class istream 322 | : public std::istream 323 | { 324 | public: 325 | istream(std::istream & is) 326 | : std::istream(new istreambuf(is.rdbuf())) 327 | { 328 | exceptions(std::ios_base::badbit); 329 | } 330 | explicit istream(std::streambuf * sbuf_p) 331 | : std::istream(new istreambuf(sbuf_p)) 332 | { 333 | exceptions(std::ios_base::badbit); 334 | } 335 | virtual ~istream() 336 | { 337 | delete rdbuf(); 338 | } 339 | }; // class istream 340 | 341 | class ostream 342 | : public std::ostream 343 | { 344 | public: 345 | ostream(std::ostream & os) 346 | : std::ostream(new ostreambuf(os.rdbuf())) 347 | { 348 | exceptions(std::ios_base::badbit); 349 | } 350 | explicit ostream(std::streambuf * sbuf_p) 351 | : std::ostream(new ostreambuf(sbuf_p)) 352 | { 353 | exceptions(std::ios_base::badbit); 354 | } 355 | virtual ~ostream() 356 | { 357 | delete rdbuf(); 358 | } 359 | }; // class ostream 360 | 361 | namespace detail 362 | { 363 | 364 | template < typename FStream_Type > 365 | struct strict_fstream_holder 366 | { 367 | strict_fstream_holder(const std::string& filename, std::ios_base::openmode mode = std::ios_base::in) 368 | : _fs(filename, mode) 369 | {} 370 | FStream_Type _fs; 371 | }; // class strict_fstream_holder 372 | 373 | } // namespace detail 374 | 375 | class ifstream 376 | : private detail::strict_fstream_holder< strict_fstream::ifstream >, 377 | public std::istream 378 | { 379 | public: 380 | explicit ifstream(const std::string& filename, 
std::ios_base::openmode mode = std::ios_base::in) 381 | : detail::strict_fstream_holder< strict_fstream::ifstream >(filename, mode), 382 | std::istream(new istreambuf(_fs.rdbuf())) 383 | { 384 | exceptions(std::ios_base::badbit); 385 | } 386 | virtual ~ifstream() 387 | { 388 | if (rdbuf()) delete rdbuf(); 389 | } 390 | }; // class ifstream 391 | 392 | class ofstream 393 | : private detail::strict_fstream_holder< strict_fstream::ofstream >, 394 | public std::ostream 395 | { 396 | public: 397 | explicit ofstream(const std::string& filename, std::ios_base::openmode mode = std::ios_base::out) 398 | : detail::strict_fstream_holder< strict_fstream::ofstream >(filename, mode | std::ios_base::binary), 399 | std::ostream(new ostreambuf(_fs.rdbuf())) 400 | { 401 | exceptions(std::ios_base::badbit); 402 | } 403 | virtual ~ofstream() 404 | { 405 | if (rdbuf()) delete rdbuf(); 406 | } 407 | }; // class ofstream 408 | 409 | } // namespace zstr 410 | 411 | #endif 412 | -------------------------------------------------------------------------------- /sample_data/unmapped_random.txt: -------------------------------------------------------------------------------- 1 | 427:NM_174914:1157:172 u 2 | 598:NM_022658:2023:160 m2 3 | 677:NM_006897:124:180 m2 4 | 743:NM_022658:1835:181 m12 5 | 746:NM_022658:1797:202 m12 6 | 838:NM_014620:1423:191 u 7 | 915:NM_022658:663:169 m2 8 | 1073:NM_174914:1473:166 m2 9 | 1174:NM_022658:131:188 m1 10 | 1175:NM_022658:1352:146 m1 11 | 1312:NM_173860:137:166 m1 12 | 1317:NM_174914:458:172 m1 13 | 1383:NM_173860:564:179 m12 14 | 2019:NM_022658:441:179 m12 15 | 2040:NM_004503:1107:176 m1 16 | 2097:NM_174914:1998:190 m2 17 | 2214:NM_014620:1923:176 u 18 | 2283:NM_018953:635:234 m12 19 | 2470:NM_173860:55:175 m12 20 | 2555:NM_022658:34:186 u 21 | 2558:NM_173860:298:181 m12 22 | 2593:NM_022658:1517:170 m1 23 | 2861:NM_014212:974:167 m2 24 | 2983:NM_001168316:1704:182 u 25 | 2996:NM_174914:1508:171 m2 26 | 3033:NM_022658:83:163 u 27 | 3061:NM_022658:1217:177 
m2 28 | 3175:NM_022658:1577:153 u 29 | 3237:NM_022658:2078:170 m12 30 | 3283:NM_022658:277:190 u 31 | 3667:NM_006897:704:141 m1 32 | 3738:NM_153693:24:196 m1 33 | 3775:NM_174914:0:140 m2 34 | 4041:NM_022658:1881:191 m12 35 | 4044:NM_153633:688:172 u 36 | 4062:NM_022658:1628:187 m12 37 | 4064:NM_022658:53:178 u 38 | 4076:NM_022658:55:194 m2 39 | 4203:NM_174914:1526:204 m2 40 | 4206:NM_022658:799:189 m2 41 | 4255:NM_022658:337:85 m1 42 | 4286:NM_022658:1097:160 m2 43 | 4319:NM_006897:67:200 u 44 | 4468:NM_174914:245:204 m1 45 | 4636:NM_174914:791:175 m1 46 | 4812:NM_022658:1991:179 m2 47 | 4832:NM_022658:725:200 m12 48 | 4897:NM_174914:242:180 m2 49 | 5036:NM_001168316:935:145 m12 50 | 5067:NM_022658:247:158 m12 51 | 5087:NM_174914:2101:188 m12 52 | 5157:NM_153633:1112:178 m1 53 | 5162:NM_004503:1032:93 m2 54 | 5175:NM_174914:2064:176 m1 55 | 5267:NM_022658:674:217 u 56 | 5326:NM_022658:847:182 m1 57 | 5426:NM_022658:1029:200 m1 58 | 5917:NM_174914:1002:189 u 59 | 5980:NM_173860:388:173 m12 60 | 5981:NM_173860:637:183 m12 61 | 5987:NM_022658:1746:176 m12 62 | 6253:NM_022658:246:169 m2 63 | 6721:NM_006897:202:169 m2 64 | 6825:NM_022658:848:209 m2 65 | 6877:NM_173860:512:174 u 66 | 7033:NM_022658:1839:174 u 67 | 7056:NM_022658:806:178 m2 68 | 7141:NM_173860:580:193 m1 69 | 7244:NM_022658:1097:132 m12 70 | 7464:NM_006897:1313:214 m12 71 | 7639:NM_022658:148:162 u 72 | 7650:NM_022658:2101:155 u 73 | 7665:NM_004503:597:172 m1 74 | 7728:NM_173860:585:196 m1 75 | 7734:NM_014620:1045:195 u 76 | 7851:NM_173860:496:196 m1 77 | 7875:NM_022658:1870:191 m12 78 | 7984:NM_022658:734:168 m1 79 | 8032:NM_006897:127:107 m12 80 | 8046:NM_022658:632:167 m12 81 | 8079:NM_022658:516:172 m1 82 | 8097:NM_022658:993:177 m12 83 | 8113:NM_022658:1715:185 u 84 | 8142:NM_022658:1848:172 m1 85 | 8319:NM_022658:1416:205 m12 86 | 8409:NM_022658:805:177 m12 87 | 8691:NM_022658:1923:183 u 88 | 8840:NM_174914:2182:164 m1 89 | 8857:NM_022658:763:176 m1 90 | 8880:NM_022658:1600:179 u 91 | 
9122:NM_173860:256:168 m2 92 | 9173:NM_022658:1179:160 m2 93 | 9199:NM_018953:891:175 m1 94 | 9287:NM_022658:1162:218 m2 95 | 9442:NM_022658:1415:174 m12 96 | 9566:NM_022658:1849:166 m1 97 | 9590:NM_022658:1755:176 m1 98 | 9643:NM_174914:1227:171 m1 99 | 9646:NM_014620:794:189 u 100 | 9954:NM_022658:350:193 m12 101 |
--------------------------------------------------------------------------------
/scripts/check_shasum.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Verify that a file matches an expected SHA-256 digest.
#
# Usage: check_shasum.sh <expected-sha256> <file>
#
# Exit status:
#   0  the digest matches, or no checksum tool is installed (the check is then
#      skipped with a message, so a missing tool does not fail the caller)
#   1  the digest does not match, or the arguments are wrong

# Succeeds when the command named by $1 can be found by the shell.
exists()
{
  command -v "$1" >/dev/null 2>&1
}

if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <expected-sha256> <file>" >&2
  exit 1
fi

sum=$1
fname=$2

# Pick whichever SHA-256 tool is installed. The command is kept as an array so
# that "shasum -a256" stays two words without relying on unquoted splitting.
hashcheck=()
if exists sha256sum; then
  hashcheck=(sha256sum)
elif exists shasum; then
  hashcheck=(shasum -a256)
fi

if [ "${#hashcheck[@]}" -eq 0 ]; then
  echo "Couldn't find shasum command; can't verify contents of ${fname}";
else
  # Checksum-list line format: digest, a space, a mode character (a second
  # space means text mode), then the file name.
  echo "${sum}  ${fname}" | "${hashcheck[@]}" -c - || { echo "${fname} did not match expected SHA256! Exiting."; exit 1; }
fi

--------------------------------------------------------------------------------
/scripts/generateDecoyTranscriptome.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Build a decoy-aware transcriptome for `salmon index`.
#
# The exons listed in the annotation are masked out of the genome, the
# transcriptome is aligned to the masked genome, and the regions that still
# match are collected as decoy sequences.  The script writes gentrome.fa
# (transcripts followed by decoys) and decoys.txt into the output folder.
#
# Options are parsed with the bash `getopts` builtin.

# EXIT handler: print a banner and force a non-zero exit status.  It stays
# installed until the very end of the script, so any early exit (including a
# command failing under `set -e`) is reported as an abort.
abort()
{
    echo >&2 '
***************
*** ABORTED ***
***************
'
    echo "An error occurred. Exiting..." >&2
    exit 1
}

trap 'abort' 0

set -e

###############################################
## It assumes awk, bedtools and mashmap are
## available.
## We have tested this script with
## awk 4.1.3, bedtools v2.28.0 and mashmap v2.0
## on an Ubuntu system.
###############################################

threads=1
awk="awk"
bedtools="bedtools"
mashmap="mashmap"

# Argument Parsing

# Print the one-line usage summary.
# NOTE(review): the <placeholder> names were missing from the text this file
# was recovered from; they are reconstructed from how each option is used.
print_usage () {
    echo "Usage: $0 [-j <N> =1 default] [-b <bedtools binary> =bedtools default] [-m <mashmap binary> =mashmap default] -a <gtf file> -g <genome fasta> -t <txome fasta> -o <output path>"
}

# Print the usage summary and leave with status 1 (this also fires abort()).
print_usage_and_exit () {
    print_usage
    exit 1
}

echo "****************"
echo "*** getDecoy ***"
echo "****************"
# The leading ':' selects silent error reporting, handled by the '\?' and ':'
# cases below.  'h' takes no argument, so a bare -h prints the usage text.
while getopts ":a:b:o:j:hg:t:m:" opt; do
    case $opt in
        b)
            bedtools=$(realpath "$OPTARG")
            echo "-b <bedtools binary> = $bedtools"
            ;;
        m)
            mashmap=$(realpath "$OPTARG")
            echo "-m <mashmap binary> = $mashmap"
            ;;
        a)
            gtffile=$(realpath "$OPTARG")
            echo "-a <Annotation GTF file> = $gtffile"
            ;;
        o)
            outfolder="$OPTARG"
            echo "-o <Output files Path> = $outfolder"
            ;;
        j)
            threads="$OPTARG"
            echo "-j <Concurrency level> = $threads"
            ;;
        g)
            genomefile=$(realpath "$OPTARG")
            echo "-g <Genome fasta> = $genomefile"
            ;;
        t)
            txpfile=$(realpath "$OPTARG")
            echo "-t <Transcriptome fasta> = $txpfile"
            ;;
        h)
            # Asking for help is not an error: drop the abort trap first.
            print_usage
            trap - 0
            exit 0
            ;;
        \?)
            echo "Invalid option: -$OPTARG"
            print_usage_and_exit
            ;;
        :)
            echo "Option -$OPTARG requires an argument."
            print_usage_and_exit
            ;;
    esac
done

# Required arguments
if [ -z "$gtffile" ] || [ -z "$outfolder" ] || [ -z "$genomefile" ] || [ -z "$txpfile" ] \
   || [ -z "$mashmap" ] || [ -z "$awk" ] || [ -z "$bedtools" ] || [ -z "$threads" ]
then
    echo "Error: missing required argument(s)"
    print_usage_and_exit
fi

# The input paths were made absolute with realpath above, so changing into the
# output folder does not invalidate them.
mkdir -p "$outfolder"
cd "$outfolder"

# extracting all the exonic features to mask
echo "[1/10] Extracting exonic features from the gtf"
"$awk" -v OFS='\t' '{if ($3=="exon") {print $1,$4,$5}}' "$gtffile" > exons.bed

# masking the exonic regions from the genome
echo "[2/10] Masking the genome fasta"
"$bedtools" maskfasta -fi "$genomefile" -bed exons.bed -fo reference.masked.genome.fa

# aligning the transcriptome to the masked genome
# (no output file is named here; the next step reads mashmap.out)
echo "[3/10] Aligning transcriptome to genome"
"$mashmap" -r reference.masked.genome.fa -q "$txpfile" -t "$threads" --pi 80 -s 500

# extracting the bed files from the reported alignment
echo "[4/10] Extracting intervals from mashmap alignments"
"$awk" -v OFS='\t' '{print $6,$8,$9}' mashmap.out | sort -k1,1 -k2,2n - > genome_found.sorted.bed

# merging the reported intervals
echo "[5/10] Merging the intervals"
"$bedtools" merge -i genome_found.sorted.bed > genome_found_merged.bed

# extracting relevant sequence from the genome
echo "[6/10] Extracting sequences from the genome"
"$bedtools" getfasta -fi reference.masked.genome.fa -bed genome_found_merged.bed -fo genome_found.fa

# concatenating the sequences at the per-chromosome level to extract decoy
# sequences: the input is read as header/sequence line pairs, and sequences
# are appended under the part of the header that precedes the first ':'
echo "[7/10] Concatenating to get decoy sequences"
"$awk" '{a=$0; getline;split(a, b, ":"); r[b[1]] = r[b[1]]""$0} END { for (k in r) { print k"\n"r[k] } }' genome_found.fa > decoy.fa

# concatenating decoys to transcriptome
echo "[8/10] Making gentrome"
cat "$txpfile" decoy.fa > gentrome.fa

#
# extracting the names of the decoys
echo "[9/10] Extracting decoy sequence ids"
grep ">" decoy.fa | "$awk" '{print substr($1,2); }' > decoys.txt

# removing extra files
# (-f: a temporary file that is already missing must not abort a finished run
# under `set -e`)
echo "[10/10] Removing temporary files"
rm -f exons.bed reference.masked.genome.fa mashmap.out genome_found.sorted.bed genome_found_merged.bed genome_found.fa decoy.fa reference.masked.genome.fa.fai

# Success: replace the abort handler with a no-op before the script ends.
trap : 0
# Double quotes so that $outfolder is expanded; the backticks are escaped so
# they are printed literally instead of starting a command substitution.
echo >&2 "
**********************************************
*** DONE Processing ...
*** You can use files \`$outfolder/gentrome.fa\`
*** and \`$outfolder/decoys.txt\` with
*** \`salmon index\`
**********************************************
"

--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build rules for the SalmonTools core library and the `salmontools`
# executable.
#
# GAT_SOURCE_DIR, ZLIB_LIBRARY, NON_APPLECLANG_LIBS and LIBRT are not defined
# in this file; they are expected to be provided by the top-level
# CMakeLists.txt.

# Library search path for locally installed dependencies.  This stays
# directory-scoped because target_link_directories() needs CMake >= 3.13 while
# the project only requires 3.0.  It must be set before the targets are
# created.
link_directories(
  "${GAT_SOURCE_DIR}/lib"
  "${GAT_SOURCE_DIR}/external/install/lib"
)

# Build the SalmonTools library
add_library(salmon_tools_core STATIC
  FastxParser.cpp
  ExtractUnmapped.cpp
)

# PUBLIC: the executable is built against the same header directories, and it
# inherits them by linking to the library.
target_include_directories(salmon_tools_core PUBLIC
  "${GAT_SOURCE_DIR}/include"
  "${GAT_SOURCE_DIR}/external/install/include"
)

# The library sources call zlib themselves (gzopen/gzread in FastxParser.cpp),
# so zlib is a usage requirement of the library rather than of the executable.
target_link_libraries(salmon_tools_core PUBLIC
  ${ZLIB_LIBRARY}
)

# Build the salmontools executable
add_executable(salmontools
  SalmonTools.cpp
)

# Link the executable.  No separate thread library is listed: -pthread is part
# of the global CMAKE_CXX_FLAGS set by the top-level CMakeLists.txt.
target_link_libraries(salmontools PRIVATE
  salmon_tools_core
  m
  ${NON_APPLECLANG_LIBS}
  ${LIBRT}
)

# Set the RPATH
if(NOT APPLE)
  # Bake the install RPATH into the binary at build time so that it finds
  # libraries relative to its own location.
  set_target_properties(salmontools PROPERTIES
    INSTALL_RPATH "$ORIGIN/../lib:$ORIGIN/../../lib:$ORIGIN/:$ORIGIN/../../external/install/lib"
    BUILD_WITH_INSTALL_RPATH TRUE
  )
else()
  set_target_properties(salmontools PROPERTIES
    # use, i.e. don't skip the full RPATH for the build tree
    SKIP_BUILD_RPATH FALSE
    # when building, don't use the install RPATH already
    # (but later on when installing)
    BUILD_WITH_INSTALL_RPATH FALSE
    # the RPATH to be used when installing
    INSTALL_RPATH ""
    # don't add the automatically determined parts of the RPATH
    # which point to directories outside the build tree to the install RPATH
    INSTALL_RPATH_USE_LINK_PATH FALSE
  )
endif()

##### ======================================
# Install into the source tree unless the user chose a prefix.  FORCE is
# confined to the case where the prefix still holds CMake's built-in default,
# so a user-supplied value is never overwritten.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX
    "${GAT_SOURCE_DIR}" CACHE PATH "Default install prefix" FORCE
  )
endif()

set(INSTALL_LIB_DIR lib)
set(INSTALL_BIN_DIR bin)
set(INSTALL_INCLUDE_DIR include)

install(TARGETS salmontools
  RUNTIME DESTINATION "${INSTALL_BIN_DIR}"
  LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
  ARCHIVE DESTINATION "${INSTALL_LIB_DIR}"
)

# Disabled unit-test copy and post-install hooks, kept exactly as found.
#add_custom_command(TARGET unitTests POST_BUILD
# COMMAND ${CMAKE_COMMAND} -E copy $ ${GAT_SOURCE_DIR}/tests/$
# COMMENT "Copying unitTests"
#)
#set(POST_INSTALL_SCRIPT ${GAT_SOURCE_DIR}/cmake/PostInstall.cmake)
#install(
# CODE
# "
# execute_process(COMMAND \"${CMAKE_COMMAND}\"
# -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
#
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} 97 | # -P \"${POST_INSTALL_SCRIPT}\") 98 | # " 99 | #) 100 | 101 | #include(InstallRequiredSystemLibraries) 102 | #add_test( NAME unit_tests COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${CMAKE_INSTALL_PREFIX} -P ${GAT_SOURCE_DIR}/cmake/UnitTests.cmake ) 103 | #add_test( NAME salmon_read_test_fmd COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${GAT_SOURCE_DIR} -P ${GAT_SOURCE_DIR}/cmake/TestSalmonFMD.cmake ) 104 | #add_test( NAME salmon_read_test_quasi COMMAND ${CMAKE_COMMAND} -DTOPLEVEL_DIR=${GAT_SOURCE_DIR} -P ${GAT_SOURCE_DIR}/cmake/TestSalmonQuasi.cmake ) 105 | -------------------------------------------------------------------------------- /src/ExtractUnmapped.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "FastxParser.hpp" 8 | #include "sparsepp/spp.h" 9 | #include "args.hpp" 10 | #include "zstr.hpp" 11 | 12 | //enum class MappedState : uint8_t {UNMAPPED, LEFT_MAPPED, RIGHT_MAPPED, BOTH_MAPPED}; 13 | //inline std::ostream& operator<<(std::ostream & os, MappedState & ms) { 14 | // if (ms == MappedState::UNMAPPED) 15 | //} 16 | 17 | 18 | std::string SplitFilename (const std::string& str) 19 | { 20 | size_t found; 21 | //cout << "Splitting: " << str << endl; 22 | found=str.find_last_of("/\\"); 23 | return str.substr(0, found) ; 24 | //cout << " folder: " << str.substr(0,found) << endl; 25 | //cout << " file: " << str.substr(found+1) << endl; 26 | } 27 | 28 | 29 | 30 | // from http://stackoverflow.com/questions/9435385/split-a-string-using-c11 31 | std::vector tokenize(const std::string &s, char delim) { 32 | std::stringstream ss(s); 33 | std::string item; 34 | std::vector elems; 35 | while (std::getline(ss, item, delim)) { 36 | elems.push_back(item); 37 | } 38 | return elems; 39 | } 40 | 41 | // Taken from 42 | // 
http://stackoverflow.com/questions/12774207/fastest-way-to-check-if-a-file-exist-using-standard-c-c11-c 43 | bool DirExists(const char* path) { 44 | struct stat fileStat; 45 | if (stat(path, &fileStat)) { 46 | return false; 47 | } 48 | if (!S_ISDIR(fileStat.st_mode)) { 49 | return false; 50 | } 51 | return true; 52 | } 53 | 54 | void MakeDir(const char* path) { mkdir(path, ACCESSPERMS); } 55 | 56 | void ExtractUnmapped(const std::string &progname, std::vector::const_iterator beginargs, std::vector::const_iterator endargs) { 57 | args::ArgumentParser parser(""); 58 | parser.Prog(progname + " extract-unmapped"); 59 | args::HelpFlag help(parser, "help", "Display this help menu", {'h', "help"}); 60 | args::ValueFlag unmappedFile(parser, "unmapped-file", "salmon file of unmapped names", {'u', "unmapped"}); 61 | args::ValueFlag leftReads(parser, "left-reads", "left read files", {'1', "left"}); 62 | args::ValueFlag rightReads(parser, "right-reads", "right read files", {'2', "right"}); 63 | args::ValueFlag singleReads(parser, "single-reads", "single read files", {'r', "single"}); 64 | args::ValueFlag outFile(parser, "output-base", "the base name of the output file", {'o', "outbase"}); 65 | args::Flag quiet(parser, "quiet", "be quiet", {'q', "quiet"}); 66 | 67 | spp::sparse_hash_map targetReads; 68 | 69 | //args::Positional directory(parser, "directory", "The directory to create in", "."); 70 | try 71 | { 72 | parser.ParseArgs(beginargs, endargs); 73 | std::cerr << std::boolalpha; 74 | if (!unmappedFile) { 75 | std::cerr << "you must provide an argument for [-u/--unmapped]\n"; 76 | std::exit(1); 77 | } 78 | 79 | if (!(singleReads or (leftReads and rightReads))) { 80 | std::cerr << bool{singleReads} << ", " << bool{leftReads} << ", " << bool{rightReads} << '\n'; 81 | std::cerr << "you must provide an argument for either [-r/--single], or both [-1/--left] and [-2/--right]\n"; 82 | std::exit(1); 83 | } 84 | if (singleReads and leftReads and rightReads) { 85 | std::cerr << "you 
can not provide unpaired [-r/--single] and paired-end [-1/--left], [-2/--right] reads at the same time\n"; 86 | std::exit(1); 87 | } 88 | if (!outFile) { 89 | std::cerr << "you must provide an output basename for where to write the unmapped reads\n"; 90 | std::exit(1); 91 | } 92 | 93 | auto fname = args::get(unmappedFile); 94 | std::ifstream uf(fname); 95 | if (!uf.good()) { 96 | uf.close(); 97 | std::cerr << "could not open the file [" << fname << "]\n"; 98 | std::exit(1); 99 | } 100 | std::string readName, readState; 101 | while (uf >> readName >> readState) { 102 | /* 103 | MappedState ms{MappedState::UNMAPPED}; 104 | if (readState == "u") { 105 | ms = MappedState::UNMAPPED; 106 | } else if (readState == "m1") { 107 | ms = MappedState::LEFT_MAPPED; 108 | } else if (readState == "m2") { 109 | ms = MappedState::RIGHT_MAPPED; 110 | } else if (readState == "m12") { 111 | ms = MappedState::BOTH_MAPPED; 112 | } else { 113 | std::cerr << "should not happen\n"; 114 | } 115 | targetReads[readName] = ms; 116 | */ 117 | targetReads[readName] = readState; 118 | } 119 | uf.close(); 120 | std::cerr << "There were " << targetReads.size() << " unmapped reads\n"; 121 | 122 | 123 | auto outName = args::get(outFile); 124 | auto folderName = SplitFilename(outName) ; 125 | 126 | if(!DirExists(folderName.c_str())){ 127 | MakeDir(folderName.c_str()) ; 128 | } 129 | 130 | if (singleReads) { 131 | 132 | //std::ofstream out(outName + ".fa.gz"); 133 | std::unique_ptr out = 134 | std::unique_ptr (new zstr::ofstream(outName+".fa.gz")) ; 135 | 136 | 137 | 138 | auto readNames = args::get(singleReads); 139 | std::vector files = tokenize(readNames, ','); 140 | 141 | fastx_parser::FastxParser parser(files, 1, 1); 142 | parser.start(); 143 | // Get the read group by which this thread will 144 | auto rg = parser.getReadGroup(); 145 | 146 | while (parser.refill(rg)) { 147 | // Here, rg will contain a chunk of read pairs we can process. 
148 | for (auto& r : rg) { 149 | auto targetIt = targetReads.find(r.name); 150 | std::stringstream ss ; 151 | //std::string buff(">"+r.name+" "+targetIt->second+"\n"+r.seq+"\n") ; 152 | if (targetIt != targetReads.end()) { 153 | ss << '>' << r.name << ' ' << targetIt->second << '\n' << r.seq << '\n'; 154 | } 155 | std::string buf = ss.str(); 156 | const char *op = buf.c_str() ; 157 | std::streamsize toCopy = buf.size() ; 158 | out->write(op,toCopy) ; 159 | } 160 | } 161 | //out.close(); 162 | } else { 163 | //std::ofstream outLeft(outName + "_1.fa.gz"); 164 | //std::ofstream outRight(outName + "_2.fa.gz"); 165 | std::unique_ptr outLeft = 166 | std::unique_ptr (new zstr::ofstream(outName+"_1.fa.gz")) ; 167 | std::unique_ptr outRight = 168 | std::unique_ptr (new zstr::ofstream(outName+"_2.fa.gz")) ; 169 | 170 | 171 | 172 | auto readNamesLeft = args::get(leftReads); 173 | auto readNamesRight= args::get(rightReads); 174 | std::vector files1 = tokenize(readNamesLeft, ','); 175 | std::vector files2 = tokenize(readNamesRight, ','); 176 | fastx_parser::FastxParser parser(files1, files2, 1, 1); 177 | parser.start(); 178 | 179 | // Get the read group by which this thread will 180 | auto rg = parser.getReadGroup(); 181 | 182 | while (parser.refill(rg)) { 183 | // Here, rg will contain a chunk of read pairs we can process. 
184 | for (auto& rp : rg) { 185 | auto& r1 = rp.first; 186 | auto& r2 = rp.second; 187 | auto targetIt = targetReads.find(r1.name); 188 | std::stringstream ssLeft ; 189 | std::stringstream ssRight ; 190 | if (targetIt != targetReads.end()) { 191 | ssLeft << '>' << rp.first.name << ' ' << targetIt->second << '\n' << rp.first.seq << '\n'; 192 | ssRight << '>' << rp.second.name << ' ' << targetIt->second << '\n' << rp.second.seq << '\n'; 193 | } 194 | 195 | std::string bufLeft = ssLeft.str(); 196 | std::string bufRight = ssRight.str(); 197 | const char *opLeft = bufLeft.c_str() ; 198 | const char *opRight = bufRight.c_str() ; 199 | std::streamsize toCopyLeft = bufLeft.size() ; 200 | std::streamsize toCopyRight = bufRight.size() ; 201 | outLeft->write(opLeft, toCopyLeft) ; 202 | outRight->write(opRight, toCopyRight) ; 203 | 204 | } 205 | } 206 | //outLeft.close(); 207 | //outRight.close(); 208 | } 209 | } 210 | catch (args::Help) 211 | { 212 | std::cout << parser; 213 | return; 214 | } 215 | catch (args::ParseError e) 216 | { 217 | std::cerr << e.what() << std::endl; 218 | std::cerr << parser; 219 | return; 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /src/FastxParser.cpp: -------------------------------------------------------------------------------- 1 | #include "FastxParser.hpp" 2 | 3 | #include "fcntl.h" 4 | #include "unistd.h" 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | // STEP 1: declare the type of file handler and the read() function 15 | KSEQ_INIT(gzFile, gzread) 16 | 17 | namespace fastx_parser { 18 | template 19 | FastxParser::FastxParser(std::vector files, 20 | uint32_t numConsumers, uint32_t numParsers, 21 | uint32_t chunkSize) 22 | : FastxParser(files, {}, numConsumers, numParsers, chunkSize) {} 23 | 24 | template 25 | FastxParser::FastxParser(std::vector files, 26 | std::vector files2, 27 | uint32_t numConsumers, uint32_t 
numParsers, 28 | uint32_t chunkSize) 29 | : inputStreams_(files), inputStreams2_(files2), numParsing_(0), 30 | blockSize_(chunkSize) { 31 | 32 | if (numParsers > files.size()) { 33 | std::cerr << "Can't make user of more parsing threads than file (pairs); " 34 | "setting # of parsing threads to " 35 | << files.size(); 36 | numParsers = files.size(); 37 | } 38 | numParsers_ = numParsers; 39 | 40 | // nobody is parsing yet 41 | numParsing_ = 0; 42 | 43 | readQueue_ = moodycamel::ConcurrentQueue>>( 44 | 4 * numConsumers, numParsers, 0); 45 | 46 | seqContainerQueue_ = 47 | moodycamel::ConcurrentQueue>>( 48 | 4 * numConsumers, 1 + numConsumers, 0); 49 | 50 | workQueue_ = moodycamel::ConcurrentQueue(numParsers_); 51 | 52 | // push all file ids on the queue 53 | for (size_t i = 0; i < files.size(); ++i) { 54 | workQueue_.enqueue(i); 55 | } 56 | 57 | // every parsing thread gets a consumer token for the seqContainerQueue 58 | // and a producer token for the readQueue. 59 | for (size_t i = 0; i < numParsers_; ++i) { 60 | consumeContainers_.emplace_back( 61 | new moodycamel::ConsumerToken(seqContainerQueue_)); 62 | produceReads_.emplace_back(new moodycamel::ProducerToken(readQueue_)); 63 | } 64 | 65 | // enqueue the appropriate number of read chunks so that we can start 66 | // filling them once the parser has been started. 
67 | moodycamel::ProducerToken produceContainer(seqContainerQueue_); 68 | for (size_t i = 0; i < 4 * numConsumers; ++i) { 69 | auto chunk = make_unique>(blockSize_); 70 | seqContainerQueue_.enqueue(produceContainer, std::move(chunk)); 71 | } 72 | } 73 | 74 | template ReadGroup FastxParser::getReadGroup() { 75 | return ReadGroup(getProducerToken_(), getConsumerToken_()); 76 | } 77 | 78 | template 79 | moodycamel::ProducerToken FastxParser::getProducerToken_() { 80 | return moodycamel::ProducerToken(seqContainerQueue_); 81 | } 82 | 83 | template 84 | moodycamel::ConsumerToken FastxParser::getConsumerToken_() { 85 | return moodycamel::ConsumerToken(readQueue_); 86 | } 87 | 88 | template FastxParser::~FastxParser() { 89 | for (auto& t : parsingThreads_) { 90 | t->join(); 91 | } 92 | } 93 | 94 | inline void copyRecord(kseq_t* seq, ReadSeq* s) { 95 | // Copy over the sequence and read name 96 | s->seq.assign(seq->seq.s, seq->seq.l); 97 | s->name.assign(seq->name.s, seq->name.l); 98 | } 99 | 100 | template 101 | void parseReads( 102 | std::vector& inputStreams, std::atomic& numParsing, 103 | moodycamel::ConsumerToken* cCont, moodycamel::ProducerToken* pRead, 104 | moodycamel::ConcurrentQueue& workQueue, 105 | moodycamel::ConcurrentQueue>>& 106 | seqContainerQueue_, 107 | moodycamel::ConcurrentQueue>>& readQueue_) { 108 | kseq_t* seq; 109 | T* s; 110 | uint32_t fn{0}; 111 | while (workQueue.try_dequeue(fn)) { 112 | auto file = inputStreams[fn]; 113 | std::unique_ptr> local; 114 | while (!seqContainerQueue_.try_dequeue(*cCont, local)) { 115 | std::cerr << "couldn't dequeue read chunk\n"; 116 | } 117 | size_t numObtained{local->size()}; 118 | // open the file and init the parser 119 | auto fp = gzopen(file.c_str(), "r"); 120 | 121 | // The number of reads we have in the local vector 122 | size_t numWaiting{0}; 123 | 124 | seq = kseq_init(fp); 125 | int ksv = kseq_read(seq); 126 | 127 | while (ksv >= 0) { 128 | s = &((*local)[numWaiting++]); 129 | 130 | copyRecord(seq, s); 
131 | 132 | // If we've filled the local vector, then dump to the concurrent queue 133 | if (numWaiting == numObtained) { 134 | while (!readQueue_.try_enqueue(std::move(local))) { 135 | } 136 | numWaiting = 0; 137 | numObtained = 0; 138 | // And get more empty reads 139 | while (!seqContainerQueue_.try_dequeue(*cCont, local)) { 140 | } 141 | numObtained = local->size(); 142 | } 143 | ksv = kseq_read(seq); 144 | } 145 | 146 | // If we hit the end of the file and have any reads in our local buffer 147 | // then dump them here. 148 | if (numWaiting > 0) { 149 | local->have(numWaiting); 150 | while (!readQueue_.try_enqueue(*pRead, std::move(local))) { 151 | } 152 | numWaiting = 0; 153 | } 154 | // destroy the parser and close the file 155 | kseq_destroy(seq); 156 | gzclose(fp); 157 | } 158 | 159 | --numParsing; 160 | } 161 | 162 | template 163 | void parseReadPair( 164 | std::vector& inputStreams, 165 | std::vector& inputStreams2, std::atomic& numParsing, 166 | moodycamel::ConsumerToken* cCont, moodycamel::ProducerToken* pRead, 167 | moodycamel::ConcurrentQueue& workQueue, 168 | moodycamel::ConcurrentQueue>>& 169 | seqContainerQueue_, 170 | moodycamel::ConcurrentQueue>>& readQueue_) { 171 | 172 | kseq_t* seq; 173 | kseq_t* seq2; 174 | T* s; 175 | 176 | uint32_t fn{0}; 177 | while (workQueue.try_dequeue(fn)) { 178 | // for (size_t fn = 0; fn < inputStreams.size(); ++fn) { 179 | auto& file = inputStreams[fn]; 180 | auto& file2 = inputStreams2[fn]; 181 | 182 | std::unique_ptr> local; 183 | while (!seqContainerQueue_.try_dequeue(*cCont, local)) { 184 | std::cerr << "couldn't dequeue read chunk\n"; 185 | } 186 | size_t numObtained{local->size()}; 187 | // open the file and init the parser 188 | auto fp = gzopen(file.c_str(), "r"); 189 | auto fp2 = gzopen(file2.c_str(), "r"); 190 | 191 | // The number of reads we have in the local vector 192 | size_t numWaiting{0}; 193 | 194 | seq = kseq_init(fp); 195 | seq2 = kseq_init(fp2); 196 | 197 | int ksv = kseq_read(seq); 198 | int 
ksv2 = kseq_read(seq2); 199 | while (ksv >= 0 and ksv2 >= 0) { 200 | 201 | s = &((*local)[numWaiting++]); 202 | copyRecord(seq, &s->first); 203 | copyRecord(seq2, &s->second); 204 | 205 | // If we've filled the local vector, then dump to the concurrent queue 206 | if (numWaiting == numObtained) { 207 | while (!readQueue_.try_enqueue(std::move(local))) { 208 | } 209 | numWaiting = 0; 210 | numObtained = 0; 211 | // And get more empty reads 212 | while (!seqContainerQueue_.try_dequeue(*cCont, local)) { 213 | } 214 | numObtained = local->size(); 215 | } 216 | ksv = kseq_read(seq); 217 | ksv2 = kseq_read(seq2); 218 | } 219 | 220 | // If we hit the end of the file and have any reads in our local buffer 221 | // then dump them here. 222 | if (numWaiting > 0) { 223 | local->have(numWaiting); 224 | while (!readQueue_.try_enqueue(*pRead, std::move(local))) { 225 | } 226 | numWaiting = 0; 227 | } 228 | // destroy the parser and close the file 229 | kseq_destroy(seq); 230 | gzclose(fp); 231 | kseq_destroy(seq2); 232 | gzclose(fp2); 233 | } 234 | 235 | --numParsing; 236 | } 237 | 238 | template <> bool FastxParser::start() { 239 | if (numParsing_ == 0) { 240 | for (size_t i = 0; i < numParsers_; ++i) { 241 | ++numParsing_; 242 | parsingThreads_.emplace_back(new std::thread([this, i]() { 243 | parseReads(this->inputStreams_, this->numParsing_, 244 | this->consumeContainers_[i].get(), 245 | this->produceReads_[i].get(), this->workQueue_, 246 | this->seqContainerQueue_, this->readQueue_); 247 | })); 248 | } 249 | return true; 250 | } else { 251 | return false; 252 | } 253 | } 254 | 255 | template <> bool FastxParser::start() { 256 | if (numParsing_ == 0) { 257 | 258 | // Some basic checking to ensure the read files look "sane". 
259 | if (inputStreams_.size() != inputStreams2_.size()) { 260 | throw std::invalid_argument("There should be the same number " 261 | "of files for the left and right reads"); 262 | } 263 | for (size_t i = 0; i < inputStreams_.size(); ++i) { 264 | auto& s1 = inputStreams_[i]; 265 | auto& s2 = inputStreams2_[i]; 266 | if (s1 == s2) { 267 | throw std::invalid_argument("You provided the same file " + s1 + 268 | " as both a left and right file"); 269 | } 270 | } 271 | for (size_t i = 0; i < numParsers_; ++i) { 272 | ++numParsing_; 273 | parsingThreads_.emplace_back(new std::thread([this, i]() { 274 | parseReadPair(this->inputStreams_, this->inputStreams2_, 275 | this->numParsing_, this->consumeContainers_[i].get(), 276 | this->produceReads_[i].get(), this->workQueue_, 277 | this->seqContainerQueue_, this->readQueue_); 278 | })); 279 | } 280 | return true; 281 | } else { 282 | return false; 283 | } 284 | } 285 | 286 | template bool FastxParser::refill(ReadGroup& seqs) { 287 | finishedWithGroup(seqs); 288 | while (numParsing_ > 0) { 289 | if (readQueue_.try_dequeue(seqs.consumerToken(), seqs.chunkPtr())) { 290 | return true; 291 | } 292 | } 293 | return readQueue_.try_dequeue(seqs.consumerToken(), seqs.chunkPtr()); 294 | } 295 | 296 | template void FastxParser::finishedWithGroup(ReadGroup& s) { 297 | // If this read group is holding a valid chunk, then give it back 298 | if (!s.empty()) { 299 | seqContainerQueue_.enqueue(s.producerToken(), std::move(s.takeChunkPtr())); 300 | s.setChunkEmpty(); 301 | } 302 | } 303 | 304 | template class FastxParser; 305 | template class FastxParser; 306 | } 307 | -------------------------------------------------------------------------------- /src/SalmonTools.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "args.hpp" 6 | 7 | void ExtractUnmapped(const std::string &progname, std::vector::const_iterator beginargs, 
std::vector::const_iterator endargs); 8 | 9 | using commandtype = std::function; 10 | 11 | int main(int argc, char **argv) { 12 | std::unordered_map map{ 13 | {"extract-unmapped", ExtractUnmapped} 14 | }; 15 | 16 | const std::vector args(argv + 1, argv + argc); 17 | args::ArgumentParser parser("A tool for helping to process Salmon output", 18 | "valid commands are: \n" 19 | "extract-unmapped"); 20 | args::HelpFlag help(parser, "help", "Display this help menu", {'h', "help"}); 21 | parser.Prog(argv[0]); 22 | parser.ProglinePostfix("{command options}"); 23 | args::Flag version(parser, "version", "Show the version of this program", {"version"}); 24 | args::MapPositional command(parser, "command", "Command to execute", map); 25 | command.KickOut(true); 26 | try { 27 | auto next = parser.ParseArgs(args); 28 | if (version) { 29 | std::cerr << "Salmon Tools 0.1.0\n"; 30 | return 0; 31 | } 32 | 33 | if (command) { 34 | args::get(command)(argv[0], next, std::end(args)); 35 | } else { 36 | std::cout << parser; 37 | } 38 | } 39 | catch (args::Help) { 40 | std::cout << parser; 41 | return 0; 42 | } 43 | catch (args::Error e) { 44 | std::cerr << e.what() << std::endl; 45 | std::cerr << parser; 46 | return 1; 47 | } 48 | return 0; 49 | } 50 | --------------------------------------------------------------------------------