├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── benchmark-result.pdf ├── benchmark ├── .gitignore ├── Makefile ├── README.md ├── bin │ └── .gitignore ├── compressors.config ├── cp │ ├── bcm.sh │ ├── bwz.sh │ ├── bzip2.sh │ ├── gzip.sh │ ├── tbcm.sh │ ├── tbwz.sh │ ├── twt.sh │ ├── wt.sh │ ├── xz-extreme.sh │ ├── xz.sh │ └── zpaq.sh ├── rcrdata │ ├── .gitignore │ ├── Makefile │ ├── rcrcompressors.config │ └── rcrtestcases.config ├── testcases.config ├── tmp │ └── .gitignore └── visualize.sh ├── external ├── bcm │ ├── LICENSE │ ├── Make.helper │ ├── README.md │ ├── bcm-ss.cpp │ └── bcm-ss.hpp ├── divsufsort │ ├── LICENSE │ ├── Make.helper │ ├── README.md │ ├── config.h │ ├── divsufsort.c │ ├── divsufsort.h │ ├── divsufsort_private.h │ ├── sssort.c │ ├── trsort.c │ └── utils.c ├── sdsl │ ├── COPYING │ ├── Make.helper │ ├── include │ │ └── sdsl │ │ │ ├── CMakeLists.txt │ │ │ ├── bit_vector_il.hpp │ │ │ ├── bit_vectors.hpp │ │ │ ├── bits.hpp │ │ │ ├── bp_support.hpp │ │ │ ├── bp_support_algorithm.hpp │ │ │ ├── bp_support_g.hpp │ │ │ ├── bp_support_gg.hpp │ │ │ ├── bp_support_sada.hpp │ │ │ ├── coder.hpp │ │ │ ├── coder_comma.hpp │ │ │ ├── coder_elias_delta.hpp │ │ │ ├── coder_elias_gamma.hpp │ │ │ ├── coder_fibonacci.hpp │ │ │ ├── config.hpp │ │ │ ├── construct.hpp │ │ │ ├── construct_bwt.hpp │ │ │ ├── construct_config.hpp │ │ │ ├── construct_isa.hpp │ │ │ ├── construct_lcp.hpp │ │ │ ├── construct_lcp_helper.hpp │ │ │ ├── construct_sa.hpp │ │ │ ├── construct_sa_se.hpp │ │ │ ├── csa_alphabet_strategy.hpp │ │ │ ├── csa_bitcompressed.hpp │ │ │ ├── csa_sada.hpp │ │ │ ├── csa_sampling_strategy.hpp │ │ │ ├── csa_wt.hpp │ │ │ ├── cst_fully.hpp │ │ │ ├── cst_iterators.hpp │ │ │ ├── cst_sada.hpp │ │ │ ├── cst_sct3.hpp │ │ │ ├── dac_vector.hpp │ │ │ ├── enc_vector.hpp │ │ │ ├── fast_cache.hpp │ │ │ ├── hyb_vector.hpp │ │ │ ├── int_vector.hpp │ │ │ ├── int_vector_buffer.hpp │ │ │ ├── int_vector_io_wrappers.hpp │ │ │ ├── int_vector_mapper.hpp │ │ │ ├── 
inv_perm_support.hpp │ │ │ ├── io.hpp │ │ │ ├── iterators.hpp │ │ │ ├── k2_treap.hpp │ │ │ ├── k2_treap_algorithm.hpp │ │ │ ├── k2_treap_helper.hpp │ │ │ ├── k2_tree.hpp │ │ │ ├── k2_tree_helper.hpp │ │ │ ├── lcp.hpp │ │ │ ├── lcp_bitcompressed.hpp │ │ │ ├── lcp_byte.hpp │ │ │ ├── lcp_dac.hpp │ │ │ ├── lcp_support_sada.hpp │ │ │ ├── lcp_support_tree.hpp │ │ │ ├── lcp_support_tree2.hpp │ │ │ ├── lcp_vlc.hpp │ │ │ ├── lcp_wt.hpp │ │ │ ├── louds_tree.hpp │ │ │ ├── memory_management.hpp │ │ │ ├── nearest_neighbour_dictionary.hpp │ │ │ ├── nn_dict_dynamic.hpp │ │ │ ├── qsufsort.hpp │ │ │ ├── ram_filebuf.hpp │ │ │ ├── ram_fs.hpp │ │ │ ├── rank_support.hpp │ │ │ ├── rank_support_scan.hpp │ │ │ ├── rank_support_v.hpp │ │ │ ├── rank_support_v5.hpp │ │ │ ├── raster_img.hpp │ │ │ ├── rmq_succinct_sada.hpp │ │ │ ├── rmq_succinct_sct.hpp │ │ │ ├── rmq_support.hpp │ │ │ ├── rmq_support_sparse_table.hpp │ │ │ ├── rrr_helper.hpp │ │ │ ├── rrr_vector.hpp │ │ │ ├── rrr_vector_15.hpp │ │ │ ├── sd_vector.hpp │ │ │ ├── sdsl_concepts.hpp │ │ │ ├── select_support.hpp │ │ │ ├── select_support_mcl.hpp │ │ │ ├── select_support_scan.hpp │ │ │ ├── sfstream.hpp │ │ │ ├── sorted_int_stack.hpp │ │ │ ├── sorted_multi_stack_support.hpp │ │ │ ├── sorted_stack_support.hpp │ │ │ ├── structure_tree.hpp │ │ │ ├── suffix_array_algorithm.hpp │ │ │ ├── suffix_array_helper.hpp │ │ │ ├── suffix_arrays.hpp │ │ │ ├── suffix_tree_algorithm.hpp │ │ │ ├── suffix_tree_helper.hpp │ │ │ ├── suffix_trees.hpp │ │ │ ├── uint128_t.hpp │ │ │ ├── uint256_t.hpp │ │ │ ├── uintx_t.hpp │ │ │ ├── util.hpp │ │ │ ├── vectors.hpp │ │ │ ├── vlc_vector.hpp │ │ │ ├── wavelet_trees.hpp │ │ │ ├── wm_int.hpp │ │ │ ├── wt_algorithm.hpp │ │ │ ├── wt_ap.hpp │ │ │ ├── wt_blcd.hpp │ │ │ ├── wt_gmr.hpp │ │ │ ├── wt_helper.hpp │ │ │ ├── wt_huff.hpp │ │ │ ├── wt_hutu.hpp │ │ │ ├── wt_int.hpp │ │ │ ├── wt_pc.hpp │ │ │ └── wt_rlmn.hpp │ └── lib │ │ ├── bits.cpp │ │ ├── bp_support_algorithm.cpp │ │ ├── coder_elias_delta.cpp │ │ ├── 
coder_elias_gamma.cpp │ │ ├── coder_fibonacci.cpp │ │ ├── config.cpp │ │ ├── construct_config.cpp │ │ ├── construct_isa.cpp │ │ ├── construct_lcp.cpp │ │ ├── construct_lcp_helper.cpp │ │ ├── construct_sa.cpp │ │ ├── construct_sa_se.cpp │ │ ├── csa_alphabet_strategy.cpp │ │ ├── io.cpp │ │ ├── lcp_support_tree.cpp │ │ ├── louds_tree.cpp │ │ ├── memory_management.cpp │ │ ├── nn_dict_dynamic.cpp │ │ ├── ram_filebuf.cpp │ │ ├── ram_fs.cpp │ │ ├── rrr_vector_15.cpp │ │ ├── sd_vector.cpp │ │ ├── sfstream.cpp │ │ ├── structure_tree.cpp │ │ ├── uint128_t.cpp │ │ ├── uint256_t.cpp │ │ ├── util.cpp │ │ └── wt_helper.cpp └── sg-entropy │ ├── Make.helper │ ├── entropy │ ├── arith32.cpp │ ├── arith32.h │ ├── arith64.cpp │ ├── arith64.h │ ├── range32.cpp │ ├── range32.h │ ├── range64.cpp │ └── range64.h │ ├── io │ ├── bit_stream.cpp │ ├── bit_stream.h │ ├── stream.h │ ├── stream_array.cpp │ └── stream_array.h │ ├── license.txt │ ├── order0test_arith32.cpp │ ├── order0test_arith64.cpp │ ├── order0test_range32.cpp │ ├── order0test_range64.cpp │ ├── readme.txt │ └── stdx │ ├── bit.cpp │ ├── bit.h │ ├── define.h │ ├── exception.cpp │ └── exception.h ├── include ├── aux-encoding.hpp ├── bcm-compressor.hpp ├── block-compressor.hpp ├── block-nav-support.hpp ├── block-scores-rle-model.hpp ├── bw94-compressor.hpp ├── bwt-compressor.hpp ├── bwt-config.hpp ├── bwt-run-support.hpp ├── entropy-coder.hpp ├── lheap.hpp ├── mtf-coder.hpp ├── rle0-coder.hpp ├── tbwt-compressor.hpp ├── tunneling-support.hpp ├── twobitvector.hpp └── wt-compressor.hpp └── lib ├── block-nav-support.cpp ├── bwt-run-support.cpp └── ui.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | *.x 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This code is part of the paper 2 | "On undetected redundancy in the Burrows-Wheeler Transform" 3 | by Uwe 
Baier (hopefully to be published in CPM 2018) 4 | 5 | Copyright (c) 2018 Uwe Baier 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the 10 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 11 | sell copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 
24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | include external/sg-entropy/Make.helper 2 | include external/divsufsort/Make.helper 3 | include external/bcm/Make.helper 4 | include external/sdsl/Make.helper 5 | # headers (OWN_INCS) and sources (OWN_LIBS) of this project; include/ and lib/ are prefixed further below 6 | OWN_INCS = \ 7 | aux-encoding.hpp \ 8 | block-compressor.hpp \ 9 | block-nav-support.hpp \ 10 | block-scores-rle-model.hpp \ 11 | bwt-compressor.hpp \ 12 | bwt-config.hpp \ 13 | bwt-run-support.hpp \ 14 | entropy-coder.hpp \ 15 | lheap.hpp \ 16 | mtf-coder.hpp \ 17 | rle0-coder.hpp \ 18 | tbwt-compressor.hpp \ 19 | tunneling-support.hpp \ 20 | twobitvector.hpp 21 | OWN_LIBS = \ 22 | block-nav-support.cpp \ 23 | bwt-run-support.cpp \ 24 | ui.cpp 25 | # directories handed to the compiler as -I (INC_DIRS) and -L (LIB_DIRS) 26 | INC_DIRS = external/sg-entropy external/divsufsort external/bcm external/sdsl/include include 27 | LIB_DIRS = external/sg-entropy external/divsufsort external/bcm external/sdsl/lib lib 28 | # CC_INCS is used as a prerequisite list only; the *_CC_LIBS lists are the sources passed to g++ 29 | CC_OPTS = -O3 -DNDEBUG 30 | CC_INCS = $(addprefix external/sg-entropy/,$(SG_ENTROPY_INCS)) \ 31 | $(addprefix external/divsufsort/,$(DIVSUFSORT_INCS)) \ 32 | $(addprefix external/bcm/,$(BCM_INCS)) \ 33 | $(addprefix external/sdsl/,$(SDSL_INCS)) \ 34 | $(addprefix include/,$(OWN_INCS)) 35 | CC_LIBS = $(addprefix lib/,$(OWN_LIBS)) \ 36 | $(addprefix external/divsufsort/,$(DIVSUFSORT_LIBS)) 37 | BW_CC_LIBS = $(addprefix external/sg-entropy/,$(SG_ENTROPY_LIBS)) $(CC_LIBS) 38 | BCM_CC_LIBS = $(addprefix external/bcm/,$(BCM_LIBS)) $(CC_LIBS) 39 | WT_CC_LIBS = $(addprefix external/sdsl/,$(SDSL_LIBS)) $(CC_LIBS) 40 | .PHONY: all clean 41 | all: bwzip.x tbwzip.x bcmzip.x tbcmzip.x wtzip.x twtzip.x 42 | # each executable is built from lib/ui.cpp plus its library sources; a -D define selects the compressor 43 | bwzip.x: lib/ui.cpp include/bw94-compressor.hpp $(CC_INCS) $(BW_CC_LIBS) 44 | g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \ 45 | -DBW94 $(BW_CC_LIBS) -o bwzip.x 46 | 47 | tbwzip.x: lib/ui.cpp include/bw94-compressor.hpp $(CC_INCS) $(BW_CC_LIBS) 48 | 
g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \ 49 | -DTBWT $(BW_CC_LIBS) -o tbwzip.x 50 | 51 | bcmzip.x: lib/ui.cpp include/bcm-compressor.hpp $(CC_INCS) $(BCM_CC_LIBS) 52 | g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \ 53 | -DBCM $(BCM_CC_LIBS) -o bcmzip.x 54 | 55 | tbcmzip.x: lib/ui.cpp include/bcm-compressor.hpp $(CC_INCS) $(BCM_CC_LIBS) 56 | g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \ 57 | -DTBCM $(BCM_CC_LIBS) -o tbcmzip.x 58 | 59 | wtzip.x: lib/ui.cpp include/wt-compressor.hpp $(CC_INCS) $(WT_CC_LIBS) 60 | g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \ 61 | -DWT $(WT_CC_LIBS) -o wtzip.x 62 | 63 | twtzip.x: lib/ui.cpp include/wt-compressor.hpp $(CC_INCS) $(WT_CC_LIBS) 64 | g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \ 65 | -DTWT $(WT_CC_LIBS) -o twtzip.x 66 | 67 | clean: 68 | rm -f *.x 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TBWT 2 | This repository contains an implementation and a benchmark for the so-called 3 | Tunneled BWT, which is a compression improvement for compressors using the 4 | Burrows-Wheeler Transform like [bzip2]. The tunneled BWT is described in 5 | 6 | On Undetected Redundancy in the Burrows-Wheeler Transform 7 | 8 | by Uwe Baier (hopefully to appear in CPM 2018) 9 | 10 | ## What is contained 11 | This bundle of files consists of the following parts: 12 | 1. The algorithms required to construct, compress and decompress a Tunneled BWT, 13 | contained in the `include` and `lib` directories 14 | 2. 
External resources in the `external` directory, namely 15 | - a library for suffix array construction [divsufsort](https://github.com/y-256/libdivsufsort) 16 | - a library containing different entropy coders [Entropy Coders by Sachin Garg](http://www.sachingarg.com/compression/entropy_coding/64bit) 17 | - a library containing a bundle of succinct data structures [sdsl-lite](https://github.com/simongog/sdsl-lite) 18 | - the backend of a high-performance file compressor using the BWT [bcm](https://github.com/encode84/bcm) 19 | 3. A benchmark to test the given compressors against other common lossless 20 | data compressors, see the `benchmark` directory. 21 | 22 | ## Requirements 23 | To compile the compressor(s), you need a modern C++11-ready compiler such as 24 | [gcc](https://gcc.gnu.org/) version 4.7 or newer. 25 | 26 | ## Installation 27 | Just call the command `make`. It should produce six executables: 28 | - `bwzip.x`: a compressor similar to [bzip2], but without memory limitation 29 | - `tbwzip.x`: like `bwzip.x`, enhanced with tunneling 30 | - `bcmzip.x`: a compressor similar to [bcm] 31 | - `tbcmzip.x`: like `bcmzip.x`, enhanced with tunneling 32 | - `wtzip.x`: compression of a BWT using a wavelet tree and compressed bitvectors, 33 | currently not usable for text indexing 34 | - `twtzip.x`: like `wtzip.x`, enhanced with tunneling 35 | 36 | ## Usage 37 | All six compiled compressors use the same user interface; just call one of them 38 | without a parameter to get a detailed description. 
39 | -------------------------------------------------------------------------------- /benchmark-result.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waYne1337/tbwt/e6c24549f38e0961b39b42ffb3cf56bd2c747e48/benchmark-result.pdf -------------------------------------------------------------------------------- /benchmark/.gitignore: -------------------------------------------------------------------------------- 1 | bin/*.x 2 | tmp/fcomp 3 | tmp/fres 4 | estquality.dat 5 | result.dat 6 | result.tex 7 | result.pdf 8 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Tunneled BWT Compression Benchmark 2 | Experiments comparing the compression achieved by different compressors. 3 | 4 | ## What is contained 5 | This bundle of files consists of the following parts: 6 | 1. The `cp` directory contains, for each compressor, a standardized interface 7 | for installation, compression and decompression with an arbitrary compressor. 8 | 2. A benchmark measuring compression and resource usage of each compressor 9 | 3. A benchmark measuring the estimator quality of estimators in the Tunneled BWT 10 | 4. A visualization for the benchmark data 11 | 5. 
A set of test files (which need to be downloaded first, see below), contained 12 | in the `rcrdata` directory 13 | 14 | ## Requirements 15 | To run the benchmark, you need the following: 16 | - a modern C++11-ready compiler such as [gcc](https://gcc.gnu.org/) version 4.7 or newer 17 | - [awk] 18 | - [bc] 19 | - [sed] 20 | - [tr] 21 | 22 | To visualize your results, the following programs are needed: 23 | - [pdflatex], especially supporting pgf and pgfplotstable 24 | 25 | To download and set up the test files, the following programs are needed: 26 | - [curl](https://curl.haxx.se/) 27 | - [gzip] 28 | - [bzip2] 29 | 30 | ## Installation 31 | - To install the required compressors, call `sudo make install`. Superuser rights 32 | are required to download the other compressors using [apt-get], which can be 33 | avoided by downloading the compressors yourself, see the scripts in the 34 | `cp` directory. 35 | - To download the test data, switch into the `rcrdata` directory, and call `make`. 36 | This will download and extract all of the test data using [curl]. 37 | 38 | ## Usage 39 | 40 | ### Benchmark 41 | To run the benchmark, configure the files `testcases.config` and `compressors.config` 42 | as required; an example configuration is already provided. Afterwards, call 43 | 44 | make 45 | 46 | After the benchmark has finished, 4 files are generated: 47 | - `result.dat`: a file containing the benchmark results of all test files on all compressors. 48 | Every speed measurement is measured in MB/s, every size is measured in bits per symbol 49 | (both with the size of the original file as the baseline) 50 | - `estquality.dat`: a file containing the measured relative errors of estimators 51 | used for BWT Tunneling. 52 | - `result.tex`: a file ready to be compiled with [latex], displaying the results 53 | in a more readable format 54 | - `result.pdf`: a presentation of all measurements 55 | 56 | Each of the 4 above-mentioned files can be generated separately by calling `make FILE`. 
57 | A rule of thumb for the memory usage is that the compressors will need 12 times input 58 | size or less. 59 | 60 | ### Replicating Computational Results 61 | The most straightforward way to use this benchmark is by just calling 62 | 63 | make rcr 64 | 65 | This command will automatically download the test data, set up the benchmark 66 | properly (Warning: the .config - files will be overwritten), execute the 67 | benchmark and generate all resulting files. Your machine should contain 16 GB 68 | of memory to ensure no swapping takes place. 69 | -------------------------------------------------------------------------------- /benchmark/bin/.gitignore: -------------------------------------------------------------------------------- 1 | *.x 2 | -------------------------------------------------------------------------------- /benchmark/compressors.config: -------------------------------------------------------------------------------- 1 | #define compressors (each compressor must be available as a separate .sh file in the cp directory) 2 | 3 | #list only a couple of compressors: 4 | #COMPRESSORS=bwz tbwz bcm tbcm wt twt 5 | 6 | #list all available compressors: 7 | COMPRESSORS=$(basename $(shell ls cp)) 8 | -------------------------------------------------------------------------------- /benchmark/cp/bcm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress, d = decompress, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress infile 4 | bin/bcmzip.x -c "$2" "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | bin/bcmzip.x -d "$2" "$3" 7 | elif [ "$1" = "i" ]; then #install compressor: build in the parent directory, then copy the binary into bin/ 8 | cd ..;make bcmzip.x 9 | cd benchmark;cp ../bcmzip.x bin/bcmzip.x 10 | else 11 | exit 1 12 | fi 13 | -------------------------------------------------------------------------------- /benchmark/cp/bwz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress, d = decompress, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress 
infile 4 | bin/bwzip.x -c "$2" "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | bin/bwzip.x -d "$2" "$3" 7 | elif [ "$1" = "i" ]; then #install compressor: build in the parent directory, then copy the binary into bin/ 8 | cd ..;make bwzip.x 9 | cd benchmark;cp ../bwzip.x bin/bwzip.x 10 | else 11 | exit 1 12 | fi 13 | -------------------------------------------------------------------------------- /benchmark/cp/bzip2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress $2 into $3, d = decompress $2 into $3, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress infile 4 | bzip2 -9 -c -f -k "$2" > "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | bunzip2 -c -f -k "$2" > "$3" 7 | elif [ "$1" = "i" ]; then #install compressor 8 | apt-get install bzip2 9 | else 10 | exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /benchmark/cp/gzip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress $2 into $3, d = decompress $2 into $3, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress infile 4 | gzip -c -f -k "$2" > "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | gunzip -c -f -k "$2" > "$3" 7 | elif [ "$1" = "i" ]; then #install compressor 8 | apt-get install gzip 9 | else 10 | exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /benchmark/cp/tbcm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress, d = decompress, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress infile 4 | bin/tbcmzip.x -c "$2" "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | bin/tbcmzip.x -d "$2" "$3" 7 | elif [ "$1" = "i" ]; then #install compressor: build in the parent directory, then copy the binary into bin/ 8 | cd ..;make tbcmzip.x 9 | cd benchmark;cp ../tbcmzip.x bin/tbcmzip.x 10 | else 11 | exit 1 12 | fi 13 | -------------------------------------------------------------------------------- /benchmark/cp/tbwz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress, d = decompress, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress infile 4 | 
bin/tbwzip.x -c "$2" "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | bin/tbwzip.x -d "$2" "$3" 7 | elif [ "$1" = "i" ]; then #install compressor: build in the parent directory, then copy the binary into bin/ 8 | cd ..;make tbwzip.x 9 | cd benchmark;cp ../tbwzip.x bin/tbwzip.x 10 | else 11 | exit 1 12 | fi 13 | -------------------------------------------------------------------------------- /benchmark/cp/twt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress, d = decompress, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress infile 4 | bin/twtzip.x -c "$2" "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | bin/twtzip.x -d "$2" "$3" 7 | elif [ "$1" = "i" ]; then #install compressor: build in the parent directory, then copy the binary into bin/ 8 | cd ..;make twtzip.x 9 | cd benchmark;cp ../twtzip.x bin/twtzip.x 10 | else 11 | exit 1 12 | fi 13 | -------------------------------------------------------------------------------- /benchmark/cp/wt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress, d = decompress, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress infile 4 | bin/wtzip.x -c "$2" "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | bin/wtzip.x -d "$2" "$3" 7 | elif [ "$1" = "i" ]; then #install compressor: build in the parent directory, then copy the binary into bin/ 8 | cd ..;make wtzip.x 9 | cd benchmark;cp ../wtzip.x bin/wtzip.x 10 | else 11 | exit 1 12 | fi 13 | -------------------------------------------------------------------------------- /benchmark/cp/xz-extreme.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress $2 into $3, d = decompress $2 into $3, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress infile 4 | xz -z -c -k -9e -M 100% "$2" > "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | xz -d -c -k -9e -M 100% "$2" > "$3" 7 | elif [ "$1" = "i" ]; then #install compressor 8 | apt-get install xz-utils #the xz binary used above is packaged as xz-utils, not p7zip 9 | else 10 | exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /benchmark/cp/xz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress $2 into $3, d = decompress $2 into $3, i = install; any other mode exits with status 1 3 | if [ 
"$1" = "c" ]; then #compress infile 4 | xz -z -c -k "$2" > "$3" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | xz -d -c -k "$2" > "$3" 7 | elif [ "$1" = "i" ]; then #install compressor 8 | apt-get install xz-utils #the xz binary used above is packaged as xz-utils, not p7zip 9 | else 10 | exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /benchmark/cp/zpaq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #dispatch on mode $1: c = compress, d = decompress, i = install; any other mode exits with status 1 3 | if [ "$1" = "c" ]; then #compress infile 4 | zpaq qisc "$3" "$2" 5 | elif [ "$1" = "d" ]; then #decompress infile 6 | zpaq qx "$2" "$3" 7 | elif [ "$1" = "i" ]; then #install compressor 8 | apt-get install zpaq 9 | else 10 | exit 1 11 | fi 12 | -------------------------------------------------------------------------------- /benchmark/rcrdata/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !Makefile 4 | !rcrcompressors.config 5 | !rcrtestcases.config 6 | -------------------------------------------------------------------------------- /benchmark/rcrdata/Makefile: -------------------------------------------------------------------------------- 1 | include rcrtestcases.config 2 | 3 | all: $(RCRFILES) 4 | 5 | #generate texts from the silesia corpus 6 | $(SILESIA): 7 | curl http://sun.aei.polsl.pl/~sdeor/corpus/$(@F).bz2 -o $@.bz2 8 | bunzip2 -f $@.bz2 9 | 10 | #download texts from the pizza & chili corpus 11 | $(PIZZACHILI): 12 | $(eval fdir := \ 13 | $(if $(findstring $(basename $(@F)),sources),code,\ 14 | $(if $(findstring $(basename $(@F)),pitches),music,\ 15 | $(if $(findstring $(basename $(@F)),proteins),protein,\ 16 | $(if $(findstring $(basename $(@F)),dna),dna,\ 17 | $(if $(findstring $(basename $(@F)),english),nlang,\ 18 | $(if $(findstring $(basename $(@F)),dblp.xml),xml,\ 19 | $(error unknown pizza chili category of $(@F) )))))))) 20 | curl http://pizzachili.dcc.uchile.cl/texts/$(fdir)/$(@F).gz -o $@.gz 21 | gunzip -f $@.gz 22 | 23 | 
#generate repetitive texts 24 | $(REPETITIVE): 25 | curl http://pizzachili.dcc.uchile.cl/repcorpus/real/$(@F).gz -o $@.gz 26 | gunzip -f $@.gz 27 | 28 | clean: 29 | rm -f $(RCRFILES) 30 | -------------------------------------------------------------------------------- /benchmark/rcrdata/rcrcompressors.config: -------------------------------------------------------------------------------- 1 | COMPRESSORS=bwz tbwz xz-extreme zpaq 2 | -------------------------------------------------------------------------------- /benchmark/rcrdata/rcrtestcases.config: -------------------------------------------------------------------------------- 1 | #benchmark setup 2 | 3 | #test files categorized 4 | SILESIA = \ 5 | dickens \ 6 | mozilla \ 7 | mr \ 8 | nci \ 9 | ooffice \ 10 | osdb \ 11 | reymont \ 12 | samba \ 13 | sao \ 14 | webster \ 15 | xml \ 16 | x-ray 17 | PIZZACHILI = \ 18 | sources \ 19 | pitches \ 20 | proteins \ 21 | dna \ 22 | english.1024MB \ 23 | dblp.xml 24 | REPETITIVE = \ 25 | Escherichia_Coli \ 26 | cere \ 27 | coreutils \ 28 | einstein.de.txt \ 29 | einstein.en.txt \ 30 | influenza \ 31 | kernel \ 32 | para \ 33 | world_leaders 34 | RCRFILES=$(SILESIA) $(PIZZACHILI) $(REPETITIVE) 35 | 36 | #test files to be used for the benchmark 37 | TCFILES = $(addprefix rcrdata/,$(RCRFILES)) 38 | -------------------------------------------------------------------------------- /benchmark/testcases.config: -------------------------------------------------------------------------------- 1 | #benchmark setup 2 | 3 | #test files to be used for the benchmark 4 | TCFILES = README.md visualize.sh 5 | -------------------------------------------------------------------------------- /benchmark/tmp/.gitignore: -------------------------------------------------------------------------------- 1 | *.x 2 | -------------------------------------------------------------------------------- /external/bcm/LICENSE: -------------------------------------------------------------------------------- 1 
| The MIT License (MIT) 2 | 3 | Copyright (C) 2008-2016 Ilya Muravyov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /external/bcm/Make.helper: -------------------------------------------------------------------------------- 1 | BCM_INCS = \ 2 | bcm-ss.hpp 3 | BCM_LIBS = \ 4 | bcm-ss.cpp 5 | -------------------------------------------------------------------------------- /external/bcm/README.md: -------------------------------------------------------------------------------- 1 | # BCM v1.25 2 | 3 | ### Description 4 | BCM is a high-performance file compressor that utilizes advanced context modeling techniques to achieve a very high compression ratio. All in all, it's like a big brother of the BZIP2. 
5 | 6 | ### Author 7 | Ilya Muravyov 8 | 9 | ### Thanks 10 | Special thanks to Yuta Mori, Matt Mahoney, Eugene Shelwien, Alexander Rhatushnyak, Przemyslaw Skibinski, Malcolm Taylor and LovePimple. 11 | -------------------------------------------------------------------------------- /external/bcm/bcm-ss.cpp: -------------------------------------------------------------------------------- 1 | // This is the implementation of the second stage BWT transformation of BCM, 2 | // invented by Ilya Muravyov 3 | #include "bcm-ss.hpp" 4 | 5 | using namespace bcm; 6 | 7 | //// ENCODER IMPLEMENTATION //// 8 | // Start with the widest interval [0, uint(-1)] and a zeroed decoder code word. 9 | Encoder::Encoder() 10 | { 11 | low=0; 12 | high=uint(-1); 13 | code=0; 14 | } 15 | // Encode a 0 bit. The split point is low+(high-low)*p/2^18 (both #if branches compute that value); a 0 bit keeps the part above it. Leading bytes on which low and high agree are written to out. 16 | void Encoder::EncodeBit0(uint p, std::ostream &out) 17 | { 18 | #ifdef _WIN64 19 | low+=((ulonglong(high-low)*p)>>18)+1; 20 | #else 21 | low+=((ulonglong(high-low)*(p<<(32-18)))>>32)+1; 22 | #endif 23 | while ((low^high)<(1<<24)) 24 | { 25 | out.put(low>>24); 26 | low<<=8; 27 | high=(high<<8)+255; 28 | } 29 | } 30 | // Encode a 1 bit. Same split point as EncodeBit0; a 1 bit keeps the part up to and including it, so high is lowered. Leading bytes on which low and high agree are written to out. 31 | void Encoder::EncodeBit1(uint p, std::ostream &out) 32 | { 33 | #ifdef _WIN64 34 | high=low+((ulonglong(high-low)*p)>>18); 35 | #else 36 | high=low+((ulonglong(high-low)*(p<<(32-18)))>>32); 37 | #endif 38 | while ((low^high)<(1<<24)) 39 | { 40 | out.put(low>>24); 41 | low<<=8; 42 | high=(high<<8)+255; 43 | } 44 | } 45 | // Write the four bytes of low to out, most significant byte first. 46 | void Encoder::Flush(std::ostream &out) 47 | { 48 | for (int i=0; i<4; ++i) 49 | { 50 | out.put(low>>24); 51 | low<<=8; 52 | } 53 | } 54 | // Prime the decoder: shift the next four input bytes into code, most significant byte first. NOTE(review): the result of in.get() is not checked for end of input. 55 | void Encoder::Init(std::istream &in) 56 | { 57 | for (int i=0; i<4; ++i) 58 | code=(code<<8)+in.get(); 59 | } 60 | // Decode one bit using the same split point as the encoder: code<=mid yields 1 (high=mid), otherwise 0 (low=mid+1). One input byte is shifted into code for every leading byte on which low and high agree. 61 | int Encoder::DecodeBit(uint p, std::istream &in) 62 | { 63 | #ifdef _WIN64 64 | const uint mid=low+((ulonglong(high-low)*p)>>18); 65 | #else 66 | const uint mid=low+((ulonglong(high-low)*(p<<(32-18)))>>32); 67 | #endif 68 | const int bit=(code<=mid); 69 | if (bit) 70 | high=mid; 71 | else 72 | low=mid+1; 73 | 74 | while ((low^high)<(1<<24)) 75 | { 76 | low<<=8; 77 | high=(high<<8)+255; 
78 | code=(code<<8)+in.get(); 79 | } 80 | 81 | return bit; 82 | } 83 | 84 | //// BWT ENCODER IMPLEMENTATION //// 85 | // Clear the byte history and preload every counter2 row with the values k<<12; the last entry (k==16) is lowered by one, from 65536 to 65535. 86 | CM::CM() 87 | { 88 | c1=0; 89 | c2=0; 90 | run=0; 91 | 92 | for (int i=0; i<2; ++i) 93 | { 94 | for (int j=0; j<256; ++j) 95 | { 96 | for (int k=0; k<17; ++k) 97 | counter2[i][j][k].p=(k<<12)-(k==16); 98 | } 99 | } 100 | } 101 | // Write the 32 bits of n, most significant bit first, each with the fixed probability 1<<17 (one half of the 2^18 scale used by Encoder). 102 | void CM::Encode32(uint n, std::ostream &out) 103 | { 104 | for (int i=0; i<32; ++i) 105 | { 106 | if (n&(1u<<31)) // unsigned literal: 1<<31 would shift into the sign bit of int 107 | Encoder::EncodeBit1(1<<17, out); 108 | else 109 | Encoder::EncodeBit0(1<<17, out); 110 | n+=n; 111 | } 112 | } 113 | // Read 32 bits with the same fixed probability as Encode32 and assemble them most significant bit first. 114 | uint CM::Decode32(std::istream &in) 115 | { 116 | uint n=0; 117 | for (int i=0; i<32; ++i) 118 | n+=n+Encoder::DecodeBit(1<<17, in); 119 | 120 | return n; 121 | } 122 | // Encode the low eight bits of c, most significant bit first, updating the counters after every bit. 123 | void CM::Encode(int c, std::ostream &out) 124 | { 125 | if (c1==c2) 126 | ++run; 127 | else 128 | run=0; 129 | const int f=(run>2); // selects one of the two counter2 tables 130 | // ctx starts at 1 and takes in one coded bit per iteration; the loop ends after eight bits (ctx>=256) 131 | int ctx=1; 132 | while (ctx<256) 133 | { 134 | const int p0=counter0[ctx].p; 135 | const int p1=counter1[c1][ctx].p; 136 | const int p2=counter1[c2][ctx].p; 137 | const int p=((p0+p1)*7+p2+p2)>>4; // weights 7/16, 7/16 and 2/16 138 | // interpolate between the two neighbouring counter2 entries selected by the top bits of p 139 | const int j=p>>12; 140 | const int x1=counter2[f][ctx][j].p; 141 | const int x2=counter2[f][ctx][j+1].p; 142 | const int ssep=x1+(((x2-x1)*(p&4095))>>12); 143 | 144 | const int bit=c&128; 145 | c+=c; 146 | // the probability handed to the coder is ssep*3+p 147 | if (bit) 148 | { 149 | Encoder::EncodeBit1(ssep*3+p, out); 150 | counter0[ctx].UpdateBit1(); 151 | counter1[c1][ctx].UpdateBit1(); 152 | counter2[f][ctx][j].UpdateBit1(); 153 | counter2[f][ctx][j+1].UpdateBit1(); 154 | ctx+=ctx+1; 155 | } 156 | else 157 | { 158 | Encoder::EncodeBit0(ssep*3+p, out); 159 | counter0[ctx].UpdateBit0(); 160 | counter1[c1][ctx].UpdateBit0(); 161 | counter2[f][ctx][j].UpdateBit0(); 162 | counter2[f][ctx][j+1].UpdateBit0(); 163 | ctx+=ctx; 164 | } 165 | } 166 | // shift the byte history: c1 becomes the byte just coded, c2 the one before it 167 | c2=c1; 168 | c1=ctx&255; 169 | } 170 | 171 | int CM::Decode(std::istream &in) 172 | { 173 | if (c1==c2) 174 | ++run; 175 | else 176 | run=0; 177 | const int 
f=(run>2); 178 | 179 | int ctx=1; 180 | while (ctx<256) 181 | { 182 | const int p0=counter0[ctx].p; 183 | const int p1=counter1[c1][ctx].p; 184 | const int p2=counter1[c2][ctx].p; 185 | const int p=((p0+p1)*7+p2+p2)>>4; 186 | 187 | const int j=p>>12; 188 | const int x1=counter2[f][ctx][j].p; 189 | const int x2=counter2[f][ctx][j+1].p; 190 | const int ssep=x1+(((x2-x1)*(p&4095))>>12); 191 | // same probability expression as in CM::Encode, so the decoder follows the encoder's model 192 | const int bit=Encoder::DecodeBit(ssep*3+p, in); 193 | 194 | if (bit) 195 | { 196 | counter0[ctx].UpdateBit1(); 197 | counter1[c1][ctx].UpdateBit1(); 198 | counter2[f][ctx][j].UpdateBit1(); 199 | counter2[f][ctx][j+1].UpdateBit1(); 200 | ctx+=ctx+1; 201 | } 202 | else 203 | { 204 | counter0[ctx].UpdateBit0(); 205 | counter1[c1][ctx].UpdateBit0(); 206 | counter2[f][ctx][j].UpdateBit0(); 207 | counter2[f][ctx][j+1].UpdateBit0(); 208 | ctx+=ctx; 209 | } 210 | } 211 | // the low eight bits of ctx are the decoded byte; it also becomes the new c1 212 | c2=c1; 213 | return c1=ctx&255; 214 | } 215 | -------------------------------------------------------------------------------- /external/bcm/bcm-ss.hpp: -------------------------------------------------------------------------------- 1 | // This is a header file to cover the second stage BWT transformation of BCM, 2 | // invented by Ilya Muravyov 3 | 4 | #ifndef BCM_SS_HPP 5 | #define BCM_SS_HPP 6 | 7 | #include <istream> 8 | #include <ostream> 9 | 10 | namespace bcm { 11 | 12 | typedef unsigned char byte; 13 | typedef unsigned short word; 14 | typedef unsigned int uint; 15 | typedef unsigned long long ulonglong; 16 | 17 | //basic encoder: binary coder over the interval [low, high]; code is the decoder's current input word 18 | 19 | struct Encoder 20 | { 21 | uint low; 22 | uint high; 23 | uint code; 24 | 25 | Encoder(); 26 | void EncodeBit0(uint p, std::ostream &out); 27 | void EncodeBit1(uint p, std::ostream &out); 28 | void Flush(std::ostream &out); 29 | void Init(std::istream &in); 30 | int DecodeBit(uint p, std::istream &in); 31 | }; 32 | 33 | //counter: p starts at 1<<15 and moves by a 1/2^RATE fraction per update, up on a 1 bit and down on a 0 bit 34 | template<int RATE> 35 | struct Counter 36 | { 37 | word p; 38 | Counter() 39 | { 40 | p=1<<15; 41 | } 42 | void UpdateBit0() 43 | { 44 | p-=p>>RATE; 45 | } 46 | 
void UpdateBit1() 47 | { 48 | p+=(p^65535)>>RATE; 49 | } 50 | }; 51 | 52 | //BWT encoder 53 | struct CM: Encoder 54 | { 55 | Counter<2> counter0[256]; 56 | Counter<4> counter1[256][256]; 57 | Counter<6> counter2[2][256][17]; 58 | int c1; 59 | int c2; 60 | int run; 61 | 62 | CM(); 63 | 64 | void Encode32(uint n, std::ostream &out); 65 | uint Decode32(std::istream &in); 66 | void Encode(int c, std::ostream &out); 67 | int Decode(std::istream &in); 68 | }; 69 | 70 | //// EXAMPLES OF USE ////////////////////////////////////////////////////////// 71 | /* 72 | //ENCODING OF A BWT 73 | CM cm; 74 | cm.Encode32(n, out); 75 | for (int i=0; i 78 | #include 79 | #include 80 | 81 | #include 82 | 83 | int main() { 84 | // intput data 85 | char *Text = "abracadabra"; 86 | int n = strlen(Text); 87 | int i, j; 88 | 89 | // allocate 90 | int *SA = (int *)malloc(n * sizeof(int)); 91 | 92 | // sort 93 | divsufsort((unsigned char *)Text, SA, n); 94 | 95 | // output 96 | for(i = 0; i < n; ++i) { 97 | printf("SA[%2d] = %2d: ", i, SA[i]); 98 | for(j = SA[i]; j < n; ++j) { 99 | printf("%c", Text[j]); 100 | } 101 | printf("$\n"); 102 | } 103 | 104 | // deallocate 105 | free(SA); 106 | 107 | return 0; 108 | } 109 | ``` 110 | See the [examples](examples) directory for a few other examples. 111 | 112 | ## Benchmarks 113 | See [Benchmarks](https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md) page for details. 114 | 115 | ## License 116 | libdivsufsort is released under the [MIT license](LICENSE "MIT license"). 117 | > The MIT License (MIT) 118 | > 119 | > Copyright (c) 2003 Yuta Mori All rights reserved. 
120 | > 121 | > Permission is hereby granted, free of charge, to any person obtaining a copy 122 | > of this software and associated documentation files (the "Software"), to deal 123 | > in the Software without restriction, including without limitation the rights 124 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 125 | > copies of the Software, and to permit persons to whom the Software is 126 | > furnished to do so, subject to the following conditions: 127 | > 128 | > The above copyright notice and this permission notice shall be included in all 129 | > copies or substantial portions of the Software. 130 | > 131 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 132 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 133 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 134 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 135 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 136 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 137 | > SOFTWARE. 138 | 139 | ## Author 140 | * Yuta Mori 141 | -------------------------------------------------------------------------------- /external/divsufsort/config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * config.h for libdivsufsort 3 | * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person 6 | * obtaining a copy of this software and associated documentation 7 | * files (the "Software"), to deal in the Software without 8 | * restriction, including without limitation the rights to use, 9 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following 12 | * conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be 15 | * included in all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 19 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 21 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 22 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 24 | * OTHER DEALINGS IN THE SOFTWARE. 25 | */ 26 | 27 | #ifndef _DIVSUFSORT_CONFIG_H 28 | #define _DIVSUFSORT_CONFIG_H 1 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif /* __cplusplus */ 33 | 34 | /** Define to the version of this package. **/ 35 | #define PROJECT_VERSION_FULL "2.0.2" 36 | 37 | /** Define to 1 if you have the header files. 
**/ 38 | #define HAVE_INTTYPES_H 1 39 | #define HAVE_STDDEF_H 1 40 | #define HAVE_STDINT_H 1 41 | #define HAVE_STDLIB_H 1 42 | #define HAVE_STRING_H 1 43 | #define HAVE_STRINGS_H 1 44 | #define HAVE_MEMORY_H 1 45 | #define HAVE_SYS_TYPES_H 1 46 | 47 | /** for WinIO **/ 48 | /* #undef HAVE_IO_H */ 49 | /* #undef HAVE_FCNTL_H */ 50 | /* #undef HAVE__SETMODE */ 51 | /* #undef HAVE_SETMODE */ 52 | /* #undef HAVE__FILENO */ 53 | /* #undef HAVE_FOPEN_S */ 54 | /* #undef HAVE__O_BINARY */ 55 | #ifndef HAVE__SETMODE 56 | # if HAVE_SETMODE 57 | # define _setmode setmode 58 | # define HAVE__SETMODE 1 59 | # endif 60 | # if HAVE__SETMODE && !HAVE__O_BINARY 61 | # define _O_BINARY 0 62 | # define HAVE__O_BINARY 1 63 | # endif 64 | #endif 65 | 66 | /** for inline **/ 67 | #ifndef INLINE 68 | # define INLINE inline 69 | #endif 70 | 71 | /** for VC++ warning **/ 72 | #ifdef _MSC_VER 73 | #pragma warning(disable: 4127) 74 | #endif 75 | 76 | 77 | #ifdef __cplusplus 78 | } /* extern "C" */ 79 | #endif /* __cplusplus */ 80 | 81 | #endif /* _DIVSUFSORT_CONFIG_H */ 82 | -------------------------------------------------------------------------------- /external/sdsl/COPYING: -------------------------------------------------------------------------------- 1 | The sdsl copyright is as follows: 2 | 3 | Copyright (C) 2007-2014 Simon Gog All Right Reserved. 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. 
If not, see http://www.gnu.org/licenses/ . 17 | -------------------------------------------------------------------------------- /external/sdsl/Make.helper: -------------------------------------------------------------------------------- 1 | SDSL_INCS = $(addprefix include/sdsl/,$(shell ls external/sdsl/include/sdsl)) 2 | SDSL_LIBS = $(addprefix lib/,$(shell ls external/sdsl/lib)) 3 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include(CheckIncludeFile) 2 | include(CheckIncludeFileCXX) 3 | include(CheckTypeSize) 4 | 5 | ## Check for header files ## 6 | check_include_file_CXX(cstdio HAVE_STDIO) 7 | 8 | ## copy hpp files to the binary tree ## 9 | 10 | file(GLOB hppFiles RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp") # select all .hpp-files 11 | 12 | foreach(hppFile ${hppFiles}) # copy each file 13 | configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/${hppFile}" "${CMAKE_CURRENT_BINARY_DIR}/${hppFile}" COPYONLY ) 14 | install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${hppFile}" DESTINATION include/sdsl) 15 | # MESSAGE(${hppFile}) 16 | endforeach(hppFile) 17 | 18 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/bit_vectors.hpp: -------------------------------------------------------------------------------- 1 | /*! \file bit_vectors.hpp 2 | \brief bit_vectors.hpp contains classes for uncompressed and compressed bit vector representations. 
3 | \author Simon Gog 4 | */ 5 | #ifndef INCLUDED_SDSL_BITVECTORS 6 | #define INCLUDED_SDSL_BITVECTORS 7 | 8 | #include "int_vector.hpp" 9 | #include "bit_vector_il.hpp" 10 | #include "rrr_vector.hpp" 11 | #include "sd_vector.hpp" 12 | #include "hyb_vector.hpp" 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/bp_support.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2009 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file bp_support.hpp 18 | \brief bp_support.hpp contains several classes which support find_open, find_close, enclose and rr-enclose queries. 
19 | \author Simon Gog 20 | */ 21 | 22 | #ifndef INCLUDED_SDSL_BP_SUPPORT 23 | #define INCLUDED_SDSL_BP_SUPPORT 24 | 25 | /** \defgroup bps Balanced Parentheses Supports (BPS) 26 | * This group contains data structures which supports a sdsl::bit_vector with the following methods: 27 | * - find_open 28 | * - find_close 29 | * - enclose 30 | * - double_enclose 31 | * - rank 32 | * - select 33 | * - excess 34 | * - rr_enclose 35 | */ 36 | 37 | #include "bp_support_g.hpp" 38 | #include "bp_support_gg.hpp" 39 | #include "bp_support_sada.hpp" 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/coder.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2008 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! 
\file coder.hpp 18 | \brief coder.hpp contains the coder namespace and includes the header files of sdsl::coder::fibonacci, sdsl::coder::elias_delta, and sdsl::coder::run_length 19 | \author Simon Gog 20 | */ 21 | #ifndef SDSL_CODER 22 | #define SDSL_CODER 23 | 24 | #include "int_vector.hpp" 25 | #include "coder_fibonacci.hpp" 26 | #include "coder_elias_delta.hpp" 27 | #include "coder_elias_gamma.hpp" 28 | #include "coder_comma.hpp" 29 | 30 | namespace sdsl 31 | { 32 | 33 | //! Namespace for the different coder of the sdsl. 34 | namespace coder 35 | { 36 | 37 | template 38 | class run_length 39 | { 40 | public: 41 | typedef uint64_t size_type; 42 | static void encode(uint64_t x, uint64_t*& z, uint8_t offset); 43 | static uint64_t encoding_length(const uint64_t* s, uint8_t s_offset, size_type bit_length); 44 | }; 45 | 46 | template 47 | typename run_length::size_type run_length::encoding_length(const uint64_t* s, uint8_t s_offset, size_type bit_length) 48 | { 49 | assert(s_offset < 64); 50 | size_type i=0; 51 | uint64_t w = (*s >> s_offset); 52 | uint8_t last_bit = w&1; 53 | size_type result = 0; 54 | while (i < bit_length) { 55 | size_type len = 0; 56 | while (last_bit == (w&1) and i < bit_length) { 57 | // std::cout<>= 1; 60 | if (s_offset == 64) { 61 | s_offset = 0; 62 | w = *(++s); 63 | } 64 | } 65 | // std::cout<<"len="< 6 | #include 7 | 8 | namespace sdsl 9 | { 10 | namespace conf // namespace for library constant 11 | { 12 | // size of the buffer for reading and writing data in elements (not in bytes) 13 | const uint64_t SDSL_BLOCK_SIZE = (uint64_t)1<<22; 14 | 15 | const char KEY_BWT[] = "bwt"; 16 | const char KEY_BWT_INT[] = "bwt_int"; 17 | const char KEY_SA[] = "sa"; 18 | const char KEY_CSA[] = "csa"; 19 | const char KEY_CST[] = "cst"; 20 | const char KEY_ISA[] = "isa"; 21 | const char KEY_TEXT[] = "text"; 22 | const char KEY_TEXT_INT[] = "text_int"; 23 | const char KEY_PSI[] = "psi"; 24 | const char KEY_LCP[] = "lcp"; 25 | const char KEY_SAMPLE_CHAR[]= 
"sample_char"; 26 | } 27 | typedef uint64_t int_vector_size_type; 28 | 29 | typedef std::map tMSS; 30 | 31 | enum format_type {JSON_FORMAT, R_FORMAT, HTML_FORMAT}; 32 | 33 | enum byte_sa_algo_type {LIBDIVSUFSORT, SE_SAIS}; 34 | 35 | //! Helper class for construction process 36 | struct cache_config { 37 | bool delete_files; // Flag which indicates if all files which were created 38 | // during construction should be deleted. 39 | std::string dir; // Directory for temporary files. 40 | std::string id; // Identifier is part of temporary file names. If 41 | // id is the empty string, then it will be replace 42 | // a concatenation of PID and a unique ID inside the 43 | // current process. 44 | tMSS file_map; // Files stored during the construction process. 45 | cache_config(bool f_delete_files=true, std::string f_dir="./", std::string f_id="", tMSS f_file_map=tMSS()); 46 | }; 47 | 48 | //! Helper classes to transform width=0 and width=8 to corresponding text key 49 | template 50 | struct key_text_trait { 51 | static const char* KEY_TEXT; 52 | }; 53 | 54 | //! Helper classes to transform width=0 and width=8 to corresponding bwt key 55 | template 56 | struct key_bwt_trait { 57 | static const char* KEY_BWT; 58 | }; 59 | } 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/construct_bwt.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2010 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file construct_bwt.hpp 18 | \brief construct_bwt.hpp contains a space and time efficient construction method for the Burrows and Wheeler Transform (BWT). 19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_CONSTRUCT_BWT 22 | #define INCLUDED_SDSL_CONSTRUCT_BWT 23 | 24 | #include "int_vector.hpp" 25 | #include "sfstream.hpp" 26 | #include "util.hpp" 27 | #include "config.hpp" // for cache_config 28 | 29 | #include 30 | #include 31 | #include 32 | 33 | namespace sdsl 34 | { 35 | 36 | //! Constructs the Burrows and Wheeler Transform (BWT) from text over byte- or integer-alphabet and suffix array. 37 | /*! The algorithm constructs the BWT and stores it to disk. 38 | * \tparam t_width Width of the text. 0==integer alphabet, 8=byte alphabet. 39 | * \param config Reference to cache configuration 40 | * \par Space complexity 41 | * \f$ n \log \sigma \f$ bits 42 | * \pre Text and Suffix array exist in the cache. Keys: 43 | * * conf::KEY_TEXT for t_width=8 or conf::KEY_TEXT_INT for t_width=0 44 | * * conf::KEY_SA 45 | * \post BWT exist in the cache. 
Key 46 | * * conf::KEY_BWT for t_width=8 or conf::KEY_BWT_INT for t_width=0 47 | */ 48 | template 49 | void construct_bwt(cache_config& config) 50 | { 51 | static_assert(t_width == 0 or t_width == 8 , "construct_bwt: width must be `0` for integer alphabet and `8` for byte alphabet"); 52 | 53 | typedef int_vector<>::size_type size_type; 54 | typedef int_vector text_type; 55 | typedef int_vector_buffer bwt_type; 56 | const char* KEY_TEXT = key_text_trait::KEY_TEXT; 57 | const char* KEY_BWT = key_bwt_trait::KEY_BWT; 58 | 59 | // (1) Load text from disk 60 | text_type text; 61 | load_from_cache(text, KEY_TEXT, config); 62 | size_type n = text.size(); 63 | uint8_t bwt_width = text.width(); 64 | 65 | // (2) Prepare to stream SA from disc and BWT to disc 66 | size_type buffer_size = 1000000; // buffer_size is a multiple of 8!, TODO: still true? 67 | int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config), std::ios::in, buffer_size); 68 | std::string bwt_file = cache_file_name(KEY_BWT, config); 69 | bwt_type bwt_buf(bwt_file, std::ios::out, buffer_size, bwt_width); 70 | 71 | // (3) Construct BWT sequentially by streaming SA and random access to text 72 | size_type to_add[2] = {(size_type)-1,n-1}; 73 | for (size_type i=0; i < n; ++i) { 74 | bwt_buf[i] = text[ sa_buf[i]+to_add[sa_buf[i]==0] ]; 75 | } 76 | bwt_buf.close(); 77 | register_cache_file(KEY_BWT, config); 78 | } 79 | 80 | }// end namespace 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/construct_config.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_SDSL_CONSTRUCT_CONFIG 2 | #define INCLUDED_SDSL_CONSTRUCT_CONFIG 3 | 4 | #include "config.hpp" 5 | 6 | namespace sdsl 7 | { 8 | 9 | class construct_config 10 | { 11 | public: 12 | static byte_sa_algo_type byte_algo_sa; 13 | 14 | construct_config() = delete; 15 | }; 16 | 17 | } 18 | 19 | #endif 20 | 
-------------------------------------------------------------------------------- /external/sdsl/include/sdsl/construct_isa.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2010 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file construct_isa.hpp 18 | \brief construct_isa.hpp contains a space and time efficient construction method for the inverse suffix array 19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_CONSTRUCT_ISA 22 | #define INCLUDED_SDSL_CONSTRUCT_ISA 23 | 24 | #include "int_vector.hpp" 25 | #include "util.hpp" 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | namespace sdsl 32 | { 33 | 34 | void construct_isa(cache_config& config); 35 | 36 | }// end namespace 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/construct_lcp_helper.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_SDSL_CONSTRUCT_LCP_HELPER 2 | #define INCLUDED_SDSL_CONSTRUCT_LCP_HELPER 3 | 4 | #include "sdsl/int_vector.hpp" 5 | #include 6 | #include 7 | #include 8 | 9 | namespace sdsl 10 | { 11 | 12 | 13 | void insert_lcp_values(int_vector<>& partial_lcp, bit_vector& index_done, std::string lcp_file, 
uint64_t max_lcp_value, uint64_t lcp_value_offset); 14 | 15 | template 16 | void create_C_array(std::vector& C, const tWT& wt) 17 | { 18 | uint64_t quantity; // quantity of characters in interval 19 | std::vector cs(wt.sigma); // list of characters in the interval 20 | std::vector rank_c_i(wt.sigma); // number of occurrence of character in [0 .. i-1] 21 | std::vector rank_c_j(wt.sigma); // number of occurrence of character in [0 .. j-1] 22 | 23 | C = std::vector(257, 0); 24 | interval_symbols(wt, 0, wt.size(), quantity, cs, rank_c_i, rank_c_j); 25 | for (uint64_t i=0; i tQ; 39 | private: 40 | static const uint32_t m_buffer_size = 10000;//409600; 41 | uint8_t m_write_buf[m_buffer_size]; 42 | uint8_t m_read_buf[m_buffer_size]; 43 | size_type m_widx; // write index 44 | size_type m_ridx; // read index 45 | bool m_sync; // are read and write buffer the same? 46 | size_type m_disk_buffered_blocks; // number of blocks written to disk and not read again yet 47 | char m_c; 48 | size_type m_rb; // read blocks 49 | size_type m_wb; // written blocks 50 | 51 | std::string m_file_name; 52 | 53 | std::fstream m_stream; 54 | 55 | public: 56 | 57 | buffered_char_queue(); 58 | void init(const std::string& dir, char c); 59 | ~buffered_char_queue(); 60 | void push_back(uint8_t x); 61 | uint8_t pop_front(); 62 | }; 63 | 64 | typedef std::list::size_type> tLI; 65 | typedef std::vector::size_type> tVI; 66 | 67 | template 68 | void push_front_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t (&m_chars)[256], size_type_class& m_char_count) 69 | { 70 | if (m_list[c].empty()) { 71 | m_chars[m_char_count++] = c; 72 | } 73 | m_list[c].push_front(i); 74 | } 75 | 76 | template 77 | void push_back_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t (&m_chars)[256], size_type_class& m_char_count) 78 | { 79 | if (m_list[c].empty()) { 80 | m_chars[m_char_count++] = c; 81 | } 82 | m_list[c].push_back(i); 83 | } 84 | 85 | void lcp_info(tMSS& file_map); 86 | 87 | } 88 | 89 
| #endif 90 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/fast_cache.hpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef INCLUDED_SDSL_FAST_CACHE 3 | #define INCLUDED_SDSL_FAST_CACHE 4 | 5 | #include "int_vector.hpp" 6 | 7 | namespace sdsl 8 | { 9 | 10 | #define CACHE_SIZE 0x3FFULL 11 | 12 | struct fast_cache { 13 | typedef int_vector<>::size_type size_type; 14 | size_type m_table[2*(CACHE_SIZE+1)]; 15 | // Constructor 16 | fast_cache() { 17 | for (size_type i=0; i < (CACHE_SIZE+1); ++i) { 18 | m_table[i<<1] = (size_type)-1; 19 | } 20 | } 21 | // Returns true if the request i is cached and 22 | // x is set to the answer of request i 23 | bool exists(size_type i, size_type& x) { 24 | if (m_table[(i&CACHE_SIZE)<<1 ] == i) { 25 | x = m_table[((i&CACHE_SIZE)<<1) + 1 ]; 26 | return true; 27 | } else 28 | return false; 29 | } 30 | // Writes the answer for request i to the cache 31 | void write(size_type i, size_type x) { 32 | m_table[(i&CACHE_SIZE)<<1 ] = i; 33 | m_table[((i&CACHE_SIZE)<<1) + 1 ] = x; 34 | } 35 | }; 36 | 37 | } // end namespace sdsl 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/k2_treap_helper.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2014 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file k2_treap_helper.hpp 18 | \brief k2_treap_helper.hpp contains helper functions and definitions for a k^2-treap implementation. 19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_K2_TREAP_HELPER 22 | #define INCLUDED_SDSL_K2_TREAP_HELPER 23 | 24 | #include "sdsl/vectors.hpp" 25 | #include "sdsl/bits.hpp" 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | //! Namespace for the succinct data structure library. 35 | namespace sdsl 36 | { 37 | 38 | namespace k2_treap_ns 39 | { 40 | 41 | // Precomputed value for fast k^2 treap operations 42 | template 43 | struct precomp { 44 | static struct impl { 45 | uint64_t exp[65]; 46 | impl() 47 | { 48 | exp[0] = 1; 49 | for (uint8_t i=1; i<65; ++i) { 50 | exp[i] = t_k * exp[i-1]; 51 | } 52 | } 53 | } data; 54 | 55 | static uint64_t exp(uint8_t l) 56 | { 57 | return data.exp[l]; 58 | } 59 | 60 | static uint64_t divexp(uint64_t x, uint8_t l) 61 | { 62 | return x/data.exp[l]; 63 | } 64 | 65 | static uint64_t modexp(uint64_t x, uint8_t l) 66 | { 67 | return x%data.exp[l]; 68 | } 69 | }; 70 | 71 | template<> 72 | struct precomp<2> { 73 | static uint64_t exp(uint8_t l) 74 | { 75 | return 1ULL<>l; 81 | } 82 | 83 | static uint64_t modexp(uint64_t x, uint8_t l) 84 | { 85 | return x & bits::lo_set[l]; 86 | } 87 | }; 88 | 89 | template<> 90 | struct precomp<4> { 91 | static uint64_t exp(uint8_t l) 92 | { 93 | return 1ULL<<(2*l); 94 | } 95 | 96 | static uint64_t divexp(uint64_t x, uint8_t l) 97 | { 98 | return x>>(2*l); 99 | } 100 | 101 | static uint64_t modexp(uint64_t x, uint8_t l) 102 | { 103 | return x & bits::lo_set[2*l]; 104 | } 105 | }; 106 | 107 | template<> 108 | struct precomp<8> { 109 | static uint64_t exp(uint8_t l) 110 | { 111 | return 
1ULL<<(3*l); 112 | } 113 | 114 | static uint64_t divexp(uint64_t x, uint8_t l) 115 | { 116 | return x>>(3*l); 117 | } 118 | 119 | static uint64_t modexp(uint64_t x, uint8_t l) 120 | { 121 | return x & bits::lo_set[3*l]; 122 | } 123 | }; 124 | 125 | template<> 126 | struct precomp<16> { 127 | static uint64_t exp(uint8_t l) 128 | { 129 | return 1ULL<<(4*l); 130 | } 131 | 132 | static uint64_t divexp(uint64_t x, uint8_t l) 133 | { 134 | return x>>(4*l); 135 | } 136 | 137 | static uint64_t modexp(uint64_t x, uint8_t l) 138 | { 139 | return x & bits::lo_set[4*l]; 140 | } 141 | }; 142 | 143 | 144 | template 145 | typename precomp::impl precomp::data; 146 | 147 | 148 | 149 | typedef std::complex t_p; 150 | typedef t_p point_type; 151 | typedef t_p range_type; 152 | 153 | struct node_type { 154 | uint8_t t; // level; size of node 1< real(v.max_p); 177 | } 178 | return imag(max_p) > imag(v.max_p); 179 | } 180 | }; 181 | 182 | } // end namespace k2_treap_ns 183 | 184 | } // end namespace sdsl 185 | #endif 186 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/k2_tree_helper.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2016 Francisco Montoto 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! 
\file k2_tree_helper.hpp 18 | \brief k2_tree_helper.hpp contains helper functions and definitions for a k^2-tree implementation. 19 | \author Francisco Montoto 20 | */ 21 | #ifndef INCLUDED_SDSL_K2_TREE_HELPER 22 | #define INCLUDED_SDSL_K2_TREE_HELPER 23 | 24 | #include 25 | #include 26 | 27 | #include "sdsl/bit_vectors.hpp" 28 | 29 | //! Namespace for the succinct data structure library. 30 | namespace sdsl 31 | { 32 | 33 | //! Namespace for the k2_tree 34 | namespace k2_tree_ns 35 | { 36 | 37 | typedef int_vector<>::size_type idx_type; 38 | typedef int_vector<>::size_type size_type; 39 | /* Recursively fills a k*k bit block for the submatrix at (p, q) on level l: leaf levels copy matrix cells, inner levels store the children's return values. Returns 0 if the block is all zero, otherwise appends it to acc[l] and returns 1. */ 40 | template 41 | int _build_from_matrix(const std::vector>& matrix, 42 | const uint8_t k, int n, const int height, 43 | int l, int p, int q, std::vector>& acc) 44 | { 45 | unsigned i, j, b_size = static_cast<unsigned>(k) * k; /* integer square; avoids the floating-point round trip of pow(k, 2) */ 46 | t_bv b(b_size, 0); 47 | bool is_leaf = (l == height); 48 | 49 | if (is_leaf) { 50 | for (i = 0; i < k; i++) 51 | for (j = 0; j < k; j++) 52 | if (p + i < matrix.size() 53 | && q + j < matrix.size() 54 | && matrix[p + i][q + j] == 1) 55 | b[i * k + j] = 1; 56 | } else { // Internal node 57 | for (i = 0; i < k; i++) 58 | for (j = 0; j < k; j++) 59 | b[i * k + j] = _build_from_matrix(matrix, k, n/k, height, l + 1, 60 | p + i * (n/k), q + j * (n/k), 61 | acc); 62 | } 63 | 64 | // TODO There must be a better way to check if there is a 1 at b. 65 | for (i = 0; i < b_size; i++) 66 | if (b[i] == 1) 67 | break; 68 | if (i == b_size) // If there are no 1s in b. 69 | return 0; 70 | 71 | acc[l].push_back(std::move(b)); 72 | return 1; 73 | } 74 | 75 | /*! Get the chunk index ([0, k^2[) of a submatrix point. 76 | * 77 | * Gets a point in the global matrix and returns its corresponding chunk 78 | * in the submatrix specified. 79 | * 80 | * \param v Row of the point in the global matrix. 81 | * \param u Column of the point in the global matrix. 82 | * \param c_0 Column offset of the submatrix in the global matrix. 83 | * \param r_0 Row offset of the submatrix in the global matrix. 
84 | * \param l size of the chunk at the submatrix. 85 | * \param k the k parameter from the k^2 tree. 86 | * \returns the index of the chunk containing the point at the submatrix. 87 | */ 88 | inline uint16_t get_chunk_idx(idx_type v, idx_type u, idx_type c_0, 89 | idx_type r_0, size_type l, uint8_t k) 90 | { 91 | return ((v - r_0) / l) * k + (u - c_0) / l; 92 | } 93 | 94 | template void build_template_vector(bit_vector& k_t_, 95 | bit_vector& k_l_, t_bv& k_t, t_bv& k_l) 96 | { 97 | k_t = t_bv(k_t_); 98 | k_l = t_bv(k_l_); 99 | } 100 | /* bit_vector case: swaps the contents instead of copy-constructing. Declared inline because an explicit specialization defined in a header is an ordinary function and would otherwise be defined once per including translation unit. */ 101 | template<> inline void build_template_vector(bit_vector& k_t_, 102 | bit_vector& k_l_, 103 | bit_vector& k_t, 104 | bit_vector& k_l) 105 | { 106 | k_t.swap(k_t_); 107 | k_l.swap(k_l_); 108 | } 109 | 110 | } // end namespace k2_tree_ns 111 | } // end namespace sdsl 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/lcp_dac.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2011-2014 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file lcp_dac.hpp 18 | \brief lcp_dac.hpp contains an implementation of a (compressed) LCP array. 
19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_LCP_DAC 22 | #define INCLUDED_SDSL_LCP_DAC 23 | 24 | #include "lcp.hpp" 25 | #include "vectors.hpp" 26 | #include "rank_support_v5.hpp" 27 | 28 | namespace sdsl 29 | { 30 | 31 | //! A class for the compressed version of LCP information of an suffix array 32 | /*! A dac_vector is used to compress represent the values compressed. 33 | * The template parameter are forwarded to the dac_vector. 34 | * \tparam t_b Split block size. 35 | * \tparam t_rank Rank structure to navigate between the different levels. 36 | */ 37 | template> 39 | using lcp_dac = lcp_vlc>; 40 | 41 | template 42 | using lcp_dac_dp = lcp_vlc>; 43 | 44 | } // end namespace sdsl 45 | #endif 46 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/ram_filebuf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_SDSL_RAM_FSTREAMBUF 2 | #define INCLUDED_SDSL_RAM_FSTREAMBUF 3 | 4 | #include 5 | #include 6 | #include "ram_fs.hpp" 7 | 8 | namespace sdsl 9 | { 10 | 11 | class ram_filebuf : public std::streambuf 12 | { 13 | private: 14 | ram_fs::content_type* m_ram_file = nullptr; // file handle 15 | void pbump64(std::ptrdiff_t); 16 | 17 | public: 18 | virtual ~ram_filebuf(); 19 | 20 | ram_filebuf(); 21 | ram_filebuf(std::vector& ram_file); 22 | 23 | std::streambuf* 24 | open(const std::string s, std::ios_base::openmode mode); 25 | 26 | bool is_open(); 27 | 28 | ram_filebuf* 29 | close(); 30 | 31 | pos_type 32 | seekpos(pos_type sp, 33 | std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) override; 34 | 35 | pos_type 36 | pubseekoff(off_type off, std::ios_base::seekdir way, 37 | std::ios_base::openmode which = std::ios_base::in | std::ios_base::out); 38 | 39 | pos_type 40 | pubseekpos(pos_type sp, 41 | std::ios_base::openmode which = std::ios_base::in | std::ios_base::out); 42 | 43 | 44 | // std::streamsize 45 | // 
xsputn(const char_type* s, std::streamsize n) override; 46 | 47 | int 48 | sync() override; 49 | 50 | int_type 51 | overflow(int_type c = traits_type::eof()) override; 52 | }; 53 | 54 | } 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/ram_fs.hpp: -------------------------------------------------------------------------------- 1 | /*! \file ram_fs.hpp 2 | * \brief ram_fs.hpp 3 | * \author Simon Gog 4 | */ 5 | #ifndef INCLUDED_SDSL_RAM_FS 6 | #define INCLUDED_SDSL_RAM_FS 7 | 8 | #include "uintx_t.hpp" 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace sdsl 15 | { 16 | 17 | class ram_fs_initializer 18 | { 19 | public: 20 | ram_fs_initializer(); 21 | ~ram_fs_initializer(); 22 | }; 23 | 24 | } // end namespace sdsl 25 | 26 | 27 | static sdsl::ram_fs_initializer init_ram_fs; 28 | 29 | namespace sdsl 30 | { 31 | 32 | 33 | //! ram_fs is a simple store for RAM-files. 34 | /*! 35 | * Simple key-value store which maps file names 36 | * (strings) to file content (content_type). 37 | */ 38 | class ram_fs 39 | { 40 | public: 41 | typedef std::vector content_type; 42 | 43 | private: 44 | friend class ram_fs_initializer; 45 | typedef std::map mss_type; 46 | static mss_type m_map; 47 | static std::recursive_mutex m_rlock; 48 | 49 | public: 50 | //! Default construct 51 | ram_fs(); 52 | static void store(const std::string& name, content_type data); 53 | //! Check if the file exists 54 | static bool exists(const std::string& name); 55 | //! Get the file size 56 | static size_t file_size(const std::string& name); 57 | //! Get the content 58 | static content_type& content(const std::string& name); 59 | //! Remove the file with key `name` 60 | static int remove(const std::string& name); 61 | //! Rename the file. Change key `old_filename` into `new_filename`. 62 | static int rename(const std::string old_filename, const std::string new_filename); 63 | }; 64 | 65 | //! 
Determines if the given file is a RAM-file. 66 | bool is_ram_file(const std::string& file); 67 | 68 | //! Returns the corresponding RAM-file name for file. 69 | std::string ram_file_name(const std::string& file); 70 | 71 | //! Returns for a RAM-file the corresponding disk file name 72 | std::string disk_file_name(const std::string& file); 73 | 74 | //! Remove a file. 75 | int remove(const std::string& file); 76 | 77 | //! Rename a file 78 | int rename(const std::string& old_filename, const std::string& new_filename); 79 | 80 | } // end namespace sdsl 81 | #endif 82 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/rank_support_scan.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2012 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file rank_support_scan.hpp 18 | \brief rank_support_scan.hpp contains rank_support_scan that support a sdsl::bit_vector with linear time rank information. 19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_RANK_SUPPORT_SCAN 22 | #define INCLUDED_SDSL_RANK_SUPPORT_SCAN 23 | 24 | #include "rank_support.hpp" 25 | 26 | //! Namespace for the succinct data structure library. 27 | namespace sdsl 28 | { 29 | 30 | //! 
A class supporting rank queries in linear time. 31 | /*! \par Space complexity 32 | * Constant. 33 | * \par Time complexity 34 | * Linear in the size of the supported vector. 35 | * 36 | * \tparam t_b Bit pattern which should be supported. Either `0`,`1`,`10`,`01`. 37 | * \tparam t_pat_len Length of the bit pattern. 38 | * @ingroup rank_support_group 39 | */ 40 | template 41 | class rank_support_scan : public rank_support 42 | { 43 | private: 44 | static_assert(t_b == 1u or t_b == 0u or t_b == 10u or t_b == 11u, "rank_support_scan: bit pattern must be `0`,`1`,`10` or `01`"); 45 | static_assert(t_pat_len == 1u or t_pat_len == 2u , "rank_support_scan: bit pattern length must be 1 or 2"); 46 | public: 47 | typedef bit_vector bit_vector_type; 48 | enum { bit_pat = t_b }; 49 | enum { bit_pat_len = t_pat_len }; 50 | public: 51 | explicit rank_support_scan(const bit_vector* v = nullptr) 52 | { 53 | set_vector(v); 54 | } 55 | rank_support_scan(const rank_support_scan& rs) 56 | { 57 | set_vector(rs.m_v); 58 | } 59 | size_type rank(size_type idx) const; 60 | size_type operator()(size_type idx)const 61 | { 62 | return rank(idx); 63 | }; 64 | size_type size()const 65 | { 66 | return m_v->size(); 67 | }; 68 | size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const 69 | { 70 | return serialize_empty_object(out, v, name, this); 71 | } 72 | void load(std::istream&, const int_vector<1>* v=nullptr) 73 | { 74 | set_vector(v); 75 | } 76 | void set_vector(const bit_vector* v=nullptr) 77 | { 78 | m_v=v; 79 | } 80 | 81 | //! Assign Operator 82 | rank_support_scan& operator=(const rank_support_scan& rs) 83 | { 84 | set_vector(rs.m_v); 85 | return *this; 86 | } 87 | 88 | //! 
swap Operator 89 | void swap(rank_support_scan&) {} 90 | }; 91 | 92 | template 93 | inline typename rank_support_scan::size_type rank_support_scan::rank(size_type idx)const 94 | { 95 | assert(m_v != nullptr); 96 | assert(idx <= m_v->size()); 97 | const uint64_t* p = m_v->data(); 98 | size_type i = 0; 99 | size_type result = 0; 100 | while (i+64 <= idx) { 101 | result += rank_support_trait::full_word_rank(p, i); 102 | i += 64; 103 | } 104 | return result+rank_support_trait::word_rank(p, idx); 105 | } 106 | 107 | }// end namespace sds 108 | 109 | #endif // end file 110 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/raster_img.hpp: -------------------------------------------------------------------------------- 1 | #ifndef SDSL_INC_RASTER_IMG 2 | #define SDSL_INC_RASTER_IMG 3 | 4 | #include 5 | 6 | namespace sdsl 7 | { 8 | 9 | struct raster_img { 10 | typedef uint64_t size_type; 11 | 12 | uint64_t max_x; // max x value 13 | uint64_t max_y; // max y value 14 | uint64_t max_z; // max z value in the compacted range 15 | uint32_t offset; 16 | bit_vector value_map; 17 | int_vector<> data; 18 | 19 | //! Serializes the data structure into the given ostream 20 | uint64_t serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const; 21 | 22 | //! Loads the data structure from the given istream. 
23 | void load(std::istream& in); 24 | }; 25 | 26 | } 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/rmq_support.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2009 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file rmq_support.hpp 18 | \brief rmq_support.hpp contains different range minimum support data structures. 
/** \defgroup rmq_group Range Minimum/Maximum Support (RMS) */

//! Comparison policy used by the RMQ structures: range *minimum* queries.
/*! \tparam RandomAccessContainer Container whose value_type is compared.
 *  \tparam Minimum               Selects minimum queries for this primary
 *                                template; `false` picks the maximum
 *                                specialisation.
 */
template<class RandomAccessContainer, bool Minimum>
struct min_max_trait {
    //! True iff lhs is strictly smaller than rhs.
    static bool strict_compare(const typename RandomAccessContainer::value_type lhs,
                               const typename RandomAccessContainer::value_type rhs)
    {
        const bool smaller = lhs < rhs;
        return smaller;
    }
    //! True iff lhs is smaller than or equal to rhs.
    static bool compare(const typename RandomAccessContainer::value_type lhs,
                        const typename RandomAccessContainer::value_type rhs)
    {
        const bool not_larger = lhs <= rhs;
        return not_larger;
    }
};

//! Comparison policy used by the RMQ structures: range *maximum* queries.
template<class RandomAccessContainer>
struct min_max_trait<RandomAccessContainer, false> {
    //! True iff lhs is strictly larger than rhs.
    static bool strict_compare(const typename RandomAccessContainer::value_type lhs,
                               const typename RandomAccessContainer::value_type rhs)
    {
        const bool larger = lhs > rhs;
        return larger;
    }
    //! True iff lhs is larger than or equal to rhs.
    static bool compare(const typename RandomAccessContainer::value_type lhs,
                        const typename RandomAccessContainer::value_type rhs)
    {
        const bool not_smaller = lhs >= rhs;
        return not_smaller;
    }
};
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file sdsl_concepts.hpp 18 | \brief Contains declarations and definitions of data structure concepts. 19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_CONCEPTS 22 | #define INCLUDED_SDSL_CONCEPTS 23 | 24 | #include "uintx_t.hpp" // for uint8_t 25 | 26 | namespace sdsl 27 | { 28 | 29 | struct bv_tag {}; // bitvector tag 30 | struct iv_tag {}; // int_vector tag 31 | 32 | struct csa_tag {}; // compressed suffix array (CSAs) tag 33 | struct cst_tag {}; // compressed suffix tree (CST) tag 34 | struct wt_tag {}; // wavelet tree tag 35 | 36 | struct psi_tag {}; // tag for CSAs based on the psi function 37 | struct lf_tag {}; // tag for CSAs based on the LF function 38 | 39 | struct csa_member_tag {}; // tag for text, bwt, LF, \Psi members of CSA 40 | 41 | struct lcp_tag {}; 42 | struct lcp_plain_tag {}; 43 | struct lcp_permuted_tag {}; 44 | struct lcp_tree_compressed_tag {}; 45 | struct lcp_tree_and_lf_compressed_tag {}; 46 | 47 | struct alphabet_tag {}; 48 | struct byte_alphabet_tag { static const uint8_t WIDTH=8; }; 49 | struct int_alphabet_tag { static const uint8_t WIDTH=0; }; 50 | 51 | struct sa_sampling_tag {}; 52 | struct isa_sampling_tag {}; 53 | 54 | 55 | template 56 | struct enable_if_type { 57 | typedef t_r type; 58 | }; 59 | 60 | template 61 | struct index_tag { 62 | typedef t_enable type; 63 | }; 64 | 65 | template 66 | struct index_tag::type> { 67 | using type = typename t_idx::index_category; 68 | }; 69 | 70 | template 71 | struct sampling_tag { 72 | typedef t_enable type; 73 | }; 74 | 75 | template 76 | struct sampling_tag::type> { 77 | using type = typename t_sampling::sampling_category; 78 | }; 79 | 80 | template 81 | struct is_enc_vec { 82 | static const bool value = false; 83 | }; 84 | 85 | template 86 | struct is_enc_vec::type> { 87 | static const bool value = true; 88 | }; 89 | 
90 | template 91 | struct is_alphabet { 92 | static const bool value = false; 93 | }; 94 | 95 | template 96 | struct is_alphabet::type> { 97 | static const bool value = true; 98 | }; 99 | 100 | } // end namespace sdsl 101 | 102 | #endif 103 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/select_support_scan.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2012 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file select_support_scan.hpp 18 | \brief select_support_scan.hpp contains classes that support a sdsl::bit_vector with linear time select. 19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_SELECT_SUPPORT_SCAN 22 | #define INCLUDED_SDSL_SELECT_SUPPORT_SCAN 23 | 24 | #include "int_vector.hpp" 25 | #include "util.hpp" 26 | #include "select_support.hpp" 27 | 28 | //! Namespace for the succinct data structure library. 29 | namespace sdsl 30 | { 31 | 32 | 33 | //! A class supporting linear time select queries. 34 | /*! \par Space complexity 35 | * Constant. 36 | * \par Time complexity 37 | * Linear in the size of the supported vector. 38 | * 39 | * \tparam t_b Bit pattern which should be supported. Either `0`,`1`,`10`,`01`. 
//! A class supporting linear time select queries.
/*! \par Space complexity
 *       Constant.
 *  \par Time complexity
 *       Linear in the size of the supported vector.
 *
 *  \tparam t_b       Bit pattern which should be supported. Either `0`,`1`,`10`,`01`.
 *  \tparam t_pat_len Length of the bit pattern.
 *  @ingroup select_support_group
 */
template<uint8_t t_b=1, uint8_t t_pat_len=1>
class select_support_scan : public select_support
{
    private:
        // `01` is an octal literal with value 1, so it is covered by `t_b == 1u`.
        static_assert(t_b == 1u or t_b == 0u or t_b == 10u , "select_support_scan: bit pattern must be `0`,`1`,`10` or `01`");
        static_assert(t_pat_len == 1u or t_pat_len == 2u , "select_support_scan: bit pattern length must be 1 or 2");
    public:
        typedef bit_vector bit_vector_type;
        enum { bit_pat = t_b };
    public:
        //! Constructor; forwards the supported vector to select_support.
        explicit select_support_scan(const bit_vector* v=nullptr) : select_support(v) {}
        //! Copy constructor; the copy supports the same vector as `ss`.
        select_support_scan(const select_support_scan& ss) : select_support(ss.m_v) {}

        //! Position of the i-th occurrence of the pattern (see definition below).
        inline size_type select(size_type i) const;
        //! Alias for select(i).
        inline size_type operator()(size_type i)const
        {
            return select(i);
        }
        //! Serializes the (stateless) support structure.
        size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const
        {
            return serialize_empty_object(out, v, name, this);
        }
        //! Nothing is read from the stream; only the supported vector is set.
        void load(std::istream&, SDSL_UNUSED const bit_vector* v=nullptr)
        {
            set_vector(v);
        }

        //! Sets the supported vector.
        void set_vector(const bit_vector* v=nullptr)
        {
            m_v = v;
        }
        //! Assignment; afterwards this object supports the same vector as `ss`.
        select_support_scan& operator=(const select_support_scan& ss)
        {
            set_vector(ss.m_v);
            return *this;
        }
        //! Intentionally leaves both objects unchanged.
        void swap(select_support_scan&) {}
};

/*! Scans the supported vector word by word until the word containing the
 *  i-th occurrence is reached and returns `64 * word index` plus the
 *  in-word position reported by select_support_trait.
 *
 *  NOTE(review): m_v is dereferenced without a null check, and the loop
 *  only asserts that it stays inside the vector's capacity - presumably
 *  callers must pass an i that does not exceed the number of occurrences;
 *  confirm against select_support's contract.
 */
template<uint8_t t_b, uint8_t t_pat_len>
inline typename select_support_scan<t_b,t_pat_len>::size_type select_support_scan<t_b,t_pat_len>::select(size_type i)const
{
    const uint64_t* data = m_v->data();
    size_type word_pos = 0;
    size_type word_off = 0;
    // The first word is handled by its own trait functions.
    uint64_t carry = select_support_trait<t_b,t_pat_len>::init_carry(data, word_pos);
    size_type args = select_support_trait<t_b,t_pat_len>::args_in_the_first_word(*data, word_off, carry);
    if (args >= i) {
        return (word_pos<<6)+select_support_trait<t_b,t_pat_len>::ith_arg_pos_in_the_first_word(*data, i, word_off, carry);
    }
    word_pos+=1;
    size_type sum_args = args; // occurrences counted in all words before the current one
    carry = select_support_trait<t_b,t_pat_len>::get_carry(*data);
    // args_in_the_word() takes carry by name and may update it; old_carry keeps
    // the value that was passed in for the current word, because the final
    // in-word lookup below needs that value.
    uint64_t old_carry = carry;
    args = select_support_trait<t_b,t_pat_len>::args_in_the_word(*(++data), carry);
    while (sum_args + args < i) {
        sum_args += args;
        assert(data+1 < m_v->data() + (m_v->capacity()>>6));
        old_carry = carry;
        args = select_support_trait<t_b,t_pat_len>::args_in_the_word(*(++data), carry);
        word_pos+=1;
    }
    // The answer is the (i - sum_args)-th occurrence inside the current word.
    return (word_pos<<6) + select_support_trait<t_b,t_pat_len>::ith_arg_pos_in_the_word(*data, i-sum_args, old_carry);
}
//! Input stream for files addressed by name; the stream buffer is kept in m_streambuf.
class isfstream : public std::istream
{
        typedef std::streambuf* buf_ptr_type;
    private:
        buf_ptr_type m_streambuf = nullptr; // stream buffer used by this stream
        std::string  m_file      = "";      // file name
    public:
        typedef void* voidptr;
        //! Standard constructor.
        isfstream();
        //! Constructor taking a file name and open mode.
        isfstream(const std::string& file, std::ios_base::openmode mode = std::ios_base::in);
        //! Open the stream.
        buf_ptr_type
        open(const std::string& file, std::ios_base::openmode mode = std::ios_base::in);
        //! Is the stream open?
        bool is_open();
        //! Close the stream.
        void close();
        //! Standard destructor
        ~isfstream();
        //! Cast to void*
        operator  voidptr() const;

        //! Set the read position to the absolute position `pos`.
        isfstream& seekg(pos_type pos);
        //! Set the read position to offset `off` relative to `way`.
        isfstream& seekg(off_type off, ios_base::seekdir way);
        //! Get the current read position.
        std::streampos tellg();
};
//! Node of the tree that describes the memory structure of an object.
/*! Every node has a name, a type and an accumulated size. Children are
 *  owned by the node and are keyed by the concatenation `name + type`.
 */
class structure_tree_node
{
    private:
        using map_type = std::unordered_map<std::string, std::unique_ptr<structure_tree_node>>;
        map_type m_children;
    public:
        const map_type& children = m_children; // read-only view of the children
        size_t          size     = 0;          // accumulated size, see add_size()
        std::string     name;
        std::string     type;
    public:
        structure_tree_node(const std::string& n, const std::string& t) : name(n) , type(t) {}

        // `children` is bound to this object's own map. An implicitly
        // generated copy/move would bind the new object's `children` to the
        // source's map instead, so copying and moving are disabled.
        structure_tree_node(const structure_tree_node&) = delete;
        structure_tree_node& operator=(const structure_tree_node&) = delete;

        //! Returns the child with name `n` and type `t`, creating it if absent.
        /*! \returns Pointer to the child; the child stays owned by this node.
         */
        structure_tree_node* add_child(const std::string& n, const std::string& t) {
            std::string key = n+t;
            auto child_itr = m_children.find(key);
            if (child_itr == m_children.end()) {
                // No child of this name and type yet: construct it first, so
                // that a failed allocation leaves the map untouched.
                std::unique_ptr<structure_tree_node> new_node(new structure_tree_node(n,t));
                child_itr = m_children.emplace(std::move(key), std::move(new_node)).first;
            }
            return child_itr->second.get();
        }
        //! Adds `s` to the accumulated size of this node.
        void add_size(size_t s) { size += s; }
};

//! Null-tolerant front end to structure_tree_node.
class structure_tree
{
    public:
        //! Like v->add_child(name, type); returns nullptr if `v` is null.
        static structure_tree_node* add_child(structure_tree_node* v, const std::string& name, const std::string& type) {
            if (v) return v->add_child(name,type);
            return nullptr;
        }
        //! Like v->add_size(value); does nothing if `v` is null.
        static void add_size(structure_tree_node* v, uint64_t value) {
            if (v) v->add_size(value);
        }
};
License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file suffix_arrays.hpp 18 | \brief suffix_arrays.hpp contains generic classes for different suffix array classes. 19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_SUFFIX_ARRAYS 22 | #define INCLUDED_SDSL_SUFFIX_ARRAYS 23 | 24 | #include "sdsl_concepts.hpp" 25 | 26 | /** \defgroup csa Compressed Suffix Arrays (CSA) */ 27 | 28 | #include "csa_bitcompressed.hpp" 29 | #include "csa_wt.hpp" 30 | #include "csa_sada.hpp" 31 | #include "wavelet_trees.hpp" 32 | #include "construct.hpp" 33 | #include "suffix_array_algorithm.hpp" 34 | 35 | namespace sdsl 36 | { 37 | 38 | //! Typedef for convenient usage of std integer alphabet strategy 39 | template, 40 | uint32_t t_dens = 32, 41 | uint32_t t_inv_dens = 64, 42 | class t_sa_sample_strat = sa_order_sa_sampling<>, 43 | class t_isa_sample_strat = isa_sampling<> 44 | > 45 | using csa_wt_int = csa_wt>; 46 | 47 | template, // Vector type used to store the Psi-function 48 | uint32_t t_dens = 32, // Sample density for suffix array (SA) values 49 | uint32_t t_inv_dens = 64, // Sample density for inverse suffix array (ISA) values 50 | class t_sa_sample_strat = sa_order_sa_sampling<>,// Policy class for the SA sampling. Alternative text_order_sa_sampling. 51 | class t_isa_sample_strat = isa_sampling<> // Policy class for the ISA sampling. 
52 | > 53 | using csa_sada_int = csa_sada>; 54 | 55 | } 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/suffix_trees.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2009 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file suffix_trees.hpp 18 | \brief suffix_trees.hpp contains generic classes for different suffix tree classes. 19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_SUFFIX_TREES 22 | #define INCLUDED_SDSL_SUFFIX_TREES 23 | 24 | /** \defgroup cst Compressed Suffix Trees (CST) 25 | * This group contains data structures for compressed suffix trees. The following methods are supported: 26 | * - root() 27 | * - child(v,c) 28 | * - select_child(v) 29 | * - select_leaf(i) 30 | * - parent(v) 31 | * - sl(v) 32 | * - lca(v,w) 33 | * - .. 
34 | */ 35 | 36 | #include "cst_sct3.hpp" 37 | #include "cst_sada.hpp" 38 | #include "cst_fully.hpp" 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/uintx_t.hpp: -------------------------------------------------------------------------------- 1 | #ifndef INCLUDED_SDSL_UINTX_T 2 | #define INCLUDED_SDSL_UINTX_T 3 | 4 | #include 5 | 6 | using std::int8_t; 7 | using std::int16_t; 8 | using std::int32_t; 9 | using std::int64_t; 10 | 11 | using std::uint8_t; 12 | using std::uint16_t; 13 | using std::uint32_t; 14 | using std::uint64_t; 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/vectors.hpp: -------------------------------------------------------------------------------- 1 | /** \defgroup int_vector int_vector */ 2 | #ifndef SDSL_INCLUDED_VECTORS 3 | #define SDSL_INCLUDED_VECTORS 4 | 5 | #include "int_vector.hpp" 6 | #include "enc_vector.hpp" 7 | #include "vlc_vector.hpp" 8 | #include "dac_vector.hpp" 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /external/sdsl/include/sdsl/wavelet_trees.hpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2011 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | /*! \file wavelet_trees.hpp 18 | \brief wavelet_trees.hpp contains wavelet tree implementations. 19 | \author Simon Gog 20 | */ 21 | #ifndef INCLUDED_SDSL_WAVELET_TREES 22 | #define INCLUDED_SDSL_WAVELET_TREES 23 | 24 | /** \defgroup wt Wavelet Trees (WT) 25 | * This group contains data structures for wavelet trees. The following methods are supported: 26 | * - []-operator 27 | * - rank(i, c) 28 | * - select(i, c) 29 | * - inverse_select(i) 30 | */ 31 | 32 | #include "wt_pc.hpp" 33 | #include "wt_blcd.hpp" 34 | #include "wt_gmr.hpp" 35 | #include "wt_huff.hpp" 36 | #include "wt_hutu.hpp" 37 | #include "wt_int.hpp" 38 | #include "wm_int.hpp" 39 | #include "wt_rlmn.hpp" 40 | #include "wt_ap.hpp" 41 | #include "construct.hpp" 42 | #include "wt_algorithm.hpp" 43 | 44 | namespace sdsl 45 | { 46 | 47 | template 52 | using wt_hutu_int = wt_pc>; 58 | 59 | template 63 | using wt_huff_int = wt_pc>; 69 | 70 | template 74 | using wt_blcd_int = wt_pc>; 80 | } 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /external/sdsl/lib/coder_fibonacci.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/coder_fibonacci.hpp" 2 | 3 | namespace sdsl 4 | { 5 | 6 | namespace coder 7 | { 8 | 9 | fibonacci::impl fibonacci::data; 10 | 11 | uint64_t fibonacci::decode_prefix_sum(const uint64_t* d, const size_type start_idx, size_type n) 12 | { 13 | if (n==0) 14 | return 0; 15 | // return decode(data, start_idx, n); 16 | d += (start_idx >> 6); 17 | size_type i = 0; 18 | int32_t bits_to_decode = 0; 19 | uint64_t w = 0, value = 0; 20 | int16_t buffered = 0, read = start_idx & 0x3F, shift = 0; 21 | uint16_t temp=0; 22 | uint64_t carry=0; 23 | i = bits::cnt11(*d & ~bits::lo_set[read], carry); 24 | if (i=n 34 | bits_to_decode = 
bits::sel11(*d >> read, n)+1; 35 | } 36 | if (((size_type)bits_to_decode) == n<<1) 37 | return n; 38 | if (((size_type)bits_to_decode) == (n<<1)+1) 39 | return n+1; 40 | i = 0; 41 | // while( bits_to_decode > 0 or buffered > 0){// while not all values are decoded 42 | do { 43 | while (buffered < 64 and bits_to_decode > 0) { 44 | w |= (((*d)>>read)<= buffered) { 46 | ++d; 47 | buffered += 64-read; 48 | bits_to_decode -= (64-read); 49 | read = 0; 50 | } else { // read buffered 51 | read += 64-buffered; 52 | bits_to_decode -= (64-buffered); 53 | buffered = 64; 54 | } 55 | if (bits_to_decode < 0) { 56 | buffered += bits_to_decode; 57 | w &= bits::lo_set[buffered]; 58 | bits_to_decode = 0; 59 | } 60 | } 61 | if (!i) { // try do decode multiple values 62 | if ((w&0xFFFFFF)==0xFFFFFF) { 63 | value += 12; 64 | w >>= 24; 65 | buffered -= 24; 66 | if ((w&0xFFFFFF)==0xFFFFFF) { 67 | value += 12; 68 | w >>= 24; 69 | buffered -= 24; 70 | } 71 | } 72 | do { 73 | temp = fibonacci::data.fib2bin_16_greedy[w&0xFFFF]; 74 | if ((shift=(temp>>11)) > 0) { 75 | value += (temp & 0x7FFULL); 76 | w >>= shift; 77 | buffered -= shift; 78 | } else { 79 | value += fibonacci::data.fib2bin_0_95[w&0xFFF]; 80 | w >>= 12; 81 | buffered -= 12; 82 | i = 1; 83 | break; 84 | } 85 | } while (buffered>15); 86 | } else { // i > 0 87 | value += fibonacci::data.fib2bin_0_95[(i<<12) | (w&0xFFF)]; 88 | shift = fibonacci::data.fib2bin_shift[w&0x1FFF]; 89 | if (shift > 0) { // if end of decoding 90 | w >>= shift; 91 | buffered -= shift; 92 | i = 0; 93 | } else { // not end of decoding 94 | w >>= 12; 95 | buffered -= 12; 96 | ++i; 97 | } 98 | } 99 | } while (bits_to_decode > 0 or buffered > 0); 100 | return value; 101 | } 102 | 103 | uint64_t fibonacci::decode_prefix_sum(const uint64_t* d, const size_type start_idx, SDSL_UNUSED const size_type end_idx, size_type n) 104 | { 105 | return decode_prefix_sum(d, start_idx, n); 106 | } 107 | 108 | } // end namespace coder 109 | } // end namespace sdsl 110 | 
-------------------------------------------------------------------------------- /external/sdsl/lib/config.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/config.hpp" 2 | #include "sdsl/util.hpp" 3 | 4 | namespace sdsl 5 | { 6 | cache_config::cache_config(bool f_delete_files, std::string f_dir, std::string f_id, tMSS f_file_map) : delete_files(f_delete_files), dir(f_dir), id(f_id), file_map(f_file_map) 7 | { 8 | if ("" == id) { 9 | id = util::to_string(util::pid())+"_"+util::to_string(util::id()); 10 | } 11 | } 12 | 13 | template<> 14 | const char* key_text_trait<0>::KEY_TEXT = conf::KEY_TEXT_INT; 15 | template<> 16 | const char* key_text_trait<8>::KEY_TEXT = conf::KEY_TEXT; 17 | 18 | template<> 19 | const char* key_bwt_trait<0>::KEY_BWT = conf::KEY_BWT_INT; 20 | template<> 21 | const char* key_bwt_trait<8>::KEY_BWT = conf::KEY_BWT; 22 | 23 | }// end namespace sdsl 24 | -------------------------------------------------------------------------------- /external/sdsl/lib/construct_config.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/construct_config.hpp" 2 | 3 | namespace sdsl 4 | { 5 | 6 | byte_sa_algo_type construct_config::byte_algo_sa = LIBDIVSUFSORT; 7 | 8 | } 9 | -------------------------------------------------------------------------------- /external/sdsl/lib/construct_isa.cpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2010 Simon Gog 3 | */ 4 | #include "sdsl/construct_isa.hpp" 5 | #include 6 | 7 | namespace sdsl 8 | { 9 | 10 | void construct_isa(cache_config& config) 11 | { 12 | typedef int_vector<>::size_type size_type; 13 | if (!cache_file_exists(conf::KEY_ISA, config)) { // if isa is not already on disk => calculate it 14 | int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config)); 15 | if (!sa_buf.is_open()) { 16 | throw 
std::ios_base::failure("cst_construct: Cannot load SA from file system!"); 17 | } 18 | int_vector<> isa(sa_buf.size()); 19 | for (size_type i=0; i < isa.size(); ++i) { 20 | isa[ sa_buf[i] ] = i; 21 | } 22 | store_to_cache(isa, conf::KEY_ISA, config); 23 | } 24 | } 25 | 26 | }// end namespace 27 | -------------------------------------------------------------------------------- /external/sdsl/lib/construct_lcp_helper.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/construct_lcp_helper.hpp" 2 | #include "sdsl/int_vector.hpp" 3 | #include 4 | 5 | namespace sdsl 6 | { 7 | 8 | //! Merges a partial LCP array into the LCP array on disk. 9 | /*! 10 | * \param partial_lcp Vector containing LCP values for all indexes \f$i\f$ with 11 | * index_done[i] == 0. Let x=partial_lcp[rank(index_done, i, 0)]; 12 | * LCP[i]=x if x!=0 and index_done[i] == 0 13 | * \param lcp_file Path to the LCP array on disk. 14 | * \param index_done Entry index_done[i] indicates if LCP[i] is already calculated. 
15 | * \param max_lcp_value Maximum known LCP value 16 | * \param lcp_value_offset Largest LCP value in lcp_file 17 | */ 18 | void insert_lcp_values(int_vector<>& partial_lcp, bit_vector& index_done, std::string lcp_file, uint64_t max_lcp_value, uint64_t lcp_value_offset) 19 | { 20 | std::string tmp_lcp_file = lcp_file+"_TMP"; 21 | const uint64_t buffer_size = 1000000; // has to be a multiple of 64 22 | typedef int_vector<>::size_type size_type; 23 | int_vector_buffer<> lcp_buffer(lcp_file, std::ios::in, buffer_size); // open lcp_file 24 | uint64_t n = lcp_buffer.size(); 25 | 26 | // open tmp_lcp_file 27 | uint8_t int_width = bits::hi(max_lcp_value-1)+1; 28 | int_vector_buffer<> out_buf(tmp_lcp_file, std::ios::out, buffer_size, int_width); // Output buffer 29 | // Write values into buffer 30 | for (size_type i=0, calc_idx=0; i < n; ++i) { 31 | if (index_done[i]) { // If value was already calculated 32 | out_buf[i] = lcp_buffer[i]; // Copy value 33 | } else { 34 | if (partial_lcp[calc_idx]) { // If value was calculated now 35 | // Insert value 36 | out_buf[i] = partial_lcp[calc_idx]+lcp_value_offset; 37 | index_done[i] = true; 38 | } 39 | ++calc_idx; 40 | } 41 | } 42 | // Close file and replace old file with new one 43 | out_buf.close(); 44 | sdsl::rename(tmp_lcp_file, lcp_file); 45 | } 46 | 47 | buffered_char_queue::buffered_char_queue():m_widx(0), m_ridx(0), m_sync(true), m_disk_buffered_blocks(0), m_c('?'),m_rb(0), m_wb(0) {} 48 | 49 | void buffered_char_queue::init(const std::string& dir, char c) 50 | { 51 | m_c = c; 52 | m_file_name = dir+"buffered_char_queue_"+util::to_string(util::pid()); 53 | // m_stream.rdbuf()->pubsetbuf(0, 0); 54 | } 55 | 56 | buffered_char_queue::~buffered_char_queue() 57 | { 58 | m_stream.close(); 59 | sdsl::remove(m_file_name); 60 | } 61 | 62 | void buffered_char_queue::push_back(uint8_t x) 63 | { 64 | m_write_buf[m_widx] = x; 65 | if (m_sync) { 66 | m_read_buf[m_widx] = x; 67 | } 68 | ++m_widx; 69 | if (m_widx == m_buffer_size) { 70 | 
if (!m_sync) { // if not sync, write block to disk 71 | if (!m_stream.is_open()) { 72 | m_stream.open(m_file_name, std::ios::in | std::ios::out | std::ios::binary | std::ios::trunc); 73 | } 74 | m_stream.seekp(m_buffer_size * (m_wb++), std::ios::beg); 75 | m_stream.write((char*) m_write_buf, m_buffer_size); 76 | ++m_disk_buffered_blocks; 77 | } 78 | m_sync = 0; 79 | m_widx = 0; 80 | } 81 | } 82 | 83 | uint8_t buffered_char_queue::pop_front() 84 | { 85 | uint8_t x = m_read_buf[m_ridx]; 86 | ++m_ridx; 87 | if (m_ridx == m_buffer_size) { 88 | if (m_disk_buffered_blocks > 0) { 89 | m_stream.seekg(m_buffer_size * (m_rb++), std::ios::beg); 90 | m_stream.read((char*) m_read_buf, m_buffer_size); 91 | --m_disk_buffered_blocks; 92 | } else { // m_disk_buffered_blocks == 0 93 | m_sync = 1; 94 | memcpy(m_read_buf, m_write_buf, m_widx+1); 95 | } 96 | m_ridx = 0; 97 | } 98 | return x; 99 | } 100 | 101 | void lcp_info(cache_config& config) 102 | { 103 | typedef int_vector<>::size_type size_type; 104 | int_vector_buffer<> lcp_buf(cache_file_name(conf::KEY_LCP, config)); 105 | size_type n = lcp_buf.size(); 106 | 107 | size_type max_lcp = 0; 108 | size_type sum_lcp = 0; 109 | for (size_type i=0; i < n; ++i) { 110 | if (lcp_buf[i] > max_lcp) 111 | max_lcp = lcp_buf[i]; 112 | sum_lcp += lcp_buf[i]; 113 | } 114 | std::cout<<"# max lcp = " << max_lcp << std::endl; 115 | std::cout<<"# sum lcp = " << sum_lcp << std::endl; 116 | std::cout<<"# avg lcp = " << sum_lcp/(double)n << std::endl; 117 | } 118 | 119 | } // end namespace sdsl 120 | -------------------------------------------------------------------------------- /external/sdsl/lib/construct_sa.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/construct_sa.hpp" 2 | 3 | namespace sdsl 4 | { 5 | 6 | void construct_sa_se(cache_config& config) 7 | { 8 | int_vector<8> text; 9 | load_from_file(text, cache_file_name(conf::KEY_TEXT, config)); 10 | 11 | if (text.size() <= 2) { 12 | // If 
text is c$ or $ write suffix array [1, 0] or [0] 13 | int_vector_buffer<> sa(cache_file_name(conf::KEY_SA, config), std::ios::out, 8, 2); 14 | if (text.size() == 2) { 15 | sa.push_back(1); 16 | } 17 | sa.push_back(0); 18 | } else { 19 | _construct_sa_se>(text, cache_file_name(conf::KEY_SA, config), 256, 0); 20 | } 21 | register_cache_file(conf::KEY_SA, config); 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /external/sdsl/lib/csa_alphabet_strategy.cpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2012 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 
16 | */ 17 | #include "sdsl/csa_alphabet_strategy.hpp" 18 | 19 | namespace sdsl 20 | { 21 | 22 | const char* key_trait<8>::KEY_BWT = conf::KEY_BWT; 23 | const char* key_trait<8>::KEY_TEXT = conf::KEY_TEXT; 24 | 25 | byte_alphabet::byte_alphabet(int_vector_buffer<8>& text_buf, int_vector_size_type len): 26 | char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma) 27 | { 28 | m_sigma = 0; 29 | if (0 == len or 0 == text_buf.size()) 30 | return; 31 | assert(len <= text_buf.size()); 32 | // initialize vectors 33 | util::assign(m_C , int_vector<64>(257, 0)); 34 | util::assign(m_char2comp, int_vector<8>(256,0)); 35 | util::assign(m_comp2char, int_vector<8>(256,0)); 36 | // count occurrences of each symbol 37 | for (size_type i=0; i < len; ++i) { 38 | ++m_C[text_buf[i]]; 39 | } 40 | assert(1 == m_C[0]); // null-byte should occur exactly once 41 | m_sigma = 0; 42 | for (int i=0; i<256; ++i) 43 | if (m_C[i]) { 44 | m_char2comp[i] = m_sigma; 45 | m_comp2char[sigma] = i; 46 | m_C[m_sigma] = m_C[i]; 47 | ++m_sigma; 48 | } 49 | m_comp2char.resize(m_sigma); 50 | m_C.resize(m_sigma+1); 51 | for (int i=(int)m_sigma; i > 0; --i) m_C[i] = m_C[i-1]; 52 | m_C[0] = 0; 53 | for (int i=1; i <= (int)m_sigma; ++i) m_C[i] += m_C[i-1]; 54 | assert(C[sigma]==len); 55 | } 56 | 57 | 58 | byte_alphabet::byte_alphabet(): char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma) 59 | { 60 | m_sigma = 0; 61 | } 62 | 63 | void byte_alphabet::copy(const byte_alphabet& bas) 64 | { 65 | m_char2comp = bas.m_char2comp; 66 | m_comp2char = bas.m_comp2char; 67 | m_C = bas.m_C; 68 | m_sigma = bas.m_sigma; 69 | } 70 | 71 | byte_alphabet::byte_alphabet(const byte_alphabet& bas): char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma) 72 | { 73 | copy(bas); 74 | } 75 | 76 | byte_alphabet& byte_alphabet::operator=(const byte_alphabet& bas) 77 | { 78 | if (this != &bas) { 79 | copy(bas); 80 | } 81 | return *this; 82 | } 83 | 84 | byte_alphabet& 
byte_alphabet::operator=(byte_alphabet&& bas) 85 | { 86 | if (this != &bas) { 87 | m_char2comp = std::move(bas.m_char2comp); 88 | m_comp2char = std::move(bas.m_comp2char); 89 | m_C = std::move(bas.m_C); 90 | m_sigma = std::move(bas.m_sigma); 91 | } 92 | return *this; 93 | } 94 | 95 | void byte_alphabet::swap(byte_alphabet& bas) 96 | { 97 | m_char2comp.swap(bas.m_char2comp); 98 | m_comp2char.swap(bas.m_comp2char); 99 | m_C.swap(bas.m_C); 100 | std::swap(m_sigma, bas.m_sigma); 101 | } 102 | 103 | byte_alphabet::size_type byte_alphabet::serialize(std::ostream& out, structure_tree_node* v, std::string name)const 104 | { 105 | structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this)); 106 | size_type written_bytes = 0; 107 | written_bytes += m_char2comp.serialize(out, child, "m_char2comp"); 108 | written_bytes += m_comp2char.serialize(out, child, "m_comp2char"); 109 | written_bytes += m_C.serialize(out, child, "m_C"); 110 | written_bytes += write_member(m_sigma, out, child, "m_sigma"); 111 | structure_tree::add_size(child, written_bytes); 112 | return written_bytes; 113 | } 114 | 115 | void byte_alphabet::load(std::istream& in) 116 | { 117 | m_char2comp.load(in); 118 | m_comp2char.load(in); 119 | m_C.load(in); 120 | read_member(m_sigma, in); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /external/sdsl/lib/io.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/io.hpp" 2 | #include "sdsl/sfstream.hpp" 3 | #include "sdsl/util.hpp" 4 | #include 5 | 6 | namespace sdsl 7 | { 8 | 9 | 10 | bool store_to_file(const char* v, const std::string& file) 11 | { 12 | osfstream out(file, std::ios::binary | std::ios::trunc | std::ios::out); 13 | if (!out) { 14 | if (util::verbose) { 15 | std::cerr<<"ERROR: store_to_file(const char *v, const std::string&)"< 57 | size_t write_member(const std::string& t, std::ostream& out, structure_tree_node* v, 
std::string name) 58 | { 59 | structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(t)); 60 | size_t written_bytes = 0; 61 | written_bytes += write_member(t.size(), out, child, "length"); 62 | out.write(t.c_str(), t.size()); 63 | written_bytes += t.size(); 64 | structure_tree::add_size(v, written_bytes); 65 | return written_bytes; 66 | } 67 | 68 | template<> 69 | void read_member(std::string& t, std::istream& in) 70 | { 71 | std::string::size_type size; 72 | read_member(size, in); 73 | char* buf = new char[size]; 74 | in.read(buf, size); 75 | std::string temp(buf, size); 76 | delete [] buf; 77 | t.swap(temp); 78 | } 79 | 80 | uint64_t _parse_number(std::string::const_iterator& c, const std::string::const_iterator& end) 81 | { 82 | std::string::const_iterator s = c; 83 | while (c != end and isdigit(*c)) ++c; 84 | if (c > s) { 85 | return std::stoull(std::string(s,c)); 86 | } else { 87 | return 0; 88 | } 89 | } 90 | 91 | std::string cache_file_name(const std::string& key, const cache_config& config) 92 | { 93 | if (config.file_map.count(key) != 0) { 94 | return config.file_map.at(key); 95 | } 96 | return config.dir+"/"+key+"_"+config.id+".sdsl"; 97 | } 98 | 99 | void register_cache_file(const std::string& key, cache_config& config) 100 | { 101 | std::string file_name = cache_file_name(key, config); 102 | isfstream in(file_name); 103 | if (in) { // if file exists, register it. 
104 | config.file_map[key] = file_name; 105 | } 106 | } 107 | 108 | 109 | bool cache_file_exists(const std::string& key, const cache_config& config) 110 | { 111 | std::string file_name = cache_file_name(key, config); 112 | isfstream in(file_name); 113 | if (in) { 114 | in.close(); 115 | return true; 116 | } 117 | return false; 118 | } 119 | 120 | std::string tmp_file(const cache_config& config, std::string name_part) 121 | { 122 | return config.dir+"/"+ util::to_string(util::pid()) + "_" + util::to_string(util::id()) + name_part + ".sdsl"; 123 | } 124 | 125 | std::string tmp_file(const std::string& filename, std::string name_part) 126 | { 127 | return util::dirname(filename) + "/" + util::to_string(util::pid()) + "_" + 128 | util::to_string(util::id()) + name_part + ".sdsl"; 129 | } 130 | 131 | }// end namespace sdsl 132 | 133 | -------------------------------------------------------------------------------- /external/sdsl/lib/lcp_support_tree.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/lcp_support_tree.hpp" 2 | 3 | namespace sdsl 4 | { 5 | 6 | void construct_first_child_lcp(int_vector_buffer<>& lcp_buf, int_vector<>& fc_lcp) 7 | { 8 | typedef int_vector_size_type size_type; 9 | size_type n = lcp_buf.size(); 10 | if (n == 0) { // if n == 0 we are done 11 | fc_lcp = int_vector<>(0); 12 | } 13 | { 14 | int_vector<> tmp(n, 0, bits::hi(n)+1); 15 | fc_lcp.swap(tmp); 16 | } 17 | 18 | size_type fc_cnt=0; // first child counter 19 | sorted_multi_stack_support vec_stack(n); 20 | size_type y; 21 | for (size_type i=0, x; i < n; ++i) { 22 | x = lcp_buf[i]; 23 | while (!vec_stack.empty() and x < vec_stack.top()) { 24 | y = vec_stack.top(); 25 | if (vec_stack.pop()) { 26 | fc_lcp[fc_cnt++] = y; 27 | } 28 | } 29 | vec_stack.push(x); 30 | } 31 | 32 | while (!vec_stack.empty()) { 33 | y = vec_stack.top(); 34 | if (vec_stack.pop()) { 35 | fc_lcp[fc_cnt++] = y; 36 | } 37 | } 38 | if (fc_cnt < fc_lcp.size()) { 39 | 
fc_lcp.resize(fc_cnt); 40 | } 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /external/sdsl/lib/louds_tree.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/louds_tree.hpp" 2 | 3 | namespace sdsl 4 | { 5 | std::ostream& operator<<(std::ostream& os, const louds_node& v) 6 | { 7 | os<<"("< 3 | #include 4 | 5 | #ifdef WIN32 6 | #include "iso646.h" 7 | #endif 8 | 9 | 10 | namespace sdsl 11 | { 12 | 13 | 14 | ram_filebuf::~ram_filebuf() {} 15 | 16 | ram_filebuf::ram_filebuf() {} 17 | 18 | ram_filebuf::ram_filebuf(std::vector& ram_file) : m_ram_file(&ram_file) 19 | { 20 | char* begin = m_ram_file->data(); 21 | char* end = begin + m_ram_file->size(); 22 | setg(begin, begin, end); // set get pointers eback(), eptr(), egptr() 23 | } 24 | 25 | std::streambuf* 26 | ram_filebuf::open(const std::string name, std::ios_base::openmode mode) 27 | { 28 | // open ram_file 29 | if ((mode & std::ios_base::in) and !(mode & std::ios_base::trunc)) { 30 | // file must exist, initial position at the start 31 | if (!ram_fs::exists(name)) { 32 | m_ram_file = nullptr; 33 | } else { 34 | m_ram_file = &ram_fs::content(name); 35 | } 36 | } else { // existence of file not required 37 | if (!ram_fs::exists(name)) { 38 | // create empty file, if it does not yet exist 39 | ram_fs::store(name, ram_fs::content_type());// TODO: create method in ram_fs?? or store w 1 arg? 
40 | } 41 | m_ram_file = &ram_fs::content(name); 42 | if ((mode & std::ios_base::out) and !(mode & std::ios_base::app)) { 43 | m_ram_file->clear(); 44 | } 45 | } 46 | 47 | if (m_ram_file and(mode & std::ios_base::trunc)) { 48 | m_ram_file->clear(); 49 | } 50 | if (m_ram_file) { 51 | if (mode & std::ios_base::ate) { 52 | // TODO: move put pointer to the end of the file 53 | } else { 54 | 55 | } 56 | setg(m_ram_file->data(), m_ram_file->data(), m_ram_file->data()+m_ram_file->size()); 57 | setp(m_ram_file->data(), m_ram_file->data()+m_ram_file->size()); 58 | } 59 | // ATTENTION: if m_ram_file->size() == 0, then data might be nullptr !!! 60 | return m_ram_file ? this : nullptr; 61 | } 62 | 63 | bool 64 | ram_filebuf::is_open() 65 | { 66 | return m_ram_file!=nullptr; 67 | } 68 | 69 | ram_filebuf* 70 | ram_filebuf::close() 71 | { 72 | if (!this->is_open()) 73 | return nullptr; 74 | m_ram_file = nullptr; 75 | setg(nullptr, nullptr, nullptr); 76 | setp(nullptr, nullptr); 77 | return this; 78 | } 79 | 80 | ram_filebuf::pos_type 81 | ram_filebuf::seekpos(pos_type sp, std::ios_base::openmode mode) 82 | { 83 | if (sp >= (pos_type)0 and sp <= (pos_type)m_ram_file->size()) { 84 | setg(m_ram_file->data(), m_ram_file->data()+sp, m_ram_file->data()+m_ram_file->size()); 85 | setp(m_ram_file->data(), m_ram_file->data()+m_ram_file->size()); 86 | pbump64(sp); 87 | } else { 88 | if (mode & std::ios_base::out) { 89 | // extend buffer 90 | m_ram_file->resize(sp, 0); 91 | setg(m_ram_file->data(), m_ram_file->data()+sp, m_ram_file->data()+m_ram_file->size()); 92 | setp(m_ram_file->data(), m_ram_file->data()+m_ram_file->size()); 93 | pbump64(sp); 94 | } else { 95 | return pos_type(off_type(-1)); 96 | } 97 | } 98 | return sp; 99 | } 100 | 101 | ram_filebuf::pos_type 102 | ram_filebuf::pubseekoff(off_type off, std::ios_base::seekdir way, 103 | std::ios_base::openmode which) 104 | { 105 | if (std::ios_base::beg == way) { 106 | if (seekpos(off, which) == pos_type(-1)) { 107 | return 
pos_type(-1); 108 | } 109 | } else if (std::ios_base::cur == way) { 110 | if (seekpos(gptr()-eback()+off, which) == pos_type(-1)) { 111 | return pos_type(-1); 112 | } 113 | } else if (std::ios_base::end == way) { 114 | if (seekpos(egptr()-eback()+off, which) == pos_type(-1)) { 115 | return pos_type(-1); 116 | } 117 | } 118 | return gptr()-eback(); 119 | } 120 | 121 | 122 | ram_filebuf::pos_type 123 | ram_filebuf::pubseekpos(pos_type sp, std::ios_base::openmode which) 124 | { 125 | if (seekpos(sp, which) == pos_type(-1)) { 126 | return pos_type(-1); 127 | } else { 128 | return gptr()-eback(); 129 | } 130 | } 131 | 132 | int 133 | ram_filebuf::sync() 134 | { 135 | return 0; // we are always in sync, since buffer is sink 136 | } 137 | 138 | ram_filebuf::int_type 139 | ram_filebuf::overflow(int_type c) 140 | { 141 | if (m_ram_file) { 142 | m_ram_file->push_back(c); 143 | setp(m_ram_file->data(), m_ram_file->data()+m_ram_file->size()); 144 | std::ptrdiff_t add = epptr()-pbase(); 145 | pbump64(add); 146 | setg(m_ram_file->data(), gptr(), m_ram_file->data()+m_ram_file->size()); 147 | } 148 | return traits_type::to_int_type(c); 149 | } 150 | 151 | void ram_filebuf::pbump64(std::ptrdiff_t x) 152 | { 153 | while (x > std::numeric_limits::max()) { 154 | pbump(std::numeric_limits::max()); 155 | x -= std::numeric_limits::max(); 156 | } 157 | pbump(x); 158 | } 159 | 160 | } 161 | -------------------------------------------------------------------------------- /external/sdsl/lib/ram_fs.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/ram_fs.hpp" 2 | #include "sdsl/util.hpp" 3 | #include 4 | #include 5 | #include 6 | 7 | static int nifty_counter = 0; 8 | 9 | sdsl::ram_fs::mss_type sdsl::ram_fs::m_map; 10 | std::recursive_mutex sdsl::ram_fs::m_rlock; 11 | 12 | 13 | sdsl::ram_fs_initializer::ram_fs_initializer() 14 | { 15 | if (0 == nifty_counter++) { 16 | if (!ram_fs::m_map.empty()) { 17 | throw std::logic_error("Static 
preinitialized object is not empty."); 18 | } 19 | } 20 | } 21 | 22 | sdsl::ram_fs_initializer::~ram_fs_initializer() 23 | { 24 | if (0 == --nifty_counter) { 25 | // clean up 26 | } 27 | } 28 | 29 | namespace sdsl 30 | { 31 | 32 | ram_fs::ram_fs() {} 33 | 34 | void 35 | ram_fs::store(const std::string& name, content_type data) 36 | { 37 | std::lock_guard lock(m_rlock); 38 | if (!exists(name)) { 39 | std::string cname = name; 40 | m_map.insert(std::make_pair(std::move(cname), std::move(data))); 41 | } else { 42 | m_map[name] = std::move(data); 43 | } 44 | } 45 | 46 | bool 47 | ram_fs::exists(const std::string& name) 48 | { 49 | std::lock_guard lock(m_rlock); 50 | return m_map.find(name) != m_map.end(); 51 | } 52 | 53 | ram_fs::content_type& 54 | ram_fs::content(const std::string& name) 55 | { 56 | std::lock_guard lock(m_rlock); 57 | return m_map[name]; 58 | } 59 | 60 | size_t 61 | ram_fs::file_size(const std::string& name) 62 | { 63 | std::lock_guard lock(m_rlock); 64 | if (exists(name)) { 65 | return m_map[name].size(); 66 | } else { 67 | return 0; 68 | } 69 | } 70 | 71 | int 72 | ram_fs::remove(const std::string& name) 73 | { 74 | std::lock_guard lock(m_rlock); 75 | m_map.erase(name); 76 | return 0; 77 | } 78 | 79 | int 80 | ram_fs::rename(const std::string old_filename, const std::string new_filename) 81 | { 82 | std::lock_guard lock(m_rlock); 83 | m_map[new_filename] = std::move(m_map[old_filename]); 84 | remove(old_filename); 85 | return 0; 86 | } 87 | 88 | bool is_ram_file(const std::string& file) 89 | { 90 | if (file.size() > 0) { 91 | if (file[0]=='@') { 92 | return true; 93 | } 94 | } 95 | return false; 96 | } 97 | 98 | std::string ram_file_name(const std::string& file) 99 | { 100 | if (is_ram_file(file)) { 101 | return file; 102 | } else { 103 | return "@" + file; 104 | } 105 | } 106 | 107 | std::string disk_file_name(const std::string& file) 108 | { 109 | if (!is_ram_file(file)) { 110 | return file; 111 | } else { 112 | return file.substr(1); 113 | } 114 
| } 115 | 116 | int remove(const std::string& file) 117 | { 118 | if (is_ram_file(file)) { 119 | return ram_fs::remove(file); 120 | } else { 121 | return std::remove(file.c_str()); 122 | } 123 | } 124 | 125 | int rename(const std::string& old_filename, const std::string& new_filename) 126 | { 127 | if (is_ram_file(old_filename)) { 128 | if (!is_ram_file(new_filename)) { // error, if new file is not also RAM-file 129 | return -1; 130 | } 131 | return ram_fs::rename(old_filename, new_filename); 132 | } else { 133 | return std::rename(old_filename.c_str(), new_filename.c_str()); 134 | } 135 | } 136 | 137 | } // end namespace sdsl 138 | -------------------------------------------------------------------------------- /external/sdsl/lib/rrr_vector_15.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/rrr_vector_15.hpp" 2 | 3 | //! Namespace for the succinct data structure library 4 | namespace sdsl 5 | { 6 | // initialize the inner class 7 | binomial15::impl binomial15::iii; 8 | 9 | } // end namespace 10 | -------------------------------------------------------------------------------- /external/sdsl/lib/sd_vector.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/sd_vector.hpp" 2 | #include 3 | 4 | //! 
Namespace for the succinct data structure library 5 | namespace sdsl 6 | { 7 | 8 | sd_vector_builder::sd_vector_builder() : 9 | m_size(0), m_capacity(0), 10 | m_wl(0), 11 | m_tail(0), m_items(0), 12 | m_last_high(0), m_highpos(0) 13 | { 14 | } 15 | 16 | sd_vector_builder::sd_vector_builder(size_type n, size_type m) : 17 | m_size(n), m_capacity(m), 18 | m_wl(0), 19 | m_tail(0), m_items(0), 20 | m_last_high(0), m_highpos(0) 21 | { 22 | if(m_capacity > m_size) 23 | { 24 | throw std::runtime_error("sd_vector_builder: requested capacity is larger than vector size."); 25 | } 26 | 27 | size_type logm = bits::hi(m_capacity) + 1; 28 | const size_type logn = bits::hi(m_size) + 1; 29 | if(logm == logn) 30 | { 31 | --logm; // to ensure logn-logm > 0 32 | assert(logn - logm > 0); 33 | } 34 | m_wl = logn - logm; 35 | m_low = int_vector<>(m_capacity, 0, m_wl); 36 | m_high = bit_vector(m_capacity + (1ULL << logm), 0); 37 | } 38 | 39 | void 40 | sd_vector_builder::swap(sd_vector_builder& sdb) 41 | { 42 | std::swap(m_size, sdb.m_size); 43 | std::swap(m_capacity, sdb.m_capacity); 44 | std::swap(m_wl, sdb.m_wl); 45 | std::swap(m_tail, sdb.m_tail); 46 | std::swap(m_items, sdb.m_items); 47 | std::swap(m_last_high, sdb.m_last_high); 48 | std::swap(m_highpos, sdb.m_highpos); 49 | m_low.swap(sdb.m_low); 50 | m_high.swap(sdb.m_high); 51 | } 52 | 53 | template<> 54 | sd_vector<>::sd_vector(sd_vector_builder& builder) 55 | { 56 | if(builder.items() != builder.capacity()) 57 | { 58 | throw std::runtime_error("sd_vector: the builder is not full."); 59 | } 60 | 61 | m_size = builder.m_size; 62 | m_wl = builder.m_wl; 63 | m_low.swap(builder.m_low); 64 | m_high.swap(builder.m_high); 65 | util::init_support(m_high_1_select, &m_high); 66 | util::init_support(m_high_0_select, &m_high); 67 | 68 | builder = sd_vector_builder(); 69 | } 70 | 71 | } // end namespace 72 | -------------------------------------------------------------------------------- /external/sdsl/lib/uint128_t.cpp: 
-------------------------------------------------------------------------------- 1 | #include "sdsl/uint128_t.hpp" 2 | 3 | //! Namespace for the succinct data structure library 4 | namespace sdsl 5 | { 6 | 7 | std::ostream& operator<<(std::ostream& os, const uint128_t& x) 8 | { 9 | uint64_t X[2] = {(uint64_t)(x >> 64), (uint64_t)x}; 10 | for (int j=0; j < 2; ++j) { 11 | for (int i=0; i < 16; ++i) { 12 | os << std::hex << ((X[j]>>60)&0xFULL) << std::dec; 13 | X[j] <<= 4; 14 | } 15 | } 16 | return os; 17 | } 18 | 19 | } // end namespace 20 | -------------------------------------------------------------------------------- /external/sdsl/lib/uint256_t.cpp: -------------------------------------------------------------------------------- 1 | #include "sdsl/uint256_t.hpp" 2 | 3 | //! Namespace for the succinct data structure library 4 | namespace sdsl 5 | { 6 | std::ostream& operator<<(std::ostream& os, const uint256_t& x) 7 | { 8 | uint64_t X[4] = {(uint64_t)(x.m_high >> 64), (uint64_t)x.m_high, x.m_mid, x.m_lo}; 9 | for (int j=0; j < 4; ++j) { 10 | for (int i=0; i < 16; ++i) { 11 | os << std::hex << ((X[j]>>60)&0xFULL) << std::dec; 12 | X[j] <<= 4; 13 | } 14 | } 15 | return os; 16 | } 17 | } // end namespace 18 | -------------------------------------------------------------------------------- /external/sdsl/lib/util.cpp: -------------------------------------------------------------------------------- 1 | /* sdsl - succinct data structures library 2 | Copyright (C) 2009-2013 Simon Gog 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see http://www.gnu.org/licenses/ . 16 | */ 17 | 18 | #include "sdsl/util.hpp" 19 | 20 | #include // for file_size 21 | #include // for file_size 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #ifndef MSVC_COMPILER 29 | #include 30 | #endif 31 | 32 | namespace sdsl 33 | { 34 | 35 | namespace util 36 | { 37 | 38 | uint64_t _id_helper::id = 0; 39 | 40 | std::string basename(std::string file) 41 | { 42 | file = disk_file_name(file); // remove RAM-prefix 43 | #ifdef MSVC_COMPILER 44 | char* c = _strdup((const char*)file.c_str()); 45 | char file_name[_MAX_FNAME] = { 0 }; 46 | ::_splitpath_s(c, NULL, 0, NULL, NULL, file_name, _MAX_FNAME, NULL, 0); 47 | std::string res(file_name); 48 | #else 49 | char* c = strdup((const char*)file.c_str()); 50 | std::string res = std::string(::basename(c)); 51 | #endif 52 | free(c); 53 | return res; 54 | } 55 | 56 | std::string dirname(std::string file) 57 | { 58 | bool ram_file = is_ram_file(file); 59 | file = disk_file_name(file); // remove RAM-prefix 60 | #ifdef MSVC_COMPILER 61 | char* c = _strdup((const char*)file.c_str()); 62 | char dir_name[_MAX_DIR] = { 0 }; 63 | char drive[_MAX_DRIVE] = {0}; 64 | ::_splitpath_s(c, drive, _MAX_DRIVE, dir_name, _MAX_DIR, NULL,0, NULL,0); 65 | std::string res = std::string(drive) + std::string(dir_name); 66 | #else 67 | char* c = strdup((const char*)file.c_str()); 68 | std::string res = std::string(::dirname(c)); 69 | #endif 70 | free(c); 71 | if (ram_file) { 72 | if ("." 
== res) { 73 | res = ram_file_name(""); 74 | } else if ("/" ==res) { 75 | res = ram_file_name(res); 76 | } 77 | } 78 | return res; 79 | } 80 | 81 | uint64_t pid() 82 | { 83 | #ifdef MSVC_COMPILER 84 | return _getpid(); 85 | #else 86 | return getpid(); 87 | #endif 88 | } 89 | 90 | char* str_from_errno() 91 | { 92 | #ifdef MSVC_COMPILER 93 | #pragma warning(disable:4996) 94 | return strerror(errno); 95 | #pragma warning(default:4996) 96 | #else 97 | return strerror(errno); 98 | #endif 99 | } 100 | 101 | 102 | uint64_t id() 103 | { 104 | return _id_helper::getId(); 105 | } 106 | 107 | std::string demangle(const std::string& name) 108 | { 109 | #ifdef HAVE_CXA_DEMANGLE 110 | char buf[4096]; 111 | size_t size = 4096; 112 | int status = 0; 113 | abi::__cxa_demangle(name.c_str(), buf, &size, &status); 114 | if (status==0) 115 | return std::string(buf); 116 | return name; 117 | #else 118 | return name; 119 | #endif 120 | } 121 | 122 | std::string demangle2(const std::string& name) 123 | { 124 | std::string result = demangle(name); 125 | std::vector words_to_delete; 126 | words_to_delete.push_back("sdsl::"); 127 | words_to_delete.push_back("(unsigned char)"); 128 | words_to_delete.push_back(", unsigned long"); 129 | 130 | for (size_t k=0; k(r) == (std::get<1>(r) + 1); 9 | } 10 | 11 | int_vector<>::size_type size(const range_type& r) 12 | { 13 | return std::get<1>(r) - std::get<0>(r) + 1; 14 | } 15 | 16 | 17 | pc_node::pc_node(uint64_t freq, uint64_t sym, uint64_t parent, 18 | uint64_t child_left, uint64_t child_right): 19 | freq(freq), sym(sym), parent(parent) 20 | { 21 | child[0] = child_left; 22 | child[1] = child_right; 23 | } 24 | 25 | pc_node& pc_node::operator=(const pc_node& v) 26 | { 27 | freq = v.freq; 28 | sym = v.sym; 29 | parent = v.parent; 30 | child[0] = v.child[0]; 31 | child[1] = v.child[1]; 32 | return *this; 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /external/sg-entropy/Make.helper: 
-------------------------------------------------------------------------------- 1 | SG_ENTROPY_INCS = \ 2 | stdx/bit.h \ 3 | stdx/define.h \ 4 | stdx/exception.h \ 5 | io/bit_stream.h \ 6 | io/stream.h \ 7 | io/stream_array.h \ 8 | entropy/arith32.h \ 9 | entropy/arith64.h \ 10 | entropy/range32.h \ 11 | entropy/range64.h 12 | 13 | SG_ENTROPY_LIBS = \ 14 | stdx/bit.cpp \ 15 | stdx/exception.cpp \ 16 | io/bit_stream.cpp \ 17 | io/stream_array.cpp \ 18 | entropy/arith32.cpp \ 19 | entropy/arith64.cpp \ 20 | entropy/range32.cpp \ 21 | entropy/range64.cpp 22 | -------------------------------------------------------------------------------- /external/sg-entropy/entropy/arith32.cpp: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | //By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 
//
//For details:
//http://www.sachingarg.com/compression/entropy_coding/64bit

#include "entropy/arith32.h"

//Upper bound for the TotalRange argument: the order-0 test drivers rescale
//their frequency table once the cumulative total reaches this value.
const SG::DWord SG::Entropy::ArithmeticCoder32::MaxRange=0x3FFF;

//Starts with the full 16-bit interval [0x0000,0xFFFF] and no pending
//underflow bits.
SG::Entropy::ArithmeticCoder32::ArithmeticCoder32() :
    High(0xFFFF),
    Low(0),
    UnderflowCount(0),
    TempRange(0)
{
}

//Binds the encoder to the bit stream it writes to. Only a reference is kept,
//so the stream must outlive the encoder.
SG::Entropy::ArithmeticEncoder32::ArithmeticEncoder32(SG::io::BitOutputStream &BitOStream) :
    Flushed(false),
    Output(BitOStream)
{
}

//Flushes pending bits if the caller did not call Flush() explicitly.
//NOTE(review): Flush() writes to Output; if the underlying stream can throw,
//the exception would leave this destructor - confirm against the stream used.
SG::Entropy::ArithmeticEncoder32::~ArithmeticEncoder32()
{
    if(!Flushed) Flush();
}

//Encodes one symbol whose cumulative frequency interval is
//[SymbolLow,SymbolHigh) out of TotalRange.
//TotalRange is used as a divisor and must be non-zero.
void SG::Entropy::ArithmeticEncoder32::EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
{
    //Narrow [Low,High] to the slice that belongs to the symbol.
    //High must be updated first: both lines read the old value of Low.
    TempRange=(High-Low)+1;
    High=Low + ((TempRange*SymbolHigh)/TotalRange)-1;
    Low =Low + ((TempRange*SymbolLow )/TotalRange);

    //Renormalise: shift out bits until neither case below applies.
    for(;;)
    {
        if((High & 0x8000)==(Low & 0x8000))
        {
            //Bit 15 agrees: emit it, followed by the deferred underflow
            //bits, which are its complement.
            Output.WriteBit(High>>15);
            while(UnderflowCount)
            {
                Output.WriteBit((High>>15)^1);
                UnderflowCount--;
            }
        }
        else
        {
            if((Low & 0x4000) && !(High & 0x4000))
            {
                //Underflow: bit 15 differs, Low has bit 14 set and High has
                //it clear. Remember one deferred bit and rewrite bit 14 so
                //that the shift below widens the interval.
                UnderflowCount++;

                Low &= 0x3FFF;
                High |= 0x4000;
            }
            else
                return;
        }

        //Shift both bounds left by one inside 16 bits; High shifts in a 1,
        //Low shifts in a 0.
        Low =(Low<<1) & 0xFFFF;
        High=((High<<1)|1) & 0xFFFF;
    }
}

//Terminates the code stream: writes bit 14 of Low, then UnderflowCount+1
//copies of its complement, then flushes the bit stream.
//Calling it again is a no-op.
void SG::Entropy::ArithmeticEncoder32::Flush()
{
    if(!Flushed)
    {
        Output.WriteBit((Low>>14)&1);
        UnderflowCount++;

        while(UnderflowCount)
        {
            Output.WriteBit(((Low>>14)^1)&1);
            UnderflowCount--;
        }

        Output.Flush();
        Flushed=true;
    }
}

//Binds the decoder to its input and preloads Code with the first 16 bits,
//most significant bit first.
SG::Entropy::ArithmeticDecoder32::ArithmeticDecoder32(SG::io::BitInputStream &BitIStream) :
    Code(0),
    Input(BitIStream)
{
    for(SG::FastInt I=0;I<16;I++)
    {
        Code<<=1;
        Code+=Input.ReadBit();;
    }
}

//Maps the current Code back to a cumulative count scaled to TotalRange.
//The multiplication is widened to QWord. Also stores the interval width in
//TempRange as a side effect.
SG::DWord SG::Entropy::ArithmeticDecoder32::GetCurrentCount(SG::DWord TotalRange)
{
    TempRange=(High-Low)+1;
    return (SG::DWord)(((((Code-Low)+1)*(SG::QWord)TotalRange)-1)/TempRange);
}

//Consumes the symbol with interval [SymbolLow,SymbolHigh) out of TotalRange.
//Applies the same interval update and renormalisation as
//ArithmeticEncoder32::EncodeRange, reading one input bit per shift instead
//of writing one.
void SG::Entropy::ArithmeticDecoder32::RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
{
    TempRange=(High-Low)+1;
    High=Low+((TempRange*SymbolHigh)/TotalRange)-1;
    Low =Low+((TempRange*SymbolLow )/TotalRange);

    for(;;)
    {
        if((High & 0x8000) == (Low & 0x8000))
        {
            //Bit 15 agrees: nothing to adjust, just shift below.
        }
        else
        {
            if((Low & 0x4000) && !(High & 0x4000))
            {
                //Underflow: flip bit 14 of Code and rewrite bit 14 of the
                //bounds exactly as the encoder does.
                Code ^= 0x4000;
                Low &= 0x3FFF;
                High |= 0x4000;
            }
            else
                return;
        }
        Low = (Low << 1) & 0xFFFF;
        High = ((High<<1) | 1) & 0xFFFF;

        //Shift the next input bit into Code, keeping 16 bits.
        Code <<=1;
        Code|=Input.ReadBit();
        Code &=0xFFFF;
    }
}
--------------------------------------------------------------------------------
/external/sg-entropy/entropy/arith32.h:
--------------------------------------------------------------------------------
//Entropy Coding Source code
//By Sachin Garg, 2006
//
//Includes range coder based upon the carry-less implementation
//by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
//DDJ code.
//
//Modified to use 64-bit variables for improved performance.
//32-bit reference implementations also included.
//
//For details:
//http://www.sachingarg.com/compression/entropy_coding/64bit

#ifndef __sg_entropy_arith32
#define __sg_entropy_arith32

#include "stdx/define.h"
#include "stdx/bit.h"
#include "io/bit_stream.h"

namespace SG
{
    namespace Entropy
    {
        /*  Code for arithmetic coding
            derived from work by Mark Nelson, Tom st Denis, Charles Bloom

            author : Sachin Garg
        */
        //Shared state of the encoder and the decoder. The constructor is
        //protected, so the class is only usable through the two derived
        //classes below.
        class ArithmeticCoder32
        {
        public:
            //Upper bound for the TotalRange argument (defined as 0x3FFF in
            //arith32.cpp).
            static const SG::DWord MaxRange;

        protected:

            ArithmeticCoder32();
            //High/Low: bounds of the current coding interval; arith32.cpp
            //masks them to 16 bits.
            //UnderflowCount: number of deferred underflow bits (only the
            //encoder modifies it).
            SG::DWord High,Low,UnderflowCount;
            //Scratch: width of the current interval, (High-Low)+1.
            SG::DWord TempRange;
        };

        //Arithmetic encoder writing to a BitOutputStream.
        class ArithmeticEncoder32 : public ArithmeticCoder32
        {
        private:
            //True once Flush() has run; makes further Flush() calls no-ops.
            SG::Boolean Flushed;
            //Destination stream; held by reference, not owned.
            SG::io::BitOutputStream &Output;

        public:
            ArithmeticEncoder32(SG::io::BitOutputStream &BitOStream);
            //Calls Flush() if it has not been called yet.
            ~ArithmeticEncoder32();

            //Encodes the symbol whose cumulative frequency interval is
            //[SymbolLow,SymbolHigh) out of TotalRange (non-zero).
            void EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
            //Writes the terminating bits and flushes the output stream.
            void Flush();
        };

        //Arithmetic decoder reading from a BitInputStream.
        class ArithmeticDecoder32 : public ArithmeticCoder32
        {
        private:
            //Window of the most recently read code bits (16 bits are
            //preloaded by the constructor).
            SG::DWord Code;
            //Source stream; held by reference, not owned.
            SG::io::BitInputStream &Input;

        public:
            ArithmeticDecoder32(SG::io::BitInputStream &BitIStream);

            //Returns the cumulative count, scaled to TotalRange, that the
            //current code value falls on.
            SG::DWord GetCurrentCount(SG::DWord TotalRange);
            //Consumes the symbol with interval [SymbolLow,SymbolHigh) and
            //reads further input bits as needed.
            void RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
        };
    }
}

#endif

--------------------------------------------------------------------------------
/external/sg-entropy/entropy/arith64.cpp:
--------------------------------------------------------------------------------
//Entropy Coding Source code
//By Sachin Garg, 2006
//
//Includes range coder based upon the carry-less implementation
//by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
//DDJ code.
//
//Modified to use 64-bit variables for improved performance.
//32-bit reference implementations also included.
//
//For details:
//http://www.sachingarg.com/compression/entropy_coding/64bit

#include "entropy/arith64.h"

//Upper bound for the TotalRange argument: the order-0 test drivers rescale
//their frequency table once the cumulative total reaches this value.
const SG::DWord SG::Entropy::ArithmeticCoder64::MaxRange=0x3FFFFFFF;

//Starts with the full 32-bit interval [0x00000000,0xFFFFFFFF] and no pending
//underflow bits.
SG::Entropy::ArithmeticCoder64::ArithmeticCoder64() :
    High(0xFFFFFFFF),
    Low(0),
    UnderflowCount(0),
    TempRange(0)
{
}

//Binds the encoder to the bit stream it writes to. Only a reference is kept,
//so the stream must outlive the encoder.
SG::Entropy::ArithmeticEncoder64::ArithmeticEncoder64(SG::io::BitOutputStream &BitOStream) :
    Flushed(false),
    Output(BitOStream)
{
}

//Flushes pending bits if the caller did not call Flush() explicitly.
//NOTE(review): Flush() writes to Output; if the underlying stream can throw,
//the exception would leave this destructor - confirm against the stream used.
SG::Entropy::ArithmeticEncoder64::~ArithmeticEncoder64()
{
    if(!Flushed) Flush();
}

//Encodes one symbol whose cumulative frequency interval is
//[SymbolLow,SymbolHigh) out of TotalRange.
//TotalRange is used as a divisor and must be non-zero.
void SG::Entropy::ArithmeticEncoder64::EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
{
    //Narrow [Low,High] to the slice that belongs to the symbol; the products
    //are formed in QWord arithmetic.
    //High must be updated first: both lines read the old value of Low.
    TempRange=(High-Low)+1;
    High=Low + ((TempRange*(SG::QWord)SymbolHigh)/TotalRange)-1;
    Low =Low + ((TempRange*(SG::QWord)SymbolLow )/TotalRange);

    //Renormalise: shift out bits until neither case below applies.
    for(;;)
    {
        if((High & 0x80000000)==(Low & 0x80000000))
        {
            //Bit 31 agrees: emit it, followed by the deferred underflow
            //bits, which are its complement.
            Output.WriteBit(High>>31);
            while(UnderflowCount)
            {
                Output.WriteBit((High>>31)^1);
                UnderflowCount--;
            }
        }
        else
        {
            if((Low & 0x40000000) && !(High & 0x40000000))
            {
                //Underflow: bit 31 differs, Low has bit 30 set and High has
                //it clear. Remember one deferred bit and rewrite bit 30 so
                //that the shift below widens the interval.
                UnderflowCount++;

                Low &= 0x3FFFFFFF;
                High |= 0x40000000;
            }
            else
                return;
        }

        //Shift both bounds left by one inside 32 bits; High shifts in a 1,
        //Low shifts in a 0.
        Low =(Low<<1) & 0xFFFFFFFF;
        High=((High<<1)|1) & 0xFFFFFFFF;
    }
}

//Terminates the code stream: writes bit 30 of Low, then UnderflowCount+1
//copies of its complement, then flushes the bit stream.
//Calling it again is a no-op.
void SG::Entropy::ArithmeticEncoder64::Flush()
{
    if(!Flushed)
    {
        Output.WriteBit((Low>>30)&1);
        UnderflowCount++;

        while(UnderflowCount)
        {
            Output.WriteBit(((Low>>30)^1)&1);
            UnderflowCount--;
        }

        Output.Flush();
        Flushed=true;
    }
}

//Binds the decoder to its input and preloads Code with the first 32 bits,
//most significant bit first.
SG::Entropy::ArithmeticDecoder64::ArithmeticDecoder64(SG::io::BitInputStream &BitIStream) :
    Code(0),
    Input(BitIStream)
{
    for(SG::FastInt I=0;I<32;I++)
    {
        Code<<=1;
        Code+=Input.ReadBit();;
    }
}



//Maps the current Code back to a cumulative count scaled to TotalRange.
//Also stores the interval width in TempRange as a side effect.
SG::DWord SG::Entropy::ArithmeticDecoder64::GetCurrentCount(SG::DWord TotalRange)
{
    TempRange=(High-Low)+1;
    return (SG::DWord)(((((Code-Low)+1)*(SG::QWord)TotalRange)-1)/TempRange);
}

//Consumes the symbol with interval [SymbolLow,SymbolHigh) out of TotalRange.
//Applies the same interval update and renormalisation as
//ArithmeticEncoder64::EncodeRange, reading one input bit per shift instead
//of writing one.
void SG::Entropy::ArithmeticDecoder64::RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
{
    TempRange=(High-Low)+1;
    High=Low+((TempRange*(SG::QWord)SymbolHigh)/TotalRange)-1;
    Low =Low+((TempRange*(SG::QWord)SymbolLow )/TotalRange);

    for(;;)
    {
        if((High & 0x80000000) == (Low & 0x80000000))
        {
            //Bit 31 agrees: nothing to adjust, just shift below.
        }
        else
        {
            if((Low & 0x40000000) && !(High & 0x40000000))
            {
                //Underflow: flip bit 30 of Code and rewrite bit 30 of the
                //bounds exactly as the encoder does.
                Code ^= 0x40000000;
                Low &= 0x3FFFFFFF;
                High |= 0x40000000;
            }
            else
                return;
        }
        Low = (Low << 1) & 0xFFFFFFFF;
        High = ((High<<1) | 1) & 0xFFFFFFFF;

        //Shift the next input bit into Code, keeping 32 bits.
        Code <<=1;
        Code|=Input.ReadBit();
        Code &=0xFFFFFFFF;
    }
}

--------------------------------------------------------------------------------
/external/sg-entropy/entropy/arith64.h:
--------------------------------------------------------------------------------
//Entropy Coding Source code
//By Sachin Garg, 2006
//
//Includes range coder based upon the carry-less implementation
//by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
//DDJ code.
//
//Modified to use 64-bit variables for improved performance.
//32-bit reference implementations also included.
//
//For details:
//http://www.sachingarg.com/compression/entropy_coding/64bit

#ifndef __sg_entropy_arith64
#define __sg_entropy_arith64

#include "stdx/define.h"
#include "stdx/bit.h"
#include "io/bit_stream.h"

namespace SG
{
    namespace Entropy
    {
        /*  Code for arithmetic coding, derived from work by Mark Nelson, Tom st Denis, Charles Bloom
            Modified to use 64-bit integer maths, for increased precision

            author : Sachin Garg
        */
        //Shared state of the encoder and the decoder. The constructor is
        //protected, so the class is only usable through the two derived
        //classes below.
        class ArithmeticCoder64
        {
        public:
            //Upper bound for the TotalRange argument (defined as 0x3FFFFFFF
            //in arith64.cpp).
            static const SG::DWord MaxRange;

        protected:

            ArithmeticCoder64();
            //High/Low: bounds of the current coding interval; arith64.cpp
            //masks them to 32 bits.
            //UnderflowCount: number of deferred underflow bits (only the
            //encoder modifies it).
            SG::QWord High,Low,UnderflowCount;
            //Scratch: width of the current interval, (High-Low)+1.
            SG::QWord TempRange;
        };

        //Arithmetic encoder writing to a BitOutputStream.
        class ArithmeticEncoder64 : public ArithmeticCoder64
        {
        private:
            //True once Flush() has run; makes further Flush() calls no-ops.
            SG::Boolean Flushed;
            //Destination stream; held by reference, not owned.
            SG::io::BitOutputStream &Output;

        public:
            ArithmeticEncoder64(SG::io::BitOutputStream &BitOStream);
            //Calls Flush() if it has not been called yet.
            ~ArithmeticEncoder64();

            //Encodes the symbol whose cumulative frequency interval is
            //[SymbolLow,SymbolHigh) out of TotalRange (non-zero).
            void EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
            //Writes the terminating bits and flushes the output stream.
            void Flush();
        };

        //Arithmetic decoder reading from a BitInputStream.
        class ArithmeticDecoder64 : public ArithmeticCoder64
        {
        private:
            //Window of the most recently read code bits (32 bits are
            //preloaded by the constructor).
            SG::QWord Code;
            //Source stream; held by reference, not owned.
            SG::io::BitInputStream &Input;

        public:
            ArithmeticDecoder64(SG::io::BitInputStream &BitIStream);

            //Returns the cumulative count, scaled to TotalRange, that the
            //current code value falls on.
            SG::DWord GetCurrentCount(SG::DWord TotalRange);
            //Consumes the symbol with interval [SymbolLow,SymbolHigh) and
            //reads further input bits as needed.
            void RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
        };
    }
}
#endif

--------------------------------------------------------------------------------
/external/sg-entropy/entropy/range32.cpp:
--------------------------------------------------------------------------------
//Entropy Coding Source code
//By Sachin Garg, 2006
//
//Includes range coder based upon the carry-less implementation
//by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
//DDJ code.
7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 10 | // 11 | //For details: 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | #include "entropy/range32.h" 15 | 16 | const SG::DWord SG::Entropy::RangeCoder32::Top=(SG::DWord)1<<24; 17 | const SG::DWord SG::Entropy::RangeCoder32::Bottom=(SG::DWord)1<<16; 18 | const SG::DWord SG::Entropy::RangeCoder32::MaxRange=Bottom; 19 | 20 | SG::Entropy::RangeCoder32::RangeCoder32() : 21 | Low(0), 22 | Range((SG::DWord)-1) 23 | { 24 | } 25 | 26 | SG::Entropy::RangeEncoder32::RangeEncoder32(SG::io::OutputStream &OStream) : 27 | Flushed(false), 28 | Output(OStream) 29 | { 30 | } 31 | 32 | SG::Entropy::RangeEncoder32::~RangeEncoder32() 33 | { 34 | if (!Flushed) Flush(); 35 | } 36 | 37 | void SG::Entropy::RangeEncoder32::EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange) 38 | { 39 | Low += SymbolLow*(Range/=TotalRange); 40 | Range *= SymbolHigh-SymbolLow; 41 | 42 | while ((Low ^ (Low+Range))>24), Range<<=8, Low<<=8; 45 | } 46 | } 47 | 48 | void SG::Entropy::RangeEncoder32::Flush() 49 | { 50 | if(!Flushed) 51 | { 52 | for(SG::FastInt i=0;i<4;i++) 53 | { 54 | Output.WriteByte(Low>>24); 55 | Low<<=8; 56 | } 57 | 58 | Flushed=true; 59 | } 60 | } 61 | 62 | SG::Entropy::RangeDecoder32::RangeDecoder32(SG::io::InputStream &IStream) : 63 | Code(0), 64 | Input(IStream) 65 | { 66 | for(SG::FastInt i=0;i<4;i++) 67 | { 68 | Code = (Code << 8) | Input.ReadByte(); 69 | } 70 | } 71 | 72 | SG::DWord SG::Entropy::RangeDecoder32::GetCurrentCount(SG::DWord TotalRange) 73 | { 74 | return (Code-Low)/(Range/=TotalRange); 75 | } 76 | 77 | void SG::Entropy::RangeDecoder32::RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord /*TotalRange*/) 78 | { 79 | Low += SymbolLow*Range; 80 | Range *= SymbolHigh-SymbolLow; 81 | 82 | while ((Low ^ Low+Range)>56), Range<<=8, Low<<=8; 45 | } 46 | } 47 | 48 | void 
SG::Entropy::RangeEncoder64::Flush() 49 | { 50 | if(!Flushed) 51 | { 52 | for(SG::FastInt i=0;i<8;i++) 53 | { 54 | Output.WriteByte(Low>>56); 55 | Low<<=8; 56 | } 57 | Flushed=true; 58 | } 59 | } 60 | 61 | SG::Entropy::RangeDecoder64::RangeDecoder64(SG::io::InputStream &IStream) : 62 | Code(0), 63 | Input(IStream) 64 | { 65 | for(SG::FastInt i=0;i<8;i++) 66 | { 67 | Code = (Code << 8) | Input.ReadByte(); 68 | } 69 | } 70 | 71 | SG::DWord SG::Entropy::RangeDecoder64::GetCurrentCount(SG::DWord TotalRange) 72 | { 73 | return (Code-Low)/(Range/=TotalRange); 74 | } 75 | 76 | void SG::Entropy::RangeDecoder64::RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord /*TotalRange*/) 77 | { 78 | Low += SymbolLow*Range; 79 | Range *= SymbolHigh-SymbolLow; 80 | 81 | while ((Low ^ Low+Range)=_Size; 34 | } 35 | 36 | SG::io::ArrayOutputStream::ArrayOutputStream(SG::Byte *Array,SG::Counter Size,SG::Counter InitialOffset) : 37 | _Array(Array), 38 | _Size(Size), 39 | _Offset(InitialOffset) 40 | { 41 | } 42 | 43 | void SG::io::ArrayOutputStream::WriteByte(SG::Byte Value) 44 | { 45 | if(_Offset>=_Size) throw stdx::Exception("Buffer Overflow","SG::io::ArrayOutputStream::WriteByte"); 46 | _Array[_Offset]=Value; 47 | ++_Offset; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /external/sg-entropy/io/stream_array.h: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | //By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 
//
//For details:
//http://www.sachingarg.com/compression/entropy_coding/64bit

#ifndef __sg_io_streamarray
#define __sg_io_streamarray

#include "stdx/define.h"
#include "stdx/bit.h"
#include "io/stream.h"

namespace SG
{
    namespace io
    {
        //InputStream over a caller-supplied byte array. The destructor is
        //empty, so the array is not freed by this class.
        class ArrayInputStream : public SG::io::InputStream
        {
        private:
            //Current position in _Array.
            SG::Counter _Offset;
            //Caller-supplied buffer.
            SG::Byte *_Array;
            //Number of bytes in _Array.
            SG::Counter _Size;

        public:
            //Array: buffer to read from; Size: its length in bytes;
            //InitialOffset: position of the first byte to read.
            ArrayInputStream(SG::Byte *Array,SG::Counter Size,SG::Counter InitialOffset=0);
            ~ArrayInputStream(){}

            //Defined in stream_array.cpp.
            int ReadByte();
            SG::Boolean Ended();

            //Returns the current position.
            SG::Counter Tell(){return _Offset;}
            //Sets the current position; Offset is stored without a range check.
            void Seek(SG::Counter Offset){_Offset=Offset;}
        };

        //OutputStream into a caller-supplied byte array. The destructor is
        //empty, so the array is not freed by this class.
        class ArrayOutputStream : public SG::io::OutputStream
        {
        private:
            //Position of the next byte to write.
            SG::Counter _Offset;
            //Caller-supplied buffer.
            SG::Byte *_Array;
            //Capacity of _Array in bytes.
            SG::Counter _Size;

        public:
            //Array: buffer to write into; Size: its capacity in bytes;
            //InitialOffset: position of the first byte to write.
            ArrayOutputStream(SG::Byte *Array,SG::Counter Size,SG::Counter InitialOffset=0);
            ~ArrayOutputStream(){}

            //Defined in stream_array.cpp: stores Value at the current
            //position and advances it; throws stdx::Exception
            //("Buffer Overflow") once the position has reached _Size.
            void WriteByte(SG::Byte Value);
            //Nothing is buffered, so there is nothing to flush.
            void Flush(){};

            //Returns the current position, i.e. the number of bytes written
            //when InitialOffset was 0.
            SG::Counter Tell(){return _Offset;}
            //Sets the current position; Offset is stored without a range check.
            void Seek(SG::Counter Offset){_Offset=Offset;}
        };
    }
}

#endif

--------------------------------------------------------------------------------
/external/sg-entropy/license.txt:
--------------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3.
 The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. 6 | 7 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /external/sg-entropy/order0test_arith32.cpp: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | //By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 10 | // 11 | //For details: 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | #include 15 | #include 16 | #include 17 | #include "stdx/define.h" 18 | 19 | #include "io/bit_stream.h" 20 | #include "io/stream_array.h" 21 | 22 | #include "entropy/arith32.h" 23 | 24 | using namespace std; 25 | using namespace SG; 26 | 27 | void Rescale(Counter *Frequency) { 28 | for(int i=1;i<=256;i++) { 29 | Frequency[i]/=2; 30 | if(Frequency[i]<=Frequency[i-1]) Frequency[i]=Frequency[i-1]+1; 31 | } 32 | } 33 | 34 | //A quick test for entropy coders. Uses order-0 model.
35 | int main(int argc,char *argv[]) 36 | { 37 | fstream Fin,Fout; 38 | Counter FileSizeB; 39 | 40 | Counter Seconds, OutputSize; 41 | 42 | if(argc!=4) 43 | { 44 | cerr<<"Usage: c|d InputFileName OutputFileName\n" 45 | <<"c: compress\n" 46 | <<"d: decompress\n"; 47 | return 1; 48 | } 49 | 50 | Fin.open(argv[2],ios::in|ios::binary); 51 | if(!Fin.good()) { cerr<<"File not found\n"; return 1; } 52 | 53 | Fin.seekg(0,ios::end); 54 | FileSizeB=Fin.tellg(); 55 | Fin.seekg(0,ios::beg); 56 | 57 | Byte *InputFile=new Byte[(DWord)(FileSizeB)]; 58 | Byte *OutputFile=new Byte[(DWord)(FileSizeB+2000000)]; 59 | if(InputFile==NULL||OutputFile==NULL) { cerr<<"Memory allocation error\n"; return 1; } 60 | 61 | Fin.read((char *)InputFile,FileSizeB); 62 | 63 | Fout.open(argv[3],ios::out|ios::binary); 64 | if(!Fout.good()) { cerr<<"Error creating file\n"; return 1; } 65 | 66 | Seconds=clock(); 67 | 68 | if(argv[1][0]=='c') 69 | { 70 | cout<<"Compressing...\n"; 71 | 72 | io::ArrayInputStream ByteStream(InputFile,(DWord)(FileSizeB)); 73 | io::ArrayOutputStream OutputStream(OutputFile,(DWord)(FileSizeB+2000000)); 74 | io::BitOutputStream BitStream(OutputStream); 75 | 76 | for(int i=0;i=EntropyCoder.MaxRange) Rescale(Freq); 90 | } 91 | EntropyCoder.Flush(); 92 | BitStream.Flush(); 93 | OutputSize = OutputStream.Tell(); 94 | 95 | cout<Count;Symbol--); 120 | //Symbol--; 121 | 122 | ByteStream.WriteByte(Symbol); 123 | EntropyCoder.RemoveRange(Freq[Symbol],Freq[Symbol+1],Freq[256]); 124 | 125 | for(int j=Symbol+1;j<257;j++) Freq[j]++; 126 | if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq); 127 | } 128 | 129 | Fout.write((char*)OutputFile,OutputSize); 130 | } 131 | else 132 | { 133 | cerr<<"Invalid parameter\n"; 134 | return 1; 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /external/sg-entropy/order0test_arith64.cpp: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | 
//By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 10 | // 11 | //For details: 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | #include 15 | #include 16 | #include 17 | #include "stdx/define.h" 18 | 19 | #include "io/bit_stream.h" 20 | #include "io/stream_array.h" 21 | 22 | #include "entropy/arith64.h" 23 | 24 | using namespace std; 25 | using namespace SG; 26 | 27 | void Rescale(Counter *Frequency) { 28 | for(int i=1;i<=256;i++) { 29 | Frequency[i]/=2; 30 | if(Frequency[i]<=Frequency[i-1]) Frequency[i]=Frequency[i-1]+1; 31 | } 32 | } 33 | 34 | //A quick test for entropty coders. Uses order-0 model. 35 | int main(int argc,char *argv[]) 36 | { 37 | fstream Fin,Fout; 38 | Counter FileSizeB; 39 | 40 | Counter Seconds, OutputSize; 41 | 42 | if(argc!=4) 43 | { 44 | cerr<<"Usage: c|d InputFileName OutputFileName\n" 45 | <<"c: compress\n" 46 | <<"d: decompress\n"; 47 | return 1; 48 | } 49 | 50 | Fin.open(argv[2],ios::in|ios::binary); 51 | if(!Fin.good()) { cerr<<"File not found\n"; return 1; } 52 | 53 | Fin.seekg(0,ios::end); 54 | FileSizeB=Fin.tellg(); 55 | Fin.seekg(0,ios::beg); 56 | 57 | Byte *InputFile=new Byte[(DWord)(FileSizeB)]; 58 | Byte *OutputFile=new Byte[(DWord)(FileSizeB+2000000)]; 59 | if(InputFile==NULL||OutputFile==NULL) { cerr<<"Memory allocation error\n"; return 1; } 60 | 61 | Fin.read((char *)InputFile,FileSizeB); 62 | 63 | Fout.open(argv[3],ios::out|ios::binary); 64 | if(!Fout.good()) { cerr<<"Error creating file\n"; return 1; } 65 | 66 | Seconds=clock(); 67 | 68 | if(argv[1][0]=='c') 69 | { 70 | cout<<"Compressing...\n"; 71 | 72 | io::ArrayInputStream ByteStream(InputFile,(DWord)(FileSizeB)); 73 | io::ArrayOutputStream 
OutputStream(OutputFile,(DWord)(FileSizeB+2000000)); 74 | io::BitOutputStream BitStream(OutputStream); 75 | 76 | for(int i=0;i=EntropyCoder.MaxRange) Rescale(Freq); 90 | } 91 | EntropyCoder.Flush(); 92 | BitStream.Flush(); 93 | OutputSize = OutputStream.Tell(); 94 | 95 | cout<Count;Symbol--); 120 | //Symbol--; 121 | 122 | ByteStream.WriteByte(Symbol); 123 | EntropyCoder.RemoveRange(Freq[Symbol],Freq[Symbol+1],Freq[256]); 124 | 125 | for(int j=Symbol+1;j<257;j++) Freq[j]++; 126 | if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq); 127 | } 128 | 129 | Fout.write((char*)OutputFile,OutputSize); 130 | } 131 | else 132 | { 133 | cerr<<"Invalid parameter\n"; 134 | return 1; 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /external/sg-entropy/order0test_range32.cpp: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | //By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 10 | // 11 | //For details: 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | #include 15 | #include 16 | #include 17 | #include "stdx/define.h" 18 | 19 | #include "io/bit_stream.h" 20 | #include "io/stream_array.h" 21 | 22 | #include "entropy/range32.h" 23 | 24 | using namespace std; 25 | using namespace SG; 26 | 27 | void Rescale(Counter *Frequency) { 28 | for(int i=1;i<=256;i++) { 29 | Frequency[i]/=2; 30 | if(Frequency[i]<=Frequency[i-1]) Frequency[i]=Frequency[i-1]+1; 31 | } 32 | } 33 | 34 | //A quick test for entropty coders. Uses order-0 model. 
35 | int main(int argc,char *argv[]) 36 | { 37 | fstream Fin,Fout; 38 | Counter FileSizeB; 39 | 40 | Counter Seconds, OutputSize; 41 | 42 | if(argc!=4) 43 | { 44 | cerr<<"Usage: c|d InputFileName OutputFileName\n" 45 | <<"c: compress\n" 46 | <<"d: decompress\n"; 47 | return 1; 48 | } 49 | 50 | Fin.open(argv[2],ios::in|ios::binary); 51 | if(!Fin.good()) { cerr<<"File not found\n"; return 1; } 52 | 53 | Fin.seekg(0,ios::end); 54 | FileSizeB=Fin.tellg(); 55 | Fin.seekg(0,ios::beg); 56 | 57 | Byte *InputFile=new Byte[(DWord)(FileSizeB)]; 58 | Byte *OutputFile=new Byte[(DWord)(FileSizeB+2000000)]; 59 | if(InputFile==NULL||OutputFile==NULL) { cerr<<"Memory allocation error\n"; return 1; } 60 | 61 | Fin.read((char *)InputFile,FileSizeB); 62 | 63 | Fout.open(argv[3],ios::out|ios::binary); 64 | if(!Fout.good()) { cerr<<"Error creating file\n"; return 1; } 65 | 66 | Seconds=clock(); 67 | 68 | if(argv[1][0]=='c') 69 | { 70 | cout<<"Compressing...\n"; 71 | 72 | io::ArrayInputStream ByteStream(InputFile,(DWord)(FileSizeB)); 73 | io::ArrayOutputStream OutputStream(OutputFile,(DWord)(FileSizeB+2000000)); 74 | 75 | for(int i=0;i=EntropyCoder.MaxRange) Rescale(Freq); 89 | } 90 | EntropyCoder.Flush(); 91 | 92 | OutputSize = OutputStream.Tell(); 93 | 94 | cout<Count;Symbol--); 118 | //Symbol--; 119 | 120 | ByteStream.WriteByte(Symbol); 121 | EntropyCoder.RemoveRange(Freq[Symbol],Freq[Symbol+1],Freq[256]); 122 | 123 | for(int j=Symbol+1;j<257;j++) Freq[j]++; 124 | if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq); 125 | } 126 | 127 | Fout.write((char*)OutputFile,OutputSize); 128 | } 129 | else 130 | { 131 | cerr<<"Invalid parameter\n"; 132 | return 1; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /external/sg-entropy/order0test_range64.cpp: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | //By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the 
carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 10 | // 11 | //For details: 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | #include 15 | #include 16 | #include 17 | #include "stdx/define.h" 18 | 19 | #include "io/bit_stream.h" 20 | #include "io/stream_array.h" 21 | 22 | #include "entropy/range64.h" 23 | 24 | using namespace std; 25 | using namespace SG; 26 | 27 | void Rescale(Counter *Frequency) { 28 | for(int i=1;i<=256;i++) { 29 | Frequency[i]/=2; 30 | if(Frequency[i]<=Frequency[i-1]) Frequency[i]=Frequency[i-1]+1; 31 | } 32 | } 33 | 34 | //A quick test for entropty coders. Uses order-0 model. 35 | int main(int argc,char *argv[]) 36 | { 37 | fstream Fin,Fout; 38 | Counter FileSizeB; 39 | 40 | Counter Seconds, OutputSize; 41 | 42 | if(argc!=4) 43 | { 44 | cerr<<"Usage: c|d InputFileName OutputFileName\n" 45 | <<"c: compress\n" 46 | <<"d: decompress\n"; 47 | return 1; 48 | } 49 | 50 | Fin.open(argv[2],ios::in|ios::binary); 51 | if(!Fin.good()) { cerr<<"File not found\n"; return 1; } 52 | 53 | Fin.seekg(0,ios::end); 54 | FileSizeB=Fin.tellg(); 55 | Fin.seekg(0,ios::beg); 56 | 57 | Byte *InputFile=new Byte[(DWord)(FileSizeB)]; 58 | Byte *OutputFile=new Byte[(DWord)(FileSizeB+2000000)]; 59 | if(InputFile==NULL||OutputFile==NULL) { cerr<<"Memory allocation error\n"; return 1; } 60 | 61 | Fin.read((char *)InputFile,FileSizeB); 62 | 63 | Fout.open(argv[3],ios::out|ios::binary); 64 | if(!Fout.good()) { cerr<<"Error creating file\n"; return 1; } 65 | 66 | Seconds=clock(); 67 | 68 | if(argv[1][0]=='c') 69 | { 70 | cout<<"Compressing...\n"; 71 | 72 | io::ArrayInputStream ByteStream(InputFile,(DWord)(FileSizeB)); 73 | io::ArrayOutputStream OutputStream(OutputFile,(DWord)(FileSizeB+2000000)); 74 | 75 | for(int i=0;i=EntropyCoder.MaxRange) Rescale(Freq); 89 | } 
90 | EntropyCoder.Flush(); 91 | 92 | OutputSize = OutputStream.Tell(); 93 | 94 | cout<Count;Symbol--); 118 | //Symbol--; 119 | 120 | ByteStream.WriteByte(Symbol); 121 | EntropyCoder.RemoveRange(Freq[Symbol],Freq[Symbol+1],Freq[256]); 122 | 123 | for(int j=Symbol+1;j<257;j++) Freq[j]++; 124 | if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq); 125 | } 126 | 127 | Fout.write((char*)OutputFile,OutputSize); 128 | } 129 | else 130 | { 131 | cerr<<"Invalid parameter\n"; 132 | return 1; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /external/sg-entropy/readme.txt: -------------------------------------------------------------------------------- 1 | Entropy Coding Source code 2 | By Sachin Garg, 2006 3 | 4 | Includes range coder based upon the carry-less implementation 5 | by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | DDJ code. 7 | 8 | Modified to use 64-bit variables for improved performance. 9 | 32-bit reference implementations also included. 10 | 11 | For details: 12 | http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | Please send your suggestions, improvements, errors, feedback etc... 15 | Read license.txt before using this in anyway. 16 | -------------------------------------------------------------------------------- /external/sg-entropy/stdx/bit.cpp: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | //By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 
10 | // 11 | //For details: 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | #include "stdx/bit.h" 15 | 16 | 17 | SG::Binary SG::stdx::GetBit(SG::Byte BitNo,SG::Byte Data) 18 | { 19 | return (1< 32 | //inline SG::Binary GetArr(SG::Counter BitNo,std::vector &Data); 33 | //inline void SetArr(SG::Counter BitNo,std::vector &Data,SG::stdx::Type::Binary X); 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /external/sg-entropy/stdx/define.h: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | //By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 10 | // 11 | //For details: 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | #ifndef __sg_stdx_define 15 | #define __sg_stdx_define 16 | 17 | //Constants 18 | #define True 1 19 | #define False 0 20 | #define Yes 1 21 | #define No 0 22 | 23 | #ifndef NULL 24 | #define NULL 0 25 | #endif 26 | 27 | #define NotFound -1 28 | 29 | namespace SG 30 | { 31 | //Compiler/implementation dependent typedefs 32 | typedef unsigned char Byte; 33 | typedef unsigned short Word; 34 | typedef unsigned long DWord; 35 | //typedef unsigned __int64 QWord; //MS platform 36 | typedef unsigned long long QWord; //Linux and other Unices 37 | typedef long double Real; 38 | 39 | typedef Byte Binary; //reperesents 0 or 1 40 | typedef bool Boolean; //reperesents true or false 41 | 42 | typedef DWord Counter; 43 | typedef Word SmallCounter; 44 | typedef QWord BigCounter; 45 | 46 | typedef signed long Num; 47 | typedef unsigned int FastInt; 48 | } 49 | 50 | #endif 51 | 52 | 
-------------------------------------------------------------------------------- /external/sg-entropy/stdx/exception.cpp: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | //By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 10 | // 11 | //For details: 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | #include "stdx/exception.h" 15 | 16 | SG::stdx::Exception::Exception(std::string Description,std::string Location) 17 | { 18 | Exception::Description = Description; 19 | Exception::Location = Location; 20 | } 21 | -------------------------------------------------------------------------------- /external/sg-entropy/stdx/exception.h: -------------------------------------------------------------------------------- 1 | //Entropy Coding Source code 2 | //By Sachin Garg, 2006 3 | // 4 | //Includes range coder based upon the carry-less implementation 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's 6 | //DDJ code. 7 | // 8 | //Modified to use 64-bit variables for improved performance. 9 | //32-bit reference implementations also included. 
10 | // 11 | //For details: 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit 13 | 14 | #ifndef __sg_stdx_exception 15 | #define __sg_stdx_exception 16 | 17 | #include 18 | 19 | namespace SG 20 | { 21 | namespace stdx 22 | { 23 | //Standard exception object to be thrown 24 | class Exception 25 | { 26 | 27 | public: 28 | Exception(std::string Description,std::string Location); 29 | 30 | std::string Description; 31 | std::string Location; 32 | }; 33 | } 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /include/aux-encoding.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * aux-encoding.hpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | #ifndef _AUX_ENCODING_HPP 25 | #define _AUX_ENCODING_HPP 26 | 27 | #include "twobitvector.hpp" 28 | 29 | //! namespace gathering constants for interpretation of the auxiliary data structure 30 | namespace aux_encoding { 31 | typedef twobitvector::value_type value_type; 32 | //! regular bwt entry 33 | const value_type REG = 0; 34 | //! entry indicating the end of a tunnel 35 | const value_type SKP_F = 1; 36 | //! entry indicating the start of a tunnel 37 | const value_type IGN_L = 2; 38 | //! entry to be removed 39 | const value_type REM = SKP_F | IGN_L; 40 | //! alphabet size in auxiliary data structure 41 | const value_type SIGMA = 3; 42 | }; 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /include/bcm-compressor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * bcm-compressor.hpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef BCM_COMPRESSOR_HPP 25 | #define BCM_COMPRESSOR_HPP 26 | 27 | #include "bwt-compressor.hpp" 28 | #include "tbwt-compressor.hpp" 29 | #include "bcm-ss.hpp" 30 | 31 | #include "block-scores-rle-model.hpp" 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | 38 | //! class which encodes a BWT with second stage by Ilya Muravyov 39 | class BW_SS_BCM : public block_scores_rle_model { 40 | 41 | public: 42 | //! encodes the transform t by feeding every entry of t to a bcm::CM coder writing to out, followed by a final Flush 43 | template 44 | static void encode( T &t, std::ostream &out ) { 45 | bcm::CM cm; 46 | for (t_idx_t i = 0; i < t.size(); i++) { 47 | cm.Encode( t[i], out ); 48 | } 49 | cm.Flush(out); 50 | } 51 | 52 | //! decodes the transform from in with a bcm::CM coder and stores it in t (exactly t.size() entries are decoded, so t must have length of output) 53 | template 54 | static void decode( std::istream &in, T &t ) { 55 | bcm::CM cm; 56 | cm.Init(in); 57 | for (t_idx_t i = 0; i < t.size(); i++) { 58 | t[i] = cm.Decode(in); 59 | } 60 | } 61 | }; 62 | 63 | //typedefs defining compressors 64 | typedef bwt_compressor bwt_compressor_bcm; 65 | typedef tbwt_compressor tbwt_compressor_bcm; 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /include/block-nav-support.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * block-nav-support.hpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef _BLOCK_NAV_SUPPORT_HPP 25 | #define _BLOCK_NAV_SUPPORT_HPP 26 | 27 | #include "bwt-run-support.hpp" 28 | #include "bwt-config.hpp" 29 | 30 | #include 31 | 32 | //! support class for blocks and block navigation in a bwt. 33 | /*! class offers methods to compute blocks, as well as methods 34 | to store, enumerate and remove block collisions. 35 | */ 36 | class block_nav_support { 37 | private: 38 | const bwt_run_support &bwtrs; //navigation 39 | const t_size_t mbh = 2; //minimal block height 40 | 41 | std::vector m_end; //end position of blocks (see below) 42 | std::vector collisions; //map for collisions 43 | 44 | void compute_blocks(); 45 | void init_empty_collision_map(); 46 | 47 | public: 48 | //! number of blocks (always equal to number of runs) 49 | const t_size_t& blocks; 50 | 51 | //! 
exclusive end position (upper left position in BWT) of block 52 | const std::vector &end = m_end; 53 | 54 | //! constructor, expects a navigation structure (the minimal block height is not a parameter, see member mbh). 55 | /*! note that collisions will NOT be computed by this function, 56 | use function add_collision for this purpose. 57 | */ 58 | block_nav_support( const bwt_run_support &bwsupport ) 59 | : bwtrs( bwsupport ), blocks( bwtrs.runs ) { 60 | compute_blocks(); 61 | init_empty_collision_map(); 62 | }; 63 | 64 | //! adds a collision between inner block ic_b and outer block oc_b. 65 | void add_collision( t_idx_t ic_b, t_idx_t oc_b ); 66 | 67 | //! sets end of a block b to the given value 68 | void set_end( t_idx_t b, t_idx_t e ); 69 | 70 | //! computes all inner colliding blocks of the given one (array is ordered in text order). 71 | //! Note that the first block always is block b. 72 | std::vector get_inner_collisions( t_idx_t b ) const; 73 | 74 | //! computes all outer colliding blocks of the given one. Note that the first block always is block b. 75 | std::vector get_outer_collisions( t_idx_t b ) const; 76 | 77 | //! removes all collisions between colliding inner and outer blocks of b 78 | void remove_inner_outer_collisions( t_idx_t b ); 79 | }; 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /include/bw94-compressor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * bw94-compressor.hpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef BW94_COMPRESSOR_HPP 25 | #define BW94_COMPRESSOR_HPP 26 | 27 | #include "bwt-compressor.hpp" 28 | #include "tbwt-compressor.hpp" 29 | 30 | #include "aux-encoding.hpp" 31 | #include "bwt-run-support.hpp" 32 | #include "entropy-coder.hpp" 33 | #include "mtf-coder.hpp" 34 | #include "rle0-coder.hpp" 35 | #include "twobitvector.hpp" 36 | 37 | #include "block-scores-rle-model.hpp" 38 | 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | //! class which encodes a BWT with MTF + RLE0 + Entropy as second stage 45 | class BW_SS_BW94 : public block_scores_rle_model { 46 | public: 47 | //! 
encodes the transform t using MTF + RLE0 + Entropy 48 | template 49 | static void encode( T &t, std::ostream &out ) { 50 | //write alphabet 51 | auto alph = mtf_coder::compute_alph( t ); 52 | out.put( (t_uchar_t)alph.size() ); //store alphabet size (note that this stores 0 if full alphabet is used) 53 | for (t_idx_t i = 0; i < alph.size(); i++) { //and the alphabet itself 54 | out.put( alph[i] ); 55 | } 56 | 57 | //prepare encoders 58 | mtf_coder mtfcoder( alph ); 59 | rle0_encoder rle0coder; 60 | entropy_encoder entcoder( out ); 61 | entcoder.reset( alph.size() + 1 ); 62 | 63 | for (t_idx_t i = 0; i < t.size(); ) { //do encoding 64 | do { 65 | if (i >= t.size()) break; 66 | //feed rle0-encoder with mtf coded input until some contents can be written 67 | } while (rle0coder.encode_char( mtfcoder.encode_char( t[i++] ) )); 68 | 69 | //move the output of the rle0coder to the entropy coder 70 | while (rle0coder.has_next_enc_char()) { 71 | entcoder.encode_char( rle0coder.next_enc_char() ); 72 | } 73 | } 74 | entcoder.flush(); 75 | } 76 | 77 | //! 
decodes the transform and stores it in t using MTF + RLE0 + Entropy (t must have length of output) 78 | template 79 | static void decode( std::istream &in, T &t ) { 80 | t_size_t alphsize = in.get(); 81 | //check validity 82 | if (alphsize == 0u) { 83 | if (t.size() == 0) return; 84 | alphsize = std::numeric_limits::max()+1u; //remember that on full alphabet 0 is stored 85 | } 86 | if (alphsize > t.size()) 87 | throw std::invalid_argument("alphabet must be smaller than encoded string size"); 88 | 89 | //read alphabet 90 | T alph; alph.resize( alphsize ); 91 | for (t_idx_t i = 0; i < alph.size(); i++) { 92 | alph[i] = in.get(); 93 | } 94 | 95 | //set up required decodes 96 | mtf_coder mtfcoder( alph ); 97 | rle0_decoder rle0coder; 98 | entropy_decoder entcoder( in ); 99 | entcoder.reset( alph.size() + 1 ); 100 | 101 | //do decoding 102 | for (t_idx_t i = 0; i < t.size(); entcoder.next() ) { 103 | //feed rle0-decoder with input 104 | rle0coder.decode_char( entcoder.decode_char() ); 105 | 106 | //fetch characters from rle0-decoder and invert mtf 107 | while (i < t.size() && rle0coder.has_next_char()) { 108 | t[i++] = mtfcoder.decode_char( rle0coder.next_char() ); 109 | } 110 | } 111 | if (rle0coder.has_next_char()) { 112 | throw std::invalid_argument("encoded rle0-sequence is longer than text length"); 113 | } 114 | } 115 | }; 116 | 117 | //typedefs defining compressors 118 | typedef bwt_compressor bwt_compressor_bw94; 119 | typedef tbwt_compressor tbwt_compressor_bw94; 120 | 121 | #endif 122 | -------------------------------------------------------------------------------- /include/bwt-config.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * bwt-config.hpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | #ifndef _BWT_CONFIG_HPP 25 | #define _BWT_CONFIG_HPP 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | typedef uint8_t t_uchar_t; 32 | typedef uint32_t t_size_t; 33 | typedef uint32_t t_idx_t; 34 | typedef int64_t t_bitsize_t; 35 | typedef typename std::vector t_string_t; 36 | 37 | const t_size_t t_max_size = (1024ul + 512ul)*1024ul*1024ul; //maximal size of input (1,5 GB) 38 | 39 | #include "divsufsort.h" 40 | 41 | //do some type assertions 42 | static_assert( std::numeric_limits::max() > t_max_size, 43 | "saidx_t is too small" ); 44 | static_assert( std::numeric_limits::max() > t_max_size, 45 | "t_idx_t is too small" ); 46 | static_assert( std::numeric_limits::max() > t_max_size, 47 | "t_size_t is too small" ); 48 | static_assert( std::numeric_limits::max() > 8ul * t_max_size, 49 | "t_bitsize_t is too small" ); 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /include/bwt-run-support.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * bwt-run-support.hpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef _BWT_RUN_SUPPORT_HPP 25 | #define _BWT_RUN_SUPPORT_HPP 26 | 27 | #include 28 | 29 | #include "bwt-config.hpp" 30 | 31 | //! support structure for bwt navigation and bwt run support 32 | /*! The support structure distinguishes between positions in 33 | the logical BWT, that is, the BWT of a null-terminated string, 34 | and the indexed BWT, which is the BWT of a null-terminated string 35 | where the null-character in the BWT is removed and stored by 36 | a so-called bwt idx. 37 | The two versions differ only slightly, but make a difference in positioning. 38 | Unless stated otherwise, this support structure always uses 39 | logical positioning, but also offers conversion methods to switch 40 | between logical and indexed positioning. 41 | */ 42 | class bwt_run_support { 43 | private: 44 | t_size_t m_runs; //number of logical runs 45 | t_size_t m_idx_runs; //number of runs (indexed BWT) 46 | t_idx_t m_bwt_idx; //bwt index 47 | t_size_t m_n; //logical text length 48 | t_size_t m_idx_n; //text length (indexed BWT) 49 | t_size_t m_sigma; //size of alphabet 50 | t_size_t m_max_char_val; //maximal value of an element in alphabet 51 | 52 | std::vector m_lfr; //lf, only for the start of runs 53 | std::vector m_rs; //start positions of all runs, sorted ascending. 54 | //additionally, m_rs[m_runs] = n+1 holds. 55 | 56 | public: 57 | //! constructor, expects an indexed BWT and its primary index. 
58 | bwt_run_support( const t_uchar_t *bwt, t_size_t _n, t_idx_t _idx ); 59 | 60 | //! logical number of runs in BWT 61 | const t_size_t &runs = m_runs; 62 | 63 | //! number of runs in the indexed BWT 64 | const t_size_t &idx_runs = m_idx_runs; 65 | 66 | //! primary index of the bwt 67 | const t_idx_t &bwt_idx = m_bwt_idx; 68 | 69 | //! logical length of text 70 | const t_size_t &n = m_n; 71 | 72 | //! real text length (also length of indexed BWT) 73 | const t_size_t &idx_n = m_idx_n; 74 | 75 | //! size of alphabet in text 76 | const t_size_t &sigma = m_sigma; 77 | 78 | //! maximal value of an element in alphabet 79 | const t_size_t &max_char_val = m_max_char_val; 80 | 81 | //! utility function, returns lf at the start of the given run 82 | t_idx_t run_lf( t_idx_t r ) const { 83 | return m_lfr[r]; 84 | }; 85 | 86 | //! utility function, returns the start of a run 87 | t_idx_t start( t_idx_t r ) const { 88 | return m_rs[r]; 89 | }; 90 | 91 | //! function returns the run to which position i belongs, 92 | // or a value >= runs if i does not belong to any run (e.g. i < 0 or i >= n) 93 | t_idx_t run_of( t_idx_t i ) const; 94 | 95 | //! utility function, computes height of a run 96 | t_size_t height( t_idx_t r ) const { 97 | return m_rs[r+1]-m_rs[r]; 98 | }; 99 | 100 | //! utility function, computes exclusive end of a run 101 | t_idx_t end( t_idx_t r ) const { 102 | return m_rs[r+1]; 103 | }; 104 | 105 | //! utility function, converts a position in the indexed bwt to 106 | //! a position in the logical bwt 107 | t_idx_t idx_to_log( t_idx_t p_idx ) const { 108 | return (p_idx < bwt_idx) ? p_idx : p_idx + 1; 109 | }; 110 | 111 | //! utility function, converts a logical position in the bwt to 112 | //! a position in the indexed bwt 113 | t_idx_t log_to_idx( t_idx_t p_log ) const { 114 | return (p_log <= bwt_idx) ? 
p_log : p_log - 1; 115 | }; 116 | }; 117 | 118 | #endif 119 | -------------------------------------------------------------------------------- /include/mtf-coder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * mtf-coder.hpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef _MTF_CODER_HPP 25 | #define _MTF_CODER_HPP 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | //! class for mtf-transformations, requires a string type 32 | /*! template parameter string_t should support random access [], as well as 33 | resize() - function, empty construction and size()-function. 
34 | */ 35 | template 36 | class mtf_coder { 37 | public: 38 | typedef typename string_t::value_type char_type; 39 | typedef typename string_t::size_type size_type; 40 | private: 41 | string_t alph; 42 | public: 43 | //! constructs an mtf coder, expects an alphabet of the underlying source (taken by value and moved into the coder). 44 | mtf_coder (string_t _alph) : alph(std::move(_alph)) {}; 45 | 46 | //! encodes a single character, and returns the coding for the character (throws invalid_argument if c is not contained in the alphabet; the alphabet order is then left rotated) 47 | char_type encode_char( char_type c ) { 48 | size_type r = 0; //rank of c in alph, kept as size_type so it cannot wrap around 49 | if (alph.size() == 0) throw std::invalid_argument("MTF Transform failed"); //an empty alphabet cannot contain c 50 | while (alph[0] != c) { //move c to front by rotating it through position 0 51 | if (++r >= alph.size()) throw std::invalid_argument("MTF Transform failed"); //c is not in alph, never index past the end 52 | char_type tmp = alph[0]; 53 | alph[0] = alph[r]; 54 | alph[r] = tmp; 55 | } 56 | return (char_type)r; 57 | }; 58 | 59 | //! decodes a single encoded character and returns its decoded value. 60 | /*! throws invalid_argument if ranks in S are bigger than alphabet size. 61 | */ 62 | char_type decode_char( char_type c ) { 63 | if (c >= alph.size()) 64 | throw std::invalid_argument("MTF Retransform failed"); 65 | 66 | while (c > 0) { //move alph[c] to front 67 | char_type tmp = alph[c-1]; 68 | alph[c-1] = alph[c]; 69 | alph[c] = tmp; 70 | 71 | --c; 72 | } 73 | return (char_type)alph[0]; 74 | }; 75 | 76 | //! computes alphabet from underlying string S. 77 | /*! alphabet must consist of elements in [0..maxsigma-1], 78 | depending on the type of S (e.g., if S is a vector of 1-byte-characters, 79 | it's suitable to choose maxsigma = 256) 80 | Note that alphabet order is equal to the order of the first appearance 81 | of the characters in S.
82 | */ 83 | static string_t compute_alph( const string_t &S, 84 | size_type maxsigma = std::numeric_limits::max()+1 ) { 85 | //set up alphabet and bitmap 86 | std::vector charUsed( maxsigma ); 87 | string_t alph; alph.resize( maxsigma ); 88 | size_type sigma = 0; 89 | 90 | //compute alphabet 91 | for (size_type i = 0; i < S.size(); i++) { 92 | size_type ch = S[i]; 93 | if (!charUsed[ch]) { 94 | alph[sigma++] = ch; 95 | charUsed[ch] = true; 96 | } 97 | } 98 | alph.resize( sigma ); 99 | return alph; 100 | }; 101 | 102 | //! transforms a string S using Move-To-Front Transformation. 103 | /*! alph must be a list of the alphabet used in S, e.g. as computed 104 | by function compute_alph (alph should be a copy, as it gets modified 105 | during execution) 106 | */ 107 | static void transform( string_t &S, string_t alph ) { 108 | mtf_coder coder( std::move( alph ) ); 109 | for (size_type i = 0; i < S.size(); i++) { 110 | S[i] = coder.encode_char( S[i] ); 111 | } 112 | }; 113 | 114 | //! retransforms a Move-To-Front transformed string S using alph. 115 | /*! this function thus is the inverse operation of mtf_transform. 116 | Note that for correct reconstruction, alph must be same as 117 | given to mtf_transform. 118 | throws invalid_argument if ranks in S are bigger than alphabet size 119 | */ 120 | static void retransform( string_t &S, string_t alph ) { 121 | mtf_coder coder( std::move( alph ) ); 122 | for (size_type i = 0; i < S.size(); i++) { 123 | S[i] = coder.decode_char( S[i] ); 124 | } 125 | }; 126 | }; 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /include/twobitvector.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * twobitvector.hpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef _TWOBITVECTOR_HPP 25 | #define _TWOBITVECTOR_HPP 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | //! a simple implementation of a vector where each entry requires 2 bits. 32 | class twobitvector { 33 | public: 34 | typedef uint8_t value_type; 35 | typedef std::vector::size_type size_type; 36 | 37 | //! reference type for twobitvector 38 | class reference { 39 | private: 40 | value_type &val; 41 | value_type shift; 42 | 43 | friend class twobitvector; 44 | reference( value_type &v, value_type s ) : val{v}, shift{s} {}; 45 | public: 46 | //! get value 47 | operator value_type() const { 48 | return (val >> shift) & 3u; 49 | }; 50 | //! set value 51 | reference& operator=(value_type v) { 52 | val ^= (((val >> shift) ^ v) & 3u) << shift; 53 | return *this; 54 | }; 55 | //! 
set value using another reference 56 | reference& operator=(const reference& x) { 57 | return *this=((x.val >> x.shift) & 3u); 58 | }; 59 | }; 60 | private: 61 | std::vector m_data; 62 | size_type m_size = 0; 63 | 64 | public: 65 | //! resize vector to the given size. 66 | /*! if n is bigger than current size, old contents stay and the end 67 | is filled with zeros. 68 | */ 69 | void resize( size_type n ) { 70 | m_data.resize( (n >> 2) + 1 ); 71 | m_size = n; 72 | }; 73 | 74 | //! returns the number of entries in the twobitvector 75 | size_type size() const { 76 | return m_size; 77 | }; 78 | 79 | //! returns a pointer to the underlying data field 80 | const uint8_t *data() const { 81 | return (const uint8_t *)m_data.data(); 82 | }; 83 | 84 | //! length of the underlying data field in bytes 85 | size_type datasize() const { 86 | return m_data.size(); 87 | }; 88 | 89 | //! random read access to the elements 90 | value_type operator[]( size_type i ) const { 91 | assert(i < m_size); 92 | return (m_data[i >> 2] >> ((i & 3u) << 1)) & 3u; 93 | }; 94 | 95 | //! random read/write access to the elements 96 | reference operator[]( size_type i ) { 97 | assert(i < m_size); 98 | return reference( m_data[i >> 2], (i & 3u) << 1 ); 99 | }; 100 | 101 | //TODO: add more functions if required 102 | }; 103 | 104 | #endif 105 | -------------------------------------------------------------------------------- /include/wt-compressor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * wt-compressor.hpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 
4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef BW94_COMPRESSOR_HPP 25 | #define BW94_COMPRESSOR_HPP 26 | 27 | #include "bwt-compressor.hpp" 28 | #include "tbwt-compressor.hpp" 29 | 30 | #include "sdsl/bit_vectors.hpp" 31 | #include "sdsl/wavelet_trees.hpp" 32 | 33 | #include "block-scores-rle-model.hpp" 34 | 35 | #include 36 | #include 37 | 38 | //! class which encodes a BWT with a wavelet tree (and hybrid bitvectors) as second stage 39 | class BW_SS_WT : public block_scores_rle_model { 40 | public: 41 | //! encodes the transform t using a wavelet tree 42 | template 43 | static void encode( T &t, std::ostream &out ) { 44 | sdsl::wt_huff> wt( t, t.size() ); 45 | wt.serialize( out ); 46 | } 47 | 48 | //! 
decodes the transform and stores it in t 49 | template 50 | static void decode( std::istream &in, T &t ) { 51 | sdsl::wt_huff> wt; 52 | wt.load( in ); 53 | for (t_idx_t i = 0; i < t.size(); i++) { 54 | t[i] = wt[i]; 55 | } 56 | } 57 | }; 58 | 59 | //typedefs defining compressors 60 | typedef bwt_compressor bwt_compressor_wt; 61 | typedef tbwt_compressor tbwt_compressor_wt; 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /lib/bwt-run-support.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * bwt-run-support.cpp for bwt tunneling 3 | * Copyright (c) 2017 Uwe Baier All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a copy 6 | * of this software and associated documentation files (the "Software"), to deal 7 | * in the Software without restriction, including without limitation the rights 8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | * copies of the Software, and to permit persons to whom the Software is 10 | * furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be included in all 13 | * copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | #include "bwt-run-support.hpp" 25 | 26 | #include 27 | #include 28 | 29 | using namespace std; 30 | 31 | t_idx_t bwt_run_support::run_of( t_idx_t i ) const { 32 | //use binary search with runstart - array 33 | auto it = upper_bound( m_rs.begin(), m_rs.end(), i ); 34 | return (t_idx_t)(it - m_rs.begin()) - 1; 35 | } 36 | 37 | bwt_run_support::bwt_run_support( const t_uchar_t *bwt, t_size_t _n, t_idx_t idx ) { 38 | //init some basic variables 39 | m_bwt_idx = idx; 40 | m_idx_n = _n; 41 | m_idx_runs = 0; 42 | m_sigma = 0; 43 | m_max_char_val = 0; 44 | 45 | //build C Array and count runs 46 | vector C( numeric_limits::max() + 1 ); 47 | t_idx_t borders[] = {bwt_idx,idx_n}; 48 | t_idx_t i = 0; 49 | for (t_idx_t b : borders) { //to split runs at primary index 50 | t_uchar_t lastchar = (i < idx_n) ? bwt[i]+1 : 0; 51 | while (i < b) { 52 | if (lastchar != bwt[i]) { //start of a run 53 | ++m_idx_runs; 54 | lastchar = bwt[i]; 55 | } 56 | ++C[lastchar]; 57 | ++i; 58 | } 59 | } 60 | m_n = idx_n + 1; 61 | m_runs = idx_runs + 1; //for bwt index 62 | 63 | //build cumulative sums of the C array 64 | t_idx_t l = 1; //for bwt index 65 | for (t_idx_t c = 0; c < C.size(); c++) { 66 | auto tmp = C[c]; 67 | C[c] = l; 68 | l += tmp; 69 | if (tmp > 0) { 70 | ++m_sigma; 71 | m_max_char_val = c; 72 | } 73 | } 74 | 75 | //compute LF 76 | m_lfr.reserve( m_runs + 1 ); 77 | m_rs.reserve( m_runs + 1 ); 78 | i = 0; 79 | t_idx_t i_log = 0; //logical position of i 80 | for (t_idx_t b : borders) { //to split runs at primary index 81 | t_uchar_t lastchar = (i < n) ? 
bwt[i]+1 : 0; 82 | while (i < b) { 83 | if (lastchar != bwt[i]) { //start of a run 84 | m_rs.push_back( i_log ); //store start of run 85 | 86 | lastchar = bwt[i]; 87 | m_lfr.push_back( C[lastchar] ); 88 | } 89 | ++C[lastchar]; 90 | ++i; ++i_log; 91 | } 92 | //add a terminator to both lfr and rs (for both primary index and n) 93 | m_rs.push_back( i_log++ ); 94 | m_lfr.push_back( 0 ); 95 | } 96 | } 97 | --------------------------------------------------------------------------------