├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── benchmark-result.pdf
├── benchmark
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── bin
    │   └── .gitignore
    ├── compressors.config
    ├── cp
    │   ├── bcm.sh
    │   ├── bwz.sh
    │   ├── bzip2.sh
    │   ├── gzip.sh
    │   ├── tbcm.sh
    │   ├── tbwz.sh
    │   ├── twt.sh
    │   ├── wt.sh
    │   ├── xz-extreme.sh
    │   ├── xz.sh
    │   └── zpaq.sh
    ├── rcrdata
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── rcrcompressors.config
    │   └── rcrtestcases.config
    ├── testcases.config
    ├── tmp
    │   └── .gitignore
    └── visualize.sh
├── external
    ├── bcm
    │   ├── LICENSE
    │   ├── Make.helper
    │   ├── README.md
    │   ├── bcm-ss.cpp
    │   └── bcm-ss.hpp
    ├── divsufsort
    │   ├── LICENSE
    │   ├── Make.helper
    │   ├── README.md
    │   ├── config.h
    │   ├── divsufsort.c
    │   ├── divsufsort.h
    │   ├── divsufsort_private.h
    │   ├── sssort.c
    │   ├── trsort.c
    │   └── utils.c
    ├── sdsl
    │   ├── COPYING
    │   ├── Make.helper
    │   ├── include
    │   │   └── sdsl
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── bit_vector_il.hpp
    │   │   │   ├── bit_vectors.hpp
    │   │   │   ├── bits.hpp
    │   │   │   ├── bp_support.hpp
    │   │   │   ├── bp_support_algorithm.hpp
    │   │   │   ├── bp_support_g.hpp
    │   │   │   ├── bp_support_gg.hpp
    │   │   │   ├── bp_support_sada.hpp
    │   │   │   ├── coder.hpp
    │   │   │   ├── coder_comma.hpp
    │   │   │   ├── coder_elias_delta.hpp
    │   │   │   ├── coder_elias_gamma.hpp
    │   │   │   ├── coder_fibonacci.hpp
    │   │   │   ├── config.hpp
    │   │   │   ├── construct.hpp
    │   │   │   ├── construct_bwt.hpp
    │   │   │   ├── construct_config.hpp
    │   │   │   ├── construct_isa.hpp
    │   │   │   ├── construct_lcp.hpp
    │   │   │   ├── construct_lcp_helper.hpp
    │   │   │   ├── construct_sa.hpp
    │   │   │   ├── construct_sa_se.hpp
    │   │   │   ├── csa_alphabet_strategy.hpp
    │   │   │   ├── csa_bitcompressed.hpp
    │   │   │   ├── csa_sada.hpp
    │   │   │   ├── csa_sampling_strategy.hpp
    │   │   │   ├── csa_wt.hpp
    │   │   │   ├── cst_fully.hpp
    │   │   │   ├── cst_iterators.hpp
    │   │   │   ├── cst_sada.hpp
    │   │   │   ├── cst_sct3.hpp
    │   │   │   ├── dac_vector.hpp
    │   │   │   ├── enc_vector.hpp
    │   │   │   ├── fast_cache.hpp
    │   │   │   ├── hyb_vector.hpp
    │   │   │   ├── int_vector.hpp
    │   │   │   ├── int_vector_buffer.hpp
    │   │   │   ├── int_vector_io_wrappers.hpp
    │   │   │   ├── int_vector_mapper.hpp
    │   │   │   ├── inv_perm_support.hpp
    │   │   │   ├── io.hpp
    │   │   │   ├── iterators.hpp
    │   │   │   ├── k2_treap.hpp
    │   │   │   ├── k2_treap_algorithm.hpp
    │   │   │   ├── k2_treap_helper.hpp
    │   │   │   ├── k2_tree.hpp
    │   │   │   ├── k2_tree_helper.hpp
    │   │   │   ├── lcp.hpp
    │   │   │   ├── lcp_bitcompressed.hpp
    │   │   │   ├── lcp_byte.hpp
    │   │   │   ├── lcp_dac.hpp
    │   │   │   ├── lcp_support_sada.hpp
    │   │   │   ├── lcp_support_tree.hpp
    │   │   │   ├── lcp_support_tree2.hpp
    │   │   │   ├── lcp_vlc.hpp
    │   │   │   ├── lcp_wt.hpp
    │   │   │   ├── louds_tree.hpp
    │   │   │   ├── memory_management.hpp
    │   │   │   ├── nearest_neighbour_dictionary.hpp
    │   │   │   ├── nn_dict_dynamic.hpp
    │   │   │   ├── qsufsort.hpp
    │   │   │   ├── ram_filebuf.hpp
    │   │   │   ├── ram_fs.hpp
    │   │   │   ├── rank_support.hpp
    │   │   │   ├── rank_support_scan.hpp
    │   │   │   ├── rank_support_v.hpp
    │   │   │   ├── rank_support_v5.hpp
    │   │   │   ├── raster_img.hpp
    │   │   │   ├── rmq_succinct_sada.hpp
    │   │   │   ├── rmq_succinct_sct.hpp
    │   │   │   ├── rmq_support.hpp
    │   │   │   ├── rmq_support_sparse_table.hpp
    │   │   │   ├── rrr_helper.hpp
    │   │   │   ├── rrr_vector.hpp
    │   │   │   ├── rrr_vector_15.hpp
    │   │   │   ├── sd_vector.hpp
    │   │   │   ├── sdsl_concepts.hpp
    │   │   │   ├── select_support.hpp
    │   │   │   ├── select_support_mcl.hpp
    │   │   │   ├── select_support_scan.hpp
    │   │   │   ├── sfstream.hpp
    │   │   │   ├── sorted_int_stack.hpp
    │   │   │   ├── sorted_multi_stack_support.hpp
    │   │   │   ├── sorted_stack_support.hpp
    │   │   │   ├── structure_tree.hpp
    │   │   │   ├── suffix_array_algorithm.hpp
    │   │   │   ├── suffix_array_helper.hpp
    │   │   │   ├── suffix_arrays.hpp
    │   │   │   ├── suffix_tree_algorithm.hpp
    │   │   │   ├── suffix_tree_helper.hpp
    │   │   │   ├── suffix_trees.hpp
    │   │   │   ├── uint128_t.hpp
    │   │   │   ├── uint256_t.hpp
    │   │   │   ├── uintx_t.hpp
    │   │   │   ├── util.hpp
    │   │   │   ├── vectors.hpp
    │   │   │   ├── vlc_vector.hpp
    │   │   │   ├── wavelet_trees.hpp
    │   │   │   ├── wm_int.hpp
    │   │   │   ├── wt_algorithm.hpp
    │   │   │   ├── wt_ap.hpp
    │   │   │   ├── wt_blcd.hpp
    │   │   │   ├── wt_gmr.hpp
    │   │   │   ├── wt_helper.hpp
    │   │   │   ├── wt_huff.hpp
    │   │   │   ├── wt_hutu.hpp
    │   │   │   ├── wt_int.hpp
    │   │   │   ├── wt_pc.hpp
    │   │   │   └── wt_rlmn.hpp
    │   └── lib
    │   │   ├── bits.cpp
    │   │   ├── bp_support_algorithm.cpp
    │   │   ├── coder_elias_delta.cpp
    │   │   ├── coder_elias_gamma.cpp
    │   │   ├── coder_fibonacci.cpp
    │   │   ├── config.cpp
    │   │   ├── construct_config.cpp
    │   │   ├── construct_isa.cpp
    │   │   ├── construct_lcp.cpp
    │   │   ├── construct_lcp_helper.cpp
    │   │   ├── construct_sa.cpp
    │   │   ├── construct_sa_se.cpp
    │   │   ├── csa_alphabet_strategy.cpp
    │   │   ├── io.cpp
    │   │   ├── lcp_support_tree.cpp
    │   │   ├── louds_tree.cpp
    │   │   ├── memory_management.cpp
    │   │   ├── nn_dict_dynamic.cpp
    │   │   ├── ram_filebuf.cpp
    │   │   ├── ram_fs.cpp
    │   │   ├── rrr_vector_15.cpp
    │   │   ├── sd_vector.cpp
    │   │   ├── sfstream.cpp
    │   │   ├── structure_tree.cpp
    │   │   ├── uint128_t.cpp
    │   │   ├── uint256_t.cpp
    │   │   ├── util.cpp
    │   │   └── wt_helper.cpp
    └── sg-entropy
    │   ├── Make.helper
    │   ├── entropy
    │       ├── arith32.cpp
    │       ├── arith32.h
    │       ├── arith64.cpp
    │       ├── arith64.h
    │       ├── range32.cpp
    │       ├── range32.h
    │       ├── range64.cpp
    │       └── range64.h
    │   ├── io
    │       ├── bit_stream.cpp
    │       ├── bit_stream.h
    │       ├── stream.h
    │       ├── stream_array.cpp
    │       └── stream_array.h
    │   ├── license.txt
    │   ├── order0test_arith32.cpp
    │   ├── order0test_arith64.cpp
    │   ├── order0test_range32.cpp
    │   ├── order0test_range64.cpp
    │   ├── readme.txt
    │   └── stdx
    │       ├── bit.cpp
    │       ├── bit.h
    │       ├── define.h
    │       ├── exception.cpp
    │       └── exception.h
├── include
    ├── aux-encoding.hpp
    ├── bcm-compressor.hpp
    ├── block-compressor.hpp
    ├── block-nav-support.hpp
    ├── block-scores-rle-model.hpp
    ├── bw94-compressor.hpp
    ├── bwt-compressor.hpp
    ├── bwt-config.hpp
    ├── bwt-run-support.hpp
    ├── entropy-coder.hpp
    ├── lheap.hpp
    ├── mtf-coder.hpp
    ├── rle0-coder.hpp
    ├── tbwt-compressor.hpp
    ├── tunneling-support.hpp
    ├── twobitvector.hpp
    └── wt-compressor.hpp
└── lib
    ├── block-nav-support.cpp
    ├── bwt-run-support.cpp
    └── ui.cpp


/.gitignore:
--------------------------------------------------------------------------------
1 | *.x
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | This code is part of the paper
 2 | "On undetected redundancy in the Burrows-Wheeler Transform"
 3 | by Uwe Baier (hopefully to be published in CPM 2018)
 4 | 
 5 | Copyright (c) 2018 Uwe Baier
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | of this software and associated documentation files (the "Software"), to deal
 9 | in the Software without restriction, including without limitation the
10 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
11 | sell copies of the Software, and to permit persons to whom the Software is
12 | furnished to do so, subject to the following conditions:
13 | 
14 | The above copyright notice and this permission notice shall be included in
15 | all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | include external/sg-entropy/Make.helper
 2 | include external/divsufsort/Make.helper
 3 | include external/bcm/Make.helper
 4 | include external/sdsl/Make.helper
 5 | 
 6 | OWN_INCS = \
 7 | 	aux-encoding.hpp \
 8 | 	block-compressor.hpp \
 9 | 	block-nav-support.hpp \
10 | 	block-scores-rle-model.hpp \
11 | 	bwt-compressor.hpp \
12 | 	bwt-config.hpp \
13 | 	bwt-run-support.hpp \
14 | 	entropy-coder.hpp \
15 | 	lheap.hpp \
16 | 	mtf-coder.hpp \
17 | 	rle0-coder.hpp \
18 | 	tbwt-compressor.hpp \
19 | 	tunneling-support.hpp \
20 | 	twobitvector.hpp
21 | OWN_LIBS = \
22 | 	block-nav-support.cpp \
23 | 	bwt-run-support.cpp  \
24 | 	ui.cpp
25 | 
26 | INC_DIRS = external/sg-entropy external/divsufsort external/bcm external/sdsl/include include
27 | LIB_DIRS = external/sg-entropy external/divsufsort external/bcm external/sdsl/lib lib
28 | 
29 | CC_OPTS = -O3 -DNDEBUG
30 | CC_INCS = $(addprefix external/sg-entropy/,$(SG_ENTROPY_INCS)) \
31 |           $(addprefix external/divsufsort/,$(DIVSUFSORT_INCS)) \
32 |           $(addprefix external/bcm/,$(BCM_INCS)) \
33 |           $(addprefix external/sdsl/,$(SDSL_INCS)) \
34 |           $(addprefix include/,$(OWN_INCS))
35 | CC_LIBS = $(addprefix lib/,$(OWN_LIBS)) \
36 |           $(addprefix external/divsufsort/,$(DIVSUFSORT_LIBS))
37 | BW_CC_LIBS  = $(addprefix external/sg-entropy/,$(SG_ENTROPY_LIBS)) $(CC_LIBS)
38 | BCM_CC_LIBS = $(addprefix external/bcm/,$(BCM_LIBS)) $(CC_LIBS)
39 | WT_CC_LIBS  = $(addprefix external/sdsl/,$(SDSL_LIBS)) $(CC_LIBS)
40 | 
41 | all:	bwzip.x tbwzip.x bcmzip.x tbcmzip.x wtzip.x twtzip.x
42 | 
43 | bwzip.x:	lib/ui.cpp include/bw94-compressor.hpp $(CC_INCS) $(BW_CC_LIBS)
44 | 	g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \
45 | 		-DBW94 $(BW_CC_LIBS) -o bwzip.x
46 | 
47 | tbwzip.x:	lib/ui.cpp include/bw94-compressor.hpp $(CC_INCS) $(BW_CC_LIBS)
48 | 	g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \
49 | 		-DTBWT $(BW_CC_LIBS) -o tbwzip.x
50 | 
51 | bcmzip.x:	lib/ui.cpp include/bcm-compressor.hpp $(CC_INCS) $(BCM_CC_LIBS)
52 | 	g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \
53 | 		-DBCM $(BCM_CC_LIBS) -o bcmzip.x
54 | 
55 | tbcmzip.x:	lib/ui.cpp include/bcm-compressor.hpp $(CC_INCS) $(BCM_CC_LIBS)
56 | 	g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \
57 | 		-DTBCM $(BCM_CC_LIBS) -o tbcmzip.x
58 | 
59 | wtzip.x:	lib/ui.cpp include/wt-compressor.hpp $(CC_INCS) $(WT_CC_LIBS)
60 | 	g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \
61 | 		-DWT $(WT_CC_LIBS) -o wtzip.x
62 | 
63 | twtzip.x:	lib/ui.cpp include/wt-compressor.hpp $(CC_INCS) $(WT_CC_LIBS)
64 | 	g++ -std=c++11 -Wall -Wextra -g $(addprefix -I,$(INC_DIRS)) $(addprefix -L,$(LIB_DIRS)) $(CC_OPTS) \
65 | 		-DTWT $(WT_CC_LIBS) -o twtzip.x
66 | 
67 | # all and clean name no files; declare them phony so same-named files cannot shadow them
68 | .PHONY: all clean
69 | 
70 | # remove every built executable
71 | clean:
72 | 	rm -f *.x
73 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TBWT
 2 | This repository contains an implementation and a benchmark for the so-called
 3 | Tunneled BWT, a compression improvement for compressors that use the
 4 | Burrows-Wheeler Transform, such as [bzip2]. The tunneled BWT is described in
 5 | 
 6 | 	On Undetected Redundancy in the Burrows-Wheeler Transform
 7 | 
 8 | 	by Uwe Baier (hopefully to appear in CPM 2018)
 9 | 
10 | ## What is contained
11 | This bundle of files consists of the following parts:
12 | 1. The algorithms required to construct, compress and decompress a Tunneled BWT,
13 |    contained in the `include`- and `lib`-directory
14 | 2. External resources in the `external`-directory, namely
15 |    - a library for suffix array construction [divsufsort](https://github.com/y-256/libdivsufsort)
16 |    - a library containing different entropy coders [Entropy Coders by Sachin Garg](http://www.sachingarg.com/compression/entropy_coding/64bit)
17 |    - a library containing a bundle of succinct data structures [sdsl-lite](https://github.com/simongog/sdsl-lite)
18 |    - the backend of a high-performance file compressor using the BWT [bcm](https://github.com/encode84/bcm)
19 | 3. A benchmark to test the given compressor against common other lossless
20 |    data compressors, see `benchmark` - directory.
21 | 
22 | ## Requirements
23 | To compile the compressor(s), you need a modern c++11 ready compiler such as 
24 | [gcc](https://gcc.gnu.org/) version 4.7 or newer.
25 | 
26 | ## Installation
27 | Just call the command `make`. It should produce six executables:
28 | - `bwzip.x`: a compressor similar to [bzip2], but without memory limitation
29 | - `tbwzip.x`: like `bwzip.x`, enhanced with tunneling
30 | - `bcmzip.x`: a compressor similar to [bcm]
31 | - `tbcmzip.x`: like `bcmzip.x`, enhanced with tunneling
32 | - `wtzip.x`: compression of a BWT using a wavelet tree and compressed bitvectors,
33 |   currently not usable for text indexing
34 | - `twtzip.x`: like `wtzip.x`, enhanced with tunneling
35 | 
36 | ## Usage
37 | All compiled compressors share the same user interface; call any of them
38 | without a parameter to get a detailed description.
39 | 


--------------------------------------------------------------------------------
/benchmark-result.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waYne1337/tbwt/e6c24549f38e0961b39b42ffb3cf56bd2c747e48/benchmark-result.pdf


--------------------------------------------------------------------------------
/benchmark/.gitignore:
--------------------------------------------------------------------------------
1 | bin/*.x
2 | tmp/fcomp
3 | tmp/fres
4 | estquality.dat
5 | result.dat
6 | result.tex
7 | result.pdf
8 | 


--------------------------------------------------------------------------------
/benchmark/README.md:
--------------------------------------------------------------------------------
 1 | # Tunneled BWT Compression Benchmark
 2 | Experiments for compression of different compressors.
 3 | 
 4 | ## What is contained
 5 | This bundle of files consists of the following parts:
 6 | 1. The `cp`-directory contains, for each compressor, a standardized interface
 7 |    for installation, compression and decompression with that compressor.
 8 | 2. A benchmark measuring compression and resource usage of each compressor
 9 | 3. A benchmark measuring the estimator quality of estimators in the tunneled bwt
10 | 4. A visualization for the benchmark data
11 | 5. A set of test files (which need to be downloaded first, see below), contained
12 |    in the `rcrdata`-directory
13 | 
14 | ## Requirements
15 | To run the benchmark, you need the following:
16 | - a modern c++11 ready compiler such as [gcc](https://gcc.gnu.org/) version 4.7 or newer
17 | - [awk]
18 | - [bc]
19 | - [sed]
20 | - [tr]
21 | 
22 | To visualize your results, the following programs are needed:
23 | - [pdflatex], especially supporting pgf and pgfplotstable
24 | 
25 | To download and set up the test files, the following programs are needed:
26 | - [curl](https://curl.haxx.se/)
27 | - [gzip]
28 | - [bzip2]
29 | 
30 | ## Installation
31 | -  To install the required compressors, call `sudo make install`. Superuser-rights
32 |   are required to download the other compressors using [apt-get], which can be 
33 |   avoided by downloading the compressors yourself, see the scripts in the
34 |   `cp`-directory.
35 | - To download the test data, switch into the `rcrdata` - directory, and call `make`.
36 |   This will download and extract all of the test data using [curl].
37 | 
38 | ## Usage
39 | 
40 | ### Benchmark
41 | To run the benchmark, configure the files `testcases.config` and `compressors.config`
42 | as you require it, an example is already listed. Afterwards, call
43 | 
44 | 	make
45 | 
46 | After the benchmark has finished, 4 files are generated:
47 | - `result.dat`: a file containing the benchmark results of all test files on all compressors.
48 |   Every speed measurement is given in MB/s, every size in bits per symbol
49 |   (both relative to the size of the original file)
50 | - `estquality.dat`: a file containing the measured relative errors of estimators
51 |   used for BWT Tunneling.
52 | - `result.tex`: a file ready to be compiled with [latex], displaying the results
53 |   in a better readable format
54 | - `result.pdf`: a presentation of all measurements
55 | 
56 | All 4 of the above-mentioned files can be generated separately by calling `make FILE`.
57 | A rule of thumb for the memory usage is that the compressors will need 12 times input
58 | size or less.
59 | 
60 | ### Replicating Computational Results
61 | The most straightforward way to use this benchmark is by just calling
62 | 
63 | 	make rcr
64 | 
65 | This command will automatically download the test data, set up the benchmark
66 | properly (Warning: the .config - files will be overwritten), execute the
67 | benchmark and generate all resulting files. Your machine should contain 16 GB
68 | of memory to ensure no swapping takes place.
69 | 


--------------------------------------------------------------------------------
/benchmark/bin/.gitignore:
--------------------------------------------------------------------------------
1 | *.x
2 | 


--------------------------------------------------------------------------------
/benchmark/compressors.config:
--------------------------------------------------------------------------------
1 | #define compressors (each compressor must be provided as a separate .sh file in the cp-directory)
2 | 
3 | #list only a couple of compressors:
4 | #COMPRESSORS=bwz tbwz bcm tbcm wt twt
5 | 
6 | #list all available compressors:
7 | COMPRESSORS=$(basename $(shell ls cp))
8 | 


--------------------------------------------------------------------------------
/benchmark/cp/bcm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for bcmzip.x; expects to be run from the benchmark directory.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 and $3 are passed to bcmzip.x unchanged. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	bin/bcmzip.x -c "$2" "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	bin/bcmzip.x -d "$2" "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	#build in the parent directory; copy only if the build succeeded
11 | 	(cd .. && make bcmzip.x) && cp ../bcmzip.x bin/bcmzip.x
12 | else
13 | 	exit 1
14 | fi
15 | 


--------------------------------------------------------------------------------
/benchmark/cp/bwz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for bwzip.x; expects to be run from the benchmark directory.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 and $3 are passed to bwzip.x unchanged. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	bin/bwzip.x -c "$2" "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	bin/bwzip.x -d "$2" "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	#build in the parent directory; copy only if the build succeeded
11 | 	(cd .. && make bwzip.x) && cp ../bwzip.x bin/bwzip.x
12 | else
13 | 	exit 1
14 | fi
15 | 


--------------------------------------------------------------------------------
/benchmark/cp/bzip2.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for bzip2.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 is the input file and $3 the output file. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	bzip2 -9 -c -f -k "$2" > "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	bunzip2 -c -f -k "$2" > "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	apt-get install bzip2
11 | else
12 | 	exit 1
13 | fi
14 | 


--------------------------------------------------------------------------------
/benchmark/cp/gzip.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for gzip.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 is the input file and $3 the output file. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	gzip -c -f -k "$2" > "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	gunzip -c -f -k "$2" > "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	apt-get install gzip
11 | else
12 | 	exit 1
13 | fi
14 | 


--------------------------------------------------------------------------------
/benchmark/cp/tbcm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for tbcmzip.x; expects to be run from the benchmark directory.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 and $3 are passed to tbcmzip.x unchanged. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	bin/tbcmzip.x -c "$2" "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	bin/tbcmzip.x -d "$2" "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	#build in the parent directory; copy only if the build succeeded
11 | 	(cd .. && make tbcmzip.x) && cp ../tbcmzip.x bin/tbcmzip.x
12 | else
13 | 	exit 1
14 | fi
15 | 


--------------------------------------------------------------------------------
/benchmark/cp/tbwz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for tbwzip.x; expects to be run from the benchmark directory.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 and $3 are passed to tbwzip.x unchanged. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	bin/tbwzip.x -c "$2" "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	bin/tbwzip.x -d "$2" "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	#build in the parent directory; copy only if the build succeeded
11 | 	(cd .. && make tbwzip.x) && cp ../tbwzip.x bin/tbwzip.x
12 | else
13 | 	exit 1
14 | fi
15 | 


--------------------------------------------------------------------------------
/benchmark/cp/twt.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for twtzip.x; expects to be run from the benchmark directory.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 and $3 are passed to twtzip.x unchanged. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	bin/twtzip.x -c "$2" "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	bin/twtzip.x -d "$2" "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	#build in the parent directory; copy only if the build succeeded
11 | 	(cd .. && make twtzip.x) && cp ../twtzip.x bin/twtzip.x
12 | else
13 | 	exit 1
14 | fi
15 | 


--------------------------------------------------------------------------------
/benchmark/cp/wt.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for wtzip.x; expects to be run from the benchmark directory.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 and $3 are passed to wtzip.x unchanged. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	bin/wtzip.x -c "$2" "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	bin/wtzip.x -d "$2" "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	#build in the parent directory; copy only if the build succeeded
11 | 	(cd .. && make wtzip.x) && cp ../wtzip.x bin/wtzip.x
12 | else
13 | 	exit 1
14 | fi
15 | 


--------------------------------------------------------------------------------
/benchmark/cp/xz-extreme.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for xz at preset -9e.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 is the input file and $3 the output file. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	xz -z -c -k -9e -M 100% "$2" > "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	xz -d -c -k -9e -M 100% "$2" > "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	#the xz binary is shipped in xz-utils, not in p7zip
11 | 	apt-get install xz-utils
12 | else
13 | 	exit 1
14 | fi
15 | 


--------------------------------------------------------------------------------
/benchmark/cp/xz.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for xz with its default preset.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 is the input file and $3 the output file. Any other mode exits with status 1.
 5 | if [ "$1" = "c" ]; then		#compress infile
 6 | 	xz -z -c -k "$2" > "$3"
 7 | elif [ "$1" = "d" ]; then	#decompress infile
 8 | 	xz -d -c -k "$2" > "$3"
 9 | elif [ "$1" = "i" ]; then	#install compressor
10 | 	#the xz binary is shipped in xz-utils, not in p7zip
11 | 	apt-get install xz-utils
12 | else
13 | 	exit 1
14 | fi
15 | 


--------------------------------------------------------------------------------
/benchmark/cp/zpaq.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Benchmark adapter for zpaq.
 3 | # $1 selects the mode (c = compress, d = decompress, i = install);
 4 | # $2 and $3 are forwarded to zpaq (note the swapped order when compressing).
 5 | # Any other mode exits with status 1.
 6 | if [ "$1" = "c" ]; then		#compress infile
 7 | 	zpaq qisc "$3" "$2"
 8 | elif [ "$1" = "d" ]; then	#decompress infile
 9 | 	zpaq qx "$2" "$3"
10 | elif [ "$1" = "i" ]; then	#install compressor
11 | 	apt-get install zpaq
12 | else
13 | 	exit 1
14 | fi
15 | 


--------------------------------------------------------------------------------
/benchmark/rcrdata/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | !Makefile
4 | !rcrcompressors.config
5 | !rcrtestcases.config
6 | 


--------------------------------------------------------------------------------
/benchmark/rcrdata/Makefile:
--------------------------------------------------------------------------------
 1 | include rcrtestcases.config
 2 | 
 3 | all:	$(RCRFILES)
 4 | 
 5 | #generate texts from the silesia corpus
 6 | $(SILESIA):
 7 | 	curl http://sun.aei.polsl.pl/~sdeor/corpus/$(@F).bz2 -o $@.bz2
 8 | 	bunzip2 -f $@.bz2
 9 | 
10 | #download texts from the pizza & chili corpus
11 | $(PIZZACHILI):
12 | 	$(eval fdir := \
13 | 		$(if $(findstring $(basename $(@F)),sources),code,\
14 | 		$(if $(findstring $(basename $(@F)),pitches),music,\
15 | 		$(if $(findstring $(basename $(@F)),proteins),protein,\
16 | 		$(if $(findstring $(basename $(@F)),dna),dna,\
17 | 		$(if $(findstring $(basename $(@F)),english),nlang,\
18 | 		$(if $(findstring $(basename $(@F)),dblp.xml),xml,\
19 | 			$(error unknown pizza chili category of $(@F) ))))))))
20 | 	curl http://pizzachili.dcc.uchile.cl/texts/$(fdir)/$(@F).gz -o $@.gz
21 | 	gunzip -f $@.gz
22 | 
23 | #generate repetitive texts
24 | $(REPETITIVE):
25 | 	curl http://pizzachili.dcc.uchile.cl/repcorpus/real/$(@F).gz -o $@.gz
26 | 	gunzip -f $@.gz
27 | 
28 | # all and clean name no files; declare them phony so same-named files cannot shadow them
29 | .PHONY: all clean
30 | 
31 | # delete all downloaded test files
32 | clean:
33 | 	rm -f $(RCRFILES)
34 | 


--------------------------------------------------------------------------------
/benchmark/rcrdata/rcrcompressors.config:
--------------------------------------------------------------------------------
1 | COMPRESSORS=bwz tbwz xz-extreme zpaq
2 | 


--------------------------------------------------------------------------------
/benchmark/rcrdata/rcrtestcases.config:
--------------------------------------------------------------------------------
 1 | #benchmark setup
 2 | 
 3 | #test files categorized
 4 | SILESIA = \
 5 | 	dickens \
 6 | 	mozilla \
 7 | 	mr \
 8 | 	nci \
 9 | 	ooffice \
10 | 	osdb \
11 | 	reymont \
12 | 	samba \
13 | 	sao \
14 | 	webster \
15 | 	xml \
16 | 	x-ray
17 | PIZZACHILI = \
18 | 	sources \
19 | 	pitches \
20 | 	proteins \
21 | 	dna \
22 | 	english.1024MB \
23 | 	dblp.xml
24 | REPETITIVE = \
25 | 	Escherichia_Coli \
26 | 	cere \
27 | 	coreutils \
28 | 	einstein.de.txt \
29 | 	einstein.en.txt \
30 | 	influenza \
31 | 	kernel \
32 | 	para \
33 | 	world_leaders
34 | RCRFILES=$(SILESIA) $(PIZZACHILI) $(REPETITIVE)
35 | 
36 | #test files to be used for the benchmark
37 | TCFILES = $(addprefix rcrdata/,$(RCRFILES))
38 | 


--------------------------------------------------------------------------------
/benchmark/testcases.config:
--------------------------------------------------------------------------------
1 | #benchmark setup
2 | 
3 | #test files to be used for the benchmark
4 | TCFILES = README.md visualize.sh
5 | 


--------------------------------------------------------------------------------
/benchmark/tmp/.gitignore:
--------------------------------------------------------------------------------
1 | *.x
2 | 


--------------------------------------------------------------------------------
/external/bcm/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (C) 2008-2016 Ilya Muravyov
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/external/bcm/Make.helper:
--------------------------------------------------------------------------------
1 | BCM_INCS = \
2 | 	bcm-ss.hpp
3 | BCM_LIBS = \
4 | 	bcm-ss.cpp
5 | 


--------------------------------------------------------------------------------
/external/bcm/README.md:
--------------------------------------------------------------------------------
 1 | # BCM v1.25
 2 | 
 3 | ### Description
 4 | BCM is a high-performance file compressor that utilizes advanced context modeling techniques to achieve a very high compression ratio. All in all, it's like a big brother of the BZIP2.
 5 | 
 6 | ### Author
 7 | Ilya Muravyov
 8 | 
 9 | ### Thanks
10 | Special thanks to Yuta Mori, Matt Mahoney, Eugene Shelwien, Alexander Rhatushnyak, Przemyslaw Skibinski, Malcolm Taylor and LovePimple.
11 | 


--------------------------------------------------------------------------------
/external/bcm/bcm-ss.cpp:
--------------------------------------------------------------------------------
  1 | // This is the implementation of the second stage BWT transformation of BCM,
  2 | // invented by Ilya Muravyov
  3 | #include "bcm-ss.hpp"
  4 | 
  5 | using namespace bcm;
  6 | 
  7 | //// ENCODER IMPLEMENTATION ////
  8 | 
  9 | Encoder::Encoder() // start with the widest coding interval and a cleared code word
 10 |   {
 11 |     low=0;
 12 |     high=uint(-1); // all bits set
 13 |     code=0; // only used when decoding; loaded from the input by Init()
 14 |   }
 15 | 
 16 | void Encoder::EncodeBit0(uint p, std::ostream &out) // encode a 0 bit: keep the part of [low,high] above the split point
 17 |   {
 18 | #ifdef _WIN64
 19 |     low+=((ulonglong(high-low)*p)>>18)+1; // split = low+(high-low)*p/2^18; the new low is split+1
 20 | #else
 21 |     low+=((ulonglong(high-low)*(p<<(32-18)))>>32)+1; // same split with p pre-shifted by 14 bits (assumes p < 2^18 -- TODO confirm)
 22 | #endif
 23 |     while ((low^high)<(1<<24)) // low and high agree in every bit above the lowest 24
 24 |     {
 25 |       out.put(low>>24); // emit the agreed leading byte (assumes uint is 32 bits wide -- confirm in bcm-ss.hpp)
 26 |       low<<=8;
 27 |       high=(high<<8)+255; // shift in 0xFF so high stays as large as possible
 28 |     }
 29 |   }
 30 | 
 31 | void Encoder::EncodeBit1(uint p, std::ostream &out) // encode a 1 bit: keep the part of [low,high] up to and including the split point
 32 |   {
 33 | #ifdef _WIN64
 34 |     high=low+((ulonglong(high-low)*p)>>18); // new high = split = low+(high-low)*p/2^18
 35 | #else
 36 |     high=low+((ulonglong(high-low)*(p<<(32-18)))>>32); // same split with p pre-shifted by 14 bits (assumes p < 2^18 -- TODO confirm)
 37 | #endif
 38 |     while ((low^high)<(1<<24)) // low and high agree in every bit above the lowest 24
 39 |     {
 40 |       out.put(low>>24); // emit the agreed leading byte (assumes uint is 32 bits wide -- confirm in bcm-ss.hpp)
 41 |       low<<=8;
 42 |       high=(high<<8)+255; // shift in 0xFF so high stays as large as possible
 43 |     }
 44 |   }
 45 | 
 46 | void Encoder::Flush(std::ostream &out) // write out the state still held in low
 47 |   {
 48 |     for (int i=0; i<4; ++i) // four times: emit low>>24, then shift low left by one byte
 49 |     {
 50 |       out.put(low>>24);
 51 |       low<<=8;
 52 |     }
 53 |   }
 54 | 
 55 | void Encoder::Init(std::istream &in) // decoder setup: load the first four input bytes into code
 56 |   {
 57 |     for (int i=0; i<4; ++i)
 58 |       code=(code<<8)+in.get(); // earlier bytes end up in higher bits; NOTE(review): in.get() is not checked for EOF
 59 |   }
 60 | 
 61 | int Encoder::DecodeBit(uint p, std::istream &in)
 62 |   {
 63 | #ifdef _WIN64
 64 |     const uint mid=low+((ulonglong(high-low)*p)>>18);
 65 | #else
 66 |     const uint mid=low+((ulonglong(high-low)*(p<<(32-18)))>>32);
 67 | #endif
 68 |     const int bit=(code<=mid);
 69 |     if (bit)
 70 |       high=mid;
 71 |     else
 72 |       low=mid+1;
 73 | 
 74 |     while ((low^high)<(1<<24))
 75 |     {
 76 |       low<<=8;
 77 |       high=(high<<8)+255;
 78 |       code=(code<<8)+in.get();
 79 |     }
 80 | 
 81 |     return bit;
 82 |   }
 83 | 
 84 | //// BWT ENCODER IMPLEMENTATION ////
 85 | 
 86 | CM::CM()
 87 |   {
 88 |     c1=0;
 89 |     c2=0;
 90 |     run=0;
 91 | 
 92 |     for (int i=0; i<2; ++i)
 93 |     {
 94 |       for (int j=0; j<256; ++j)
 95 |       {
 96 |         for (int k=0; k<17; ++k)
 97 |           counter2[i][j][k].p=(k<<12)-(k==16);
 98 |       }
 99 |     }
100 |   }
101 | 
102 | void CM::Encode32(uint n, std::ostream &out)
103 |   {
104 |     for (int i=0; i<32; ++i)
105 |     {
106 |       if (n&(1<<31))
107 |         Encoder::EncodeBit1(1<<17, out);
108 |       else
109 |         Encoder::EncodeBit0(1<<17, out);
110 |       n+=n;
111 |     }
112 |   }
113 | 
114 | uint CM::Decode32(std::istream &in)
115 |   {
116 |     uint n=0;
117 |     for (int i=0; i<32; ++i)
118 |       n+=n+Encoder::DecodeBit(1<<17, in);
119 | 
120 |     return n;
121 |   }
122 | 
123 | void CM::Encode(int c, std::ostream &out)
124 |   {
125 |     if (c1==c2)
126 |       ++run;
127 |     else
128 |       run=0;
129 |     const int f=(run>2);
130 | 
131 |     int ctx=1;
132 |     while (ctx<256)
133 |     {
134 |       const int p0=counter0[ctx].p;
135 |       const int p1=counter1[c1][ctx].p;
136 |       const int p2=counter1[c2][ctx].p;
137 |       const int p=((p0+p1)*7+p2+p2)>>4;
138 | 
139 |       const int j=p>>12;
140 |       const int x1=counter2[f][ctx][j].p;
141 |       const int x2=counter2[f][ctx][j+1].p;
142 |       const int ssep=x1+(((x2-x1)*(p&4095))>>12);
143 | 
144 |       const int bit=c&128;
145 |       c+=c;
146 | 
147 |       if (bit)
148 |       {
149 |         Encoder::EncodeBit1(ssep*3+p, out);
150 |         counter0[ctx].UpdateBit1();
151 |         counter1[c1][ctx].UpdateBit1();
152 |         counter2[f][ctx][j].UpdateBit1();
153 |         counter2[f][ctx][j+1].UpdateBit1();
154 |         ctx+=ctx+1;
155 |       }
156 |       else
157 |       {
158 |         Encoder::EncodeBit0(ssep*3+p, out);
159 |         counter0[ctx].UpdateBit0();
160 |         counter1[c1][ctx].UpdateBit0();
161 |         counter2[f][ctx][j].UpdateBit0();
162 |         counter2[f][ctx][j+1].UpdateBit0();
163 |         ctx+=ctx;
164 |       }
165 |     }
166 | 
167 |     c2=c1;
168 |     c1=ctx&255;
169 |   }
170 | 
171 | int CM::Decode(std::istream &in)
172 |   {
173 |     if (c1==c2)
174 |       ++run;
175 |     else
176 |       run=0;
177 |     const int f=(run>2);
178 | 
179 |     int ctx=1;
180 |     while (ctx<256)
181 |     {
182 |       const int p0=counter0[ctx].p;
183 |       const int p1=counter1[c1][ctx].p;
184 |       const int p2=counter1[c2][ctx].p;
185 |       const int p=((p0+p1)*7+p2+p2)>>4;
186 | 
187 |       const int j=p>>12;
188 |       const int x1=counter2[f][ctx][j].p;
189 |       const int x2=counter2[f][ctx][j+1].p;
190 |       const int ssep=x1+(((x2-x1)*(p&4095))>>12);
191 | 
192 |       const int bit=Encoder::DecodeBit(ssep*3+p, in);
193 | 
194 |       if (bit)
195 |       {
196 |         counter0[ctx].UpdateBit1();
197 |         counter1[c1][ctx].UpdateBit1();
198 |         counter2[f][ctx][j].UpdateBit1();
199 |         counter2[f][ctx][j+1].UpdateBit1();
200 |         ctx+=ctx+1;
201 |       }
202 |       else
203 |       {
204 |         counter0[ctx].UpdateBit0();
205 |         counter1[c1][ctx].UpdateBit0();
206 |         counter2[f][ctx][j].UpdateBit0();
207 |         counter2[f][ctx][j+1].UpdateBit0();
208 |         ctx+=ctx;
209 |       }
210 |     }
211 | 
212 |     c2=c1;
213 |     return c1=ctx&255;
214 |   }
215 | 


--------------------------------------------------------------------------------
/external/bcm/bcm-ss.hpp:
--------------------------------------------------------------------------------
 1 | // This is a header file to cover the second stage BWT transformation of BCM,
 2 | // invented by Ilya Muravyov
 3 | 
 4 | #ifndef BCM_SS_HPP
 5 | #define BCM_SS_HPP
 6 | 
 7 | #include <istream>
 8 | #include <ostream>
 9 | 
namespace bcm {

// Short names for the unsigned integer types used by the coder.
typedef unsigned char      byte;
typedef unsigned short     word;
typedef unsigned int       uint;
typedef unsigned long long ulonglong;

// Binary arithmetic coder working on an interval [low, high].
// The probabilities passed in are for a 1 bit and use an 18-bit scale.
struct Encoder
{
  uint low;   // lower end of the current interval
  uint high;  // upper end of the current interval
  uint code;  // decoder only: window over the compressed input

  Encoder();

  // Encoding side.
  void EncodeBit0(uint p, std::ostream &out);
  void EncodeBit1(uint p, std::ostream &out);
  void Flush(std::ostream &out);

  // Decoding side.
  void Init(std::istream &in);
  int DecodeBit(uint p, std::istream &in);
};

// Adaptive 16-bit probability estimate.
// Each update moves p towards 0 (UpdateBit0) or towards 65535 (UpdateBit1)
// by 1/2^RATE of the remaining distance.
template<int RATE>
struct Counter
{
  word p;

  Counter() : p(1<<15) {}

  void UpdateBit0() { p=word(p-(p>>RATE)); }
  void UpdateBit1() { p=word(p+((p^65535)>>RATE)); }
};

// Context-mixing byte coder placed on top of the arithmetic coder.
struct CM: Encoder
{
  Counter<2> counter0[256];        // indexed by [bits of the current byte]
  Counter<4> counter1[256][256];   // indexed by [earlier byte][bits of the current byte]
  Counter<6> counter2[2][256][17]; // indexed by [run flag][bits of the current byte][bucket]
  int c1;   // previous byte
  int c2;   // byte before the previous one
  int run;  // number of consecutive calls that started with c1==c2

  CM();

  void Encode32(uint n, std::ostream &out);
  uint Decode32(std::istream &in);
  void Encode(int c, std::ostream &out);
  int Decode(std::istream &in);
};

//// EXAMPLES OF USE //////////////////////////////////////////////////////////
/*
  //ENCODING OF A BWT
  CM cm;
  cm.Encode32(n, out);
  for (uint i=0; i<n; ++i)
    cm.Encode(bwt[i], out);

  cm.Flush(out);

  //DECODING OF A BWT
  CM cm;
  cm.Init(in);
  uint n = cm.Decode32(in);
  byte *bwt = new byte[n];
  for (uint i=0; i<n; ++i)
    bwt[i]=cm.Decode(in);
*/
} // namespace bcm
89 | 
90 | #endif
91 | 


--------------------------------------------------------------------------------
/external/divsufsort/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2003 Yuta Mori All rights reserved.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/external/divsufsort/Make.helper:
--------------------------------------------------------------------------------
 1 | DIVSUFSORT_INCS = \
 2 | 	config.h \
 3 | 	divsufsort.h \
 4 | 	divsufsort_private.h
 5 | DIVSUFSORT_LIBS = \
 6 | 	divsufsort.c \
 7 | 	sssort.c \
 8 | 	trsort.c \
 9 | 	utils.c
10 | 


--------------------------------------------------------------------------------
/external/divsufsort/README.md:
--------------------------------------------------------------------------------
  1 | # libdivsufsort
  2 | 
  3 | libdivsufsort is a software library that implements a lightweight suffix array construction algorithm.
  4 | 
  5 | ## News
  6 | * 2015-03-21: The project has moved from [Google Code](http://code.google.com/p/libdivsufsort/) to [GitHub](https://github.com/y-256/libdivsufsort)
  7 | 
  8 | ## Introduction
  9 | This library provides a simple and efficient C API to construct a suffix array and a Burrows-Wheeler transformed string from a given string over a constant-size alphabet.
 10 | The algorithm runs in O(n log n) worst-case time using only 5n+O(1) bytes of memory space, where n is the length of
 11 | the string.
 12 | 
 13 | ## Build requirements
 14 | * An ANSI C Compiler (e.g. GNU GCC)
 15 | * [CMake](http://www.cmake.org/ "CMake") version 2.4.2 or newer
 16 | * CMake-supported build tool
 17 | 
 18 | ## Building on GNU/Linux
 19 | 1. Get the source code from GitHub. You can either
 20 |     * use git to clone the repository
 21 |     ```
 22 |     git clone https://github.com/y-256/libdivsufsort.git
 23 |     ```
 24 |     * or download a [zip file](../../archive/master.zip) directly
 25 | 2. Create a `build` directory in the package source directory.
 26 | ```shell
 27 | $ cd libdivsufsort
 28 | $ mkdir build
 29 | $ cd build
 30 | ```
 31 | 3. Configure the package for your system.
 32 | If you want to install to a different location,  change the -DCMAKE_INSTALL_PREFIX option.
 33 | ```shell
 34 | $ cmake -DCMAKE_BUILD_TYPE="Release" \
 35 | -DCMAKE_INSTALL_PREFIX="/usr/local" ..
 36 | ```
 37 | 4. Compile the package.
 38 | ```shell
 39 | $ make
 40 | ```
 41 | 5. (Optional) Install the library and header files.
 42 | ```shell
 43 | $ sudo make install
 44 | ```
 45 | 
 46 | ## API
 47 | ```c
 48 | /* Data types */
 49 | typedef int32_t saint_t;
 50 | typedef int32_t saidx_t;
 51 | typedef uint8_t sauchar_t;
 52 | 
 53 | /*
 54 |  * Constructs the suffix array of a given string.
 55 |  * @param T[0..n-1] The input string.
 56 |  * @param SA[0..n-1] The output array of suffixes.
 57 |  * @param n The length of the given string.
 58 |  * @return 0 if no error occurred, -1 or -2 otherwise.
 59 |  */
 60 | saint_t
 61 | divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
 62 | 
 63 | /*
 64 |  * Constructs the Burrows-Wheeler transformed string of a given string.
 65 |  * @param T[0..n-1] The input string.
 66 |  * @param U[0..n-1] The output string. (can be T)
 67 |  * @param A[0..n-1] The temporary array. (can be NULL)
 68 |  * @param n The length of the given string.
 69 |  * @return The primary index if no error occurred, -1 or -2 otherwise.
 70 |  */
 71 | saidx_t
 72 | divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
 73 | ```
 74 | 
 75 | ## Example Usage
 76 | ```c
 77 | #include <stdio.h>
 78 | #include <stdlib.h>
 79 | #include <string.h>
 80 | 
 81 | #include <divsufsort.h>
 82 | 
 83 | int main() {
 84 |     // input data
 85 |     char *Text = "abracadabra";
 86 |     int n = strlen(Text);
 87 |     int i, j;
 88 | 
 89 |     // allocate
 90 |     int *SA = (int *)malloc(n * sizeof(int));
 91 | 
 92 |     // sort
 93 |     divsufsort((unsigned char *)Text, SA, n);
 94 | 
 95 |     // output
 96 |     for(i = 0; i < n; ++i) {
 97 |         printf("SA[%2d] = %2d: ", i, SA[i]);
 98 |         for(j = SA[i]; j < n; ++j) {
 99 |             printf("%c", Text[j]);
100 |         }
101 |         printf("$\n");
102 |     }
103 | 
104 |     // deallocate
105 |     free(SA);
106 | 
107 |     return 0;
108 | }
109 | ```
110 | See the [examples](examples) directory for a few other examples.
111 | 
112 | ## Benchmarks
113 | See [Benchmarks](https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md) page for details.
114 | 
115 | ## License
116 | libdivsufsort is released under the [MIT license](LICENSE "MIT license").
117 | > The MIT License (MIT)
118 | >
119 | > Copyright (c) 2003 Yuta Mori All rights reserved.
120 | >
121 | > Permission is hereby granted, free of charge, to any person obtaining a copy
122 | > of this software and associated documentation files (the "Software"), to deal
123 | > in the Software without restriction, including without limitation the rights
124 | > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
125 | > copies of the Software, and to permit persons to whom the Software is
126 | > furnished to do so, subject to the following conditions:
127 | >
128 | > The above copyright notice and this permission notice shall be included in all
129 | > copies or substantial portions of the Software.
130 | >
131 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
132 | > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
133 | > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
134 | > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
135 | > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
136 | > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
137 | > SOFTWARE.
138 | 
139 | ## Author
140 | * Yuta Mori
141 | 


--------------------------------------------------------------------------------
/external/divsufsort/config.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * config.h for libdivsufsort
 3 |  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person
 6 |  * obtaining a copy of this software and associated documentation
 7 |  * files (the "Software"), to deal in the Software without
 8 |  * restriction, including without limitation the rights to use,
 9 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the
11 |  * Software is furnished to do so, subject to the following
12 |  * conditions:
13 |  *
14 |  * The above copyright notice and this permission notice shall be
15 |  * included in all copies or substantial portions of the Software.
16 |  *
17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 |  * OTHER DEALINGS IN THE SOFTWARE.
25 |  */
26 | 
27 | #ifndef _DIVSUFSORT_CONFIG_H
28 | #define _DIVSUFSORT_CONFIG_H 1
29 | 
30 | #ifdef __cplusplus
31 | extern "C" {
32 | #endif /* __cplusplus */
33 | 
34 | /** Define to the version of this package. **/
35 | #define PROJECT_VERSION_FULL "2.0.2"
36 | 
37 | /** Define to 1 if you have the header files. **/
38 | #define HAVE_INTTYPES_H 1
39 | #define HAVE_STDDEF_H 1
40 | #define HAVE_STDINT_H 1
41 | #define HAVE_STDLIB_H 1
42 | #define HAVE_STRING_H 1
43 | #define HAVE_STRINGS_H 1
44 | #define HAVE_MEMORY_H 1
45 | #define HAVE_SYS_TYPES_H 1
46 | 
47 | /** for WinIO **/
48 | /* #undef HAVE_IO_H */
49 | /* #undef HAVE_FCNTL_H */
50 | /* #undef HAVE__SETMODE */
51 | /* #undef HAVE_SETMODE */
52 | /* #undef HAVE__FILENO */
53 | /* #undef HAVE_FOPEN_S */
54 | /* #undef HAVE__O_BINARY */
55 | #ifndef HAVE__SETMODE
56 | # if HAVE_SETMODE
57 | #  define _setmode setmode
58 | #  define HAVE__SETMODE 1
59 | # endif
60 | # if HAVE__SETMODE && !HAVE__O_BINARY
61 | #  define _O_BINARY 0
62 | #  define HAVE__O_BINARY 1
63 | # endif
64 | #endif
65 | 
66 | /** for inline **/
67 | #ifndef INLINE
68 | # define INLINE inline
69 | #endif
70 | 
71 | /** for VC++ warning **/
72 | #ifdef _MSC_VER
73 | #pragma warning(disable: 4127)
74 | #endif
75 | 
76 | 
77 | #ifdef __cplusplus
78 | } /* extern "C" */
79 | #endif /* __cplusplus */
80 | 
81 | #endif /* _DIVSUFSORT_CONFIG_H */
82 | 


--------------------------------------------------------------------------------
/external/sdsl/COPYING:
--------------------------------------------------------------------------------
 1 | The sdsl copyright is as follows:
 2 | 
 3 | Copyright (C) 2007-2014 Simon Gog  All Right Reserved.
 4 | 
 5 | This program is free software: you can redistribute it and/or modify
 6 | it under the terms of the GNU General Public License as published by
 7 | the Free Software Foundation, either version 3 of the License, or
 8 | (at your option) any later version.
 9 | 
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | GNU General Public License for more details.
14 | 
15 | You should have received a copy of the GNU General Public License
16 | along with this program.  If not, see http://www.gnu.org/licenses/ .
17 | 


--------------------------------------------------------------------------------
/external/sdsl/Make.helper:
--------------------------------------------------------------------------------
# Every entry of the sdsl header directory, listed as include/sdsl/<name>.
SDSL_INCS = $(patsubst %,include/sdsl/%,$(shell ls external/sdsl/include/sdsl))
# Every entry of the sdsl library directory, listed as lib/<name>.
SDSL_LIBS = $(patsubst %,lib/%,$(shell ls external/sdsl/lib))
3 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | include(CheckIncludeFile)	
 2 | include(CheckIncludeFileCXX)	
 3 | include(CheckTypeSize)	
 4 | 
 5 | ## Check for header files ##
 6 | check_include_file_CXX(cstdio HAVE_STDIO)
 7 | 
 8 | ## copy hpp files to the binary tree ##
 9 | 
10 | file(GLOB hppFiles RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp") # select all .hpp-files
11 | 
12 | foreach(hppFile ${hppFiles}) # copy each file
13 | 	configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/${hppFile}" "${CMAKE_CURRENT_BINARY_DIR}/${hppFile}" COPYONLY ) 
14 | 	install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${hppFile}" DESTINATION include/sdsl)
15 | #	MESSAGE(${hppFile})
16 | endforeach(hppFile)
17 | 
18 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/bit_vectors.hpp:
--------------------------------------------------------------------------------
 1 | /*! \file bit_vectors.hpp
 2 |     \brief bit_vectors.hpp contains classes for uncompressed and compressed bit vector representations.
 3 | 	\author Simon Gog
 4 | */
 5 | #ifndef INCLUDED_SDSL_BITVECTORS
 6 | #define INCLUDED_SDSL_BITVECTORS
 7 | 
 8 | #include "int_vector.hpp"
 9 | #include "bit_vector_il.hpp"
10 | #include "rrr_vector.hpp"
11 | #include "sd_vector.hpp"
12 | #include "hyb_vector.hpp"
13 | 
14 | #endif
15 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/bp_support.hpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2009 Simon Gog
 3 | 
 4 |     This program is free software: you can redistribute it and/or modify
 5 |     it under the terms of the GNU General Public License as published by
 6 |     the Free Software Foundation, either version 3 of the License, or
 7 |     (at your option) any later version.
 8 | 
 9 |     This program is distributed in the hope that it will be useful,
10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |     GNU General Public License for more details.
13 | 
14 |     You should have received a copy of the GNU General Public License
15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
16 | */
17 | /*! \file bp_support.hpp
18 |     \brief bp_support.hpp contains several classed which support find_open, find_close, enclose and rr-enclose queries.
19 |     \author Simon Gog
20 | */
21 | 
22 | #ifndef INCLUDED_SDSL_BP_SUPPORT
23 | #define INCLUDED_SDSL_BP_SUPPORT
24 | 
25 | /** \defgroup bps Balanced Parentheses Supports (BPS)
26 |  * This group contains data structures which supports a sdsl::bit_vector with the following methods:
27 |  *   - find_open
28 |  *   - find_close
29 |  *   - enclose
30 |  *   - double_enclose
31 |  *   - rank
32 |  *   - select
33 |  *   - excess
34 |  *   - rr_enclose
35 |  */
36 | 
37 | #include "bp_support_g.hpp"
38 | #include "bp_support_gg.hpp"
39 | #include "bp_support_sada.hpp"
40 | 
41 | #endif
42 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/coder.hpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2008 Simon Gog
 3 | 
 4 |     This program is free software: you can redistribute it and/or modify
 5 |     it under the terms of the GNU General Public License as published by
 6 |     the Free Software Foundation, either version 3 of the License, or
 7 |     (at your option) any later version.
 8 | 
 9 |     This program is distributed in the hope that it will be useful,
10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |     GNU General Public License for more details.
13 | 
14 |     You should have received a copy of the GNU General Public License
15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
16 | */
17 | /*! \file coder.hpp
18 |     \brief coder.hpp contains the coder namespace and includes the header files of sdsl::coder::fibonacci, sdsl::coder::elias_delta, and sdsl::coder::run_length
19 | 	\author Simon Gog
20 |  */
21 | #ifndef SDSL_CODER
22 | #define SDSL_CODER
23 | 
24 | #include "int_vector.hpp"
25 | #include "coder_fibonacci.hpp"
26 | #include "coder_elias_delta.hpp"
27 | #include "coder_elias_gamma.hpp"
28 | #include "coder_comma.hpp"
29 | 
namespace sdsl
{

//! Namespace for the different coder of the sdsl.
namespace coder
{

//! Run-length coder that delegates the coding of each run length to Coder.
/*!
 * \tparam Coder Type providing a static `encoding_length(len)` function that
 *               returns the code length of a run of `len` bits.
 */
template<class Coder>
class run_length
{
    public:
        typedef uint64_t size_type;
        static void encode(uint64_t x, uint64_t*& z, uint8_t offset);
        static uint64_t encoding_length(const uint64_t* s, uint8_t s_offset, size_type bit_length);
};

//! Sums the code lengths of all runs of equal bits in a bit range.
/*!
 * The range is scanned from the least significant bit upwards. It is cut
 * into maximal runs of equal bits, and `Coder::encoding_length(len)` is added
 * up over the run lengths.
 *
 * \param s          Word that contains the first bit of the range.
 * \param s_offset   Position of the first bit inside `*s`; must be below 64.
 * \param bit_length Number of bits in the range.
 * \return Sum of the code lengths of all runs; 0 for an empty range.
 *
 * Only words that hold at least one bit of the range are read.
 */
template<class Coder>
typename run_length<Coder>::size_type run_length<Coder>::encoding_length(const uint64_t* s, uint8_t s_offset, size_type bit_length)
{
    assert(s_offset < 64);
    if (bit_length == 0) {
        // Empty range: there is nothing to scan and *s must not be read.
        return 0;
    }
    size_type i=0;
    uint64_t w = (*s >> s_offset);
    uint8_t last_bit = w&1;
    size_type result = 0;
    while (i < bit_length) {
        size_type len = 0;
        while (last_bit == (w&1) and  i < bit_length) {
            ++len; ++i; ++s_offset;
            w >>= 1;
            if (s_offset == 64) {
                s_offset = 0;
                // Fetch the next word only if the range continues into it;
                // otherwise this would read one word past the range.
                if (i < bit_length) {
                    w = *(++s);
                }
            }
        }
        last_bit = (w&1);
        result += Coder::encoding_length(len);
    }
    return result;
}


} // end namespace coder

} // end namespace sdsl
76 | 
77 | #endif
78 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/config.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SDSL_CONFIG
 2 | #define SDSL_CONFIG
 3 | 
 4 | #include "uintx_t.hpp"
 5 | #include <map>
 6 | #include <string>
 7 | 
 8 | namespace sdsl
 9 | {
10 | namespace conf  // namespace for library constant
11 | {
12 | // size of the buffer for reading and writing data in elements (not in bytes)
13 | const uint64_t SDSL_BLOCK_SIZE = (uint64_t)1<<22;
14 | 
15 | const char KEY_BWT[] 		= "bwt";
16 | const char KEY_BWT_INT[]	= "bwt_int";
17 | const char KEY_SA[] 		= "sa";
18 | const char KEY_CSA[] 		= "csa";
19 | const char KEY_CST[] 		= "cst";
20 | const char KEY_ISA[] 		= "isa";
21 | const char KEY_TEXT[] 		= "text";
22 | const char KEY_TEXT_INT[] 	= "text_int";
23 | const char KEY_PSI[] 		= "psi";
24 | const char KEY_LCP[] 		= "lcp";
25 | const char KEY_SAMPLE_CHAR[]= "sample_char";
26 | }
27 | typedef uint64_t int_vector_size_type;
28 | 
29 | typedef std::map<std::string, std::string> tMSS;
30 | 
31 | enum format_type {JSON_FORMAT, R_FORMAT, HTML_FORMAT};
32 | 
33 | enum byte_sa_algo_type {LIBDIVSUFSORT, SE_SAIS};
34 | 
35 | //! Helper class for construction process
36 | struct cache_config {
37 |     bool 		delete_files;   // Flag which indicates if all files which were created
38 |     // during construction should be deleted.
39 |     std::string dir;    		// Directory for temporary files.
40 |     std::string id;     		// Identifier is part of temporary file names. If
41 |     // id is the empty string, then it will be replace
42 |     // a concatenation of PID and a unique ID inside the
43 |     // current process.
44 |     tMSS 		file_map;		// Files stored during the construction process.
45 |     cache_config(bool f_delete_files=true, std::string f_dir="./", std::string f_id="", tMSS f_file_map=tMSS());
46 | };
47 | 
48 | //! Helper classes to transform width=0 and width=8 to corresponding text key
49 | template<uint8_t width>
50 | struct key_text_trait {
51 |     static const char* KEY_TEXT;
52 | };
53 | 
54 | //! Helper classes to transform width=0 and width=8 to corresponding bwt key
55 | template<uint8_t width>
56 | struct key_bwt_trait {
57 |     static const char* KEY_BWT;
58 | };
59 | }
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/construct_bwt.hpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2010 Simon Gog
 3 | 
 4 |     This program is free software: you can redistribute it and/or modify
 5 |     it under the terms of the GNU General Public License as published by
 6 |     the Free Software Foundation, either version 3 of the License, or
 7 |     (at your option) any later version.
 8 | 
 9 |     This program is distributed in the hope that it will be useful,
10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |     GNU General Public License for more details.
13 | 
14 |     You should have received a copy of the GNU General Public License
15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
16 | */
17 | /*! \file construct_bwt.hpp
18 |     \brief construct_bwt.hpp contains a space and time efficient construction method for the Burrows and Wheeler Transform (BWT).
19 |     \author Simon Gog
20 | */
21 | #ifndef INCLUDED_SDSL_CONSTRUCT_BWT
22 | #define INCLUDED_SDSL_CONSTRUCT_BWT
23 | 
24 | #include "int_vector.hpp"
25 | #include "sfstream.hpp"
26 | #include "util.hpp"
27 | #include "config.hpp" // for cache_config
28 | 
29 | #include <iostream>
30 | #include <stdexcept>
31 | #include <list>
32 | 
33 | namespace sdsl
34 | {
35 | 
36 | //! Constructs the Burrows and Wheeler Transform (BWT) from text over byte- or integer-alphabet and suffix array.
37 | /*!	The algorithm constructs the BWT and stores it to disk.
38 |  *  \tparam t_width Width of the text. 0==integer alphabet, 8=byte alphabet.
39 |  *  \param config	Reference to cache configuration
40 |  *  \par Space complexity
41 |  *		\f$ n \log \sigma \f$ bits
42 |  *  \pre Text and Suffix array exist in the cache. Keys:
43 |  *         * conf::KEY_TEXT for t_width=8 or conf::KEY_TEXT_INT for t_width=0
44 |  *         * conf::KEY_SA
45 |  *  \post BWT exist in the cache. Key
46 |  *         * conf::KEY_BWT for t_width=8 or conf::KEY_BWT_INT for t_width=0
47 |  */
48 | template<uint8_t t_width>
49 | void construct_bwt(cache_config& config)
50 | {
51 |     static_assert(t_width == 0 or t_width == 8 , "construct_bwt: width must be `0` for integer alphabet and `8` for byte alphabet");
52 | 
53 |     typedef int_vector<>::size_type size_type;
54 |     typedef int_vector<t_width> text_type;
55 |     typedef int_vector_buffer<t_width> bwt_type;
56 |     const char* KEY_TEXT = key_text_trait<t_width>::KEY_TEXT;
57 |     const char* KEY_BWT = key_bwt_trait<t_width>::KEY_BWT;
58 | 
59 |     //  (1) Load text from disk
60 |     text_type text;
61 |     load_from_cache(text, KEY_TEXT, config);
62 |     size_type n = text.size();
63 |     uint8_t bwt_width = text.width();
64 | 
65 |     //  (2) Prepare to stream SA from disc and BWT to disc
66 |     size_type buffer_size = 1000000; // buffer_size is a multiple of 8!, TODO: still true?
67 |     int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config), std::ios::in, buffer_size);
68 |     std::string bwt_file = cache_file_name(KEY_BWT, config);
69 |     bwt_type bwt_buf(bwt_file, std::ios::out, buffer_size, bwt_width);
70 | 
71 |     //  (3) Construct BWT sequentially by streaming SA and random access to text
72 |     size_type to_add[2] = {(size_type)-1,n-1};
73 |     for (size_type i=0; i < n; ++i) {
74 |         bwt_buf[i] = text[ sa_buf[i]+to_add[sa_buf[i]==0] ];
75 |     }
76 |     bwt_buf.close();
77 |     register_cache_file(KEY_BWT, config);
78 | }
79 | 
80 | }// end namespace
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/construct_config.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDED_SDSL_CONSTRUCT_CONFIG
 2 | #define INCLUDED_SDSL_CONSTRUCT_CONFIG
 3 | 
 4 | #include "config.hpp"
 5 | 
 6 | namespace sdsl
 7 | {
 8 | 
 9 | class construct_config
10 | {
11 |     public:
12 |         static byte_sa_algo_type byte_algo_sa;
13 | 
14 |         construct_config() = delete;
15 | };
16 | 
17 | }
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/construct_isa.hpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2010 Simon Gog
 3 | 
 4 |     This program is free software: you can redistribute it and/or modify
 5 |     it under the terms of the GNU General Public License as published by
 6 |     the Free Software Foundation, either version 3 of the License, or
 7 |     (at your option) any later version.
 8 | 
 9 |     This program is distributed in the hope that it will be useful,
10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |     GNU General Public License for more details.
13 | 
14 |     You should have received a copy of the GNU General Public License
15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
16 | */
17 | /*! \file construct_isa.hpp
18 |     \brief construct_isa.hpp contains a space and time efficient construction method for the inverse suffix array
19 | 	\author Simon Gog
20 | */
21 | #ifndef INCLUDED_SDSL_CONSTRUCT_ISA
22 | #define INCLUDED_SDSL_CONSTRUCT_ISA
23 | 
24 | #include "int_vector.hpp"
25 | #include "util.hpp"
26 | 
27 | #include <iostream>
28 | #include <stdexcept>
29 | #include <list>
30 | 
31 | namespace sdsl
32 | {
33 | 
34 | void construct_isa(cache_config& config);
35 | 
36 | }// end namespace
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/construct_lcp_helper.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDED_SDSL_CONSTRUCT_LCP_HELPER
 2 | #define INCLUDED_SDSL_CONSTRUCT_LCP_HELPER
 3 | 
 4 | #include "sdsl/int_vector.hpp"
 5 | #include <queue>
 6 | #include <list>
 7 | #include <vector>
 8 | 
 9 | namespace sdsl
10 | {
11 | 
12 | 
13 | void insert_lcp_values(int_vector<>& partial_lcp, bit_vector& index_done, std::string lcp_file, uint64_t max_lcp_value, uint64_t lcp_value_offset); // Declaration only (definition is outside this header). NOTE(review): lcp_file is taken by value, so the string is copied on every call
14 | 
15 | template<class tWT> // tWT must provide `sigma`, `size()` and be accepted by interval_symbols()
16 | void create_C_array(std::vector<uint64_t>& C, const tWT& wt) // Overwrites C with cumulative character counts of the sequence stored in wt
17 | {
18 |     uint64_t quantity;                          // number of distinct characters reported for the interval
19 |     std::vector<unsigned char> cs(wt.sigma);      // list of characters in the interval
20 |     std::vector<uint64_t> rank_c_i(wt.sigma);    // number of occurrences of each character in [0 .. i-1]
21 |     std::vector<uint64_t> rank_c_j(wt.sigma);    // number of occurrences of each character in [0 .. j-1]
22 | 
23 |     C = std::vector<uint64_t>(257, 0); // one slot per byte value plus one; any previous content of C is discarded
24 |     interval_symbols(wt, 0, wt.size(), quantity, cs, rank_c_i, rank_c_j); // query the full range [0, wt.size())
25 |     for (uint64_t i=0; i<quantity; ++i) {
26 |         unsigned char c = cs[i];
27 |         C[c+1] = rank_c_j[i]; // count of c goes one slot to the right so that the prefix sum below excludes c itself
28 |     }
29 |     for (uint64_t i=1; i<C.size()-1; ++i) { // prefix sum; afterwards C[c] is the number of characters smaller than c
30 |         C[i+1] += C[i];
31 |     }
32 | }
33 | 
34 | 
35 | class buffered_char_queue // Byte FIFO with two fixed-size in-memory buffers; member names suggest filled blocks are spilled to a file — the implementation is outside this header, confirm there
36 | {
37 |         typedef bit_vector::size_type size_type;
38 |         typedef std::queue<uint8_t> tQ; // not referenced by any other declaration in this class
39 |     private:
40 |         static const uint32_t m_buffer_size =  10000;// capacity in bytes of each buffer below (the alternative value 409600 was left commented out: //409600;)
41 |         uint8_t m_write_buf[m_buffer_size]; // buffer filled by push_back (presumably — confirm)
42 |         uint8_t m_read_buf[m_buffer_size]; // buffer drained by pop_front (presumably — confirm)
43 |         size_type 	m_widx; // write index
44 |         size_type 	m_ridx; // read index
45 |         bool		m_sync; // are read and write buffer the same?
46 |         size_type 	m_disk_buffered_blocks; // number of blocks written to disk and not read again yet
47 |         char 		m_c; // presumably the character passed to init(); confirm
48 |         size_type	m_rb; // read blocks
49 |         size_type	m_wb; // written blocks
50 | 
51 |         std::string m_file_name; // presumably the name of the backing file behind m_stream; confirm
52 | 
53 |         std::fstream	m_stream; // stream object owned by the queue
54 | 
55 |     public:
56 | 
57 |         buffered_char_queue();
58 |         void init(const std::string& dir, char c); // set-up step separate from construction; takes a directory and a character
59 |         ~buffered_char_queue();
60 |         void push_back(uint8_t x); // enqueue one byte
61 |         uint8_t pop_front(); // dequeue and return one byte
62 | };
63 | 
64 | typedef std::list<int_vector<>::size_type> tLI; // linked list of indices; used below as one list per character value
65 | typedef std::vector<int_vector<>::size_type> tVI; // vector of indices
66 | 
67 | template<class size_type_class> // integer type of the index i and of the counter m_char_count
68 | void push_front_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t (&m_chars)[256], size_type_class& m_char_count) // Prepends i to the list of character c and registers c in m_chars when its list was empty
69 | {
70 |     if (m_list[c].empty()) { // first pending index for c: record c as active
71 |         m_chars[m_char_count++] = c; // NOTE(review): m_char_count is not checked against the array size 256 here
72 |     }
73 |     m_list[c].push_front(i);
74 | }
75 | 
76 | template<class size_type_class> // integer type of the index i and of the counter m_char_count
77 | void push_back_m_index(size_type_class i, uint8_t c, tLI(&m_list)[256], uint8_t (&m_chars)[256], size_type_class& m_char_count) // Appends i to the list of character c and registers c in m_chars when its list was empty
78 | {
79 |     if (m_list[c].empty()) { // first pending index for c: record c as active
80 |         m_chars[m_char_count++] = c; // NOTE(review): m_char_count is not checked against the array size 256 here
81 |     }
82 |     m_list[c].push_back(i);
83 | }
84 | 
85 | void lcp_info(tMSS& file_map); // Declaration only (definition is outside this header). tMSS is not declared in this header — presumably a string-to-string map from another sdsl header; confirm
86 | 
87 | }
88 | 
89 | #endif
90 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/fast_cache.hpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef INCLUDED_SDSL_FAST_CACHE
 3 | #define INCLUDED_SDSL_FAST_CACHE
 4 | 
 5 | #include "int_vector.hpp"
 6 | 
 7 | namespace sdsl
 8 | {
 9 | 
10 | #define CACHE_SIZE 0x3FFULL
11 | 
12 | struct fast_cache { // Direct-mapped cache with CACHE_SIZE+1 slots; request i always uses slot (i & CACHE_SIZE)
13 |     typedef int_vector<>::size_type size_type;
14 |     size_type m_table[2*(CACHE_SIZE+1)]; // slot s uses two entries: m_table[2s] = cached request, m_table[2s+1] = its answer
15 |     // Constructor: marks every slot as empty by storing (size_type)-1 as its request; answers are left unwritten
16 |     fast_cache() {
17 |         for (size_type i=0; i < (CACHE_SIZE+1); ++i) {
18 |             m_table[i<<1] = (size_type)-1;
19 |         }
20 |     }
21 |     // Returns true if the request i is cached and
22 |     // x is set to the answer of request i
23 |     bool exists(size_type i, size_type& x) { // NOTE(review): i == (size_type)-1 equals the empty marker, so on a fresh cache it reports a hit and copies an unwritten answer
24 |         if (m_table[(i&CACHE_SIZE)<<1 ] == i) {
25 |             x = m_table[((i&CACHE_SIZE)<<1) + 1 ];
26 |             return true;
27 |         } else
28 |             return false; // x is left untouched on a miss
29 |     }
30 |     // Writes the answer for request i to the cache
31 |     void write(size_type i, size_type x) { // unconditionally replaces whatever request occupied the slot
32 |         m_table[(i&CACHE_SIZE)<<1 ] = i;
33 |         m_table[((i&CACHE_SIZE)<<1) + 1 ] = x;
34 |     }
35 | };
36 | 
37 | } // end namespace sdsl
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/k2_treap_helper.hpp:
--------------------------------------------------------------------------------
  1 | /* sdsl - succinct data structures library
  2 |     Copyright (C) 2014 Simon Gog
  3 | 
  4 |     This program is free software: you can redistribute it and/or modify
  5 |     it under the terms of the GNU General Public License as published by
  6 |     the Free Software Foundation, either version 3 of the License, or
  7 |     (at your option) any later version.
  8 | 
  9 |     This program is distributed in the hope that it will be useful,
 10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 |     GNU General Public License for more details.
 13 | 
 14 |     You should have received a copy of the GNU General Public License
 15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
 16 | */
 17 | /*! \file k2_treap_helper.hpp
 18 |     \brief k2_treap_helper.hpp contains helper functions and definitions for a k^2-treap implementation.
 19 |     \author Simon Gog
 20 | */
 21 | #ifndef INCLUDED_SDSL_K2_TREAP_HELPER
 22 | #define INCLUDED_SDSL_K2_TREAP_HELPER
 23 | 
 24 | #include "sdsl/vectors.hpp"
 25 | #include "sdsl/bits.hpp"
 26 | #include <tuple>
 27 | #include <algorithm>
 28 | #include <iterator>
 29 | #include <vector>
 30 | #include <complex>
 31 | #include <queue>
 32 | #include <array>
 33 | 
 34 | //! Namespace for the succinct data structure library.
 35 | namespace sdsl
 36 | {
 37 | 
 38 | namespace k2_treap_ns
 39 | {
 40 | 
 41 | // Precomputed powers of t_k for fast k^2 treap operations (generic base; bases 2, 4, 8 and 16 are specialized further down)
 42 | template<uint8_t t_k>
 43 | struct precomp {
 44 |     static struct impl { // table type; `data` is its single shared instance per t_k
 45 |         uint64_t exp[65]; // exp[i] = t_k^i computed in uint64_t, i.e. reduced modulo 2^64 once it overflows
 46 |         impl()
 47 |         {
 48 |             exp[0] = 1;
 49 |             for (uint8_t i=1; i<65; ++i) {
 50 |                 exp[i] = t_k * exp[i-1];
 51 |             }
 52 |         }
 53 |     } data;
 54 | 
 55 |     static uint64_t exp(uint8_t l) // table lookup of t_k^l; l must be <= 64 (not checked)
 56 |     {
 57 |         return data.exp[l];
 58 |     }
 59 | 
 60 |     static uint64_t divexp(uint64_t x, uint8_t l) // integer quotient x / t_k^l, using the table entry as divisor
 61 |     {
 62 |         return x/data.exp[l];
 63 |     }
 64 | 
 65 |     static uint64_t modexp(uint64_t x, uint8_t l) // remainder x mod t_k^l, using the table entry as divisor
 66 |     {
 67 |         return x%data.exp[l];
 68 |     }
 69 | };
 70 | 
 71 | template<> // base 2: power, quotient and remainder are done with a shift or a mask, no table needed
 72 | struct precomp<2> {
 73 |     static uint64_t exp(uint8_t l) // 2^l; l must be < 64 because the shift count must be below the word width
 74 |     {
 75 |         return 1ULL<<l;
 76 |     }
 77 | 
 78 |     static uint64_t divexp(uint64_t x, uint8_t l) // x / 2^l as a right shift by l
 79 |     {
 80 |         return x>>l;
 81 |     }
 82 | 
 83 |     static uint64_t modexp(uint64_t x, uint8_t l) // x mod 2^l, assuming bits::lo_set[l] is the mask of the l lowest bits — confirm in bits.hpp
 84 |     {
 85 |         return x & bits::lo_set[l];
 86 |     }
 87 | };
 88 | 
 89 | template<> // base 4 = 2^2: every operation works on 2*l bits
 90 | struct precomp<4> {
 91 |     static uint64_t exp(uint8_t l) // 4^l; requires 2*l < 64
 92 |     {
 93 |         return 1ULL<<(2*l);
 94 |     }
 95 | 
 96 |     static uint64_t divexp(uint64_t x, uint8_t l) // x / 4^l as a right shift by 2*l
 97 |     {
 98 |         return x>>(2*l);
 99 |     }
100 | 
101 |     static uint64_t modexp(uint64_t x, uint8_t l) // x mod 4^l, assuming bits::lo_set[n] is the mask of the n lowest bits — confirm in bits.hpp
102 |     {
103 |         return x & bits::lo_set[2*l];
104 |     }
105 | };
106 | 
107 | template<> // base 8 = 2^3: every operation works on 3*l bits
108 | struct precomp<8> {
109 |     static uint64_t exp(uint8_t l) // 8^l; requires 3*l < 64
110 |     {
111 |         return 1ULL<<(3*l);
112 |     }
113 | 
114 |     static uint64_t divexp(uint64_t x, uint8_t l) // x / 8^l as a right shift by 3*l
115 |     {
116 |         return x>>(3*l);
117 |     }
118 | 
119 |     static uint64_t modexp(uint64_t x, uint8_t l) // x mod 8^l, assuming bits::lo_set[n] is the mask of the n lowest bits — confirm in bits.hpp
120 |     {
121 |         return x & bits::lo_set[3*l];
122 |     }
123 | };
124 | 
125 | template<> // base 16 = 2^4: every operation works on 4*l bits
126 | struct precomp<16> {
127 |     static uint64_t exp(uint8_t l) // 16^l; requires 4*l < 64
128 |     {
129 |         return 1ULL<<(4*l);
130 |     }
131 | 
132 |     static uint64_t divexp(uint64_t x, uint8_t l) // x / 16^l as a right shift by 4*l
133 |     {
134 |         return x>>(4*l);
135 |     }
136 | 
137 |     static uint64_t modexp(uint64_t x, uint8_t l) // x mod 16^l, assuming bits::lo_set[n] is the mask of the n lowest bits — confirm in bits.hpp
138 |     {
139 |         return x & bits::lo_set[4*l];
140 |     }
141 | };
142 | 
143 | 
144 | template<uint8_t t_k> // out-of-class definition of the static table declared in the primary precomp template
145 | typename precomp<t_k>::impl precomp<t_k>::data; // default-constructed, so impl() fills the power table
146 | 
147 | 
148 | 
149 | typedef std::complex<uint64_t> t_p; // pair of two uint64_t read through real()/imag(). NOTE(review): the C++ standard leaves std::complex unspecified for non-floating-point element types
150 | typedef t_p                    point_type; // alias of t_p
151 | typedef t_p                    range_type; // alias of t_p (same representation as point_type)
152 | 
153 | struct node_type { // Plain record describing one tree node, ordered by operator< below
154 |     uint8_t  t;   // level; size of node 1<<t
155 |     t_p      p;   // lower left corner
156 |     uint64_t idx; // index in bp
157 |     uint64_t max_v; // maximal value
158 |     t_p      max_p; // maximal point
159 | 
160 |     node_type() = default; // scalar members t, idx and max_v are not initialized by default-initialization
161 |     node_type(uint8_t _t, t_p _p, uint64_t _idx, uint64_t _max_v,
162 |               t_p _max_p) : t(_t), p(_p), idx(_idx), max_v(_max_v),
163 |         max_p(_max_p)
164 |     {} // member-wise initialization from the arguments, nothing else
165 |     node_type(node_type&&) = default;
166 |     node_type(const node_type&) = default;
167 |     node_type& operator=(node_type&&) = default;
168 |     node_type& operator=(const node_type&) = default;
169 | 
170 |     bool operator<(const node_type& v) const // strict order: primarily by max_v, ties broken by max_p in reversed direction
171 |     {
172 |         if (max_v != v.max_v) {
173 |             return max_v < v.max_v; // smaller maximal value compares as less
174 |         }
175 |         if (real(max_p) != real(v.max_p)) {
176 |             return real(max_p) > real(v.max_p); // equal values: the LARGER real part compares as less
177 |         }
178 |         return imag(max_p) > imag(v.max_p); // equal values and real parts: the larger imaginary part compares as less
179 |     }
180 | };
181 | 
182 | } // end namespace k2_treap_ns
183 | 
184 | } // end namespace sdsl
185 | #endif
186 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/k2_tree_helper.hpp:
--------------------------------------------------------------------------------
  1 | /* sdsl - succinct data structures library
  2 |     Copyright (C) 2016 Francisco Montoto
  3 | 
  4 |     This program is free software: you can redistribute it and/or modify
  5 |     it under the terms of the GNU General Public License as published by
  6 |     the Free Software Foundation, either version 3 of the License, or
  7 |     (at your option) any later version.
  8 | 
  9 |     This program is distributed in the hope that it will be useful,
 10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 |     GNU General Public License for more details.
 13 | 
 14 |     You should have received a copy of the GNU General Public License
 15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
 16 | */
 17 | /*! \file k2_tree_helper.hpp
 18 |     \brief k2_tree_helper.hpp contains helper functions and definitions for a k^2-tree implementation.
 19 |     \author Francisco Montoto
 20 | */
 21 | #ifndef INCLUDED_SDSL_K2_TREE_HELPER
 22 | #define INCLUDED_SDSL_K2_TREE_HELPER
 23 | 
 24 | #include <cmath>
 25 | #include <iostream>
 26 | 
 27 | #include "sdsl/bit_vectors.hpp"
 28 | 
 29 | //! Namespace for the succinct data structure library.
 30 | namespace sdsl
 31 | {
 32 | 
 33 | //! Namespace for the k2_tree
 34 | namespace k2_tree_ns
 35 | {
 36 | 
 37 | typedef int_vector<>::size_type idx_type; // type used for row/column positions and offsets in this namespace
 38 | typedef int_vector<>::size_type size_type; // type used for sizes; same underlying type as idx_type
 39 | 
 40 | template<typename t_bv=bit_vector> // t_bv: bit container constructible as t_bv(size, 0) with indexable, assignable bits
 41 | int _build_from_matrix(const std::vector<std::vector <int>>& matrix, // Recursively encodes the n x n submatrix at row p / column q; returns 1 if it holds a 1-entry, else 0
 42 |                        const uint8_t k, int n, const int height, // k: branching per dimension, n: side length of this submatrix, height: level at which blocks are leaves
 43 |                        int l, int p, int q, std::vector<std::deque<t_bv>>& acc) // l: current level; acc[l] collects the non-empty k*k-bit blocks of level l (acc needs entries up to index height)
 44 | {
 45 |     unsigned i, j, b_size = static_cast<unsigned>(k) * k; // k^2 bits per block, computed in integer arithmetic instead of floating-point pow()
 46 |     t_bv b(b_size, 0);
 47 |     bool is_leaf = (l == height);
 48 | 
 49 |     if (is_leaf) { // copy the k x k cells; cells outside the matrix stay 0
 50 |         for (i = 0; i < k; i++)
 51 |             for (j = 0; j < k; j++)
 52 |                 if (p + i < matrix.size()
 53 |                     && q + j < matrix.size() // the column bound also uses matrix.size(), i.e. a square matrix is assumed
 54 |                     && matrix[p + i][q + j] == 1)
 55 |                     b[i * k + j] = 1;
 56 |     } else { // Internal node: bit (i, j) tells whether child submatrix (i, j) contains a 1
 57 |         for (i = 0; i < k; i++)
 58 |             for (j = 0; j < k; j++)
 59 |                 b[i * k + j] = _build_from_matrix(matrix, k, n/k, height, l + 1,
 60 |                                                   p + i * (n/k), q + j * (n/k),
 61 |                                                   acc);
 62 |     }
 63 | 
 64 |     // TODO There must be a better way to check if there is a 1 at b.
 65 |     for (i = 0; i < b_size; i++)
 66 |         if (b[i] == 1)
 67 |             break;
 68 |     if (i == b_size) // If there are not 1s at b.
 69 |         return 0; // all-zero blocks are not stored
 70 | 
 71 |     acc[l].push_back(std::move(b)); // children were appended to acc[l+1] before this block reaches acc[l]
 72 |     return 1;
 73 | }
 74 | 
 75 | /*! Get the chunk index ([0, k^2[) of a submatrix point.
 76 |  *
 77 |  * Gets a point in the global matrix and returns its corresponding chunk
 78 |  * in the submatrix specified.
 79 |  *
 80 |  * \param v Row of the point in the global matrix.
 81 |  * \param u Column of the point in the global matrix.
 82 |  * \param c_0 Column offset of the submatrix in the global matrix.
 83 |  * \param r_0 Row offset of the submatrix in the global matrix.
 84 |  * \param l size of the chunk at the submatrix.
 85 |  * \param k the k parameter from the k^2 tree.
 86 |  * \returns the index of the chunk containing the point at the submatrix.
 87 |  */
 88 | inline uint16_t get_chunk_idx(idx_type v, idx_type u, idx_type c_0,
 89 |                               idx_type r_0, size_type l, uint8_t k)
 90 | {
 91 |     return  ((v - r_0) / l) * k + (u - c_0) / l; // chunk row times k plus chunk column; the result is narrowed to uint16_t
 92 | }
 93 | 
 94 | template<typename t_bv=bit_vector> void build_template_vector(bit_vector& k_t_, // Generic case: builds k_t and k_l by constructing a t_bv from each plain bit_vector
 95 |         bit_vector& k_l_, t_bv& k_t, t_bv& k_l) // k_t_/k_l_ are the sources, k_t/k_l the outputs
 96 | {
 97 |     k_t = t_bv(k_t_); // relies on t_bv being constructible from a bit_vector
 98 |     k_l = t_bv(k_l_);
 99 | }
100 | 
101 | template<> inline void build_template_vector<bit_vector>(bit_vector& k_t_, // `inline` is required: a full specialization defined in a header is an ordinary function and would otherwise be defined once per including translation unit
102 |         bit_vector& k_l_,
103 |         bit_vector& k_t,
104 |         bit_vector& k_l)
105 | { // bit_vector case: no conversion needed, so the contents are exchanged instead of copied
106 |     k_t.swap(k_t_); // afterwards k_t_ holds the previous content of k_t
107 |     k_l.swap(k_l_); // afterwards k_l_ holds the previous content of k_l
108 | }
109 | 
110 | } // end namespace k2_tree_ns
111 | } // end namespace sdsl
112 | 
113 | #endif
114 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/lcp_dac.hpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2011-2014 Simon Gog
 3 | 
 4 |     This program is free software: you can redistribute it and/or modify
 5 |     it under the terms of the GNU General Public License as published by
 6 |     the Free Software Foundation, either version 3 of the License, or
 7 |     (at your option) any later version.
 8 | 
 9 |     This program is distributed in the hope that it will be useful,
10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |     GNU General Public License for more details.
13 | 
14 |     You should have received a copy of the GNU General Public License
15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
16 | */
17 | /*! \file lcp_dac.hpp
18 |     \brief lcp_dac.hpp contains an implementation of a (compressed) LCP array.
19 |     \author Simon Gog
20 | */
21 | #ifndef INCLUDED_SDSL_LCP_DAC
22 | #define INCLUDED_SDSL_LCP_DAC
23 | 
24 | #include "lcp.hpp"
25 | #include "vectors.hpp"
26 | #include "rank_support_v5.hpp"
27 | 
28 | namespace sdsl
29 | {
30 | 
31 | //! A class for the compressed version of LCP information of a suffix array
32 | /*! A dac_vector is used to represent the values in compressed form.
33 |  *  The template parameters are forwarded to the dac_vector.
34 |  *  \tparam t_b    Split block size.
35 |  *  \tparam t_rank Rank structure to navigate between the different levels.
36 |  */
37 | template<uint8_t  t_b    = 4,
38 |          typename t_rank = rank_support_v5<>>
39 | using lcp_dac = lcp_vlc<dac_vector<t_b, t_rank>>; // alias only: lcp_vlc instantiated with a dac_vector
40 | 
41 | template<typename t_bv = bit_vector, int t_default_max_levels = 64> // both parameters are forwarded unchanged to dac_vector_dp
42 | using lcp_dac_dp = lcp_vlc<dac_vector_dp<t_bv, t_default_max_levels>>; // alias only: lcp_vlc instantiated with a dac_vector_dp
43 | 
44 | } // end namespace sdsl
45 | #endif
46 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/ram_filebuf.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDED_SDSL_RAM_FSTREAMBUF
 2 | #define INCLUDED_SDSL_RAM_FSTREAMBUF
 3 | 
 4 | #include <fstream>
 5 | #include <vector>
 6 | #include "ram_fs.hpp"
 7 | 
 8 | namespace sdsl
 9 | {
10 | 
11 | class ram_filebuf : public std::streambuf // Stream buffer backed by a ram_fs::content_type (std::vector<char>) instead of a disk file; member definitions are outside this header
12 | {
13 |     private:
14 |         ram_fs::content_type* m_ram_file = nullptr;  // file handle
15 |         void pbump64(std::ptrdiff_t); // presumably a pbump variant taking a 64-bit offset (std::streambuf::pbump takes int) — confirm
16 | 
17 |     public:
18 |         virtual ~ram_filebuf();
19 | 
20 |         ram_filebuf();
21 |         ram_filebuf(std::vector<char>& ram_file); // takes the backing vector by reference
22 | 
23 |         std::streambuf*
24 |         open(const std::string s, std::ios_base::openmode mode); // NOTE(review): the name is taken by value (copied per call)
25 | 
26 |         bool is_open();
27 | 
28 |         ram_filebuf*
29 |         close();
30 | 
31 |         pos_type
32 |         seekpos(pos_type sp,
33 |                 std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) override; // overrides the virtual std::streambuf hook
34 | 
35 |         pos_type
36 |         pubseekoff(off_type off, std::ios_base::seekdir way, // NOTE(review): hides the non-virtual std::streambuf::pubseekoff; only used when called through a ram_filebuf-typed object
37 |                    std::ios_base::openmode which = std::ios_base::in | std::ios_base::out);
38 | 
39 |         pos_type
40 |         pubseekpos(pos_type sp, // NOTE(review): hides the non-virtual std::streambuf::pubseekpos in the same way
41 |                    std::ios_base::openmode which = std::ios_base::in | std::ios_base::out);
42 | 
43 | 
44 | //    std::streamsize
45 | //    xsputn(const char_type* s, std::streamsize n) override;
46 | 
47 |         int
48 |         sync() override; // overrides the virtual std::streambuf hook
49 | 
50 |         int_type
51 |         overflow(int_type c = traits_type::eof()) override; // overrides the virtual std::streambuf hook called when the put area is full
52 | };
53 | 
54 | }
55 | 
56 | #endif
57 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/ram_fs.hpp:
--------------------------------------------------------------------------------
 1 | /*! \file ram_fs.hpp
 2 |  * \brief ram_fs.hpp
 3 |  * \author Simon Gog
 4 |  */
 5 | #ifndef INCLUDED_SDSL_RAM_FS
 6 | #define INCLUDED_SDSL_RAM_FS
 7 | 
 8 | #include "uintx_t.hpp"
 9 | #include <string>
10 | #include <map>
11 | #include <vector>
12 | #include <mutex>
13 | 
14 | namespace sdsl
15 | {
16 | 
17 | class ram_fs_initializer // Helper whose constructor/destructor are defined outside this header; it is a friend of ram_fs. Presumably sets up and tears down ram_fs's static state — confirm
18 | {
19 |     public:
20 |         ram_fs_initializer();
21 |         ~ram_fs_initializer();
22 | };
23 | 
24 | } // end namespace sdsl
25 | 
26 | 
27 | static sdsl::ram_fs_initializer init_ram_fs; // `static` at namespace scope: every translation unit including this header gets its own instance, so the constructor runs once per such unit
28 | 
29 | namespace sdsl
30 | {
31 | 
32 | 
33 | //! ram_fs is a simple store for RAM-files.
34 | /*!
35 |  * Simple key-value store which maps file names
36 |  * (strings) to file content (content_type).
37 |  */
38 | class ram_fs
39 | {
40 |     public:
41 |         typedef std::vector<char> content_type; // raw bytes of one RAM-file
42 | 
43 |     private:
44 |         friend class ram_fs_initializer; // grants the initializer access to the private statics below
45 |         typedef std::map<std::string, content_type> mss_type;
46 |         static mss_type m_map; // declaration only; file name -> content
47 |         static std::recursive_mutex m_rlock; // declaration only; presumably guards m_map — confirm in the implementation
48 | 
49 |     public:
50 |         //! Default constructor
51 |         ram_fs();
52 |         static void store(const std::string& name, content_type data); // data is taken by value
53 |         //! Check if the file exists
54 |         static bool exists(const std::string& name);
55 |         //! Get the file size
56 |         static size_t file_size(const std::string& name);
57 |         //! Get the content
58 |         static content_type& content(const std::string& name); // returns a reference, not a copy
59 |         //! Remove the file with key `name`
60 |         static int remove(const std::string& name);
61 |         //! Rename the file. Change key `old_filename` into `new_filename`.
62 |         static int rename(const std::string old_filename, const std::string new_filename); // NOTE(review): both names are taken by value, unlike the other members
63 | };
64 | 
65 | //! Determines if the given file is a RAM-file.
66 | bool is_ram_file(const std::string& file); // declaration only; the naming rule that marks a RAM-file is in the implementation
67 | 
68 | //! Returns the corresponding RAM-file name for file.
69 | std::string ram_file_name(const std::string& file);
70 | 
71 | //! Returns for a RAM-file the corresponding disk file name
72 | std::string disk_file_name(const std::string& file);
73 | 
74 | //! Remove a file.
75 | int remove(const std::string& file); // sdsl::remove; return convention not visible here — presumably 0 on success like std::remove, confirm
76 | 
77 | //! Rename a file
78 | int rename(const std::string& old_filename, const std::string& new_filename); // sdsl::rename; return convention not visible here — presumably 0 on success like std::rename, confirm
79 | 
80 | } // end namespace sdsl
81 | #endif
82 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/rank_support_scan.hpp:
--------------------------------------------------------------------------------
  1 | /* sdsl - succinct data structures library
  2 |     Copyright (C) 2012 Simon Gog
  3 | 
  4 |     This program is free software: you can redistribute it and/or modify
  5 |     it under the terms of the GNU General Public License as published by
  6 |     the Free Software Foundation, either version 3 of the License, or
  7 |     (at your option) any later version.
  8 | 
  9 |     This program is distributed in the hope that it will be useful,
 10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 |     GNU General Public License for more details.
 13 | 
 14 |     You should have received a copy of the GNU General Public License
 15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
 16 | */
 17 | /*! \file rank_support_scan.hpp
 18 |     \brief rank_support_scan.hpp contains rank_support_scan that support a sdsl::bit_vector with linear time rank information.
 19 |     \author Simon Gog
 20 | */
 21 | #ifndef INCLUDED_SDSL_RANK_SUPPORT_SCAN
 22 | #define INCLUDED_SDSL_RANK_SUPPORT_SCAN
 23 | 
 24 | #include "rank_support.hpp"
 25 | 
 26 | //! Namespace for the succinct data structure library.
 27 | namespace sdsl
 28 | {
 29 | 
 30 | //! A class supporting rank queries in linear time.
 31 | /*! \par Space complexity
 32 |  *       Constant.
 33 |  *  \par Time complexity
 34 |  *       Linear in the size of the supported vector.
 35 |  *
 36 |  *  \tparam t_b       Bit pattern which should be supported. Either `0`,`1`,`10`,`01` or `11`.
 37 |  *  \tparam t_pat_len Length of the bit pattern.
 38 |  * @ingroup rank_support_group
 39 |  */
 40 | template<uint8_t t_b=1, uint8_t t_pat_len=1>
 41 | class rank_support_scan : public rank_support
 42 | {
 43 |     private:
 44 |         static_assert(t_b == 1u or t_b == 0u or t_b == 10u or t_b == 11u, "rank_support_scan: bit pattern must be `0`,`1`,`10`,`01` or `11`"); // the literal `01` has the value 1, so pattern 01 is t_b=1 with t_pat_len=2
 45 |         static_assert(t_pat_len == 1u or t_pat_len == 2u , "rank_support_scan: bit pattern length must be 1 or 2");
 46 |     public:
 47 |         typedef bit_vector bit_vector_type;
 48 |         enum { bit_pat = t_b };
 49 |         enum { bit_pat_len = t_pat_len };
 50 |     public:
 51 |         explicit rank_support_scan(const bit_vector* v = nullptr) // only stores the pointer; no auxiliary data is built
 52 |         {
 53 |             set_vector(v);
 54 |         }
 55 |         rank_support_scan(const rank_support_scan& rs) // copies the pointer to the supported vector, not the vector
 56 |         {
 57 |             set_vector(rs.m_v);
 58 |         }
 59 |         size_type rank(size_type idx) const; // defined below the class
 60 |         size_type operator()(size_type idx)const // shorthand for rank(idx)
 61 |         {
 62 |             return rank(idx);
 63 |         }
 64 |         size_type size()const // size of the supported vector; m_v must not be null
 65 |         {
 66 |             return m_v->size();
 67 |         }
 68 |         size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const // delegates to serialize_empty_object: the class has no data of its own
 69 |         {
 70 |             return serialize_empty_object(out, v, name, this);
 71 |         }
 72 |         void load(std::istream&, const int_vector<1>* v=nullptr) // the stream is ignored; only the vector pointer is (re)set
 73 |         {
 74 |             set_vector(v);
 75 |         }
 76 |         void set_vector(const bit_vector* v=nullptr)
 77 |         {
 78 |             m_v=v;
 79 |         }
 80 | 
 81 |         //! Assign Operator
 82 |         rank_support_scan& operator=(const rank_support_scan& rs)
 83 |         {
 84 |             set_vector(rs.m_v);
 85 |             return *this;
 86 |         }
 87 | 
 88 |         //! swap Operator
 89 |         void swap(rank_support_scan&) {} // intentionally empty: nothing is exchanged, m_v included
 90 | };
 91 | 
 92 | template<uint8_t t_b, uint8_t t_pat_len>
 93 | inline typename rank_support_scan<t_b, t_pat_len>::size_type rank_support_scan<t_b, t_pat_len>::rank(size_type idx)const // Scans the vector word by word up to idx; no precomputed counts are used
 94 | {
 95 |     assert(m_v != nullptr);
 96 |     assert(idx <= m_v->size());
 97 |     const uint64_t* p   = m_v->data();
 98 |     size_type       i   = 0; // bit position of the word currently being counted
 99 |     size_type   result  = 0;
100 |     while (i+64 <= idx) { // every 64-bit word that lies completely below idx
101 |         result += rank_support_trait<t_b, t_pat_len>::full_word_rank(p, i);
102 |         i += 64;
103 |     }
104 |     return  result+rank_support_trait<t_b, t_pat_len>::word_rank(p, idx); // presumably adds the occurrences in the last, partial word below idx — confirm word_rank in rank_support.hpp
105 | }
106 | 
107 | }// end namespace sdsl
108 | 
109 | #endif // end file
110 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/raster_img.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef SDSL_INC_RASTER_IMG
 2 | #define SDSL_INC_RASTER_IMG
 3 | 
 4 | #include <sdsl/bit_vectors.hpp>
 5 | 
 6 | namespace sdsl
 7 | {
 8 | 
 9 | struct raster_img { // Plain data holder for a raster image; serialize/load are defined outside this header
10 |     typedef uint64_t size_type;
11 | 
12 |     uint64_t     max_x; // max x value
13 |     uint64_t     max_y; // max y value
14 |     uint64_t     max_z; // max z value in the compacted range
15 |     uint32_t     offset; // meaning not visible in this header — confirm in the implementation
16 |     bit_vector   value_map; // presumably marks which original values occur (used for the compacted range) — confirm
17 |     int_vector<> data; // the stored values; layout not visible in this header
18 | 
19 |     //! Serializes the data structure into the given ostream
20 |     uint64_t serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const;
21 | 
22 |     //! Loads the data structure from the given istream.
23 |     void load(std::istream& in);
24 | };
25 | 
26 | }
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/rmq_support.hpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2009 Simon Gog
 3 | 
 4 |     This program is free software: you can redistribute it and/or modify
 5 |     it under the terms of the GNU General Public License as published by
 6 |     the Free Software Foundation, either version 3 of the License, or
 7 |     (at your option) any later version.
 8 | 
 9 |     This program is distributed in the hope that it will be useful,
10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |     GNU General Public License for more details.
13 | 
14 |     You should have received a copy of the GNU General Public License
15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
16 | */
17 | /*! \file rmq_support.hpp
18 |     \brief rmq_support.hpp contains different range minimum support data structures.
19 | 	\author Simon Gog
20 | */
21 | #ifndef INCLUDED_SDSL_RMQ_SUPPORT
22 | #define INCLUDED_SDSL_RMQ_SUPPORT
23 | 
24 | /** \defgroup rmq_group Range Minimum/Maximum Support (RMS) */
25 | 
26 | template<class RandomAccessContainer, bool Minimum>	 // for range minimum queries; note: declared at global scope, this header opens no namespace
27 | struct min_max_trait {
28 |     static inline bool strict_compare(const typename RandomAccessContainer::value_type v1, const typename RandomAccessContainer::value_type v2) { // true iff v1 is strictly smaller than v2
29 |         return v1 < v2;
30 |     }
31 |     static inline bool compare(const typename RandomAccessContainer::value_type v1, const typename RandomAccessContainer::value_type v2) { // true iff v1 is smaller than or equal to v2
32 |         return v1 <= v2;
33 |     }
34 | };
35 | 
36 | template<class RandomAccessContainer> // for range maximum queries
37 | struct min_max_trait<RandomAccessContainer, false> { // chosen when Minimum == false; both comparisons are mirrored
38 |     static inline bool strict_compare(const typename RandomAccessContainer::value_type v1, const typename RandomAccessContainer::value_type v2) { // true iff v1 is strictly greater than v2
39 |         return v1 > v2;
40 |     }
41 |     static inline bool compare(const typename RandomAccessContainer::value_type v1, const typename RandomAccessContainer::value_type v2) { // true iff v1 is greater than or equal to v2
42 |         return v1 >= v2;
43 |     }
44 | };
45 | 
46 | #include "rmq_support_sparse_table.hpp"
47 | #include "rmq_succinct_sct.hpp"
48 | #include "rmq_succinct_sada.hpp"
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/sdsl_concepts.hpp:
--------------------------------------------------------------------------------
  1 | /* sdsl - succinct data structures library
  2 |     Copyright (C) 2010 Simon Gog
  3 | 
  4 |     This program is free software: you can redistribute it and/or modify
  5 |     it under the terms of the GNU General Public License as published by
  6 |     the Free Software Foundation, either version 3 of the License, or
  7 |     (at your option) any later version.
  8 | 
  9 |     This program is distributed in the hope that it will be useful,
 10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 |     GNU General Public License for more details.
 13 | 
 14 |     You should have received a copy of the GNU General Public License
 15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
 16 | */
 17 | /*! \file sdsl_concepts.hpp
 18 |     \brief Contains declarations and definitions of data structure concepts.
 19 | 	\author Simon Gog
 20 | */
 21 | #ifndef INCLUDED_SDSL_CONCEPTS
 22 | #define INCLUDED_SDSL_CONCEPTS
 23 | 
 24 | #include "uintx_t.hpp" // for uint8_t
 25 | 
 26 | namespace sdsl
 27 | {
 28 | 
29 | struct bv_tag {}; // bitvector tag
30 | struct iv_tag {}; // int_vector tag
31 | 
32 | struct csa_tag {}; // compressed suffix array (CSAs) tag
33 | struct cst_tag {}; // compressed suffix tree (CST) tag
34 | struct wt_tag {};  // wavelet tree tag
35 | 
36 | struct psi_tag {}; // tag for CSAs based on the psi function
37 | struct lf_tag {}; // tag for CSAs based on the LF function
38 | 
39 | struct csa_member_tag {}; // tag for text, bwt, LF, \Psi members of CSA
40 | 
41 | struct lcp_tag {}; // empty tag types for LCP structures (this one and the four below); their exact meaning is defined by the classes using them
42 | struct lcp_plain_tag {};
43 | struct lcp_permuted_tag {};
44 | struct lcp_tree_compressed_tag {};
45 | struct lcp_tree_and_lf_compressed_tag {};
46 | 
47 | struct alphabet_tag {}; // empty alphabet tag
48 | struct byte_alphabet_tag { static const uint8_t WIDTH=8; }; // carries the constant WIDTH = 8 — presumably bits per symbol; confirm
49 | struct int_alphabet_tag { static const uint8_t WIDTH=0; }; // carries the constant WIDTH = 0 — presumably "width not fixed at compile time"; confirm
50 | 
51 | struct sa_sampling_tag {}; // empty tag for suffix-array sampling types (by name)
52 | struct isa_sampling_tag {}; // empty tag for inverse-suffix-array sampling types (by name)
 53 | 
 54 | 
55 | template<class t_T, class t_r = void> // t_T is only named, never used; naming a nested type here makes a partial specialization drop out when that type does not exist
56 | struct enable_if_type {
57 |     typedef t_r type; // always t_r (void by default), whatever t_T is
58 | };
 59 | 
60 | template<class t_idx, class t_enable = void> // fallback for types without a nested index_category
61 | struct index_tag {
62 |     typedef t_enable type; // void unless a second argument is given explicitly
63 | };
64 | 
65 | template<class t_idx> // selected when t_idx::index_category names a type
66 | struct index_tag<t_idx, typename enable_if_type<typename t_idx::index_category>::type> {
67 |     using type = typename t_idx::index_category; // exposes the category declared by t_idx
68 | };
 69 | 
70 | template<class t_sampling, class t_enable = void> // fallback for types without a nested sampling_category
71 | struct sampling_tag {
72 |     typedef t_enable type; // void unless a second argument is given explicitly
73 | };
74 | 
75 | template<class t_sampling> // selected when t_sampling::sampling_category names a type
76 | struct sampling_tag<t_sampling, typename enable_if_type<typename t_sampling::sampling_category>::type> {
77 |     using type = typename t_sampling::sampling_category; // exposes the category declared by t_sampling
78 | };
 79 | 
 80 | template<class t_enc_vec, class t_enable = void>
 81 | struct is_enc_vec {
 82 |     static const bool value = false;
 83 | };
 84 | 
 85 | template<class t_enc_vec>
 86 | struct is_enc_vec<t_enc_vec, typename enable_if_type<typename t_enc_vec::enc_vec_type>::type> {
 87 |     static const bool value = true;
 88 | };
 89 | 
 90 | template<class t_alphabet, class t_enable = void>
 91 | struct is_alphabet {
 92 |     static const bool value = false;
 93 | };
 94 | 
 95 | template<class t_alphabet>
 96 | struct is_alphabet<t_alphabet, typename enable_if_type<typename t_alphabet::alphabet_category>::type> {
 97 |     static const bool value = true;
 98 | };
 99 | 
100 | } // end namespace sdsl
101 | 
102 | #endif
103 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/select_support_scan.hpp:
--------------------------------------------------------------------------------
  1 | /* sdsl - succinct data structures library
  2 |     Copyright (C) 2012 Simon Gog
  3 | 
  4 |     This program is free software: you can redistribute it and/or modify
  5 |     it under the terms of the GNU General Public License as published by
  6 |     the Free Software Foundation, either version 3 of the License, or
  7 |     (at your option) any later version.
  8 | 
  9 |     This program is distributed in the hope that it will be useful,
 10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 |     GNU General Public License for more details.
 13 | 
 14 |     You should have received a copy of the GNU General Public License
 15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
 16 | */
 17 | /*! \file select_support_scan.hpp
 18 |     \brief select_support_scan.hpp contains classes that support a sdsl::bit_vector with linear time select.
 19 |     \author Simon Gog
 20 | */
 21 | #ifndef INCLUDED_SDSL_SELECT_SUPPORT_SCAN
 22 | #define INCLUDED_SDSL_SELECT_SUPPORT_SCAN
 23 | 
 24 | #include "int_vector.hpp"
 25 | #include "util.hpp"
 26 | #include "select_support.hpp"
 27 | 
 28 | //! Namespace for the succinct data structure library.
 29 | namespace sdsl
 30 | {
 31 | 
 32 | 
 33 | //! A class supporting linear time select queries.
 34 | /*! \par Space complexity
 35 |  *       Constant.
 36 |  *  \par Time complexity
 37 |  *       Linear in the size of the supported vector.
 38 |  *
 39 |  *  \tparam t_b       Bit pattern which should be supported. Either `0`,`1`,`10`,`01`.
 40 |  *  \tparam t_pat_len Length of the bit pattern.
 41 |  * @ingroup select_support_group
 42 |  */
 43 | template<uint8_t t_b=1, uint8_t t_pat_len=1>
 44 | class select_support_scan : public select_support
 45 | {
 46 |     private:
 47 |         static_assert(t_b == 1u or t_b == 0u or t_b == 10u , "select_support_scan: bit pattern must be `0`,`1`,`10` or `01`");
 48 |         static_assert(t_pat_len == 1u or t_pat_len == 2u , "select_support_scan: bit pattern length must be 1 or 2");
 49 |     public:
 50 |         typedef bit_vector bit_vector_type;
 51 |         enum { bit_pat = t_b };
 52 |     public:
 53 |         explicit select_support_scan(const bit_vector* v=nullptr) : select_support(v) {}
 54 |         select_support_scan(const select_support_scan<t_b,t_pat_len>& ss) : select_support(ss.m_v) {}
 55 | 
 56 |         inline size_type select(size_type i) const;
 57 |         inline size_type operator()(size_type i)const
 58 |         {
 59 |             return select(i);
 60 |         }
 61 |         size_type serialize(std::ostream& out, structure_tree_node* v=nullptr, std::string name="")const
 62 |         {
 63 |             return serialize_empty_object(out, v, name, this);
 64 |         }
 65 |         void load(std::istream&, SDSL_UNUSED const bit_vector* v=nullptr)
 66 |         {
 67 |             set_vector(v);
 68 |         }
 69 | 
 70 |         void set_vector(const bit_vector* v=nullptr)
 71 |         {
 72 |             m_v = v;
 73 |         }
 74 |         select_support_scan<t_b, t_pat_len>& operator=(const select_support_scan& ss)
 75 |         {
 76 |             set_vector(ss.m_v);
 77 |             return *this;
 78 |         }
 79 |         void swap(select_support_scan<t_b, t_pat_len>&) {}
 80 | };
 81 | 
//! Locates the i-th occurrence of the supported pattern by scanning the vector word by word.
/*! \param i Index of the occurrence to find.
 *           NOTE(review): presumably 1-based and not larger than the number of
 *           occurrences; nothing here checks that except the assert in the loop.
 *  \return  Bit position computed as (word index * 64) plus the in-word position
 *           reported by select_support_trait.
 */
template<uint8_t t_b, uint8_t t_pat_len>
inline typename select_support_scan<t_b,t_pat_len>::size_type select_support_scan<t_b,t_pat_len>::select(size_type i)const
{
    const uint64_t* data = m_v->data();
    size_type word_pos = 0;
    size_type word_off = 0;
    // The first word is handled separately via the *_in_the_first_word trait functions.
    uint64_t carry = select_support_trait<t_b,t_pat_len>::init_carry(data, word_pos);
    size_type args = select_support_trait<t_b,t_pat_len>::args_in_the_first_word(*data, word_off, carry);
    if (args >= i) {
        // The answer lies in the first word; word_pos<<6 converts a word index into a bit index.
        return (word_pos<<6)+select_support_trait<t_b,t_pat_len>::ith_arg_pos_in_the_first_word(*data, i, word_off, carry);
    }
    word_pos+=1;
    size_type sum_args = args; // occurrences counted in all words before the current one
    carry = select_support_trait<t_b,t_pat_len>::get_carry(*data);
    // old_carry keeps the carry as it was BEFORE the current word was counted;
    // it is what the final in-word lookup receives.
    uint64_t old_carry = carry;
    args = select_support_trait<t_b,t_pat_len>::args_in_the_word(*(++data), carry);
    while (sum_args + args < i) {
        sum_args += args;
        // Debug-only guard against advancing past the allocated words.
        assert(data+1 < m_v->data() + (m_v->capacity()>>6));
        old_carry = carry;
        args = select_support_trait<t_b,t_pat_len>::args_in_the_word(*(++data), carry);
        word_pos+=1;
    }
    // i-sum_args is the index of the wanted occurrence within the current word.
    return (word_pos<<6) + select_support_trait<t_b,t_pat_len>::ith_arg_pos_in_the_word(*data, i-sum_args, old_carry);
}
107 | 
108 | } // end namespace
109 | #endif
110 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/sfstream.hpp:
--------------------------------------------------------------------------------
 1 | /*!\file sfstream.hpp
 2 |    \brief sfstream.hpp contains a two stream class which can be used to read/write from/to files or strings.
 3 |    \author Simon Gog
 4 | */
 5 | #ifndef INCLUDED_SDSL_SFSTREAM
 6 | #define INCLUDED_SDSL_SFSTREAM
 7 | 
 8 | #include <fstream>
 9 | #include <sstream>
10 | #include <string>
11 | #include "sdsl/ram_fs.hpp"
12 | #include "sdsl/ram_filebuf.hpp"
13 | 
14 | namespace sdsl
15 | {
16 | 
//! Output stream declared in sfstream.hpp; per the file header it can write to files or strings.
class osfstream : public std::ostream
{
    public:
        using buf_ptr_type = std::streambuf*;

    private:
        buf_ptr_type m_streambuf = nullptr;
        std::string  m_file      = "";

    public:
        using voidptr = void*;

        //! Default constructor.
        osfstream();
        //! Constructs the stream from a file name and an open mode.
        osfstream(const std::string& file, std::ios_base::openmode mode = std::ios_base::out);
        //! Opens `file` with the given mode and returns a stream buffer pointer.
        buf_ptr_type open(const std::string& file, std::ios_base::openmode mode = std::ios_base::out);
        //! Is the stream open?
        bool is_open();
        //! Closes the stream.
        void close();
        //! Destructor.
        ~osfstream();
        //! Conversion to void*.
        operator voidptr() const;

        //! Sets the put position to `pos`.
        osfstream& seekp(pos_type pos);
        //! Sets the put position to offset `off` relative to `way`.
        osfstream& seekp(off_type off, ios_base::seekdir way);
        //! Returns the current put position.
        std::streampos tellp();
};
46 | 
47 | 
//! Input stream declared in sfstream.hpp; per the file header it can read from files or strings.
class isfstream : public std::istream
{
    private:
        using buf_ptr_type = std::streambuf*;

        buf_ptr_type m_streambuf = nullptr;
        std::string  m_file      = "";

    public:
        using voidptr = void*;

        //! Default constructor.
        isfstream();
        //! Constructs the stream from a file name and an open mode.
        isfstream(const std::string& file, std::ios_base::openmode mode = std::ios_base::in);
        //! Opens `file` with the given mode and returns a stream buffer pointer.
        buf_ptr_type open(const std::string& file, std::ios_base::openmode mode = std::ios_base::in);
        //! Is the stream open?
        bool is_open();
        //! Closes the stream.
        void close();
        //! Destructor.
        ~isfstream();
        //! Conversion to void*.
        operator voidptr() const;

        //! Sets the get position to `pos`.
        isfstream& seekg(pos_type pos);
        //! Sets the get position to offset `off` relative to `way`.
        isfstream& seekg(off_type off, ios_base::seekdir way);
        //! Returns the current get position.
        std::streampos tellg();
};
76 | 
77 | } // end namespace
78 | 
79 | #endif
80 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/structure_tree.hpp:
--------------------------------------------------------------------------------
 1 | /*!\file structure_tree.hpp
 2 |    \brief structure_tree.hpp contains a helper class which can represent the memory structure of a class.
 3 |    \author Simon Gog
 4 | */
 5 | #ifndef INCLUDED_SDSL_STRUCTURE_TREE
 6 | #define INCLUDED_SDSL_STRUCTURE_TREE
 7 | 
 8 | #include "uintx_t.hpp"
 9 | #include <unordered_map>
10 | #include <string>
11 | #include <iostream>
12 | #include <sstream>
13 | #include <memory>
14 | #include "config.hpp"
15 | 
16 | //! Namespace for the succinct data structure library
17 | namespace sdsl
18 | {
19 | 
//! Node of a tree describing the memory structure of an object.
/*! Every node has a name, a type string, an accumulated size and a set of
 *  children. Children are identified by their (name, type) pair.
 */
class structure_tree_node
{
    private:
        using map_type = std::unordered_map<std::string,std::unique_ptr<structure_tree_node>>;
        map_type            m_children;

        //! Builds the map key for a (name, type) pair.
        /*! The length of the name is prepended so that distinct pairs can never
         *  produce the same key; plain concatenation would map ("ab","c") and
         *  ("a","bc") to the same child.
         */
        static std::string child_key(const std::string& n, const std::string& t) {
            std::string key = std::to_string(n.size());
            key += ':';
            key += n;
            key += t;
            return key;
        }
    public:
        const map_type& children = m_children; //!< Read-only view of the children.
        size_t              size = 0;          //!< Sum of all values passed to add_size().
        std::string         name;
        std::string         type;
    public:
        //! Creates a node with name `n` and type `t` and no children.
        structure_tree_node(const std::string& n, const std::string& t) : name(n) , type(t) {}

        //! Returns the child with name `n` and type `t`, creating it if it does not exist yet.
        /*! \return Pointer to the child; the node keeps ownership.
         */
        structure_tree_node* add_child(const std::string& n, const std::string& t) {
            const std::string key = child_key(n, t);
            auto child_itr = m_children.find(key);
            if (child_itr != m_children.end()) {
                // child of same type and name exists
                return child_itr->second.get();
            }
            // add new child as we don't have one of this name and type yet
            std::unique_ptr<structure_tree_node> node(new structure_tree_node(n, t));
            structure_tree_node* raw = node.get();
            m_children[key].swap(node);
            return raw;
        }

        //! Adds `s` to the size recorded for this node.
        void add_size(size_t s) { size += s; }
};
47 | 
48 | class structure_tree
49 | {
50 |     public:
51 |         static structure_tree_node* add_child(structure_tree_node* v, const std::string& name, const std::string& type) {
52 |             if (v) return v->add_child(name,type);
53 |             return nullptr;
54 |         };
55 |         static void add_size(structure_tree_node* v, uint64_t value) {
56 |             if (v) v->add_size(value);
57 |         };
58 | };
59 | 
60 | 
//! Writes the structure tree rooted at `v` to `out`; the output format is selected by the template argument F.
/*! Declaration only; no definition is visible in this header.
 *  \param v     Root of the (sub)tree to write.
 *  \param out   Destination stream.
 *  \param level Defaults to 0. NOTE(review): presumably the nesting depth used for indentation; confirm in the definition.
 */
template<format_type F>
void write_structure_tree(const structure_tree_node* v, std::ostream& out, size_t level = 0);
63 | 
64 | 
65 | }
66 | #endif
67 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/suffix_arrays.hpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2008 Simon Gog
 3 | 
 4 |     This program is free software: you can redistribute it and/or modify
 5 |     it under the terms of the GNU General Public License as published by
 6 |     the Free Software Foundation, either version 3 of the License, or
 7 |     (at your option) any later version.
 8 | 
 9 |     This program is distributed in the hope that it will be useful,
10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |     GNU General Public License for more details.
13 | 
14 |     You should have received a copy of the GNU General Public License
15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
16 | */
17 | /*! \file suffix_arrays.hpp
18 |     \brief suffix_arrays.hpp contains generic classes for different suffix array classes.
19 | 	\author Simon Gog
20 | */
21 | #ifndef INCLUDED_SDSL_SUFFIX_ARRAYS
22 | #define INCLUDED_SDSL_SUFFIX_ARRAYS
23 | 
24 | #include "sdsl_concepts.hpp"
25 | 
26 | /** \defgroup csa Compressed Suffix Arrays (CSA) */
27 | 
28 | #include "csa_bitcompressed.hpp"
29 | #include "csa_wt.hpp"
30 | #include "csa_sada.hpp"
31 | #include "wavelet_trees.hpp"
32 | #include "construct.hpp"
33 | #include "suffix_array_algorithm.hpp"
34 | 
35 | namespace sdsl
36 | {
37 | 
//! Typedef for convenient usage of std integer alphabet strategy
/*! Alias of csa_wt with the alphabet strategy fixed to int_alphabet<> and the
 *  wavelet tree defaulting to wt_int<>.
 */
template<class t_wt               = wt_int<>,
         uint32_t t_dens          = 32,
         uint32_t t_inv_dens      = 64,
         class t_sa_sample_strat  = sa_order_sa_sampling<>,
         class t_isa_sample_strat = isa_sampling<>
         >
using csa_wt_int = csa_wt<t_wt, t_dens, t_inv_dens, t_sa_sample_strat, t_isa_sample_strat, int_alphabet<>>;

//! Alias of csa_sada with the alphabet strategy fixed to int_alphabet<>.
template<class t_enc_vec          = enc_vector<>,          // Vector type used to store the Psi-function
         uint32_t t_dens          = 32,                    // Sample density for suffix array (SA) values
         uint32_t t_inv_dens      = 64,                    // Sample density for inverse suffix array (ISA) values
         class t_sa_sample_strat  = sa_order_sa_sampling<>,// Policy class for the SA sampling. Alternative text_order_sa_sampling.
         class t_isa_sample_strat = isa_sampling<>         // Policy class for the ISA sampling.
         >
using csa_sada_int = csa_sada<t_enc_vec, t_dens, t_inv_dens, t_sa_sample_strat, t_isa_sample_strat, int_alphabet<>>;
54 | 
55 | }
56 | 
57 | #endif
58 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/suffix_trees.hpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2009 Simon Gog
 3 | 
 4 |     This program is free software: you can redistribute it and/or modify
 5 |     it under the terms of the GNU General Public License as published by
 6 |     the Free Software Foundation, either version 3 of the License, or
 7 |     (at your option) any later version.
 8 | 
 9 |     This program is distributed in the hope that it will be useful,
10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |     GNU General Public License for more details.
13 | 
14 |     You should have received a copy of the GNU General Public License
15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
16 | */
17 | /*! \file suffix_trees.hpp
18 |     \brief suffix_trees.hpp contains generic classes for different suffix tree classes.
19 | 	\author Simon Gog
20 | */
21 | #ifndef INCLUDED_SDSL_SUFFIX_TREES
22 | #define INCLUDED_SDSL_SUFFIX_TREES
23 | 
24 | /** \defgroup cst Compressed Suffix Trees (CST)
25 |  *   This group contains data structures for compressed suffix trees. The following methods are supported:
26 |  *    - root()
27 |  *    - child(v,c)
28 |  *    - select_child(v)
29 |  *    - select_leaf(i)
30 |  *    - parent(v)
31 |  *    - sl(v)
32 |  *    - lca(v,w)
33 |  *    - ..
34 |  */
35 | 
36 | #include "cst_sct3.hpp"
37 | #include "cst_sada.hpp"
38 | #include "cst_fully.hpp"
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/uintx_t.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef INCLUDED_SDSL_UINTX_T
 2 | #define INCLUDED_SDSL_UINTX_T
 3 | 
 4 | #include <cstdint>
 5 | 
 6 | using std::int8_t;
 7 | using std::int16_t;
 8 | using std::int32_t;
 9 | using std::int64_t;
10 | 
11 | using std::uint8_t;
12 | using std::uint16_t;
13 | using std::uint32_t;
14 | using std::uint64_t;
15 | 
16 | #endif
17 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/vectors.hpp:
--------------------------------------------------------------------------------
 1 | /** \defgroup int_vector int_vector */
 2 | #ifndef SDSL_INCLUDED_VECTORS
 3 | #define SDSL_INCLUDED_VECTORS
 4 | 
 5 | #include "int_vector.hpp"
 6 | #include "enc_vector.hpp"
 7 | #include "vlc_vector.hpp"
 8 | #include "dac_vector.hpp"
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/external/sdsl/include/sdsl/wavelet_trees.hpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2011 Simon Gog
 3 | 
 4 |     This program is free software: you can redistribute it and/or modify
 5 |     it under the terms of the GNU General Public License as published by
 6 |     the Free Software Foundation, either version 3 of the License, or
 7 |     (at your option) any later version.
 8 | 
 9 |     This program is distributed in the hope that it will be useful,
10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 |     GNU General Public License for more details.
13 | 
14 |     You should have received a copy of the GNU General Public License
15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
16 | */
17 | /*! \file wavelet_trees.hpp
18 |     \brief wavelet_trees.hpp contains wavelet tree implementations.
19 | 	\author Simon Gog
20 | */
21 | #ifndef INCLUDED_SDSL_WAVELET_TREES
22 | #define INCLUDED_SDSL_WAVELET_TREES
23 | 
24 | /** \defgroup wt Wavelet Trees (WT)
25 |  *   This group contains data structures for wavelet trees. The following methods are supported:
26 |  *    - []-operator
27 |  *    - rank(i, c)
28 |  *    - select(i, c)
29 |  *    - inverse_select(i)
30 |  */
31 | 
32 | #include "wt_pc.hpp"
33 | #include "wt_blcd.hpp"
34 | #include "wt_gmr.hpp"
35 | #include "wt_huff.hpp"
36 | #include "wt_hutu.hpp"
37 | #include "wt_int.hpp"
38 | #include "wm_int.hpp"
39 | #include "wt_rlmn.hpp"
40 | #include "wt_ap.hpp"
41 | #include "construct.hpp"
42 | #include "wt_algorithm.hpp"
43 | 
44 | namespace sdsl
45 | {
46 | 
//! Alias of wt_pc using hutu_shape as shape and int_tree<> as tree strategy.
/*! The rank/select support types default to the ones nested in t_bitvector.
 */
template<class t_bitvector   = bit_vector,
         class t_rank        = typename t_bitvector::rank_1_type,
         class t_select      = typename t_bitvector::select_1_type,
         class t_select_zero = typename t_bitvector::select_0_type
         >
using wt_hutu_int = wt_pc<hutu_shape,
      t_bitvector,
      t_rank,
      t_select,
      t_select_zero,
      int_tree<>>;

//! Alias of wt_pc using huff_shape as shape and int_tree<> as tree strategy.
/*! The rank/select support types default to the ones nested in t_bitvector.
 */
template<class t_bitvector   = bit_vector,
         class t_rank        = typename t_bitvector::rank_1_type,
         class t_select      = typename t_bitvector::select_1_type,
         class t_select_zero = typename t_bitvector::select_0_type>
using wt_huff_int = wt_pc<huff_shape,
      t_bitvector,
      t_rank,
      t_select,
      t_select_zero,
      int_tree<>>;

//! Alias of wt_pc using balanced_shape as shape and int_tree<> as tree strategy.
/*! The rank/select support types default to the ones nested in t_bitvector.
 */
template<class t_bitvector   = bit_vector,
         class t_rank        = typename t_bitvector::rank_1_type,
         class t_select_one  = typename t_bitvector::select_1_type,
         class t_select_zero = typename t_bitvector::select_0_type>
using wt_blcd_int = wt_pc<balanced_shape,
      t_bitvector,
      t_rank,
      t_select_one,
      t_select_zero,
      int_tree<>>;
80 | }
81 | 
82 | #endif
83 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/coder_fibonacci.cpp:
--------------------------------------------------------------------------------
  1 | #include "sdsl/coder_fibonacci.hpp"
  2 | 
  3 | namespace sdsl
  4 | {
  5 | 
  6 | namespace coder
  7 | {
  8 | 
// Definition of the static member holding the lookup tables read by the decoder
// in this file (fib2bin_16_greedy, fib2bin_0_95, fib2bin_shift).
fibonacci::impl fibonacci::data;
 10 | 
//! Decodes n consecutive code words starting at bit start_idx and returns the sum of the decoded values.
/*! \param d         Pointer to the 64-bit words holding the encoded data.
 *  \param start_idx Bit index of the first code word (word = start_idx>>6, offset = start_idx&0x3F).
 *  \param n         Number of code words to decode; 0 yields 0.
 *  \return          Sum of the n decoded values.
 *
 *  The function first determines how many bits the n code words occupy
 *  (bits_to_decode) and then consumes exactly that many bits through a 64-bit
 *  buffer `w`, adding table lookups to `value`.
 *  NOTE(review): bits::cnt11 / bits::sel11 presumably count / locate "11" bit
 *  pairs, which terminate Fibonacci code words; confirm in bits.hpp.
 */
uint64_t fibonacci::decode_prefix_sum(const uint64_t* d, const size_type start_idx, size_type n)
{
    if (n==0)
        return 0;
//	return decode<true,false,int*>(data, start_idx, n);
    d += (start_idx >> 6); // move to the word that contains the start bit
    size_type i = 0;
    int32_t	bits_to_decode = 0;
    uint64_t w = 0, value = 0;
    // buffered: number of valid bits currently held in w; read: bit offset inside *d
    int16_t buffered = 0, read = start_idx & 0x3F, shift = 0;
    uint16_t temp=0;
    uint64_t carry=0;
    // Count in the first word, ignoring the bits below the start offset.
    i = bits::cnt11(*d & ~bits::lo_set[read], carry);
    if (i<n) {
        // The n-th code word ends in a later word: keep counting word by word.
        uint64_t oldcarry;
        w = 0; // w temporarily serves as word offset relative to d
        do {
            oldcarry = carry;
            i += (temp = bits::cnt11(*(d+(++w)), carry));
        } while (i<n);
        // n-(i-temp) is the index of the wanted terminator inside word d+w.
        bits_to_decode += ((w-1)<<6) + bits::sel11(*(d+w), n-(i-temp), oldcarry) + 65 - read;
        w = 0; // from here on w is the bit buffer
    } else { // i>=n
        bits_to_decode = bits::sel11(*d >> read, n)+1;
    }
    // Shortcuts for the two shortest possible totals.
    // NOTE(review): 2 bits per code word presumably means every value is 1 (sum n),
    // and one extra bit means exactly one value is 2 (sum n+1); confirm against the encoder.
    if (((size_type)bits_to_decode) == n<<1)
        return n;
    if (((size_type)bits_to_decode) == (n<<1)+1)
        return n+1;
    i = 0; // from here on i is the index of the 12-bit block within a long code word (0 = at a code word start)
//	while( bits_to_decode > 0 or buffered > 0){// while not all values are decoded
    do {
        // Refill the buffer w until it holds 64 bits or all bits have been fetched.
        while (buffered < 64 and bits_to_decode > 0) {
            w |= (((*d)>>read)<<buffered);
            if (read >= buffered) {
                // the rest of the current word fitted into the buffer: advance to the next word
                ++d;
                buffered += 64-read;
                bits_to_decode -= (64-read);
                read = 0;
            } else { // read buffered
                // the buffer is full; remember how far into the current word we got
                read += 64-buffered;
                bits_to_decode -= (64-buffered);
                buffered = 64;
            }
            if (bits_to_decode < 0) {
                // fetched more bits than belong to the n code words: drop the surplus
                buffered += bits_to_decode;
                w &= bits::lo_set[buffered];
                bits_to_decode = 0;
            }
        }
        if (!i) { // try to decode multiple values
            // 24 set bits are twelve 2-bit code words; each such run adds 12 to the sum.
            if ((w&0xFFFFFF)==0xFFFFFF) {
                value += 12;
                w >>= 24;
                buffered -= 24;
                if ((w&0xFFFFFF)==0xFFFFFF) {
                    value += 12;
                    w >>= 24;
                    buffered -= 24;
                }
            }
            do {
                // Table entry: upper bits (temp>>11) = number of bits consumed, lower 11 bits = decoded sum.
                temp = fibonacci::data.fib2bin_16_greedy[w&0xFFFF];
                if ((shift=(temp>>11)) > 0) {
                    value += (temp & 0x7FFULL);
                    w >>= shift;
                    buffered -= shift;
                } else {
                    // No complete code word within the 16 bits: decode the first 12-bit block
                    // of a long code word and continue in the i > 0 branch.
                    value += fibonacci::data.fib2bin_0_95[w&0xFFF];
                    w >>= 12;
                    buffered -= 12;
                    i = 1;
                    break;
                }
            } while (buffered>15);
        } else { // i > 0
            // Continue a long code word: the table is indexed by block number i and the next 12 bits.
            value += fibonacci::data.fib2bin_0_95[(i<<12) | (w&0xFFF)];
            shift  = fibonacci::data.fib2bin_shift[w&0x1FFF];
            if (shift > 0) { // if end of decoding
                w >>= shift;
                buffered -= shift;
                i = 0;
            } else { // not end of decoding
                w >>= 12;
                buffered -= 12;
                ++i;
            }
        }
    } while (bits_to_decode > 0 or buffered > 0);
    return value;
}
102 | 
//! Overload that additionally accepts an end index; end_idx is ignored and the call is forwarded to the three-argument version.
uint64_t fibonacci::decode_prefix_sum(const uint64_t* d, const size_type start_idx, SDSL_UNUSED const size_type end_idx, size_type n)
{
    return decode_prefix_sum(d, start_idx, n);
}
107 | 
108 | } // end namespace coder
109 | } // end namespace sdsl
110 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/config.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/config.hpp"
 2 | #include "sdsl/util.hpp"
 3 | 
 4 | namespace sdsl
 5 | {
 6 | cache_config::cache_config(bool f_delete_files, std::string f_dir, std::string f_id, tMSS f_file_map) : delete_files(f_delete_files), dir(f_dir), id(f_id), file_map(f_file_map)
 7 | {
 8 |     if ("" == id) {
 9 |         id = util::to_string(util::pid())+"_"+util::to_string(util::id());
10 |     }
11 | }
12 | 
// Explicit specializations of the static key members for widths 0 and 8:
// width 0 is bound to the *_INT keys, width 8 to the plain keys.
template<>
const char* key_text_trait<0>::KEY_TEXT = conf::KEY_TEXT_INT;
template<>
const char* key_text_trait<8>::KEY_TEXT = conf::KEY_TEXT;

template<>
const char* key_bwt_trait<0>::KEY_BWT = conf::KEY_BWT_INT;
template<>
const char* key_bwt_trait<8>::KEY_BWT = conf::KEY_BWT;
22 | 
23 | }// end namespace sdsl
24 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/construct_config.cpp:
--------------------------------------------------------------------------------
1 | #include "sdsl/construct_config.hpp"
2 | 
3 | namespace sdsl
4 | {
5 | 
// Definition of the static setting byte_algo_sa; it is initialized to LIBDIVSUFSORT.
byte_sa_algo_type construct_config::byte_algo_sa = LIBDIVSUFSORT;
7 | 
8 | }
9 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/construct_isa.cpp:
--------------------------------------------------------------------------------
 1 | /* sdsl - succinct data structures library
 2 |     Copyright (C) 2010 Simon Gog
 3 | */
 4 | #include "sdsl/construct_isa.hpp"
 5 | #include <string>
 6 | 
 7 | namespace sdsl
 8 | {
 9 | 
10 | void construct_isa(cache_config& config)
11 | {
12 |     typedef int_vector<>::size_type size_type;
13 |     if (!cache_file_exists(conf::KEY_ISA, config)) {   // if isa is not already on disk => calculate it
14 |         int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config));
15 |         if (!sa_buf.is_open()) {
16 |             throw std::ios_base::failure("cst_construct: Cannot load SA from file system!");
17 |         }
18 |         int_vector<> isa(sa_buf.size());
19 |         for (size_type i=0; i < isa.size(); ++i) {
20 |             isa[ sa_buf[i] ] = i;
21 |         }
22 |         store_to_cache(isa, conf::KEY_ISA, config);
23 |     }
24 | }
25 | 
26 | }// end namespace
27 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/construct_lcp_helper.cpp:
--------------------------------------------------------------------------------
  1 | #include "sdsl/construct_lcp_helper.hpp"
  2 | #include "sdsl/int_vector.hpp"
  3 | #include <algorithm>
  4 | 
  5 | namespace sdsl
  6 | {
  7 | 
  8 | //! Merges a partial LCP array into the LCP array on disk.
  9 | /*!
 10 |  * \param partial_lcp		Vector containing LCP values for all indexes \f$i\f$ with
 11 |  *                      	index_done[i] == 0. Let x=partail_lcp[rank(index_done, i, 0)];
 12 |  *                      	LCP[i]=x if x!=0 and index_done[i] == 0
 13 |  * \param lcp_file			Path to the LCP array on disk.
 14 |  * \param index_done		Entry index_done[i] indicates if LCP[i] is already calculated.
 15 |  * \param max_lcp_value 	Maximum known LCP value
 16 |  * \param lcp_value_offset	Largest LCP value in lcp_file
 17 |  */
 18 | void insert_lcp_values(int_vector<>& partial_lcp, bit_vector& index_done, std::string lcp_file, uint64_t max_lcp_value, uint64_t lcp_value_offset)
 19 | {
 20 |     std::string tmp_lcp_file  = lcp_file+"_TMP";
 21 |     const uint64_t buffer_size = 1000000; // has to be a multiple of 64
 22 |     typedef int_vector<>::size_type size_type;
 23 |     int_vector_buffer<> lcp_buffer(lcp_file, std::ios::in, buffer_size); // open lcp_file
 24 |     uint64_t n = lcp_buffer.size();
 25 | 
 26 |     // open tmp_lcp_file
 27 |     uint8_t int_width = bits::hi(max_lcp_value-1)+1;
 28 |     int_vector_buffer<> out_buf(tmp_lcp_file, std::ios::out, buffer_size, int_width);		// Output buffer
 29 |     // Write values into buffer
 30 |     for (size_type i=0, calc_idx=0; i < n; ++i) {
 31 |         if (index_done[i]) {   // If value was already calculated
 32 |             out_buf[i] = lcp_buffer[i]; // Copy value
 33 |         } else {
 34 |             if (partial_lcp[calc_idx]) {   // If value was calculated now
 35 |                 // Insert value
 36 |                 out_buf[i] = partial_lcp[calc_idx]+lcp_value_offset;
 37 |                 index_done[i] = true;
 38 |             }
 39 |             ++calc_idx;
 40 |         }
 41 |     }
 42 |     // Close file and replace old file with new one
 43 |     out_buf.close();
 44 |     sdsl::rename(tmp_lcp_file, lcp_file);
 45 | }
 46 | 
 47 | buffered_char_queue::buffered_char_queue():m_widx(0), m_ridx(0), m_sync(true), m_disk_buffered_blocks(0), m_c('?'),m_rb(0), m_wb(0) {}
 48 | 
 49 | void buffered_char_queue::init(const std::string& dir, char c)
 50 | {
 51 |     m_c = c;
 52 |     m_file_name = dir+"buffered_char_queue_"+util::to_string(util::pid());
 53 | //		m_stream.rdbuf()->pubsetbuf(0, 0);
 54 | }
 55 | 
//! Closes the backing stream and removes the backing file.
buffered_char_queue::~buffered_char_queue()
{
    m_stream.close(); // close first so that the file is no longer in use when it is removed
    sdsl::remove(m_file_name);
}
 61 | 
//! Appends the byte x to the queue.
/*! The byte is always stored in the write buffer. While the queue is in sync
 *  (m_sync set) it is also mirrored into the read buffer at the same index.
 *  When the write buffer becomes full it is written to the backing file, unless
 *  the queue was in sync, in which case the read buffer already holds the block.
 */
void buffered_char_queue::push_back(uint8_t x)
{
    m_write_buf[m_widx] = x;
    if (m_sync) {
        m_read_buf[m_widx] = x;
    }
    ++m_widx;
    if (m_widx == m_buffer_size) {
        if (!m_sync) { // if not sync, write block to disk
            if (!m_stream.is_open()) {
                // the file is created lazily, on the first block that has to be spilled
                m_stream.open(m_file_name, std::ios::in | std::ios::out | std::ios::binary | std::ios::trunc);
            }
            // m_wb counts the blocks written so far and determines the file offset
            m_stream.seekp(m_buffer_size * (m_wb++), std::ios::beg);
            m_stream.write((char*) m_write_buf, m_buffer_size);
            ++m_disk_buffered_blocks;
        }
        // a full block ends the in-sync state; the write buffer restarts at index 0
        m_sync = 0;
        m_widx = 0;
    }
}
 82 | 
//! Removes and returns the byte at the front of the queue.
/*! When the read buffer is exhausted it is refilled either from the next block
 *  in the backing file or, if no blocks are left on disk, from the write
 *  buffer; in the latter case the queue is in sync again.
 *  NOTE(review): no check for an empty queue is visible here; presumably callers
 *  never pop more bytes than were pushed - confirm.
 */
uint8_t buffered_char_queue::pop_front()
{
    uint8_t x = m_read_buf[m_ridx];
    ++m_ridx;
    if (m_ridx ==  m_buffer_size) {
        if (m_disk_buffered_blocks > 0) {
            // m_rb counts the blocks read so far and determines the file offset
            m_stream.seekg(m_buffer_size * (m_rb++), std::ios::beg);
            m_stream.read((char*) m_read_buf, m_buffer_size);
            --m_disk_buffered_blocks;
        } else { // m_disk_buffered_blocks == 0
            m_sync = 1;
            // NOTE(review): copies m_widx+1 bytes although only m_widx bytes have been
            // written since the last wrap; confirm the extra byte is intended.
            memcpy(m_read_buf, m_write_buf, m_widx+1);
        }
        m_ridx = 0;
    }
    return x;
}
100 | 
101 | void lcp_info(cache_config& config)
102 | {
103 |     typedef int_vector<>::size_type size_type;
104 |     int_vector_buffer<> lcp_buf(cache_file_name(conf::KEY_LCP, config));
105 |     size_type n = lcp_buf.size();
106 | 
107 |     size_type max_lcp = 0;
108 |     size_type sum_lcp = 0;
109 |     for (size_type i=0; i < n; ++i) {
110 |         if (lcp_buf[i] > max_lcp)
111 |             max_lcp = lcp_buf[i];
112 |         sum_lcp += lcp_buf[i];
113 |     }
114 |     std::cout<<"# max lcp = " << max_lcp << std::endl;
115 |     std::cout<<"# sum lcp = " << sum_lcp << std::endl;
116 |     std::cout<<"# avg lcp = " << sum_lcp/(double)n << std::endl;
117 | }
118 | 
119 | } // end namespace sdsl
120 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/construct_sa.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/construct_sa.hpp"
 2 | 
 3 | namespace sdsl
 4 | {
 5 | 
 6 | void construct_sa_se(cache_config& config)
 7 | {
 8 |     int_vector<8> text;
 9 |     load_from_file(text, cache_file_name(conf::KEY_TEXT, config));
10 | 
11 |     if (text.size() <= 2) {
12 |         // If text is c$ or $ write suffix array [1, 0] or [0]
13 |         int_vector_buffer<> sa(cache_file_name(conf::KEY_SA, config), std::ios::out, 8, 2);
14 |         if (text.size() == 2) {
15 |             sa.push_back(1);
16 |         }
17 |         sa.push_back(0);
18 |     } else {
19 |         _construct_sa_se<int_vector<8>>(text, cache_file_name(conf::KEY_SA, config), 256, 0);
20 |     }
21 |     register_cache_file(conf::KEY_SA, config);
22 | }
23 | 
24 | }
25 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/csa_alphabet_strategy.cpp:
--------------------------------------------------------------------------------
  1 | /* sdsl - succinct data structures library
  2 |     Copyright (C) 2012 Simon Gog
  3 | 
  4 |     This program is free software: you can redistribute it and/or modify
  5 |     it under the terms of the GNU General Public License as published by
  6 |     the Free Software Foundation, either version 3 of the License, or
  7 |     (at your option) any later version.
  8 | 
  9 |     This program is distributed in the hope that it will be useful,
 10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 |     GNU General Public License for more details.
 13 | 
 14 |     You should have received a copy of the GNU General Public License
 15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
 16 | */
 17 | #include "sdsl/csa_alphabet_strategy.hpp"
 18 | 
 19 | namespace sdsl
 20 | {
 21 | 
// Definitions of the static key names of key_trait<8>; both are set to the
// corresponding conf:: key constants.
const char* key_trait<8>::KEY_BWT = conf::KEY_BWT;
const char* key_trait<8>::KEY_TEXT = conf::KEY_TEXT;
 24 | 
 25 | byte_alphabet::byte_alphabet(int_vector_buffer<8>& text_buf, int_vector_size_type len):
 26 |     char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma)
 27 | {
 28 |     m_sigma = 0;
 29 |     if (0 == len or 0 == text_buf.size())
 30 |         return;
 31 |     assert(len <= text_buf.size());
 32 |     // initialize vectors
 33 |     util::assign(m_C	    , int_vector<64>(257, 0));
 34 |     util::assign(m_char2comp, int_vector<8>(256,0));
 35 |     util::assign(m_comp2char, int_vector<8>(256,0));
 36 |     // count occurrences of each symbol
 37 |     for (size_type i=0; i < len; ++i) {
 38 |         ++m_C[text_buf[i]];
 39 |     }
 40 |     assert(1 == m_C[0]); // null-byte should occur exactly once
 41 |     m_sigma = 0;
 42 |     for (int i=0; i<256; ++i)
 43 |         if (m_C[i]) {
 44 |             m_char2comp[i] 	 	= m_sigma;
 45 |             m_comp2char[sigma]  = i;
 46 |             m_C[m_sigma]		= m_C[i];
 47 |             ++m_sigma;
 48 |         }
 49 |     m_comp2char.resize(m_sigma);
 50 |     m_C.resize(m_sigma+1);
 51 |     for (int i=(int)m_sigma; i > 0; --i) m_C[i] = m_C[i-1];
 52 |     m_C[0] = 0;
 53 |     for (int i=1; i <= (int)m_sigma; ++i) m_C[i] += m_C[i-1];
 54 |     assert(C[sigma]==len);
 55 | }
 56 | 
 57 | 
 58 | byte_alphabet::byte_alphabet(): char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma)
 59 | {
 60 |     m_sigma = 0;
 61 | }
 62 | 
 63 | void byte_alphabet::copy(const byte_alphabet& bas)
 64 | {
 65 |     m_char2comp = bas.m_char2comp;
 66 |     m_comp2char = bas.m_comp2char;
 67 |     m_C			= bas.m_C;
 68 |     m_sigma		= bas.m_sigma;
 69 | }
 70 | 
 71 | byte_alphabet::byte_alphabet(const byte_alphabet& bas): char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma)
 72 | {
 73 |     copy(bas);
 74 | }
 75 | 
 76 | byte_alphabet& byte_alphabet::operator=(const byte_alphabet& bas)
 77 | {
 78 |     if (this != &bas) {
 79 |         copy(bas);
 80 |     }
 81 |     return *this;
 82 | }
 83 | 
 84 | byte_alphabet& byte_alphabet::operator=(byte_alphabet&& bas)
 85 | {
 86 |     if (this != &bas) {
 87 |         m_char2comp = std::move(bas.m_char2comp);
 88 |         m_comp2char = std::move(bas.m_comp2char);
 89 |         m_C         = std::move(bas.m_C);
 90 |         m_sigma     = std::move(bas.m_sigma);
 91 |     }
 92 |     return *this;
 93 | }
 94 | 
 95 | void byte_alphabet::swap(byte_alphabet& bas)
 96 | {
 97 |     m_char2comp.swap(bas.m_char2comp);
 98 |     m_comp2char.swap(bas.m_comp2char);
 99 |     m_C.swap(bas.m_C);
100 |     std::swap(m_sigma, bas.m_sigma);
101 | }
102 | 
103 | byte_alphabet::size_type byte_alphabet::serialize(std::ostream& out, structure_tree_node* v, std::string name)const
104 | {
105 |     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
106 |     size_type written_bytes = 0;
107 |     written_bytes += m_char2comp.serialize(out, child, "m_char2comp");
108 |     written_bytes += m_comp2char.serialize(out, child, "m_comp2char");
109 |     written_bytes += m_C.serialize(out, child, "m_C");
110 |     written_bytes += write_member(m_sigma, out, child, "m_sigma");
111 |     structure_tree::add_size(child, written_bytes);
112 |     return written_bytes;
113 | }
114 | 
//! Read the alphabet from in.
/*! The members are read in the same order in which serialize() writes
 *  them: m_char2comp, m_comp2char, m_C, m_sigma.
 */
void byte_alphabet::load(std::istream& in)
{
    m_char2comp.load(in);
    m_comp2char.load(in);
    m_C.load(in);
    read_member(m_sigma, in);
}
122 | }
123 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/io.cpp:
--------------------------------------------------------------------------------
  1 | #include "sdsl/io.hpp"
  2 | #include "sdsl/sfstream.hpp"
  3 | #include "sdsl/util.hpp"
  4 | #include <vector>
  5 | 
  6 | namespace sdsl
  7 | {
  8 | 
  9 | 
 10 | bool store_to_file(const char* v, const std::string& file)
 11 | {
 12 |     osfstream out(file, std::ios::binary | std::ios::trunc | std::ios::out);
 13 |     if (!out) {
 14 |         if (util::verbose) {
 15 |             std::cerr<<"ERROR: store_to_file(const char *v, const std::string&)"<<std::endl;
 16 |             return false;
 17 |         }
 18 |     }
 19 |     uint64_t n = strlen((const char*)v);
 20 |     out.write(v, n);
 21 |     out.close();
 22 |     return true;
 23 | }
 24 | 
 25 | bool store_to_file(const std::string& v, const std::string& file)
 26 | {
 27 |     osfstream out(file, std::ios::binary | std::ios::trunc | std::ios::out);
 28 |     if (!out) {
 29 |         if (util::verbose) {
 30 |             std::cerr<<"ERROR: store_to_file(const std::string& v, const std::string&)"<<std::endl;
 31 |             return false;
 32 |         }
 33 |     }
 34 |     out.write(v.data(),v.size());
 35 |     out.close();
 36 |     return true;
 37 | }
 38 | 
 39 | 
 40 | bool store_to_checked_file(const char* v, const std::string& file)
 41 | {
 42 |     std::string checkfile = file+"_check";
 43 |     osfstream out(checkfile, std::ios::binary | std::ios::trunc | std::ios::out);
 44 |     if (!out) {
 45 |         if (util::verbose) {
 46 |             std::cerr<<"ERROR: store_to_checked_file(const char *v, const std::string&)"<<std::endl;
 47 |             return false;
 48 |         }
 49 |     }
 50 |     add_hash(v, out);
 51 |     out.close();
 52 |     return store_to_file(v, file);
 53 | }
 54 | 
 55 | 
 56 | template<>
 57 | size_t write_member<std::string>(const std::string& t, std::ostream& out, structure_tree_node* v, std::string name)
 58 | {
 59 |     structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(t));
 60 |     size_t written_bytes = 0;
 61 |     written_bytes += write_member(t.size(), out, child, "length");
 62 |     out.write(t.c_str(), t.size());
 63 |     written_bytes += t.size();
 64 |     structure_tree::add_size(v, written_bytes);
 65 |     return written_bytes;
 66 | }
 67 | 
 68 | template<>
 69 | void read_member<std::string>(std::string& t, std::istream& in)
 70 | {
 71 |     std::string::size_type size;
 72 |     read_member(size, in);
 73 |     char* buf = new char[size];
 74 |     in.read(buf, size);
 75 |     std::string temp(buf, size);
 76 |     delete [] buf;
 77 |     t.swap(temp);
 78 | }
 79 | 
 80 | uint64_t _parse_number(std::string::const_iterator& c, const std::string::const_iterator& end)
 81 | {
 82 |     std::string::const_iterator s = c;
 83 |     while (c != end and isdigit(*c)) ++c;
 84 |     if (c > s) {
 85 |         return std::stoull(std::string(s,c));
 86 |     } else {
 87 |         return 0;
 88 |     }
 89 | }
 90 | 
 91 | std::string cache_file_name(const std::string& key, const cache_config& config)
 92 | {
 93 |     if (config.file_map.count(key) != 0) {
 94 |         return config.file_map.at(key);
 95 |     }
 96 |     return config.dir+"/"+key+"_"+config.id+".sdsl";
 97 | }
 98 | 
 99 | void register_cache_file(const std::string& key, cache_config& config)
100 | {
101 |     std::string file_name = cache_file_name(key, config);
102 |     isfstream in(file_name);
103 |     if (in) {  // if file exists, register it.
104 |         config.file_map[key] = file_name;
105 |     }
106 | }
107 | 
108 | 
109 | bool cache_file_exists(const std::string& key, const cache_config& config)
110 | {
111 |     std::string file_name = cache_file_name(key, config);
112 |     isfstream in(file_name);
113 |     if (in) {
114 |         in.close();
115 |         return true;
116 |     }
117 |     return false;
118 | }
119 | 
120 | std::string tmp_file(const cache_config& config, std::string name_part)
121 | {
122 |     return config.dir+"/"+ util::to_string(util::pid()) + "_" + util::to_string(util::id()) + name_part + ".sdsl";
123 | }
124 | 
125 | std::string tmp_file(const std::string& filename, std::string name_part)
126 | {
127 |     return util::dirname(filename) + "/" + util::to_string(util::pid()) + "_" +
128 |            util::to_string(util::id()) + name_part + ".sdsl";
129 | }
130 | 
131 | }// end namespace sdsl
132 | 
133 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/lcp_support_tree.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/lcp_support_tree.hpp"
 2 | 
 3 | namespace sdsl
 4 | {
 5 | 
 6 | void construct_first_child_lcp(int_vector_buffer<>& lcp_buf, int_vector<>& fc_lcp)
 7 | {
 8 |     typedef int_vector_size_type size_type;
 9 |     size_type n = lcp_buf.size();
10 |     if (n == 0) {	// if n == 0 we are done
11 |         fc_lcp = int_vector<>(0);
12 |     }
13 |     {
14 |         int_vector<> tmp(n, 0, bits::hi(n)+1);
15 |         fc_lcp.swap(tmp);
16 |     }
17 | 
18 |     size_type fc_cnt=0; // first child counter
19 |     sorted_multi_stack_support vec_stack(n);
20 |     size_type y;
21 |     for (size_type i=0, x; i < n; ++i) {
22 |         x = lcp_buf[i];
23 |         while (!vec_stack.empty() and x < vec_stack.top()) {
24 |             y = vec_stack.top();
25 |             if (vec_stack.pop()) {
26 |                 fc_lcp[fc_cnt++] = y;
27 |             }
28 |         }
29 |         vec_stack.push(x);
30 |     }
31 | 
32 |     while (!vec_stack.empty()) {
33 |         y = vec_stack.top();
34 |         if (vec_stack.pop()) {
35 |             fc_lcp[fc_cnt++] = y;
36 |         }
37 |     }
38 |     if (fc_cnt < fc_lcp.size()) {
39 |         fc_lcp.resize(fc_cnt);
40 |     }
41 | }
42 | 
43 | }
44 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/louds_tree.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/louds_tree.hpp"
 2 | 
 3 | namespace sdsl
 4 | {
 5 | std::ostream& operator<<(std::ostream& os, const louds_node& v)
 6 | {
 7 |     os<<"("<<v.nr<<","<<v.pos<<")";
 8 |     return os;
 9 | }
10 | }
11 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/nn_dict_dynamic.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/nn_dict_dynamic.hpp"
 2 | #include "sdsl/util.hpp"
 3 | 
 4 | namespace sdsl
 5 | {
 6 | namespace util
 7 | {
//! Reset nn by setting every entry of its internal tree (nn.m_tree) to 0.
void set_zero_bits(nn_dict_dynamic& nn)
{
    util::set_to_value(nn.m_tree, 0);
}
12 | } // end util
13 | } // end sdsl
14 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/ram_filebuf.cpp:
--------------------------------------------------------------------------------
  1 | #include "sdsl/ram_filebuf.hpp"
  2 | #include <iostream>
  3 | #include <limits>
  4 | 
  5 | #ifdef WIN32
  6 | #include "iso646.h"
  7 | #endif
  8 | 
  9 | 
 10 | namespace sdsl
 11 | {
 12 | 
 13 | 
// Destructor: nothing to release here; the buffer only holds a pointer to
// the file content (m_ram_file).
ram_filebuf::~ram_filebuf() {}

// Default constructor: does not attach a file.
// NOTE(review): m_ram_file is not set here - presumably it has a default
// member initializer in the header; confirm, since is_open() reads it.
ram_filebuf::ram_filebuf() {}
 17 | 
 18 | ram_filebuf::ram_filebuf(std::vector<char>& ram_file) : m_ram_file(&ram_file)
 19 | {
 20 |     char* begin = m_ram_file->data();
 21 |     char* end   = begin + m_ram_file->size();
 22 |     setg(begin, begin, end); // set get pointers eback(), eptr(), egptr()
 23 | }
 24 | 
 25 | std::streambuf*
 26 | ram_filebuf::open(const std::string name, std::ios_base::openmode mode)
 27 | {
 28 |     // open ram_file
 29 |     if ((mode & std::ios_base::in) and !(mode & std::ios_base::trunc)) {
 30 |         // file must exist, initial position at the start
 31 |         if (!ram_fs::exists(name)) {
 32 |             m_ram_file = nullptr;
 33 |         } else {
 34 |             m_ram_file = &ram_fs::content(name);
 35 |         }
 36 |     } else { // existence of file not required
 37 |         if (!ram_fs::exists(name)) {
 38 |             // create empty file, if it does not yet exist
 39 |             ram_fs::store(name, ram_fs::content_type());// TODO: create method in ram_fs?? or store w 1 arg?
 40 |         }
 41 |         m_ram_file = &ram_fs::content(name);
 42 |         if ((mode & std::ios_base::out) and !(mode & std::ios_base::app)) {
 43 |             m_ram_file->clear();
 44 |         }
 45 |     }
 46 | 
 47 |     if (m_ram_file and(mode & std::ios_base::trunc)) {
 48 |         m_ram_file->clear();
 49 |     }
 50 |     if (m_ram_file) {
 51 |         if (mode & std::ios_base::ate) {
 52 |             // TODO: move put pointer to the end of the file
 53 |         } else {
 54 | 
 55 |         }
 56 |         setg(m_ram_file->data(), m_ram_file->data(), m_ram_file->data()+m_ram_file->size());
 57 |         setp(m_ram_file->data(), m_ram_file->data()+m_ram_file->size());
 58 |     }
 59 | // ATTENTION: if m_ram_file->size() == 0, then data might be nullptr !!!
 60 |     return m_ram_file ? this : nullptr;
 61 | }
 62 | 
//! Returns true iff the buffer is currently attached to a RAM file
//! (i.e. m_ram_file is not nullptr).
bool
ram_filebuf::is_open()
{
    return m_ram_file!=nullptr;
}
 68 | 
 69 | ram_filebuf*
 70 | ram_filebuf::close()
 71 | {
 72 |     if (!this->is_open())
 73 |         return nullptr;
 74 |     m_ram_file = nullptr;
 75 |     setg(nullptr, nullptr, nullptr);
 76 |     setp(nullptr, nullptr);
 77 |     return this;
 78 | }
 79 | 
 80 | ram_filebuf::pos_type
 81 | ram_filebuf::seekpos(pos_type sp, std::ios_base::openmode mode)
 82 | {
 83 |     if (sp >= (pos_type)0 and sp <= (pos_type)m_ram_file->size()) {
 84 |         setg(m_ram_file->data(), m_ram_file->data()+sp, m_ram_file->data()+m_ram_file->size());
 85 |         setp(m_ram_file->data(), m_ram_file->data()+m_ram_file->size());
 86 |         pbump64(sp);
 87 |     } else {
 88 |         if (mode & std::ios_base::out) {
 89 |             // extend buffer
 90 |             m_ram_file->resize(sp, 0);
 91 |             setg(m_ram_file->data(), m_ram_file->data()+sp, m_ram_file->data()+m_ram_file->size());
 92 |             setp(m_ram_file->data(), m_ram_file->data()+m_ram_file->size());
 93 |             pbump64(sp);
 94 |         } else {
 95 |             return pos_type(off_type(-1));
 96 |         }
 97 |     }
 98 |     return sp;
 99 | }
100 | 
101 | ram_filebuf::pos_type
102 | ram_filebuf::pubseekoff(off_type off, std::ios_base::seekdir way,
103 |                         std::ios_base::openmode which)
104 | {
105 |     if (std::ios_base::beg == way) {
106 |         if (seekpos(off, which) == pos_type(-1)) {
107 |             return pos_type(-1);
108 |         }
109 |     } else if (std::ios_base::cur == way) {
110 |         if (seekpos(gptr()-eback()+off, which) == pos_type(-1)) {
111 |             return pos_type(-1);
112 |         }
113 |     } else if (std::ios_base::end == way) {
114 |         if (seekpos(egptr()-eback()+off, which) == pos_type(-1)) {
115 |             return pos_type(-1);
116 |         }
117 |     }
118 |     return gptr()-eback();
119 | }
120 | 
121 | 
122 | ram_filebuf::pos_type
123 | ram_filebuf::pubseekpos(pos_type sp, std::ios_base::openmode which)
124 | {
125 |     if (seekpos(sp, which) == pos_type(-1)) {
126 |         return pos_type(-1);
127 |     } else {
128 |         return gptr()-eback();
129 |     }
130 | }
131 | 
//! Synchronization is a no-op for this buffer; always returns 0 (success).
int
ram_filebuf::sync()
{
    return 0; // we are always in sync, since buffer is sink
}
137 | 
138 | ram_filebuf::int_type
139 | ram_filebuf::overflow(int_type c)
140 | {
141 |     if (m_ram_file) {
142 |         m_ram_file->push_back(c);
143 |         setp(m_ram_file->data(), m_ram_file->data()+m_ram_file->size());
144 |         std::ptrdiff_t add = epptr()-pbase();
145 |         pbump64(add);
146 |         setg(m_ram_file->data(), gptr(), m_ram_file->data()+m_ram_file->size());
147 |     }
148 |     return traits_type::to_int_type(c);
149 | }
150 | 
151 | void ram_filebuf::pbump64(std::ptrdiff_t x)
152 | {
153 |     while (x > std::numeric_limits<int>::max()) {
154 |         pbump(std::numeric_limits<int>::max());
155 |         x -= std::numeric_limits<int>::max();
156 |     }
157 |     pbump(x);
158 | }
159 | 
160 | }
161 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/ram_fs.cpp:
--------------------------------------------------------------------------------
  1 | #include "sdsl/ram_fs.hpp"
  2 | #include "sdsl/util.hpp"
  3 | #include <cstdio>
  4 | #include <iostream>
  5 | #include <algorithm>
  6 | 
// Counter incremented by every ram_fs_initializer constructor and
// decremented by every destructor in this file.
static int nifty_counter = 0;

// Definitions of the static members of ram_fs: the name -> content map and
// the recursive mutex locked by the ram_fs member functions below.
sdsl::ram_fs::mss_type sdsl::ram_fs::m_map;
std::recursive_mutex sdsl::ram_fs::m_rlock;
 11 | 
 12 | 
 13 | sdsl::ram_fs_initializer::ram_fs_initializer()
 14 | {
 15 |     if (0 == nifty_counter++) {
 16 |         if (!ram_fs::m_map.empty()) {
 17 |             throw std::logic_error("Static preinitialized object is not empty.");
 18 |         }
 19 |     }
 20 | }
 21 | 
 22 | sdsl::ram_fs_initializer::~ram_fs_initializer()
 23 | {
 24 |     if (0 == --nifty_counter) {
 25 |         // clean up
 26 |     }
 27 | }
 28 | 
 29 | namespace sdsl
 30 | {
 31 | 
// Default constructor: no per-object state is initialized here (m_map and
// m_rlock are static members defined at the top of this file).
ram_fs::ram_fs() {}
 33 | 
 34 | void
 35 | ram_fs::store(const std::string& name, content_type data)
 36 | {
 37 |     std::lock_guard<std::recursive_mutex> lock(m_rlock);
 38 |     if (!exists(name)) {
 39 |         std::string cname = name;
 40 |         m_map.insert(std::make_pair(std::move(cname), std::move(data)));
 41 |     } else {
 42 |         m_map[name] = std::move(data);
 43 |     }
 44 | }
 45 | 
 46 | bool
 47 | ram_fs::exists(const std::string& name)
 48 | {
 49 |     std::lock_guard<std::recursive_mutex> lock(m_rlock);
 50 |     return m_map.find(name) != m_map.end();
 51 | }
 52 | 
//! Return a reference to the content stored under name.
// The lock is held only while the entry is looked up; the returned
// reference is used by the caller without the lock.
// NOTE(review): m_map[name] presumably creates an empty entry when name is
// missing (std::map semantics) - confirm against the mss_type typedef.
ram_fs::content_type&
ram_fs::content(const std::string& name)
{
    std::lock_guard<std::recursive_mutex> lock(m_rlock);
    return m_map[name];
}
 59 | 
 60 | size_t
 61 | ram_fs::file_size(const std::string& name)
 62 | {
 63 |     std::lock_guard<std::recursive_mutex> lock(m_rlock);
 64 |     if (exists(name)) {
 65 |         return m_map[name].size();
 66 |     } else {
 67 |         return 0;
 68 |     }
 69 | }
 70 | 
//! Erase the RAM file name from the map.
// Always returns 0, whether or not an entry called name existed.
int
ram_fs::remove(const std::string& name)
{
    std::lock_guard<std::recursive_mutex> lock(m_rlock);
    m_map.erase(name);
    return 0;
}
 78 | 
 79 | int
 80 | ram_fs::rename(const std::string old_filename, const std::string new_filename)
 81 | {
 82 |     std::lock_guard<std::recursive_mutex> lock(m_rlock);
 83 |     m_map[new_filename] = std::move(m_map[old_filename]);
 84 |     remove(old_filename);
 85 |     return 0;
 86 | }
 87 | 
 88 | bool is_ram_file(const std::string& file)
 89 | {
 90 |     if (file.size() > 0) {
 91 |         if (file[0]=='@') {
 92 |             return true;
 93 |         }
 94 |     }
 95 |     return false;
 96 | }
 97 | 
 98 | std::string ram_file_name(const std::string& file)
 99 | {
100 |     if (is_ram_file(file)) {
101 |         return file;
102 |     } else {
103 |         return "@" + file;
104 |     }
105 | }
106 | 
107 | std::string disk_file_name(const std::string& file)
108 | {
109 |     if (!is_ram_file(file)) {
110 |         return file;
111 |     } else {
112 |         return file.substr(1);
113 |     }
114 | }
115 | 
116 | int remove(const std::string& file)
117 | {
118 |     if (is_ram_file(file)) {
119 |         return ram_fs::remove(file);
120 |     } else {
121 |         return std::remove(file.c_str());
122 |     }
123 | }
124 | 
125 | int rename(const std::string& old_filename, const std::string& new_filename)
126 | {
127 |     if (is_ram_file(old_filename)) {
128 |         if (!is_ram_file(new_filename)) {  // error, if new file is not also RAM-file
129 |             return -1;
130 |         }
131 |         return ram_fs::rename(old_filename, new_filename);
132 |     } else {
133 |         return std::rename(old_filename.c_str(), new_filename.c_str());
134 |     }
135 | }
136 | 
137 | } // end namespace sdsl
138 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/rrr_vector_15.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/rrr_vector_15.hpp"
 2 | 
 3 | //! Namespace for the succinct data structure library
 4 | namespace sdsl
 5 | {
// Definition of the static member binomial15::iii (an object of the inner
// class binomial15::impl); it is default-constructed here.
binomial15::impl binomial15::iii;
 8 | 
 9 | } // end namespace
10 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/sd_vector.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/sd_vector.hpp"
 2 | #include <cassert>
 3 | 
 4 | //! Namespace for the succinct data structure library
 5 | namespace sdsl
 6 | {
 7 | 
 8 | sd_vector_builder::sd_vector_builder() :
 9 |     m_size(0), m_capacity(0),
10 |     m_wl(0),
11 |     m_tail(0), m_items(0),
12 |     m_last_high(0), m_highpos(0)
13 | {
14 | }
15 | 
16 | sd_vector_builder::sd_vector_builder(size_type n, size_type m) :
17 |     m_size(n), m_capacity(m),
18 |     m_wl(0),
19 |     m_tail(0), m_items(0),
20 |     m_last_high(0), m_highpos(0)
21 | {
22 |     if(m_capacity > m_size)
23 |     {
24 |         throw std::runtime_error("sd_vector_builder: requested capacity is larger than vector size.");
25 |     }
26 | 
27 |     size_type logm = bits::hi(m_capacity) + 1;
28 |     const size_type logn = bits::hi(m_size) + 1;
29 |     if(logm == logn)
30 |     {
31 |         --logm; // to ensure logn-logm > 0
32 |         assert(logn - logm > 0);
33 |     }
34 |     m_wl = logn - logm;
35 |     m_low = int_vector<>(m_capacity, 0, m_wl);
36 |     m_high = bit_vector(m_capacity + (1ULL << logm), 0);
37 | }
38 | 
39 | void
40 | sd_vector_builder::swap(sd_vector_builder& sdb)
41 | {
42 |     std::swap(m_size, sdb.m_size);
43 |     std::swap(m_capacity, sdb.m_capacity);
44 |     std::swap(m_wl, sdb.m_wl);
45 |     std::swap(m_tail, sdb.m_tail);
46 |     std::swap(m_items, sdb.m_items);
47 |     std::swap(m_last_high, sdb.m_last_high);
48 |     std::swap(m_highpos, sdb.m_highpos);
49 |     m_low.swap(sdb.m_low);
50 |     m_high.swap(sdb.m_high);
51 | }
52 | 
53 | template<>
54 | sd_vector<>::sd_vector(sd_vector_builder& builder)
55 | {
56 |     if(builder.items() != builder.capacity())
57 |     {
58 |       throw std::runtime_error("sd_vector: the builder is not full.");
59 |     }
60 | 
61 |     m_size = builder.m_size;
62 |     m_wl = builder.m_wl;
63 |     m_low.swap(builder.m_low);
64 |     m_high.swap(builder.m_high);
65 |     util::init_support(m_high_1_select, &m_high);
66 |     util::init_support(m_high_0_select, &m_high);
67 | 
68 |     builder = sd_vector_builder();
69 | }
70 | 
71 | } // end namespace
72 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/uint128_t.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/uint128_t.hpp"
 2 | 
 3 | //! Namespace for the succinct data structure library
 4 | namespace sdsl
 5 | {
 6 | 
 7 | std::ostream& operator<<(std::ostream& os, const uint128_t& x)
 8 | {
 9 |     uint64_t X[2] = {(uint64_t)(x >> 64), (uint64_t)x};
10 |     for (int j=0; j < 2; ++j) {
11 |         for (int i=0; i < 16; ++i) {
12 |             os << std::hex << ((X[j]>>60)&0xFULL) << std::dec;
13 |             X[j] <<= 4;
14 |         }
15 |     }
16 |     return os;
17 | }
18 | 
19 | } // end namespace
20 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/uint256_t.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/uint256_t.hpp"
 2 | 
 3 | //! Namespace for the succinct data structure library
 4 | namespace sdsl
 5 | {
 6 | std::ostream& operator<<(std::ostream& os, const uint256_t& x)
 7 | {
 8 |     uint64_t X[4] = {(uint64_t)(x.m_high >> 64), (uint64_t)x.m_high, x.m_mid, x.m_lo};
 9 |     for (int j=0; j < 4; ++j) {
10 |         for (int i=0; i < 16; ++i) {
11 |             os << std::hex << ((X[j]>>60)&0xFULL) << std::dec;
12 |             X[j] <<= 4;
13 |         }
14 |     }
15 |     return os;
16 | }
17 | } // end namespace
18 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/util.cpp:
--------------------------------------------------------------------------------
  1 | /* sdsl - succinct data structures library
  2 |     Copyright (C) 2009-2013 Simon Gog
  3 | 
  4 |     This program is free software: you can redistribute it and/or modify
  5 |     it under the terms of the GNU General Public License as published by
  6 |     the Free Software Foundation, either version 3 of the License, or
  7 |     (at your option) any later version.
  8 | 
  9 |     This program is distributed in the hope that it will be useful,
 10 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 |     GNU General Public License for more details.
 13 | 
 14 |     You should have received a copy of the GNU General Public License
 15 |     along with this program.  If not, see http://www.gnu.org/licenses/ .
 16 | */
 17 | 
 18 | #include "sdsl/util.hpp"
 19 | 
 20 | #include <sys/types.h> // for file_size
 21 | #include <sys/stat.h>  // for file_size
 22 | #include <iomanip>
 23 | #include <vector>
 24 | #include <string>
 25 | 
 26 | #include <type_traits>
 27 | #include <typeinfo>
 28 | #ifndef MSVC_COMPILER
 29 | #include <cxxabi.h>
 30 | #endif
 31 | 
 32 | namespace sdsl
 33 | {
 34 | 
 35 | namespace util
 36 | {
 37 | 
// Definition of the static member _id_helper::id, starting at 0.
uint64_t _id_helper::id = 0;
 39 | 
//! Return the file-name component of file.
// A leading RAM-file prefix is stripped first. The path is duplicated into
// a C buffer, passed to the platform routine (::_splitpath_s with MSVC,
// ::basename otherwise) and the buffer is freed before returning.
std::string basename(std::string file)
{
    file = disk_file_name(file); // remove RAM-prefix
#ifdef MSVC_COMPILER
    char* c = _strdup((const char*)file.c_str());
    char file_name[_MAX_FNAME] = { 0 };
    ::_splitpath_s(c, NULL, 0, NULL, NULL, file_name, _MAX_FNAME, NULL, 0);
    std::string res(file_name);
#else
    char* c = strdup((const char*)file.c_str());
    std::string res = std::string(::basename(c));
#endif
    free(c);
    return res;
}
 55 | 
//! Return the directory component of file.
// The RAM-file prefix is stripped before the platform routine is called
// (::_splitpath_s with MSVC, ::dirname otherwise). For RAM files the prefix
// is re-attached only when the result is "." (giving the bare prefix) or
// "/"; any other directory is returned without the prefix.
std::string dirname(std::string file)
{
    bool ram_file = is_ram_file(file);
    file = disk_file_name(file); // remove RAM-prefix
#ifdef MSVC_COMPILER
    char* c = _strdup((const char*)file.c_str());
    char dir_name[_MAX_DIR] = { 0 };
    char drive[_MAX_DRIVE] = {0};
    ::_splitpath_s(c, drive, _MAX_DRIVE, dir_name, _MAX_DIR, NULL,0, NULL,0);
    std::string res = std::string(drive) + std::string(dir_name);
#else
    char* c = strdup((const char*)file.c_str());
    std::string res = std::string(::dirname(c));
#endif
    free(c);
    if (ram_file) {
        if ("." == res) {
            res = ram_file_name("");
        } else if ("/" ==res) {
            res = ram_file_name(res);
        }
    }
    return res;
}
 80 | 
//! Return the id of the current process (_getpid with MSVC, getpid otherwise).
uint64_t pid()
{
#ifdef MSVC_COMPILER
    return _getpid();
#else
    return getpid();
#endif
}
 89 | 
//! Return the message strerror() provides for the current value of errno.
// With MSVC, warning 4996 is disabled around the strerror call.
char* str_from_errno()
{
#ifdef MSVC_COMPILER
#pragma warning(disable:4996)
    return strerror(errno);
#pragma warning(default:4996)
#else
    return strerror(errno);
#endif
}
100 | 
101 | 
//! Return the id provided by _id_helper::getId().
uint64_t id()
{
    return _id_helper::getId();
}
106 | 
//! Demangle a C++ type name.
/*! \param name Mangled name.
 *  \return The demangled name if HAVE_CXA_DEMANGLE is defined and
 *          demangling succeeds, otherwise name unchanged.
 *  The output buffer is allocated by __cxa_demangle itself: the function
 *  expects a malloc()-allocated buffer that it may realloc(), so a stack
 *  buffer must not be passed.
 */
std::string demangle(const std::string& name)
{
#ifdef HAVE_CXA_DEMANGLE
    int status = 0;
    char* demangled = abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status);
    if (0 == status and nullptr != demangled) {
        std::string result(demangled);
        free(demangled);
        return result;
    }
    free(demangled); // free(nullptr) is a no-op
    return name;
#else
    return name;
#endif
}

//! Demangle name and shorten the result.
/*! Every occurrence of "sdsl::", "(unsigned char)" and ", unsigned long"
 *  is removed and "int_vector<1>" is replaced by "bit_vector".
 *  Occurrences that directly follow each other are all removed; the search
 *  continues at the erase position instead of one character behind it.
 */
std::string demangle2(const std::string& name)
{
    std::string result = demangle(name);
    std::vector<std::string> words_to_delete;
    words_to_delete.push_back("sdsl::");
    words_to_delete.push_back("(unsigned char)");
    words_to_delete.push_back(", unsigned long");

    for (size_t k=0; k<words_to_delete.size(); ++k) {
        const std::string& w = words_to_delete[k];
        for (size_t i = result.find(w); i != std::string::npos; i = result.find(w, i)) {
            result.erase(i, w.length());
        }
    }
    const std::string to_replace  = "int_vector<1>";
    const std::string replacement = "bit_vector";
    size_t index = 0;
    while ((index = result.find(to_replace, index)) != std::string::npos) {
        result.replace(index, to_replace.size(), replacement);
        index += replacement.size(); // continue behind the inserted text
    }
    return result;
}
144 | 
145 | void delete_all_files(tMSS& file_map)
146 | {
147 |     for (auto file_pair : file_map) {
148 |         sdsl::remove(file_pair.second);
149 |     }
150 |     file_map.clear();
151 | }
152 | 
153 | std::string to_latex_string(unsigned char c)
154 | {
155 |     if (c == '_')
156 |         return "\\_";
157 |     else if (c == '\0')
158 |         return "\\$";
159 |     else
160 |         return to_string(c);
161 | }
162 | 
//! Switch on verbose mode by setting the flag verbose to true.
void set_verbose()
{
    verbose = true;
}
167 | 
168 | size_t file_size(const std::string& file)
169 | {
170 |     if (is_ram_file(file)) {
171 |         return ram_fs::file_size(file);
172 |     } else {
173 |         struct stat fs;
174 |         stat(file.c_str(), &fs);
175 |         return fs.st_size;
176 |     }
177 | }
178 | 
179 | }// end namespace util
180 | 
181 | }// end namespace sdsl
182 | 
183 | 


--------------------------------------------------------------------------------
/external/sdsl/lib/wt_helper.cpp:
--------------------------------------------------------------------------------
 1 | #include "sdsl/wt_helper.hpp"
 2 | 
 3 | namespace sdsl
 4 | {
 5 | 
 6 | bool empty(const range_type& r)
 7 | {
 8 |     return std::get<0>(r) == (std::get<1>(r) + 1);
 9 | }
10 | 
11 | int_vector<>::size_type size(const range_type& r)
12 | {
13 |     return std::get<1>(r) - std::get<0>(r) + 1;
14 | }
15 | 
16 | 
17 | pc_node::pc_node(uint64_t freq, uint64_t sym, uint64_t parent,
18 |                  uint64_t child_left, uint64_t child_right):
19 |     freq(freq), sym(sym), parent(parent)
20 | {
21 |     child[0] = child_left;
22 |     child[1] = child_right;
23 | }
24 | 
25 | pc_node& pc_node::operator=(const pc_node& v)
26 | {
27 |     freq     = v.freq;
28 |     sym      = v.sym;
29 |     parent   = v.parent;
30 |     child[0] = v.child[0];
31 |     child[1] = v.child[1];
32 |     return *this;
33 | }
34 | 
35 | }
36 | 


--------------------------------------------------------------------------------
/external/sg-entropy/Make.helper:
--------------------------------------------------------------------------------
 1 | SG_ENTROPY_INCS = \
 2 | 	stdx/bit.h \
 3 | 	stdx/define.h \
 4 | 	stdx/exception.h \
 5 | 	io/bit_stream.h \
 6 | 	io/stream.h \
 7 | 	io/stream_array.h \
 8 | 	entropy/arith32.h \
 9 | 	entropy/arith64.h \
10 | 	entropy/range32.h \
11 | 	entropy/range64.h
12 | 
13 | SG_ENTROPY_LIBS = \
14 | 	stdx/bit.cpp \
15 | 	stdx/exception.cpp \
16 | 	io/bit_stream.cpp \
17 | 	io/stream_array.cpp \
18 | 	entropy/arith32.cpp \
19 | 	entropy/arith64.cpp \
20 | 	entropy/range32.cpp \
21 | 	entropy/range64.cpp
22 | 


--------------------------------------------------------------------------------
/external/sg-entropy/entropy/arith32.cpp:
--------------------------------------------------------------------------------
  1 | //Entropy Coding Source code
  2 | //By Sachin Garg, 2006
  3 | //
  4 | //Includes range coder based upon the carry-less implementation 
  5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
  6 | //DDJ code.
  7 | // 
  8 | //Modified to use 64-bit variables for improved performance.
  9 | //32-bit reference implementations also included.
 10 | //
 11 | //For details:
 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
 13 | 
 14 | #include "entropy/arith32.h"
 15 | 
 16 | const SG::DWord	SG::Entropy::ArithmeticCoder32::MaxRange=0x3FFF;
 17 | 
 18 | SG::Entropy::ArithmeticCoder32::ArithmeticCoder32() :
 19 | 	High(0xFFFF),
 20 | 	Low(0),
 21 | 	UnderflowCount(0),
 22 | 	TempRange(0)
 23 | {
 24 | }
 25 | 
 26 | SG::Entropy::ArithmeticEncoder32::ArithmeticEncoder32(SG::io::BitOutputStream &BitOStream) :
 27 | 	Flushed(false),
 28 | 	Output(BitOStream)
 29 | {
 30 | }
 31 | 
 32 | SG::Entropy::ArithmeticEncoder32::~ArithmeticEncoder32()
 33 | {
 34 | 	if(!Flushed) Flush();
 35 | }
 36 | 
 37 | void SG::Entropy::ArithmeticEncoder32::EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
 38 | {
 39 | 	TempRange=(High-Low)+1;
 40 | 	High=Low + ((TempRange*SymbolHigh)/TotalRange)-1;
 41 | 	Low	=Low + ((TempRange*SymbolLow )/TotalRange);
 42 | 
 43 | 	for(;;)
 44 | 	{
 45 | 		if((High & 0x8000)==(Low & 0x8000))
 46 | 		{
 47 | 			Output.WriteBit(High>>15);
 48 | 			while(UnderflowCount)
 49 | 			{
 50 | 				Output.WriteBit((High>>15)^1);
 51 | 				UnderflowCount--;
 52 | 			}
 53 | 		}
 54 | 		else
 55 | 		{
 56 | 			if((Low	& 0x4000) && !(High	& 0x4000))
 57 | 			{
 58 | 				UnderflowCount++;
 59 | 
 60 | 				Low	 &=	0x3FFF;
 61 | 				High |=	0x4000;
 62 | 			}
 63 | 			else
 64 | 				return;
 65 | 		}
 66 | 
 67 | 		Low	=(Low<<1) &	0xFFFF;
 68 | 		High=((High<<1)|1) & 0xFFFF;
 69 | 	}
 70 | }
 71 | 
 72 | void SG::Entropy::ArithmeticEncoder32::Flush()
 73 | {
 74 | 	if(!Flushed)
 75 | 	{
 76 | 		Output.WriteBit((Low>>14)&1);
 77 | 		UnderflowCount++;
 78 | 
 79 | 		while(UnderflowCount)
 80 | 		{
 81 | 			Output.WriteBit(((Low>>14)^1)&1);
 82 | 			UnderflowCount--;
 83 | 		}
 84 | 
 85 | 		Output.Flush();
 86 | 		Flushed=true;
 87 | 	}
 88 | }
 89 | 
 90 | SG::Entropy::ArithmeticDecoder32::ArithmeticDecoder32(SG::io::BitInputStream &BitIStream) :
 91 | 	Code(0),
 92 | 	Input(BitIStream)
 93 | {
 94 | 	for(SG::FastInt I=0;I<16;I++)
 95 | 	{
 96 | 		Code<<=1;
 97 | 		Code+=Input.ReadBit();;
 98 | 	}
 99 | }
100 | 
101 | SG::DWord SG::Entropy::ArithmeticDecoder32::GetCurrentCount(SG::DWord TotalRange)
102 | {
103 | 	TempRange=(High-Low)+1;
104 | 	return (SG::DWord)(((((Code-Low)+1)*(SG::QWord)TotalRange)-1)/TempRange);
105 | }
106 | 
107 | void SG::Entropy::ArithmeticDecoder32::RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
108 | {
109 | 	TempRange=(High-Low)+1;
110 | 	High=Low+((TempRange*SymbolHigh)/TotalRange)-1;
111 | 	Low	=Low+((TempRange*SymbolLow )/TotalRange);
112 | 
113 | 	for(;;)
114 | 	{
115 | 		if((High & 0x8000) == (Low & 0x8000))
116 | 		{
117 | 		}
118 | 		else
119 | 		{
120 | 			if((Low	& 0x4000) && !(High	& 0x4000))
121 | 			{
122 | 				Code ^=	0x4000;
123 | 				Low	 &=	0x3FFF;
124 | 				High |=	0x4000;
125 | 			}
126 | 			else
127 | 				return;
128 | 		}
129 | 		Low	 = (Low	<< 1) &	0xFFFF;
130 | 		High = ((High<<1) |	1) & 0xFFFF;
131 | 
132 | 		Code <<=1;
133 | 		Code|=Input.ReadBit();
134 | 		Code &=0xFFFF;
135 | 	}
136 | }
137 | 


--------------------------------------------------------------------------------
/external/sg-entropy/entropy/arith32.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef	__sg_entropy_arith32
15 | #define	__sg_entropy_arith32
16 | 
17 | #include "stdx/define.h"
18 | #include "stdx/bit.h"
19 | #include "io/bit_stream.h"
20 | 
21 | namespace SG
22 | {
23 | 	namespace Entropy
24 | 	{
25 | 		/*	Code for arithmetic coding
26 | 			derived from work by Mark Nelson, Tom st Denis, Charles Bloom
27 |             
28 | 			author : Sachin Garg
29 | 		*/
30 | 		class ArithmeticCoder32
31 | 		{
32 | 		public:
33 | 			static const SG::DWord MaxRange;
34 | 
35 | 		protected:
36 | 
37 | 			ArithmeticCoder32();
38 | 			SG::DWord High,Low,UnderflowCount;
39 | 			SG::DWord TempRange;
40 | 		};
41 | 
42 | 		class ArithmeticEncoder32 : public ArithmeticCoder32
43 | 		{
44 | 		private:
45 | 			SG::Boolean Flushed;
46 | 			SG::io::BitOutputStream &Output;
47 | 
48 | 		public:
49 | 			ArithmeticEncoder32(SG::io::BitOutputStream &BitOStream);
50 | 			~ArithmeticEncoder32();
51 | 
52 | 			void EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
53 | 			void Flush();
54 | 		};
55 | 
56 | 		class ArithmeticDecoder32 : public ArithmeticCoder32
57 | 		{
58 | 		private:
59 | 			SG::DWord Code;
60 | 			SG::io::BitInputStream &Input;
61 | 			
62 | 		public:
63 |             ArithmeticDecoder32(SG::io::BitInputStream &BitIStream);
64 | 
65 | 			SG::DWord GetCurrentCount(SG::DWord TotalRange);
66 | 			void RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
67 | 		};
68 | 	}
69 | }
70 | 
71 | #endif
72 | 


--------------------------------------------------------------------------------
/external/sg-entropy/entropy/arith64.cpp:
--------------------------------------------------------------------------------
  1 | //Entropy Coding Source code
  2 | //By Sachin Garg, 2006
  3 | //
  4 | //Includes range coder based upon the carry-less implementation 
  5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
  6 | //DDJ code.
  7 | // 
  8 | //Modified to use 64-bit variables for improved performance.
  9 | //32-bit reference implementations also included.
 10 | //
 11 | //For details:
 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
 13 | 
 14 | #include "entropy/arith64.h"
 15 | 
 16 | const SG::DWord	SG::Entropy::ArithmeticCoder64::MaxRange=0x3FFFFFFF;
 17 | 
 18 | SG::Entropy::ArithmeticCoder64::ArithmeticCoder64() :
 19 | 	High(0xFFFFFFFF),
 20 | 	Low(0),
 21 | 	UnderflowCount(0),
 22 | 	TempRange(0)
 23 | {
 24 | }
 25 | 
 26 | SG::Entropy::ArithmeticEncoder64::ArithmeticEncoder64(SG::io::BitOutputStream &BitOStream) :
 27 | 	Flushed(false),
 28 | 	Output(BitOStream)
 29 | {
 30 | }
 31 | 
 32 | SG::Entropy::ArithmeticEncoder64::~ArithmeticEncoder64()
 33 | {
 34 | 	if(!Flushed) Flush();
 35 | }
 36 | 
 37 | void SG::Entropy::ArithmeticEncoder64::EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
 38 | {
 39 | 	TempRange=(High-Low)+1;
 40 | 	High=Low + ((TempRange*(SG::QWord)SymbolHigh)/TotalRange)-1;
 41 | 	Low	=Low + ((TempRange*(SG::QWord)SymbolLow )/TotalRange);
 42 | 
 43 | 	for(;;)
 44 | 	{
 45 | 		if((High & 0x80000000)==(Low & 0x80000000))
 46 | 		{
 47 | 			Output.WriteBit(High>>31);
 48 | 			while(UnderflowCount)
 49 | 			{
 50 | 				Output.WriteBit((High>>31)^1);
 51 | 				UnderflowCount--;
 52 | 			}
 53 | 		}
 54 | 		else
 55 | 		{
 56 | 			if((Low	& 0x40000000) && !(High	& 0x40000000))
 57 | 			{
 58 | 				UnderflowCount++;
 59 | 
 60 | 				Low	 &=	0x3FFFFFFF;
 61 | 				High |=	0x40000000;
 62 | 			}
 63 | 			else
 64 | 				return;
 65 | 		}
 66 | 
 67 | 		Low	=(Low<<1) &	0xFFFFFFFF;
 68 | 		High=((High<<1)|1) & 0xFFFFFFFF;
 69 | 	}
 70 | }
 71 | 
 72 | void SG::Entropy::ArithmeticEncoder64::Flush()
 73 | {
 74 | 	if(!Flushed)
 75 | 	{
 76 | 		Output.WriteBit((Low>>30)&1);
 77 | 		UnderflowCount++;
 78 | 
 79 | 		while(UnderflowCount)
 80 | 		{
 81 | 			Output.WriteBit(((Low>>30)^1)&1);
 82 | 			UnderflowCount--;
 83 | 		}
 84 | 
 85 | 		Output.Flush();
 86 | 		Flushed=true;
 87 | 	}
 88 | }
 89 | 
 90 | SG::Entropy::ArithmeticDecoder64::ArithmeticDecoder64(SG::io::BitInputStream &BitIStream) :
 91 | 	Code(0),
 92 | 	Input(BitIStream)
 93 | {
 94 | 	for(SG::FastInt I=0;I<32;I++)
 95 | 	{
 96 | 		Code<<=1;
 97 | 		Code+=Input.ReadBit();;
 98 | 	}
 99 | }
100 | 
101 | 
102 | 
103 | SG::DWord SG::Entropy::ArithmeticDecoder64::GetCurrentCount(SG::DWord TotalRange)
104 | {
105 | 	TempRange=(High-Low)+1;
106 | 	return (SG::DWord)(((((Code-Low)+1)*(SG::QWord)TotalRange)-1)/TempRange);
107 | }
108 | 
109 | void SG::Entropy::ArithmeticDecoder64::RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
110 | {
111 | 	TempRange=(High-Low)+1;
112 | 	High=Low+((TempRange*(SG::QWord)SymbolHigh)/TotalRange)-1;
113 | 	Low	=Low+((TempRange*(SG::QWord)SymbolLow )/TotalRange);
114 | 
115 | 	for(;;)
116 | 	{
117 | 		if((High & 0x80000000) == (Low & 0x80000000))
118 | 		{
119 | 		}
120 | 		else
121 | 		{
122 | 			if((Low	& 0x40000000) && !(High	& 0x40000000))
123 | 			{
124 | 				Code ^=	0x40000000;
125 | 				Low	 &=	0x3FFFFFFF;
126 | 				High |=	0x40000000;
127 | 			}
128 | 			else
129 | 				return;
130 | 		}
131 | 		Low	 = (Low	<< 1) &	0xFFFFFFFF;
132 | 		High = ((High<<1) |	1) & 0xFFFFFFFF;
133 | 
134 | 		Code <<=1;
135 | 		Code|=Input.ReadBit();
136 | 		Code &=0xFFFFFFFF;
137 | 	}
138 | }
139 | 


--------------------------------------------------------------------------------
/external/sg-entropy/entropy/arith64.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef	__sg_entropy_arith64
15 | #define	__sg_entropy_arith64
16 | 
17 | #include "stdx/define.h"
18 | #include "stdx/bit.h"
19 | #include "io/bit_stream.h"
20 | 
21 | namespace SG
22 | {
23 | 	namespace Entropy
24 | 	{
25 | 		/*	Code for arithmetic coding, derived from work by Mark Nelson, Tom st Denis, Charles Bloom
26 | 			Modified to use 64-bit integer maths, for increased precision
27 |             
28 | 			author : Sachin Garg
29 | 		*/
30 | 		class ArithmeticCoder64
31 | 		{
32 | 		public:
33 | 			static const SG::DWord MaxRange;
34 | 
35 | 		protected:
36 | 
37 | 			ArithmeticCoder64();
38 | 			SG::QWord	High,Low,UnderflowCount;
39 | 			SG::QWord	TempRange;
40 | 		};
41 | 
42 | 		class ArithmeticEncoder64 : public ArithmeticCoder64
43 | 		{
44 | 		private:
45 | 			SG::Boolean Flushed;
46 | 			SG::io::BitOutputStream &Output;
47 | 
48 | 		public:
49 | 			ArithmeticEncoder64(SG::io::BitOutputStream &BitOStream);
50 | 			~ArithmeticEncoder64();
51 | 
52 | 			void EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
53 | 			void Flush();
54 | 		};
55 | 
56 | 		class ArithmeticDecoder64 : public ArithmeticCoder64
57 | 		{
58 | 		private:
59 | 			SG::QWord Code;
60 | 			SG::io::BitInputStream &Input;
61 | 
62 | 		public:
63 | 			ArithmeticDecoder64(SG::io::BitInputStream &BitIStream);
64 | 
65 | 			SG::DWord GetCurrentCount(SG::DWord TotalRange);
66 | 			void RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
67 | 		};
68 | 	}
69 | }
70 | #endif
71 | 


--------------------------------------------------------------------------------
/external/sg-entropy/entropy/range32.cpp:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #include "entropy/range32.h"
15 | 
16 | const SG::DWord	SG::Entropy::RangeCoder32::Top=(SG::DWord)1<<24;
17 | const SG::DWord	SG::Entropy::RangeCoder32::Bottom=(SG::DWord)1<<16;
18 | const SG::DWord	SG::Entropy::RangeCoder32::MaxRange=Bottom;
19 | 
20 | SG::Entropy::RangeCoder32::RangeCoder32() :
21 | 	Low(0),
22 | 	Range((SG::DWord)-1)
23 | {
24 | }
25 | 
26 | SG::Entropy::RangeEncoder32::RangeEncoder32(SG::io::OutputStream &OStream) :
27 | 	Flushed(false),
28 | 	Output(OStream)
29 | {
30 | }
31 | 
32 | SG::Entropy::RangeEncoder32::~RangeEncoder32()
33 | {
34 | 	if (!Flushed) Flush();
35 | }
36 | 
37 | void SG::Entropy::RangeEncoder32::EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
38 | {
39 | 	Low += SymbolLow*(Range/=TotalRange);
40 | 	Range *= SymbolHigh-SymbolLow;
41 | 
42 | 	while ((Low ^ (Low+Range))<Top || Range<Bottom && ((Range= -Low & (Bottom-1)),1))
43 | 	{
44 | 		Output.WriteByte(Low>>24), Range<<=8, Low<<=8;
45 | 	}
46 | }
47 | 
48 | void SG::Entropy::RangeEncoder32::Flush()
49 | {
50 | 	if(!Flushed)
51 | 	{
52 | 		for(SG::FastInt i=0;i<4;i++)
53 | 		{
54 | 			Output.WriteByte(Low>>24);
55 | 			Low<<=8;
56 | 		}
57 | 
58 | 		Flushed=true;
59 | 	}
60 | }
61 | 
62 | SG::Entropy::RangeDecoder32::RangeDecoder32(SG::io::InputStream &IStream) :
63 | 	Code(0),
64 | 	Input(IStream)
65 | {
66 | 	for(SG::FastInt i=0;i<4;i++)
67 | 	{
68 | 		Code = (Code << 8) | Input.ReadByte();
69 | 	}
70 | }
71 | 
72 | SG::DWord SG::Entropy::RangeDecoder32::GetCurrentCount(SG::DWord TotalRange)
73 | {
74 | 	return (Code-Low)/(Range/=TotalRange);
75 | }
76 | 
77 | void SG::Entropy::RangeDecoder32::RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord /*TotalRange*/)
78 | {
79 | 	Low += SymbolLow*Range;
80 | 	Range *= SymbolHigh-SymbolLow;
81 | 
82 | 	while ((Low ^ Low+Range)<Top || Range<Bottom && ((Range= -Low & Bottom-1),1))
83 | 	{
84 | 		Code= Code<<8 | Input.ReadByte(), Range<<=8, Low<<=8;
85 | 	}
86 | }
87 | 


--------------------------------------------------------------------------------
/external/sg-entropy/entropy/range32.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef	__sg_entropy_range32
15 | #define	__sg_entropy_range32
16 | 
17 | #include "stdx/define.h"
18 | #include "stdx/bit.h"
19 | #include "io/stream.h"
20 | 
21 | namespace SG
22 | {
23 | 	namespace Entropy
24 | 	{
25 | 		/*	Code for range coding, derived from public domain work by Dmitry Subbotin
26 | 			Using 32-bit integer maths
27 |             
28 | 			author : Sachin Garg
29 | 		*/
30 | 		class RangeCoder32
31 | 		{
32 | 		public:
33 | 			static const SG::DWord MaxRange;
34 | 		protected:
35 | 
36 | 			RangeCoder32();
37 | 			static const SG::DWord Top,Bottom;
38 | 			SG::DWord Low,Range;
39 | 		};
40 | 
41 | 		class RangeEncoder32:public RangeCoder32
42 | 		{
43 | 		private:
44 | 			SG::Boolean Flushed;
45 | 			SG::io::OutputStream &Output;
46 | 
47 | 		public:
48 | 			RangeEncoder32(SG::io::OutputStream &OStream);
49 | 			~RangeEncoder32();
50 | 
51 | 			void EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
52 | 			void Flush();
53 | 		};
54 | 
55 | 		class RangeDecoder32:public RangeCoder32
56 | 		{
57 | 		private:
58 | 			SG::DWord Code;
59 | 			SG::io::InputStream &Input;
60 | 
61 | 		public:
62 | 			RangeDecoder32(SG::io::InputStream &IStream);
63 | 
64 | 			SG::DWord GetCurrentCount(SG::DWord TotalRange);
65 | 			void RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
66 | 		};
67 | 	}
68 | }
69 | 
70 | #endif
71 | 


--------------------------------------------------------------------------------
/external/sg-entropy/entropy/range64.cpp:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #include "entropy/range64.h"
15 | 
16 | const SG::QWord SG::Entropy::RangeCoder64::Top=(SG::QWord)1<<56;
17 | const SG::QWord SG::Entropy::RangeCoder64::Bottom=(SG::QWord)1<<48;
18 | const SG::QWord SG::Entropy::RangeCoder64::MaxRange=Bottom;
19 | 
20 | SG::Entropy::RangeCoder64::RangeCoder64() :
21 | 	Low(0),
22 | 	Range((SG::QWord)-1)
23 | {
24 | }
25 | 
26 | SG::Entropy::RangeEncoder64::RangeEncoder64(SG::io::OutputStream &OStream) :
27 | 	Flushed(false),
28 | 	Output(OStream)
29 | {
30 | }
31 | 
32 | SG::Entropy::RangeEncoder64::~RangeEncoder64()
33 | {
34 | 	if (!Flushed) Flush();
35 | }
36 | 
37 | void SG::Entropy::RangeEncoder64::EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange)
38 | {
39 | 	Low += SymbolLow*(Range/=TotalRange);
40 | 	Range *= SymbolHigh-SymbolLow;
41 | 
42 | 	while ((Low ^ (Low+Range))<Top || Range<Bottom && ((Range= -Low & (Bottom-1)),1))
43 | 	{
44 | 		Output.WriteByte(Low>>56), Range<<=8, Low<<=8;
45 | 	}
46 | }
47 | 
48 | void SG::Entropy::RangeEncoder64::Flush()
49 | {
50 | 	if(!Flushed)
51 | 	{
52 | 		for(SG::FastInt i=0;i<8;i++)
53 | 		{
54 | 			Output.WriteByte(Low>>56);
55 | 			Low<<=8;
56 | 		}
57 | 		Flushed=true;
58 | 	}
59 | }
60 | 
61 | SG::Entropy::RangeDecoder64::RangeDecoder64(SG::io::InputStream &IStream) :
62 | 	Code(0),
63 | 	Input(IStream)
64 | {
65 | 	for(SG::FastInt i=0;i<8;i++)
66 | 	{
67 | 		Code = (Code << 8) | Input.ReadByte();
68 | 	}
69 | }
70 | 
71 | SG::DWord SG::Entropy::RangeDecoder64::GetCurrentCount(SG::DWord TotalRange)
72 | {
73 | 	return (Code-Low)/(Range/=TotalRange);
74 | }
75 | 
76 | void SG::Entropy::RangeDecoder64::RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord /*TotalRange*/)
77 | {
78 | 	Low += SymbolLow*Range;
79 | 	Range *= SymbolHigh-SymbolLow;
80 | 
81 | 	while ((Low ^ Low+Range)<Top || Range<Bottom && ((Range= -Low & Bottom-1),1))
82 | 	{
83 | 		Code= Code<<8 | Input.ReadByte(), Range<<=8, Low<<=8;
84 | 	}
85 | }
86 | 


--------------------------------------------------------------------------------
/external/sg-entropy/entropy/range64.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef	__sg_entropy_range64
15 | #define	__sg_entropy_range64
16 | 
17 | #include "stdx/define.h"
18 | #include "io/stream.h"
19 | 
20 | namespace SG
21 | {
22 | 	namespace Entropy
23 | 	{
24 | 		/*	Code for range coding, derived from public domain work by Dmitry Subbotin
25 | 			Modified to use 64-bit integer maths, for increased precision
26 | 
27 | 			Note :	Cannot be used at full 'capacity' as the interface still takes DWord parameters (not QWord)
28 | 					This is done to maintain uniformity in interface across all entropy coders, feel free to 
29 | 					change this.
30 |             
31 | 			author : Sachin Garg
32 | 		*/
33 | 		class RangeCoder64
34 | 		{
35 | 		public:
36 | 			static const SG::QWord MaxRange;
37 | 
38 | 		protected:
39 | 
40 | 			RangeCoder64();
41 | 			static const SG::QWord Top,Bottom;
42 | 			SG::QWord Low,Range;
43 | 		};
44 | 
45 | 		class RangeEncoder64:public RangeCoder64
46 | 		{
47 | 		private:
48 | 			SG::Boolean Flushed;
49 | 			SG::io::OutputStream &Output;
50 | 
51 | 		public:
52 | 			RangeEncoder64(SG::io::OutputStream &OStream);
53 | 			~RangeEncoder64();
54 | 
55 | 			void EncodeRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
56 | 			void Flush();
57 | 		};
58 | 
59 | 		class RangeDecoder64:public RangeCoder64
60 | 		{
61 | 		private:
62 | 			SG::QWord Code;
63 | 			SG::io::InputStream &Input;
64 | 
65 | 		public:
66 | 			RangeDecoder64(SG::io::InputStream &IStream);
67 | 
68 | 			SG::DWord GetCurrentCount(SG::DWord TotalRange);
69 | 			void RemoveRange(SG::DWord SymbolLow,SG::DWord SymbolHigh,SG::DWord TotalRange);
70 | 		};
71 | 	}
72 | }
73 | 
74 | #endif
75 | 


--------------------------------------------------------------------------------
/external/sg-entropy/io/bit_stream.cpp:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #include "stdx/bit.h"
15 | #include "io/bit_stream.h"
16 | 
17 | SG::io::BitInputStream::BitInputStream(SG::io::InputStream &Under) :
18 | 	_Under(Under),
19 | 	_Position(0)
20 | {
21 |     _Buffer=_Under.ReadByte(); 
22 | }
23 | 
24 | SG::Binary SG::io::BitInputStream::ReadBit()
25 | {
26 | 	SG::Binary Result=stdx::GetBit(_Position,_Buffer);
27 | 
28 | 	if(_Position==7)
29 | 	{
30 | 		_Position=0;
31 | 		_Buffer=_Under.ReadByte();
32 | 	}
33 | 	else
34 | 	{
35 |         ++_Position;
36 | 	}
37 | 
38 |     return Result;
39 | }
40 | 
41 | SG::io::BitOutputStream::BitOutputStream(SG::io::OutputStream &Under) :
42 | 	_Under(Under),
43 | 	_Position(0)
44 | {
45 | }
46 | 
47 | SG::io::BitOutputStream::~BitOutputStream()
48 | {
49 | 	Flush();
50 | }
51 | 
52 | void SG::io::BitOutputStream::WriteBit(SG::Binary Value)
53 | {
54 | 	stdx::SetBit(_Position,_Buffer,Value);
55 | 
56 | 	if(_Position==7)
57 | 	{
58 | 		_Position=0;
59 | 		_Under.WriteByte(_Buffer);
60 | 	}
61 | 	else
62 | 	{
63 | 		++_Position;
64 | 	}
65 | }
66 | 
67 | void SG::io::BitOutputStream::Flush()
68 | {
69 | 	if(_Position!=0)
70 | 	{
71 | 		_Under.WriteByte(_Buffer);
72 | 		_Position=0;
73 | 		_Buffer=0;
74 | 	}
75 | }
76 | 


--------------------------------------------------------------------------------
/external/sg-entropy/io/bit_stream.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef __sg_io_bit_stream
15 | #define __sg_io_bit_stream
16 | 
17 | #include "stdx/define.h"
18 | #include "io/stream.h"
19 | 
20 | namespace SG
21 | {
22 | 	namespace io
23 | 	{
24 | 		class BitInputStream
25 | 		{
26 | 		protected:
27 | 			SG::Byte _Position;
28 | 			SG::Byte _Buffer;
29 | 			SG::io::InputStream &_Under;
30 |                         
31 | 		public:
32 | 			BitInputStream(SG::io::InputStream &Under);
33 | 
34 | 			SG::Binary ReadBit();
35 | 			SG::Boolean Ended();
36 | 		};
37 | 
38 | 		class BitOutputStream
39 | 		{
40 | 		protected:
41 | 
42 | 			SG::Byte _Position;
43 | 			SG::Byte _Buffer;
44 | 			SG::io::OutputStream &_Under;
45 | 
46 | 		public:
47 | 			BitOutputStream(SG::io::OutputStream &Under);
48 | 			~BitOutputStream();
49 | 
50 | 			void WriteBit(SG::Binary Value);
51 | 			void Flush();
52 | 		};
53 | 	}
54 | }
55 | 
56 | #endif
57 | 


--------------------------------------------------------------------------------
/external/sg-entropy/io/stream.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef __sg_io_stream
15 | #define __sg_io_stream
16 | 
17 | #include "stdx/define.h"
18 | 
19 | namespace SG
20 | {
21 | 	namespace io
22 | 	{
23 | 		class InputStream
24 | 		{
25 | 		protected:
26 | 			
27 | 		public:
28 | 			InputStream(){}
29 | 			virtual ~InputStream(){}
30 | 
31 | 			virtual int ReadByte()=0;
32 | 			virtual SG::Boolean Ended()=0;
33 | 		};
34 | 
35 | 		class OutputStream
36 | 		{
37 | 		protected:
38 | 
39 | 		public:
40 | 			OutputStream(){}
41 | 			virtual ~OutputStream(){Flush();}
42 | 
43 | 			virtual void WriteByte(SG::Byte Value)=0;
44 | 			virtual void Flush(){};
45 | 		};
46 | 	}
47 | }
48 | 
49 | #endif
50 | 


--------------------------------------------------------------------------------
/external/sg-entropy/io/stream_array.cpp:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #include "io/stream_array.h"
15 | #include "stdx/exception.h"
16 | 
17 | SG::io::ArrayInputStream::ArrayInputStream(SG::Byte *Array,SG::Counter Size,SG::Counter InitialOffset) :
18 | 	_Array(Array),
19 | 	_Size(Size),
20 | 	_Offset(InitialOffset)
21 | {
22 | }
23 | 
24 | int SG::io::ArrayInputStream::ReadByte()
25 | {
26 | 	int Result=_Offset<_Size ? _Array[_Offset]:(-1);
27 | 	++_Offset;
28 | 	return Result;
29 | }
30 | 
31 | SG::Boolean SG::io::ArrayInputStream::Ended()
32 | {
33 | 	return _Offset>=_Size;
34 | }
35 | 
36 | SG::io::ArrayOutputStream::ArrayOutputStream(SG::Byte *Array,SG::Counter Size,SG::Counter InitialOffset) :
37 | 	_Array(Array),
38 | 	_Size(Size),
39 | 	_Offset(InitialOffset)
40 | {
41 | }
42 | 
43 | void SG::io::ArrayOutputStream::WriteByte(SG::Byte Value)
44 | {
45 | 	if(_Offset>=_Size) throw stdx::Exception("Buffer Overflow","SG::io::ArrayOutputStream::WriteByte");
46 | 	_Array[_Offset]=Value;
47 | 	++_Offset;
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/external/sg-entropy/io/stream_array.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef __sg_io_streamarray
15 | #define __sg_io_streamarray
16 | 
17 | #include "stdx/define.h"
18 | #include "stdx/bit.h"
19 | #include "io/stream.h"
20 | 
21 | namespace SG
22 | {
23 | 	namespace io
24 | 	{
25 | 		class ArrayInputStream : public SG::io::InputStream
26 | 		{
27 | 		private:
28 | 			SG::Counter _Offset;
29 | 			SG::Byte *_Array;
30 | 			SG::Counter _Size;
31 | 
32 | 		public:
33 | 			ArrayInputStream(SG::Byte *Array,SG::Counter Size,SG::Counter InitialOffset=0);
34 | 			~ArrayInputStream(){}
35 | 
36 | 			int ReadByte();
37 | 			SG::Boolean Ended();
38 | 
39 | 			SG::Counter Tell(){return _Offset;}
40 | 			void Seek(SG::Counter Offset){_Offset=Offset;}
41 | 		};
42 | 
43 | 		class ArrayOutputStream : public SG::io::OutputStream	//output stream that writes into a caller-supplied byte array
44 | 		{
45 | 		private:
46 | 			SG::Counter _Offset;	//index of the next byte WriteByte() will store
47 | 			SG::Byte *_Array;	//buffer handed to the constructor
48 | 			SG::Counter _Size;	//capacity of _Array; WriteByte() throws once _Offset reaches it
49 | 
50 | 		public:
51 | 			ArrayOutputStream(SG::Byte *Array,SG::Counter Size,SG::Counter InitialOffset=0);
52 | 			~ArrayOutputStream(){}	//empty: the destructor does not release _Array
53 | 
54 | 			void WriteByte(SG::Byte Value);	//stores one byte; throws stdx::Exception when the buffer is full
55 | 			void Flush(){};	//nothing to do: bytes are stored directly in _Array
56 | 
57 | 			SG::Counter Tell(){return _Offset;}	//returns the number of the next byte to be written
58 | 			void Seek(SG::Counter Offset){_Offset=Offset;}	//sets the offset; Offset is not checked against _Size here
59 | 		};
60 | 	}
61 | }
62 | 
63 | #endif
64 | 


--------------------------------------------------------------------------------
/external/sg-entropy/license.txt:
--------------------------------------------------------------------------------
1 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
2 | 
3 |    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
4 |    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
5 |    3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.
6 | 
7 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/external/sg-entropy/order0test_arith32.cpp:
--------------------------------------------------------------------------------
  1 | //Entropy Coding Source code
  2 | //By Sachin Garg, 2006
  3 | //
  4 | //Includes range coder based upon the carry-less implementation 
  5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
  6 | //DDJ code.
  7 | // 
  8 | //Modified to use 64-bit variables for improved performance.
  9 | //32-bit reference implementations also included.
 10 | //
 11 | //For details:
 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
 13 | 
 14 | #include <iostream>
 15 | #include <fstream>
 16 | #include <time.h>
 17 | #include "stdx/define.h"
 18 | 
 19 | #include "io/bit_stream.h"
 20 | #include "io/stream_array.h"
 21 | 
 22 | #include "entropy/arith32.h"
 23 | 
 24 | using namespace std;
 25 | using namespace SG;
 26 | 
 27 | //Halves the cumulative counts Frequency[1..256] while keeping the table strictly increasing.
 28 | void Rescale(Counter *Frequency) {
 29 | 	for(Counter *Cur=Frequency+1;Cur!=Frequency+257;++Cur) {
 30 | 		*Cur=(*Cur/2>Cur[-1]) ? *Cur/2 : Cur[-1]+1;	//never let an entry fall to or below its predecessor
 31 | 	}
 32 | }
 33 | 
 34 | namespace
 35 | {
 36 | 	//Owns a heap byte array so that every exit path of main releases it.
 37 | 	struct OwnedBuffer
 38 | 	{
 39 | 		Byte *Data;
 40 | 
 41 | 		OwnedBuffer():Data(NULL){}
 42 | 		~OwnedBuffer(){delete[] Data;}
 43 | 
 44 | 		//Allocates Size bytes and returns false on failure. operator new[] reports
 45 | 		//failure by throwing, so testing its result against NULL would never fire.
 46 | 		bool Allocate(Counter Size)
 47 | 		{
 48 | 			try	{	Data=new Byte[Size];	}
 49 | 			catch(...)	{	Data=NULL;	}
 50 | 			return Data!=NULL;
 51 | 		}
 52 | 	};
 53 | }
 54 | 
 55 | //A quick test for entropy coders. Uses order-0 model.
 56 | //Usage: c|d InputFileName OutputFileName
 57 | //A compressed file is sizeof(Counter) raw bytes holding the original length
 58 | //(native width and byte order), followed by the coded symbols.
 59 | //Returns 0 on success and 1 on any error.
 60 | int main(int argc,char *argv[])
 61 | {
 62 | 	if(argc!=4)
 63 | 	{
 64 | 		cerr<<"Usage: c|d InputFileName OutputFileName\n"
 65 | 			<<"c: compress\n"
 66 | 			<<"d: decompress\n";
 67 | 		return 1;
 68 | 	}
 69 | 
 70 | 	//Check the mode before touching any file, so a mistyped mode cannot truncate the output file.
 71 | 	const char Mode=argv[1][0];
 72 | 	if(Mode!='c'&&Mode!='d')	{	cerr<<"Invalid parameter\n";	return 1;	}
 73 | 
 74 | 	fstream Fin,Fout;
 75 | 	Fin.open(argv[2],ios::in|ios::binary);
 76 | 	if(!Fin.good())	{	cerr<<"File not found\n";	return 1;	}
 77 | 
 78 | 	Fin.seekg(0,ios::end);
 79 | 	const streamoff Length=Fin.tellg();	//negative when the position cannot be determined
 80 | 	Fin.seekg(0,ios::beg);
 81 | 	if(Length<0)	{	cerr<<"Error reading file\n";	return 1;	}
 82 | 	const Counter FileSizeB=(Counter)Length;
 83 | 
 84 | 	//A compressed file must at least contain the length header.
 85 | 	if(Mode=='d'&&FileSizeB<sizeof(Counter))	{	cerr<<"Invalid compressed file\n";	return 1;	}
 86 | 
 87 | 	OwnedBuffer InputFile,OutputFile;
 88 | 	if(!InputFile.Allocate(FileSizeB))	{	cerr<<"Memory allocation error\n";	return 1;	}
 89 | 
 90 | 	Fin.read((char *)InputFile.Data,FileSizeB);
 91 | 	if(!Fin)	{	cerr<<"Error reading file\n";	return 1;	}
 92 | 
 93 | 	Fout.open(argv[3],ios::out|ios::binary);
 94 | 	if(!Fout.good()) {	cerr<<"Error creating file\n";	return 1;	}
 95 | 
 96 | 	Counter OutputSize=0;
 97 | 
 98 | 	try
 99 | 	{
100 | 		if(Mode=='c')
101 | 		{
102 | 			cout<<"Compressing...\n";
103 | 
104 | 			//2000000 spare bytes leave room for the header and for input that does not compress.
105 | 			const Counter Capacity=FileSizeB+2000000;
106 | 			if(!OutputFile.Allocate(Capacity))	{	cerr<<"Memory allocation error\n";	return 1;	}
107 | 
108 | 			io::ArrayInputStream ByteStream(InputFile.Data,FileSizeB);
109 | 			io::ArrayOutputStream  OutputStream(OutputFile.Data,Capacity);
110 | 			io::BitOutputStream  BitStream(OutputStream);
111 | 
112 | 			//Header: the original length, byte by byte as laid out in memory.
113 | 			for(Counter i=0;i<sizeof(Counter);i++) OutputStream.WriteByte(((const Byte*)&FileSizeB)[i]);
114 | 
115 | 			SG::Entropy::ArithmeticEncoder32 EntropyCoder(BitStream);
116 | 
117 | 			//Cumulative counts: symbol s owns [Freq[s],Freq[s+1]) and starts with a count of 1.
118 | 			Counter Freq[257];
119 | 			for(int i=0;i<257;i++) Freq[i]=i;
120 | 
121 | 			for(Counter i=0;i<FileSizeB;i++)
122 | 			{
123 | 				Byte ch=ByteStream.ReadByte();
124 | 				EntropyCoder.EncodeRange(Freq[ch],Freq[ch+1],Freq[256]);
125 | 
126 | 				for(int j=ch+1;j<257;j++) Freq[j]++;
127 | 				if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq);
128 | 			}
129 | 			EntropyCoder.Flush();
130 | 			BitStream.Flush();
131 | 			OutputSize = OutputStream.Tell();
132 | 
133 | 			cout<<OutputSize<<"/"<<FileSizeB<<"\n";
134 | 		}
135 | 		else
136 | 		{
137 | 			cout<<"Decompressing...\n";
138 | 
139 | 			io::ArrayInputStream InputStream(InputFile.Data,FileSizeB);
140 | 			for(Counter i=0;i<sizeof(Counter);i++) ((Byte *)&OutputSize)[i]=InputStream.ReadByte();
141 | 
142 | 			//Size the buffer from the decoded length in the header; the compressed
143 | 			//length says nothing about how large the output will be.
144 | 			if(!OutputFile.Allocate(OutputSize))	{	cerr<<"Memory allocation error\n";	return 1;	}
145 | 			io::ArrayOutputStream ByteStream(OutputFile.Data,OutputSize);
146 | 
147 | 			io::BitInputStream BitStream(InputStream);
148 | 			SG::Entropy::ArithmeticDecoder32 EntropyCoder(BitStream);
149 | 
150 | 			Counter Freq[257];
151 | 			for(int i=0;i<=256;i++) Freq[i]=i;
152 | 
153 | 			for(Counter i=0;i<OutputSize;i++)
154 | 			{
155 | 				Counter Count = EntropyCoder.GetCurrentCount(Freq[256]);
156 | 
157 | 				//Walk down to the symbol whose interval holds Count; Freq[0] stays 0, so the scan always stops.
158 | 				Byte Symbol;
159 | 				for(Symbol=255;Freq[Symbol]>Count;Symbol--);
160 | 
161 | 				ByteStream.WriteByte(Symbol);
162 | 				EntropyCoder.RemoveRange(Freq[Symbol],Freq[Symbol+1],Freq[256]);
163 | 
164 | 				for(int j=Symbol+1;j<257;j++) Freq[j]++;
165 | 				if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq);
166 | 			}
167 | 		}
168 | 	}
169 | 	catch(...)
170 | 	{
171 | 		//ArrayOutputStream::WriteByte throws on overflow; other stream or coder calls may throw as well.
172 | 		cerr<<"Coding error\n";
173 | 		return 1;
174 | 	}
175 | 
176 | 	Fout.write((char*)OutputFile.Data,OutputSize);
177 | 	Fout.close();	//flush now so that write errors are seen before success is reported
178 | 	if(!Fout.good())	{	cerr<<"Error writing file\n";	return 1;	}
179 | 	return 0;
180 | }
181 | 


--------------------------------------------------------------------------------
/external/sg-entropy/order0test_arith64.cpp:
--------------------------------------------------------------------------------
  1 | //Entropy Coding Source code
  2 | //By Sachin Garg, 2006
  3 | //
  4 | //Includes range coder based upon the carry-less implementation 
  5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
  6 | //DDJ code.
  7 | // 
  8 | //Modified to use 64-bit variables for improved performance.
  9 | //32-bit reference implementations also included.
 10 | //
 11 | //For details:
 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
 13 | 
 14 | #include <iostream>
 15 | #include <fstream>
 16 | #include <time.h>
 17 | #include "stdx/define.h"
 18 | 
 19 | #include "io/bit_stream.h"
 20 | #include "io/stream_array.h"
 21 | 
 22 | #include "entropy/arith64.h"
 23 | 
 24 | using namespace std;
 25 | using namespace SG;
 26 | 
 27 | //Halves the cumulative counts Frequency[1..256] while keeping the table strictly increasing.
 28 | void Rescale(Counter *Frequency) {
 29 | 	for(Counter *Cur=Frequency+1;Cur!=Frequency+257;++Cur) {
 30 | 		*Cur=(*Cur/2>Cur[-1]) ? *Cur/2 : Cur[-1]+1;	//never let an entry fall to or below its predecessor
 31 | 	}
 32 | }
 33 | 
 34 | namespace
 35 | {
 36 | 	//Owns a heap byte array so that every exit path of main releases it.
 37 | 	struct OwnedBuffer
 38 | 	{
 39 | 		Byte *Data;
 40 | 
 41 | 		OwnedBuffer():Data(NULL){}
 42 | 		~OwnedBuffer(){delete[] Data;}
 43 | 
 44 | 		//Allocates Size bytes and returns false on failure. operator new[] reports
 45 | 		//failure by throwing, so testing its result against NULL would never fire.
 46 | 		bool Allocate(Counter Size)
 47 | 		{
 48 | 			try	{	Data=new Byte[Size];	}
 49 | 			catch(...)	{	Data=NULL;	}
 50 | 			return Data!=NULL;
 51 | 		}
 52 | 	};
 53 | }
 54 | 
 55 | //A quick test for entropy coders. Uses order-0 model.
 56 | //Usage: c|d InputFileName OutputFileName
 57 | //A compressed file is sizeof(Counter) raw bytes holding the original length
 58 | //(native width and byte order), followed by the coded symbols.
 59 | //Returns 0 on success and 1 on any error.
 60 | int main(int argc,char *argv[])
 61 | {
 62 | 	if(argc!=4)
 63 | 	{
 64 | 		cerr<<"Usage: c|d InputFileName OutputFileName\n"
 65 | 			<<"c: compress\n"
 66 | 			<<"d: decompress\n";
 67 | 		return 1;
 68 | 	}
 69 | 
 70 | 	//Check the mode before touching any file, so a mistyped mode cannot truncate the output file.
 71 | 	const char Mode=argv[1][0];
 72 | 	if(Mode!='c'&&Mode!='d')	{	cerr<<"Invalid parameter\n";	return 1;	}
 73 | 
 74 | 	fstream Fin,Fout;
 75 | 	Fin.open(argv[2],ios::in|ios::binary);
 76 | 	if(!Fin.good())	{	cerr<<"File not found\n";	return 1;	}
 77 | 
 78 | 	Fin.seekg(0,ios::end);
 79 | 	const streamoff Length=Fin.tellg();	//negative when the position cannot be determined
 80 | 	Fin.seekg(0,ios::beg);
 81 | 	if(Length<0)	{	cerr<<"Error reading file\n";	return 1;	}
 82 | 	const Counter FileSizeB=(Counter)Length;
 83 | 
 84 | 	//A compressed file must at least contain the length header.
 85 | 	if(Mode=='d'&&FileSizeB<sizeof(Counter))	{	cerr<<"Invalid compressed file\n";	return 1;	}
 86 | 
 87 | 	OwnedBuffer InputFile,OutputFile;
 88 | 	if(!InputFile.Allocate(FileSizeB))	{	cerr<<"Memory allocation error\n";	return 1;	}
 89 | 
 90 | 	Fin.read((char *)InputFile.Data,FileSizeB);
 91 | 	if(!Fin)	{	cerr<<"Error reading file\n";	return 1;	}
 92 | 
 93 | 	Fout.open(argv[3],ios::out|ios::binary);
 94 | 	if(!Fout.good()) {	cerr<<"Error creating file\n";	return 1;	}
 95 | 
 96 | 	Counter OutputSize=0;
 97 | 
 98 | 	try
 99 | 	{
100 | 		if(Mode=='c')
101 | 		{
102 | 			cout<<"Compressing...\n";
103 | 
104 | 			//2000000 spare bytes leave room for the header and for input that does not compress.
105 | 			const Counter Capacity=FileSizeB+2000000;
106 | 			if(!OutputFile.Allocate(Capacity))	{	cerr<<"Memory allocation error\n";	return 1;	}
107 | 
108 | 			io::ArrayInputStream ByteStream(InputFile.Data,FileSizeB);
109 | 			io::ArrayOutputStream  OutputStream(OutputFile.Data,Capacity);
110 | 			io::BitOutputStream  BitStream(OutputStream);
111 | 
112 | 			//Header: the original length, byte by byte as laid out in memory.
113 | 			for(Counter i=0;i<sizeof(Counter);i++) OutputStream.WriteByte(((const Byte*)&FileSizeB)[i]);
114 | 
115 | 			SG::Entropy::ArithmeticEncoder64 EntropyCoder(BitStream);
116 | 
117 | 			//Cumulative counts: symbol s owns [Freq[s],Freq[s+1]) and starts with a count of 1.
118 | 			Counter Freq[257];
119 | 			for(int i=0;i<257;i++) Freq[i]=i;
120 | 
121 | 			for(Counter i=0;i<FileSizeB;i++)
122 | 			{
123 | 				Byte ch=ByteStream.ReadByte();
124 | 				EntropyCoder.EncodeRange(Freq[ch],Freq[ch+1],Freq[256]);
125 | 
126 | 				for(int j=ch+1;j<257;j++) Freq[j]++;
127 | 				if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq);
128 | 			}
129 | 			EntropyCoder.Flush();
130 | 			BitStream.Flush();
131 | 			OutputSize = OutputStream.Tell();
132 | 
133 | 			cout<<OutputSize<<"/"<<FileSizeB<<"\n";
134 | 		}
135 | 		else
136 | 		{
137 | 			cout<<"Decompressing...\n";
138 | 
139 | 			io::ArrayInputStream InputStream(InputFile.Data,FileSizeB);
140 | 			for(Counter i=0;i<sizeof(Counter);i++) ((Byte *)&OutputSize)[i]=InputStream.ReadByte();
141 | 
142 | 			//Size the buffer from the decoded length in the header; the compressed
143 | 			//length says nothing about how large the output will be.
144 | 			if(!OutputFile.Allocate(OutputSize))	{	cerr<<"Memory allocation error\n";	return 1;	}
145 | 			io::ArrayOutputStream ByteStream(OutputFile.Data,OutputSize);
146 | 
147 | 			io::BitInputStream BitStream(InputStream);
148 | 			SG::Entropy::ArithmeticDecoder64 EntropyCoder(BitStream);
149 | 
150 | 			Counter Freq[257];
151 | 			for(int i=0;i<=256;i++) Freq[i]=i;
152 | 
153 | 			for(Counter i=0;i<OutputSize;i++)
154 | 			{
155 | 				Counter Count = EntropyCoder.GetCurrentCount(Freq[256]);
156 | 
157 | 				//Walk down to the symbol whose interval holds Count; Freq[0] stays 0, so the scan always stops.
158 | 				Byte Symbol;
159 | 				for(Symbol=255;Freq[Symbol]>Count;Symbol--);
160 | 
161 | 				ByteStream.WriteByte(Symbol);
162 | 				EntropyCoder.RemoveRange(Freq[Symbol],Freq[Symbol+1],Freq[256]);
163 | 
164 | 				for(int j=Symbol+1;j<257;j++) Freq[j]++;
165 | 				if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq);
166 | 			}
167 | 		}
168 | 	}
169 | 	catch(...)
170 | 	{
171 | 		//ArrayOutputStream::WriteByte throws on overflow; other stream or coder calls may throw as well.
172 | 		cerr<<"Coding error\n";
173 | 		return 1;
174 | 	}
175 | 
176 | 	Fout.write((char*)OutputFile.Data,OutputSize);
177 | 	Fout.close();	//flush now so that write errors are seen before success is reported
178 | 	if(!Fout.good())	{	cerr<<"Error writing file\n";	return 1;	}
179 | 	return 0;
180 | }
181 | 


--------------------------------------------------------------------------------
/external/sg-entropy/order0test_range32.cpp:
--------------------------------------------------------------------------------
  1 | //Entropy Coding Source code
  2 | //By Sachin Garg, 2006
  3 | //
  4 | //Includes range coder based upon the carry-less implementation 
  5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
  6 | //DDJ code.
  7 | // 
  8 | //Modified to use 64-bit variables for improved performance.
  9 | //32-bit reference implementations also included.
 10 | //
 11 | //For details:
 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
 13 | 
 14 | #include <iostream>
 15 | #include <fstream>
 16 | #include <time.h>
 17 | #include "stdx/define.h"
 18 | 
 19 | #include "io/bit_stream.h"
 20 | #include "io/stream_array.h"
 21 | 
 22 | #include "entropy/range32.h"
 23 | 
 24 | using namespace std;
 25 | using namespace SG;
 26 | 
 27 | //Halves the cumulative counts Frequency[1..256] while keeping the table strictly increasing.
 28 | void Rescale(Counter *Frequency) {
 29 | 	for(Counter *Cur=Frequency+1;Cur!=Frequency+257;++Cur) {
 30 | 		*Cur=(*Cur/2>Cur[-1]) ? *Cur/2 : Cur[-1]+1;	//never let an entry fall to or below its predecessor
 31 | 	}
 32 | }
 33 | 
 34 | namespace
 35 | {
 36 | 	//Owns a heap byte array so that every exit path of main releases it.
 37 | 	struct OwnedBuffer
 38 | 	{
 39 | 		Byte *Data;
 40 | 
 41 | 		OwnedBuffer():Data(NULL){}
 42 | 		~OwnedBuffer(){delete[] Data;}
 43 | 
 44 | 		//Allocates Size bytes and returns false on failure. operator new[] reports
 45 | 		//failure by throwing, so testing its result against NULL would never fire.
 46 | 		bool Allocate(Counter Size)
 47 | 		{
 48 | 			try	{	Data=new Byte[Size];	}
 49 | 			catch(...)	{	Data=NULL;	}
 50 | 			return Data!=NULL;
 51 | 		}
 52 | 	};
 53 | }
 54 | 
 55 | //A quick test for entropy coders. Uses order-0 model.
 56 | //Usage: c|d InputFileName OutputFileName
 57 | //A compressed file is sizeof(Counter) raw bytes holding the original length
 58 | //(native width and byte order), followed by the coded symbols.
 59 | //Returns 0 on success and 1 on any error.
 60 | int main(int argc,char *argv[])
 61 | {
 62 | 	if(argc!=4)
 63 | 	{
 64 | 		cerr<<"Usage: c|d InputFileName OutputFileName\n"
 65 | 			<<"c: compress\n"
 66 | 			<<"d: decompress\n";
 67 | 		return 1;
 68 | 	}
 69 | 
 70 | 	//Check the mode before touching any file, so a mistyped mode cannot truncate the output file.
 71 | 	const char Mode=argv[1][0];
 72 | 	if(Mode!='c'&&Mode!='d')	{	cerr<<"Invalid parameter\n";	return 1;	}
 73 | 
 74 | 	fstream Fin,Fout;
 75 | 	Fin.open(argv[2],ios::in|ios::binary);
 76 | 	if(!Fin.good())	{	cerr<<"File not found\n";	return 1;	}
 77 | 
 78 | 	Fin.seekg(0,ios::end);
 79 | 	const streamoff Length=Fin.tellg();	//negative when the position cannot be determined
 80 | 	Fin.seekg(0,ios::beg);
 81 | 	if(Length<0)	{	cerr<<"Error reading file\n";	return 1;	}
 82 | 	const Counter FileSizeB=(Counter)Length;
 83 | 
 84 | 	//A compressed file must at least contain the length header.
 85 | 	if(Mode=='d'&&FileSizeB<sizeof(Counter))	{	cerr<<"Invalid compressed file\n";	return 1;	}
 86 | 
 87 | 	OwnedBuffer InputFile,OutputFile;
 88 | 	if(!InputFile.Allocate(FileSizeB))	{	cerr<<"Memory allocation error\n";	return 1;	}
 89 | 
 90 | 	Fin.read((char *)InputFile.Data,FileSizeB);
 91 | 	if(!Fin)	{	cerr<<"Error reading file\n";	return 1;	}
 92 | 
 93 | 	Fout.open(argv[3],ios::out|ios::binary);
 94 | 	if(!Fout.good()) {	cerr<<"Error creating file\n";	return 1;	}
 95 | 
 96 | 	Counter OutputSize=0;
 97 | 
 98 | 	try
 99 | 	{
100 | 		if(Mode=='c')
101 | 		{
102 | 			cout<<"Compressing...\n";
103 | 
104 | 			//2000000 spare bytes leave room for the header and for input that does not compress.
105 | 			const Counter Capacity=FileSizeB+2000000;
106 | 			if(!OutputFile.Allocate(Capacity))	{	cerr<<"Memory allocation error\n";	return 1;	}
107 | 
108 | 			io::ArrayInputStream ByteStream(InputFile.Data,FileSizeB);
109 | 			io::ArrayOutputStream  OutputStream(OutputFile.Data,Capacity);
110 | 
111 | 			//Header: the original length, byte by byte as laid out in memory.
112 | 			for(Counter i=0;i<sizeof(Counter);i++) OutputStream.WriteByte(((const Byte*)&FileSizeB)[i]);
113 | 
114 | 			SG::Entropy::RangeEncoder32 EntropyCoder(OutputStream);
115 | 
116 | 			//Cumulative counts: symbol s owns [Freq[s],Freq[s+1]) and starts with a count of 1.
117 | 			Counter Freq[257];
118 | 			for(int i=0;i<257;i++) Freq[i]=i;
119 | 
120 | 			for(Counter i=0;i<FileSizeB;i++)
121 | 			{
122 | 				Byte ch=ByteStream.ReadByte();
123 | 				EntropyCoder.EncodeRange(Freq[ch],Freq[ch+1],Freq[256]);
124 | 
125 | 				for(int j=ch+1;j<257;j++) Freq[j]++;
126 | 				if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq);
127 | 			}
128 | 			EntropyCoder.Flush();
129 | 			OutputSize = OutputStream.Tell();
130 | 
131 | 			cout<<OutputSize<<"/"<<FileSizeB<<"\n";
132 | 		}
133 | 		else
134 | 		{
135 | 			cout<<"Decompressing...\n";
136 | 
137 | 			io::ArrayInputStream InputStream(InputFile.Data,FileSizeB);
138 | 			for(Counter i=0;i<sizeof(Counter);i++) ((Byte *)&OutputSize)[i]=InputStream.ReadByte();
139 | 
140 | 			//Size the buffer from the decoded length in the header; the compressed
141 | 			//length says nothing about how large the output will be.
142 | 			if(!OutputFile.Allocate(OutputSize))	{	cerr<<"Memory allocation error\n";	return 1;	}
143 | 			io::ArrayOutputStream ByteStream(OutputFile.Data,OutputSize);
144 | 
145 | 			SG::Entropy::RangeDecoder32 EntropyCoder(InputStream);
146 | 
147 | 			Counter Freq[257];
148 | 			for(int i=0;i<=256;i++) Freq[i]=i;
149 | 
150 | 			for(Counter i=0;i<OutputSize;i++)
151 | 			{
152 | 				Counter Count = EntropyCoder.GetCurrentCount(Freq[256]);
153 | 
154 | 				//Walk down to the symbol whose interval holds Count; Freq[0] stays 0, so the scan always stops.
155 | 				Byte Symbol;
156 | 				for(Symbol=255;Freq[Symbol]>Count;Symbol--);
157 | 
158 | 				ByteStream.WriteByte(Symbol);
159 | 				EntropyCoder.RemoveRange(Freq[Symbol],Freq[Symbol+1],Freq[256]);
160 | 
161 | 				for(int j=Symbol+1;j<257;j++) Freq[j]++;
162 | 				if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq);
163 | 			}
164 | 		}
165 | 	}
166 | 	catch(...)
167 | 	{
168 | 		//ArrayOutputStream::WriteByte throws on overflow; other stream or coder calls may throw as well.
169 | 		cerr<<"Coding error\n";
170 | 		return 1;
171 | 	}
172 | 
173 | 	Fout.write((char*)OutputFile.Data,OutputSize);
174 | 	Fout.close();	//flush now so that write errors are seen before success is reported
175 | 	if(!Fout.good())	{	cerr<<"Error writing file\n";	return 1;	}
176 | 	return 0;
177 | }
178 | 


--------------------------------------------------------------------------------
/external/sg-entropy/order0test_range64.cpp:
--------------------------------------------------------------------------------
  1 | //Entropy Coding Source code
  2 | //By Sachin Garg, 2006
  3 | //
  4 | //Includes range coder based upon the carry-less implementation 
  5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
  6 | //DDJ code.
  7 | // 
  8 | //Modified to use 64-bit variables for improved performance.
  9 | //32-bit reference implementations also included.
 10 | //
 11 | //For details:
 12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
 13 | 
 14 | #include <iostream>
 15 | #include <fstream>
 16 | #include <time.h>
 17 | #include "stdx/define.h"
 18 | 
 19 | #include "io/bit_stream.h"
 20 | #include "io/stream_array.h"
 21 | 
 22 | #include "entropy/range64.h"
 23 | 
 24 | using namespace std;
 25 | using namespace SG;
 26 | 
 27 | //Halves the cumulative counts Frequency[1..256] while keeping the table strictly increasing.
 28 | void Rescale(Counter *Frequency) {
 29 | 	for(Counter *Cur=Frequency+1;Cur!=Frequency+257;++Cur) {
 30 | 		*Cur=(*Cur/2>Cur[-1]) ? *Cur/2 : Cur[-1]+1;	//never let an entry fall to or below its predecessor
 31 | 	}
 32 | }
 33 | 
 34 | namespace
 35 | {
 36 | 	//Owns a heap byte array so that every exit path of main releases it.
 37 | 	struct OwnedBuffer
 38 | 	{
 39 | 		Byte *Data;
 40 | 
 41 | 		OwnedBuffer():Data(NULL){}
 42 | 		~OwnedBuffer(){delete[] Data;}
 43 | 
 44 | 		//Allocates Size bytes and returns false on failure. operator new[] reports
 45 | 		//failure by throwing, so testing its result against NULL would never fire.
 46 | 		bool Allocate(Counter Size)
 47 | 		{
 48 | 			try	{	Data=new Byte[Size];	}
 49 | 			catch(...)	{	Data=NULL;	}
 50 | 			return Data!=NULL;
 51 | 		}
 52 | 	};
 53 | }
 54 | 
 55 | //A quick test for entropy coders. Uses order-0 model.
 56 | //Usage: c|d InputFileName OutputFileName
 57 | //A compressed file is sizeof(Counter) raw bytes holding the original length
 58 | //(native width and byte order), followed by the coded symbols.
 59 | //Returns 0 on success and 1 on any error.
 60 | int main(int argc,char *argv[])
 61 | {
 62 | 	if(argc!=4)
 63 | 	{
 64 | 		cerr<<"Usage: c|d InputFileName OutputFileName\n"
 65 | 			<<"c: compress\n"
 66 | 			<<"d: decompress\n";
 67 | 		return 1;
 68 | 	}
 69 | 
 70 | 	//Check the mode before touching any file, so a mistyped mode cannot truncate the output file.
 71 | 	const char Mode=argv[1][0];
 72 | 	if(Mode!='c'&&Mode!='d')	{	cerr<<"Invalid parameter\n";	return 1;	}
 73 | 
 74 | 	fstream Fin,Fout;
 75 | 	Fin.open(argv[2],ios::in|ios::binary);
 76 | 	if(!Fin.good())	{	cerr<<"File not found\n";	return 1;	}
 77 | 
 78 | 	Fin.seekg(0,ios::end);
 79 | 	const streamoff Length=Fin.tellg();	//negative when the position cannot be determined
 80 | 	Fin.seekg(0,ios::beg);
 81 | 	if(Length<0)	{	cerr<<"Error reading file\n";	return 1;	}
 82 | 	const Counter FileSizeB=(Counter)Length;
 83 | 
 84 | 	//A compressed file must at least contain the length header.
 85 | 	if(Mode=='d'&&FileSizeB<sizeof(Counter))	{	cerr<<"Invalid compressed file\n";	return 1;	}
 86 | 
 87 | 	OwnedBuffer InputFile,OutputFile;
 88 | 	if(!InputFile.Allocate(FileSizeB))	{	cerr<<"Memory allocation error\n";	return 1;	}
 89 | 
 90 | 	Fin.read((char *)InputFile.Data,FileSizeB);
 91 | 	if(!Fin)	{	cerr<<"Error reading file\n";	return 1;	}
 92 | 
 93 | 	Fout.open(argv[3],ios::out|ios::binary);
 94 | 	if(!Fout.good()) {	cerr<<"Error creating file\n";	return 1;	}
 95 | 
 96 | 	Counter OutputSize=0;
 97 | 
 98 | 	try
 99 | 	{
100 | 		if(Mode=='c')
101 | 		{
102 | 			cout<<"Compressing...\n";
103 | 
104 | 			//2000000 spare bytes leave room for the header and for input that does not compress.
105 | 			const Counter Capacity=FileSizeB+2000000;
106 | 			if(!OutputFile.Allocate(Capacity))	{	cerr<<"Memory allocation error\n";	return 1;	}
107 | 
108 | 			io::ArrayInputStream ByteStream(InputFile.Data,FileSizeB);
109 | 			io::ArrayOutputStream  OutputStream(OutputFile.Data,Capacity);
110 | 
111 | 			//Header: the original length, byte by byte as laid out in memory.
112 | 			for(Counter i=0;i<sizeof(Counter);i++) OutputStream.WriteByte(((const Byte*)&FileSizeB)[i]);
113 | 
114 | 			SG::Entropy::RangeEncoder64 EntropyCoder(OutputStream);
115 | 
116 | 			//Cumulative counts: symbol s owns [Freq[s],Freq[s+1]) and starts with a count of 1.
117 | 			Counter Freq[257];
118 | 			for(int i=0;i<257;i++) Freq[i]=i;
119 | 
120 | 			for(Counter i=0;i<FileSizeB;i++)
121 | 			{
122 | 				Byte ch=ByteStream.ReadByte();
123 | 				EntropyCoder.EncodeRange(Freq[ch],Freq[ch+1],Freq[256]);
124 | 
125 | 				for(int j=ch+1;j<257;j++) Freq[j]++;
126 | 				if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq);
127 | 			}
128 | 			EntropyCoder.Flush();
129 | 			OutputSize = OutputStream.Tell();
130 | 
131 | 			cout<<OutputSize<<"/"<<FileSizeB<<"\n";
132 | 		}
133 | 		else
134 | 		{
135 | 			cout<<"Decompressing...\n";
136 | 
137 | 			io::ArrayInputStream InputStream(InputFile.Data,FileSizeB);
138 | 			for(Counter i=0;i<sizeof(Counter);i++) ((Byte *)&OutputSize)[i]=InputStream.ReadByte();
139 | 
140 | 			//Size the buffer from the decoded length in the header; the compressed
141 | 			//length says nothing about how large the output will be.
142 | 			if(!OutputFile.Allocate(OutputSize))	{	cerr<<"Memory allocation error\n";	return 1;	}
143 | 			io::ArrayOutputStream ByteStream(OutputFile.Data,OutputSize);
144 | 
145 | 			SG::Entropy::RangeDecoder64 EntropyCoder(InputStream);
146 | 
147 | 			Counter Freq[257];
148 | 			for(int i=0;i<=256;i++) Freq[i]=i;
149 | 
150 | 			for(Counter i=0;i<OutputSize;i++)
151 | 			{
152 | 				Counter Count = EntropyCoder.GetCurrentCount(Freq[256]);
153 | 
154 | 				//Walk down to the symbol whose interval holds Count; Freq[0] stays 0, so the scan always stops.
155 | 				Byte Symbol;
156 | 				for(Symbol=255;Freq[Symbol]>Count;Symbol--);
157 | 
158 | 				ByteStream.WriteByte(Symbol);
159 | 				EntropyCoder.RemoveRange(Freq[Symbol],Freq[Symbol+1],Freq[256]);
160 | 
161 | 				for(int j=Symbol+1;j<257;j++) Freq[j]++;
162 | 				if(Freq[256]>=EntropyCoder.MaxRange) Rescale(Freq);
163 | 			}
164 | 		}
165 | 	}
166 | 	catch(...)
167 | 	{
168 | 		//ArrayOutputStream::WriteByte throws on overflow; other stream or coder calls may throw as well.
169 | 		cerr<<"Coding error\n";
170 | 		return 1;
171 | 	}
172 | 
173 | 	Fout.write((char*)OutputFile.Data,OutputSize);
174 | 	Fout.close();	//flush now so that write errors are seen before success is reported
175 | 	if(!Fout.good())	{	cerr<<"Error writing file\n";	return 1;	}
176 | 	return 0;
177 | }
178 | 


--------------------------------------------------------------------------------
/external/sg-entropy/readme.txt:
--------------------------------------------------------------------------------
 1 | Entropy Coding Source code
 2 | By Sachin Garg, 2006
 3 | 
 4 | Includes range coder based upon the carry-less implementation 
 5 | by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | DDJ code.
 7 |  
 8 | Modified to use 64-bit variables for improved performance.
 9 | 32-bit reference implementations also included.
10 | 
11 | For details:
12 | http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | Please send your suggestions, improvements, errors, feedback etc... 
15 | Read license.txt before using this in any way.
16 | 


--------------------------------------------------------------------------------
/external/sg-entropy/stdx/bit.cpp:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #include "stdx/bit.h"
15 | 
16 | 
17 | //Returns 1 when bit BitNo (0 = least significant) of Data is set, 0 otherwise.
18 | SG::Binary SG::stdx::GetBit(SG::Byte BitNo,SG::Byte Data) {
19 | 	return (Data>>BitNo)&1;
20 | }
21 | 
22 | //Sets bit BitNo of Data when X is non-zero and clears it otherwise.
23 | //All other bits of Data are left as they were.
24 | void SG::stdx::SetBit(SG::Byte BitNo,SG::Byte &Data,SG::Binary X) {
25 | 	const int Mask=1<<BitNo;
26 | 	if(X) Data|=Mask;
27 | 	else  Data&=~Mask;
28 | }
29 | 
30 | //SG::stdx::Type::Binary SG::stdx::Bit::GetArr(SG::stdx::Type::Counter BitNo,SG::stdx::Type::Byte *Data)
31 | //{
32 | //	return Get((SG::stdx::Type::Byte)(BitNo%8),Data[BitNo/8]);
33 | //}
34 | //
35 | //void SG::stdx::Bit::SetArr(SG::stdx::Type::Counter BitNo,SG::stdx::Type::Byte *Data,SG::stdx::Type::Binary X)
36 | //{
37 | //	Set((SG::stdx::Type::Byte)(BitNo%8),Data[BitNo/8],X);
38 | //}
39 | 


--------------------------------------------------------------------------------
/external/sg-entropy/stdx/bit.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef __sg_stdx_bit
15 | #define __sg_stdx_bit
16 | 
17 | #include "stdx/define.h"
18 | 
19 | namespace SG
20 | {
21 | 	namespace stdx
22 | 	{
23 | 		//Helpers for reading and writing individual bits of a byte.
24 | 
25 | 		//Checks whether bit BitNo of Data is set. Returns 1 if it is, 0 otherwise.
26 | 		SG::Binary GetBit(SG::Byte BitNo,SG::Byte Data);
27 | 
28 | 		//Sets bit BitNo of Data to X (X = 0 or 1); Data is modified in place.
29 | 		void SetBit(SG::Byte BitNo,SG::Byte &Data,SG::Binary X);
30 | 
31 | 		//Counterparts of the functions above for vector<SG::Byte> (currently commented out)
32 | 		//inline SG::Binary GetArr(SG::Counter BitNo,std::vector<Byte> &Data);
33 | 		//inline void SetArr(SG::Counter BitNo,std::vector<Byte> &Data,SG::stdx::Type::Binary X);
34 | 	}
35 | }
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/external/sg-entropy/stdx/define.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef __sg_stdx_define
15 | #define __sg_stdx_define
16 | 
17 | //Constants
18 | #define True 1
19 | #define False 0
20 | #define Yes 1
21 | #define No 0
22 | 
23 | #ifndef NULL
24 | #define NULL 0
25 | #endif
26 | 
27 | #define NotFound -1
28 | 
29 | namespace SG
30 | {
31 | 	//Compiler/implementation dependent typedefs
32 | 	typedef unsigned char Byte;
33 | 	typedef unsigned short Word;
34 | 	typedef unsigned long DWord;	//NOTE(review): unsigned long is 64 bits wide on LP64 platforms - confirm nothing relies on 32 bits
35 | 	//typedef unsigned __int64 QWord;		//MS platform
36 | 	typedef unsigned long long QWord;	//Linux and other Unices
37 | 	typedef long double Real;
38 | 
39 | 	typedef Byte Binary;	//represents 0 or 1
40 | 	typedef bool Boolean;	//represents true or false
41 | 
42 | 	typedef DWord Counter;	//same width as DWord, so its size also varies by platform
43 | 	typedef Word SmallCounter;
44 | 	typedef QWord BigCounter;
45 | 
46 | 	typedef signed long Num;
47 | 	typedef unsigned int FastInt;
48 | }
49 | 
50 | #endif
51 | 
52 | 


--------------------------------------------------------------------------------
/external/sg-entropy/stdx/exception.cpp:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #include "stdx/exception.h"
15 | 
16 | //Stores the two strings supplied by the thrower in the public members of the same name.
17 | SG::stdx::Exception::Exception(std::string Description,std::string Location)
18 | 	: Description(Description)	//each member is initialised from the same-named parameter
19 | 	, Location(Location)
20 | {}
21 | 


--------------------------------------------------------------------------------
/external/sg-entropy/stdx/exception.h:
--------------------------------------------------------------------------------
 1 | //Entropy Coding Source code
 2 | //By Sachin Garg, 2006
 3 | //
 4 | //Includes range coder based upon the carry-less implementation 
 5 | //by Dmitry Subbotin, and arithmetic coder based upon Mark Nelson's
 6 | //DDJ code.
 7 | // 
 8 | //Modified to use 64-bit variables for improved performance.
 9 | //32-bit reference implementations also included.
10 | //
11 | //For details:
12 | //http://www.sachingarg.com/compression/entropy_coding/64bit
13 | 
14 | #ifndef __sg_stdx_exception
15 | #define __sg_stdx_exception
16 | 
17 | #include <string>
18 | 
19 | namespace SG
20 | {
21 | 	namespace stdx
22 | 	{
23 | 		//Standard exception object to be thrown; it has no base class, so it is not a std::exception
24 | 		class Exception
25 | 		{
26 | 
27 | 		public:
28 | 			Exception(std::string Description,std::string Location);	//copies both strings into the members below
29 | 
30 | 			std::string Description;	//first constructor argument; ArrayOutputStream::WriteByte passes "Buffer Overflow"
31 | 			std::string Location;	//second constructor argument; WriteByte passes its own qualified function name
32 | 		};
33 | 	}
34 | }
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/include/aux-encoding.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * aux-encoding.hpp for bwt tunneling
 3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |  * of this software and associated documentation files (the "Software"), to deal
 7 |  * in the Software without restriction, including without limitation the rights
 8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |  * copies of the Software, and to permit persons to whom the Software is
10 |  * furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in all
13 |  * copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |  * SOFTWARE.
22 |  */
23 | 
24 | #ifndef _AUX_ENCODING_HPP
25 | #define _AUX_ENCODING_HPP
26 | 
27 | #include "twobitvector.hpp"
28 | 
29 | //! namespace gathering constants for interpretation of the auxiliary data structure
30 | namespace aux_encoding {
31 | 	typedef twobitvector::value_type value_type;
32 | 	//! regular bwt entry (no flag bit set)
33 | 	const value_type REG = 0;
34 | 	//! entry indicating the end of a tunnel (flag bit 0)
35 | 	const value_type SKP_F = 1;
36 | 	//! entry indicating the start of a tunnel (flag bit 1)
37 | 	const value_type IGN_L = 2;
38 | 	//! entry to be removed (both flag bits set, i.e. value 3)
39 | 	const value_type REM = SKP_F | IGN_L;
40 | 	//! alphabet size in auxiliary data structure; NOTE(review): REM equals SIGMA, so presumably REM is never stored - confirm
41 | 	const value_type SIGMA = 3;
42 | };
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/include/bcm-compressor.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * bcm-compressor.hpp for bwt tunneling
 3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |  * of this software and associated documentation files (the "Software"), to deal
 7 |  * in the Software without restriction, including without limitation the rights
 8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |  * copies of the Software, and to permit persons to whom the Software is
10 |  * furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in all
13 |  * copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |  * SOFTWARE.
22 |  */
23 | 
24 | #ifndef BCM_COMPRESSOR_HPP
25 | #define BCM_COMPRESSOR_HPP
26 | 
27 | #include "bwt-compressor.hpp"
28 | #include "tbwt-compressor.hpp"
29 | #include "bcm-ss.hpp"
30 | 
31 | #include "block-scores-rle-model.hpp"
32 | 
33 | #include <istream>
34 | #include <limits>
35 | #include <ostream>
36 | #include <stdexcept>
37 | 
38 | //! class which encodes a BWT with second stage by Ilya Muravyov
39 | class BW_SS_BCM : public block_scores_rle_model {
40 | 	
41 | public:
42 | 	//! encodes the transform t by passing its symbols, front to back, to a
43 | 	//! bcm::CM coder that writes to out; the coder is flushed at the end
44 | 	template<class T>
45 | 	static void encode( T &t, std::ostream &out ) {
46 | 		bcm::CM coder;
47 | 		t_idx_t pos = 0;
48 | 		while (pos < t.size()) coder.Encode( t[pos++], out );
49 | 		coder.Flush(out);
50 | 	}
51 | 
52 | 	//! decodes t.size() symbols from in with a bcm::CM coder and stores them in t
53 | 	//! (t must have length of output)
54 | 	template<class T>
55 | 	static void decode( std::istream &in, T &t ) {
56 | 		bcm::CM coder;
57 | 		coder.Init(in);
58 | 		t_idx_t pos = 0;
59 | 		while (pos < t.size()) t[pos++] = coder.Decode(in);
60 | 	}
61 | };
62 | 
63 | //typedefs defining compressors
64 | typedef bwt_compressor<BW_SS_BCM> bwt_compressor_bcm;
65 | typedef tbwt_compressor<BW_SS_BCM> tbwt_compressor_bcm;
66 | 
67 | #endif
68 | 


--------------------------------------------------------------------------------
/include/block-nav-support.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * block-nav-support.hpp for bwt tunneling
 3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |  * of this software and associated documentation files (the "Software"), to deal
 7 |  * in the Software without restriction, including without limitation the rights
 8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |  * copies of the Software, and to permit persons to whom the Software is
10 |  * furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in all
13 |  * copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |  * SOFTWARE.
22 |  */
23 | 
24 | #ifndef _BLOCK_NAV_SUPPORT_HPP
25 | #define _BLOCK_NAV_SUPPORT_HPP
26 | 
27 | #include "bwt-run-support.hpp"
28 | #include "bwt-config.hpp"
29 | 
30 | #include <vector>
31 | 
32 | //! support class for blocks and block navigation in a bwt.
33 | /*! class offers methods to compute blocks, as well as methods
34 |    to store, enumerate and remove block collisions.
35 | */
36 | class block_nav_support {
37 | 	private:
38 | 		const bwt_run_support &bwtrs; //navigation
39 | 		const t_size_t mbh = 2; //minimal block height
40 | 
41 | 		std::vector<t_idx_t> m_end; //end position of blocks (see below)
42 | 		std::vector<t_idx_t> collisions; //map for collisions
43 | 
44 | 		void compute_blocks();
45 | 		void init_empty_collision_map();
46 | 
47 | 	public:
48 | 		//! number of blocks (always equal to number of runs)
49 | 		const t_size_t& blocks;
50 | 
51 | 		//! exclusive end position (upper left position in BWT) of block
52 | 		const std::vector<t_idx_t> &end = m_end;
53 | 
54 | 		//! constructor, expects a navigation and a minimal block height.
55 | 		/*! note that collisions will NOT be computed by this function,
56 | 		   use function add_collision for this purpose.
57 | 		*/
58 | 		block_nav_support( const bwt_run_support &bwsupport )
59 | 			: bwtrs( bwsupport ), blocks( bwtrs.runs ) {
60 | 			compute_blocks();
61 | 			init_empty_collision_map();
62 | 		};
63 | 
64 | 		//! adds a collision between inner block ic_b and outer block oc_b.
65 | 		void add_collision( t_idx_t ic_b, t_idx_t oc_b );
66 | 
67 | 		//! sets end of a block b to the given value
68 | 		void set_end( t_idx_t b, t_idx_t e );
69 | 
70 | 		//! computes all inner colliding blocks of the given one (array is ordered in text order).
71 | 		//! Note that first block always is block b.
72 | 		std::vector<t_idx_t> get_inner_collisions( t_idx_t b ) const;
73 | 
74 | 		//! computes all outer colliding blocks of the given one. Note that first block always is block b.
75 | 		std::vector<t_idx_t> get_outer_collisions( t_idx_t b ) const;
76 | 
77 | 		//! removes all collisions between colliding inner and outer blocks of b
78 | 		void remove_inner_outer_collisions( t_idx_t b );
79 | };
80 | 
81 | #endif
82 | 


--------------------------------------------------------------------------------
/include/bw94-compressor.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * bw94-compressor.hpp for bwt tunneling
  3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  6 |  * of this software and associated documentation files (the "Software"), to deal
  7 |  * in the Software without restriction, including without limitation the rights
  8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 |  * copies of the Software, and to permit persons to whom the Software is
 10 |  * furnished to do so, subject to the following conditions:
 11 |  *
 12 |  * The above copyright notice and this permission notice shall be included in all
 13 |  * copies or substantial portions of the Software.
 14 |  *
 15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 |  * SOFTWARE.
 22 |  */
 23 | 
 24 | #ifndef BW94_COMPRESSOR_HPP
 25 | #define BW94_COMPRESSOR_HPP
 26 | 
 27 | #include "bwt-compressor.hpp"
 28 | #include "tbwt-compressor.hpp"
 29 | 
 30 | #include "aux-encoding.hpp"
 31 | #include "bwt-run-support.hpp"
 32 | #include "entropy-coder.hpp"
 33 | #include "mtf-coder.hpp"
 34 | #include "rle0-coder.hpp"
 35 | #include "twobitvector.hpp"
 36 | 
 37 | #include "block-scores-rle-model.hpp"
 38 | 
 39 | #include <istream>
 40 | #include <limits>
 41 | #include <ostream>
 42 | #include <stdexcept>
 43 | 
 44 | //! class which encodes a BWT with MTF + RLE0 + Entropy as second stage
 45 | class BW_SS_BW94 : public block_scores_rle_model {
 46 | public:
 47 | 	//! encodes the transform t using MTF + RLE0 + Entropy
 48 | 	template<class T>
 49 | 	static void encode( T &t, std::ostream &out ) {
 50 | 		//write alphabet
 51 | 		auto alph = mtf_coder<T>::compute_alph( t );
 52 | 		out.put( (t_uchar_t)alph.size() ); //store alphabet size (note that this stores 0 if full alphabet is used)
 53 | 		for (t_idx_t i = 0; i < alph.size(); i++) { //and the alphabet itself
 54 | 			out.put( alph[i] );
 55 | 		}
 56 | 
 57 | 		//prepare encoders
 58 | 		mtf_coder<T> mtfcoder( alph );
 59 | 		rle0_encoder<T> rle0coder;
 60 | 		entropy_encoder<std::ostream> entcoder( out );
 61 | 		entcoder.reset( alph.size() + 1 );
 62 | 
 63 | 		for (t_idx_t i = 0; i < t.size(); ) { //do encoding
 64 | 			do {
 65 | 				if (i >= t.size())	break;
 66 | 				//feed rle0-encoder with mtf coded input until some contents can be written
 67 | 			} while (rle0coder.encode_char( mtfcoder.encode_char( t[i++] ) ));
 68 | 
 69 | 			//move the output of the rle0coder to the entropy coder
 70 | 			while (rle0coder.has_next_enc_char()) {
 71 | 				entcoder.encode_char( rle0coder.next_enc_char() );
 72 | 			}
 73 | 		}
 74 | 		entcoder.flush();
 75 | 	}
 76 | 
 77 | 	//! decodes the transform and stores it in t using MTF + RLE0 + Entropy (t must have length of output)
 78 | 	template<class T>
 79 | 	static void decode( std::istream &in, T &t ) {
 80 | 		t_size_t alphsize = in.get();
 81 | 		//check validity
 82 | 		if (alphsize == 0u) {
 83 | 			if (t.size() == 0) return;
 84 | 			alphsize = std::numeric_limits<t_uchar_t>::max()+1u; //remember that on full alphabet 0 is stored
 85 | 		}
 86 | 		if (alphsize > t.size())
 87 | 			throw std::invalid_argument("alphabet must be smaller than encoded string size");
 88 | 
 89 | 		//read alphabet
 90 | 		T alph; alph.resize( alphsize ); 
 91 | 		for (t_idx_t i = 0; i < alph.size(); i++) {
 92 | 			alph[i] = in.get();
 93 | 		}
 94 | 
 95 | 		//set up required decodes
 96 | 		mtf_coder<T> mtfcoder( alph );
 97 | 		rle0_decoder<T> rle0coder;
 98 | 		entropy_decoder<std::istream> entcoder( in );
 99 | 		entcoder.reset( alph.size() + 1 );
100 | 
101 | 		//do decoding
102 | 		for (t_idx_t i = 0; i < t.size(); entcoder.next() ) {
103 | 			//feed rle0-decoder with input
104 | 			rle0coder.decode_char( entcoder.decode_char() );
105 | 
106 | 			//fetch characters from rle0-decoder and invert mtf
107 | 			while (i < t.size() && rle0coder.has_next_char()) {
108 | 				t[i++] = mtfcoder.decode_char( rle0coder.next_char() );
109 | 			}
110 | 		}
111 | 		if (rle0coder.has_next_char()) {
112 | 			throw std::invalid_argument("encoded rle0-sequence is longer than text length");
113 | 		}
114 | 	}
115 | };
116 | 
117 | //typedefs defining compressors
118 | typedef bwt_compressor<BW_SS_BW94> bwt_compressor_bw94;
119 | typedef tbwt_compressor<BW_SS_BW94> tbwt_compressor_bw94;
120 | 
121 | #endif
122 | 


--------------------------------------------------------------------------------
/include/bwt-config.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * bwt-config.hpp for bwt tunneling
 3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |  * of this software and associated documentation files (the "Software"), to deal
 7 |  * in the Software without restriction, including without limitation the rights
 8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |  * copies of the Software, and to permit persons to whom the Software is
10 |  * furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in all
13 |  * copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |  * SOFTWARE.
22 |  */
23 | 
24 | #ifndef _BWT_CONFIG_HPP
25 | #define _BWT_CONFIG_HPP
26 | 
27 | #include <limits>
28 | #include <stdint.h>
29 | #include <vector>
30 | 
//basic types used throughout the project
using t_uchar_t   = uint8_t;                //single character (byte)
using t_size_t    = uint32_t;               //sizes / lengths
using t_idx_t     = uint32_t;               //positions / indices
using t_bitsize_t = int64_t;                //sizes measured in bits
using t_string_t  = std::vector<t_uchar_t>; //string of characters

const t_size_t t_max_size = (1024ul + 512ul)*1024ul*1024ul; //maximal size of input (1.5 GiB)
38 | 
39 | #include "divsufsort.h"
40 | 
41 | //do some type assertions
42 | static_assert( std::numeric_limits<saidx_t>::max() > t_max_size,
43 |                "saidx_t is too small" );
44 | static_assert( std::numeric_limits<t_idx_t>::max() > t_max_size,
45 |                "t_idx_t is too small" );
46 | static_assert( std::numeric_limits<t_size_t>::max() > t_max_size,
47 |                "t_size_t is too small" );
48 | static_assert( std::numeric_limits<t_bitsize_t>::max() > 8ul * t_max_size,
49 |                "t_bitsize_t is too small" );
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/include/bwt-run-support.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * bwt-run-support.hpp for bwt tunneling
  3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  6 |  * of this software and associated documentation files (the "Software"), to deal
  7 |  * in the Software without restriction, including without limitation the rights
  8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 |  * copies of the Software, and to permit persons to whom the Software is
 10 |  * furnished to do so, subject to the following conditions:
 11 |  *
 12 |  * The above copyright notice and this permission notice shall be included in all
 13 |  * copies or substantial portions of the Software.
 14 |  *
 15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 |  * SOFTWARE.
 22 |  */
 23 | 
 24 | #ifndef _BWT_RUN_SUPPORT_HPP
 25 | #define _BWT_RUN_SUPPORT_HPP
 26 | 
 27 | #include <vector>
 28 | 
 29 | #include "bwt-config.hpp"
 30 | 
 31 | //! support structure for bwt navigation and bwt run support
 32 | /*! The support structure distinguishes between positions in
 33 |    the logical BWT, that is, the BWT of a nullterminated string,
 34 |    and the indexed BWT, which is BWT of a nullterminated string
 35 |    where the null-character in the BWT is removed and stored by
 36 |    a such-called bwt idx.
 37 |    Both versions differ not much, but make a difference in positioning.
 38 |    Unless especially stated, this support structure always uses
 39 |    logical positioning, but also offers conversion methods to switch
 40 |    between logical and indexed positioning.
 41 | */
 42 | class bwt_run_support {
 43 | 	private:
 44 | 		t_size_t m_runs; //number of logical runs
 45 | 		t_size_t m_idx_runs; //number of runs (indexed BWT)
 46 | 		t_idx_t m_bwt_idx; //bwt index
 47 | 		t_size_t m_n; //logical text length
 48 | 		t_size_t m_idx_n; //text length (indexed BWT)
 49 | 		t_size_t m_sigma; //size of alphabet
 50 | 		t_size_t m_max_char_val; //maximal value of an element in alphabet
 51 | 
 52 | 		std::vector<t_idx_t> m_lfr; //lf, only for the start of runs
 53 | 		std::vector<t_idx_t> m_rs; //start positions of all runs, sorted ascending.
 54 | 		                           //additionally, m_rs[m_runs] = n+1 holds.
 55 | 
 56 | 	public:
 57 | 		//! constructor, expects a indexed BWT and its primary index.
 58 | 		bwt_run_support( const t_uchar_t *bwt, t_size_t _n, t_idx_t _idx );
 59 | 
 60 | 		//! logical number of runs in BWT
 61 | 		const t_size_t &runs = m_runs;
 62 | 
 63 | 		//! number of runs in the indexed BWT
 64 | 		const t_size_t &idx_runs = m_idx_runs;
 65 | 
 66 | 		//! primary index of the bwt
 67 | 		const t_idx_t &bwt_idx = m_bwt_idx;
 68 | 
 69 | 		//! logical length of text
 70 | 		const t_size_t &n = m_n;
 71 | 
 72 | 		//! real text length (also length of indexed BWT)
 73 | 		const t_size_t &idx_n = m_idx_n;
 74 | 
 75 | 		//! size of alphabet in text
 76 | 		const t_size_t &sigma = m_sigma;
 77 | 
 78 | 		//! maximal value of an element in alphabet
 79 | 		const t_size_t &max_char_val = m_max_char_val;
 80 | 
 81 | 		//! utility function, returns lf at the start of the given run
 82 | 		t_idx_t run_lf( t_idx_t r ) const {
 83 | 			return m_lfr[r];
 84 | 		};
 85 | 
 86 | 		//! utility function, returns the start of a run
 87 | 		t_idx_t start( t_idx_t r ) const {
 88 | 			return m_rs[r];
 89 | 		};
 90 | 
 91 | 		//! function returns the run to which position i belongs,
 92 | 		//  or a value >= runs if i does not belong to any run (e.g. i < 0 or i >= n)
 93 | 		t_idx_t run_of( t_idx_t i ) const;
 94 | 
 95 | 		//! utility function, computes height of a run
 96 | 		t_size_t height( t_idx_t r ) const {
 97 | 			return m_rs[r+1]-m_rs[r];
 98 | 		};
 99 | 
100 | 		//! utility function, computes exclusive end of a run
101 | 		t_idx_t end( t_idx_t r ) const {
102 | 			return m_rs[r+1];
103 | 		};
104 | 
105 | 		//! utility function, converts a position in the indexed bwt to
106 | 		//! a position in the logical bwt
107 | 		t_idx_t idx_to_log( t_idx_t p_idx ) const {
108 | 			return (p_idx < bwt_idx) ? p_idx : p_idx + 1;
109 | 		};
110 | 
111 | 		//! utility function, converts a logical position in the bwt to
112 | 		//! a position in the indexed bwt
113 | 		t_idx_t log_to_idx( t_idx_t p_log ) const {
114 | 			return (p_log <= bwt_idx) ? p_log : p_log - 1;
115 | 		};
116 | };
117 | 
118 | #endif
119 | 


--------------------------------------------------------------------------------
/include/mtf-coder.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * mtf-coder.hpp for bwt tunneling
  3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  6 |  * of this software and associated documentation files (the "Software"), to deal
  7 |  * in the Software without restriction, including without limitation the rights
  8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 |  * copies of the Software, and to permit persons to whom the Software is
 10 |  * furnished to do so, subject to the following conditions:
 11 |  *
 12 |  * The above copyright notice and this permission notice shall be included in all
 13 |  * copies or substantial portions of the Software.
 14 |  *
 15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 |  * SOFTWARE.
 22 |  */
 23 | 
 24 | #ifndef _MTF_CODER_HPP
 25 | #define _MTF_CODER_HPP
 26 | 
 27 | #include <limits>
 28 | #include <stdexcept>
 29 | #include <vector>
 30 | 
 31 | //! class for mtf-transformations, requires a string type
 32 | /*! template parameter string_t should support random access [], as well as
 33 |   resize() - function, empty construction and size()-function.
 34 |  */
 35 | template<class string_t> 
 36 | class mtf_coder {
 37 | 	public:
 38 | 		typedef typename string_t::value_type char_type;
 39 | 		typedef typename string_t::size_type size_type;
 40 | 	private:
 41 | 		string_t alph;
 42 | 	public:
 43 | 		//! constructs an mtf coder, expects an alphabet of the underlying source.
 44 | 		mtf_coder (string_t _alph) : alph(_alph) {};
 45 | 
 46 | 		//! encodes a single character, and returns the coding for the character
 47 | 		char_type encode_char( char_type c ) {
 48 | 			char_type r = 0; //rank of S[i] in alph
 49 | 			while (alph[0] != c) { //move S[i] to front
 50 | 				++r;
 51 | 
 52 | 				char_type tmp = alph[0];
 53 | 				alph[0] = alph[r];
 54 | 				alph[r] = tmp;
 55 | 			}
 56 | 			return r;
 57 | 		};
 58 | 
 59 | 		//! decodes a single encoded character and returns its decoded value.
 60 | 		/*! throws invalid_argument if ranks in S are bigger than alphabet size.
 61 | 		 */
 62 | 		char_type decode_char( char_type c ) {
 63 | 			if (c >= alph.size())
 64 | 				throw std::invalid_argument("MTF Retransform failed");
 65 | 
 66 | 			while (c > 0) { //move alph[c] to front
 67 | 				char_type tmp = alph[c-1];
 68 | 				alph[c-1] = alph[c];
 69 | 				alph[c] = tmp;
 70 | 
 71 | 				--c;
 72 | 			}
 73 | 			return (char_type)alph[0];
 74 | 		};
 75 | 
 76 | 		//! computes alphabet from underlying string S.
 77 | 		/*! alphabet must consist of elements in [0..maxsigma-1],
 78 | 		   depending on the type of S (e.g., if S is a vector of 1-byte-characters,
 79 | 		   it's suitable to choose maxsigma = 256)
 80 | 		   Note that alphabet order is equal to the order of the first appearance
 81 | 		   of the characters in S.
 82 | 		*/
 83 | 		static string_t compute_alph( const string_t &S,
 84 | 		                              size_type maxsigma = std::numeric_limits<char_type>::max()+1 ) {
 85 | 			//set up alphabet and bitmap
 86 | 			std::vector<bool> charUsed( maxsigma );
 87 | 			string_t alph;  alph.resize( maxsigma );
 88 | 			size_type sigma = 0;
 89 | 
 90 | 			//compute alphabet
 91 | 			for (size_type i = 0; i < S.size(); i++) {
 92 | 				size_type ch = S[i];
 93 | 				if (!charUsed[ch]) {
 94 | 					alph[sigma++] = ch;
 95 | 					charUsed[ch] = true;
 96 | 				}
 97 | 			}
 98 | 			alph.resize( sigma );
 99 | 			return alph;
100 | 		};
101 | 
102 | 		//! transforms a string S using Move-To-Front Transformation.
103 | 		/*! alph must be a list of the alphabet used in S, e.g. as computed
104 | 		   by function compute_alph (alph should be a copy, as it gets modified
105 | 		   during execution)
106 | 		*/
107 | 		static void transform( string_t &S, string_t alph ) {
108 | 			mtf_coder coder( std::move( alph ) );
109 | 			for (size_type i = 0; i < S.size(); i++) {
110 | 				S[i] = coder.encode_char( S[i] );
111 | 			}
112 | 		};
113 | 
114 | 		//! retransforms a Move-To-Front transformed string S using alph.
115 | 		/*! this function thus is the inverse operation of mtf_transform.
116 | 		   Note that for correct reconstruction, alph must be same as
117 | 		   given to mtf_transform.
118 | 		   throws invalid_argument if ranks in S are bigger than alphabet size
119 | 		*/
120 | 		static void retransform( string_t &S, string_t alph ) {
121 | 			mtf_coder coder( std::move( alph ) );
122 | 			for (size_type i = 0; i < S.size(); i++) {
123 | 				S[i] = coder.decode_char( S[i] );
124 | 			}
125 | 		};
126 | };
127 | 
128 | #endif
129 | 


--------------------------------------------------------------------------------
/include/twobitvector.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * twobitvector.hpp for bwt tunneling
  3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  6 |  * of this software and associated documentation files (the "Software"), to deal
  7 |  * in the Software without restriction, including without limitation the rights
  8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 |  * copies of the Software, and to permit persons to whom the Software is
 10 |  * furnished to do so, subject to the following conditions:
 11 |  *
 12 |  * The above copyright notice and this permission notice shall be included in all
 13 |  * copies or substantial portions of the Software.
 14 |  *
 15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 |  * SOFTWARE.
 22 |  */
 23 | 
 24 | #ifndef _TWOBITVECTOR_HPP
 25 | #define _TWOBITVECTOR_HPP
 26 | 
 27 | #include <assert.h>
 28 | #include <stdint.h>
 29 | #include <vector>
 30 | 
 31 | //! a simple implementation of a vector where each entry requires 2 bits.
 32 | class twobitvector {
 33 | 	public:
 34 | 		typedef uint8_t                            value_type;
 35 | 		typedef std::vector<value_type>::size_type size_type;
 36 | 
 37 | 		//! reference type for twobitvector
 38 | 		class reference {
 39 | 			private:
 40 | 				value_type &val;
 41 | 				value_type shift;
 42 | 			
 43 | 				friend class twobitvector;
 44 | 				reference( value_type &v, value_type s ) : val{v}, shift{s} {};
 45 | 			public:
 46 | 				//! get value
 47 | 				operator value_type() const {
 48 | 					return (val >> shift) & 3u;
 49 | 				};
 50 | 				//! set value
 51 | 				reference& operator=(value_type v) {
 52 | 					val ^= (((val >> shift) ^ v) & 3u) << shift;
 53 | 					return *this;
 54 | 				};
 55 | 				//! set value using another reference
 56 | 				reference& operator=(const reference& x) {
 57 | 					return *this=((x.val >> x.shift) & 3u);
 58 | 				};
 59 | 		};
 60 | 	private:
 61 | 		std::vector<value_type> m_data;
 62 | 		size_type m_size = 0;
 63 | 	
 64 | 	public:	
 65 | 		//! resize vector to the given size.
 66 | 		/*! if n is bigger than current size, old contents stay and the end
 67 | 		   is filled with zeros.
 68 | 		*/
 69 | 		void resize( size_type n ) {
 70 | 			m_data.resize( (n >> 2) + 1 );
 71 | 			m_size = n;
 72 | 		};
 73 | 
 74 | 		//! returns the number of entries in the twobitvector
 75 | 		size_type size() const {
 76 | 			return m_size;
 77 | 		};
 78 | 
 79 | 		//! returns a pointer to the underlying data field
 80 | 		const uint8_t *data() const {
 81 | 			return (const uint8_t *)m_data.data();
 82 | 		};
 83 | 
 84 | 		//! length of the underlying data field in bytes
 85 | 		size_type datasize() const {
 86 | 			return m_data.size();
 87 | 		};
 88 | 
 89 | 		//! random read access to the elements
 90 | 		value_type operator[]( size_type i ) const {
 91 | 			assert(i < m_size);
 92 | 			return (m_data[i >> 2] >> ((i & 3u) << 1)) & 3u;
 93 | 		};
 94 | 
 95 | 		//! random read/write access to the elements
 96 | 		reference operator[]( size_type i ) {
 97 | 			assert(i < m_size);
 98 | 			return reference( m_data[i >> 2], (i & 3u) << 1 );
 99 | 		};
100 | 
101 | 		//TODO: add more functions if required
102 | };
103 | 
104 | #endif
105 | 


--------------------------------------------------------------------------------
/include/wt-compressor.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * wt-compressor.hpp for bwt tunneling
 3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |  * of this software and associated documentation files (the "Software"), to deal
 7 |  * in the Software without restriction, including without limitation the rights
 8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |  * copies of the Software, and to permit persons to whom the Software is
10 |  * furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in all
13 |  * copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |  * SOFTWARE.
22 |  */
23 | 
24 | #ifndef BW94_COMPRESSOR_HPP
25 | #define BW94_COMPRESSOR_HPP
26 | 
27 | #include "bwt-compressor.hpp"
28 | #include "tbwt-compressor.hpp"
29 | 
30 | #include "sdsl/bit_vectors.hpp"
31 | #include "sdsl/wavelet_trees.hpp"
32 | 
33 | #include "block-scores-rle-model.hpp"
34 | 
35 | #include <istream>
36 | #include <ostream>
37 | 
38 | //! class which encodes a BWT with a wavelet tree (and hybrid bitvectors) as second stage
39 | class BW_SS_WT : public block_scores_rle_model {
40 | public:
41 | 	//! encodes the transform t using a wavelet tree
42 | 	template<class T>
43 | 	static void encode( T &t, std::ostream &out ) {
44 | 		sdsl::wt_huff<sdsl::hyb_vector<>> wt( t, t.size() );
45 | 		wt.serialize( out );
46 | 	}
47 | 
48 | 	//! decodes the transform and stores it in t
49 | 	template<class T>
50 | 	static void decode( std::istream &in, T &t ) {
51 | 		sdsl::wt_huff<sdsl::hyb_vector<>> wt;
52 | 		wt.load( in );
53 | 		for (t_idx_t i = 0; i < t.size(); i++) {
54 | 			t[i] = wt[i];
55 | 		}
56 | 	}
57 | };
58 | 
59 | //typedefs defining compressors
60 | typedef bwt_compressor<BW_SS_WT> bwt_compressor_wt;
61 | typedef tbwt_compressor<BW_SS_WT> tbwt_compressor_wt;
62 | 
63 | #endif
64 | 


--------------------------------------------------------------------------------
/lib/bwt-run-support.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * bwt-run-support.cpp for bwt tunneling
 3 |  * Copyright (c) 2017 Uwe Baier All Rights Reserved.
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 6 |  * of this software and associated documentation files (the "Software"), to deal
 7 |  * in the Software without restriction, including without limitation the rights
 8 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 |  * copies of the Software, and to permit persons to whom the Software is
10 |  * furnished to do so, subject to the following conditions:
11 |  *
12 |  * The above copyright notice and this permission notice shall be included in all
13 |  * copies or substantial portions of the Software.
14 |  *
15 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 |  * SOFTWARE.
22 |  */
23 | 
24 | #include "bwt-run-support.hpp"
25 | 
26 | #include <algorithm>
27 | #include <limits>
28 | 
29 | using namespace std;
30 | 
31 | t_idx_t bwt_run_support::run_of( t_idx_t i ) const {
32 | 	//use binary search with runstart - array
33 | 	auto it = upper_bound( m_rs.begin(), m_rs.end(), i );
34 | 	return (t_idx_t)(it - m_rs.begin()) - 1;
35 | }
36 | 
37 | bwt_run_support::bwt_run_support( const t_uchar_t *bwt, t_size_t _n, t_idx_t idx ) {
38 | 	//init some basic variables
39 | 	m_bwt_idx = idx;
40 | 	m_idx_n = _n;
41 | 	m_idx_runs = 0;
42 | 	m_sigma = 0;
43 | 	m_max_char_val = 0;
44 | 
45 | 	//build C Array and count runs
46 | 	vector<t_size_t> C( numeric_limits<t_uchar_t>::max() + 1 );
47 | 	t_idx_t borders[] = {bwt_idx,idx_n};
48 | 	t_idx_t i = 0;
49 | 	for (t_idx_t b : borders) { //to split runs at primary index
50 | 		t_uchar_t lastchar = (i < idx_n) ? bwt[i]+1 : 0;
51 | 		while (i < b) {
52 | 			if (lastchar != bwt[i]) { //start of a run
53 | 				++m_idx_runs;
54 | 				lastchar = bwt[i];
55 | 			}
56 | 			++C[lastchar];
57 | 			++i;
58 | 		}
59 | 	}
60 | 	m_n = idx_n + 1;
61 | 	m_runs = idx_runs + 1; //for bwt index
62 | 
63 | 	//build cumulative sums of the C array
64 | 	t_idx_t l = 1; //for bwt index
65 | 	for (t_idx_t c = 0; c < C.size(); c++) {
66 | 		auto tmp = C[c];
67 | 		C[c] = l;
68 | 		l += tmp;
69 | 		if (tmp > 0) {
70 | 			++m_sigma;
71 | 			m_max_char_val = c;
72 | 		}
73 | 	}
74 | 
75 | 	//compute LF
76 | 	m_lfr.reserve( m_runs + 1 );
77 | 	m_rs.reserve( m_runs + 1 );
78 | 	i = 0;
79 | 	t_idx_t i_log = 0; //logical position of i
80 | 	for (t_idx_t b : borders) { //to split runs at primary index
81 | 		t_uchar_t lastchar = (i < n) ? bwt[i]+1 : 0;
82 | 		while (i < b) {
83 | 			if (lastchar != bwt[i]) { //start of a run
84 | 				m_rs.push_back( i_log ); //store start of run
85 | 
86 | 				lastchar = bwt[i];
87 | 				m_lfr.push_back( C[lastchar] );
88 | 			}
89 | 			++C[lastchar];
90 | 			++i; ++i_log;
91 | 		}
92 | 		//add a terminator to both lfr and rs (for both primary index and n)
93 | 		m_rs.push_back( i_log++ );
94 | 		m_lfr.push_back( 0 );
95 | 	}
96 | }
97 | 


--------------------------------------------------------------------------------