├── tools
├── zbs
│ ├── hive
│ │ └── hive.cc
│ ├── Makefile
│ ├── bmq
│ │ └── bmq.cc
│ ├── zip-bench.sh
│ ├── sufsort_bench.cpp
│ └── zbs_stat.cpp
├── fsa
│ ├── test-3.txt
│ ├── Makefile
│ ├── test-1.txt
│ ├── test-2.txt
│ ├── test_nlt.sh
│ ├── dfa_text.cpp
│ ├── test-patricia.txt
│ └── patricia_bench.cpp
├── general
│ ├── Makefile
│ ├── test-tpjoin.txt
│ ├── revline.cpp
│ ├── deserial_strseq.cpp
│ ├── hex2bson.cpp
│ ├── text_key_val_to_kvbin.cpp
│ └── split_into_sorted_runs.cpp
├── configure
│ ├── glibc_memcpy_fix.h
│ └── compiler.cpp
└── codegen
│ └── gen_leap_year_bits.cpp
├── cmake
└── modules
│ └── FindBoost.cmake
├── src
└── terark
│ ├── idx
│ └── idx_dummy_placeholder.cpp
│ ├── io
│ ├── ZcMemMap.cpp
│ ├── todo
│ │ ├── DataIO_Polymorphic.hpp
│ │ ├── inter_thread_pipe.hpp
│ │ ├── DataIO_Parser.hpp
│ │ └── inter_thread_pipe.cpp
│ ├── discard
│ │ ├── DataInput.cpp
│ │ ├── hole_stream.hpp
│ │ └── is_primitive.hpp
│ ├── var_int_declare_read.hpp
│ ├── readv_writev.hpp
│ ├── var_int_declare_write.hpp
│ ├── StdvecWriter.hpp
│ ├── DataInput_VarIntAsFixLen.hpp
│ ├── file_load_save.hpp
│ ├── DataOutput_VarIntAsFixLen.hpp
│ ├── DataIO_SmartPtr.hpp
│ ├── HexCodingStream.cpp
│ ├── FileDataIO.hpp
│ ├── DataOutput_String.hpp
│ ├── readv_writev.cpp
│ ├── DataInputIterator.hpp
│ ├── IOException.hpp
│ ├── DataOutput_BigEndian.hpp
│ ├── DataOutput_LittleEndian.hpp
│ ├── DataIO_Exception.cpp
│ ├── win
│ │ └── MfcFileStream.cpp
│ ├── DataIO_Exception.hpp
│ ├── DataInput_String.hpp
│ ├── DataInput_BigEndian.hpp
│ └── DataInput_LittleEndian.hpp
│ ├── util
│ ├── hugepage.cpp
│ ├── cpu_prefetch.hpp
│ ├── truncate_file.hpp
│ ├── checksum_exception.hpp
│ ├── stdptr.hpp
│ ├── vm_util.hpp
│ ├── checksum_exception.cpp
│ ├── stat.hpp
│ ├── nolocks_localtime.hpp
│ ├── throw.cpp
│ ├── crc.hpp
│ ├── tmpfile.hpp
│ ├── sorted_uint_vec_get_block_word.hpp
│ ├── truncate_file.cpp
│ ├── memcmp_coding.hpp
│ ├── DataBuffer.cpp
│ ├── fast_getcpu.hpp
│ ├── strjoin.hpp
│ ├── autoclose.hpp
│ ├── deepcopy_ptr.hpp
│ ├── strbuilder.hpp
│ ├── tmpfile.cpp
│ ├── profiling.cpp
│ ├── base64.hpp
│ └── throw.hpp
│ ├── fsa
│ ├── fsa_cache.cpp
│ ├── fsa_ext.hpp
│ ├── ppi
│ │ ├── state_move_fast.hpp
│ │ ├── dawg_dfa_mmap.hpp
│ │ └── flat_dfa_mmap.hpp
│ ├── fsa_cache.hpp
│ ├── forward_decl.hpp
│ ├── fsa_ext.cpp
│ ├── tmplinst.cpp
│ ├── x_fsa_util.hpp
│ ├── dfa_mmap_header.hpp
│ └── dfa_algo_basic.hpp
│ ├── parallel_lib.hpp
│ ├── easy_use_hash_map.hpp
│ ├── rank_select.hpp
│ ├── thread
│ ├── mutex.hpp
│ ├── fiber_aio.hpp
│ ├── futex.hpp
│ ├── fiber_local.hpp
│ └── fiber_pool.hpp
│ ├── str_lex_iter.cpp
│ ├── pass_by_value.hpp
│ ├── zbs
│ ├── ZstdStream.hpp
│ ├── simple_zip_blob_store.hpp
│ ├── zip_reorder_map.cpp
│ ├── zero_length_blob_store.hpp
│ ├── xxhash_helper.hpp
│ ├── lru_page_cache.hpp
│ ├── sufarr_inducedsort.h
│ └── abstract_blob_store.hpp
│ ├── str_lex_iter.hpp
│ ├── mempool.hpp
│ ├── succinct
│ ├── rank_select_basic.hpp
│ └── rank_select_inline_slow.hpp
│ ├── multi_way_basic.hpp
│ ├── gold_hash_map_iterator.hpp
│ └── smallmap.hpp
├── tests
├── core
│ ├── Makefile
│ ├── never-add-stdvec-writer.hpp
│ ├── test_call_on_main_stack.cpp
│ ├── test_boost_fss.cpp
│ └── test_ProcPipeStream.cpp
├── tries
│ ├── Makefile
│ └── test_dict_order_gen.cpp
├── entropy
│ ├── Makefile
│ └── test_entropy.cpp
├── succinct
│ ├── Makefile
│ └── test.sh
└── zbs
│ └── sample.txt
├── 3rdparty
├── zstd
│ └── zstd
│ │ ├── .gitignore
│ │ ├── dll
│ │ └── example
│ │ │ ├── fullbench-dll.vcxproj.user
│ │ │ ├── build_package.bat
│ │ │ ├── fullbench-dll.sln
│ │ │ └── Makefile
│ │ ├── libzstd.pc.in
│ │ ├── common
│ │ └── debug.c
│ │ ├── deprecated
│ │ └── zbuff_common.c
│ │ ├── compress
│ │ ├── zstd_compress_superblock.h
│ │ ├── zstd_compress_literals.h
│ │ ├── zstd_fast.h
│ │ └── zstd_double_fast.h
│ │ └── decompress
│ │ └── zstd_ddict.h
└── base64
│ ├── test
│ ├── codec_supported.h
│ ├── codec_supported.c
│ ├── Makefile
│ ├── moby_dick_plain.txt
│ └── moby_dick_base64.txt
│ ├── .gitignore
│ ├── base64-benchmarks.png
│ ├── .travis.yml
│ ├── lib
│ ├── exports.txt
│ └── arch
│ │ ├── sse2
│ │ └── compare_macros.h
│ │ ├── generic
│ │ ├── 32
│ │ │ ├── enc_loop.c
│ │ │ └── dec_loop.c
│ │ ├── 64
│ │ │ └── enc_loop.c
│ │ ├── codec.c
│ │ ├── enc_tail.c
│ │ ├── enc_head.c
│ │ ├── dec_head.c
│ │ └── dec_tail.c
│ │ ├── neon32
│ │ └── enc_loop.c
│ │ ├── ssse3
│ │ ├── enc_loop.c
│ │ ├── dec_reshuffle.c
│ │ ├── codec.c
│ │ ├── enc_translate.c
│ │ ├── enc_reshuffle.c
│ │ └── dec_loop.c
│ │ ├── avx
│ │ └── codec.c
│ │ ├── sse42
│ │ └── codec.c
│ │ ├── sse41
│ │ ├── codec.c
│ │ └── enc_reshuffle.c
│ │ ├── avx2
│ │ ├── enc_loop.c
│ │ └── dec_loop.c
│ │ └── neon64
│ │ └── enc_loop.c
│ └── LICENSE
├── scripts
├── README.md
├── cpu_has_bmi2.sh
├── cpu_features.sh
├── test.sh
└── build_makefile.sh
├── gtests
├── tools
│ ├── core
│ │ ├── Makefile
│ │ ├── test_boost_fss.cpp
│ │ └── test_ProcPipeStream.cpp
│ ├── tries
│ │ ├── Makefile
│ │ └── test_dict_order_gen.cpp
│ ├── succinct
│ │ ├── Makefile
│ │ └── test.sh
│ └── zbs
│ │ └── sample.txt
├── simple_test.cpp
├── common
│ ├── prefetch_test.cpp
│ └── sortable_strvec_test.cpp
├── build.sh
├── utils_test.cpp
├── zbs
│ └── zbs_mixed_len.hpp
├── index
│ └── adfa_test.cpp
├── rank_select
│ └── rank_select_few_reg_test.cpp
├── CMakeLists.txt
└── utils.hpp
├── .gitmodules
├── get-compiler-name.sh
├── cpu_features.sh
├── CHANGELOG
├── .gitlab-ci.yml
├── cpu_has_bmi2.sh
├── .gitignore
├── README.md
└── LICENSE
/tools/zbs/hive/hive.cc:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/cmake/modules/FindBoost.cmake:
--------------------------------------------------------------------------------
1 | # TODO
2 |
--------------------------------------------------------------------------------
/src/terark/idx/idx_dummy_placeholder.cpp:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/core/Makefile:
--------------------------------------------------------------------------------
1 |
2 | include ../../tools/fsa/Makefile.common
3 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/.gitignore:
--------------------------------------------------------------------------------
1 | # make install artefact
2 | libzstd.pc
3 |
--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
1 | - `Makefile` and `build_makefile.sh` are deprecated
2 |
--------------------------------------------------------------------------------
/tools/fsa/test-3.txt:
--------------------------------------------------------------------------------
1 | 0000AAAAXXXX
2 | 0000AAAAYYYY
3 | 0000AAAAZZZZ
4 |
--------------------------------------------------------------------------------
/tools/fsa/Makefile:
--------------------------------------------------------------------------------
1 |
2 | TERARK_EXT_LIBS := fsa
3 |
4 | include Makefile.common
5 |
--------------------------------------------------------------------------------
/tools/zbs/Makefile:
--------------------------------------------------------------------------------
1 |
2 | TERARK_EXT_LIBS := zbs fsa
3 |
4 | include ../fsa/Makefile.common
5 |
--------------------------------------------------------------------------------
/tests/tries/Makefile:
--------------------------------------------------------------------------------
1 |
2 | TERARK_EXT_LIBS := fsa
3 |
4 | include ../../tools/fsa/Makefile.common
5 |
--------------------------------------------------------------------------------
/tools/general/Makefile:
--------------------------------------------------------------------------------
1 |
2 | TERARK_BIN_USE_STATIC_LIB ?= 1
3 |
4 | include ../fsa/Makefile.common
5 |
--------------------------------------------------------------------------------
/3rdparty/base64/test/codec_supported.h:
--------------------------------------------------------------------------------
1 | extern char **codecs;
2 |
3 | int codec_supported (int flags);
4 |
--------------------------------------------------------------------------------
/src/terark/io/ZcMemMap.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/topling/topling-zip/HEAD/src/terark/io/ZcMemMap.cpp
--------------------------------------------------------------------------------
/tests/entropy/Makefile:
--------------------------------------------------------------------------------
1 |
2 | TERARK_EXT_LIBS := fsa zbs
3 |
4 | include ../../tools/fsa/Makefile.common
5 |
--------------------------------------------------------------------------------
/gtests/tools/core/Makefile:
--------------------------------------------------------------------------------
1 |
2 | SRCS += $(wildcard *.cpp)
3 |
4 | include ../../tools/fsa/Makefile.common
5 |
--------------------------------------------------------------------------------
/src/terark/util/hugepage.cpp:
--------------------------------------------------------------------------------
1 | #include "hugepage.hpp"
2 |
3 | namespace terark {
4 |
5 | } // namespace terark
6 |
--------------------------------------------------------------------------------
/tools/configure/glibc_memcpy_fix.h:
--------------------------------------------------------------------------------
1 | /* gcc flag: -include "" */
2 | __asm__(".symver memcpy,memcpy@GLIBC_2.2.5");
3 |
--------------------------------------------------------------------------------
/tools/fsa/test-1.txt:
--------------------------------------------------------------------------------
1 | a
2 | b
3 | c
4 | ddd
5 | eee
6 | fff
7 | ggg
8 | hhhh
9 | iiii
10 | jjjjj
11 | kkkkk
12 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "boost-include"]
2 | path = boost-include
3 | url = https://github.com/rockeet/boost-fiber-leipeng
4 |
--------------------------------------------------------------------------------
/3rdparty/base64/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | bin/base64
3 | lib/config.h
4 | lib/table_generator
5 | test/benchmark
6 | test/test_base64
7 |
--------------------------------------------------------------------------------
/3rdparty/base64/base64-benchmarks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/topling/topling-zip/HEAD/3rdparty/base64/base64-benchmarks.png
--------------------------------------------------------------------------------
/src/terark/io/todo/DataIO_Polymorphic.hpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/topling/topling-zip/HEAD/src/terark/io/todo/DataIO_Polymorphic.hpp
--------------------------------------------------------------------------------
/gtests/tools/tries/Makefile:
--------------------------------------------------------------------------------
1 |
2 | TERARK_EXT_LIBS := fsa
3 |
4 | SRCS += $(wildcard *.cpp)
5 |
6 | include ../../tools/fsa/Makefile.common
7 |
--------------------------------------------------------------------------------
/gtests/simple_test.cpp:
--------------------------------------------------------------------------------
1 | #include "gtest/gtest.h"
2 | // Demo test: only prints a fixed message to stdout and makes no assertions.
3 | TEST(UTILS_TEST, FILE_EXISTS) {
4 | std::cout << "this is a demo test" << std::endl;
5 | }
6 |
--------------------------------------------------------------------------------
/tools/fsa/test-2.txt:
--------------------------------------------------------------------------------
1 | 00##
2 | 08##
3 | 16##
4 | 24##
5 | 32##
6 | 40##
7 | 48##
8 | 56##
9 | 64##
10 | 72##
11 | 80##
12 | 88##
13 | 96##
14 |
--------------------------------------------------------------------------------
/gtests/tools/succinct/Makefile:
--------------------------------------------------------------------------------
1 |
2 | TERARK_EXT_LIBS := fsa
3 |
4 | SRCS += $(wildcard *.cpp)
5 | #SRCS += $(wildcard extra/*.cpp)
6 |
7 | include ../../tools/fsa/Makefile.common
8 |
--------------------------------------------------------------------------------
/src/terark/fsa/fsa_cache.cpp:
--------------------------------------------------------------------------------
1 | #include "fsa_cache_detail.hpp"
2 |
3 | namespace terark {
4 | // Out-of-line definition of the destructor that class FSA_Cache declares virtual; the body is empty.
5 | FSA_Cache::~FSA_Cache() {
6 | }
7 |
8 | } // namespace terark
9 |
10 |
--------------------------------------------------------------------------------
/tests/succinct/Makefile:
--------------------------------------------------------------------------------
1 |
2 | TERARK_EXT_LIBS := fsa
3 |
4 | EXE_SRCS += $(wildcard *.cpp)
5 | #EXE_SRCS += $(wildcard extra/*.cpp)
6 |
7 | include ../../tools/fsa/Makefile.common
8 |
--------------------------------------------------------------------------------
/3rdparty/base64/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 |
3 | compiler:
4 | - clang
5 | - gcc
6 |
7 | script:
8 | - SSSE3_CFLAGS=-mssse3 SSE41_CFLAGS=-msse4.1 SSE42_CFLAGS=-msse4.2 AVX_CFLAGS=-mavx make -C test
9 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/dll/example/fullbench-dll.vcxproj.user:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/src/terark/util/cpu_prefetch.hpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-07-16.
3 | //
4 | #pragma once
5 |
6 | #include
7 |
8 | #define TERARK_CPU_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
9 |
--------------------------------------------------------------------------------
/tools/zbs/bmq/bmq.cc:
--------------------------------------------------------------------------------
1 | #include
2 |
3 |
4 | /**
5 | * Simple test case for ByteDance message queue streaming compression & decompression.
6 | * @return always 0; the test body has not been written yet.
7 | */
8 | int main() {
9 | // TODO: no test logic is implemented here yet.
10 | return 0;
11 | }
--------------------------------------------------------------------------------
/3rdparty/base64/lib/exports.txt:
--------------------------------------------------------------------------------
1 | trk_base64_encode
2 | trk_base64_stream_encode
3 | trk_base64_stream_encode_init
4 | trk_base64_stream_encode_final
5 | trk_base64_decode
6 | trk_base64_stream_decode
7 | trk_base64_stream_decode_init
8 |
--------------------------------------------------------------------------------
/gtests/common/prefetch_test.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "utils.h"
6 |
7 | namespace terark {
8 | // Placeholder test: the body is empty, so it passes without exercising anything.
9 | TEST(PREFETCH, SIMPLE_TEST) {
10 |
11 | }
12 | }
13 |
14 |
--------------------------------------------------------------------------------
/src/terark/fsa/fsa_ext.hpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-05-07.
3 | //
4 | #pragma once
5 | #include "fsa.hpp"
6 |
7 | namespace terark {
8 | TERARK_DLL_EXPORT size_t dfa_write_text(const BaseDFA* dfa, FILE*);
9 | }
10 |
--------------------------------------------------------------------------------
/get-compiler-name.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Print the name and version of the C++ compiler by building and running
3 | # tools/configure/compiler.cpp with ${CXX} (g++ when CXX is unset or empty).
4 | # The exit status is that of the compile-and-run step.
5 | dir=`dirname "$0"`
6 | # Paths below are relative to the script's directory; stop if it cannot be entered.
7 | cd "$dir" || exit 1
8 | if [ -z "$CXX" ]; then
9 | CXX=g++
10 | fi
11 | tmpfile=$(mktemp -u compiler-XXXXXX)
12 | # ${CXX} is left unquoted on purpose so that its value is word-split exactly as before.
13 | ${CXX} tools/configure/compiler.cpp -o "${tmpfile}.exe" && "./${tmpfile}.exe"
14 | status=$?
15 | # Remove the temporary executable even when compiling or running it failed.
16 | rm -f "${tmpfile}"*
17 | exit $status
18 |
--------------------------------------------------------------------------------
/src/terark/io/discard/DataInput.cpp:
--------------------------------------------------------------------------------
1 | #include "terark/io/DataInput.hpp"
2 |
3 | namespace terark {
4 |
5 | namespace serialization { namespace polymorphic {
6 |
7 |
8 | } } // serialization::polymorphic
9 |
10 | } // namespace terark
11 |
--------------------------------------------------------------------------------
/scripts/cpu_has_bmi2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Print 1 if the feature list printed by cpu_features.sh contains "bmi2", else 0.
3 | # $0 and $mydir are quoted so the script also works from a path containing spaces.
4 | mydir=$(dirname "$0")
5 | # grep -q only sets the exit status; as the last command of the pipeline it decides the branch.
6 | if "$mydir/cpu_features.sh" | grep -qs bmi2
7 | then
8 | echo 1
9 | else
10 | echo 0
11 | fi
12 |
13 |
--------------------------------------------------------------------------------
/cpu_features.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ `uname` == Darwin ]; then
4 | sysctl -n machdep.cpu.features | tr 'A-Z' 'a-z' | sed -E 's/[[:space:]]+/'$'\\\n/g'
5 | else
6 | cat /proc/cpuinfo | sed -n '/^flags\s*:\s*/s/^[^:]*:\s*//p' | uniq | tr 'A-Z' 'a-z' | sed 's/\s\+/\n/g'
7 | fi
8 |
--------------------------------------------------------------------------------
/scripts/cpu_features.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ `uname` == Darwin ]; then
4 | sysctl -n machdep.cpu.features | tr 'A-Z' 'a-z' | sed -E 's/[[:space:]]+/'$'\\\n/g'
5 | else
6 | cat /proc/cpuinfo | sed -n '/^flags\s*:\s*/s/^[^:]*:\s*//p' | uniq | tr 'A-Z' 'a-z' | sed 's/\s\+/\n/g'
7 | fi
8 |
--------------------------------------------------------------------------------
/scripts/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -e # exit on error
4 |
5 | if [ `uname` == Darwin ]; then
6 | cpuNum=`sysctl -n machdep.cpu.thread_count`
7 | else
8 | cpuNum=`nproc`
9 | fi
10 |
11 | make -j$cpuNum test
12 |
13 | # more test cases under google test framework
14 | ./gtest.sh
15 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/sse2/compare_macros.h:
--------------------------------------------------------------------------------
1 | #define CMPGT(s,n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n))
2 | #define CMPEQ(s,n) _mm_cmpeq_epi8((s), _mm_set1_epi8(n))
3 | #define REPLACE(s,n) _mm_and_si128((s), _mm_set1_epi8(n))
4 | #define RANGE(s,a,b) _mm_andnot_si128(CMPGT((s), (b)), CMPGT((s), (a) - 1))
5 |
--------------------------------------------------------------------------------
/src/terark/io/var_int_declare_read.hpp:
--------------------------------------------------------------------------------
1 | uint32_t read_var_uint32();
2 | uint32_t read_var_uint30();
3 | uint64_t read_var_uint64();
4 | uint64_t read_var_uint61();
5 | int32_t read_var_int32();
6 | int32_t read_var_int30();
7 | int64_t read_var_int64();
8 | int64_t read_var_int61();
9 | void read_string(std::string& str);
10 |
11 |
--------------------------------------------------------------------------------
/tools/general/test-tpjoin.txt:
--------------------------------------------------------------------------------
1 | a A 0 00
2 | b B 1 11
3 | c C 2 22
4 | d D 3 33
5 | e E 4 44
6 | f F 5 55
7 | g G 6 66
8 | h H 7 77
9 | i I 8 88
10 | j J 9 99
11 | k K 0 00
12 | l L A aa
13 | m M B bb
14 | n N C cc
15 | o O D dd
16 | p P E ee
17 | q Q F ff
18 | r R G gg
19 | s S H hh
20 | t T I ii
21 | u U J jj
22 | v V K kk
23 | w W L ll
24 |
--------------------------------------------------------------------------------
/src/terark/parallel_lib.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | // Defines terark_parallel_sort: __gnu_parallel::sort when TOPLING_ENABLE_PARALLEL_ALGO is defined and __GNUC__.__GNUC_MINOR__ is at least 4.7, std::sort otherwise.
3 | #if defined(TOPLING_ENABLE_PARALLEL_ALGO) && defined(__GNUC__) && __GNUC__ * 1000 + __GNUC_MINOR__ >= 4007
4 | #include
5 | #define terark_parallel_sort __gnu_parallel::sort
6 | #else
7 | #include
8 | #define terark_parallel_sort std::sort
9 | #endif
10 |
11 |
--------------------------------------------------------------------------------
/tools/general/revline.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | int main(int argc, char* argv[]) {
6 | // Echo every line read from stdin with its characters in reverse order.
7 | terark::LineBuf buf;
8 | for (FILE* in = stdin; buf.getline(in) >= 0; ) {
9 | buf.chomp(); // called before reversing; presumably strips the line terminator -- TODO confirm
10 | std::reverse(buf.begin(), buf.end());
11 | printf("%s\n", buf.p);
12 | }
13 | return 0;
14 | }
15 |
--------------------------------------------------------------------------------
/tools/fsa/test_nlt.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # For every test*.txt in the current directory and for -n values 1..3, build
3 | # $f.nlt with dbg/nlt_build.exe and check that dbg/dfa_text.exe prints the same
4 | # lines as the input sorted with LC_ALL=C. Exits with status 1 on the first mismatch.
5 | # bash is required: the script uses ((...)) loops and <(...) process substitution.
6 | for f in test*.txt; do
7 | for ((nl=1; nl < 4; nl++)); do
8 | env LD_LIBRARY_PATH=../../build/Linux-x86_64-g++-6.3-bmi2-1/lib \
9 | dbg/nlt_build.exe -n $nl -o "$f.nlt" "$f"
10 | if ! diff <(LC_ALL=C sort "$f") <(dbg/dfa_text.exe "$f.nlt"); then
11 | echo Fail on text file "$f" 1>&2
12 | # A bare `exit` would return echo's status (0) and hide the failure.
13 | exit 1
14 | fi
15 | done
16 | done
17 |
--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
1 | ## 2020-12-11
2 | - Refine code style and add License file, ready to open source
3 | - Update README
4 |
5 | ## 2020-11-06
6 | - Rename terark-core to terark-zip
7 | - Add gtest for unit tests
8 | - Structure refactoring
9 |
10 | ## 2019-10-31
11 | - Submit the first version of terark-core as a standalone product; more API documentation and test cases will be added in the future.
12 |
--------------------------------------------------------------------------------
/tools/configure/compiler.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | // Prints "<compiler>-<major>.<minor>" to stdout with no trailing newline; prints nothing if no branch matches.
3 | int main() {
4 | #ifdef __clang_major__
5 | printf("clang-%d.%d", __clang_major__, __clang_minor__);
6 | #elif defined(__INTEL_COMPILER)
7 | printf("icc-%d.%d", __INTEL_COMPILER/100, __INTEL_COMPILER%100);
8 | #elif defined(__GNUC__) // checked last; presumably because clang and icc may define __GNUC__ too -- TODO confirm
9 | printf("g++-%d.%d", __GNUC__, __GNUC_MINOR__);
10 | #endif
11 | return 0;
12 | }
13 |
14 |
--------------------------------------------------------------------------------
/src/terark/fsa/ppi/state_move_fast.hpp:
--------------------------------------------------------------------------------
1 |
2 | public:
3 |
4 | struct StateMoveContext {};
5 | // Forwards to state_move(parent, ch); the StateMoveContext argument is unnamed and ignored.
6 | transition_t
7 | state_move_fast(size_t parent, auchar_t ch, StateMoveContext)
8 | const {
9 | return state_move(parent, ch);
10 | }
11 | // Same body as state_move_fast: forwards to state_move(parent, ch) and ignores the context argument.
12 | transition_t
13 | state_move_slow(size_t parent, auchar_t ch, StateMoveContext)
14 | const {
15 | return state_move(parent, ch);
16 | }
17 |
18 |
19 |
--------------------------------------------------------------------------------
/gtests/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Build the terark-core libraries via ../build.sh, then configure and build the
3 | # gtests with cmake/make in a fresh "build" directory under the starting directory.
4 | # All paths are relative to the directory the script is started from.
5 | set -ex
6 |
7 | BASE_DIR=$(pwd)
8 | if [ "$(uname)" == Darwin ]; then
9 | cpuNum=$(sysctl -n machdep.cpu.thread_count)
10 | else
11 | cpuNum=$(nproc)
12 | fi
13 |
14 | # Build terark-core libraries.
15 | # One command per line: `set -e` ignores failures of non-final commands in an && list.
16 | cd ../
17 | ./build.sh
18 |
19 | # Build test cases under gtests.
20 | # $BASE_DIR is quoted so that `rm -rf` cannot hit unintended paths when it contains spaces.
21 | rm -rf "$BASE_DIR/build"
22 | mkdir -p "$BASE_DIR/build"
23 | cd "$BASE_DIR/build"
24 | cmake ../
25 | make -j "$cpuNum"
--------------------------------------------------------------------------------
/src/terark/easy_use_hash_map.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "gold_hash_map.hpp"
4 | #include "hash_strmap.hpp"
5 |
6 | namespace terark {
7 | // Both easy_use_hash_map declarations have empty bodies: they add no members and only inherit from the underlying map type.
8 | template
9 | class easy_use_hash_map : public gold_hash_map {};
10 |
11 | template
12 | class easy_use_hash_map : public hash_strmap {};
13 |
14 | } // namespace terark
15 |
--------------------------------------------------------------------------------
/src/terark/util/truncate_file.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 |
5 | namespace terark {
6 | TERARK_DLL_EXPORT
7 | void truncate_file(const char* fpath, unsigned long long size);
8 | // Overload for string-like objects: asserts that fpath.data() is NUL-terminated at index size(), then forwards to the const char* overload.
9 | template
10 | inline
11 | void truncate_file(const String& fpath, unsigned long long size) {
12 | assert(fpath.data()[fpath.size()] == '\0');
13 | truncate_file(fpath.data(), size);
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/src/terark/rank_select.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "succinct/rank_select_simple.hpp"
4 | #include "succinct/rank_select_se_256.hpp"
5 | #include "succinct/rank_select_il_256.hpp"
6 | #include "succinct/rank_select_se_512.hpp"
7 | #include "succinct/rank_select_mixed_il_256.hpp"
8 | #include "succinct/rank_select_mixed_xl_256.hpp"
9 | #include "succinct/rank_select_mixed_se_512.hpp"
10 | #include "succinct/rank_select_few.hpp"
11 |
--------------------------------------------------------------------------------
/src/terark/io/readv_writev.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 |
6 | namespace terark {
7 |
8 | TERARK_DLL_EXPORT ssize_t easy_readv(int fd, iovec*, int num, int* next_idx);
9 | TERARK_DLL_EXPORT ssize_t easy_writev(int fd, iovec*, int num, int* next_idx);
10 | // True when the last of the num entries has iov_len == 0. NOTE(review): reads iov[num-1], so num must be >= 1 -- confirm callers never pass 0.
11 | inline bool iovec_finished(const iovec* iov, int num) {
12 | return 0 == iov[num-1].iov_len;
13 | }
14 |
15 | } // namespace terark
16 |
--------------------------------------------------------------------------------
/src/terark/io/var_int_declare_write.hpp:
--------------------------------------------------------------------------------
1 | void write_var_uint32(uint32_t x);
2 | void write_var_uint30(uint32_t x);
3 | void write_var_uint64(uint64_t x);
4 | void write_var_uint61(uint64_t x);
5 | void write_var_int32(int32_t x);
6 | void write_var_int30(int32_t x);
7 | void write_var_int64(int64_t x);
8 | void write_var_int61(int64_t x);
9 | void write_string(const std::string& str);
10 | // void write_string(const char* str, size_t len);
11 |
12 |
--------------------------------------------------------------------------------
/tests/succinct/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copy the rank_select_fewzero sources into this directory, rebuild, and run the two unit-test binaries.
3 | #set -x
4 |
5 | BASE=$(cd ../../..; pwd)
6 | PKG_TERARK_HOME=$BASE/topling-rocks/pkg/topling-rocks-Linux-x86_64-g++-4.8-bmi2-0
7 | LD_LIBRARY_PATH=$PKG_TERARK_HOME/lib:$LD_LIBRARY_PATH
8 | export PKG_TERARK_HOME LD_LIBRARY_PATH
9 |
10 | # Both files come from the same directory below $BASE.
11 | succinct_src=$BASE/terark/src/terark/succinct
12 | cp $succinct_src/rank_select_fewzero.hpp .
13 | cp $succinct_src/rank_select_fewzero.cpp .
14 |
15 | # Rebuild from scratch with four parallel jobs, then run the unit tests.
16 | make clean
17 | make -j4
18 |
19 | ./dbg/rs_fewzero_ut.exe
20 | ./dbg/rank_select_unit_test.exe
21 |
--------------------------------------------------------------------------------
/src/terark/fsa/ppi/dawg_dfa_mmap.hpp:
--------------------------------------------------------------------------------
1 | void finish_load_mmap(const DFA_MmapHeader* base) override { // runs the base-class step first, then sets this class's own fields
2 | super::finish_load_mmap(base);
3 | this->is_compiled = true;
4 | this->n_words = size_t(base->dawg_num_words); // word count is read from the header
5 | this->m_is_dag = true;
6 | }
7 | // Calls the base-class prepare_save_mmap first, then stores n_words in base->dawg_num_words and sets base->is_dag. NOTE(review): the base call's return value is discarded and 0 is always returned -- confirm that is intended.
8 | long prepare_save_mmap(DFA_MmapHeader* base, const void** dataPtrs)
9 | const override {
10 | super::prepare_save_mmap(base, dataPtrs);
11 | base->dawg_num_words = this->n_words;
12 | base->is_dag = true;
13 | return 0;
14 | }
15 |
16 |
--------------------------------------------------------------------------------
/gtests/tools/succinct/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copy the rank_select_fewzero sources into this directory, rebuild, and run the two unit-test binaries.
3 | #set -x
4 |
5 | BASE=$(cd ../../..; pwd)
6 | PKG_TERARK_HOME=$BASE/terark-zip-rocksdb/pkg/terark-zip-rocksdb-Linux-x86_64-g++-4.8-bmi2-0
7 | LD_LIBRARY_PATH=$PKG_TERARK_HOME/lib:$LD_LIBRARY_PATH
8 | export PKG_TERARK_HOME LD_LIBRARY_PATH
9 |
10 | # Both files come from the same directory below $BASE.
11 | succinct_src=$BASE/terark/src/terark/succinct
12 | cp $succinct_src/rank_select_fewzero.hpp .
13 | cp $succinct_src/rank_select_fewzero.cpp .
14 |
15 | # Rebuild from scratch with four parallel jobs, then run the unit tests.
16 | make clean
17 | make -j4
18 |
19 | ./dbg/rs_fewzero_ut.exe
20 | ./dbg/rank_select_unit_test.exe
21 |
--------------------------------------------------------------------------------
/src/terark/fsa/fsa_cache.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 |
6 | namespace terark {
7 | // Abstract interface: all three operations are pure virtual; the virtual destructor is only declared here.
8 | class TERARK_DLL_EXPORT FSA_Cache {
9 | public:
10 | virtual ~FSA_Cache();
11 | virtual bool has_fsa_cache() const = 0;
12 | virtual bool build_fsa_cache(double cacheRatio, const char* walkMethod)=0;
13 | virtual void print_fsa_cache_stat(FILE*) const = 0;
14 | };
15 |
16 | class NTD_CacheTrie; // forward declaration
17 |
18 | } // namespace terark
19 |
20 |
--------------------------------------------------------------------------------
/src/terark/util/checksum_exception.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | namespace terark {
8 | // Exception derived from std::logic_error that also carries two 64-bit values, m_old and m_new; the constructor takes a message plus Old and New (presumably stored into m_old/m_new -- TODO confirm, the definition is not shown here).
9 | class TERARK_DLL_EXPORT BadChecksumException : public std::logic_error {
10 | typedef std::logic_error super;
11 | public:
12 | uint64_t m_old;
13 | uint64_t m_new;
14 | ~BadChecksumException();
15 | BadChecksumException(fstring msg, uint64_t Old, uint64_t New);
16 | };
17 |
18 | } // terark
19 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/libzstd.pc.in:
--------------------------------------------------------------------------------
1 | # ZSTD - standard compression algorithm
2 | # Copyright (C) 2014-2016, Yann Collet, Facebook
3 | # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
4 |
5 | prefix=@PREFIX@
6 | exec_prefix=${prefix}
7 | includedir=${prefix}/@INCLUDEDIR@
8 | libdir=${exec_prefix}/@LIBDIR@
9 |
10 | Name: zstd
11 | Description: fast lossless compression algorithm library
12 | URL: http://www.zstd.net/
13 | Version: @VERSION@
14 | Libs: -L${libdir} -lzstd
15 | Cflags: -I${includedir}
16 |
--------------------------------------------------------------------------------
/tests/zbs/sample.txt:
--------------------------------------------------------------------------------
1 | ncurses is keg-only, which means it was not symlinked into /usr/local,
2 | because macOS already provides this software and installing another version in
3 | parallel can cause all kinds of trouble.
4 |
5 | If you need to have ncurses first in your PATH run:
6 | echo 'export PATH="/usr/local/opt/ncurses/bin:$PATH"' >> ~/.zshrc
7 |
8 | For compilers to find ncurses you may need to set:
9 | export LDFLAGS="-L/usr/local/opt/ncurses/lib"
10 | export CPPFLAGS="-I/usr/local/opt/ncurses/include"
11 | 123
12 | 456
13 | 789
14 |
--------------------------------------------------------------------------------
/gtests/tools/zbs/sample.txt:
--------------------------------------------------------------------------------
1 | ncurses is keg-only, which means it was not symlinked into /usr/local,
2 | because macOS already provides this software and installing another version in
3 | parallel can cause all kinds of trouble.
4 |
5 | If you need to have ncurses first in your PATH run:
6 | echo 'export PATH="/usr/local/opt/ncurses/bin:$PATH"' >> ~/.zshrc
7 |
8 | For compilers to find ncurses you may need to set:
9 | export LDFLAGS="-L/usr/local/opt/ncurses/lib"
10 | export CPPFLAGS="-I/usr/local/opt/ncurses/include"
11 | 123
12 | 456
13 | 789
14 |
--------------------------------------------------------------------------------
/tools/fsa/dfa_text.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-05-07.
3 | //
4 |
5 | #include
6 |
7 | // Loads the DFA file named by argv[1] and passes it to dfa_write_text with stdout.
8 | // Exit status: 0 on success, 1 when the file argument is missing, 2 if a std::exception is thrown.
9 | int main(int argc, char* argv[]) {
10 | using namespace terark;
11 | if (argc < 2) {
12 | fprintf(stderr, "usage: %s dfa-file\n", argv[0]);
13 | return 1;
14 | }
15 | try {
16 | std::unique_ptr<BaseDFA> dfa(BaseDFA::load_from(argv[1]));
17 | dfa_write_text(dfa.get(), stdout);
18 | return 0;
19 | }
20 | catch (const std::exception& ex) {
21 | // Report the reason instead of failing silently; the message goes to stderr only.
22 | fprintf(stderr, "%s: failed on %s: %s\n", argv[0], argv[1], ex.what());
23 | return 2;
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/tools/general/deserial_strseq.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | // Read serialized strings from stdin until end of file and print each one followed by '\n'.
6 | int main(int argc, char* argv[]) {
7 |     using namespace terark;
8 |     NonOwnerFileStream fstdin(stdin);
9 |     NativeDataInput dio(&fstdin);
10 |     valvec buf;
11 |     try {
12 |         while (true) {
13 |             dio >> buf;
14 |             fwrite(buf.data(), 1, buf.size(), stdout); // byte-exact: "%.*s" would stop at an embedded NUL
15 |             putchar('\n');
16 |         }
17 |     } catch (const EndOfFileException&) { // end of input is the normal way out of the loop
18 |     }
19 |     return 0;
20 | }
21 |
22 |
--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
1 | stages:
2 | - build_and_test
3 | - benchmark
4 |
5 | compile:
6 | stage: build_and_test
7 | script:
8 | - echo `pwd` && ls -lh
9 | - echo $CI_COMMIT_SHA && git checkout $CI_COMMIT_SHA
10 | - git submodule update --init --recursive
11 | - mkdir build && cd build && cmake ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DWITH_TESTS=ON
12 | - make -j $(nproc)
13 | tags:
14 | - terark
15 |
16 | test:
17 | stage: build_and_test
18 | script:
19 | - echo ""
20 | only:
21 | refs:
22 | - stage
23 | tags:
24 | - terark
25 |
--------------------------------------------------------------------------------
/tests/core/never-add-stdvec-writer.hpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | // Round-trip smoke test: serialize an int and a string, read them back, compare.
5 | int main(int, char* argv[]) {
6 |     using namespace terark;
7 |     NativeDataOutput > writer;
8 |     writer << 1;
9 |     writer << std::string("abc");
10 |     // NOTE(review): reader is never attached to the bytes produced by writer above - confirm the intended wiring
11 |     NativeDataInput reader;
12 |     int i;
13 |     std::string s;
14 |     reader >> i;
15 |     reader >> s;
16 |     assert(1 == i);
17 |     assert("abc" == s);
18 |
19 |     printf("%s done\n", argv[0]); // argv must be a parameter of main for this line to compile
20 |     return 0;
21 | }
22 |
--------------------------------------------------------------------------------
/cpu_has_bmi2.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Print 1 when "${CXX} ${CPU}" compiles with __BMI2__ defined, otherwise print 0.
3 | # Environment: CXX (default g++), CPU (default -march=native), TMPDIR (default /tmp).
4 | set -e
5 | CXX=${CXX:-g++}
6 | TMPDIR=${TMPDIR:-/tmp}
7 | # default CPU flag is -march=native
8 | CPU=${CPU:--march=native}
9 | # Work in a private directory (no predictable file name to race on) and
10 | # remove it on every exit path, so nothing is left behind when compilation fails.
11 | workdir=$(mktemp -d "${TMPDIR}/detect_bmi2-XXXXXX")
12 | trap 'rm -rf "${workdir}"' EXIT
13 | cat > "${workdir}/probe.cpp" << EOF
14 | #include <stdio.h>
15 | int main() {
16 | #ifdef __BMI2__
17 | printf("1");
18 | #else
19 | printf("0");
20 | #endif
21 | return 0;
22 | }
23 | EOF
24 | # CXX and CPU stay unquoted on purpose: either may carry several words (wrapper + flags)
25 | ${CXX} ${CPU} "${workdir}/probe.cpp" -o "${workdir}/probe" && "${workdir}/probe"
26 |
--------------------------------------------------------------------------------
/3rdparty/base64/test/codec_supported.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "../include/libbase64.h"
4 | // NULL-terminated list of codec names
5 | static char *_codecs[] =
6 | { "AVX2"
7 | , "NEON32"
8 | , "NEON64"
9 | , "plain"
10 | , "SSSE3"
11 | , "SSE41"
12 | , "SSE42"
13 | , "AVX"
14 | , NULL
15 | } ;
16 | // Externally visible pointer to the list above
17 | char **codecs = _codecs;
18 | // Returns non-zero when the codec selected by `flags` can decode a known-good Base64 string.
19 | int
20 | codec_supported (int flags)
21 | {
22 |     // Probe the codec: anything other than -1 from the decoder counts as "supported".
23 |     static const char probe[] = "aGVsbG8=";
24 |     char decoded[10];
25 |     size_t decoded_len;
26 |     const int rc = trk_base64_decode(probe, sizeof(probe) - 1, decoded, &decoded_len, flags);
27 |     return rc != -1;
28 | }
29 |
--------------------------------------------------------------------------------
/gtests/utils_test.cpp:
--------------------------------------------------------------------------------
1 | #include "gtest/gtest.h"
2 | #include "utils.hpp"
3 | // NOTE(review): prints file_exist() results next to a literal 0/1 but asserts nothing, and every path is an absolute path under one developer's home directory
4 | TEST(UTILS_TEST, FILE_EXISTS) {
5 | std::cout << 0 << " " << terark::file_exist("/Users/guokuankuan/Programs/terark-tools/123") << std::endl;
6 | std::cout << 1 << " " << terark::file_exist("/Users/guokuankuan/Programs/terark-tools/README.md") << std::endl;
7 | std::cout << 1 << " " << terark::file_exist("/Users/guokuankuan/Programs/terark-tools/CmakeLists.txt") << std::endl;
8 | std::cout << 0 << " " << terark::file_exist("/Users/guokuankuan/Programs/terark-tools/not_exist") << std::endl;
9 | }
10 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/generic/codec.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "../../../include/libbase64.h"
6 | #include "../../codecs.h"
7 | // Plain-C encoder: body is assembled from included fragments (head, word-size specific loop, tail); no loop fragment is included unless BASE64_WORDSIZE is 32 or 64
8 | BASE64_ENC_FUNCTION(plain)
9 | {
10 | #include "enc_head.c"
11 | #if BASE64_WORDSIZE == 32
12 | #include "32/enc_loop.c"
13 | #elif BASE64_WORDSIZE == 64
14 | #include "64/enc_loop.c"
15 | #endif
16 | #include "enc_tail.c"
17 | }
18 | // Plain-C decoder: head and tail fragments, plus the 32-bit loop fragment whenever BASE64_WORDSIZE >= 32
19 | BASE64_DEC_FUNCTION(plain)
20 | {
21 | #include "dec_head.c"
22 | #if BASE64_WORDSIZE >= 32
23 | #include "32/dec_loop.c"
24 | #endif
25 | #include "dec_tail.c"
26 | }
27 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/neon32/enc_loop.c:
--------------------------------------------------------------------------------
1 | // If we have ARM NEON support, pick off 48 bytes at a time:
2 | while (srclen >= 48)
3 | {
4 | uint8x16x3_t str;
5 | uint8x16x4_t res;
6 |
7 | // Load 48 bytes and deinterleave:
8 | str = vld3q_u8((uint8_t *)c);
9 |
10 | // Reshuffle:
11 | res = enc_reshuffle(str);
12 |
13 | // Translate reshuffled bytes to the Base64 alphabet:
14 | res = enc_translate(res);
15 |
16 | // Interleave and store result:
17 | vst4q_u8((uint8_t *)o, res);
18 | // Advance the cursors; c, o, outl and srclen are not declared in this fragment and must exist where it is included
19 | c += 48; // 3 * 16 bytes of input
20 | o += 64; // 4 * 16 bytes of output
21 | outl += 64;
22 | srclen -= 48;
23 | }
24 |
--------------------------------------------------------------------------------
/tools/fsa/test-patricia.txt:
--------------------------------------------------------------------------------
1 | 00123456789abcdefgh
2 | 00123456789abcdefgh 0
3 | 00123456789abcdefgh 1
4 | 00123456789abcdefgh 2
5 | 00123456789abcdefgh 3
6 | 00123456789abcdefgh 4
7 | 00123456789abcdefgh 5
8 | 00123456789abcdefgh 6
9 | 00123456789abcdefgh 7
10 | 00123456789abcdefgh 8
11 | 00123456789abcdefgh 9
12 | 00123456789abcdefgh a
13 | 00123456789abcdefgh b
14 | 00123456789abcdefgh c
15 | 00123456789abcdefgh d
16 | 00123456789abcdefgh e
17 | 00123456789abcdefgh f
18 | 00123456789abcdefgh g
19 | 00123456789abcdefgh h
20 | 00123456789abcdefgh h final is not
21 | 00123456789abcdefgh h final is
22 | 00123456789abcdefgh h final ix
--------------------------------------------------------------------------------
/src/terark/io/todo/inter_thread_pipe.hpp:
--------------------------------------------------------------------------------
1 | #ifndef __terark_thread_LockFreeQueue_H__
2 | #define __terark_thread_LockFreeQueue_H__
3 |
4 | namespace thread {
5 |
6 | class inter_thread_pipe_impl;
7 | // Stream exposing both input and output interfaces, with its state behind the opaque inter_thread_pipe_impl pointer. NOTE(review): write() takes a non-const buffer and the include guard is named LockFreeQueue - confirm both are intended
8 | class inter_thread_pipe
9 | : public RefCounter
10 | , public IInputStream
11 | , public IOutputStream
12 | {
13 | inter_thread_pipe_impl* mio;
14 | public:
15 | explicit inter_thread_pipe(size_t capacity);
16 | ~inter_thread_pipe();
17 | void eof();
18 | void read(void* vbuf, size_t length);
19 | void write(void* vbuf, size_t length);
20 | };
21 |
22 | } // namespace thread
23 |
24 | #endif // __terark_thread_LockFreeQueue_H__
25 |
26 |
27 |
--------------------------------------------------------------------------------
/src/terark/util/stdptr.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 |
4 | namespace terark {
5 |
6 | // for pre c++17: wrap a raw pointer in a smart pointer, deducing T (and deleter D) from the arguments
7 | // as_shared_ptr(p): take ownership of p in a std::shared_ptr<T> with the default deleter
8 | template<class T>
9 | std::shared_ptr<T> as_shared_ptr(T* p) {
10 |     return std::shared_ptr<T>(p);
11 | }
12 | // as_shared_ptr(p, d): same, but d is invoked on p when the last owner goes away
13 | template<class T, class D>
14 | std::shared_ptr<T> as_shared_ptr(T* p, D d) {
15 |     return std::shared_ptr<T>(p, d);
16 | }
17 | // as_unique_ptr(p): take ownership of p in a std::unique_ptr<T> with the default deleter
18 | template<class T>
19 | std::unique_ptr<T> as_unique_ptr(T* p) {
20 |     return std::unique_ptr<T>(p);
21 | }
22 | // as_unique_ptr(p, d): the deleter type becomes part of the result type, std::unique_ptr<T, D>
23 | template<class T, class D>
24 | std::unique_ptr<T, D> as_unique_ptr(T* p, D d) {
25 |     return std::unique_ptr<T, D>(p, d);
26 | }
27 |
28 |
29 | } // namespace terark
30 |
31 |
--------------------------------------------------------------------------------
/gtests/zbs/zbs_mixed_len.hpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "zbs.hpp"
4 |
5 | namespace terark {
6 | // TODO: placeholder - the class has no members yet (everything below is commented out). NOTE(review): ZBS is inherited privately (class default), confirm that is intended
7 | class ZBSMixedLen : ZBS {
8 | // public:
9 | // ZBSMixedLen() {
10 | // MixedLenBlobStore::MyBuilder mlbuilder(fixedLen, varLenSize, varLenCnt,
11 | // nlt_fname, 0, checksumLevel,
12 | // checksumType);
13 | // }
14 | // ~ZBSMixedLen() {}
15 |
16 | // public:
17 | // void add_record(const std::string &record) {}
18 |
19 | // private:
20 | // MixedLenBlobStore::MyBuilder builder_;
21 | };
22 | } // namespace terark
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/generic/enc_tail.c:
--------------------------------------------------------------------------------
1 | if (srclen-- == 0) {
2 | break;
3 | }
4 | *o++ = base64_table_enc[*c >> 2];
5 | st.carry = (*c++ << 4) & 0x30;
6 | st.bytes++;
7 | outl += 1;
8 | // One input byte consumed so far: its low 2 bits wait in st.carry (already shifted to bits 4-5)
9 | case 1: if (srclen-- == 0) {
10 | break;
11 | }
12 | *o++ = base64_table_enc[st.carry | (*c >> 4)];
13 | st.carry = (*c++ << 2) & 0x3C;
14 | st.bytes++;
15 | outl += 1;
16 | // Two input bytes consumed: the low 4 bits of the second wait in st.carry (bits 2-5); the third byte completes the group and emits two symbols
17 | case 2: if (srclen-- == 0) {
18 | break;
19 | }
20 | *o++ = base64_table_enc[st.carry | (*c >> 6)];
21 | *o++ = base64_table_enc[*c++ & 0x3F];
22 | st.bytes = 0;
23 | outl += 2;
24 | }
25 | }
26 | state->bytes = st.bytes;
27 | state->carry = st.carry;
28 | *outlen = outl;
29 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/ssse3/enc_loop.c:
--------------------------------------------------------------------------------
1 | // If we have SSSE3 support, pick off 12 bytes at a time for as long as we can.
2 | // But because we read 16 bytes at a time, ensure we have enough room to do a
3 | // full 16-byte read without segfaulting:
4 | while (srclen >= 16)
5 | {
6 | // Load string:
7 | __m128i str = _mm_loadu_si128((__m128i *)c);
8 |
9 | // Reshuffle:
10 | str = enc_reshuffle(str);
11 |
12 | // Translate reshuffled bytes to the Base64 alphabet:
13 | str = enc_translate(str);
14 |
15 | // Store:
16 | _mm_storeu_si128((__m128i *)o, str);
17 | // Only 12 of the 16 loaded bytes are consumed per pass, which is why the guard above is srclen >= 16
18 | c += 12; // 3 * 4 bytes of input
19 | o += 16; // 4 * 4 bytes of output
20 | outl += 16;
21 | srclen -= 12;
22 | }
23 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/generic/enc_head.c:
--------------------------------------------------------------------------------
1 | // Assume that *out is large enough to contain the output.
2 | // Theoretically it should be 4/3 the length of src.
3 | const uint8_t *c = (const uint8_t *)src;
4 | uint8_t *o = (uint8_t *)out;
5 |
6 | // Use local temporaries to avoid cache thrashing:
7 | size_t outl = 0;
8 | struct base64_state st;
9 | st.bytes = state->bytes;
10 | st.carry = state->carry;
11 |
12 | // Turn three bytes into four 6-bit numbers:
13 | // in[0] = 00111111
14 | // in[1] = 00112222
15 | // in[2] = 00222233
16 | // in[3] = 00333333
17 | // The switch and for below are left open on purpose: this fragment ends inside `case 0:` and the fragments included after it supply the rest
18 | // Duff's device, a for() loop inside a switch() statement. Legal!
19 | switch (st.bytes)
20 | {
21 | for (;;)
22 | {
23 | case 0:
24 |
--------------------------------------------------------------------------------
/src/terark/thread/mutex.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 | #include
5 | #if defined(TERARK_WITH_TBB)
6 | #include
7 | #endif
8 |
9 | namespace terark {
10 | // spin_mutex is chosen at compile time: the class below when TERARK_WITH_TBB >= 2, tbb::spin_mutex for any other definition of the macro, and a typedef of std::mutex when it is undefined
11 | #if defined(TERARK_WITH_TBB)
12 | #if TERARK_WITH_TBB+1 >= 2+1
13 | class TERARK_DLL_EXPORT spin_mutex : boost::noncopyable {
14 | unsigned char m_is_locked;
15 | public:
16 | spin_mutex() : m_is_locked(0) {}
17 | void lock();
18 | void unlock();
19 | };
20 | #else
21 | using tbb::spin_mutex;
22 | #endif
23 | #else
24 | typedef std::mutex spin_mutex;
25 | #endif
26 |
27 | } // namespace terark
28 |
--------------------------------------------------------------------------------
/src/terark/io/StdvecWriter.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #pragma error Never Try to add such a file StdvecWriter
4 |
5 | #include // for size_t
6 |
7 | namespace terark {
8 | // Adapts a byte container (value_type of size 1) into a write sink: every write appends at the end.
9 | template<class Stdvec>
10 | class StdvecWriter : public Stdvec {
11 | public:
12 |     typedef typename Stdvec::value_type value_type;
13 |     static_assert(sizeof(value_type) == 1, "value_type must be 1 byte");
14 |     // inherit every constructor of the underlying container
15 |     using Stdvec::Stdvec;
16 |     void ensureWrite(const void* buf, size_t len) {
17 |         const value_type* first = static_cast<const value_type*>(buf);
18 |         this->insert(this->end(), first, first + len); // iterator-range overload; there is no (pos, ptr, count) overload
19 |     }
20 |     void writeByte(unsigned char b) {
21 |         this->push_back(b);
22 |     }
23 | };
24 |
25 | } // namespace terark
26 |
--------------------------------------------------------------------------------
/src/terark/io/DataInput_VarIntAsFixLen.hpp:
--------------------------------------------------------------------------------
1 | MyType& operator>>(var_int32_t & x) { return *this >> x.t; }
2 | MyType& operator>>(var_uint32_t& x) { return *this >> x.t; }
3 | // Every overload in this fragment unwraps the var-int wrapper and reads its .t member with the stream's own operator>>
4 | #if !defined(BOOST_NO_INT64_T)
5 | MyType& operator>>(var_int64_t & x) { return *this >> x.t; }
6 | MyType& operator>>(var_uint64_t& x) { return *this >> x.t; }
7 | #endif
8 | MyType& operator>>(serialize_version_t& x) { return *this >> x.t; }
9 | // The 30-bit and 61-bit wrappers are forwarded the same way
10 | MyType& operator>>(var_int30_t & x) { return *this >> x.t; }
11 | MyType& operator>>(var_uint30_t& x) { return *this >> x.t; }
12 |
13 | #if !defined(BOOST_NO_INT64_T)
14 | MyType& operator>>(var_int61_t & x) { return *this >> x.t; }
15 | MyType& operator>>(var_uint61_t& x) { return *this >> x.t; }
16 | #endif
17 |
18 |
--------------------------------------------------------------------------------
/src/terark/util/vm_util.hpp:
--------------------------------------------------------------------------------
1 | // created by leipeng 2022-07-21 09:48, all rights reserved
2 | #pragma once
3 | #if defined(_MSC_VER)
4 | // nothing
5 | #else
6 | #include
7 | #endif
8 | #include
9 |
10 | namespace terark {
11 | // Compile-time constant (4096); it is not queried from the operating system
12 | constexpr size_t VM_PAGE_SIZE = 4096;
13 | // On MSVC the tunables are constants; elsewhere they are extern const values defined in another translation unit
14 | #if defined(_MSC_VER)
15 | constexpr bool g_has_madv_populate = true;
16 | constexpr size_t g_min_prefault_pages = 1;
17 | #else
18 | TERARK_DLL_EXPORT extern const int g_linux_kernel_version;
19 | TERARK_DLL_EXPORT extern const bool g_has_madv_populate;
20 | TERARK_DLL_EXPORT extern const size_t g_min_prefault_pages;
21 | #endif
22 | // NOTE(review): presumably pre-faults the range [addr, addr+len); the meaning of min_pages is not visible in this header - confirm in the implementation
23 | TERARK_DLL_EXPORT void vm_prefetch(const void* addr, size_t len, size_t min_pages);
24 |
25 | } // namespace terark
26 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/ssse3/dec_reshuffle.c:
--------------------------------------------------------------------------------
1 | static inline __m128i
2 | dec_reshuffle (__m128i in)
3 | {
4 | // Mask in a single byte per shift:
5 | const __m128i maskB2 = _mm_set1_epi32(0x003F0000);
6 | const __m128i maskB1 = _mm_set1_epi32(0x00003F00);
7 |
8 | // Pack bytes together:
9 | __m128i out = _mm_srli_epi32(in, 16);
10 | // bits 16-21 of each 32-bit lane (maskB2) move down by 2 to bits 14-19
11 | out = _mm_or_si128(out, _mm_srli_epi32(_mm_and_si128(in, maskB2), 2));
12 | // bits 8-13 of each lane (maskB1) move up by 12 to bits 20-25
13 | out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, maskB1), 12));
14 | // the whole lane moves up by 26: bits 0-5 land in bits 26-31, everything above is shifted out
15 | out = _mm_or_si128(out, _mm_slli_epi32(in, 26));
16 |
17 | // Reshuffle and repack into 12-byte output format:
18 | return _mm_shuffle_epi8(out, _mm_setr_epi8(
19 | 3, 2, 1,
20 | 7, 6, 5,
21 | 11, 10, 9,
22 | 15, 14, 13,
23 | -1, -1, -1, -1));
24 | }
25 |
--------------------------------------------------------------------------------
/src/terark/io/file_load_save.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 | #include
6 |
7 | namespace terark {
8 | // Opens fname with mode "rb", deserializes into a temporary Object and swaps it into *obj, so *obj is only replaced after the read has completed
9 | template
10 | void native_load_file(const char* fname, Object* obj) {
11 | assert(NULL != fname);
12 | assert(NULL != obj);
13 | FileStream file(fname, "rb");
14 | NativeDataInput dio; dio.attach(&file);
15 | Object tmp;
16 | dio >> tmp;
17 | obj->swap(tmp);
18 | }
19 | // Opens fname with mode "wb" and serializes obj into it through a NativeDataOutput attached to the file
20 | template
21 | void native_save_file(const char* fname, const Object& obj) {
22 | assert(NULL != fname);
23 | FileStream file(fname, "wb");
24 | NativeDataOutput dio; dio.attach(&file);
25 | dio << obj;
26 | }
27 |
28 | } // namespace terark
29 |
--------------------------------------------------------------------------------
/src/terark/thread/fiber_aio.hpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-08-22.
3 | //
4 | #pragma once
5 |
6 | #include
7 | #include // for size_t, ssize_t
8 | #include
9 | #include
10 |
11 | namespace terark {
12 | // NOTE(review): the (fd, buf, len, offset) parameter lists mirror pread/pwrite; the return-value convention is not visible in this header - confirm in the implementation
13 | TERARK_DLL_EXPORT
14 | intptr_t fiber_aio_read(int fd, void* buf, size_t len, off_t offset);
15 |
16 | TERARK_DLL_EXPORT
17 | void fiber_aio_vm_prefetch(const void* buf, size_t len);
18 |
19 | TERARK_DLL_EXPORT
20 | intptr_t fiber_aio_write(int fd, const void* buf, size_t len, off_t offset);
21 |
22 | /// put the write to a dedicated thread to execute the write by aio
23 | TERARK_DLL_EXPORT
24 | intptr_t fiber_put_write(int fd, const void* buf, size_t len, off_t offset);
25 |
26 |
27 | } // namespace terark
28 |
--------------------------------------------------------------------------------
/src/terark/thread/futex.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | // this file should only be #include in .c/cc/cpp files
3 | #include
4 | #include /* For SYS_xxx definitions */
5 | #include
6 | // Thin wrapper over syscall(SYS_futex, ...) for operations whose 4th argument is a timeout pointer; integer arguments are widened to unsigned long before the variadic call
7 | inline long
8 | futex(void* uaddr, uint32_t op, uint32_t val, const timespec* timeout = NULL,
9 | void* uaddr2 = NULL, uint32_t val3 = 0) {
10 | return syscall(SYS_futex, uaddr, (unsigned long)op, (unsigned long)val,
11 | timeout, uaddr2, (unsigned long)val3);
12 | }
13 | // Overload for operations whose 4th argument is the integer val2; every integer is widened to unsigned long before the variadic syscall()
14 | inline long
15 | futex(void* uaddr, uint32_t op, uint32_t val, uint32_t val2,
16 |       void* uaddr2 = NULL, uint32_t val3 = 0) {
17 |     return syscall(SYS_futex, uaddr, (unsigned long)op, (unsigned long)val,
18 |                    (unsigned long)val2, uaddr2, (unsigned long)val3);
19 | }
20 |
--------------------------------------------------------------------------------
/tools/fsa/patricia_bench.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | // Loads a DFA with MatchingDFA::load_mmap(0), copies every key into fsv via a lexicographic iterator, then re-seeks each key with seek_lower_bound. argc/argv are unused. NOTE(review): 0 is presumably the stdin descriptor, and `word` (an fstring) is handed to a "%s" conversion - confirm both
4 | int main(int argc, char* argv[]) {
5 | using namespace terark;
6 | std::unique_ptr dfa(MatchingDFA::load_mmap(0));
7 | fstrvecl fsv;
8 | ADFA_LexIteratorUP iter(dfa->adfa_make_iter());
9 | if (iter->seek_begin()) {
10 | do {
11 | fstring word = iter->word();
12 | fsv.push_back(word);
13 | } while (iter->incr());
14 | }
15 | for (size_t i = 0; i < fsv.size(); ++i) {
16 | fstring word = fsv[i];
17 | TERARK_VERIFY_S(iter->seek_lower_bound(word), "word = %s", word);
18 | }
19 | fprintf(stderr, "key num = %zd, key len sum = %zd\n", fsv.size(), fsv.strpool.size());
20 | return 0;
21 | }
22 |
--------------------------------------------------------------------------------
/src/terark/fsa/forward_decl.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 | #include
6 | #include
7 |
8 | namespace terark {
9 | // Forward declarations: lets users name the DFA types without their full definitions
10 | class BaseDFA;
11 | class MatchingDFA;
12 | // Deleter functor with one overload per DFA type; the operators are only declared here
13 | struct TERARK_DLL_EXPORT BaseDFADeleter {
14 | void operator()(BaseDFA*) const;
15 | void operator()(MatchingDFA*) const;
16 | };
17 | // Owning smart-pointer aliases - NOTE(review): presumably parameterized with BaseDFADeleter, confirm
18 | typedef std::unique_ptr BaseDFAPtr;
19 | typedef std::unique_ptr MatchingDFAPtr;
20 | // Loaders return raw pointers; each comes in a file-name and a FILE* flavour
21 | TERARK_DLL_EXPORT BaseDFA* BaseDFA_load(fstring fname);
22 | TERARK_DLL_EXPORT BaseDFA* BaseDFA_load(FILE*);
23 |
24 | TERARK_DLL_EXPORT MatchingDFA* MatchingDFA_load(fstring fname);
25 | TERARK_DLL_EXPORT MatchingDFA* MatchingDFA_load(FILE*);
26 |
27 | } // namespace terark
28 |
--------------------------------------------------------------------------------
/3rdparty/base64/test/Makefile:
--------------------------------------------------------------------------------
1 | CFLAGS += -std=c99 -O3 -Wall -Wextra -pedantic
2 | ifdef OPENMP
3 | CFLAGS += -fopenmp
4 | endif
5 |
6 | TARGET := $(shell $(CC) -dumpmachine)
7 | ifneq (, $(findstring darwin, $(TARGET)))
8 | BENCH_LDFLAGS=
9 | else
10 | # default to linux, -lrt needed
11 | BENCH_LDFLAGS=-lrt
12 | endif
13 |
14 | .PHONY: clean test
15 |
16 | test: clean test_base64 benchmark
17 | ./test_base64
18 | ./benchmark
19 |
20 | test_base64: test_base64.c codec_supported.o ../lib/libbase64.o
21 | $(CC) $(CFLAGS) -o $@ $^
22 |
23 | benchmark: benchmark.c codec_supported.o ../lib/libbase64.o
24 | $(CC) $(CFLAGS) -o $@ $^ $(BENCH_LDFLAGS)
25 | # Anything that lives in the parent directory is built by the parent Makefile; $(MAKE) keeps -j/-n and other flags of the outer make
26 | ../%:
27 | $(MAKE) -C .. $*
28 |
29 | %.o: %.c
30 | $(CC) $(CFLAGS) -o $@ -c $<
31 |
32 | clean:
33 | rm -f benchmark test_base64 *.o
34 |
--------------------------------------------------------------------------------
/src/terark/thread/fiber_local.hpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-08-22.
3 | //
4 | #pragma once
5 | #include
6 |
7 | namespace terark {
8 | // recycle_pool: returned objects are kept in m_free; get() hands back a pooled object when one is available and a default-constructed T otherwise, put() moves an object into the pool
9 | template
10 | class recycle_pool {
11 | valvec m_free;
12 | static_assert(std::is_move_constructible >::value, "valvec must be move constructible");
13 | static_assert(std::is_move_constructible::value, "T must be move constructible");
14 | public:
15 | T get() {
16 | if (m_free.size()) {
17 | return m_free.pop_val();
18 | }
19 | else {
20 | return T();
21 | }
22 | }
23 | void put(T&& p) {
24 | m_free.emplace_back(std::move(p));
25 | }
26 | };
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/src/terark/fsa/fsa_ext.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-05-07.
3 | //
4 |
5 | #include "fsa_ext.hpp"
6 |
7 | namespace terark {
8 | // Write every key of the DFA to fp, one per line, and return the number of keys; a DFA that fails the dynamic_cast is rejected via THROW_STD(invalid_argument).
9 | TERARK_DLL_EXPORT
10 | size_t dfa_write_text(const BaseDFA* dfa, FILE* fp) {
11 |     auto adfa = dynamic_cast(dfa);
12 |     if (!adfa) {
13 |         THROW_STD(invalid_argument, "dfa is not an AcyclicPathDFA");
14 |     }
15 |     ADFA_LexIteratorUP iter(adfa->adfa_make_iter());
16 |     size_t nth = 0;
17 |     for (bool hasNext = iter->seek_begin(); hasNext; hasNext = iter->incr()) {
18 |         fstring word = iter->word();
19 |         // byte-exact output: "%.*s" would cut a key at its first NUL byte
20 |         fwrite(word.data(), 1, word.size(), fp);
21 |         fputc('\n', fp);
22 |         nth++;
23 |     }
24 |     return nth;
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/tools/general/hex2bson.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-10-20.
3 | //
4 | #include
5 | #include
6 | #include
7 |
8 | using namespace terark;
9 | // For each hex text line on stdin: writes a 4-byte length (4 + hexstrlen/2, stored through a uint32_t pointer, i.e. in host byte order) followed by the decoded bytes to stdout; exits with status 1 when fwrite comes up short
10 | int main() {
11 | LineBuf line;
12 | valvec obuf;
13 | while (line.getline(stdin) > 0) {
14 | line.chomp();
15 | obuf.resize_no_init(line.size()/2 + 1 + 4);
16 | size_t hexstrlen = hex_decode(line.p, line.n, obuf.data() + 4, obuf.capacity() - 4);
17 | // ignore (hexstrlen % 2 == 1)
18 | size_t datalen = hexstrlen/2;
19 | size_t bsonlen = 4 + datalen;
20 | *(uint32_t*)obuf.data() = bsonlen;
21 | size_t written = fwrite(obuf.data(), 1, bsonlen, stdout);
22 | if (written != bsonlen) {
23 | perror("fwrite(stdout) failed");
24 | exit(1);
25 | }
26 | }
27 | return 0;
28 | }
29 |
--------------------------------------------------------------------------------
/src/terark/util/checksum_exception.cpp:
--------------------------------------------------------------------------------
1 | #include "checksum_exception.hpp"
2 |
3 | #ifndef __STDC_FORMAT_MACROS
4 | #define __STDC_FORMAT_MACROS
5 | #endif // __STDC_FORMAT_MACROS
6 |
7 | #include
8 |
9 | namespace terark {
10 |
11 | BadChecksumException::~BadChecksumException() {}
12 | // Builds "<msg>: Old = 0x<16 hex digits> , New = 0x<16 hex digits>"; %016 zero-pads (a bare %16 pads with blanks after the "0x") and snprintf bounds the write to buf
13 | static std::string ChecksumErrMsg(fstring msg, uint64_t Old, uint64_t New) {
14 |     char buf[72];
15 |     const int len = snprintf(buf, sizeof(buf), ": Old = 0x%016" PRIX64 " , New = 0x%016" PRIX64, Old, New);
16 |     std::string res;
17 |     res.reserve(msg.size() + 64);
18 |     res.append(msg.data(), msg.size()).append(buf, len);
19 |     return res;
20 | }
21 | // Passes the text produced by ChecksumErrMsg to the base class and keeps both checksum values in m_old / m_new
22 | BadChecksumException::
23 | BadChecksumException(fstring msg, uint64_t Old, uint64_t New)
24 | : super(ChecksumErrMsg(msg, Old, New)), m_old(Old), m_new(New) {}
25 |
26 | } // terark
27 |
28 |
--------------------------------------------------------------------------------
/src/terark/util/stat.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | // for best compatibility, this file should be the last include
4 |
5 | #include
6 | #include
7 |
8 | #ifndef S_ISDIR
9 | #define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
10 | #endif
11 | #ifndef S_ISREG
12 | #define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
13 | #endif
14 |
15 | #ifdef _MSC_VER
16 | #ifndef _CRT_NONSTDC_NO_DEPRECATE
17 | #error _CRT_NONSTDC_NO_DEPRECATE must be defined to use posix functions on Visual C++
18 | #endif
19 | // Visual C++ does not forward stat/fstat to their 64-bit variants,
20 | // so plain stat fails on large files; map to the explicit 64-bit functions
21 | #define ll_stat _stat64
22 | #define ll_fstat _fstat64
23 | #define ll_lseek _lseeki64
24 | #else
25 | #define ll_stat stat
26 | #define ll_fstat fstat
27 | #define ll_lseek lseek
28 | #endif
29 |
30 |
--------------------------------------------------------------------------------
/src/terark/str_lex_iter.cpp:
--------------------------------------------------------------------------------
1 | #include "str_lex_iter.hpp"
2 |
3 | namespace terark {
4 | // Out-of-line destructor definition with an empty body
5 | template
6 | StringLexIteratorT::~StringLexIteratorT() {}
7 | // dispose(): releases the iterator; this implementation simply deletes the object it is called on
8 | template
9 | void StringLexIteratorT::dispose() {
10 | // default is to direct delete
11 | delete this;
12 | }
13 | // seek_begin(): implemented as seek_lower_bound() on a default-constructed fstr; returns whatever that call returns
14 | template
15 | bool StringLexIteratorT::seek_begin() {
16 | return seek_lower_bound(fstr());
17 | }
18 | // seek_rev_lower_bound(str): exact hit -> true; a lower bound exists but differs from str -> result of decr(); no lower bound -> result of seek_end()
19 | template
20 | bool StringLexIteratorT::seek_rev_lower_bound(fstr str) {
21 | if (seek_lower_bound(str)) {
22 | if (word() == str)
23 | return true;
24 | return decr();
25 | }
26 | return seek_end();
27 | }
28 |
29 | template class StringLexIteratorT;
30 | template class StringLexIteratorT;
31 |
32 | } // namespace terark
33 |
--------------------------------------------------------------------------------
/src/terark/util/nolocks_localtime.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
5 | namespace terark {
6 |
7 | // variant taking an explicit time zone (tz) and DST setting (dst)
8 | TERARK_DLL_EXPORT void nolocks_localtime_tzd(struct tm*, time_t, long tz, int dst);
9 |
10 | // variant taking an explicit time zone (tz) only
11 | TERARK_DLL_EXPORT void nolocks_localtime_tz(struct tm*, time_t, long tz);
12 |
13 | // prototype is the same as localtime
14 | TERARK_DLL_EXPORT struct tm* nolocks_localtime(const time_t*);
15 |
16 | // prototype is the same as localtime_r
17 | TERARK_DLL_EXPORT struct tm* nolocks_localtime_r(const time_t*, struct tm*);
18 | // NOTE(review): the StrDateTime* helpers return const char*; who owns the returned buffer is not visible in this header - confirm before keeping the pointer
19 | TERARK_DLL_EXPORT const char* StrDateTimeEpochSec(time_t);
20 | TERARK_DLL_EXPORT const char* StrDateTimeEpochUS(long long time_us);
21 |
22 | TERARK_DLL_EXPORT const char* StrDateTimeNow();
23 |
24 | } // namespace terark
25 |
--------------------------------------------------------------------------------
/src/terark/util/throw.cpp:
--------------------------------------------------------------------------------
1 | #include "throw.hpp"
2 | #include "autofree.hpp"
3 | #include
4 | #include
5 | #include
6 |
7 | namespace terark {
8 | // printf-style formatting into a std::string; a negative result from the formatter is never used as a length.
9 | TERARK_DLL_EXPORT
10 | std::string ExceptionFormatString(const char* format, ...) {
11 |     va_list ap;
12 |     va_start(ap, format);
13 | #ifdef _MSC_VER
14 |     std::string buf(16*1024, '\0');
15 |     int len = _vsnprintf(&buf[0], buf.size(), format, ap);
16 |     va_end(ap);
17 |     buf.resize(len < 0 ? buf.size() : size_t(len)); // negative: output did not fit, keep the whole buffer
18 |     buf.shrink_to_fit();
19 |     return buf;
20 | #else
21 |     terark::AutoFree<char> buf;
22 |     int len = vasprintf(&buf.p, format, ap);
23 |     va_end(ap);
24 |     if (len < 0) { // vasprintf failed: buf.p is unspecified, so neither read nor free it
25 |         buf.p = NULL;
26 |         return std::string(format);
27 |     }
28 |     return std::string(buf.p, len);
29 | #endif
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/ssse3/codec.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "../../../include/libbase64.h"
6 | #include "../../codecs.h"
7 |
8 | #ifdef __SSSE3__
9 | #include
10 |
11 | #include "../sse2/compare_macros.h"
12 |
13 | #include "dec_reshuffle.c"
14 | #include "enc_reshuffle.c"
15 | #include "enc_translate.c"
16 |
17 | #endif // __SSSE3__
18 | // SSSE3 encoder: generic head/tail fragments around the local enc_loop.c when __SSSE3__ is defined, otherwise BASE64_ENC_STUB
19 | BASE64_ENC_FUNCTION(ssse3)
20 | {
21 | #ifdef __SSSE3__
22 | #include "../generic/enc_head.c"
23 | #include "enc_loop.c"
24 | #include "../generic/enc_tail.c"
25 | #else
26 | BASE64_ENC_STUB
27 | #endif
28 | }
29 | // SSSE3 decoder: generic head/tail fragments around the local dec_loop.c when __SSSE3__ is defined, otherwise BASE64_DEC_STUB
30 | BASE64_DEC_FUNCTION(ssse3)
31 | {
32 | #ifdef __SSSE3__
33 | #include "../generic/dec_head.c"
34 | #include "dec_loop.c"
35 | #include "../generic/dec_tail.c"
36 | #else
37 | BASE64_DEC_STUB
38 | #endif
39 | }
40 |
--------------------------------------------------------------------------------
/src/terark/util/crc.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 |
5 | namespace terark {
6 | // NOTE(review): judging by the inCrc* parameter these presumably continue a running checksum over buf; the seed convention is not visible here - confirm
7 | TERARK_DLL_EXPORT
8 | uint32_t Crc32c_update(uint32_t inCrc32, const void *buf, size_t bufLen);
9 |
10 | TERARK_DLL_EXPORT
11 | uint16_t Crc16c_update(uint16_t inCrc16, const void *buf, size_t bufLen);
12 | // Checksum-mismatch exceptions: both forward msg and the old/new values to BadChecksumException
13 | class TERARK_DLL_EXPORT BadCrc32cException : public BadChecksumException {
14 | public:
15 | BadCrc32cException(fstring msg, uint32_t Old, uint32_t New)
16 | : BadChecksumException(msg, Old, New) {}
17 | ~BadCrc32cException();
18 | };
19 |
20 | class TERARK_DLL_EXPORT BadCrc16cException : public BadChecksumException {
21 | public:
22 | BadCrc16cException(fstring msg, uint16_t Old, uint16_t New)
23 | : BadChecksumException(msg, Old, New) {}
24 | ~BadCrc16cException();
25 | };
26 |
27 | } // terark
28 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/avx/codec.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "../../../include/libbase64.h"
6 | #include "../../codecs.h"
7 |
8 | #ifdef __AVX__
9 | #include
10 |
11 | #include "../sse2/compare_macros.h"
12 |
13 | #include "../ssse3/dec_reshuffle.c"
14 | #include "../ssse3/enc_translate.c"
15 | #include "../ssse3/enc_reshuffle.c"
16 |
17 | #endif // __AVX__
18 | // AVX encoder: reuses the ssse3 encoder loop between the generic head/tail fragments when __AVX__ is defined, otherwise BASE64_ENC_STUB
19 | BASE64_ENC_FUNCTION(avx)
20 | {
21 | #ifdef __AVX__
22 | #include "../generic/enc_head.c"
23 | #include "../ssse3/enc_loop.c"
24 | #include "../generic/enc_tail.c"
25 | #else
26 | BASE64_ENC_STUB
27 | #endif
28 | }
29 | // AVX decoder: uses the sse42 decoder loop (not the ssse3 one) between the generic head/tail fragments when __AVX__ is defined, otherwise BASE64_DEC_STUB
30 | BASE64_DEC_FUNCTION(avx)
31 | {
32 | #ifdef __AVX__
33 | #include "../generic/dec_head.c"
34 | #include "../sse42/dec_loop.c"
35 | #include "../generic/dec_tail.c"
36 | #else
37 | BASE64_DEC_STUB
38 | #endif
39 | }
40 |
--------------------------------------------------------------------------------
/src/terark/util/tmpfile.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 | #include
6 |
7 | namespace terark {
8 | // Bundles a path, a FileStream and a NativeDataOutput writer. NOTE(review): the name says the file is deleted on close; the destructor body is not visible in this header - confirm
9 | class TERARK_DLL_EXPORT TempFileDeleteOnClose {
10 | public:
11 | std::string path;
12 | FileStream fp;
13 | NativeDataOutput writer;
14 |
15 | ~TempFileDeleteOnClose();
16 | void open_temp();
17 | void open();
18 | void dopen(int fd);
19 | void close();
20 | void complete_write();
21 | };
22 | // A key file and a value file kept together; isFullValue defaults to true
23 | struct TERARK_DLL_EXPORT FilePair {
24 | TempFileDeleteOnClose key;
25 | TempFileDeleteOnClose value;
26 | bool isFullValue = true;
27 | };
28 |
29 | // Holds a file path and converts implicitly to fstring. NOTE(review): presumably Delete() and the destructor remove the file - confirm in the implementation
30 | class TERARK_DLL_EXPORT AutoDeleteFile {
31 | public:
32 | std::string fpath;
33 | operator fstring() const { return fpath; }
34 | void Delete();
35 | ~AutoDeleteFile();
36 | };
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | cmake-build-debug
2 | .idea
3 | build
4 | tools/*/dbg
5 | tools/*/rls
6 |
7 | pkg
8 | # Compiled Object files
9 | *.slo
10 | *.lo
11 | *.o
12 | *.obj
13 |
14 | # Precompiled Headers
15 | *.gch
16 | *.pch
17 |
18 | # Compiled Dynamic libraries
19 | *.so
20 | *.dylib
21 | *.dll
22 |
23 | # Fortran module files
24 | *.mod
25 | *.smod
26 |
27 | # Compiled Static libraries
28 | *.lai
29 | *.la
30 | *.a
31 | *.lib
32 |
33 | # Executables
34 | *.exe
35 | *.out
36 | *.app
37 |
38 | .idea/
39 | .vs/
40 | *.tlog
41 | *.lastbuildstate
42 | *.idb
43 | *.pdb
44 | *.ilk
45 | */x64/Debug/
46 | */x64/Release/
47 | */CMakeFiles/
48 |
49 | build
50 | vs2015
51 | vs2017
52 |
53 | # vim backup file
54 | *~
55 |
56 | # got files
57 | *.got
58 |
59 | # vscode file
60 | .vscode
61 |
62 | cmake-build-debug
63 | .DS_Store
64 |
65 | terark-core
66 | terark-rocksdb
67 | output
68 | tests/Testing
69 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/sse42/codec.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "../../../include/libbase64.h"
6 | #include "../../codecs.h"
7 |
8 | #ifdef __SSE4_2__
9 | #include
10 |
11 | #include "../sse2/compare_macros.h"
12 |
13 | #include "../ssse3/dec_reshuffle.c"
14 | #include "../ssse3/enc_translate.c"
15 | #include "../ssse3/enc_reshuffle.c"
16 |
17 | #endif // __SSE4_2__
18 |
19 | BASE64_ENC_FUNCTION(sse42)
20 | {
21 | #ifdef __SSE4_2__
22 | #include "../generic/enc_head.c"
23 | #include "../ssse3/enc_loop.c"
24 | #include "../generic/enc_tail.c"
25 | #else
26 | BASE64_ENC_STUB
27 | #endif
28 | }
29 |
30 | BASE64_DEC_FUNCTION(sse42)
31 | {
32 | #ifdef __SSE4_2__
33 | #include "../generic/dec_head.c"
34 | #include "dec_loop.c"
35 | #include "../generic/dec_tail.c"
36 | #else
37 | BASE64_DEC_STUB
38 | #endif
39 | }
40 |
--------------------------------------------------------------------------------
/tests/core/test_call_on_main_stack.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-10-28.
3 | //
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | int main() { // smoke test: run a function with a large stack frame through scheduler::call_on_main_stack from inside a fiber
10 | using namespace boost::fibers;
11 | auto fn = []() {
12 | scheduler* sched = context::active()->get_scheduler();
13 | auto largeFn = []() {
14 | char buf[256 * 1024] = {0}; // 256 KiB local array, i.e. a deliberately large stack frame
15 | sprintf(buf, "fn: large stack");
16 | //printf("%s\n", buf);
17 | };
18 | //size_t loop = 1024*1024;
19 | size_t loop = 1; // single iteration; the larger count above is kept commented out
20 | for (size_t i = 0; i < loop; ++i) {
21 | sched->call_on_main_stack(largeFn);
22 | }
23 | };
24 | fiber f1(fn);
25 | boost::this_fiber::yield();
26 | f1.join(); // wait for the fiber to finish before reporting success
27 | printf("call_on_main_stack passed\n");
28 | return 0;
29 | }
30 |
--------------------------------------------------------------------------------
/src/terark/util/sorted_uint_vec_get_block_word.hpp:
--------------------------------------------------------------------------------
1 | #if (Width == 1)
2 | for (size_t j = 0; j < RealWordUnits; ++j) {
3 | aVals[i*TERARK_WORD_BITS + j] = val;
4 | if (w & 1) {
5 | val += smallDiff; // faster than Width != 1
6 | }
7 | else {
8 | size_t largeDiff = febitvec::s_get_uint(pLargeBase, largeBitPos, largeUnitWidth);
9 | val += largeDiff;
10 | largeBitPos += largeUnitWidth;
11 | }
12 | w >>= 1;
13 | }
14 | #else
15 | for (size_t j = 0; j < RealWordUnits; ++j) {
16 | aVals[i*WordUnits + j] = val;
17 | size_t diff = w & UnitMask;
18 | if (diff) {
19 | val += minDiffVal + diff;
20 | }
21 | else {
22 | size_t largeDiff = febitvec::s_get_uint(pLargeBase, largeBitPos, largeUnitWidth);
23 | val += largeDiff;
24 | largeBitPos += largeUnitWidth;
25 | }
26 | w >>= Width;
27 | }
28 | #endif
29 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/sse41/codec.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "../../../include/libbase64.h"
6 | #include "../../codecs.h"
7 |
8 | #ifdef __SSE4_1__
9 | #include
10 |
11 | #include "../sse2/compare_macros.h"
12 |
13 | #include "../ssse3/dec_reshuffle.c"
14 | #include "../ssse3/enc_translate.c"
15 | #include "../ssse3/enc_reshuffle.c"
16 |
17 | #endif // __SSE4_1__
18 |
19 | BASE64_ENC_FUNCTION(sse41)
20 | {
21 | #ifdef __SSE4_1__
22 | #include "../generic/enc_head.c"
23 | #include "../ssse3/enc_loop.c"
24 | #include "../generic/enc_tail.c"
25 | #else
26 | BASE64_ENC_STUB
27 | #endif
28 | }
29 |
30 | BASE64_DEC_FUNCTION(sse41)
31 | {
32 | #ifdef __SSE4_1__
33 | #include "../generic/dec_head.c"
34 | #include "../ssse3/dec_loop.c"
35 | #include "../generic/dec_tail.c"
36 | #else
37 | BASE64_DEC_STUB
38 | #endif
39 | }
40 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/dll/example/build_package.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | MKDIR bin\dll bin\static bin\example bin\include
3 | COPY tests\fullbench.c bin\example\
4 | COPY programs\datagen.c bin\example\
5 | COPY programs\datagen.h bin\example\
6 | COPY programs\util.h bin\example\
7 | COPY programs\platform.h bin\example\
8 | COPY lib\common\mem.h bin\example\
9 | COPY lib\common\zstd_internal.h bin\example\
10 | COPY lib\common\error_private.h bin\example\
11 | COPY lib\common\xxhash.h bin\example\
12 | COPY lib\libzstd.a bin\static\libzstd_static.lib
13 | COPY lib\dll\libzstd.* bin\dll\
14 | COPY lib\dll\example\Makefile bin\example\
15 | COPY lib\dll\example\fullbench-dll.* bin\example\
16 | COPY lib\dll\example\README.md bin\
17 | COPY lib\zstd.h bin\include\
18 | COPY lib\common\zstd_errors.h bin\include\
19 | COPY lib\dictBuilder\zdict.h bin\include\
20 | COPY programs\zstd.exe bin\zstd.exe
21 |
--------------------------------------------------------------------------------
/src/terark/io/DataOutput_VarIntAsFixLen.hpp:
--------------------------------------------------------------------------------
1 | public:
2 | MyType& operator<<(var_int32_t x) { return this->operator<<(x.t); }
3 | MyType& operator<<(var_uint32_t x) { return this->operator<<(x.t); }
4 |
5 | #if !defined(BOOST_NO_INT64_T)
6 | MyType& operator<<(var_int64_t x) { return this->operator<<(x.t); }
7 | MyType& operator<<(var_uint64_t x) { return this->operator<<(x.t); }
8 | #endif
9 |
10 | MyType& operator<<(serialize_version_t x) { return this->operator<<(x.t); }
11 |
12 | //-----------------------------------------------------------------------------------
13 | MyType& operator<<(var_int30_t x) { return this->operator<<(x.t); }
14 | MyType& operator<<(var_uint30_t x) { return this->operator<<(x.t); }
15 |
16 | #if !defined(BOOST_NO_INT64_T)
17 | MyType& operator<<(var_int61_t x) { return this->operator<<(x.t); }
18 | MyType& operator<<(var_uint61_t x) { return this->operator<<(x.t); }
19 | #endif
20 |
21 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/generic/dec_head.c:
--------------------------------------------------------------------------------
1 | int ret = 0;
2 | const uint8_t *c = (const uint8_t *)src;
3 | uint8_t *o = (uint8_t *)out;
4 | uint8_t q;
5 |
6 | // Use local temporaries to avoid cache thrashing:
7 | size_t outl = 0;
8 | struct base64_state st;
9 | st.eof = state->eof;
10 | st.bytes = state->bytes;
11 | st.carry = state->carry;
12 |
13 | // If we previously saw an EOF or an invalid character, bail out:
14 | if (st.eof) {
15 | *outlen = 0;
16 | ret = 0;
17 | // If there was a trailing '=' to check, check it:
18 | if (srclen && (st.eof == BASE64_AEOF)) {
19 | state->bytes = 0;
20 | state->eof = BASE64_EOF;
21 | ret = ((base64_table_dec[*c++] == 254) && (srclen == 1)) ? 1 : 0;
22 | }
23 | return ret;
24 | }
25 |
26 | // Turn four 6-bit numbers into three bytes:
27 | // out[0] = 11111122
28 | // out[1] = 22223333
29 | // out[2] = 33444444
30 |
31 | // Duff's device again:
32 | switch (st.bytes)
33 | {
34 | for (;;)
35 | {
36 | case 0:
37 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/common/debug.c:
--------------------------------------------------------------------------------
1 | /* ******************************************************************
2 | * debug
3 | * Part of FSE library
4 | * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5 | *
6 | * You can contact the author at :
7 | * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8 | *
9 | * This source code is licensed under both the BSD-style license (found in the
10 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11 | * in the COPYING file in the root directory of this source tree).
12 | * You may select, at your option, one of the above-listed licenses.
13 | ****************************************************************** */
14 |
15 |
16 | /*
17 | * This module only hosts one global variable
18 | * which can be used to dynamically influence the verbosity of traces,
19 | * such as DEBUGLOG and RAWLOG
20 | */
21 |
22 | #include "debug.h"
23 |
24 | int g_debuglevel = DEBUGLEVEL;
25 |
--------------------------------------------------------------------------------
/src/terark/io/DataIO_SmartPtr.hpp:
--------------------------------------------------------------------------------
1 | /* vim: set tabstop=4 : */
2 | #pragma once
3 |
4 | #include
5 | #include
6 |
7 | namespace terark {
8 |
9 | //!
10 | #define DATA_IO_SMART_PTR_LOAD_SAVE(SmartPtrTemplate) \
11 | template \
12 | void DataIO_loadObject(DataIO& dio, SmartPtrTemplate& x) \
13 | { \
14 | x.reset(new T); \
15 | dio >> *x; \
16 | } \
17 | template \
18 | void DataIO_saveObject(DataIO& dio, const SmartPtrTemplate& x)\
19 | { \
20 | dio << *x; \
21 | }
22 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23 |
24 | DATA_IO_SMART_PTR_LOAD_SAVE(std::auto_ptr)
25 | DATA_IO_SMART_PTR_LOAD_SAVE(boost::intrusive_ptr)
26 | DATA_IO_SMART_PTR_LOAD_SAVE(boost::scoped_ptr)
27 | DATA_IO_SMART_PTR_LOAD_SAVE(boost::shared_ptr)
28 |
29 |
30 | } // namespace terark
31 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/generic/32/enc_loop.c:
--------------------------------------------------------------------------------
1 | // If we have 32-bit ints, pick off 3 bytes at a time for as long as we can,
2 | // but ensure that there are at least 4 bytes available to avoid segfaulting:
3 | while (srclen >= 4)
4 | {
5 | // Load string:
6 | uint32_t str = *(uint32_t *)c;
7 |
8 | // Reorder to 32-bit big-endian, if not already in that format. The
9 | // workset must be in big-endian, otherwise the shifted bits do not
10 | // carry over properly among adjacent bytes:
11 | str = cpu_to_be32(str);
12 |
13 | // Shift input by 6 bits each round and mask in only the lower 6 bits;
14 | // look up the character in the Base64 encoding table and write it to
15 | // the output location:
16 | *o++ = base64_table_enc[(str >> 26) & 0x3F];
17 | *o++ = base64_table_enc[(str >> 20) & 0x3F];
18 | *o++ = base64_table_enc[(str >> 14) & 0x3F];
19 | *o++ = base64_table_enc[(str >> 8) & 0x3F];
20 |
21 | c += 3; // 3 bytes of input
22 | outl += 4; // 4 bytes of output
23 | srclen -= 3;
24 | }
25 |
--------------------------------------------------------------------------------
/src/terark/io/HexCodingStream.cpp:
--------------------------------------------------------------------------------
1 | /* vim: set tabstop=4 : */
2 | #include "HexCodingStream.hpp"
3 | #include "DataInput.hpp"
4 | #include
5 |
6 | namespace terark {
7 |
8 | // '0' == 0x30
9 | // 'a' == 0x61
10 | // 'A' == 0x41
11 | const unsigned char G_hex_val_hexTab[] = // hex digit values laid out starting at character '0'; 255 marks a non-hex character (presumably indexed by ch - '0' -- confirm in HexCodingStream.hpp)
12 | {
13 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
14 | 255, 255, 255, 255, 255, 255,
15 | // below, begin with '0' + 16 = 0x40
16 | 255,
17 | 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
18 | 255, 255, 255, 255, 255,
19 | 255, 255, 255, 255, 255,
20 | // below, begin with 'A' + 16 = 0x51
21 | 255, 255, 255, 255,
22 | 255, 255, 255, 255,
23 | 255, 255, 255, 255,
24 | 255, 255, 255, 255,
25 | // below, begin with 'a' = 0x61; the table ends at 'f', so larger characters have no entry
26 | 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
27 | };
28 |
29 | void invalid_hex_char(unsigned char ch, const char* func) // report a bad hex digit: always throws DataFormatException, never returns normally
30 | {
31 | string_appender<> oss;
32 | oss << "invalid hex char(ch=" << char(ch) << ",ascii=" << int(ch) << ") in func: " << func; // message carries the character, its numeric code and the caller-supplied function name
33 | throw DataFormatException(oss.str());
34 | }
35 |
36 | } // namespace terark
37 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/avx2/enc_loop.c:
--------------------------------------------------------------------------------
1 | // If we have AVX2 support, pick off 24 bytes at a time for as long as we can.
2 | // But because we read 32 bytes at a time, ensure we have enough room to do a
3 | // full 32-byte read without segfaulting:
4 |
5 | if (srclen >= 32) {
6 | const uint8_t* const o_orig = o; // remember the starting output position so the bytes written can be counted at the end
7 |
8 | // first load is done at c-0 not to get a segfault
9 | __m256i inputvector = _mm256_loadu_si256((__m256i *)(c - 0));
10 |
11 | // shift by 4 bytes, as required by enc_reshuffle
12 | inputvector = _mm256_permutevar8x32_epi32(inputvector, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
13 |
14 | for (;;) {
15 | inputvector = enc_reshuffle(inputvector);
16 | inputvector = enc_translate(inputvector);
17 | _mm256_storeu_si256((__m256i *)o, inputvector);
18 | c += 24; // 24 input bytes consumed per round
19 | o += 32; // 32 output bytes produced per round
20 | srclen -= 24;
21 | if(srclen < 28) { // the next load starts at c-4 and is 32 bytes wide, so 28 bytes from c must remain
22 | break;
23 | }
24 | // Load at c-4, as required by enc_reshuffle
25 | inputvector = _mm256_loadu_si256((__m256i *)(c - 4));
26 | }
27 | outl += (size_t)(o - o_orig); // add the number of bytes this loop wrote
28 | }
29 |
--------------------------------------------------------------------------------
/src/terark/fsa/ppi/flat_dfa_mmap.hpp:
--------------------------------------------------------------------------------
1 |
2 | void finish_load_mmap(const DFA_MmapHeader* base) override { // point `states` at block 0 described by header `base` and copy the header's counters into this object
3 | assert(sizeof(State) == base->state_size); // debug-only check that the stored state size matches this State type
4 | byte_t* bbase = (byte_t*)base; // block offsets below are applied relative to the header address
5 | if (base->total_states >= size_t(-1)) {
6 | THROW_STD(out_of_range, "total_states=%lld", (long long)base->total_states);
7 | }
8 | states.clear();
9 | states.risk_set_data((State*)(bbase + base->blocks[0].offset)); // presumably just stores the pointer so `states` aliases block 0 (risk_set_data is not visible here)
10 | states.risk_set_size(size_t(base->total_states));
11 | states.risk_set_capacity(size_t(base->total_states));
12 | m_gnode_states = size_t(base->gnode_states);
13 | m_zpath_states = size_t(base->zpath_states);
14 | this->set_trans_num(size_t(base->transition_num));
15 | }
16 |
17 | long prepare_save_mmap(DFA_MmapHeader* base, const void** dataPtrs) // fill header fields for saving and report where the block data lives
18 | const override {
19 | base->state_size = sizeof(State);
20 | base->transition_num = total_transitions();
21 | base->num_blocks = 1; // a single block: the state array
22 | base->blocks[0].offset = sizeof(DFA_MmapHeader); // block 0 is placed directly after the header
23 | base->blocks[0].length = sizeof(State)*states.size();
24 | dataPtrs[0] = states.data(); // source bytes for block 0
25 | return 0; // always 0 here; how callers interpret the value is not visible in this file
26 | }
27 |
--------------------------------------------------------------------------------
/src/terark/pass_by_value.hpp:
--------------------------------------------------------------------------------
1 | /* vim: set tabstop=4 : */
2 | #pragma once
3 |
4 | //#if defined(_MSC_VER) && (_MSC_VER >= 1020)
5 | //# pragma once
6 | //#endif
7 |
8 | namespace terark {
9 |
10 | //! When T is an artificial (proxy) reference, use this class to forward the call.
11 | //!
12 | //! `input >> t` actually calls void DataIO_loadObject(Input& input, T t);
13 | //! here both pass_by_value and T are passed by value.
14 | //!
15 | //! T contains a real reference, e.g. when T is load_as_var_int_proxy.
16 | //! This way, not every class like load_as_var_int_proxy has to be written into the DataInput interface,
17 | //! so the DataInput interface only needs a single pass_by_value.
18 | //!
19 | //! In effect two intermediate layers are used: one is load_as_var_int_proxy, which is the real proxy;
20 | //! the other is pass_by_value, which only adapts to the DataInput interface,
21 | //! because a T& cannot bind to a temporary.
22 | //! ---- Add this line for Microsoft C++ 2013 brain dead compiler error ----
23 | template class pass_by_value
24 | {
25 | public:
26 | T val; // the wrapped T, stored by value
27 |
28 | typedef T type;
29 |
30 | pass_by_value(const T& val) : val(val) {} // non-explicit, so a T converts implicitly to pass_by_value
31 |
32 | T& operator=(const T& y) { val = y; return val; } // assigns into the wrapped value and returns a reference to it (not to *this)
33 |
34 | operator T&() { return val; }
35 |
36 | T& get() { return val; }
37 | };
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/sse41/enc_reshuffle.c:
--------------------------------------------------------------------------------
1 | static inline __m128i
2 | enc_reshuffle (__m128i in)
3 | {
4 | // Slice into 32-bit chunks and operate on all chunks in parallel.
5 | // All processing is done within the 32-bit chunk. First, shuffle:
6 | // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb]
7 | // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd]
8 | in = _mm_shuffle_epi8(in, _mm_set_epi8(
9 | -1, 9, 10, 11,
10 | -1, 6, 7, 8,
11 | -1, 3, 4, 5,
12 | -1, 0, 1, 2));
13 |
14 | // merged = [0000aaaa|aabbbbbb|bbbbcccc|ccdddddd]
15 | const __m128i merged = _mm_blend_epi16(_mm_slli_epi32(in, 4), in, 0x55);
16 |
17 | // bd = [00000000|00bbbbbb|00000000|00dddddd]
18 | const __m128i bd = _mm_and_si128(merged, _mm_set1_epi32(0x003F003F));
19 |
20 | // ac = [00aaaaaa|00000000|00cccccc|00000000]
21 | const __m128i ac = _mm_and_si128(_mm_slli_epi32(merged, 2), _mm_set1_epi32(0x3F003F00));
22 |
23 | // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd]
24 | const __m128i indices = _mm_or_si128(ac, bd);
25 |
26 | // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa]
27 | return _mm_bswap_epi32(indices);
28 | }
29 |
--------------------------------------------------------------------------------
/src/terark/io/todo/DataIO_Parser.hpp:
--------------------------------------------------------------------------------
1 | /* vim: set tabstop=4 : */
2 | #ifndef __terark_io_DataIO_Parser_h__
3 | #define __terark_io_DataIO_Parser_h__
4 |
5 | #if defined(_MSC_VER) && (_MSC_VER >= 1020)
6 | # pragma once
7 | #endif
8 |
9 | #include "StreamBuffer.hpp"
10 | #include "DataInput.hpp"
11 | #include
12 |
13 | namespace terark {
14 |
15 | template
16 | class DataIO_TextReader : public DataInput >
17 | {
18 | public:
19 |
20 |
21 | protected:
22 | PrimInput* input;
23 | DataIO_Parser* parser;
24 | };
25 |
26 | template
27 | class DataIO_XML_Reader
28 | {
29 | InputBuffer* buf;
30 | public:
31 | Final_Input& operator>>(int& x)
32 | {
33 | int ch = buf->readByte();
34 | switch (ch)
35 | {
36 | case 0: // oct
37 | break;
38 |
39 | }
40 | do {
41 | ch =
42 | } while (isdigit(ch));
43 | }
44 | };
45 |
46 | class DataIO_Parser
47 | {
48 | public:
49 |
50 | protected:
51 | const char* szMemberText;
52 | std::vector memberNames;
53 | };
54 |
55 | }
56 |
57 | #endif // __terark_io_DataIO_Parser_h__
58 |
59 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/deprecated/zbuff_common.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under both the BSD-style license (found in the
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 | * in the COPYING file in the root directory of this source tree).
8 | * You may select, at your option, one of the above-listed licenses.
9 | */
10 |
11 | /*-*************************************
12 | * Dependencies
13 | ***************************************/
14 | #include "../common/error_private.h"
15 | #include "zbuff.h"
16 |
17 | /*-****************************************
18 | * ZBUFF Error Management (deprecated)
19 | ******************************************/
20 |
21 | /*! ZBUFF_isError() :
22 | * tells if a return value is an error code */
23 | unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
24 | /*! ZBUFF_getErrorName() :
25 | * provides error code string from function result (useful for debugging) */
26 | const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
27 |
--------------------------------------------------------------------------------
/gtests/index/adfa_test.cpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 | #include
5 |
6 | #include "terark/util/function.hpp"
7 | #include "terark/util/linebuf.hpp"
8 | #include "terark/util/profiling.hpp"
9 | #include "terark/hash_strmap.hpp"
10 | #include "terark/fsa/cspptrie.inl"
11 | #include "terark/fsa/nest_trie_dawg.hpp"
12 |
13 |
14 | namespace terark {
15 |
16 | template
17 | void buil_dfa(Inserter inserter) {
18 | DFA dfa;
19 | }
20 |
21 | TEST(ADFA_TEST, EMPTY_DFA_TEST_1) {
22 | // MainPatricia trie(sizeof(uint32_t), 1<<20, Patricia::SingleThreadShared);
23 | MainPatricia dfa;
24 | std::unique_ptr iterU(dfa.adfa_make_iter(initial_state));
25 | auto iter = iterU.get();
26 | ASSERT_TRUE(!iter->seek_begin());
27 | ASSERT_TRUE(!iter->seek_end());
28 | ASSERT_TRUE(!iter->seek_lower_bound("1"));
29 | ASSERT_TRUE(!iter->seek_lower_bound("2"));
30 | ASSERT_TRUE(!iter->seek_lower_bound("9"));
31 | ASSERT_TRUE(!iter->seek_lower_bound("\xFF"));
32 | }
33 |
34 | TEST(ADFA_TEST, ITERATOR_TEST) {
35 |
36 | }
37 | }
38 |
39 |
--------------------------------------------------------------------------------
/src/terark/util/truncate_file.cpp:
--------------------------------------------------------------------------------
1 | #include "truncate_file.hpp"
2 | #include
3 | #include
4 |
5 | #if defined(_MSC_VER)
6 | #include
7 | #else
8 | #include
9 | #endif
10 | #include
11 | #include
12 | #include
13 |
14 | namespace terark {
15 |
16 | TERARK_DLL_EXPORT
17 | void truncate_file(const char* fpath, unsigned long long size) { // open fpath (creating it if absent) and set its length to `size` bytes; failures are raised via THROW_STD(logic_error, ...)
18 | #ifdef _MSC_VER
19 | Auto_close_fd fd(::_open(fpath, O_CREAT|O_BINARY|O_RDWR, 0644));
20 | #else
21 | Auto_close_fd fd(::open(fpath, O_CREAT|O_RDWR, 0644));
22 | #endif
23 | if (fd < 0) {
24 | THROW_STD(logic_error
25 | , "FATAL: ::open(%s, O_CREAT|O_BINARY|O_RDWR) = %s"
26 | , fpath, strerror(errno));
27 | }
28 | #ifdef _MSC_VER
29 | int err = ::_chsize_s(fd, size);
30 | if (err) {
31 | THROW_STD(logic_error, "FATAL: ::_chsize_s(%s, %llu) = %s" // size is unsigned long long, so it is printed with %llu
32 | , fpath, size, strerror(errno));
33 | }
34 | #else
35 | int err = ::ftruncate(fd, size);
36 | if (err) {
37 | THROW_STD(logic_error, "FATAL: ::ftruncate(%s, %llu) = %s" // names the call actually made (ftruncate) and prints size as unsigned
38 | , fpath, size, strerror(errno));
39 | }
40 | #endif
41 | }
42 |
43 | } // namespace terark
44 |
--------------------------------------------------------------------------------
/src/terark/zbs/ZstdStream.hpp:
--------------------------------------------------------------------------------
1 | #if defined(_MSC_VER) && (_MSC_VER >= 1020)
2 | #pragma once
3 | #endif
4 |
5 | #include
6 | #include
7 |
8 | namespace terark {
9 |
10 | class TERARK_DLL_EXPORT ZstdInputStream : public IInputStream, public RefCounter { // an IInputStream constructed over another IInputStream; presumably zstd-decoding (implementation not visible here)
11 | DECLARE_NONE_COPYABLE_CLASS(ZstdInputStream)
12 |
13 | public:
14 | explicit ZstdInputStream(IInputStream*);
15 | ~ZstdInputStream();
16 |
17 | void resetIstream(IInputStream*); // per the name, switches to a different underlying stream -- confirm in the .cpp
18 | size_t read(void* buf, size_t size) throw(); // declared with an empty exception specification
19 | bool eof() const;
20 |
21 | private:
22 | class Impl; // only forward-declared in this header
23 | Impl* m_impl; // pointer to the implementation object, whose layout is hidden from users of this header
24 | };
25 |
26 | class TERARK_DLL_EXPORT ZstdOutputStream : public IOutputStream, public RefCounter { // an IOutputStream constructed over another IOutputStream; presumably zstd-encoding (implementation not visible here)
27 | DECLARE_NONE_COPYABLE_CLASS(ZstdOutputStream)
28 |
29 | public:
30 | explicit ZstdOutputStream(IOutputStream*);
31 | ~ZstdOutputStream();
32 |
33 | void setCLevel(size_t l); // presumably the compression level -- TODO confirm valid range in the .cpp
34 | void resetOstream(IOutputStream*); // per the name, switches to a different underlying stream -- confirm in the .cpp
35 | size_t write(const void* buf, size_t size) throw(); // declared with an empty exception specification
36 | void flush();
37 | void close();
38 |
39 | private:
40 | class Impl; // only forward-declared in this header
41 | Impl* m_impl; // pointer to the implementation object, whose layout is hidden from users of this header
42 | };
43 |
44 | } // namespace terark
--------------------------------------------------------------------------------
/3rdparty/base64/test/moby_dick_plain.txt:
--------------------------------------------------------------------------------
1 | Call me Ishmael. Some years ago--never mind how long precisely--having
2 | little or no money in my purse, and nothing particular to interest me on
3 | shore, I thought I would sail about a little and see the watery part of
4 | the world. It is a way I have of driving off the spleen and regulating
5 | the circulation. Whenever I find myself growing grim about the mouth;
6 | whenever it is a damp, drizzly November in my soul; whenever I find
7 | myself involuntarily pausing before coffin warehouses, and bringing up
8 | the rear of every funeral I meet; and especially whenever my hypos get
9 | such an upper hand of me, that it requires a strong moral principle to
10 | prevent me from deliberately stepping into the street, and methodically
11 | knocking people's hats off--then, I account it high time to get to sea
12 | as soon as I can. This is my substitute for pistol and ball. With a
13 | philosophical flourish Cato throws himself upon his sword; I quietly
14 | take to the ship. There is nothing surprising in this. If they but knew
15 | it, almost all men in their degree, some time or other, cherish very
16 | nearly the same feelings towards the ocean with me.
17 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/generic/64/enc_loop.c:
--------------------------------------------------------------------------------
1 | // If we have 64-bit ints, pick off 6 bytes at a time for as long as we can,
2 | // but ensure that there are at least 8 bytes available to avoid segfaulting:
3 | while (srclen >= 8)
4 | {
5 | // Load string:
6 | uint64_t str = *(uint64_t *)c;
7 |
8 | // Reorder to 64-bit big-endian, if not already in that format. The
9 | // workset must be in big-endian, otherwise the shifted bits do not
10 | // carry over properly among adjacent bytes:
11 | str = cpu_to_be64(str);
12 |
13 | // Shift input by 6 bits each round and mask in only the lower 6 bits;
14 | // look up the character in the Base64 encoding table and write it to
15 | // the output location:
16 | *o++ = base64_table_enc[(str >> 58) & 0x3F];
17 | *o++ = base64_table_enc[(str >> 52) & 0x3F];
18 | *o++ = base64_table_enc[(str >> 46) & 0x3F];
19 | *o++ = base64_table_enc[(str >> 40) & 0x3F];
20 | *o++ = base64_table_enc[(str >> 34) & 0x3F];
21 | *o++ = base64_table_enc[(str >> 28) & 0x3F];
22 | *o++ = base64_table_enc[(str >> 22) & 0x3F];
23 | *o++ = base64_table_enc[(str >> 16) & 0x3F];
24 |
25 | c += 6; // 6 bytes of input
26 | outl += 8; // 8 bytes of output
27 | srclen -= 6;
28 | }
29 |
--------------------------------------------------------------------------------
/tests/core/test_boost_fss.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-08-28.
3 | //
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | void clean_fss_int(int* p) {
10 | printf("destroy int = %d\n", *p);
11 | }
12 | static boost::fibers::fiber_specific_ptr fs0(clean_fss_int);
13 | static thread_local boost::fibers::fiber_specific_ptr fs1(clean_fss_int);
14 |
15 | int main(int argc, char* argv[]) {
16 | boost::fibers::fiber_specific_ptr fs2(clean_fss_int);
17 | auto func = [&](const char* name) {
18 | printf("---- %s ----\n", name);
19 | boost::fibers::fiber_specific_ptr fs3(clean_fss_int);
20 | if (fs0.get() == NULL) {
21 | fs0.reset(new int(0));
22 | }
23 | if (fs1.get() == NULL) {
24 | fs1.reset(new int(1));
25 | }
26 | if (fs2.get() == NULL) {
27 | fs2.reset(new int(2));
28 | }
29 | if (fs3.get() == NULL) {
30 | fs3.reset(new int(3));
31 | }
32 | };
33 | std::thread thr(func, "thread");
34 | thr.join();
35 | boost::fibers::fiber fb(func, "fiber");
36 | fb.join();
37 | return 0;
38 | }
39 |
--------------------------------------------------------------------------------
/gtests/tools/core/test_boost_fss.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-08-28.
3 | //
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | void clean_fss_int(int* p) {
10 | printf("destroy int = %d\n", *p);
11 | }
12 | static boost::fibers::fiber_specific_ptr fs0(clean_fss_int);
13 | static thread_local boost::fibers::fiber_specific_ptr fs1(clean_fss_int);
14 |
15 | int main(int argc, char* argv[]) {
16 | boost::fibers::fiber_specific_ptr fs2(clean_fss_int);
17 | auto func = [&](const char* name) {
18 | printf("---- %s ----\n", name);
19 | boost::fibers::fiber_specific_ptr fs3(clean_fss_int);
20 | if (fs0.get() == NULL) {
21 | fs0.reset(new int(0));
22 | }
23 | if (fs1.get() == NULL) {
24 | fs1.reset(new int(1));
25 | }
26 | if (fs2.get() == NULL) {
27 | fs2.reset(new int(2));
28 | }
29 | if (fs3.get() == NULL) {
30 | fs3.reset(new int(3));
31 | }
32 | };
33 | std::thread thr(func, "thread");
34 | thr.join();
35 | boost::fibers::fiber fb(func, "fiber");
36 | fb.join();
37 | return 0;
38 | }
39 |
--------------------------------------------------------------------------------
/tools/general/text_key_val_to_kvbin.cpp:
--------------------------------------------------------------------------------
1 | #define _CRT_SECURE_NO_WARNINGS
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #ifdef _MSC_VER
8 | #include
9 | #include
10 | #endif
11 |
12 | int main(int argc, char* argv[]) { // convert "key<TAB>value" text lines from stdin into binary records on stdout
13 | BOOST_STATIC_ASSERT(sizeof(int) == 4);
14 | int kvlen[2]; // [0] = key length, [1] = value length; each is written as a raw 4-byte int
15 | int lineno = 0;
16 | #ifdef _MSC_VER
17 | if (_setmode(_fileno(stdout), _O_BINARY) < 0) {
18 | THROW_STD(invalid_argument, "set stdout as binary mode failed");
19 | }
20 | #endif
21 | terark::LineBuf line;
22 | while (line.getline(stdin) > 0) {
23 | lineno++;
24 | line.chomp();
25 | if (line.empty()) { // empty lines are reported on stderr and produce no record
26 | fprintf(stderr, "line:%d is empty\n", lineno);
27 | continue;
28 | }
29 | const char* beg = line.begin();
30 | const char* end = line.end();
31 | const char* tab = std::find(beg, end, '\t'); // the first TAB separates key from value
32 | if (tab == end) {
33 | kvlen[1] = 0; // no TAB: the whole line is the key and the value is empty
34 | } else {
35 | kvlen[1] = end - tab - 1; // value is everything after the TAB
36 | }
37 | kvlen[0] = tab - beg;
38 | fwrite(kvlen, 1, sizeof(kvlen), stdout); // record header: both lengths, in host byte order
39 | fwrite(beg+0, 1, kvlen[0], stdout);
40 | fwrite(tab+1, 1, kvlen[1], stdout); // when no TAB was found kvlen[1] is 0, so no bytes are written from tab+1
41 | }
42 | return 0;
43 | }
44 |
45 |
--------------------------------------------------------------------------------
/gtests/tools/core/test_ProcPipeStream.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | int main(int argc, char* argv[]) {
5 | using namespace terark;
6 | {
7 | LineBuf line;
8 | ProcPipeStream pp("echo aaaa", "r");
9 | printf("reading result\n");
10 | //line.getline(pp);
11 |
12 | line.read_all(pp);
13 | line.chomp();
14 | assert(line.size() == 4);
15 | printf("read result = len=%zd : %s\n", line.n, line.p);
16 | assert(fstring(line) == "aaaa");
17 | printf("1 passed\n");
18 | }
19 |
20 | {
21 | printf("2 begin...\n");
22 | ProcPipeStream pp("cat > proc.test.tmp", "w");
23 | fprintf(pp, "%s\n", "bbbb");
24 | pp.close();
25 |
26 | LineBuf line;
27 | line.read_all("proc.test.tmp");
28 | line.chomp();
29 | assert(fstring(line) == "bbbb");
30 | printf("2 passed\n");
31 | }
32 |
33 | try {
34 | ProcPipeStream pp("test-non-existed-file", "r");
35 | pp.close();
36 | assert(pp.err_code() != 0); // will not goes here
37 | }
38 | catch (const std::exception&) {
39 | }
40 | printf("3 passed\n");
41 |
42 | return 0;
43 | }
44 |
--------------------------------------------------------------------------------
/tools/zbs/zip-bench.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | set -e
5 |
6 | cd /home/leipeng/terark-mysql/storage/rocksdb/terarkdb/terark-core/tools/zbs
7 | export LD_LIBRARY_PATH=../../build/Linux-x86_64-g++-4.9-bmi2-1/lib_shared
8 |
9 | for ifile in /data01/hdfs_data/raw.json.*; do # one benchmark round per raw input file
10 | for sampleRatio in 030 040 045; do # used both as the -S 0.<ratio> argument and as the output file suffix; ofile is (re)assigned on the next line
11 | ofile=/data00/leipeng/bmq-${ifile#/data01/hdfs_data/raw.json.}
12 | time env DictZipBlobStore_zipThreads=0 rls/zbs_build.exe -ZEBp -j128 -S 0.$sampleRatio -o ${ofile}.zbs.${sampleRatio} $ifile
13 | rls/zbs_unzip.exe -t -b 1 -T 10 ${ofile}.zbs.${sampleRatio}
14 | rls/zbs_unzip.exe -r -t -b 1 -T 10 ${ofile}.zbs.${sampleRatio}
15 | time env DictZipBlobStore_zipThreads=0 rls/zbs_build.exe -ZEBp -j128 -S 0.$sampleRatio -o ${ofile}.zbs.${sampleRatio}.huf -e h $ifile
16 | rls/zbs_unzip.exe -t -b 1 -T 10 ${ofile}.zbs.${sampleRatio}.huf
17 | rls/zbs_unzip.exe -r -t -b 1 -T 10 ${ofile}.zbs.${sampleRatio}.huf
18 | done
19 | time rls/zbs_build.exe -j128 -z 6 -T o -o ${ofile}.zstd.rec -B $ifile # ofile still holds the value set inside the inner loop; it depends only on ifile
20 | rls/zbs_unzip.exe -t -b 1 -T 10 ${ofile}.zstd.rec
21 | rls/zbs_unzip.exe -r -t -b 1 -T 10 ${ofile}.zstd.rec
22 | time zstd -f -o ${ofile}.zstd.all $ifile # plain zstd on the whole input, timed alongside the zbs runs
23 | time zstd -d < ${ofile}.zstd.all > /dev/null # time decompression only; the output is discarded
24 | done
25 |
26 |
--------------------------------------------------------------------------------
/src/terark/util/memcmp_coding.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 |
5 | namespace terark {
6 |
7 | TERARK_DLL_EXPORT
8 | char* encode_0_01_00(const char* ibeg, const char* iend, char* obeg, char* oend);
9 |
10 | TERARK_DLL_EXPORT
11 | char* decode_01_00(const char* ibeg, const char**ires, char* obeg, char* oend);
12 |
13 | TERARK_DLL_EXPORT
14 | const char* end_of_01_00(const char* encoded);
15 |
16 | TERARK_DLL_EXPORT
17 | const char* end_of_01_00(const char* beg, const char* end);
18 |
19 | // float encoding/decoding intentionally uses unsigned char*
20 | TERARK_DLL_EXPORT
21 | unsigned char* encode_memcmp_float(float src, unsigned char* dst);
22 |
23 | TERARK_DLL_EXPORT
24 | unsigned char* encode_memcmp_double(double src, unsigned char* dst);
25 |
26 | TERARK_DLL_EXPORT const unsigned char*
27 | decode_memcmp_float(const unsigned char* src, float* dst);
28 |
29 | TERARK_DLL_EXPORT const unsigned char*
30 | decode_memcmp_double(const unsigned char* src, double* dst);
31 |
32 | template
33 | TERARK_DLL_EXPORT
34 | unsigned char* encode_memcmp_real(Real src, unsigned char* dst);
35 |
36 | template
37 | TERARK_DLL_EXPORT
38 | const unsigned char*
39 | decode_memcmp_real(const unsigned char* src, Real* dst);
40 |
41 | } // namespace terark
42 |
--------------------------------------------------------------------------------
/src/terark/io/todo/inter_thread_pipe.cpp:
--------------------------------------------------------------------------------
1 | #include "inter_thread_pipe.cpp"
2 |
3 | namespace terark {
4 |
5 | class inter_thread_pipe_impl // Unfinished implementation class: only eof() has logic; read/write/flush are stubs.
6 | {
7 | boost::mutex m_mutex;
8 | boost::condition m_cond;
9 | unsigned char *m_bufp, *m_putp, *m_getp;
10 | size_t m_size;
11 | long m_timeout;
12 |
13 | public:
14 | bool eof()
15 | {
16 | boost::mutex::scoped_lock lock(m_mutex);
17 | return (m_size+(m_get-m_putp)) % m_size == 1; // NOTE(review): `m_get` is not declared; the members are m_bufp/m_putp/m_getp, so presumably m_getp was meant
18 | }
19 |
20 | size_t read(void* vbuf, size_t length)
21 | {
22 | boost::mutex::scoped_lock lock(m_mutex);
23 | } // NOTE(review): takes the lock but returns no value although the return type is size_t
24 |
25 | size_t write(void* vbuf, size_t length)
26 | {
27 |
28 | } // NOTE(review): empty body, no value returned although the return type is size_t
29 |
30 | void flush()
31 | {
32 | }
33 | };
34 |
35 | inter_thread_pipe::inter_thread_pipe(size_t capacity) // Initializes the mio member from the requested capacity (unfinished).
36 | : mio(new capacity) // NOTE(review): `capacity` is a parameter, not a type, so `new capacity` does not compile; presumably an inter_thread_pipe_impl should be allocated here
37 | {
38 | }
39 |
40 | inter_thread_pipe::~inter_thread_pipe() // Intended to release what the constructor allocated (unfinished).
41 | {
42 | delete capacity; // NOTE(review): no `capacity` is declared in this scope; presumably `delete mio` was meant
43 | }
44 |
45 | bool inter_thread_pipe::eof()
46 | {
47 |
48 | return mio->eof();
49 | }
50 |
51 | size_t inter_thread_pipe::read(void* vbuf, size_t length)
52 | {
53 | return mio->read(vbuf, length);
54 | }
55 |
56 | size_t inter_thread_pipe::write(void* vbuf, size_t length)
57 | {
58 |
59 | }
60 |
61 | void inter_thread_pipe::flush()
62 | {
63 | }
64 |
65 | } // namespace thread
66 |
67 |
68 |
69 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/neon64/enc_loop.c:
--------------------------------------------------------------------------------
1 | // If we have ARM NEON support, pick off 48 bytes at a time:
2 | while (srclen >= 48)
3 | {
4 | uint8x16x3_t str;
5 | uint8x16x4_t res;
6 |
7 | // Load 48 bytes and deinterleave:
8 | str = vld3q_u8((uint8_t *)c);
9 |
10 | // Divide bits of three input bytes over four output bytes:
11 | res.val[0] = vshrq_n_u8(str.val[0], 2);
12 | res.val[1] = vshrq_n_u8(str.val[1], 4) | vshlq_n_u8(str.val[0], 4);
13 | res.val[2] = vshrq_n_u8(str.val[2], 6) | vshlq_n_u8(str.val[1], 2);
14 | res.val[3] = str.val[2];
15 |
16 | // Clear top two bits:
17 | res.val[0] &= vdupq_n_u8(0x3F);
18 | res.val[1] &= vdupq_n_u8(0x3F);
19 | res.val[2] &= vdupq_n_u8(0x3F);
20 | res.val[3] &= vdupq_n_u8(0x3F);
21 |
22 | // The bits have now been shifted to the right locations;
23 | // translate their values 0..63 to the Base64 alphabet.
24 | // Use a 64-byte table lookup:
25 | res.val[0] = vqtbl4q_u8(tbl_enc, res.val[0]);
26 | res.val[1] = vqtbl4q_u8(tbl_enc, res.val[1]);
27 | res.val[2] = vqtbl4q_u8(tbl_enc, res.val[2]);
28 | res.val[3] = vqtbl4q_u8(tbl_enc, res.val[3]);
29 |
30 | // Interleave and store result:
31 | vst4q_u8((uint8_t *)o, res);
32 |
33 | c += 48; // 3 * 16 bytes of input
34 | o += 64; // 4 * 16 bytes of output
35 | outl += 64;
36 | srclen -= 48;
37 | }
38 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/ssse3/enc_translate.c:
--------------------------------------------------------------------------------
1 | static inline __m128i
2 | enc_translate (const __m128i in)
3 | {
4 | // LUT contains Absolute offset for all ranges:
5 | const __m128i lut = _mm_setr_epi8(
6 | 65, 71, -4, -4,
7 | -4, -4, -4, -4,
8 | -4, -4, -4, -4,
9 | -19, -16, 0, 0
10 | );
11 |
12 | // Translate values 0..63 to the Base64 alphabet. There are five sets:
13 | // # From To Abs Index Characters
14 | // 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ
15 | // 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz
16 | // 2 [52..61] [48..57] -4 [2..11] 0123456789
17 | // 3 [62] [43] -19 12 +
18 | // 4 [63] [47] -16 13 /
19 |
20 | // Create LUT indices from input:
21 | // the index for range #0 is right, others are 1 less than expected:
22 | __m128i indices = _mm_subs_epu8(in, _mm_set1_epi8(51));
23 |
24 | // mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0:
25 | __m128i mask = CMPGT(in, 25);
26 |
27 | // subtract -1 (i.e. add 1) to indices for range #[1..4]; all indices are now correct:
28 | indices = _mm_sub_epi8(indices, mask);
29 |
30 | // Add offsets to input values:
31 | __m128i out = _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
32 |
33 | return out;
34 | }
35 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/compress/zstd_compress_superblock.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under both the BSD-style license (found in the
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 | * in the COPYING file in the root directory of this source tree).
8 | * You may select, at your option, one of the above-listed licenses.
9 | */
10 |
11 | #ifndef ZSTD_COMPRESS_ADVANCED_H
12 | #define ZSTD_COMPRESS_ADVANCED_H
13 |
14 | /*-*************************************
15 | * Dependencies
16 | ***************************************/
17 |
18 | #include "../zstd.h" /* ZSTD_CCtx */
19 |
20 | /*-*************************************
21 | * Target Compressed Block Size
22 | ***************************************/
23 |
24 | /* ZSTD_compressSuperBlock() :
25 | * Used to compress a super block when targetCBlockSize is being used.
26 | * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
27 | size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
28 | void* dst, size_t dstCapacity,
29 | void const* src, size_t srcSize,
30 | unsigned lastBlock);
31 |
32 | #endif /* ZSTD_COMPRESS_ADVANCED_H */
33 |
--------------------------------------------------------------------------------
/3rdparty/base64/lib/arch/generic/32/dec_loop.c:
--------------------------------------------------------------------------------
1 | // Read source 4 bytes at a time
2 | // Since we might be writing one byte more than needed,
3 | // we need to make sure there will still be some room
4 | // for one extra byte in o.
5 | // This will be the case if srclen > 0 when the loop
6 | // is exited
7 | while (srclen > 4)
8 | {
9 | union {
10 | uint32_t asint;
11 | uint8_t aschar[4];
12 | } x;
13 |
14 | x.asint = base64_table_dec_d0[c[0]]
15 | | base64_table_dec_d1[c[1]]
16 | | base64_table_dec_d2[c[2]]
17 | | base64_table_dec_d3[c[3]];
18 |
19 | #if BASE64_LITTLE_ENDIAN
20 | // LUTs for little-endian set Most Significant Bit
21 | // in case of invalid character
22 | if (x.asint & 0x80000000U) break;
23 | #else
24 | // LUTs for big-endian set Least Significant Bit
25 | // in case of invalid character
26 | if (x.asint & 1U) break;
27 | #endif
28 |
29 | #if HAVE_FAST_UNALIGNED_ACCESS
30 | // This might segfault or be too slow on
31 | // some architectures, do this only if specified
32 | // with HAVE_FAST_UNALIGNED_ACCESS macro
33 | // We write one byte more than needed
34 | *(uint32_t*)o = x.asint;
35 | #else
36 | // Fallback, write bytes one by one
37 | o[0] = x.aschar[0];
38 | o[1] = x.aschar[1];
39 | o[2] = x.aschar[2];
40 | #endif
41 |
42 | c += 4;
43 | o += 3;
44 | outl += 3;
45 | srclen -= 4;
46 | }
47 |
--------------------------------------------------------------------------------
/src/terark/util/DataBuffer.cpp:
--------------------------------------------------------------------------------
1 | #include "DataBuffer.hpp"
2 |
3 | namespace terark {
4 |
5 | inline
6 | DataBuffer::DataBuffer(size_t size)
7 | : m_refcount(0), m_size(size)
8 | {}
9 |
10 | DataBuffer* DataBuffer::create(size_t size) // Allocates a single char array of sizeof(DataBuffer) + size bytes and constructs the DataBuffer at its start.
11 | {
12 | DataBuffer* p = (DataBuffer*)new char[sizeof(DataBuffer) + size];
13 | new (p) DataBuffer(size); // placement new: construct the header in the raw storage allocated above
14 | return p;
15 | }
16 | void DataBuffer::destroy(DataBuffer* p) // Frees the storage behind p as a char array; no DataBuffer destructor is invoked here.
17 | {
18 | char* pb = (char*)p; // cast back to char* so that delete [] pairs with the new char[] used in create()
19 | delete [] pb;
20 | }
21 |
22 | DataBufferPtr::DataBufferPtr(size_t size)
23 | : MyBase(DataBuffer::create(size))
24 | {}
25 |
26 | // SmartBuffer
27 |
28 | SmartBuffer::SmartBuffer(size_t size) // Creates a buffer of `size` bytes together with a newly allocated reference counter initialized to 1.
29 | {
30 | m_data = size ? new byte[size] : 0; // a zero size allocates nothing and leaves m_data null
31 | m_size = size;
32 | m_refcountp = new std::atomic(1); // NOTE(review): if this allocation throws, the array allocated two lines above is not released
33 | }
34 |
35 | SmartBuffer::~SmartBuffer() // Drops one reference; the owner whose decrement reaches zero frees the counter and the data.
36 | {
37 | if (m_refcountp && 0 == --*m_refcountp) // guarded against a null counter pointer
38 | {
39 | delete m_refcountp;
40 | delete [] m_data;
41 | }
42 | }
43 |
44 | SmartBuffer::SmartBuffer(const SmartBuffer& rhs) // Shallow copy: shares rhs's data pointer, size and counter, then adds one reference.
45 | : m_data(rhs.m_data)
46 | , m_size(rhs.m_size)
47 | , m_refcountp(rhs.m_refcountp)
48 | {
49 | if (m_refcountp) // a null counter is copied as-is without incrementing
50 | ++*m_refcountp;
51 | }
52 |
53 | const SmartBuffer& SmartBuffer::operator=(const SmartBuffer& rhs) // Copy-and-swap assignment; returns *this as a const reference.
54 | {
55 | SmartBuffer(rhs).swap(*this); // the temporary takes a reference on rhs; presumably swap() exchanges all members, so the temporary's destructor releases this object's previous state
56 | return *this;
57 | }
58 |
59 |
60 |
61 |
62 |
63 | } // namespace terark
64 |
65 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/compress/zstd_compress_literals.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under both the BSD-style license (found in the
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 | * in the COPYING file in the root directory of this source tree).
8 | * You may select, at your option, one of the above-listed licenses.
9 | */
10 |
11 | #ifndef ZSTD_COMPRESS_LITERALS_H
12 | #define ZSTD_COMPRESS_LITERALS_H
13 |
14 | #include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
15 |
16 |
17 | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
18 |
19 | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
20 |
21 | size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
22 | ZSTD_hufCTables_t* nextHuf,
23 | ZSTD_strategy strategy, int disableLiteralCompression,
24 | void* dst, size_t dstCapacity,
25 | const void* src, size_t srcSize,
26 | void* entropyWorkspace, size_t entropyWorkspaceSize,
27 | const int bmi2);
28 |
29 | #endif /* ZSTD_COMPRESS_LITERALS_H */
30 |
--------------------------------------------------------------------------------
/src/terark/io/FileDataIO.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "DataIO.hpp"
4 | #include "FileStream.hpp"
5 | #include "StreamBuffer.hpp"
6 |
7 | namespace terark {
8 | template
9 | class FileDataInput : public DataIO {
10 | public:
11 | FileStream file;
12 | FileDataInput(const char* fname) : file(fname, "rb") {
13 | this->attach(&file);
14 | }
15 | };
16 | template
17 | class FileDataOutput : public DataIO {
18 | public:
19 | FileStream file;
20 | FileDataOutput(const char* fname) : file(fname, "wb") {
21 | this->attach(&file);
22 | }
23 | ~FileDataOutput() {
24 | this->flush();
25 | this->attach(NULL);
26 | }
27 | };
28 |
29 | typedef FileDataInput > NativeFileDataInput;
30 | typedef FileDataOutput > NativeFileDataOutput;
31 | typedef FileDataInput > PortableFileDataInput;
32 | typedef FileDataOutput > PortableFileDataOutput;
33 |
34 | typedef FileDataInput > BigEndianFileDataInput;
35 | typedef FileDataOutput > BigEndianFileDataOutput;
36 | typedef FileDataInput > LittleEndianFileDataInput;
37 | typedef FileDataOutput > LittleEndianFileDataOutput;
38 | }
39 |
--------------------------------------------------------------------------------
/src/terark/io/discard/hole_stream.hpp:
--------------------------------------------------------------------------------
1 | /* vim: set tabstop=4 : */
2 | #ifndef __terark_io_hole_stream_h__
3 | #define __terark_io_hole_stream_h__
4 |
5 | #if defined(_MSC_VER) && (_MSC_VER >= 1020)
6 | # pragma once
7 | #endif
8 |
9 | //#include
10 | //#include // for memcpy
11 | //#include
12 | //#include
13 |
14 | #include
15 | //#include "IOException.hpp"
16 |
17 | namespace terark {
18 |
19 | class HoleStream // Write-only sink that discards the data and only advances an internal position counter.
20 | {
21 | public:
22 | explicit HoleStream() : m_pos(0) {}
23 |
24 | // size_t read(void* vbuf, size_t length) { m_pos += length; return length; }
25 | size_t write(const void* vbuf, size_t length) { m_pos += length; return length; } // vbuf is never read; always reports `length` bytes written
26 |
27 | // void ensureRead(void* vbuf, size_t length) { m_pos += length; }
28 | void ensureWrite(const void* vbuf, size_t length) { m_pos += length; } // same as write() but without a return value
29 |
30 | // byte readByte() { return 0; }
31 |
32 | void writeByte(unsigned char) { m_pos++; } // the byte value is ignored
33 |
34 | private:
35 | stream_position_t m_pos; // total number of bytes "written" so far; no accessor is declared in this class
36 | };
37 |
38 | class SeekableHoleStream // Placeholder: records a position and a size but declares no stream operations yet.
39 | {
40 | public:
41 | explicit SeekableHoleStream(stream_position_t size) // starts at position 0 with the given size
42 | {
43 | m_pos = 0;
44 | m_size = size;
45 | }
46 |
47 | private:
48 | stream_position_t m_pos;
49 | stream_position_t m_size;
50 | };
51 |
52 | } // namespace terark
53 |
54 | #endif
55 |
56 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/compress/zstd_fast.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3 | * All rights reserved.
4 | *
5 | * This source code is licensed under both the BSD-style license (found in the
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 | * in the COPYING file in the root directory of this source tree).
8 | * You may select, at your option, one of the above-listed licenses.
9 | */
10 |
11 | #ifndef ZSTD_FAST_H
12 | #define ZSTD_FAST_H
13 |
14 | #if defined (__cplusplus)
15 | extern "C" {
16 | #endif
17 |
18 | #include "../common/mem.h" /* U32 */
19 | #include "zstd_compress_internal.h"
20 |
21 | void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
22 | void const* end, ZSTD_dictTableLoadMethod_e dtlm);
23 | size_t ZSTD_compressBlock_fast(
24 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
25 | void const* src, size_t srcSize);
26 | size_t ZSTD_compressBlock_fast_dictMatchState(
27 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28 | void const* src, size_t srcSize);
29 | size_t ZSTD_compressBlock_fast_extDict(
30 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
31 | void const* src, size_t srcSize);
32 |
33 | #if defined (__cplusplus)
34 | }
35 | #endif
36 |
37 | #endif /* ZSTD_FAST_H */
38 |
--------------------------------------------------------------------------------
/3rdparty/zstd/zstd/dll/example/fullbench-dll.sln:
--------------------------------------------------------------------------------
1 | Microsoft Visual Studio Solution File, Format Version 12.00
2 | # Visual Studio Express 2012 for Windows Desktop
3 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench-dll", "fullbench-dll.vcxproj", "{13992FD2-077E-4954-B065-A428198201A9}"
4 | EndProject
5 | Global
6 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
7 | Debug|Win32 = Debug|Win32
8 | Debug|x64 = Debug|x64
9 | Release|Win32 = Release|Win32
10 | Release|x64 = Release|x64
11 | EndGlobalSection
12 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
13 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.ActiveCfg = Debug|Win32
14 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.Build.0 = Debug|Win32
15 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.ActiveCfg = Debug|x64
16 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.Build.0 = Debug|x64
17 | {13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.ActiveCfg = Release|Win32
18 | {13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.Build.0 = Release|Win32
19 | {13992FD2-077E-4954-B065-A428198201A9}.Release|x64.ActiveCfg = Release|x64
20 | {13992FD2-077E-4954-B065-A428198201A9}.Release|x64.Build.0 = Release|x64
21 | EndGlobalSection
22 | GlobalSection(SolutionProperties) = preSolution
23 | HideSolutionNode = FALSE
24 | EndGlobalSection
25 | EndGlobal
26 |
--------------------------------------------------------------------------------
/gtests/rank_select/rank_select_few_reg_test.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | #define RS_REG_TEST(__P, __W) \
7 | TEST(RANK_SELECT_FEW_REG_TEST, PIVOT_##__P##_WIDTH_##__W){ \
8 | rank_select_few<__P, __W> rs; \
9 | rank_select_few_builder<__P, __W> rsbuild(50, 3276849, false); \
10 | for(int i = 0; i < 3276899; i++) \
11 | if(i < 588939 || i > 588988) rsbuild.insert(i); \
12 | rsbuild.finish(&rs); \
13 | for(int i = 0; i < 50; i++){ \
14 | ASSERT_TRUE(rs.select0(i) == 588939+i); \
15 | ASSERT_TRUE(rs.rank0(588939+i) == i); \
16 | ASSERT_TRUE(rs.zero_seq_revlen(588939+i) == i); \
17 | } \
18 | ASSERT_TRUE(rs.zero_seq_revlen(588989) == 50); \
19 | ASSERT_TRUE(rs.zero_seq_revlen(588990) == 0); \
20 | ASSERT_TRUE(rs.zero_seq_len(3276799) == 0); \
21 | ASSERT_TRUE(rs.zero_seq_len(3276800) == 0); \
22 | ASSERT_TRUE(rs.zero_seq_len(3276801) == 0); \
23 | ASSERT_TRUE(rs.is1(3276799)); \
24 | ASSERT_TRUE(rs.is1(3276800)); \
25 | ASSERT_TRUE(rs.is1(3276801)); \
26 | }
27 |
28 | namespace terark {
29 | RS_REG_TEST(0, 3)
30 | RS_REG_TEST(0, 4)
31 | RS_REG_TEST(0, 5)
32 | RS_REG_TEST(0, 6)
33 | RS_REG_TEST(0, 7)
34 | RS_REG_TEST(0, 8)
35 | }
--------------------------------------------------------------------------------
/src/terark/thread/fiber_pool.hpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2022-08-24 14:24
3 | //
4 |
5 | #include "fiber_yield.hpp"
6 | #include
7 |
8 | namespace terark {
9 |
10 | class TERARK_DLL_EXPORT FiberPool : public FiberYield {
11 | public:
12 | static constexpr int MAX_QUEUE_LEN = 256;
13 | static constexpr int DEFAULT_FIBER_CNT = 16;
14 | struct task_t {
15 | void (*func)(void* arg1, size_t arg2, size_t arg3);
16 | void* arg1;
17 | size_t arg2; // theoretically arg1 is enough, we add arg2 & arg3 for common
18 | size_t arg3; // case optimization to avoid user code alloc memory for args.
19 | // also buffered_channel use array to store task_t, with arg2
20 | // and arg3, sizeof(task_t) == 32 is power of 2, this helps
21 | // compiler optimization
22 | };
23 | explicit FiberPool(boost::fibers::context** activepp);
24 | ~FiberPool();
25 | void update_fiber_count(int count);
26 | void push(task_t&& task);
27 | bool try_push(const task_t& task);
28 | int wait(int timeout_us);
29 | int wait();
30 | int fiber_cnt() const { return m_fiber_cnt; }
31 | int pending_cnt() const { return m_pending_cnt; }
32 | protected:
33 | void fiber_proc(int fiber_idx);
34 | int m_fiber_cnt = 0;
35 | int m_pending_cnt = 0;
36 | boost::fibers::buffered_channel m_channel;
37 | };
38 |
39 | } // namespace terark
40 |
--------------------------------------------------------------------------------
/src/terark/util/fast_getcpu.hpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #if defined(__linux__) && (defined(__amd64__) || defined(__amd64) || \
4 | defined(__x86_64__) || defined(__x86_64) || \
5 | defined(__ia64__) || defined(_IA64) || defined(__IA64__) ) || \
6 | defined(__INTEL_COMPILER) && ( \
7 | defined(__ia64) || defined(__itanium__) || \
8 | defined(__x86_64) || defined(__x86_64__) )
9 |
10 | namespace terark {
11 | terark_forceinline unsigned int fast_getcpu(void) { // Returns the low 12 bits of the segment limit that LSL reports for the per-CPU GDT selector.
12 | /* Abused to load per CPU data from limit */
13 | const unsigned GDT_ENTRY_PER_CPU = 15;
14 | const unsigned __PER_CPU_SEG = (GDT_ENTRY_PER_CPU * 8 + 3); // selector value built from GDT index 15: index * 8 with the low bits set to 3
15 | static const unsigned VGETCPU_CPU_MASK = 0xfff;
16 | unsigned int p;
17 | /*
18 | * Load per CPU data from GDT. LSL is faster than RDTSCP and
19 | * works on all CPUs. This is volatile so that it orders
20 | * correctly wrt barrier() and to keep gcc from cleverly
21 | * hoisting it out of the calling function.
22 | */
23 | asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); // NOTE(review): if LSL does not load a limit, p is presumably left uninitialized -- confirm the selector is valid on every supported kernel
24 | // unsigned node = p >> 12;
25 | return p & VGETCPU_CPU_MASK; // keep only the low 12 bits
26 | }
27 | } // namespace terark
28 |
29 | #elif !defined(_MSC_VER)
30 |
31 | #include
32 | namespace terark {
33 | terark_forceinline unsigned int fast_getcpu(void) { // Fallback for non-MSVC builds that do not match the branch above: delegates to sched_getcpu().
34 | return sched_getcpu(); // NOTE(review): sched_getcpu() returns int; a -1 error result would be converted to a large unsigned value here
35 | }
36 | } // namespace terark
37 |
38 | #endif
39 |
--------------------------------------------------------------------------------
/src/terark/util/strjoin.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include
5 |
6 | template
7 | class strjoin_helper {
8 | private:
9 | String v;
10 | typedef typename String::value_type char_t;
11 | typedef strjoin_helper me;
12 | public:
13 | template explicit strjoin_helper(const StrX& x) : v(x) {}
14 | template explicit strjoin_helper(const Char* s, ptrdiff_t n) : v(s, n) {}
15 | operator String() const { return v; }
16 | me& operator+(const String& y) { v += y; return *this; }
17 | me& operator+(const char_t* y) { v += y; return *this; }
18 | me& operator+(const me & y) { v += y.v; return *this; }
19 | friend me operator+(const char_t* x, const me& y) { me t(x); t.v += y.v; return t; }
20 | friend me operator+(const String& x, const me& y) { me t(x); t.v += y.v; return t; }
21 | };
22 |
23 | template
24 | strjoin_helper strjoin(const AnyString& x) { return strjoin_helper(x); }
25 |
26 | inline strjoin_helper strjoin(const char* s) { return strjoin_helper(s); } // wraps a narrow C string; `inline` because this non-template function is defined in a header and would otherwise be multiply defined when included from several translation units
27 | inline strjoin_helper strjoin(const char* s, ptrdiff_t n) { return strjoin_helper(s, n); } // forwards (s, n) to the helper's (pointer, length) constructor
28 |
29 | inline strjoin_helper strjoin(const wchar_t* s) { return strjoin_helper(s); } // wide-character counterpart of the overload above
30 | inline strjoin_helper strjoin(const wchar_t* s, ptrdiff_t n) { return strjoin_helper(s, n); } // wide-character (pointer, length) counterpart
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | This repo is `topling-zip`, but for compatibility and to preserve the full commit history, we keep using the namespace `terark`; do not change it.
2 |
3 | To conform to open-source licensing, the following term disallowing bytedance has been deleted as of 2023-04-24;
4 | that is to say: bytedance using this software is no longer illegal and is not a shame.
5 |
6 | # ~~0. Disallow Bytedance Inc.~~
7 | ~~All changes after 2021-06-01 is disallowed to be used by bytedance,~~ see [LICENSE](LICENSE).
8 |
9 | The term of disallowing bytedance is also deleted in [LICENSE](LICENSE).
10 |
11 | # 1. Compile
12 | ```bash
13 | make -j `nproc` pkg
14 | ```
15 |
16 |
17 |
18 |
19 | # 1. Introduction
20 | - TerarkZip is [TerarkDB](https://github.com/bytedance/terarkdb)'s submodule
21 | - Users can also use TerarkZip as a compression and indexing algorithm library
22 | - TerarkZip also provides a set of useful utilities including `rank-select`, `bitmap` etc.
23 |
24 | # 2. Features
25 | - Indexing
26 | - Nested LOUDS Trie
27 | - Compression
28 | - PA-Zip Compression
29 | - Entropy Compression
30 |
31 | # 3. Usage
32 | ## Method 1: CMake
33 | - In your CMakeLists.txt
34 | - ADD_SUBDIRECTORY(terark-zip)
35 | - use `terark-zip` target anywhere you want
36 |
37 | ## Method 2: Static Library
38 | - ./build.sh
39 | - cd output
40 | - move `include` and `lib` directories to your project
41 |
42 |
43 | ## 4. License
44 | - BSD 3-Clause License
45 |
--------------------------------------------------------------------------------
/src/terark/fsa/tmplinst.cpp:
--------------------------------------------------------------------------------
1 | #include "tmplinst.hpp"
2 | #include
3 |
4 | namespace terark {
5 |
6 | static hash_strmap& gs_by_dio_class_name() {
7 | static hash_strmap me;
8 | return me;
9 | }
10 | static hash_strmap& gs_by_rtti_class_name () {
11 | static hash_strmap me;
12 | return me;
13 | }
14 |
15 | DFA_ClassMetaInfo::~DFA_ClassMetaInfo() {
16 | }
17 |
18 | const DFA_ClassMetaInfo* DFA_ClassMetaInfo::find(fstring class_name) { // Looks up meta info in the map keyed by the class_name passed to register_me(); returns NULL when absent.
19 | size_t idx = gs_by_dio_class_name().find_i(class_name);
20 | if (gs_by_dio_class_name().end_i() != idx) // end_i() is treated as the "not found" index
21 | return gs_by_dio_class_name().val(idx);
22 | else
23 | return NULL;
24 | }
25 |
26 | const DFA_ClassMetaInfo* DFA_ClassMetaInfo::find(const BaseDFA* dfa) { // Looks up meta info by the typeid name of *dfa; returns NULL when that name was never registered.
27 | size_t idx = gs_by_rtti_class_name().find_i(typeid(*dfa).name()); // NOTE(review): dfa is dereferenced inside typeid without a null check -- callers presumably never pass NULL
28 | if (gs_by_rtti_class_name().end_i() != idx)
29 | return gs_by_rtti_class_name().val(idx);
30 | else
31 | return NULL;
32 | }
33 |
34 | void DFA_ClassMetaInfo::register_me(const char* class_name, const char* rtti_class_name) { // Records both names on this object and inserts it into the two lookup maps used by find().
35 | // fprintf(stderr, "register_class: %s\n", meta->class_name);
36 | this->class_name = class_name; // stored by plain assignment, unlike rtti_class_name below -- presumably the caller passes a string that outlives this object; verify
37 | this->rtti_class_name.assign(rtti_class_name);
38 | gs_by_dio_class_name().insert_i(class_name, this);
39 | gs_by_rtti_class_name().insert_i(rtti_class_name, this);
40 | }
41 |
42 | } // namespace terark
43 |
44 |
--------------------------------------------------------------------------------
/tools/general/split_into_sorted_runs.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | // write sorted run filename to stdout
5 | // write sorted run content to filename
6 |
7 | using namespace terark;
8 |
9 | int main(int argc, char* argv[]) { // Splits stdin into files of non-decreasing lines; exit codes: 1 bad usage, 2 fopen failure, 3 fwrite failure.
10 | if (argc < 2) {
11 | fprintf(stderr, "usage: %s fnamePrefix\n", argv[0]);
12 | return 1;
13 | }
14 | const char* fnamePrefix = argv[1];
15 | if (strlen(fnamePrefix) > 100) { // keeps prefix + index digits + terminator within fname[128] below
16 | fprintf(stderr, "ERROR: fnamePrefix = %s is too long(max 100)\n", fnamePrefix);
17 | return 1;
18 | }
19 | char fname[128];
20 | LineBuf line;
21 | valvec prev; // copy of the previously written line, without its newline
22 | FILE* fo = NULL;
23 | int fileIdx = 0;
24 | while (line.getline(stdin) > 0) { // stops as soon as getline() reports a value <= 0
25 | line.chomp();
26 | if (NULL == fo || prev > fstring(line)) { // start a new run for the very first line, or when the previous line compares greater than this one
27 | if (fo) {
28 | fclose(fo); // NOTE(review): the fclose() result is not checked, so a failed final flush of the finished run goes unreported
29 | }
30 | sprintf(fname, "%s%06d", fnamePrefix, fileIdx++); // run files are named <prefix><index>, index zero-padded to at least 6 digits
31 | fo = fopen(fname, "w");
32 | if (NULL == fo) {
33 | fprintf(stderr, "ERROR: fopen(%s, w) = %s\n", fname, strerror(errno));
34 | return 2;
35 | }
36 | printf("%s\n", fname); // announce the new run file on stdout
37 | fflush(stdout); // flushed immediately so a reader of stdout sees each name as soon as the run is opened
38 | }
39 | prev.assign(line.p, line.n); // remember this line before the newline is appended
40 | line.push_back('\n');
41 | size_t wn = fwrite(line.p, 1, line.n, fo);
42 | if (wn != line.n) {
43 | fprintf(stderr, "ERROR: fwrite(%s, %zd) = %s\n", fname, line.n, strerror(errno));
44 | return 3;
45 | }
46 | }
47 | if (fo) {
48 | fclose(fo); // NOTE(review): result not checked here either
49 | }
50 | return 0;
51 | }
52 |
53 |
--------------------------------------------------------------------------------
/tools/zbs/sufsort_bench.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by leipeng on 2019-11-04.
3 | //
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 |
11 | //Makefile:CXXFLAGS:-I../../3rdparty/zstd
12 |
13 | namespace terark {
14 | extern int g_useDivSufSort;
15 | }
16 | using namespace terark;
17 |
18 | int main(int argc, char* argv[]) // Benchmark driver: loads all of stdin into memory and builds a suffix array over it with one of two algorithms.
19 | try {
20 | bool openMP = getEnvBool("use_openmp", 0); // only passed to the divsufsort() call below
21 | valvec mem;
22 | size_t fsize = 0;
23 | {
24 | struct ll_stat st;
25 | if (::ll_fstat(0, &st) < 0) { // fd 0 is stdin; its st_size is used as the input length
26 | THROW_STD(runtime_error, "fstat failed");
27 | }
28 | fsize = st.st_size;
29 | }
30 | use_hugepage_resize_no_init(&mem, pow2_align_up(fsize, 8)*5); // presumably 1x for the input plus 4x for one int per input byte -- confirm
31 | auto rdsize = ::read(0, mem.data(), fsize); // NOTE(review): a single read() may return fewer bytes than requested; that case is reported as an error below
32 | if (size_t(rdsize) != fsize) {
33 | THROW_STD(runtime_error, "ERROR: read(stdin, %zd) = %zd : err = %s\n", fsize, rdsize, strerror(errno));
34 | }
35 | int* sufarr = (int*)(mem.data() + pow2_align_up(fsize, 8)); // the suffix array starts right after the aligned input region in the same buffer
36 | if (g_useDivSufSort == 1)
37 | divsufsort(mem.data(), sufarr, fsize, openMP);
38 | else
39 | sufarr_inducedsort(mem.data(), sufarr, fsize);
40 |
41 | return 0;
42 | }
43 | catch (...) { // any exception ends the run with exit code 1; the exception message itself is not printed here
44 | fprintf(stderr, "exit 1 on exception\n");
45 | return 1;
46 | }
47 |
48 |
--------------------------------------------------------------------------------
/src/terark/str_lex_iter.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include