├── tools ├── zbs │ ├── hive │ │ └── hive.cc │ ├── Makefile │ ├── bmq │ │ └── bmq.cc │ ├── zip-bench.sh │ ├── sufsort_bench.cpp │ └── zbs_stat.cpp ├── fsa │ ├── test-3.txt │ ├── Makefile │ ├── test-1.txt │ ├── test-2.txt │ ├── test_nlt.sh │ ├── dfa_text.cpp │ ├── test-patricia.txt │ └── patricia_bench.cpp ├── general │ ├── Makefile │ ├── test-tpjoin.txt │ ├── revline.cpp │ ├── deserial_strseq.cpp │ ├── hex2bson.cpp │ ├── text_key_val_to_kvbin.cpp │ └── split_into_sorted_runs.cpp ├── configure │ ├── glibc_memcpy_fix.h │ └── compiler.cpp └── codegen │ └── gen_leap_year_bits.cpp ├── cmake └── modules │ └── FindBoost.cmake ├── src └── terark │ ├── idx │ └── idx_dummy_placeholder.cpp │ ├── io │ ├── ZcMemMap.cpp │ ├── todo │ │ ├── DataIO_Polymorphic.hpp │ │ ├── inter_thread_pipe.hpp │ │ ├── DataIO_Parser.hpp │ │ └── inter_thread_pipe.cpp │ ├── discard │ │ ├── DataInput.cpp │ │ ├── hole_stream.hpp │ │ └── is_primitive.hpp │ ├── var_int_declare_read.hpp │ ├── readv_writev.hpp │ ├── var_int_declare_write.hpp │ ├── StdvecWriter.hpp │ ├── DataInput_VarIntAsFixLen.hpp │ ├── file_load_save.hpp │ ├── DataOutput_VarIntAsFixLen.hpp │ ├── DataIO_SmartPtr.hpp │ ├── HexCodingStream.cpp │ ├── FileDataIO.hpp │ ├── DataOutput_String.hpp │ ├── readv_writev.cpp │ ├── DataInputIterator.hpp │ ├── IOException.hpp │ ├── DataOutput_BigEndian.hpp │ ├── DataOutput_LittleEndian.hpp │ ├── DataIO_Exception.cpp │ ├── win │ │ └── MfcFileStream.cpp │ ├── DataIO_Exception.hpp │ ├── DataInput_String.hpp │ ├── DataInput_BigEndian.hpp │ └── DataInput_LittleEndian.hpp │ ├── util │ ├── hugepage.cpp │ ├── cpu_prefetch.hpp │ ├── truncate_file.hpp │ ├── checksum_exception.hpp │ ├── stdptr.hpp │ ├── vm_util.hpp │ ├── checksum_exception.cpp │ ├── stat.hpp │ ├── nolocks_localtime.hpp │ ├── throw.cpp │ ├── crc.hpp │ ├── tmpfile.hpp │ ├── sorted_uint_vec_get_block_word.hpp │ ├── truncate_file.cpp │ ├── memcmp_coding.hpp │ ├── DataBuffer.cpp │ ├── fast_getcpu.hpp │ ├── strjoin.hpp │ ├── autoclose.hpp │ ├── 
deepcopy_ptr.hpp │ ├── strbuilder.hpp │ ├── tmpfile.cpp │ ├── profiling.cpp │ ├── base64.hpp │ └── throw.hpp │ ├── fsa │ ├── fsa_cache.cpp │ ├── fsa_ext.hpp │ ├── ppi │ │ ├── state_move_fast.hpp │ │ ├── dawg_dfa_mmap.hpp │ │ └── flat_dfa_mmap.hpp │ ├── fsa_cache.hpp │ ├── forward_decl.hpp │ ├── fsa_ext.cpp │ ├── tmplinst.cpp │ ├── x_fsa_util.hpp │ ├── dfa_mmap_header.hpp │ └── dfa_algo_basic.hpp │ ├── parallel_lib.hpp │ ├── easy_use_hash_map.hpp │ ├── rank_select.hpp │ ├── thread │ ├── mutex.hpp │ ├── fiber_aio.hpp │ ├── futex.hpp │ ├── fiber_local.hpp │ └── fiber_pool.hpp │ ├── str_lex_iter.cpp │ ├── pass_by_value.hpp │ ├── zbs │ ├── ZstdStream.hpp │ ├── simple_zip_blob_store.hpp │ ├── zip_reorder_map.cpp │ ├── zero_length_blob_store.hpp │ ├── xxhash_helper.hpp │ ├── lru_page_cache.hpp │ ├── sufarr_inducedsort.h │ └── abstract_blob_store.hpp │ ├── str_lex_iter.hpp │ ├── mempool.hpp │ ├── succinct │ ├── rank_select_basic.hpp │ └── rank_select_inline_slow.hpp │ ├── multi_way_basic.hpp │ ├── gold_hash_map_iterator.hpp │ └── smallmap.hpp ├── tests ├── core │ ├── Makefile │ ├── never-add-stdvec-writer.hpp │ ├── test_call_on_main_stack.cpp │ ├── test_boost_fss.cpp │ └── test_ProcPipeStream.cpp ├── tries │ ├── Makefile │ └── test_dict_order_gen.cpp ├── entropy │ ├── Makefile │ └── test_entropy.cpp ├── succinct │ ├── Makefile │ └── test.sh └── zbs │ └── sample.txt ├── 3rdparty ├── zstd │ └── zstd │ │ ├── .gitignore │ │ ├── dll │ │ └── example │ │ │ ├── fullbench-dll.vcxproj.user │ │ │ ├── build_package.bat │ │ │ ├── fullbench-dll.sln │ │ │ └── Makefile │ │ ├── libzstd.pc.in │ │ ├── common │ │ └── debug.c │ │ ├── deprecated │ │ └── zbuff_common.c │ │ ├── compress │ │ ├── zstd_compress_superblock.h │ │ ├── zstd_compress_literals.h │ │ ├── zstd_fast.h │ │ └── zstd_double_fast.h │ │ └── decompress │ │ └── zstd_ddict.h └── base64 │ ├── test │ ├── codec_supported.h │ ├── codec_supported.c │ ├── Makefile │ ├── moby_dick_plain.txt │ └── moby_dick_base64.txt │ ├── .gitignore │ ├── 
base64-benchmarks.png │ ├── .travis.yml │ ├── lib │ ├── exports.txt │ └── arch │ │ ├── sse2 │ │ └── compare_macros.h │ │ ├── generic │ │ ├── 32 │ │ │ ├── enc_loop.c │ │ │ └── dec_loop.c │ │ ├── 64 │ │ │ └── enc_loop.c │ │ ├── codec.c │ │ ├── enc_tail.c │ │ ├── enc_head.c │ │ ├── dec_head.c │ │ └── dec_tail.c │ │ ├── neon32 │ │ └── enc_loop.c │ │ ├── ssse3 │ │ ├── enc_loop.c │ │ ├── dec_reshuffle.c │ │ ├── codec.c │ │ ├── enc_translate.c │ │ ├── enc_reshuffle.c │ │ └── dec_loop.c │ │ ├── avx │ │ └── codec.c │ │ ├── sse42 │ │ └── codec.c │ │ ├── sse41 │ │ ├── codec.c │ │ └── enc_reshuffle.c │ │ ├── avx2 │ │ ├── enc_loop.c │ │ └── dec_loop.c │ │ └── neon64 │ │ └── enc_loop.c │ └── LICENSE ├── scripts ├── README.md ├── cpu_has_bmi2.sh ├── cpu_features.sh ├── test.sh └── build_makefile.sh ├── gtests ├── tools │ ├── core │ │ ├── Makefile │ │ ├── test_boost_fss.cpp │ │ └── test_ProcPipeStream.cpp │ ├── tries │ │ ├── Makefile │ │ └── test_dict_order_gen.cpp │ ├── succinct │ │ ├── Makefile │ │ └── test.sh │ └── zbs │ │ └── sample.txt ├── simple_test.cpp ├── common │ ├── prefetch_test.cpp │ └── sortable_strvec_test.cpp ├── build.sh ├── utils_test.cpp ├── zbs │ └── zbs_mixed_len.hpp ├── index │ └── adfa_test.cpp ├── rank_select │ └── rank_select_few_reg_test.cpp ├── CMakeLists.txt └── utils.hpp ├── .gitmodules ├── get-compiler-name.sh ├── cpu_features.sh ├── CHANGELOG ├── .gitlab-ci.yml ├── cpu_has_bmi2.sh ├── .gitignore ├── README.md └── LICENSE /tools/zbs/hive/hive.cc: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cmake/modules/FindBoost.cmake: -------------------------------------------------------------------------------- 1 | # TODO 2 | -------------------------------------------------------------------------------- /src/terark/idx/idx_dummy_placeholder.cpp: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/core/Makefile: -------------------------------------------------------------------------------- 1 | 2 | include ../../tools/fsa/Makefile.common 3 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/.gitignore: -------------------------------------------------------------------------------- 1 | # make install artefact 2 | libzstd.pc 3 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | - `Makefile` and `build_makefile.sh` are deprecated 2 | -------------------------------------------------------------------------------- /tools/fsa/test-3.txt: -------------------------------------------------------------------------------- 1 | 0000AAAAXXXX 2 | 0000AAAAYYYY 3 | 0000AAAAZZZZ 4 | -------------------------------------------------------------------------------- /tools/fsa/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TERARK_EXT_LIBS := fsa 3 | 4 | include Makefile.common 5 | -------------------------------------------------------------------------------- /tools/zbs/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TERARK_EXT_LIBS := zbs fsa 3 | 4 | include ../fsa/Makefile.common 5 | -------------------------------------------------------------------------------- /tests/tries/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TERARK_EXT_LIBS := fsa 3 | 4 | include ../../tools/fsa/Makefile.common 5 | -------------------------------------------------------------------------------- /tools/general/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TERARK_BIN_USE_STATIC_LIB ?= 1 3 | 4 | include 
../fsa/Makefile.common 5 | -------------------------------------------------------------------------------- /3rdparty/base64/test/codec_supported.h: -------------------------------------------------------------------------------- 1 | extern char **codecs; 2 | 3 | int codec_supported (int flags); 4 | -------------------------------------------------------------------------------- /src/terark/io/ZcMemMap.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topling/topling-zip/HEAD/src/terark/io/ZcMemMap.cpp -------------------------------------------------------------------------------- /tests/entropy/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TERARK_EXT_LIBS := fsa zbs 3 | 4 | include ../../tools/fsa/Makefile.common 5 | -------------------------------------------------------------------------------- /gtests/tools/core/Makefile: -------------------------------------------------------------------------------- 1 | 2 | SRCS += $(wildcard *.cpp) 3 | 4 | include ../../tools/fsa/Makefile.common 5 | -------------------------------------------------------------------------------- /src/terark/util/hugepage.cpp: -------------------------------------------------------------------------------- 1 | #include "hugepage.hpp" 2 | 3 | namespace terark { 4 | 5 | } // namespace terark 6 | -------------------------------------------------------------------------------- /tools/configure/glibc_memcpy_fix.h: -------------------------------------------------------------------------------- 1 | /* gcc flag: -include "" */ 2 | __asm__(".symver memcpy,memcpy@GLIBC_2.2.5"); 3 | -------------------------------------------------------------------------------- /tools/fsa/test-1.txt: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | c 4 | ddd 5 | eee 6 | fff 7 | ggg 8 | hhhh 9 | iiii 10 | jjjjj 11 | kkkkk 12 | 
-------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "boost-include"] 2 | path = boost-include 3 | url = https://github.com/rockeet/boost-fiber-leipeng 4 | -------------------------------------------------------------------------------- /3rdparty/base64/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | bin/base64 3 | lib/config.h 4 | lib/table_generator 5 | test/benchmark 6 | test/test_base64 7 | -------------------------------------------------------------------------------- /3rdparty/base64/base64-benchmarks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topling/topling-zip/HEAD/3rdparty/base64/base64-benchmarks.png -------------------------------------------------------------------------------- /src/terark/io/todo/DataIO_Polymorphic.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/topling/topling-zip/HEAD/src/terark/io/todo/DataIO_Polymorphic.hpp -------------------------------------------------------------------------------- /gtests/tools/tries/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TERARK_EXT_LIBS := fsa 3 | 4 | SRCS += $(wildcard *.cpp) 5 | 6 | include ../../tools/fsa/Makefile.common 7 | -------------------------------------------------------------------------------- /gtests/simple_test.cpp: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | 3 | TEST(UTILS_TEST, FILE_EXISTS) { 4 | std::cout << "this is a demo test" << std::endl; 5 | } 6 | -------------------------------------------------------------------------------- /tools/fsa/test-2.txt: 
-------------------------------------------------------------------------------- 1 | 00## 2 | 08## 3 | 16## 4 | 24## 5 | 32## 6 | 40## 7 | 48## 8 | 56## 9 | 64## 10 | 72## 11 | 80## 12 | 88## 13 | 96## 14 | -------------------------------------------------------------------------------- /gtests/tools/succinct/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TERARK_EXT_LIBS := fsa 3 | 4 | SRCS += $(wildcard *.cpp) 5 | #SRCS += $(wildcard extra/*.cpp) 6 | 7 | include ../../tools/fsa/Makefile.common 8 | -------------------------------------------------------------------------------- /src/terark/fsa/fsa_cache.cpp: -------------------------------------------------------------------------------- 1 | #include "fsa_cache_detail.hpp" 2 | 3 | namespace terark { 4 | 5 | FSA_Cache::~FSA_Cache() { 6 | } 7 | 8 | } // namespace terark 9 | 10 | -------------------------------------------------------------------------------- /tests/succinct/Makefile: -------------------------------------------------------------------------------- 1 | 2 | TERARK_EXT_LIBS := fsa 3 | 4 | EXE_SRCS += $(wildcard *.cpp) 5 | #EXE_SRCS += $(wildcard extra/*.cpp) 6 | 7 | include ../../tools/fsa/Makefile.common 8 | -------------------------------------------------------------------------------- /3rdparty/base64/.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | compiler: 4 | - clang 5 | - gcc 6 | 7 | script: 8 | - SSSE3_CFLAGS=-mssse3 SSE41_CFLAGS=-msse4.1 SSE42_CFLAGS=-msse4.2 AVX_CFLAGS=-mavx make -C test 9 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/dll/example/fullbench-dll.vcxproj.user: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /src/terark/util/cpu_prefetch.hpp: 
-------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-07-16. 3 | // 4 | #pragma once 5 | 6 | #include 7 | 8 | #define TERARK_CPU_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) 9 | -------------------------------------------------------------------------------- /tools/zbs/bmq/bmq.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | /** 5 | * Simple test case for bytedance message queue streaming compression & decompression. 6 | * @return 7 | */ 8 | int main() { 9 | 10 | return 0; 11 | } -------------------------------------------------------------------------------- /3rdparty/base64/lib/exports.txt: -------------------------------------------------------------------------------- 1 | trk_base64_encode 2 | trk_base64_stream_encode 3 | trk_base64_stream_encode_init 4 | trk_base64_stream_encode_final 5 | trk_base64_decode 6 | trk_base64_stream_decode 7 | trk_base64_stream_decode_init 8 | -------------------------------------------------------------------------------- /gtests/common/prefetch_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "utils.h" 6 | 7 | namespace terark { 8 | 9 | TEST(PREFETCH, SIMPLE_TEST) { 10 | 11 | } 12 | } 13 | 14 | -------------------------------------------------------------------------------- /src/terark/fsa/fsa_ext.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-05-07. 
3 | // 4 | #pragma once 5 | #include "fsa.hpp" 6 | 7 | namespace terark { 8 | TERARK_DLL_EXPORT size_t dfa_write_text(const BaseDFA* dfa, FILE*); 9 | } 10 | -------------------------------------------------------------------------------- /get-compiler-name.sh: -------------------------------------------------------------------------------- 1 | 2 | dir=`dirname "$0"` 3 | cd $dir 4 | if [ -z "$CXX" ]; then 5 | CXX=g++ 6 | fi 7 | tmpfile=$(mktemp -u compiler-XXXXXX) 8 | ${CXX} tools/configure/compiler.cpp -o ${tmpfile}.exe && ./${tmpfile}.exe && rm -f ${tmpfile}* 9 | -------------------------------------------------------------------------------- /src/terark/io/discard/DataInput.cpp: -------------------------------------------------------------------------------- 1 | #include "terark/io/DataInput.hpp" 2 | 3 | namespace terark { 4 | 5 | namespace serialization { namespace polymorphic { 6 | 7 | 8 | } } // serialization::polymorphic 9 | 10 | } // namespace terark 11 | -------------------------------------------------------------------------------- /scripts/cpu_has_bmi2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mydir=`dirname $0` 4 | $mydir/cpu_features.sh | grep -qs bmi2 5 | bmi2_status=${PIPESTATUS[1]} 6 | if [ $bmi2_status -eq 0 ] # 0 indicate success 7 | then 8 | echo 1 9 | else 10 | echo 0 11 | fi 12 | 13 | -------------------------------------------------------------------------------- /cpu_features.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ `uname` == Darwin ]; then 4 | sysctl -n machdep.cpu.features | tr 'A-Z' 'a-z' | sed -E 's/[[:space:]]+/'$'\\\n/g' 5 | else 6 | cat /proc/cpuinfo | sed -n '/^flags\s*:\s*/s/^[^:]*:\s*//p' | uniq | tr 'A-Z' 'a-z' | sed 's/\s\+/\n/g' 7 | fi 8 | -------------------------------------------------------------------------------- /scripts/cpu_features.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ `uname` == Darwin ]; then 4 | sysctl -n machdep.cpu.features | tr 'A-Z' 'a-z' | sed -E 's/[[:space:]]+/'$'\\\n/g' 5 | else 6 | cat /proc/cpuinfo | sed -n '/^flags\s*:\s*/s/^[^:]*:\s*//p' | uniq | tr 'A-Z' 'a-z' | sed 's/\s\+/\n/g' 7 | fi 8 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e # exit on error 4 | 5 | if [ `uname` == Darwin ]; then 6 | cpuNum=`sysctl -n machdep.cpu.thread_count` 7 | else 8 | cpuNum=`nproc` 9 | fi 10 | 11 | make -j$cpuNum test 12 | 13 | # more test cases under google test framework 14 | ./gtest.sh 15 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/sse2/compare_macros.h: -------------------------------------------------------------------------------- 1 | #define CMPGT(s,n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) 2 | #define CMPEQ(s,n) _mm_cmpeq_epi8((s), _mm_set1_epi8(n)) 3 | #define REPLACE(s,n) _mm_and_si128((s), _mm_set1_epi8(n)) 4 | #define RANGE(s,a,b) _mm_andnot_si128(CMPGT((s), (b)), CMPGT((s), (a) - 1)) 5 | -------------------------------------------------------------------------------- /src/terark/io/var_int_declare_read.hpp: -------------------------------------------------------------------------------- 1 | uint32_t read_var_uint32(); 2 | uint32_t read_var_uint30(); 3 | uint64_t read_var_uint64(); 4 | uint64_t read_var_uint61(); 5 | int32_t read_var_int32(); 6 | int32_t read_var_int30(); 7 | int64_t read_var_int64(); 8 | int64_t read_var_int61(); 9 | void read_string(std::string& str); 10 | 11 | -------------------------------------------------------------------------------- /tools/general/test-tpjoin.txt: -------------------------------------------------------------------------------- 1 | a A 0 00 2 | 
b B 1 11 3 | c C 2 22 4 | d D 3 33 5 | e E 4 44 6 | f F 5 55 7 | g G 6 66 8 | h H 7 77 9 | i I 8 88 10 | j J 9 99 11 | k K 0 00 12 | l L A aa 13 | m M B bb 14 | n N C cc 15 | o O D dd 16 | p P E ee 17 | q Q F ff 18 | r R G gg 19 | s S H hh 20 | t T I ii 21 | u U J jj 22 | v V K kk 23 | w W L ll 24 | -------------------------------------------------------------------------------- /src/terark/parallel_lib.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #if defined(TOPLING_ENABLE_PARALLEL_ALGO) && defined(__GNUC__) && __GNUC__ * 1000 + __GNUC_MINOR__ >= 4007 4 | #include 5 | #define terark_parallel_sort __gnu_parallel::sort 6 | #else 7 | #include 8 | #define terark_parallel_sort std::sort 9 | #endif 10 | 11 | -------------------------------------------------------------------------------- /tools/general/revline.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char* argv[]) { 6 | terark::LineBuf line; 7 | FILE* fp = stdin; 8 | while (line.getline(fp) >= 0) { 9 | line.chomp(); 10 | std::reverse(line.begin(), line.end()); 11 | printf("%s\n", line.p); 12 | } 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /tools/fsa/test_nlt.sh: -------------------------------------------------------------------------------- 1 | 2 | for f in test*.txt; do 3 | for ((nl=1; nl < 4; nl++)); do 4 | env LD_LIBRARY_PATH=../../build/Linux-x86_64-g++-6.3-bmi2-1/lib \ 5 | dbg/nlt_build.exe -n $nl -o $f.nlt $f 6 | if ! 
diff <(LC_ALL=C sort $f) <(dbg/dfa_text.exe $f.nlt); then 7 | echo Fail on text file $f 1>&2 8 | exit 9 | fi 10 | done 11 | done 12 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | ## 2020-12-11 2 | - Refine code style and add License file, ready to open source 3 | - Update README 4 | 5 | ## 2020-11-06 6 | - Rename terark-core to terark-zip 7 | - Add gtest for unit tests 8 | - Structure refactoring 9 | 10 | ## 2019-10-31 11 | - submit the first version of terark-core as a standalone product, will add more API documents and test cases in the future. 12 | -------------------------------------------------------------------------------- /tools/configure/compiler.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main() { 4 | #ifdef __clang_major__ 5 | printf("clang-%d.%d", __clang_major__, __clang_minor__); 6 | #elif defined(__INTEL_COMPILER) 7 | printf("icc-%d.%d", __INTEL_COMPILER/100, __INTEL_COMPILER%100); 8 | #elif defined(__GNUC__) 9 | printf("g++-%d.%d", __GNUC__, __GNUC_MINOR__); 10 | #endif 11 | return 0; 12 | } 13 | 14 | -------------------------------------------------------------------------------- /src/terark/fsa/ppi/state_move_fast.hpp: -------------------------------------------------------------------------------- 1 | 2 | public: 3 | 4 | struct StateMoveContext {}; 5 | 6 | transition_t 7 | state_move_fast(size_t parent, auchar_t ch, StateMoveContext) 8 | const { 9 | return state_move(parent, ch); 10 | } 11 | 12 | transition_t 13 | state_move_slow(size_t parent, auchar_t ch, StateMoveContext) 14 | const { 15 | return state_move(parent, ch); 16 | } 17 | 18 | 19 | -------------------------------------------------------------------------------- /gtests/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 
3 | 4 | BASE_DIR=`pwd` 5 | if [ `uname` == Darwin ]; then 6 | cpuNum=`sysctl -n machdep.cpu.thread_count` 7 | else 8 | cpuNum=`nproc` 9 | fi 10 | 11 | # Build terark-core libraries 12 | cd ../ && ./build.sh 13 | 14 | # Build test cases under gtests 15 | rm -rf $BASE_DIR/build && mkdir -p $BASE_DIR/build 16 | cd $BASE_DIR/build && cmake ../ && make -j $cpuNum -------------------------------------------------------------------------------- /src/terark/easy_use_hash_map.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "gold_hash_map.hpp" 4 | #include "hash_strmap.hpp" 5 | 6 | namespace terark { 7 | 8 | template 9 | class easy_use_hash_map : public gold_hash_map {}; 10 | 11 | template 12 | class easy_use_hash_map : public hash_strmap {}; 13 | 14 | } // namespace terark 15 | -------------------------------------------------------------------------------- /src/terark/util/truncate_file.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace terark { 6 | TERARK_DLL_EXPORT 7 | void truncate_file(const char* fpath, unsigned long long size); 8 | 9 | template 10 | inline 11 | void truncate_file(const String& fpath, unsigned long long size) { 12 | assert(fpath.data()[fpath.size()] == '\0'); 13 | truncate_file(fpath.data(), size); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/terark/rank_select.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "succinct/rank_select_simple.hpp" 4 | #include "succinct/rank_select_se_256.hpp" 5 | #include "succinct/rank_select_il_256.hpp" 6 | #include "succinct/rank_select_se_512.hpp" 7 | #include "succinct/rank_select_mixed_il_256.hpp" 8 | #include "succinct/rank_select_mixed_xl_256.hpp" 9 | #include "succinct/rank_select_mixed_se_512.hpp" 10 | #include 
"succinct/rank_select_few.hpp" 11 | -------------------------------------------------------------------------------- /src/terark/io/readv_writev.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace terark { 7 | 8 | TERARK_DLL_EXPORT ssize_t easy_readv(int fd, iovec*, int num, int* next_idx); 9 | TERARK_DLL_EXPORT ssize_t easy_writev(int fd, iovec*, int num, int* next_idx); 10 | 11 | inline bool iovec_finished(const iovec* iov, int num) { 12 | return 0 == iov[num-1].iov_len; 13 | } 14 | 15 | } // namespace terark 16 | -------------------------------------------------------------------------------- /src/terark/io/var_int_declare_write.hpp: -------------------------------------------------------------------------------- 1 | void write_var_uint32(uint32_t x); 2 | void write_var_uint30(uint32_t x); 3 | void write_var_uint64(uint64_t x); 4 | void write_var_uint61(uint64_t x); 5 | void write_var_int32(int32_t x); 6 | void write_var_int30(int32_t x); 7 | void write_var_int64(int64_t x); 8 | void write_var_int61(int64_t x); 9 | void write_string(const std::string& str); 10 | // void write_string(const char* str, size_t len); 11 | 12 | -------------------------------------------------------------------------------- /tests/succinct/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #set -x 4 | 5 | BASE=`cd ../../..;pwd` 6 | export PKG_TERARK_HOME=$BASE/topling-rocks/pkg/topling-rocks-Linux-x86_64-g++-4.8-bmi2-0 7 | export LD_LIBRARY_PATH=$PKG_TERARK_HOME/lib:$LD_LIBRARY_PATH 8 | 9 | cp $BASE/terark/src/terark/succinct/rank_select_fewzero.hpp . 10 | cp $BASE/terark/src/terark/succinct/rank_select_fewzero.cpp . 
11 | 12 | make clean 13 | make -j4 14 | 15 | ./dbg/rs_fewzero_ut.exe 16 | ./dbg/rank_select_unit_test.exe 17 | -------------------------------------------------------------------------------- /src/terark/fsa/ppi/dawg_dfa_mmap.hpp: -------------------------------------------------------------------------------- 1 | void finish_load_mmap(const DFA_MmapHeader* base) override { 2 | super::finish_load_mmap(base); 3 | this->is_compiled = true; 4 | this->n_words = size_t(base->dawg_num_words); 5 | this->m_is_dag = true; 6 | } 7 | 8 | long prepare_save_mmap(DFA_MmapHeader* base, const void** dataPtrs) 9 | const override { 10 | super::prepare_save_mmap(base, dataPtrs); 11 | base->dawg_num_words = this->n_words; 12 | base->is_dag = true; 13 | return 0; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /gtests/tools/succinct/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #set -x 4 | 5 | BASE=`cd ../../..;pwd` 6 | export PKG_TERARK_HOME=$BASE/terark-zip-rocksdb/pkg/terark-zip-rocksdb-Linux-x86_64-g++-4.8-bmi2-0 7 | export LD_LIBRARY_PATH=$PKG_TERARK_HOME/lib:$LD_LIBRARY_PATH 8 | 9 | cp $BASE/terark/src/terark/succinct/rank_select_fewzero.hpp . 10 | cp $BASE/terark/src/terark/succinct/rank_select_fewzero.cpp . 
11 | 12 | make clean 13 | make -j4 14 | 15 | ./dbg/rs_fewzero_ut.exe 16 | ./dbg/rank_select_unit_test.exe 17 | -------------------------------------------------------------------------------- /src/terark/fsa/fsa_cache.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace terark { 7 | 8 | class TERARK_DLL_EXPORT FSA_Cache { 9 | public: 10 | virtual ~FSA_Cache(); 11 | virtual bool has_fsa_cache() const = 0; 12 | virtual bool build_fsa_cache(double cacheRatio, const char* walkMethod)=0; 13 | virtual void print_fsa_cache_stat(FILE*) const = 0; 14 | }; 15 | 16 | class NTD_CacheTrie; // forward declaration 17 | 18 | } // namespace terark 19 | 20 | -------------------------------------------------------------------------------- /src/terark/util/checksum_exception.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace terark { 8 | 9 | class TERARK_DLL_EXPORT BadChecksumException : public std::logic_error { 10 | typedef std::logic_error super; 11 | public: 12 | uint64_t m_old; 13 | uint64_t m_new; 14 | ~BadChecksumException(); 15 | BadChecksumException(fstring msg, uint64_t Old, uint64_t New); 16 | }; 17 | 18 | } // terark 19 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/libzstd.pc.in: -------------------------------------------------------------------------------- 1 | # ZSTD - standard compression algorithm 2 | # Copyright (C) 2014-2016, Yann Collet, Facebook 3 | # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 4 | 5 | prefix=@PREFIX@ 6 | exec_prefix=${prefix} 7 | includedir=${prefix}/@INCLUDEDIR@ 8 | libdir=${exec_prefix}/@LIBDIR@ 9 | 10 | Name: zstd 11 | Description: fast lossless compression algorithm library 12 | URL: http://www.zstd.net/ 13 | Version: @VERSION@ 14 | Libs: 
-L${libdir} -lzstd 15 | Cflags: -I${includedir} 16 | -------------------------------------------------------------------------------- /tests/zbs/sample.txt: -------------------------------------------------------------------------------- 1 | ncurses is keg-only, which means it was not symlinked into /usr/local, 2 | because macOS already provides this software and installing another version in 3 | parallel can cause all kinds of trouble. 4 | 5 | If you need to have ncurses first in your PATH run: 6 | echo 'export PATH="/usr/local/opt/ncurses/bin:$PATH"' >> ~/.zshrc 7 | 8 | For compilers to find ncurses you may need to set: 9 | export LDFLAGS="-L/usr/local/opt/ncurses/lib" 10 | export CPPFLAGS="-I/usr/local/opt/ncurses/include" 11 | 123 12 | 456 13 | 789 14 | -------------------------------------------------------------------------------- /gtests/tools/zbs/sample.txt: -------------------------------------------------------------------------------- 1 | ncurses is keg-only, which means it was not symlinked into /usr/local, 2 | because macOS already provides this software and installing another version in 3 | parallel can cause all kinds of trouble. 4 | 5 | If you need to have ncurses first in your PATH run: 6 | echo 'export PATH="/usr/local/opt/ncurses/bin:$PATH"' >> ~/.zshrc 7 | 8 | For compilers to find ncurses you may need to set: 9 | export LDFLAGS="-L/usr/local/opt/ncurses/lib" 10 | export CPPFLAGS="-I/usr/local/opt/ncurses/include" 11 | 123 12 | 456 13 | 789 14 | -------------------------------------------------------------------------------- /tools/fsa/dfa_text.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-05-07. 
3 | // 4 | 5 | #include 6 | 7 | int main(int argc, char* argv[]) { 8 | using namespace terark; 9 | if (argc < 2) { 10 | fprintf(stderr, "usage: %s dfa-file\n", argv[0]); 11 | return 1; 12 | } 13 | try { 14 | std::unique_ptr dfa(BaseDFA::load_from(argv[1])); 15 | dfa_write_text(dfa.get(), stdout); 16 | return 0; 17 | } 18 | catch (const std::exception&) { 19 | return 2; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /tools/general/deserial_strseq.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char* argv[]) { 7 | using namespace terark; 8 | NonOwnerFileStream fstdin(stdin); 9 | NativeDataInput dio(&fstdin); 10 | valvec buf; 11 | try { 12 | while (true) { 13 | dio >> buf; 14 | printf("%.*s\n", int(buf.size()), buf.data()); 15 | } 16 | } 17 | catch (const EndOfFileException&) { 18 | } 19 | return 0; 20 | } 21 | 22 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - build_and_test 3 | - benchmark 4 | 5 | compile: 6 | stage: build_and_test 7 | script: 8 | - echo `pwd` && ls -lh 9 | - echo $CI_COMMIT_SHA && git checkout $CI_COMMIT_SHA 10 | - git submodule update --init --recursive 11 | - mkdir build && cd build && cmake ../ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DWITH_TESTS=ON 12 | - make -j $(nproc) 13 | tags: 14 | - terark 15 | 16 | test: 17 | stage: build_and_test 18 | script: 19 | - echo "" 20 | only: 21 | refs: 22 | - stage 23 | tags: 24 | - terark 25 | -------------------------------------------------------------------------------- /tests/core/never-add-stdvec-writer.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main() { 6 | using namespace terark; 7 | NativeDataOutput 
> writer; 8 | writer << 1; 9 | writer << std::string("abc"); 10 | 11 | NativeDataInput reader; 12 | int i; 13 | std::string s; 14 | reader >> i; 15 | reader >> s; 16 | assert(1 == i); 17 | assert("abc" == s); 18 | 19 | printf("%s done\n", argv[0]); 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /cpu_has_bmi2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | if [ -z "$CXX" ]; then 4 | CXX=g++ 5 | fi 6 | if [ -z "$TMPDIR" ]; then 7 | TMPDIR=/tmp 8 | fi 9 | tmpfile=$(mktemp -u ${TMPDIR}/detect_bmi2-XXXXXX) 10 | if [ -z "$CPU" ]; then 11 | # default bmi2 flags is native 12 | CPU=-march=native 13 | fi 14 | cat > ${tmpfile}.cpp << EOF 15 | #include 16 | int main() { 17 | #ifdef __BMI2__ 18 | printf("1"); 19 | #else 20 | printf("0"); 21 | #endif 22 | return 0; 23 | } 24 | EOF 25 | ${CXX} ${CPU} ${tmpfile}.cpp -o ${tmpfile} && ${tmpfile} && rm -f ${tmpfile}* 26 | -------------------------------------------------------------------------------- /3rdparty/base64/test/codec_supported.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "../include/libbase64.h" 4 | 5 | static char *_codecs[] = 6 | { "AVX2" 7 | , "NEON32" 8 | , "NEON64" 9 | , "plain" 10 | , "SSSE3" 11 | , "SSE41" 12 | , "SSE42" 13 | , "AVX" 14 | , NULL 15 | } ; 16 | 17 | char **codecs = _codecs; 18 | 19 | int 20 | codec_supported (int flags) 21 | { 22 | // Check if given codec is supported by trying to decode a test string: 23 | char *a = "aGVsbG8="; 24 | char b[10]; 25 | size_t outlen; 26 | 27 | return (trk_base64_decode(a, strlen(a), b, &outlen, flags) != -1); 28 | } 29 | -------------------------------------------------------------------------------- /gtests/utils_test.cpp: -------------------------------------------------------------------------------- 1 | #include "gtest/gtest.h" 2 | #include "utils.hpp" 3 | 4 | 
TEST(UTILS_TEST, FILE_EXISTS) { 5 | std::cout << 0 << " " << terark::file_exist("/Users/guokuankuan/Programs/terark-tools/123") << std::endl; 6 | std::cout << 1 << " " << terark::file_exist("/Users/guokuankuan/Programs/terark-tools/README.md") << std::endl; 7 | std::cout << 1 << " " << terark::file_exist("/Users/guokuankuan/Programs/terark-tools/CmakeLists.txt") << std::endl; 8 | std::cout << 0 << " " << terark::file_exist("/Users/guokuankuan/Programs/terark-tools/not_exist") << std::endl; 9 | } 10 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/generic/codec.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../../include/libbase64.h" 6 | #include "../../codecs.h" 7 | 8 | BASE64_ENC_FUNCTION(plain) 9 | { 10 | #include "enc_head.c" 11 | #if BASE64_WORDSIZE == 32 12 | #include "32/enc_loop.c" 13 | #elif BASE64_WORDSIZE == 64 14 | #include "64/enc_loop.c" 15 | #endif 16 | #include "enc_tail.c" 17 | } 18 | 19 | BASE64_DEC_FUNCTION(plain) 20 | { 21 | #include "dec_head.c" 22 | #if BASE64_WORDSIZE >= 32 23 | #include "32/dec_loop.c" 24 | #endif 25 | #include "dec_tail.c" 26 | } 27 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/neon32/enc_loop.c: -------------------------------------------------------------------------------- 1 | // If we have ARM NEON support, pick off 48 bytes at a time: 2 | while (srclen >= 48) 3 | { 4 | uint8x16x3_t str; 5 | uint8x16x4_t res; 6 | 7 | // Load 48 bytes and deinterleave: 8 | str = vld3q_u8((uint8_t *)c); 9 | 10 | // Reshuffle: 11 | res = enc_reshuffle(str); 12 | 13 | // Translate reshuffled bytes to the Base64 alphabet: 14 | res = enc_translate(res); 15 | 16 | // Interleave and store result: 17 | vst4q_u8((uint8_t *)o, res); 18 | 19 | c += 48; // 3 * 16 bytes of input 20 | o += 64; // 4 * 16 bytes of output 21 | outl += 64; 
22 | srclen -= 48; 23 | } 24 | -------------------------------------------------------------------------------- /tools/fsa/test-patricia.txt: -------------------------------------------------------------------------------- 1 | 00123456789abcdefgh 2 | 00123456789abcdefgh 0 3 | 00123456789abcdefgh 1 4 | 00123456789abcdefgh 2 5 | 00123456789abcdefgh 3 6 | 00123456789abcdefgh 4 7 | 00123456789abcdefgh 5 8 | 00123456789abcdefgh 6 9 | 00123456789abcdefgh 7 10 | 00123456789abcdefgh 8 11 | 00123456789abcdefgh 9 12 | 00123456789abcdefgh a 13 | 00123456789abcdefgh b 14 | 00123456789abcdefgh c 15 | 00123456789abcdefgh d 16 | 00123456789abcdefgh e 17 | 00123456789abcdefgh f 18 | 00123456789abcdefgh g 19 | 00123456789abcdefgh h 20 | 00123456789abcdefgh h final is not 21 | 00123456789abcdefgh h final is 22 | 00123456789abcdefgh h final ix -------------------------------------------------------------------------------- /src/terark/io/todo/inter_thread_pipe.hpp: -------------------------------------------------------------------------------- 1 | #ifndef __terark_thread_LockFreeQueue_H__ 2 | #define __terark_thread_LockFreeQueue_H__ 3 | 4 | namespace thread { 5 | 6 | class inter_thread_pipe_impl; 7 | 8 | class inter_thread_pipe 9 | : public RefCounter 10 | , public IInputStream 11 | , public IOutputStream 12 | { 13 | inter_thread_pipe_impl* mio; 14 | public: 15 | explicit inter_thread_pipe(size_t capacity); 16 | ~inter_thread_pipe(); 17 | void eof(); 18 | void read(void* vbuf, size_t length); 19 | void write(void* vbuf, size_t length); 20 | }; 21 | 22 | } // namespace thread 23 | 24 | #endif // __terark_thread_LockFreeQueue_H__ 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/terark/util/stdptr.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace terark { 5 | 6 | // for pre c++17 7 | 8 | template 9 | std::shared_ptr as_shared_ptr(T* p) { 10 | return 
std::shared_ptr(p); 11 | } 12 | 13 | template 14 | std::shared_ptr as_shared_ptr(T* p, D d) { 15 | return std::shared_ptr(p, d); 16 | } 17 | 18 | template 19 | std::unique_ptr as_unique_ptr(T* p) { 20 | return std::unique_ptr(p); 21 | } 22 | 23 | template 24 | std::unique_ptr as_unique_ptr(T* p, D d) { 25 | return std::unique_ptr(p, d); 26 | } 27 | 28 | 29 | } // namespace terark 30 | 31 | -------------------------------------------------------------------------------- /gtests/zbs/zbs_mixed_len.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "zbs.hpp" 4 | 5 | namespace terark { 6 | // TODO 7 | class ZBSMixedLen : ZBS { 8 | // public: 9 | // ZBSMixedLen() { 10 | // MixedLenBlobStore::MyBuilder mlbuilder(fixedLen, varLenSize, varLenCnt, 11 | // nlt_fname, 0, checksumLevel, 12 | // checksumType); 13 | // } 14 | // ~ZBSMixedLen() {} 15 | 16 | // public: 17 | // void add_record(const std::string &record) {} 18 | 19 | // private: 20 | // MixedLenBlobStore::MyBuilder builder_; 21 | }; 22 | } // namespace terark -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/generic/enc_tail.c: -------------------------------------------------------------------------------- 1 | if (srclen-- == 0) { 2 | break; 3 | } 4 | *o++ = base64_table_enc[*c >> 2]; 5 | st.carry = (*c++ << 4) & 0x30; 6 | st.bytes++; 7 | outl += 1; 8 | 9 | case 1: if (srclen-- == 0) { 10 | break; 11 | } 12 | *o++ = base64_table_enc[st.carry | (*c >> 4)]; 13 | st.carry = (*c++ << 2) & 0x3C; 14 | st.bytes++; 15 | outl += 1; 16 | 17 | case 2: if (srclen-- == 0) { 18 | break; 19 | } 20 | *o++ = base64_table_enc[st.carry | (*c >> 6)]; 21 | *o++ = base64_table_enc[*c++ & 0x3F]; 22 | st.bytes = 0; 23 | outl += 2; 24 | } 25 | } 26 | state->bytes = st.bytes; 27 | state->carry = st.carry; 28 | *outlen = outl; 29 | -------------------------------------------------------------------------------- 
/3rdparty/base64/lib/arch/ssse3/enc_loop.c: -------------------------------------------------------------------------------- 1 | // If we have SSSE3 support, pick off 12 bytes at a time for as long as we can. 2 | // But because we read 16 bytes at a time, ensure we have enough room to do a 3 | // full 16-byte read without segfaulting: 4 | while (srclen >= 16) 5 | { 6 | // Load string: 7 | __m128i str = _mm_loadu_si128((__m128i *)c); 8 | 9 | // Reshuffle: 10 | str = enc_reshuffle(str); 11 | 12 | // Translate reshuffled bytes to the Base64 alphabet: 13 | str = enc_translate(str); 14 | 15 | // Store: 16 | _mm_storeu_si128((__m128i *)o, str); 17 | 18 | c += 12; // 3 * 4 bytes of input 19 | o += 16; // 4 * 4 bytes of output 20 | outl += 16; 21 | srclen -= 12; 22 | } 23 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/generic/enc_head.c: -------------------------------------------------------------------------------- 1 | // Assume that *out is large enough to contain the output. 2 | // Theoretically it should be 4/3 the length of src. 3 | const uint8_t *c = (const uint8_t *)src; 4 | uint8_t *o = (uint8_t *)out; 5 | 6 | // Use local temporaries to avoid cache thrashing: 7 | size_t outl = 0; 8 | struct base64_state st; 9 | st.bytes = state->bytes; 10 | st.carry = state->carry; 11 | 12 | // Turn three bytes into four 6-bit numbers: 13 | // in[0] = 00111111 14 | // in[1] = 00112222 15 | // in[2] = 00222233 16 | // in[3] = 00333333 17 | 18 | // Duff's device, a for() loop inside a switch() statement. Legal! 
19 | switch (st.bytes) 20 | { 21 | for (;;) 22 | { 23 | case 0: 24 | -------------------------------------------------------------------------------- /src/terark/thread/mutex.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #if defined(TERARK_WITH_TBB) 6 | #include 7 | #endif 8 | 9 | namespace terark { 10 | 11 | #if defined(TERARK_WITH_TBB) 12 | #if TERARK_WITH_TBB+1 >= 2+1 13 | class TERARK_DLL_EXPORT spin_mutex : boost::noncopyable { 14 | unsigned char m_is_locked; 15 | public: 16 | spin_mutex() : m_is_locked(0) {} 17 | void lock(); 18 | void unlock(); 19 | }; 20 | #else 21 | using tbb::spin_mutex; 22 | #endif 23 | #else 24 | typedef std::mutex spin_mutex; 25 | #endif 26 | 27 | } // namespace terark 28 | -------------------------------------------------------------------------------- /src/terark/io/StdvecWriter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #pragma error Never Try to add such a file StdvecWriter 4 | 5 | #include // for size_t 6 | 7 | namespace terark { 8 | 9 | template 10 | class StdvecWriter : public Stdvec { 11 | public: 12 | typedef typename Stdvec::value_type value_type; 13 | static_assert(sizeof(value_type) == 1, "value_type must be 1 byte"); 14 | 15 | using Stdvec::Stdvec; 16 | void ensureWrite(const void* buf, size_t len) { 17 | this->insert(this->end(), (const value_type*)(buf), len); 18 | } 19 | 20 | void writeByte(unsigned char b) { 21 | this->push_back(b); 22 | } 23 | }; 24 | 25 | } // namespace terark 26 | -------------------------------------------------------------------------------- /src/terark/io/DataInput_VarIntAsFixLen.hpp: -------------------------------------------------------------------------------- 1 | MyType& operator>>(var_int32_t & x) { return *this >> x.t; } 2 | MyType& operator>>(var_uint32_t& x) { return *this >> x.t; } 3 | 4 | #if !defined(BOOST_NO_INT64_T) 5 
| MyType& operator>>(var_int64_t & x) { return *this >> x.t; } 6 | MyType& operator>>(var_uint64_t& x) { return *this >> x.t; } 7 | #endif 8 | MyType& operator>>(serialize_version_t& x) { return *this >> x.t; } 9 | 10 | MyType& operator>>(var_int30_t & x) { return *this >> x.t; } 11 | MyType& operator>>(var_uint30_t& x) { return *this >> x.t; } 12 | 13 | #if !defined(BOOST_NO_INT64_T) 14 | MyType& operator>>(var_int61_t & x) { return *this >> x.t; } 15 | MyType& operator>>(var_uint61_t& x) { return *this >> x.t; } 16 | #endif 17 | 18 | -------------------------------------------------------------------------------- /src/terark/util/vm_util.hpp: -------------------------------------------------------------------------------- 1 | // created by leipeng 2022-07-21 09:48, all rights reserved 2 | #pragma once 3 | #if defined(_MSC_VER) 4 | // nothing 5 | #else 6 | #include 7 | #endif 8 | #include 9 | 10 | namespace terark { 11 | 12 | constexpr size_t VM_PAGE_SIZE = 4096; 13 | 14 | #if defined(_MSC_VER) 15 | constexpr bool g_has_madv_populate = true; 16 | constexpr size_t g_min_prefault_pages = 1; 17 | #else 18 | TERARK_DLL_EXPORT extern const int g_linux_kernel_version; 19 | TERARK_DLL_EXPORT extern const bool g_has_madv_populate; 20 | TERARK_DLL_EXPORT extern const size_t g_min_prefault_pages; 21 | #endif 22 | 23 | TERARK_DLL_EXPORT void vm_prefetch(const void* addr, size_t len, size_t min_pages); 24 | 25 | } // namespace terark 26 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/ssse3/dec_reshuffle.c: -------------------------------------------------------------------------------- 1 | static inline __m128i 2 | dec_reshuffle (__m128i in) 3 | { 4 | // Mask in a single byte per shift: 5 | const __m128i maskB2 = _mm_set1_epi32(0x003F0000); 6 | const __m128i maskB1 = _mm_set1_epi32(0x00003F00); 7 | 8 | // Pack bytes together: 9 | __m128i out = _mm_srli_epi32(in, 16); 10 | 11 | out = _mm_or_si128(out, 
_mm_srli_epi32(_mm_and_si128(in, maskB2), 2)); 12 | 13 | out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, maskB1), 12)); 14 | 15 | out = _mm_or_si128(out, _mm_slli_epi32(in, 26)); 16 | 17 | // Reshuffle and repack into 12-byte output format: 18 | return _mm_shuffle_epi8(out, _mm_setr_epi8( 19 | 3, 2, 1, 20 | 7, 6, 5, 21 | 11, 10, 9, 22 | 15, 14, 13, 23 | -1, -1, -1, -1)); 24 | } 25 | -------------------------------------------------------------------------------- /src/terark/io/file_load_save.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace terark { 8 | 9 | template 10 | void native_load_file(const char* fname, Object* obj) { 11 | assert(NULL != fname); 12 | assert(NULL != obj); 13 | FileStream file(fname, "rb"); 14 | NativeDataInput dio; dio.attach(&file); 15 | Object tmp; 16 | dio >> tmp; 17 | obj->swap(tmp); 18 | } 19 | 20 | template 21 | void native_save_file(const char* fname, const Object& obj) { 22 | assert(NULL != fname); 23 | FileStream file(fname, "wb"); 24 | NativeDataOutput dio; dio.attach(&file); 25 | dio << obj; 26 | } 27 | 28 | } // namespace terark 29 | -------------------------------------------------------------------------------- /src/terark/thread/fiber_aio.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-08-22. 
3 | // 4 | #pragma once 5 | 6 | #include 7 | #include // for size_t, ssize_t 8 | #include 9 | #include 10 | 11 | namespace terark { 12 | 13 | TERARK_DLL_EXPORT 14 | intptr_t fiber_aio_read(int fd, void* buf, size_t len, off_t offset); 15 | 16 | TERARK_DLL_EXPORT 17 | void fiber_aio_vm_prefetch(const void* buf, size_t len); 18 | 19 | TERARK_DLL_EXPORT 20 | intptr_t fiber_aio_write(int fd, const void* buf, size_t len, off_t offset); 21 | 22 | /// put the write to a dedicated thread to execute the write by aio 23 | TERARK_DLL_EXPORT 24 | intptr_t fiber_put_write(int fd, const void* buf, size_t len, off_t offset); 25 | 26 | 27 | } // namespace terark 28 | -------------------------------------------------------------------------------- /src/terark/thread/futex.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | // this file should only be #include in .c/cc/cpp files 3 | #include 4 | #include /* For SYS_xxx definitions */ 5 | #include 6 | 7 | inline long 8 | futex(void* uaddr, uint32_t op, uint32_t val, const timespec* timeout = NULL, 9 | void* uaddr2 = NULL, uint32_t val3 = 0) { 10 | return syscall(SYS_futex, uaddr, (unsigned long)op, (unsigned long)val, 11 | timeout, uaddr2, (unsigned long)val3); 12 | } 13 | 14 | inline long 15 | futex(void* uaddr, uint32_t op, uint32_t val, uint32_t val2, 16 | void* uaddr2 = NULL, uint32_t val3 = 0) { 17 | return syscall(SYS_futex, uaddr, (unsigned long)op, (unsigned long)val, 18 | val2, uaddr2, (unsigned long)val3); 19 | } 20 | -------------------------------------------------------------------------------- /tools/fsa/patricia_bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) { 5 | using namespace terark; 6 | std::unique_ptr dfa(MatchingDFA::load_mmap(0)); 7 | fstrvecl fsv; 8 | ADFA_LexIteratorUP iter(dfa->adfa_make_iter()); 9 | if (iter->seek_begin()) { 10 | do { 11 | 
fstring word = iter->word(); 12 | fsv.push_back(word); 13 | } while (iter->incr()); 14 | } 15 | for (size_t i = 0; i < fsv.size(); ++i) { 16 | fstring word = fsv[i]; 17 | TERARK_VERIFY_S(iter->seek_lower_bound(word), "word = %s", word); 18 | } 19 | fprintf(stderr, "key num = %zd, key len sum = %zd\n", fsv.size(), fsv.strpool.size()); 20 | return 0; 21 | } 22 | -------------------------------------------------------------------------------- /src/terark/fsa/forward_decl.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace terark { 9 | 10 | class BaseDFA; 11 | class MatchingDFA; 12 | 13 | struct TERARK_DLL_EXPORT BaseDFADeleter { 14 | void operator()(BaseDFA*) const; 15 | void operator()(MatchingDFA*) const; 16 | }; 17 | 18 | typedef std::unique_ptr BaseDFAPtr; 19 | typedef std::unique_ptr MatchingDFAPtr; 20 | 21 | TERARK_DLL_EXPORT BaseDFA* BaseDFA_load(fstring fname); 22 | TERARK_DLL_EXPORT BaseDFA* BaseDFA_load(FILE*); 23 | 24 | TERARK_DLL_EXPORT MatchingDFA* MatchingDFA_load(fstring fname); 25 | TERARK_DLL_EXPORT MatchingDFA* MatchingDFA_load(FILE*); 26 | 27 | } // namespace terark 28 | -------------------------------------------------------------------------------- /3rdparty/base64/test/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS += -std=c99 -O3 -Wall -Wextra -pedantic 2 | ifdef OPENMP 3 | CFLAGS += -fopenmp 4 | endif 5 | 6 | TARGET := $(shell $(CC) -dumpmachine) 7 | ifneq (, $(findstring darwin, $(TARGET))) 8 | BENCH_LDFLAGS= 9 | else 10 | # default to linux, -lrt needed 11 | BENCH_LDFLAGS=-lrt 12 | endif 13 | 14 | .PHONY: clean test 15 | 16 | test: clean test_base64 benchmark 17 | ./test_base64 18 | ./benchmark 19 | 20 | test_base64: test_base64.c codec_supported.o ../lib/libbase64.o 21 | $(CC) $(CFLAGS) -o $@ $^ 22 | 23 | benchmark: benchmark.c codec_supported.o ../lib/libbase64.o 
24 | $(CC) $(CFLAGS) -o $@ $^ $(BENCH_LDFLAGS) 25 | 26 | ../%: 27 | make -C .. $* 28 | 29 | %.o: %.c 30 | $(CC) $(CFLAGS) -o $@ -c $< 31 | 32 | clean: 33 | rm -f benchmark test_base64 *.o 34 | -------------------------------------------------------------------------------- /src/terark/thread/fiber_local.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-08-22. 3 | // 4 | #pragma once 5 | #include 6 | 7 | namespace terark { 8 | 9 | template 10 | class recycle_pool { 11 | valvec m_free; 12 | static_assert(std::is_move_constructible >::value, "valvec must be move constructible"); 13 | static_assert(std::is_move_constructible::value, "T must be move constructible"); 14 | public: 15 | T get() { 16 | if (m_free.size()) { 17 | return m_free.pop_val(); 18 | } 19 | else { 20 | return T(); 21 | } 22 | } 23 | void put(T&& p) { 24 | m_free.emplace_back(std::move(p)); 25 | } 26 | }; 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/terark/fsa/fsa_ext.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-05-07. 
3 | // 4 | 5 | #include "fsa_ext.hpp" 6 | 7 | namespace terark { 8 | 9 | TERARK_DLL_EXPORT 10 | size_t dfa_write_text(const BaseDFA* dfa, FILE* fp) { 11 | auto adfa = dynamic_cast(dfa); 12 | if (!adfa) { 13 | THROW_STD(invalid_argument, "dfa is not an AcyclicPathDFA"); 14 | } 15 | ADFA_LexIteratorUP iter(adfa->adfa_make_iter()); 16 | bool hasNext = iter->seek_begin(); 17 | size_t nth = 0; 18 | while (hasNext) { 19 | fstring word = iter->word(); 20 | fprintf(fp, "%.*s\n", word.ilen(), word.data()); 21 | hasNext = iter->incr(); 22 | nth++; 23 | } 24 | return nth; 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /tools/general/hex2bson.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-10-20. 3 | // 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace terark; 9 | 10 | int main() { 11 | LineBuf line; 12 | valvec obuf; 13 | while (line.getline(stdin) > 0) { 14 | line.chomp(); 15 | obuf.resize_no_init(line.size()/2 + 1 + 4); 16 | size_t hexstrlen = hex_decode(line.p, line.n, obuf.data() + 4, obuf.capacity() - 4); 17 | // ignore (hexstrlen % 2 == 1) 18 | size_t datalen = hexstrlen/2; 19 | size_t bsonlen = 4 + datalen; 20 | *(uint32_t*)obuf.data() = bsonlen; 21 | size_t written = fwrite(obuf.data(), 1, bsonlen, stdout); 22 | if (written != bsonlen) { 23 | perror("fwrite(stdout) failed"); 24 | exit(1); 25 | } 26 | } 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /src/terark/util/checksum_exception.cpp: -------------------------------------------------------------------------------- 1 | #include "checksum_exception.hpp" 2 | 3 | #ifndef __STDC_FORMAT_MACROS 4 | #define __STDC_FORMAT_MACROS 5 | #endif // __STDC_FORMAT_MACROS 6 | 7 | #include 8 | 9 | namespace terark { 10 | 11 | BadChecksumException::~BadChecksumException() {} 12 | 13 | static std::string ChecksumErrMsg(fstring 
msg, uint64_t Old, uint64_t New) { 14 | char buf[72]; 15 | std::string res; 16 | res.reserve(msg.size() + 64); 17 | res.append(msg.data(), msg.size()); 18 | res.append(buf, sprintf(buf, ": Old = 0x%16" PRIX64 " , New = 0x%16" PRIX64, Old, New)); 19 | return res; 20 | } 21 | 22 | BadChecksumException:: 23 | BadChecksumException(fstring msg, uint64_t Old, uint64_t New) 24 | : super(ChecksumErrMsg(msg, Old, New)), m_old(Old), m_new(New) {} 25 | 26 | } // terark 27 | 28 | -------------------------------------------------------------------------------- /src/terark/util/stat.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // for best compatibility, this file should be the last include 4 | 5 | #include 6 | #include 7 | 8 | #ifndef S_ISDIR 9 | #define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) 10 | #endif 11 | #ifndef S_ISREG 12 | #define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) 13 | #endif 14 | 15 | #ifdef _MSC_VER 16 | #ifndef _CRT_NONSTDC_NO_DEPRECATE 17 | #error _CRT_NONSTDC_NO_DEPRECATE must be defined to use posix functions on Visual C++ 18 | #endif 19 | // VC does not forward stat/fstat to stat64/fstat64 20 | // VC stat on large file will fail 21 | #define ll_stat _stat64 22 | #define ll_fstat _fstat64 23 | #define ll_lseek _lseeki64 24 | #else 25 | #define ll_stat stat 26 | #define ll_fstat fstat 27 | #define ll_lseek lseek 28 | #endif 29 | 30 | -------------------------------------------------------------------------------- /src/terark/str_lex_iter.cpp: -------------------------------------------------------------------------------- 1 | #include "str_lex_iter.hpp" 2 | 3 | namespace terark { 4 | 5 | template 6 | StringLexIteratorT::~StringLexIteratorT() {} 7 | 8 | template 9 | void StringLexIteratorT::dispose() { 10 | // default is to direct delete 11 | delete this; 12 | } 13 | 14 | template 15 | bool StringLexIteratorT::seek_begin() { 16 | return seek_lower_bound(fstr()); 17 | } 18 | 19 | 
template 20 | bool StringLexIteratorT::seek_rev_lower_bound(fstr str) { 21 | if (seek_lower_bound(str)) { 22 | if (word() == str) 23 | return true; 24 | return decr(); 25 | } 26 | return seek_end(); 27 | } 28 | 29 | template class StringLexIteratorT; 30 | template class StringLexIteratorT; 31 | 32 | } // namespace terark 33 | -------------------------------------------------------------------------------- /src/terark/util/nolocks_localtime.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace terark { 6 | 7 | // with params time zone and dst 8 | TERARK_DLL_EXPORT void nolocks_localtime_tzd(struct tm*, time_t, long tz, int dst); 9 | 10 | // with params time zone 11 | TERARK_DLL_EXPORT void nolocks_localtime_tz(struct tm*, time_t, long tz); 12 | 13 | // proto type is same as localtime 14 | TERARK_DLL_EXPORT struct tm* nolocks_localtime(const time_t*); 15 | 16 | // proto type is same as localtime_r 17 | TERARK_DLL_EXPORT struct tm* nolocks_localtime_r(const time_t*, struct tm*); 18 | 19 | TERARK_DLL_EXPORT const char* StrDateTimeEpochSec(time_t); 20 | TERARK_DLL_EXPORT const char* StrDateTimeEpochUS(long long time_us); 21 | 22 | TERARK_DLL_EXPORT const char* StrDateTimeNow(); 23 | 24 | } // namespace terark 25 | -------------------------------------------------------------------------------- /src/terark/util/throw.cpp: -------------------------------------------------------------------------------- 1 | #include "throw.hpp" 2 | #include "autofree.hpp" 3 | #include 4 | #include 5 | #include 6 | 7 | namespace terark { 8 | 9 | TERARK_DLL_EXPORT 10 | std::string ExceptionFormatString(const char* format, ...) 
{ 11 | #ifdef _MSC_VER 12 | std::string buf(16*1024, '\0'); 13 | va_list ap; 14 | va_start(ap, format); 15 | int len = _vsnprintf(&buf[0], buf.size(), format, ap); 16 | va_end(ap); 17 | buf.resize(len); 18 | buf.shrink_to_fit(); 19 | //fprintf(stderr, "%s\n", buf.c_str()); 20 | return buf; 21 | #else 22 | terark::AutoFree buf; 23 | va_list ap; 24 | va_start(ap, format); 25 | int len = vasprintf(&buf.p, format, ap); 26 | va_end(ap); 27 | //fprintf(stderr, "%s\n", buf.p); 28 | return std::string(buf.p, len); 29 | #endif 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/ssse3/codec.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../../include/libbase64.h" 6 | #include "../../codecs.h" 7 | 8 | #ifdef __SSSE3__ 9 | #include 10 | 11 | #include "../sse2/compare_macros.h" 12 | 13 | #include "dec_reshuffle.c" 14 | #include "enc_reshuffle.c" 15 | #include "enc_translate.c" 16 | 17 | #endif // __SSSE3__ 18 | 19 | BASE64_ENC_FUNCTION(ssse3) 20 | { 21 | #ifdef __SSSE3__ 22 | #include "../generic/enc_head.c" 23 | #include "enc_loop.c" 24 | #include "../generic/enc_tail.c" 25 | #else 26 | BASE64_ENC_STUB 27 | #endif 28 | } 29 | 30 | BASE64_DEC_FUNCTION(ssse3) 31 | { 32 | #ifdef __SSSE3__ 33 | #include "../generic/dec_head.c" 34 | #include "dec_loop.c" 35 | #include "../generic/dec_tail.c" 36 | #else 37 | BASE64_DEC_STUB 38 | #endif 39 | } 40 | -------------------------------------------------------------------------------- /src/terark/util/crc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace terark { 6 | 7 | TERARK_DLL_EXPORT 8 | uint32_t Crc32c_update(uint32_t inCrc32, const void *buf, size_t bufLen); 9 | 10 | TERARK_DLL_EXPORT 11 | uint16_t Crc16c_update(uint16_t inCrc16, const void *buf, size_t bufLen); 12 | 13 | 
class TERARK_DLL_EXPORT BadCrc32cException : public BadChecksumException { 14 | public: 15 | BadCrc32cException(fstring msg, uint32_t Old, uint32_t New) 16 | : BadChecksumException(msg, Old, New) {} 17 | ~BadCrc32cException(); 18 | }; 19 | 20 | class TERARK_DLL_EXPORT BadCrc16cException : public BadChecksumException { 21 | public: 22 | BadCrc16cException(fstring msg, uint16_t Old, uint16_t New) 23 | : BadChecksumException(msg, Old, New) {} 24 | ~BadCrc16cException(); 25 | }; 26 | 27 | } // terark 28 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/avx/codec.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../../include/libbase64.h" 6 | #include "../../codecs.h" 7 | 8 | #ifdef __AVX__ 9 | #include 10 | 11 | #include "../sse2/compare_macros.h" 12 | 13 | #include "../ssse3/dec_reshuffle.c" 14 | #include "../ssse3/enc_translate.c" 15 | #include "../ssse3/enc_reshuffle.c" 16 | 17 | #endif // __AVX__ 18 | 19 | BASE64_ENC_FUNCTION(avx) 20 | { 21 | #ifdef __AVX__ 22 | #include "../generic/enc_head.c" 23 | #include "../ssse3/enc_loop.c" 24 | #include "../generic/enc_tail.c" 25 | #else 26 | BASE64_ENC_STUB 27 | #endif 28 | } 29 | 30 | BASE64_DEC_FUNCTION(avx) 31 | { 32 | #ifdef __AVX__ 33 | #include "../generic/dec_head.c" 34 | #include "../sse42/dec_loop.c" 35 | #include "../generic/dec_tail.c" 36 | #else 37 | BASE64_DEC_STUB 38 | #endif 39 | } 40 | -------------------------------------------------------------------------------- /src/terark/util/tmpfile.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace terark { 8 | 9 | class TERARK_DLL_EXPORT TempFileDeleteOnClose { 10 | public: 11 | std::string path; 12 | FileStream fp; 13 | NativeDataOutput writer; 14 | 15 | ~TempFileDeleteOnClose(); 16 | void open_temp(); 
17 | void open(); 18 | void dopen(int fd); 19 | void close(); 20 | void complete_write(); 21 | }; 22 | 23 | struct TERARK_DLL_EXPORT FilePair { 24 | TempFileDeleteOnClose key; 25 | TempFileDeleteOnClose value; 26 | bool isFullValue = true; 27 | }; 28 | 29 | 30 | class TERARK_DLL_EXPORT AutoDeleteFile { 31 | public: 32 | std::string fpath; 33 | operator fstring() const { return fpath; } 34 | void Delete(); 35 | ~AutoDeleteFile(); 36 | }; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | cmake-build-debug 2 | .idea 3 | build 4 | tools/*/dbg 5 | tools/*/rls 6 | 7 | pkg 8 | # Compiled Object files 9 | *.slo 10 | *.lo 11 | *.o 12 | *.obj 13 | 14 | # Precompiled Headers 15 | *.gch 16 | *.pch 17 | 18 | # Compiled Dynamic libraries 19 | *.so 20 | *.dylib 21 | *.dll 22 | 23 | # Fortran module files 24 | *.mod 25 | *.smod 26 | 27 | # Compiled Static libraries 28 | *.lai 29 | *.la 30 | *.a 31 | *.lib 32 | 33 | # Executables 34 | *.exe 35 | *.out 36 | *.app 37 | 38 | .idea/ 39 | .vs/ 40 | *.tlog 41 | *.lastbuildstate 42 | *.idb 43 | *.pdb 44 | *.ilk 45 | */x64/Debug/ 46 | */x64/Release/ 47 | */CMakeFiles/ 48 | 49 | build 50 | vs2015 51 | vs2017 52 | 53 | # vim backup file 54 | *~ 55 | 56 | # got files 57 | *.got 58 | 59 | # vscode file 60 | .vscode 61 | 62 | cmake-build-debug 63 | .DS_Store 64 | 65 | terark-core 66 | terark-rocksdb 67 | output 68 | tests/Testing 69 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/sse42/codec.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../../include/libbase64.h" 6 | #include "../../codecs.h" 7 | 8 | #ifdef __SSE4_2__ 9 | #include 10 | 11 | #include "../sse2/compare_macros.h" 12 | 13 | #include "../ssse3/dec_reshuffle.c" 14 | #include 
"../ssse3/enc_translate.c" 15 | #include "../ssse3/enc_reshuffle.c" 16 | 17 | #endif // __SSE4_2__ 18 | 19 | BASE64_ENC_FUNCTION(sse42) 20 | { 21 | #ifdef __SSE4_2__ 22 | #include "../generic/enc_head.c" 23 | #include "../ssse3/enc_loop.c" 24 | #include "../generic/enc_tail.c" 25 | #else 26 | BASE64_ENC_STUB 27 | #endif 28 | } 29 | 30 | BASE64_DEC_FUNCTION(sse42) 31 | { 32 | #ifdef __SSE4_2__ 33 | #include "../generic/dec_head.c" 34 | #include "dec_loop.c" 35 | #include "../generic/dec_tail.c" 36 | #else 37 | BASE64_DEC_STUB 38 | #endif 39 | } 40 | -------------------------------------------------------------------------------- /tests/core/test_call_on_main_stack.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-10-28. 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | int main() { 10 | using namespace boost::fibers; 11 | auto fn = []() { 12 | scheduler* sched = context::active()->get_scheduler(); 13 | auto largeFn = []() { 14 | char buf[256 * 1024] = {0}; 15 | sprintf(buf, "fn: large stack"); 16 | //printf("%s\n", buf); 17 | }; 18 | //size_t loop = 1024*1024; 19 | size_t loop = 1; 20 | for (size_t i = 0; i < loop; ++i) { 21 | sched->call_on_main_stack(largeFn); 22 | } 23 | }; 24 | fiber f1(fn); 25 | boost::this_fiber::yield(); 26 | f1.join(); 27 | printf("call_on_main_stack passed\n"); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /src/terark/util/sorted_uint_vec_get_block_word.hpp: -------------------------------------------------------------------------------- 1 | #if (Width == 1) 2 | for (size_t j = 0; j < RealWordUnits; ++j) { 3 | aVals[i*TERARK_WORD_BITS + j] = val; 4 | if (w & 1) { 5 | val += smallDiff; // faster than Width != 1 6 | } 7 | else { 8 | size_t largeDiff = febitvec::s_get_uint(pLargeBase, largeBitPos, largeUnitWidth); 9 | val += largeDiff; 10 | largeBitPos += largeUnitWidth; 11 | } 12 | w >>= 1; 13 | } 
14 | #else 15 | for (size_t j = 0; j < RealWordUnits; ++j) { 16 | aVals[i*WordUnits + j] = val; 17 | size_t diff = w & UnitMask; 18 | if (diff) { 19 | val += minDiffVal + diff; 20 | } 21 | else { 22 | size_t largeDiff = febitvec::s_get_uint(pLargeBase, largeBitPos, largeUnitWidth); 23 | val += largeDiff; 24 | largeBitPos += largeUnitWidth; 25 | } 26 | w >>= Width; 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/sse41/codec.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "../../../include/libbase64.h" 6 | #include "../../codecs.h" 7 | 8 | #ifdef __SSE4_1__ 9 | #include 10 | 11 | #include "../sse2/compare_macros.h" 12 | 13 | #include "../ssse3/dec_reshuffle.c" 14 | #include "../ssse3/enc_translate.c" 15 | #include "../ssse3/enc_reshuffle.c" 16 | 17 | #endif // __SSE4_1__ 18 | 19 | BASE64_ENC_FUNCTION(sse41) 20 | { 21 | #ifdef __SSE4_1__ 22 | #include "../generic/enc_head.c" 23 | #include "../ssse3/enc_loop.c" 24 | #include "../generic/enc_tail.c" 25 | #else 26 | BASE64_ENC_STUB 27 | #endif 28 | } 29 | 30 | BASE64_DEC_FUNCTION(sse41) 31 | { 32 | #ifdef __SSE4_1__ 33 | #include "../generic/dec_head.c" 34 | #include "../ssse3/dec_loop.c" 35 | #include "../generic/dec_tail.c" 36 | #else 37 | BASE64_DEC_STUB 38 | #endif 39 | } 40 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/dll/example/build_package.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | MKDIR bin\dll bin\static bin\example bin\include 3 | COPY tests\fullbench.c bin\example\ 4 | COPY programs\datagen.c bin\example\ 5 | COPY programs\datagen.h bin\example\ 6 | COPY programs\util.h bin\example\ 7 | COPY programs\platform.h bin\example\ 8 | COPY lib\common\mem.h bin\example\ 9 | COPY lib\common\zstd_internal.h bin\example\ 10 | COPY 
lib\common\error_private.h bin\example\ 11 | COPY lib\common\xxhash.h bin\example\ 12 | COPY lib\libzstd.a bin\static\libzstd_static.lib 13 | COPY lib\dll\libzstd.* bin\dll\ 14 | COPY lib\dll\example\Makefile bin\example\ 15 | COPY lib\dll\example\fullbench-dll.* bin\example\ 16 | COPY lib\dll\example\README.md bin\ 17 | COPY lib\zstd.h bin\include\ 18 | COPY lib\common\zstd_errors.h bin\include\ 19 | COPY lib\dictBuilder\zdict.h bin\include\ 20 | COPY programs\zstd.exe bin\zstd.exe 21 | -------------------------------------------------------------------------------- /src/terark/io/DataOutput_VarIntAsFixLen.hpp: -------------------------------------------------------------------------------- 1 | public: 2 | MyType& operator<<(var_int32_t x) { return this->operator<<(x.t); } 3 | MyType& operator<<(var_uint32_t x) { return this->operator<<(x.t); } 4 | 5 | #if !defined(BOOST_NO_INT64_T) 6 | MyType& operator<<(var_int64_t x) { return this->operator<<(x.t); } 7 | MyType& operator<<(var_uint64_t x) { return this->operator<<(x.t); } 8 | #endif 9 | 10 | MyType& operator<<(serialize_version_t x) { return this->operator<<(x.t); } 11 | 12 | //----------------------------------------------------------------------------------- 13 | MyType& operator<<(var_int30_t x) { return this->operator<<(x.t); } 14 | MyType& operator<<(var_uint30_t x) { return this->operator<<(x.t); } 15 | 16 | #if !defined(BOOST_NO_INT64_T) 17 | MyType& operator<<(var_int61_t x) { return this->operator<<(x.t); } 18 | MyType& operator<<(var_uint61_t x) { return this->operator<<(x.t); } 19 | #endif 20 | 21 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/generic/dec_head.c: -------------------------------------------------------------------------------- 1 | int ret = 0; 2 | const uint8_t *c = (const uint8_t *)src; 3 | uint8_t *o = (uint8_t *)out; 4 | uint8_t q; 5 | 6 | // Use local temporaries to avoid cache thrashing: 7 | size_t outl = 0; 8 | struct 
base64_state st; 9 | st.eof = state->eof; 10 | st.bytes = state->bytes; 11 | st.carry = state->carry; 12 | 13 | // If we previously saw an EOF or an invalid character, bail out: 14 | if (st.eof) { 15 | *outlen = 0; 16 | ret = 0; 17 | // If there was a trailing '=' to check, check it: 18 | if (srclen && (st.eof == BASE64_AEOF)) { 19 | state->bytes = 0; 20 | state->eof = BASE64_EOF; 21 | ret = ((base64_table_dec[*c++] == 254) && (srclen == 1)) ? 1 : 0; 22 | } 23 | return ret; 24 | } 25 | 26 | // Turn four 6-bit numbers into three bytes: 27 | // out[0] = 11111122 28 | // out[1] = 22223333 29 | // out[2] = 33444444 30 | 31 | // Duff's device again: 32 | switch (st.bytes) 33 | { 34 | for (;;) 35 | { 36 | case 0: 37 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/common/debug.c: -------------------------------------------------------------------------------- 1 | /* ****************************************************************** 2 | * debug 3 | * Part of FSE library 4 | * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. 5 | * 6 | * You can contact the author at : 7 | * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy 8 | * 9 | * This source code is licensed under both the BSD-style license (found in the 10 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11 | * in the COPYING file in the root directory of this source tree). 12 | * You may select, at your option, one of the above-listed licenses. 
13 | ****************************************************************** */ 14 | 15 | 16 | /* 17 | * This module only hosts one global variable 18 | * which can be used to dynamically influence the verbosity of traces, 19 | * such as DEBUGLOG and RAWLOG 20 | */ 21 | 22 | #include "debug.h" 23 | 24 | int g_debuglevel = DEBUGLEVEL; 25 | -------------------------------------------------------------------------------- /src/terark/io/DataIO_SmartPtr.hpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | #pragma once 3 | 4 | #include 5 | #include 6 | 7 | namespace terark { 8 | 9 | //! 10 | #define DATA_IO_SMART_PTR_LOAD_SAVE(SmartPtrTemplate) \ 11 | template \ 12 | void DataIO_loadObject(DataIO& dio, SmartPtrTemplate& x) \ 13 | { \ 14 | x.reset(new T); \ 15 | dio >> *x; \ 16 | } \ 17 | template \ 18 | void DataIO_saveObject(DataIO& dio, const SmartPtrTemplate& x)\ 19 | { \ 20 | dio << *x; \ 21 | } 22 | //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 23 | 24 | DATA_IO_SMART_PTR_LOAD_SAVE(std::auto_ptr) 25 | DATA_IO_SMART_PTR_LOAD_SAVE(boost::intrusive_ptr) 26 | DATA_IO_SMART_PTR_LOAD_SAVE(boost::scoped_ptr) 27 | DATA_IO_SMART_PTR_LOAD_SAVE(boost::shared_ptr) 28 | 29 | 30 | } // namespace terark 31 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/generic/32/enc_loop.c: -------------------------------------------------------------------------------- 1 | // If we have 32-bit ints, pick off 3 bytes at a time for as long as we can, 2 | // but ensure that there are at least 4 bytes available to avoid segfaulting: 3 | while (srclen >= 4) 4 | { 5 | // Load string: 6 | uint32_t str = *(uint32_t *)c; 7 | 8 | // Reorder to 32-bit big-endian, if not already in that format. 
The 9 | // workset must be in big-endian, otherwise the shifted bits do not 10 | // carry over properly among adjacent bytes: 11 | str = cpu_to_be32(str); 12 | 13 | // Shift input by 6 bits each round and mask in only the lower 6 bits; 14 | // look up the character in the Base64 encoding table and write it to 15 | // the output location: 16 | *o++ = base64_table_enc[(str >> 26) & 0x3F]; 17 | *o++ = base64_table_enc[(str >> 20) & 0x3F]; 18 | *o++ = base64_table_enc[(str >> 14) & 0x3F]; 19 | *o++ = base64_table_enc[(str >> 8) & 0x3F]; 20 | 21 | c += 3; // 3 bytes of input 22 | outl += 4; // 4 bytes of output 23 | srclen -= 3; 24 | } 25 | -------------------------------------------------------------------------------- /src/terark/io/HexCodingStream.cpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | #include "HexCodingStream.hpp" 3 | #include "DataInput.hpp" 4 | #include 5 | 6 | namespace terark { 7 | 8 | // '0' == 0x30 9 | // 'a' == 0x61 10 | // 'A' == 0x41 11 | const unsigned char G_hex_val_hexTab[] = 12 | { 13 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14 | 255, 255, 255, 255, 255, 255, 15 | // below, begin with '0' + 16 = 0x40 16 | 255, 17 | 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 18 | 255, 255, 255, 255, 255, 19 | 255, 255, 255, 255, 255, 20 | // below, begin with 'A' + 16 = 0x51 21 | 255, 255, 255, 255, 22 | 255, 255, 255, 255, 23 | 255, 255, 255, 255, 24 | 255, 255, 255, 255, 25 | // below, begin with 'a' = 0x61 26 | 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 27 | }; 28 | 29 | void invalid_hex_char(unsigned char ch, const char* func) 30 | { 31 | string_appender<> oss; 32 | oss << "invalid hex char(ch=" << char(ch) << ",ascii=" << int(ch) << ") in func: " << func; 33 | throw DataFormatException(oss.str()); 34 | } 35 | 36 | } // namespace terark 37 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/avx2/enc_loop.c:
-------------------------------------------------------------------------------- 1 | // If we have AVX2 support, pick off 24 bytes at a time for as long as we can. 2 | // But because we read 32 bytes at a time, ensure we have enough room to do a 3 | // full 32-byte read without segfaulting: 4 | 5 | if (srclen >= 32) { 6 | const uint8_t* const o_orig = o; 7 | 8 | // first load is done at c-0 not to get a segfault 9 | __m256i inputvector = _mm256_loadu_si256((__m256i *)(c - 0)); 10 | 11 | // shift by 4 bytes, as required by enc_reshuffle 12 | inputvector = _mm256_permutevar8x32_epi32(inputvector, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6)); 13 | 14 | for (;;) { 15 | inputvector = enc_reshuffle(inputvector); 16 | inputvector = enc_translate(inputvector); 17 | _mm256_storeu_si256((__m256i *)o, inputvector); 18 | c += 24; 19 | o += 32; 20 | srclen -= 24; 21 | if(srclen < 28) { 22 | break; 23 | } 24 | // Load at c-4, as required by enc_reshuffle 25 | inputvector = _mm256_loadu_si256((__m256i *)(c - 4)); 26 | } 27 | outl += (size_t)(o - o_orig); 28 | } 29 | -------------------------------------------------------------------------------- /src/terark/fsa/ppi/flat_dfa_mmap.hpp: -------------------------------------------------------------------------------- 1 | 2 | void finish_load_mmap(const DFA_MmapHeader* base) override { 3 | assert(sizeof(State) == base->state_size); 4 | byte_t* bbase = (byte_t*)base; 5 | if (base->total_states >= size_t(-1)) { 6 | THROW_STD(out_of_range, "total_states=%lld", (long long)base->total_states); 7 | } 8 | states.clear(); 9 | states.risk_set_data((State*)(bbase + base->blocks[0].offset)); 10 | states.risk_set_size(size_t(base->total_states)); 11 | states.risk_set_capacity(size_t(base->total_states)); 12 | m_gnode_states = size_t(base->gnode_states); 13 | m_zpath_states = size_t(base->zpath_states); 14 | this->set_trans_num(size_t(base->transition_num)); 15 | } 16 | 17 | long prepare_save_mmap(DFA_MmapHeader* base, const void** dataPtrs) 18 | 
const override { 19 | base->state_size = sizeof(State); 20 | base->transition_num = total_transitions(); 21 | base->num_blocks = 1; 22 | base->blocks[0].offset = sizeof(DFA_MmapHeader); 23 | base->blocks[0].length = sizeof(State)*states.size(); 24 | dataPtrs[0] = states.data(); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /src/terark/pass_by_value.hpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | #pragma once 3 | 4 | //#if defined(_MSC_VER) && (_MSC_VER >= 1020) 5 | //# pragma once 6 | //#endif 7 | 8 | namespace terark { 9 | 10 | //! 当 T 是一个人造的引用时,使用这个类来转发调用 11 | //! 12 | //! input >> t 实际调用的是 void DataIO_loadObject(Input& input, T t) 13 | //! 这里 pass_by_value 和 T 都是传值调用的 14 | //! 15 | //! T 中包含一个真实的引用,例如当 T 是 load_as_var_int_proxy 时 16 | //! 这样,就不需要将每个类似 load_as_var_int_proxy 的 Class 都写到 DataInput 接口中 17 | //! 从而 DataInput 接口只需要一个 pass_by_value 18 | //! 19 | //! 如此,实际上是使用了两个中间层一个是 load_as_var_int_proxy,用来做真实的 proxy 20 | //! 另一个就是 pass_by_value 了,只用来适配 DataInput 接口, 21 | //! 因为作为 T& 不能绑定到临时变量 22 | //! ---- Add this line for Microsoft C++ 2013 brain dead compiler error ---- 23 | template class pass_by_value 24 | { 25 | public: 26 | T val; 27 | 28 | typedef T type; 29 | 30 | pass_by_value(const T& val) : val(val) {} 31 | 32 | T& operator=(const T& y) { val = y; return val; } 33 | 34 | operator T&() { return val; } 35 | 36 | T& get() { return val; } 37 | }; 38 | 39 | } 40 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/sse41/enc_reshuffle.c: -------------------------------------------------------------------------------- 1 | static inline __m128i 2 | enc_reshuffle (__m128i in) 3 | { 4 | // Slice into 32-bit chunks and operate on all chunks in parallel. 5 | // All processing is done within the 32-bit chunk. 
First, shuffle: 6 | // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb] 7 | // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd] 8 | in = _mm_shuffle_epi8(in, _mm_set_epi8( 9 | -1, 9, 10, 11, 10 | -1, 6, 7, 8, 11 | -1, 3, 4, 5, 12 | -1, 0, 1, 2)); 13 | 14 | // merged = [0000aaaa|aabbbbbb|bbbbcccc|ccdddddd] 15 | const __m128i merged = _mm_blend_epi16(_mm_slli_epi32(in, 4), in, 0x55); 16 | 17 | // bd = [00000000|00bbbbbb|00000000|00dddddd] 18 | const __m128i bd = _mm_and_si128(merged, _mm_set1_epi32(0x003F003F)); 19 | 20 | // ac = [00aaaaaa|00000000|00cccccc|00000000] 21 | const __m128i ac = _mm_and_si128(_mm_slli_epi32(merged, 2), _mm_set1_epi32(0x3F003F00)); 22 | 23 | // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] 24 | const __m128i indices = _mm_or_si128(ac, bd); 25 | 26 | // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] 27 | return _mm_bswap_epi32(indices); 28 | } 29 | -------------------------------------------------------------------------------- /src/terark/io/todo/DataIO_Parser.hpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | #ifndef __terark_io_DataIO_Parser_h__ 3 | #define __terark_io_DataIO_Parser_h__ 4 | 5 | #if defined(_MSC_VER) && (_MSC_VER >= 1020) 6 | # pragma once 7 | #endif 8 | 9 | #include "StreamBuffer.hpp" 10 | #include "DataInput.hpp" 11 | #include 12 | 13 | namespace terark { 14 | 15 | template 16 | class DataIO_TextReader : public DataInput > 17 | { 18 | public: 19 | 20 | 21 | protected: 22 | PrimInput* input; 23 | DataIO_Parser* parser; 24 | }; 25 | 26 | template 27 | class DataIO_XML_Reader 28 | { 29 | InputBuffer* buf; 30 | public: 31 | Final_Input& operator>>(int& x) 32 | { 33 | int ch = buf->readByte(); 34 | switch (ch) 35 | { 36 | case 0: // oct 37 | break; 38 | 39 | } 40 | do { 41 | ch = 42 | } while (isdigit(ch)); 43 | } 44 | }; 45 | 46 | class DataIO_Parser 47 | { 48 | public: 49 | 50 | protected: 51 | const char* szMemberText; 52 | std::vector memberNames; 53 | }; 
54 | 55 | } 56 | 57 | #endif // __terark_io_DataIO_Parser_h__ 58 | 59 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/deprecated/zbuff_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | /*-************************************* 12 | * Dependencies 13 | ***************************************/ 14 | #include "../common/error_private.h" 15 | #include "zbuff.h" 16 | 17 | /*-**************************************** 18 | * ZBUFF Error Management (deprecated) 19 | ******************************************/ 20 | 21 | /*! ZBUFF_isError() : 22 | * tells if a return value is an error code */ 23 | unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } 24 | /*! 
ZBUFF_getErrorName() : 25 | * provides error code string from function result (useful for debugging) */ 26 | const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } 27 | -------------------------------------------------------------------------------- /gtests/index/adfa_test.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #include "terark/util/function.hpp" 7 | #include "terark/util/linebuf.hpp" 8 | #include "terark/util/profiling.hpp" 9 | #include "terark/hash_strmap.hpp" 10 | #include "terark/fsa/cspptrie.inl" 11 | #include "terark/fsa/nest_trie_dawg.hpp" 12 | 13 | 14 | namespace terark { 15 | 16 | template 17 | void buil_dfa(Inserter inserter) { 18 | DFA dfa; 19 | } 20 | 21 | TEST(ADFA_TEST, EMPTY_DFA_TEST_1) { 22 | // MainPatricia trie(sizeof(uint32_t), 1<<20, Patricia::SingleThreadShared); 23 | MainPatricia dfa; 24 | std::unique_ptr iterU(dfa.adfa_make_iter(initial_state)); 25 | auto iter = iterU.get(); 26 | ASSERT_TRUE(!iter->seek_begin()); 27 | ASSERT_TRUE(!iter->seek_end()); 28 | ASSERT_TRUE(!iter->seek_lower_bound("1")); 29 | ASSERT_TRUE(!iter->seek_lower_bound("2")); 30 | ASSERT_TRUE(!iter->seek_lower_bound("9")); 31 | ASSERT_TRUE(!iter->seek_lower_bound("\xFF")); 32 | } 33 | 34 | TEST(ADFA_TEST, ITERATOR_TEST) { 35 | 36 | } 37 | } 38 | 39 | -------------------------------------------------------------------------------- /src/terark/util/truncate_file.cpp: -------------------------------------------------------------------------------- 1 | #include "truncate_file.hpp" 2 | #include 3 | #include 4 | 5 | #if defined(_MSC_VER) 6 | #include 7 | #else 8 | #include 9 | #endif 10 | #include 11 | #include 12 | #include 13 | 14 | namespace terark { 15 | 16 | TERARK_DLL_EXPORT 17 | void truncate_file(const char* fpath, unsigned long long size) { 18 | #ifdef _MSC_VER 19 | Auto_close_fd fd(::_open(fpath, O_CREAT|O_BINARY|O_RDWR, 0644)); 
20 | #else 21 | Auto_close_fd fd(::open(fpath, O_CREAT|O_RDWR, 0644)); 22 | #endif 23 | if (fd < 0) { 24 | THROW_STD(logic_error 25 | , "FATAL: ::open(%s, O_CREAT|O_BINARY|O_RDWR) = %s" 26 | , fpath, strerror(errno)); 27 | } 28 | #ifdef _MSC_VER 29 | int err = ::_chsize_s(fd, size); 30 | if (err) { 31 | THROW_STD(logic_error, "FATAL: ::_chsize_s(%s, %lld) = %s" 32 | , fpath, size, strerror(errno)); 33 | } 34 | #else 35 | int err = ::ftruncate(fd, size); 36 | if (err) { 37 | THROW_STD(logic_error, "FATAL: ::truncate(%s, %lld) = %s" 38 | , fpath, size, strerror(errno)); 39 | } 40 | #endif 41 | } 42 | 43 | } // namespace terark 44 | -------------------------------------------------------------------------------- /src/terark/zbs/ZstdStream.hpp: -------------------------------------------------------------------------------- 1 | #if defined(_MSC_VER) && (_MSC_VER >= 1020) 2 | #pragma once 3 | #endif 4 | 5 | #include 6 | #include 7 | 8 | namespace terark { 9 | 10 | class TERARK_DLL_EXPORT ZstdInputStream : public IInputStream, public RefCounter { 11 | DECLARE_NONE_COPYABLE_CLASS(ZstdInputStream) 12 | 13 | public: 14 | explicit ZstdInputStream(IInputStream*); 15 | ~ZstdInputStream(); 16 | 17 | void resetIstream(IInputStream*); 18 | size_t read(void* buf, size_t size) throw(); 19 | bool eof() const; 20 | 21 | private: 22 | class Impl; 23 | Impl* m_impl; 24 | }; 25 | 26 | class TERARK_DLL_EXPORT ZstdOutputStream : public IOutputStream, public RefCounter { 27 | DECLARE_NONE_COPYABLE_CLASS(ZstdOutputStream) 28 | 29 | public: 30 | explicit ZstdOutputStream(IOutputStream*); 31 | ~ZstdOutputStream(); 32 | 33 | void setCLevel(size_t l); 34 | void resetOstream(IOutputStream*); 35 | size_t write(const void* buf, size_t size) throw(); 36 | void flush(); 37 | void close(); 38 | 39 | private: 40 | class Impl; 41 | Impl* m_impl; 42 | }; 43 | 44 | } // namespace terark -------------------------------------------------------------------------------- 
/3rdparty/base64/test/moby_dick_plain.txt: -------------------------------------------------------------------------------- 1 | Call me Ishmael. Some years ago--never mind how long precisely--having 2 | little or no money in my purse, and nothing particular to interest me on 3 | shore, I thought I would sail about a little and see the watery part of 4 | the world. It is a way I have of driving off the spleen and regulating 5 | the circulation. Whenever I find myself growing grim about the mouth; 6 | whenever it is a damp, drizzly November in my soul; whenever I find 7 | myself involuntarily pausing before coffin warehouses, and bringing up 8 | the rear of every funeral I meet; and especially whenever my hypos get 9 | such an upper hand of me, that it requires a strong moral principle to 10 | prevent me from deliberately stepping into the street, and methodically 11 | knocking people's hats off--then, I account it high time to get to sea 12 | as soon as I can. This is my substitute for pistol and ball. With a 13 | philosophical flourish Cato throws himself upon his sword; I quietly 14 | take to the ship. There is nothing surprising in this. If they but knew 15 | it, almost all men in their degree, some time or other, cherish very 16 | nearly the same feelings towards the ocean with me. 17 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/generic/64/enc_loop.c: -------------------------------------------------------------------------------- 1 | // If we have 64-bit ints, pick off 6 bytes at a time for as long as we can, 2 | // but ensure that there are at least 8 bytes available to avoid segfaulting: 3 | while (srclen >= 8) 4 | { 5 | // Load string: 6 | uint64_t str = *(uint64_t *)c; 7 | 8 | // Reorder to 64-bit big-endian, if not already in that format. 
The 9 | // workset must be in big-endian, otherwise the shifted bits do not 10 | // carry over properly among adjacent bytes: 11 | str = cpu_to_be64(str); 12 | 13 | // Shift input by 6 bits each round and mask in only the lower 6 bits; 14 | // look up the character in the Base64 encoding table and write it to 15 | // the output location: 16 | *o++ = base64_table_enc[(str >> 58) & 0x3F]; 17 | *o++ = base64_table_enc[(str >> 52) & 0x3F]; 18 | *o++ = base64_table_enc[(str >> 46) & 0x3F]; 19 | *o++ = base64_table_enc[(str >> 40) & 0x3F]; 20 | *o++ = base64_table_enc[(str >> 34) & 0x3F]; 21 | *o++ = base64_table_enc[(str >> 28) & 0x3F]; 22 | *o++ = base64_table_enc[(str >> 22) & 0x3F]; 23 | *o++ = base64_table_enc[(str >> 16) & 0x3F]; 24 | 25 | c += 6; // 6 bytes of input 26 | outl += 8; // 8 bytes of output 27 | srclen -= 6; 28 | } 29 | -------------------------------------------------------------------------------- /tests/core/test_boost_fss.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-08-28.
3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | void clean_fss_int(int* p) { 10 | printf("destroy int = %d\n", *p); 11 | } 12 | static boost::fibers::fiber_specific_ptr fs0(clean_fss_int); 13 | static thread_local boost::fibers::fiber_specific_ptr fs1(clean_fss_int); 14 | 15 | int main(int argc, char* argv[]) { 16 | boost::fibers::fiber_specific_ptr fs2(clean_fss_int); 17 | auto func = [&](const char* name) { 18 | printf("---- %s ----\n", name); 19 | boost::fibers::fiber_specific_ptr fs3(clean_fss_int); 20 | if (fs0.get() == NULL) { 21 | fs0.reset(new int(0)); 22 | } 23 | if (fs1.get() == NULL) { 24 | fs1.reset(new int(1)); 25 | } 26 | if (fs2.get() == NULL) { 27 | fs2.reset(new int(2)); 28 | } 29 | if (fs3.get() == NULL) { 30 | fs3.reset(new int(3)); 31 | } 32 | }; 33 | std::thread thr(func, "thread"); 34 | thr.join(); 35 | boost::fibers::fiber fb(func, "fiber"); 36 | fb.join(); 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /gtests/tools/core/test_boost_fss.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2019-08-28. 
3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | void clean_fss_int(int* p) { 10 | printf("destroy int = %d\n", *p); 11 | } 12 | static boost::fibers::fiber_specific_ptr fs0(clean_fss_int); 13 | static thread_local boost::fibers::fiber_specific_ptr fs1(clean_fss_int); 14 | 15 | int main(int argc, char* argv[]) { 16 | boost::fibers::fiber_specific_ptr fs2(clean_fss_int); 17 | auto func = [&](const char* name) { 18 | printf("---- %s ----\n", name); 19 | boost::fibers::fiber_specific_ptr fs3(clean_fss_int); 20 | if (fs0.get() == NULL) { 21 | fs0.reset(new int(0)); 22 | } 23 | if (fs1.get() == NULL) { 24 | fs1.reset(new int(1)); 25 | } 26 | if (fs2.get() == NULL) { 27 | fs2.reset(new int(2)); 28 | } 29 | if (fs3.get() == NULL) { 30 | fs3.reset(new int(3)); 31 | } 32 | }; 33 | std::thread thr(func, "thread"); 34 | thr.join(); 35 | boost::fibers::fiber fb(func, "fiber"); 36 | fb.join(); 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /tools/general/text_key_val_to_kvbin.cpp: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #ifdef _MSC_VER 8 | #include 9 | #include 10 | #endif 11 | 12 | int main(int argc, char* argv[]) { 13 | BOOST_STATIC_ASSERT(sizeof(int) == 4); 14 | int kvlen[2]; // int32 15 | int lineno = 0; 16 | #ifdef _MSC_VER 17 | if (_setmode(_fileno(stdout), _O_BINARY) < 0) { 18 | THROW_STD(invalid_argument, "set stdout as binary mode failed"); 19 | } 20 | #endif 21 | terark::LineBuf line; 22 | while (line.getline(stdin) > 0) { 23 | lineno++; 24 | line.chomp(); 25 | if (line.empty()) { 26 | fprintf(stderr, "line:%d is empty\n", lineno); 27 | continue; 28 | } 29 | const char* beg = line.begin(); 30 | const char* end = line.end(); 31 | const char* tab = std::find(beg, end, '\t'); 32 | if (tab == end) { 33 | kvlen[1] = 0; 34 | } else { 35 | kvlen[1] = end - tab - 
1; 36 | } 37 | kvlen[0] = tab - beg; 38 | fwrite(kvlen, 1, sizeof(kvlen), stdout); 39 | fwrite(beg+0, 1, kvlen[0], stdout); 40 | fwrite(tab+1, 1, kvlen[1], stdout); 41 | } 42 | return 0; 43 | } 44 | 45 | -------------------------------------------------------------------------------- /gtests/tools/core/test_ProcPipeStream.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) { 5 | using namespace terark; 6 | { 7 | LineBuf line; 8 | ProcPipeStream pp("echo aaaa", "r"); 9 | printf("reading result\n"); 10 | //line.getline(pp); 11 | 12 | line.read_all(pp); 13 | line.chomp(); 14 | assert(line.size() == 4); 15 | printf("read result = len=%zd : %s\n", line.n, line.p); 16 | assert(fstring(line) == "aaaa"); 17 | printf("1 passed\n"); 18 | } 19 | 20 | { 21 | printf("2 begin...\n"); 22 | ProcPipeStream pp("cat > proc.test.tmp", "w"); 23 | fprintf(pp, "%s\n", "bbbb"); 24 | pp.close(); 25 | 26 | LineBuf line; 27 | line.read_all("proc.test.tmp"); 28 | line.chomp(); 29 | assert(fstring(line) == "bbbb"); 30 | printf("2 passed\n"); 31 | } 32 | 33 | try { 34 | ProcPipeStream pp("test-non-existed-file", "r"); 35 | pp.close(); 36 | assert(pp.err_code() != 0); // will not goes here 37 | } 38 | catch (const std::exception&) { 39 | } 40 | printf("3 passed\n"); 41 | 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /tools/zbs/zip-bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | cd /home/leipeng/terark-mysql/storage/rocksdb/terarkdb/terark-core/tools/zbs 7 | export LD_LIBRARY_PATH=../../build/Linux-x86_64-g++-4.9-bmi2-1/lib_shared 8 | 9 | for ifile in /data01/hdfs_data/raw.json.*; do 10 | for sampleRatio in 030 040 045; do 11 | ofile=/data00/leipeng/bmq-${ifile#/data01/hdfs_data/raw.json.} 12 | time env DictZipBlobStore_zipThreads=0 
rls/zbs_build.exe -ZEBp -j128 -S 0.$sampleRatio -o ${ofile}.zbs.${sampleRatio} $ifile 13 | rls/zbs_unzip.exe -t -b 1 -T 10 ${ofile}.zbs.${sampleRatio} 14 | rls/zbs_unzip.exe -r -t -b 1 -T 10 ${ofile}.zbs.${sampleRatio} 15 | time env DictZipBlobStore_zipThreads=0 rls/zbs_build.exe -ZEBp -j128 -S 0.$sampleRatio -o ${ofile}.zbs.${sampleRatio}.huf -e h $ifile 16 | rls/zbs_unzip.exe -t -b 1 -T 10 ${ofile}.zbs.${sampleRatio}.huf 17 | rls/zbs_unzip.exe -r -t -b 1 -T 10 ${ofile}.zbs.${sampleRatio}.huf 18 | done 19 | time rls/zbs_build.exe -j128 -z 6 -T o -o ${ofile}.zstd.rec -B $ifile 20 | rls/zbs_unzip.exe -t -b 1 -T 10 ${ofile}.zstd.rec 21 | rls/zbs_unzip.exe -r -t -b 1 -T 10 ${ofile}.zstd.rec 22 | time zstd -f -o ${ofile}.zstd.all $ifile 23 | time zstd -d < ${ofile}.zstd.all > /dev/null 24 | done 25 | 26 | -------------------------------------------------------------------------------- /src/terark/util/memcmp_coding.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace terark { 6 | 7 | TERARK_DLL_EXPORT 8 | char* encode_0_01_00(const char* ibeg, const char* iend, char* obeg, char* oend); 9 | 10 | TERARK_DLL_EXPORT 11 | char* decode_01_00(const char* ibeg, const char**ires, char* obeg, char* oend); 12 | 13 | TERARK_DLL_EXPORT 14 | const char* end_of_01_00(const char* encoded); 15 | 16 | TERARK_DLL_EXPORT 17 | const char* end_of_01_00(const char* beg, const char* end); 18 | 19 | // float encoding/decoding intentinally use unsigned char* 20 | TERARK_DLL_EXPORT 21 | unsigned char* encode_memcmp_float(float src, unsigned char* dst); 22 | 23 | TERARK_DLL_EXPORT 24 | unsigned char* encode_memcmp_double(double src, unsigned char* dst); 25 | 26 | TERARK_DLL_EXPORT const unsigned char* 27 | decode_memcmp_float(const unsigned char* src, float* dst); 28 | 29 | TERARK_DLL_EXPORT const unsigned char* 30 | decode_memcmp_double(const unsigned char* src, double* dst); 31 | 32 | template 33 | 
TERARK_DLL_EXPORT 34 | unsigned char* encode_memcmp_real(Real src, unsigned char* dst); 35 | 36 | template 37 | TERARK_DLL_EXPORT 38 | const unsigned char* 39 | decode_memcmp_real(const unsigned char* src, Real* dst); 40 | 41 | } // namespace terark 42 | -------------------------------------------------------------------------------- /src/terark/io/todo/inter_thread_pipe.cpp: -------------------------------------------------------------------------------- 1 | #include "inter_thread_pipe.cpp" 2 | 3 | namespace terark { 4 | 5 | class inter_thread_pipe_impl 6 | { 7 | boost::mutex m_mutex; 8 | boost::condition m_cond; 9 | unsigned char *m_bufp, *m_putp, *m_getp; 10 | size_t m_size; 11 | long m_timeout; 12 | 13 | public: 14 | bool eof() 15 | { 16 | boost::mutex::scoped_lock lock(m_mutex); 17 | return (m_size+(m_get-m_putp)) % m_size == 1; 18 | } 19 | 20 | size_t read(void* vbuf, size_t length) 21 | { 22 | boost::mutex::scoped_lock lock(m_mutex); 23 | } 24 | 25 | size_t write(void* vbuf, size_t length) 26 | { 27 | 28 | } 29 | 30 | void flush() 31 | { 32 | } 33 | }; 34 | 35 | inter_thread_pipe::inter_thread_pipe(size_t capacity) 36 | : mio(new capacity) 37 | { 38 | } 39 | 40 | inter_thread_pipe::~inter_thread_pipe() 41 | { 42 | delete capacity; 43 | } 44 | 45 | bool inter_thread_pipe::eof() 46 | { 47 | 48 | return mio->eof(); 49 | } 50 | 51 | size_t inter_thread_pipe::read(void* vbuf, size_t length) 52 | { 53 | return mio->read(vbuf, length); 54 | } 55 | 56 | size_t inter_thread_pipe::write(void* vbuf, size_t length) 57 | { 58 | 59 | } 60 | 61 | void inter_thread_pipe::flush() 62 | { 63 | } 64 | 65 | } // namespace thread 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/neon64/enc_loop.c: -------------------------------------------------------------------------------- 1 | // If we have ARM NEON support, pick off 48 bytes at a time: 2 | while (srclen >= 48) 3 | { 4 | uint8x16x3_t str; 5 | 
uint8x16x4_t res; 6 | 7 | // Load 48 bytes and deinterleave: 8 | str = vld3q_u8((uint8_t *)c); 9 | 10 | // Divide bits of three input bytes over four output bytes: 11 | res.val[0] = vshrq_n_u8(str.val[0], 2); 12 | res.val[1] = vshrq_n_u8(str.val[1], 4) | vshlq_n_u8(str.val[0], 4); 13 | res.val[2] = vshrq_n_u8(str.val[2], 6) | vshlq_n_u8(str.val[1], 2); 14 | res.val[3] = str.val[2]; 15 | 16 | // Clear top two bits: 17 | res.val[0] &= vdupq_n_u8(0x3F); 18 | res.val[1] &= vdupq_n_u8(0x3F); 19 | res.val[2] &= vdupq_n_u8(0x3F); 20 | res.val[3] &= vdupq_n_u8(0x3F); 21 | 22 | // The bits have now been shifted to the right locations; 23 | // translate their values 0..63 to the Base64 alphabet. 24 | // Use a 64-byte table lookup: 25 | res.val[0] = vqtbl4q_u8(tbl_enc, res.val[0]); 26 | res.val[1] = vqtbl4q_u8(tbl_enc, res.val[1]); 27 | res.val[2] = vqtbl4q_u8(tbl_enc, res.val[2]); 28 | res.val[3] = vqtbl4q_u8(tbl_enc, res.val[3]); 29 | 30 | // Interleave and store result: 31 | vst4q_u8((uint8_t *)o, res); 32 | 33 | c += 48; // 3 * 16 bytes of input 34 | o += 64; // 4 * 16 bytes of output 35 | outl += 64; 36 | srclen -= 48; 37 | } 38 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/ssse3/enc_translate.c: -------------------------------------------------------------------------------- 1 | static inline __m128i 2 | enc_translate (const __m128i in) 3 | { 4 | // LUT contains Absolute offset for all ranges: 5 | const __m128i lut = _mm_setr_epi8( 6 | 65, 71, -4, -4, 7 | -4, -4, -4, -4, 8 | -4, -4, -4, -4, 9 | -19, -16, 0, 0 10 | ); 11 | 12 | // Translate values 0..63 to the Base64 alphabet. 
There are five sets: 13 | // # From To Abs Index Characters 14 | // 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ 15 | // 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz 16 | // 2 [52..61] [48..57] -4 [2..11] 0123456789 17 | // 3 [62] [43] -19 12 + 18 | // 4 [63] [47] -16 13 / 19 | 20 | // Create LUT indices from input: 21 | // the index for range #0 is right, others are 1 less than expected: 22 | __m128i indices = _mm_subs_epu8(in, _mm_set1_epi8(51)); 23 | 24 | // mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: 25 | __m128i mask = CMPGT(in, 25); 26 | 27 | // substract -1, so add 1 to indices for range #[1..4], All indices are now correct: 28 | indices = _mm_sub_epi8(indices, mask); 29 | 30 | // Add offsets to input values: 31 | __m128i out = _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices)); 32 | 33 | return out; 34 | } 35 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/compress/zstd_compress_superblock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef ZSTD_COMPRESS_ADVANCED_H 12 | #define ZSTD_COMPRESS_ADVANCED_H 13 | 14 | /*-************************************* 15 | * Dependencies 16 | ***************************************/ 17 | 18 | #include "../zstd.h" /* ZSTD_CCtx */ 19 | 20 | /*-************************************* 21 | * Target Compressed Block Size 22 | ***************************************/ 23 | 24 | /* ZSTD_compressSuperBlock() : 25 | * Used to compress a super block when targetCBlockSize is being used. 
26 | * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ 27 | size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, 28 | void* dst, size_t dstCapacity, 29 | void const* src, size_t srcSize, 30 | unsigned lastBlock); 31 | 32 | #endif /* ZSTD_COMPRESS_ADVANCED_H */ 33 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/generic/32/dec_loop.c: -------------------------------------------------------------------------------- 1 | // Read source 4 bytes at a time 2 | // Since we might be writing one byte more than needed, 3 | // we need to make sure there will still be some room 4 | // for one extra byte in o. 5 | // This will be the case if srclen > 0 when the loop 6 | // is exited 7 | while (srclen > 4) 8 | { 9 | union { 10 | uint32_t asint; 11 | uint8_t aschar[4]; 12 | } x; 13 | 14 | x.asint = base64_table_dec_d0[c[0]] 15 | | base64_table_dec_d1[c[1]] 16 | | base64_table_dec_d2[c[2]] 17 | | base64_table_dec_d3[c[3]]; 18 | 19 | #if BASE64_LITTLE_ENDIAN 20 | // LUTs for little-endian set Most Significant Bit 21 | // in case of invalid character 22 | if (x.asint & 0x80000000U) break; 23 | #else 24 | // LUTs for big-endian set Least Significant Bit 25 | // in case of invalid character 26 | if (x.asint & 1U) break; 27 | #endif 28 | 29 | #if HAVE_FAST_UNALIGNED_ACCESS 30 | // This might segfault or be too slow on 31 | // some architectures, do this only if specified 32 | // with HAVE_FAST_UNALIGNED_ACCESS macro 33 | // We write one byte more than needed 34 | *(uint32_t*)o = x.asint; 35 | #else 36 | // Fallback, write bytes one by one 37 | o[0] = x.aschar[0]; 38 | o[1] = x.aschar[1]; 39 | o[2] = x.aschar[2]; 40 | #endif 41 | 42 | c += 4; 43 | o += 3; 44 | outl += 3; 45 | srclen -= 4; 46 | } 47 | -------------------------------------------------------------------------------- /src/terark/util/DataBuffer.cpp: 
-------------------------------------------------------------------------------- 1 | #include "DataBuffer.hpp" 2 | 3 | namespace terark { 4 | 5 | inline 6 | DataBuffer::DataBuffer(size_t size) 7 | : m_refcount(0), m_size(size) 8 | {} 9 | 10 | DataBuffer* DataBuffer::create(size_t size) 11 | { 12 | DataBuffer* p = (DataBuffer*)new char[sizeof(DataBuffer) + size]; 13 | new (p) DataBuffer(size); // placement new... 14 | return p; 15 | } 16 | void DataBuffer::destroy(DataBuffer* p) 17 | { 18 | char* pb = (char*)p; 19 | delete [] pb; 20 | } 21 | 22 | DataBufferPtr::DataBufferPtr(size_t size) 23 | : MyBase(DataBuffer::create(size)) 24 | {} 25 | 26 | // SmartBuffer 27 | 28 | SmartBuffer::SmartBuffer(size_t size) 29 | { 30 | m_data = size ? new byte[size] : 0; 31 | m_size = size; 32 | m_refcountp = new std::atomic(1); 33 | } 34 | 35 | SmartBuffer::~SmartBuffer() 36 | { 37 | if (m_refcountp && 0 == --*m_refcountp) 38 | { 39 | delete m_refcountp; 40 | delete [] m_data; 41 | } 42 | } 43 | 44 | SmartBuffer::SmartBuffer(const SmartBuffer& rhs) 45 | : m_data(rhs.m_data) 46 | , m_size(rhs.m_size) 47 | , m_refcountp(rhs.m_refcountp) 48 | { 49 | if (m_refcountp) 50 | ++*m_refcountp; 51 | } 52 | 53 | const SmartBuffer& SmartBuffer::operator=(const SmartBuffer& rhs) 54 | { 55 | SmartBuffer(rhs).swap(*this); 56 | return *this; 57 | } 58 | 59 | 60 | 61 | 62 | 63 | } // namespace terark 64 | 65 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/compress/zstd_compress_literals.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 
8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef ZSTD_COMPRESS_LITERALS_H 12 | #define ZSTD_COMPRESS_LITERALS_H 13 | 14 | #include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ 15 | 16 | 17 | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); 18 | 19 | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); 20 | 21 | size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, 22 | ZSTD_hufCTables_t* nextHuf, 23 | ZSTD_strategy strategy, int disableLiteralCompression, 24 | void* dst, size_t dstCapacity, 25 | const void* src, size_t srcSize, 26 | void* entropyWorkspace, size_t entropyWorkspaceSize, 27 | const int bmi2); 28 | 29 | #endif /* ZSTD_COMPRESS_LITERALS_H */ 30 | -------------------------------------------------------------------------------- /src/terark/io/FileDataIO.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "DataIO.hpp" 4 | #include "FileStream.hpp" 5 | #include "StreamBuffer.hpp" 6 | 7 | namespace terark { 8 | template 9 | class FileDataInput : public DataIO { 10 | public: 11 | FileStream file; 12 | FileDataInput(const char* fname) : file(fname, "rb") { 13 | this->attach(&file); 14 | } 15 | }; 16 | template 17 | class FileDataOutput : public DataIO { 18 | public: 19 | FileStream file; 20 | FileDataOutput(const char* fname) : file(fname, "wb") { 21 | this->attach(&file); 22 | } 23 | ~FileDataOutput() { 24 | this->flush(); 25 | this->attach(NULL); 26 | } 27 | }; 28 | 29 | typedef FileDataInput > NativeFileDataInput; 30 | typedef FileDataOutput > NativeFileDataOutput; 31 | typedef FileDataInput > PortableFileDataInput; 32 | typedef FileDataOutput > PortableFileDataOutput; 33 | 34 | typedef FileDataInput > BigEndianFileDataInput; 35 | typedef FileDataOutput > BigEndianFileDataOutput; 36 | typedef FileDataInput > 
LittleEndianFileDataInput; 37 | typedef FileDataOutput > LittleEndianFileDataOutput; 38 | } 39 | -------------------------------------------------------------------------------- /src/terark/io/discard/hole_stream.hpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | #ifndef __terark_io_hole_stream_h__ 3 | #define __terark_io_hole_stream_h__ 4 | 5 | #if defined(_MSC_VER) && (_MSC_VER >= 1020) 6 | # pragma once 7 | #endif 8 | 9 | //#include 10 | //#include // for memcpy 11 | //#include 12 | //#include 13 | 14 | #include 15 | //#include "IOException.hpp" 16 | 17 | namespace terark { 18 | 19 | class HoleStream 20 | { 21 | public: 22 | explicit HoleStream() : m_pos(0) {} 23 | 24 | // size_t read(void* vbuf, size_t length) { m_pos += length; return length; } 25 | size_t write(const void* vbuf, size_t length) { m_pos += length; return length; } 26 | 27 | // void ensureRead(void* vbuf, size_t length) { m_pos += length; } 28 | void ensureWrite(const void* vbuf, size_t length) { m_pos += length; } 29 | 30 | // byte readByte() { return 0; } 31 | 32 | void writeByte(unsigned char) { m_pos++; } 33 | 34 | private: 35 | stream_position_t m_pos; 36 | }; 37 | 38 | class SeekableHoleStream 39 | { 40 | public: 41 | explicit SeekableHoleStream(stream_position_t size) 42 | { 43 | m_pos = 0; 44 | m_size = size; 45 | } 46 | 47 | private: 48 | stream_position_t m_pos; 49 | stream_position_t m_size; 50 | }; 51 | 52 | } // namespace terark 53 | 54 | #endif 55 | 56 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/compress/zstd_fast.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef ZSTD_FAST_H 12 | #define ZSTD_FAST_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #include "../common/mem.h" /* U32 */ 19 | #include "zstd_compress_internal.h" 20 | 21 | void ZSTD_fillHashTable(ZSTD_matchState_t* ms, 22 | void const* end, ZSTD_dictTableLoadMethod_e dtlm); 23 | size_t ZSTD_compressBlock_fast( 24 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 25 | void const* src, size_t srcSize); 26 | size_t ZSTD_compressBlock_fast_dictMatchState( 27 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 28 | void const* src, size_t srcSize); 29 | size_t ZSTD_compressBlock_fast_extDict( 30 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 31 | void const* src, size_t srcSize); 32 | 33 | #if defined (__cplusplus) 34 | } 35 | #endif 36 | 37 | #endif /* ZSTD_FAST_H */ 38 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/dll/example/fullbench-dll.sln: -------------------------------------------------------------------------------- 1 | Microsoft Visual Studio Solution File, Format Version 12.00 2 | # Visual Studio Express 2012 for Windows Desktop 3 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "fullbench-dll", "fullbench-dll.vcxproj", "{13992FD2-077E-4954-B065-A428198201A9}" 4 | EndProject 5 | Global 6 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 7 | Debug|Win32 = Debug|Win32 8 | Debug|x64 = Debug|x64 9 | Release|Win32 = Release|Win32 10 | Release|x64 = Release|x64 11 | EndGlobalSection 12 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 13 | 
{13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.ActiveCfg = Debug|Win32 14 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|Win32.Build.0 = Debug|Win32 15 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.ActiveCfg = Debug|x64 16 | {13992FD2-077E-4954-B065-A428198201A9}.Debug|x64.Build.0 = Debug|x64 17 | {13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.ActiveCfg = Release|Win32 18 | {13992FD2-077E-4954-B065-A428198201A9}.Release|Win32.Build.0 = Release|Win32 19 | {13992FD2-077E-4954-B065-A428198201A9}.Release|x64.ActiveCfg = Release|x64 20 | {13992FD2-077E-4954-B065-A428198201A9}.Release|x64.Build.0 = Release|x64 21 | EndGlobalSection 22 | GlobalSection(SolutionProperties) = preSolution 23 | HideSolutionNode = FALSE 24 | EndGlobalSection 25 | EndGlobal 26 | -------------------------------------------------------------------------------- /gtests/rank_select/rank_select_few_reg_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define RS_REG_TEST(__P, __W) \ 7 | TEST(RANK_SELECT_FEW_REG_TEST, PIVOT_##__P##_WIDTH_##__W){ \ 8 | rank_select_few<__P, __W> rs; \ 9 | rank_select_few_builder<__P, __W> rsbuild(50, 3276849, false); \ 10 | for(int i = 0; i < 3276899; i++) \ 11 | if(i < 588939 || i > 588988) rsbuild.insert(i); \ 12 | rsbuild.finish(&rs); \ 13 | for(int i = 0; i < 50; i++){ \ 14 | ASSERT_TRUE(rs.select0(i) == 588939+i); \ 15 | ASSERT_TRUE(rs.rank0(588939+i) == i); \ 16 | ASSERT_TRUE(rs.zero_seq_revlen(588939+i) == i); \ 17 | } \ 18 | ASSERT_TRUE(rs.zero_seq_revlen(588989) == 50); \ 19 | ASSERT_TRUE(rs.zero_seq_revlen(588990) == 0); \ 20 | ASSERT_TRUE(rs.zero_seq_len(3276799) == 0); \ 21 | ASSERT_TRUE(rs.zero_seq_len(3276800) == 0); \ 22 | ASSERT_TRUE(rs.zero_seq_len(3276801) == 0); \ 23 | ASSERT_TRUE(rs.is1(3276799)); \ 24 | ASSERT_TRUE(rs.is1(3276800)); \ 25 | ASSERT_TRUE(rs.is1(3276801)); \ 26 | } 27 | 28 | namespace terark { 29 | RS_REG_TEST(0, 3) 30 | 
RS_REG_TEST(0, 4) 31 | RS_REG_TEST(0, 5) 32 | RS_REG_TEST(0, 6) 33 | RS_REG_TEST(0, 7) 34 | RS_REG_TEST(0, 8) 35 | } -------------------------------------------------------------------------------- /src/terark/thread/fiber_pool.hpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by leipeng on 2022-08-24 14:24 3 | // 4 | 5 | #include "fiber_yield.hpp" 6 | #include 7 | 8 | namespace terark { 9 | 10 | class TERARK_DLL_EXPORT FiberPool : public FiberYield { 11 | public: 12 | static constexpr int MAX_QUEUE_LEN = 256; 13 | static constexpr int DEFAULT_FIBER_CNT = 16; 14 | struct task_t { 15 | void (*func)(void* arg1, size_t arg2, size_t arg3); 16 | void* arg1; 17 | size_t arg2; // theoretically arg1 is enough, we add arg2 & arg3 for common 18 | size_t arg3; // case optimization to avoid user code alloc memory for args. 19 | // also buffered_channel use array to store task_t, with arg2 20 | // and arg3, sizeof(task_t) == 32 is power of 2, this helps 21 | // compiler optimization 22 | }; 23 | explicit FiberPool(boost::fibers::context** activepp); 24 | ~FiberPool(); 25 | void update_fiber_count(int count); 26 | void push(task_t&& task); 27 | bool try_push(const task_t& task); 28 | int wait(int timeout_us); 29 | int wait(); 30 | int fiber_cnt() const { return m_fiber_cnt; } 31 | int pending_cnt() const { return m_pending_cnt; } 32 | protected: 33 | void fiber_proc(int fiber_idx); 34 | int m_fiber_cnt = 0; 35 | int m_pending_cnt = 0; 36 | boost::fibers::buffered_channel m_channel; 37 | }; 38 | 39 | } // namespace terark 40 | -------------------------------------------------------------------------------- /src/terark/util/fast_getcpu.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #if defined(__linux__) && (defined(__amd64__) || defined(__amd64) || \ 4 | defined(__x86_64__) || defined(__x86_64) || \ 5 | defined(__ia64__) || defined(_IA64) || defined(__IA64__) ) || 
\ 6 | defined(__INTEL_COMPILER) && ( \ 7 | defined(__ia64) || defined(__itanium__) || \ 8 | defined(__x86_64) || defined(__x86_64__) ) 9 | 10 | namespace terark { 11 | terark_forceinline unsigned int fast_getcpu(void) { 12 | /* Abused to load per CPU data from limit */ 13 | const unsigned GDT_ENTRY_PER_CPU = 15; 14 | const unsigned __PER_CPU_SEG = (GDT_ENTRY_PER_CPU * 8 + 3); 15 | static const unsigned VGETCPU_CPU_MASK = 0xfff; 16 | unsigned int p; 17 | /* 18 | * Load per CPU data from GDT. LSL is faster than RDTSCP and 19 | * works on all CPUs. This is volatile so that it orders 20 | * correctly wrt barrier() and to keep gcc from cleverly 21 | * hoisting it out of the calling function. 22 | */ 23 | asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); 24 | // unsigned node = p >> 12; 25 | return p & VGETCPU_CPU_MASK; 26 | } 27 | } // namespace terark 28 | 29 | #elif !defined(_MSC_VER) 30 | 31 | #include 32 | namespace terark { 33 | terark_forceinline unsigned int fast_getcpu(void) { 34 | return sched_getcpu(); 35 | } 36 | } // namespace terark 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /src/terark/util/strjoin.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | template 7 | class strjoin_helper { 8 | private: 9 | String v; 10 | typedef typename String::value_type char_t; 11 | typedef strjoin_helper me; 12 | public: 13 | template explicit strjoin_helper(const StrX& x) : v(x) {} 14 | template explicit strjoin_helper(const Char* s, ptrdiff_t n) : v(s, n) {} 15 | operator String() const { return v; } 16 | me& operator+(const String& y) { v += y; return *this; } 17 | me& operator+(const char_t* y) { v += y; return *this; } 18 | me& operator+(const me & y) { v += y.v; return *this; } 19 | friend me operator+(const char_t* x, const me& y) { me t(x); t.v += y.v; return t; } 20 | friend me operator+(const String& x, 
const me& y) { me t(x); t.v += y.v; return t; } 21 | }; 22 | 23 | template 24 | strjoin_helper strjoin(const AnyString& x) { return strjoin_helper(x); } 25 | 26 | strjoin_helper strjoin(const char* s) { return strjoin_helper(s); } 27 | strjoin_helper strjoin(const char* s, ptrdiff_t n) { return strjoin_helper(s, n); } 28 | 29 | strjoin_helper strjoin(const wchar_t* s) { return strjoin_helper(s); } 30 | strjoin_helper strjoin(const wchar_t* s, ptrdiff_t n) { return strjoin_helper(s, n); } 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repo is `topling-zip`, but for compatibility and to keep the full commit history intact, we keep using the namespace `terark`; do not change it. 2 | 3 | To conform to the open source license, the following term disallowing bytedance has been deleted since 2023-04-24, 4 | that is to say: bytedance using this software is no longer illegal and is not a shame. 5 | 6 | # ~~0. Disallow Bytedance Inc.~~ 7 | ~~All changes after 2021-06-01 is disallowed to be used by bytedance,~~ see [LICENSE](LICENSE). 8 | 9 | The term disallowing bytedance has also been deleted from [LICENSE](LICENSE). 10 | 11 | # 1. Compile 12 | ```bash 13 | make -j `nproc` pkg 14 | ``` 15 |
16 |
17 |
18 | 19 | # 1. Introduction 20 | - TerarkZip is a submodule of [TerarkDB](https://github.com/bytedance/terarkdb) 21 | - Users can also use TerarkZip as a compression and indexing algorithm library 22 | - TerarkZip also provides a set of useful utilities including `rank-select`, `bitmap`, etc. 23 | 24 | # 2. Features 25 | - Indexing 26 | - Nested LOUDS Trie 27 | - Compression 28 | - PA-Zip Compression 29 | - Entropy Compression 30 | 31 | # 3. Usage 32 | ## Method 1: CMake 33 | - In your CMakeLists.txt 34 | - ADD_SUBDIRECTORY(terark-zip) 35 | - use `terark-zip` target anywhere you want 36 | 37 | ## Method 2: Static Library 38 | - ./build.sh 39 | - cd output 40 | - move `include` and `lib` directories to your project 41 | 42 | 43 | ## 4. License 44 | - BSD 3-Clause License 45 | -------------------------------------------------------------------------------- /src/terark/fsa/tmplinst.cpp: -------------------------------------------------------------------------------- 1 | #include "tmplinst.hpp" 2 | #include 3 | 4 | namespace terark { 5 | 6 | static hash_strmap& gs_by_dio_class_name() { 7 | static hash_strmap me; 8 | return me; 9 | } 10 | static hash_strmap& gs_by_rtti_class_name () { 11 | static hash_strmap me; 12 | return me; 13 | } 14 | 15 | DFA_ClassMetaInfo::~DFA_ClassMetaInfo() { 16 | } 17 | 18 | const DFA_ClassMetaInfo* DFA_ClassMetaInfo::find(fstring class_name) { 19 | size_t idx = gs_by_dio_class_name().find_i(class_name); 20 | if (gs_by_dio_class_name().end_i() != idx) 21 | return gs_by_dio_class_name().val(idx); 22 | else 23 | return NULL; 24 | } 25 | 26 | const DFA_ClassMetaInfo* DFA_ClassMetaInfo::find(const BaseDFA* dfa) { 27 | size_t idx = gs_by_rtti_class_name().find_i(typeid(*dfa).name()); 28 | if (gs_by_rtti_class_name().end_i() != idx) 29 | return gs_by_rtti_class_name().val(idx); 30 | else 31 | return NULL; 32 | } 33 | 34 | void DFA_ClassMetaInfo::register_me(const char* class_name, const char* rtti_class_name) { 35 | // fprintf(stderr, "register_class:
%s\n", meta->class_name); 36 | this->class_name = class_name; 37 | this->rtti_class_name.assign(rtti_class_name); 38 | gs_by_dio_class_name().insert_i(class_name, this); 39 | gs_by_rtti_class_name().insert_i(rtti_class_name, this); 40 | } 41 | 42 | } // namespace terark 43 | 44 | -------------------------------------------------------------------------------- /tools/general/split_into_sorted_runs.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // write sorted run filename to stdout 5 | // write sorted run content to filename 6 | 7 | using namespace terark; 8 | 9 | int main(int argc, char* argv[]) { 10 | if (argc < 2) { 11 | fprintf(stderr, "usage: %s fnamePrefix\n", argv[0]); 12 | return 1; 13 | } 14 | const char* fnamePrefix = argv[1]; 15 | if (strlen(fnamePrefix) > 100) { 16 | fprintf(stderr, "ERROR: fnamePrefix = %s is too long(max 100)\n", fnamePrefix); 17 | return 1; 18 | } 19 | char fname[128]; 20 | LineBuf line; 21 | valvec prev; 22 | FILE* fo = NULL; 23 | int fileIdx = 0; 24 | while (line.getline(stdin) > 0) { 25 | line.chomp(); 26 | if (NULL == fo || prev > fstring(line)) { 27 | if (fo) { 28 | fclose(fo); 29 | } 30 | sprintf(fname, "%s%06d", fnamePrefix, fileIdx++); 31 | fo = fopen(fname, "w"); 32 | if (NULL == fo) { 33 | fprintf(stderr, "ERROR: fopen(%s, w) = %s\n", fname, strerror(errno)); 34 | return 2; 35 | } 36 | printf("%s\n", fname); 37 | fflush(stdout); 38 | } 39 | prev.assign(line.p, line.n); 40 | line.push_back('\n'); 41 | size_t wn = fwrite(line.p, 1, line.n, fo); 42 | if (wn != line.n) { 43 | fprintf(stderr, "ERROR: fwrite(%s, %zd) = %s\n", fname, line.n, strerror(errno)); 44 | return 3; 45 | } 46 | } 47 | if (fo) { 48 | fclose(fo); 49 | } 50 | return 0; 51 | } 52 | 53 | -------------------------------------------------------------------------------- /tools/zbs/sufsort_bench.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // 
Created by leipeng on 2019-11-04. 3 | // 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | //Makefile:CXXFLAGS:-I../../3rdparty/zstd 12 | 13 | namespace terark { 14 | extern int g_useDivSufSort; 15 | } 16 | using namespace terark; 17 | 18 | int main(int argc, char* argv[]) 19 | try { 20 | bool openMP = getEnvBool("use_openmp", 0); 21 | valvec mem; 22 | size_t fsize = 0; 23 | { 24 | struct ll_stat st; 25 | if (::ll_fstat(0, &st) < 0) { 26 | THROW_STD(runtime_error, "fstat failed"); 27 | } 28 | fsize = st.st_size; 29 | } 30 | use_hugepage_resize_no_init(&mem, pow2_align_up(fsize, 8)*5); 31 | auto rdsize = ::read(0, mem.data(), fsize); 32 | if (size_t(rdsize) != fsize) { 33 | THROW_STD(runtime_error, "ERROR: read(stdin, %zd) = %zd : err = %s\n", fsize, rdsize, strerror(errno)); 34 | } 35 | int* sufarr = (int*)(mem.data() + pow2_align_up(fsize, 8)); 36 | if (g_useDivSufSort == 1) 37 | divsufsort(mem.data(), sufarr, fsize, openMP); 38 | else 39 | sufarr_inducedsort(mem.data(), sufarr, fsize); 40 | 41 | return 0; 42 | } 43 | catch (...) 
{ 44 | fprintf(stderr, "exit 1 on exception\n"); 45 | return 1; 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/terark/str_lex_iter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | namespace terark { 7 | 8 | template 9 | class TERARK_DLL_EXPORT StringLexIteratorT : public CacheAlignedNewDelete { 10 | StringLexIteratorT(const StringLexIteratorT&) = delete; 11 | StringLexIteratorT& operator=(const StringLexIteratorT&) = delete; 12 | protected: 13 | typedef typename terark_get_uchar_type::type uch_t; 14 | valvec m_word; 15 | StringLexIteratorT() = default; 16 | virtual ~StringLexIteratorT(); 17 | public: 18 | typedef basic_fstring fstr; 19 | virtual void dispose(); 20 | virtual bool incr() = 0; 21 | virtual bool decr() = 0; 22 | virtual bool seek_begin(); 23 | virtual bool seek_end() = 0; 24 | virtual bool seek_lower_bound(fstr) = 0; 25 | bool seek_rev_lower_bound(fstr); // convenient function 26 | 27 | fstr word() const { return fstr(m_word.data(), m_word.size()); } 28 | 29 | // for user add app data after m_word.size() and before m_word.capacity() 30 | // user should not add more than 16 bytes app data 31 | valvec& mutable_word() { return m_word; } 32 | }; 33 | 34 | typedef StringLexIteratorT StringLexIterator; 35 | typedef StringLexIteratorT StringLexIterator16; 36 | 37 | struct DisposeAsDelete { 38 | template void operator()(T* p) const { p->dispose(); } 39 | }; 40 | 41 | } // namespace terark 42 | -------------------------------------------------------------------------------- /src/terark/mempool.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "valvec.hpp" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "mempool_lock_free.hpp" 11 | #include "mempool_lock_none.hpp" 12 | #include 
"mempool_fixed_cap.hpp" 13 | #include "mempool_lock_mutex.hpp" 14 | #include "mempool_thread_cache.hpp" 15 | 16 | namespace terark { 17 | 18 | template 19 | class MemPool_CompileX : protected valvec { 20 | protected: 21 | size_t fragment_size; // for compatible with MemPool_Lock(Free|None|Mutex) 22 | typedef valvec mem; 23 | public: 24 | using mem::data; 25 | using mem::size; // bring to public... 26 | using mem::reserve; 27 | using mem::capacity; 28 | using mem::risk_set_data; 29 | using mem::risk_set_size; 30 | using mem::risk_set_capacity; 31 | using mem::risk_release_ownership; 32 | size_t frag_size() const { return fragment_size; } 33 | void risk_set_frag_size(size_t s) { fragment_size = s; } 34 | // void sfree(size_t,size_t) { assert(false); } 35 | // size_t alloc(size_t) { assert(false); return 0; } 36 | // size_t alloc3(size_t,size_t,size_t) { assert(false); return 0; } 37 | valvec* get_valvec() { return this; } 38 | }; 39 | 40 | } // namespace terark 41 | 42 | -------------------------------------------------------------------------------- /src/terark/zbs/simple_zip_blob_store.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | namespace terark { 7 | 8 | class TERARK_DLL_EXPORT SimpleZipBlobStore : public AbstractBlobStore { 9 | struct FileHeader; 10 | valvec m_strpool; 11 | ZipIntVector m_off_len; 12 | UintVecMin0 m_records; 13 | size_t m_lenBits; 14 | 15 | protected: 16 | void init_from_memory(fstring dataMem, Dictionary dict) override; 17 | public: 18 | SimpleZipBlobStore(); 19 | ~SimpleZipBlobStore(); 20 | 21 | void get_meta_blocks(valvec* blocks) const override; 22 | void get_data_blocks(valvec* blocks) const override; 23 | void detach_meta_blocks(const valvec& blocks) override; 24 | void build_from(class SortableStrVec& strVec, const class NestLoudsTrieConfig&); 25 | void load_mmap(fstring fpath, const void* mmapBase, size_t mmapSize); 26 | void 
save_mmap(function write) const override; 27 | using AbstractBlobStore::save_mmap; 28 | 29 | size_t mem_size() const override; 30 | 31 | void get_record_append_imp(size_t recId, valvec* recData) const; 32 | 33 | fstring get_mmap() const override; 34 | void reorder_zip_data(ZReorderMap& newToOld, 35 | function writeAppend, 36 | fstring tmpFile) const override; 37 | }; 38 | 39 | } // namespace terark 40 | -------------------------------------------------------------------------------- /src/terark/zbs/zip_reorder_map.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "zip_reorder_map.hpp" 3 | 4 | namespace terark { 5 | 6 | 7 | ZReorderMap::Builder::~Builder() { 8 | } 9 | 10 | void ZReorderMap::Builder::push_back(size_t value) { 11 | assert(size_ > 0); 12 | assert(value <= 0x7FFFFFFFFFULL); 13 | size_t next_value = size_t(intptr_t(base_value_) + intptr_t(seq_length_) * sign_); 14 | if (value != next_value) { 15 | if (seq_length_ > 1) { 16 | size_t current_value = base_value_ << 1; 17 | writer_.ensureWrite(¤t_value, 5); 18 | writer_ << var_uint64_t(seq_length_); 19 | } 20 | else if (seq_length_ == 1) { 21 | size_t current_value = (base_value_ << 1) | 1; 22 | writer_.ensureWrite(¤t_value, 5); 23 | } 24 | base_value_ = value; 25 | seq_length_ = 1; 26 | } 27 | else { 28 | ++seq_length_; 29 | } 30 | --size_; 31 | } 32 | 33 | void ZReorderMap::Builder::finish() { 34 | assert(size_ == 0); 35 | if (seq_length_ > 1) { 36 | size_t current_value = base_value_ << 1; 37 | writer_.ensureWrite(¤t_value, 5); 38 | writer_ << var_uint64_t(seq_length_); 39 | } 40 | else if (seq_length_ == 1) { 41 | size_t current_value = (base_value_ << 1) | 1; 42 | writer_.ensureWrite(¤t_value, 5); 43 | } 44 | writer_.flush_buffer(); 45 | file_.flush(); 46 | } 47 | 48 | } // namespace terark 49 | -------------------------------------------------------------------------------- /3rdparty/base64/test/moby_dick_base64.txt: 
-------------------------------------------------------------------------------- 1 | Q2FsbCBtZSBJc2htYWVsLiBTb21lIHllYXJzIGFnby0tbmV2ZXIgbWluZCBob3cgbG9uZyBwcmVjaXNlbHktLWhhdmluZwpsaXR0bGUgb3Igbm8gbW9uZXkgaW4gbXkgcHVyc2UsIGFuZCBub3RoaW5nIHBhcnRpY3VsYXIgdG8gaW50ZXJlc3QgbWUgb24Kc2hvcmUsIEkgdGhvdWdodCBJIHdvdWxkIHNhaWwgYWJvdXQgYSBsaXR0bGUgYW5kIHNlZSB0aGUgd2F0ZXJ5IHBhcnQgb2YKdGhlIHdvcmxkLiBJdCBpcyBhIHdheSBJIGhhdmUgb2YgZHJpdmluZyBvZmYgdGhlIHNwbGVlbiBhbmQgcmVndWxhdGluZwp0aGUgY2lyY3VsYXRpb24uIFdoZW5ldmVyIEkgZmluZCBteXNlbGYgZ3Jvd2luZyBncmltIGFib3V0IHRoZSBtb3V0aDsKd2hlbmV2ZXIgaXQgaXMgYSBkYW1wLCBkcml6emx5IE5vdmVtYmVyIGluIG15IHNvdWw7IHdoZW5ldmVyIEkgZmluZApteXNlbGYgaW52b2x1bnRhcmlseSBwYXVzaW5nIGJlZm9yZSBjb2ZmaW4gd2FyZWhvdXNlcywgYW5kIGJyaW5naW5nIHVwCnRoZSByZWFyIG9mIGV2ZXJ5IGZ1bmVyYWwgSSBtZWV0OyBhbmQgZXNwZWNpYWxseSB3aGVuZXZlciBteSBoeXBvcyBnZXQKc3VjaCBhbiB1cHBlciBoYW5kIG9mIG1lLCB0aGF0IGl0IHJlcXVpcmVzIGEgc3Ryb25nIG1vcmFsIHByaW5jaXBsZSB0bwpwcmV2ZW50IG1lIGZyb20gZGVsaWJlcmF0ZWx5IHN0ZXBwaW5nIGludG8gdGhlIHN0cmVldCwgYW5kIG1ldGhvZGljYWxseQprbm9ja2luZyBwZW9wbGUncyBoYXRzIG9mZi0tdGhlbiwgSSBhY2NvdW50IGl0IGhpZ2ggdGltZSB0byBnZXQgdG8gc2VhCmFzIHNvb24gYXMgSSBjYW4uIFRoaXMgaXMgbXkgc3Vic3RpdHV0ZSBmb3IgcGlzdG9sIGFuZCBiYWxsLiBXaXRoIGEKcGhpbG9zb3BoaWNhbCBmbG91cmlzaCBDYXRvIHRocm93cyBoaW1zZWxmIHVwb24gaGlzIHN3b3JkOyBJIHF1aWV0bHkKdGFrZSB0byB0aGUgc2hpcC4gVGhlcmUgaXMgbm90aGluZyBzdXJwcmlzaW5nIGluIHRoaXMuIElmIHRoZXkgYnV0IGtuZXcKaXQsIGFsbW9zdCBhbGwgbWVuIGluIHRoZWlyIGRlZ3JlZSwgc29tZSB0aW1lIG9yIG90aGVyLCBjaGVyaXNoIHZlcnkKbmVhcmx5IHRoZSBzYW1lIGZlZWxpbmdzIHRvd2FyZHMgdGhlIG9jZWFuIHdpdGggbWUuCg== -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/compress/zstd_double_fast.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef ZSTD_DOUBLE_FAST_H 12 | #define ZSTD_DOUBLE_FAST_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #include "../common/mem.h" /* U32 */ 19 | #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ 20 | 21 | void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, 22 | void const* end, ZSTD_dictTableLoadMethod_e dtlm); 23 | size_t ZSTD_compressBlock_doubleFast( 24 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 25 | void const* src, size_t srcSize); 26 | size_t ZSTD_compressBlock_doubleFast_dictMatchState( 27 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 28 | void const* src, size_t srcSize); 29 | size_t ZSTD_compressBlock_doubleFast_extDict( 30 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 31 | void const* src, size_t srcSize); 32 | 33 | 34 | #if defined (__cplusplus) 35 | } 36 | #endif 37 | 38 | #endif /* ZSTD_DOUBLE_FAST_H */ 39 | -------------------------------------------------------------------------------- /gtests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | CMAKE_MINIMUM_REQUIRED(VERSION 3.6) 2 | PROJECT(terark-core-gtest) 3 | 4 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++1y -fsanitize=address") 5 | SET(CMAKE_CXX_STANDARD 14) 6 | 7 | #MESSAGE("googletest include dir: ${GTEST_INC}") 8 | #MESSAGE("googletest library dir: ${GTEST_LIB_DIR}") 9 | 10 | INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/googletest/googletest/include) 11 | INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../output/include) 12 | INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src) 
13 | 14 | 15 | ADD_SUBDIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/googletest) 16 | 17 | LINK_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../output/lib_static) 18 | 19 | SET(TEST_SRC "simple_test.cpp" 20 | "utils_test.cpp" 21 | "zbs/zbs_test.cpp") 22 | 23 | SET(TERARK_LIBS "-lterark-idx-d -lterark-zbs-d -lterark-fsa-d -lterark-core-d") 24 | 25 | ENABLE_TESTING() 26 | FUNCTION(test_func test_file) 27 | SET(extra_args ${ARGN}) 28 | GET_FILENAME_COMPONENT(test_target_name "${test_file}" NAME_WE) 29 | ADD_EXECUTABLE("${test_target_name}" "${test_file}" "${MAIN_SRC}") 30 | TARGET_LINK_LIBRARIES("${test_target_name}" ${TERARK_LIBS} gomp aio rt gtest_main gmock_main gtest "${extra_args}") 31 | ADD_TEST(NAME "${test_target_name}" COMMAND "${test_target_name}") 32 | ENDFUNCTION(test_func) 33 | 34 | 35 | FOREACH (test_item ${TEST_SRC}) 36 | MESSAGE("${test_item}") 37 | test_func(${test_item}) 38 | ENDFOREACH () 39 | -------------------------------------------------------------------------------- /src/terark/fsa/x_fsa_util.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #if defined(__GNUC__) 6 | #if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 7 7 | #elif defined(__clang__) 8 | #elif defined(__INTEL_COMPILER) 9 | #else 10 | #error "Requires GCC-4.7+" 11 | #endif 12 | #endif 13 | 14 | #ifdef _MSC_VER 15 | #define strcasecmp _stricmp 16 | #endif 17 | 18 | namespace terark { 19 | 20 | template 21 | terark_warn_unused_result 22 | inline Uint align_to_64(Uint x) { return (x + 63) & size_t(-64); } 23 | 24 | struct CompareBy_pos { 25 | template 26 | bool operator()(const T& x, const T& y) const { 27 | return x.pos < y.pos; 28 | } 29 | template 30 | bool operator()(const T& x, size_t y) const { return x.pos < y; } 31 | template 32 | bool operator()(size_t x, const T& y) const { return x < y.pos; } 33 | }; 34 | 35 | struct CharTarget_By_ch { 36 | template 37 | bool operator()(const CT& x, const CT& y) 
const { return x.ch < y.ch; } 38 | template 39 | unsigned short 40 | operator()(const CT& x) const { return (unsigned short)(x.ch); } 41 | }; 42 | 43 | struct IdentityTR { 44 | unsigned char operator()(unsigned char c) const { return c; } 45 | }; 46 | struct TableTranslator { 47 | const unsigned char* tr_tab; 48 | unsigned char operator()(unsigned char c) const { return tr_tab[c]; } 49 | TableTranslator(const unsigned char* tr_tab1) : tr_tab(tr_tab1) {} 50 | }; 51 | 52 | } 53 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/decompress/zstd_ddict.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | 12 | #ifndef ZSTD_DDICT_H 13 | #define ZSTD_DDICT_H 14 | 15 | /*-******************************************************* 16 | * Dependencies 17 | *********************************************************/ 18 | #include /* size_t */ 19 | #include "../zstd.h" /* ZSTD_DDict, and several public functions */ 20 | 21 | 22 | /*-******************************************************* 23 | * Interface 24 | *********************************************************/ 25 | 26 | /* note: several prototypes are already published in `zstd.h` : 27 | * ZSTD_createDDict() 28 | * ZSTD_createDDict_byReference() 29 | * ZSTD_createDDict_advanced() 30 | * ZSTD_freeDDict() 31 | * ZSTD_initStaticDDict() 32 | * ZSTD_sizeof_DDict() 33 | * ZSTD_estimateDDictSize() 34 | * ZSTD_getDictID_fromDict() 35 | */ 36 | 37 | const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); 38 | size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); 39 | 40 | void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); 41 | 42 | 43 | 44 | #endif /* ZSTD_DDICT_H */ 45 | -------------------------------------------------------------------------------- /3rdparty/base64/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2007, Nick Galbreath 2 | Copyright (c) 2013-2017, Alfred Klomp 3 | Copyright (c) 2015-2017, Wojciech Mula 4 | Copyright (c) 2016-2017, Matthieu Darbois 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are 9 | met: 10 | 11 | - Redistributions of source code must retain the above copyright notice, 12 | this list of conditions and the following disclaimer. 13 | 14 | - Redistributions in binary form must reproduce the above copyright 15 | notice, this list of conditions and the following disclaimer in the 16 | documentation and/or other materials provided with the distribution. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 19 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 | TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 21 | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 24 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 25 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 27 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /src/terark/zbs/zero_length_blob_store.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * plain_blob_store.hpp 3 | * 4 | * Created on: 2017年2月10日 5 | * Author: leipeng 6 | */ 7 | #pragma once 8 | 9 | #include "abstract_blob_store.hpp" 10 | #include 11 | #include 12 | 13 | namespace terark { 14 | 15 | class TERARK_DLL_EXPORT ZeroLengthBlobStore : public AbstractBlobStore { 16 | public: 17 | void init_from_memory(fstring dataMem, Dictionary dict) override; 18 | void get_meta_blocks(valvec* blocks) const override; 19 | void get_data_blocks(valvec* blocks) const override; 20 | void detach_meta_blocks(const valvec& blocks) override; 21 | void save_mmap(function write) const override; 22 | using AbstractBlobStore::save_mmap; 23 | using AbstractBlobStore::m_numRecords; 24 | 25 | ZeroLengthBlobStore(); 26 | ~ZeroLengthBlobStore(); 27 | 28 | void finish(size_t records); 29 | 30 | size_t mem_size() const override; 31 | void get_record_append_imp(size_t recID, valvec* recData) const; 32 | void fspread_record_append_imp( 33 | pread_func_t 
fspread, void* lambda, 34 | size_t baseOffset, size_t recID, 35 | valvec* recData, 36 | valvec* rdbuf) const; 37 | void reorder_zip_data(ZReorderMap& newToOld, 38 | function writeAppend, 39 | fstring tmpFile) const override; 40 | 41 | size_t get_zipped_size_imp(size_t recID, CacheOffsets*) const; 42 | }; 43 | 44 | } // namespace terark 45 | -------------------------------------------------------------------------------- /src/terark/succinct/rank_select_basic.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef __BMI2__ 8 | # include "rank_select_inline_bmi2.hpp" 9 | #else 10 | # include "rank_select_inline_slow.hpp" 11 | #endif 12 | 13 | #if defined(__BMI2__) && TERARK_WORD_BITS == 64 14 | // plain extract bits may be faster than _bextr_u64 in some CPU, 15 | // but _bextr_u64 is always faster when extract const bit pos and width 16 | // rank512 is mostly used to extract const bit pos and width 17 | // # define rank512(bm64, i) TERARK_GET_BITS_64(bm64, i, 9) 18 | # define rank512(bm64, i) _bextr_u64(bm64, (i-1)*9, 9) 19 | #else 20 | # define rank512(bm64, i) ((bm64 >> (i-1)*9) & 511) 21 | #endif 22 | 23 | #define rank_select_check_overflow(SIZE, OP, TYPE) \ 24 | do { \ 25 | if ((SIZE) OP size_t(std::numeric_limits::max())) \ 26 | TERARK_DIE(#TYPE" overflow , size = %zd", size_t(SIZE)); \ 27 | } while (false) 28 | 29 | namespace terark { 30 | 31 | template 32 | struct RankSelectConstants { 33 | static_assert((iLineBits & (iLineBits - 1)) == 0, "iLineBits must be power of 2"); 34 | static const size_t LineBits = iLineBits; 35 | static const size_t LineShift = StaticUintBits::value - 1; 36 | static const size_t LineWords = LineBits / WordBits; 37 | 38 | static size_t BitsToLines(size_t nbits) 39 | { return (nbits + LineBits - 1) / LineBits; } 40 | }; 41 | 42 | } // namespace terark 43 | 
-------------------------------------------------------------------------------- /src/terark/zbs/xxhash_helper.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace terark { 6 | 7 | class XXHash64 { 8 | XXH64_state_t* xxhstate; 9 | public: 10 | explicit XXHash64(uint64_t seed) { 11 | auto xxhs = XXH64_createState(); 12 | if (NULL == xxhs) { 13 | throw std::bad_alloc(); 14 | } 15 | XXH64_reset(xxhs, seed); 16 | xxhstate = xxhs; 17 | } 18 | XXHash64(const XXHash64& y) { 19 | auto xxhs = XXH64_createState(); 20 | if (NULL == xxhs) { 21 | throw std::bad_alloc(); 22 | } 23 | XXH64_copyState(xxhs, y.xxhstate); 24 | xxhstate = xxhs; 25 | } 26 | XXHash64& operator=(const XXHash64& y) { 27 | XXH64_copyState(xxhstate, y.xxhstate); 28 | return *this; 29 | } 30 | ~XXHash64() { 31 | XXH64_freeState(xxhstate); 32 | } 33 | XXHash64& reset(uint64_t seed) { 34 | XXH64_reset(xxhstate, seed); 35 | return *this; 36 | } 37 | XXHash64& update(const void* data, size_t len) { 38 | XXH64_update(xxhstate, data, len); 39 | return *this; 40 | } 41 | XXHash64& update(fstring data) { 42 | XXH64_update(xxhstate, data.data(), data.size()); 43 | return *this; 44 | } 45 | uint64_t digest() const { 46 | return XXH64_digest(xxhstate); 47 | } 48 | uint64_t operator()(const void* data, size_t len) { 49 | XXH64_update(xxhstate, data, len); 50 | return XXH64_digest(xxhstate); 51 | } 52 | uint64_t operator()(fstring data) { 53 | return (*this)(data.data(), data.size()); 54 | } 55 | }; 56 | 57 | inline uint64_t XXH64(fstring data, uint64_t seed) { 58 | return ::XXH64(data.data(), data.size(), seed); 59 | } 60 | 61 | } // namespace terark 62 | 63 | -------------------------------------------------------------------------------- /tests/core/test_ProcPipeStream.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char* argv[]) { 5 | 
using namespace terark; 6 | { 7 | printf("1 begin...\n"); 8 | LineBuf line; 9 | ProcPipeStream pp("echo aaaa", "r"); 10 | printf("reading result\n"); 11 | //line.getline(pp); 12 | 13 | line.read_all(pp); 14 | line.chomp(); 15 | assert(line.size() == 4); 16 | printf("read result = len=%zd : %s\n", line.n, line.p); 17 | assert(fstring(line) == "aaaa"); 18 | printf("1 passed\n"); 19 | } 20 | 21 | { 22 | printf("2 begin...\n"); 23 | ProcPipeStream pp("cat > proc.test.tmp", "w"); 24 | fprintf(pp, "%s\n", "bbbb"); 25 | pp.close(); 26 | 27 | LineBuf line; 28 | line.read_all("proc.test.tmp"); 29 | line.chomp(); 30 | assert(fstring(line) == "bbbb"); 31 | printf("2 passed\n"); 32 | ::remove("proc.test.tmp"); 33 | } 34 | 35 | printf("3 begin...\n"); 36 | try { 37 | ProcPipeStream pp("test-non-existed-file", "r"); 38 | pp.close(); 39 | assert(pp.err_code() != 0); 40 | } 41 | catch (const std::exception&) { 42 | } 43 | printf("3 passed\n"); 44 | 45 | { 46 | printf("4 begin...\n"); 47 | fflush(stdout); 48 | std::string res = vfork_cmd("(echo aa; cat)", "bb").get(); 49 | // printf("res.size() = %zd: %s\n", res.size(), res.c_str()); 50 | assert(res == "aa\nbb"); 51 | printf("4 passed\n"); 52 | } 53 | 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, Topling Inc. 2 | All rights reserved. 3 | 4 | Copyright (c) 2020, Bytedance Inc. 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | 3. Neither the name of the copyright holder nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | -------------------------------------------------------------------------------- /src/terark/io/DataOutput_String.hpp: -------------------------------------------------------------------------------- 1 | public: 2 | MyType& operator<<( char x) { this->writeByte((byte)x); return *this; } 3 | MyType& operator<<( signed char x) { this->writeByte((byte)x); return *this; } 4 | MyType& operator<<(unsigned char x) { this->writeByte((byte)x); return *this; } 5 | 6 | MyType& save(const char* s, size_t n) { this->ensureWrite(s, n); return *this; } 7 | MyType& save(const signed char* s, size_t n) { this->ensureWrite(s, n); return *this; } 8 | MyType& save(const unsigned char* s, size_t n) { this->ensureWrite(s, n); return *this; } 9 | 10 | MyType& operator<<(const char* s) 11 | { 12 | var_size_t n(strlen(s)); 13 | *this << n; 14 | this->ensureWrite(s, n.t); 15 | return *this; 16 | } 17 | MyType& operator<<(const wchar_t* s) 18 | { 19 | var_size_t n(wcslen(s)); 20 | *this << n; 21 | this->save(s, n.t); 22 | return *this; 23 | } 24 | 25 | template 26 | MyType& operator<<(const std::basic_string& x) 27 | { 28 | var_size_t length(x.size()); 29 | *this << (length); 30 | this->save(x.data(), length.t); 31 | return *this; 32 | } 33 | 34 | MyType& operator<<(const std::string& x) 35 | { 36 | #if defined(TERARK_DATA_IO_SLOW_VAR_INT) 37 | var_size_t length(x.size()); 38 | *this << (length); 39 | this->save(x.data(), length.t); 40 | #else 41 | this->getStream()->write_string(x); 42 | #endif 43 | return *this; 44 | } 45 | 46 | MyType& operator<<(const std::wstring& x) 47 | { 48 | var_size_t length(x.size()); 49 | *this << (length); 50 | this->save(x.data(), length.t); 51 | return *this; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /src/terark/util/autoclose.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #ifdef _MSC_VER 7 | #include 8 | 
#else 9 | #include 10 | #endif 11 | 12 | namespace terark { 13 | class Auto_fclose : boost::noncopyable { 14 | FILE* f; 15 | public: 16 | #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \ 17 | defined(_MSC_VER) && _MSC_VER >= 1700 18 | Auto_fclose(Auto_fclose&& y) { 19 | this->f = y.f; 20 | y.f = NULL; 21 | } 22 | #endif 23 | operator FILE*() const { return f; } 24 | bool operator!() const { return NULL == f; } 25 | FILE* operator->() const { return f; } // feof(fp) maybe a macro 26 | explicit Auto_fclose(FILE* fp = NULL) { f = fp; } 27 | ~Auto_fclose() { if (NULL != f) ::fclose(f); } 28 | void operator=(FILE* f0) { f = f0; } // disable chained assign 29 | FILE* self_or(FILE* f2) const { return f ? f : f2; } 30 | }; 31 | typedef Auto_fclose Auto_close_fp; 32 | 33 | class Auto_close_fd : boost::noncopyable { 34 | int f; 35 | public: 36 | #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \ 37 | defined(_MSC_VER) && _MSC_VER >= 1700 38 | Auto_close_fd(Auto_close_fd&& y) { 39 | this->f = y.f; 40 | y.f = -1; 41 | } 42 | #endif 43 | operator int() const { return f; } 44 | bool operator!() const { return f < 0; } 45 | explicit Auto_close_fd(int fd = -1) { f = fd; } 46 | ~Auto_close_fd() { 47 | #ifdef _MSC_VER 48 | if (f >= 0) ::_close(f); 49 | #else 50 | if (f >= 0) ::close(f); 51 | #endif 52 | } 53 | void operator=(int f0) { f = f0; } // disable chained assign 54 | int self_or(int f2) const { return f >= 0 ? 
f : f2; } 55 | }; 56 | 57 | } // namespace terark 58 | -------------------------------------------------------------------------------- /src/terark/zbs/lru_page_cache.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace terark { 8 | 9 | class SingleLruReadonlyCache; 10 | class MultiLruReadonlyCache; 11 | class TERARK_DLL_EXPORT LruReadonlyCache : public RefCounter { 12 | public: 13 | class Buffer : private boost::noncopyable { 14 | friend class SingleLruReadonlyCache; 15 | friend class MultiLruReadonlyCache; 16 | enum CacheType : unsigned char { 17 | hit, 18 | evicted_others, 19 | initial_free, 20 | dropped_free, 21 | hit_others_load, 22 | mix, // for multi page only 23 | }; 24 | SingleLruReadonlyCache* owner; 25 | valvec* rdbuf; 26 | uint32_t index; // index == 0 indicate not ref any page 27 | CacheType cache_type; 28 | // byte_t reserved; 29 | // uint16_t missed_pages; 30 | void discard_impl(); 31 | public: 32 | explicit 33 | Buffer(valvec* rb) : rdbuf(rb), index(0) { assert(rb); } 34 | ~Buffer() { discard(); } 35 | void discard() { if (index) discard_impl(); } 36 | }; 37 | static LruReadonlyCache* 38 | create(size_t totalcapacityBytes, size_t shards, size_t maxFiles, bool aio); 39 | 40 | virtual const byte_t* pread(intptr_t fi, size_t offset, size_t len, Buffer*) = 0; 41 | virtual intptr_t open(intptr_t fd) = 0; 42 | virtual void close(intptr_t fi) = 0; 43 | virtual bool safe_close(intptr_t fi) = 0; 44 | virtual void print_stat_cnt(FILE*) const = 0; 45 | }; 46 | 47 | TERARK_DLL_EXPORT 48 | void fdpread(intptr_t fd, void* buf, size_t len, size_t offset); 49 | 50 | } // namespace terark 51 | -------------------------------------------------------------------------------- /3rdparty/zstd/zstd/dll/example/Makefile: -------------------------------------------------------------------------------- 1 | # 
################################################################ 2 | # Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under both the BSD-style license (found in the 6 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | # in the COPYING file in the root directory of this source tree). 8 | # You may select, at your option, one of the above-listed licenses. 9 | # ################################################################ 10 | 11 | VOID := /dev/null 12 | ZSTDDIR := ../include 13 | LIBDIR := ../static 14 | DLLDIR := ../dll 15 | 16 | CFLAGS ?= -O3 # can select custom flags. For example : CFLAGS="-O2 -g" make 17 | CFLAGS += -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow -Wswitch-enum \ 18 | -Wdeclaration-after-statement -Wstrict-prototypes \ 19 | -Wpointer-arith -Wstrict-aliasing=1 20 | CFLAGS += $(MOREFLAGS) 21 | CPPFLAGS:= -I$(ZSTDDIR) -DXXH_NAMESPACE=ZSTD_ 22 | FLAGS := $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) 23 | 24 | 25 | # Define *.exe as extension for Windows systems 26 | ifneq (,$(filter Windows%,$(OS))) 27 | EXT =.exe 28 | else 29 | EXT = 30 | endif 31 | 32 | .PHONY: default fullbench-dll fullbench-lib 33 | 34 | 35 | default: all 36 | 37 | all: fullbench-dll fullbench-lib 38 | 39 | 40 | fullbench-lib: fullbench.c datagen.c 41 | $(CC) $(FLAGS) $^ -o $@$(EXT) $(LIBDIR)/libzstd_static.lib 42 | 43 | fullbench-dll: fullbench.c datagen.c 44 | $(CC) $(FLAGS) $^ -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(DLLDIR)/libzstd.dll 45 | 46 | clean: 47 | @$(RM) fullbench-dll$(EXT) fullbench-lib$(EXT) \ 48 | @echo Cleaning completed 49 | -------------------------------------------------------------------------------- /src/terark/io/discard/is_primitive.hpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | #ifndef __terark_io_is_primitive_h__ 3 | #define __terark_io_var_int_h__ 4 | 5 | #if defined(_MSC_VER) && 
(_MSC_VER >= 1020) 6 | # pragma once 7 | #endif 8 | 9 | #include 10 | 11 | #include 12 | 13 | // should be the last #include 14 | #include 15 | 16 | namespace terark { 17 | 18 | BOOST_TT_AUX_BOOL_TRAIT_DEF1(is_primitive,T,false) 19 | 20 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, char, true) 21 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, unsigned char, true) 22 | 23 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, int, true) 24 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, long, true) 25 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, short, true) 26 | 27 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, unsigned int, true) 28 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, unsigned long, true) 29 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, unsigned short, true) 30 | 31 | #if defined(BOOST_HAS_LONG_LONG) 32 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, long long, true) 33 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, unsigned long long, true) 34 | #elif defined(BOOST_HAS_MS_INT64) 35 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, __int64, true) 36 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, unsigned __int64, true) 37 | #endif 38 | 39 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, std::string, true) 40 | BOOST_TT_AUX_BOOL_TRAIT_CV_SPEC1(is_primitive, std::wstring, true) 41 | 42 | 43 | } // namespace terark 44 | 45 | 46 | #include "boost/type_traits/detail/bool_trait_undef.hpp" 47 | 48 | 49 | #endif // __terark_io_var_int_h__ 50 | 51 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/ssse3/enc_reshuffle.c: -------------------------------------------------------------------------------- 1 | static inline __m128i 2 | enc_reshuffle (__m128i in) 3 | { 4 | // input, bytes MSB to LSB: 5 | // 0 0 0 0 l k j i h g f e d c b a 6 | 7 | in = _mm_shuffle_epi8(in, _mm_set_epi8( 8 | 10, 11, 9, 10, 9 | 7, 8, 6, 7, 10 | 4, 5, 3, 4, 11 | 1, 2, 0, 1)); 12 | // in, bytes MSB to LSB: 13 | // k l j 
k 14 | // h i g h 15 | // e f d e 16 | // b c a b 17 | 18 | const __m128i t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)); 19 | // bits, upper case are most significant bits, lower case are least significant bits 20 | // 0000kkkk LL000000 JJJJJJ00 00000000 21 | // 0000hhhh II000000 GGGGGG00 00000000 22 | // 0000eeee FF000000 DDDDDD00 00000000 23 | // 0000bbbb CC000000 AAAAAA00 00000000 24 | 25 | const __m128i t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040)); 26 | // 00000000 00kkkkLL 00000000 00JJJJJJ 27 | // 00000000 00hhhhII 00000000 00GGGGGG 28 | // 00000000 00eeeeFF 00000000 00DDDDDD 29 | // 00000000 00bbbbCC 00000000 00AAAAAA 30 | 31 | const __m128i t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)); 32 | // 00000000 00llllll 000000jj KKKK0000 33 | // 00000000 00iiiiii 000000gg HHHH0000 34 | // 00000000 00ffffff 000000dd EEEE0000 35 | // 00000000 00cccccc 000000aa BBBB0000 36 | 37 | const __m128i t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010)); 38 | // 00llllll 00000000 00jjKKKK 00000000 39 | // 00iiiiii 00000000 00ggHHHH 00000000 40 | // 00ffffff 00000000 00ddEEEE 00000000 41 | // 00cccccc 00000000 00aaBBBB 00000000 42 | 43 | return _mm_or_si128(t1, t3); 44 | // 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ 45 | // 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG 46 | // 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD 47 | // 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA 48 | } 49 | -------------------------------------------------------------------------------- /tools/codegen/gen_leap_year_bits.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | static int is_leap_year(long long year) { 6 | if (year % 4) return 0; /* A year not divisible by 4 is not leap. */ 7 | else if (year % 100) return 1; /* If div by 4 and not 100 is surely leap. */ 8 | else if (year % 400) return 0; /* If div by 100 *and* 400 is not leap. */ 9 | else return 1; /* If div by 100 and not by 400 is leap. 
*/ 10 | } 11 | 12 | int main(int argc, char** argv) { 13 | long long beg = 1900, end = 2040; 14 | auto prog = argv[0]; 15 | if (argc == 2) { 16 | fprintf(stderr, "usage %s [beg end]\n", prog); 17 | return 1; 18 | } 19 | if (argc >= 3) { 20 | beg = strtoll(argv[1], NULL, 10); 21 | end = strtoll(argv[2], NULL, 10); 22 | } 23 | for (long long year = beg; year < end;) { 24 | for (int col = 0; col < 5; col++) { 25 | long long ubits = 0; 26 | for (int i = 0; i < 64; ++i) { 27 | if (is_leap_year(year++)) { 28 | ubits |= 1ull << i; 29 | } 30 | } 31 | printf("0x%016llX, ", ubits); 32 | } 33 | printf("\n"); 34 | } 35 | printf("// rank1 ------------------------\n"); 36 | long long rank = 0; 37 | for (long long year = beg; year < end;) { 38 | for (int col = 0; col < 5; col++) { 39 | for (int i = 0; i < 64; ++i) { 40 | if (is_leap_year(year++)) { 41 | rank++; 42 | } 43 | } 44 | printf("0x%04llX, ", rank); 45 | } 46 | printf("\n"); 47 | } 48 | return 0; 49 | } 50 | 51 | -------------------------------------------------------------------------------- /tests/entropy/test_entropy.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | using namespace terark; 6 | 7 | int BUG_Huffman_decoder() { 8 | using namespace Huffman; 9 | // 63 zeros 10 | fstring raw_data = "000000000000000000000000000000000000000000000000000000000000000"; 11 | 12 | freq_hist h; 13 | h.add_record(raw_data); 14 | h.finish(); 15 | if (h.histogram().o0_size >= NORMALISE) { 16 | h.normalise(NORMALISE); 17 | } 18 | encoder e(h.histogram()); 19 | auto encoded_data = e.encode(raw_data, GetTlsTerarkContext()); 20 | decoder d(e.table()); 21 | valvec record; 22 | auto success = d.decode(encoded_data.data, &record, GetTlsTerarkContext()); 23 | if (!success) { 24 | return -1; 25 | } 26 | if (record.size() > record.capacity()) { 27 | return -2; 28 | } 29 | if (record != raw_data) { 30 | return -3; 31 | } 32 | return 0; 33 | } 34 | 35 | 36 | int 
BUG_Huffman_decoder_2() { 37 | using namespace Huffman; 38 | 39 | const char* table_char = "\377\001\000\003\001\000\002\177\200\247\200\002Y\020H\207\001\376\001\000\240\006\002@!\004\372\377\377\377\377\237\366\377\377\377\247\224R\245\224RJ)\245D)\245\224RJ)RJ)\245\224R\252\224RJ)\245\224R\372\377SJ)\245\224RJ)\245\224\242(\212RJ)\205\242(\212\242(\212\377\377(\372\377?\032\376\377\377\377\377\377"; 40 | fstring table(table_char, 104); 41 | 42 | std::unique_ptr<decoder_o1> ptr(new decoder_o1(table)); 43 | return 0; 44 | } 45 | 46 | int main(int argc, char* argv[]) { 47 | if (BUG_Huffman_decoder() != 0) { 48 | return -1; 49 | } 50 | if (BUG_Huffman_decoder_2() != 0) { 51 | return -1; 52 | } 53 | return 0; 54 | } 55 | 56 | -------------------------------------------------------------------------------- /src/terark/multi_way_basic.hpp: -------------------------------------------------------------------------------- 1 | 2 | private: 3 | void operator++(int); //!< disabled 4 | 5 | public: 6 | const value_type& operator* () const { return current_value(); } 7 | const value_type* operator->() const { return &current_value(); } 8 | MyType& operator++() { this->increment(); return *this; } 9 | 10 | public: 11 | bool comp_value(const value_type& x, const value_type& y) const { 12 | return this->m_comp(this->get_key(x), this->get_key(y)); 13 | } 14 | 15 | bool comp_key(const key_type& x, const key_type& y) const { 16 | return this->m_comp(x, y); 17 | } 18 | 19 | /** 20 | @brief Compute the union (sum) of multiple sets, i.e. merge multiple sorted sequences 21 | 22 | This operation relaxes the requirements on the sequences: elements within each sequence may repeat 23 | @see intersection 24 | */ 25 | template<class _OutIt> 26 | _OutIt merge(_OutIt dest) { 27 | for (; !empty(); this->increment(), ++dest) 28 | *dest = current_value(); 29 | return dest; 30 | } 31 | template<class _OutIt> 32 | _OutIt copy(_OutIt dest) { return merge(dest); } 33 | /** @brief Copy the merged elements accepted by cond to dest, consuming the whole input; returns the advanced output iterator. */ 34 | template<class _OutIt, class _Cond> 35 | _OutIt copy_if(_OutIt dest, _Cond cond) { 36 | for (; !empty(); this->increment()) { /* step past every element, kept or rejected; without this step the loop never terminated */ 37 | value_type x = current_value(); 38 | if (cond(x)) 39 | *dest = x, ++dest; 40 | } 41 | return dest; 42 | } 43 | 44 |
template<class _OutIt> 45 | _OutIt copy_equal(_OutIt dest) { 46 | return copy_equal(dest, boost::multi_index::identity()); 47 | } 48 | 49 | /** 50 | @brief Compute the intersection of multiple sets 51 | 52 | @param dest the result is copied here 53 | 54 | @note 55 | -# if MyType is LoserTree 56 | -# the end of every input must be an infinity element 57 | */ 58 | template<class _OutIt> 59 | _OutIt intersection(_OutIt dest) { 60 | assert(!empty()); 61 | if (this->total_ways() <= 32) 62 | return intersection_32(dest); 63 | else 64 | return intersection_n(dest); 65 | } 66 | 67 | template<class _OutIt> 68 | _OutIt unique(_OutIt dest) { return union_set(dest); } 69 | 70 | -------------------------------------------------------------------------------- /src/terark/util/deepcopy_ptr.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace terark { 9 | /** Owning pointer with value semantics: copying a DeepCopyPtr copy-constructs a new T from the pointee. The T* constructor, operator=(T*) and reset(T*) also deep-copy *q instead of adopting q; the destructor deletes the owned object. */ 10 | template<class T> 11 | class DeepCopyPtr { 12 | public: 13 | T* p; 14 | 15 | T* get() const { return p; } 16 | 17 | ~DeepCopyPtr() { 18 | delete p; 19 | } 20 | DeepCopyPtr() : p(NULL) {} 21 | explicit DeepCopyPtr(T* q) { 22 | if (q) { 23 | p = new T(*q); 24 | } else { 25 | p = NULL; 26 | } 27 | } 28 | DeepCopyPtr(const DeepCopyPtr& q) { 29 | if (q.p) { 30 | p = new T(*q.p); 31 | } else { 32 | p = NULL; 33 | } 34 | } 35 | DeepCopyPtr& operator=(const DeepCopyPtr& q) { 36 | DeepCopyPtr(q).swap(*this); 37 | return *this; 38 | } 39 | DeepCopyPtr(DeepCopyPtr&& q) noexcept { 40 | p = q.p; 41 | q.p = NULL; 42 | } 43 | DeepCopyPtr& operator=(DeepCopyPtr&& q) noexcept { 44 | if (this != &q) { delete p; p = q.p; q.p = NULL; } /* free the object we already own before stealing q's; self-move is a no-op */ 45 | return *this; 46 | } 47 | DeepCopyPtr& operator=(T* q) { 48 | DeepCopyPtr(q).swap(*this); 49 | return *this; 50 | } 51 | 52 | T* release_and_set(T* newptr) { 53 | T* oldptr = p; 54 | p = newptr; 55 | return oldptr; 56 | } 57 | T* release() { 58 | T* q = p; 59 | p = NULL; 60 | return q; 61 | } 62 | 63 | void reset(T* q) { 64 | DeepCopyPtr(q).swap(*this); 65 | } 66 | 67 | void swap(DeepCopyPtr& y) { T* tmp = p; p = y.p; y.p = tmp; } 68 | 69 | operator T* () const { return p; } 70 |
T* operator->() const { return p; } // ? direct, simple and stupid ? 71 | T& operator* () const { return *p; } // ? direct, simple and stupid ? 72 | }; 73 | 74 | } // namespace terark 75 | 76 | namespace std { 77 | template<class T> 78 | void swap(terark::DeepCopyPtr<T>& x, terark::DeepCopyPtr<T>& y) { x.swap(y); } 79 | } 80 | -------------------------------------------------------------------------------- /gtests/utils.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #define TIME_START(VAR_) VAR_ = std::chrono::high_resolution_clock::now() 8 | #define TIME_END(VAR_) VAR_ = std::chrono::high_resolution_clock::now() 9 | 10 | #define PRINT_TIME(START, END) \ 11 | auto duration__ = END - START; \ 12 | auto ms__ = \ 13 | std::chrono::duration_cast<std::chrono::milliseconds>(duration__) \ 14 | .count(); \ 15 | std::cout << name " took " << ms__ << " ms" << std::endl 16 | 17 | namespace terark { 18 | /** Returns true when fname can be opened for reading. */ 19 | inline bool file_exist(const char *fname) { 20 | std::ifstream probe(fname); /* closed by its destructor; the raw ::open() version leaked the descriptor and reported fd 0 as a failure */ 21 | return probe.is_open(); 22 | } 23 | 24 | /** 25 | * Get line from ifstream by last line breaker delim and emit the last delim. 26 | * e.g. 27 | * get line from : [abcd\n\nefgh] will have [abcd\n] and leave [efgh] in the 28 | * stream. 29 | * 30 | */ 31 | // inline int getline(ifstream& ifile, std::string line) { 32 | // TODO 33 | // return 1; 34 | //} 35 | 36 | template <typename F> 37 | int parse_lines(const char *fname, const F &&callback) { 38 | size_t cnt = 0; 39 | std::ifstream infile(fname); 40 | if (infile.fail()) { 41 | std::cout << "source file doesn't exist, exit ! 
file name = " 42 | << std::string(fname) << std::endl; 43 | exit(0); 44 | } 45 | std::string line; 46 | while (std::getline(infile, line)) { 47 | callback(line); 48 | ++cnt; 49 | } 50 | std::cout << "file parse finished, total rows = " << cnt 51 | << ", file name = " << fname << std::endl; 52 | return cnt; 53 | } 54 | 55 | } // namespace terark -------------------------------------------------------------------------------- /src/terark/io/readv_writev.cpp: -------------------------------------------------------------------------------- 1 | #include "readv_writev.hpp" 2 | 3 | namespace terark { 4 | 5 | // once finished, corresponding iovec will be cleared 6 | TERARK_DLL_EXPORT 7 | ssize_t easy_writev(int fd, iovec* iov, int num, int* next_idx) { 8 | int idx = *next_idx; 9 | ssize_t finished = writev(fd, iov + idx, num - idx); 10 | if (finished > 0) { 11 | ssize_t concat_beg = 0; 12 | for (; idx < num; ++idx) { 13 | ssize_t concat_end = concat_beg + iov[idx].iov_len; 14 | if (finished < concat_end) { 15 | ssize_t offset = finished - concat_beg; 16 | iov[idx].iov_base = offset + (char*)iov[idx].iov_base; 17 | iov[idx].iov_len -= offset; 18 | *next_idx = idx; 19 | break; 20 | } 21 | else { 22 | iov[idx].iov_base = nullptr; 23 | iov[idx].iov_len = 0; 24 | } 25 | concat_beg = concat_end; 26 | } 27 | } 28 | return finished; 29 | } 30 | 31 | // once finished, corresponding iovec will be cleared 32 | TERARK_DLL_EXPORT 33 | ssize_t easy_readv(int fd, iovec* iov, int num, int* next_idx) { 34 | int idx = *next_idx; 35 | ssize_t finished = readv(fd, iov + idx, num - idx); 36 | if (finished > 0) { 37 | ssize_t concat_beg = 0; 38 | for (; idx < num; ++idx) { 39 | ssize_t concat_end = concat_beg + iov[idx].iov_len; 40 | if (finished < concat_end) { 41 | ssize_t offset = finished - concat_beg; 42 | iov[idx].iov_base = offset + (char*)iov[idx].iov_base; 43 | iov[idx].iov_len -= offset; 44 | *next_idx = idx; 45 | break; 46 | } 47 | else { 48 | iov[idx].iov_base = nullptr; 49 | 
iov[idx].iov_len = 0; 50 | } 51 | concat_beg = concat_end; 52 | } 53 | } 54 | return finished; 55 | } 56 | 57 | } // namespace terark 58 | -------------------------------------------------------------------------------- /src/terark/succinct/rank_select_inline_slow.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * rank_select_inline.hpp 3 | * 4 | * Created on: Sep 1, 2015 5 | * Author: leipeng 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | namespace terark { 13 | 14 | ///@param r 0-based rank of the wanted 1 bit; must be in [0, popcnt(x)), hence in [0, 64) 15 | ///@returns the bit position, in [0, 64), of the r'th 1 bit of x (x must be non-zero) 16 | inline unsigned UintSelect1(uint64_t x, size_t r) { 17 | assert(0 != x); 18 | #if defined(NDEBUG) 19 | /* 20 | if (terark_unlikely(r >= (unsigned)fast_popcount(x))) { 21 | fprintf(stderr 22 | , "%s:%d: assert(r < popcnt(x)) fail: r=%u, popcnt(x)=%d" 23 | , __FILE__, __LINE__ 24 | , r, (int)fast_popcount(x)); 25 | abort(); 26 | } 27 | */ 28 | #else 29 | unsigned nPopCnt = (unsigned)fast_popcount(x); 30 | assert(r < nPopCnt); 31 | #endif 32 | 33 | unsigned s, t; 34 | uint64_t a, b, c, d; 35 | // a, b, c, d hold the popcount of every 2-, 4-, 8- and 16-bit group of x 36 | a = x - ((x >> 1) & 0x5555555555555555); 37 | b = (a & 0x3333333333333333) + ((a >> 2) & 0x3333333333333333); 38 | c = (b + (b >> 4)) & 0x0F0F0F0F0F0F0F0F; 39 | d = (c + (c >> 8)) & 0x00FF00FF00FF00FF; 40 | 41 | s = 0; 42 | t = ((d >> 16) + d) & 255; // popcnt(lo32) 43 | if (r >= t) {s += 32; r -= t;} 44 | 45 | t = (d >> s) & 0xFF; 46 | if (r >= t) {s += 16; r -= t;} 47 | 48 | t = (c >> s) & 0xF; 49 | if (r >= t) {s += 8; r -= t;} 50 | 51 | t = (b >> s) & 0x7; 52 | if (r >= t) {s += 4; r -= t;} 53 | 54 | t = (a >> s) & 0x3; 55 | if (r >= t) {s += 2; r -= t;} 56 | 57 | t = (x >> s) & 0x1; 58 | if (r >= t) s++; 59 | 60 | return s; 61 | } 62 | 63 | // 'k' may be 0 64 | #define TERARK_GET_BITS_64(u64,k,width) ( k ? 
(u64 >> (k-1)*width) & ((1<end_i()); 18 | return owner->elem_at(index); 19 | } 20 | pointer operator->() const { 21 | assert(NULL != owner); 22 | assert(index < owner->end_i()); 23 | return &owner->elem_at(index); 24 | } 25 | ClassIterator& operator++() { 26 | assert(index < owner->end_i()); 27 | index = owner->next_i(index); 28 | return *this; 29 | } 30 | ClassIterator& operator--() { 31 | assert(index <= owner->end_i()); 32 | assert(index > 0); 33 | index = owner->prev_i(index); 34 | return *this; 35 | } 36 | ClassIterator operator++(int) { 37 | assert(index < owner->end_i()); 38 | size_t oldindex = index; 39 | index = owner->next_i(index); 40 | return ClassIterator(owner, oldindex); 41 | } 42 | ClassIterator operator--(int) { 43 | assert(index <= owner->end_i()); 44 | assert(index > 0); 45 | size_t oldindex = index; 46 | index = owner->prev_i(index); 47 | return ClassIterator(owner, oldindex); 48 | } 49 | size_t get_index() const { return index; } 50 | OwnerPtr get_owner() const { return owner; } 51 | 52 | friend bool operator==(ClassIterator x, ClassIterator y) { 53 | assert(x.owner == y.owner); 54 | return x.index == y.index; 55 | } 56 | friend bool operator!=(ClassIterator x, ClassIterator y) { 57 | assert(x.owner == y.owner); 58 | return x.index != y.index; 59 | } 60 | 61 | #undef ClassIterator 62 | 63 | -------------------------------------------------------------------------------- /src/terark/util/strbuilder.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace terark { 7 | 8 | #if defined(__GLIBC__) || defined(__CYGWIN__) || \ 9 | defined(__DARWIN_C_LEVEL) && defined(__DARWIN_C_FULL) && __DARWIN_C_LEVEL >= __DARWIN_C_FULL 10 | // This class is more simple to use, but it should be used for one-time printf 11 | // This class is about 50% faster than StrBuilder on one-time printf 12 | class StrPrintf { 13 | StrPrintf(const StrPrintf&); 14 | StrPrintf& 
operator=(const StrPrintf&); 15 | public: 16 | char* s; // for easy access 17 | int n; 18 | StrPrintf(const char* format, ...); 19 | StrPrintf(std::string& dest, const char* format, ...); 20 | ~StrPrintf(); 21 | operator std::string() const; 22 | }; 23 | #else 24 | #pragma message("StrPrintf is skiped because not in glibc") 25 | #endif 26 | 27 | #if defined(__GNUC__) || defined(__CYGWIN__) 28 | // This class should be used for multiple-time append by printf 29 | // This class is about 30% faster than StrPrintf on building big strings 30 | // This class is about 50% slower than std::ostingstream on building big strings 31 | class StrBuilder { 32 | StrBuilder(const StrBuilder&); 33 | StrBuilder& operator=(const StrBuilder&); 34 | FILE* memFile; 35 | char* s; 36 | size_t n; 37 | public: 38 | ~StrBuilder(); 39 | StrBuilder(); 40 | StrBuilder& printf(const char* format, ...); 41 | void clear(); 42 | StrBuilder& flush(); 43 | size_t size() const { return n; } 44 | int ilen() const { return (int)n; } 45 | const char* c_str(); 46 | operator std::string() const; 47 | void setEof(int end_offset); // assert(end_offset < 0) 48 | void setEof(int end_offset, const char* endmark); // assert(end_offset < 0) 49 | }; 50 | #else 51 | #pragma message("strbuilder skiped because not in glibc") 52 | #endif 53 | 54 | } // namespace terark 55 | -------------------------------------------------------------------------------- /tools/zbs/zbs_stat.cpp: -------------------------------------------------------------------------------- 1 | #ifdef _MSC_VER 2 | #define _CRT_NONSTDC_NO_WARNINGS 3 | #define _CRT_SECURE_NO_WARNINGS 4 | #define _SCL_SECURE_NO_WARNINGS 5 | #endif 6 | 7 | #include 8 | #include 9 | 10 | using namespace terark; 11 | 12 | void usage(const char* prog) { 13 | fprintf(stderr, "Usage: %s Options Input-BlobStore-File\n" 14 | "Synopsis:\n" 15 | "Options:\n" 16 | " -h Show this help information\n" 17 | , prog); 18 | exit(1); 19 | } 20 | 21 | int main(int argc, char* argv[]) { 22 | 
for (;;) { 23 | int opt = getopt(argc, argv, "h"); 24 | switch (opt) { 25 | case -1: 26 | goto GetoptDone; 27 | case '?': 28 | case 'h': 29 | default: 30 | usage(argv[0]); 31 | } 32 | } 33 | GetoptDone: 34 | if (optind >= argc) { 35 | fprintf(stderr, "Missing input BlobStore file\n"); 36 | usage(argv[0]); 37 | } 38 | const char* fname = argv[optind]; 39 | const bool mmapPopulate = false; 40 | #ifdef NDEBUG 41 | std::unique_ptr ds; 42 | try { ds.reset(AbstractBlobStore::load_from_mmap(fname, mmapPopulate)); } 43 | catch (const std::exception&) { return 3; } 44 | #else 45 | std::unique_ptr ds(AbstractBlobStore::load_from_mmap(fname, mmapPopulate)); 46 | #endif 47 | long long num = ds->num_records(); 48 | long long unzip = ds->total_data_size(); 49 | long long ziped = ds->mem_size(); 50 | long long dict = ds->get_dict().memory.size(); 51 | fprintf(stderr, "record num: %11lld\n", num); 52 | fprintf(stderr, "unzip size: %11lld, avg: %8.3f\n", unzip, 1.0*unzip/num); 53 | fprintf(stderr, "ziped size: %11lld, avg: %8.3f\n", ziped, 1.0*ziped/num); 54 | fprintf(stderr, "unzip / zip: %11.7f\n", 1.0*unzip / ziped); 55 | fprintf(stderr, "zip / unzip: %11.7f\n", 1.0*ziped / unzip); 56 | fprintf(stderr, "dict size: %11lld\n", dict); 57 | return 0; 58 | } 59 | 60 | -------------------------------------------------------------------------------- /src/terark/util/tmpfile.cpp: -------------------------------------------------------------------------------- 1 | // created by leipeng at 2020-01-09 10:32 2 | 3 | #include "tmpfile.hpp" 4 | #if _MSC_VER 5 | #include 6 | #endif 7 | 8 | namespace terark { 9 | 10 | TempFileDeleteOnClose::~TempFileDeleteOnClose() { 11 | if (fp) 12 | this->close(); 13 | } 14 | 15 | void TempFileDeleteOnClose::open_temp() { 16 | if (!fstring(path).endsWith("XXXXXX")) { 17 | THROW_STD(invalid_argument, 18 | "ERROR: path = \"%s\", must ends with \"XXXXXX\"", path.c_str()); 19 | } 20 | #if _MSC_VER 21 | if (int err = _mktemp_s(&path[0], path.size() + 1)) { 22 | 
THROW_STD(invalid_argument, "ERROR: _mktemp_s(%s) = %s" 23 | , path.c_str(), strerror(err)); 24 | } 25 | this->open(); 26 | #else 27 | int fd = mkstemp(&path[0]); 28 | if (fd < 0) { 29 | int err = errno; 30 | THROW_STD(invalid_argument, "ERROR: mkstemp(%s) = %s", path.c_str(), strerror(err)); 31 | } 32 | this->dopen(fd); 33 | #endif 34 | } 35 | 36 | void TempFileDeleteOnClose::open(){ 37 | fp.open(path.c_str(), "wb+"); 38 | fp.disbuf(); 39 | writer.attach(&fp); 40 | } 41 | 42 | void TempFileDeleteOnClose::dopen(int fd) { 43 | fp.dopen(fd, "wb+"); 44 | fp.disbuf(); 45 | writer.attach(&fp); 46 | } 47 | 48 | void TempFileDeleteOnClose::close() { 49 | assert(nullptr != fp); 50 | writer.resetbuf(); 51 | fp.close(); 52 | ::remove(path.c_str()); 53 | } 54 | 55 | void TempFileDeleteOnClose::complete_write() { 56 | writer.flush_buffer(); 57 | fp.rewind(); 58 | } 59 | 60 | void AutoDeleteFile::Delete() { 61 | if (!fpath.empty()) { 62 | ::remove(fpath.c_str()); 63 | fpath.clear(); 64 | } 65 | } 66 | AutoDeleteFile::~AutoDeleteFile(){ 67 | if (!fpath.empty()) { 68 | ::remove(fpath.c_str()); 69 | } 70 | } 71 | 72 | } // namespace terark 73 | -------------------------------------------------------------------------------- /src/terark/io/DataInputIterator.hpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | #pragma once 3 | 4 | #include "DataInput.hpp" 5 | 6 | namespace terark { 7 | 8 | template 9 | LittleEndianDataInput LittleEndianDataInputer(StreamClass* stream) 10 | { 11 | return LittleEndianDataInput(stream); 12 | } 13 | 14 | template 15 | PortableDataInput PortableDataInputer(StreamClass* stream) 16 | { 17 | return PortableDataInput(stream); 18 | } 19 | ////////////////////////////////////////////////////////////////////////// 20 | 21 | template 22 | class DataInputIterator : 23 | public boost::input_iterator_helper, T> 24 | { 25 | DataInput m_input; 26 | size_t m_count; 27 | 28 | public: 29 | //! 
序列的 count 已知,构造这个序列 iterator 30 | DataInputIterator(DataInput input, size_t count) 31 | : m_input(input), m_count(count) 32 | { 33 | assert(m_count > 0); 34 | } 35 | 36 | //! 序列的 count 还在 stream 中,构造时读取它(var_uint32_t 的 count) 37 | DataInputIterator(DataInput input) 38 | : m_input(input) 39 | { 40 | var_uint32_t x; input >> x; 41 | m_count = x.t; 42 | } 43 | 44 | DataInputIterator() 45 | : m_count(0) {} 46 | 47 | //! 读取之后立即往前走,所以,同一个位置只能读取一次 48 | T operator*() 49 | { 50 | assert(m_count > 0); 51 | --m_count; 52 | 53 | T x; m_input >> x; 54 | return x; 55 | } 56 | 57 | //! 无操作 58 | DataInputIterator& operator++() 59 | { 60 | assert(m_count >= 0); 61 | return *this; 62 | } 63 | 64 | bool operator==(const DataInputIterator& r) const 65 | { 66 | return r.m_count == this->m_count; 67 | } 68 | 69 | bool is_end() const { return 0 == m_count; } 70 | 71 | size_t count() const { return m_count; } 72 | }; 73 | 74 | ////////////////////////////////////////////////////////////////////////// 75 | 76 | } 77 | 78 | #endif // __terark_io_DataInputIterator_h__ 79 | 80 | -------------------------------------------------------------------------------- /tests/tries/test_dict_order_gen.cpp: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | #define _SCL_SECURE_NO_WARNINGS 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace terark; 11 | 12 | int main(int argc, char* argv[]) { 13 | const char* fname = "fab-data.txt"; 14 | if (argc >= 2) { 15 | fname = argv[1]; 16 | } 17 | Auto_fclose fp(fopen(fname, "r")); 18 | if (!fp) { 19 | fprintf(stderr, "fopen(%s) = %s\n", fname, strerror(errno)); 20 | return 1; 21 | } 22 | hash_strmap<> strSet; 23 | LineBuf line; 24 | size_t lineno = 0; 25 | printf("reading file %s ...\n", fname); 26 | while (line.getline(fp) > 0) { 27 | line.trim(); 28 | strSet.insert_i(line); 29 | lineno++; 30 | if (lineno % TERARK_IF_DEBUG(1000, 10000) == 0) { 
31 | printf("lineno=%zd\n", lineno); 32 | } 33 | } 34 | printf("done, lines=%zd...\n", lineno); 35 | 36 | printf("strSet.sort_slow()...\n"); 37 | strSet.sort_slow(); 38 | if (strSet.size() > 0) 39 | printf("strSet.key(0).size() = %zd\n", strSet.key(0).size()); 40 | 41 | SortableStrVec strVec; 42 | for (size_t i = 0; i < strSet.size(); ++i) { 43 | strVec.push_back(strSet.key(i)); 44 | } 45 | NestLoudsTrieConfig conf; 46 | NestLoudsTrieDAWG_SE_512 trie; 47 | trie.build_from(strVec, conf); 48 | valvec trieKey; 49 | NonRecursiveDictionaryOrderToStateMapGenerator gen; 50 | gen(trie, [&](size_t byteLexNth, size_t state) { 51 | size_t trieIdx = trie.state_to_word_id(state); 52 | trie.nth_word(trieIdx, &trieKey); 53 | fstring hashKey = strSet.key(byteLexNth); 54 | TERARK_RT_assert(hashKey == trieKey, std::logic_error); 55 | // printf("%zd %zd\n", byteLexNth, trieIdx); 56 | // printf("%s\n", hashKey.c_str()); 57 | }); 58 | return 0; 59 | } 60 | 61 | -------------------------------------------------------------------------------- /gtests/tools/tries/test_dict_order_gen.cpp: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | #define _SCL_SECURE_NO_WARNINGS 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace terark; 11 | 12 | int main(int argc, char* argv[]) { 13 | const char* fname = "fab-data.txt"; 14 | if (argc >= 2) { 15 | fname = argv[1]; 16 | } 17 | Auto_fclose fp(fopen(fname, "r")); 18 | if (!fp) { 19 | fprintf(stderr, "fopen(%s) = %s\n", fname, strerror(errno)); 20 | return 1; 21 | } 22 | hash_strmap<> strSet; 23 | LineBuf line; 24 | size_t lineno = 0; 25 | printf("reading file %s ...\n", fname); 26 | while (line.getline(fp) > 0) { 27 | line.trim(); 28 | strSet.insert_i(line); 29 | lineno++; 30 | if (lineno % TERARK_IF_DEBUG(1000, 10000) == 0) { 31 | printf("lineno=%zd\n", lineno); 32 | } 33 | } 34 | printf("done, lines=%zd...\n", lineno); 35 | 36 | 
printf("strSet.sort_slow()...\n"); 37 | strSet.sort_slow(); 38 | if (strSet.size() > 0) 39 | printf("strSet.key(0).size() = %zd\n", strSet.key(0).size()); 40 | 41 | SortableStrVec strVec; 42 | for (size_t i = 0; i < strSet.size(); ++i) { 43 | strVec.push_back(strSet.key(i)); 44 | } 45 | NestLoudsTrieConfig conf; 46 | NestLoudsTrieDAWG_SE_512 trie; 47 | trie.build_from(strVec, conf); 48 | valvec trieKey; 49 | NonRecursiveDictionaryOrderToStateMapGenerator gen; 50 | gen(trie, [&](size_t byteLexNth, size_t state) { 51 | size_t trieIdx = trie.state_to_word_id(state); 52 | trie.nth_word(trieIdx, &trieKey); 53 | fstring hashKey = strSet.key(byteLexNth); 54 | TERARK_RT_assert(hashKey == trieKey, std::logic_error); 55 | // printf("%zd %zd\n", byteLexNth, trieIdx); 56 | // printf("%s\n", hashKey.c_str()); 57 | }); 58 | return 0; 59 | } 60 | 61 | -------------------------------------------------------------------------------- /src/terark/fsa/dfa_mmap_header.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "fsa.hpp" 4 | 5 | namespace terark { 6 | 7 | struct DFA_BlockDataEntry { 8 | uint64_t offset; 9 | uint64_t length; 10 | uint64_t endpos() const { return offset + length; } 11 | }; 12 | 13 | struct DFA_MmapHeaderBase { 14 | enum { MAX_BLOCK_NUM = 12 }; 15 | enum { current_version = 1 }; 16 | uint8_t magic_len; // 13 17 | char magic[19]; // nark-dfa-mmap 18 | char dfa_class_name[60]; 19 | 20 | byte_t is_dag; 21 | byte_t num_blocks; 22 | uint16_t kv_delim; 23 | uint32_t header_size; // == sizeof(DFA_MmapHeader) 24 | uint32_t version; 25 | uint32_t state_size; 26 | 27 | uint64_t file_size; 28 | uint64_t total_states; 29 | uint64_t zpath_states; 30 | uint64_t numFreeStates; 31 | uint64_t firstFreeState; 32 | uint64_t transition_num; 33 | uint64_t dawg_num_words; 34 | 35 | byte_t ac_word_ext; // 0: no length; 1: length; 2: length+content 36 | byte_t is_nlt_dawg_strpool; // for NestLoudsTrieBlobStore 37 | 
byte_t louds_dfa_cross_dst_uintbits; 38 | byte_t crc32cLevel; // 0: no crc; 1: header; 2: file 39 | uint32_t file_crc32; 40 | uint64_t zpath_length; 41 | byte_t louds_dfa_cache_ptrbit; 42 | byte_t padding2; 43 | uint16_t louds_dfa_num_zpath_trie; 44 | uint32_t louds_dfa_cache_states; 45 | uint64_t gnode_states; 46 | uint32_t atom_dfa_num; 47 | uint32_t dfa_cluster_num; 48 | uint32_t louds_dfa_min_zpath_id; 49 | uint32_t louds_dfa_min_cross_dst; 50 | uint64_t adfa_total_words_len; // for Acyclic DFA, U64_MAX for Cyclic DFA 51 | uint64_t reserve1[10]; 52 | 53 | DFA_BlockDataEntry blocks[MAX_BLOCK_NUM]; 54 | }; 55 | struct DFA_MmapHeader : DFA_MmapHeaderBase { 56 | char reserved[1020-sizeof(DFA_MmapHeaderBase)]; 57 | uint32_t header_crc32; 58 | }; 59 | BOOST_STATIC_ASSERT(sizeof(DFA_MmapHeader) == 1024); 60 | 61 | typedef DFA_MmapHeader DFA_Stat; 62 | 63 | } // namespace terark 64 | -------------------------------------------------------------------------------- /src/terark/util/profiling.cpp: -------------------------------------------------------------------------------- 1 | #include "../config.hpp" 2 | #include "profiling.hpp" 3 | #include 4 | #include 5 | #include 6 | #if defined(_MSC_VER) 7 | # define NOMINMAX 8 | # define WIN32_LEAN_AND_MEAN 9 | # include 10 | #else 11 | # include 12 | # include 13 | #endif 14 | 15 | namespace terark { 16 | 17 | profiling::profiling() 18 | { 19 | #if defined(_MSC_VER) 20 | LARGE_INTEGER li; 21 | QueryPerformanceFrequency(&li); 22 | m_freq = li.QuadPart; 23 | #endif 24 | } 25 | 26 | long long profiling::now() const 27 | { 28 | #if defined(_MSC_VER) 29 | LARGE_INTEGER li; 30 | QueryPerformanceCounter(&li); 31 | return li.QuadPart; 32 | #elif defined(CLOCK_MONOTONIC) || \ 33 | defined(CLOCK_THREAD_CPUTIME_ID) || \ 34 | defined(CLOCK_PROCESS_CPUTIME_ID) || \ 35 | defined(CLOCK_REALTIME) 36 | struct timespec ts; 37 | #define USE_CLOCK(clock) int ret = clock_gettime(clock, &ts) 38 | #if 0 39 | #elif defined(CLOCK_MONOTONIC) 40 | 
USE_CLOCK(CLOCK_MONOTONIC); 41 | #elif defined(CLOCK_THREAD_CPUTIME_ID) 42 | USE_CLOCK(CLOCK_THREAD_CPUTIME_ID); 43 | #elif defined(CLOCK_PROCESS_CPUTIME_ID) 44 | USE_CLOCK(CLOCK_PROCESS_CPUTIME_ID); 45 | #else 46 | USE_CLOCK(CLOCK_REALTIME); 47 | #endif 48 | if (ret != 0) { 49 | perror("profiling::now.clock_gettime"); 50 | abort(); 51 | } 52 | return (long long)ts.tv_sec * 1000000000 + ts.tv_nsec; 53 | #else 54 | struct timeval tv; 55 | int ret = gettimeofday(&tv, NULL); 56 | if (ret != 0) { 57 | perror("profiling::now.gettimeofday"); 58 | abort(); 59 | } 60 | return (long long)tv.tv_sec * 1000000000 + tv.tv_usec * 1000; 61 | #endif 62 | } 63 | 64 | #if defined(CLOCK_MONOTONIC_RAW) || defined(CLOCK_MONOTONIC) 65 | #else 66 | profiling& qtime::pf() noexcept { 67 | static profiling instance; 68 | return instance; 69 | } 70 | #endif 71 | 72 | } // namespace terark 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/terark/io/IOException.hpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | #pragma once 3 | 4 | #include 5 | #include 6 | 7 | namespace terark { 8 | 9 | #if defined(_MSC_VER) 10 | // non dll-interface class 'std::exception' used as base for dll-interface 11 | #pragma warning(push) 12 | #pragma warning(disable:4275) 13 | #endif 14 | class TERARK_DLL_EXPORT IOException : public std::exception 15 | { 16 | protected: 17 | std::string m_message; 18 | int m_errCode; 19 | public: 20 | explicit IOException(fstring msg); 21 | explicit IOException(int errCode, fstring szMsg); 22 | virtual ~IOException() override; 23 | 24 | const char* what() const noexcept override; 25 | int errCode() const throw() { return m_errCode; } 26 | 27 | static int lastError(); 28 | static std::string errorText(int errCode); 29 | }; 30 | #if defined(_MSC_VER) 31 | #pragma warning(pop) 32 | #endif 33 | 34 | class TERARK_DLL_EXPORT OpenFileException : public IOException 35 | { 
36 | std::string m_path; 37 | public: 38 | using IOException::IOException; 39 | explicit OpenFileException(fstring path, fstring szMsg); 40 | ~OpenFileException() override; 41 | }; 42 | 43 | // blocked streams read 0 bytes will cause this exception 44 | // other streams read not enough maybe cause this exception 45 | // all streams read 0 bytes will cause this exception 46 | class TERARK_DLL_EXPORT EndOfFileException : public IOException 47 | { 48 | public: 49 | using IOException::IOException; 50 | }; 51 | 52 | class TERARK_DLL_EXPORT OutOfSpaceException : public IOException 53 | { 54 | public: 55 | using IOException::IOException; 56 | }; 57 | 58 | class TERARK_DLL_EXPORT DelayWriteException : public IOException 59 | { 60 | public: 61 | using IOException::IOException; 62 | // size_t streamPosition; 63 | }; 64 | 65 | class TERARK_DLL_EXPORT BrokenPipeException : public IOException 66 | { 67 | public: 68 | using IOException::IOException; 69 | }; 70 | 71 | 72 | } // namespace terark 73 | -------------------------------------------------------------------------------- /src/terark/io/DataOutput_BigEndian.hpp: -------------------------------------------------------------------------------- 1 | public: 2 | 3 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(short) 4 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(unsigned short) 5 | 6 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(int) 7 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(unsigned int) 8 | 9 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(long) 10 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(unsigned long) 11 | 12 | #if defined(BOOST_HAS_LONG_LONG) 13 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(long long) 14 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(unsigned long long) 15 | #elif defined(BOOST_HAS_MS_INT64) 16 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(__int64) 17 | DATA_IO_GEN_BIG_ENDIAN_INT_OUTPUT(unsigned __int64) 18 | #endif 19 | 20 | MyType& save(const wchar_t* s, size_t n) 21 | { 22 | #ifdef BOOST_ENDIAN_BIG_BYTE 23 | this->ensureWrite(s, sizeof(wchar_t)*n); 24 | #else 25 | std::vector 
#!/usr/bin/env bash
#
# Build the `pkg` make target and stage the resulting package in ./output
# for the next CI/CD steps.
#
# Usage (all paths below are relative, so run it from the repository root):
#   NO_ASAN=1 scripts/build_makefile.sh
#
# Environment:
#   NO_ASAN        non-empty: pass empty DBG_ASAN / AFR_ASAN to make
#   BUILD_BRANCH   non-empty: check out that branch and install libaio-dev
#                  with apt-get (SCM auto build)
#   BOOST_LIB_DIR  non-empty: selects the "prebuilt boost" branch
#   CXX            compiler used for tools/configure/compiler.cpp (default gcc)
#
set -e

BASE_DIR=$PWD
# Keep whatever the caller exported. Resetting these unconditionally made the
# "use prebuild boost" branch below unreachable.
BOOST_INC=${BOOST_INC:-}
BOOST_LIB_DIR=${BOOST_LIB_DIR:-}

if [ "$(uname)" == Darwin ]; then
	cpuNum=$(sysctl -n machdep.cpu.thread_count)
else
	cpuNum=$(nproc)
fi

echo "Current BUILD_BRANCH = $BUILD_BRANCH"
echo "Current BUILD_REPO_BRANCH = $BUILD_REPO_BRANCH"

if [ -n "$BUILD_BRANCH" ]; then
	git checkout "$BUILD_BRANCH"
fi

git submodule update --init

# build boost
# NOTE(review): BOOST_INC / BOOST_LIB_DIR are only assigned in this script,
# never exported or passed to make -- both branches are still TODO.
if [ -z "$BOOST_LIB_DIR" ]; then
	echo "build from submodule if BOOST_LIB_DIR is not set"
	# TODO
	BOOST_INC=$BASE_DIR/boost-include
	BOOST_LIB_DIR=$BOOST_INC/stage/lib
else
	echo "use prebuild boost"
	# TODO
fi

# build core
BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD)
echo "Current BRANCH_NAME = $BRANCH_NAME"

if [ -n "$BUILD_BRANCH" ]; then
	# this script is run in SCM auto build: nobody can answer a prompt, so -y
	sudo apt-get update
	sudo apt-get install -y libaio-dev
else
	echo "you must ensure libaio-dev have been installed"
fi

rm -rf pkg

make_args=(pkg -j "$cpuNum" PKG_WITH_STATIC=1 PKG_WITH_DBG=1)
if [ -n "$NO_ASAN" ]; then
	echo "build without ASAN"
	make_args+=(DBG_ASAN='' AFR_ASAN='')
else
	echo "build with ASAN"
fi
make "${make_args[@]}"

# move all binaries to output/ dir for next CICD steps
WITH_BMI2=$(./cpu_has_bmi2.sh)
SYSTEM=$(uname -m -s | sed 's:[ /]:-:g')

# Compile and run the compiler probe; its stdout names the compiler.
# The trap removes the temp files even when the probe fails and `set -e`
# aborts the script.
tmpfile=$(mktemp compiler-XXXXXX)
trap 'rm -f "$tmpfile"*' EXIT
# ${CXX:-gcc} is intentionally unquoted so CXX may carry extra words.
COMPILER=$(${CXX:-gcc} tools/configure/compiler.cpp -o "$tmpfile.exe" && "./$tmpfile.exe")
rm -f "$tmpfile"*
trap - EXIT

PLATFORM_DIR=$SYSTEM-$COMPILER-bmi2-$WITH_BMI2

echo "$PLATFORM_DIR"
rm -rf output
mkdir output

if [ "$(uname)" == Darwin ]; then
	cp -r "pkg/terark-fsa_all-$PLATFORM_DIR"/* output
else
	# -l: hard-link instead of copying, -P: do not follow symlinks
	cp -lrP "pkg/terark-fsa_all-$PLATFORM_DIR"/* output
fi
out->grow_capacity(1)[0] = '\0'; 24 | } 25 | 26 | template 27 | typename boost::enable_if_c::type 28 | base64_encode(fstring src, valvec* out) { 29 | base64_encode(src.data(), src.size(), out); 30 | } 31 | 32 | valvec base64_encode(fstring src) { 33 | valvec out; 34 | base64_encode(src.data(), src.size(), &out); 35 | return out; 36 | } 37 | 38 | template 39 | typename boost::enable_if_c::type 40 | base64_decode(const ByteType2* src, size_t srclen, valvec* out) { 41 | size_t outlen = srclen * 3 / 4 + 32; 42 | out->resize_no_init(outlen); 43 | trk_base64_decode(reinterpret_cast(src), srclen, 44 | reinterpret_cast(out->data()), &outlen, BASE64_FLAG); 45 | assert(outlen <= out->size()); 46 | out->risk_set_size(outlen); 47 | out->grow_capacity(1)[0] = '\0'; 48 | } 49 | 50 | template 51 | typename boost::enable_if_c::type 52 | base64_decode(fstring src, valvec* out) { 53 | base64_decode(src.data(), src.size(), out); 54 | } 55 | 56 | 57 | } // namespace terark 58 | 59 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/generic/dec_tail.c: -------------------------------------------------------------------------------- 1 | if (srclen-- == 0) { 2 | ret = 1; 3 | break; 4 | } 5 | if ((q = base64_table_dec[*c++]) >= 254) { 6 | st.eof = BASE64_EOF; 7 | // Treat character '=' as invalid for byte 0: 8 | break; 9 | } 10 | st.carry = q << 2; 11 | st.bytes++; 12 | 13 | case 1: if (srclen-- == 0) { 14 | ret = 1; 15 | break; 16 | } 17 | if ((q = base64_table_dec[*c++]) >= 254) { 18 | st.eof = BASE64_EOF; 19 | // Treat character '=' as invalid for byte 1: 20 | break; 21 | } 22 | *o++ = st.carry | (q >> 4); 23 | st.carry = q << 4; 24 | st.bytes++; 25 | outl++; 26 | 27 | case 2: if (srclen-- == 0) { 28 | ret = 1; 29 | break; 30 | } 31 | if ((q = base64_table_dec[*c++]) >= 254) { 32 | st.bytes++; 33 | // When q == 254, the input char is '='. 
34 | // Check if next byte is also '=': 35 | if (q == 254) { 36 | if (srclen-- != 0) { 37 | st.bytes = 0; 38 | // EOF: 39 | st.eof = BASE64_EOF; 40 | q = base64_table_dec[*c++]; 41 | ret = ((q == 254) && (srclen == 0)) ? 1 : 0; 42 | break; 43 | } 44 | else { 45 | // Almost EOF 46 | st.eof = BASE64_AEOF; 47 | ret = 1; 48 | break; 49 | } 50 | } 51 | // If we get here, there was an error: 52 | break; 53 | } 54 | *o++ = st.carry | (q >> 2); 55 | st.carry = q << 6; 56 | st.bytes++; 57 | outl++; 58 | 59 | case 3: if (srclen-- == 0) { 60 | ret = 1; 61 | break; 62 | } 63 | if ((q = base64_table_dec[*c++]) >= 254) { 64 | st.bytes = 0; 65 | st.eof = BASE64_EOF; 66 | // When q == 254, the input char is '='. Return 1 and EOF. 67 | // When q == 255, the input char is invalid. Return 0 and EOF. 68 | ret = ((q == 254) && (srclen == 0)) ? 1 : 0; 69 | break; 70 | } 71 | *o++ = st.carry | q; 72 | st.carry = 0; 73 | st.bytes = 0; 74 | outl++; 75 | } 76 | } 77 | state->eof = st.eof; 78 | state->bytes = st.bytes; 79 | state->carry = st.carry; 80 | *outlen = outl; 81 | return ret; 82 | -------------------------------------------------------------------------------- /src/terark/io/DataOutput_LittleEndian.hpp: -------------------------------------------------------------------------------- 1 | public: 2 | 3 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(short) 4 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(unsigned short) 5 | 6 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(int) 7 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(unsigned int) 8 | 9 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(long) 10 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(unsigned long) 11 | 12 | #if defined(BOOST_HAS_LONG_LONG) 13 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(long long) 14 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(unsigned long long) 15 | #elif defined(BOOST_HAS_MS_INT64) 16 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(__int64) 17 | DATA_IO_GEN_LITTLE_ENDIAN_INT_OUTPUT(unsigned __int64) 18 | #endif 19 | 20 | MyType& save(const wchar_t* s, size_t n) 21 | { 
22 | #ifdef BOOST_ENDIAN_LITTLE_BYTE 23 | this->ensureWrite(s, sizeof(wchar_t)*n); 24 | #else 25 | std::vector tempv(s, s + n); 26 | byte_swap(&*tempv.begin(), n); 27 | this->ensureWrite(&*tempv.begin(), sizeof(wchar_t)*n); 28 | #endif 29 | return *this; 30 | } 31 | 32 | #ifndef BOOST_NO_INTRINSIC_WCHAR_T 33 | MyType& operator<<(wchar_t x) 34 | { 35 | #ifdef BOOST_ENDIAN_BIG_BYTE 36 | x = byte_swap(x); 37 | #endif 38 | this->ensureWrite(&x, sizeof(x)); 39 | return *this; 40 | } 41 | #endif 42 | 43 | template MyType& operator<<(const T& x) 44 | { 45 | DataIO_save_elem(*this, x, DATA_IO_BSWAP_FOR_LITTLE(T)()); 46 | return *this; 47 | } 48 | 49 | template 50 | MyType& operator<<(const T (&x)[Dim]) 51 | { 52 | DataIO_save_array(*this, x, Dim, DATA_IO_BSWAP_FOR_LITTLE(T)()); 53 | return *this; 54 | } 55 | 56 | template 57 | MyType& operator<<(const valvec& x) 58 | { 59 | DataIO_save_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_LITTLE(T)()); 60 | return *this; 61 | } 62 | 63 | template 64 | MyType& operator<<(const std::vector& x) 65 | { 66 | DataIO_save_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_LITTLE(T)()); 67 | return *this; 68 | } 69 | 70 | -------------------------------------------------------------------------------- /src/terark/io/DataIO_Exception.cpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | 3 | #include "DataIO_Exception.hpp" 4 | #include // for sprintf 5 | #include 6 | 7 | namespace terark { 8 | 9 | DataFormatException::DataFormatException(const char* szMsg) 10 | : m_message(szMsg) 11 | { } 12 | 13 | DataFormatException::DataFormatException(const std::string& strMsg) 14 | : m_message(strMsg) 15 | { } 16 | 17 | DataFormatException::~DataFormatException() 18 | {} 19 | 20 | InvalidObjectException::InvalidObjectException(const char* szMsg) 21 | : DataFormatException(szMsg) 22 | { } 23 | 24 | InvalidObjectException::InvalidObjectException(const std::string& strMsg) 25 | : 
DataFormatException(strMsg) 26 | { } 27 | 28 | // a size value is too large, such as container's size 29 | // 30 | void SizeValueTooLargeException::checkSizeValue(size_t value, size_t maxValue) 31 | { 32 | if (value > maxValue) 33 | throw SizeValueTooLargeException(value, maxValue); 34 | } 35 | SizeValueTooLargeException::SizeValueTooLargeException(size_t value, size_t maxValue, const char* szMsg) 36 | : DataFormatException(szMsg) 37 | { 38 | char szBuf[256]; 39 | sprintf(szBuf, "[value=%zd(0x%zX), maxValue=%zd(0x%zX)]", value, value, maxValue, maxValue); 40 | m_message.append(szBuf); 41 | } 42 | SizeValueTooLargeException::SizeValueTooLargeException(const std::string& strMsg) 43 | : DataFormatException(strMsg) 44 | { } 45 | 46 | BadVersionException::BadVersionException(unsigned loaded_version, unsigned curr_version, const char* className) 47 | : DataFormatException("") 48 | { 49 | static_cast&>(m_message = "") 50 | << "class=\"" << className << "\", version[loaded=" << loaded_version << ", current=" << curr_version << "]"; 51 | } 52 | 53 | NotFoundFactoryException::NotFoundFactoryException(const char* szMsg) 54 | : DataFormatException(szMsg) 55 | { } 56 | NotFoundFactoryException::NotFoundFactoryException(const std::string& strMsg) 57 | : DataFormatException(strMsg) 58 | { } 59 | 60 | 61 | } // namespace terark 62 | 63 | -------------------------------------------------------------------------------- /src/terark/io/win/MfcFileStream.cpp: -------------------------------------------------------------------------------- 1 | /* vim: set tabstop=4 : */ 2 | #if defined(_MSC_VER) 3 | 4 | #include "MfcFileStream.hpp" 5 | #include "byte_io_impl.hpp" 6 | 7 | #if defined(_MSC_VER) && (_MSC_VER >= 1020) 8 | # pragma warning(push) 9 | # pragma warning(disable: 4267) 10 | #endif 11 | 12 | namespace terark { 13 | 14 | size_t MfcFileStream::read(void* vbuf, size_t length) 15 | { 16 | return m_fp->Read(vbuf, length); 17 | } 18 | size_t MfcFileStream::write(const void* vbuf, size_t 
length) 19 | { 20 | TRY 21 | m_fp->Write(vbuf, length); 22 | CATCH(CException, e) 23 | throw OutOfSpaceException("MfcFileStream::Write"); 24 | END_CATCH 25 | return length; 26 | } 27 | bool MfcFileStream::seek(stream_offset_t offset, int origin) 28 | { 29 | TRY 30 | m_fp->Seek(offset, origin); 31 | CATCH(CException, e) 32 | throw OutOfSpaceException("MfcFileStream::Write"); 33 | END_CATCH 34 | return true; 35 | } 36 | void MfcFileStream::flush() 37 | { 38 | m_fp->Flush(); 39 | } 40 | 41 | TERARK_GEN_ensureRead (MfcFileStream::) 42 | TERARK_GEN_ensureWrite(MfcFileStream::) 43 | TERARK_GEN_getByte(MfcFileStream::) 44 | TERARK_GEN_readByte(MfcFileStream::) 45 | TERARK_GEN_writeByte(MfcFileStream::) 46 | 47 | size_t MfcArchiveStream::read(void* vbuf, size_t length) 48 | { 49 | return m_fp->Read(vbuf, length); 50 | } 51 | size_t MfcArchiveStream::write(const void* vbuf, size_t length) 52 | { 53 | TRY 54 | m_fp->Write(vbuf, length); 55 | CATCH(CException, e) 56 | throw OutOfSpaceException("MfcFileStream::Write"); 57 | END_CATCH 58 | return length; 59 | } 60 | void MfcArchiveStream::flush() { m_fp->Flush(); } 61 | 62 | TERARK_GEN_ensureRead (MfcArchiveStream::) 63 | TERARK_GEN_ensureWrite(MfcArchiveStream::) 64 | TERARK_GEN_getByte(MfcArchiveStream::) 65 | TERARK_GEN_readByte(MfcArchiveStream::) 66 | TERARK_GEN_writeByte(MfcArchiveStream::) 67 | } 68 | 69 | #if defined(_MSC_VER) && (_MSC_VER >= 1020) 70 | # pragma warning(pop) 71 | #endif 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /src/terark/fsa/dfa_algo_basic.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace terark { 7 | 8 | class TERARK_DLL_EXPORT NonRecursiveDictionaryOrderToStateMapGenerator { 9 | valvec m_stack; 10 | ///@param on_map function on_map(dictOrderNth, stateId) 11 | template 12 | size_t gen(const DFA& dfa, size_t root, OnMap on_map) { 13 | 
assert(root < dfa.total_states()); 14 | assert(m_stack.empty()); 15 | size_t nth = 0, sigma = dfa.get_sigma(); 16 | m_stack.reserve(sigma * 2); 17 | m_stack.push_back(root); 18 | while (!m_stack.empty()) { 19 | size_t state = m_stack.pop_val(); 20 | assert(state < dfa.total_states()); 21 | if (dfa.is_term(state)) { 22 | on_map(nth, state); 23 | nth++; 24 | } 25 | size_t oldsize = m_stack.size(); 26 | auto ptrdest = m_stack.grow_no_init(sigma); 27 | size_t numdest = dfa.get_all_dest(state, ptrdest); 28 | m_stack.risk_set_size(oldsize + numdest); 29 | std::reverse(ptrdest, ptrdest + numdest); 30 | } 31 | return nth; 32 | } 33 | public: 34 | template 35 | size_t operator()(const DFA& dfa, OnMap* on_map) { 36 | return gen(dfa, initial_state, *on_map); 37 | } 38 | template 39 | size_t operator()(const DFA& dfa, OnMap on_map) { 40 | return gen(dfa, initial_state, on_map); 41 | } 42 | template 43 | size_t operator()(const DFA& dfa, size_t root, OnMap* on_map) { 44 | return gen(dfa, root, *on_map); 45 | } 46 | template 47 | size_t operator()(const DFA& dfa, size_t root, OnMap on_map) { 48 | return gen(dfa, root, on_map); 49 | } 50 | }; 51 | 52 | } // namespace terark 53 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/ssse3/dec_loop.c: -------------------------------------------------------------------------------- 1 | // If we have SSSE3 support, pick off 16 bytes at a time for as long as we can, 2 | // but make sure that we quit before seeing any == markers at the end of the 3 | // string. Also, because we write four zeroes at the end of the output, ensure 4 | // that there are at least 6 valid bytes of input data remaining to close the 5 | // gap. 
16 + 2 + 6 = 24 bytes: 6 | while (srclen >= 24) 7 | { 8 | // Load string: 9 | __m128i str = _mm_loadu_si128((__m128i *)c); 10 | 11 | // The input consists of six character sets in the Base64 alphabet, 12 | // which we need to map back to the 6-bit values they represent. 13 | // There are three ranges, two singles, and then there's the rest. 14 | // 15 | // # From To Add Characters 16 | // 1 [43] [62] +19 + 17 | // 2 [47] [63] +16 / 18 | // 3 [48..57] [52..61] +4 0..9 19 | // 4 [65..90] [0..25] -65 A..Z 20 | // 5 [97..122] [26..51] -71 a..z 21 | // (6) Everything else => invalid input 22 | 23 | const __m128i set1 = CMPEQ(str, '+'); 24 | const __m128i set2 = CMPEQ(str, '/'); 25 | const __m128i set3 = RANGE(str, '0', '9'); 26 | const __m128i set4 = RANGE(str, 'A', 'Z'); 27 | const __m128i set5 = RANGE(str, 'a', 'z'); 28 | 29 | __m128i delta = REPLACE(set1, 19); 30 | delta = _mm_or_si128(delta, REPLACE(set2, 16)); 31 | delta = _mm_or_si128(delta, REPLACE(set3, 4)); 32 | delta = _mm_or_si128(delta, REPLACE(set4, -65)); 33 | delta = _mm_or_si128(delta, REPLACE(set5, -71)); 34 | 35 | // Check for invalid input: if any of the delta values are zero, 36 | // fall back on bytewise code to do error checking and reporting: 37 | if (_mm_movemask_epi8(CMPEQ(delta, 0))) { 38 | break; 39 | } 40 | 41 | // Now simply add the delta values to the input: 42 | str = _mm_add_epi8(str, delta); 43 | 44 | // Reshuffle the input to packed 12-byte output format: 45 | str = dec_reshuffle(str); 46 | 47 | // Store back: 48 | _mm_storeu_si128((__m128i *)o, str); 49 | 50 | c += 16; 51 | o += 12; 52 | outl += 12; 53 | srclen -= 16; 54 | } 55 | -------------------------------------------------------------------------------- /3rdparty/base64/lib/arch/avx2/dec_loop.c: -------------------------------------------------------------------------------- 1 | // If we have AVX2 support, pick off 32 bytes at a time for as long as we can, 2 | // but make sure that we quit before seeing any == markers at the end 
of the 3 | // string. Also, because we write 8 zeroes at the end of the output, ensure 4 | // that there are at least 11 valid bytes of input data remaining to close the 5 | // gap. 32 + 2 + 11 = 45 bytes: 6 | while (srclen >= 45) 7 | { 8 | // Load string: 9 | __m256i str = _mm256_loadu_si256((__m256i *)c); 10 | 11 | // The input consists of six character sets in the Base64 alphabet, 12 | // which we need to map back to the 6-bit values they represent. 13 | // There are three ranges, two singles, and then there's the rest. 14 | // 15 | // # From To Add Characters 16 | // 1 [43] [62] +19 + 17 | // 2 [47] [63] +16 / 18 | // 3 [48..57] [52..61] +4 0..9 19 | // 4 [65..90] [0..25] -65 A..Z 20 | // 5 [97..122] [26..51] -71 a..z 21 | // (6) Everything else => invalid input 22 | 23 | const __m256i set1 = CMPEQ(str, '+'); 24 | const __m256i set2 = CMPEQ(str, '/'); 25 | const __m256i set3 = RANGE(str, '0', '9'); 26 | const __m256i set4 = RANGE(str, 'A', 'Z'); 27 | const __m256i set5 = RANGE(str, 'a', 'z'); 28 | 29 | __m256i delta = REPLACE(set1, 19); 30 | delta = _mm256_or_si256(delta, REPLACE(set2, 16)); 31 | delta = _mm256_or_si256(delta, REPLACE(set3, 4)); 32 | delta = _mm256_or_si256(delta, REPLACE(set4, -65)); 33 | delta = _mm256_or_si256(delta, REPLACE(set5, -71)); 34 | 35 | // Check for invalid input: if any of the delta values are zero, 36 | // fall back on bytewise code to do error checking and reporting: 37 | if (_mm256_movemask_epi8(CMPEQ(delta, 0))) { 38 | break; 39 | } 40 | 41 | // Now simply add the delta values to the input: 42 | str = _mm256_add_epi8(str, delta); 43 | 44 | // Reshuffle the input to packed 12-byte output format: 45 | str = dec_reshuffle(str); 46 | 47 | // Store back: 48 | _mm256_storeu_si256((__m256i *)o, str); 49 | 50 | c += 32; 51 | o += 24; 52 | outl += 24; 53 | srclen -= 32; 54 | } 55 | -------------------------------------------------------------------------------- /src/terark/io/DataIO_Exception.hpp: 
/// Base class of the data-format exceptions declared in this header
/// (InvalidObjectException, SizeValueTooLargeException, BadVersionException,
/// NotFoundFactoryException).
///
/// It only carries a message string and exposes it through what().
class TERARK_DLL_EXPORT DataFormatException : public std::exception
{
protected:
	// Text returned by what(). Protected so that derived constructors can
	// extend it after the base has been constructed.
	std::string m_message;
public:
	/// @param szMsg  message text, copied into m_message
	explicit DataFormatException(const char* szMsg = "terark::DataFormatException");
	/// @param strMsg  message text, copied into m_message
	explicit DataFormatException(const std::string& strMsg);
	virtual ~DataFormatException();

	/// Returns the stored message. The pointer refers to m_message's own
	/// buffer.
	const char* what() const noexcept override { return m_message.c_str(); }
};
/**
 * Helper function equivalent to std::upper_bound on a sorted int array.
 *
 * @param arr      array sorted in ascending order
 * @param target   value to search for
 * @param arr_len  number of elements in arr
 * @return index of the first element strictly greater than target,
 *         or arr_len when there is none (0 for an empty array)
 *
 * The previous body computed `mid` but never moved `l` or `h`, so it looped
 * forever for every non-empty array.
 */
int upper_bound(int arr[], int target, int arr_len) {
  int l = 0;
  int h = arr_len;
  while (l < h) {
    // l + (h - l) / 2 cannot overflow, unlike (h + l) / 2
    const int mid = l + (h - l) / 2;
    if (arr[mid] > target) {
      h = mid;       // arr[mid] is a candidate answer, keep it in range
    } else {
      l = mid + 1;   // everything up to mid is <= target
    }
  }
  return l;
}
65 | } 66 | 67 | TEST(LOWER_BOUND_TEST, MORE_TEST) { 68 | int arr[13] = {-10,-2,2,3,4,5,5,6,7,7,7,7,7}; 69 | ASSERT_TRUE(lower_bound(arr, 2, 13) == 2); 70 | ASSERT_TRUE(lower_bound(arr, 7, 13) == 8); 71 | } 72 | 73 | } 74 | 75 | -------------------------------------------------------------------------------- /src/terark/io/DataInput_String.hpp: -------------------------------------------------------------------------------- 1 | 2 | MyType& operator>>( char& x) { x = ( char)getStream()->readByte(); return *this; } 3 | MyType& operator>>(unsigned char& x) { x = (unsigned char)getStream()->readByte(); return *this; } 4 | MyType& operator>>( signed char& x) { x = ( signed char)getStream()->readByte(); return *this; } 5 | 6 | MyType& load( char* s, size_t n) { this->ensureRead(s, n); return *this; } 7 | MyType& load(unsigned char* s, size_t n) { this->ensureRead(s, n); return *this; } 8 | MyType& load( signed char* s, size_t n) { this->ensureRead(s, n); return *this; } 9 | 10 | #ifdef TERARK_DATA_IO_SLOW_VAR_INT 11 | MyType& operator>>(std::string& x) { return load_s1(x); } 12 | #else 13 | MyType& operator>>(std::string& x) 14 | { 15 | this->getStream()->read_string(x); 16 | return *this; 17 | } 18 | #endif 19 | MyType& operator>>(std::wstring& x) { return load_s1(x); } 20 | 21 | private: 22 | //! string in file format: [length : ....content.... ] 23 | template 24 | MyType& load_s1(std::basic_string& x) 25 | { 26 | var_size_t length; 27 | *this >> length; 28 | x.resize(length.t); // str will be allocated at least (length+1) chars.. 
29 | if (terark_likely(length.t)) { 30 | // CharType* data = const_cast(str.data()); 31 | CharType* data = &*x.begin(); // this will make a mutable string content 32 | this->load(data, length.t); 33 | // data[length.t] = 0; // in most string implementation, this is accessible 34 | // data[length.t] = 0; // in some string implementation, this is out of string bound 35 | } 36 | return *this; 37 | } 38 | 39 | #ifdef TERARK_DATA_IO_ENABLE_LOAD_RAW_CHAR_PTR 40 | public: 41 | MyType& operator>>(char*& s) { return load_s0(s); } 42 | MyType& operator>>(wchar_t*& s) { return load_s0(s); } 43 | private: 44 | template MyType& load_s0(ChT*& s) 45 | { 46 | assert(0 == s); 47 | var_size_t n; 48 | *this >> n; 49 | s = new ChT[n.t+1]; 50 | this->load(s, n.t); 51 | s[n] = 0; 52 | return *this; 53 | } 54 | #endif 55 | 56 | 57 | -------------------------------------------------------------------------------- /src/terark/io/DataInput_BigEndian.hpp: -------------------------------------------------------------------------------- 1 | public: 2 | 3 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(short) 4 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(unsigned short) 5 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(int) 6 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(unsigned int) 7 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(long) 8 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(unsigned long) 9 | 10 | #if defined(BOOST_HAS_LONG_LONG) 11 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(long long) 12 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(unsigned long long) 13 | #elif defined(BOOST_HAS_MS_INT64) 14 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(__int64) 15 | DATA_IO_GEN_BIG_ENDIAN_INT_INPUT(unsigned __int64) 16 | #endif 17 | 18 | MyType& load(wchar_t* s, size_t n) 19 | { 20 | this->ensureRead(s, sizeof(wchar_t)*n); 21 | #ifdef BOOST_ENDIAN_LITTLE_BYTE 22 | byte_swap(s, n); 23 | #endif 24 | return *this; 25 | } 26 | #ifndef BOOST_NO_INTRINSIC_WCHAR_T 27 | MyType& operator>>(wchar_t& x) 28 | { 29 | this->ensureRead(&x, sizeof(x)); 30 | #ifdef BOOST_ENDIAN_LITTLE_BYTE 31 | x = 
byte_swap(x); 32 | #endif 33 | return *this; 34 | } 35 | #endif 36 | 37 | template MyType& operator>>(T& x) 38 | { 39 | DataIO_load_elem(*this, x, DATA_IO_BSWAP_FOR_BIG(T)()); 40 | return *this; 41 | } 42 | 43 | template 44 | MyType& operator>>(T (&x)[Dim]) 45 | { 46 | DataIO_load_array(*this, x, Dim, DATA_IO_BSWAP_FOR_BIG(T)()); 47 | return *this; 48 | } 49 | 50 | template 51 | MyType& operator>>(valvec& x) 52 | { 53 | DataIO_load_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_BIG(T)()); 54 | return *this; 55 | } 56 | 57 | template 58 | MyType& operator>>(std::vector& x) 59 | { 60 | DataIO_load_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_BIG(T)()); 61 | return *this; 62 | } 63 | 64 | template 65 | MyType& load_add(valvec& x) { 66 | DataIO_load_add_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_BIG(T)()); 67 | return *this; 68 | } 69 | 70 | template 71 | MyType& load_add(std::vector& x) { 72 | DataIO_load_add_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_BIG(T)()); 73 | return *this; 74 | } 75 | -------------------------------------------------------------------------------- /src/terark/zbs/abstract_blob_store.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "blob_store.hpp" 3 | 4 | namespace terark { 5 | 6 | class SortableStrVec; 7 | class ZReorderMap; 8 | class LruReadonlyCache; 9 | 10 | class TERARK_DLL_EXPORT AbstractBlobStore : public BlobStore { 11 | public: 12 | struct TERARK_DLL_EXPORT Builder : public CacheAlignedNewDelete { 13 | static Builder* createBuilder(fstring clazz, fstring outputFileName, fstring moreConfig); 14 | virtual ~Builder(); 15 | virtual Builder* getPreBuilder() const; 16 | virtual void addRecord(fstring rec) = 0; 17 | virtual void finish() = 0; 18 | }; 19 | protected: 20 | char* m_fpath_str; 21 | uint16_t m_fpath_len; 22 | bool m_isMmapData; 23 | bool m_isUserMem; 24 | bool m_isDetachMeta; 25 | MemoryCloseType m_dictCloseType; 26 | uint08_t m_checksumLevel; 27 | uint08_t 
m_checksumType; 28 | const struct FileHeaderBase* m_mmapBase; 29 | 30 | void risk_swap(AbstractBlobStore& y); 31 | 32 | public: 33 | static AbstractBlobStore* load_from_mmap(fstring fpath, bool mmapPopulate); 34 | static AbstractBlobStore* load_from_user_memory(fstring dataMem); 35 | static AbstractBlobStore* load_from_user_memory(fstring dataMem, Dictionary dict); 36 | virtual void save_mmap(fstring fpath) const; // has default implementation 37 | virtual void save_mmap(function write) const = 0; 38 | 39 | const char* name() const override; 40 | void set_fpath(fstring fpath); 41 | fstring get_fpath() const; 42 | Dictionary get_dict() const override; 43 | fstring get_mmap() const override; 44 | 45 | uint08_t get_checksum_level() const { return m_checksumLevel; } 46 | 47 | AbstractBlobStore(); 48 | virtual ~AbstractBlobStore(); 49 | virtual void reorder_zip_data(ZReorderMap& newToOld, 50 | function writeAppend, 51 | fstring tmpFile) const = 0; 52 | }; 53 | 54 | TERARK_DLL_EXPORT 55 | AbstractBlobStore* 56 | NestLoudsTrieBlobStore_build(fstring clazz, int nestLevel, SortableStrVec&); 57 | 58 | } // namespace terark 59 | -------------------------------------------------------------------------------- /src/terark/util/throw.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace terark { 9 | 10 | TERARK_DLL_EXPORT 11 | std::string ExceptionFormatString(const char* format, ...) 12 | #ifdef __GNUC__ 13 | __attribute__ ((__format__ (__printf__, 1, 2))) 14 | #endif 15 | ; 16 | 17 | #define ExceptionMessage(fmt, ...) \ 18 | terark::ExceptionFormatString("%s:%d: %s: errno=%d : " fmt, \ 19 | __FILE__, __LINE__, BOOST_CURRENT_FUNCTION, errno, ##__VA_ARGS__) 20 | 21 | #define TERARK_THROW(Except, fmt, ...) \ 22 | throw Except(ExceptionMessage(fmt, ##__VA_ARGS__)) 23 | 24 | #define THROW_STD(Except, fmt, ...) 
// Comparison expectations built on TERARK_EXPECT_F: throw exception type `e`
// when `x OP y` does not hold; the message carries both operands printed as
// long long.
//
// Each argument is parenthesized inside the tested expression so that an
// argument such as `a & mask` or `c ? p : q` keeps its own meaning instead of
// being re-associated with the comparison operator.
//
// Note: x and y appear both in the test and in the message arguments, so they
// are evaluated a second time when the expectation fails -- do not pass
// expressions with side effects.
#define TERARK_EXPECT_LT(x,y,e) TERARK_EXPECT_F((x) <  (y), e, "%lld %lld", (long long)(x), (long long)(y))
#define TERARK_EXPECT_GT(x,y,e) TERARK_EXPECT_F((x) >  (y), e, "%lld %lld", (long long)(x), (long long)(y))
#define TERARK_EXPECT_LE(x,y,e) TERARK_EXPECT_F((x) <= (y), e, "%lld %lld", (long long)(x), (long long)(y))
#define TERARK_EXPECT_GE(x,y,e) TERARK_EXPECT_F((x) >= (y), e, "%lld %lld", (long long)(x), (long long)(y))
#define TERARK_EXPECT_EQ(x,y,e) TERARK_EXPECT_F((x) == (y), e, "%lld %lld", (long long)(x), (long long)(y))
#define TERARK_EXPECT_NE(x,y,e) TERARK_EXPECT_F((x) != (y), e, "%lld %lld", (long long)(x), (long long)(y))

// _EZ: Equal To Zero
#define TERARK_EXPECT_EZ(x,e)   TERARK_EXPECT_F((x) == 0, e, "%lld", (long long)(x))

// _AL: Align, _NA: Not Align
#define TERARK_EXPECT_AL(x,a,e) TERARK_EXPECT_F((x) % (a) == 0, e, "%lld %% %lld = %lld", (long long)(x), (long long)(a), (long long)((x) % (a)))
#define TERARK_EXPECT_NA(x,a,e) TERARK_EXPECT_F((x) % (a) != 0, e, "%lld", (long long)(x))
defined(BOOST_HAS_LONG_LONG) 11 | DATA_IO_GEN_LITTLE_ENDIAN_INT_INPUT(long long) 12 | DATA_IO_GEN_LITTLE_ENDIAN_INT_INPUT(unsigned long long) 13 | #elif defined(BOOST_HAS_MS_INT64) 14 | DATA_IO_GEN_LITTLE_ENDIAN_INT_INPUT(__int64) 15 | DATA_IO_GEN_LITTLE_ENDIAN_INT_INPUT(unsigned __int64) 16 | #endif 17 | 18 | MyType& load(wchar_t* s, size_t n) 19 | { 20 | this->ensureRead(s, sizeof(wchar_t)*n); 21 | #ifdef BOOST_ENDIAN_BIG_BYTE 22 | byte_swap(s, n); 23 | #endif 24 | return *this; 25 | } 26 | 27 | #ifndef BOOST_NO_INTRINSIC_WCHAR_T 28 | MyType& operator>>(wchar_t& x) 29 | { 30 | this->ensureRead(&x, sizeof(x)); 31 | #ifdef BOOST_ENDIAN_BIG_BYTE 32 | x = byte_swap(x); 33 | #endif 34 | return *this; 35 | } 36 | #endif 37 | 38 | template MyType& operator>>(T& x) 39 | { 40 | DataIO_load_elem(*this, x, DATA_IO_BSWAP_FOR_LITTLE(T)()); 41 | return *this; 42 | } 43 | 44 | template 45 | MyType& operator>>(T (&x)[Dim]) 46 | { 47 | DataIO_load_array(*this, x, Dim, DATA_IO_BSWAP_FOR_LITTLE(T)()); 48 | return *this; 49 | } 50 | 51 | template 52 | MyType& operator>>(valvec& x) 53 | { 54 | DataIO_load_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_LITTLE(T)()); 55 | return *this; 56 | } 57 | 58 | template 59 | MyType& operator>>(std::vector& x) 60 | { 61 | DataIO_load_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_LITTLE(T)()); 62 | return *this; 63 | } 64 | 65 | template 66 | MyType& load_add(valvec& x) { 67 | DataIO_load_add_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_LITTLE(T)()); 68 | return *this; 69 | } 70 | 71 | template 72 | MyType& load_add(std::vector& x) { 73 | DataIO_load_add_vector(*this, (T*)NULL, x, DATA_IO_BSWAP_FOR_LITTLE(T)()); 74 | return *this; 75 | } 76 | -------------------------------------------------------------------------------- /src/terark/smallmap.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace 
terark { 12 | 13 | template 14 | class smallmap { 15 | public: 16 | explicit smallmap(size_t indexSize) { 17 | index = (short*)malloc(sizeof(short) * indexSize); 18 | if (NULL == index) throw std::bad_alloc(); 19 | p = (Mapped*)malloc(sizeof(Mapped) * indexSize); 20 | if (NULL == p) { 21 | free(index); 22 | throw std::bad_alloc(); 23 | } 24 | // std::uninitialized_fill_n(p, indexSize, Mapped()); 25 | for (size_t i = 0; i < indexSize; ++i) 26 | new(p+i)Mapped(); 27 | n = 0; 28 | c = indexSize; 29 | memset(index, -1, sizeof(short) * c); 30 | } 31 | ~smallmap() { 32 | STDEXT_destroy_range(p, p + c); 33 | free(p); 34 | free(index); 35 | } 36 | Mapped& bykey(size_t key) { 37 | assert(key < c); 38 | assert(n <= c); 39 | if (-1 == index[key]) { 40 | assert(n < c); 41 | index[key] = (short)n; 42 | // assert(isprint(key) || isspace(key)); 43 | p[n].ch = key; 44 | return p[n++]; 45 | } 46 | return p[index[key]]; 47 | } 48 | void resize0() { 49 | if (n <= 16) { 50 | for (size_t i = 0; i < n; ++i) { 51 | Mapped& v = p[i]; 52 | assert(-1 != v.ch); 53 | assert(-1 != index[v.ch]); 54 | index[v.ch] = -1; 55 | v.resize0(); 56 | } 57 | } else { 58 | memset(index, -1, sizeof(short) * c); 59 | for (size_t i = 0; i < n; ++i) 60 | p[i].resize0(); 61 | } 62 | n = 0; 63 | } 64 | bool exists(size_t key) const { 65 | assert(key < c); 66 | return -1 != index[key]; 67 | } 68 | Mapped& byidx(size_t idx) { 69 | assert(idx < n); 70 | return p[idx]; 71 | } 72 | Mapped* begin() { return p; } 73 | Mapped* end() { return p + n; } 74 | size_t size() const { return n; } 75 | private: 76 | short* index; 77 | Mapped* p; 78 | size_t n, c; 79 | }; 80 | 81 | } // namespace terark 82 | --------------------------------------------------------------------------------