├── Dockerfile ├── LICENSE ├── README.md ├── data_export.py ├── datasketches ├── LICENSE ├── Makefile ├── common │ ├── MurmurHash3.h │ ├── binomial_bounds.hpp │ ├── bounds_binomial_proportions.hpp │ ├── ceiling_power_of_2.hpp │ ├── common_defs.hpp │ ├── conditional_back_inserter.hpp │ ├── conditional_forward.hpp │ ├── count_zeros.hpp │ ├── inv_pow2_table.hpp │ ├── memory_operations.hpp │ └── serde.hpp ├── cpc │ ├── compression_data.hpp │ ├── cpc_common.hpp │ ├── cpc_compressor.hpp │ ├── cpc_compressor_impl.hpp │ ├── cpc_confidence.hpp │ ├── cpc_sketch.hpp │ ├── cpc_sketch_impl.hpp │ ├── cpc_union.hpp │ ├── cpc_union_impl.hpp │ ├── cpc_util.hpp │ ├── icon_estimator.hpp │ ├── kxp_byte_lookup.hpp │ ├── u32_table.hpp │ └── u32_table_impl.hpp ├── hll │ ├── AuxHashMap-internal.hpp │ ├── AuxHashMap.hpp │ ├── CompositeInterpolationXTable-internal.hpp │ ├── CompositeInterpolationXTable.hpp │ ├── CouponHashSet-internal.hpp │ ├── CouponHashSet.hpp │ ├── CouponList-internal.hpp │ ├── CouponList.hpp │ ├── CubicInterpolation-internal.hpp │ ├── CubicInterpolation.hpp │ ├── HarmonicNumbers-internal.hpp │ ├── HarmonicNumbers.hpp │ ├── Hll4Array-internal.hpp │ ├── Hll4Array.hpp │ ├── Hll6Array-internal.hpp │ ├── Hll6Array.hpp │ ├── Hll8Array-internal.hpp │ ├── Hll8Array.hpp │ ├── HllArray-internal.hpp │ ├── HllArray.hpp │ ├── HllSketch-internal.hpp │ ├── HllSketchImpl-internal.hpp │ ├── HllSketchImpl.hpp │ ├── HllSketchImplFactory.hpp │ ├── HllUnion-internal.hpp │ ├── HllUtil.hpp │ ├── RelativeErrorTables-internal.hpp │ ├── RelativeErrorTables.hpp │ ├── coupon_iterator-internal.hpp │ ├── coupon_iterator.hpp │ ├── hll.hpp │ └── hll.private.hpp └── measure.cpp ├── experiments.py ├── external ├── catch2 │ ├── LICENSE.txt │ └── catch.hpp ├── farmhash │ ├── COPYING │ ├── farmhash.cc │ └── farmhash.h ├── tclap │ ├── AUTHORS │ ├── Arg.h │ ├── ArgContainer.h │ ├── ArgException.h │ ├── ArgGroup.h │ ├── ArgTraits.h │ ├── COPYING │ ├── CmdLine.h │ ├── CmdLineInterface.h │ ├── CmdLineOutput.h │ 
├── Constraint.h │ ├── DeferDelete.h │ ├── DocBookOutput.h │ ├── HelpVisitor.h │ ├── IgnoreRestVisitor.h │ ├── MultiArg.h │ ├── MultiSwitchArg.h │ ├── OptionalUnlabeledTracker.h │ ├── StandardTraits.h │ ├── StdOutput.h │ ├── SwitchArg.h │ ├── UnlabeledMultiArg.h │ ├── UnlabeledValueArg.h │ ├── ValueArg.h │ ├── ValuesConstraint.h │ ├── VersionVisitor.h │ ├── Visitor.h │ └── sstream.h └── zstd │ ├── LICENSE │ ├── Makefile │ ├── common │ ├── bitstream.h │ ├── compiler.h │ ├── cpu.h │ ├── debug.c │ ├── debug.h │ ├── entropy_common.c │ ├── error_private.c │ ├── error_private.h │ ├── fse.h │ ├── fse_decompress.c │ ├── huf.h │ ├── mem.h │ ├── pool.c │ ├── pool.h │ ├── portability_macros.h │ ├── threading.c │ ├── threading.h │ ├── xxhash.c │ ├── xxhash.h │ ├── zstd_common.c │ ├── zstd_deps.h │ ├── zstd_internal.h │ └── zstd_trace.h │ ├── compress │ ├── clevels.h │ ├── fse_compress.c │ ├── hist.c │ ├── hist.h │ ├── huf_compress.c │ ├── zstd_compress.c │ ├── zstd_compress_internal.h │ ├── zstd_compress_literals.c │ ├── zstd_compress_literals.h │ ├── zstd_compress_sequences.c │ ├── zstd_compress_sequences.h │ ├── zstd_compress_superblock.c │ ├── zstd_compress_superblock.h │ ├── zstd_cwksp.h │ ├── zstd_double_fast.c │ ├── zstd_double_fast.h │ ├── zstd_fast.c │ ├── zstd_fast.h │ ├── zstd_lazy.c │ ├── zstd_lazy.h │ ├── zstd_ldm.c │ ├── zstd_ldm.h │ ├── zstd_ldm_geartab.h │ ├── zstd_opt.c │ ├── zstd_opt.h │ ├── zstdmt_compress.c │ └── zstdmt_compress.h │ ├── decompress │ ├── huf_decompress.c │ ├── huf_decompress_amd64.S │ ├── zstd_ddict.c │ ├── zstd_ddict.h │ ├── zstd_decompress.c │ ├── zstd_decompress_block.c │ ├── zstd_decompress_block.h │ └── zstd_decompress_internal.h │ ├── libzstd.mk │ ├── libzstd.pc.in │ ├── zstd.h │ └── zstd_errors.h ├── hyperlogloglog ├── Hash.hpp ├── HyperLogLog.hpp ├── HyperLogLogLog.hpp ├── HyperLogLogZstd.hpp ├── Makefile ├── PackedMap.hpp ├── PackedVector.hpp ├── common.hpp ├── measure.cpp ├── measure.hpp └── test.cpp ├── inputgenerator ├── Makefile 
└── inputgenerator.cpp └── zetasketch ├── .gitattributes ├── .gitignore ├── build.gradle ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── src └── main └── java └── Measure.java /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | RUN apt update && apt -y upgrade 3 | RUN apt install -y build-essential 4 | RUN apt install -y gcc-10 g++-10 cpp-10 5 | RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 6 | RUN DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC apt-get -y install tzdata 7 | RUN apt install -y openjdk-11-jdk 8 | RUN apt install -y python3 python3-numpy 9 | COPY external/ /app/external/ 10 | RUN cd /app/external/zstd && make ZSTD_LEGACY_SUPPORT=0 11 | COPY zetasketch /app/zetasketch 12 | RUN cd /app/zetasketch && ./gradlew build 13 | RUN cp -v /app/zetasketch/build/libs/zetasketch.jar /app/zetasketch/measure.jar 14 | COPY hyperlogloglog /app/hyperlogloglog 15 | RUN cd /app/hyperlogloglog && make 16 | COPY inputgenerator /app/inputgenerator 17 | RUN cd /app/inputgenerator && make 18 | COPY datasketches /app/datasketches 19 | RUN cd /app/datasketches && make 20 | WORKDIR /app/ 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Matti Karppa 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to 
do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hyperlogloglog 2 | HyperLogLogLog: Cardinality Estimation With One Log More 3 | 4 | ## Introduction 5 | 6 | This is an implementation of the HyperLogLogLog algorithm, as 7 | described in [1]. We also include heuristic variants, called 8 | HyperLogLogLog* in the paper, the vanilla HyperLogLog, and an 9 | entropy-compressed version of the HyperLogLog, compressed using 10 | Facebook's Zstd library. 11 | 12 | In addition to the implementations of the algorithms, we also provide 13 | the experimental framework to run the experiments described in the 14 | paper in a controlled Docker environment. 
15 | 16 | ## Requirements 17 | 18 | You will need at least the following to compile the code: 19 | * A sufficiently recent C++ compiler with at least C++17 support 20 | (tested: Apple Clang 13.1.6, GCC 10.3.0) 21 | 22 | The following are recommended: 23 | * A sufficiently recent version of Python to run the experiments 24 | (tested: CPython 3.8.10) 25 | * A sufficiently recent version of Docker (tested: 20.10.11) 26 | * OpenJDK 11 for building the ZetaSketch tool if Docker is not used 27 | 28 | ## Building with Docker (Recommended) 29 | The following instructions will walk you through building a Docker 30 | image for a controlled environment where you can run the experiments. 31 | 32 | Run the following command to build the image with the name `hyperlogloglog`: 33 | ``` 34 | $ docker build -t hyperlogloglog . 35 | ``` 36 | 37 | If everything goes smoothly, you should be done! You can run unit 38 | tests with the following command: 39 | ``` 40 | $ docker run hyperlogloglog /bin/bash -c 'cd hyperlogloglog && make test && ./test' 41 | ``` 42 | 43 | ## Running experiments on Docker 44 | 45 | Experiments can be run with the `experiments.py` file provided for 46 | convenience. By default, the script will run a full set of 47 | experiments. The results will be stored under a subdirectory called 48 | `results`. 49 | 50 | Please run `python3 experiments.py --help` for instructions on how to 51 | run individual experiments or filter which experiments to run. 52 | 53 | Note that the script assumes the Docker environment has been set 54 | up. It does not work without the Docker image. 55 | 56 | ## Building the code manually 57 | 58 | Follow these instructions if you want to compile the code manually, 59 | for example, for embedding the code in your own projects. Please also have 60 | a look at `Dockerfile` as it describes how the experimental pipeline 61 | is set up. 62 | 63 | The `hyperlogloglog` library depends on Zstd, so start by compiling 64 | it. 
65 | ``` 66 | cd external/zstd 67 | make ZSTD_LEGACY_SUPPORT=0 68 | ``` 69 | This should produce a file called `libzstd.so` or `libzstd.dylib` or 70 | such (depending on your OS) in the directory. 71 | 72 | Now you can proceed to building the actual `hyperlogloglog` library (all `cd` commands in this section are relative to the repository root). 73 | ``` 74 | cd hyperlogloglog 75 | make 76 | ``` 77 | This will compile the `measure` program that can be used to measure 78 | various algorithms with various inputs (as in the experiments). See 79 | `measure --help` for more information. 80 | 81 | Note: On macOS, loading the dynamic library might not work as 82 | expected. A workaround is to run `measure` as follows: 83 | ``` 84 | $ DYLD_LIBRARY_PATH=../external/zstd ./measure 85 | ``` 86 | 87 | If you want to use the input generator, compile it as follows: 88 | ``` 89 | cd inputgenerator 90 | make 91 | ``` 92 | The `inputgenerator` works similarly to `measure`; see `inputgenerator 93 | --help` for more information. 94 | 95 | For a comparable measurement tool for the Apache DataSketches 96 | implementation, compile as follows: 97 | ``` 98 | cd datasketches 99 | make 100 | ``` 101 | This also creates an executable called `measure`. As before, see 102 | `measure --help` for more information. 103 | 104 | Finally, for compiling the ZetaSketch measurement tool, use Gradle: 105 | ``` 106 | cd zetasketch 107 | ./gradlew build 108 | ``` 109 | This creates a self-contained file called `build/libs/zetasketch.jar` 110 | that can be run as any usual Java archive. Run it using `java -jar 111 | build/libs/zetasketch.jar` for more information. 
112 | 113 | ## External libraries 114 | The following external libraries are provided: 115 | * [Catch2](https://github.com/catchorg/Catch2) v2.13.7 116 | * [Apache DataSketches](https://datasketches.apache.org/) 3.2.0 117 | * [Google FarmHash](https://github.com/google/farmhash) version 1.1 118 | * [TCLAP](http://tclap.sourceforge.net/) 1.4.0 119 | * [Facebook Zstandard](https://github.com/facebook/zstd) commit 64205b7832fa0b4433214e26c294545b4c962834 120 | 121 | ## License 122 | All HyperLogLogLog code and the experimental framework have been 123 | licensed under the MIT license. For external libraries, see the 124 | respective subdirectories for license information. 125 | 126 | ## How to cite? 127 | If you use this work as part of your research endeavors, we kindly ask 128 | you to cite the KDD paper [1]. 129 | 130 | ## References 131 | [1] Matti Karppa and Rasmus Pagh. 2022. HyperLogLogLog: 132 | Cardinality Estimation With One Log More. In Proceedings of the 28th 133 | ACM SIGKDD Conference on Knowledge Discovery & Data Mining (KDD '22). 134 | Association for Computing Machinery, New York, NY, USA. 
135 | -------------------------------------------------------------------------------- /data_export.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import h5py 4 | import os 5 | import pandas as pd 6 | import numpy as np 7 | import sys 8 | 9 | def main(): 10 | results = list() 11 | for root, dirs, files in os.walk('results/'): 12 | for fn in files: 13 | if not fn.endswith('.hdf5'): 14 | continue 15 | filename = root + '/' + fn 16 | with h5py.File(filename,'r') as f: 17 | data = np.array(f['measurements']) 18 | num_reps = data.shape[0] 19 | for rep in range(num_reps): 20 | result = { **dict(f.attrs), **{ 21 | 'rep' : rep, 22 | 'time' : data[rep,0], 23 | 'estimate' : data[rep,1], 24 | 'bitsize' : data[rep,2], 25 | 'compressCount' : data[rep,3], 26 | 'rebaseCount' : data[rep,4] 27 | }} 28 | results.append(result) 29 | pd.DataFrame(results).to_csv(sys.stdout,index = False) 30 | 31 | 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /datasketches/Makefile: -------------------------------------------------------------------------------- 1 | CXX=c++ 2 | CXXFLAGS=-std=c++17 -O3 -march=native -pedantic -Wall -Wextra -I../external/datasketches/hll/ -I../external/datasketches/common/ -I../external/datasketches/cpc/ -I../external/ -Wno-type-limits 3 | LDFLAGS= 4 | 5 | all: measure 6 | 7 | measure: measure.o 8 | $(CXX) -o measure measure.o $(LDFLAGS) 9 | 10 | measure.o: measure.cpp 11 | $(CXX) $(CXXFLAGS) -c measure.cpp -o measure.o 12 | 13 | clean: 14 | rm -vf *.o measure 15 | -------------------------------------------------------------------------------- /datasketches/common/ceiling_power_of_2.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef CEILING_POWER_OF_2_HPP_ 21 | #define CEILING_POWER_OF_2_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | // compute the next highest power of 2 of 32-bit n 28 | // taken from https://graphics.stanford.edu/~seander/bithacks.html 29 | static inline uint32_t ceiling_power_of_2(uint32_t n) { 30 | --n; 31 | n |= n >> 1; 32 | n |= n >> 2; 33 | n |= n >> 4; 34 | n |= n >> 8; 35 | n |= n >> 16; 36 | return ++n; 37 | } 38 | 39 | } /* namespace datasketches */ 40 | 41 | #endif // CEILING_POWER_OF_2_HPP_ 42 | -------------------------------------------------------------------------------- /datasketches/common/common_defs.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _COMMON_DEFS_HPP_ 21 | #define _COMMON_DEFS_HPP_ 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | namespace datasketches { 29 | 30 | static const uint64_t DEFAULT_SEED = 9001; 31 | 32 | enum resize_factor { X1 = 0, X2, X4, X8 }; 33 | 34 | template using AllocChar = typename std::allocator_traits::template rebind_alloc; 35 | template using string = std::basic_string, AllocChar>; 36 | 37 | // utility function to hide unused compiler warning 38 | // usually has no additional cost 39 | template void unused(T&&...) {} 40 | 41 | // common helping functions 42 | // TODO: find a better place for them 43 | 44 | constexpr uint8_t log2(uint32_t n) { 45 | return (n > 1) ? 1 + log2(n >> 1) : 0; 46 | } 47 | 48 | constexpr uint8_t lg_size_from_count(uint32_t n, double load_factor) { 49 | return log2(n) + ((n > static_cast((1 << (log2(n) + 1)) * load_factor)) ? 
2 : 1); 50 | } 51 | 52 | // stream helpers to hide casts 53 | template 54 | static inline T read(std::istream& is) { 55 | T value; 56 | is.read(reinterpret_cast(&value), sizeof(T)); 57 | return value; 58 | } 59 | 60 | template 61 | static inline void read(std::istream& is, T* ptr, size_t size_bytes) { 62 | is.read(reinterpret_cast(ptr), size_bytes); 63 | } 64 | 65 | template 66 | static inline void write(std::ostream& os, T& value) { 67 | os.write(reinterpret_cast(&value), sizeof(T)); 68 | } 69 | 70 | template 71 | static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) { 72 | os.write(reinterpret_cast(ptr), size_bytes); 73 | } 74 | 75 | } // namespace 76 | 77 | #endif // _COMMON_DEFS_HPP_ 78 | -------------------------------------------------------------------------------- /datasketches/common/conditional_back_inserter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef CONDITIONAL_BACK_INSERTER_HPP_ 21 | #define CONDITIONAL_BACK_INSERTER_HPP_ 22 | 23 | #include 24 | #include 25 | 26 | namespace datasketches { 27 | 28 | template 29 | class conditional_back_insert_iterator: public std::back_insert_iterator { 30 | public: 31 | template 32 | conditional_back_insert_iterator(Container& c, P&& p): std::back_insert_iterator(c), p(std::forward

(p)) {} 33 | 34 | // MSVC seems to insist on having copy constructor and assignment 35 | conditional_back_insert_iterator(const conditional_back_insert_iterator& other): 36 | std::back_insert_iterator(other), p(other.p) {} 37 | conditional_back_insert_iterator& operator=(const conditional_back_insert_iterator& other) { 38 | std::back_insert_iterator::operator=(other); 39 | p = other.p; 40 | return *this; 41 | } 42 | 43 | conditional_back_insert_iterator& operator=(const typename Container::value_type& value) { 44 | if (p(value)) std::back_insert_iterator::operator=(value); 45 | return *this; 46 | } 47 | 48 | conditional_back_insert_iterator& operator=(typename Container::value_type&& value) { 49 | if (p(value)) std::back_insert_iterator::operator=(std::move(value)); 50 | return *this; 51 | } 52 | 53 | conditional_back_insert_iterator& operator*() { return *this; } 54 | conditional_back_insert_iterator& operator++() { return *this; } 55 | conditional_back_insert_iterator& operator++(int) { return *this; } 56 | 57 | private: 58 | Predicate p; 59 | }; 60 | 61 | template 62 | conditional_back_insert_iterator conditional_back_inserter(Container& c, Predicate&& p) { 63 | return conditional_back_insert_iterator(c, std::forward(p)); 64 | } 65 | 66 | } /* namespace datasketches */ 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /datasketches/common/conditional_forward.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef CONDITIONAL_FORWARD_HPP_ 21 | #define CONDITIONAL_FORWARD_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | // Forward type T2 as rvalue reference if type T1 is rvalue reference 28 | 29 | template 30 | using fwd_type = typename std::conditional::value, 31 | T2, typename std::remove_reference::type&&>::type; 32 | 33 | template 34 | fwd_type conditional_forward(T2&& value) { 35 | return std::forward>(std::forward(value)); 36 | } 37 | 38 | // Forward container as iterators 39 | 40 | template 41 | auto forward_begin(Container&& c) -> typename std::enable_if< 42 | std::is_lvalue_reference::value || 43 | std::is_same::type::const_iterator, decltype(c.begin())>::value, 44 | decltype(c.begin()) 45 | >::type 46 | { 47 | return c.begin(); 48 | } 49 | 50 | template 51 | auto forward_begin(Container&& c) -> typename std::enable_if< 52 | !std::is_lvalue_reference::value && 53 | !std::is_same::type::const_iterator, decltype(c.begin())>::value, 54 | decltype(std::make_move_iterator(c.begin())) 55 | >::type 56 | { 57 | return std::make_move_iterator(c.begin()); 58 | } 59 | 60 | template 61 | auto forward_end(Container&& c) -> typename std::enable_if< 62 | std::is_lvalue_reference::value || 63 | std::is_same::type::const_iterator, decltype(c.begin())>::value, 64 | decltype(c.end()) 65 | >::type 66 | { 67 | return c.end(); 68 | } 69 | 70 | template 71 | auto forward_end(Container&& c) -> typename std::enable_if< 72 | !std::is_lvalue_reference::value && 73 | !std::is_same::type::const_iterator, 
decltype(c.begin())>::value, 74 | decltype(std::make_move_iterator(c.end())) 75 | >::type 76 | { 77 | return std::make_move_iterator(c.end()); 78 | } 79 | 80 | } /* namespace datasketches */ 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /datasketches/common/count_zeros.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _COUNT_ZEROS_HPP_ 21 | #define _COUNT_ZEROS_HPP_ 22 | 23 | #include 24 | 25 | #include 26 | 27 | namespace datasketches { 28 | 29 | static const uint8_t byte_leading_zeros_table[256] = { 30 | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 31 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 32 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 33 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 34 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 46 | }; 47 | 48 | static const uint8_t byte_trailing_zeros_table[256] = { 49 | 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 50 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 51 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 52 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 53 | 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 54 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 55 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 56 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 57 | 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 58 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 59 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 60 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 61 | 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 62 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 63 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 64 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 65 | }; 66 | 67 | 
static const uint64_t FCLZ_MASK_56 = 0x00ffffffffffffff; 68 | static const uint64_t FCLZ_MASK_48 = 0x0000ffffffffffff; 69 | static const uint64_t FCLZ_MASK_40 = 0x000000ffffffffff; 70 | static const uint64_t FCLZ_MASK_32 = 0x00000000ffffffff; 71 | static const uint64_t FCLZ_MASK_24 = 0x0000000000ffffff; 72 | static const uint64_t FCLZ_MASK_16 = 0x000000000000ffff; 73 | static const uint64_t FCLZ_MASK_08 = 0x00000000000000ff; 74 | 75 | static inline uint8_t count_leading_zeros_in_u64(uint64_t input) { 76 | if (input > FCLZ_MASK_56) 77 | return byte_leading_zeros_table[(input >> 56) & FCLZ_MASK_08]; 78 | if (input > FCLZ_MASK_48) 79 | return 8 + byte_leading_zeros_table[(input >> 48) & FCLZ_MASK_08]; 80 | if (input > FCLZ_MASK_40) 81 | return 16 + byte_leading_zeros_table[(input >> 40) & FCLZ_MASK_08]; 82 | if (input > FCLZ_MASK_32) 83 | return 24 + byte_leading_zeros_table[(input >> 32) & FCLZ_MASK_08]; 84 | if (input > FCLZ_MASK_24) 85 | return 32 + byte_leading_zeros_table[(input >> 24) & FCLZ_MASK_08]; 86 | if (input > FCLZ_MASK_16) 87 | return 40 + byte_leading_zeros_table[(input >> 16) & FCLZ_MASK_08]; 88 | if (input > FCLZ_MASK_08) 89 | return 48 + byte_leading_zeros_table[(input >> 8) & FCLZ_MASK_08]; 90 | if (true) 91 | return 56 + byte_leading_zeros_table[(input ) & FCLZ_MASK_08]; 92 | } 93 | 94 | static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) { 95 | for (int i = 0; i < 4; i++) { 96 | const int byte = input & 0xff; 97 | if (byte != 0) return static_cast((i << 3) + byte_trailing_zeros_table[byte]); 98 | input >>= 8; 99 | } 100 | return 32; 101 | } 102 | 103 | static inline uint8_t count_trailing_zeros_in_u64(uint64_t input) { 104 | for (int i = 0; i < 8; i++) { 105 | const int byte = input & 0xff; 106 | if (byte != 0) return static_cast((i << 3) + byte_trailing_zeros_table[byte]); 107 | input >>= 8; 108 | } 109 | return 64; 110 | } 111 | 112 | } /* namespace datasketches */ 113 | 114 | #endif // _COUNT_ZEROS_HPP_ 115 | 
-------------------------------------------------------------------------------- /datasketches/common/memory_operations.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _MEMORY_OPERATIONS_HPP_ 21 | #define _MEMORY_OPERATIONS_HPP_ 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | namespace datasketches { 28 | 29 | static inline void ensure_minimum_memory(size_t bytes_available, size_t min_needed) { 30 | if (bytes_available < min_needed) { 31 | throw std::out_of_range("Insufficient buffer size detected: bytes available " 32 | + std::to_string(bytes_available) + ", minimum needed " + std::to_string(min_needed)); 33 | } 34 | } 35 | 36 | static inline void check_memory_size(size_t requested_index, size_t capacity) { 37 | if (requested_index > capacity) { 38 | throw std::out_of_range("Attempt to access memory beyond limits: requested index " 39 | + std::to_string(requested_index) + ", capacity " + std::to_string(capacity)); 40 | } 41 | } 42 | 43 | // note: size is in bytes, not items 44 | static inline size_t copy_from_mem(const void* src, void* dst, size_t size) { 45 | memcpy(dst, src, size); 46 | return size; 47 | } 48 | 49 | // note: size is in bytes, not items 50 | static inline size_t copy_to_mem(const void* src, void* dst, size_t size) { 51 | memcpy(dst, src, size); 52 | return size; 53 | } 54 | 55 | template 56 | static inline size_t copy_to_mem(const T& item, void* dst) { 57 | memcpy(dst, &item, sizeof(T)); 58 | return sizeof(T); 59 | } 60 | 61 | template 62 | static inline size_t copy_from_mem(const void* src, T& item) { 63 | memcpy(&item, src, sizeof(T)); 64 | return sizeof(T); 65 | } 66 | 67 | } // namespace 68 | 69 | #endif // _MEMORY_OPERATIONS_HPP_ 70 | -------------------------------------------------------------------------------- /datasketches/cpc/cpc_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
namespace cpc_constants {
  // Values for lg_k, the base 2 logarithm of the number of bins in the sketch.
  // NOTE(review): MIN/MAX are presumably the accepted range; the range check itself is not in this header.
  const uint8_t MIN_LG_K = 4;
  const uint8_t MAX_LG_K = 26;
  const uint8_t DEFAULT_LG_K = 11;
}

// TODO: Redundant and deprecated. Will be removed in next major version release.
static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;

// The user-supplied allocator type A rebound to each unsigned integer width.
template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
template<typename A> using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;

// Vectors of unsigned integers that allocate through the rebound allocator.
template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;

// forward declaration
template<typename A> class u32_table;

/**
 * Holder for the compressed form of a sketch: two word buffers with their word counts.
 * Both buffers start empty and all counters start at zero.
 */
template<typename A>
struct compressed_state {
  explicit compressed_state(const A& allocator): table_data(allocator), window_data(allocator) {}
  vector_u32<A> table_data;
  uint32_t table_data_words = 0;
  uint32_t table_num_entries = 0; // can be different from the number of entries in the sketch in hybrid mode
  vector_u32<A> window_data;
  uint32_t window_data_words = 0;
};

/**
 * Holder for the uncompressed form of a sketch: a u32_table and a byte window,
 * both constructed with the given allocator.
 */
template<typename A>
struct uncompressed_state {
  explicit uncompressed_state(const A& allocator): table(allocator), window(allocator) {}
  u32_table<A> table;
  vector_u8<A> window;
};
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef CPC_UNION_HPP_ 21 | #define CPC_UNION_HPP_ 22 | 23 | #include 24 | 25 | #include "cpc_sketch.hpp" 26 | #include "common_defs.hpp" 27 | 28 | namespace datasketches { 29 | 30 | /* 31 | * High performance C++ implementation of Compressed Probabilistic Counting (CPC) Union 32 | * 33 | * author Kevin Lang 34 | * author Alexander Saydakov 35 | */ 36 | 37 | // alias with default allocator for convenience 38 | using cpc_union = cpc_union_alloc>; 39 | 40 | template 41 | class cpc_union_alloc { 42 | public: 43 | /** 44 | * Creates an instance of the union given the lg_k parameter and hash seed. 
45 | * @param lg_k base 2 logarithm of the number of bins in the sketch 46 | * @param seed for hash function 47 | */ 48 | explicit cpc_union_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A()); 49 | 50 | cpc_union_alloc(const cpc_union_alloc& other); 51 | cpc_union_alloc(cpc_union_alloc&& other) noexcept; 52 | ~cpc_union_alloc(); 53 | 54 | cpc_union_alloc& operator=(const cpc_union_alloc& other); 55 | cpc_union_alloc& operator=(cpc_union_alloc&& other) noexcept; 56 | 57 | /** 58 | * This method is to update the union with a given sketch (lvalue) 59 | * @param sketch to update the union with 60 | */ 61 | void update(const cpc_sketch_alloc& sketch); 62 | 63 | /** 64 | * This method is to update the union with a given sketch (rvalue) 65 | * @param sketch to update the union with 66 | */ 67 | void update(cpc_sketch_alloc&& sketch); 68 | 69 | /** 70 | * This method produces a copy of the current state of the union as a sketch. 71 | * @return the result of the union 72 | */ 73 | cpc_sketch_alloc get_result() const; 74 | 75 | private: 76 | typedef typename std::allocator_traits::template rebind_alloc AllocU8; 77 | typedef typename std::allocator_traits::template rebind_alloc AllocU64; 78 | typedef typename std::allocator_traits::template rebind_alloc> AllocCpc; 79 | 80 | uint8_t lg_k; 81 | uint64_t seed; 82 | cpc_sketch_alloc* accumulator; 83 | vector_u64 bit_matrix; 84 | 85 | template void internal_update(S&& sketch); // to support both rvalue and lvalue 86 | 87 | cpc_sketch_alloc get_result_from_accumulator() const; 88 | cpc_sketch_alloc get_result_from_bit_matrix() const; 89 | 90 | void switch_to_bit_matrix(); 91 | void walk_table_updating_sketch(const u32_table& table); 92 | void or_table_into_matrix(const u32_table& table); 93 | void or_window_into_matrix(const vector_u8& sliding_window, uint8_t offset, uint8_t src_lg_k); 94 | void or_matrix_into_matrix(const vector_u64& src_matrix, uint8_t src_lg_k); 95 | void 
reduce_k(uint8_t new_lg_k); 96 | }; 97 | 98 | } /* namespace datasketches */ 99 | 100 | #include "cpc_union_impl.hpp" 101 | 102 | #endif 103 | -------------------------------------------------------------------------------- /datasketches/cpc/cpc_util.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
/**
 * Integer division that rounds the quotient up.
 * @param x dividend
 * @param y divisor, must not be zero
 * @return the smallest integer q such that q * y >= x
 * @throws std::invalid_argument if y is zero
 */
static inline uint64_t divide_longs_rounding_up(uint64_t x, uint64_t y) {
  if (y == 0) throw std::invalid_argument("divide_longs_rounding_up: bad argument");
  const uint64_t quotient = x / y;
  return (x % y == 0) ? quotient : quotient + 1;
}

/**
 * Computes floor(log2(x)), i.e. the position of the highest set bit.
 * @param x value to examine, must be at least 1
 * @return a value in the range [0, 63]
 * @throws std::invalid_argument if x is zero
 */
static inline uint8_t floor_log2_of_long(uint64_t x) {
  if (x < 1) throw std::invalid_argument("floor_log2_of_long: bad argument");
  // Shift the argument down instead of doubling a probe up: a probe of 2^63
  // overflows to zero on the next doubling, so any x above 2^63 used to spin forever.
  uint8_t p = 0;
  while ((x >>= 1) != 0) p++;
  return p;
}

// This place-holder code was inadequate because it caused
// the cost of the post-merge get_result() operation to be O(C)
// instead of O(K). It did have the advantage of being
// very simple and trustworthy during initial testing.
/**
 * Counts the bits set across an array of 64-bit words by clearing the lowest set bit repeatedly.
 * @param array pointer to the words
 * @param length number of words in the array
 * @return total number of bits set
 */
static inline uint64_t wegner_count_bits_set_in_matrix(const uint64_t* array, size_t length) {
  uint64_t count = 0;
  // Wegner's Bit-Counting Algorithm, CACM 3 (1960), p. 322.
  for (size_t i = 0; i < length; i++) {
    // each iteration removes exactly one set bit
    for (uint64_t pattern = array[i]; pattern != 0; pattern &= (pattern - 1)) {
      count++;
    }
  }
  return count;
}

// Note: this is an adaptation of the Java code,
// which is apparently a variation of Figure 5-2 in "Hacker's Delight"
// by Henry S. Warren.
/**
 * Counts the bits set in one 64-bit word using parallel partial sums
 * (pairs, then nibbles, then bytes, then folding the bytes together).
 * @param i the word to examine
 * @return number of bits set, in the range [0, 64]
 */
static inline uint32_t warren_bit_count(uint64_t i) {
  i = i - ((i >> 1) & 0x5555555555555555ULL);
  i = (i & 0x3333333333333333ULL) + ((i >> 2) & 0x3333333333333333ULL);
  i = (i + (i >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
  i = i + (i >> 8);
  i = i + (i >> 16);
  i = i + (i >> 32);
  return static_cast<uint32_t>(i & 0x7f);
}

/**
 * Counts the bits set across an array of 64-bit words, one word at a time.
 * @param array pointer to the words
 * @param length number of words in the array
 * @return total number of bits set
 */
static inline uint32_t warren_count_bits_set_in_matrix(const uint64_t* array, uint32_t length) {
  uint32_t count = 0;
  for (uint32_t i = 0; i < length; i++) {
    count += warren_bit_count(array[i]);
  }
  return count;
}

// This code is Figure 5-9 in "Hacker's Delight" by Henry S. Warren.

// Carry-save adder: h receives the carry bits and l the sum bits of a + b + c, per bit position.
#define CSA(h,l,a,b,c) {uint64_t u = a ^ b; uint64_t v = c; h = (a & b) | (u & v); l = u ^ v;}

/**
 * Counts the bits set across an array of 64-bit words, processing 8 words per step
 * through a tree of carry-save adders so that only one population count per 8 words is needed.
 * @param a pointer to the words; not dereferenced when length is zero
 * @param length number of words, must be a multiple of 8 (zero is allowed and yields 0)
 * @return total number of bits set
 * @throws std::invalid_argument if length is not a multiple of 8
 */
static inline uint32_t count_bits_set_in_matrix(const uint64_t* a, uint32_t length) {
  if ((length & 0x7) != 0) throw std::invalid_argument("the length of the array must be a multiple of 8");
  uint32_t total = 0;
  uint64_t ones, twos, twos_a, twos_b, fours, fours_a, fours_b, eights;
  fours = twos = ones = 0;

  // length is a multiple of 8, so "i < length" visits exactly the full groups.
  // The former bound "i <= length - 8" wrapped around for length == 0 and read out of bounds.
  for (uint32_t i = 0; i < length; i += 8) {
    CSA(twos_a, ones, ones, a[i+0], a[i+1]);
    CSA(twos_b, ones, ones, a[i+2], a[i+3]);
    CSA(fours_a, twos, twos, twos_a, twos_b);

    CSA(twos_a, ones, ones, a[i+4], a[i+5]);
    CSA(twos_b, ones, ones, a[i+6], a[i+7]);
    CSA(fours_b, twos, twos, twos_a, twos_b);

    CSA(eights, fours, fours, fours_a, fours_b);

    total += warren_bit_count(eights);
  }
  // weight each accumulator by the number of input bits one of its bits stands for
  total = 8 * total + 4 * warren_bit_count(fours) + 2 * warren_bit_count(twos) + warren_bit_count(ones);

  return total;
}

// Here are some
timings made with quickTestMerge.c 123 | // for the "5 5" case: 124 | 125 | // Wegner CountBitsTime 29.3 126 | // Warren CountBitsTime 5.3 127 | // CSA CountBitsTime 4.3 128 | 129 | } /* namespace datasketches */ 130 | 131 | #endif 132 | -------------------------------------------------------------------------------- /datasketches/cpc/kxp_byte_lookup.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef KXP_BYTE_LOOKUP_HPP_ 21 | #define KXP_BYTE_LOOKUP_HPP_ 22 | 23 | namespace datasketches { 24 | 25 | // the table was created by the following procedure: 26 | 27 | //void fill_kxp_byte_table() { 28 | // for (int byte = 0; byte < 256; byte++) { 29 | // double sum = 0.0; 30 | // for (int col = 0; col < 8; col++) { 31 | // const uint8_t bit = (byte >> col) & 1; 32 | // if (bit == 0) { // note the inverted logic 33 | // sum += INVERSE_POWERS_OF_2[col + 1]; // note the "+1" 34 | // } 35 | // } 36 | // kxp_byte_table[byte] = sum; 37 | // 38 | // printf("%.17g", kxp_byte_table[byte]); 39 | // if (byte != 255) printf(", "); 40 | // if ((byte + 1) % 8 == 0) printf("\n"); 41 | // } 42 | //} 43 | 44 | static const double KXP_BYTE_TABLE[256] = { 45 | 0.99609375, 0.49609375, 0.74609375, 0.24609375, 0.87109375, 0.37109375, 0.62109375, 0.12109375, 46 | 0.93359375, 0.43359375, 0.68359375, 0.18359375, 0.80859375, 0.30859375, 0.55859375, 0.05859375, 47 | 0.96484375, 0.46484375, 0.71484375, 0.21484375, 0.83984375, 0.33984375, 0.58984375, 0.08984375, 48 | 0.90234375, 0.40234375, 0.65234375, 0.15234375, 0.77734375, 0.27734375, 0.52734375, 0.02734375, 49 | 0.98046875, 0.48046875, 0.73046875, 0.23046875, 0.85546875, 0.35546875, 0.60546875, 0.10546875, 50 | 0.91796875, 0.41796875, 0.66796875, 0.16796875, 0.79296875, 0.29296875, 0.54296875, 0.04296875, 51 | 0.94921875, 0.44921875, 0.69921875, 0.19921875, 0.82421875, 0.32421875, 0.57421875, 0.07421875, 52 | 0.88671875, 0.38671875, 0.63671875, 0.13671875, 0.76171875, 0.26171875, 0.51171875, 0.01171875, 53 | 0.98828125, 0.48828125, 0.73828125, 0.23828125, 0.86328125, 0.36328125, 0.61328125, 0.11328125, 54 | 0.92578125, 0.42578125, 0.67578125, 0.17578125, 0.80078125, 0.30078125, 0.55078125, 0.05078125, 55 | 0.95703125, 0.45703125, 0.70703125, 0.20703125, 0.83203125, 0.33203125, 0.58203125, 0.08203125, 56 | 0.89453125, 0.39453125, 0.64453125, 0.14453125, 0.76953125, 0.26953125, 0.51953125, 0.01953125, 57 | 0.97265625, 
0.47265625, 0.72265625, 0.22265625, 0.84765625, 0.34765625, 0.59765625, 0.09765625, 58 | 0.91015625, 0.41015625, 0.66015625, 0.16015625, 0.78515625, 0.28515625, 0.53515625, 0.03515625, 59 | 0.94140625, 0.44140625, 0.69140625, 0.19140625, 0.81640625, 0.31640625, 0.56640625, 0.06640625, 60 | 0.87890625, 0.37890625, 0.62890625, 0.12890625, 0.75390625, 0.25390625, 0.50390625, 0.00390625, 61 | 0.9921875, 0.4921875, 0.7421875, 0.2421875, 0.8671875, 0.3671875, 0.6171875, 0.1171875, 62 | 0.9296875, 0.4296875, 0.6796875, 0.1796875, 0.8046875, 0.3046875, 0.5546875, 0.0546875, 63 | 0.9609375, 0.4609375, 0.7109375, 0.2109375, 0.8359375, 0.3359375, 0.5859375, 0.0859375, 64 | 0.8984375, 0.3984375, 0.6484375, 0.1484375, 0.7734375, 0.2734375, 0.5234375, 0.0234375, 65 | 0.9765625, 0.4765625, 0.7265625, 0.2265625, 0.8515625, 0.3515625, 0.6015625, 0.1015625, 66 | 0.9140625, 0.4140625, 0.6640625, 0.1640625, 0.7890625, 0.2890625, 0.5390625, 0.0390625, 67 | 0.9453125, 0.4453125, 0.6953125, 0.1953125, 0.8203125, 0.3203125, 0.5703125, 0.0703125, 68 | 0.8828125, 0.3828125, 0.6328125, 0.1328125, 0.7578125, 0.2578125, 0.5078125, 0.0078125, 69 | 0.984375, 0.484375, 0.734375, 0.234375, 0.859375, 0.359375, 0.609375, 0.109375, 70 | 0.921875, 0.421875, 0.671875, 0.171875, 0.796875, 0.296875, 0.546875, 0.046875, 71 | 0.953125, 0.453125, 0.703125, 0.203125, 0.828125, 0.328125, 0.578125, 0.078125, 72 | 0.890625, 0.390625, 0.640625, 0.140625, 0.765625, 0.265625, 0.515625, 0.015625, 73 | 0.96875, 0.46875, 0.71875, 0.21875, 0.84375, 0.34375, 0.59375, 0.09375, 74 | 0.90625, 0.40625, 0.65625, 0.15625, 0.78125, 0.28125, 0.53125, 0.03125, 75 | 0.9375, 0.4375, 0.6875, 0.1875, 0.8125, 0.3125, 0.5625, 0.0625, 76 | 0.875, 0.375, 0.625, 0.125, 0.75, 0.25, 0.5, 0 77 | }; 78 | 79 | } /* namespace datasketches */ 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /datasketches/cpc/u32_table.hpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | // author Kevin Lang, Oath Research 21 | 22 | #ifndef U32_TABLE_HPP_ 23 | #define U32_TABLE_HPP_ 24 | 25 | // This is a highly specialized hash table that was designed 26 | // to be a part of the library's CPC sketch implementation 27 | 28 | #include "cpc_common.hpp" 29 | 30 | namespace datasketches { 31 | 32 | static const uint32_t U32_TABLE_UPSIZE_NUMER = 3LL; 33 | static const uint32_t U32_TABLE_UPSIZE_DENOM = 4LL; 34 | 35 | static const uint32_t U32_TABLE_DOWNSIZE_NUMER = 1LL; 36 | static const uint32_t U32_TABLE_DOWNSIZE_DENOM = 4LL; 37 | 38 | template 39 | class u32_table { 40 | public: 41 | 42 | u32_table(const A& allocator); 43 | u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator); 44 | 45 | inline uint32_t get_num_items() const; 46 | inline const uint32_t* get_slots() const; 47 | inline uint8_t get_lg_size() const; 48 | inline void clear(); 49 | 50 | // returns true iff the item was new and was therefore added to the table 51 | inline bool maybe_insert(uint32_t item); 52 | // returns true iff the 
item was present and was therefore removed from the table 53 | inline bool maybe_delete(uint32_t item); 54 | 55 | static u32_table make_from_pairs(const uint32_t* pairs, uint32_t num_pairs, uint8_t lg_k, const A& allocator); 56 | 57 | vector_u32 unwrapping_get_items() const; 58 | 59 | static void merge( 60 | const uint32_t* arr_a, size_t start_a, size_t length_a, // input 61 | const uint32_t* arr_b, size_t start_b, size_t length_b, // input 62 | uint32_t* arr_c, size_t start_c // output 63 | ); 64 | 65 | static void introspective_insertion_sort(uint32_t* a, size_t l, size_t r); 66 | static void knuth_shell_sort3(uint32_t* a, size_t l, size_t r); 67 | 68 | private: 69 | 70 | uint8_t lg_size; // log2 of number of slots 71 | uint8_t num_valid_bits; 72 | uint32_t num_items; 73 | vector_u32 slots; 74 | 75 | inline uint32_t lookup(uint32_t item) const; 76 | inline void must_insert(uint32_t item); 77 | inline void rebuild(uint8_t new_lg_size); 78 | }; 79 | 80 | } /* namespace datasketches */ 81 | 82 | #include "u32_table_impl.hpp" 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /datasketches/hll/AuxHashMap.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _AUXHASHMAP_HPP_ 21 | #define _AUXHASHMAP_HPP_ 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "coupon_iterator.hpp" 28 | 29 | namespace datasketches { 30 | 31 | template 32 | class AuxHashMap final { 33 | public: 34 | AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator); 35 | static AuxHashMap* newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator); 36 | static AuxHashMap* newAuxHashMap(const AuxHashMap& that); 37 | 38 | static AuxHashMap* deserialize(const void* bytes, size_t len, 39 | uint8_t lgConfigK, 40 | uint32_t auxCount, uint8_t lgAuxArrInts, 41 | bool srcCompact, const A& allocator); 42 | static AuxHashMap* deserialize(std::istream& is, uint8_t lgConfigK, 43 | uint32_t auxCount, uint8_t lgAuxArrInts, 44 | bool srcCompact, const A& allocator); 45 | virtual ~AuxHashMap() = default; 46 | static std::function*)> make_deleter(); 47 | 48 | AuxHashMap* copy() const; 49 | uint32_t getUpdatableSizeBytes() const; 50 | uint32_t getCompactSizeBytes() const; 51 | 52 | uint32_t getAuxCount() const; 53 | uint32_t* getAuxIntArr(); 54 | uint8_t getLgAuxArrInts() const; 55 | 56 | coupon_iterator begin(bool all = false) const; 57 | coupon_iterator end() const; 58 | 59 | void mustAdd(uint32_t slotNo, uint8_t value); 60 | uint8_t mustFindValueFor(uint32_t slotNo) const; 61 | void mustReplace(uint32_t slotNo, uint8_t value); 62 | 63 | private: 64 | typedef typename std::allocator_traits::template rebind_alloc> ahmAlloc; 65 | 66 | using vector_int = 
std::vector::template rebind_alloc>; 67 | 68 | // static so it can be used when resizing 69 | static int32_t find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo); 70 | 71 | void checkGrow(); 72 | void growAuxSpace(); 73 | 74 | const uint8_t lgConfigK; 75 | uint8_t lgAuxArrInts; 76 | uint32_t auxCount; 77 | vector_int entries; 78 | }; 79 | 80 | } 81 | 82 | #include "AuxHashMap-internal.hpp" 83 | 84 | #endif /* _AUXHASHMAP_HPP_ */ 85 | -------------------------------------------------------------------------------- /datasketches/hll/CompositeInterpolationXTable.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _COMPOSITEINTERPOLATIONXTABLE_HPP_ 21 | #define _COMPOSITEINTERPOLATIONXTABLE_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | template> 28 | class CompositeInterpolationXTable { 29 | public: 30 | static uint32_t get_y_stride(uint8_t logK); 31 | 32 | static const double* get_x_arr(uint8_t logK); 33 | static uint32_t get_x_arr_length(); 34 | }; 35 | 36 | } 37 | 38 | #include "CompositeInterpolationXTable-internal.hpp" 39 | 40 | #endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */ 41 | -------------------------------------------------------------------------------- /datasketches/hll/CouponHashSet.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _COUPONHASHSET_HPP_ 21 | #define _COUPONHASHSET_HPP_ 22 | 23 | #include "CouponList.hpp" 24 | 25 | namespace datasketches { 26 | 27 | template 28 | class CouponHashSet : public CouponList { 29 | public: 30 | static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator); 31 | static CouponHashSet* newSet(std::istream& is, const A& allocator); 32 | CouponHashSet(uint8_t lgConfigK, target_hll_type tgtHllType, const A& allocator); 33 | CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType); 34 | 35 | virtual ~CouponHashSet() = default; 36 | virtual std::function*)> get_deleter() const; 37 | 38 | protected: 39 | using vector_int = std::vector::template rebind_alloc>; 40 | 41 | virtual CouponHashSet* copy() const; 42 | virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const; 43 | 44 | virtual HllSketchImpl* couponUpdate(uint32_t coupon); 45 | 46 | virtual uint32_t getMemDataStart() const; 47 | virtual uint8_t getPreInts() const; 48 | 49 | friend class HllSketchImplFactory; 50 | 51 | private: 52 | using ChsAlloc = typename std::allocator_traits::template rebind_alloc>; 53 | bool checkGrowOrPromote(); 54 | void growHashSet(uint8_t tgtLgCoupArrSize); 55 | }; 56 | 57 | } 58 | 59 | #endif /* _COUPONHASHSET_HPP_ */ 60 | -------------------------------------------------------------------------------- /datasketches/hll/CouponList.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _COUPONLIST_HPP_ 21 | #define _COUPONLIST_HPP_ 22 | 23 | #include "HllSketchImpl.hpp" 24 | #include "coupon_iterator.hpp" 25 | 26 | #include 27 | 28 | namespace datasketches { 29 | 30 | template 31 | class HllSketchImplFactory; 32 | 33 | template 34 | class CouponList : public HllSketchImpl { 35 | public: 36 | CouponList(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator); 37 | CouponList(const CouponList& that, target_hll_type tgtHllType); 38 | 39 | static CouponList* newList(const void* bytes, size_t len, const A& allocator); 40 | static CouponList* newList(std::istream& is, const A& allocator); 41 | virtual vector_u8 serialize(bool compact, unsigned header_size_bytes) const; 42 | virtual void serialize(std::ostream& os, bool compact) const; 43 | 44 | virtual ~CouponList() = default; 45 | virtual std::function*)> get_deleter() const; 46 | 47 | virtual CouponList* copy() const; 48 | virtual CouponList* copyAs(target_hll_type tgtHllType) const; 49 | 50 | virtual HllSketchImpl* couponUpdate(uint32_t coupon); 51 | 52 | virtual double getEstimate() const; 53 | virtual double getCompositeEstimate() const; 54 | virtual double getUpperBound(uint8_t numStdDev) const; 55 | virtual double getLowerBound(uint8_t numStdDev) const; 56 | 57 | virtual bool isEmpty() const; 58 | virtual uint32_t getCouponCount() const; 59 | 60 | coupon_iterator begin(bool all = false) const; 61 | coupon_iterator end() const; 62 | 63 | protected: 64 | using ClAlloc = typename 
std::allocator_traits::template rebind_alloc>; 65 | 66 | using vector_int = std::vector::template rebind_alloc>; 67 | 68 | HllSketchImpl* promoteHeapListToSet(CouponList& list); 69 | HllSketchImpl* promoteHeapListOrSetToHll(CouponList& src); 70 | 71 | virtual uint32_t getUpdatableSerializationBytes() const; 72 | virtual uint32_t getCompactSerializationBytes() const; 73 | virtual uint32_t getMemDataStart() const; 74 | virtual uint8_t getPreInts() const; 75 | virtual bool isCompact() const; 76 | virtual bool isOutOfOrderFlag() const; 77 | virtual void putOutOfOrderFlag(bool oooFlag); 78 | 79 | virtual A getAllocator() const; 80 | 81 | uint32_t couponCount_; 82 | bool oooFlag_; 83 | vector_int coupons_; 84 | 85 | friend class HllSketchImplFactory; 86 | }; 87 | 88 | } 89 | 90 | #endif /* _COUPONLIST_HPP_ */ 91 | -------------------------------------------------------------------------------- /datasketches/hll/CubicInterpolation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _CUBICINTERPOLATION_HPP_ 21 | #define _CUBICINTERPOLATION_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | template> 28 | class CubicInterpolation { 29 | public: 30 | static double usingXAndYTables(const double xArr[], const double yArr[], 31 | int len, double x); 32 | 33 | static double usingXAndYTables(double x); 34 | 35 | static double usingXArrAndYStride(const double xArr[], const int xArrLen, 36 | double yStride, double x); 37 | }; 38 | 39 | } 40 | 41 | #include "CubicInterpolation-internal.hpp" 42 | 43 | #endif /* _CUBICINTERPOLATION_HPP_ */ 44 | -------------------------------------------------------------------------------- /datasketches/hll/HarmonicNumbers-internal.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _HARMONICNUMBERS_INTERNAL_HPP_ 21 | #define _HARMONICNUMBERS_INTERNAL_HPP_ 22 | 23 | #include "HarmonicNumbers.hpp" 24 | 25 | #include 26 | 27 | namespace datasketches { 28 | 29 | template 30 | double HarmonicNumbers::getBitMapEstimate(const int bitVectorLength, const int numBitsSet) { 31 | return (bitVectorLength * (harmonicNumber(bitVectorLength) - harmonicNumber(bitVectorLength - numBitsSet))); 32 | } 33 | 34 | static const int NUM_EXACT_HARMONIC_NUMBERS = 25; 35 | 36 | static double tableOfExactHarmonicNumbers[] = { 37 | 0.0, // 0 38 | 1.0, // 1 39 | 1.5, // 2 40 | 11.0 / 6.0, // 3 41 | 25.0 / 12.0, // 4 42 | 137.0 / 60.0, // 5 43 | 49.0 / 20.0, // 6 44 | 363.0 / 140.0, // 7 45 | 761.0 / 280.0, // 8 46 | 7129.0 / 2520.0, // 9 47 | 7381.0 / 2520.0, // 10 48 | 83711.0 / 27720.0, // 11 49 | 86021.0 / 27720.0, // 12 50 | 1145993.0 / 360360.0, // 13 51 | 1171733.0 / 360360.0, // 14 52 | 1195757.0 / 360360.0, // 15 53 | 2436559.0 / 720720.0, // 16 54 | 42142223.0 / 12252240.0, // 17 55 | 14274301.0 / 4084080.0, // 18 56 | 275295799.0 / 77597520.0, // 19 57 | 55835135.0 / 15519504.0, // 20 58 | 18858053.0 / 5173168.0, // 21 59 | 19093197.0 / 5173168.0, // 22 60 | 444316699.0 / 118982864.0, // 23 61 | 1347822955.0 / 356948592.0 // 24 62 | }; 63 | 64 | static const double EULER_MASCHERONI_CONSTANT = 0.577215664901532860606512090082; 65 | 66 | template 67 | double HarmonicNumbers::harmonicNumber(const uint64_t x_i) { 68 | if (x_i < NUM_EXACT_HARMONIC_NUMBERS) { 69 | return tableOfExactHarmonicNumbers[x_i]; 70 | } else { 71 | double x = static_cast(x_i); 72 | double invSq = 1.0 / (x * x); 73 | double sum = log(x) + EULER_MASCHERONI_CONSTANT + (1.0 / (2.0 * x)); 74 | /* note: the number of terms included from this series expansion is appropriate 75 | for the size of the exact table (25) and the precision of doubles */ 76 | double pow = invSq; // now n^-2 77 | sum -= pow * (1.0 / 12.0); 78 | pow *= invSq; // now n^-4 79 | sum += pow * (1.0 / 
120.0); 80 | pow *= invSq; /* now n^-6 */ 81 | sum -= pow * (1.0 / 252.0); 82 | pow *= invSq; /* now n^-8 */ 83 | sum += pow * (1.0 / 240.0); 84 | return sum; 85 | } 86 | } 87 | 88 | } 89 | 90 | #endif // _HARMONICNUMBERS_INTERNAL_HPP_ 91 | -------------------------------------------------------------------------------- /datasketches/hll/HarmonicNumbers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _HARMONICNUMBERS_HPP_ 21 | #define _HARMONICNUMBERS_HPP_ 22 | 23 | #include 24 | #include 25 | 26 | namespace datasketches { 27 | 28 | template> 29 | class HarmonicNumbers { 30 | public: 31 | /** 32 | * This is the estimator you would use for flat bit map random accessed, similar to a Bloom filter. 33 | * @param bitVectorLength the length of the bit vector in bits. Must be > 0. 34 | * @param numBitsSet the number of bits set in this bit vector. Must be ≥ 0 and ≤ 35 | * bitVectorLength. 36 | * @return the estimate. 
37 | */ 38 | static double getBitMapEstimate(int bitVectorLength, int numBitsSet); 39 | 40 | private: 41 | static double harmonicNumber(uint64_t x_i); 42 | }; 43 | 44 | } 45 | 46 | #include "HarmonicNumbers-internal.hpp" 47 | 48 | #endif /* _HARMONICNUMBERS_HPP_ */ 49 | -------------------------------------------------------------------------------- /datasketches/hll/Hll4Array.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _HLL4ARRAY_HPP_ 21 | #define _HLL4ARRAY_HPP_ 22 | 23 | #include "AuxHashMap.hpp" 24 | #include "HllArray.hpp" 25 | 26 | namespace datasketches { 27 | 28 | template 29 | class Hll4Iterator; 30 | 31 | template 32 | class Hll4Array final : public HllArray { 33 | public: 34 | explicit Hll4Array(uint8_t lgConfigK, bool startFullSize, const A& allocator); 35 | explicit Hll4Array(const Hll4Array& that); 36 | 37 | virtual ~Hll4Array(); 38 | virtual std::function*)> get_deleter() const; 39 | 40 | virtual Hll4Array* copy() const; 41 | 42 | inline uint8_t getSlot(uint32_t slotNo) const; 43 | inline void putSlot(uint32_t slotNo, uint8_t value); 44 | inline uint8_t get_value(uint32_t index) const; 45 | 46 | virtual uint32_t getUpdatableSerializationBytes() const; 47 | virtual uint32_t getHllByteArrBytes() const; 48 | 49 | virtual HllSketchImpl* couponUpdate(uint32_t coupon) final; 50 | void mergeHll(const HllArray& src); 51 | 52 | virtual AuxHashMap* getAuxHashMap() const; 53 | // does *not* delete old map if overwriting 54 | void putAuxHashMap(AuxHashMap* auxHashMap); 55 | 56 | virtual typename HllArray::const_iterator begin(bool all = false) const; 57 | virtual typename HllArray::const_iterator end() const; 58 | 59 | private: 60 | void internalCouponUpdate(uint32_t coupon); 61 | void internalHll4Update(uint32_t slotNo, uint8_t newVal); 62 | void shiftToBiggerCurMin(); 63 | 64 | AuxHashMap* auxHashMap_; 65 | }; 66 | 67 | } 68 | 69 | #endif /* _HLL4ARRAY_HPP_ */ 70 | -------------------------------------------------------------------------------- /datasketches/hll/Hll6Array-internal.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _HLL6ARRAY_INTERNAL_HPP_ 21 | #define _HLL6ARRAY_INTERNAL_HPP_ 22 | 23 | #include 24 | 25 | #include "Hll6Array.hpp" 26 | 27 | namespace datasketches { 28 | 29 | template 30 | Hll6Array::Hll6Array(uint8_t lgConfigK, bool startFullSize, const A& allocator): 31 | HllArray(lgConfigK, target_hll_type::HLL_6, startFullSize, allocator) 32 | { 33 | const int numBytes = this->hll6ArrBytes(lgConfigK); 34 | this->hllByteArr_.resize(numBytes, 0); 35 | } 36 | 37 | template 38 | std::function*)> Hll6Array::get_deleter() const { 39 | return [](HllSketchImpl* ptr) { 40 | using Hll6Alloc = typename std::allocator_traits::template rebind_alloc>; 41 | Hll6Array* hll = static_cast*>(ptr); 42 | Hll6Alloc hll6Alloc(hll->getAllocator()); 43 | hll->~Hll6Array(); 44 | hll6Alloc.deallocate(hll, 1); 45 | }; 46 | } 47 | 48 | template 49 | Hll6Array* Hll6Array::copy() const { 50 | using Hll6Alloc = typename std::allocator_traits::template rebind_alloc>; 51 | Hll6Alloc hll6Alloc(this->getAllocator()); 52 | return new (hll6Alloc.allocate(1)) Hll6Array(*this); 53 | } 54 | 55 | template 56 | uint8_t Hll6Array::getSlot(uint32_t slotNo) const { 57 | const uint32_t startBit = slotNo * 6; 58 | const uint32_t shift = startBit & 0x7; 59 | const uint32_t byteIdx = startBit >> 3; 60 | const uint16_t twoByteVal = (this->hllByteArr_[byteIdx + 1] << 8) | 
this->hllByteArr_[byteIdx]; 61 | return (twoByteVal >> shift) & hll_constants::VAL_MASK_6; 62 | } 63 | 64 | template 65 | void Hll6Array::putSlot(uint32_t slotNo, uint8_t value) { 66 | const uint32_t startBit = slotNo * 6; 67 | const uint32_t shift = startBit & 0x7; 68 | const uint32_t byteIdx = startBit >> 3; 69 | const uint16_t valShifted = (value & 0x3F) << shift; 70 | uint16_t curMasked = (this->hllByteArr_[byteIdx + 1] << 8) | this->hllByteArr_[byteIdx]; 71 | curMasked &= (~(hll_constants::VAL_MASK_6 << shift)); 72 | const uint16_t insert = curMasked | valShifted; 73 | this->hllByteArr_[byteIdx] = insert & 0xFF; 74 | this->hllByteArr_[byteIdx + 1] = (insert & 0xFF00) >> 8; 75 | } 76 | 77 | template 78 | uint32_t Hll6Array::getHllByteArrBytes() const { 79 | return this->hll6ArrBytes(this->lgConfigK_); 80 | } 81 | 82 | template 83 | HllSketchImpl* Hll6Array::couponUpdate(uint32_t coupon) { 84 | internalCouponUpdate(coupon); 85 | return this; 86 | } 87 | 88 | template 89 | void Hll6Array::internalCouponUpdate(uint32_t coupon) { 90 | const uint32_t configKmask = (1 << this->lgConfigK_) - 1; 91 | const uint32_t slotNo = HllUtil::getLow26(coupon) & configKmask; 92 | const uint8_t newVal = HllUtil::getValue(coupon); 93 | 94 | const uint8_t curVal = getSlot(slotNo); 95 | if (newVal > curVal) { 96 | putSlot(slotNo, newVal); 97 | this->hipAndKxQIncrementalUpdate(curVal, newVal); 98 | if (curVal == 0) { 99 | this->numAtCurMin_--; // interpret numAtCurMin as num zeros 100 | } 101 | } 102 | } 103 | 104 | template 105 | void Hll6Array::mergeHll(const HllArray& src) { 106 | for (const auto coupon: src) { 107 | internalCouponUpdate(coupon); 108 | } 109 | } 110 | 111 | } 112 | 113 | #endif // _HLL6ARRAY_INTERNAL_HPP_ 114 | -------------------------------------------------------------------------------- /datasketches/hll/Hll6Array.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) 
under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _HLL6ARRAY_HPP_ 21 | #define _HLL6ARRAY_HPP_ 22 | 23 | #include "HllArray.hpp" 24 | 25 | namespace datasketches { 26 | 27 | template 28 | class Hll6Iterator; 29 | 30 | template 31 | class Hll6Array final : public HllArray { 32 | public: 33 | Hll6Array(uint8_t lgConfigK, bool startFullSize, const A& allocator); 34 | 35 | virtual ~Hll6Array() = default; 36 | virtual std::function*)> get_deleter() const; 37 | 38 | virtual Hll6Array* copy() const; 39 | 40 | inline uint8_t getSlot(uint32_t slotNo) const; 41 | inline void putSlot(uint32_t slotNo, uint8_t value); 42 | 43 | virtual HllSketchImpl* couponUpdate(uint32_t coupon) final; 44 | void mergeHll(const HllArray& src); 45 | 46 | virtual uint32_t getHllByteArrBytes() const; 47 | 48 | private: 49 | void internalCouponUpdate(uint32_t coupon); 50 | }; 51 | 52 | } 53 | 54 | #endif /* _HLL6ARRAY_HPP_ */ 55 | -------------------------------------------------------------------------------- /datasketches/hll/Hll8Array.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _HLL8ARRAY_HPP_ 21 | #define _HLL8ARRAY_HPP_ 22 | 23 | #include "HllArray.hpp" 24 | 25 | namespace datasketches { 26 | 27 | template 28 | class Hll8Iterator; 29 | 30 | template 31 | class Hll8Array final : public HllArray { 32 | public: 33 | Hll8Array(uint8_t lgConfigK, bool startFullSize, const A& allocator); 34 | 35 | virtual ~Hll8Array() = default; 36 | virtual std::function*)> get_deleter() const; 37 | 38 | virtual Hll8Array* copy() const; 39 | 40 | inline uint8_t getSlot(uint32_t slotNo) const; 41 | inline void putSlot(uint32_t slotNo, uint8_t value); 42 | 43 | virtual HllSketchImpl* couponUpdate(uint32_t coupon) final; 44 | void mergeList(const CouponList& src); 45 | void mergeHll(const HllArray& src); 46 | 47 | virtual uint32_t getHllByteArrBytes() const; 48 | 49 | private: 50 | inline void internalCouponUpdate(uint32_t coupon); 51 | }; 52 | 53 | } 54 | 55 | #endif /* _HLL8ARRAY_HPP_ */ 56 | -------------------------------------------------------------------------------- /datasketches/hll/HllArray.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _HLLARRAY_HPP_ 21 | #define _HLLARRAY_HPP_ 22 | 23 | #include "HllSketchImpl.hpp" 24 | #include "HllUtil.hpp" 25 | 26 | namespace datasketches { 27 | 28 | template 29 | class AuxHashMap; 30 | 31 | template 32 | class HllArray : public HllSketchImpl { 33 | public: 34 | HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator); 35 | 36 | static HllArray* newHll(const void* bytes, size_t len, const A& allocator); 37 | static HllArray* newHll(std::istream& is, const A& allocator); 38 | 39 | virtual vector_u8 serialize(bool compact, unsigned header_size_bytes) const; 40 | virtual void serialize(std::ostream& os, bool compact) const; 41 | 42 | virtual ~HllArray() = default; 43 | virtual std::function*)> get_deleter() const = 0; 44 | 45 | virtual HllArray* copy() const = 0; 46 | virtual HllArray* copyAs(target_hll_type tgtHllType) const; 47 | 48 | virtual HllSketchImpl* couponUpdate(uint32_t coupon) = 0; 49 | 50 | virtual double getEstimate() const; 51 | virtual double getCompositeEstimate() const; 52 | virtual double getLowerBound(uint8_t numStdDev) const; 53 | virtual double getUpperBound(uint8_t numStdDev) const; 54 | 55 | inline void addToHipAccum(double delta); 56 | 57 | inline 
void decNumAtCurMin(); 58 | 59 | inline uint8_t getCurMin() const; 60 | inline uint32_t getNumAtCurMin() const; 61 | inline double getHipAccum() const; 62 | 63 | virtual uint32_t getHllByteArrBytes() const = 0; 64 | 65 | virtual uint32_t getUpdatableSerializationBytes() const; 66 | virtual uint32_t getCompactSerializationBytes() const; 67 | 68 | virtual bool isOutOfOrderFlag() const; 69 | virtual bool isEmpty() const; 70 | virtual bool isCompact() const; 71 | 72 | virtual void putOutOfOrderFlag(bool flag); 73 | 74 | inline double getKxQ0() const; 75 | inline double getKxQ1() const; 76 | 77 | virtual uint32_t getMemDataStart() const; 78 | virtual uint8_t getPreInts() const; 79 | 80 | void putCurMin(uint8_t curMin); 81 | void putHipAccum(double hipAccum); 82 | inline void putKxQ0(double kxq0); 83 | inline void putKxQ1(double kxq1); 84 | void putNumAtCurMin(uint32_t numAtCurMin); 85 | 86 | static uint32_t hllArrBytes(target_hll_type tgtHllType, uint8_t lgConfigK); 87 | static uint32_t hll4ArrBytes(uint8_t lgConfigK); 88 | static uint32_t hll6ArrBytes(uint8_t lgConfigK); 89 | static uint32_t hll8ArrBytes(uint8_t lgConfigK); 90 | 91 | virtual AuxHashMap* getAuxHashMap() const; 92 | 93 | class const_iterator; 94 | virtual const_iterator begin(bool all = false) const; 95 | virtual const_iterator end() const; 96 | 97 | virtual A getAllocator() const; 98 | 99 | protected: 100 | void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue); 101 | double getHllBitMapEstimate() const; 102 | double getHllRawEstimate() const; 103 | 104 | double hipAccum_; 105 | double kxq0_; 106 | double kxq1_; 107 | vector_u8 hllByteArr_; //init by sub-classes 108 | uint8_t curMin_; //always zero for Hll6 and Hll8, only tracked by Hll4Array 109 | uint32_t numAtCurMin_; //interpreted as num zeros when curMin == 0 110 | bool oooFlag_; //Out-Of-Order Flag 111 | 112 | friend class HllSketchImplFactory; 113 | }; 114 | 115 | template 116 | class HllArray::const_iterator: public std::iterator { 
117 | public: 118 | const_iterator(const uint8_t* array, uint32_t array_slze, uint32_t index, target_hll_type hll_type, const AuxHashMap* exceptions, uint8_t offset, bool all); 119 | const_iterator& operator++(); 120 | bool operator!=(const const_iterator& other) const; 121 | uint32_t operator*() const; 122 | private: 123 | const uint8_t* array_; 124 | uint32_t array_size_; 125 | uint32_t index_; 126 | target_hll_type hll_type_; 127 | const AuxHashMap* exceptions_; 128 | uint8_t offset_; 129 | bool all_; 130 | uint8_t value_; // cached value to avoid computing in operator++ and in operator*() 131 | static inline uint8_t get_value(const uint8_t* array, uint32_t index, target_hll_type hll_type, const AuxHashMap* exceptions, uint8_t offset); 132 | }; 133 | 134 | } 135 | 136 | #endif /* _HLLARRAY_HPP_ */ 137 | -------------------------------------------------------------------------------- /datasketches/hll/HllSketchImpl-internal.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _HLLSKETCHIMPL_INTERNAL_HPP_ 21 | #define _HLLSKETCHIMPL_INTERNAL_HPP_ 22 | 23 | #include "HllSketchImpl.hpp" 24 | #include "HllSketchImplFactory.hpp" 25 | 26 | namespace datasketches { 27 | 28 | template 29 | HllSketchImpl::HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType, 30 | hll_mode mode, bool startFullSize) 31 | : lgConfigK_(lgConfigK), 32 | tgtHllType_(tgtHllType), 33 | mode_(mode), 34 | startFullSize_(startFullSize) 35 | { 36 | } 37 | 38 | template 39 | HllSketchImpl::~HllSketchImpl() { 40 | } 41 | 42 | template 43 | target_hll_type HllSketchImpl::extractTgtHllType(uint8_t modeByte) { 44 | switch ((modeByte >> 2) & 0x3) { 45 | case 0: 46 | return target_hll_type::HLL_4; 47 | case 1: 48 | return target_hll_type::HLL_6; 49 | case 2: 50 | return target_hll_type::HLL_8; 51 | default: 52 | throw std::invalid_argument("Invalid target HLL type"); 53 | } 54 | } 55 | 56 | template 57 | hll_mode HllSketchImpl::extractCurMode(uint8_t modeByte) { 58 | switch (modeByte & 0x3) { 59 | case 0: 60 | return hll_mode::LIST; 61 | case 1: 62 | return hll_mode::SET; 63 | case 2: 64 | return hll_mode::HLL; 65 | default: 66 | throw std::invalid_argument("Invalid current sketch mode"); 67 | } 68 | } 69 | 70 | template 71 | uint8_t HllSketchImpl::makeFlagsByte(bool compact) const { 72 | uint8_t flags = 0; 73 | flags |= (isEmpty() ? hll_constants::EMPTY_FLAG_MASK : 0); 74 | flags |= (compact ? hll_constants::COMPACT_FLAG_MASK : 0); 75 | flags |= (isOutOfOrderFlag() ? hll_constants::OUT_OF_ORDER_FLAG_MASK : 0); 76 | flags |= (startFullSize_ ? 
hll_constants::FULL_SIZE_FLAG_MASK : 0); 77 | return flags; 78 | } 79 | 80 | // lo2bits = curMode, next 2 bits = tgtHllType 81 | // Dec Lo4Bits TgtHllType, CurMode 82 | // 0 0000 HLL_4, LIST 83 | // 1 0001 HLL_4, SET 84 | // 2 0010 HLL_4, HLL 85 | // 4 0100 HLL_6, LIST 86 | // 5 0101 HLL_6, SET 87 | // 6 0110 HLL_6, HLL 88 | // 8 1000 HLL_8, LIST 89 | // 9 1001 HLL_8, SET 90 | // 10 1010 HLL_8, HLL 91 | template 92 | uint8_t HllSketchImpl::makeModeByte() const { 93 | uint8_t byte = 0; 94 | 95 | switch (mode_) { 96 | case LIST: 97 | byte = 0; 98 | break; 99 | case SET: 100 | byte = 1; 101 | break; 102 | case HLL: 103 | byte = 2; 104 | break; 105 | } 106 | 107 | switch (tgtHllType_) { 108 | case HLL_4: 109 | byte |= (0 << 2); // for completeness 110 | break; 111 | case HLL_6: 112 | byte |= (1 << 2); 113 | break; 114 | case HLL_8: 115 | byte |= (2 << 2); 116 | break; 117 | } 118 | 119 | return byte; 120 | } 121 | 122 | template 123 | HllSketchImpl* HllSketchImpl::reset() { 124 | return HllSketchImplFactory::reset(this, startFullSize_); 125 | } 126 | 127 | template 128 | target_hll_type HllSketchImpl::getTgtHllType() const { 129 | return tgtHllType_; 130 | } 131 | 132 | template 133 | uint8_t HllSketchImpl::getLgConfigK() const { 134 | return lgConfigK_; 135 | } 136 | 137 | template 138 | hll_mode HllSketchImpl::getCurMode() const { 139 | return mode_; 140 | } 141 | 142 | template 143 | bool HllSketchImpl::isStartFullSize() const { 144 | return startFullSize_; 145 | } 146 | 147 | } 148 | 149 | #endif // _HLLSKETCHIMPL_INTERNAL_HPP_ 150 | -------------------------------------------------------------------------------- /datasketches/hll/HllSketchImpl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _HLLSKETCHIMPL_HPP_ 21 | #define _HLLSKETCHIMPL_HPP_ 22 | 23 | #include "HllUtil.hpp" 24 | #include "hll.hpp" // for TgtHllType 25 | 26 | #include 27 | 28 | namespace datasketches { 29 | 30 | template 31 | class HllSketchImpl { 32 | public: 33 | HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize); 34 | virtual ~HllSketchImpl(); 35 | 36 | virtual void serialize(std::ostream& os, bool compact) const = 0; 37 | virtual vector_u8 serialize(bool compact, unsigned header_size_bytes) const = 0; 38 | 39 | virtual HllSketchImpl* copy() const = 0; 40 | virtual HllSketchImpl* copyAs(target_hll_type tgtHllType) const = 0; 41 | HllSketchImpl* reset(); 42 | 43 | virtual std::function*)> get_deleter() const = 0; 44 | 45 | virtual HllSketchImpl* couponUpdate(uint32_t coupon) = 0; 46 | 47 | hll_mode getCurMode() const; 48 | 49 | virtual double getEstimate() const = 0; 50 | virtual double getCompositeEstimate() const = 0; 51 | virtual double getUpperBound(uint8_t numStdDev) const = 0; 52 | virtual double getLowerBound(uint8_t numStdDev) const = 0; 53 | 54 | inline uint8_t getLgConfigK() const; 55 | 56 | virtual uint32_t getMemDataStart() const = 0; 57 | 58 | virtual uint8_t getPreInts() const = 0; 59 | 60 | target_hll_type getTgtHllType() const; 61 | 62 | virtual uint32_t getUpdatableSerializationBytes() 
const = 0; 63 | virtual uint32_t getCompactSerializationBytes() const = 0; 64 | 65 | virtual bool isCompact() const = 0; 66 | virtual bool isEmpty() const = 0; 67 | virtual bool isOutOfOrderFlag() const = 0; 68 | virtual void putOutOfOrderFlag(bool oooFlag) = 0; 69 | virtual A getAllocator() const = 0; 70 | bool isStartFullSize() const; 71 | 72 | protected: 73 | static target_hll_type extractTgtHllType(uint8_t modeByte); 74 | static hll_mode extractCurMode(uint8_t modeByte); 75 | uint8_t makeFlagsByte(bool compact) const; 76 | uint8_t makeModeByte() const; 77 | 78 | const uint8_t lgConfigK_; 79 | const target_hll_type tgtHllType_; 80 | const hll_mode mode_; 81 | const bool startFullSize_; 82 | }; 83 | 84 | } 85 | 86 | #endif // _HLLSKETCHIMPL_HPP_ 87 | -------------------------------------------------------------------------------- /datasketches/hll/RelativeErrorTables-internal.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _RELATIVEERRORTABLES_INTERNAL_HPP_ 21 | #define _RELATIVEERRORTABLES_INTERNAL_HPP_ 22 | 23 | #include "RelativeErrorTables.hpp" 24 | 25 | namespace datasketches { 26 | 27 | //case 0 28 | static double HIP_LB[] = //sd 1, 2, 3 29 | { //Q(.84134), Q(.97725), Q(.99865) respectively 30 | 0.207316195, 0.502865572, 0.882303765, //4 31 | 0.146981579, 0.335426881, 0.557052, //5 32 | 0.104026721, 0.227683872, 0.365888317, //6 33 | 0.073614601, 0.156781585, 0.245740374, //7 34 | 0.05205248, 0.108783763, 0.168030442, //8 35 | 0.036770852, 0.075727545, 0.11593785, //9 36 | 0.025990219, 0.053145536, 0.080772263, //10 37 | 0.018373987, 0.037266176, 0.056271814, //11 38 | 0.012936253, 0.02613829, 0.039387631, //12 39 | }; 40 | 41 | //case 1 42 | static double HIP_UB[] = //sd 1, 2, 3 43 | { //Q(.15866), Q(.02275), Q(.00135) respectively 44 | -0.207805347, -0.355574279, -0.475535095, //4 45 | -0.146988328, -0.262390832, -0.360864026, //5 46 | -0.103877775, -0.191503663, -0.269311582, //6 47 | -0.073452978, -0.138513438, -0.198487447, //7 48 | -0.051982806, -0.099703123, -0.144128618, //8 49 | -0.036768609, -0.07138158, -0.104430324, //9 50 | -0.025991325, -0.050854296, -0.0748143, //10 51 | -0.01834533, -0.036121138, -0.05327616, //11 52 | -0.012920332, -0.025572893, -0.037896952, //12 53 | }; 54 | 55 | //case 2 56 | static double NON_HIP_LB[] = //sd 1, 2, 3` 57 | { //Q(.84134), Q(.97725), Q(.99865) respectively 58 | 0.254409839, 0.682266712, 1.304022158, //4 59 | 0.181817353, 0.443389054, 0.778776219, //5 60 | 0.129432281, 0.295782195, 0.49252279, //6 61 | 0.091640655, 0.201175925, 0.323664385, //7 62 | 0.064858051, 0.138523393, 0.218805328, //8 63 | 0.045851855, 0.095925072, 0.148635751, //9 64 | 0.032454144, 0.067009668, 0.102660669, //10 65 | 0.022921382, 0.046868565, 0.071307398, //11 66 | 0.016155679, 0.032825719, 0.049677541 //12 67 | }; 68 | 69 | //case 3 70 | static double NON_HIP_UB[] = //sd 1, 2, 3 71 | { //Q(.15866), Q(.02275), Q(.00135) 
respectively 72 | -0.256980172, -0.411905944, -0.52651057, //4 73 | -0.182332109, -0.310275547, -0.412660505, //5 74 | -0.129314228, -0.230142294, -0.315636197, //6 75 | -0.091584836, -0.16834013, -0.236346847, //7 76 | -0.06487411, -0.122045231, -0.174112107, //8 77 | -0.04591465, -0.08784505, -0.126917615, //9 78 | -0.032433119, -0.062897613, -0.091862929, //10 79 | -0.022960633, -0.044875401, -0.065736049, //11 80 | -0.016186662, -0.031827816, -0.046973459 //12 81 | }; 82 | 83 | template 84 | double RelativeErrorTables::getRelErr(const bool upperBound, const bool oooFlag, 85 | const int lgK, const int stdDev) { 86 | const int idx = ((lgK - 4) * 3) + (stdDev - 1); 87 | const int sw = (oooFlag ? 2 : 0) | (upperBound ? 1 : 0); 88 | double f = 0; 89 | switch (sw) { 90 | case 0 : { // HIP, LB 91 | f = HIP_LB[idx]; 92 | break; 93 | } 94 | case 1 : { // HIP, UB 95 | f = HIP_UB[idx]; 96 | break; 97 | } 98 | case 2 : { // NON_HIP, LB 99 | f = NON_HIP_LB[idx]; 100 | break; 101 | } 102 | case 3 : { // NON_HIP, UB 103 | f = NON_HIP_UB[idx]; 104 | break; 105 | } 106 | } 107 | return f; 108 | } 109 | 110 | } 111 | 112 | #endif // _RELATIVEERRORTABLES_INTERNAL_HPP_ -------------------------------------------------------------------------------- /datasketches/hll/RelativeErrorTables.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _RELATIVEERRORTABLES_HPP_ 21 | #define _RELATIVEERRORTABLES_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | template> 28 | class RelativeErrorTables { 29 | public: 30 | /** 31 | * Return Relative Error for UB or LB for HIP or Non-HIP as a function of numStdDev. 32 | * @param upperBound true if for upper bound 33 | * @param oooFlag true if for Non-HIP 34 | * @param lgK must be between 4 and 12 inclusive 35 | * @param stdDev must be between 1 and 3 inclusive 36 | * @return Relative Error for UB or LB for HIP or Non-HIP as a function of numStdDev. 37 | */ 38 | static double getRelErr(bool upperBound, bool oooFlag, 39 | int lgK, int stdDev); 40 | }; 41 | 42 | } 43 | 44 | #include "RelativeErrorTables-internal.hpp" 45 | 46 | #endif /* _RELATIVEERRORTABLES_HPP_ */ 47 | -------------------------------------------------------------------------------- /datasketches/hll/coupon_iterator-internal.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _INTARRAYPAIRITERATOR_INTERNAL_HPP_ 21 | #define _INTARRAYPAIRITERATOR_INTERNAL_HPP_ 22 | 23 | #include "HllUtil.hpp" 24 | 25 | namespace datasketches { 26 | 27 | template 28 | coupon_iterator::coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all): 29 | array_(array), array_size_(array_size), index_(index), all_(all) { 30 | while (index_ < array_size_) { 31 | if (all_ || array_[index_] != hll_constants::EMPTY) break; 32 | ++index_; 33 | } 34 | } 35 | 36 | template 37 | coupon_iterator& coupon_iterator::operator++() { 38 | while (++index_ < array_size_) { 39 | if (all_ || array_[index_] != hll_constants::EMPTY) break; 40 | } 41 | return *this; 42 | } 43 | 44 | template 45 | bool coupon_iterator::operator!=(const coupon_iterator& other) const { 46 | return index_ != other.index_; 47 | } 48 | 49 | template 50 | uint32_t coupon_iterator::operator*() const { 51 | return array_[index_]; 52 | } 53 | 54 | } 55 | 56 | #endif // _INTARRAYPAIRITERATOR_INTERNAL_HPP_ 57 | -------------------------------------------------------------------------------- /datasketches/hll/coupon_iterator.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _INTARRAYPAIRITERATOR_HPP_ 21 | #define _INTARRAYPAIRITERATOR_HPP_ 22 | 23 | namespace datasketches { 24 | 25 | template 26 | class coupon_iterator: public std::iterator { 27 | public: 28 | coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all); // all == true: EMPTY entries are not skipped 29 | coupon_iterator& operator++(); 30 | bool operator!=(const coupon_iterator& other) const; 31 | uint32_t operator*() const; 32 | private: 33 | const uint32_t* array_; 34 | size_t array_size_; 35 | size_t index_; 36 | bool all_; 37 | }; 38 | 39 | } 40 | 41 | #include "coupon_iterator-internal.hpp" 42 | 43 | #endif /* _INTARRAYPAIRITERATOR_HPP_ */ 44 | -------------------------------------------------------------------------------- /datasketches/hll/hll.private.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _HLL_PRIVATE_HPP_ 2 | #define _HLL_PRIVATE_HPP_ 3 | 4 | #include "AuxHashMap.hpp" 5 | #include "CompositeInterpolationXTable.hpp" 6 | #include "CouponHashSet.hpp" 7 | #include "CouponList.hpp" 8 | #include "CubicInterpolation.hpp" 9 | #include "HarmonicNumbers.hpp" 10 | #include "Hll4Array.hpp" 11 | #include "Hll6Array.hpp" 12 | #include "Hll8Array.hpp" 13 | #include "HllArray.hpp" 14 | #include "HllSketchImpl.hpp" 15 | #include "HllSketchImplFactory.hpp" 16 | #include "HllUtil.hpp" 17 | #include
"RelativeErrorTables.hpp" 18 | 19 | #include "AuxHashMap-internal.hpp" 20 | #include "coupon_iterator.hpp" 21 | #include "CouponHashSet-internal.hpp" 22 | #include "CouponList-internal.hpp" 23 | #include "Hll4Array-internal.hpp" 24 | #include "Hll6Array-internal.hpp" 25 | #include "Hll8Array-internal.hpp" 26 | #include "HllArray-internal.hpp" 27 | #include "HllSketch-internal.hpp" 28 | #include "HllSketchImpl-internal.hpp" 29 | #include "HllUnion-internal.hpp" 30 | #include "coupon_iterator-internal.hpp" 31 | 32 | #endif // _HLL_PRIVATE_HPP_ 33 | -------------------------------------------------------------------------------- /external/catch2/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /external/farmhash/COPYING: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Google, Inc. 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /external/tclap/AUTHORS: -------------------------------------------------------------------------------- 1 | 2 | original author: Michael E. 
Smoot 3 | current maintainer: Daniel Aarno 4 | more contributions: Erik Zeek 5 | more contributions: Fabien Carmagnac (Tinbergen-AM) 6 | outstanding editing: Carol Smoot 7 | -------------------------------------------------------------------------------- /external/tclap/ArgContainer.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: ArgContainer.h 6 | * 7 | * Copyright (c) 2018 Google LLC 8 | * All rights reserved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_ARG_CONTAINER_H 24 | #define TCLAP_ARG_CONTAINER_H 25 | 26 | namespace TCLAP { 27 | 28 | class Arg; 29 | 30 | /** 31 | * Interface that allows adding an Arg to a "container". 32 | * 33 | * A container does not have to be a container in the C++ standard 34 | * library sense, just something that wants to hold on to references 35 | * to Arg's. The container does not own the added Arg's and it is the 36 | * user's responsibility to ensure the life time (scope) of the Arg's 37 | * outlives any operations on the container. 38 | */ 39 | class ArgContainer { 40 | public: 41 | virtual ~ArgContainer() {} 42 | 43 | /** 44 | * Adds an argument. 
Ownership is not transferred. 45 | * \param a - Argument to be added. 46 | */ 47 | virtual ArgContainer &add(Arg &a) = 0; 48 | 49 | /** 50 | * Adds an argument. Ownership is not transferred. 51 | * \param a - Argument to be added. 52 | */ 53 | virtual ArgContainer &add(Arg *a) = 0; 54 | }; 55 | 56 | } // namespace TCLAP 57 | 58 | #endif // TCLAP_ARG_CONTAINER_H 59 | -------------------------------------------------------------------------------- /external/tclap/ArgTraits.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: ArgTraits.h 6 | * 7 | * Copyright (c) 2007, Daniel Aarno, Michael E. Smoot . 8 | * Copyright (c) 2017 Google LLC 9 | * All rights reserved. 10 | * 11 | * See the file COPYING in the top directory of this distribution for 12 | * more information. 13 | * 14 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | *****************************************************************************/ 23 | 24 | // This is an internal tclap file, you should probably not have to 25 | // include this directly 26 | 27 | #ifndef TCLAP_ARG_TRAITS_H 28 | #define TCLAP_ARG_TRAITS_H 29 | 30 | namespace TCLAP { 31 | 32 | // We use two empty structs to get compile-time function 33 | // specialization to work 34 | 35 | /** 36 | * A value like argument value type is a value that can be set using 37 | * operator>>.
This is the default value type. 38 | */ 39 | struct ValueLike { 40 | typedef ValueLike ValueCategory; 41 | virtual ~ValueLike() {} 42 | }; 43 | 44 | /** 45 | * A string like argument value type is a value that can be set using 46 | * operator=(string). Useful if the value type contains spaces which 47 | * will be broken up into individual tokens by operator>>. 48 | */ 49 | struct StringLike { 50 | virtual ~StringLike() {} 51 | }; 52 | 53 | /** 54 | * A class can inherit from this object to make it have string like 55 | * traits. This is a compile time thing and does not add any overhead 56 | * to the inheriting class. 57 | */ 58 | struct StringLikeTrait { 59 | typedef StringLike ValueCategory; 60 | virtual ~StringLikeTrait() {} 61 | }; 62 | 63 | /** 64 | * A class can inherit from this object to make it have value like 65 | * traits. This is a compile time thing and does not add any overhead 66 | * to the inheriting class. 67 | */ 68 | struct ValueLikeTrait { 69 | typedef ValueLike ValueCategory; 70 | virtual ~ValueLikeTrait() {} 71 | }; 72 | 73 | /** 74 | * Arg traits are used to get compile-time specialization when parsing 75 | * argument values. Using an ArgTraits you can specify the way that 76 | * values get assigned to any particular type during parsing. The two 77 | * supported types are StringLike and ValueLike. ValueLike is the 78 | * default and means that operator>> will be used to assign values to 79 | * the type. 80 | */ 81 | template 82 | class ArgTraits { 83 | // This is a bit silly, but what we want to do is: 84 | // 1) If there exists a specialization of ArgTraits for type X, 85 | // use it. 86 | // 87 | // 2) If no specialization exists but X has the typename 88 | // X::ValueCategory, use the specialization for X::ValueCategory. 89 | // 90 | // 3) If neither (1) nor (2) defines the trait, use the default 91 | // which is ValueLike.
92 | 93 | // This is the "how": 94 | // 95 | // test(0) (where 0 is the NULL ptr) will match 96 | // test(typename C::ValueCategory*) iff type T has the 97 | // corresponding typedef. If it does not test(...) will be 98 | // matched. This allows us to determine if T::ValueCategory 99 | // exists by checking the sizeof for the test function (return 100 | // value must have different sizeof). 101 | template 102 | static short test(typename C::ValueCategory *); // NOLINT 103 | template 104 | static long test(...); // NOLINT 105 | static const bool hasTrait = sizeof(test(0)) == sizeof(short); // NOLINT 106 | 107 | template 108 | struct DefaultArgTrait { 109 | typedef ValueLike ValueCategory; 110 | }; 111 | 112 | template 113 | struct DefaultArgTrait { 114 | typedef typename C::ValueCategory ValueCategory; 115 | }; 116 | 117 | public: 118 | typedef typename DefaultArgTrait::ValueCategory ValueCategory; 119 | }; 120 | 121 | } // namespace TCLAP 122 | 123 | #endif // TCLAP_ARG_TRAITS_H 124 | -------------------------------------------------------------------------------- /external/tclap/COPYING: -------------------------------------------------------------------------------- 1 | 2 | 3 | Copyright (c) 2003-2012 Michael E. Smoot 4 | Copyright (c) 2004-2016 Daniel Aarno 5 | Copyright (c) 2017-2021 Google LLC 6 | 7 | Permission is hereby granted, free of charge, to any person 8 | obtaining a copy of this software and associated documentation 9 | files (the "Software"), to deal in the Software without restriction, 10 | including without limitation the rights to use, copy, modify, merge, 11 | publish, distribute, sublicense, and/or sell copies of the Software, 12 | and to permit persons to whom the Software is furnished to do so, 13 | subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be 16 | included in all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 20 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 21 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 22 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 23 | AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. 26 | 27 | 28 | -------------------------------------------------------------------------------- /external/tclap/CmdLineOutput.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: CmdLineOutput.h 6 | * 7 | * Copyright (c) 2004, Michael E. Smoot 8 | * Copyright (c) 2017, Google LLC 9 | * All rights reserved. 10 | * 11 | * See the file COPYING in the top directory of this distribution for 12 | * more information. 13 | * 14 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 
21 | * 22 | *****************************************************************************/ 23 | 24 | #ifndef TCLAP_CMD_LINE_OUTPUT_H 25 | #define TCLAP_CMD_LINE_OUTPUT_H 26 | 27 | #include 28 | #include 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | namespace TCLAP { 38 | 39 | class CmdLineInterface; 40 | class ArgException; 41 | 42 | /** 43 | * The interface that any output object must implement. 44 | */ 45 | class CmdLineOutput { 46 | public: 47 | /** 48 | * Virtual destructor. 49 | */ 50 | virtual ~CmdLineOutput() {} 51 | 52 | /** 53 | * Generates some sort of output for the USAGE. 54 | * \param c - The CmdLine object the output is generated for. 55 | */ 56 | virtual void usage(CmdLineInterface &c) = 0; 57 | 58 | /** 59 | * Generates some sort of output for the version. 60 | * \param c - The CmdLine object the output is generated for. 61 | */ 62 | virtual void version(CmdLineInterface &c) = 0; 63 | 64 | /** 65 | * Generates some sort of output for a failure. 66 | * \param c - The CmdLine object the output is generated for. 67 | * \param e - The ArgException that caused the failure. 
68 | */ 69 | virtual void failure(CmdLineInterface &c, ArgException &e) = 0; 70 | }; 71 | 72 | inline bool isInArgGroup(const Arg *arg, const std::list &argSets) { 73 | for (std::list::const_iterator it = argSets.begin(); 74 | it != argSets.end(); ++it) { 75 | if (std::find((*it)->begin(), (*it)->end(), arg) != (*it)->end()) { 76 | return true; 77 | } 78 | } 79 | return false; 80 | } 81 | 82 | inline void removeArgsInArgGroups(std::list &argList, 83 | const std::list &argSets) { 84 | for (std::list::iterator it = argList.begin(); 85 | it != argList.end();) { 86 | if (isInArgGroup(*it, argSets)) { 87 | it = argList.erase(it); 88 | } else { 89 | ++it; 90 | } 91 | } 92 | } 93 | 94 | inline std::string basename(std::string s) { 95 | // TODO(macbishop): See if we can make this more robust 96 | size_t p = s.find_last_of("/\\"); 97 | if (p != std::string::npos) { 98 | s.erase(0, p + 1); 99 | } 100 | 101 | p = s.rfind(".exe"); 102 | if (p != std::string::npos && p + 4 == s.length()) { // npos check: length() - 4 wraps to npos for 3-char names 103 | s.erase(p); 104 | } 105 | 106 | return s; 107 | } 108 | 109 | } // namespace TCLAP 110 | 111 | #endif // TCLAP_CMD_LINE_OUTPUT_H 112 | -------------------------------------------------------------------------------- /external/tclap/Constraint.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: Constraint.h 6 | * 7 | * Copyright (c) 2005, Michael E. Smoot 8 | * Copyright (c) 2017, Google LLC 9 | * All rights reserved. 10 | * 11 | * See the file COPYING in the top directory of this distribution for 12 | * more information. 13 | * 14 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | *****************************************************************************/ 23 | 24 | #ifndef TCLAP_CONSTRAINT_H 25 | #define TCLAP_CONSTRAINT_H 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | namespace TCLAP { 36 | 37 | /** 38 | * The interface that defines the interaction between the Arg and Constraint. 39 | */ 40 | template 41 | class Constraint { 42 | public: 43 | /** 44 | * Returns a description of the Constraint. 45 | */ 46 | virtual std::string description() const = 0; 47 | 48 | /** 49 | * Returns the short ID for the Constraint. 50 | */ 51 | virtual std::string shortID() const = 0; 52 | 53 | /** 54 | * The method used to verify that the value parsed from the command 55 | * line meets the constraint. 56 | * \param value - The value that will be checked. 57 | */ 58 | virtual bool check(const T &value) const = 0; 59 | 60 | /** 61 | * Destructor. 62 | * Silences warnings about Constraint being a base class with virtual 63 | * functions but without a virtual destructor. 
64 | */ 65 | virtual ~Constraint() { ; } 66 | 67 | static std::string shortID(Constraint *constraint) { 68 | if (!constraint) 69 | throw std::logic_error( 70 | "Cannot create a ValueArg with a NULL constraint"); 71 | return constraint->shortID(); 72 | } 73 | }; 74 | 75 | } // namespace TCLAP 76 | 77 | #endif // TCLAP_CONSTRAINT_H 78 | -------------------------------------------------------------------------------- /external/tclap/DeferDelete.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: DeferDelete.h 6 | * 7 | * Copyright (c) 2020, Google LLC 8 | * All rights reserved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_DEFER_DELETE_H 24 | #define TCLAP_DEFER_DELETE_H 25 | 26 | namespace TCLAP { 27 | 28 | /** 29 | * DeferDelete can be used by objects that need to allocate arbitrary other 30 | * objects to live for the duration of the first object. Any object 31 | * added to DeferDelete (by calling operator()) will be deleted when 32 | * the DeferDelete object is destroyed. 
33 | */ 34 | class DeferDelete { 35 | class DeletableBase { 36 | public: 37 | virtual ~DeletableBase() {} 38 | }; 39 | 40 | template 41 | class Deletable : public DeletableBase { 42 | public: 43 | Deletable(T *o) : _o(o) {} 44 | virtual ~Deletable() { delete _o; } 45 | 46 | private: // non-copyable: copy operations are declared but intentionally not defined 47 | Deletable(const Deletable &); 48 | Deletable &operator=(const Deletable &); 49 | 50 | T *_o; 51 | }; 52 | 53 | std::list _toBeDeleted; 54 | 55 | public: 56 | DeferDelete() : _toBeDeleted() {} 57 | ~DeferDelete() { 58 | for (std::list::iterator it = _toBeDeleted.begin(); 59 | it != _toBeDeleted.end(); ++it) { 60 | delete *it; 61 | } 62 | } 63 | 64 | template 65 | void operator()(T *toDelete) { 66 | _toBeDeleted.push_back(new Deletable(toDelete)); 67 | } 68 | }; 69 | 70 | } // namespace TCLAP 71 | 72 | #endif // TCLAP_DEFER_DELETE_H 73 | -------------------------------------------------------------------------------- /external/tclap/HelpVisitor.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: HelpVisitor.h 6 | * 7 | * Copyright (c) 2003, Michael E. Smoot . 8 | * All rights reserved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE.
20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_HELP_VISITOR_H 24 | #define TCLAP_HELP_VISITOR_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | namespace TCLAP { 31 | 32 | /** 33 | * A Visitor object that calls the usage method of the given CmdLineOutput 34 | * object for the specified CmdLine object. 35 | */ 36 | class HelpVisitor : public Visitor { 37 | private: 38 | /** 39 | * Prevent accidental copying. 40 | */ 41 | HelpVisitor(const HelpVisitor &rhs); 42 | HelpVisitor &operator=(const HelpVisitor &rhs); 43 | 44 | protected: 45 | /** 46 | * The CmdLine the output will be generated for. 47 | */ 48 | CmdLineInterface *_cmd; 49 | 50 | /** 51 | * The output object. 52 | */ 53 | CmdLineOutput **_out; 54 | 55 | public: 56 | /** 57 | * Constructor. 58 | * \param cmd - The CmdLine the output will be generated for. 59 | * \param out - The type of output. 60 | */ 61 | HelpVisitor(CmdLineInterface *cmd, CmdLineOutput **out) 62 | : Visitor(), _cmd(cmd), _out(out) {} 63 | 64 | /** 65 | * Calls the usage method of the CmdLineOutput for the 66 | * specified CmdLine. 67 | */ 68 | void visit() { 69 | (*_out)->usage(*_cmd); 70 | throw ExitException(0); 71 | } 72 | }; 73 | } // namespace TCLAP 74 | 75 | #endif // TCLAP_HELP_VISITOR_H 76 | -------------------------------------------------------------------------------- /external/tclap/IgnoreRestVisitor.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: IgnoreRestVisitor.h 6 | * 7 | * Copyright (c) 2003, Michael E. Smoot . 8 | * Copyright (c) 2020, Google LLC 9 | * All rights reserved. 10 | * 11 | * See the file COPYING in the top directory of this distribution for 12 | * more information. 
13 | * 14 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | *****************************************************************************/ 23 | 24 | #ifndef TCLAP_IGNORE_REST_VISITOR_H 25 | #define TCLAP_IGNORE_REST_VISITOR_H 26 | 27 | #include 28 | #include 29 | 30 | namespace TCLAP { 31 | 32 | /** 33 | * A Visitor that tells the CmdLine to begin ignoring arguments after 34 | * this one is parsed. 35 | */ 36 | class IgnoreRestVisitor : public Visitor { 37 | public: 38 | IgnoreRestVisitor(CmdLineInterface &cmdLine) 39 | : Visitor(), cmdLine_(cmdLine) {} 40 | void visit() { cmdLine_.beginIgnoring(); } 41 | 42 | private: 43 | CmdLineInterface &cmdLine_; 44 | }; 45 | } // namespace TCLAP 46 | 47 | #endif // TCLAP_IGNORE_REST_VISITOR_H 48 | -------------------------------------------------------------------------------- /external/tclap/OptionalUnlabeledTracker.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: OptionalUnlabeledTracker.h 6 | * 7 | * Copyright (c) 2005, Michael E. Smoot . 8 | * All rights reserved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 
12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_OPTIONAL_UNLABELED_TRACKER_H 24 | #define TCLAP_OPTIONAL_UNLABELED_TRACKER_H 25 | 26 | #include 27 | 28 | namespace TCLAP { 29 | 30 | class OptionalUnlabeledTracker { 31 | public: 32 | static void check(bool req, const std::string &argName); 33 | 34 | static void gotOptional() { alreadyOptionalRef() = true; } 35 | 36 | static bool &alreadyOptional() { return alreadyOptionalRef(); } 37 | 38 | private: 39 | static bool &alreadyOptionalRef() { 40 | static bool ct = false; 41 | return ct; 42 | } 43 | }; 44 | 45 | inline void OptionalUnlabeledTracker::check(bool req, 46 | const std::string &argName) { 47 | if (OptionalUnlabeledTracker::alreadyOptional()) 48 | throw(SpecificationException( 49 | "You can't specify ANY Unlabeled Arg following an optional " 50 | "Unlabeled Arg", 51 | argName)); 52 | 53 | if (!req) OptionalUnlabeledTracker::gotOptional(); 54 | } 55 | 56 | } // namespace TCLAP 57 | 58 | #endif // TCLAP_OPTIONAL_UNLABELED_TRACKER_H 59 | -------------------------------------------------------------------------------- /external/tclap/StandardTraits.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: StandardTraits.h 6 | * 7 | * Copyright (c) 2007, 
Daniel Aarno, Michael E. Smoot . 8 | * Copyright (c) 2017, Google LLC 9 | * All rights reserved. 10 | * 11 | * See the file COPYING in the top directory of this distribution for 12 | * more information. 13 | * 14 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | *****************************************************************************/ 23 | 24 | // This is an internal tclap file, you should probably not have to 25 | // include this directly 26 | 27 | #ifndef TCLAP_STANDARD_TRAITS_H 28 | #define TCLAP_STANDARD_TRAITS_H 29 | 30 | #include 31 | 32 | // If Microsoft has already typedef'd wchar_t as an unsigned 33 | // short, then compiles will break because it's as if we're 34 | // creating ArgTraits twice for unsigned short. Thus... 35 | #ifdef _MSC_VER 36 | #ifndef _NATIVE_WCHAR_T_DEFINED 37 | #define TCLAP_DONT_DECLARE_WCHAR_T_ARGTRAITS 38 | #endif 39 | #endif 40 | 41 | namespace TCLAP { 42 | 43 | // Integer types (signed, unsigned and bool) and floating point types all 44 | // have value-like semantics. 45 | 46 | // Strings have string like argument traits. 
47 | template <> 48 | struct ArgTraits { 49 | typedef StringLike ValueCategory; 50 | }; 51 | 52 | template 53 | void SetString(T &dst, const std::string &src) { 54 | dst = src; 55 | } 56 | 57 | } // namespace TCLAP 58 | 59 | #endif // TCLAP_STANDARD_TRAITS_H 60 | -------------------------------------------------------------------------------- /external/tclap/ValuesConstraint.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: ValuesConstraint.h 6 | * 7 | * Copyright (c) 2005, Michael E. Smoot 8 | * Copyright (c) 2017, Google LLC 9 | * All rights reserved. 10 | * 11 | * See the file COPYING in the top directory of this distribution for 12 | * more information. 13 | * 14 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | *****************************************************************************/ 23 | 24 | #ifndef TCLAP_VALUES_CONSTRAINT_H 25 | #define TCLAP_VALUES_CONSTRAINT_H 26 | 27 | #include 28 | #include 29 | 30 | #include 31 | #include 32 | 33 | namespace TCLAP { 34 | 35 | /** 36 | * A Constraint that constrains the Arg to only those values specified 37 | * in the constraint. 38 | */ 39 | template 40 | class ValuesConstraint : public Constraint { 41 | public: 42 | /** 43 | * Constructor. 44 | * \param allowed - vector of allowed values. 
45 | */ 46 | explicit ValuesConstraint(std::vector &allowed); 47 | 48 | /** 49 | * Virtual destructor. 50 | */ 51 | virtual ~ValuesConstraint() {} 52 | 53 | /** 54 | * Returns a description of the Constraint. 55 | */ 56 | virtual std::string description() const; 57 | 58 | /** 59 | * Returns the short ID for the Constraint. 60 | */ 61 | virtual std::string shortID() const; 62 | 63 | /** 64 | * The method used to verify that the value parsed from the command 65 | * line meets the constraint. 66 | * \param value - The value that will be checked. 67 | */ 68 | virtual bool check(const T &value) const; 69 | 70 | protected: 71 | /** 72 | * The list of valid values. 73 | */ 74 | std::vector _allowed; 75 | 76 | /** 77 | * The string used to describe the allowed values of this constraint. 78 | */ 79 | std::string _typeDesc; 80 | }; 81 | 82 | template 83 | ValuesConstraint::ValuesConstraint(std::vector &allowed) 84 | : _allowed(allowed), _typeDesc("") { 85 | for (unsigned int i = 0; i < _allowed.size(); i++) { 86 | std::ostringstream os; 87 | os << _allowed[i]; 88 | 89 | std::string temp(os.str()); 90 | 91 | if (i > 0) _typeDesc += "|"; 92 | _typeDesc += temp; 93 | } 94 | } 95 | 96 | template 97 | bool ValuesConstraint::check(const T &val) const { 98 | if (std::find(_allowed.begin(), _allowed.end(), val) == _allowed.end()) 99 | return false; 100 | else 101 | return true; 102 | } 103 | 104 | template 105 | std::string ValuesConstraint::shortID() const { 106 | return _typeDesc; 107 | } 108 | 109 | template 110 | std::string ValuesConstraint::description() const { 111 | return _typeDesc; 112 | } 113 | 114 | } // namespace TCLAP 115 | #endif // TCLAP_VALUES_CONSTRAINT_H 116 | -------------------------------------------------------------------------------- /external/tclap/VersionVisitor.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | 
/****************************************************************************** 4 | * 5 | * file: VersionVisitor.h 6 | * 7 | * Copyright (c) 2003, Michael E. Smoot . 8 | * All rights reserved. 9 | * 10 | * See the file COPYING in the top directory of this distribution for 11 | * more information. 12 | * 13 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | * DEALINGS IN THE SOFTWARE. 20 | * 21 | *****************************************************************************/ 22 | 23 | #ifndef TCLAP_VERSION_VISITOR_H 24 | #define TCLAP_VERSION_VISITOR_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | namespace TCLAP { 31 | 32 | /** 33 | * A Visitor that will call the version method of the given CmdLineOutput 34 | * for the specified CmdLine object and then exit. 35 | */ 36 | class VersionVisitor : public Visitor { 37 | private: 38 | /** 39 | * Prevent accidental copying 40 | */ 41 | VersionVisitor(const VersionVisitor &rhs); 42 | VersionVisitor &operator=(const VersionVisitor &rhs); 43 | 44 | protected: 45 | /** 46 | * The CmdLine of interest. 47 | */ 48 | CmdLineInterface *_cmd; 49 | 50 | /** 51 | * The output object. 52 | */ 53 | CmdLineOutput **_out; 54 | 55 | public: 56 | /** 57 | * Constructor. 58 | * \param cmd - The CmdLine the output is generated for. 59 | * \param out - The type of output. 60 | */ 61 | VersionVisitor(CmdLineInterface *cmd, CmdLineOutput **out) 62 | : Visitor(), _cmd(cmd), _out(out) {} 63 | 64 | /** 65 | * Calls the version method of the output object using the 66 | * specified CmdLine. 
67 | */ 68 | void visit() { 69 | (*_out)->version(*_cmd); 70 | throw ExitException(0); 71 | } 72 | }; 73 | } // namespace TCLAP 74 | 75 | #endif // TCLAP_VERSION_VISITOR_H 76 | -------------------------------------------------------------------------------- /external/tclap/Visitor.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: Visitor.h 6 | * 7 | * Copyright (c) 2003, Michael E. Smoot . 8 | * Copyright (c) 2017, Google LLC 9 | * All rights reserved. 10 | * 11 | * See the file COPYING in the top directory of this distribution for 12 | * more information. 13 | * 14 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | * DEALINGS IN THE SOFTWARE. 21 | * 22 | *****************************************************************************/ 23 | 24 | #ifndef TCLAP_VISITOR_H 25 | #define TCLAP_VISITOR_H 26 | 27 | namespace TCLAP { 28 | 29 | /** 30 | * A base class that defines the interface for visitors. 31 | */ 32 | class Visitor { 33 | public: 34 | /** 35 | * Constructor. Does nothing. 36 | */ 37 | Visitor() {} 38 | 39 | /** 40 | * Destructor. Does nothing. 41 | */ 42 | virtual ~Visitor() {} 43 | 44 | /** 45 | * This method (to be implemented by children) will be 46 | * called when the visitor is visited. 
47 | */ 48 | virtual void visit() = 0; 49 | }; 50 | } // namespace TCLAP 51 | 52 | #endif // TCLAP_VISITOR_H 53 | -------------------------------------------------------------------------------- /external/tclap/sstream.h: -------------------------------------------------------------------------------- 1 | // -*- Mode: c++; c-basic-offset: 4; tab-width: 4; -*- 2 | 3 | /****************************************************************************** 4 | * 5 | * file: sstream.h 6 | * 7 | * Copyright (c) 2003, Michael E. Smoot . 8 | * Copyright (c) 2004, Michael E. Smoot, Daniel Aarno . 9 | * Copyright (c) 2017 Google Inc. 10 | * All rights reserved. 11 | * 12 | * See the file COPYING in the top directory of this distribution for 13 | * more information. 14 | * 15 | * THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | * DEALINGS IN THE SOFTWARE. 
22 | * 23 | *****************************************************************************/ 24 | 25 | #ifndef TCLAP_SSTREAM_H 26 | #define TCLAP_SSTREAM_H 27 | 28 | #if !defined(TCLAP_HAVE_STRSTREAM) 29 | // Assume sstream is available if strstream is not specified 30 | // (https://sourceforge.net/p/tclap/bugs/23/) 31 | #define TCLAP_HAVE_SSTREAM 32 | #endif 33 | 34 | #if defined(TCLAP_HAVE_SSTREAM) 35 | #include 36 | namespace TCLAP { 37 | typedef std::istringstream istringstream; 38 | typedef std::ostringstream ostringstream; 39 | } // namespace TCLAP 40 | #elif defined(TCLAP_HAVE_STRSTREAM) 41 | #include 42 | namespace TCLAP { 43 | typedef std::istrstream istringstream; 44 | typedef std::ostrstream ostringstream; 45 | } // namespace TCLAP 46 | #else 47 | #error "Need a stringstream (sstream or strstream) to compile!" 48 | #endif 49 | 50 | #endif // TCLAP_SSTREAM_H 51 | -------------------------------------------------------------------------------- /external/zstd/LICENSE: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For Zstandard software 4 | 5 | Copyright (c) 2016-present, Facebook, Inc. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name Facebook nor the names of its contributors may be used to 18 | endorse or promote products derived from this software without specific 19 | prior written permission. 
20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /external/zstd/common/debug.c: -------------------------------------------------------------------------------- 1 | /* ****************************************************************** 2 | * debug 3 | * Part of FSE library 4 | * Copyright (c) Yann Collet, Facebook, Inc. 5 | * 6 | * You can contact the author at : 7 | * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy 8 | * 9 | * This source code is licensed under both the BSD-style license (found in the 10 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11 | * in the COPYING file in the root directory of this source tree). 12 | * You may select, at your option, one of the above-listed licenses. 
13 | ****************************************************************** */ 14 | 15 | 16 | /* 17 | * This module only hosts one global variable 18 | * which can be used to dynamically influence the verbosity of traces, 19 | * such as DEBUGLOG and RAWLOG 20 | */ 21 | 22 | #include "debug.h" 23 | 24 | int g_debuglevel = DEBUGLEVEL; 25 | -------------------------------------------------------------------------------- /external/zstd/common/debug.h: -------------------------------------------------------------------------------- 1 | /* ****************************************************************** 2 | * debug 3 | * Part of FSE library 4 | * Copyright (c) Yann Collet, Facebook, Inc. 5 | * 6 | * You can contact the author at : 7 | * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy 8 | * 9 | * This source code is licensed under both the BSD-style license (found in the 10 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11 | * in the COPYING file in the root directory of this source tree). 12 | * You may select, at your option, one of the above-listed licenses. 13 | ****************************************************************** */ 14 | 15 | 16 | /* 17 | * The purpose of this header is to enable debug functions. 18 | * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, 19 | * and DEBUG_STATIC_ASSERT() for compile-time. 20 | * 21 | * By default, DEBUGLEVEL==0, which means run-time debug is disabled. 22 | * 23 | * Level 1 enables assert() only. 24 | * Starting level 2, traces can be generated and pushed to stderr. 25 | * The higher the level, the more verbose the traces. 
26 | * 27 | * It's possible to dynamically adjust level using variable g_debuglevel, 28 | * which is only declared if DEBUGLEVEL>=2, 29 | * and is a global variable, not multi-thread protected (use with care) 30 | */ 31 | 32 | #ifndef DEBUG_H_12987983217 33 | #define DEBUG_H_12987983217 34 | 35 | #if defined (__cplusplus) 36 | extern "C" { 37 | #endif 38 | 39 | 40 | /* static assert is triggered at compile time, leaving no runtime artefact. 41 | * static assert only works with compile-time constants. 42 | * Also, this variant can only be used inside a function. */ 43 | #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) 44 | 45 | 46 | /* DEBUGLEVEL is expected to be defined externally, 47 | * typically through compiler command line. 48 | * Value must be a number. */ 49 | #ifndef DEBUGLEVEL 50 | # define DEBUGLEVEL 0 51 | #endif 52 | 53 | 54 | /* recommended values for DEBUGLEVEL : 55 | * 0 : release mode, no debug, all run-time checks disabled 56 | * 1 : enables assert() only, no display 57 | * 2 : reserved, for currently active debug path 58 | * 3 : events once per object lifetime (CCtx, CDict, etc.) 59 | * 4 : events once per frame 60 | * 5 : events once per block 61 | * 6 : events once per sequence (verbose) 62 | * 7+: events at every position (*very* verbose) 63 | * 64 | * It's generally inconvenient to output traces > 5. 65 | * In which case, it's possible to selectively trigger high verbosity levels 66 | * by modifying g_debuglevel. 67 | */ 68 | 69 | #if (DEBUGLEVEL>=1) 70 | # define ZSTD_DEPS_NEED_ASSERT 71 | # include "zstd_deps.h" 72 | #else 73 | # ifndef assert /* assert may be already defined, due to prior #include */ 74 | # define assert(condition) ((void)0) /* disable assert (default) */ 75 | # endif 76 | #endif 77 | 78 | #if (DEBUGLEVEL>=2) 79 | # define ZSTD_DEPS_NEED_IO 80 | # include "zstd_deps.h" 81 | extern int g_debuglevel; /* the variable is only declared, 82 | it actually lives in debug.c, 83 | and is shared by the whole process. 
84 | It's not thread-safe. 85 | It's useful when enabling very verbose levels 86 | on selective conditions (such as position in src) */ 87 | 88 | # define RAWLOG(l, ...) { \ 89 | if (l<=g_debuglevel) { \ 90 | ZSTD_DEBUG_PRINT(__VA_ARGS__); \ 91 | } } 92 | # define DEBUGLOG(l, ...) { \ 93 | if (l<=g_debuglevel) { \ 94 | ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ 95 | ZSTD_DEBUG_PRINT(" \n"); \ 96 | } } 97 | #else 98 | # define RAWLOG(l, ...) {} /* disabled */ 99 | # define DEBUGLOG(l, ...) {} /* disabled */ 100 | #endif 101 | 102 | 103 | #if defined (__cplusplus) 104 | } 105 | #endif 106 | 107 | #endif /* DEBUG_H_12987983217 */ 108 | -------------------------------------------------------------------------------- /external/zstd/common/error_private.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | /* The purpose of this file is to have a single list of error strings embedded in binary */ 12 | 13 | #include "error_private.h" 14 | 15 | const char* ERR_getErrorString(ERR_enum code) 16 | { 17 | #ifdef ZSTD_STRIP_ERROR_STRINGS 18 | (void)code; 19 | return "Error strings stripped"; 20 | #else 21 | static const char* const notErrorCode = "Unspecified error code"; 22 | switch( code ) 23 | { 24 | case PREFIX(no_error): return "No error detected"; 25 | case PREFIX(GENERIC): return "Error (generic)"; 26 | case PREFIX(prefix_unknown): return "Unknown frame descriptor"; 27 | case PREFIX(version_unsupported): return "Version not supported"; 28 | case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; 29 | case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; 30 | case PREFIX(corruption_detected): return "Corrupted block detected"; 31 | case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; 32 | case PREFIX(parameter_unsupported): return "Unsupported parameter"; 33 | case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; 34 | case PREFIX(init_missing): return "Context should be init first"; 35 | case PREFIX(memory_allocation): return "Allocation error : not enough memory"; 36 | case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; 37 | case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; 38 | case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; 39 | case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; 40 | case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; 41 | case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; 42 | case PREFIX(dictionary_wrong): return "Dictionary mismatch"; 43 | case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided 
samples"; 44 | case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; 45 | case PREFIX(srcSize_wrong): return "Src size is incorrect"; 46 | case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; 47 | /* following error codes are not stable and may be removed or changed in a future version */ 48 | case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; 49 | case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; 50 | case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; 51 | case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; 52 | case PREFIX(maxCode): 53 | default: return notErrorCode; 54 | } 55 | #endif 56 | } 57 | -------------------------------------------------------------------------------- /external/zstd/common/pool.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef POOL_H 12 | #define POOL_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | 19 | #include "zstd_deps.h" 20 | #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ 21 | #include "../zstd.h" 22 | 23 | typedef struct POOL_ctx_s POOL_ctx; 24 | 25 | /*! POOL_create() : 26 | * Create a thread pool with at most `numThreads` threads. 27 | * `numThreads` must be at least 1. 28 | * The maximum number of queued jobs before blocking is `queueSize`. 29 | * @return : POOL_ctx pointer on success, else NULL. 
30 | */ 31 | POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); 32 | 33 | POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, 34 | ZSTD_customMem customMem); 35 | 36 | /*! POOL_free() : 37 | * Free a thread pool returned by POOL_create(). 38 | */ 39 | void POOL_free(POOL_ctx* ctx); 40 | 41 | /*! POOL_resize() : 42 | * Expands or shrinks pool's number of threads. 43 | * This is more efficient than releasing + creating a new context, 44 | * since it tries to preserve and re-use existing threads. 45 | * `numThreads` must be at least 1. 46 | * @return : 0 when resize was successful, 47 | * !0 (typically 1) if there is an error. 48 | * note : only numThreads can be resized, queueSize remains unchanged. 49 | */ 50 | int POOL_resize(POOL_ctx* ctx, size_t numThreads); 51 | 52 | /*! POOL_sizeof() : 53 | * @return threadpool memory usage 54 | * note : compatible with NULL (returns 0 in this case) 55 | */ 56 | size_t POOL_sizeof(POOL_ctx* ctx); 57 | 58 | /*! POOL_function : 59 | * The function type that can be added to a thread pool. 60 | */ 61 | typedef void (*POOL_function)(void*); 62 | 63 | /*! POOL_add() : 64 | * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. 65 | * Possibly blocks until there is room in the queue. 66 | * Note : The function may be executed asynchronously, 67 | * therefore, `opaque` must live until function has been completed. 68 | */ 69 | void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); 70 | 71 | 72 | /*! POOL_tryAdd() : 73 | * Add the job `function(opaque)` to thread pool _if_ a worker is available. 74 | * Returns immediately even if not (does not block). 75 | * @return : 1 if successful, 0 if not. 
76 | */ 77 | int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); 78 | 79 | 80 | #if defined (__cplusplus) 81 | } 82 | #endif 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /external/zstd/common/portability_macros.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef ZSTD_PORTABILITY_MACROS_H 12 | #define ZSTD_PORTABILITY_MACROS_H 13 | 14 | /** 15 | * This header file contains macro definitions to support portability. 16 | * This header is shared between C and ASM code, so it MUST only 17 | * contain macro definitions. It MUST not contain any C code. 18 | * 19 | * This header ONLY defines macros to detect platforms/feature support. 20 | * 21 | */ 22 | 23 | 24 | /* compat. with non-clang compilers */ 25 | #ifndef __has_attribute 26 | #define __has_attribute(x) 0 27 | #endif 28 | 29 | /* compat. with non-clang compilers */ 30 | #ifndef __has_builtin 31 | # define __has_builtin(x) 0 32 | #endif 33 | 34 | /* compat. 
with non-clang compilers */ 35 | #ifndef __has_feature 36 | # define __has_feature(x) 0 37 | #endif 38 | 39 | /* detects whether we are being compiled under msan */ 40 | #ifndef ZSTD_MEMORY_SANITIZER 41 | # if __has_feature(memory_sanitizer) 42 | # define ZSTD_MEMORY_SANITIZER 1 43 | # else 44 | # define ZSTD_MEMORY_SANITIZER 0 45 | # endif 46 | #endif 47 | 48 | /* detects whether we are being compiled under asan */ 49 | #ifndef ZSTD_ADDRESS_SANITIZER 50 | # if __has_feature(address_sanitizer) 51 | # define ZSTD_ADDRESS_SANITIZER 1 52 | # elif defined(__SANITIZE_ADDRESS__) 53 | # define ZSTD_ADDRESS_SANITIZER 1 54 | # else 55 | # define ZSTD_ADDRESS_SANITIZER 0 56 | # endif 57 | #endif 58 | 59 | /* detects whether we are being compiled under dfsan */ 60 | #ifndef ZSTD_DATAFLOW_SANITIZER 61 | # if __has_feature(dataflow_sanitizer) 62 | # define ZSTD_DATAFLOW_SANITIZER 1 63 | # else 64 | # define ZSTD_DATAFLOW_SANITIZER 0 65 | # endif 66 | #endif 67 | 68 | 69 | /* Enable runtime BMI2 dispatch based on the CPU. 70 | * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 71 | */ 72 | #ifndef DYNAMIC_BMI2 73 | #if ((defined(__clang__) && __has_attribute(__target__)) \ 74 | || (defined(__GNUC__) \ 75 | && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ 76 | && (defined(__x86_64__) || defined(_M_X64)) \ 77 | && !defined(__BMI2__) 78 | # define DYNAMIC_BMI2 1 79 | #else 80 | # define DYNAMIC_BMI2 0 81 | #endif 82 | #endif 83 | 84 | /** 85 | * Only enable assembly for GNUC comptabile compilers, 86 | * because other platforms may not support GAS assembly syntax. 87 | * 88 | * Only enable assembly for Linux / MacOS, other platforms may 89 | * work, but they haven't been tested. This could likely be 90 | * extended to BSD systems. 91 | * 92 | * Disable assembly when MSAN is enabled, because MSAN requires 93 | * 100% of code to be instrumented to work. 
94 | */ 95 | #if defined(__GNUC__) 96 | # if defined(__linux__) || defined(__linux) || defined(__APPLE__) 97 | # if ZSTD_MEMORY_SANITIZER 98 | # define ZSTD_ASM_SUPPORTED 0 99 | # elif ZSTD_DATAFLOW_SANITIZER 100 | # define ZSTD_ASM_SUPPORTED 0 101 | # else 102 | # define ZSTD_ASM_SUPPORTED 1 103 | # endif 104 | # else 105 | # define ZSTD_ASM_SUPPORTED 0 106 | # endif 107 | #else 108 | # define ZSTD_ASM_SUPPORTED 0 109 | #endif 110 | 111 | /** 112 | * Determines whether we should enable assembly for x86-64 113 | * with BMI2. 114 | * 115 | * Enable if all of the following conditions hold: 116 | * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM 117 | * - Assembly is supported 118 | * - We are compiling for x86-64 and either: 119 | * - DYNAMIC_BMI2 is enabled 120 | * - BMI2 is supported at compile time 121 | */ 122 | #if !defined(ZSTD_DISABLE_ASM) && \ 123 | ZSTD_ASM_SUPPORTED && \ 124 | defined(__x86_64__) && \ 125 | (DYNAMIC_BMI2 || defined(__BMI2__)) 126 | # define ZSTD_ENABLE_ASM_X86_64_BMI2 1 127 | #else 128 | # define ZSTD_ENABLE_ASM_X86_64_BMI2 0 129 | #endif 130 | 131 | #endif /* ZSTD_PORTABILITY_MACROS_H */ 132 | -------------------------------------------------------------------------------- /external/zstd/common/threading.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 Tino Reichardt 3 | * All rights reserved. 4 | * 5 | * You can contact the author at: 6 | * - zstdmt source repository: https://github.com/mcmilk/zstdmt 7 | * 8 | * This source code is licensed under both the BSD-style license (found in the 9 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 10 | * in the COPYING file in the root directory of this source tree). 11 | * You may select, at your option, one of the above-listed licenses. 
12 | */ 13 | 14 | /** 15 | * This file will hold wrapper for systems, which do not support pthreads 16 | */ 17 | 18 | #include "threading.h" 19 | 20 | /* create fake symbol to avoid empty translation unit warning */ 21 | int g_ZSTD_threading_useless_symbol; 22 | 23 | #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) 24 | 25 | /** 26 | * Windows minimalist Pthread Wrapper, based on : 27 | * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html 28 | */ 29 | 30 | 31 | /* === Dependencies === */ 32 | #include 33 | #include 34 | 35 | 36 | /* === Implementation === */ 37 | 38 | static unsigned __stdcall worker(void *arg) 39 | { 40 | ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; 41 | thread->arg = thread->start_routine(thread->arg); 42 | return 0; 43 | } 44 | 45 | int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, 46 | void* (*start_routine) (void*), void* arg) 47 | { 48 | (void)unused; 49 | thread->arg = arg; 50 | thread->start_routine = start_routine; 51 | thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); 52 | 53 | if (!thread->handle) 54 | return errno; 55 | else 56 | return 0; 57 | } 58 | 59 | int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) 60 | { 61 | DWORD result; 62 | 63 | if (!thread.handle) return 0; 64 | 65 | result = WaitForSingleObject(thread.handle, INFINITE); 66 | switch (result) { 67 | case WAIT_OBJECT_0: 68 | if (value_ptr) *value_ptr = thread.arg; 69 | return 0; 70 | case WAIT_ABANDONED: 71 | return EINVAL; 72 | default: 73 | return GetLastError(); 74 | } 75 | } 76 | 77 | #endif /* ZSTD_MULTITHREAD */ 78 | 79 | #if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) 80 | 81 | #define ZSTD_DEPS_NEED_MALLOC 82 | #include "zstd_deps.h" 83 | 84 | int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) 85 | { 86 | *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t)); 87 | if (!*mutex) 88 | return 1; 89 | return pthread_mutex_init(*mutex, 
attr); 90 | } 91 | 92 | int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) 93 | { 94 | if (!*mutex) 95 | return 0; 96 | { 97 | int const ret = pthread_mutex_destroy(*mutex); 98 | ZSTD_free(*mutex); 99 | return ret; 100 | } 101 | } 102 | 103 | int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) 104 | { 105 | *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t)); 106 | if (!*cond) 107 | return 1; 108 | return pthread_cond_init(*cond, attr); 109 | } 110 | 111 | int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) 112 | { 113 | if (!*cond) 114 | return 0; 115 | { 116 | int const ret = pthread_cond_destroy(*cond); 117 | ZSTD_free(*cond); 118 | return ret; 119 | } 120 | } 121 | 122 | #endif 123 | -------------------------------------------------------------------------------- /external/zstd/common/xxhash.c: -------------------------------------------------------------------------------- 1 | /* 2 | * xxHash - Fast Hash algorithm 3 | * Copyright (c) Yann Collet, Facebook, Inc. 4 | * 5 | * You can contact the author at : 6 | * - xxHash homepage: http://www.xxhash.com 7 | * - xxHash source repository : https://github.com/Cyan4973/xxHash 8 | * 9 | * This source code is licensed under both the BSD-style license (found in the 10 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 11 | * in the COPYING file in the root directory of this source tree). 12 | * You may select, at your option, one of the above-listed licenses. 
13 | */ 14 | 15 | 16 | 17 | /* 18 | * xxhash.c instantiates functions defined in xxhash.h 19 | */ 20 | 21 | #define XXH_STATIC_LINKING_ONLY /* access advanced declarations */ 22 | #define XXH_IMPLEMENTATION /* access definitions */ 23 | 24 | #include "xxhash.h" 25 | -------------------------------------------------------------------------------- /external/zstd/common/zstd_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | 12 | 13 | /*-************************************* 14 | * Dependencies 15 | ***************************************/ 16 | #define ZSTD_DEPS_NEED_MALLOC 17 | #include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ 18 | #include "error_private.h" 19 | #include "zstd_internal.h" 20 | 21 | 22 | /*-**************************************** 23 | * Version 24 | ******************************************/ 25 | unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } 26 | 27 | const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } 28 | 29 | 30 | /*-**************************************** 31 | * ZSTD Error Management 32 | ******************************************/ 33 | #undef ZSTD_isError /* defined within zstd_internal.h */ 34 | /*! ZSTD_isError() : 35 | * tells if a return value is an error code 36 | * symbol is required for external callers */ 37 | unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } 38 | 39 | /*! 
ZSTD_getErrorName() : 40 | * provides error code string from function result (useful for debugging) */ 41 | const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } 42 | 43 | /*! ZSTD_getError() : 44 | * convert a `size_t` function result into a proper ZSTD_errorCode enum */ 45 | ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } 46 | 47 | /*! ZSTD_getErrorString() : 48 | * provides error code string from enum */ 49 | const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } 50 | 51 | 52 | 53 | /*=************************************************************** 54 | * Custom allocator 55 | ****************************************************************/ 56 | void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) 57 | { 58 | if (customMem.customAlloc) 59 | return customMem.customAlloc(customMem.opaque, size); 60 | return ZSTD_malloc(size); 61 | } 62 | 63 | void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) 64 | { 65 | if (customMem.customAlloc) { 66 | /* calloc implemented as malloc+memset; 67 | * not as efficient as calloc, but next best guess for custom malloc */ 68 | void* const ptr = customMem.customAlloc(customMem.opaque, size); 69 | ZSTD_memset(ptr, 0, size); 70 | return ptr; 71 | } 72 | return ZSTD_calloc(1, size); 73 | } 74 | 75 | void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) 76 | { 77 | if (ptr!=NULL) { 78 | if (customMem.customFree) 79 | customMem.customFree(customMem.opaque, ptr); 80 | else 81 | ZSTD_free(ptr); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /external/zstd/common/zstd_deps.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Facebook, Inc. 3 | * All rights reserved. 
4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | /* This file provides common libc dependencies that zstd requires. 12 | * The purpose is to allow replacing this file with a custom implementation 13 | * to compile zstd without libc support. 14 | */ 15 | 16 | /* Need: 17 | * NULL 18 | * INT_MAX 19 | * UINT_MAX 20 | * ZSTD_memcpy() 21 | * ZSTD_memset() 22 | * ZSTD_memmove() 23 | */ 24 | #ifndef ZSTD_DEPS_COMMON 25 | #define ZSTD_DEPS_COMMON 26 | 27 | #include <limits.h> 28 | #include <stddef.h> 29 | #include <string.h> 30 | 31 | #if defined(__GNUC__) && __GNUC__ >= 4 32 | # define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) 33 | # define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) 34 | # define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) 35 | #else 36 | # define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) 37 | # define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) 38 | # define ZSTD_memset(p,v,l) memset((p),(v),(l)) 39 | #endif 40 | 41 | #endif /* ZSTD_DEPS_COMMON */ 42 | 43 | /* Need: 44 | * ZSTD_malloc() 45 | * ZSTD_free() 46 | * ZSTD_calloc() 47 | */ 48 | #ifdef ZSTD_DEPS_NEED_MALLOC 49 | #ifndef ZSTD_DEPS_MALLOC 50 | #define ZSTD_DEPS_MALLOC 51 | 52 | #include <stdlib.h> 53 | 54 | #define ZSTD_malloc(s) malloc(s) 55 | #define ZSTD_calloc(n,s) calloc((n), (s)) 56 | #define ZSTD_free(p) free((p)) 57 | 58 | #endif /* ZSTD_DEPS_MALLOC */ 59 | #endif /* ZSTD_DEPS_NEED_MALLOC */ 60 | 61 | /* 62 | * Provides 64-bit math support.
63 | * Need: 64 | * U64 ZSTD_div64(U64 dividend, U32 divisor) 65 | */ 66 | #ifdef ZSTD_DEPS_NEED_MATH64 67 | #ifndef ZSTD_DEPS_MATH64 68 | #define ZSTD_DEPS_MATH64 69 | 70 | #define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) 71 | 72 | #endif /* ZSTD_DEPS_MATH64 */ 73 | #endif /* ZSTD_DEPS_NEED_MATH64 */ 74 | 75 | /* Need: 76 | * assert() 77 | */ 78 | #ifdef ZSTD_DEPS_NEED_ASSERT 79 | #ifndef ZSTD_DEPS_ASSERT 80 | #define ZSTD_DEPS_ASSERT 81 | 82 | #include <assert.h> 83 | 84 | #endif /* ZSTD_DEPS_ASSERT */ 85 | #endif /* ZSTD_DEPS_NEED_ASSERT */ 86 | 87 | /* Need: 88 | * ZSTD_DEBUG_PRINT() 89 | */ 90 | #ifdef ZSTD_DEPS_NEED_IO 91 | #ifndef ZSTD_DEPS_IO 92 | #define ZSTD_DEPS_IO 93 | 94 | #include <stdio.h> 95 | #define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) 96 | 97 | #endif /* ZSTD_DEPS_IO */ 98 | #endif /* ZSTD_DEPS_NEED_IO */ 99 | 100 | /* Only requested when <stdint.h> is known to be present. 101 | * Need: 102 | * intptr_t 103 | */ 104 | #ifdef ZSTD_DEPS_NEED_STDINT 105 | #ifndef ZSTD_DEPS_STDINT 106 | #define ZSTD_DEPS_STDINT 107 | 108 | #include <stdint.h> 109 | 110 | #endif /* ZSTD_DEPS_STDINT */ 111 | #endif /* ZSTD_DEPS_NEED_STDINT */ 112 | -------------------------------------------------------------------------------- /external/zstd/compress/hist.h: -------------------------------------------------------------------------------- 1 | /* ****************************************************************** 2 | * hist : Histogram functions 3 | * part of Finite State Entropy project 4 | * Copyright (c) Yann Collet, Facebook, Inc. 5 | * 6 | * You can contact the author at : 7 | * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy 8 | * - Public forum : https://groups.google.com/forum/#!forum/lz4c 9 | * 10 | * This source code is licensed under both the BSD-style license (found in the 11 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 12 | * in the COPYING file in the root directory of this source tree).
13 | * You may select, at your option, one of the above-listed licenses. 14 | ****************************************************************** */ 15 | 16 | /* --- dependencies --- */ 17 | #include "../common/zstd_deps.h" /* size_t */ 18 | 19 | 20 | /* --- simple histogram functions --- */ 21 | 22 | /*! HIST_count(): 23 | * Provides the precise count of each byte within a table 'count'. 24 | * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). 25 | * Updates *maxSymbolValuePtr with actual largest symbol value detected. 26 | * @return : count of the most frequent symbol (which isn't identified). 27 | * or an error code, which can be tested using HIST_isError(). 28 | * note : if return == srcSize, there is only one symbol. 29 | */ 30 | size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, 31 | const void* src, size_t srcSize); 32 | 33 | unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ 34 | 35 | 36 | /* --- advanced histogram functions --- */ 37 | 38 | #define HIST_WKSP_SIZE_U32 1024 39 | #define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) 40 | /** HIST_count_wksp() : 41 | * Same as HIST_count(), but using an externally provided scratch buffer. 42 | * Benefit is this function will use very little stack space. 43 | * `workSpace` is a writable buffer which must be 4-bytes aligned, 44 | * `workSpaceSize` must be >= HIST_WKSP_SIZE 45 | */ 46 | size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 47 | const void* src, size_t srcSize, 48 | void* workSpace, size_t workSpaceSize); 49 | 50 | /** HIST_countFast() : 51 | * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. 
52 | * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` 53 | */ 54 | size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, 55 | const void* src, size_t srcSize); 56 | 57 | /** HIST_countFast_wksp() : 58 | * Same as HIST_countFast(), but using an externally provided scratch buffer. 59 | * `workSpace` is a writable buffer which must be 4-bytes aligned, 60 | * `workSpaceSize` must be >= HIST_WKSP_SIZE 61 | */ 62 | size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, 63 | const void* src, size_t srcSize, 64 | void* workSpace, size_t workSpaceSize); 65 | 66 | /*! HIST_count_simple() : 67 | * Same as HIST_countFast(), this function is unsafe, 68 | * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. 69 | * It is also a bit slower for large inputs. 70 | * However, it does not need any additional memory (not even on stack). 71 | * @return : count of the most frequent symbol. 72 | * Note this function doesn't produce any error (i.e. it must succeed). 73 | */ 74 | unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, 75 | const void* src, size_t srcSize); 76 | -------------------------------------------------------------------------------- /external/zstd/compress/zstd_compress_literals.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_COMPRESS_LITERALS_H 12 | #define ZSTD_COMPRESS_LITERALS_H 13 | 14 | #include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ 15 | 16 | 17 | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); 18 | 19 | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); 20 | 21 | /* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ 22 | size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, 23 | ZSTD_hufCTables_t* nextHuf, 24 | ZSTD_strategy strategy, int disableLiteralCompression, 25 | void* dst, size_t dstCapacity, 26 | const void* src, size_t srcSize, 27 | void* entropyWorkspace, size_t entropyWorkspaceSize, 28 | const int bmi2, 29 | unsigned suspectUncompressible); 30 | 31 | #endif /* ZSTD_COMPRESS_LITERALS_H */ 32 | -------------------------------------------------------------------------------- /external/zstd/compress/zstd_compress_sequences.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_COMPRESS_SEQUENCES_H 12 | #define ZSTD_COMPRESS_SEQUENCES_H 13 | 14 | #include "../common/fse.h" /* FSE_repeat, FSE_CTable */ 15 | #include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ 16 | 17 | typedef enum { 18 | ZSTD_defaultDisallowed = 0, 19 | ZSTD_defaultAllowed = 1 20 | } ZSTD_defaultPolicy_e; 21 | 22 | symbolEncodingType_e 23 | ZSTD_selectEncodingType( 24 | FSE_repeat* repeatMode, unsigned const* count, unsigned const max, 25 | size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, 26 | FSE_CTable const* prevCTable, 27 | short const* defaultNorm, U32 defaultNormLog, 28 | ZSTD_defaultPolicy_e const isDefaultAllowed, 29 | ZSTD_strategy const strategy); 30 | 31 | size_t 32 | ZSTD_buildCTable(void* dst, size_t dstCapacity, 33 | FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, 34 | unsigned* count, U32 max, 35 | const BYTE* codeTable, size_t nbSeq, 36 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, 37 | const FSE_CTable* prevCTable, size_t prevCTableSize, 38 | void* entropyWorkspace, size_t entropyWorkspaceSize); 39 | 40 | size_t ZSTD_encodeSequences( 41 | void* dst, size_t dstCapacity, 42 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 43 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 44 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 45 | seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); 46 | 47 | size_t ZSTD_fseBitCost( 48 | FSE_CTable const* ctable, 49 | unsigned const* count, 50 | unsigned const max); 51 | 52 | size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, 53 | unsigned const* count, unsigned const max); 54 | #endif /* ZSTD_COMPRESS_SEQUENCES_H */ 55 | -------------------------------------------------------------------------------- /external/zstd/compress/zstd_compress_superblock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef ZSTD_COMPRESS_ADVANCED_H 12 | #define ZSTD_COMPRESS_ADVANCED_H 13 | 14 | /*-************************************* 15 | * Dependencies 16 | ***************************************/ 17 | 18 | #include "../zstd.h" /* ZSTD_CCtx */ 19 | 20 | /*-************************************* 21 | * Target Compressed Block Size 22 | ***************************************/ 23 | 24 | /* ZSTD_compressSuperBlock() : 25 | * Used to compress a super block when targetCBlockSize is being used. 26 | * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ 27 | size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, 28 | void* dst, size_t dstCapacity, 29 | void const* src, size_t srcSize, 30 | unsigned lastBlock); 31 | 32 | #endif /* ZSTD_COMPRESS_ADVANCED_H */ 33 | -------------------------------------------------------------------------------- /external/zstd/compress/zstd_double_fast.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_DOUBLE_FAST_H 12 | #define ZSTD_DOUBLE_FAST_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #include "../common/mem.h" /* U32 */ 19 | #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ 20 | 21 | void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, 22 | void const* end, ZSTD_dictTableLoadMethod_e dtlm); 23 | size_t ZSTD_compressBlock_doubleFast( 24 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 25 | void const* src, size_t srcSize); 26 | size_t ZSTD_compressBlock_doubleFast_dictMatchState( 27 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 28 | void const* src, size_t srcSize); 29 | size_t ZSTD_compressBlock_doubleFast_extDict( 30 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 31 | void const* src, size_t srcSize); 32 | 33 | 34 | #if defined (__cplusplus) 35 | } 36 | #endif 37 | 38 | #endif /* ZSTD_DOUBLE_FAST_H */ 39 | -------------------------------------------------------------------------------- /external/zstd/compress/zstd_fast.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_FAST_H 12 | #define ZSTD_FAST_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #include "../common/mem.h" /* U32 */ 19 | #include "zstd_compress_internal.h" 20 | 21 | void ZSTD_fillHashTable(ZSTD_matchState_t* ms, 22 | void const* end, ZSTD_dictTableLoadMethod_e dtlm); 23 | size_t ZSTD_compressBlock_fast( 24 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 25 | void const* src, size_t srcSize); 26 | size_t ZSTD_compressBlock_fast_dictMatchState( 27 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 28 | void const* src, size_t srcSize); 29 | size_t ZSTD_compressBlock_fast_extDict( 30 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 31 | void const* src, size_t srcSize); 32 | 33 | #if defined (__cplusplus) 34 | } 35 | #endif 36 | 37 | #endif /* ZSTD_FAST_H */ 38 | -------------------------------------------------------------------------------- /external/zstd/compress/zstd_ldm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_LDM_H 12 | #define ZSTD_LDM_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #include "zstd_compress_internal.h" /* ldmParams_t, U32 */ 19 | #include "../zstd.h" /* ZSTD_CCtx, size_t */ 20 | 21 | /*-************************************* 22 | * Long distance matching 23 | ***************************************/ 24 | 25 | #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT 26 | 27 | void ZSTD_ldm_fillHashTable( 28 | ldmState_t* state, const BYTE* ip, 29 | const BYTE* iend, ldmParams_t const* params); 30 | 31 | /** 32 | * ZSTD_ldm_generateSequences(): 33 | * 34 | * Generates the sequences using the long distance match finder. 35 | * Generates long range matching sequences in `sequences`, which parse a prefix 36 | * of the source. `sequences` must be large enough to store every sequence, 37 | * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. 38 | * @returns 0 or an error code. 39 | * 40 | * NOTE: The user must have called ZSTD_window_update() for all of the input 41 | * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. 42 | * NOTE: This function returns an error if it runs out of space to store 43 | * sequences. 44 | */ 45 | size_t ZSTD_ldm_generateSequences( 46 | ldmState_t* ldms, rawSeqStore_t* sequences, 47 | ldmParams_t const* params, void const* src, size_t srcSize); 48 | 49 | /** 50 | * ZSTD_ldm_blockCompress(): 51 | * 52 | * Compresses a block using the predefined sequences, along with a secondary 53 | * block compressor. The literals section of every sequence is passed to the 54 | * secondary block compressor, and those sequences are interspersed with the 55 | * predefined sequences. Returns the length of the last literals. 56 | * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. 57 | * `rawSeqStore.seq` may also be updated to split the last sequence between two 58 | * blocks. 59 | * @return The length of the last literals. 
60 | * 61 | * NOTE: The source must be at most the maximum block size, but the predefined 62 | * sequences can be any size, and may be longer than the block. In the case that 63 | * they are longer than the block, the last sequences may need to be split into 64 | * two. We handle that case correctly, and update `rawSeqStore` appropriately. 65 | * NOTE: This function does not return any errors. 66 | */ 67 | size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, 68 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 69 | ZSTD_paramSwitch_e useRowMatchFinder, 70 | void const* src, size_t srcSize); 71 | 72 | /** 73 | * ZSTD_ldm_skipSequences(): 74 | * 75 | * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. 76 | * Avoids emitting matches less than `minMatch` bytes. 77 | * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). 78 | */ 79 | void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, 80 | U32 const minMatch); 81 | 82 | /* ZSTD_ldm_skipRawSeqStoreBytes(): 83 | * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'. 84 | * Not to be used in conjunction with ZSTD_ldm_skipSequences(). 85 | * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). 86 | */ 87 | void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); 88 | 89 | /** ZSTD_ldm_getTableSize() : 90 | * Estimate the space needed for long distance matching tables or 0 if LDM is 91 | * disabled. 92 | */ 93 | size_t ZSTD_ldm_getTableSize(ldmParams_t params); 94 | 95 | /** ZSTD_ldm_getMaxNbSeq() : 96 | * Return an upper bound on the number of sequences that can be produced by 97 | * the long distance matcher, or 0 if LDM is disabled.
98 | */ 99 | size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); 100 | 101 | /** ZSTD_ldm_adjustParameters() : 102 | * If the params->hashRateLog is not set, set it to its default value based on 103 | * windowLog and params->hashLog. 104 | * 105 | * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to 106 | * params->hashLog if it is not). 107 | * 108 | * Ensures that the minMatchLength >= targetLength during optimal parsing. 109 | */ 110 | void ZSTD_ldm_adjustParameters(ldmParams_t* params, 111 | ZSTD_compressionParameters const* cParams); 112 | 113 | #if defined (__cplusplus) 114 | } 115 | #endif 116 | 117 | #endif /* ZSTD_LDM_H */ 118 | -------------------------------------------------------------------------------- /external/zstd/compress/zstd_opt.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses.
9 | */ 10 | 11 | #ifndef ZSTD_OPT_H 12 | #define ZSTD_OPT_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #include "zstd_compress_internal.h" 19 | 20 | /* used in ZSTD_loadDictionaryContent() */ 21 | void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); 22 | 23 | size_t ZSTD_compressBlock_btopt( 24 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 25 | void const* src, size_t srcSize); 26 | size_t ZSTD_compressBlock_btultra( 27 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 28 | void const* src, size_t srcSize); 29 | size_t ZSTD_compressBlock_btultra2( 30 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 31 | void const* src, size_t srcSize); 32 | 33 | 34 | size_t ZSTD_compressBlock_btopt_dictMatchState( 35 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 36 | void const* src, size_t srcSize); 37 | size_t ZSTD_compressBlock_btultra_dictMatchState( 38 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 39 | void const* src, size_t srcSize); 40 | 41 | size_t ZSTD_compressBlock_btopt_extDict( 42 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 43 | void const* src, size_t srcSize); 44 | size_t ZSTD_compressBlock_btultra_extDict( 45 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 46 | void const* src, size_t srcSize); 47 | 48 | /* note : no btultra2 variant for extDict nor dictMatchState, 49 | * because btultra2 is not meant to work with dictionaries 50 | * and is only specific for the first block (no prefix) */ 51 | 52 | #if defined (__cplusplus) 53 | } 54 | #endif 55 | 56 | #endif /* ZSTD_OPT_H */ 57 | -------------------------------------------------------------------------------- /external/zstd/compress/zstdmt_compress.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 
3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef ZSTDMT_COMPRESS_H 12 | #define ZSTDMT_COMPRESS_H 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | 19 | /* Note : This is an internal API. 20 | * These APIs used to be exposed with ZSTDLIB_API, 21 | * because it used to be the only way to invoke MT compression. 22 | * Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead. 23 | * 24 | * This API requires ZSTD_MULTITHREAD to be defined during compilation, 25 | * otherwise ZSTDMT_createCCtx*() will fail. 26 | */ 27 | 28 | /* === Dependencies === */ 29 | #include "../common/zstd_deps.h" /* size_t */ 30 | #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ 31 | #include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ 32 | 33 | 34 | /* === Constants === */ 35 | #ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */ 36 | # define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256) 37 | #endif 38 | #ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */ 39 | # define ZSTDMT_JOBSIZE_MIN (512 KB) 40 | #endif 41 | #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) 42 | #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) 43 | 44 | 45 | /* ======================================================== 46 | * === Private interface, for use by ZSTD_compress.c === 47 | * === Not exposed in libzstd. 
Never invoke directly === 48 | * ======================================================== */ 49 | 50 | /* === Memory management === */ 51 | typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; 52 | /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ 53 | ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, 54 | ZSTD_customMem cMem, 55 | ZSTD_threadPool *pool); 56 | size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); 57 | 58 | size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); 59 | 60 | /* === Streaming functions === */ 61 | 62 | size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); 63 | 64 | /*! ZSTDMT_initCStream_internal() : 65 | * Private use only. Init streaming operation. 66 | * expects params to be valid. 67 | * must receive dict, or cdict, or none, but not both. 68 | * @return : 0, or an error code */ 69 | size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, 70 | const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, 71 | const ZSTD_CDict* cdict, 72 | ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); 73 | 74 | /*! ZSTDMT_compressStream_generic() : 75 | * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() 76 | * depending on flush directive. 77 | * @return : minimum amount of data still to be flushed 78 | * 0 if fully flushed 79 | * or an error code 80 | * note : needs to be init using any ZSTD_initCStream*() variant */ 81 | size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, 82 | ZSTD_outBuffer* output, 83 | ZSTD_inBuffer* input, 84 | ZSTD_EndDirective endOp); 85 | 86 | /*! ZSTDMT_toFlushNow() 87 | * Tell how many bytes are ready to be flushed immediately. 88 | * Probe the oldest active job (not yet entirely flushed) and check its output buffer. 
89 | * If return 0, it means there is no active job, 90 | * or, it means oldest job is still active, but everything produced has been flushed so far, 91 | * therefore flushing is limited by speed of oldest job. */ 92 | size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx); 93 | 94 | /*! ZSTDMT_updateCParams_whileCompressing() : 95 | * Updates only a selected set of compression parameters, to remain compatible with current frame. 96 | * New parameters will be applied to next compression job. */ 97 | void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); 98 | 99 | /*! ZSTDMT_getFrameProgression(): 100 | * tells how much data has been consumed (input) and produced (output) for current frame. 101 | * able to count progression inside worker threads. 102 | */ 103 | ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); 104 | 105 | 106 | #if defined (__cplusplus) 107 | } 108 | #endif 109 | 110 | #endif /* ZSTDMT_COMPRESS_H */ 111 | -------------------------------------------------------------------------------- /external/zstd/decompress/zstd_ddict.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | 12 | #ifndef ZSTD_DDICT_H 13 | #define ZSTD_DDICT_H 14 | 15 | /*-******************************************************* 16 | * Dependencies 17 | *********************************************************/ 18 | #include "../common/zstd_deps.h" /* size_t */ 19 | #include "../zstd.h" /* ZSTD_DDict, and several public functions */ 20 | 21 | 22 | /*-******************************************************* 23 | * Interface 24 | *********************************************************/ 25 | 26 | /* note: several prototypes are already published in `zstd.h` : 27 | * ZSTD_createDDict() 28 | * ZSTD_createDDict_byReference() 29 | * ZSTD_createDDict_advanced() 30 | * ZSTD_freeDDict() 31 | * ZSTD_initStaticDDict() 32 | * ZSTD_sizeof_DDict() 33 | * ZSTD_estimateDDictSize() 34 | * ZSTD_getDictID_fromDict() 35 | */ 36 | 37 | const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); 38 | size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); 39 | 40 | void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); 41 | 42 | 43 | 44 | #endif /* ZSTD_DDICT_H */ 45 | -------------------------------------------------------------------------------- /external/zstd/decompress/zstd_decompress_block.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | 12 | #ifndef ZSTD_DEC_BLOCK_H 13 | #define ZSTD_DEC_BLOCK_H 14 | 15 | /*-******************************************************* 16 | * Dependencies 17 | *********************************************************/ 18 | #include "../common/zstd_deps.h" /* size_t */ 19 | #include "../zstd.h" /* DCtx, and some public functions */ 20 | #include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */ 21 | #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ 22 | 23 | 24 | /* === Prototypes === */ 25 | 26 | /* note: prototypes already published within `zstd.h` : 27 | * ZSTD_decompressBlock() 28 | */ 29 | 30 | /* note: prototypes already published within `zstd_internal.h` : 31 | * ZSTD_getcBlockSize() 32 | * ZSTD_decodeSeqHeaders() 33 | */ 34 | 35 | 36 | /* Streaming state is used to inform allocation of the literal buffer */ 37 | typedef enum { 38 | not_streaming = 0, 39 | is_streaming = 1 40 | } streaming_operation; 41 | 42 | /* ZSTD_decompressBlock_internal() : 43 | * decompress block, starting at `src`, 44 | * into destination buffer `dst`. 45 | * @return : decompressed block size, 46 | * or an error code (which can be tested using ZSTD_isError()) 47 | */ 48 | size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, 49 | void* dst, size_t dstCapacity, 50 | const void* src, size_t srcSize, const int frame, const streaming_operation streaming); 51 | 52 | /* ZSTD_buildFSETable() : 53 | * generate FSE decoding table for one symbol (ll, ml or off) 54 | * this function must be called with valid parameters only 55 | * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) 56 | * in which case it cannot fail. 57 | * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is 58 | * defined in zstd_decompress_internal.h. 59 | * Internal use only. 
60 | */ 61 | void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, 62 | const short* normalizedCounter, unsigned maxSymbolValue, 63 | const U32* baseValue, const U32* nbAdditionalBits, 64 | unsigned tableLog, void* wksp, size_t wkspSize, 65 | int bmi2); 66 | 67 | 68 | #endif /* ZSTD_DEC_BLOCK_H */ 69 | -------------------------------------------------------------------------------- /external/zstd/libzstd.pc.in: -------------------------------------------------------------------------------- 1 | # ZSTD - standard compression algorithm 2 | # Copyright (C) 2014-2016, Yann Collet, Facebook 3 | # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 4 | 5 | prefix=@PREFIX@ 6 | exec_prefix=@EXEC_PREFIX@ 7 | includedir=@INCLUDEDIR@ 8 | libdir=@LIBDIR@ 9 | 10 | Name: zstd 11 | Description: fast lossless compression algorithm library 12 | URL: http://www.zstd.net/ 13 | Version: @VERSION@ 14 | Libs: -L${libdir} -lzstd 15 | Libs.private: @LIBS_PRIVATE@ 16 | Cflags: -I${includedir} 17 | -------------------------------------------------------------------------------- /external/zstd/zstd_errors.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_ERRORS_H_398273423 12 | #define ZSTD_ERRORS_H_398273423 13 | 14 | #if defined (__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | /*===== dependency =====*/ 19 | #include /* size_t */ 20 | 21 | 22 | /* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ 23 | #ifndef ZSTDERRORLIB_VISIBILITY 24 | # if defined(__GNUC__) && (__GNUC__ >= 4) 25 | # define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) 26 | # else 27 | # define ZSTDERRORLIB_VISIBILITY 28 | # endif 29 | #endif 30 | #if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) 31 | # define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY 32 | #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) 33 | # define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ 34 | #else 35 | # define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY 36 | #endif 37 | 38 | /*-********************************************* 39 | * Error codes list 40 | *-********************************************* 41 | * Error codes _values_ are pinned down since v1.3.1 only. 42 | * Therefore, don't rely on values if you may link to any version < v1.3.1. 43 | * 44 | * Only values < 100 are considered stable. 45 | * 46 | * note 1 : this API shall be used with static linking only. 47 | * dynamic linking is not yet officially supported. 48 | * note 2 : Prefer relying on the enum than on its value whenever possible 49 | * This is the only supported way to use the error list < v1.3.1 50 | * note 3 : ZSTD_isError() is always correct, whatever the library version. 
51 | **********************************************/ 52 | typedef enum { 53 | ZSTD_error_no_error = 0, 54 | ZSTD_error_GENERIC = 1, 55 | ZSTD_error_prefix_unknown = 10, 56 | ZSTD_error_version_unsupported = 12, 57 | ZSTD_error_frameParameter_unsupported = 14, 58 | ZSTD_error_frameParameter_windowTooLarge = 16, 59 | ZSTD_error_corruption_detected = 20, 60 | ZSTD_error_checksum_wrong = 22, 61 | ZSTD_error_dictionary_corrupted = 30, 62 | ZSTD_error_dictionary_wrong = 32, 63 | ZSTD_error_dictionaryCreation_failed = 34, 64 | ZSTD_error_parameter_unsupported = 40, 65 | ZSTD_error_parameter_outOfBound = 42, 66 | ZSTD_error_tableLog_tooLarge = 44, 67 | ZSTD_error_maxSymbolValue_tooLarge = 46, 68 | ZSTD_error_maxSymbolValue_tooSmall = 48, 69 | ZSTD_error_stage_wrong = 60, 70 | ZSTD_error_init_missing = 62, 71 | ZSTD_error_memory_allocation = 64, 72 | ZSTD_error_workSpace_tooSmall= 66, 73 | ZSTD_error_dstSize_tooSmall = 70, 74 | ZSTD_error_srcSize_wrong = 72, 75 | ZSTD_error_dstBuffer_null = 74, 76 | /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ 77 | ZSTD_error_frameIndex_tooLarge = 100, 78 | ZSTD_error_seekableIO = 102, 79 | ZSTD_error_dstBuffer_wrong = 104, 80 | ZSTD_error_srcBuffer_wrong = 105, 81 | ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ 82 | } ZSTD_ErrorCode; 83 | 84 | /*! 
ZSTD_getErrorCode() : 85 | convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, 86 | which can be used to compare with enum list published above */ 87 | ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); 88 | ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ 89 | 90 | 91 | #if defined (__cplusplus) 92 | } 93 | #endif 94 | 95 | #endif /* ZSTD_ERRORS_H_398273423 */ 96 | -------------------------------------------------------------------------------- /hyperlogloglog/Hash.hpp: -------------------------------------------------------------------------------- 1 | #ifndef HYPERLOGLOGLOG_HASH 2 | #define HYPERLOGLOGLOG_HASH 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace hyperlogloglog { 10 | template 11 | Word fibonacciHash(const T& x, int b = CHAR_BIT*sizeof(Word)); 12 | 13 | template<> 14 | inline uint64_t fibonacciHash(const uint64_t& x, int b) { 15 | static_assert(CHAR_BIT*sizeof(uint64_t) == 64); 16 | return 0x9e3779b97f4a7c15*x >> (64-b); 17 | } 18 | 19 | template 20 | Word farmhash(const T& x); 21 | 22 | template<> 23 | inline uint64_t farmhash(const std::string& x) { 24 | return farmhash::Hash64(x); 25 | } 26 | 27 | template<> 28 | inline uint64_t farmhash(const uint64_t& x) { 29 | return farmhash::Fingerprint(x); 30 | } 31 | } 32 | 33 | #endif // HYPERLOGLOGLOG_HASH 34 | -------------------------------------------------------------------------------- /hyperlogloglog/HyperLogLog.hpp: -------------------------------------------------------------------------------- 1 | #ifndef HYPERLOGLOGLOG_HYPERLOGLOG 2 | #define HYPERLOGLOGLOG_HYPERLOGLOG 3 | 4 | #include "common.hpp" 5 | #include "PackedVector.hpp" 6 | #include "Hash.hpp" 7 | #include 8 | #include 9 | 10 | namespace hyperlogloglog { 11 | /** 12 | * Basic HyperLogLog. 
The template parameter Word determines the 13 | * word type and length (that is, the length of the hashes). 14 | */ 15 | template 16 | class HyperLogLog { 17 | public: 18 | /** 19 | * Basic constructor 20 | * m : the number of registers 21 | */ 22 | explicit HyperLogLog(int m) : 23 | m(m), logW(log2i(sizeof(Word)*CHAR_BIT)), 24 | logM(log2i(m)), M(logW,m) { 25 | } 26 | 27 | 28 | 29 | /** 30 | * Returns the size of the sketch (the number of bits) 31 | */ 32 | inline size_t bitSize() const { 33 | return M.bitSize(); 34 | } 35 | 36 | 37 | 38 | /** 39 | * Adds a new element to the sketch 40 | */ 41 | template), 43 | typename JHashFun = decltype(fibonacciHash)> 44 | inline void add(const Object& o, XHashFun h = farmhash, 45 | JHashFun f = fibonacciHash) { 46 | static_assert(std::is_same::value, 47 | "Hash function type does not match the Word type of the class"); 48 | addHash(h(o), f); 49 | } 50 | 51 | 52 | 53 | /** 54 | * Adds a new hash to the sketch. Potentially useful if a 55 | * different kind of hashing scheme is used outside the class. 56 | */ 57 | template)> 58 | inline void addHash(Word x, JHashFun f = fibonacciHash) { 59 | static_assert(std::is_same::value, 60 | "Hash function type does not match the Word type of the class"); 61 | addJr(f(x,logM), rho(x)); 62 | } 63 | 64 | 65 | 66 | /** 67 | * Adds the specific j and r values to the sketch. This may be 68 | * useful if full control is required of the hashing faculties. 
69 | * j must satisfy 0 <= j < m but no checks are made 70 | * r must satisfy 0 <= r < word length in bits (64 for uint64_t) but no checks are made 71 | */ 72 | inline void addJr(Word j, Word r) { 73 | Word r0 = M.get(j); 74 | if (r > r0) 75 | M.set(j, r); 76 | } 77 | 78 | 79 | 80 | /** 81 | * Returns a vector that contains the register values 82 | */ 83 | std::vector exportRegisters() const { 84 | std::vector v(m); 85 | for (int i = 0; i < m; ++i) 86 | v[i] = M.get(i); 87 | return v; 88 | } 89 | 90 | 91 | 92 | /** 93 | * Returns the present estimate 94 | */ 95 | double estimate() const { 96 | double E = 0; 97 | int V = 0; 98 | for (int j = 0; j < m; ++j) { 99 | Word r = M.get(j); 100 | V += (r == 0); 101 | E += 1.0 / (1ull << r); 102 | } 103 | E = alpha(m) * m * m / E; 104 | if (E <= 5.0 / 2.0 * m && V != 0) { 105 | return m*log(static_cast(m)/V); 106 | } 107 | else if (E <= (1ull << 32)/30) { 108 | return E; 109 | } 110 | else { 111 | return -(1ll << 32) * log(1-E/(1ll << 32)); 112 | } 113 | } 114 | 115 | 116 | 117 | /** 118 | * Merges this sketch with the other sketch and returns a new sketch 119 | * 120 | * Note: if the sketches were constructed with different hash 121 | * functions, the result will be nonsensical. It is up to the 122 | * caller to ensure that the exact same hash functions were used. 
123 | */ 124 | HyperLogLog merge(const HyperLogLog& that) const { 125 | if (m != that.m) 126 | throw std::invalid_argument("Mismatch in the number of registers"); 127 | HyperLogLog H(m); 128 | for (int j = 0; j < m; ++j) 129 | H.M.set(j, std::max(M.get(j), that.M.get(j))); 130 | return H; 131 | } 132 | 133 | 134 | 135 | /** 136 | * Returns the correction coefficient 137 | */ 138 | static double alpha(int m) { 139 | switch(m) { 140 | case 16: 141 | return 0.673; 142 | case 32: 143 | return 0.697; 144 | case 64: 145 | return 0.709; 146 | default: 147 | return 0.7213 / (1.0 + 1.079/m); 148 | } 149 | } 150 | 151 | 152 | 153 | /** 154 | * Returns the number of registers 155 | */ 156 | inline int getM() const { 157 | return m; 158 | } 159 | 160 | 161 | private: 162 | int m; 163 | int logW; // register length 164 | int logM; // register address length 165 | PackedVector M; 166 | }; 167 | } 168 | 169 | #endif // HYPERLOGLOGLOG_HYPERLOGLOG 170 | -------------------------------------------------------------------------------- /hyperlogloglog/HyperLogLogZstd.hpp: -------------------------------------------------------------------------------- 1 | #ifndef HYPERLOGLOGLOG_HYPERLOGLOG_ZSTD 2 | #define HYPERLOGLOGLOG_HYPERLOGLOG_ZSTD 3 | 4 | #include "HyperLogLog.hpp" 5 | #include 6 | 7 | namespace hyperlogloglog { 8 | /** 9 | * Zstd-compressed Basic HyperLogLog. The template parameter Word 10 | * determines the word type and length (that is, the length of the 11 | * hashes). 
12 | */ 13 | template 14 | class HyperLogLogZstd { 15 | public: 16 | /** 17 | * Basic constructor 18 | * m : the number of registers 19 | */ 20 | explicit HyperLogLogZstd(int m) : 21 | m(m), logM(log2i(m)), compressedSize(0), 22 | Mcompressed(ZSTD_compressBound(m)), Mtemp(m,0) { 23 | compress(); 24 | } 25 | 26 | 27 | 28 | /** 29 | * Returns the size of the sketch (the number of bits) 30 | */ 31 | inline size_t bitSize() const { 32 | return compressedSize * CHAR_BIT; 33 | } 34 | 35 | 36 | 37 | /** 38 | * Adds a new element to the sketch 39 | */ 40 | template), 42 | typename JHashFun = decltype(fibonacciHash)> 43 | inline void add(const Object& o, XHashFun h = farmhash, 44 | JHashFun f = fibonacciHash) { 45 | static_assert(std::is_same::value, 46 | "Hash function type does not match the Word type of the class"); 47 | addHash(h(o), f); 48 | } 49 | 50 | 51 | 52 | /** 53 | * Adds a new hash to the sketch. Potentially useful if a 54 | * different kind of hashing scheme is used outside the class. 55 | */ 56 | template)> 57 | inline void addHash(Word x, JHashFun f = fibonacciHash) { 58 | static_assert(std::is_same::value, 59 | "Hash function type does not match the Word type of the class"); 60 | addJr(f(x,logM), rho(x)); 61 | } 62 | 63 | 64 | 65 | /** 66 | * Adds the specific j and r values to the sketch. This may be 67 | * useful if full control is required of the hashing faculties. 
68 | * j must satisfy 0 <= j < m but no checks are made 69 | * r must satisfy 0 <= r < word length in bits (64 for uint64_t) but no checks are made 70 | */ 71 | inline void addJr(Word j, Word r) { 72 | if (r <= lowerBound) /* compress() keeps lowerBound equal to the smallest register, so r <= lowerBound can never raise any register; skip the costly decompress */ 73 | return; 74 | 75 | decompress(); 76 | Word r0 = Mtemp[j]; 77 | if (r > r0) { 78 | Mtemp[j] = r; 79 | compress(); 80 | } 81 | } 82 | 83 | 84 | 85 | /** 86 | * Returns a vector that contains the register values 87 | */ 88 | std::vector exportRegisters() const { 89 | decompress(); 90 | return std::vector(Mtemp.begin(), Mtemp.begin() + m); 91 | } 92 | 93 | 94 | 95 | /** 96 | * Returns the present estimate 97 | */ 98 | double estimate() const { 99 | decompress(); 100 | double E = 0; 101 | int V = 0; 102 | for (int j = 0; j < m; ++j) { 103 | Word r = Mtemp[j]; 104 | V += (r == 0); 105 | E += 1.0 / (1ull << r); 106 | } 107 | E = HyperLogLog::alpha(m) * m * m / E; 108 | if (E <= 5.0 / 2.0 * m && V != 0) { 109 | return m*log(static_cast(m)/V); 110 | } 111 | else if (E <= (1ull << 32)/30) { 112 | return E; 113 | } 114 | else { 115 | return -(1ll << 32) * log(1-E/(1ll << 32)); 116 | } 117 | } 118 | 119 | 120 | /** 121 | * Merges this sketch with the other sketch and returns a new sketch 122 | * 123 | * Note: if the sketches were constructed with different hash 124 | * functions, the result will be nonsensical. It is up to the 125 | * caller to ensure that the exact same hash functions were used. 
126 | */ 127 | HyperLogLogZstd merge(const HyperLogLogZstd& that) const { 128 | if (m != that.m) 129 | throw std::invalid_argument("Mismatch in the number of registers"); 130 | HyperLogLogZstd H(m); 131 | decompress(); 132 | that.decompress(); 133 | for (int j = 0; j < m; ++j) 134 | H.Mtemp[j] = std::max(Mtemp[j], that.Mtemp[j]); 135 | H.compress(); 136 | return H; 137 | } 138 | 139 | 140 | 141 | private: 142 | void decompress() const { 143 | ZSTD_decompress(&Mtemp[0], Mtemp.size(), &Mcompressed[0], compressedSize); 144 | } 145 | 146 | void compress() { 147 | compressedSize = ZSTD_compress(&Mcompressed[0], Mcompressed.size(), 148 | &Mtemp[0], m, 1); 149 | lowerBound = sizeof(Word)*CHAR_BIT; 150 | for (Word Mj : Mtemp) 151 | if (Mj < lowerBound) 152 | lowerBound = Mj; 153 | } 154 | 155 | 156 | int m; 157 | int logM; // register address length 158 | size_t compressedSize; 159 | std::vector Mcompressed; 160 | mutable std::vector Mtemp; 161 | Word lowerBound = 0; 162 | }; 163 | } 164 | 165 | #endif // HYPERLOGLOGLOG_HYPERLOGLOG_ZSTD 166 | -------------------------------------------------------------------------------- /hyperlogloglog/Makefile: -------------------------------------------------------------------------------- 1 | CXX=c++ 2 | CXXFLAGS=-std=c++17 -O3 -march=native -pedantic -Wall -Wextra -I../external 3 | LDFLAGS=-L../external/zstd/ -lzstd 4 | HDR=PackedVector.hpp PackedMap.hpp Hash.hpp HyperLogLog.hpp HyperLogLogLog.hpp common.hpp 5 | 6 | all: measure 7 | 8 | measure: measure.o farmhash.o 9 | $(CXX) -o measure measure.o farmhash.o $(LDFLAGS) 10 | 11 | test: test.o farmhash.o 12 | $(CXX) -o test test.o farmhash.o $(LDFLAGS) 13 | 14 | measure.o: measure.cpp $(HDR) 15 | $(CXX) $(CXXFLAGS) -c measure.cpp -o measure.o 16 | 17 | test.o: test.cpp $(HDR) 18 | $(CXX) $(CXXFLAGS) -c test.cpp -o test.o 19 | 20 | farmhash.o: ../external/farmhash/farmhash.cc ../external/farmhash/farmhash.h 21 | $(CXX) $(CXXFLAGS) -Wno-overflow -c -o farmhash.o 
../external/farmhash/farmhash.cc 22 | 23 | clean: 24 | rm -vf *.o test measure 25 | -------------------------------------------------------------------------------- /hyperlogloglog/PackedMap.hpp: -------------------------------------------------------------------------------- 1 | #ifndef HYPERLOGLOGLOG_PACKED_MAP 2 | #define HYPERLOGLOGLOG_PACKED_MAP 3 | 4 | #include "PackedVector.hpp" 5 | 6 | namespace hyperlogloglog { 7 | /** 8 | * This class represents a ``packed map'', that is, a dictionary 9 | * type that maps keys to values such that they are stored 10 | * internally in a Packed Vector with minimal bit usage (in terms of 11 | * multiples of word length). 12 | * 13 | * The internal representation is a sorted array. 14 | * There can be no multiples of keys. 15 | */ 16 | template 17 | class PackedMap { 18 | public: 19 | /** 20 | * keySize : Number of bits per key 21 | * valueSize : Number of bits per value 22 | */ 23 | PackedMap(size_t keySize, size_t valueSize) : 24 | keySize(keySize), valueSize(valueSize), elemSize(keySize + valueSize), 25 | keyMask(~(~((Word)0)<> valueSize; 48 | } 49 | 50 | 51 | 52 | /** 53 | * Returns the index of the value associated with the key, 54 | * or a negative value if the key is not found. 55 | */ 56 | int find(Word key) const { 57 | int l = 0; 58 | int r = size() - 1; 59 | while (l <= r) { 60 | int m = (l+r)/2; 61 | Word k = keyAt(m); 62 | if (k < key) 63 | l = m+1; 64 | else if (k > key) 65 | r = m-1; 66 | else 67 | return m; 68 | } 69 | return -1; 70 | } 71 | 72 | 73 | 74 | /** 75 | * Adds a new key-value pair. If the key is already in the data 76 | * structure, its value will be replaced. Otherwise, the data 77 | * structure the pair will be added as a new element to the data 78 | * structure using insertion sort like addition. 
79 | */ 80 | void add(Word key, Word value) { 81 | int i = find(key); 82 | Word kv; 83 | packElement(kv, key, value); 84 | if (i >= 0) 85 | arr.set(i, kv); 86 | else { 87 | arr.append(kv); 88 | i = size()-1; 89 | while (i > 0 && keyAt(i-1) > key) { /* shift elements with larger keys one slot right so the array stays sorted */ 90 | arr.set(i,arr.get(i-1)); 91 | --i; 92 | } 93 | arr.set(i, kv); 94 | } 95 | } 96 | 97 | 98 | 99 | /** 100 | * Erases the given key from the array. If the key does not exist, 101 | * does not do anything. 102 | */ 103 | inline void erase(Word key) { 104 | int i = find(key); 105 | if (i >= 0) 106 | eraseAt(i); 107 | } 108 | 109 | 110 | 111 | /** 112 | * Erases the element at the given position. 113 | */ 114 | inline void eraseAt(size_t i) { 115 | arr.erase(i); 116 | } 117 | 118 | 119 | 120 | /** 121 | * Returns the number of bits occupied by the actual key/value 122 | * pairs (capacity might be larger). 123 | */ 124 | inline size_t bitSize() const { 125 | return arr.bitSize(); 126 | } 127 | 128 | 129 | 130 | private: 131 | inline void packElement(Word& kv, const Word& key, const Word& value) { /* key goes above the low valueSize bits, value in the low bits */ 132 | kv = ((key & keyMask) << valueSize) | (value&valueMask); 133 | } 134 | 135 | 136 | 137 | size_t keySize; 138 | size_t valueSize; 139 | size_t elemSize; 140 | size_t keyMask; // keySize ones 141 | size_t valueMask; // valueSize ones 142 | PackedVector arr; // internal array 143 | }; 144 | } 145 | 146 | #endif // HYPERLOGLOGLOG_PACKED_MAP 147 | -------------------------------------------------------------------------------- /hyperlogloglog/common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef HYPERLOGLOGLOG_COMMON 2 | #define HYPERLOGLOGLOG_COMMON 3 | 4 | #include 5 | #include 6 | 7 | namespace hyperlogloglog { 8 | template 9 | inline int clz(T x); 10 | 11 | template<> 12 | inline int clz(unsigned int x) { 13 | return __builtin_clz(x); 14 | } 15 | 16 | template<> 17 | inline int clz(unsigned long x) { 18 | return __builtin_clzl(x); 19 | } 20 | 21 | template<> 22 | inline int 
clz(unsigned long long x) { 23 | return __builtin_clzll(x); 24 | } 25 | 26 | template 27 | int rho(T x) { 28 | return clz(x) + 1; 29 | } 30 | 31 | template 32 | constexpr T log2i(T x) { 33 | return x < 2 ? 0 : 1 + log2i(x >> 1); 34 | } 35 | 36 | 37 | 38 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 39 | #ifndef htonll // MacOS X defines this as a macro 40 | inline uint64_t htonll(uint64_t x) { 41 | return (static_cast(htonl(x & 0xffffffff)) << 32) | 42 | (htonl(x >> 32)); 43 | } 44 | 45 | inline uint64_t ntohll(uint64_t x) { /* network-to-host: byte-swap each 32-bit half with ntohl and exchange the halves */ 46 | return (static_cast(ntohl(x & 0xffffffff)) << 32) | 47 | (ntohl(x >> 32)); 48 | } 49 | #endif // htonll 50 | #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 51 | } 52 | 53 | #endif // HYPERLOGLOGLOG_COMMON 54 | -------------------------------------------------------------------------------- /hyperlogloglog/measure.hpp: -------------------------------------------------------------------------------- 1 | #ifndef HYPERLOGLOGLOG_MEASURE 2 | #define HYPERLOGLOGLOG_MEASURE 3 | 4 | #include "common.hpp" 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace hyperlogloglog { 11 | template 12 | std::vector readData(size_t n, size_t len); 13 | 14 | 15 | 16 | template<> 17 | inline std::vector readData(size_t n, size_t) { 18 | auto start = std::chrono::steady_clock::now(); 19 | std::vector v(n); 20 | std::cin.read(reinterpret_cast(&v[0]), n*sizeof(uint64_t)); 21 | for (auto it = v.begin(); it != v.end(); ++it) 22 | *it = ntohll(*it); 23 | auto end = std::chrono::steady_clock::now(); 24 | auto diff = end - start; 25 | double seconds = std::chrono::duration_cast(diff).count()/1e9; 26 | std::cerr << "data reading took " << seconds << std::endl; 27 | return v; 28 | } 29 | 30 | 31 | 32 | template<> 33 | inline std::vector readData(size_t n, size_t len) { 34 | auto start = std::chrono::steady_clock::now(); 35 | std::vector temp(n*len); 36 | std::cin.read(&temp[0], n*len); 37 | std::vector v(n); 38 | for (size_t i = 0; i < n; ++i) 39 | 
v[i] = std::string(temp.begin() + i*len, temp.begin() + (i+1)*len); 40 | auto end = std::chrono::steady_clock::now(); 41 | auto diff = end - start; 42 | double seconds = std::chrono::duration_cast(diff).count()/1e9; 43 | std::cerr << "data reading took " << seconds << std::endl; 44 | return v; 45 | } 46 | 47 | 48 | 49 | template<> 50 | inline std::vector> readData(size_t n, size_t) { 51 | auto start = std::chrono::steady_clock::now(); 52 | std::vector temp(2*n); 53 | std::vector> v(n); 54 | std::cin.read(reinterpret_cast(&temp[0]), 2*n*sizeof(uint32_t)); 55 | int j, r; 56 | for (size_t i = 0; i < n; ++i) { 57 | j = ntohl(temp[2*i]); 58 | r = ntohl(temp[2*i+1]); 59 | v[i].first = j; 60 | v[i].second = r; 61 | } 62 | auto end = std::chrono::steady_clock::now(); 63 | auto diff = end - start; 64 | double seconds = std::chrono::duration_cast(diff).count()/1e9; 65 | std::cerr << "data reading took " << seconds << std::endl; 66 | return v; 67 | } 68 | } 69 | #endif // HYPERLOGLOGLOG_MEASURE 70 | 71 | -------------------------------------------------------------------------------- /inputgenerator/Makefile: -------------------------------------------------------------------------------- 1 | CXX=c++ 2 | CXXFLAGS=-std=c++17 -O3 -march=native -pedantic -Wall -Wextra -I../external 3 | LDFLAGS= 4 | HDR= 5 | 6 | all: inputgenerator 7 | 8 | inputgenerator: inputgenerator.o 9 | $(CXX) -o inputgenerator inputgenerator.o $(LDFLAGS) 10 | 11 | inputgenerator.o: inputgenerator.cpp 12 | $(CXX) -c $(CXXFLAGS) -o inputgenerator.o inputgenerator.cpp 13 | 14 | clean: 15 | rm -vf *.o inputgenerator 16 | -------------------------------------------------------------------------------- /zetasketch/.gitattributes: -------------------------------------------------------------------------------- 1 | # 2 | # https://help.github.com/articles/dealing-with-line-endings/ 3 | # 4 | # These are explicitly windows files and should use crlf 5 | *.bat text eol=crlf 6 | 7 | 
-------------------------------------------------------------------------------- /zetasketch/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore Gradle project-specific cache directory 2 | .gradle 3 | 4 | # Ignore Gradle build output directory 5 | build 6 | -------------------------------------------------------------------------------- /zetasketch/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * This is a general purpose Gradle build. 5 | * Learn more about Gradle by exploring our samples at https://docs.gradle.org/7.2/samples 6 | */ 7 | plugins { 8 | id 'application' 9 | } 10 | 11 | repositories { 12 | mavenCentral() 13 | } 14 | 15 | dependencies { 16 | implementation 'com.google.zetasketch:zetasketch:0.1.0' 17 | } 18 | 19 | application { 20 | mainClass = 'Measure' 21 | } 22 | 23 | jar { 24 | manifest { 25 | attributes( 26 | "Main-Class": 'Measure' 27 | ) 28 | } 29 | 30 | from { 31 | configurations.runtimeClasspath.collect { it.isDirectory() ? 
it : zipTree(it) } 32 | } 33 | } 34 | 35 | -------------------------------------------------------------------------------- /zetasketch/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mkarppa/hyperlogloglog/50670ae4d70b4f1164c94a6b7cc4b32e2bf70982/zetasketch/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /zetasketch/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.2-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /zetasketch/gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 
15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 
64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /zetasketch/settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * The settings file is used to specify which projects to include in your build. 5 | * 6 | * Detailed information about configuring a multi-project build in Gradle can be found 7 | * in the user manual at https://docs.gradle.org/7.2/userguide/multi_project_builds.html 8 | */ 9 | 10 | rootProject.name = 'zetasketch' 11 | --------------------------------------------------------------------------------