├── version.cfg.in ├── theta ├── test │ ├── theta_compact_empty_from_java_v2.sk │ ├── theta_compact_empty_from_java_v1.sk │ ├── theta_compact_estimation_from_java_v1.sk │ ├── theta_compact_estimation_from_java_v2.sk │ ├── CMakeLists.txt │ └── theta_sketch_serialize_for_java.cpp ├── include │ ├── theta_a_not_b_impl.hpp │ ├── theta_jaccard_similarity.hpp │ ├── theta_comparators.hpp │ ├── theta_constants.hpp │ ├── theta_intersection_impl.hpp │ ├── theta_set_difference_base.hpp │ ├── theta_intersection_base.hpp │ ├── theta_union_base.hpp │ ├── theta_union_impl.hpp │ ├── theta_a_not_b.hpp │ ├── theta_helpers.hpp │ └── compact_theta_sketch_parser.hpp └── CMakeLists.txt ├── quantiles ├── test │ ├── Qk128_n50_v0.3.0.sk │ ├── Qk128_n50_v0.6.0.sk │ ├── Qk128_n50_v0.8.0.sk │ ├── Qk128_n50_v0.8.3.sk │ ├── Qk128_n1000_v0.3.0.sk │ ├── Qk128_n1000_v0.6.0.sk │ ├── Qk128_n1000_v0.8.0.sk │ ├── Qk128_n1000_v0.8.3.sk │ ├── CMakeLists.txt │ └── quantiles_sketch_serialize_for_java.cpp └── CMakeLists.txt ├── kll ├── test │ ├── kll_sketch_float_one_item_v1.sk │ ├── CMakeLists.txt │ └── kll_sketch_serialize_for_java.cpp └── CMakeLists.txt ├── tdigest ├── test │ ├── tdigest_ref_k100_n10000_double.sk │ ├── tdigest_ref_k100_n10000_float.sk │ ├── tdigest_custom_allocator_test.cpp │ ├── CMakeLists.txt │ ├── tdigest_deserialize_from_java_test.cpp │ └── tdigest_serialize_for_java.cpp └── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── cmake └── DataSketchesConfig.cmake.in ├── .asf.yaml ├── NOTICE ├── .gitignore ├── .github └── workflows │ ├── sanitize.yml │ ├── doxygen.yml │ ├── serde_compat.yml │ ├── code_coverage.yml │ └── build_cmake.yml ├── hll ├── include │ ├── hll.private.hpp │ ├── CompositeInterpolationXTable.hpp │ ├── CubicInterpolation.hpp │ ├── coupon_iterator.hpp │ ├── HarmonicNumbers.hpp │ ├── RelativeErrorTables.hpp │ ├── Hll6Array.hpp │ ├── coupon_iterator-internal.hpp │ ├── Hll8Array.hpp │ ├── CouponHashSet.hpp │ └── Hll4Array.hpp ├── test │ ├── TablesTest.cpp │ ├── 
hll_sketch_serialize_for_java.cpp │ ├── CMakeLists.txt │ ├── AuxHashMapTest.cpp │ └── hll_sketch_deserialize_from_java_test.cpp └── CMakeLists.txt ├── common ├── test │ ├── catch_runner.cpp │ ├── test_allocator.cpp │ ├── CMakeLists.txt │ ├── integration_test.cpp │ └── optional_test.cpp ├── include │ ├── ceiling_power_of_2.hpp │ ├── version.hpp.in │ ├── memory_operations.hpp │ ├── conditional_back_inserter.hpp │ ├── conditional_forward.hpp │ └── kolmogorov_smirnov.hpp └── CMakeLists.txt ├── density ├── test │ └── CMakeLists.txt └── CMakeLists.txt ├── tuple ├── include │ ├── tuple_a_not_b_impl.hpp │ ├── array_tuple_a_not_b_impl.hpp │ ├── array_tuple_intersection_impl.hpp │ ├── tuple_jaccard_similarity.hpp │ ├── tuple_intersection_impl.hpp │ ├── array_tuple_union_impl.hpp │ ├── tuple_union_impl.hpp │ ├── array_tuple_a_not_b.hpp │ ├── tuple_a_not_b.hpp │ ├── array_of_doubles_sketch.hpp │ ├── array_tuple_intersection.hpp │ └── array_tuple_union.hpp ├── test │ ├── tuple_sketch_serialize_for_java.cpp │ ├── CMakeLists.txt │ ├── tuple_sketch_deserialize_from_java_test.cpp │ └── aod_sketch_serialize_for_java.cpp └── CMakeLists.txt ├── req ├── include │ └── req_common.hpp ├── test │ ├── req_sketch_serialize_for_java.cpp │ ├── CMakeLists.txt │ └── req_sketch_deserialize_from_java_test.cpp └── CMakeLists.txt ├── count ├── CMakeLists.txt └── test │ └── CMakeLists.txt ├── fi ├── CMakeLists.txt └── test │ ├── CMakeLists.txt │ └── reverse_purge_hash_map_test.cpp ├── filters ├── CMakeLists.txt └── test │ ├── CMakeLists.txt │ ├── bloom_filter_serialize_for_java.cpp │ ├── bloom_filter_deserialize_from_java_test.cpp │ └── bloom_filter_allocation_test.cpp ├── cpc ├── test │ ├── cpc_sketch_serialize_for_java.cpp │ ├── CMakeLists.txt │ ├── cpc_sketch_deserialize_from_java_test.cpp │ └── compression_test.cpp ├── CMakeLists.txt └── include │ ├── cpc_common.hpp │ └── u32_table.hpp └── sampling ├── CMakeLists.txt └── test ├── var_opt_union_serialize_for_java.cpp ├── 
var_opt_union_deserialize_from_java_test.cpp ├── var_opt_sketch_serialize_for_java.cpp └── CMakeLists.txt /version.cfg.in: -------------------------------------------------------------------------------- 1 | 5.3.@DT@.@HHMM@ 2 | -------------------------------------------------------------------------------- /theta/test/theta_compact_empty_from_java_v2.sk: -------------------------------------------------------------------------------- 1 | ̓ -------------------------------------------------------------------------------- /quantiles/test/Qk128_n50_v0.3.0.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/quantiles/test/Qk128_n50_v0.3.0.sk -------------------------------------------------------------------------------- /quantiles/test/Qk128_n50_v0.6.0.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/quantiles/test/Qk128_n50_v0.6.0.sk -------------------------------------------------------------------------------- /quantiles/test/Qk128_n50_v0.8.0.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/quantiles/test/Qk128_n50_v0.8.0.sk -------------------------------------------------------------------------------- /quantiles/test/Qk128_n50_v0.8.3.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/quantiles/test/Qk128_n50_v0.8.3.sk -------------------------------------------------------------------------------- /quantiles/test/Qk128_n1000_v0.3.0.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/quantiles/test/Qk128_n1000_v0.3.0.sk 
-------------------------------------------------------------------------------- /quantiles/test/Qk128_n1000_v0.6.0.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/quantiles/test/Qk128_n1000_v0.6.0.sk -------------------------------------------------------------------------------- /quantiles/test/Qk128_n1000_v0.8.0.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/quantiles/test/Qk128_n1000_v0.8.0.sk -------------------------------------------------------------------------------- /quantiles/test/Qk128_n1000_v0.8.3.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/quantiles/test/Qk128_n1000_v0.8.3.sk -------------------------------------------------------------------------------- /kll/test/kll_sketch_float_one_item_v1.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/kll/test/kll_sketch_float_one_item_v1.sk -------------------------------------------------------------------------------- /tdigest/test/tdigest_ref_k100_n10000_double.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/tdigest/test/tdigest_ref_k100_n10000_double.sk -------------------------------------------------------------------------------- /tdigest/test/tdigest_ref_k100_n10000_float.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/tdigest/test/tdigest_ref_k100_n10000_float.sk -------------------------------------------------------------------------------- /theta/test/theta_compact_empty_from_java_v1.sk: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/theta/test/theta_compact_empty_from_java_v1.sk -------------------------------------------------------------------------------- /theta/test/theta_compact_estimation_from_java_v1.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/theta/test/theta_compact_estimation_from_java_v1.sk -------------------------------------------------------------------------------- /theta/test/theta_compact_estimation_from_java_v2.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-cpp/HEAD/theta/test/theta_compact_estimation_from_java_v2.sk -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | We adhere to the Apache Software Foundation's [Code of Conduct](https://www.apache.org/foundation/policies/conduct). 
4 | -------------------------------------------------------------------------------- /cmake/DataSketchesConfig.cmake.in: -------------------------------------------------------------------------------- 1 | set(DATASKETCHES_VERSION "@PROJECT_VERSION@") 2 | 3 | @PACKAGE_INIT@ 4 | 5 | include("${CMAKE_CURRENT_LIST_DIR}/DataSketches.cmake") 6 | 7 | set_and_check(DATASKETCHES_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/DataSketches") 8 | set(DATASKETCHES_LIB "datasketches") 9 | 10 | check_required_components("@PROJECT_NAME@") -------------------------------------------------------------------------------- /.asf.yaml: -------------------------------------------------------------------------------- 1 | github: 2 | homepage: https://datasketches.apache.org 3 | ghp_branch: gh-pages 4 | ghp_path: /docs 5 | 6 | protected_branches: 7 | master: 8 | required_pull_request_reviews: 9 | dismiss_stale_reviews: false 10 | required_approving_review_count: 1 11 | required_signatures: false 12 | required_conversation_resolution: false 13 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache DataSketches C++ 2 | Copyright 2025 The Apache Software Foundation 3 | 4 | Copyright 2015-2018 Yahoo Inc. 5 | Copyright 2019-2020 Verizon Media 6 | Copyright 2021- Yahoo Inc. 7 | 8 | This product includes software developed at 9 | The Apache Software Foundation (http://www.apache.org/). 10 | 11 | Prior to moving to ASF, the software for this project was developed at 12 | Yahoo Inc. (https://developer.yahoo.com). 
13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Eclipse project files 2 | .cproject 3 | .project 4 | .settings/ 5 | 6 | # Visual Studio Code 7 | .vscode/ 8 | 9 | # OSX files 10 | .DS_Store 11 | 12 | # Compiler output, build specific 13 | *.a 14 | *.o 15 | *.so 16 | *.dll 17 | *.dylib 18 | bin/ 19 | lib/ 20 | Default/ 21 | 22 | # Python files 23 | .eggs/ 24 | .tox/ 25 | dist/ 26 | python/datasketches.egg-info/ 27 | 28 | # Log file 29 | 30 | # Package Files # 31 | *.zip 32 | *.tar.gz 33 | 34 | # Other 35 | local/ 36 | tmp/ 37 | _site/ 38 | _* 39 | _*/ 40 | 41 | # exceptions 42 | !__init__.py 43 | 44 | docs 45 | java 46 | -------------------------------------------------------------------------------- /.github/workflows/sanitize.yml: -------------------------------------------------------------------------------- 1 | name: Sanitize 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | workflow_dispatch: 8 | 9 | env: 10 | BUILD_TYPE: Release 11 | 12 | jobs: 13 | build: 14 | name: Address Sanitizer 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | with: 21 | submodules: true 22 | persist-credentials: false 23 | - name: Configure 24 | run: cmake -B build -S . 
-DSANITIZE=address 25 | - name: Build C++ unit tests 26 | run: cmake --build build --config Release 27 | - name: Run C++ tests 28 | run: cmake --build build --config Release --target test 29 | -------------------------------------------------------------------------------- /.github/workflows/doxygen.yml: -------------------------------------------------------------------------------- 1 | name: Doxygen 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | workflow_dispatch: 8 | 9 | jobs: 10 | build-documentation: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v4 15 | - name: Install doxygen 16 | run: sudo apt install doxygen -y 17 | - name: Run Doxygen 18 | run: doxygen Doxyfile 19 | - name: Pages Deployment 20 | uses: peaceiris/actions-gh-pages@v3.9.3 21 | with: 22 | github_token: ${{ secrets.GITHUB_TOKEN }} 23 | publish_dir: ./docs 24 | destination_dir: docs/${{ github.ref_name }} 25 | enable_jekyll: false 26 | allow_empty_commit: false 27 | force_orphan: false 28 | publish_branch: gh-pages 29 | -------------------------------------------------------------------------------- /hll/include/hll.private.hpp: -------------------------------------------------------------------------------- 1 | #ifndef _HLL_PRIVATE_HPP_ 2 | #define _HLL_PRIVATE_HPP_ 3 | 4 | #include "AuxHashMap.hpp" 5 | #include "CompositeInterpolationXTable.hpp" 6 | #include "CouponHashSet.hpp" 7 | #include "CouponList.hpp" 8 | #include "CubicInterpolation.hpp" 9 | #include "HarmonicNumbers.hpp" 10 | #include "Hll4Array.hpp" 11 | #include "Hll6Array.hpp" 12 | #include "Hll8Array.hpp" 13 | #include "HllArray.hpp" 14 | #include "HllSketchImpl.hpp" 15 | #include "HllSketchImplFactory.hpp" 16 | #include "HllUtil.hpp" 17 | #include "RelativeErrorTables.hpp" 18 | 19 | #include "AuxHashMap-internal.hpp" 20 | #include "coupon_iterator.hpp" 21 | #include "CouponHashSet-internal.hpp" 22 | #include "CouponList-internal.hpp" 23 | #include "Hll4Array-internal.hpp" 24 | 
#include "Hll6Array-internal.hpp" 25 | #include "Hll8Array-internal.hpp" 26 | #include "HllArray-internal.hpp" 27 | #include "HllSketch-internal.hpp" 28 | #include "HllSketchImpl-internal.hpp" 29 | #include "HllUnion-internal.hpp" 30 | #include "coupon_iterator-internal.hpp" 31 | 32 | #endif // _HLL_PRIVATE_HPP_ 33 | -------------------------------------------------------------------------------- /.github/workflows/serde_compat.yml: -------------------------------------------------------------------------------- 1 | name: Java SerDe Compatibility Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | workflow_dispatch: 8 | 9 | jobs: 10 | build: 11 | name: SerDe Test 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v5 16 | - name: Checkout Java 17 | uses: actions/checkout@v5 18 | with: 19 | repository: apache/datasketches-java 20 | path: java 21 | - name: Setup Java 22 | uses: actions/setup-java@v5 23 | with: 24 | java-version: '25' 25 | distribution: 'temurin' 26 | - name: Run Java 27 | run: cd java && mvn test -P generate-java-files 28 | - name: Copy files 29 | run: cp java/serialization_test_data/java_generated_files/*.sk java 30 | - name: Run cmake 31 | run: cd build && cmake .. -DSERDE_COMPAT=true 32 | - name: Build C++ unit tests 33 | run: cmake --build build --config Release 34 | - name: Run C++ tests 35 | run: cmake --build build --config Release --target test 36 | -------------------------------------------------------------------------------- /common/test/catch_runner.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _TEST_COMMON_HPP_ 21 | #define _TEST_COMMON_HPP_ 22 | 23 | // this largely just ensures that, for our simple test cases at least, 24 | // we define main() exactly once per test executable 25 | #define CATCH_CONFIG_MAIN 26 | #include 27 | 28 | #endif // _TEST_COMMON_HPP_ 29 | -------------------------------------------------------------------------------- /density/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_executable(density_test) 19 | 20 | target_link_libraries(density_test density common_test_lib) 21 | 22 | set_target_properties(density_test PROPERTIES 23 | CXX_STANDARD_REQUIRED YES 24 | ) 25 | 26 | add_test( 27 | NAME density_test 28 | COMMAND density_test 29 | ) 30 | 31 | target_sources(density_test 32 | PRIVATE 33 | density_sketch_test.cpp 34 | ) 35 | -------------------------------------------------------------------------------- /common/test/test_allocator.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include "test_allocator.hpp" 21 | 22 | namespace datasketches { 23 | 24 | // global variable to keep track of allocated size 25 | long long test_allocator_total_bytes = 0; 26 | 27 | // global variable to keep track of net allocations 28 | // (number of allocations minus number of deallocations) 29 | long long test_allocator_net_allocations = 0; 30 | 31 | } /* namespace datasketches */ 32 | -------------------------------------------------------------------------------- /.github/workflows/code_coverage.yml: -------------------------------------------------------------------------------- 1 | name: Code Coverage 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | 9 | env: 10 | BUILD_TYPE: Debug 11 | 12 | jobs: 13 | build: 14 | name: Coveralls 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | with: 21 | submodules: true 22 | persist-credentials: false 23 | - name: Download and install lcov 24 | run: | 25 | VERSION="1.15" 26 | mkdir lcov_pkg && cd lcov_pkg 27 | wget "https://github.com/linux-test-project/lcov/releases/download/v$VERSION/lcov-$VERSION.tar.gz" 28 | tar -xzf "lcov-$VERSION.tar.gz" 29 | cd "lcov-$VERSION" 30 | sudo make install 31 | - name: Configure 32 | run: cd build && cmake .. 
-DCOVERAGE=ON 33 | - name: Build unit tests 34 | run: cmake --build build 35 | - name: Run tests 36 | run: cmake --build build --target test 37 | - name: Generate coverage .info 38 | run: cmake --build build --target coverage_report 39 | - name: Post to Coveralls 40 | uses: coverallsapp/github-action@master 41 | with: 42 | github-token: ${{ secrets.GITHUB_TOKEN }} 43 | path-to-lcov: build/lcov.info 44 | -------------------------------------------------------------------------------- /tuple/include/tuple_a_not_b_impl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | namespace datasketches { 21 | 22 | template 23 | tuple_a_not_b::tuple_a_not_b(uint64_t seed, const A& allocator): 24 | state_(seed, allocator) 25 | {} 26 | 27 | template 28 | template 29 | auto tuple_a_not_b::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch { 30 | return state_.compute(std::forward(a), b, ordered); 31 | } 32 | 33 | } /* namespace datasketches */ 34 | -------------------------------------------------------------------------------- /req/include/req_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef REQ_COMMON_HPP_ 21 | #define REQ_COMMON_HPP_ 22 | 23 | #include 24 | 25 | #include "serde.hpp" 26 | #include "common_defs.hpp" 27 | 28 | namespace datasketches { 29 | 30 | /// REQ sketch constants 31 | namespace req_constants { 32 | /// minimum value of parameter K 33 | const uint16_t MIN_K = 4; 34 | /// initial number of sections 35 | const uint8_t INIT_NUM_SECTIONS = 3; 36 | /// multiplier for nominal capacity 37 | const unsigned MULTIPLIER = 2; 38 | } 39 | 40 | } /* namespace datasketches */ 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /common/include/ceiling_power_of_2.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef CEILING_POWER_OF_2_HPP_ 21 | #define CEILING_POWER_OF_2_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | // compute the next highest power of 2 of 32-bit n 28 | // taken from https://graphics.stanford.edu/~seander/bithacks.html 29 | static inline uint32_t ceiling_power_of_2(uint32_t n) { 30 | --n; 31 | n |= n >> 1; 32 | n |= n >> 2; 33 | n |= n >> 4; 34 | n |= n >> 8; 35 | n |= n >> 16; 36 | return ++n; 37 | } 38 | 39 | } /* namespace datasketches */ 40 | 41 | #endif // CEILING_POWER_OF_2_HPP_ 42 | -------------------------------------------------------------------------------- /hll/include/CompositeInterpolationXTable.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _COMPOSITEINTERPOLATIONXTABLE_HPP_ 21 | #define _COMPOSITEINTERPOLATIONXTABLE_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | template> 28 | class CompositeInterpolationXTable { 29 | public: 30 | static uint32_t get_y_stride(uint8_t logK); 31 | 32 | static const double* get_x_arr(uint8_t logK); 33 | static uint32_t get_x_arr_length(); 34 | }; 35 | 36 | } 37 | 38 | #include "CompositeInterpolationXTable-internal.hpp" 39 | 40 | #endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */ 41 | -------------------------------------------------------------------------------- /tdigest/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_library(tdigest INTERFACE) 19 | 20 | add_library(${PROJECT_NAME}::TDIGEST ALIAS tdigest) 21 | 22 | if (BUILD_TESTS) 23 | add_subdirectory(test) 24 | endif() 25 | 26 | target_include_directories(tdigest 27 | INTERFACE 28 | $ 29 | $/include> 30 | ) 31 | 32 | target_link_libraries(tdigest INTERFACE common) 33 | 34 | install(TARGETS tdigest 35 | EXPORT ${PROJECT_NAME} 36 | ) 37 | 38 | install(FILES 39 | include/tdigest.hpp 40 | include/tdigest_impl.hpp 41 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches") 42 | -------------------------------------------------------------------------------- /common/include/version.hpp.in: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _VERSION_HPP_ 21 | #define _VERSION_HPP_ 22 | 23 | namespace datasketches { 24 | 25 | // the configured options and settings for DataSketches 26 | constexpr int VERSION_MAJOR {@DataSketches_VERSION_MAJOR@}; 27 | constexpr int VERSION_MINOR {@DataSketches_VERSION_MINOR@}; 28 | constexpr int VERSION_PATCH {@DataSketches_VERSION_PATCH@}; 29 | constexpr int VERSION_TWEAK {@DataSketches_VERSION_TWEAK@}; 30 | 31 | constexpr auto VERSION_STR = "@DataSketches_VERSION@"; 32 | constexpr auto SOURCE_URL = "https://github.com/apache/datasketches-cpp"; 33 | 34 | } 35 | 36 | #endif // _VERSION_HPP_ 37 | -------------------------------------------------------------------------------- /density/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_library(density INTERFACE) 19 | 20 | add_library(${PROJECT_NAME}::DENSITY ALIAS density) 21 | 22 | if (BUILD_TESTS) 23 | add_subdirectory(test) 24 | endif() 25 | 26 | target_include_directories(density 27 | INTERFACE 28 | $ 29 | $/include> 30 | ) 31 | 32 | target_link_libraries(density INTERFACE common) 33 | 34 | install(TARGETS density 35 | EXPORT ${PROJECT_NAME} 36 | ) 37 | 38 | install(FILES 39 | include/density_sketch.hpp 40 | include/density_sketch_impl.hpp 41 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches") 42 | -------------------------------------------------------------------------------- /req/test/req_sketch_serialize_for_java.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | 26 | TEST_CASE("req sketch float generate", "[serialize_for_java]") { 27 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 28 | for (const unsigned n: n_arr) { 29 | req_sketch sketch(12); 30 | for (unsigned i = 1; i <= n; ++i) sketch.update(i); 31 | std::ofstream os("req_float_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 32 | sketch.serialize(os); 33 | } 34 | } 35 | 36 | } /* namespace datasketches */ 37 | -------------------------------------------------------------------------------- /tuple/include/array_tuple_a_not_b_impl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | namespace datasketches { 21 | 22 | template 23 | array_tuple_a_not_b::array_tuple_a_not_b(uint64_t seed, const Allocator& allocator): 24 | Base(seed, allocator) {} 25 | 26 | template 27 | template 28 | auto array_tuple_a_not_b::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch { 29 | return CompactSketch(a.get_num_values(), Base::compute(std::forward(a), b, ordered)); 30 | } 31 | 32 | } /* namespace datasketches */ 33 | -------------------------------------------------------------------------------- /quantiles/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 

# Header-only (INTERFACE) target for the quantiles component.
add_library(quantiles INTERFACE)

# Namespaced alias for the same target.
add_library(${PROJECT_NAME}::QUANTILES ALIAS quantiles)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): in this copy the two entries below had been reduced to `$` and
# `$/include>`; restored as the usual build-tree / install-tree pair of
# generator expressions -- confirm against the repository.
target_include_directories(quantiles
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(quantiles INTERFACE common)

install(TARGETS quantiles
  EXPORT ${PROJECT_NAME}
)

# Public headers are installed into the shared DataSketches include directory.
install(FILES
    include/quantiles_sketch.hpp
    include/quantiles_sketch_impl.hpp
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

--------------------------------------------------------------------------------
/count/CMakeLists.txt:
--------------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Header-only (INTERFACE) target for the count component.
add_library(count INTERFACE)

# Namespaced alias for the same target.
add_library(${PROJECT_NAME}::COUNT ALIAS count)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): in this copy the two entries below had been reduced to `$` and
# `$/include>`; restored as the usual build-tree / install-tree pair of
# generator expressions -- confirm against the repository.
target_include_directories(count
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(count INTERFACE common)

install(TARGETS count
  EXPORT ${PROJECT_NAME}
)

# Public headers are installed into the shared DataSketches include directory.
install(FILES
    include/count_min.hpp
    include/count_min_impl.hpp
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

--------------------------------------------------------------------------------
/count/test/CMakeLists.txt:
--------------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Test executable for the count component; its sources are attached at the
# bottom of this file via target_sources().
add_executable(count_min_test)

target_link_libraries(count_min_test count common_test_lib)

set_target_properties(count_min_test PROPERTIES
  CXX_STANDARD_REQUIRED YES
)

# Hand the tests this source directory (CMake-style path with a trailing "/")
# as the TEST_BINARY_INPUT_PATH compile definition.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" COUNT_TEST_BINARY_PATH)
string(APPEND COUNT_TEST_BINARY_PATH "/")
target_compile_definitions(count_min_test
  PRIVATE
    TEST_BINARY_INPUT_PATH="${COUNT_TEST_BINARY_PATH}"
)

add_test(
  NAME count_min_test
  COMMAND count_min_test
)

target_sources(count_min_test
  PRIVATE
    count_min_test.cpp
    count_min_allocation_test.cpp
)

--------------------------------------------------------------------------------
/kll/CMakeLists.txt:
--------------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Header-only (INTERFACE) target for the kll component.
add_library(kll INTERFACE)

# Namespaced alias for the same target.
add_library(${PROJECT_NAME}::KLL ALIAS kll)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): in this copy the two entries below had been reduced to `$` and
# `$/include>`; restored as the usual build-tree / install-tree pair of
# generator expressions -- confirm against the repository.
target_include_directories(kll
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(kll INTERFACE common)

install(TARGETS kll
  EXPORT ${PROJECT_NAME}
)

# Public headers are installed into the shared DataSketches include directory.
install(FILES
    include/kll_sketch.hpp
    include/kll_sketch_impl.hpp
    include/kll_helper.hpp
    include/kll_helper_impl.hpp
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

--------------------------------------------------------------------------------
/tuple/include/array_tuple_intersection_impl.hpp:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
18 | */ 19 | 20 | namespace datasketches { 21 | 22 | template 23 | array_tuple_intersection::array_tuple_intersection(uint64_t seed, const Policy& policy, const Allocator& allocator): 24 | Base(seed, policy, allocator) {} 25 | 26 | template 27 | auto array_tuple_intersection::get_result(bool ordered) const -> CompactSketch { 28 | return CompactSketch(this->state_.get_policy().get_external_policy().get_num_values(), Base::get_result(ordered)); 29 | } 30 | 31 | } /* namespace datasketches */ 32 | -------------------------------------------------------------------------------- /req/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 

# Header-only (INTERFACE) target for the req component.
add_library(req INTERFACE)

# Namespaced alias for the same target.
add_library(${PROJECT_NAME}::REQ ALIAS req)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): in this copy the two entries below had been reduced to `$` and
# `$/include>`; restored as the usual build-tree / install-tree pair of
# generator expressions -- confirm against the repository.
target_include_directories(req
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(req INTERFACE common)

install(TARGETS req
  EXPORT ${PROJECT_NAME}
)

# Public headers are installed into the shared DataSketches include directory.
install(FILES
    include/req_common.hpp
    include/req_sketch.hpp
    include/req_sketch_impl.hpp
    include/req_compactor.hpp
    include/req_compactor_impl.hpp
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

--------------------------------------------------------------------------------
/fi/CMakeLists.txt:
--------------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Header-only (INTERFACE) target for the fi component.
add_library(fi INTERFACE)

# Namespaced alias for the same target.
add_library(${PROJECT_NAME}::FI ALIAS fi)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): in this copy the two entries below had been reduced to `$` and
# `$/include>`; restored as the usual build-tree / install-tree pair of
# generator expressions -- confirm against the repository.
target_include_directories(fi
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(fi INTERFACE common)

install(TARGETS fi
  EXPORT ${PROJECT_NAME}
)

# Public headers are installed into the shared DataSketches include directory.
install(FILES
    include/frequent_items_sketch.hpp
    include/frequent_items_sketch_impl.hpp
    include/reverse_purge_hash_map.hpp
    include/reverse_purge_hash_map_impl.hpp
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

--------------------------------------------------------------------------------
/filters/CMakeLists.txt:
--------------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Header-only (INTERFACE) target for the filters component.
add_library(filters INTERFACE)

# Namespaced alias for the same target.
add_library(${PROJECT_NAME}::FILTERS ALIAS filters)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): in this copy the two entries below had been reduced to `$` and
# `$/include>`; restored as the usual build-tree / install-tree pair of
# generator expressions -- confirm against the repository.
target_include_directories(filters
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(filters INTERFACE common)

install(TARGETS filters
  EXPORT ${PROJECT_NAME}
)

# Public headers are installed into the shared DataSketches include directory.
install(FILES
    include/bloom_filter.hpp
    include/bloom_filter_impl.hpp
    include/bloom_filter_builder_impl.hpp
    include/bit_array_ops.hpp
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

--------------------------------------------------------------------------------
/theta/include/theta_a_not_b_impl.hpp:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
18 | */ 19 | 20 | #ifndef THETA_A_NOT_B_IMPL_HPP_ 21 | #define THETA_A_NOT_B_IMPL_HPP_ 22 | 23 | #include 24 | 25 | #include "conditional_back_inserter.hpp" 26 | 27 | namespace datasketches { 28 | 29 | template 30 | theta_a_not_b_alloc::theta_a_not_b_alloc(uint64_t seed, const A& allocator): 31 | state_(seed, allocator) 32 | {} 33 | 34 | template 35 | template 36 | auto theta_a_not_b_alloc::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch { 37 | return state_.compute(std::forward(a), b, ordered); 38 | } 39 | 40 | } /* namespace datasketches */ 41 | 42 | # endif 43 | -------------------------------------------------------------------------------- /cpc/test/cpc_sketch_serialize_for_java.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | 26 | TEST_CASE("cpc sketch generate", "[serialize_for_java]") { 27 | const unsigned n_arr[] = {0, 100, 200, 2000, 20000}; 28 | for (const unsigned n: n_arr) { 29 | cpc_sketch sketch; 30 | for (unsigned i = 1; i <= n; ++i) sketch.update(i); 31 | REQUIRE(sketch.is_empty() == (n == 0)); 32 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); 33 | std::ofstream os("cpc_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 34 | sketch.serialize(os); 35 | } 36 | } 37 | 38 | } /* namespace datasketches */ 39 | -------------------------------------------------------------------------------- /hll/include/CubicInterpolation.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _CUBICINTERPOLATION_HPP_ 21 | #define _CUBICINTERPOLATION_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | template> 28 | class CubicInterpolation { 29 | public: 30 | static double usingXAndYTables(const double xArr[], const double yArr[], 31 | int len, double x); 32 | 33 | static double usingXAndYTables(double x); 34 | 35 | static double usingXArrAndYStride(const double xArr[], const int xArrLen, 36 | double yStride, double x); 37 | }; 38 | 39 | } 40 | 41 | #include "CubicInterpolation-internal.hpp" 42 | 43 | #endif /* _CUBICINTERPOLATION_HPP_ */ 44 | -------------------------------------------------------------------------------- /theta/include/theta_jaccard_similarity.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef THETA_JACCARD_SIMILARITY_HPP_ 21 | #define THETA_JACCARD_SIMILARITY_HPP_ 22 | 23 | #include "theta_jaccard_similarity_base.hpp" 24 | #include "theta_union.hpp" 25 | #include "theta_intersection.hpp" 26 | 27 | namespace datasketches { 28 | 29 | /// Theta Jaccard similarity alias 30 | template> 31 | using theta_jaccard_similarity_alloc = jaccard_similarity_base, theta_intersection_alloc, trivial_extract_key>; 32 | 33 | /// Theta Jaccard similarity alias with default allocator 34 | using theta_jaccard_similarity = theta_jaccard_similarity_alloc>; 35 | 36 | } /* namespace datasketches */ 37 | 38 | # endif 39 | -------------------------------------------------------------------------------- /tuple/include/tuple_jaccard_similarity.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef TUPLE_JACCARD_SIMILARITY_HPP_ 21 | #define TUPLE_JACCARD_SIMILARITY_HPP_ 22 | 23 | #include "theta_jaccard_similarity_base.hpp" 24 | #include "tuple_union.hpp" 25 | #include "tuple_intersection.hpp" 26 | 27 | namespace datasketches { 28 | 29 | /// Tuple Jaccard similarity alias 30 | template< 31 | typename Summary, 32 | typename IntersectionPolicy, 33 | typename UnionPolicy = default_tuple_union_policy, 34 | typename Allocator = std::allocator> 35 | using tuple_jaccard_similarity = jaccard_similarity_base, tuple_intersection, pair_extract_key>; 36 | 37 | } /* namespace datasketches */ 38 | 39 | # endif 40 | -------------------------------------------------------------------------------- /tuple/test/tuple_sketch_serialize_for_java.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | 26 | TEST_CASE("tuple sketch int generate", "[serialize_for_java]") { 27 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 28 | for (const unsigned n: n_arr) { 29 | auto sketch = update_tuple_sketch::builder().build(); 30 | for (unsigned i = 0; i < n; ++i) sketch.update(i, i); 31 | REQUIRE(sketch.is_empty() == (n == 0)); 32 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); 33 | std::ofstream os("tuple_int_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 34 | sketch.compact().serialize(os); 35 | } 36 | } 37 | 38 | } /* namespace datasketches */ 39 | -------------------------------------------------------------------------------- /tdigest/test/tdigest_custom_allocator_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | 22 | #include "tdigest.hpp" 23 | #include "test_allocator.hpp" 24 | 25 | namespace datasketches { 26 | 27 | using alloc_d = test_allocator; 28 | using tdigest_d = tdigest; 29 | 30 | TEST_CASE("tdigest custom allocator", "[tdigest]") { 31 | test_allocator_total_bytes = 0; 32 | test_allocator_net_allocations = 0; 33 | { 34 | tdigest_d td(100, alloc_d(0)); 35 | for (int i = 0; i < 10000; ++i) td.update(static_cast(i)); 36 | REQUIRE(test_allocator_total_bytes != 0); 37 | REQUIRE(test_allocator_net_allocations != 0); 38 | } 39 | REQUIRE(test_allocator_total_bytes == 0); 40 | REQUIRE(test_allocator_net_allocations == 0); 41 | } 42 | 43 | } /* namespace datasketches */ 44 | -------------------------------------------------------------------------------- /theta/include/theta_comparators.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef THETA_COMPARATORS_HPP_ 21 | #define THETA_COMPARATORS_HPP_ 22 | 23 | namespace datasketches { 24 | 25 | template 26 | struct compare_by_key { 27 | template 28 | bool operator()(Entry1&& a, Entry2&& b) const { 29 | return ExtractKey()(std::forward(a)) < ExtractKey()(std::forward(b)); 30 | } 31 | }; 32 | 33 | // less than 34 | 35 | template 36 | class key_less_than { 37 | public: 38 | explicit key_less_than(const Key& key): key(key) {} 39 | bool operator()(const Entry& entry) const { 40 | return ExtractKey()(entry) < this->key; 41 | } 42 | private: 43 | Key key; 44 | }; 45 | 46 | } /* namespace datasketches */ 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /theta/include/theta_constants.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef THETA_CONSTANTS_HPP_ 21 | #define THETA_CONSTANTS_HPP_ 22 | 23 | #include 24 | #include "common_defs.hpp" 25 | 26 | namespace datasketches { 27 | 28 | /// Theta constants 29 | namespace theta_constants { 30 | /// hash table resize factor 31 | using resize_factor = datasketches::resize_factor; 32 | /// default resize factor 33 | const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8; 34 | 35 | /// max theta - signed max for compatibility with Java 36 | const uint64_t MAX_THETA = LLONG_MAX; 37 | /// min log2 of K 38 | const uint8_t MIN_LG_K = 5; 39 | /// max log2 of K 40 | const uint8_t MAX_LG_K = 26; 41 | /// default log2 of K 42 | const uint8_t DEFAULT_LG_K = 12; 43 | } 44 | 45 | } /* namespace datasketches */ 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /sampling/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 

# Header-only (INTERFACE) target for the sampling component.
add_library(sampling INTERFACE)

# Namespaced alias for the same target.
add_library(${PROJECT_NAME}::SAMPLING ALIAS sampling)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): in this copy the two entries below had been reduced to `$` and
# `$/include>`; restored as the usual build-tree / install-tree pair of
# generator expressions -- confirm against the repository.
target_include_directories(sampling
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(sampling INTERFACE common)

install(TARGETS sampling
  EXPORT ${PROJECT_NAME}
)

# Public headers are installed into the shared DataSketches include directory.
install(FILES
    include/var_opt_sketch.hpp
    include/var_opt_sketch_impl.hpp
    include/var_opt_union.hpp
    include/var_opt_union_impl.hpp
    include/ebpps_sample.hpp
    include/ebpps_sample_impl.hpp
    include/ebpps_sketch.hpp
    include/ebpps_sketch_impl.hpp
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

--------------------------------------------------------------------------------
/hll/include/coupon_iterator.hpp:
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
18 | */ 19 | 20 | #ifndef _INTARRAYPAIRITERATOR_HPP_ 21 | #define _INTARRAYPAIRITERATOR_HPP_ 22 | 23 | namespace datasketches { 24 | /** Input iterator yielding uint32_t values read from a caller-provided array; the member functions are only declared here (coupon_iterator-internal.hpp is included below). */ 25 | template 26 | class coupon_iterator { 27 | public: 28 | using iterator_category = std::input_iterator_tag; 29 | using value_type = uint32_t; 30 | using difference_type = void; 31 | using pointer = uint32_t*; 32 | using reference = uint32_t; 33 | /** Arguments are presumably stored in the like-named private members. NOTE(review): `all` presumably selects whether every slot is visited, including empty ones - confirm in coupon_iterator-internal.hpp. */ 34 | coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all); 35 | coupon_iterator& operator++(); 36 | bool operator!=(const coupon_iterator& other) const; 37 | reference operator*() const; 38 | private: 39 | const uint32_t* array_; 40 | size_t array_size_; 41 | size_t index_; 42 | bool all_; 43 | }; 44 | 45 | } 46 | 47 | #include "coupon_iterator-internal.hpp" 48 | 49 | #endif /* _INTARRAYPAIRITERATOR_HPP_ */ 50 | -------------------------------------------------------------------------------- /tuple/include/tuple_intersection_impl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | namespace datasketches { 21 | /* Constructor: state_ receives the seed, the policy wrapped in internal_policy, and the allocator. */ 22 | template 23 | tuple_intersection::tuple_intersection(uint64_t seed, const P& policy, const A& allocator): 24 | state_(seed, internal_policy(policy), allocator) 25 | {} 26 | /* update: hands the sketch to state_.update through std::forward. */ 27 | template 28 | template 29 | void tuple_intersection::update(SS&& sketch) { 30 | state_.update(std::forward(sketch)); 31 | } 32 | /* get_result: returns state_.get_result(ordered) unchanged. */ 33 | template 34 | auto tuple_intersection::get_result(bool ordered) const -> CompactSketch { 35 | return state_.get_result(ordered); 36 | } 37 | /* has_result: returns state_.has_result() unchanged. */ 38 | template 39 | bool tuple_intersection::has_result() const { 40 | return state_.has_result(); 41 | } 42 | 43 | } /* namespace datasketches */ 44 | -------------------------------------------------------------------------------- /req/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_executable(req_test) 19 | 20 | target_link_libraries(req_test req common_test_lib) 21 | 22 | set_target_properties(req_test PROPERTIES 23 | CXX_STANDARD_REQUIRED YES 24 | ) 25 | 26 | file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" REQ_TEST_BINARY_PATH) 27 | string(APPEND REQ_TEST_BINARY_PATH "/") 28 | target_compile_definitions(req_test 29 | PRIVATE 30 | TEST_BINARY_INPUT_PATH="${REQ_TEST_BINARY_PATH}" 31 | ) 32 | 33 | add_test( 34 | NAME req_test 35 | COMMAND req_test 36 | ) 37 | 38 | target_sources(req_test 39 | PRIVATE 40 | req_sketch_test.cpp 41 | req_sketch_custom_type_test.cpp 42 | ) 43 | 44 | if (SERDE_COMPAT) 45 | target_sources(req_test 46 | PRIVATE 47 | req_sketch_deserialize_from_java_test.cpp 48 | ) 49 | endif() 50 | 51 | if (GENERATE) 52 | target_sources(req_test 53 | PRIVATE 54 | req_sketch_serialize_for_java.cpp 55 | ) 56 | endif() 57 | -------------------------------------------------------------------------------- /theta/include/theta_intersection_impl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef THETA_INTERSECTION_IMPL_HPP_ 21 | #define THETA_INTERSECTION_IMPL_HPP_ 22 | 23 | namespace datasketches { 24 | /* Constructor: state_ receives the seed, a default-constructed nop_policy, and the allocator. */ 25 | template 26 | theta_intersection_alloc::theta_intersection_alloc(uint64_t seed, const A& allocator): 27 | state_(seed, nop_policy(), allocator) 28 | {} 29 | /* update: hands the sketch to state_.update through std::forward. */ 30 | template 31 | template 32 | void theta_intersection_alloc::update(FwdSketch&& sketch) { 33 | state_.update(std::forward(sketch)); 34 | } 35 | /* get_result: returns state_.get_result(ordered) unchanged. */ 36 | template 37 | auto theta_intersection_alloc::get_result(bool ordered) const -> CompactSketch { 38 | return state_.get_result(ordered); 39 | } 40 | /* has_result: returns state_.has_result() unchanged. */ 41 | template 42 | bool theta_intersection_alloc::has_result() const { 43 | return state_.has_result(); 44 | } 45 | 46 | } /* namespace datasketches */ 47 | 48 | # endif 49 | -------------------------------------------------------------------------------- /.github/workflows/build_cmake.yml: -------------------------------------------------------------------------------- 1 | name: C/C++ CI 2 | 3 | on: [push, pull_request] 4 | 5 | env: 6 | BUILD_TYPE: Release 7 | 8 | jobs: 9 | build: 10 | name: ${{ matrix.config.name }} 11 | runs-on: ${{ matrix.config.os }} 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | config: 16 | - { 17 | name: "MacOS Latest, Clang", 18 | os: macos-latest, 19 | test_target: test, 20 | cc: "clang", cxx: "clang++" 21 | } 22 | - { 23 | name: "Ubuntu Latest, GCC", 24 | os: ubuntu-latest, 25 | test_target: test, 26 | cc: "gcc", cxx: "g++" 27 | } 28 | - { 29 | name: "Windows Latest, MSVC", 30 | os: windows-latest, 31 | test_target: RUN_TESTS, 32 | cc: "cl", cxx: "cl", 33 | environment_script: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars64.bat" 34 | } 35 | #- { 36 | # name: "Windows Latest, MinGW+gcc", 37 | # os: windows-latest, 38 | # cc: "gcc", cxx: "g++" 39 | # } 40 | 41 | steps: 42 | - name: Checkout 43 | uses: actions/checkout@v4 44 | with: 45 | submodules: true 46 | persist-credentials: false 47 | - name: 
Configure 48 | run: cmake -B build -S . -DCMAKE_INSTALL_PREFIX=./install_test 49 | - name: Build C++ unit tests 50 | run: cmake --build build --config Release 51 | - name: Run C++ tests 52 | run: cmake --build build --config Release --target ${{ matrix.config.test_target }} 53 | - name: Install headers 54 | run: cmake --build build --config Release -t install # install from the same Release configuration that was built and tested above 55 | -------------------------------------------------------------------------------- /hll/test/TablesTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | 22 | #include 23 | 24 | #include "CubicInterpolation.hpp" 25 | 26 | namespace datasketches { 27 | /* The single-argument usingXAndYTables must throw std::invalid_argument for the inputs -1.0 and 1e12. */ 28 | TEST_CASE("hll tables: interpolation exception", "[hll_tables]") { 29 | REQUIRE_THROWS_AS(CubicInterpolation<>::usingXAndYTables(-1.0), std::invalid_argument); 30 | 31 | REQUIRE_THROWS_AS(CubicInterpolation<>::usingXAndYTables(1e12), std::invalid_argument); 32 | } 33 | /* Interpolating at the last entry of the x table must return exactly the last entry of the y table. */ 34 | TEST_CASE("hll tables: check corner case", "[hll_tables]") { 35 | int len = 10; 36 | double xArr[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; 37 | double yArr[] = {2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0}; 38 | double x = xArr[len - 1]; 39 | double y = CubicInterpolation<>::usingXAndYTables(xArr, yArr, len, x); 40 | double yExp = yArr[len - 1]; 41 | REQUIRE(y == yExp); 42 | } 43 | 44 | } /* namespace datasketches */ 45 | 46 | -------------------------------------------------------------------------------- /hll/include/HarmonicNumbers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _HARMONICNUMBERS_HPP_ 21 | #define _HARMONICNUMBERS_HPP_ 22 | 23 | #include 24 | #include 25 | 26 | namespace datasketches { 27 | /* Class exposing only static functions; they are declared here and presumably defined in HarmonicNumbers-internal.hpp (included below). */ 28 | template> 29 | class HarmonicNumbers { 30 | public: 31 | /** 32 | * This is the estimator you would use for a flat bit map that is accessed randomly, similar to a Bloom filter. 33 | * @param bitVectorLength the length of the bit vector in bits. Must be > 0. 34 | * @param numBitsSet the number of bits set in this bit vector. Must be ≥ 0 and ≤ 35 | * bitVectorLength. 36 | * @return the estimate. 37 | */ 38 | static double getBitMapEstimate(int bitVectorLength, int numBitsSet); 39 | 40 | private: 41 | static double harmonicNumber(uint64_t x_i); 42 | }; 43 | 44 | } 45 | 46 | #include "HarmonicNumbers-internal.hpp" 47 | 48 | #endif /* _HARMONICNUMBERS_HPP_ */ 49 | -------------------------------------------------------------------------------- /cpc/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_executable(cpc_test) 19 | 20 | target_link_libraries(cpc_test cpc common_test_lib) 21 | 22 | set_target_properties(cpc_test PROPERTIES 23 | CXX_STANDARD_REQUIRED YES 24 | ) 25 | 26 | file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" CPC_TEST_BINARY_PATH) 27 | string(APPEND CPC_TEST_BINARY_PATH "/") 28 | target_compile_definitions(cpc_test 29 | PRIVATE 30 | TEST_BINARY_INPUT_PATH="${CPC_TEST_BINARY_PATH}" 31 | ) 32 | 33 | add_test( 34 | NAME cpc_test 35 | COMMAND cpc_test 36 | ) 37 | 38 | target_sources(cpc_test 39 | PRIVATE 40 | cpc_sketch_test.cpp 41 | cpc_union_test.cpp 42 | compression_test.cpp 43 | cpc_sketch_allocation_test.cpp 44 | ) 45 | 46 | if (SERDE_COMPAT) 47 | target_sources(cpc_test 48 | PRIVATE 49 | cpc_sketch_deserialize_from_java_test.cpp 50 | ) 51 | endif() 52 | 53 | if (GENERATE) 54 | target_sources(cpc_test 55 | PRIVATE 56 | cpc_sketch_serialize_for_java.cpp 57 | ) 58 | endif() 59 | -------------------------------------------------------------------------------- /tdigest/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_executable(tdigest_test) 19 | 20 | target_link_libraries(tdigest_test tdigest common common_test_lib) 21 | 22 | set_target_properties(tdigest_test PROPERTIES 23 | CXX_STANDARD_REQUIRED YES 24 | ) 25 | 26 | file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" tdigest_TEST_BINARY_PATH) 27 | string(APPEND tdigest_TEST_BINARY_PATH "/") 28 | target_compile_definitions(tdigest_test 29 | PRIVATE 30 | TEST_BINARY_INPUT_PATH="${tdigest_TEST_BINARY_PATH}" 31 | ) 32 | 33 | add_test( 34 | NAME tdigest_test 35 | COMMAND tdigest_test 36 | ) 37 | 38 | target_sources(tdigest_test 39 | PRIVATE 40 | tdigest_test.cpp 41 | tdigest_custom_allocator_test.cpp 42 | ) 43 | 44 | if (SERDE_COMPAT) 45 | target_sources(tdigest_test 46 | PRIVATE 47 | tdigest_deserialize_from_java_test.cpp 48 | ) 49 | endif() 50 | 51 | if (GENERATE) 52 | target_sources(tdigest_test 53 | PRIVATE 54 | tdigest_serialize_for_java.cpp 55 | ) 56 | endif() 57 | -------------------------------------------------------------------------------- /hll/include/RelativeErrorTables.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _RELATIVEERRORTABLES_HPP_ 21 | #define _RELATIVEERRORTABLES_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | template> 28 | class RelativeErrorTables { 29 | public: 30 | /** 31 | * Returns the relative error for the upper bound (UB) or lower bound (LB), for HIP or Non-HIP, as a function of stdDev. 32 | * @param upperBound true if for upper bound 33 | * @param oooFlag true if for Non-HIP 34 | * @param lgK must be between 4 and 12 inclusive 35 | * @param stdDev number of standard deviations; must be between 1 and 3 inclusive 36 | * @return the relative error for UB or LB, for HIP or Non-HIP, as a function of stdDev. 37 | */ 38 | static double getRelErr(bool upperBound, bool oooFlag, 39 | int lgK, int stdDev); 40 | }; 41 | 42 | } 43 | 44 | #include "RelativeErrorTables-internal.hpp" 45 | 46 | #endif /* _RELATIVEERRORTABLES_HPP_ */ 47 | -------------------------------------------------------------------------------- /fi/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_executable(fi_test) 19 | 20 | target_link_libraries(fi_test fi common_test_lib) 21 | 22 | set_target_properties(fi_test PROPERTIES 23 | CXX_STANDARD_REQUIRED YES 24 | ) 25 | 26 | file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" FI_TEST_BINARY_PATH) 27 | string(APPEND FI_TEST_BINARY_PATH "/") 28 | target_compile_definitions(fi_test 29 | PRIVATE 30 | TEST_BINARY_INPUT_PATH="${FI_TEST_BINARY_PATH}" 31 | ) 32 | 33 | add_test( 34 | NAME fi_test 35 | COMMAND fi_test 36 | ) 37 | 38 | target_sources(fi_test 39 | PRIVATE 40 | reverse_purge_hash_map_test.cpp 41 | frequent_items_sketch_test.cpp 42 | frequent_items_sketch_custom_type_test.cpp 43 | ) 44 | 45 | if (SERDE_COMPAT) 46 | target_sources(fi_test 47 | PRIVATE 48 | frequent_items_sketch_deserialize_from_java_test.cpp 49 | ) 50 | endif() 51 | 52 | if (GENERATE) 53 | target_sources(fi_test 54 | PRIVATE 55 | frequent_items_sketch_serialize_for_java.cpp 56 | ) 57 | endif() 58 | -------------------------------------------------------------------------------- /kll/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_executable(kll_test) 19 | 20 | target_link_libraries(kll_test kll common_test_lib) 21 | 22 | set_target_properties(kll_test PROPERTIES 23 | CXX_STANDARD_REQUIRED YES 24 | ) 25 | 26 | file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" KLL_TEST_BINARY_PATH) 27 | string(APPEND KLL_TEST_BINARY_PATH "/") 28 | target_compile_definitions(kll_test 29 | PRIVATE 30 | TEST_BINARY_INPUT_PATH="${KLL_TEST_BINARY_PATH}" 31 | ) 32 | 33 | add_test( 34 | NAME kll_test 35 | COMMAND kll_test 36 | ) 37 | 38 | target_sources(kll_test 39 | PRIVATE 40 | kll_sketch_test.cpp 41 | kll_sketch_custom_type_test.cpp 42 | kll_sketch_validation.cpp 43 | kolmogorov_smirnov_test.cpp 44 | ) 45 | 46 | if (SERDE_COMPAT) 47 | target_sources(kll_test 48 | PRIVATE 49 | kll_sketch_deserialize_from_java_test.cpp 50 | ) 51 | endif() 52 | 53 | if (GENERATE) 54 | target_sources(kll_test 55 | PRIVATE 56 | kll_sketch_serialize_for_java.cpp 57 | ) 58 | endif() 59 | -------------------------------------------------------------------------------- /hll/include/Hll6Array.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _HLL6ARRAY_HPP_ 21 | #define _HLL6ARRAY_HPP_ 22 | 23 | #include "HllArray.hpp" 24 | 25 | namespace datasketches { 26 | /* Forward declaration only; Hll6Iterator is not used in the declarations below. */ 27 | template 28 | class Hll6Iterator; 29 | /* Final subclass of HllArray. NOTE(review): the name suggests 6-bit slots - confirm in the implementation file. */ 30 | template 31 | class Hll6Array final : public HllArray { 32 | public: 33 | Hll6Array(uint8_t lgConfigK, bool startFullSize, const A& allocator); 34 | explicit Hll6Array(const HllArray& that); 35 | 36 | virtual ~Hll6Array() = default; 37 | virtual std::function*)> get_deleter() const; 38 | 39 | virtual Hll6Array* copy() const; 40 | /* Slot accessors addressed by slot number; slot values travel as uint8_t. */ 41 | inline uint8_t getSlot(uint32_t slotNo) const; 42 | inline void putSlot(uint32_t slotNo, uint8_t value); 43 | /* Declared final, so no further override of couponUpdate is possible. */ 44 | virtual HllSketchImpl* couponUpdate(uint32_t coupon) final; 45 | 46 | virtual uint32_t getHllByteArrBytes() const; 47 | 48 | private: 49 | void internalCouponUpdate(uint32_t coupon); 50 | }; 51 | 52 | } 53 | 54 | #endif /* _HLL6ARRAY_HPP_ */ 55 | -------------------------------------------------------------------------------- /cpc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_library(cpc INTERFACE) 19 | 20 | add_library(${PROJECT_NAME}::CPC ALIAS cpc) 21 | 22 | if (BUILD_TESTS) 23 | add_subdirectory(test) 24 | endif() 25 | 26 | target_include_directories(cpc 27 | INTERFACE 28 | $/include> 29 | $ 30 | ) 31 | 32 | target_link_libraries(cpc INTERFACE common) 33 | 34 | install(TARGETS cpc 35 | EXPORT ${PROJECT_NAME} 36 | ) 37 | 38 | install(FILES 39 | include/compression_data.hpp 40 | include/cpc_common.hpp 41 | include/cpc_compressor.hpp 42 | include/cpc_compressor_impl.hpp 43 | include/cpc_confidence.hpp 44 | include/cpc_sketch.hpp 45 | include/cpc_sketch_impl.hpp 46 | include/cpc_union.hpp 47 | include/cpc_union_impl.hpp 48 | include/cpc_util.hpp 49 | include/icon_estimator.hpp 50 | include/kxp_byte_lookup.hpp 51 | include/u32_table.hpp 52 | include/u32_table_impl.hpp 53 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches") 54 | -------------------------------------------------------------------------------- /quantiles/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | add_executable(quantiles_test) 19 | 20 | target_link_libraries(quantiles_test quantiles common common_test_lib) 21 | 22 | set_target_properties(quantiles_test PROPERTIES 23 | CXX_STANDARD_REQUIRED YES 24 | ) 25 | 26 | file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" QUANTILES_TEST_BINARY_PATH) 27 | string(APPEND QUANTILES_TEST_BINARY_PATH "/") 28 | target_compile_definitions(quantiles_test 29 | PRIVATE 30 | TEST_BINARY_INPUT_PATH="${QUANTILES_TEST_BINARY_PATH}" 31 | ) 32 | 33 | add_test( 34 | NAME quantiles_test 35 | COMMAND quantiles_test 36 | ) 37 | 38 | target_sources(quantiles_test 39 | PRIVATE 40 | quantiles_sketch_test.cpp 41 | quantiles_compatibility_test.cpp 42 | kolmogorov_smirnov_test.cpp 43 | ) 44 | 45 | if (SERDE_COMPAT) 46 | target_sources(quantiles_test 47 | PRIVATE 48 | quantiles_sketch_deserialize_from_java_test.cpp 49 | ) 50 | endif() 51 | 52 | if (GENERATE) 53 | target_sources(quantiles_test 54 | PRIVATE 55 | quantiles_sketch_serialize_for_java.cpp 56 | ) 57 | endif() 58 | -------------------------------------------------------------------------------- /theta/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | add_executable(theta_test) 19 | 20 | target_link_libraries(theta_test theta common_test_lib) 21 | 22 | set_target_properties(theta_test PROPERTIES 23 | CXX_STANDARD_REQUIRED YES 24 | ) 25 | 26 | file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" THETA_TEST_BINARY_PATH) 27 | string(APPEND THETA_TEST_BINARY_PATH "/") 28 | target_compile_definitions(theta_test 29 | PRIVATE 30 | TEST_BINARY_INPUT_PATH="${THETA_TEST_BINARY_PATH}" 31 | ) 32 | 33 | add_test( 34 | NAME theta_test 35 | COMMAND theta_test 36 | ) 37 | 38 | target_sources(theta_test 39 | PRIVATE 40 | theta_sketch_test.cpp 41 | theta_union_test.cpp 42 | theta_intersection_test.cpp 43 | theta_a_not_b_test.cpp 44 | theta_jaccard_similarity_test.cpp 45 | theta_setop_test.cpp 46 | bit_packing_test.cpp 47 | ) 48 | 49 | if (SERDE_COMPAT) 50 | target_sources(theta_test 51 | PRIVATE 52 | theta_sketch_deserialize_from_java_test.cpp 53 | ) 54 | endif() 55 | 56 | if (GENERATE) 57 | target_sources(theta_test 58 | PRIVATE 59 | theta_sketch_serialize_for_java.cpp 60 | ) 61 | endif() 62 | -------------------------------------------------------------------------------- /hll/test/hll_sketch_serialize_for_java.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | /* For each stream length n in n_arr: builds HLL_4, HLL_6 and HLL_8 sketches (first constructor argument 12, presumably lgK), updates them with the integers 0..n-1, and writes each one with serialize_compact to hll{4,6,8}_n<n>_cpp.sk using a relative path. */ 26 | TEST_CASE("hll sketch generate", "[serialize_for_java]") { 27 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 28 | for (const unsigned n: n_arr) { 29 | hll_sketch hll4(12, HLL_4); 30 | hll_sketch hll6(12, HLL_6); 31 | hll_sketch hll8(12, HLL_8); 32 | for (unsigned i = 0; i < n; ++i) { 33 | hll4.update(i); 34 | hll6.update(i); 35 | hll8.update(i); 36 | } 37 | { /* each braced scope destroys its ofstream, closing the file, before the next one is opened */ 38 | std::ofstream os("hll4_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 39 | hll4.serialize_compact(os); 40 | } 41 | { 42 | std::ofstream os("hll6_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 43 | hll6.serialize_compact(os); 44 | } 45 | { 46 | std::ofstream os("hll8_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 47 | hll8.serialize_compact(os); 48 | } 49 | } 50 | } 51 | 52 | } /* namespace datasketches */ 53 | -------------------------------------------------------------------------------- /hll/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
# Test executable for the hll module. It is declared empty here and
# receives its sources from the target_sources() calls that follow.
add_executable(hll_test)

# Sources that are always part of the test binary.
target_sources(hll_test
  PRIVATE
    AuxHashMapTest.cpp
    CouponHashSetTest.cpp
    CouponListTest.cpp
    CrossCountingTest.cpp
    HllArrayTest.cpp
    HllSketchTest.cpp
    HllUnionTest.cpp
    TablesTest.cpp
    ToFromByteArrayTest.cpp
    IsomorphicTest.cpp
)

# Added only when SERDE_COMPAT is set.
if (SERDE_COMPAT)
  target_sources(hll_test PRIVATE hll_sketch_deserialize_from_java_test.cpp)
endif()

# Added only when GENERATE is set.
if (GENERATE)
  target_sources(hll_test PRIVATE hll_sketch_serialize_for_java.cpp)
endif()

target_link_libraries(hll_test hll common_test_lib)

set_property(TARGET hll_test PROPERTY CXX_STANDARD_REQUIRED YES)

# Hand this source directory (CMake-style path with a trailing slash) to the
# test code through the TEST_BINARY_INPUT_PATH preprocessor definition.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" HLL_TEST_BINARY_PATH)
set(HLL_TEST_BINARY_PATH "${HLL_TEST_BINARY_PATH}/")
target_compile_definitions(hll_test PRIVATE TEST_BINARY_INPUT_PATH="${HLL_TEST_BINARY_PATH}")

# Register the executable with CTest under its own name.
add_test(NAME hll_test COMMAND hll_test)
# Test executable for the Bloom filter code in the filters module. It is
# declared empty here and receives its sources from the target_sources()
# calls that follow.
add_executable(bloom_filter_test)

# Sources that are always part of the test binary.
target_sources(bloom_filter_test
  PRIVATE
    bit_array_ops_test.cpp
    bloom_filter_test.cpp
    bloom_filter_allocation_test.cpp
)

# Added only when SERDE_COMPAT is set.
if (SERDE_COMPAT)
  target_sources(bloom_filter_test PRIVATE bloom_filter_deserialize_from_java_test.cpp)
endif()

# Added only when GENERATE is set.
if (GENERATE)
  target_sources(bloom_filter_test PRIVATE bloom_filter_serialize_for_java.cpp)
endif()

target_link_libraries(bloom_filter_test filters common_test_lib)

set_property(TARGET bloom_filter_test PROPERTY CXX_STANDARD_REQUIRED YES)

# Hand this source directory (CMake-style path with a trailing slash) to the
# test code through the TEST_BINARY_INPUT_PATH preprocessor definition.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" FILTERS_TEST_BINARY_PATH)
set(FILTERS_TEST_BINARY_PATH "${FILTERS_TEST_BINARY_PATH}/")
target_compile_definitions(bloom_filter_test PRIVATE TEST_BINARY_INPUT_PATH="${FILTERS_TEST_BINARY_PATH}")

# Register the executable with CTest under its own name.
add_test(NAME bloom_filter_test COMMAND bloom_filter_test)
// Declaration of the shared base used for set-difference operations on
// theta-style sketches. Only the interface is declared here; the member
// definitions come from theta_set_difference_base_impl.hpp, which the
// enclosing header includes after this class.
//
// NOTE(review): this listing appears to have lost its angle-bracket argument
// lists (e.g. `compare_by_key`, `std::allocator_traits::template rebind_alloc`,
// the bare `template` before compute()). Restore them from the upstream
// header before compiling.
template<
  typename Entry,
  typename ExtractKey,
  typename CompactSketch,
  typename Allocator
>
class theta_set_difference_base {
public:
  // Type aliases used by the implementation.
  using comparator = compare_by_key;
  using AllocU64 = typename std::allocator_traits::template rebind_alloc;
  using hash_table = theta_update_sketch_base;

  // Constructs the operation from a hash seed and an allocator instance
  // (defaults to a value-initialized Allocator).
  theta_set_difference_base(uint64_t seed, const Allocator& allocator = Allocator());

  // Computes the result from sketches `a` and `b` and returns it as a
  // CompactSketch. `a` is taken as a forwarding reference, `b` by const
  // reference; `ordered` is passed through to the implementation.
  // presumably this is "a and not b" and `ordered` selects an ordered
  // result -- confirm against theta_set_difference_base_impl.hpp.
  template
  CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered) const;

private:
  Allocator allocator_;   // allocator instance kept by this object
  uint16_t seed_hash_;    // 16-bit value; presumably derived from `seed` -- TODO confirm in the impl
};
// Declaration of the shared base used for intersection of theta-style
// sketches. Only the interface is declared here; the member definitions come
// from theta_intersection_base_impl.hpp, which the enclosing header includes
// after this class.
//
// NOTE(review): this listing appears to have lost its angle-bracket argument
// lists (e.g. `theta_update_sketch_base`, `compare_by_key`, the bare
// `template` before update()). Restore them from the upstream header before
// compiling.
// NOTE(review): the enclosing header shows no #include for
// theta_update_sketch_base / compare_by_key; it seems to rely on the
// includer providing them -- confirm.
template<
  typename Entry,
  typename ExtractKey,
  typename Policy,
  typename Sketch,
  typename CompactSketch,
  typename Allocator
>
class theta_intersection_base {
public:
  // Type aliases used by the implementation.
  using hash_table = theta_update_sketch_base;
  using resize_factor = typename hash_table::resize_factor;
  using comparator = compare_by_key;

  // Constructs the operation from a hash seed, a policy object and an allocator.
  theta_intersection_base(uint64_t seed, const Policy& policy, const Allocator& allocator);

  // Feeds one sketch into the operation (taken as a forwarding reference).
  template
  void update(FwdSketch&& sketch);

  // Returns the current result as a CompactSketch; `ordered` defaults to true.
  CompactSketch get_result(bool ordered = true) const;

  // Reports whether a result is available.
  // presumably false until the first update() -- confirm in the impl.
  bool has_result() const;

  // Read-only access to the stored policy object.
  const Policy& get_policy() const;

private:
  Policy policy_;     // policy object supplied at construction
  bool is_valid_;     // state flag; presumably backs has_result() -- TODO confirm
  hash_table table_;  // internal hash table state
};
// Member definitions of coupon_iterator, an iterator over an array of
// uint32_t values that can skip slots equal to hll_constants::EMPTY.
//
// NOTE(review): the template headers and the class template arguments appear
// to have been stripped from this listing (bare `template`,
// `coupon_iterator::`). Restore them from the upstream header before compiling.

// Constructs an iterator over `array` of `array_size` elements, starting at
// `index`. Unless `all` is true, the start position is advanced to the first
// slot at or after `index` whose value is not hll_constants::EMPTY (or to
// array_size if there is none).
template
coupon_iterator::coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all):
array_(array), array_size_(array_size), index_(index), all_(all) {
  while (index_ < array_size_) {
    if (all_ || array_[index_] != hll_constants::EMPTY) break;
    ++index_;
  }
}

// Pre-increment: moves to the next slot; unless all_ is set, keeps moving
// while the slot holds hll_constants::EMPTY. Stops once index_ reaches
// array_size_.
template
coupon_iterator& coupon_iterator::operator++() {
  while (++index_ < array_size_) {
    if (all_ || array_[index_] != hll_constants::EMPTY) break;
  }
  return *this;
}

// Inequality compares positions only; the underlying array pointer is not
// part of the comparison.
template
bool coupon_iterator::operator!=(const coupon_iterator& other) const {
  return index_ != other.index_;
}

// Dereference: yields the array element at the current position. No bounds
// check is performed here.
template
auto coupon_iterator::operator*() const -> reference {
  return array_[index_];
}
-------------------------------------------------------------------------------- /fi/test/reverse_purge_hash_map_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
// Unit tests for reverse_purge_hash_map.
//
// NOTE(review): `reverse_purge_hash_map`, `std::equal_to()` and
// `std::allocator()` appear without template arguments in this listing; they
// look stripped by the extraction -- restore from the upstream test before
// compiling.

// A freshly constructed map reports no active entries and a current
// lg size of 3 (both constructor arguments are 3 here).
TEST_CASE("reverse purge hash map: empty", "[frequent_items_sketch]") {
  reverse_purge_hash_map map(3, 3, std::equal_to(), std::allocator());
  REQUIRE(map.get_num_active() == 0);
  REQUIRE(map.get_lg_cur_size() == 3); // original trailing note was a cast of 3; its target type was lost in this listing
}

// After one adjust_or_insert(1, 1) the map holds one active entry and
// get(1) returns 1.
TEST_CASE("reverse purge hash map: one item", "[frequent_items_sketch]") {
  reverse_purge_hash_map map(3, 3, std::equal_to(), std::allocator());
  map.adjust_or_insert(1, 1);
  REQUIRE(map.get_num_active() == 1);
  REQUIRE(map.get(1) == 1);
}

// Inserts keys 0..10 with value 1 each and checks that iterating the map and
// summing `second` gives 11, i.e. every inserted entry is visited once.
TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
  reverse_purge_hash_map map(3, 4, std::equal_to(), std::allocator());
  for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
  uint64_t sum = 0;
  for (auto it: map) sum += it.second;
  REQUIRE(sum == 11);
}
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include "bloom_filter.hpp" 25 | 26 | namespace datasketches { 27 | 28 | TEST_CASE("bloom filter generate", "[serialize_for_java]") { 29 | const uint64_t n_arr[] = {0, 10000, 2000000, 30000000}; 30 | const uint16_t h_arr[] = {3, 5}; 31 | for (const uint64_t n: n_arr) { 32 | for (const uint16_t num_hashes: h_arr) { 33 | const uint64_t config_bits = std::max(n, static_cast(1000)); // so empty still has valid bit size 34 | bloom_filter bf = bloom_filter::builder::create_by_size(config_bits, num_hashes); 35 | for (uint64_t i = 0; i < n / 10; ++i) bf.update(i); // note: n / 10 items into n bits 36 | if (n > 0) bf.update(std::nan("1")); // include a NaN if non-empty 37 | REQUIRE(bf.is_empty() == (n == 0)); 38 | REQUIRE((bf.is_empty() || (bf.get_bits_used() > n / 10))); 39 | std::ofstream os("bf_n" + std::to_string(n) + "_h" + std::to_string(num_hashes) + "_cpp.sk", std::ios::binary); 40 | bf.serialize(os); 41 | } 42 | } 43 | } 44 | 45 | } /* namespace datasketches */ 46 | -------------------------------------------------------------------------------- /hll/include/Hll8Array.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
// NOTE(review): the template headers and template arguments in this block
// appear to have been stripped from the listing (bare `template`,
// `HllArray`, `std::function*)>`). Restore them from the upstream header
// before compiling.

// Forward declaration of the iterator type associated with Hll8Array.
template
class Hll8Iterator;

// HllArray specialization that is declared final and exposes per-slot
// accessors returning / taking uint8_t values.
// presumably one byte per slot, as the "8" in the name suggests -- confirm
// in the implementation file.
template
class Hll8Array final : public HllArray {
  public:
    // Constructs an array for the given lgConfigK; `startFullSize` and
    // `allocator` are forwarded to the implementation (not visible here).
    Hll8Array(uint8_t lgConfigK, bool startFullSize, const A& allocator);
    // Converting constructor from another HllArray; explicit to prevent
    // accidental implicit conversions.
    explicit Hll8Array(const HllArray& that);

    virtual ~Hll8Array() = default;
    // Returns a callable used to destroy and deallocate this object.
    virtual std::function*)> get_deleter() const;

    // Returns a newly created copy; ownership conventions are defined by
    // the implementation -- TODO confirm how callers release it.
    virtual Hll8Array* copy() const;

    // Slot accessors.
    inline uint8_t getSlot(uint32_t slotNo) const;
    inline void putSlot(uint32_t slotNo, uint8_t value);

    // Applies one coupon update and returns the sketch implementation to use
    // afterwards; cannot be overridden further.
    virtual HllSketchImpl* couponUpdate(uint32_t coupon) final;
    // Merge entry points for the two source representations.
    void mergeList(const CouponList& src);
    void mergeHll(const HllArray& src);

    // Size in bytes of the HLL byte array for this representation.
    virtual uint32_t getHllByteArrBytes() const;

  private:
    // Internal helpers used by the update / merge paths.
    inline void internalCouponUpdate(uint32_t coupon);
    inline void processValue(uint32_t slot, uint32_t mask, uint8_t new_val);
};
# Header-only "common" library shared by the other modules.
add_library(common INTERFACE)

# Generate include/version.hpp in the build tree from the template in the
# source tree; only @VAR@ references are substituted (@ONLY).
configure_file(include/version.hpp.in include/version.hpp @ONLY)

# Tests are built only when BUILD_TESTS is enabled.
if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): the two entries below look like generator expressions whose
# angle-bracket contents were stripped from this listing (presumably
# BUILD_INTERFACE / INSTALL_INTERFACE forms). They are not valid as shown;
# restore them from the upstream CMakeLists.txt.
target_include_directories(common
  INTERFACE
    $/include>
    $
)

# Export the target under the project's export set.
install(TARGETS common EXPORT ${PROJECT_NAME})

# Install the public headers, including the generated version.hpp, which is
# taken from the build directory rather than the source directory.
install(FILES
    ${CMAKE_CURRENT_BINARY_DIR}/include/version.hpp
    include/binomial_bounds.hpp
    include/bounds_binomial_proportions.hpp
    include/ceiling_power_of_2.hpp
    include/common_defs.hpp
    include/conditional_back_inserter.hpp
    include/conditional_forward.hpp
    include/count_zeros.hpp
    include/inv_pow2_table.hpp
    include/kolmogorov_smirnov_impl.hpp
    include/kolmogorov_smirnov.hpp
    include/memory_operations.hpp
    include/MurmurHash3.h
    include/optional.hpp
    include/quantiles_sorted_view_impl.hpp
    include/quantiles_sorted_view.hpp
    include/serde.hpp
    include/xxhash64.h
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
// Declaration of the shared base used for union of theta-style sketches.
// Only the interface is declared here; the member definitions come from
// theta_union_base_impl.hpp, which the enclosing header includes after this
// class.
//
// NOTE(review): this listing appears to have lost its angle-bracket argument
// lists (e.g. `theta_update_sketch_base`, `compare_by_key`, the bare
// `template` before update()). Restore them from the upstream header before
// compiling.
template<
  typename Entry,
  typename ExtractKey,
  typename Policy,
  typename Sketch,
  typename CompactSketch,
  typename Allocator
>
class theta_union_base {
public:
  // Type aliases used by the implementation.
  using hash_table = theta_update_sketch_base;
  using resize_factor = typename hash_table::resize_factor;
  using comparator = compare_by_key;

  // Constructs the union state from the hash-table sizing parameters
  // (lg_cur_size, lg_nom_size, rf), sampling probability p, initial theta,
  // hash seed, a policy object and an allocator.
  theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);

  // Feeds one sketch into the union (taken as a forwarding reference).
  template
  void update(FwdSketch&& sketch);

  // Returns the current result as a CompactSketch; `ordered` defaults to true.
  CompactSketch get_result(bool ordered = true) const;

  // Read-only access to the stored policy object.
  const Policy& get_policy() const;

  // Resets the union; the exact post-state is defined in the impl file.
  void reset();

private:
  Policy policy_;         // policy object supplied at construction
  hash_table table_;      // internal hash table state
  uint64_t union_theta_;  // theta value tracked for the union; update rule is in the impl -- TODO confirm
};
# Test executable for the tuple module. It is declared empty here and
# receives its sources from the target_sources() calls further down.
add_executable(tuple_test)

target_link_libraries(tuple_test tuple common_test_lib)

set_target_properties(tuple_test PROPERTIES
  CXX_STANDARD_REQUIRED YES
)

# Hand this source directory (CMake-style path with a trailing slash) to the
# test code through the TEST_BINARY_INPUT_PATH preprocessor definition.
# The helper variable carries this module's own TUPLE_ prefix, matching the
# <MODULE>_TEST_BINARY_PATH naming used by the other test directories.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" TUPLE_TEST_BINARY_PATH)
string(APPEND TUPLE_TEST_BINARY_PATH "/")
target_compile_definitions(tuple_test
  PRIVATE
    TEST_BINARY_INPUT_PATH="${TUPLE_TEST_BINARY_PATH}"
)

# Register the executable with CTest under its own name.
add_test(
  NAME tuple_test
  COMMAND tuple_test
)

# Sources that are always part of the test binary.
target_sources(tuple_test
  PRIVATE
    tuple_sketch_test.cpp
    tuple_sketch_allocation_test.cpp
    tuple_union_test.cpp
    tuple_intersection_test.cpp
    tuple_a_not_b_test.cpp
    tuple_jaccard_similarity_test.cpp
    array_of_doubles_sketch_test.cpp
    engagement_test.cpp
)

# Added only when SERDE_COMPAT is set.
if (SERDE_COMPAT)
  target_sources(tuple_test
    PRIVATE
      aod_sketch_deserialize_from_java_test.cpp
      tuple_sketch_deserialize_from_java_test.cpp
  )
endif()

# Added only when GENERATE is set.
if (GENERATE)
  target_sources(tuple_test
    PRIVATE
      aod_sketch_serialize_for_java.cpp
      tuple_sketch_serialize_for_java.cpp
  )
endif()
# Header-only "tuple" library.
add_library(tuple INTERFACE)

# Namespaced alias so consumers can refer to the target as
# <project>::TUPLE.
add_library(${PROJECT_NAME}::TUPLE ALIAS tuple)

# Tests are built only when BUILD_TESTS is enabled.
if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): the two entries below look like generator expressions whose
# angle-bracket contents were stripped from this listing (presumably
# BUILD_INTERFACE / INSTALL_INTERFACE forms). They are not valid as shown;
# restore them from the upstream CMakeLists.txt.
target_include_directories(tuple
  INTERFACE
    $
    $/include>
)

# tuple depends on the common and theta interface libraries.
target_link_libraries(tuple INTERFACE common theta)

# Export the target under the project's export set.
install(TARGETS tuple
  EXPORT ${PROJECT_NAME}
)

# Install the public headers of this module.
install(FILES
    include/tuple_sketch.hpp
    include/tuple_sketch_impl.hpp
    include/tuple_union.hpp
    include/tuple_union_impl.hpp
    include/tuple_intersection.hpp
    include/tuple_intersection_impl.hpp
    include/tuple_a_not_b.hpp
    include/tuple_a_not_b_impl.hpp
    include/tuple_jaccard_similarity.hpp
    include/array_of_doubles_sketch.hpp
    include/array_tuple_sketch.hpp
    include/array_tuple_sketch_impl.hpp
    include/array_tuple_union.hpp
    include/array_tuple_union_impl.hpp
    include/array_tuple_intersection.hpp
    include/array_tuple_intersection_impl.hpp
    include/array_tuple_a_not_b.hpp
    include/array_tuple_a_not_b_impl.hpp
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
// Builds two var-opt sketches of size k_small with different stream lengths,
// feeds both into a union configured with k_max, checks the union result and
// writes the serialized union to "varopt_union_double_sampling_cpp.sk" in the
// current working directory.
//
// NOTE(review): `var_opt_sketch` and `var_opt_union` appear without template
// arguments in this listing; they look stripped by the extraction -- confirm
// the item type against the upstream test before compiling.
TEST_CASE("var opt union double sampling", "[serialize_for_java]") {
  const unsigned k_small = 16;
  const unsigned k_max = 128;
  const unsigned n1 = 32;
  const unsigned n2 = 64;

  // small k sketch, but sampling (n1 > k_small)
  var_opt_sketch sketch1(k_small);
  for (unsigned i = 0; i < n1; ++i) sketch1.update(i);
  // negative heavy item to allow a simple predicate to filter;
  // it is given weight n1 * n1
  sketch1.update(-1, n1 * n1);

  // another one, but different n to get a different per-item weight
  var_opt_sketch sketch2(k_small);
  for (unsigned i = 0; i < n2; ++i) sketch2.update(i);

  var_opt_union u(k_max);
  u.update(sketch1);
  u.update(sketch2);

  // must reduce k in the process
  auto result = u.get_result();
  REQUIRE(result.get_k() < k_max);
  REQUIRE(result.get_k() >= k_small);
  // 97 = n1 items + 1 heavy item + n2 items (32 + 1 + 64)
  REQUIRE(result.get_n() == 97);

  // note: the union object itself, not `result`, is what gets serialized
  std::ofstream os("varopt_union_double_sampling_cpp.sk", std::ios::binary);
  u.serialize(os);
}
// assume the binary sketches for this test have been generated by datasketches-java code
// in the subdirectory called "java" in the root directory of this project
// (TEST_BINARY_INPUT_PATH is a preprocessor definition supplied by the build;
// the path is resolved two directories up from it).
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";

// Reads "tuple_int_n<n>_java.sk" for each n and checks emptiness, estimation
// mode (expected only for n > 1000), the estimate (within 3% of n), and that
// every retained entry has a key below theta and a summary value below n.
//
// NOTE(review): `compact_tuple_sketch::deserialize` and `static_cast(n)`
// appear without template arguments / target type in this listing; they look
// stripped by the extraction -- restore from the upstream test before
// compiling.
TEST_CASE("tuple sketch int", "[serde_compat]") {
  const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned n: n_arr) {
    std::ifstream is;
    // throw on open/read failure so a missing input file fails the test
    is.exceptions(std::ios::failbit | std::ios::badbit);
    is.open(testBinaryInputPath + "tuple_int_n" + std::to_string(n) + "_java.sk", std::ios::binary);
    const auto sketch = compact_tuple_sketch::deserialize(is);
    REQUIRE(sketch.is_empty() == (n == 0));
    REQUIRE(sketch.is_estimation_mode() == (n > 1000));
    REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
    for (const auto& entry: sketch) {
      // entry.first is compared against theta; entry.second against n
      REQUIRE(entry.first < sketch.get_theta64());
      REQUIRE(entry.second < static_cast(n));
    }
  }
}
// NOTE(review): `quantiles_sketch` appears without template arguments in both
// tests below; they look stripped by the extraction (the item type, and
// presumably the comparator for the string case) -- restore from the upstream
// test before compiling.

// Writes one serialized sketch per n to "quantiles_double_n<n>_cpp.sk" in the
// current working directory, after updating it with the values 1..n.
TEST_CASE("quantiles sketch double generate", "[serialize_for_java]") {
  const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned n: n_arr) {
    quantiles_sketch sketch;
    for (unsigned i = 1; i <= n; ++i) sketch.update(i);
    std::ofstream os("quantiles_double_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
    sketch.serialize(os);
  }
}

// Orders strings by their integer value (via std::stoi) rather than
// lexicographically. std::stoi throws if a string is not a parsable integer.
// NOTE(review): not referenced in the visible code; presumably it was a
// template argument of the string sketch below -- confirm.
struct compare_as_number {
  bool operator()(const std::string& a, const std::string& b) const {
    return std::stoi(a) < std::stoi(b);
  }
};

// Same as the double case, but the items are the decimal strings "1".."n"
// and the output file is "quantiles_string_n<n>_cpp.sk".
TEST_CASE("quantiles sketch string generate", "[serialize_for_java]") {
  const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned n: n_arr) {
    quantiles_sketch sketch;
    for (unsigned i = 1; i <= n; ++i) sketch.update(std::to_string(i));
    std::ofstream os("quantiles_string_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
    sketch.serialize(os);
  }
}
18 | */ 19 | 20 | #include 21 | #include 22 | 23 | #include "bloom_filter.hpp" 24 | 25 | namespace datasketches { 26 | 27 | // assume the binary sketches for this test have been generated by datasketches-java code 28 | // in the subdirectory called "java" in the root directory of this project 29 | static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; 30 | /* For every (n, num_hashes) pair: opens "bf_n<n>_h<num_hashes>_java.sk" under testBinaryInputPath with failbit/badbit exceptions enabled, deserializes it, then checks emptiness, a lower bound of n / 10 on bits used, and positive queries for the integers below n / 10 and, when n > 0, for NaN. */ 31 | TEST_CASE("bloom_filter", "[serde_compat]") { 32 | const uint64_t n_arr[] = {0, 10000, 2000000, 30000000}; 33 | const uint16_t h_arr[] = {3, 5}; 34 | for (const uint64_t n: n_arr) { 35 | for (const uint16_t num_hashes: h_arr) { 36 | std::ifstream is; 37 | is.exceptions(std::ios::failbit | std::ios::badbit); 38 | is.open(testBinaryInputPath + "bf_n" + std::to_string(n) + "_h" + std::to_string(num_hashes) + "_java.sk", std::ios::binary); 39 | auto bf = bloom_filter::deserialize(is); 40 | REQUIRE(bf.is_empty() == (n == 0)); 41 | REQUIRE((bf.is_empty() || (bf.get_bits_used() > n / 10))); 42 | /* presumably the Java generator inserted the values 0 .. n/10 - 1 plus a NaN - confirm against the generator */ 43 | for (uint64_t i = 0; i < n / 10; ++i) { 44 | REQUIRE(bf.query(i)); 45 | } 46 | if (n > 0) REQUIRE(bf.query(std::nan("1"))); 47 | } 48 | } 49 | } 50 | 51 | } /* namespace datasketches */ 52 | -------------------------------------------------------------------------------- /theta/include/theta_union_impl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef THETA_UNION_IMPL_HPP_ 21 | #define THETA_UNION_IMPL_HPP_ 22 | 23 | namespace datasketches { 24 | /* Constructor: passes every argument through to state_, inserting nop_policy() as the policy argument. NOTE(review): template parameter lists are not visible in this copy of the file. */ 25 | template 26 | theta_union_alloc::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const A& allocator): 27 | state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, nop_policy(), allocator) 28 | {} 29 | /* Forwards the given sketch to state_.update() via std::forward. */ 30 | template 31 | template 32 | void theta_union_alloc::update(FwdSketch&& sketch) { 33 | state_.update(std::forward(sketch)); 34 | } 35 | /* Returns state_.get_result(ordered) as a CompactSketch. */ 36 | template 37 | auto theta_union_alloc::get_result(bool ordered) const -> CompactSketch { 38 | return state_.get_result(ordered); 39 | } 40 | /* Delegates to state_.reset(). */ 41 | template 42 | void theta_union_alloc::reset() { 43 | state_.reset(); 44 | } 45 | /* Builder constructor: hands the allocator to theta_base_builder. */ 46 | template 47 | theta_union_alloc::builder::builder(const A& allocator): theta_base_builder(allocator) {} 48 | /* Builds a theta_union_alloc from the builder's starting_lg_size(), lg_k_, rf_, p_, starting_theta(), seed_ and allocator_. */ 49 | template 50 | auto theta_union_alloc::builder::build() const -> theta_union_alloc { 51 | return theta_union_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_); 52 | } 53 | 54 | } /* namespace datasketches */ 55 | 56 | # endif 57 | -------------------------------------------------------------------------------- /sampling/test/var_opt_union_deserialize_from_java_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements.
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | 26 | // assume the binary sketches for this test have been generated by datasketches-java code 27 | // in the subdirectory called "java" in the root directory of this project 28 | static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; 29 | /* Deserializes "varopt_union_double_sampling_java.sk" into a var_opt_union and checks its result: non-empty, n == 97, subset-sum estimate over non-negative items == 96, total sketch weight == 96 + 1024, and k < 128. Floating-point comparisons use a margin of EPS. */ 30 | TEST_CASE("var opt union double", "[serde_compat]") { 31 | const double EPS = 1e-13; 32 | /* failbit/badbit exceptions make a missing or unreadable input file throw instead of failing silently */ 33 | std::ifstream is; 34 | is.exceptions(std::ios::failbit | std::ios::badbit); 35 | is.open(testBinaryInputPath + "varopt_union_double_sampling_java.sk", std::ios::binary); 36 | auto u = var_opt_union::deserialize(is); 37 | 38 | // must reduce k in the process 39 | const auto result = u.get_result(); 40 | REQUIRE_FALSE(result.is_empty()); 41 | REQUIRE(result.get_n() == 97); 42 | /* the 1024.0 added below is presumably the weight of the single heavy item written by the Java generator - confirm */ 43 | const double expected_wt = 96.0; // light items -- ignoring the heavy one 44 | const subset_summary ss = result.estimate_subset_sum([](double x){return x >= 0;}); 45 | REQUIRE(ss.estimate == Approx(expected_wt).margin(EPS)); 46 | REQUIRE(ss.total_sketch_weight == Approx(expected_wt + 1024.0).margin(EPS)); 47 | REQUIRE(result.get_k() < 128); 48 | } 49 | 50 | } /* namespace datasketches */ 51 |
-------------------------------------------------------------------------------- /tdigest/test/tdigest_deserialize_from_java_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | #include 22 | 23 | #include "tdigest.hpp" 24 | 25 | namespace datasketches { 26 | 27 | // assume the binary sketches for this test have been generated by datasketches-java code 28 | // in the subdirectory called "java" in the root directory of this project 29 | static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; 30 | /* For each n in n_arr: opens "tdigest_double_n<n>_java.sk" with failbit/badbit exceptions enabled, deserializes it, and checks emptiness and total weight; for n > 0 also checks min == 1, max == n, rank(0) == 0, rank(n + 1) == 1 and the rank of a middle value. */ 31 | TEST_CASE("tdigest double", "[serde_compat]") { 32 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 33 | for (const unsigned n: n_arr) { 34 | std::ifstream is; 35 | is.exceptions(std::ios::failbit | std::ios::badbit); 36 | is.open(testBinaryInputPath + "tdigest_double_n" + std::to_string(n) + "_java.sk", std::ios::binary); 37 | const auto td = tdigest::deserialize(is); 38 | REQUIRE(td.is_empty() == (n == 0)); 39 | REQUIRE(td.get_total_weight() == n); 40 | if (n > 0) { 41 | REQUIRE(td.get_min_value() == 1.0); 42 | REQUIRE(td.get_max_value() == static_cast(n)); 43 | REQUIRE(td.get_rank(0) == 0); 44 | REQUIRE(td.get_rank(n + 1) == 1); 45 | if (n == 1) { /* a single item is expected to have rank exactly 0.5 */ 46 | REQUIRE(td.get_rank(n) == 0.5); 47 | } else { /* otherwise rank(n / 2) must be within 0.05 of 0.5 */ 48 | REQUIRE(td.get_rank(n / 2) == Approx(0.5).margin(0.05)); 49 | } 50 | } 51 | } 52 | } 53 | 54 | } /* namespace datasketches */ 55 | -------------------------------------------------------------------------------- /tuple/include/array_tuple_union_impl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | namespace datasketches { 21 | /* Constructor: passes all arguments unchanged to Base. NOTE(review): template parameter lists are not visible in this copy of the file. */ 22 | template 23 | array_tuple_union::array_tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator): 24 | Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) 25 | {} 26 | /* Wraps Base::get_result(ordered) in a CompactSketch, passing along the number of values reported by the external policy held in state_. */ 27 | template 28 | auto array_tuple_union::get_result(bool ordered) const -> CompactSketch { 29 | return CompactSketch(this->state_.get_policy().get_external_policy().get_num_values(), Base::get_result(ordered)); 30 | } 31 | 32 | // builder 33 | /* Builder constructor: hands policy and allocator to tuple_base_builder. */ 34 | template 35 | array_tuple_union::builder::builder(const Policy& policy, const Allocator& allocator): 36 | tuple_base_builder(policy, allocator) {} 37 | /* Builds an array_tuple_union from the builder's starting_lg_size(), lg_k_, rf_, p_, starting_theta(), seed_, policy_ and allocator_. */ 38 | template 39 | auto array_tuple_union::builder::build() const -> array_tuple_union { 40 | return array_tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_); 41 | } 42 | 43 | } /* namespace datasketches */ 44 | -------------------------------------------------------------------------------- /tuple/include/tuple_union_impl.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership.
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | namespace datasketches { 21 | /* Constructor: passes the arguments to state_, wrapping the user policy in internal_policy. NOTE(review): template parameter lists are not visible in this copy of the file. */ 22 | template 23 | tuple_union::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator): 24 | state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, internal_policy(policy), allocator) 25 | {} 26 | /* Forwards the given sketch to state_.update() via std::forward. */ 27 | template 28 | template 29 | void tuple_union::update(SS&& sketch) { 30 | state_.update(std::forward(sketch)); 31 | } 32 | /* Returns state_.get_result(ordered) as a CompactSketch. */ 33 | template 34 | auto tuple_union::get_result(bool ordered) const -> CompactSketch { 35 | return state_.get_result(ordered); 36 | } 37 | /* Delegates to state_.reset(). The return type is void, so the call is written as a plain statement instead of "return <void expression>". */ 38 | template 39 | void tuple_union::reset() { 40 | state_.reset(); 41 | } 42 | /* Builder constructor: hands policy and allocator to tuple_base_builder. */ 43 | template 44 | tuple_union::builder::builder(const P& policy, const A& allocator): 45 | tuple_base_builder(policy, allocator) {} 46 | /* Builds a tuple_union from the builder's starting_lg_size(), lg_k_, rf_, p_, starting_theta(), seed_, policy_ and allocator_. */ 47 | template 48 | auto tuple_union::builder::build() const -> tuple_union { 49 | return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_); 50 | } 51 | 52 | } /* namespace datasketches */ 53 | -------------------------------------------------------------------------------- /sampling/test/var_opt_sketch_serialize_for_java.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software
Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | /* For each n in n_arr: builds a var_opt_sketch constructed with 32, updates it with 1..n, and serializes it to "varopt_sketch_long_n<n>_cpp.sk". NOTE(review): none of the ofstreams in this file are checked for open or write failure. */ 26 | TEST_CASE("varopt sketch long generate", "[serialize_for_java]") { 27 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 28 | for (const unsigned n: n_arr) { 29 | var_opt_sketch sketch(32); 30 | for (unsigned i = 1; i <= n; ++i) sketch.update(i); 31 | std::ofstream os("varopt_sketch_long_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 32 | sketch.serialize(os); 33 | } 34 | } 35 | /* Inserts the strings "1".."200" with weight 1000.0 / i into a sketch constructed with 1024 and writes "varopt_sketch_string_exact_cpp.sk". Presumably 200 items stay below the sketch capacity (hence "exact" in the name) - confirm. */ 36 | TEST_CASE("varopt sketch string exact", "[serialize_for_java]") { 37 | var_opt_sketch sketch(1024); 38 | for (unsigned i = 1; i <= 200; ++i) sketch.update(std::to_string(i), 1000.0 / i); 39 | std::ofstream os("varopt_sketch_string_exact_cpp.sk", std::ios::binary); 40 | sketch.serialize(os); 41 | } 42 | /* Inserts 0..1999 without an explicit weight, then three negative items with weights 100000, 110000 and 120000, and writes "varopt_sketch_long_sampling_cpp.sk". */ 43 | TEST_CASE("varopt sketch long sampling", "[serialize_for_java]") { 44 | var_opt_sketch sketch(1024); 45 | for (unsigned i = 0; i < 2000; ++i) sketch.update(i); 46 | // negative heavy items to allow a simple predicate to filter 47 | sketch.update(-1L, 100000.0); 48 | sketch.update(-2L, 110000.0); 49 | sketch.update(-3L, 120000.0); 50 | std::ofstream
os("varopt_sketch_long_sampling_cpp.sk", std::ios::binary); 51 | sketch.serialize(os); 52 | } 53 | 54 | } /* namespace datasketches */ 55 | -------------------------------------------------------------------------------- /tuple/include/array_tuple_a_not_b.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | #ifndef ARRAY_TUPLE_A_NOT_B_HPP_ 21 | #define ARRAY_TUPLE_A_NOT_B_HPP_ 22 | 23 | #include 24 | #include 25 | 26 | #include "array_tuple_sketch.hpp" 27 | #include "tuple_a_not_b.hpp" 28 | 29 | namespace datasketches { 30 | /* NOTE(review): the base class is listed without an access specifier, so the inheritance from tuple_a_not_b is private; only the members declared below are public. Template parameter lists are not visible in this copy of the file. */ 31 | /// array tuple A-not-B 32 | template 33 | class array_tuple_a_not_b: tuple_a_not_b { 34 | public: 35 | using Base = tuple_a_not_b; 36 | using CompactSketch = compact_array_tuple_sketch; 37 | 38 | /** 39 | * Constructor 40 | * @param seed for the hash function that was used to create the sketch (defaults to DEFAULT_SEED) 41 | * @param allocator to use for allocating and deallocating memory (defaults to a default-constructed Allocator) 42 | */ 43 | explicit array_tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator()); 44 | 45 | /** 46 | * Computes the A-not-B set operation given two sketches. 47 | * @param a sketch A, taken as FwdSketch&& 48 | * @param b sketch B, taken by const reference 49 | * @param ordered optional flag to specify if an ordered sketch should be produced (defaults to true) 50 | * @return the result of A-not-B as a compact sketch 51 | */ 52 | template 53 | CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const; 54 | }; 55 | 56 | } /* namespace datasketches */ 57 | 58 | #include "array_tuple_a_not_b_impl.hpp" 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /hll/include/CouponHashSet.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _COUPONHASHSET_HPP_ 21 | #define _COUPONHASHSET_HPP_ 22 | 23 | #include "CouponList.hpp" 24 | 25 | namespace datasketches { 26 | /* CouponHashSet: public subclass of CouponList. Declares two newSet() factories (from a byte buffer and from an istream), two constructors, virtual copy/copyAs, couponUpdate and two layout accessors; definitions live elsewhere. NOTE(review): the include guard starts with an underscore followed by an uppercase letter, which is a reserved identifier form in C++. Template parameter lists are not visible in this copy of the file. */ 27 | template 28 | class CouponHashSet : public CouponList { 29 | public: 30 | static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator); 31 | static CouponHashSet* newSet(std::istream& is, const A& allocator); 32 | CouponHashSet(uint8_t lgConfigK, target_hll_type tgtHllType, const A& allocator); 33 | CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType); 34 | /* the factories above return raw pointers; presumably the callable returned by get_deleter() is what releases them - confirm in the implementation */ 35 | virtual ~CouponHashSet() = default; 36 | virtual std::function*)> get_deleter() const; 37 | 38 | protected: 39 | using vector_int = std::vector::template rebind_alloc>; 40 | 41 | virtual CouponHashSet* copy() const; 42 | virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const; 43 | 44 | virtual HllSketchImpl* couponUpdate(uint32_t coupon); 45 | 46 | virtual uint32_t getMemDataStart() const; 47 | virtual uint8_t getPreInts() const; 48 | /* HllSketchImplFactory is granted access to the protected and private members */ 49 | friend class HllSketchImplFactory; 50 | 51 | private: 52 | using ChsAlloc = typename std::allocator_traits::template rebind_alloc>; 53 | bool checkGrowOrPromote(); 54 | void growHashSet(uint8_t tgtLgCoupArrSize); 55 | }; 56 | 57 | } 58 | 59 | #endif /* _COUPONHASHSET_HPP_ */ 60 | -------------------------------------------------------------------------------- /cpc/include/cpc_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF)
under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef CPC_COMMON_HPP_ 21 | #define CPC_COMMON_HPP_ 22 | 23 | #include 24 | 25 | #include "MurmurHash3.h" 26 | 27 | namespace datasketches { 28 | /* lg_k limits for CPC: minimum 4, maximum 26, default 11 */ 29 | /// CPC constants 30 | namespace cpc_constants { 31 | /// min log2 of K 32 | const uint8_t MIN_LG_K = 4; 33 | /// max log2 of K 34 | const uint8_t MAX_LG_K = 26; 35 | /// default log2 of K 36 | const uint8_t DEFAULT_LG_K = 11; 37 | } 38 | 39 | // forward declaration 40 | template class u32_table; 41 | /* compressed_state: two vector_u32 buffers (table_data, window_data), both constructed with the given allocator, plus their word counts and the table entry count, all initialised to 0. NOTE(review): template parameter lists and the vector element type are not visible in this copy of the file. */ 42 | template 43 | struct compressed_state { 44 | using vector_u32 = std::vector::template rebind_alloc>; 45 | 46 | explicit compressed_state(const A& allocator): table_data(allocator), table_data_words(0), table_num_entries(0), 47 | window_data(allocator), window_data_words(0) {} 48 | vector_u32 table_data; 49 | uint32_t table_data_words; 50 | uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode 51 | vector_u32 window_data; 52 | uint32_t window_data_words; 53 | }; 54 | /* uncompressed_state: a u32_table and a vector_bytes window, both constructed with the given allocator. */ 55 | template 56 | struct uncompressed_state { 57 | using vector_bytes = std::vector::template rebind_alloc>; 58 | 59 | explicit uncompressed_state(const A& allocator): table(allocator), window(allocator)
{} 60 | u32_table table; 61 | vector_bytes window; 62 | }; 63 | 64 | } /* namespace datasketches */ 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /req/test/req_sketch_deserialize_from_java_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | 26 | // assume the binary sketches for this test have been generated by datasketches-java code 27 | // in the subdirectory called "java" in the root directory of this project 28 | static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; 29 | /* For each n in n_arr: opens "req_float_n<n>_java.sk" with failbit/badbit exceptions enabled, deserializes it, and checks is_HRA(), emptiness, estimation mode (expected exactly when n > 10) and get_n(). For n > 0 it also checks min == 1, max == n, that every retained item lies within [min, max], and that the item weights sum to get_n(). */ 30 | TEST_CASE("req float", "[serde_compat]") { 31 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 32 | for (const unsigned n: n_arr) { 33 | std::ifstream is; 34 | is.exceptions(std::ios::failbit | std::ios::badbit); 35 | is.open(testBinaryInputPath + "req_float_n" + std::to_string(n) + "_java.sk", std::ios::binary); 36 | const auto sketch = req_sketch::deserialize(is); 37 | REQUIRE(sketch.is_HRA()); 38 | REQUIRE(sketch.is_empty() == (n == 0)); 39 | REQUIRE(sketch.is_estimation_mode() == (n > 10)); 40 | REQUIRE(sketch.get_n() == n); 41 | if (n > 0) { 42 | REQUIRE(sketch.get_min_item() == 1.0f); 43 | REQUIRE(sketch.get_max_item() == static_cast(n)); 44 | uint64_t weight = 0; /* running sum of pair.second over the iteration below */ 45 | for (const auto pair: sketch) { 46 | REQUIRE(pair.first >= sketch.get_min_item()); 47 | REQUIRE(pair.first <= sketch.get_max_item()); 48 | weight += pair.second; 49 | } 50 | REQUIRE(weight == sketch.get_n()); 51 | } 52 | } 53 | } 54 | 55 | } /* namespace datasketches */ 56 | -------------------------------------------------------------------------------- /hll/include/Hll4Array.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _HLL4ARRAY_HPP_ 21 | #define _HLL4ARRAY_HPP_ 22 | 23 | #include "AuxHashMap.hpp" 24 | #include "HllArray.hpp" 25 | 26 | namespace datasketches { 27 | /* Hll4Array: final subclass of HllArray that additionally holds a raw AuxHashMap pointer (auxHashMap_). Only declarations are given here. NOTE(review): the include guard starts with an underscore followed by an uppercase letter, which is a reserved identifier form in C++. Template parameter lists are not visible in this copy of the file. */ 28 | template 29 | class Hll4Array final : public HllArray { 30 | public: 31 | explicit Hll4Array(uint8_t lgConfigK, bool startFullSize, const A& allocator); 32 | explicit Hll4Array(const Hll4Array& that); 33 | explicit Hll4Array(const HllArray& that); 34 | /* the destructor is user-declared; its definition is not in this header */ 35 | virtual ~Hll4Array(); 36 | virtual std::function*)> get_deleter() const; 37 | 38 | virtual Hll4Array* copy() const; 39 | /* per-slot accessors, addressed by slot number */ 40 | inline uint8_t getSlot(uint32_t slotNo) const; 41 | inline void putSlot(uint32_t slotNo, uint8_t value); 42 | inline uint8_t adjustRawValue(uint32_t index, uint8_t value) const; 43 | 44 | virtual uint32_t getUpdatableSerializationBytes() const; 45 | virtual uint32_t getHllByteArrBytes() const; 46 | 47 | virtual HllSketchImpl* couponUpdate(uint32_t coupon) final; 48 | 49 | virtual AuxHashMap* getAuxHashMap() const; 50 | // does *not* delete old map if overwriting 51 | void putAuxHashMap(AuxHashMap* auxHashMap); 52 | /* iteration; begin() takes an "all" flag that defaults to false */ 53 | virtual typename HllArray::const_iterator begin(bool all = false) const; 54 | virtual typename HllArray::const_iterator end() const; 55 | 56 | private: 57 | void internalCouponUpdate(uint32_t coupon); 58 | void internalHll4Update(uint32_t slotNo, uint8_t newVal); 59 | void shiftToBiggerCurMin(); 60 | /* raw pointer; per the note on putAuxHashMap, replacing it does not free the previous map */ 61 | AuxHashMap* auxHashMap_; 62 | }; 63 | 64 | } 65 | 66 | #endif /* _HLL4ARRAY_HPP_ */ 67 |
-------------------------------------------------------------------------------- /tuple/include/tuple_a_not_b.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef TUPLE_A_NOT_B_HPP_ 21 | #define TUPLE_A_NOT_B_HPP_ 22 | 23 | #include "tuple_sketch.hpp" 24 | #include "theta_set_difference_base.hpp" 25 | 26 | namespace datasketches { 27 | /* Holds a single State member (an alias of theta_set_difference_base) and exposes one const compute() operation. NOTE(review): most template arguments are not visible in this copy of the file. */ 28 | /// tuple A-not-B 29 | template< 30 | typename Summary, 31 | typename Allocator = std::allocator 32 | > 33 | class tuple_a_not_b { 34 | public: 35 | using Entry = std::pair; 36 | using ExtractKey = pair_extract_key; 37 | using CompactSketch = compact_tuple_sketch; 38 | using AllocEntry = typename std::allocator_traits::template rebind_alloc; 39 | using State = theta_set_difference_base; 40 | 41 | /** 42 | * Constructor 43 | * @param seed for the hash function that was used to create the sketch (defaults to DEFAULT_SEED) 44 | * @param allocator to use for allocating and deallocating memory (defaults to a default-constructed Allocator) 45 | */ 46 | explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator()); 47 | 48 | /** 49 | * Computes the A-not-B set operation given two sketches. 50 | * @param a sketch A, taken as FwdSketch&& 51 | * @param b sketch B, taken by const reference 52 | * @param ordered optional flag to specify if an ordered sketch should be produced (defaults to true) 53 | * @return the result of A-not-B as a compact sketch 54 | */ 55 | template 56 | CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const; 57 | 58 | private: 59 | State state_; 60 | }; 61 | 62 | } /* namespace datasketches */ 63 | 64 | #include "tuple_a_not_b_impl.hpp" 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /theta/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership.
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | # INTERFACE library target "theta": nothing is compiled for it, it only carries usage requirements 18 | add_library(theta INTERFACE) 19 | # namespaced alias so consumers can refer to the target as <project>::THETA 20 | add_library(${PROJECT_NAME}::THETA ALIAS theta) 21 | # tests are only added when BUILD_TESTS is set 22 | if (BUILD_TESTS) 23 | add_subdirectory(test) 24 | endif() 25 | # NOTE(review): the two generator expressions below look truncated in this copy of the file - restore them from the repository before use 26 | target_include_directories(theta 27 | INTERFACE 28 | $ 29 | $/include> 30 | ) 31 | # theta depends on the "common" target 32 | target_link_libraries(theta INTERFACE common) 33 | # register the target in the project's export set 34 | install(TARGETS theta 35 | EXPORT ${PROJECT_NAME} 36 | ) 37 | # explicit list of public headers to install; a new header must be added here to be installed 38 | install(FILES 39 | include/theta_sketch.hpp 40 | include/theta_sketch_impl.hpp 41 | include/theta_union.hpp 42 | include/theta_union_impl.hpp 43 | include/theta_intersection.hpp 44 | include/theta_intersection_impl.hpp 45 | include/theta_a_not_b.hpp 46 | include/theta_a_not_b_impl.hpp 47 | include/theta_jaccard_similarity.hpp 48 | include/theta_comparators.hpp 49 | include/theta_constants.hpp 50 | include/theta_helpers.hpp 51 | include/theta_update_sketch_base.hpp 52 | include/theta_update_sketch_base_impl.hpp 53 | include/theta_union_base.hpp 54 | include/theta_union_base_impl.hpp 55 | include/theta_intersection_base.hpp 56 | include/theta_intersection_base_impl.hpp 57 | include/theta_set_difference_base.hpp 58 | include/theta_set_difference_base_impl.hpp 59 | include/theta_jaccard_similarity_base.hpp 60 | include/bounds_on_ratios_in_sampled_sets.hpp 61 | include/bounds_on_ratios_in_theta_sketched_sets.hpp 62 | include/compact_theta_sketch_parser.hpp 63 |
include/compact_theta_sketch_parser_impl.hpp 64 | include/bit_packing.hpp 65 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches") 66 | -------------------------------------------------------------------------------- /kll/test/kll_sketch_serialize_for_java.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | 26 | TEST_CASE("kll sketch float generate", "[serialize_for_java]") { 27 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 28 | for (const unsigned n: n_arr) { 29 | kll_sketch sketch; 30 | for (unsigned i = 1; i <= n; ++i) sketch.update(i); 31 | std::ofstream os("kll_float_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 32 | sketch.serialize(os); 33 | } 34 | } 35 | 36 | TEST_CASE("kll sketch double generate", "[serialize_for_java]") { 37 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 38 | for (const unsigned n: n_arr) { 39 | kll_sketch sketch; 40 | for (unsigned i = 1; i <= n; ++i) sketch.update(i); 41 | std::ofstream os("kll_double_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 42 | sketch.serialize(os); 43 | } 44 | } 45 | 46 | struct compare_as_number { 47 | bool operator()(const std::string& a, const std::string& b) const { 48 | return std::stoi(a) < std::stoi(b); 49 | } 50 | }; 51 | 52 | TEST_CASE("kll sketch string generate", "[serialize_for_java]") { 53 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 54 | for (const unsigned n: n_arr) { 55 | kll_sketch sketch; 56 | for (unsigned i = 1; i <= n; ++i) sketch.update(std::to_string(i)); 57 | std::ofstream os("kll_string_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 58 | sketch.serialize(os); 59 | } 60 | } 61 | 62 | } /* namespace datasketches */ 63 | -------------------------------------------------------------------------------- /common/include/memory_operations.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
/*
 * The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

#ifndef _MEMORY_OPERATIONS_HPP_
#define _MEMORY_OPERATIONS_HPP_

#include <cstddef>
#include <cstring>
#include <exception>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

namespace datasketches {

/**
 * Checks that a buffer holds at least the required number of bytes.
 * @param bytes_available number of bytes available
 * @param min_needed minimum number of bytes required
 * @throws std::out_of_range if bytes_available is less than min_needed
 */
static inline void ensure_minimum_memory(size_t bytes_available, size_t min_needed) {
  if (bytes_available < min_needed) {
    throw std::out_of_range("Insufficient buffer size detected: bytes available "
    + std::to_string(bytes_available) + ", minimum needed " + std::to_string(min_needed));
  }
}

/**
 * Checks that a requested index does not exceed the capacity.
 * An index equal to the capacity is accepted.
 * @param requested_index index about to be accessed
 * @param capacity upper limit for the index
 * @throws std::out_of_range if requested_index is greater than capacity
 */
static inline void check_memory_size(size_t requested_index, size_t capacity) {
  if (requested_index > capacity) {
    throw std::out_of_range("Attempt to access memory beyond limits: requested index "
    + std::to_string(requested_index) + ", capacity " + std::to_string(capacity));
  }
}

/**
 * Copies raw bytes from src to dst.
 * note: size is in bytes, not items
 * @return the number of bytes copied (size)
 */
static inline size_t copy_from_mem(const void* src, void* dst, size_t size) {
  // memcpy requires valid pointers even for a zero count, so skip the call entirely
  if (size > 0) std::memcpy(dst, src, size);
  return size;
}

/**
 * Copies raw bytes from src to dst.
 * note: size is in bytes, not items
 * @return the number of bytes copied (size)
 */
static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
  // memcpy requires valid pointers even for a zero count, so skip the call entirely
  if (size > 0) std::memcpy(dst, src, size);
  return size;
}

/**
 * Reads one item of type T from src by copying its object representation.
 * @return sizeof(T)
 */
template<typename T>
static inline size_t copy_from_mem(const void* src, T& item) {
  std::memcpy(&item, src, sizeof(T));
  return sizeof(T);
}

/**
 * Writes one item of type T to dst by copying its object representation.
 * @return sizeof(T)
 */
template<typename T>
static inline size_t copy_to_mem(T item, void* dst) {
  std::memcpy(dst, &item, sizeof(T));
  return sizeof(T);
}

} // namespace datasketches

#endif // _MEMORY_OPERATIONS_HPP_
// -------------------------------------------------------------------------------- /hll/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License.
# INTERFACE library target for the hll module, plus a namespaced alias.
add_library(hll INTERFACE)

add_library(${PROJECT_NAME}::HLL ALIAS hll)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

# NOTE(review): the two generator expressions below were reconstructed from the
# residue "$" / "$/include>" - confirm against the project's other modules.
target_include_directories(hll
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(hll INTERFACE common)

install(TARGETS hll
  EXPORT ${PROJECT_NAME}
)

# Headers copied into the install tree.
install(FILES
    include/hll.hpp
    include/AuxHashMap.hpp
    include/CompositeInterpolationXTable.hpp
    include/hll.private.hpp
    include/HllSketchImplFactory.hpp
    include/CouponHashSet.hpp
    include/CouponList.hpp
    include/CubicInterpolation.hpp
    include/HarmonicNumbers.hpp
    include/Hll4Array.hpp
    include/Hll6Array.hpp
    include/Hll8Array.hpp
    include/HllArray.hpp
    include/HllSketchImpl.hpp
    include/HllUtil.hpp
    include/coupon_iterator.hpp
    include/RelativeErrorTables.hpp
    include/AuxHashMap-internal.hpp
    include/CompositeInterpolationXTable-internal.hpp
    include/CouponHashSet-internal.hpp
    include/CouponList-internal.hpp
    include/CubicInterpolation-internal.hpp
    include/HarmonicNumbers-internal.hpp
    include/Hll4Array-internal.hpp
    include/Hll6Array-internal.hpp
    include/Hll8Array-internal.hpp
    include/HllArray-internal.hpp
    include/HllSketch-internal.hpp
    include/HllSketchImpl-internal.hpp
    include/HllUnion-internal.hpp
    include/coupon_iterator-internal.hpp
    include/RelativeErrorTables-internal.hpp
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
# -------------------------------------------------------------------------------- /cpc/test/cpc_sketch_deserialize_from_java_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements.
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | 26 | // assume the binary sketches for this test have been generated by datasketches-java code 27 | // in the subdirectory called "java" in the root directory of this project 28 | static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; 29 | 30 | TEST_CASE("cpc sketch", "[serde_compat]") { 31 | const unsigned n_arr[] = {0, 100, 200, 2000, 20000}; 32 | for (const unsigned n: n_arr) { 33 | std::ifstream is; 34 | is.exceptions(std::ios::failbit | std::ios::badbit); 35 | is.open(testBinaryInputPath + "cpc_n" + std::to_string(n) + "_java.sk", std::ios::binary); 36 | const auto sketch = cpc_sketch::deserialize(is); 37 | REQUIRE(sketch.is_empty() == (n == 0)); 38 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); 39 | } 40 | } 41 | 42 | TEST_CASE("cpc sketch negative one", "[serde_compat]") { 43 | std::ifstream is; 44 | is.exceptions(std::ios::failbit | std::ios::badbit); 45 | is.open(testBinaryInputPath + "cpc_negative_one_java.sk", std::ios::binary); 46 | auto sketch = cpc_sketch::deserialize(is); 47 | REQUIRE_FALSE(sketch.is_empty()); 48 | REQUIRE(sketch.get_estimate() == Approx(1).margin(0.01)); 49 | 
sketch.update((uint64_t) -1); 50 | sketch.update((int64_t) -1); 51 | sketch.update((uint32_t) -1); 52 | sketch.update((int32_t) -1); 53 | sketch.update((uint16_t) -1); 54 | sketch.update((int16_t) -1); 55 | sketch.update((uint8_t) -1); 56 | sketch.update((int8_t) -1); 57 | REQUIRE(sketch.get_estimate() == Approx(1).margin(0.01)); 58 | } 59 | 60 | } /* namespace datasketches */ 61 | -------------------------------------------------------------------------------- /tuple/include/array_of_doubles_sketch.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef ARRAY_OF_DOUBLES_SKETCH_HPP_ 21 | #define ARRAY_OF_DOUBLES_SKETCH_HPP_ 22 | 23 | #include "array_tuple_sketch.hpp" 24 | #include "array_tuple_union.hpp" 25 | #include "array_tuple_intersection.hpp" 26 | #include "array_tuple_a_not_b.hpp" 27 | 28 | namespace datasketches { 29 | 30 | /// convenience alias with default allocator, default policy for update_array_of_doubles_sketch 31 | using default_array_of_doubles_update_policy = default_array_tuple_update_policy>; 32 | 33 | /// convenience alias with default allocator, equivalent to ArrayOfDoublesUpdatableSketch in Java 34 | using update_array_of_doubles_sketch = update_array_tuple_sketch>; 35 | 36 | /// convenience alias with default allocator, equivalent to ArrayOfDoublesCompactSketch in Java 37 | using compact_array_of_doubles_sketch = compact_array_tuple_sketch>; 38 | 39 | /// convenience alias, default policy for array_of_doubles_union 40 | using default_array_of_doubles_union_policy = default_array_tuple_union_policy>; 41 | 42 | /// convenience alias with default allocator, equivalent to ArrayOfDoublesUnion in Java 43 | using array_of_doubles_union = array_tuple_union>; 44 | 45 | /// convenience alias with default allocator, equivalent to ArrayOfDoublesIntersection in Java 46 | /// no default policy since it is not clear in general 47 | template using array_of_doubles_intersection = array_tuple_intersection, Policy>; 48 | 49 | /// convenience alias with default allocator, equivalent to ArrayOfDoublesAnotB in Java 50 | using array_of_doubles_a_not_b = array_tuple_a_not_b>; 51 | 52 | } /* namespace datasketches */ 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /tuple/include/array_tuple_intersection.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef ARRAY_TUPLE_INTERSECTION_HPP_ 21 | #define ARRAY_TUPLE_INTERSECTION_HPP_ 22 | 23 | #include 24 | #include 25 | 26 | #include "array_tuple_sketch.hpp" 27 | #include "tuple_intersection.hpp" 28 | 29 | namespace datasketches { 30 | 31 | /// array tuple intersection 32 | template< 33 | typename Array, 34 | typename Policy, 35 | typename Allocator = typename Array::allocator_type 36 | > 37 | class array_tuple_intersection: public tuple_intersection { 38 | public: 39 | using Base = tuple_intersection; 40 | using CompactSketch = compact_array_tuple_sketch; 41 | using resize_factor = theta_constants::resize_factor; 42 | 43 | /** 44 | * Constructor 45 | * @param seed for the hash function that was used to create the sketch 46 | * @param policy user-defined way of combining Summary during intersection 47 | * @param allocator to use for allocating and deallocating memory 48 | */ 49 | explicit array_tuple_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy(), const Allocator& allocator = Allocator()); 50 | 51 | /** 52 | * Produces a copy of the current state of the intersection. 
53 | * If update() was not called, the state is the infinite "universe", 54 | * which is considered an undefined state, and throws an exception. 55 | * @param ordered optional flag to specify if an ordered sketch should be produced 56 | * @return the result of the intersection as a compact sketch 57 | */ 58 | CompactSketch get_result(bool ordered = true) const; 59 | }; 60 | 61 | } /* namespace datasketches */ 62 | 63 | #include "array_tuple_intersection_impl.hpp" 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /theta/include/theta_a_not_b.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef THETA_A_NOT_B_HPP_ 21 | #define THETA_A_NOT_B_HPP_ 22 | 23 | #include "theta_sketch.hpp" 24 | #include "theta_set_difference_base.hpp" 25 | 26 | namespace datasketches { 27 | 28 | // forward declaration 29 | template class theta_a_not_b_alloc; 30 | 31 | // alias with default allocator for convenience 32 | using theta_a_not_b = theta_a_not_b_alloc>; 33 | 34 | /** 35 | * Theta A-not-B (set difference). 
36 | * Computes set difference of Theta sketches. 37 | */ 38 | template> 39 | class theta_a_not_b_alloc { 40 | public: 41 | using Entry = uint64_t; 42 | using ExtractKey = trivial_extract_key; 43 | using CompactSketch = compact_theta_sketch_alloc; 44 | using State = theta_set_difference_base; 45 | 46 | /** 47 | * Constructor 48 | * @param seed for the hash function that was used to create the sketch 49 | * @param allocator to use for allocating and deallocating memory 50 | */ 51 | explicit theta_a_not_b_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator()); 52 | 53 | /** 54 | * Computes the A-not-B set operation given two sketches. 55 | * @param a sketch A 56 | * @param b sketch B 57 | * @param ordered optional flag to specify if an ordered sketch should be produced 58 | * @return the result of A-not-B as a compact sketch 59 | */ 60 | template 61 | CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const; 62 | 63 | private: 64 | State state_; 65 | }; 66 | 67 | } /* namespace datasketches */ 68 | 69 | #include "theta_a_not_b_impl.hpp" 70 | 71 | # endif 72 | -------------------------------------------------------------------------------- /cpc/test/compression_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | 23 | #include "cpc_compressor.hpp" 24 | 25 | namespace datasketches { 26 | 27 | typedef u32_table> table; 28 | 29 | TEST_CASE("cpc sketch: compress and decompress pairs", "[cpc_sketch]") { 30 | const size_t N = 200; 31 | const size_t MAXWORDS = 1000; 32 | 33 | HashState twoHashes; 34 | uint32_t pairArray[N]; 35 | uint32_t pairArray2[N]; 36 | uint64_t value = 35538947; // some arbitrary starting value 37 | const uint64_t golden64 = 0x9e3779b97f4a7c13ULL; // the golden ratio 38 | for (size_t i = 0; i < N; i++) { 39 | MurmurHash3_x64_128(&value, sizeof(value), 0, twoHashes); 40 | uint32_t rand = twoHashes.h1 & 0xffff; 41 | pairArray[i] = rand; 42 | value += golden64; 43 | } 44 | //table::knuth_shell_sort3(pairArray, 0, N - 1); // unsigned numerical sort 45 | std::sort(pairArray, pairArray + N); 46 | uint32_t prev = UINT32_MAX; 47 | uint32_t nxt = 0; 48 | for (size_t i = 0; i < N; i++) { // uniquify 49 | if (pairArray[i] != prev) { 50 | prev = pairArray[i]; 51 | pairArray[nxt++] = pairArray[i]; 52 | } 53 | } 54 | uint32_t numPairs = nxt; 55 | 56 | uint32_t compressedWords[MAXWORDS]; 57 | 58 | for (uint8_t numBaseBits = 0; numBaseBits <= 11; numBaseBits++) { 59 | uint32_t numWordsWritten = get_compressor>().low_level_compress_pairs(pairArray, numPairs, numBaseBits, compressedWords); 60 | get_compressor>().low_level_uncompress_pairs(pairArray2, numPairs, numBaseBits, compressedWords, numWordsWritten); 61 | for (size_t i = 0; i < numPairs; i++) { 62 | REQUIRE(pairArray[i] == 
pairArray2[i]); 63 | } 64 | } 65 | } 66 | 67 | } /* namespace datasketches */ 68 | -------------------------------------------------------------------------------- /theta/include/theta_helpers.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef THETA_HELPERS_HPP_ 21 | #define THETA_HELPERS_HPP_ 22 | 23 | #include 24 | #include 25 | 26 | #include "theta_constants.hpp" 27 | 28 | namespace datasketches { 29 | 30 | template 31 | static void check_value(T actual, T expected, const char* description) { 32 | if (actual != expected) { 33 | throw std::invalid_argument(std::string(description) + " mismatch: expected " + std::to_string(expected) + ", actual " + std::to_string(actual)); 34 | } 35 | } 36 | 37 | template 38 | class checker { 39 | public: 40 | static void check_serial_version(uint8_t actual, uint8_t expected) { 41 | check_value(actual, expected, "serial version"); 42 | } 43 | static void check_sketch_family(uint8_t actual, uint8_t expected) { 44 | check_value(actual, expected, "sketch family"); 45 | } 46 | static void check_sketch_type(uint8_t actual, uint8_t expected) { 47 | check_value(actual, expected, "sketch type"); 48 | } 49 | static void check_seed_hash(uint16_t actual, uint16_t expected) { 50 | check_value(actual, expected, "seed hash"); 51 | } 52 | }; 53 | 54 | template 55 | class theta_build_helper{ 56 | public: 57 | // consistent way of initializing theta from p 58 | // avoids multiplication if p == 1 since it might not yield MAX_THETA exactly 59 | static uint64_t starting_theta_from_p(float p) { 60 | if (p < 1) return static_cast(static_cast(theta_constants::MAX_THETA) * p); 61 | return theta_constants::MAX_THETA; 62 | } 63 | 64 | static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) { 65 | return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? 
lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min; 66 | } 67 | }; 68 | 69 | } /* namespace datasketches */ 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /tdigest/test/tdigest_serialize_for_java.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | #include 22 | 23 | #include "tdigest.hpp" 24 | 25 | namespace datasketches { 26 | 27 | TEST_CASE("tdigest double generate", "[serialize_for_java]") { 28 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 29 | for (const unsigned n: n_arr) { 30 | tdigest_double td(100); 31 | for (unsigned i = 1; i <= n; ++i) td.update(i); 32 | std::ofstream os("tdigest_double_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 33 | td.serialize(os); 34 | } 35 | } 36 | 37 | TEST_CASE("tdigest double generate with buffer", "[serialize_for_java]") { 38 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 39 | for (const unsigned n: n_arr) { 40 | tdigest_double td(100); 41 | for (unsigned i = 1; i <= n; ++i) td.update(i); 42 | std::ofstream os("tdigest_double_buf_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 43 | td.serialize(os, true); 44 | } 45 | } 46 | 47 | TEST_CASE("tdigest float generate", "[serialize_for_java]") { 48 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 49 | for (const unsigned n: n_arr) { 50 | tdigest_float td(100); 51 | for (unsigned i = 1; i <= n; ++i) td.update(i); 52 | std::ofstream os("tdigest_float_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 53 | td.serialize(os); 54 | } 55 | } 56 | 57 | TEST_CASE("tdigest float generate with buffer", "[serialize_for_java]") { 58 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 59 | for (const unsigned n: n_arr) { 60 | tdigest_float td(100); 61 | for (unsigned i = 1; i <= n; ++i) td.update(i); 62 | std::ofstream os("tdigest_float_buf_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 63 | td.serialize(os, true); 64 | } 65 | } 66 | 67 | } /* namespace datasketches */ 68 | -------------------------------------------------------------------------------- /theta/test/theta_sketch_serialize_for_java.cpp: -------------------------------------------------------------------------------- 
1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | 26 | TEST_CASE("theta sketch generate", "[serialize_for_java]") { 27 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 28 | for (const unsigned n: n_arr) { 29 | auto sketch = update_theta_sketch::builder().build(); 30 | for (unsigned i = 0; i < n; ++i) sketch.update(i); 31 | REQUIRE(sketch.is_empty() == (n == 0)); 32 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); 33 | std::ofstream os("theta_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 34 | sketch.compact().serialize(os); 35 | } 36 | } 37 | 38 | TEST_CASE("theta sketch generate compressed", "[serialize_for_java]") { 39 | const unsigned n_arr[] = {10, 100, 1000, 10000, 100000, 1000000}; 40 | for (const unsigned n: n_arr) { 41 | auto sketch = update_theta_sketch::builder().build(); 42 | for (unsigned i = 0; i < n; ++i) sketch.update(i); 43 | REQUIRE_FALSE(sketch.is_empty()); 44 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); 45 | std::ofstream os("theta_compressed_n" + std::to_string(n) + "_cpp.sk", 
std::ios::binary); 46 | sketch.compact().serialize_compressed(os); 47 | } 48 | } 49 | 50 | TEST_CASE("theta sketch generate non-empty no entries", "[serialize_for_java]") { 51 | auto sketch = update_theta_sketch::builder().set_p(0.01).build(); 52 | // here we rely on the fact that hash of 1 happens to be greater than 0.01 (when normalized) 53 | // and therefore gets rejected 54 | sketch.update(1); 55 | REQUIRE_FALSE(sketch.is_empty()); 56 | REQUIRE(sketch.get_num_retained() == 0); 57 | std::ofstream os("theta_non_empty_no_entries_cpp.sk", std::ios::binary); 58 | sketch.compact().serialize(os); 59 | } 60 | 61 | } /* namespace datasketches */ 62 | -------------------------------------------------------------------------------- /sampling/test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
# separate executables for var_opt and ebpps sampling

# VAR OPT SAMPLING
add_executable(var_opt_sampling_test)

target_link_libraries(var_opt_sampling_test sampling common_test_lib)

set_target_properties(var_opt_sampling_test PROPERTIES
  CXX_STANDARD_REQUIRED YES
)

# Pass this directory (with a trailing slash) to the tests as TEST_BINARY_INPUT_PATH.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" SAMPLING_TEST_BINARY_PATH)
string(APPEND SAMPLING_TEST_BINARY_PATH "/")
target_compile_definitions(var_opt_sampling_test
  PRIVATE
    TEST_BINARY_INPUT_PATH="${SAMPLING_TEST_BINARY_PATH}"
)

add_test(
  NAME var_opt_sampling_test
  COMMAND var_opt_sampling_test
)

target_sources(var_opt_sampling_test
  PRIVATE
    var_opt_sketch_test.cpp
    var_opt_union_test.cpp
    var_opt_allocation_test.cpp
)


# EBPPS SAMPLING
add_executable(ebpps_sampling_test)

target_link_libraries(ebpps_sampling_test sampling common_test_lib)

set_target_properties(ebpps_sampling_test PROPERTIES
  CXX_STANDARD_REQUIRED YES
)

target_compile_definitions(ebpps_sampling_test
  PRIVATE
    TEST_BINARY_INPUT_PATH="${SAMPLING_TEST_BINARY_PATH}"
)

add_test(
  NAME ebpps_sampling_test
  COMMAND ebpps_sampling_test
)

target_sources(ebpps_sampling_test
  PRIVATE
    ebpps_sample_test.cpp
    ebpps_sketch_test.cpp
    ebpps_allocation_test.cpp
)


# Compatibility
if (SERDE_COMPAT)
  target_sources(var_opt_sampling_test
    PRIVATE
      var_opt_sketch_deserialize_from_java_test.cpp
      var_opt_union_deserialize_from_java_test.cpp
  )
endif()

if (GENERATE)
  target_sources(var_opt_sampling_test
    PRIVATE
      var_opt_sketch_serialize_for_java.cpp
      var_opt_union_serialize_for_java.cpp
  )
endif()
# -------------------------------------------------------------------------------- /common/test/CMakeLists.txt:
# --------------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# two parts here, the common test code for other parts to use,
# and an integration test using the other parts of the library.

# common dependencies for tests
add_library(common_test_lib OBJECT "")

include(FetchContent)

FetchContent_Declare(
  Catch2
  GIT_REPOSITORY https://github.com/catchorg/Catch2
  GIT_TAG v2.13.9
)

FetchContent_MakeAvailable(Catch2)

target_link_libraries(common_test_lib PUBLIC Catch2::Catch2)

set_target_properties(common_test_lib PROPERTIES
  CXX_STANDARD_REQUIRED YES
)

target_include_directories(common_test_lib
  INTERFACE
    ${CMAKE_CURRENT_SOURCE_DIR}
)

target_sources(common_test_lib
  INTERFACE
    ${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/test_type.hpp
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/catch_runner.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.cpp
)

add_executable(common_test)

target_link_libraries(common_test common common_test_lib)

set_target_properties(common_test PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED YES
)

add_test(
  NAME common_test
  COMMAND common_test
)

target_sources(common_test
  PRIVATE
    quantiles_sorted_view_test.cpp
    optional_test.cpp
)

# now the integration test part
add_executable(integration_test)

target_link_libraries(integration_test count cpc density fi hll kll req sampling theta tuple common_test_lib)

set_target_properties(integration_test PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED YES
)

add_test(
  NAME integration_test
  COMMAND integration_test
)

target_sources(integration_test
  PRIVATE
    integration_test.cpp
)
# -------------------------------------------------------------------------------- /common/test/integration_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software
Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | 22 | #include "count_min.hpp" 23 | #include "cpc_sketch.hpp" 24 | #include "cpc_union.hpp" 25 | #include "density_sketch.hpp" 26 | #include "frequent_items_sketch.hpp" 27 | #include "hll.hpp" 28 | #include "kll_sketch.hpp" 29 | #include "req_sketch.hpp" 30 | #include "var_opt_sketch.hpp" 31 | #include "var_opt_union.hpp" 32 | #include "theta_sketch.hpp" 33 | #include "theta_union.hpp" 34 | #include "theta_intersection.hpp" 35 | #include "theta_a_not_b.hpp" 36 | #include "tuple_sketch.hpp" 37 | #include "tuple_union.hpp" 38 | #include "tuple_intersection.hpp" 39 | #include "tuple_a_not_b.hpp" 40 | 41 | namespace datasketches { 42 | 43 | template 44 | struct subtracting_intersection_policy { 45 | void operator()(Summary& summary, const Summary& other) const { 46 | summary -= other; 47 | } 48 | }; 49 | 50 | using tuple_intersection_float = tuple_intersection>; 51 | 52 | TEST_CASE("integration: declare all sketches", "[integration]") { 53 | count_min_sketch cm(5, 128); 54 | 55 | cpc_sketch cpc(12); 56 | cpc_union cpc_u(12); 57 | 58 | density_sketch ds(32, 3); 59 | 60 | frequent_items_sketch fi(100); 61 | 62 | hll_sketch hll(13); 63 | 
hll_union hll_u(13); 64 | 65 | kll_sketch kll(200); 66 | 67 | req_sketch req(12); 68 | 69 | var_opt_sketch vo(100); 70 | var_opt_union vo_u(100); 71 | 72 | update_theta_sketch theta = update_theta_sketch::builder().build(); 73 | theta_union theta_u = theta_union::builder().build(); 74 | theta_intersection theta_i; 75 | theta_a_not_b theta_anb; 76 | 77 | auto tuple = update_tuple_sketch::builder().build(); 78 | auto tuple_u = tuple_union::builder().build(); 79 | tuple_intersection_float tuple_i; 80 | tuple_a_not_b tuple_anb; 81 | } 82 | 83 | } /* namespace datasketches */ 84 | -------------------------------------------------------------------------------- /common/include/conditional_back_inserter.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef CONDITIONAL_BACK_INSERTER_HPP_ 21 | #define CONDITIONAL_BACK_INSERTER_HPP_ 22 | 23 | #include <iterator> 24 | #include <utility> 25 | 26 | namespace datasketches { 27 | // Output iterator that appends to the container only those values for which the predicate returns true 28 | template <typename Container, typename Predicate> 29 | class conditional_back_insert_iterator: public std::back_insert_iterator<Container> { 30 | public: 31 | template <typename P> 32 | conditional_back_insert_iterator(Container& c, P&& p): std::back_insert_iterator<Container>(c), p(std::forward<P>(p)) {} 33 | 34 | // MSVC seems to insist on having copy constructor and assignment 35 | conditional_back_insert_iterator(const conditional_back_insert_iterator& other): 36 | std::back_insert_iterator<Container>(other), p(other.p) {} 37 | conditional_back_insert_iterator& operator=(const conditional_back_insert_iterator& other) { 38 | std::back_insert_iterator<Container>::operator=(other); 39 | p = other.p; 40 | return *this; 41 | } 42 | // value assignment: forwards to the base iterator only when p(value) holds, otherwise the value is dropped 43 | conditional_back_insert_iterator& operator=(const typename Container::value_type& value) { 44 | if (p(value)) std::back_insert_iterator<Container>::operator=(value); 45 | return *this; 46 | } 47 | 48 | conditional_back_insert_iterator& operator=(typename Container::value_type&& value) { 49 | if (p(value)) std::back_insert_iterator<Container>::operator=(std::move(value)); 50 | return *this; 51 | } 52 | // dereference and increment do nothing and return the iterator itself 53 | conditional_back_insert_iterator& operator*() { return *this; } 54 | conditional_back_insert_iterator& operator++() { return *this; } 55 | conditional_back_insert_iterator& operator++(int) { return *this; } 56 | 57 | private: 58 | Predicate p; 59 | }; 60 | // factory: builds a conditional_back_insert_iterator for container c, forwarding predicate p 61 | template <typename Container, typename Predicate> 62 | conditional_back_insert_iterator<Container, Predicate> conditional_back_inserter(Container& c, Predicate&& p) { 63 | return conditional_back_insert_iterator<Container, Predicate>(c, std::forward<Predicate>(p)); 64 | } 65 | 66 | } /* namespace datasketches */ 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /tuple/test/aod_sketch_serialize_for_java.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | 23 | #include "array_of_doubles_sketch.hpp" 24 | 25 | namespace datasketches { 26 | 27 | TEST_CASE("aod sketch generate one value", "[serialize_for_java]") { 28 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 29 | for (const unsigned n: n_arr) { 30 | auto sketch = update_array_of_doubles_sketch::builder().build(); 31 | for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector(1, i)); 32 | REQUIRE(sketch.is_empty() == (n == 0)); 33 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); 34 | std::ofstream os("aod_1_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 35 | sketch.compact().serialize(os); 36 | } 37 | } 38 | 39 | TEST_CASE("aod sketch generate three values", "[serialize_for_java]") { 40 | const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000}; 41 | for (const unsigned n: n_arr) { 42 | auto sketch = update_array_of_doubles_sketch::builder(3).build(); 43 | for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector(3, i)); 44 | REQUIRE(sketch.is_empty() == (n == 0)); 45 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03)); 46 | std::ofstream os("aod_3_n" + std::to_string(n) + "_cpp.sk", std::ios::binary); 47 | sketch.compact().serialize(os); 48 | } 49 | } 50 | 51 | TEST_CASE("aod sketch generate non-empty no entries", "[serialize_for_java]") { 52 | auto sketch = update_array_of_doubles_sketch::builder().set_p(0.01).build(); 53 | // here we rely on the fact that hash of 1 happens to be greater than 
0.01 (when normalized) 54 | // and therefore gets rejected 55 | sketch.update(1, std::vector({1})); 56 | REQUIRE_FALSE(sketch.is_empty()); 57 | REQUIRE(sketch.get_num_retained() == 0); 58 | std::ofstream os("aod_1_non_empty_no_entries_cpp.sk", std::ios::binary); 59 | sketch.compact().serialize(os); 60 | } 61 | 62 | } /* namespace datasketches */ 63 | -------------------------------------------------------------------------------- /common/test/optional_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | 22 | #include 23 | 24 | #include "optional.hpp" 25 | 26 | namespace datasketches { 27 | 28 | class tt { 29 | public: 30 | tt() = delete; // make sure it cannot be default constructed 31 | tt(int val): val_(val) {} 32 | tt(const tt& other): val_(other.val_) { std::cout << "tt copy constructor\n"; } 33 | tt(tt&& other): val_(other.val_) { std::cout << "tt move constructor\n"; } 34 | tt& operator=(const tt& other) { val_ = other.val_; std::cout << "tt copy assignment\n"; return *this; } 35 | tt& operator=(tt&& other) { val_ = other.val_; std::cout << "tt move assignment\n"; return *this; } 36 | int get_val() const { return val_; } 37 | private: 38 | int val_; 39 | }; 40 | 41 | TEST_CASE("optional", "[common]") { 42 | optional opt; 43 | REQUIRE_FALSE(opt); 44 | opt.emplace(5); 45 | REQUIRE(bool(opt)); 46 | REQUIRE((*opt).get_val() == 5); 47 | REQUIRE(opt->get_val() == 5); 48 | opt.reset(); 49 | REQUIRE_FALSE(opt); 50 | 51 | optional opt2(opt); 52 | REQUIRE_FALSE(opt2); 53 | 54 | opt2.emplace(3); 55 | if (opt2) *opt2 = 6; // good if it is initialized 56 | REQUIRE(opt2->get_val() == 6); 57 | 58 | opt.reset(); 59 | REQUIRE_FALSE(opt); 60 | optional opt3(std::move(opt)); 61 | REQUIRE_FALSE(opt3); 62 | *opt3 = 7; // don't do this! 
may be dangerous for arbitrary T, and it still thinks it is not initialized 63 | REQUIRE_FALSE(opt3); 64 | opt3.emplace(8); 65 | REQUIRE(bool(opt3)); 66 | REQUIRE(opt3->get_val() == 8); 67 | 68 | std::swap(opt2, opt3); 69 | REQUIRE(opt2->get_val() == 8); 70 | REQUIRE(opt3->get_val() == 6); 71 | 72 | std::swap(opt2, opt); 73 | REQUIRE_FALSE(opt2); 74 | REQUIRE(bool(opt)); 75 | REQUIRE(opt->get_val() == 8); 76 | } 77 | 78 | TEST_CASE("optional conversion", "[common]") { 79 | optional opt_f(1); 80 | optional opt_d(opt_f); 81 | REQUIRE(bool(opt_d)); 82 | REQUIRE(*opt_d == static_cast(*opt_f)); 83 | } 84 | 85 | } /* namespace datasketches */ 86 | -------------------------------------------------------------------------------- /filters/test/bloom_filter_allocation_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | 22 | #include 23 | 24 | #include "bloom_filter.hpp" 25 | #include "test_type.hpp" 26 | #include "test_allocator.hpp" 27 | 28 | namespace datasketches { 29 | 30 | using bloom_filter_test_alloc = bloom_filter_alloc>; 31 | using alloc = test_allocator; 32 | // Exercises create/update/serialize/deserialize/union/wrap and then requires the allocator counters to be back at zero 33 | TEST_CASE("bloom filter allocation test", "[bloom_filter][test_type]") { 34 | test_allocator_total_bytes = 0; 35 | test_allocator_net_allocations = 0; 36 | { // inner scope: every filter and byte buffer is destroyed before the counters are checked 37 | int64_t num_items = 10000; 38 | double fpp = 0.01; 39 | uint64_t seed = bloom_filter_test_alloc::builder::generate_random_seed(); 40 | auto bf1 = bloom_filter_test_alloc::builder::create_by_accuracy(num_items, 41 | fpp, 42 | seed, 43 | alloc(0)); 44 | for (int i = 0; i < num_items; ++i) { 45 | if (i % 2 == 0) { // alternate string and integer updates; the previous test (num_items % 1 == 0) was always true, leaving the else branch dead 46 | bf1.update(std::to_string(i)); 47 | } else { 48 | bf1.update(i); 49 | } 50 | } 51 | auto bytes1 = bf1.serialize(0); 52 | auto bf2 = bloom_filter_test_alloc::deserialize(bytes1.data(), bytes1.size(), 0); 53 | 54 | std::stringstream ss; 55 | bf1.serialize(ss); 56 | auto bf3 = bloom_filter_test_alloc::deserialize(ss, alloc(0)); 57 | 58 | bf3.reset(); 59 | for (int i = 0; i < num_items; ++i) { 60 | bf1.update(-1.0 * i); 61 | } 62 | 63 | bf3.union_with(bf1); 64 | 65 | auto bytes2 = bf3.serialize(0); 66 | auto bf4 = bloom_filter_test_alloc::deserialize(bytes2.data(), bytes2.size(), 0); 67 | 68 | auto bf5 = bloom_filter_test_alloc::wrap(bytes2.data(), bytes2.size(), 0); 69 | auto bf6 = bloom_filter_test_alloc::writable_wrap(bytes2.data(), bytes2.size(), 0); 70 | } 71 | REQUIRE(test_allocator_total_bytes == 0); 72 | REQUIRE(test_allocator_net_allocations == 0); 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /tuple/include/array_tuple_union.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements.
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef ARRAY_TUPLE_UNION_HPP_ 21 | #define ARRAY_TUPLE_UNION_HPP_ 22 | 23 | #include 24 | #include 25 | #include "array_tuple_sketch.hpp" 26 | 27 | #include "tuple_union.hpp" 28 | 29 | namespace datasketches { 30 | 31 | /// default array tuple union policy 32 | template 33 | struct default_array_tuple_union_policy { 34 | default_array_tuple_union_policy(uint8_t num_values = 1): num_values_(num_values) {} 35 | 36 | void operator()(Array& array, const Array& other) const { 37 | for (uint8_t i = 0; i < num_values_; ++i) { 38 | array[i] += other[i]; 39 | } 40 | } 41 | uint8_t get_num_values() const { 42 | return num_values_; 43 | } 44 | private: 45 | uint8_t num_values_; 46 | }; 47 | 48 | /// array tuple union 49 | template< 50 | typename Array, 51 | typename Policy = default_array_tuple_union_policy, 52 | typename Allocator = typename Array::allocator_type 53 | > 54 | class array_tuple_union: public tuple_union { 55 | public: 56 | using value_type = typename Array::value_type; 57 | using Base = tuple_union; 58 | using CompactSketch = compact_array_tuple_sketch; 59 | using resize_factor = theta_constants::resize_factor; 60 | 61 | class builder; 62 | 63 | CompactSketch get_result(bool ordered = true) const; 64 | 65 | private: 
66 | // for builder 67 | array_tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator); 68 | }; 69 | 70 | template 71 | class array_tuple_union::builder: public tuple_base_builder { 72 | public: 73 | builder(const Policy& policy = Policy(), const Allocator& allocator = Allocator()); 74 | array_tuple_union build() const; 75 | }; 76 | 77 | } /* namespace datasketches */ 78 | 79 | #include "array_tuple_union_impl.hpp" 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /common/include/conditional_forward.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef CONDITIONAL_FORWARD_HPP_ 21 | #define CONDITIONAL_FORWARD_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | 27 | // Forward type T2 as rvalue reference if type T1 is rvalue reference 28 | // fwd_type is a std::conditional that yields either T2 or a std::remove_reference'd type with && applied 29 | template 30 | using fwd_type = typename std::conditional::value, 31 | T2, typename std::remove_reference::type&&>::type; 32 | 33 | template 34 | fwd_type conditional_forward(T2&& value) { 35 | return std::forward>(std::forward(value)); 36 | } 37 | 38 | // Forward container as iterators 39 | // forward_begin and forward_end each have two enable_if-selected overloads: the plain c.begin()/c.end() one when the lvalue-reference test or the const_iterator comparison holds, and a std::make_move_iterator one when both fail 40 | template 41 | auto forward_begin(Container&& c) -> typename std::enable_if< 42 | std::is_lvalue_reference::value || 43 | std::is_same::type::const_iterator, decltype(c.begin())>::value, 44 | decltype(c.begin()) 45 | >::type 46 | { 47 | return c.begin(); 48 | } 49 | 50 | template 51 | auto forward_begin(Container&& c) -> typename std::enable_if< 52 | !std::is_lvalue_reference::value && 53 | !std::is_same::type::const_iterator, decltype(c.begin())>::value, 54 | decltype(std::make_move_iterator(c.begin())) 55 | >::type 56 | { 57 | return std::make_move_iterator(c.begin()); 58 | } 59 | 60 | template 61 | auto forward_end(Container&& c) -> typename std::enable_if< 62 | std::is_lvalue_reference::value || 63 | std::is_same::type::const_iterator, decltype(c.begin())>::value, 64 | decltype(c.end()) 65 | >::type 66 | { 67 | return c.end(); 68 | } 69 | 70 | template 71 | auto forward_end(Container&& c) -> typename std::enable_if< 72 | !std::is_lvalue_reference::value && 73 | !std::is_same::type::const_iterator, decltype(c.begin())>::value, 74 | decltype(std::make_move_iterator(c.end())) 75 | >::type 76 | { 77 | return std::make_move_iterator(c.end()); 78 | } 79 | 80 | } /* namespace datasketches */ 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /hll/test/AuxHashMapTest.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software
Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include "AuxHashMap.hpp" 25 | 26 | namespace datasketches { 27 | 28 | TEST_CASE("aux hash map: check must replace", "[aux_hash_map]") { 29 | AuxHashMap>* map = new AuxHashMap>(3, 7, std::allocator()); 30 | map->mustAdd(100, 5); 31 | int val = map->mustFindValueFor(100); 32 | REQUIRE(val == 5); 33 | 34 | map->mustReplace(100, 10); 35 | val = map->mustFindValueFor(100); 36 | REQUIRE(val == 10); 37 | 38 | REQUIRE_THROWS_AS(map->mustReplace(101, 5), std::invalid_argument); 39 | 40 | delete map; 41 | } 42 | 43 | TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") { 44 | auto map = std::unique_ptr>, std::function>*)>>( 45 | AuxHashMap>::newAuxHashMap(3, 7, std::allocator()), 46 | AuxHashMap>::make_deleter() 47 | ); 48 | REQUIRE(map->getLgAuxArrInts() == 3); 49 | for (uint8_t i = 1; i <= 7; ++i) { 50 | map->mustAdd(i, i); 51 | } 52 | REQUIRE(map->getLgAuxArrInts() == 4); 53 | auto itr = map->begin(true); 54 | int count1 = 0; 55 | int count2 = 0; 56 | while (itr != map->end()) { 57 | ++count2; 58 | int pair = *itr; 59 | if (pair != 0) { ++count1; } 60 | ++itr; 61 | } 62 | REQUIRE(count1 == 7); 63 | 
REQUIRE(count2 == 16); 64 | } 65 | 66 | TEST_CASE("aux hash map: check exception must find value for", "[aux_hash_map]") { 67 | AuxHashMap> map(3, 7, std::allocator()); 68 | map.mustAdd(100, 5); 69 | REQUIRE_THROWS_AS(map.mustFindValueFor(101), std::invalid_argument); 70 | } 71 | 72 | TEST_CASE("aux hash map: check exception must add", "[aux_hash_map]") { 73 | AuxHashMap>* map = AuxHashMap>::newAuxHashMap(3, 7, std::allocator()); 74 | map->mustAdd(100, 5); 75 | REQUIRE_THROWS_AS(map->mustAdd(100, 6), std::invalid_argument); 76 | 77 | AuxHashMap>::make_deleter()(map); 78 | } 79 | 80 | } /* namespace datasketches */ 81 | -------------------------------------------------------------------------------- /theta/include/compact_theta_sketch_parser.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef COMPACT_THETA_SKETCH_PARSER_HPP_ 21 | #define COMPACT_THETA_SKETCH_PARSER_HPP_ 22 | 23 | #include 24 | 25 | namespace datasketches { 26 | // compact_theta_sketch_parser: one public static parse() returning compact_theta_sketch_data; layout constants and helper functions are private statics 27 | template 28 | class compact_theta_sketch_parser { 29 | public: 30 | struct compact_theta_sketch_data { 31 | bool is_empty; 32 | bool is_ordered; 33 | uint16_t seed_hash; 34 | uint32_t num_entries; 35 | uint64_t theta; 36 | const void* entries_start_ptr; 37 | uint8_t entry_bits; 38 | }; 39 | 40 | static compact_theta_sketch_data parse(const void* ptr, size_t size, uint64_t seed, bool dump_on_error = false); 41 | 42 | private: 43 | // offsets are in sizeof(type): presumably units of the type named by each constant's suffix (_BYTE, _U16, _U32, _U64) - confirm against the impl header 44 | static const size_t COMPACT_SKETCH_PRE_LONGS_BYTE = 0; 45 | static const size_t COMPACT_SKETCH_SERIAL_VERSION_BYTE = 1; 46 | static const size_t COMPACT_SKETCH_TYPE_BYTE = 2; 47 | static const size_t COMPACT_SKETCH_FLAGS_BYTE = 5; 48 | static const size_t COMPACT_SKETCH_SEED_HASH_U16 = 3; 49 | static const size_t COMPACT_SKETCH_SINGLE_ENTRY_U64 = 1; // ver 3 50 | static const size_t COMPACT_SKETCH_NUM_ENTRIES_U32 = 2; // ver 1-3 51 | static const size_t COMPACT_SKETCH_ENTRIES_EXACT_U64 = 2; // ver 1-3 52 | static const size_t COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 = 3; // ver 1-3 53 | static const size_t COMPACT_SKETCH_THETA_U64 = 2; // ver 1-3 54 | static const size_t COMPACT_SKETCH_V4_ENTRY_BITS_BYTE = 3; 55 | static const size_t COMPACT_SKETCH_V4_NUM_ENTRIES_BYTES_BYTE = 4; 56 | static const size_t COMPACT_SKETCH_V4_THETA_U64 = 1; 57 | static const size_t COMPACT_SKETCH_V4_PACKED_DATA_EXACT_BYTE = 8; 58 | static const size_t COMPACT_SKETCH_V4_PACKED_DATA_ESTIMATION_BYTE = 16; 59 | // NOTE(review): the *_FLAG values (2, 4) look like bit masks for the flags byte - confirm against the impl header 60 | static const uint8_t COMPACT_SKETCH_IS_EMPTY_FLAG = 2; 61 | static const uint8_t COMPACT_SKETCH_IS_ORDERED_FLAG = 4; 62 | // NOTE(review): presumably the value expected in the byte at COMPACT_SKETCH_TYPE_BYTE - confirm 63 | static const uint8_t COMPACT_SKETCH_TYPE = 3; 64 | 65 | static void check_memory_size(const void* ptr, size_t actual_bytes, size_t expected_bytes, bool dump_on_error); 66 | static std::string hex_dump(const uint8_t* ptr, size_t
size); 67 | }; 68 | 69 | } /* namespace datasketches */ 70 | 71 | #include "compact_theta_sketch_parser_impl.hpp" 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /hll/test/hll_sketch_deserialize_from_java_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace datasketches { 25 | 26 | // assume the binary sketches for this test have been generated by datasketches-java code 27 | // in the subdirectory called "java" in the root directory of this project 28 | static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/"; 29 | 30 | TEST_CASE("hll4 sketch", "[serde_compat]") { 31 | const unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000}; 32 | for (const unsigned n: n_arr) { 33 | std::ifstream is; 34 | is.exceptions(std::ios::failbit | std::ios::badbit); 35 | is.open(testBinaryInputPath + "hll4_n" + std::to_string(n) + "_java.sk", std::ios::binary); 36 | const auto sketch = hll_sketch::deserialize(is); 37 | REQUIRE(sketch.get_lg_config_k() == 12); 38 | REQUIRE(sketch.is_empty() == (n == 0)); 39 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); 40 | } 41 | } 42 | 43 | TEST_CASE("hll6 sketch", "[serde_compat]") { 44 | const unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000}; 45 | for (const unsigned n: n_arr) { 46 | std::ifstream is; 47 | is.exceptions(std::ios::failbit | std::ios::badbit); 48 | is.open(testBinaryInputPath + "hll6_n" + std::to_string(n) + "_java.sk", std::ios::binary); 49 | const auto sketch = hll_sketch::deserialize(is); 50 | REQUIRE(sketch.get_lg_config_k() == 12); 51 | REQUIRE(sketch.is_empty() == (n == 0)); 52 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); 53 | } 54 | } 55 | 56 | TEST_CASE("hll8 sketch", "[serde_compat]") { 57 | const unsigned n_arr[] = {0, 10, 100, 1000, 10000, 100000, 1000000}; 58 | for (const unsigned n: n_arr) { 59 | std::ifstream is; 60 | is.exceptions(std::ios::failbit | std::ios::badbit); 61 | is.open(testBinaryInputPath + "hll8_n" + std::to_string(n) + "_java.sk", std::ios::binary); 62 | const auto sketch = hll_sketch::deserialize(is); 63 | REQUIRE(sketch.get_lg_config_k() == 12); 64 | REQUIRE(sketch.is_empty() == (n == 0)); 
65 | REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.02)); 66 | } 67 | } 68 | 69 | } /* namespace datasketches */ 70 | -------------------------------------------------------------------------------- /cpc/include/u32_table.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | // author Kevin Lang, Oath Research 21 | 22 | #ifndef U32_TABLE_HPP_ 23 | #define U32_TABLE_HPP_ 24 | 25 | // This is a highly specialized hash table that was designed 26 | // to be a part of the library's CPC sketch implementation 27 | 28 | #include "cpc_common.hpp" 29 | 30 | namespace datasketches { 31 | 32 | static const uint32_t U32_TABLE_UPSIZE_NUMER = 3LL; 33 | static const uint32_t U32_TABLE_UPSIZE_DENOM = 4LL; 34 | 35 | static const uint32_t U32_TABLE_DOWNSIZE_NUMER = 1LL; 36 | static const uint32_t U32_TABLE_DOWNSIZE_DENOM = 4LL; 37 | 38 | template 39 | class u32_table { 40 | public: 41 | using vector_u32 = std::vector::template rebind_alloc>; 42 | 43 | u32_table(const A& allocator); 44 | u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator); 45 | 46 | inline uint32_t get_num_items() const; 47 | inline const uint32_t* get_slots() const; 48 | inline uint8_t get_lg_size() const; 49 | inline void clear(); 50 | 51 | // returns true iff the item was new and was therefore added to the table 52 | inline bool maybe_insert(uint32_t item); 53 | // returns true iff the item was present and was therefore removed from the table 54 | inline bool maybe_delete(uint32_t item); 55 | 56 | static u32_table make_from_pairs(const uint32_t* pairs, uint32_t num_pairs, uint8_t lg_k, const A& allocator); 57 | 58 | vector_u32 unwrapping_get_items() const; 59 | 60 | static void merge( 61 | const uint32_t* arr_a, size_t start_a, size_t length_a, // input 62 | const uint32_t* arr_b, size_t start_b, size_t length_b, // input 63 | uint32_t* arr_c, size_t start_c // output 64 | ); 65 | 66 | static void introspective_insertion_sort(uint32_t* a, size_t l, size_t r); 67 | static void knuth_shell_sort3(uint32_t* a, size_t l, size_t r); 68 | 69 | private: 70 | 71 | uint8_t lg_size; // log2 of number of slots 72 | uint8_t num_valid_bits; 73 | uint32_t num_items; 74 | vector_u32 slots; 75 | 76 | inline uint32_t lookup(uint32_t item) const; 77 | inline void 
must_insert(uint32_t item); 78 | inline void rebuild(uint8_t new_lg_size); 79 | }; 80 | 81 | } /* namespace datasketches */ 82 | 83 | #include "u32_table_impl.hpp" 84 | 85 | #endif 86 | -------------------------------------------------------------------------------- /common/include/kolmogorov_smirnov.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef KOLMOGOROV_SMIRNOV_HPP_ 21 | #define KOLMOGOROV_SMIRNOV_HPP_ 22 | 23 | namespace datasketches { 24 | 25 | /** 26 | * Kolmogorov-Smirnov test for KLL or Quantiles sketches 27 | */ 28 | class kolmogorov_smirnov { 29 | public: 30 | /** 31 | * Computes the raw delta area between two quantile sketches for the Kolmogorov-Smirnov Test. 32 | * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T. 
33 | * @param sketch1 sketch 1 34 | * @param sketch2 sketch 2 35 | * @return the raw delta between two KLL quantile sketches 36 | */ 37 | template 38 | static double delta(const Sketch& sketch1, const Sketch& sketch2); 39 | 40 | /** 41 | * Computes the adjusted delta area threshold for the Kolmogorov-Smirnov Test. 42 | * Adjusts the computed threshold by the error epsilons of the two given sketches. 43 | * See Kolmogorov–Smirnov Test 44 | * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T. 45 | * @param sketch1 sketch 1 46 | * @param sketch2 sketch 2 47 | * @param p Target p-value. Typically .001 to .1, e.g., .05. 48 | * @return the adjusted threshold to be compared with the raw delta 49 | */ 50 | template 51 | static double threshold(const Sketch& sketch1, const Sketch& sketch2, double p); 52 | 53 | /** 54 | * Performs the Kolmogorov-Smirnov Test between two quantile sketches. 55 | * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T. 56 | * Note: if the given sketches have insufficient data or if the sketch sizes are too small, 57 | * this will return false. 58 | * @param sketch1 sketch 1 59 | * @param sketch2 sketch 2 60 | * @param p Target p-value. Typically .001 to .1, e.g., .05. 61 | * @return Boolean indicating whether we can reject the null hypothesis (that the sketches 62 | * reflect the same underlying distribution) using the provided p-value. 63 | */ 64 | template 65 | static bool test(const Sketch& sketch1, const Sketch& sketch2, double p); 66 | }; 67 | 68 | } /* namespace datasketches */ 69 | 70 | #include "kolmogorov_smirnov_impl.hpp" 71 | 72 | #endif 73 | --------------------------------------------------------------------------------