├── .github
│   └── workflows
│       └── ci.yml
├── .gitignore
├── 00-RELEASENOTES
├── Cargo.toml
├── LICENSE
├── QUICK_START.md
├── README.md
├── build.sh
├── requirements.txt
├── src
│   ├── bloom
│   │   ├── command_handler.rs
│   │   ├── data_type.rs
│   │   ├── mod.rs
│   │   └── utils.rs
│   ├── commands
│   │   ├── bf.add.json
│   │   ├── bf.card.json
│   │   ├── bf.exists.json
│   │   ├── bf.info.json
│   │   ├── bf.insert.json
│   │   ├── bf.load.json
│   │   ├── bf.madd.json
│   │   ├── bf.mexists.json
│   │   └── bf.reserve.json
│   ├── configs.rs
│   ├── lib.rs
│   ├── metrics.rs
│   └── wrapper
│       ├── bloom_callback.rs
│       └── mod.rs
└── tests
    ├── conftest.py
    ├── test_bloom_acl_category.py
    ├── test_bloom_aofrewrite.py
    ├── test_bloom_basic.py
    ├── test_bloom_command.py
    ├── test_bloom_correctness.py
    ├── test_bloom_defrag.py
    ├── test_bloom_keyspace.py
    ├── test_bloom_metrics.py
    ├── test_bloom_replication.py
    ├── test_bloom_save_and_restore.py
    ├── test_bloom_valkeypy_compatibility.py
    └── valkey_bloom_test_case.py

/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: ci
2 | 
3 | on:
4 |   push:
5 |   pull_request:
6 | 
7 | env:
8 |   CARGO_TERM_COLOR: always
9 |   VALKEY_REPO_URL: https://github.com/valkey-io/valkey.git
10 |   TEST_FRAMEWORK_REPO: https://github.com/valkey-io/valkey-test-framework
11 |   TEST_FRAMEWORK_DIR: tests/build/valkeytestframework
12 | 
13 | jobs:
14 |   build-ubuntu-latest:
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       fail-fast: false
18 |       matrix:
19 |         server_version: ['unstable', '8.0', '8.1']
20 |     steps:
21 |       - uses: actions/checkout@v4
22 |       - name: Set the server version for python integration tests
23 |         run: echo "SERVER_VERSION=${{ matrix.server_version }}" >> $GITHUB_ENV
24 |       - name: Run cargo fmt and clippy checks
25 |         run: |
26 |           cargo fmt --check
27 |           cargo clippy --profile release --all-targets
28 |       - name: Release Build
29 |         run: |
30 |           if [ "${{ matrix.server_version }}" = "8.0" ]; then
31 |             cargo build --all --all-targets --release --features valkey_8_0
32 |           else
33 |             cargo build --all --all-targets --release
34 |           fi
35 |       - name: Run unit tests
36 |         run: cargo test --features enable-system-alloc
37 |       - name: Make valkey-server binary
38 |         run: |
39 |           mkdir -p "tests/build/binaries/${{ matrix.server_version }}"
40 |           cd tests/build
41 |           git clone "${{ env.VALKEY_REPO_URL }}"
42 |           cd valkey
43 |           git checkout ${{ matrix.server_version }}
44 |           make -j
45 |           cp src/valkey-server ../binaries/${{ matrix.server_version }}/
46 |       - name: Set up test framework
47 |         run: |
48 |           echo "Cloning test framework..."
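          # The framework sources get vendored under tests/build/valkeytestframework
          # (TEST_FRAMEWORK_DIR above), presumably so the pytest suite can import
          # them directly from the tests/ tree.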
49 |           git clone ${{ env.TEST_FRAMEWORK_REPO }}
50 |           mkdir -p ${{ env.TEST_FRAMEWORK_DIR }}
51 |           mv valkey-test-framework/src/* ${{ env.TEST_FRAMEWORK_DIR }}/
52 |           rm -rf valkey-test-framework
53 |       - name: Set up Python
54 |         uses: actions/setup-python@v3
55 |         with:
56 |           python-version: '3.8'
57 |       - name: Install dependencies
58 |         run: |
59 |           python -m pip install --upgrade pip
60 |           pip install -r requirements.txt
61 |       - name: Update module path
62 |         run: echo "MODULE_PATH=$(realpath target/release/libvalkey_bloom.so)" >> $GITHUB_ENV
63 |       - name: Run integration tests
64 |         run: python -m pytest --cache-clear -v "tests/"
65 | 
66 |   build-macos-latest:
67 |     runs-on: macos-latest
68 |     steps:
69 |       - uses: actions/checkout@v4
70 |       - name: Run cargo fmt and clippy checks
71 |         run: |
72 |           cargo fmt --check
73 |           cargo clippy --profile release --all-targets
74 |       - name: Release Build
75 |         run: cargo build --all --all-targets --release
76 |       - name: Run unit tests
77 |         run: cargo test --features enable-system-alloc
78 | 
79 |   asan-build:
80 |     runs-on: ubuntu-latest
81 |     strategy:
82 |       fail-fast: false
83 |       matrix:
84 |         server_version: ['unstable', '8.0', '8.1']
85 |     steps:
86 |       - uses: actions/checkout@v4
87 |       - name: Set the server version for python integration tests
88 |         run: echo "SERVER_VERSION=${{ matrix.server_version }}" >> $GITHUB_ENV
89 |       - name: Run cargo fmt and clippy checks
90 |         run: |
91 |           cargo fmt --check
92 |           cargo clippy --profile release --all-targets
93 |       - name: Release Build
94 |         run: |
95 |           if [ "${{ matrix.server_version }}" = "8.0" ]; then
96 |             cargo build --all --all-targets --release --features valkey_8_0
97 |           else
98 |             cargo build --all --all-targets --release
99 |           fi
100 |       - name: Run unit tests
101 |         run: cargo test --features enable-system-alloc
102 |       - name: Make valkey-server binary with ASan
103 |         run: |
104 |           mkdir -p "tests/build/binaries/${{ matrix.server_version }}"
105 |           cd tests/build
106 |           git clone "${{ env.VALKEY_REPO_URL }}"
107 |           cd valkey
108 |           git checkout ${{ matrix.server_version }}
109 |           make distclean
110 |           make -j SANITIZER=address SERVER_CFLAGS='-Werror' BUILD_TLS=module
111 |           cp src/valkey-server ../binaries/${{ matrix.server_version }}/
112 |       - name: Set up test framework
113 |         run: |
114 |           echo "Cloning test framework..."
115 |           git clone ${{ env.TEST_FRAMEWORK_REPO }}
116 |           mkdir -p ${{ env.TEST_FRAMEWORK_DIR }}
117 |           mv valkey-test-framework/src/* ${{ env.TEST_FRAMEWORK_DIR }}/
118 |           rm -rf valkey-test-framework
119 |       - name: Set up Python
120 |         uses: actions/setup-python@v3
121 |         with:
122 |           python-version: '3.8'
123 |       - name: Install dependencies
124 |         run: |
125 |           python -m pip install --upgrade pip
126 |           pip install -r requirements.txt
127 |       - name: Update module path
128 |         run: echo "MODULE_PATH=$(realpath target/release/libvalkey_bloom.so)" >> $GITHUB_ENV
129 |       - name: Run integration tests
130 |         run: |
131 |           python -m pytest --capture=sys --cache-clear -v "tests/" -m "not skip_for_asan" 2>&1 | tee test_output.tmp
132 | 
133 |           if grep -q "LeakSanitizer: detected memory leaks" test_output.tmp; then
134 |             RED='\033[0;31m'
135 |             echo -e "${RED}Memory leaks detected in the following tests:"
136 |             LEAKING_TESTS=$(grep -B 2 "LeakSanitizer: detected memory leaks" test_output.tmp | \
137 |               grep -v "LeakSanitizer" | \
138 |               grep ".*\.py::")
139 |             LEAK_COUNT=$(echo "$LEAKING_TESTS" | wc -l)
140 |             echo "$LEAKING_TESTS" | while read -r line; do
141 |               echo "::error::Test with leak: $line"
142 |             done
143 |             echo -e "\n$LEAK_COUNT python integration tests have leaks detected in them"
144 |             rm test_output.tmp
145 |             exit 1
146 |           fi
147 |           rm test_output.tmp
148 | 

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Cargo.lock
2 | target
3 | tests/build
4 | __pycache__
5 | .pytest_cache
6 | test-data
7 | .attach_pid*
8 | src/commands/commands

--------------------------------------------------------------------------------
/00-RELEASENOTES:
--------------------------------------------------------------------------------
1 | Hello! This file is just a placeholder, since this is the "unstable" branch
2 | of Valkey-Bloom, the place where all the development happens.
3 | 
4 | There are no release notes for this branch; it gets forked into another branch
5 | every time there is a partial feature freeze, in order to eventually create
6 | a new stable release.
7 | 
8 | Usually "unstable" is stable enough for you to use in development environments;
9 | however, you should never use it in production environments. You can find the
10 | latest stable release here:
11 | 
12 |     https://github.com/valkey-io/valkey-bloom/releases
13 | 
14 | Happy hacking!
15 | 
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "valkey-bloom"
3 | authors = ["Karthik Subbarao"]
4 | version = "99.99.99-dev"
5 | edition = "2021"
6 | license = "BSD-3-Clause"
7 | repository = "https://github.com/valkey-io/valkey-bloom"
8 | readme = "README.md"
9 | description = "A bloom filter module for Valkey"
10 | homepage = "https://github.com/valkey-io/valkey-bloom"
11 | 
12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
13 | 
14 | [dependencies]
15 | valkey-module = { version = "0.1.5", features = ["min-valkey-compatibility-version-8-0", "min-redis-compatibility-version-7-2"]}
16 | valkey-module-macros = "0"
17 | linkme = "0"
18 | bloomfilter = { version = "3.0.1", features = ["serde"] }
19 | lazy_static = "1.4.0"
20 | libc = "0.2"
21 | serde = { version = "1.0", features = ["derive"] }
22 | bincode = "1.3"
23 | 
24 | [dev-dependencies]
25 | rand = "0.8"
26 | rstest = "0.23.0"
27 | 
28 | [lib]
29 | crate-type = ["cdylib"]
30 | name = "valkey_bloom"
31 | 
32 | [profile.dev]
33 | opt-level = 0
34 | debug = 2
35 | debug-assertions = true
36 | 
37 | [features]
38 | default = ["min-valkey-compatibility-version-8-0"]
39 | enable-system-alloc = ["valkey-module/enable-system-alloc"]
40 | min-valkey-compatibility-version-8-0 = []
41 | valkey_8_0 = [] # Valkey-bloom is intended to be loaded on server versions >= Valkey 8.1 and by default it is built this way (unless this flag is provided). It is however compatible with Valkey version 8.0 if the user explicitly provides this feature flag in their cargo build command.
42 | use-redismodule-api = [] # We don't support this feature flag which is why it is empty.

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 | 
3 | Copyright (c) 2024-present, Valkey contributors
4 | All rights reserved.
5 | 
6 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 | 
8 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 | * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 | 
12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 | 
--------------------------------------------------------------------------------
/QUICK_START.md:
--------------------------------------------------------------------------------
1 | # Quick Start
2 | 
3 | Follow these steps to set up, build, and run the Valkey server with the valkey-bloom module. This guide will walk you through creating a bloom filter, inserting items, and checking whether items are present in the filter.
4 | 
5 | ## Step 1: Install Valkey and valkey-bloom
6 | 
7 | 1. Build Valkey from source by following the instructions [here](https://github.com/valkey-io/valkey?tab=readme-ov-file#building-valkey-using-makefile). Make sure to use Valkey version 8.0 or above.
8 | 
9 | 2. Build the valkey-bloom module from source by following the instructions [here](https://github.com/valkey-io/valkey-bloom/blob/unstable/README.md#build-instructions).
10 | 
11 | ## Step 2: Run the Valkey Server with valkey-bloom
12 | 
13 | Once valkey-bloom is built, run the Valkey server with the module loaded:
14 | 
15 | On Linux:
16 | ```bash
17 | ./valkey-server --loadmodule ./target/release/libvalkey_bloom.so
18 | ```
19 | 
20 | You should see the Valkey server start, and it will be ready to accept commands.
21 | 
22 | ## Step 3: Create a Bloom Filter
23 | 
24 | Start a Valkey CLI session:
25 | 
26 | ```bash
27 | valkey-cli
28 | ```
29 | 
30 | Create a bloom filter using the BF.ADD, BF.INSERT, BF.RESERVE, or BF.MADD commands. For example:
31 | 
32 | ```bash
33 | BF.ADD filter-key item-val
34 | ```
35 | 
36 | - `filter-key` is the name of the bloom filter we will be operating on
37 | - `item-val` is the item we are inserting into the bloom filter
38 | 
39 | ## Step 4: Insert some more items
40 | 
41 | To insert items into an already created filter, use the `BF.ADD`, `BF.MADD` or `BF.INSERT` commands:
42 | 
43 | ```bash
44 | BF.ADD filter-key example
45 | BF.MADD filter-key example1 example2
46 | ```
47 | 
48 | Replace the example values with the actual items you want to add.
49 | 
50 | ## Step 5: Check if items are present
51 | 
52 | Now that you've created a bloom filter and inserted items, you can check whether items have been added. Use the `BF.EXISTS` or `BF.MEXISTS` commands to check for items:
53 | 
54 | ```bash
55 | BF.EXISTS filter-key example
56 | ```
57 | 
58 | This command checks if an item is present in a bloom filter. Bloom filters can have false positives, but no false negatives. This means that if the BF.EXISTS command returns 0, the item is definitely not present; if it returns 1, there is a possibility (determined by the false-positive rate) that the item is not actually present.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # valkey-bloom
2 | 
3 | Valkey-Bloom (BSD-3-Clause) is a Rust-based Valkey module which brings a Bloom Filter data type into Valkey and supports versions >= 8.0.
With it, users can create bloom filters (space-efficient probabilistic data structures), add elements, check whether elements exist, auto-scale their filters, customize bloom filter properties, perform RDB save and load operations, etc.
4 | 
5 | Valkey-Bloom is built using `bloomfilter::Bloom` (https://crates.io/crates/bloomfilter, which has a BSD-2-Clause license).
6 | 
7 | It is API-compatible with the bloom filter command syntax of the official Valkey client libraries, including valkey-py, valkey-java, and valkey-go (as well as the equivalent Redis libraries).
8 | 
9 | ## Supported commands
10 | ```
11 | BF.EXISTS
12 | BF.ADD
13 | BF.MEXISTS
14 | BF.MADD
15 | BF.CARD
16 | BF.RESERVE
17 | BF.INFO
18 | BF.INSERT
19 | BF.LOAD
20 | ```
21 | 
22 | ## Build instructions
23 | ```
24 | curl https://sh.rustup.rs -sSf | sh
25 | sudo yum install clang
26 | git clone https://github.com/valkey-io/valkey-bloom.git
27 | cd valkey-bloom
28 | # Building for Valkey 8.1 and above:
29 | cargo build --release
30 | # Building for Valkey 8.0 specifically:
31 | cargo build --release --features valkey_8_0
32 | valkey-server --loadmodule ./target/release/libvalkey_bloom.so
33 | ```
34 | 
35 | #### Local development script to run format checks, build a cargo release build, and run unit / integration tests:
36 | ```
37 | # Builds the valkey-server (unstable) for integration testing.
38 | SERVER_VERSION=unstable
39 | ./build.sh
40 | # Same as above, but uses valkey-server (8.0) for integration testing.
41 | SERVER_VERSION=8.0
42 | ./build.sh
43 | # Build with ASan. You may need to remove the old valkey binary if you have used ./build.sh before; you can do this by deleting the `build` folder in the `tests` folder.
44 | ASAN_BUILD=true
45 | ./build.sh
46 | ```
47 | 
48 | ## Load the Module
49 | To test the module with a Valkey server, you can load the module in the following ways:
50 | 
51 | #### Using valkey.conf:
52 | ```
53 | 1. Add the following to valkey.conf:
54 |    loadmodule /path/to/libvalkey_bloom.so
55 | 2. Start valkey-server:
56 |    valkey-server /path/to/valkey.conf
57 | ```
58 | 
59 | #### Starting Valkey with the `--loadmodule` option:
60 | ```text
61 | valkey-server --loadmodule /path/to/libvalkey_bloom.so
62 | ```
63 | 
64 | #### Using the Valkey command `MODULE LOAD`:
65 | ```
66 | 1. Connect to a running Valkey instance using valkey-cli
67 | 2. Execute Valkey command:
68 |    MODULE LOAD /path/to/libvalkey_bloom.so
69 | ```
70 | ## Feature Flags
71 | 
72 | * valkey_8_0: valkey-bloom is intended to be loaded on server versions >= Valkey 8.1 and by default it is built this way (unless this flag is provided). It is, however, compatible with Valkey version 8.0 if the user explicitly provides this feature flag in their cargo build command.
73 | ```
74 | cargo build --release --features valkey_8_0
75 | ```
76 | 
77 | This can also be done by specifying SERVER_VERSION=8.0 and then running `./build.sh`.

--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | 
3 | # Script to run format checks on the valkey-bloom module, build it (generating the .so files), and run unit and integration tests.
4 | 
5 | # Exit the script if any command fails
6 | set -e
7 | 
8 | SCRIPT_DIR=$(pwd)
9 | echo "Script Directory: $SCRIPT_DIR"
10 | 
11 | echo "Running cargo fmt and clippy checks..."
12 | cargo fmt --check
13 | cargo clippy --profile release --all-targets -- -D clippy::all
14 | 
15 | 
16 | echo "Running unit tests..."
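# Note: `enable-system-alloc` presumably switches the unit tests to the system
# allocator, since the server's module allocator is unavailable outside a running
# valkey-server (an assumption based on the feature name in Cargo.toml).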
17 | cargo test --features enable-system-alloc
18 | 
19 | # Ensure SERVER_VERSION environment variable is set
20 | if [ -z "$SERVER_VERSION" ]; then
21 |     echo "WARNING: SERVER_VERSION environment variable is not set. Defaulting to unstable."
22 |     export SERVER_VERSION="unstable"
23 | fi
24 | 
25 | if [ "$SERVER_VERSION" != "unstable" ] && [ "$SERVER_VERSION" != "8.0" ] && [ "$SERVER_VERSION" != "8.1" ]; then
26 |     echo "ERROR: Unsupported version - $SERVER_VERSION"
27 |     exit 1
28 | fi
29 | 
30 | echo "Running cargo build release..."
31 | if [ "$SERVER_VERSION" = "8.0" ] ; then
32 |     RUSTFLAGS="-D warnings" cargo build --all --all-targets --release --features valkey_8_0
33 | else
34 |     RUSTFLAGS="-D warnings" cargo build --all --all-targets --release
35 | fi
36 | 
37 | 
38 | REPO_URL="https://github.com/valkey-io/valkey.git"
39 | BINARY_PATH="tests/build/binaries/$SERVER_VERSION/valkey-server"
40 | CACHED_VALKEY_PATH="tests/build/valkey"
41 | if [ -f "$BINARY_PATH" ] && [ -x "$BINARY_PATH" ]; then
42 |     echo "valkey-server binary '$BINARY_PATH' found."
43 | else
44 |     echo "valkey-server binary '$BINARY_PATH' not found."
45 |     mkdir -p "tests/build/binaries/$SERVER_VERSION"
46 |     rm -rf "$CACHED_VALKEY_PATH"
47 |     cd tests/build
48 |     git clone "$REPO_URL"
49 |     cd valkey
50 |     git checkout "$SERVER_VERSION"
51 |     make distclean
52 |     if [ ! -z "${ASAN_BUILD}" ]; then
53 |         make -j SANITIZER=address
54 |     else
55 |         make -j
56 |     fi
57 |     cp src/valkey-server ../binaries/$SERVER_VERSION/
58 |     cd "$SCRIPT_DIR"
59 |     rm -rf "$CACHED_VALKEY_PATH"
60 | fi
61 | 
62 | 
63 | TEST_FRAMEWORK_REPO="https://github.com/valkey-io/valkey-test-framework"
64 | TEST_FRAMEWORK_DIR="tests/build/valkeytestframework"
65 | 
66 | if [ -d "$TEST_FRAMEWORK_DIR" ]; then
67 |     echo "valkeytestframework found."
68 | else
69 |     echo "Cloning valkey-test-framework..."
70 |     git clone "$TEST_FRAMEWORK_REPO"
71 |     mkdir -p "$TEST_FRAMEWORK_DIR"
72 |     mv "valkey-test-framework/src"/* "$TEST_FRAMEWORK_DIR/"
73 |     rm -rf valkey-test-framework
74 | fi
75 | 
76 | REQUIREMENTS_FILE="requirements.txt"
77 | 
78 | # Check if pip is available
79 | if command -v pip > /dev/null 2>&1; then
80 |     echo "Using pip to install packages..."
81 |     pip install -r "$SCRIPT_DIR/$REQUIREMENTS_FILE"
82 | # Check if pip3 is available
83 | elif command -v pip3 > /dev/null 2>&1; then
84 |     echo "Using pip3 to install packages..."
85 |     pip3 install -r "$SCRIPT_DIR/$REQUIREMENTS_FILE"
86 | else
87 |     echo "Error: Neither pip nor pip3 is available. Please install a Python package installer."
88 |     exit 1
89 | fi
90 | 
91 | os_type=$(uname)
92 | MODULE_EXT=".so"
93 | if [ "$os_type" = "Darwin" ]; then
94 |     MODULE_EXT=".dylib"
95 | elif [ "$os_type" = "Linux" ]; then
96 |     MODULE_EXT=".so"
97 | else
98 |     echo "Unsupported OS type: $os_type"
99 |     exit 1
100 | fi
101 | export MODULE_PATH="$SCRIPT_DIR/target/release/libvalkey_bloom$MODULE_EXT"
102 | 
103 | echo "Running the integration tests..."
104 | if [ ! -z "${ASAN_BUILD}" ]; then
105 |     # TEST_PATTERN can be used to run specific tests or test patterns.
106 |     if [ -n "$TEST_PATTERN" ]; then
107 |         python3 -m pytest --capture=sys --cache-clear -v "$SCRIPT_DIR/tests/" -k "$TEST_PATTERN" 2>&1 | tee test_output.tmp
108 |     else
109 |         echo "TEST_PATTERN is not set. Running all integration tests."
110 |         python3 -m pytest --capture=sys --cache-clear -v "$SCRIPT_DIR/tests/" 2>&1 | tee test_output.tmp
111 |     fi
112 | 
113 |     # Check for memory leaks in the output
114 |     if grep -q "LeakSanitizer: detected memory leaks" test_output.tmp; then
115 |         RED='\033[0;31m'
116 |         echo -e "${RED}Memory leaks detected in the following tests:"
117 |         LEAKING_TESTS=$(grep -B 2 "LeakSanitizer: detected memory leaks" test_output.tmp | \
118 |             grep -v "LeakSanitizer" | \
119 |             grep ".*\.py::")
120 | 
121 |         LEAK_COUNT=$(echo "$LEAKING_TESTS" | wc -l)
122 | 
123 |         # Output each leaking test
124 |         echo "$LEAKING_TESTS" | while read -r line; do
125 |             echo "::error::Test with leak: $line"
126 |         done
127 | 
128 |         echo -e "\n$LEAK_COUNT python integration tests have leaks detected in them"
129 |         rm test_output.tmp
130 |         exit 1
131 |     fi
132 |     rm test_output.tmp
133 | else
134 |     # TEST_PATTERN can be used to run specific tests or test patterns.
135 |     if [ -n "$TEST_PATTERN" ]; then
136 |         python3 -m pytest --cache-clear -v "$SCRIPT_DIR/tests/" -k "$TEST_PATTERN"
137 |     else
138 |         echo "TEST_PATTERN is not set. Running all integration tests."
139 |         python3 -m pytest --cache-clear -v "$SCRIPT_DIR/tests/"
140 |     fi
141 | fi
142 | 
143 | echo "Build, Format Checks, Unit tests, and Integration Tests succeeded"

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | valkey
2 | pytest==6

--------------------------------------------------------------------------------
/src/bloom/command_handler.rs:
--------------------------------------------------------------------------------
1 | use crate::bloom::data_type::BLOOM_TYPE;
2 | use crate::bloom::utils;
3 | use crate::bloom::utils::BloomObject;
4 | use crate::configs;
5 | use crate::configs::{
6 |     BLOOM_CAPACITY_MAX, BLOOM_CAPACITY_MIN, BLOOM_EXPANSION_MAX, BLOOM_EXPANSION_MIN,
7 |     BLOOM_FP_RATE_MAX, BLOOM_FP_RATE_MIN, BLOOM_TIGHTENING_RATIO_MAX, BLOOM_TIGHTENING_RATIO_MIN,
8 | };
9 | use crate::wrapper::must_obey_client;
10 | use std::sync::atomic::Ordering;
11 | use valkey_module::NotifyEvent;
12 | use valkey_module::{Context, ValkeyError, ValkeyResult, ValkeyString, ValkeyValue, VALKEY_OK};
13 | /// Helper function used to add items to a bloom object. It handles both multi-item and single-item add operations.
14 | /// It is used by any command that allows adding of items: BF.ADD, BF.MADD, and BF.INSERT.
15 | /// Returns the result of the item add operation on success as a ValkeyValue and a ValkeyError on failure.
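/// For illustration (shapes taken from the logic below): with `multi` set, the reply
/// is an array with one integer per item (1 if the item was newly added, 0 if it may
/// already be present), and an error string entry ends the array early if an add fails;
/// without `multi`, the reply is a single integer, and a failed add returns an error reply.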
16 | fn handle_bloom_add(
17 |     args: &[ValkeyString],
18 |     argc: usize,
19 |     item_idx: usize,
20 |     bf: &mut BloomObject,
21 |     multi: bool,
22 |     add_succeeded: &mut bool,
23 |     validate_size_limit: bool,
24 | ) -> Result<ValkeyValue, ValkeyError> {
25 |     match multi {
26 |         true => {
27 |             let mut result = Vec::with_capacity(argc - item_idx);
28 |             let mut curr_cmd_idx = item_idx;
29 |             while curr_cmd_idx < argc {
30 |                 let item = args[curr_cmd_idx].as_slice();
31 |                 match bf.add_item(item, validate_size_limit) {
32 |                     Ok(add_result) => {
33 |                         if add_result == 1 {
34 |                             *add_succeeded = true;
35 |                         }
36 |                         result.push(ValkeyValue::Integer(add_result));
37 |                     }
38 |                     Err(err) => {
39 |                         result.push(ValkeyValue::StaticError(err.as_str()));
40 |                         break;
41 |                     }
42 |                 };
43 |                 curr_cmd_idx += 1;
44 |             }
45 |             Ok(ValkeyValue::Array(result))
46 |         }
47 |         false => {
48 |             let item = args[item_idx].as_slice();
49 |             match bf.add_item(item, validate_size_limit) {
50 |                 Ok(add_result) => {
51 |                     *add_succeeded = add_result == 1;
52 |                     Ok(ValkeyValue::Integer(add_result))
53 |                 }
54 |                 Err(err) => Err(ValkeyError::Str(err.as_str())),
55 |             }
56 |         }
57 |     }
58 | }
59 | 
60 | /// Structure to help provide the command arguments required for replication. This is used by mutative commands.
61 | struct ReplicateArgs<'a> {
62 |     capacity: i64,
63 |     expansion: u32,
64 |     fp_rate: f64,
65 |     tightening_ratio: f64,
66 |     seed: [u8; 32],
67 |     items: &'a [ValkeyString],
68 | }
69 | 
70 | /// Helper function to replicate mutative commands to the replica nodes and publish keyspace events.
71 | /// There are two main cases for replication:
72 | /// - RESERVE operation: This is any bloom object creation which will be replicated with the exact properties of the
73 | ///   primary node using BF.INSERT.
74 | /// - ADD operation: This is the case where only items were added to a bloom object. Here, the command is replicated verbatim.
75 | ///
76 | /// With this, replication becomes deterministic.
77 | /// For keyspace events, we publish an event for both the RESERVE and ADD scenarios depending on if either or both of the
78 | /// cases occurred.
79 | fn replicate_and_notify_events(
80 |     ctx: &Context,
81 |     key_name: &ValkeyString,
82 |     add_operation: bool,
83 |     reserve_operation: bool,
84 |     args: ReplicateArgs,
85 | ) {
86 |     if reserve_operation {
87 |         // Any bloom filter creation should have a deterministic replication with the exact same properties as what was
88 |         // created on the primary. This is done using BF.INSERT.
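        // For illustration only (the argument values here are hypothetical), a BF.ADD
        // that created a new filter on the primary could reach replicas as:
        //   BF.INSERT key CAPACITY 100 ERROR 0.01 TIGHTENING 0.5 SEED <32-byte seed> ITEMS item
        // built below from the exact properties of the newly created object.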
89 |         let capacity_str =
90 |             ValkeyString::create_from_slice(std::ptr::null_mut(), "CAPACITY".as_bytes());
91 |         let capacity_val = ValkeyString::create_from_slice(
92 |             std::ptr::null_mut(),
93 |             args.capacity.to_string().as_bytes(),
94 |         );
95 |         let fp_rate_str = ValkeyString::create_from_slice(std::ptr::null_mut(), "ERROR".as_bytes());
96 |         let fp_rate_val = ValkeyString::create_from_slice(
97 |             std::ptr::null_mut(),
98 |             args.fp_rate.to_string().as_bytes(),
99 |         );
100 |         let tightening_str =
101 |             ValkeyString::create_from_slice(std::ptr::null_mut(), "TIGHTENING".as_bytes());
102 |         let tightening_val = ValkeyString::create_from_slice(
103 |             std::ptr::null_mut(),
104 |             args.tightening_ratio.to_string().as_bytes(),
105 |         );
106 |         let seed_str = ValkeyString::create_from_slice(std::ptr::null_mut(), "SEED".as_bytes());
107 |         let seed_val = ValkeyString::create_from_slice(std::ptr::null_mut(), &args.seed);
108 |         let mut cmd = vec![
109 |             key_name,
110 |             &capacity_str,
111 |             &capacity_val,
112 |             &fp_rate_str,
113 |             &fp_rate_val,
114 |             &tightening_str,
115 |             &tightening_val,
116 |             &seed_str,
117 |             &seed_val,
118 |         ];
119 |         // Add nonscaling / expansion related arguments.
120 |         let expansion_args = match args.expansion == 0 {
121 |             true => {
122 |                 let nonscaling_str =
123 |                     ValkeyString::create_from_slice(std::ptr::null_mut(), "NONSCALING".as_bytes());
124 |                 vec![nonscaling_str]
125 |             }
126 |             false => {
127 |                 let expansion_str =
128 |                     ValkeyString::create_from_slice(std::ptr::null_mut(), "EXPANSION".as_bytes());
129 |                 let expansion_val = ValkeyString::create_from_slice(
130 |                     std::ptr::null_mut(),
131 |                     args.expansion.to_string().as_bytes(),
132 |                 );
133 |                 vec![expansion_str, expansion_val]
134 |             }
135 |         };
136 |         for arg in &expansion_args {
137 |             cmd.push(arg);
138 |         }
139 |         // Add items if any exist.
140 |         let items_str = ValkeyString::create_from_slice(std::ptr::null_mut(), "ITEMS".as_bytes());
141 |         if !args.items.is_empty() {
142 |             cmd.push(&items_str);
143 |         }
144 |         for item in args.items {
145 |             cmd.push(item);
146 |         }
147 |         ctx.replicate("BF.INSERT", cmd.as_slice());
148 |     } else if add_operation {
149 |         ctx.replicate_verbatim();
150 |     }
151 |     if add_operation {
152 |         ctx.notify_keyspace_event(NotifyEvent::GENERIC, utils::ADD_EVENT, key_name);
153 |     }
154 |     if reserve_operation {
155 |         ctx.notify_keyspace_event(NotifyEvent::GENERIC, utils::RESERVE_EVENT, key_name);
156 |     }
157 | }
158 | 
159 | /// Function that implements logic to handle the BF.ADD and BF.MADD commands.
160 | pub fn bloom_filter_add_value(
161 |     ctx: &Context,
162 |     input_args: &[ValkeyString],
163 |     multi: bool,
164 | ) -> ValkeyResult {
165 |     let argc = input_args.len();
166 |     if (!multi && argc != 3) || argc < 3 {
167 |         return Err(ValkeyError::WrongArity);
168 |     }
169 |     let mut curr_cmd_idx = 1;
170 |     // Parse the filter name
171 |     let filter_name = &input_args[curr_cmd_idx];
172 |     curr_cmd_idx += 1;
173 |     // If the filter does not exist, create one
174 |     let filter_key = ctx.open_key_writable(filter_name);
175 |     let value = match filter_key.get_value::<BloomObject>(&BLOOM_TYPE) {
176 |         Ok(v) => v,
177 |         Err(_) => {
178 |             return Err(ValkeyError::WrongType);
179 |         }
180 |     };
181 |     // Skip bloom filter size validation on replicated cmds.
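    // (Presumably because a replica must apply whatever the primary has already
    // accepted, the object-size limit is only enforced for regular client commands.)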
182 |     let validate_size_limit = !must_obey_client(ctx);
183 |     let mut add_succeeded = false;
184 |     match value {
185 |         Some(bloom) => {
186 |             let response = handle_bloom_add(
187 |                 input_args,
188 |                 argc,
189 |                 curr_cmd_idx,
190 |                 bloom,
191 |                 multi,
192 |                 &mut add_succeeded,
193 |                 validate_size_limit,
194 |             );
195 |             let replicate_args = ReplicateArgs {
196 |                 capacity: bloom.capacity(),
197 |                 expansion: bloom.expansion(),
198 |                 fp_rate: bloom.fp_rate(),
199 |                 tightening_ratio: bloom.tightening_ratio(),
200 |                 seed: bloom.seed(),
201 |                 items: &input_args[curr_cmd_idx..],
202 |             };
203 |             replicate_and_notify_events(ctx, filter_name, add_succeeded, false, replicate_args);
204 |             response
205 |         }
206 |         None => {
207 |             // Instantiate empty bloom filter.
208 |             let fp_rate = *configs::BLOOM_FP_RATE_F64
209 |                 .lock()
210 |                 .expect("Unable to get a lock on fp_rate static");
211 |             let tightening_ratio = *configs::BLOOM_TIGHTENING_F64
212 |                 .lock()
213 |                 .expect("Unable to get a lock on tightening ratio static");
214 |             let capacity = configs::BLOOM_CAPACITY.load(Ordering::Relaxed);
215 |             let expansion = configs::BLOOM_EXPANSION.load(Ordering::Relaxed) as u32;
216 |             let use_random_seed = configs::BLOOM_USE_RANDOM_SEED.load(Ordering::Relaxed);
217 |             let seed = match use_random_seed {
218 |                 true => (None, true),
219 |                 false => (Some(configs::FIXED_SEED), false),
220 |             };
221 |             let mut bloom = match BloomObject::new_reserved(
222 |                 fp_rate,
223 |                 tightening_ratio,
224 |                 capacity,
225 |                 expansion,
226 |                 seed,
227 |                 validate_size_limit,
228 |             ) {
229 |                 Ok(bf) => bf,
230 |                 Err(err) => return Err(ValkeyError::Str(err.as_str())),
231 |             };
232 |             let replicate_args = ReplicateArgs {
233 |                 capacity: bloom.capacity(),
234 |                 expansion: bloom.expansion(),
235 |                 fp_rate: bloom.fp_rate(),
236 |                 tightening_ratio: bloom.tightening_ratio(),
237 |                 seed: bloom.seed(),
238 |                 items: &input_args[curr_cmd_idx..],
239 |             };
240 |             let response = handle_bloom_add(
241 |                 input_args,
242 |                 argc,
243 |                 curr_cmd_idx,
244 |                 &mut bloom,
245 |                 multi,
246 |                 &mut add_succeeded,
247 |                 validate_size_limit,
248 |             );
249 |             match filter_key.set_value(&BLOOM_TYPE, bloom) {
250 |                 Ok(()) => {
251 |                     replicate_and_notify_events(
252 |                         ctx,
253 |                         filter_name,
254 |                         add_succeeded,
255 |                         true,
256 |                         replicate_args,
257 |                     );
258 |                     response
259 |                 }
260 |                 Err(_) => Err(ValkeyError::Str(utils::ERROR)),
261 |             }
262 |         }
263 |     }
264 | }
265 | 
266 | /// Helper function used to check whether an item (or multiple items) exists on a bloom object.
267 | fn handle_item_exists(value: Option<&BloomObject>, item: &[u8]) -> ValkeyValue {
268 |     if let Some(val) = value {
269 |         if val.item_exists(item) {
270 |             return ValkeyValue::Integer(1);
271 |         }
272 |         // Item has not been added to the filter.
273 |         return ValkeyValue::Integer(0);
274 |     };
275 |     // Key does not exist.
276 |     ValkeyValue::Integer(0)
277 | }
278 | 
279 | /// Function that implements logic to handle the BF.EXISTS and BF.MEXISTS commands.
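/// For example, `BF.EXISTS key item` replies with a single 0/1 integer, while
/// `BF.MEXISTS key item1 item2` replies with an array of 0/1 integers; a missing
/// key is treated as "item not present" (see `handle_item_exists` above).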
280 | pub fn bloom_filter_exists(
281 |     ctx: &Context,
282 |     input_args: &[ValkeyString],
283 |     multi: bool,
284 | ) -> ValkeyResult {
285 |     let argc = input_args.len();
286 |     if (!multi && argc != 3) || argc < 3 {
287 |         return Err(ValkeyError::WrongArity);
288 |     }
289 |     let mut curr_cmd_idx = 1;
290 |     // Parse the filter name
291 |     let filter_name = &input_args[curr_cmd_idx];
292 |     curr_cmd_idx += 1;
293 |     // Parse the value/s to be checked for existence in the filter
294 |     let filter_key = ctx.open_key(filter_name);
295 |     let value = match filter_key.get_value::<BloomObject>(&BLOOM_TYPE) {
296 |         Ok(v) => v,
297 |         Err(_) => {
298 |             return Err(ValkeyError::WrongType);
299 |         }
300 |     };
301 |     if !multi {
302 |         let item = input_args[curr_cmd_idx].as_slice();
303 |         return Ok(handle_item_exists(value, item));
304 |     }
305 |     let mut result = Vec::with_capacity(argc - curr_cmd_idx);
306 |     while curr_cmd_idx < argc {
307 |         let item = input_args[curr_cmd_idx].as_slice();
308 |         result.push(handle_item_exists(value, item));
309 |         curr_cmd_idx += 1;
310 |     }
311 |     Ok(ValkeyValue::Array(result))
312 | }
313 | 
314 | /// Function that implements logic to handle the BF.CARD command.
315 | pub fn bloom_filter_card(ctx: &Context, input_args: &[ValkeyString]) -> ValkeyResult {
316 |     let argc = input_args.len();
317 |     if argc != 2 {
318 |         return Err(ValkeyError::WrongArity);
319 |     }
320 |     let curr_cmd_idx = 1;
321 |     // Parse the filter name
322 |     let filter_name = &input_args[curr_cmd_idx];
323 |     let filter_key = ctx.open_key(filter_name);
324 |     let value = match filter_key.get_value::<BloomObject>(&BLOOM_TYPE) {
325 |         Ok(v) => v,
326 |         Err(_) => {
327 |             return Err(ValkeyError::WrongType);
328 |         }
329 |     };
330 |     match value {
331 |         Some(val) => Ok(ValkeyValue::Integer(val.cardinality())),
332 |         None => Ok(ValkeyValue::Integer(0)),
333 |     }
334 | }
335 | 
336 | /// Function that implements logic to handle the BF.RESERVE command.
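/// Accepted shape, as parsed below: `BF.RESERVE <key> <error_rate> <capacity>
/// [NONSCALING | EXPANSION <rate>]`, e.g. a hypothetical
/// `BF.RESERVE bf 0.01 10000 EXPANSION 2`; reserving over an existing key is an error.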
337 | pub fn bloom_filter_reserve(ctx: &Context, input_args: &[ValkeyString]) -> ValkeyResult {
338 |     let argc = input_args.len();
339 |     if !(4..=6).contains(&argc) {
340 |         return Err(ValkeyError::WrongArity);
341 |     }
342 |     let mut curr_cmd_idx = 1;
343 |     // Parse the filter name
344 |     let filter_name = &input_args[curr_cmd_idx];
345 |     curr_cmd_idx += 1;
346 |     // Parse the error rate
347 |     let fp_rate = match input_args[curr_cmd_idx].to_string_lossy().parse::<f64>() {
348 |         Ok(num) if num > BLOOM_FP_RATE_MIN && num < BLOOM_FP_RATE_MAX => num,
349 |         Ok(num) if !(num > BLOOM_FP_RATE_MIN && num < BLOOM_FP_RATE_MAX) => {
350 |             return Err(ValkeyError::Str(utils::ERROR_RATE_RANGE));
351 |         }
352 |         _ => {
353 |             return Err(ValkeyError::Str(utils::BAD_ERROR_RATE));
354 |         }
355 |     };
356 |     curr_cmd_idx += 1;
357 |     // Parse the capacity
358 |     let capacity = match input_args[curr_cmd_idx].to_string_lossy().parse::<i64>() {
359 |         Ok(num) if (BLOOM_CAPACITY_MIN..=BLOOM_CAPACITY_MAX).contains(&num) => num,
360 |         Ok(0) => {
361 |             return Err(ValkeyError::Str(utils::CAPACITY_LARGER_THAN_0));
362 |         }
363 |         _ => {
364 |             return Err(ValkeyError::Str(utils::BAD_CAPACITY));
365 |         }
366 |     };
367 |     curr_cmd_idx += 1;
368 |     let mut expansion = configs::BLOOM_EXPANSION.load(Ordering::Relaxed) as u32;
369 |     if argc > 4 {
370 |         match input_args[curr_cmd_idx]
371 |             .to_string_lossy()
372 |             .to_uppercase()
373 |             .as_str()
374 |         {
375 |             "NONSCALING" if argc == 5 => {
376 |                 expansion = 0;
377 |             }
378 |             "EXPANSION" if argc == 6 => {
379 |                 curr_cmd_idx += 1;
380 |                 expansion = match input_args[curr_cmd_idx].to_string_lossy().parse::<u32>() {
381 |                     Ok(num) if (BLOOM_EXPANSION_MIN..=BLOOM_EXPANSION_MAX).contains(&num) => num,
382 |                     _ => {
383 |                         return Err(ValkeyError::Str(utils::BAD_EXPANSION));
384 |                     }
385 |                 };
386 |             }
387 |             _ => {
388 |                 return Err(ValkeyError::Str(utils::ERROR));
389 |             }
390 |         }
391 |     }
392 |     // If the filter does not exist, create one
393 |     let filter_key = ctx.open_key_writable(filter_name);
394 |     let value = match filter_key.get_value::<BloomObject>(&BLOOM_TYPE) {
395 |         Ok(v) => v,
396 |         Err(_) => {
397 |             return Err(ValkeyError::WrongType);
398 |         }
399 |     };
400 |     match value {
401 |         Some(_) => Err(ValkeyError::Str(utils::ITEM_EXISTS)),
402 |         None => {
403 |             let use_random_seed = configs::BLOOM_USE_RANDOM_SEED.load(Ordering::Relaxed);
404 |             let seed = match use_random_seed {
405 |                 true => (None, true),
406 |                 false => (Some(configs::FIXED_SEED), false),
407 |             };
408 |             // Skip bloom filter size validation on replicated cmds.
409 |             let validate_size_limit = !must_obey_client(ctx);
410 |             let tightening_ratio = *configs::BLOOM_TIGHTENING_F64
411 |                 .lock()
412 |                 .expect("Unable to get a lock on tightening ratio static");
413 |             let bloom = match BloomObject::new_reserved(
414 |                 fp_rate,
415 |                 tightening_ratio,
416 |                 capacity,
417 |                 expansion,
418 |                 seed,
419 |                 validate_size_limit,
420 |             ) {
421 |                 Ok(bf) => bf,
422 |                 Err(err) => return Err(ValkeyError::Str(err.as_str())),
423 |             };
424 |             let replicate_args = ReplicateArgs {
425 |                 capacity: bloom.capacity(),
426 |                 expansion: bloom.expansion(),
427 |                 fp_rate: bloom.fp_rate(),
428 |                 tightening_ratio: bloom.tightening_ratio(),
429 |                 seed: bloom.seed(),
430 |                 items: &[],
431 |             };
432 |             match filter_key.set_value(&BLOOM_TYPE, bloom) {
433 |                 Ok(()) => {
434 |                     replicate_and_notify_events(ctx, filter_name, false, true, replicate_args);
435 |                     VALKEY_OK
436 |                 }
437 |                 Err(_) => Err(ValkeyError::Str(utils::ERROR)),
438 |             }
439 |         }
440 |     }
441 | }
442 | 
443 | /// Function that implements logic to handle the BF.INSERT command.
444 | pub fn bloom_filter_insert(ctx: &Context, input_args: &[ValkeyString]) -> ValkeyResult {
445 |     let argc = input_args.len();
446 |     // At the very least, we need: BF.INSERT <key>
447 |     if argc < 2 {
448 |         return Err(ValkeyError::WrongArity);
449 |     }
450 |     let mut idx = 1;
451 |     // Parse the filter name
452 |     let filter_name = &input_args[idx];
453 |     idx += 1;
454 |     let mut fp_rate = *configs::BLOOM_FP_RATE_F64
455 |         .lock()
456 |         .expect("Unable to get a lock on fp_rate static");
457 |     let mut tightening_ratio = *configs::BLOOM_TIGHTENING_F64
458 |         .lock()
459 |         .expect("Unable to get a lock on tightening ratio static");
460 |     let mut capacity = configs::BLOOM_CAPACITY.load(Ordering::Relaxed);
461 |     let mut expansion = configs::BLOOM_EXPANSION.load(Ordering::Relaxed) as u32;
462 |     let use_random_seed = configs::BLOOM_USE_RANDOM_SEED.load(Ordering::Relaxed);
463 |     let mut seed = match use_random_seed {
464 |         true => (None, true),
465 |         false => (Some(configs::FIXED_SEED), false),
466 |     };
467 |     let mut validate_scale_to = None;
468 |     let mut nocreate = false;
469 |     let mut items_provided = false;
470 |     while idx < argc {
471 |         match input_args[idx].to_string_lossy().to_uppercase().as_str() {
472 |             "ERROR" => {
473 |                 if idx >= (argc - 1) {
474 |                     return Err(ValkeyError::WrongArity);
475 |                 }
476 |                 idx += 1;
477 |                 fp_rate = match input_args[idx].to_string_lossy().parse::<f64>() {
478 |                     Ok(num) if num > BLOOM_FP_RATE_MIN && num < BLOOM_FP_RATE_MAX => num,
479 |                     Ok(num) if !(num > BLOOM_FP_RATE_MIN && num < BLOOM_FP_RATE_MAX) => {
480 |                         return Err(ValkeyError::Str(utils::ERROR_RATE_RANGE));
481 |                     }
482 |                     _ => {
483 |                         return Err(ValkeyError::Str(utils::BAD_ERROR_RATE));
484 |                     }
485 |                 };
486 |             }
487 |             "TIGHTENING" => {
488 |                 // Note: This argument is only supported on replicated commands since primary nodes replicate bloom objects
489 |                 // deterministically using every global bloom config/property.
490 |                 if idx >= (argc - 1) {
491 |                     return Err(ValkeyError::WrongArity);
492 |                 }
493 |                 idx += 1;
494 |                 tightening_ratio = match input_args[idx].to_string_lossy().parse::<f64>() {
495 |                     Ok(num)
496 |                         if num > BLOOM_TIGHTENING_RATIO_MIN && num < BLOOM_TIGHTENING_RATIO_MAX =>
497 |                     {
498 |                         num
499 |                     }
500 |                     Ok(num)
501 |                         if !(num > BLOOM_TIGHTENING_RATIO_MIN
502 |                             && num < BLOOM_TIGHTENING_RATIO_MAX) =>
503 |                     {
504 |                         return Err(ValkeyError::Str(utils::TIGHTENING_RATIO_RANGE));
505 |                     }
506 |                     _ => {
507 |                         return Err(ValkeyError::Str(utils::BAD_TIGHTENING_RATIO));
508 |                     }
509 |                 };
510 |             }
511 |             "CAPACITY" => {
512 |                 if idx >= (argc - 1) {
513 |                     return Err(ValkeyError::WrongArity);
514 |                 }
515 |                 idx += 1;
516 |                 capacity = match input_args[idx].to_string_lossy().parse::<i64>() {
517 |                     Ok(num) if (BLOOM_CAPACITY_MIN..=BLOOM_CAPACITY_MAX).contains(&num) => num,
518 |                     Ok(0) => {
519 |                         return Err(ValkeyError::Str(utils::CAPACITY_LARGER_THAN_0));
520 |                     }
521 |                     _ => {
522 |                         return Err(ValkeyError::Str(utils::BAD_CAPACITY));
523 |                     }
524 |                 };
525 |             }
526 |             "SEED" => {
527 |                 // Note: This argument is only supported on replicated commands since primary nodes replicate bloom objects
528 |                 // deterministically using every global bloom config/property.
529 |                 if idx >= (argc - 1) {
530 |                     return Err(ValkeyError::WrongArity);
531 |                 }
532 |                 idx += 1;
533 |                 // The BloomObject implementation uses a 32-byte (u8) array as the seed.
534 |                 let seed_result: Result<[u8; 32], _> = input_args[idx].as_slice().try_into();
535 |                 let Ok(seed_raw) = seed_result else {
536 |                     return Err(ValkeyError::Str(utils::INVALID_SEED));
537 |                 };
538 |                 let is_seed_random = seed_raw != configs::FIXED_SEED;
539 |                 seed = (Some(seed_raw), is_seed_random);
540 |             }
541 |             "NOCREATE" => {
542 |                 nocreate = true;
543 |             }
544 |             "NONSCALING" => {
545 |                 expansion = 0;
546 |             }
547 |             "EXPANSION" => {
548 |                 if idx >= (argc - 1) {
549 |                     return Err(ValkeyError::WrongArity);
550 |                 }
551 |                 idx += 1;
552 |                 expansion = match input_args[idx].to_string_lossy().parse::<u32>() {
553 |                     Ok(num) if (BLOOM_EXPANSION_MIN..=BLOOM_EXPANSION_MAX).contains(&num) => num,
554 |                     _ => {
555 |                         return Err(ValkeyError::Str(utils::BAD_EXPANSION));
556 |                     }
557 |                 };
558 |             }
559 |             "VALIDATESCALETO" => {
560 |                 if idx >= (argc - 1) {
561 |                     return Err(ValkeyError::WrongArity);
562 |                 }
563 |                 idx += 1;
564 |                 validate_scale_to = match input_args[idx].to_string_lossy().parse::<i64>() {
565 |                     Ok(num) if (BLOOM_CAPACITY_MIN..=BLOOM_CAPACITY_MAX).contains(&num) => {
566 |                         Some(num)
567 |                     }
568 |                     Ok(0) => {
569 |                         return Err(ValkeyError::Str(utils::CAPACITY_LARGER_THAN_0));
570 |                     }
571 |                     _ => {
572 |                         return Err(ValkeyError::Str(utils::BAD_CAPACITY));
573 |                     }
574 |                 };
575 |             }
576 |             "ITEMS" => {
577 |                 idx += 1;
578 |                 items_provided = true;
579 |                 break;
580 |             }
581 |             _ => {
582 |                 return Err(ValkeyError::Str(utils::UNKNOWN_ARGUMENT));
583 |             }
584 |         }
585 |         idx += 1;
586 |     }
587 |     if idx == argc && items_provided {
588 |         // When the `ITEMS` argument is provided, we expect additional item arg/s to be provided.
589 |         return Err(ValkeyError::WrongArity);
590 |     }
591 |     // If a target capacity was requested (VALIDATESCALETO), verify that the filter can actually scale to it. Using the VALIDATESCALETO and NONSCALING options together is invalid.
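    // e.g. a hypothetical `BF.INSERT key NONSCALING VALIDATESCALETO 10000` is rejected
    // below, since a non-scaling filter can never grow toward a requested capacity.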
592 |     if let Some(scale_to) = validate_scale_to {
593 |         if expansion == 0 {
594 |             return Err(ValkeyError::Str(
595 |                 utils::NON_SCALING_AND_VALIDATE_SCALE_TO_IS_INVALID,
596 |             ));
597 |         }
598 |         match utils::BloomObject::calculate_max_scaled_capacity(
599 |             capacity,
600 |             fp_rate,
601 |             scale_to,
602 |             tightening_ratio,
603 |             expansion,
604 |         ) {
605 |             Ok(_) => (),
606 |             Err(err) => {
607 |                 return Err(ValkeyError::Str(err.as_str()));
608 |             }
609 |         };
610 |     }
611 |     // If the filter does not exist, create one
612 |     let filter_key = ctx.open_key_writable(filter_name);
613 |     let value = match filter_key.get_value::<BloomObject>(&BLOOM_TYPE) {
614 |         Ok(v) => v,
615 |         Err(_) => {
616 |             return Err(ValkeyError::WrongType);
617 |         }
618 |     };
619 |     // Skip bloom filter size validation on replicated cmds.
620 |     let validate_size_limit = !must_obey_client(ctx);
621 |     let mut add_succeeded = false;
622 |     match value {
623 |         Some(bloom) => {
624 |             let response = handle_bloom_add(
625 |                 input_args,
626 |                 argc,
627 |                 idx,
628 |                 bloom,
629 |                 true,
630 |                 &mut add_succeeded,
631 |                 validate_size_limit,
632 |             );
633 |             let replicate_args = ReplicateArgs {
634 |                 capacity: bloom.capacity(),
635 |                 expansion: bloom.expansion(),
636 |                 fp_rate: bloom.fp_rate(),
637 |                 tightening_ratio: bloom.tightening_ratio(),
638 |                 seed: bloom.seed(),
639 |                 items: &input_args[idx..],
640 |             };
641 |             replicate_and_notify_events(ctx, filter_name, add_succeeded, false, replicate_args);
642 |             response
643 |         }
644 |         None => {
645 |             if nocreate {
646 |                 return Err(ValkeyError::Str(utils::NOT_FOUND));
647 |             }
648 |             let mut bloom = match BloomObject::new_reserved(
649 |                 fp_rate,
650 |                 tightening_ratio,
651 |                 capacity,
652 |                 expansion,
653 |                 seed,
654 |                 validate_size_limit,
655 |             ) {
656 |                 Ok(bf) => bf,
657 |                 Err(err) => return Err(ValkeyError::Str(err.as_str())),
658 |             };
659 |             let replicate_args = ReplicateArgs {
660 |                 capacity: bloom.capacity(),
661 |                 expansion: bloom.expansion(),
662 |                 fp_rate: bloom.fp_rate(),
663 |                 tightening_ratio: bloom.tightening_ratio(),
664 |                 seed: bloom.seed(),
665 |                 items: &input_args[idx..],
666 |             };
667 |             let response = handle_bloom_add(
668 |                 input_args,
669 |                 argc,
670 |                 idx,
671 |                 &mut bloom,
672 |                 true,
673 |                 &mut add_succeeded,
674 |                 validate_size_limit,
675 |             );
676 |             match filter_key.set_value(&BLOOM_TYPE, bloom) {
677 |                 Ok(()) => {
678 |                     replicate_and_notify_events(
679 |                         ctx,
680 |                         filter_name,
681 |                         add_succeeded,
682 |                         true,
683 |                         replicate_args,
684 |                     );
685 |                     response
686 |                 }
687 |                 Err(_) => Err(ValkeyError::Str(utils::ERROR)),
688 |             }
689 |         }
690 |     }
691 | }
692 | 
693 | /// Function that implements logic to handle the BF.INFO command.
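/// For example, `BF.INFO key` replies with the full field/value array built below,
/// while `BF.INFO key CAPACITY` (or SIZE, FILTERS, ITEMS, ERROR, ...) replies with
/// just that single value.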
694 | pub fn bloom_filter_info(ctx: &Context, input_args: &[ValkeyString]) -> ValkeyResult {
695 |     let argc = input_args.len();
696 |     if !(2..=3).contains(&argc) {
697 |         return Err(ValkeyError::WrongArity);
698 |     }
699 |     let mut curr_cmd_idx = 1;
700 |     // Parse the filter name
701 |     let filter_name = &input_args[curr_cmd_idx];
702 |     curr_cmd_idx += 1;
703 |     let filter_key = ctx.open_key(filter_name);
704 |     let value = match filter_key.get_value::<BloomObject>(&BLOOM_TYPE) {
705 |         Ok(v) => v,
706 |         Err(_) => {
707 |             return Err(ValkeyError::WrongType);
708 |         }
709 |     };
710 |     match value {
711 |         Some(val) if argc == 3 => {
712 |             match input_args[curr_cmd_idx]
713 |                 .to_string_lossy()
714 |                 .to_uppercase()
715 |                 .as_str()
716 |             {
717 |                 "CAPACITY" => Ok(ValkeyValue::Integer(val.capacity())),
718 |                 "SIZE" => Ok(ValkeyValue::Integer(val.memory_usage() as i64)),
719 |                 "FILTERS" => Ok(ValkeyValue::Integer(val.num_filters() as i64)),
720 |                 "ITEMS" => Ok(ValkeyValue::Integer(val.cardinality())),
721 |                 "ERROR" => Ok(ValkeyValue::Float(val.fp_rate())),
722 |                 "TIGHTENING" if val.expansion() > 0 => {
723 |                     Ok(ValkeyValue::Float(val.tightening_ratio()))
724 |                 }
725 |                 "EXPANSION" => {
726 |                     if val.expansion() == 0 {
727 |                         return Ok(ValkeyValue::Null);
728 |                     }
729 |                     Ok(ValkeyValue::Integer(val.expansion() as i64))
730 |                 }
731 |                 // Only calculate and expose MAXSCALEDCAPACITY for scaling bloom objects.
732 |                 "MAXSCALEDCAPACITY" if val.expansion() > 0 => {
733 |                     let max_capacity = match utils::BloomObject::calculate_max_scaled_capacity(
734 |                         val.starting_capacity(),
735 |                         val.fp_rate(),
736 |                         -1,
737 |                         val.tightening_ratio(),
738 |                         val.expansion(),
739 |                     ) {
740 |                         Ok(result) => result,
741 |                         Err(err) => {
742 |                             return Err(ValkeyError::Str(err.as_str()));
743 |                         }
744 |                     };
745 |                     Ok(ValkeyValue::Integer(max_capacity))
746 |                 }
747 |                 _ => Err(ValkeyError::Str(utils::INVALID_INFO_VALUE)),
748 |             }
749 |         }
750 |         Some(val) if argc == 2 => {
751 |             let mut result = vec![
752 |                 ValkeyValue::SimpleStringStatic("Capacity"),
753 |                 ValkeyValue::Integer(val.capacity()),
754 |                 ValkeyValue::SimpleStringStatic("Size"),
755 |                 ValkeyValue::Integer(val.memory_usage() as i64),
756 |                 ValkeyValue::SimpleStringStatic("Number of filters"),
757 |                 ValkeyValue::Integer(val.num_filters() as i64),
758 |                 ValkeyValue::SimpleStringStatic("Number of items inserted"),
759 |                 ValkeyValue::Integer(val.cardinality()),
760 |                 ValkeyValue::SimpleStringStatic("Error rate"),
761 |                 ValkeyValue::Float(val.fp_rate()),
762 |                 ValkeyValue::SimpleStringStatic("Expansion rate"),
763 |             ];
764 |             if val.expansion() == 0 {
765 |                 result.push(ValkeyValue::Null);
766 |             } else {
767 |                 result.push(ValkeyValue::Integer(val.expansion() as i64));
768 |                 // The following fields are only relevant to scalable filters, so will only be included if expansion is not equal to 0.
769 |                 result.push(ValkeyValue::SimpleStringStatic("Tightening ratio"));
770 |                 result.push(ValkeyValue::Float(val.tightening_ratio()));
771 |                 let max_capacity = match utils::BloomObject::calculate_max_scaled_capacity(
772 |                     val.starting_capacity(),
773 |                     val.fp_rate(),
774 |                     -1,
775 |                     val.tightening_ratio(),
776 |                     val.expansion(),
777 |                 ) {
778 |                     Ok(result) => result,
779 |                     Err(err) => {
780 |                         return Err(ValkeyError::Str(err.as_str()));
781 |                     }
782 |                 };
783 |                 result.push(ValkeyValue::SimpleStringStatic("Max scaled capacity"));
784 |                 result.push(ValkeyValue::Integer(max_capacity));
785 |             }
786 |             Ok(ValkeyValue::Array(result))
787 |         }
788 |         _ => Err(ValkeyError::Str(utils::NOT_FOUND)),
789 |     }
790 | }
791 | 
792 | /// Function that implements logic to handle the BF.LOAD command.
793 | pub fn bloom_filter_load(ctx: &Context, input_args: &[ValkeyString]) -> ValkeyResult {
794 |     let argc = input_args.len();
795 |     if argc != 3 {
796 |         return Err(ValkeyError::WrongArity);
797 |     }
798 |     let mut idx = 1;
799 |     let filter_name = &input_args[idx];
800 |     idx += 1;
801 |     let value = &input_args[idx];
802 |     // Find the filter.
803 |     let filter_key = ctx.open_key_writable(filter_name);
804 | 
805 |     let filter = match filter_key.get_value::<BloomObject>(&BLOOM_TYPE) {
806 |         Ok(v) => v,
807 |         Err(_) => {
808 |             // Wrong data type at this key.
809 |             return Err(ValkeyError::WrongType);
810 |         }
811 |     };
812 |     match filter {
813 |         Some(_) => {
814 |             // If a bloom object already exists at this key, return an error.
815 |             Err(ValkeyError::Str(utils::KEY_EXISTS))
816 |         }
817 |         None => {
818 |             // If the filter does not exist, create it.
819 |             let hex = value.to_vec();
820 |             let validate_size_limit = !must_obey_client(ctx);
821 |             let bloom = match BloomObject::decode_object(&hex, validate_size_limit) {
822 |                 Ok(v) => v,
823 |                 Err(err) => {
824 |                     return Err(ValkeyError::Str(err.as_str()));
825 |                 }
826 |             };
827 |             let replicate_args = ReplicateArgs {
828 |                 capacity: bloom.capacity(),
829 |                 expansion: bloom.expansion(),
830 |                 fp_rate: bloom.fp_rate(),
831 |                 tightening_ratio: bloom.tightening_ratio(),
832 |                 seed: bloom.seed(),
833 |                 items: &input_args[idx..],
834 |             };
835 |             match filter_key.set_value(&BLOOM_TYPE, bloom) {
836 |                 Ok(_) => {
837 |                     replicate_and_notify_events(ctx, filter_name, false, true, replicate_args);
838 |                     VALKEY_OK
839 |                 }
840 |                 Err(_) => Err(ValkeyError::Str(utils::ERROR)),
841 |             }
842 |         }
843 |     }
844 | }

--------------------------------------------------------------------------------
/src/bloom/data_type.rs:
--------------------------------------------------------------------------------
1 | use crate::bloom::utils::BloomFilter;
2 | use crate::bloom::utils::BloomObject;
3 | use crate::configs;
4 | use crate::wrapper::bloom_callback;
5 | use crate::MODULE_NAME;
6 | use std::os::raw::c_int;
7 | use valkey_module::digest::Digest;
8 | use valkey_module::native_types::ValkeyType;
9 | use valkey_module::{logging, raw};
10 | 
11 | /// Used for decoding and encoding `BloomObject`. Currently used in AOF Rewrite.
12 | /// This value must be increased when the `BloomObject` struct changes.
13 | pub const BLOOM_OBJECT_VERSION: u8 = 1;
14 | 
15 | /// Bloom Module data type RDB encoding version.
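/// `load_from_rdb` below refuses to load data whose encoding version is newer than
/// this constant, so it must be bumped whenever the RDB encoding changes.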
16 | const BLOOM_TYPE_ENCODING_VERSION: i32 = 1;
17 | 
18 | pub static BLOOM_TYPE: ValkeyType = ValkeyType::new(
19 |     "bloomfltr",
20 |     BLOOM_TYPE_ENCODING_VERSION,
21 |     raw::RedisModuleTypeMethods {
22 |         version: raw::REDISMODULE_TYPE_METHOD_VERSION as u64,
23 |         rdb_load: Some(bloom_callback::bloom_rdb_load),
24 |         rdb_save: Some(bloom_callback::bloom_rdb_save),
25 |         aof_rewrite: Some(bloom_callback::bloom_aof_rewrite),
26 |         digest: Some(bloom_callback::bloom_digest),
27 | 
28 |         mem_usage: Some(bloom_callback::bloom_mem_usage),
29 |         free: Some(bloom_callback::bloom_free),
30 | 
31 |         aux_load: Some(bloom_callback::bloom_aux_load),
32 |         // Callback not needed as there is no AUX (out of keyspace) data to be saved.
33 |         aux_save: None,
34 |         aux_save2: None,
35 |         aux_save_triggers: raw::Aux::Before as i32,
36 | 
37 |         free_effort: Some(bloom_callback::bloom_free_effort),
38 |         // Callback not needed as it just notifies us when a bloom item is about to be freed.
39 |         unlink: None,
40 |         copy: Some(bloom_callback::bloom_copy),
41 |         defrag: Some(bloom_callback::bloom_defrag),
42 | 
43 |         // The callbacks below are not needed since the version 1 variants are used when implemented.
44 |         mem_usage2: None,
45 |         free_effort2: None,
46 |         unlink2: None,
47 |         copy2: None,
48 |     },
49 | );
50 | 
51 | pub trait ValkeyDataType {
52 |     fn load_from_rdb(rdb: *mut raw::RedisModuleIO, encver: i32) -> Option<Self>;
53 |     fn debug_digest(&self, dig: Digest);
54 | }
55 | 
56 | impl ValkeyDataType for BloomObject {
57 |     /// Callback to load and parse RDB data of a bloom item and create it.
58 |     fn load_from_rdb(rdb: *mut raw::RedisModuleIO, encver: i32) -> Option<Self> {
59 |         if encver > BLOOM_TYPE_ENCODING_VERSION {
60 |             logging::log_warning(format!("{}: Cannot load bloomfltr data type of version {} because it is higher than the loaded module's bloomfltr supported version {}", MODULE_NAME, encver, BLOOM_TYPE_ENCODING_VERSION).as_str());
61 |             return None;
62 |         }
63 |         let Ok(num_filters) = raw::load_unsigned(rdb) else {
64 |             return None;
65 |         };
66 |         let Ok(expansion) = raw::load_unsigned(rdb) else {
67 |             return None;
68 |         };
69 |         let Ok(fp_rate) = raw::load_double(rdb) else {
70 |             return None;
71 |         };
72 | 
73 |         let Ok(tightening_ratio) = raw::load_double(rdb) else {
74 |             return None;
75 |         };
76 |         let Ok(is_seed_random_u64) = raw::load_unsigned(rdb) else {
77 |             return None;
78 |         };
79 |         let is_seed_random = is_seed_random_u64 == 1;
80 |         // We start off with capacity as 1 to match the same expansion of the vector that would have occurred during bloom
81 |         // object creation and scaling as a result of BF.* operations.
82 |         let mut filters = Vec::with_capacity(1);
83 |         // Calculate the memory usage of the BloomFilter/s by summing up BloomFilter sizes as they are de-serialized.
84 |         let mut filters_memory_usage = 0;
85 |         for i in 0..num_filters {
86 |             let Ok(capacity) = raw::load_unsigned(rdb) else {
87 |                 return None;
88 |             };
89 |             let new_fp_rate = match Self::calculate_fp_rate(fp_rate, i as i32, tightening_ratio) {
90 |                 Ok(rate) => rate,
91 |                 Err(_) => {
92 |                     logging::log_warning(
93 |                         "Failed to restore bloom object: False positive degrades to 0 on scale out",
94 |                     );
95 |                     return None;
96 |                 }
97 |             };
98 |             let curr_filter_size = BloomFilter::compute_size(capacity as i64, new_fp_rate);
99 |             let curr_object_size = BloomObject::compute_size(filters.capacity())
100 |                 + filters_memory_usage
101 |                 + curr_filter_size;
102 |             if !BloomObject::validate_size(curr_object_size) {
103 |                 logging::log_warning(
104 |                     "Failed to restore bloom object: Object larger than the allowed memory limit.",
105 |                 );
106 |                 return None;
107 |             }
108 |             filters_memory_usage += curr_filter_size;
109 |             // Only load num_items when it's the last filter
110 |             let num_items = if i == num_filters - 1 {
111 |                 match raw::load_unsigned(rdb) {
112 |                     Ok(num_items) => num_items,
113 |                     Err(_) => return None,
114 |                 }
115 |             } else {
116 |                 capacity
117 |             };
118 |             let Ok(bitmap) = raw::load_string_buffer(rdb) else {
119 |                 return None;
120 |             };
121 |             let filter =
122 |                 BloomFilter::from_existing(bitmap.as_ref(), num_items as i64, capacity as i64);
123 |             if !is_seed_random && filter.seed() != configs::FIXED_SEED {
124 |                 logging::log_warning("Failed to restore bloom object: Object in fixed seed mode, but seed does not match FIXED_SEED.");
125 |                 return None;
126 |             }
127 |             filters.push(Box::new(filter));
128 |         }
129 |         let item = BloomObject::from_existing(
130 |             expansion as u32,
131 |             fp_rate,
132 |             tightening_ratio,
133 |             is_seed_random,
134 |             filters,
135 |         );
136 |         Some(item)
137 |     }
138 | 
139 |     /// Function that is used to generate a digest on the Bloom Object.
140 |     fn debug_digest(&self, mut dig: Digest) {
141 |         dig.add_long_long(self.expansion() as i64);
142 |         dig.add_string_buffer(&self.fp_rate().to_le_bytes());
143 |         dig.add_string_buffer(&self.tightening_ratio().to_le_bytes());
144 |         let is_seed_random = if self.is_seed_random() { 1 } else { 0 };
145 |         dig.add_long_long(is_seed_random);
146 |         for filter in self.filters() {
147 |             dig.add_string_buffer(filter.raw_bloom().as_slice());
148 |             dig.add_long_long(filter.num_items());
149 |             dig.add_long_long(filter.capacity());
150 |         }
151 |         dig.end_sequence();
152 |     }
153 | }
154 | 
155 | /// Load the auxiliary data outside of the regular keyspace from the RDB file
156 | pub fn bloom_rdb_aux_load(_rdb: *mut raw::RedisModuleIO) -> c_int {
157 |     logging::log_notice("Ignoring AUX fields during RDB load.");
158 |     raw::Status::Ok as i32
159 | }

--------------------------------------------------------------------------------
/src/bloom/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod command_handler;
2 | pub mod data_type;
3 | pub mod utils;

--------------------------------------------------------------------------------
/src/commands/bf.add.json:
--------------------------------------------------------------------------------
1 | {
2 |     "BF.ADD": {
3 |         "summary": "Add a single item to a bloom filter.
The bloom filter is created if it doesn't exist",
4 |         "complexity": "O(N), where N is the number of hash functions used by the bloom filter.",
5 |         "group": "bloom",
6 |         "module_since": "1.0.0",
7 |         "arity": 3,
8 |         "acl_categories": [
9 |             "FAST",
10 |             "WRITE",
11 |             "BLOOM"
12 |         ],
13 |         "arguments": [
14 |             {
15 |                 "name": "key",
16 |                 "type": "key",
17 |                 "key_spec_index": 0
18 |             },
19 |             {
20 |                 "name": "value",
21 |                 "type": "string"
22 |             }
23 |         ]
24 |     }
25 | }
26 |
--------------------------------------------------------------------------------
/src/commands/bf.card.json:
--------------------------------------------------------------------------------
1 | {
2 |     "BF.CARD": {
3 |         "summary": "Returns the cardinality of a bloom filter",
4 |         "complexity": "O(1)",
5 |         "group": "bloom",
6 |         "module_since": "1.0.0",
7 |         "arity": 2,
8 |         "acl_categories": [
9 |             "READ",
10 |             "FAST",
11 |             "BLOOM"
12 |         ],
13 |         "arguments": [
14 |             {
15 |                 "name": "key",
16 |                 "type": "key",
17 |                 "key_spec_index": 0
18 |             }
19 |         ]
20 |     }
21 | }
22 |
--------------------------------------------------------------------------------
/src/commands/bf.exists.json:
--------------------------------------------------------------------------------
1 | {
2 |     "BF.EXISTS": {
3 |         "summary": "Determines if the bloom filter contains the specified item",
4 |         "complexity": "O(N), where N is the number of hash functions used by the bloom filter.",
5 |         "group": "bloom",
6 |         "module_since": "1.0.0",
7 |         "arity": 3,
8 |         "acl_categories": [
9 |             "READ",
10 |             "FAST",
11 |             "BLOOM"
12 |         ],
13 |         "arguments": [
14 |             {
15 |                 "name": "key",
16 |                 "type": "key",
17 |                 "key_spec_index": 0
18 |             },
19 |             {
20 |                 "name": "value",
21 |                 "type": "string"
22 |             }
23 |         ]
24 |     }
25 | }
26 |
--------------------------------------------------------------------------------
/src/commands/bf.info.json:
--------------------------------------------------------------------------------
1 | {
2 |     "BF.INFO": {
3 |         "summary": "Returns usage information and properties of a specific bloom filter",
4 |         "complexity": "O(1)",
5 |         "group": "bloom",
6 |         "module_since": "1.0.0",
7 |         "arity": -2,
8 |         "acl_categories": [
9 |             "READ",
10 |             "FAST",
11 |             "BLOOM"
12 |         ],
13 |         "arguments": [
14 |             {
15 |                 "name": "key",
16 |                 "type": "key",
17 |                 "key_spec_index": 0
18 |             },
19 |             {
20 |                 "name": "sortby",
21 |                 "type": "oneof",
22 |                 "optional": true,
23 |                 "arguments": [
24 |                     {
25 |                         "name": "capacity",
26 |                         "type": "pure-token",
27 |                         "token": "CAPACITY"
28 |                     },
29 |                     {
30 |                         "name": "size",
31 |                         "type": "pure-token",
32 |                         "token": "SIZE"
33 |                     },
34 |                     {
35 |                         "name": "filters",
36 |                         "type": "pure-token",
37 |                         "token": "FILTERS"
38 |                     },
39 |                     {
40 |                         "name": "expansion",
41 |                         "type": "pure-token",
42 |                         "token": "EXPANSION"
43 |                     },
44 |                     {
45 |                         "name": "error",
46 |                         "type": "pure-token",
47 |                         "token": "ERROR"
48 |                     },
49 |                     {
50 |                         "name": "tightening",
51 |                         "type": "pure-token",
52 |                         "token": "TIGHTENING"
53 |                     },
54 |                     {
55 |                         "name": "maxscaledcapacity",
56 |                         "type": "pure-token",
57 |                         "token": "MAXSCALEDCAPACITY"
58 |                     }
59 |                 ]
60 |             }
61 |         ]
62 |     }
63 | }
64 |
--------------------------------------------------------------------------------
/src/commands/bf.insert.json:
--------------------------------------------------------------------------------
1 | {
2 |     "BF.INSERT": {
3 |         "summary": "Creates a bloom filter with 0 or more items or adds items to an existing bloom filter",
4 |         "complexity": "O(N * K), where N is the number of hash functions used by the bloom filter and K is the number of items being added",
5 |         "group":
"bloom", 6 | "module_since": "1.0.0", 7 | "arity": -2, 8 | "acl_categories": [ 9 | "WRITE", 10 | "FAST", 11 | "BLOOM" 12 | ], 13 | "arguments": [ 14 | { 15 | "name": "key", 16 | "type": "key", 17 | "key_spec_index": 0 18 | }, 19 | { 20 | "token": "CAPACITY", 21 | "name": "capacity", 22 | "type": "integer", 23 | "optional": true 24 | }, 25 | { 26 | "token": "ERROR", 27 | "name": "error", 28 | "type": "float", 29 | "optional": true 30 | }, 31 | { 32 | "token": "EXPANSION", 33 | "name": "expansion", 34 | "type": "integer", 35 | "optional": true 36 | }, 37 | { 38 | "token": "SEED", 39 | "name": "seed", 40 | "type": "string", 41 | "optional": true 42 | }, 43 | { 44 | "token": "TIGHTENING", 45 | "name": "tightening", 46 | "type": "float", 47 | "optional": true 48 | }, 49 | { 50 | "token": "VALIDATESCALETO", 51 | "name": "validatescaleto", 52 | "type": "integer", 53 | "optional": true 54 | }, 55 | { 56 | "token": "NONSCALING", 57 | "name": "nonscaling", 58 | "type": "pure-token", 59 | "optional": true 60 | }, 61 | { 62 | "token": "NOCREATE", 63 | "name": "nocreate", 64 | "type": "pure-token", 65 | "optional": true 66 | }, 67 | { 68 | "token": "ITEMS", 69 | "name": "value", 70 | "type": "string", 71 | "multiple": true, 72 | "optional": true 73 | } 74 | ] 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/commands/bf.load.json: -------------------------------------------------------------------------------- 1 | { 2 | "BF.LOAD": { 3 | "summary": "Restores a bloom filter in a single operation. The command is only generated during AOF Rewrite of bloom filters", 4 | "complexity": "O(N), where N is the capacity", 5 | "group": "bloom", 6 | "module_since": "1.0.0", 7 | "arity": 3, 8 | "acl_categories": [ 9 | "WRITE", 10 | "BLOOM" 11 | ], 12 | "arguments": [ 13 | { 14 | "name": "key", 15 | "type": "key", 16 | "key_spec_index": 0 17 | }, 18 | { 19 | "name": "dump", 20 | "type": "string" 21 | } 22 | ] 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/commands/bf.madd.json: -------------------------------------------------------------------------------- 1 | { 2 | "BF.MADD": { 3 | "summary": "Adds one or more items to a bloom filter. 
The bloom filter is created if it doesn't exist",
4 |         "complexity": "O(N * K), where N is the number of hash functions used by the bloom filter and K is the number of items being added",
5 |         "group": "bloom",
6 |         "module_since": "1.0.0",
7 |         "arity": -3,
8 |         "acl_categories": [
9 |             "FAST",
10 |             "WRITE",
11 |             "BLOOM"
12 |         ],
13 |         "arguments": [
14 |             {
15 |                 "name": "key",
16 |                 "type": "key",
17 |                 "key_spec_index": 0
18 |             },
19 |             {
20 |                 "name": "value",
21 |                 "type": "string",
22 |                 "multiple": true
23 |             }
24 |         ]
25 |     }
26 | }
27 |
--------------------------------------------------------------------------------
/src/commands/bf.mexists.json:
--------------------------------------------------------------------------------
1 | {
2 |     "BF.MEXISTS": {
3 |         "summary": "Determines if the bloom filter contains one or more items",
4 |         "complexity": "O(K * N), where N is the number of hash functions used by the bloom filter and K is the number of items",
5 |         "group": "bloom",
6 |         "module_since": "1.0.0",
7 |         "arity": -3,
8 |         "acl_categories": [
9 |             "READ",
10 |             "FAST",
11 |             "BLOOM"
12 |         ],
13 |         "arguments": [
14 |             {
15 |                 "name": "key",
16 |                 "type": "key",
17 |                 "key_spec_index": 0
18 |             },
19 |             {
20 |                 "name": "value",
21 |                 "type": "string",
22 |                 "multiple": true
23 |             }
24 |         ]
25 |     }
26 | }
27 |
--------------------------------------------------------------------------------
/src/commands/bf.reserve.json:
--------------------------------------------------------------------------------
1 | {
2 |     "BF.RESERVE": {
3 |         "summary": "Creates an empty bloom filter with the specified properties",
4 |         "complexity": "O(1)",
5 |         "group": "bloom",
6 |         "module_since": "1.0.0",
7 |         "arity": -4,
8 |         "acl_categories": [
9 |             "FAST",
10 |             "WRITE",
11 |             "BLOOM"
12 |         ],
13 |         "arguments": [
14 |             {
15 |                 "name": "key",
16 |                 "type": "key",
17 |                 "key_spec_index": 0
18 |             },
19 |             {
20 |                 "name": "error_rate",
21 |                 "type": "float"
22 |             },
23 |             {
24 |                 "name": "capacity",
25 |                 "type": "integer"
26 |             },
27 |             {
28 |                 "token": "EXPANSION",
29 |                 "name": "expansion",
30 |                 "type": "integer",
31 |                 "optional": true
32 |             },
33 |             {
34 |                 "token": "NONSCALING",
35 |                 "name": "nonscaling",
36 |                 "type": "pure-token",
37 |                 "optional": true
38 |             }
39 |         ]
40 |     }
41 | }
42 |
--------------------------------------------------------------------------------
/src/configs.rs:
--------------------------------------------------------------------------------
1 | use crate::bloom::utils;
2 | use lazy_static::lazy_static;
3 | use std::sync::atomic::{AtomicBool, AtomicI64};
4 | use std::sync::Mutex;
5 | use valkey_module::{
6 |     configuration::ConfigurationContext, ConfigurationValue, ValkeyError, ValkeyGILGuard,
7 |     ValkeyString,
8 | };
9 |
10 | /// Configurations
11 | pub const BLOOM_CAPACITY_DEFAULT: i64 = 100;
12 | pub const BLOOM_CAPACITY_MIN: i64 = 1;
13 | pub const BLOOM_CAPACITY_MAX: i64 = i64::MAX;
14 |
15 | pub const BLOOM_EXPANSION_DEFAULT: i64 = 2;
16 | pub const BLOOM_EXPANSION_MIN: u32 = 1;
17 | pub const BLOOM_EXPANSION_MAX: u32 = u32::MAX;
18 |
19 | pub const BLOOM_FP_RATE_DEFAULT: &str = "0.01";
20 | pub const BLOOM_FP_RATE_MIN: f64 = 0.0;
21 | pub const BLOOM_FP_RATE_MAX: f64 = 1.0;
22 |
23 | // Tightening ratio used during scale out for the calculation of the fp_rate of every new filter within a bloom object,
24 | // to keep the bloom object's overall fp_rate at the configured value.
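// Worked example (assuming calculate_fp_rate in data_type.rs applies fp_rate * tightening_ratio^i
// to the i-th filter): with fp_rate 0.01 and the 0.5 default below, successive filters target
// 0.01, 0.005, 0.0025, ..., keeping the compound error bounded by roughly fp_rate / (1 - ratio).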
25 | pub const TIGHTENING_RATIO_DEFAULT: &str = "0.5";
26 | pub const BLOOM_TIGHTENING_RATIO_MIN: f64 = 0.0;
27 | pub const BLOOM_TIGHTENING_RATIO_MAX: f64 = 1.0;
28 |
29 | pub const BLOOM_USE_RANDOM_SEED_DEFAULT: bool = true;
30 |
31 | pub const BLOOM_DEFRAG_DEAFULT: bool = true;
32 |
33 | // Max memory usage allowed overall within a bloom object (128MB).
34 | // Beyond this threshold, a bloom object is classified as large.
35 | // Write operations that result in a bloom object allocation larger than this size will be rejected.
36 | pub const BLOOM_MEMORY_LIMIT_PER_OBJECT_DEFAULT: i64 = 128 * 1024 * 1024;
37 | pub const BLOOM_MEMORY_LIMIT_PER_OBJECT_MIN: i64 = 0;
38 | pub const BLOOM_MEMORY_LIMIT_PER_OBJECT_MAX: i64 = i64::MAX;
39 |
40 | lazy_static! {
41 |     pub static ref BLOOM_CAPACITY: AtomicI64 = AtomicI64::new(BLOOM_CAPACITY_DEFAULT);
42 |     pub static ref BLOOM_EXPANSION: AtomicI64 = AtomicI64::new(BLOOM_EXPANSION_DEFAULT);
43 |     pub static ref BLOOM_MEMORY_LIMIT_PER_OBJECT: AtomicI64 =
44 |         AtomicI64::new(BLOOM_MEMORY_LIMIT_PER_OBJECT_DEFAULT);
45 |     pub static ref BLOOM_USE_RANDOM_SEED: AtomicBool = AtomicBool::default();
46 |     pub static ref BLOOM_DEFRAG: AtomicBool = AtomicBool::new(BLOOM_DEFRAG_DEAFULT);
47 |     pub static ref BLOOM_FP_RATE_F64: Mutex<f64> = Mutex::new(
48 |         BLOOM_FP_RATE_DEFAULT
49 |             .parse::<f64>()
50 |             .expect("Expected valid f64 for fp rate.")
51 |     );
52 |     pub static ref BLOOM_FP_RATE: ValkeyGILGuard<ValkeyString> =
53 |         ValkeyGILGuard::new(ValkeyString::create(None, BLOOM_FP_RATE_DEFAULT));
54 |     pub static ref BLOOM_TIGHTENING_F64: Mutex<f64> = Mutex::new(
55 |         TIGHTENING_RATIO_DEFAULT
56 |             .parse::<f64>()
57 |             .expect("Expected valid f64 for tightening ratio.")
58 |     );
59 |     pub static ref BLOOM_TIGHTENING_RATIO: ValkeyGILGuard<ValkeyString> =
60 |         ValkeyGILGuard::new(ValkeyString::create(None, TIGHTENING_RATIO_DEFAULT));
61 | }
62 |
63 | /// Constants
64 | // Max number of filters allowed within a bloom object.
65 | pub const BLOOM_NUM_FILTERS_PER_OBJECT_LIMIT_MAX: i32 = i32::MAX;
66 | /// The constants below are the fixed seed and sip keys used to create bloom objects with the same seed, and to restore bloom objects with the same hasher that
67 | /// was generated using the rust crate bloomfilter https://crates.io/crates/bloomfilter
68 | pub const FIXED_SEED: [u8; 32] = [
69 |     89, 15, 245, 34, 234, 120, 17, 218, 167, 20, 216, 9, 59, 62, 123, 217, 29, 137, 138, 115, 62,
70 |     152, 136, 135, 48, 127, 151, 205, 40, 7, 51, 131,
71 | ];
72 | /// Minimum Valkey version that supports the bloom module
73 | pub const BLOOM_MIN_SUPPORTED_VERSION: &[i64; 3] = &[8, 0, 0];
74 |
75 | /// This is a config set handler for the False Positive Rate and Tightening Ratio configs.
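/// Both configs parse as f64 and must lie strictly between their MIN and MAX bounds; for example
/// (using the bf. module prefix the integration tests use), CONFIG SET bf.bloom-fp-rate 0.01
/// succeeds, while 0.0, 1.0, or a non-float value is rejected with an error.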
76 | pub fn on_string_config_set(
77 |     config_ctx: &ConfigurationContext,
78 |     name: &str,
79 |     val: &'static ValkeyGILGuard<ValkeyString>,
80 | ) -> Result<(), ValkeyError> {
81 |     let v = val.get(config_ctx);
82 |     let value_str = v.to_string_lossy();
83 |     let value = match value_str.parse::<f64>() {
84 |         Ok(v) => v,
85 |         Err(_) => {
86 |             return Err(ValkeyError::Str("Invalid floating-point value"));
87 |         }
88 |     };
89 |     match name {
90 |         "bloom-fp-rate" => {
91 |             if !(value > BLOOM_FP_RATE_MIN && value < BLOOM_FP_RATE_MAX) {
92 |                 return Err(ValkeyError::Str(utils::ERROR_RATE_RANGE));
93 |             }
94 |             let mut fp_rate = BLOOM_FP_RATE_F64
95 |                 .lock()
96 |                 .expect("We expect the fp_rate static to exist.");
97 |             *fp_rate = value;
98 |             Ok(())
99 |         }
100 |         "bloom-tightening-ratio" => {
101 |             if !(value > BLOOM_TIGHTENING_RATIO_MIN && value < BLOOM_TIGHTENING_RATIO_MAX) {
102 |                 return Err(ValkeyError::Str(utils::TIGHTENING_RATIO_RANGE));
103 |             }
104 |             let mut tightening = BLOOM_TIGHTENING_F64
105 |                 .lock()
106 |                 .expect("We expect the tightening_ratio static to exist.");
107 |             *tightening = value;
108 |             Ok(())
109 |         }
110 |         _ => Err(ValkeyError::Str("Unknown configuration parameter")),
111 |     }
112 | }
113 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | use metrics::bloom_info_handler;
2 | use valkey_module::{
3 |     configuration::ConfigurationFlags, valkey_module, Context, InfoContext, Status, ValkeyResult,
4 |     ValkeyString,
5 | };
6 | pub mod bloom;
7 | pub mod configs;
8 | pub mod metrics;
9 | pub mod wrapper;
10 | use crate::bloom::command_handler;
11 | use crate::bloom::data_type::BLOOM_TYPE;
12 | use crate::bloom::utils::valid_server_version;
13 | use valkey_module_macros::info_command_handler;
14 |
15 | pub const MODULE_NAME: &str = "bf";
16 | pub const MODULE_VERSION: i32 = 999999;
17 | // The release stage is used in order to provide release status information.
18 | // In the unstable branch the status is always "dev".
19 | // During the release process the status will be set to rc1,rc2...rcN.
20 | // When the version is released the status will be "ga".
21 | pub const MODULE_RELEASE_STAGE: &str = "dev";
22 |
23 | fn initialize(ctx: &Context, _args: &[ValkeyString]) -> Status {
24 |     let ver = ctx
25 |         .get_server_version()
26 |         .expect("Unable to get server version!");
27 |     if !valid_server_version(ver) {
28 |         ctx.log_warning(
29 |             format!(
30 |                 "The minimum supported Valkey server version for the valkey-bloom module is {:?}",
31 |                 configs::BLOOM_MIN_SUPPORTED_VERSION
32 |             )
33 |             .as_str(),
34 |         );
35 |         Status::Err
36 |     } else {
37 |         Status::Ok
38 |     }
39 | }
40 |
41 | fn deinitialize(_ctx: &Context) -> Status {
42 |     Status::Ok
43 | }
44 |
45 | /// Command handler for BF.EXISTS <key> <item>
46 | fn bloom_exists_command(ctx: &Context, args: Vec<ValkeyString>) -> ValkeyResult {
47 |     command_handler::bloom_filter_exists(ctx, &args, false)
48 | }
49 |
50 | /// Command handler for BF.MEXISTS <key> <item> [<item> ...]
51 | fn bloom_mexists_command(ctx: &Context, args: Vec<ValkeyString>) -> ValkeyResult {
52 |     command_handler::bloom_filter_exists(ctx, &args, true)
53 | }
54 |
55 | /// Command handler for BF.ADD <key> <item>
56 | fn bloom_add_command(ctx: &Context, args: Vec<ValkeyString>) -> ValkeyResult {
57 |     command_handler::bloom_filter_add_value(ctx, &args, false)
58 | }
59 |
60 | /// Command handler for BF.MADD <key> <item> [<item> ...]
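/// Returns one integer per item (1 = item newly added, 0 = item possibly already present); e.g. the
/// integration tests expect BF.MADD madd_key item1 item2 item3 to yield a 3-element reply.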
61 | fn bloom_madd_command(ctx: &Context, args: Vec<ValkeyString>) -> ValkeyResult {
62 |     command_handler::bloom_filter_add_value(ctx, &args, true)
63 | }
64 |
65 | /// Command handler for BF.CARD <key>
66 | fn bloom_card_command(ctx: &Context, args: Vec<ValkeyString>) -> ValkeyResult {
67 |     command_handler::bloom_filter_card(ctx, &args)
68 | }
69 |
70 | /// Command handler for BF.RESERVE <key> <false_positive_rate> <capacity> [EXPANSION <expansion>] | [NONSCALING]
71 | fn bloom_reserve_command(ctx: &Context, args: Vec<ValkeyString>) -> ValkeyResult {
72 |     command_handler::bloom_filter_reserve(ctx, &args)
73 | }
74 |
75 | /// Command handler for BF.INFO <key> [CAPACITY | SIZE | FILTERS | ITEMS | EXPANSION | ERROR | MAXSCALEDCAPACITY]
76 | fn bloom_info_command(ctx: &Context, args: Vec<ValkeyString>) -> ValkeyResult {
77 |     command_handler::bloom_filter_info(ctx, &args)
78 | }
79 |
80 | /// Command handler for:
81 | /// BF.INSERT <key> [ERROR <fp_error>] [CAPACITY <capacity>] [EXPANSION <expansion>] [NOCREATE] [NONSCALING] [VALIDATESCALETO <capacity>] ITEMS <item> [<item> ...]
82 | fn bloom_insert_command(ctx: &Context, args: Vec<ValkeyString>) -> ValkeyResult {
83 |     command_handler::bloom_filter_insert(ctx, &args)
84 | }
85 |
86 | /// Command handler for:
87 | /// BF.LOAD <key> <data>
88 | fn bloom_load_command(ctx: &Context, args: Vec<ValkeyString>) -> ValkeyResult {
89 |     command_handler::bloom_filter_load(ctx, &args)
90 | }
91 |
92 | ///
93 | /// Module Info
94 | ///
95 | #[info_command_handler]
96 | fn info_handler(ctx: &InfoContext, _for_crash_report: bool) -> ValkeyResult<()> {
97 |     bloom_info_handler(ctx)
98 | }
99 |
100 | //////////////////////////////////////////////////////
101 |
102 | valkey_module! {
103 |     name: MODULE_NAME,
104 |     version: MODULE_VERSION,
105 |     allocator: (valkey_module::alloc::ValkeyAlloc, valkey_module::alloc::ValkeyAlloc),
106 |     data_types: [
107 |         BLOOM_TYPE,
108 |     ],
109 |     init: initialize,
110 |     deinit: deinitialize,
111 |     acl_categories: [
112 |         "bloom",
113 |     ]
114 |     commands: [
115 |         ["BF.ADD", bloom_add_command, "write fast deny-oom", 1, 1, 1, "fast write bloom"],
116 |         ["BF.MADD", bloom_madd_command, "write fast deny-oom", 1, 1, 1, "fast write bloom"],
117 |         ["BF.EXISTS", bloom_exists_command, "readonly fast", 1, 1, 1, "fast read bloom"],
118 |         ["BF.MEXISTS", bloom_mexists_command, "readonly fast", 1, 1, 1, "fast read bloom"],
119 |         ["BF.CARD", bloom_card_command, "readonly fast", 1, 1, 1, "fast read bloom"],
120 |         ["BF.RESERVE", bloom_reserve_command, "write fast deny-oom", 1, 1, 1, "fast write bloom"],
121 |         ["BF.INFO", bloom_info_command, "readonly fast", 1, 1, 1, "fast read bloom"],
122 |         ["BF.INSERT", bloom_insert_command, "write fast deny-oom", 1, 1, 1, "fast write bloom"],
123 |         ["BF.LOAD", bloom_load_command, "write deny-oom", 1, 1, 1, "write bloom"]
124 |     ],
125 |     configurations: [
126 |         i64: [
127 |             ["bloom-capacity", &*configs::BLOOM_CAPACITY, configs::BLOOM_CAPACITY_DEFAULT, configs::BLOOM_CAPACITY_MIN, configs::BLOOM_CAPACITY_MAX, ConfigurationFlags::DEFAULT, None],
128 |             ["bloom-expansion", &*configs::BLOOM_EXPANSION, configs::BLOOM_EXPANSION_DEFAULT, 0, configs::BLOOM_EXPANSION_MAX as i64, ConfigurationFlags::DEFAULT, None],
129 |             ["bloom-memory-usage-limit", &*configs::BLOOM_MEMORY_LIMIT_PER_OBJECT, configs::BLOOM_MEMORY_LIMIT_PER_OBJECT_DEFAULT, configs::BLOOM_MEMORY_LIMIT_PER_OBJECT_MIN, configs::BLOOM_MEMORY_LIMIT_PER_OBJECT_MAX, ConfigurationFlags::DEFAULT, None],
130 |         ],
131 |         string: [
132 |             ["bloom-fp-rate", &*configs::BLOOM_FP_RATE, configs::BLOOM_FP_RATE_DEFAULT, ConfigurationFlags::DEFAULT, None, Some(Box::new(configs::on_string_config_set))],
133 |             ["bloom-tightening-ratio", &*configs::BLOOM_TIGHTENING_RATIO,
configs::TIGHTENING_RATIO_DEFAULT, ConfigurationFlags::DEFAULT, None, Some(Box::new(configs::on_string_config_set))], 134 | ], 135 | bool: [ 136 | ["bloom-use-random-seed", &*configs::BLOOM_USE_RANDOM_SEED, configs::BLOOM_USE_RANDOM_SEED_DEFAULT, ConfigurationFlags::DEFAULT, None], 137 | ["bloom-defrag-enabled", &*configs::BLOOM_DEFRAG, configs::BLOOM_DEFRAG_DEAFULT, ConfigurationFlags::DEFAULT, None], 138 | ], 139 | enum: [ 140 | ], 141 | module_args_as_configuration: true, 142 | ] 143 | } 144 | -------------------------------------------------------------------------------- /src/metrics.rs: -------------------------------------------------------------------------------- 1 | use lazy_static::lazy_static; 2 | use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; 3 | use valkey_module::{InfoContext, ValkeyResult}; 4 | 5 | lazy_static! { 6 | pub static ref BLOOM_NUM_OBJECTS: AtomicU64 = AtomicU64::new(0); 7 | pub static ref BLOOM_OBJECT_TOTAL_MEMORY_BYTES: AtomicUsize = AtomicUsize::new(0); 8 | pub static ref BLOOM_NUM_FILTERS_ACROSS_OBJECTS: AtomicU64 = AtomicU64::new(0); 9 | pub static ref BLOOM_NUM_ITEMS_ACROSS_OBJECTS: AtomicU64 = AtomicU64::new(0); 10 | pub static ref BLOOM_CAPACITY_ACROSS_OBJECTS: AtomicU64 = AtomicU64::new(0); 11 | pub static ref BLOOM_DEFRAG_HITS: AtomicU64 = AtomicU64::new(0); 12 | pub static ref BLOOM_DEFRAG_MISSES: AtomicU64 = AtomicU64::new(0); 13 | } 14 | 15 | pub fn bloom_info_handler(ctx: &InfoContext) -> ValkeyResult<()> { 16 | ctx.builder() 17 | .add_section("bloom_core_metrics") 18 | .field( 19 | "bloom_total_memory_bytes", 20 | BLOOM_OBJECT_TOTAL_MEMORY_BYTES 21 | .load(Ordering::Relaxed) 22 | .to_string(), 23 | )? 24 | .field( 25 | "bloom_num_objects", 26 | BLOOM_NUM_OBJECTS.load(Ordering::Relaxed).to_string(), 27 | )? 28 | .field( 29 | "bloom_num_filters_across_objects", 30 | BLOOM_NUM_FILTERS_ACROSS_OBJECTS 31 | .load(Ordering::Relaxed) 32 | .to_string(), 33 | )? 34 | .field( 35 | "bloom_num_items_across_objects", 36 | BLOOM_NUM_ITEMS_ACROSS_OBJECTS 37 | .load(Ordering::Relaxed) 38 | .to_string(), 39 | )? 40 | .field( 41 | "bloom_capacity_across_objects", 42 | BLOOM_CAPACITY_ACROSS_OBJECTS 43 | .load(Ordering::Relaxed) 44 | .to_string(), 45 | )? 46 | .build_section()? 47 | .add_section("bloom_defrag_metrics") 48 | .field( 49 | "bloom_defrag_hits", 50 | BLOOM_DEFRAG_HITS.load(Ordering::Relaxed).to_string(), 51 | )? 52 | .field( 53 | "bloom_defrag_misses", 54 | BLOOM_DEFRAG_MISSES.load(Ordering::Relaxed).to_string(), 55 | )? 56 | .build_section()? 
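        // The sections above surface through INFO bf as bloom_core_metrics and bloom_defrag_metrics,
        // which is what the integration tests assert against.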
57 |         .build_info()?;
58 |
59 |     Ok(())
60 | }
61 |
--------------------------------------------------------------------------------
/src/wrapper/bloom_callback.rs:
--------------------------------------------------------------------------------
1 | use crate::bloom;
2 | use crate::bloom::data_type::ValkeyDataType;
3 | use crate::bloom::utils::BloomFilter;
4 | use crate::bloom::utils::BloomObject;
5 | use crate::configs;
6 | use crate::metrics;
7 | use bloomfilter::Bloom;
8 | use lazy_static::lazy_static;
9 | use std::ffi::CString;
10 | use std::mem;
11 | use std::os::raw::{c_char, c_int, c_void};
12 | use std::ptr::null_mut;
13 | use std::sync::atomic::Ordering;
14 | use std::sync::Mutex;
15 | use valkey_module::defrag::Defrag;
16 | use valkey_module::digest::Digest;
17 | use valkey_module::logging;
18 | use valkey_module::logging::{log_io_error, ValkeyLogLevel};
19 | use valkey_module::raw;
20 | use valkey_module::{RedisModuleDefragCtx, RedisModuleString};
21 |
22 | // Note: methods in this mod are for the bloom module data type callbacks.
23 | // The reason they are unsafe is because the callback methods are expected to be
24 | // "unsafe extern C" based on the Rust module API definition.
25 |
26 | /// # Safety
27 | pub unsafe extern "C" fn bloom_rdb_save(rdb: *mut raw::RedisModuleIO, value: *mut c_void) {
28 |     let v = &*value.cast::<BloomObject>();
29 |     raw::save_unsigned(rdb, v.num_filters() as u64);
30 |     raw::save_unsigned(rdb, v.expansion() as u64);
31 |     raw::save_double(rdb, v.fp_rate());
32 |     raw::save_double(rdb, v.tightening_ratio());
33 |     let is_seed_random = if v.is_seed_random() { 1 } else { 0 };
34 |     raw::save_unsigned(rdb, is_seed_random);
35 |     let filter_list = v.filters();
36 |     let mut filter_list_iter = filter_list.iter().peekable();
37 |     while let Some(filter) = filter_list_iter.next() {
38 |         raw::save_unsigned(rdb, filter.capacity() as u64);
39 |         if filter_list_iter.peek().is_none() {
40 |             raw::save_unsigned(rdb, filter.num_items() as u64);
41 |         }
42 |         let bloom = filter.raw_bloom();
43 |         let bitmap = bloom.as_slice();
44 |         raw::RedisModule_SaveStringBuffer.unwrap()(
45 |             rdb,
46 |             bitmap.as_ptr().cast::<c_char>(),
47 |             bitmap.len(),
48 |         );
49 |     }
50 | }
51 |
52 | /// # Safety
53 | pub unsafe extern "C" fn bloom_rdb_load(
54 |     rdb: *mut raw::RedisModuleIO,
55 |     encver: c_int,
56 | ) -> *mut c_void {
57 |     if let Some(item) = <BloomObject as ValkeyDataType>::load_from_rdb(rdb, encver) {
58 |         let bb = Box::new(item);
59 |         Box::into_raw(bb).cast::<c_void>()
60 |     } else {
61 |         logging::log_warning("Failed to restore bloom object.");
62 |         null_mut()
63 |     }
64 | }
65 |
66 | /// # Safety
67 | pub unsafe extern "C" fn bloom_aof_rewrite(
68 |     aof: *mut raw::RedisModuleIO,
69 |     key: *mut raw::RedisModuleString,
70 |     value: *mut c_void,
71 | ) {
72 |     let filter = &*value.cast::<BloomObject>();
73 |     let hex = match filter.encode_object() {
74 |         Ok(val) => val,
75 |         Err(err) => {
76 |             log_io_error(aof, ValkeyLogLevel::Warning, err.as_str());
77 |             return;
78 |         }
79 |     };
80 |     let cmd = CString::new("BF.LOAD").unwrap();
81 |     let fmt = CString::new("sb").unwrap();
82 |     valkey_module::raw::RedisModule_EmitAOF.unwrap()(
83 |         aof,
84 |         cmd.as_ptr(),
85 |         fmt.as_ptr(),
86 |         key,
87 |         hex.as_ptr().cast::<c_char>(),
88 |         hex.len(),
89 |     );
90 | }
91 |
92 | /// # Safety
93 | /// Load auxiliary data from RDB
94 | pub unsafe extern "C" fn bloom_aux_load(
95 |     rdb: *mut raw::RedisModuleIO,
96 |     _encver: c_int,
97 |     _when: c_int,
98 | ) -> c_int {
99 |     bloom::data_type::bloom_rdb_aux_load(rdb)
100 | }
101 |
102 | /// # Safety
103 | /// Free a bloom object
104 | pub unsafe extern "C" fn bloom_free(value: *mut c_void) {
"C" fn bloom_free(value: *mut c_void) { 105 | drop(Box::from_raw(value.cast::())); 106 | } 107 | 108 | /// # Safety 109 | /// Compute the memory usage for a bloom object. 110 | pub unsafe extern "C" fn bloom_mem_usage(value: *const c_void) -> usize { 111 | let item = &*value.cast::(); 112 | item.memory_usage() 113 | } 114 | 115 | /// # Safety 116 | /// Raw handler for the COPY command. 117 | pub unsafe extern "C" fn bloom_copy( 118 | _from_key: *mut RedisModuleString, 119 | _to_key: *mut RedisModuleString, 120 | value: *const c_void, 121 | ) -> *mut c_void { 122 | let curr_item = &*value.cast::(); 123 | let new_item = BloomObject::create_copy_from(curr_item); 124 | let bb = Box::new(new_item); 125 | Box::into_raw(bb).cast::() 126 | } 127 | 128 | /// # Safety 129 | /// Raw handler for the Bloom digest callback. 130 | pub unsafe extern "C" fn bloom_digest(md: *mut raw::RedisModuleDigest, value: *mut c_void) { 131 | let dig = Digest::new(md); 132 | let val = &*(value.cast::()); 133 | val.debug_digest(dig); 134 | } 135 | 136 | /// # Safety 137 | /// Raw handler for the Bloom object's free_effort callback. 138 | pub unsafe extern "C" fn bloom_free_effort( 139 | _from_key: *mut RedisModuleString, 140 | value: *const c_void, 141 | ) -> usize { 142 | let curr_item = &*value.cast::(); 143 | curr_item.free_effort() 144 | } 145 | 146 | // Lazy static for a default temporary external crate Bloom structure that gets swapped during defrag. 147 | lazy_static! { 148 | static ref DEFRAG_BLOOM_FILTER: Mutex>>> = 149 | Mutex::new(Some(Box::new(Bloom::<[u8]>::new(1, 1).unwrap()))); 150 | } 151 | 152 | /// Defragments a vector of bytes (bit vector) of the external crate Bloom structure. This function is designed to be 153 | /// used as a callback. 154 | /// 155 | /// This function takes ownership of a `Vec`, attempts to defragment it using an external 156 | /// defragmentation mechanism, and returns a new `Vec` that may have been defragmented. 157 | /// 158 | /// # Arguments 159 | /// 160 | /// * `vec` - A `Vec` to be defragmented. 161 | /// 162 | /// # Returns 163 | /// 164 | /// Returns a new `Vec` that may have been defragmented. If defragmentation was successful, 165 | /// the returned vector will use the newly allocated memory. If defragmentation failed or was 166 | /// not necessary, the original vector's memory will be used. 167 | fn external_vec_defrag(vec: Vec) -> Vec { 168 | let defrag = Defrag::new(core::ptr::null_mut()); 169 | let len = vec.len(); 170 | let capacity = vec.capacity(); 171 | let vec_ptr = Box::into_raw(vec.into_boxed_slice()) as *mut c_void; 172 | let defragged_filters_ptr = unsafe { defrag.alloc(vec_ptr) }; 173 | if !defragged_filters_ptr.is_null() { 174 | metrics::BLOOM_DEFRAG_HITS.fetch_add(1, std::sync::atomic::Ordering::Relaxed); 175 | unsafe { Vec::from_raw_parts(defragged_filters_ptr as *mut u8, len, capacity) } 176 | } else { 177 | metrics::BLOOM_DEFRAG_MISSES.fetch_add(1, std::sync::atomic::Ordering::Relaxed); 178 | unsafe { Vec::from_raw_parts(vec_ptr as *mut u8, len, capacity) } 179 | } 180 | } 181 | 182 | /// # Safety 183 | /// Raw handler for the Bloom object's defrag callback. 184 | /// 185 | /// We will be defragging every allocation of the Bloom data type. We will explain them top down, then afterwards state the order in which 186 | /// we will defrag. Starting from the top, which is passed in as the variable named `value`, we have the BloomObject. This BloomObject 187 | /// contains a vec of BloomFilter structs. 
188 | /// Finally, each of these external Bloom structures contains a Vec<u8> (bit vector).
189 | ///
190 | /// The order of defragmentation is as follows (1 to 3 is in a loop for the number of filters):
191 | /// 1. BloomFilter structures within the top level BloomObject structure
192 | /// 2. External Bloom structures within each BloomFilter
193 | /// 3. Vec<u8> (bit vector) within each external Bloom structure
194 | /// 4. Vec of the BloomFilter/s in the BloomObject
195 | /// 5. The BloomObject itself
196 | ///
197 | /// We use a cursor to track the current filter of the BloomObject that we are defragging. This cursor will start at 0
198 | /// if we finished all the filters the last time we defragged this object, or if we haven't defragged it before. We will determine
199 | /// that we have spent too much time on defragging this specific object from the should_stop_defrag() method. If we didn't defrag
200 | /// all the filters, then we set the cursor so we know where to start from the next time we defrag, and return 1 to show we didn't
201 | /// finish.
202 | ///
203 | /// # Arguments
204 | ///
205 | /// * `defrag_ctx` - A raw pointer to the defragmentation context.
206 | /// * `_from_key` - A raw pointer to the Redis module string (unused in this function).
207 | /// * `value` - A mutable raw pointer to a raw pointer representing the BloomObject to be defragmented.
208 | ///
209 | /// # Returns
210 | ///
211 | /// Returns an `i32` where:
212 | /// * 0 indicates successful complete defragmentation.
213 | /// * 1 indicates incomplete defragmentation (not all filters were defragged).
214 | pub unsafe extern "C" fn bloom_defrag(
215 |     defrag_ctx: *mut RedisModuleDefragCtx,
216 |     _from_key: *mut RedisModuleString,
217 |     value: *mut *mut c_void,
218 | ) -> i32 {
219 |     // If defrag is disabled we will just exit straight away
220 |     if !configs::BLOOM_DEFRAG.load(Ordering::Relaxed) {
221 |         return 0;
222 |     }
223 |
224 |     // Get the cursor for the BloomObject, otherwise start the cursor at 0
225 |     let defrag = Defrag::new(defrag_ctx);
226 |     let mut cursor = defrag.get_cursor().unwrap_or(0);
227 |
228 |     // Convert the pointer to a BloomObject so we can operate on it.
229 |     let bloom_object: &mut BloomObject = &mut *(*value).cast::<BloomObject>();
230 |
231 |     let num_filters = bloom_object.num_filters();
232 |     let filters_capacity = bloom_object.filters().capacity();
233 |
234 |     // While we are within a timeframe decided from should_stop_defrag and not over the number of filters, defrag the next filter
235 |     while !defrag.should_stop_defrag() && cursor < num_filters as u64 {
236 |         // Remove the current BloomFilter, unbox it, and attempt to defragment the BloomFilter.
237 |         let bloom_filter_box = bloom_object.filters_mut().remove(cursor as usize);
238 |         let bloom_filter = Box::into_raw(bloom_filter_box);
239 |         let defrag_result = defrag.alloc(bloom_filter as *mut c_void);
240 |         let mut defragged_filter = {
241 |             if !defrag_result.is_null() {
242 |                 metrics::BLOOM_DEFRAG_HITS.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
243 |                 Box::from_raw(defrag_result as *mut BloomFilter)
244 |             } else {
245 |                 metrics::BLOOM_DEFRAG_MISSES.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
246 |                 Box::from_raw(bloom_filter)
247 |             }
248 |         };
249 |         // Swap the external crate Bloom structure with a temporary one during its defragmentation.
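        // (The swap via mem::replace is what lets us pass the inner Bloom to defrag.alloc() by raw
        // pointer while the BloomFilter stays in a valid state; the placeholder is put back into the
        // static below so the next iteration can reuse it.)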
250 |         let mut temporary_bloom = DEFRAG_BLOOM_FILTER
251 |             .lock()
252 |             .expect("We expect default to exist");
253 |         let inner_bloom = mem::replace(
254 |             defragged_filter.raw_bloom_mut(),
255 |             temporary_bloom.take().expect("We expect default to exist"),
256 |         );
257 |         // Convert the inner_bloom into the correct type and then try to defragment it
258 |         let inner_bloom_ptr = Box::into_raw(inner_bloom);
259 |         let defragged_inner_bloom = defrag.alloc(inner_bloom_ptr as *mut c_void);
260 |         // Defragment the bit Vec within the external crate Bloom structure using the external callback
261 |         if !defragged_inner_bloom.is_null() {
262 |             metrics::BLOOM_DEFRAG_HITS.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
263 |
264 |             let inner_bloom =
265 |                 unsafe { Box::from_raw(defragged_inner_bloom as *mut bloomfilter::Bloom<[u8]>) };
266 |             let external_bloom =
267 |                 inner_bloom.realloc_large_heap_allocated_objects(external_vec_defrag);
268 |             let placeholder_bloom =
269 |                 mem::replace(defragged_filter.raw_bloom_mut(), Box::new(external_bloom));
270 |             *temporary_bloom = Some(placeholder_bloom); // Reset the original static
271 |         } else {
272 |             metrics::BLOOM_DEFRAG_MISSES.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
273 |
274 |             let inner_bloom = unsafe { Box::from_raw(inner_bloom_ptr) };
275 |             let external_bloom =
276 |                 inner_bloom.realloc_large_heap_allocated_objects(external_vec_defrag);
277 |             let placeholder_bloom =
278 |                 mem::replace(defragged_filter.raw_bloom_mut(), Box::new(external_bloom));
279 |             *temporary_bloom = Some(placeholder_bloom); // Reset the original static
280 |         }
281 |
282 |         // Reinsert the defragmented filter and increment the cursor
283 |         bloom_object
284 |             .filters_mut()
285 |             .insert(cursor as usize, defragged_filter);
286 |         cursor += 1;
287 |     }
288 |     // Save the cursor for where we will start defragmenting from next time
289 |     defrag.set_cursor(cursor);
290 |     // If not all filters were looked at, return 1 to indicate incomplete defragmentation
291 |     if cursor < num_filters as u64 {
292 |         return 1;
293 |     }
294 |     // Defragment the Vec of BloomFilter/s itself
295 |     let filters_vec = mem::take(bloom_object.filters_mut());
296 |     let filters_ptr = Box::into_raw(filters_vec.into_boxed_slice()) as *mut c_void;
297 |     let defragged_filters_ptr = defrag.alloc(filters_ptr);
298 |     if !defragged_filters_ptr.is_null() {
299 |         metrics::BLOOM_DEFRAG_HITS.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
300 |         *bloom_object.filters_mut() = unsafe {
301 |             Vec::from_raw_parts(
302 |                 defragged_filters_ptr as *mut Box<BloomFilter>,
303 |                 num_filters,
304 |                 filters_capacity,
305 |             )
306 |         };
307 |     } else {
308 |         metrics::BLOOM_DEFRAG_MISSES.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
309 |         *bloom_object.filters_mut() = unsafe {
310 |             Vec::from_raw_parts(
311 |                 filters_ptr as *mut Box<BloomFilter>,
312 |                 num_filters,
313 |                 filters_capacity,
314 |             )
315 |         };
316 |     }
317 |     // Finally, attempt to defragment the BloomObject itself
318 |     let val = defrag.alloc(*value);
319 |     if !val.is_null() {
320 |         metrics::BLOOM_DEFRAG_HITS.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
321 |         *value = val;
322 |     } else {
323 |         metrics::BLOOM_DEFRAG_MISSES.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
324 |     }
325 |     // Return 0 to indicate successful complete defragmentation
326 |     0
327 | }
328 |
--------------------------------------------------------------------------------
/src/wrapper/mod.rs:
--------------------------------------------------------------------------------
1 | use valkey_module::Context;
2 |
3 | pub mod bloom_callback;
4 |
5 | /// Wrapper for the ValkeyModule_MustObeyClient function.
6 | /// Takes in a Context and returns true if the commands are arriving
7 | /// from the primary client or AOF client and should never be rejected.
8 | /// False otherwise.
9 | pub fn must_obey_client(ctx: &Context) -> bool {
10 |     #[cfg(not(feature = "valkey_8_0"))]
11 |     {
12 |         // ValkeyModule_MustObeyClient exists since Valkey 8.1, so we use this as it is a more performant check.
13 |         let ctx_raw = ctx.get_raw() as *mut valkey_module::ValkeyModuleCtx;
14 |
15 |         let status = unsafe { valkey_module::raw::ValkeyModule_MustObeyClient.unwrap()(ctx_raw) };
16 |         match status {
17 |             1 => true,
18 |             0 => false,
19 |             _ => panic!("We do not expect ValkeyModule_MustObeyClient to return anything other than 1 or 0."),
20 |         }
21 |     }
22 |
23 |     #[cfg(feature = "valkey_8_0")]
24 |     {
25 |         // On Valkey 8.0, fall back to checking for the replicated flag in the GetContextFlags API as a best effort.
26 |         ctx.get_flags()
27 |             .contains(valkey_module::ContextFlags::REPLICATED)
28 |     }
29 | }
30 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import pytest
3 | import sys
4 | import os
5 |
6 | # Set the path to find and use the valkey-test-framework
7 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'build')))
8 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'build/valkeytestframework')))
9 |
10 | @pytest.fixture(params=['random-seed', 'fixed-seed'])
11 | def bloom_config_parameterization(request):
12 |     return request.param
13 |
--------------------------------------------------------------------------------
/tests/test_bloom_acl_category.py:
--------------------------------------------------------------------------------
1 | from valkeytestframework.conftest import resource_port_tracker
2 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase
3 | from valkeytestframework.util.waiters import *
4 |
5 | class TestBloomACLCategory(ValkeyBloomTestCaseBase):
6 |
7 |     def test_bloom_acl_category_permissions(self):
8 |         # List of bloom commands and the expected returns if the command is valid
9 |         bloom_commands = [
10 |             ('BF.ADD add_key item', 1),
11 |             ('BF.EXISTS add_key item', 1),
12 |             ('BF.CARD add_key', 1),
13 |             ('BF.MADD madd_key item1 item2 item3', 3),
14 |             ('BF.MEXISTS madd_key item2 item3 item4', 3),
15 |             ('BF.INSERT insert_key ITEMS item', [1]),
16 |             ('BF.INFO insert_key filters', 1),
17 |             ('BF.RESERVE reserve_key 0.01 1000', b'OK'),
18 |         ]
19 |         client = self.server.get_new_client()
20 |         # Get a list of all commands with the acl category bloom
21 |         list_of_bloom_commands = client.execute_command("COMMAND LIST FILTERBY ACLCAT bloom")
22 |         # Create users with different acl permissions
23 |         client.execute_command("ACL SETUSER nonbloomuser1 on >bloom_pass -@bloom")
24 |         client.execute_command("ACL SETUSER nonbloomuser2 on >bloom_pass -@all")
25 |         client.execute_command("ACL SETUSER bloomuser1 on >bloom_pass ~* &* +@all")
26 |         client.execute_command("ACL SETUSER bloomuser2 on >bloom_pass ~* &* -@all +@bloom")
27 |         client.execute_command("ACL SETUSER bloomuser3 on >bloom_pass ~* &* -@all +@write +@read")
28 |         client.execute_command("ACL SETUSER bloomuser4 on >bloom_pass ~* &* -@all +@write +@bloom")
29 |         # Switch to the users with no bloom command access and check the error occurs as expected
30 |         for i in range(1, 3):
31 |             client.execute_command(f"AUTH nonbloomuser{i} bloom_pass")
32 |             for cmd in bloom_commands:
33 |                 self.verify_invalid_user_permissions(client, cmd, list_of_bloom_commands)
34 |         # Switch to the users with bloom command access and check commands are run as expected
35 |         for i in range(1, 5):
36 |             client.execute_command(f"AUTH bloomuser{i} bloom_pass")
37 |             for cmd in bloom_commands:
38 |                 self.verify_valid_user_permissions(client, cmd)
39 |             self.client.execute_command('FLUSHDB')
40 |             wait_for_equal(lambda: self.client.execute_command('DBSIZE'), 0)
41 |
42 |     def verify_valid_user_permissions(self, client, cmd):
43 |         cmd_name = cmd[0].split()[0]
44 |         try:
45 |             result = client.execute_command(cmd[0])
46 |             if cmd[0].startswith("BF.M"):
47 |                 assert len(result) == cmd[1]
48 |                 # The first add in a new bloom object should always return 1. For MEXISTS the first item we check will have been added as well, so it should exist
49 |                 assert result[0] == 1
50 |             else:
51 |                 assert result == cmd[1], f"{cmd_name} should work for default user"
52 |         except Exception as e:
53 |             assert False, f"bloomuser should be able to execute {cmd_name}: {str(e)}"
54 |
55 |     def verify_invalid_user_permissions(self, client, cmd, list_of_bloom_commands):
56 |         cmd_name = cmd[0].split()[0]
57 |         # Check that each command we try to run appeared in the list of commands with the bloom acl category
58 |         assert cmd_name.encode() in list_of_bloom_commands
59 |         try:
60 |             result = client.execute_command(cmd[0])
61 |             assert False, f"User with no bloom category access shouldn't be able to run {cmd_name}"
62 |         except Exception as e:
63 |             assert f"has no permissions to run the '{cmd_name}' command" in str(e)
64 |
65 |     def test_bloom_command_acl_categories(self):
66 |         # List of bloom commands and their acl categories
67 |         bloom_commands = [
68 |             ('BF.ADD', [b'write', b'denyoom', b'module', b'fast'], [b'@write', b'@fast', b'@bloom']),
69 |             ('BF.EXISTS', [b'readonly', b'module', b'fast'], [b'@read', b'@fast', b'@bloom']),
70 |             ('BF.MADD', [b'write', b'denyoom', b'module', b'fast'], [b'@write', b'@fast', b'@bloom']),
71 |             ('BF.MEXISTS', [b'readonly', b'module', b'fast'], [b'@read', b'@fast', b'@bloom']),
72 |             ('BF.INSERT', [b'write', b'denyoom', b'module', b'fast'], [b'@write', b'@fast', b'@bloom']),
73 |             ('BF.INFO', [b'readonly', b'module', b'fast'], [b'@read', b'@fast', b'@bloom']),
74 |             ('BF.CARD', [b'readonly', b'module', b'fast'], [b'@read', b'@fast', b'@bloom']),
75 |             ('BF.RESERVE', [b'write', b'denyoom', b'module', b'fast'], [b'@write', b'@fast', b'@bloom']),
76 |             ('BF.LOAD', [b'write', b'denyoom', b'module'], [b'@write', b'@bloom']),
77 |         ]
78 |         for cmd in bloom_commands:
79 |             # Get the info of the commands and compare the acl categories
80 |             cmd_info = self.client.execute_command(f'COMMAND INFO {cmd[0]}')
81 |             assert cmd_info[0][2] == cmd[1]
82 |             for category in cmd[2]:
83 |                 assert category in cmd_info[0][6]
84 |
--------------------------------------------------------------------------------
/tests/test_bloom_aofrewrite.py:
--------------------------------------------------------------------------------
1 | from valkeytestframework.util.waiters import *
2 | from valkeytestframework.valkey_test_case import ValkeyAction
3 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase
4 | from valkeytestframework.conftest import resource_port_tracker
5 |
6 | class TestBloomAofRewrite(ValkeyBloomTestCaseBase):
7 |
8 |     def test_basic_aofrewrite_and_restore(self):
9 |         client = self.server.get_new_client()
10 |         bf_add_result_1 = client.execute_command('BF.ADD testSave item')
11 |         assert bf_add_result_1 == 1
12 |         bf_exists_result_1 = client.execute_command('BF.EXISTS testSave item')
13 |         assert bf_exists_result_1 == 1
14 |         bf_info_result_1 = client.execute_command('BF.INFO testSave')
15 |         assert len(bf_info_result_1) != 0
16 |         curr_item_count_1 = self.server.num_keys()
17 |
18 |         # cmd debug digest
19 |         server_digest = client.execute_command('DEBUG', 'DIGEST')
20 |         assert server_digest != None and server_digest != b'0' * 40
21 |         object_digest = client.execute_command('DEBUG DIGEST-VALUE testSave')
22 |
23 |         # save aof, restart server
24 |         client.bgrewriteaof()
25 |         self.server.wait_for_action_done(ValkeyAction.AOF_REWRITE)
26 |         # Keep the server running for 1 second more to have a larger uptime.
27 |         time.sleep(1)
28 |         self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=True)
29 |         assert self.server.is_alive()
30 |         restored_server_digest = client.execute_command('DEBUG', 'DIGEST')
31 |         restored_object_digest = client.execute_command('DEBUG DIGEST-VALUE testSave')
32 |         assert restored_server_digest == server_digest
33 |         assert restored_object_digest == object_digest
34 |
35 |         # verify restore results
36 |         curr_item_count_2 = self.server.num_keys()
37 |         assert curr_item_count_2 == curr_item_count_1
38 |         bf_exists_result_2 = client.execute_command('BF.EXISTS testSave item')
39 |         assert bf_exists_result_2 == 1
40 |         bf_info_result_2 = client.execute_command('BF.INFO testSave')
41 |         assert bf_info_result_2 == bf_info_result_1
42 |         client.execute_command('DEL testSave')
43 |
44 |     def test_aofrewrite_bloomfilter_metrics(self):
45 |         # Create a scaled bloom filter and add 7500 items to trigger a scale out.
46 |         self.client.execute_command('BF.RESERVE key1 0.001 7000')
47 |         info_obj = self.client.execute_command('BF.INFO key1')
48 |         self.add_items_till_capacity(self.client, "key1", 7500, 1, "item_prefix")
49 |
50 |         # cmd debug digest
51 |         server_digest = self.client.execute_command('DEBUG', 'DIGEST')
52 |         assert server_digest != None and server_digest != b'0' * 40
53 |         object_digest = self.client.execute_command('DEBUG DIGEST-VALUE key1')
54 |
55 |         # save aof, restart server
56 |         self.client.bgrewriteaof()
57 |         self.server.wait_for_action_done(ValkeyAction.AOF_REWRITE)
58 |         # restart server
59 |         self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=True)
60 |         assert self.server.is_alive()
61 |         restored_server_digest = self.client.execute_command('DEBUG', 'DIGEST')
62 |         restored_object_digest = self.client.execute_command('DEBUG DIGEST-VALUE key1')
63 |         assert restored_server_digest == server_digest
64 |         assert restored_object_digest == object_digest
65 |
66 |         # Check info for the scaled bloomfilter matches the metrics data for the bloomfilter
67 |         new_info_obj = self.client.execute_command('BF.INFO key1')
68 |         self.verify_bloom_metrics(self.client.execute_command("INFO bf"), new_info_obj[3], 1, 2, 7500, 21000)
69 |
70 |         # Check bloomfilter size has increased
71 |         assert new_info_obj[3] > info_obj[3]
72 |
73 |         # Delete the scaled bloomfilter to check both filters are deleted and metrics stats are set accordingly
74 |         self.client.execute_command('DEL key1')
75 |         wait_for_equal(lambda: self.client.execute_command('BF.EXISTS key1 item_prefix1'), 0)
76 |         self.verify_bloom_metrics(self.client.execute_command("INFO bf"), 0, 0, 0, 0, 0)
77 |
--------------------------------------------------------------------------------
/tests/test_bloom_basic.py:
--------------------------------------------------------------------------------
1 | import time
2 | from valkeytestframework.util.waiters import *
3 | from valkey import ResponseError
4 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase
5 | from valkeytestframework.conftest import resource_port_tracker
6 |
7 | class TestBloomBasic(ValkeyBloomTestCaseBase):
8 |
9 |     def test_basic(self):
10 |         client = self.server.get_new_client()
11 |         # Validate that the valkey-bloom module is loaded.
12 |         module_list_data = client.execute_command('MODULE LIST')
13 |         module_list_count = len(module_list_data)
14 |         assert module_list_count == 1
15 |         module_loaded = False
16 |         for module in module_list_data:
17 |             if (module[b'name'] == b'bf'):
18 |                 module_loaded = True
19 |                 break
20 |         assert(module_loaded)
21 |         # Validate that all the BF.* commands are supported on the server.
22 |         command_cmd_result = client.execute_command('COMMAND')
23 |         bf_cmds = ["BF.ADD", "BF.EXISTS", "BF.MADD", "BF.MEXISTS", "BF.INFO", "BF.CARD", "BF.RESERVE", "BF.INSERT"]
24 |         assert all(item in command_cmd_result for item in bf_cmds)
25 |         # Basic bloom filter create, item add and item exists validation.
26 |         bf_add_result = client.execute_command('BF.ADD filter1 item1')
27 |         assert bf_add_result == 1
28 |         bf_exists_result = client.execute_command('BF.EXISTS filter1 item1')
29 |         assert bf_exists_result == 1
30 |         bf_exists_result = client.execute_command('BF.EXISTS filter1 item2')
31 |         assert bf_exists_result == 0 or bf_exists_result == 1
32 |
33 |     def test_copy_and_exists_cmd(self):
34 |         client = self.server.get_new_client()
35 |         madd_result = client.execute_command('BF.MADD filter item1 item2 item3 item4')
36 |         assert client.execute_command('EXISTS filter') == 1
37 |         mexists_result = client.execute_command('BF.MEXISTS filter item1 item2 item3 item4')
38 |         assert len(madd_result) == 4 and len(mexists_result) == 4
39 |         # cmd debug digest
40 |         server_digest = client.execute_command('DEBUG', 'DIGEST')
41 |         assert server_digest != None and server_digest != b'0' * 40
42 |         object_digest = client.execute_command('DEBUG DIGEST-VALUE filter')
43 |         assert client.execute_command('COPY filter new_filter') == 1
44 |         copied_server_digest = client.execute_command('DEBUG', 'DIGEST')
45 |         assert copied_server_digest != None and copied_server_digest != b'0' * 40
46 |         copied_object_digest = client.execute_command('DEBUG DIGEST-VALUE filter')
47 |         assert client.execute_command('EXISTS new_filter') == 1
48 |         copy_mexists_result = client.execute_command('BF.MEXISTS new_filter item1 item2 item3 item4')
49 |         assert mexists_result == copy_mexists_result
50 |         assert server_digest != copied_server_digest
51 |         assert copied_object_digest == object_digest
52 |
53 |     def test_memory_usage_cmd(self):
54 |         client = self.server.get_new_client()
55 |         assert client.execute_command('BF.ADD filter item1') == 1
56 |         memory_usage = client.execute_command('MEMORY USAGE filter')
57 |         info_size = client.execute_command('BF.INFO filter SIZE')
58 |         assert memory_usage >= info_size and info_size > 0
59 |
60 |     def test_too_large_bloom_obj(self):
61 |         client = self.server.get_new_client()
62 |         # Set the max allowed size per bloom filter per bloom object
63 |         assert client.execute_command('CONFIG SET bf.bloom-memory-usage-limit 100') == b'OK'
64 |         obj_exceeds_size_err = "operation exceeds bloom object memory limit"
65 |         # Non Scaling
66 |         # Validate that when a cmd would have resulted in a bloom object creation with the starting filter with size
67 |         #
greater than allowed limit, the cmd is rejected. 68 | cmds = [ 69 | 'BF.RESERVE filter 0.001 100000', 70 | 'BF.INSERT filter error 0.00001 capacity 10000 items item1', 71 | 'BF.ADD filter item1', 72 | 'BF.MADD filter item1 item2', 73 | ] 74 | for cmd in cmds: 75 | self.verify_error_response(self.client, cmd, obj_exceeds_size_err) 76 | # Scaling 77 | # Validate that when scaling would have resulted in a filter with size greater than allowed limit, the cmd 78 | # is rejected. 79 | assert client.execute_command('CONFIG SET bf.bloom-memory-usage-limit 1000') == b'OK' 80 | cmds = [ 81 | 'BF.INSERT filter ITEMS', 82 | 'BF.ADD filter', 83 | 'BF.MADD filter', 84 | ] 85 | # Fill a filter to capacity. 86 | assert client.execute_command('BF.RESERVE filter 0.001 100 EXPANSION 10') == b'OK' 87 | error_count, add_operation_idx = self.add_items_till_capacity(client, "filter", 100, 1, "item_prefix") 88 | assert client.execute_command('BF.INFO filter CAPACITY') == 100 89 | assert client.execute_command('BF.INFO filter ITEMS') == 100 90 | assert client.execute_command('BF.INFO filter SIZE') > 400 91 | assert client.execute_command('BF.INFO filter FILTERS') == 1 92 | assert client.execute_command('BF.INFO filter EXPANSION') == 10 93 | # Validate that scale out is rejected with appropriate error. 94 | new_item_idx = 0 95 | for cmd in cmds: 96 | response = "" 97 | while obj_exceeds_size_err not in response: 98 | item = f"new_item{new_item_idx}" 99 | new_item_idx += 1 100 | if "BF.ADD" in cmd: 101 | response = self.verify_error_response(self.client,f"{cmd} {item}", obj_exceeds_size_err) 102 | else: 103 | response = str(client.execute_command(f"{cmd} {item}")) 104 | if "1" in response: 105 | assert False, f"{cmd} returned a value of 1 when it should have thrown an {obj_exceeds_size_err}" 106 | new_item_idx -= 1 107 | 108 | def test_large_allocation_when_below_maxmemory(self): 109 | two_megabytes = 2 * 1024 * 1024 110 | # The command below will result in an allocation greater than 2 MB. 
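        # (Standard bloom sizing as a rough check: a filter with fp rate p needs about
        # -ln(p)/ln(2)^2 ~= 14.4 bits per item at p = 0.001, so a 10M capacity reservation
        # is on the order of 17 MB, far above the 2 MB maxmemory configured below.)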
111 |         bloom_cmd_large_allocation = 'BF.RESERVE newfilter 0.001 10000000'
112 |         client = self.server.get_new_client()
113 |         assert client.execute_command("CONFIG SET maxmemory-policy allkeys-lru") == b"OK"
114 |         assert client.execute_command("CONFIG SET maxmemory {}".format(two_megabytes)) == b"OK"
115 |         used_memory = client.info()['used_memory']
116 |         maxmemory = client.info()['maxmemory']
117 |         client.execute_command('BF.ADD filter item1')
118 |         new_used_memory = client.info()['used_memory']
119 |         assert new_used_memory > used_memory and new_used_memory < maxmemory
120 |         assert client.execute_command(bloom_cmd_large_allocation) == b"OK"
121 |         assert client.execute_command('DBSIZE') < 2
122 |         assert client.info("Stats")['evicted_keys'] > 0
123 |         used_memory = client.info()['used_memory']
124 |         assert used_memory < maxmemory
125 |         client.execute_command('FLUSHALL')
126 |         client.execute_command('BF.ADD filter item1')
127 |         assert client.execute_command("CONFIG SET maxmemory-policy volatile-lru") == b"OK"
128 |         assert client.execute_command(bloom_cmd_large_allocation) == b"OK"
129 |         assert client.execute_command('DBSIZE') == 2
130 |         used_memory = client.info()['used_memory']
131 |         assert used_memory > maxmemory
132 |
133 |     def test_large_allocation_when_above_maxmemory(self):
134 |         client = self.server.get_new_client()
135 |         assert client.execute_command("CONFIG SET maxmemory-policy allkeys-lru") == b"OK"
136 |         used_memory = client.info()['used_memory']
137 |         client.execute_command('BF.ADD filter item1')
138 |         new_used_memory = client.info()['used_memory']
139 |         assert new_used_memory > used_memory
140 |         # Configure the server to now be over maxmemory with the allkeys-lru policy. Test that allocation fails.
141 |         assert client.execute_command("CONFIG SET maxmemory {}".format(used_memory)) == b"OK"
142 |         bloom_cmd_large_allocation = 'BF.RESERVE newfilter 0.001 10000000'
143 |         self.verify_error_response(self.client, bloom_cmd_large_allocation, "command not allowed when used memory > 'maxmemory'.")
144 |         assert client.info("Errorstats")['errorstat_OOM']['count'] == 1
145 |         # Configure the server to now be over maxmemory with the volatile-lru policy. Test that allocation fails.
146 |         assert client.execute_command("CONFIG SET maxmemory-policy volatile-lru") == b"OK"
147 |         self.verify_error_response(self.client, bloom_cmd_large_allocation, "command not allowed when used memory > 'maxmemory'.")
148 |         assert client.info("Errorstats")['errorstat_OOM']['count'] == 2
149 |
150 |     def test_module_data_type(self):
151 |         # Validate the name of the Module data type.
152 |         client = self.server.get_new_client()
153 |         assert client.execute_command('BF.ADD filter item1') == 1
154 |         type_result = client.execute_command('TYPE filter')
155 |         assert type_result == b"bloomfltr"
156 |         # Validate the encoding of the Module data type.
157 | encoding_result = client.execute_command('OBJECT ENCODING filter') 158 | assert encoding_result == b"raw" 159 | 160 | def test_bloom_obj_access(self): 161 | client = self.server.get_new_client() 162 | # check bloom filter with basic valkey command 163 | # cmd touch 164 | assert client.execute_command('BF.ADD key1 val1') == 1 165 | assert client.execute_command('BF.ADD key2 val2') == 1 166 | assert client.execute_command('TOUCH key1 key2') == 2 167 | assert client.execute_command('TOUCH key3') == 0 168 | self.verify_server_key_count(client, 2) 169 | assert client.execute_command('DBSIZE') == 2 170 | random_key = client.execute_command('RANDOMKEY') 171 | assert random_key == b"key1" or random_key == b"key2" 172 | 173 | def test_bloom_transaction(self): 174 | client = self.server.get_new_client() 175 | # cmd multi, exec 176 | assert client.execute_command('MULTI') == b'OK' 177 | assert client.execute_command('BF.ADD M1 V1') == b'QUEUED' 178 | assert client.execute_command('BF.ADD M2 V2') == b'QUEUED' 179 | assert client.execute_command('BF.EXISTS M1 V1') == b'QUEUED' 180 | assert client.execute_command('DEL M1') == b'QUEUED' 181 | assert client.execute_command('BF.EXISTS M1 V1') == b'QUEUED' 182 | assert client.execute_command('EXEC') == [1, 1, 1, 1, 0] 183 | self.verify_bloom_filter_item_existence(client, 'M2', 'V2') 184 | self.verify_bloom_filter_item_existence(client, 'M1', 'V1', should_exist=False) 185 | self.verify_server_key_count(client, 1) 186 | 187 | def test_bloom_lua(self): 188 | client = self.server.get_new_client() 189 | # lua 190 | load_filter = """ 191 | redis.call('BF.ADD', 'LUA1', 'ITEM1'); 192 | redis.call('BF.ADD', 'LUA2', 'ITEM2'); 193 | redis.call('BF.MADD', 'LUA2', 'ITEM3', 'ITEM4', 'ITEM5'); 194 | """ 195 | client.eval(load_filter, 0) 196 | assert client.execute_command('BF.MEXISTS LUA2 ITEM1 ITEM3 ITEM4') == [0, 1, 1] 197 | self.verify_server_key_count(client, 2) 198 | 199 | def test_bloom_deletes(self): 200 | client = self.server.get_new_client() 201 | # delete 202 | assert client.execute_command('BF.ADD filter1 item1') == 1 203 | self.verify_bloom_filter_item_existence(client, 'filter1', 'item1') 204 | self.verify_server_key_count(client, 1) 205 | assert client.execute_command('DEL filter1') == 1 206 | self.verify_bloom_filter_item_existence(client, 'filter1', 'item1', should_exist=False) 207 | self.verify_server_key_count(client, 0) 208 | 209 | # flush 210 | self.create_bloom_filters_and_add_items(client, number_of_bf=10) 211 | self.verify_server_key_count(client, 10) 212 | assert client.execute_command('FLUSHALL') 213 | self.verify_server_key_count(client, 0) 214 | 215 | # unlink 216 | assert client.execute_command('BF.ADD A ITEMA') == 1 217 | assert client.execute_command('BF.ADD B ITEMB') == 1 218 | self.verify_bloom_filter_item_existence(client, 'A', 'ITEMA') 219 | self.verify_bloom_filter_item_existence(client, 'B', 'ITEMB') 220 | self.verify_bloom_filter_item_existence(client, 'C', 'ITEMC', should_exist=False) 221 | self.verify_server_key_count(client, 2) 222 | assert client.execute_command('UNLINK A B C') == 2 223 | assert client.execute_command('BF.MEXISTS A ITEMA ITEMB') == [0, 0] 224 | self.verify_bloom_filter_item_existence(client, 'A', 'ITEMA', should_exist=False) 225 | self.verify_bloom_filter_item_existence(client, 'B', 'ITEMB', should_exist=False) 226 | self.verify_server_key_count(client, 0) 227 | 228 | def test_bloom_expiration(self): 229 | client = self.server.get_new_client() 230 | # expiration 231 | # cmd object idletime 232 | 
self.verify_server_key_count(client, 0) 233 | assert client.execute_command('BF.ADD TEST_IDLE val3') == 1 234 | self.verify_bloom_filter_item_existence(client, 'TEST_IDLE', 'val3') 235 | self.verify_server_key_count(client, 1) 236 | time.sleep(1) 237 | assert client.execute_command('OBJECT IDLETIME test_idle') == None 238 | assert client.execute_command('OBJECT IDLETIME TEST_IDLE') > 0 239 | # cmd ttl, expireat 240 | assert client.execute_command('BF.ADD TEST_EXP ITEM') == 1 241 | assert client.execute_command('TTL TEST_EXP') == -1 242 | self.verify_bloom_filter_item_existence(client, 'TEST_EXP', 'ITEM') 243 | self.verify_server_key_count(client, 2) 244 | curr_time = int(time.time()) 245 | assert client.execute_command(f'EXPIREAT TEST_EXP {curr_time + 5}') == 1 246 | wait_for_equal(lambda: client.execute_command('BF.EXISTS TEST_EXP ITEM'), 0) 247 | self.verify_server_key_count(client, 1) 248 | # cmd persist 249 | assert client.execute_command('BF.ADD TEST_PERSIST ITEM') == 1 250 | assert client.execute_command('TTL TEST_PERSIST') == -1 251 | self.verify_bloom_filter_item_existence(client, 'TEST_PERSIST', 'ITEM') 252 | self.verify_server_key_count(client, 2) 253 | assert client.execute_command(f'EXPIREAT TEST_PERSIST {curr_time + 100000}') == 1 254 | assert client.execute_command('TTL TEST_PERSIST') > 0 255 | assert client.execute_command('PERSIST TEST_PERSIST') == 1 256 | assert client.execute_command('TTL TEST_PERSIST') == -1 257 | 258 | def test_debug_cmd(self): 259 | client = self.server.get_new_client() 260 | default_obj = client.execute_command('BF.RESERVE default_obj 0.001 1000') 261 | default_object_digest = client.execute_command('DEBUG DIGEST-VALUE default_obj') 262 | 263 | # scenario1 validates that digest differs on bloom objects (with same properties) when different items are added. 264 | scenario1_obj = client.execute_command('BF.INSERT scenario1 error 0.001 capacity 1000 items 1') 265 | scenario1_object_digest = client.execute_command('DEBUG DIGEST-VALUE scenario1') 266 | assert scenario1_obj != default_obj 267 | assert scenario1_object_digest != default_object_digest 268 | 269 | # scenario2 validates that digest differs on bloom objects with different false positive rate. 270 | scenario2_obj = client.execute_command('BF.INSERT scenario2 error 0.002 capacity 1000 items 1') 271 | scenario2_object_digest = client.execute_command('DEBUG DIGEST-VALUE scenario2') 272 | assert scenario2_obj != default_obj 273 | assert scenario2_object_digest != default_object_digest 274 | 275 | # scenario3 validates that digest differs on bloom objects with different expansion. 276 | scenario3_obj = client.execute_command('BF.INSERT scenario3 error 0.002 capacity 1000 expansion 3 items 1') 277 | scenario3_object_digest = client.execute_command('DEBUG DIGEST-VALUE scenario3') 278 | assert scenario3_obj != default_obj 279 | assert scenario3_object_digest != default_object_digest 280 | 281 | 282 | # scenario4 validates that digest differs on bloom objects with different capacity. 283 | scenario4_obj = client.execute_command('BF.INSERT scenario4 error 0.001 capacity 2000 items 1') 284 | scenario4_object_digest = client.execute_command('DEBUG DIGEST-VALUE scenario4') 285 | assert scenario4_obj != default_obj 286 | assert scenario4_object_digest != default_object_digest 287 | 288 | # scenario5 validates that digest is equal on bloom objects with same properties and same items only when we are 289 | # using a fixed seed. Not when we are using a random seed. 
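# (Note: with a fixed seed, filters with identical properties hash items identically, so identical contents yield identical digests; a random per-object seed makes the underlying bit arrays, and therefore the digests, diverge even when the same items are added.)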
290 | is_random_seed = client.execute_command('CONFIG GET bf.bloom-use-random-seed') 291 | scenario5_obj = client.execute_command('BF.INSERT scenario5 error 0.001 capacity 1000 items 1') 292 | scenario5_object_digest = client.execute_command('DEBUG DIGEST-VALUE scenario5') 293 | assert scenario5_obj != default_obj 294 | assert scenario5_object_digest != default_object_digest 295 | 296 | # Add the same items to both the original and the new bloom object. 297 | client.execute_command('BF.MADD default_obj 1 2 3') 298 | client.execute_command('BF.MADD scenario5 2 3') 299 | madd_default_object_digest = client.execute_command('DEBUG DIGEST-VALUE default_obj') 300 | madd_scenario_object_digest = client.execute_command('DEBUG DIGEST-VALUE scenario5') 301 | if is_random_seed[1] == b'yes': 302 | assert madd_scenario_object_digest != madd_default_object_digest 303 | else: 304 | assert madd_scenario_object_digest == madd_default_object_digest 305 | 306 | # scenario 6 validates that digest differs on bloom objects after changing the tightening_ratio config 307 | client.execute_command('BF.RESERVE tightening_ratio 0.001 1000') 308 | assert self.client.execute_command('CONFIG SET bf.bloom-tightening-ratio 0.75') == b'OK' 309 | client.execute_command('BF.RESERVE tightening_ratio2 0.001 1000') 310 | scenario_tightening_ratio_object_digest = client.execute_command('DEBUG DIGEST-VALUE tightening_ratio') 311 | scenario_tightening_ratio2_digest = client.execute_command('DEBUG DIGEST-VALUE tightening_ratio2') 312 | assert scenario_tightening_ratio_object_digest != scenario_tightening_ratio2_digest 313 | 314 | # scenario 7 validates that digest differs on bloom objects after changing the fp_rate config 315 | client.execute_command('BF.INSERT fp_rate capacity 1000 items 1') 316 | assert self.client.execute_command('CONFIG SET bf.bloom-fp-rate 0.5') == b'OK' 317 | client.execute_command('BF.INSERT fp_rate2 capacity 1000 items 1') 318 | fp_rate_object_digest = client.execute_command('DEBUG DIGEST-VALUE fp_rate') 319 | scenario_fp_rate2_digest = client.execute_command('DEBUG DIGEST-VALUE fp_rate2') 320 | assert fp_rate_object_digest != scenario_fp_rate2_digest 321 | 322 | def test_bloom_wrong_type(self): 323 | # List of all bloom commands 324 | bloom_commands = [ 325 | 'BF.ADD key item', 326 | 'BF.EXISTS key item', 327 | 'BF.MADD key item1 item2 item3', 328 | 'BF.MEXISTS key item2 item3 item4', 329 | 'BF.INSERT key ITEMS item', 330 | 'BF.INFO key filters', 331 | 'BF.CARD key', 332 | 'BF.RESERVE key 0.01 1000', 333 | ] 334 | client = self.server.get_new_client() 335 | # Set the key we try to perform bloom commands on 336 | client.execute_command("set key value") 337 | # Run each command and check we get the correct error returned 338 | for cmd in bloom_commands: 339 | cmd_name = cmd.split()[0] 340 | try: 341 | result = client.execute_command(cmd) 342 | assert False, f"{cmd_name} on existing non bloom object should fail, instead: {result}" 343 | except Exception as e: 344 | 345 | assert str(e) == "WRONGTYPE Operation against a key holding the wrong kind of value" 346 | 347 | def test_bloom_string_config_set(self): 348 | """ 349 | This is a test that validates the bloom string configuration set logic.
350 | """ 351 | assert self.client.execute_command('CONFIG SET bf.bloom-fp-rate 0.1') == b'OK' 352 | assert self.client.execute_command('CONFIG SET bf.bloom-tightening-ratio 0.75') == b'OK' 353 | 354 | assert self.client.execute_command('CONFIG GET bf.bloom-fp-rate')[1] == b'0.1' 355 | assert self.client.execute_command('CONFIG GET bf.bloom-tightening-ratio')[1] == b'0.75' 356 | try: 357 | assert self.client.execute_command('CONFIG SET bf.bloom-fp-rate 1.1') == b'ERR (0 < error rate range < 1)' 358 | except ResponseError as e: 359 | assert str(e) == f"CONFIG SET failed (possibly related to argument 'bf.bloom-fp-rate') - ERR (0 < error rate range < 1)" 360 | try: 361 | assert self.client.execute_command('CONFIG SET bf.bloom-tightening-ratio 1.75') == b'ERR (0 < tightening ratio range < 1)' 362 | except ResponseError as e: 363 | assert str(e) == f"CONFIG SET failed (possibly related to argument 'bf.bloom-tightening-ratio') - ERR (0 < tightening ratio range < 1)" 364 | 365 | def test_bloom_config_set_changes_default_creations(self): 366 | """ 367 | This is a test that validates the bloom configuration set logic changes the defualt creations for bloom objects 368 | """ 369 | assert self.client.execute_command('CONFIG SET bf.bloom-capacity 10000') == b'OK' 370 | assert self.client.execute_command('CONFIG SET bf.bloom-expansion 0') == b'OK' 371 | assert self.client.execute_command('CONFIG SET bf.bloom-fp-rate 0.75') == b'OK' 372 | assert self.client.execute_command('CONFIG SET bf.bloom-tightening-ratio 0.4') == b'OK' 373 | 374 | assert self.client.execute_command("BF.ADD changed_default item") == 1 375 | 376 | assert self.client.execute_command('BF.INFO changed_default CAPACITY') == 10000 377 | assert self.client.execute_command('BF.INFO changed_default ERROR') == str(0.75).encode() 378 | 379 | 380 | bf_info_full = self.client.execute_command("BF.INFO changed_default") 381 | assert b'Max scaled capacity' not in bf_info_full 382 | assert b'Tightening ratio' not in bf_info_full 383 | 384 | def test_bloom_dump_and_restore(self): 385 | """ 386 | This is a test that validates the bloom data has same debug digest value before and after using restore command 387 | """ 388 | client = self.server.get_new_client() 389 | client.execute_command('BF.INSERT original error 0.001 capacity 2000 items 1') 390 | dump = client.execute_command('DUMP original') 391 | dump_digest = client.execute_command('DEBUG DIGEST-VALUE original') 392 | client.execute_command('RESTORE', 'copy', 0, dump) 393 | restore_digest = client.execute_command('DEBUG DIGEST-VALUE copy') 394 | assert restore_digest == dump_digest 395 | -------------------------------------------------------------------------------- /tests/test_bloom_command.py: -------------------------------------------------------------------------------- 1 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase 2 | from valkeytestframework.conftest import resource_port_tracker 3 | 4 | class TestBloomCommand(ValkeyBloomTestCaseBase): 5 | 6 | def verify_command_arity(self, command, expected_arity): 7 | command_info = self.client.execute_command('COMMAND', 'INFO', command) 8 | actual_arity = command_info.get(command).get('arity') 9 | assert actual_arity == expected_arity, f"Arity mismatch for command '{command}'" 10 | 11 | def test_bloom_command_arity(self): 12 | self.verify_command_arity('BF.EXISTS', -1) 13 | self.verify_command_arity('BF.ADD', -1) 14 | self.verify_command_arity('BF.MEXISTS', -1) 15 | self.verify_command_arity('BF.MADD', -1) 16 | 
self.verify_command_arity('BF.CARD', -1) 17 | self.verify_command_arity('BF.RESERVE', -1) 18 | self.verify_command_arity('BF.INFO', -1) 19 | self.verify_command_arity('BF.INSERT', -1) 20 | 21 | def test_bloom_command_error(self): 22 | # test set up 23 | assert self.client.execute_command('BF.ADD key item') == 1 24 | assert self.client.execute_command('BF.RESERVE bf 0.01 1000') == b'OK' 25 | assert self.client.execute_command('BF.RESERVE non_scaling_info 0.01 1000 NONSCALING') == b'OK' 26 | basic_error_test_cases = [ 27 | # not found 28 | ('BF.INFO TEST404', 'not found'), 29 | # incorrect syntax and argument usage 30 | ('bf.info key item', 'invalid information value'), 31 | ('bf.info non_scaling_info TIGHTENING', 'invalid information value'), 32 | ('bf.info non_scaling_info MAXSCALEDCAPACITY', 'invalid information value'), 33 | ('bf.insert key CAPACITY 10000 ERROR 0.01 EXPANSION 0.99 NOCREATE NONSCALING ITEMS test1 test2 test3', 'bad expansion'), 34 | ('BF.INSERT KEY HELLO WORLD', 'unknown argument received'), 35 | ('BF.INSERT KEY error 2 ITEMS test1', '(0 < error rate range < 1)'), 36 | ('BF.INSERT KEY ERROR err ITEMS test1', 'bad error rate'), 37 | ('BF.INSERT KEY TIGHTENING tr ITEMS test1', 'bad tightening ratio'), 38 | ('BF.INSERT KEY TIGHTENING 2 ITEMS test1', '(0 < tightening ratio range < 1)'), 39 | ('BF.INSERT TEST_LIMIT ERROR 0.99999999999999999 ITEMS ERROR_RATE', '(0 < error rate range < 1)'), 40 | ('BF.INSERT TEST_LIMIT TIGHTENING 0.99999999999999999 ITEMS ERROR_RATE', '(0 < tightening ratio range < 1)'), 41 | ('BF.INSERT TEST_LIMIT CAPACITY 9223372036854775808 ITEMS CAP', 'bad capacity'), 42 | ('BF.INSERT TEST_LIMIT CAPACITY 0 ITEMS CAP0', '(capacity should be larger than 0)'), 43 | ('BF.INSERT TEST_LIMIT EXPANSION 4294967299 ITEMS EXPAN', 'bad expansion'), 44 | ('BF.INSERT TEST_NOCREATE NOCREATE ITEMS A B', 'not found'), 45 | ('BF.INSERT KEY HELLO', 'unknown argument received'), 46 | ('BF.INSERT KEY CAPACITY 1 ERROR 0.0000000001 VALIDATESCALETO 10000000 EXPANSION 1', 'provided VALIDATESCALETO causes false positive to degrade to 0'), 47 | ('BF.INSERT KEY VALIDATESCALETO 1000000000000', 'provided VALIDATESCALETO causes bloom object to exceed memory limit'), 48 | ('BF.INSERT KEY VALIDATESCALETO 1000000000000 NONSCALING', 'cannot use NONSCALING and VALIDATESCALETO options together'), 49 | ('BF.RESERVE KEY String 100', 'bad error rate'), 50 | ('BF.RESERVE KEY 0.99999999999999999 3000', '(0 < error rate range < 1)'), 51 | ('BF.RESERVE KEY 2 100', '(0 < error rate range < 1)'), 52 | ('BF.RESERVE KEY 0.01 String', 'bad capacity'), 53 | ('BF.RESERVE KEY 0.01 0.01', 'bad capacity'), 54 | ('BF.RESERVE KEY 0.01 -1', 'bad capacity'), 55 | ('BF.RESERVE KEY 0.01 9223372036854775808', 'bad capacity'), 56 | ('BF.RESERVE bf 0.01 1000', 'item exists'), 57 | ('BF.RESERVE TEST_CAP 0.50 0', '(capacity should be larger than 0)'), 58 | 59 | # wrong number of arguments 60 | ('BF.ADD TEST', 'wrong number of arguments for \'BF.ADD\' command'), 61 | ('BF.ADD', 'wrong number of arguments for \'BF.ADD\' command'), 62 | ('BF.ADD HELLO TEST WORLD', 'wrong number of arguments for \'BF.ADD\' command'), 63 | ('BF.CARD KEY ITEM', 'wrong number of arguments for \'BF.CARD\' command'), 64 | ('bf.card', 'wrong number of arguments for \'BF.CARD\' command'), 65 | ('BF.EXISTS', 'wrong number of arguments for \'BF.EXISTS\' command'), 66 | ('bf.exists item', 'wrong number of arguments for \'BF.EXISTS\' command'), 67 | ('bf.exists key item hello', 'wrong number of arguments for \'BF.EXISTS\' command'), 68 | ('BF.INFO', 
'wrong number of arguments for \'BF.INFO\' command'), 69 | ('bf.info key capacity size', 'wrong number of arguments for \'BF.INFO\' command'), 70 | ('BF.INSERT', 'wrong number of arguments for \'BF.INSERT\' command'), 71 | ('BF.INSERT MISS_ITEM EXPANSION 2 ITEMS', 'wrong number of arguments for \'BF.INSERT\' command'), 72 | ('BF.INSERT MISS_VAL ERROR 0.5 EXPANSION', 'wrong number of arguments for \'BF.INSERT\' command'), 73 | ('BF.INSERT MISS_VAL ERROR 0.5 CAPACITY', 'wrong number of arguments for \'BF.INSERT\' command'), 74 | ('BF.INSERT MISS_VAL EXPANSION 2 EXPANSION', 'wrong number of arguments for \'BF.INSERT\' command'), 75 | ('BF.INSERT MISS_VAL EXPANSION 1 error', 'wrong number of arguments for \'BF.INSERT\' command'), 76 | ('BF.MADD', 'wrong number of arguments for \'BF.MADD\' command'), 77 | ('BF.MADD KEY', 'wrong number of arguments for \'BF.MADD\' command'), 78 | ('BF.MEXISTS', 'wrong number of arguments for \'BF.MEXISTS\' command'), 79 | ('BF.MEXISTS INFO', 'wrong number of arguments for \'BF.MEXISTS\' command'), 80 | ('BF.RESERVE', 'wrong number of arguments for \'BF.RESERVE\' command'), 81 | ('BF.RESERVE KEY', 'wrong number of arguments for \'BF.RESERVE\' command'), 82 | ('BF.RESERVE KEY SSS', 'wrong number of arguments for \'BF.RESERVE\' command'), 83 | ('BF.RESERVE TT1 0.01 1 NONSCALING test1 test2 test3', 'wrong number of arguments for \'BF.RESERVE\' command'), 84 | ('BF.RESERVE TT 0.01 1 NONSCALING EXPANSION 1', 'wrong number of arguments for \'BF.RESERVE\' command'), 85 | ] 86 | 87 | for test_case in basic_error_test_cases: 88 | cmd = test_case[0] 89 | expected_err_reply = test_case[1] 90 | self.verify_error_response(self.client, cmd, expected_err_reply) 91 | 92 | def test_bloom_command_behavior(self): 93 | basic_behavior_test_case = [ 94 | ('BF.ADD key item', 1), 95 | ('BF.ADD key item', 0), 96 | ('BF.EXISTS key item', 1), 97 | ('BF.MADD key item item2', 2), 98 | ('BF.EXISTS key item', 1), 99 | ('BF.EXISTS key item2', 1), 100 | ('BF.MADD hello world1 world2 world3', 3), 101 | ('BF.MADD hello world1 world2 world3 world4', 4), 102 | ('BF.MEXISTS hello world5', 1), 103 | ('BF.MADD hello world5', 1), 104 | ('BF.MEXISTS hello world5 world6 world7', 3), 105 | ('BF.INSERT TEST ITEMS ITEM', 1), 106 | ('BF.INSERT TEST CAPACITY 1000 ITEMS ITEM', 1), 107 | ('BF.INSERT TEST CAPACITY 200 error 0.50 ITEMS ITEM ITEM1 ITEM2', 3), 108 | ('BF.INSERT TEST CAPACITY 300 ERROR 0.50 EXPANSION 1 ITEMS ITEM FOO', 2), 109 | ('BF.INSERT TEST ERROR 0.50 EXPANSION 3 NOCREATE items BOO', 1), 110 | ('BF.INSERT TEST ERROR 0.50 EXPANSION 1 NOCREATE NONSCALING items BOO', 1), 111 | ('BF.INSERT TEST_EXPANSION EXPANSION 9 ITEMS ITEM', 1), 112 | ('BF.INSERT TEST_CAPACITY CAPACITY 2000 ITEMS ITEM', 1), 113 | ('BF.INSERT TEST_ITEMS ITEMS 1 2 3 EXPANSION 2', 5), 114 | ('BF.INSERT TEST_VAL_SCALE_1 CAPACITY 200 VALIDATESCALETO 1000000 error 0.0001 ITEMS ITEM ITEM1 ITEM2', 3), 115 | ('BF.INSERT TEST_VAL_SCALE_2 CAPACITY 20000 VALIDATESCALETO 10000000 error 0.5 EXPANSION 4 ITEMS ITEM ITEM1 ITEM2', 3), 116 | ('BF.INSERT TEST_VAL_SCALE_3 CAPACITY 10400 VALIDATESCALETO 10410 error 0.0011 EXPANSION 1 ITEMS ITEM ITEM1 ITEM2', 3), 117 | ('BF.INSERT KEY', 0), 118 | ('BF.INSERT KEY EXPANSION 2', 0), 119 | ('BF.INFO TEST Capacity', 100), 120 | ('BF.INFO TEST ITEMS', 5), 121 | ('BF.INFO TEST filters', 1), 122 | ('bf.info TEST expansion', 2), 123 | ('BF.INFO TEST_EXPANSION EXPANSION', 9), 124 | ('BF.INFO TEST_CAPACITY CAPACITY', 2000), 125 | ('BF.INFO TEST MAXSCALEDCAPACITY', 26214300), 126 | ('BF.INFO TEST_VAL_SCALE_1 
ERROR', b'0.0001'), 127 | ('BF.INFO TEST_VAL_SCALE_2 ERROR', b'0.5'), 128 | ('BF.INFO TEST TIGHTENING', b'0.5'), 129 | ('BF.CARD key', 3), 130 | ('BF.CARD hello', 5), 131 | ('BF.CARD TEST', 5), 132 | ('bf.card HELLO', 0), 133 | ('BF.RESERVE bf 0.01 1000', b'OK'), 134 | ('BF.EXISTS bf non_existant', 0), 135 | ('BF.RESERVE bf_exp 0.01 1000 EXPANSION 2', b'OK'), 136 | ('BF.RESERVE bf_non 0.01 1000 NONSCALING', b'OK'), 137 | ('bf.info bf_exp expansion', 2), 138 | ('BF.INFO bf_non expansion', None), 139 | ] 140 | 141 | for test_case in basic_behavior_test_case: 142 | cmd = test_case[0] 143 | # For non multi commands, this is the verbatim expected result. 144 | # For multi commands, test_case[1] contains the number of item add/exists results which are expected to be 0 or 1. 145 | expected_result = test_case[1] 146 | # For cardinality commands, we add items until we reach the expected number of items, then check that cardinality worked 147 | if cmd.upper().startswith("BF.CARD"): 148 | self.add_items_till_capacity(self.client, cmd.split()[-1], expected_result, 1, "item_prefix") 149 | # For multi commands, the expected result is the length of the expected return, while for other 150 | # commands we have the literal expected result 151 | self.verify_command_success_reply(self.client, cmd, expected_result) 152 | 153 | # test bf.info 154 | assert self.client.execute_command('BF.RESERVE BF_INFO_NON_SCALING 0.50 2000 NONSCALING') == b'OK' 155 | assert self.client.execute_command('BF.RESERVE BF_INFO_SCALING 0.50 2000') == b'OK' 156 | 157 | for bf_name in ['BF_INFO_NON_SCALING', 'BF_INFO_SCALING']: 158 | bf_info = self.client.execute_command(f'BF.INFO {bf_name}') 159 | 160 | capacity_index = bf_info.index(b'Capacity') + 1 161 | filter_index = bf_info.index(b'Number of filters') + 1 162 | item_index = bf_info.index(b'Number of items inserted') + 1 163 | error_rate_index = bf_info.index(b'Error rate') + 1 164 | expansion_index = bf_info.index(b'Expansion rate') + 1 165 | 166 | assert bf_info[capacity_index] == self.client.execute_command(f'BF.INFO {bf_name} CAPACITY') == 2000 167 | assert bf_info[filter_index] == self.client.execute_command(f'BF.INFO {bf_name} FILTERS') == 1 168 | assert bf_info[item_index] == self.client.execute_command(f'BF.INFO {bf_name} ITEMS') == 0 169 | assert bf_info[error_rate_index] == self.client.execute_command(f'BF.INFO {bf_name} ERROR') == str(0.5).encode() 170 | 171 | if bf_name == 'BF_INFO_SCALING': 172 | assert bf_info[expansion_index] == self.client.execute_command(f'BF.INFO {bf_name} EXPANSION') == 2 173 | # Check scaling specific fields 174 | max_scaled_capacity_index = bf_info.index(b'Max scaled capacity') + 1 175 | tightening_ratio_index = bf_info.index(b'Tightening ratio') + 1 176 | assert bf_info[max_scaled_capacity_index] == self.client.execute_command(f'BF.INFO {bf_name} MAXSCALEDCAPACITY') == 32766000 177 | assert bf_info[tightening_ratio_index] == self.client.execute_command(f'BF.INFO {bf_name} TIGHTENING') == str(0.5).encode() 178 | else: 179 | # For non-scaling, expansion should be None 180 | assert bf_info[expansion_index] == self.client.execute_command(f'BF.INFO {bf_name} EXPANSION') == None 181 | # Check scaling specific fields don't appear in info 182 | assert b'Max scaled capacity' not in bf_info 183 | assert b'Tightening ratio' not in bf_info 184 | -------------------------------------------------------------------------------- /tests/test_bloom_correctness.py: -------------------------------------------------------------------------------- 1 | 
from valkeytestframework.conftest import resource_port_tracker 2 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase 3 | 4 | class TestBloomCorrectness(ValkeyBloomTestCaseBase): 5 | 6 | def test_non_scaling_filter(self): 7 | client = self.server.get_new_client() 8 | item_prefix = self.generate_random_string() 9 | # 1 in every 1000 operations is expected to be a false positive. 10 | expected_fp_rate = 0.001 11 | capacity = 10000 12 | # Create a non scaling bloom filter and validate its behavior. 13 | filter_name = "filter1" 14 | assert client.execute_command(f'BF.RESERVE {filter_name} {expected_fp_rate} {capacity} NONSCALING') == b"OK" 15 | # Add items and fill the filter to capacity. 16 | error_count, add_operation_idx = self.add_items_till_capacity(client, filter_name, capacity, 1, item_prefix) 17 | new_item_idx = self.add_items_till_nonscaling_failure(client, filter_name, add_operation_idx, item_prefix) 18 | self.validate_nonscaling_failure(client, filter_name, item_prefix, new_item_idx) 19 | # Validate that it is filled. 20 | info = client.execute_command(f'BF.INFO {filter_name}') 21 | it = iter(info) 22 | info_dict = dict(zip(it, it)) 23 | assert info_dict[b'Capacity'] == capacity 24 | assert info_dict[b'Number of items inserted'] == capacity 25 | assert info_dict[b'Number of filters'] == 1 26 | assert info_dict[b'Size'] > 0 27 | assert info_dict[b'Expansion rate'] == None 28 | assert info_dict[b'Error rate'] == str(expected_fp_rate).encode() 29 | 30 | assert b"Max scaled capacity" not in info_dict 31 | # Use a margin on the expected_fp_rate when asserting for correctness. 32 | fp_margin = 0.002 33 | # Validate that item "add" operations on bloom filters are ensuring correctness. 34 | # False positives should be close to the configured fp_rate. 35 | self.fp_assert(error_count, add_operation_idx, expected_fp_rate, fp_margin) 36 | # Validate item "exists" operations on bloom filters are ensuring correctness. 37 | # This tests for items already added to the filter and expects them to exist. 38 | # False negatives should not be possible. 39 | error_count, num_operations = self.check_items_exist( 40 | client, 41 | filter_name, 42 | 1, 43 | add_operation_idx, 44 | True, 45 | item_prefix, 46 | ) 47 | assert error_count == 0 48 | # This tests for items which are not added to the filter and expects them to not exist. 49 | # False positives should be close to the configured fp_rate. 50 | error_count, num_operations = self.check_items_exist( 51 | client, 52 | filter_name, 53 | add_operation_idx + 1, 54 | add_operation_idx * 2, 55 | False, 56 | item_prefix, 57 | ) 58 | self.fp_assert(error_count, num_operations, expected_fp_rate, fp_margin) 59 | # Validate correctness on a copy of a non scaling bloom filter. 60 | self.validate_copied_bloom_correctness(client, filter_name, item_prefix, add_operation_idx, expected_fp_rate, fp_margin, info_dict) 61 | 62 | def test_scaling_filter(self): 63 | client = self.server.get_new_client() 64 | item_prefix = self.generate_random_string() 65 | expected_fp_rate = 0.001 66 | expected_tightening_ratio = 0.5 67 | initial_capacity = 10000 68 | expansion = 2 69 | num_filters_to_scale = 5 70 | filter_name = "filter1" 71 | # Create a scaling bloom filter and validate its behavior.
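# (With EXPANSION 2, each added sub-filter doubles the previous capacity, so the total capacity after k sub-filters is initial_capacity * (2^k - 1); the max scaled capacity of 20470000 asserted below corresponds to 10000 * (2^11 - 1).)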
72 | assert client.execute_command(f'BF.RESERVE {filter_name} {expected_fp_rate} {initial_capacity} EXPANSION {expansion}') == b"OK" 73 | info = client.execute_command(f'BF.INFO {filter_name}') 74 | it = iter(info) 75 | info_dict = dict(zip(it, it)) 76 | assert info_dict[b'Capacity'] == initial_capacity 77 | assert info_dict[b'Number of items inserted'] == 0 78 | assert info_dict[b'Number of filters'] == 1 79 | assert info_dict[b'Size'] > 0 80 | assert info_dict[b'Expansion rate'] == expansion 81 | assert info_dict[b'Error rate'] == str(expected_fp_rate).encode() 82 | assert info_dict[b'Tightening ratio'] == str(expected_tightening_ratio).encode() 83 | assert info_dict[b'Max scaled capacity'] == 20470000 84 | 85 | # Scale out by adding items. 86 | total_error_count = 0 87 | add_operation_idx = 0 88 | for filter_idx in range(1, num_filters_to_scale + 1): 89 | expected_total_capacity = self.calculate_expected_capacity(initial_capacity, expansion, filter_idx) 90 | error_count, new_add_operation_idx = self.add_items_till_capacity(client, filter_name, expected_total_capacity, add_operation_idx + 1, item_prefix) 91 | add_operation_idx = new_add_operation_idx 92 | total_error_count += error_count 93 | # Validate from BF.INFO that the filter is scaling correctly. 94 | info = client.execute_command(f'BF.INFO {filter_name}') 95 | it = iter(info) 96 | info_dict = dict(zip(it, it)) 97 | assert info_dict[b'Capacity'] == expected_total_capacity 98 | assert info_dict[b'Number of items inserted'] == expected_total_capacity 99 | assert info_dict[b'Number of filters'] == filter_idx 100 | assert info_dict[b'Size'] > 0 101 | assert info_dict[b'Expansion rate'] == expansion 102 | assert info_dict[b'Error rate'] == str(expected_fp_rate).encode() 103 | assert info_dict[b'Tightening ratio'] == str(expected_tightening_ratio).encode() 104 | 105 | assert info_dict[b'Max scaled capacity'] == 20470000 106 | 107 | # Use a margin on the expected_fp_rate when asserting for correctness. 108 | fp_margin = 0.002 109 | # Validate that item "add" operations on bloom filters are ensuring correctness. 110 | # False positives should be close to the configured fp_rate. 111 | self.fp_assert(total_error_count, add_operation_idx, expected_fp_rate, fp_margin) 112 | # Validate item "exists" operations on bloom filters are ensuring correctness. 113 | # This tests for items already added to the filter and expects them to exist. 114 | # False negatives should not be possible. 115 | error_count, num_operations = self.check_items_exist( 116 | client, 117 | filter_name, 118 | 1, 119 | add_operation_idx, 120 | True, 121 | item_prefix, 122 | ) 123 | assert error_count == 0 124 | # This tests for items which are not added to the filter and expects them to not exist. 125 | # False positives should be close to the configured fp_rate. 126 | error_count, num_operations = self.check_items_exist( 127 | client, 128 | filter_name, 129 | add_operation_idx + 1, 130 | add_operation_idx * 2, 131 | False, 132 | item_prefix, 133 | ) 134 | self.fp_assert(error_count, num_operations, expected_fp_rate, fp_margin) 135 | # Track INFO on the scaled out bloom filter. 136 | info = client.execute_command(f'BF.INFO {filter_name}') 137 | it = iter(info) 138 | info_dict = dict(zip(it, it)) 139 | # Validate correctness on a copy of a scaling bloom filter.
140 | self.validate_copied_bloom_correctness(client, filter_name, item_prefix, add_operation_idx, expected_fp_rate, fp_margin, info_dict) 141 | 142 | def test_max_and_validate_scale_to_correctness(self): 143 | validate_scale_to_commands = [ 144 | ('BF.INSERT MemLimitKey EXPANSION 25 ERROR 0.00000000000000001 VALIDATESCALETO 1627601', "provided VALIDATESCALETO causes bloom object to exceed memory limit" ), 145 | ('BF.INSERT key EXPANSION 1 VALIDATESCALETO 101601', "provided VALIDATESCALETO causes false positive to degrade to 0" ) 146 | ] 147 | for cmd in validate_scale_to_commands: 148 | try: 149 | self.client.execute_command(cmd[0]) 150 | assert False, "Expect BF.INSERT to fail if the requested capacity would cause an error" 151 | except Exception as e: 152 | assert cmd[1] == str(e), f"Unexpected error message: {e}" 153 | self.client.execute_command('BF.INSERT MemLimitKey EXPANSION 25 ERROR 0.00000000000000001 VALIDATESCALETO 1627600') 154 | self.client.execute_command('BF.INSERT FPKey VALIDATESCALETO 101600 EXPANSION 1') 155 | FPKey_max_capacity = self.client.execute_command(f'BF.INFO FPKey MAXSCALEDCAPACITY') 156 | MemLimitKeyMaxCapacity = self.client.execute_command(f'BF.INFO MemLimitKey MAXSCALEDCAPACITY') 157 | self.add_items_till_capacity(self.client, "FPKey", 101600, 1, "item") 158 | self.add_items_till_capacity(self.client, "MemLimitKey", 1627600, 1, "item") 159 | key_names = [("MemLimitKey", MemLimitKeyMaxCapacity, "operation exceeds bloom object memory limit"), ("FPKey", FPKey_max_capacity, "false positive degrades to 0 on scale out")] 160 | for key in key_names: 161 | try: 162 | self.add_items_till_capacity(self.client, key[0], key[1]+1, 1, "new_item") 163 | assert False, "Expect that adding items after reaching max capacity fails" 164 | except Exception as e: 165 | assert key[2] in str(e) 166 | # Check that max capacity doesn't change even after adding items.
167 | assert self.client.execute_command(f'BF.INFO {key[0]} MAXSCALEDCAPACITY') == key[1] 168 | -------------------------------------------------------------------------------- /tests/test_bloom_defrag.py: -------------------------------------------------------------------------------- 1 | import time 2 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase 3 | from valkeytestframework.conftest import resource_port_tracker 4 | from valkeytestframework.util.waiters import * 5 | import pytest 6 | 7 | @pytest.mark.skip_for_asan(reason="These tests are skipped due to not being able to set activedefrag to yes when valkey server is an ASAN build") 8 | class TestBloomDefrag(ValkeyBloomTestCaseBase): 9 | 10 | @pytest.mark.parametrize("initial_capacity", [1, 200]) 11 | def test_bloom_defrag(self, initial_capacity): 12 | # Set defragmentation thresholds 13 | self.client.config_set('activedefrag', 'no') 14 | self.client.config_set('active-defrag-ignore-bytes', '1') 15 | self.client.config_set('active-defrag-threshold-lower', '2') 16 | 17 | # Set a lower maxmemory 18 | max_memory = 20 * 1024 * 1024 19 | self.client.config_set('maxmemory', str(max_memory)) 20 | 21 | # Initial stats 22 | stats = self.parse_valkey_info("STATS") 23 | initial_defrag_hits = int(stats.get('active_defrag_hits', 0)) 24 | initial_defrag_misses = int(stats.get('active_defrag_misses', 0)) 25 | 26 | 27 | # Create list of key names we will operate with 28 | scale_names = [f'scale_{i}' for i in range(1, 2000)] 29 | # A list of the number of items we inserted into each Bloom object 30 | num_items_inserted_per_object = [] 31 | 32 | # Insert data 33 | for index, scale in enumerate(scale_names): 34 | self.client.execute_command(f'bf.reserve {scale} 0.001 {initial_capacity} EXPANSION 2') 35 | # new_add_operation_idx means every number from 1 to it should return 1 when checked with bf.exists 36 | _, new_add_operation_idx = self.add_items_till_capacity(self.client, scale, 100, 1, "") 37 | # We delete every other object so we only need to keep the ones with an odd index 38 | if index % 2 == 1: 39 | num_items_inserted_per_object.append(new_add_operation_idx) 40 | 41 | # Delete every other item to create fragmentation 42 | for scale in scale_names[::2]: 43 | self.client.execute_command(f'DEL {scale}') 44 | # Add a wait due to lazy delete: if we call info too early we won't get the correct memory info 45 | time.sleep(5) 46 | 47 | # Get the memory info before we start actively defragging 48 | memory_info_non_defragged = self.parse_valkey_info("MEMORY") 49 | 50 | # Enable defragmentation and defrag items. 51 | self.client.config_set('activedefrag', 'yes') 52 | # Wait for 5 seconds of defragging. Returned time is in milliseconds.
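# (wait_for_equal polls the lambda until it evaluates to True, i.e. until the server reports more than 5000ms of cumulative active defrag time.)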
53 | wait_for_equal(lambda: int(self.parse_valkey_info("STATS").get('total_active_defrag_time')) > 5000, True) 54 | 55 | # Get the memory info after we have defragged 56 | memory_info_after_defrag = self.parse_valkey_info("MEMORY") 57 | 58 | first_defrag_stats = self.parse_valkey_info("STATS") 59 | first_defrag_hits = int(first_defrag_stats.get('active_defrag_hits', 0)) 60 | first_defrag_misses = int(first_defrag_stats.get('active_defrag_misses', 0)) 61 | 62 | # Assert that we got hits and misses when defragging 63 | assert first_defrag_hits > initial_defrag_hits and first_defrag_misses > initial_defrag_misses 64 | assert float(memory_info_after_defrag.get('allocator_frag_ratio', 0)) < float(memory_info_non_defragged.get('allocator_frag_ratio', 0)) 65 | # Check that items we added still exist in the respective bloom objects 66 | self.check_values_present(scale_names, num_items_inserted_per_object) 67 | info_results = self.client.info("bf") 68 | assert info_results['bf_bloom_defrag_hits'] + info_results['bf_bloom_defrag_misses'] > 0 69 | self.client.execute_command('BGSAVE') 70 | self.server.wait_for_save_done() 71 | 72 | self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=True) 73 | assert self.server.is_alive() 74 | wait_for_equal(lambda: self.server.is_rdb_done_loading(), True) 75 | 76 | # Set config as we had before saving and restarting 77 | self.client.config_set('activedefrag', 'yes') 78 | self.client.config_set('active-defrag-ignore-bytes', '1') 79 | self.client.config_set('active-defrag-threshold-lower', '2') 80 | self.client.config_set('maxmemory', str(max_memory)) 81 | 82 | # Wait for 5 seconds of defragging. Returned time is in milliseconds. 83 | wait_for_equal(lambda: int(self.parse_valkey_info("STATS").get('total_active_defrag_time')) > 5000, True) 84 | 85 | final_stats = self.parse_valkey_info("STATS") 86 | final_defrag_hits = int(final_stats.get('active_defrag_hits', 0)) 87 | final_defrag_misses = int(final_stats.get('active_defrag_misses', 0)) 88 | assert final_defrag_hits > initial_defrag_hits or final_defrag_misses > initial_defrag_misses, "No defragmentation occurred after RDB load" 89 | # Check that items we added still exist in the respective bloom objects 90 | self.check_values_present(scale_names, num_items_inserted_per_object) 91 | info_results = self.client.info("bf") 92 | assert info_results['bf_bloom_defrag_hits'] + info_results['bf_bloom_defrag_misses'] > 0 93 | 94 | def check_values_present(self, scale_names, num_items_inserted_per_object): 95 | for index, scale in enumerate(scale_names[1::2]): 96 | # Create a list of numbers from 1 to the number of items inserted into the current bloomfilter 97 | num_items = num_items_inserted_per_object[index] 98 | items = list(range(1, num_items + 1)) 99 | 100 | # Perform a mexists for all the numbers 1 to num_items 101 | command = f'bf.mexists {scale} ' + ' '.join(map(str, items)) 102 | results = self.client.execute_command(command) 103 | # All items should be present, so we compare with an array of length num_items where all entries are 1 104 | expected_results = [1] * num_items 105 | assert results == expected_results, f"Unexpected results for scale {scale}: {results}" 106 | -------------------------------------------------------------------------------- /tests/test_bloom_keyspace.py: -------------------------------------------------------------------------------- 1 | import time 2 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase 3 | from valkeytestframework.conftest import resource_port_tracker
4 | 5 | class TestKeyEventNotifications(ValkeyBloomTestCaseBase): 6 | RESERVE_KEYSPACE_MESSAGE = {'type': 'pmessage', 'pattern': b'__key*__:*', 'channel': b'__keyspace@0__:intermediate_val', 'data': b'bloom.reserve'} 7 | RESERVE_KEYEVENT_MESSAGE = {'type': 'pmessage', 'pattern': b'__key*__:*', 'channel': b'__keyevent@0__:bloom.reserve', 'data': b'intermediate_val'} 8 | ADD_KEYSPACE_MESSAGE = {'type': 'pmessage', 'pattern': b'__key*__:*', 'channel': b'__keyspace@0__:intermediate_val', 'data': b'bloom.add'} 9 | ADD_KEYEVENT_MESSAGE = {'type': 'pmessage', 'pattern': b'__key*__:*', 'channel': b'__keyevent@0__:bloom.add', 'data': b'intermediate_val'} 10 | 11 | def create_expected_message_list(self, reserve_expected, add_expected, key_name): 12 | expected_messages = [] 13 | self.RESERVE_KEYSPACE_MESSAGE['channel'] = f"__keyspace@0__:{key_name}".encode('utf-8') 14 | self.RESERVE_KEYEVENT_MESSAGE['data'] = f"{key_name}".encode('utf-8') 15 | self.ADD_KEYSPACE_MESSAGE['channel'] = f"__keyspace@0__:{key_name}".encode('utf-8') 16 | self.ADD_KEYEVENT_MESSAGE['data'] = f"{key_name}".encode('utf-8') 17 | if reserve_expected: 18 | expected_messages.append(self.RESERVE_KEYEVENT_MESSAGE) 19 | expected_messages.append(self.RESERVE_KEYSPACE_MESSAGE) 20 | if add_expected: 21 | expected_messages.append(self.ADD_KEYSPACE_MESSAGE) 22 | expected_messages.append(self.ADD_KEYEVENT_MESSAGE) 23 | return expected_messages 24 | 25 | def check_response(self, result_messages, expected_messages): 26 | extra_message = self.keyspace_client_subscribe.get_message() 27 | if extra_message: 28 | assert False, f"Unexpected extra message returned: {extra_message}" 29 | for message in expected_messages: 30 | assert message in result_messages, f"{message} was not found in messages received" 31 | 32 | def get_subscribe_client_messages(self, client, cmd, expected_message_count): 33 | client.execute_command(cmd) 34 | count = 0 35 | messages = [] 36 | timeout = time.time() + 5 37 | while expected_message_count != count: 38 | message = self.keyspace_client_subscribe.get_message() 39 | if message: 40 | # Only the first time we get messages should we skip the first message received 41 | if count > 0 or "BF.ADD" not in cmd: 42 | messages.append(message) 43 | count = count + 1 44 | if timeout < time.time(): 45 | assert False, f"The expected number of messages failed to return in time, messages received so far {messages}" 46 | return messages 47 | 48 | def test_keyspace_bloom_commands(self): 49 | self.create_subscribe_clients() 50 | # The first call to get messages will return a message showing we subscribed, so the first time we look at 51 | # messages we expect one more message than we need to check for 52 | bloom_commands = [ 53 | ('BF.ADD add_test key', True, True, 5), 54 | ('BF.MADD madd_test key1 key2', True, True, 4), 55 | ('BF.EXISTS exists_test key', False, False, 0), 56 | ('BF.INSERT insert_test ITEMS key1 key2', True, True, 4), 57 | ('BF.RESERVE reserve_test 0.01 1000', True, False, 2) 58 | ] 59 | 60 | for command, reserve_expected, add_expected, expected_message_count in bloom_commands: 61 | expected_messages = self.create_expected_message_list(reserve_expected, add_expected, command.split()[1]) if reserve_expected else [] 62 | result_messages = self.get_subscribe_client_messages(self.keyspace_client, command, expected_message_count) 63 | self.check_response(result_messages, expected_messages) 64 | 65 | def create_subscribe_clients(self): 66 | self.keyspace_client = self.server.get_new_client() 67 | 
self.keyspace_client_subscribe = self.keyspace_client.pubsub() 68 | self.keyspace_client_subscribe.psubscribe('__key*__:*') 69 | self.keyspace_client.execute_command('CONFIG', 'SET', 'notify-keyspace-events', 'KEA') 70 | -------------------------------------------------------------------------------- /tests/test_bloom_metrics.py: -------------------------------------------------------------------------------- 1 | import time 2 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase 3 | from valkeytestframework.conftest import resource_port_tracker 4 | from valkeytestframework.util.waiters import * 5 | 6 | DEFAULT_BLOOM_FILTER_SIZE = 384 7 | DEFAULT_BLOOM_FILTER_CAPACITY = 100 8 | class TestBloomMetrics(ValkeyBloomTestCaseBase): 9 | 10 | def test_basic_command_metrics(self): 11 | # Check that bloom metrics stats start at 0 12 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), 0, 0, 0, 0, 0) 13 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), 0, 0, 0, 0, 0) 14 | 15 | # Create a default bloom filter, add an item and check its metrics values are correct 16 | self.add_items_till_capacity(self.client, "key", 1, 1, "item") 17 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE, 1, 1, 1, DEFAULT_BLOOM_FILTER_CAPACITY) 18 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), DEFAULT_BLOOM_FILTER_SIZE, 1, 1, 1, DEFAULT_BLOOM_FILTER_CAPACITY) 19 | 20 | # Check that other commands don't influence metrics 21 | assert(self.client.execute_command('BF.EXISTS key item1') == 1) 22 | assert(self.client.execute_command('BF.ADD key item2') == 1) 23 | assert(len(self.client.execute_command('BF.MADD key item3 item4')) == 2) 24 | assert(len(self.client.execute_command('BF.MEXISTS key item3 item5')) == 2) 25 | assert(1 <= self.client.execute_command('BF.CARD key') <= 4) 26 | self.client.execute_command("BF.INFO key") 27 | assert(len(self.client.execute_command('BF.INSERT key ITEMS item5 item6'))== 2) 28 | 29 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE, 1, 1, 6, DEFAULT_BLOOM_FILTER_CAPACITY) 30 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), DEFAULT_BLOOM_FILTER_SIZE, 1, 1, 6, DEFAULT_BLOOM_FILTER_CAPACITY) 31 | 32 | # Create a new default bloom filter and check metrics again 33 | assert(self.client.execute_command('BF.ADD key2 item') == 1) 34 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE*2, 2, 2, 7, DEFAULT_BLOOM_FILTER_CAPACITY * 2) 35 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), DEFAULT_BLOOM_FILTER_SIZE*2, 2, 2, 7, DEFAULT_BLOOM_FILTER_CAPACITY * 2) 36 | 37 | # Create a non default filter with BF.RESERVE and check its metrics are correct 38 | assert(self.client.execute_command('BF.RESERVE key3 0.001 2917251') == b'OK') 39 | info_obj = self.client.execute_command('BF.INFO key3') 40 | 41 | # We want to check the size of the newly created bloom filter but metrics contains the size of all bloomfilters so we must subtract the 42 | # two default bloomfilters we already created 43 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), info_obj[3] + DEFAULT_BLOOM_FILTER_SIZE * 2, 3, 3, 7, DEFAULT_BLOOM_FILTER_CAPACITY * 2 + 2917251) 44 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), info_obj[3] + DEFAULT_BLOOM_FILTER_SIZE * 2, 3, 3, 7, DEFAULT_BLOOM_FILTER_CAPACITY * 2 + 2917251) 45 | 46 | # Delete a non default key and make sure
the metrics stats are still correct 47 | self.client.execute_command('DEL key3') 48 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE * 2, 2, 2, 7, DEFAULT_BLOOM_FILTER_CAPACITY * 2) 49 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), DEFAULT_BLOOM_FILTER_SIZE * 2, 2, 2, 7, DEFAULT_BLOOM_FILTER_CAPACITY * 2) 50 | 51 | # Create a default filter with BF.INSERT and check its metrics are correct 52 | assert(self.client.execute_command('BF.INSERT key4 ITEMS item1 item2') == [1, 1]) 53 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE * 3, 3, 3, 9, DEFAULT_BLOOM_FILTER_CAPACITY * 3) 54 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), DEFAULT_BLOOM_FILTER_SIZE * 3, 3, 3, 9, DEFAULT_BLOOM_FILTER_CAPACITY * 3) 55 | 56 | # Delete a default key and make sure the metrics are still correct 57 | self.client.execute_command('UNLINK key') 58 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE * 2, 2, 2, 3, DEFAULT_BLOOM_FILTER_CAPACITY * 2) 59 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), DEFAULT_BLOOM_FILTER_SIZE * 2, 2, 2, 3, DEFAULT_BLOOM_FILTER_CAPACITY * 2) 60 | 61 | # Create a key then cause it to expire and check if metrics are updated correctly 62 | assert self.client.execute_command('BF.ADD TEST_EXP ITEM') == 1 63 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE * 3, 3, 3, 4, DEFAULT_BLOOM_FILTER_CAPACITY * 3) 64 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), DEFAULT_BLOOM_FILTER_SIZE * 3, 3, 3, 4, DEFAULT_BLOOM_FILTER_CAPACITY * 3) 65 | assert self.client.execute_command('TTL TEST_EXP') == -1 66 | self.verify_bloom_filter_item_existence(self.client, 'TEST_EXP', 'ITEM') 67 | curr_time = int(time.time()) 68 | assert self.client.execute_command(f'EXPIREAT TEST_EXP {curr_time + 5}') == 1 69 | wait_for_equal(lambda: self.client.execute_command('BF.EXISTS TEST_EXP ITEM'), 0) 70 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE * 2, 2, 2, 3, DEFAULT_BLOOM_FILTER_CAPACITY * 2) 71 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), DEFAULT_BLOOM_FILTER_SIZE * 2, 2, 2, 3, DEFAULT_BLOOM_FILTER_CAPACITY * 2) 72 | 73 | # Flush database so all keys should now be gone and metrics should all be at 0 74 | self.client.execute_command('FLUSHDB') 75 | wait_for_equal(lambda: self.client.execute_command('DBSIZE'), 0) 76 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), 0, 0, 0, 0, 0) 77 | self.verify_bloom_metrics(self.client.execute_command("INFO Modules"), 0, 0, 0, 0, 0) 78 | 79 | def test_scaled_bloomfilter_metrics(self): 80 | self.client.execute_command('BF.RESERVE key1 0.001 7000') 81 | # Get original size to compare against size after scaled 82 | info_obj = self.client.execute_command('BF.INFO key1') 83 | # Add keys until bloomfilter will scale out 84 | self.add_items_till_capacity(self.client, "key1", 7500, 1, "item_prefix") 85 | 86 | # Check info for scaled bloomfilter matches metrics data for bloomfilter 87 | new_info_obj = self.client.execute_command(f'BF.INFO key1') 88 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), new_info_obj[3], 1, 2, 7500, 21000) 89 | 90 | # Check bloomfilter size has increased 91 | assert new_info_obj[3] > info_obj[3] 92 | 93 | # Delete the scaled bloomfilter to check both filters are deleted and metrics stats 
are set accordingly 94 | self.client.execute_command('DEL key1') 95 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), 0, 0, 0, 0, 0) 96 | 97 | 98 | def test_copy_metrics(self): 99 | # Create a bloomfilter, add one item and copy it 100 | self.add_items_till_capacity(self.client, "originalKey", 1, 1, "item_prefix") 101 | assert(self.client.execute_command('COPY originalKey copiedkey') == 1) 102 | 103 | # Verify that the metrics were updated correctly after copying 104 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), DEFAULT_BLOOM_FILTER_SIZE * 2, 2, 2, 2, DEFAULT_BLOOM_FILTER_CAPACITY * 2) 105 | 106 | # Perform a FLUSHALL which should set all metrics data to 0 107 | self.client.execute_command('FLUSHALL') 108 | wait_for_equal(lambda: self.client.execute_command('DBSIZE'), 0) 109 | self.verify_bloom_metrics(self.client.execute_command("INFO bf"), 0, 0, 0, 0, 0) 110 | 111 | 112 | def test_save_and_restore_metrics(self): 113 | # Create default bloom filter and add one item 114 | self.add_items_till_capacity(self.client, "nonscaledfilter", 1, 1, "item_prefix") 115 | 116 | # Create scaled bloom filter and add 7500 items to trigger a scale out. 117 | self.client.execute_command('BF.RESERVE key1 0.001 7000') 118 | self.add_items_till_capacity(self.client, "key1", 7500, 1, "item_prefix") 119 | 120 | # Get info and metrics stats of bloomfilter before rdb load 121 | original_info_obj = self.client.execute_command('BF.INFO key1') 122 | 123 | self.client.execute_command('BGSAVE') 124 | self.server.wait_for_save_done() 125 | 126 | # Restart, and verify metrics are correct 127 | self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=True) 128 | 129 | # Compare original and loaded scaled bloomfilter infos 130 | new_client = self.server.get_new_client() 131 | restored_info_obj = new_client.execute_command('BF.INFO key1') 132 | for i in range(1, len(original_info_obj), 2): 133 | assert original_info_obj[i] == restored_info_obj[i] 134 | self.verify_bloom_metrics(new_client.execute_command("INFO bf"), original_info_obj[3] + DEFAULT_BLOOM_FILTER_SIZE, 2, 3, 7501, 21000 + DEFAULT_BLOOM_FILTER_CAPACITY) 135 | -------------------------------------------------------------------------------- /tests/test_bloom_replication.py: -------------------------------------------------------------------------------- 1 | import pytest, os 2 | from valkey import ResponseError 3 | from valkeytestframework.valkey_test_case import ReplicationTestCase 4 | from valkeytestframework.conftest import resource_port_tracker 5 | 6 | class TestBloomReplication(ReplicationTestCase): 7 | 8 | # Global Parameterized Configs 9 | use_random_seed = 'no' 10 | 11 | @pytest.fixture(autouse=True) 12 | def setup_test(self, setup): 13 | self.args = {"enable-debug-command":"yes", 'loadmodule': os.getenv('MODULE_PATH'),'bf.bloom-use-random-seed': self.use_random_seed} 14 | server_path = f"{os.path.dirname(os.path.realpath(__file__))}/build/binaries/{os.environ['SERVER_VERSION']}/valkey-server" 15 | 16 | self.server, self.client = self.create_server(testdir = self.testdir, server_path=server_path, args=self.args) 17 | 18 | @pytest.fixture(autouse=True) 19 | def use_random_seed_fixture(self, bloom_config_parameterization): 20 | if bloom_config_parameterization == "random-seed": 21 | self.use_random_seed = "yes" 22 | elif bloom_config_parameterization == "fixed-seed": 23 | self.use_random_seed = "no" 24 | 25 | def validate_cmd_stats(self, primary_cmd, replica_cmd, expected_primary_calls, 
expected_replica_calls): 26 | """ 27 | Helper fn to validate cmd count on primary & replica for non BF.RESERVE cases for object creation & item add. 28 | """ 29 | primary_cmd_stats = self.client.info("Commandstats")['cmdstat_' + primary_cmd] 30 | assert primary_cmd_stats["calls"] == expected_primary_calls 31 | replica_cmd_stats = self.replicas[0].client.info("Commandstats")['cmdstat_' + replica_cmd] 32 | assert replica_cmd_stats["calls"] == expected_replica_calls 33 | 34 | def validate_reserve_cmd_stats(self, primary_reserve_count, primary_add_count, replica_insert_count, replica_add_count): 35 | """ 36 | Helper fn to validate cmd count on primary & replica for the BF.RESERVE case for object creation & item add. 37 | """ 38 | primary_cmd_stats = self.client.info("Commandstats") 39 | replica_cmd_stats = self.replicas[0].client.info("Commandstats") 40 | assert primary_cmd_stats['cmdstat_BF.RESERVE']["calls"] == primary_reserve_count and primary_cmd_stats['cmdstat_BF.ADD']["calls"] == primary_add_count 41 | assert replica_cmd_stats['cmdstat_BF.INSERT']["calls"] == replica_insert_count and replica_cmd_stats['cmdstat_BF.ADD']["calls"] == replica_add_count 42 | 43 | def test_replication_behavior(self): 44 | self.setup_replication(num_replicas=1) 45 | # Test replication for write commands. 46 | bloom_write_cmds = [ 47 | ('BF.ADD', 'BF.ADD key item', 'BF.ADD key item1', 1), 48 | ('BF.MADD', 'BF.MADD key item', 'BF.MADD key item1', 1), 49 | ('BF.RESERVE', 'BF.RESERVE key 0.001 100000', 'BF.ADD key item1', 1), 50 | ('BF.INSERT', 'BF.INSERT key items item', 'BF.INSERT key items item1', 2), 51 | ] 52 | for test_case in bloom_write_cmds: 53 | prefix = test_case[0] 54 | create_cmd = test_case[1] 55 | # New bloom object being created is replicated. 56 | # Validate that the bloom object creation command replicated as BF.INSERT. 57 | self.client.execute_command(create_cmd) 58 | assert self.client.execute_command('EXISTS key') == 1 59 | self.waitForReplicaToSyncUp(self.replicas[0]) 60 | assert self.replicas[0].client.execute_command('EXISTS key') == 1 61 | self.validate_cmd_stats(prefix, 'BF.INSERT', 1, 1) 62 | 63 | # New item added to an existing bloom is replicated. 64 | item_add_cmd = test_case[2] 65 | expected_calls = test_case[3] 66 | self.client.execute_command(item_add_cmd) 67 | assert self.client.execute_command('BF.EXISTS key item1') == 1 68 | self.waitForReplicaToSyncUp(self.replicas[0]) 69 | assert self.replicas[0].client.execute_command('BF.EXISTS key item1') == 1 70 | # Validate that item addition (not bloom creation) is using the original command 71 | if prefix != 'BF.RESERVE': 72 | self.validate_cmd_stats(prefix, prefix, 2, expected_calls) 73 | else: 74 | # In case of the BF.RESERVE test case, we use BF.ADD to add items. Validate this is replicated. 75 | self.validate_reserve_cmd_stats(1, 1, 1, 1) 76 | # Attempting to add an existing item to an existing bloom will NOT be replicated. 77 | self.client.execute_command(item_add_cmd) 78 | self.waitForReplicaToSyncUp(self.replicas[0]) 79 | primary_cmd_stats = self.client.info("Commandstats") 80 | replica_cmd_stats = self.replicas[0].client.info("Commandstats") 81 | if prefix != 'BF.RESERVE': 82 | self.validate_cmd_stats(prefix, prefix, 3, expected_calls) 83 | else: 84 | # In case of the BF.RESERVE test case, we use BF.ADD to add items. Validate this is not replicated since 85 | # the item already exists.
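# (Expected counts below: 1 BF.RESERVE and 2 BF.ADD calls on the primary, but only 1 BF.INSERT (the replicated creation) and 1 BF.ADD on the replica, since the duplicate add was not propagated.)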
86 | self.validate_reserve_cmd_stats(1, 2, 1, 1) 87 | 88 | # cmd debug digest 89 | server_digest_primary = self.client.execute_command('DEBUG', 'DIGEST') 90 | assert server_digest_primary is not None and server_digest_primary != b'0000000000000000000000000000000000000000' 91 | server_digest_replica = self.replicas[0].client.execute_command('DEBUG', 'DIGEST') 92 | assert server_digest_primary == server_digest_replica 93 | object_digest_primary = self.client.execute_command('DEBUG DIGEST-VALUE key') 94 | debug_digest_replica = self.replicas[0].client.execute_command('DEBUG DIGEST-VALUE key') 95 | assert object_digest_primary == debug_digest_replica 96 | 97 | self.client.execute_command('FLUSHALL') 98 | self.waitForReplicaToSyncUp(self.replicas[0]) 99 | self.client.execute_command('CONFIG RESETSTAT') 100 | self.replicas[0].client.execute_command('CONFIG RESETSTAT') 101 | 102 | self.client.execute_command('BF.ADD key item1') 103 | self.waitForReplicaToSyncUp(self.replicas[0]) 104 | 105 | # Read commands executed on the primary will not be replicated. 106 | read_commands = [ 107 | ('BF.EXISTS', 'BF.EXISTS key item1', 1), 108 | ('BF.MEXISTS', 'BF.MEXISTS key item1 item2', 1), 109 | ('BF.INFO', 'BF.INFO key', 1), 110 | ('BF.INFO', 'BF.INFO key Capacity', 2), 111 | ('BF.INFO', 'BF.INFO key ITEMS', 3), 112 | ('BF.INFO', 'BF.INFO key filters', 4), 113 | ('BF.INFO', 'BF.INFO key size', 5), 114 | ('BF.INFO', 'BF.INFO key expansion', 6), 115 | ('BF.CARD', 'BF.CARD key', 1) 116 | ] 117 | for test_case in read_commands: 118 | prefix = test_case[0] 119 | cmd = test_case[1] 120 | expected_primary_calls = test_case[2] 121 | self.client.execute_command(cmd) 122 | primary_cmd_stats = self.client.info("Commandstats")['cmdstat_' + prefix] 123 | assert primary_cmd_stats["calls"] == expected_primary_calls 124 | assert ('cmdstat_' + prefix) not in self.replicas[0].client.info("Commandstats") 125 | 126 | # Deletes of bloom objects are replicated 127 | assert self.client.execute_command("EXISTS key") == 1 128 | assert self.replicas[0].client.execute_command('EXISTS key') == 1 129 | assert self.client.execute_command("DEL key") == 1 130 | self.waitForReplicaToSyncUp(self.replicas[0]) 131 | assert self.client.execute_command("EXISTS key") == 0 132 | assert self.replicas[0].client.execute_command('EXISTS key') == 0 133 | 134 | self.client.execute_command('CONFIG RESETSTAT') 135 | self.replicas[0].client.execute_command('CONFIG RESETSTAT') 136 | 137 | # Write commands with errors are not replicated. 138 | invalid_bloom_write_cmds = [ 139 | ('BF.ADD', 'BF.ADD key item1 item2'), 140 | ('BF.MADD', 'BF.MADD key'), 141 | ('BF.RESERVE', 'BF.RESERVE key 1.001 100000'), 142 | ('BF.INSERT', 'BF.INSERT key CAPACITY 0 items item'), 143 | ] 144 | for test_case in invalid_bloom_write_cmds: 145 | prefix = test_case[0] 146 | cmd = test_case[1] 147 | try: 148 | self.client.execute_command(cmd) 149 | assert False 150 | except ResponseError as e: 151 | pass 152 | primary_cmd_stats = self.client.info("Commandstats")['cmdstat_' + prefix] 153 | assert primary_cmd_stats["calls"] == 1 154 | assert primary_cmd_stats["failed_calls"] == 1 155 | assert ('cmdstat_' + prefix) not in self.replicas[0].client.info("Commandstats") 156 | 157 | def test_deterministic_replication(self): 158 | self.setup_replication(num_replicas=1) 159 | # Set non default global properties (config) on the primary node. Any bloom creation on the primary should be 160 | # replicated with the properties below.
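# (Creation replicates as a BF.INSERT carrying the primary's effective properties, so the replica's own default configs, asserted unchanged at the end of this test, never influence the created objects and the digests stay identical.)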
161 | assert self.client.execute_command('CONFIG SET bf.bloom-capacity 1000') == b'OK' 162 | assert self.client.execute_command('CONFIG SET bf.bloom-expansion 3') == b'OK' 163 | assert self.client.execute_command('CONFIG SET bf.bloom-fp-rate 0.1') == b'OK' 164 | assert self.client.execute_command('CONFIG SET bf.bloom-tightening-ratio 0.75') == b'OK' 165 | # Test bloom object creation with every command type. 166 | bloom_write_cmds = [ 167 | ('BF.ADD', 'BF.ADD key item'), 168 | ('BF.MADD', 'BF.MADD key item'), 169 | ('BF.RESERVE', 'BF.RESERVE key 0.001 100000'), 170 | ('BF.INSERT', 'BF.INSERT key items item'), 171 | ] 172 | for test_case in bloom_write_cmds: 173 | prefix = test_case[0] 174 | create_cmd = test_case[1] 175 | self.client.execute_command(create_cmd) 176 | server_digest_primary = self.client.execute_command('DEBUG', 'DIGEST') 177 | assert server_digest_primary is not None and server_digest_primary != b'0000000000000000000000000000000000000000' 178 | server_digest_replica = self.replicas[0].client.execute_command('DEBUG', 'DIGEST') 179 | object_digest_primary = self.client.execute_command('DEBUG DIGEST-VALUE key') 180 | debug_digest_replica = self.replicas[0].client.execute_command('DEBUG DIGEST-VALUE key') 181 | assert server_digest_primary == server_digest_replica 182 | assert object_digest_primary == debug_digest_replica 183 | self.client.execute_command('FLUSHALL') 184 | self.waitForReplicaToSyncUp(self.replicas[0]) 185 | assert self.replicas[0].client.execute_command('CONFIG GET bf.bloom-capacity')[1] == b'100' 186 | assert self.replicas[0].client.execute_command('CONFIG GET bf.bloom-expansion')[1] == b'2' 187 | assert self.replicas[0].client.execute_command('CONFIG GET bf.bloom-fp-rate')[1] == b'0.01' 188 | assert self.replicas[0].client.execute_command('CONFIG GET bf.bloom-tightening-ratio')[1] == b'0.5' 189 | -------------------------------------------------------------------------------- /tests/test_bloom_save_and_restore.py: -------------------------------------------------------------------------------- 1 | import os 2 | from valkey import ResponseError 3 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase 4 | from valkey_test_case import ValkeyServerHandle 5 | from valkeytestframework.conftest import resource_port_tracker 6 | from valkeytestframework.util.waiters import * 7 | 8 | class TestBloomSaveRestore(ValkeyBloomTestCaseBase): 9 | 10 | def test_basic_save_and_restore(self): 11 | client = self.server.get_new_client() 12 | bf_add_result_1 = client.execute_command('BF.ADD testSave item') 13 | assert bf_add_result_1 == 1 14 | bf_exists_result_1 = client.execute_command('BF.EXISTS testSave item') 15 | assert bf_exists_result_1 == 1 16 | bf_info_result_1 = client.execute_command('BF.INFO testSave') 17 | assert(len(bf_info_result_1)) != 0 18 | curr_item_count_1 = self.server.num_keys(client=client) 19 | # cmd debug digest 20 | server_digest = client.execute_command('DEBUG', 'DIGEST') 21 | assert server_digest is not None and server_digest != b'0000000000000000000000000000000000000000' 22 | object_digest = client.execute_command('DEBUG DIGEST-VALUE testSave') 23 | 24 | # save rdb, restart server 25 | client.execute_command('BGSAVE') 26 | self.server.wait_for_save_done() 27 | self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=True) 28 | 29 | assert self.server.is_alive() 30 | wait_for_equal(lambda: self.server.is_rdb_done_loading(), True) 31 | restored_server_digest = client.execute_command('DEBUG', 'DIGEST') 32 | restored_object_digest = client.execute_command('DEBUG DIGEST-VALUE testSave') 33 | assert
34 |         assert restored_object_digest == object_digest
35 |         self.server.verify_string_in_logfile("Loading RDB produced by Valkey")
36 |         self.server.verify_string_in_logfile("Done loading RDB, keys loaded: 1, keys expired: 0")
37 | 
38 |         # verify restore results
39 |         curr_item_count_2 = self.server.num_keys(client=client)
40 |         assert curr_item_count_2 == curr_item_count_1
41 |         bf_exists_result_2 = client.execute_command('BF.EXISTS testSave item')
42 |         assert bf_exists_result_2 == 1
43 |         bf_info_result_2 = client.execute_command('BF.INFO testSave')
44 |         assert bf_info_result_2 == bf_info_result_1
45 | 
46 |     def test_basic_save_many(self):
47 |         client = self.server.get_new_client()
48 |         count = 500
49 |         for i in range(0, count):
50 |             name = str(i) + "key"
51 | 
52 |             bf_add_result_1 = client.execute_command('BF.ADD ' + name + ' item')
53 |             assert bf_add_result_1 == 1
54 | 
55 |         curr_item_count_1 = self.server.num_keys(client=client)
56 |         assert curr_item_count_1 == count
57 |         # save rdb, restart server
58 |         client.execute_command('BGSAVE')
59 |         self.server.wait_for_save_done()
60 | 
61 |         self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=True)
62 |         assert self.server.is_alive()
63 |         wait_for_equal(lambda: self.server.is_rdb_done_loading(), True)
64 |         self.server.verify_string_in_logfile("Loading RDB produced by Valkey")
65 |         self.server.verify_string_in_logfile("Done loading RDB, keys loaded: 500, keys expired: 0")
66 | 
67 |         # verify restore results
68 |         curr_item_count_1 = self.server.num_keys(client=client)
69 | 
70 |         assert curr_item_count_1 == count
71 | 
72 | 
73 |     def test_restore_failed_large_bloom_filter(self):
74 |         client = self.server.get_new_client()
75 |         # Increase the max allowed size of a bloom filter per bloom object to 180MB.
76 |         # Create a large bloom filter.
77 |         # When we try to restore this on a server with the default max allowed filter size of 128MB, start up should fail.
78 |         updated_max_size = 180 * 1024 * 1024
79 |         original_max_size = int(client.execute_command('CONFIG GET bf.bloom-memory-usage-limit')[1])
80 |         client.execute_command('CONFIG SET bf.bloom-memory-usage-limit ' + str(updated_max_size))
81 |         client.execute_command('BF.RESERVE testSave 0.001 100000000')
82 |         assert int(client.execute_command('BF.INFO testSave size')) > original_max_size
83 |         bf_add_result_1 = client.execute_command('BF.ADD testSave item')
84 |         assert bf_add_result_1 == 1
85 |         bf_exists_result_1 = client.execute_command('BF.EXISTS testSave item')
86 |         assert bf_exists_result_1 == 1
87 |         bf_info_result_1 = client.execute_command('BF.INFO testSave')
88 |         assert len(bf_info_result_1) != 0
89 | 
90 |         # Save rdb and try to load it on a server. Validate that the module data type load fails and the server does not start up.
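        # Sketch of the intended failure path (assumption: wait_for_logfile polls the logfile
        # until the pattern appears or a timeout elapses):
        #   1. BGSAVE persists the ~180MB bloom object under the relaxed memory limit.
        #   2. The restarted server comes up with the default bf.bloom-memory-usage-limit (128MB),
        #      so the module data type rejects the object during RDB load and startup aborts.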
91 |         client.execute_command('BGSAVE')
92 |         self.server.wait_for_save_done()
93 |         self.server.restart(remove_rdb=False, remove_nodes_conf=False, connect_client=False)
94 |         logfile = os.path.join(self.testdir, self.server.args["logfile"])
95 |         large_obj_restore_err = "Failed to restore bloom object: Object larger than the allowed memory limit"
96 |         internal_rdb_err = "Internal error in RDB"
97 |         self.wait_for_logfile(logfile, large_obj_restore_err)
98 |         self.wait_for_logfile(logfile, internal_rdb_err)
99 |         assert not self.server.is_alive()
100 | 
101 |     def test_rdb_restore_non_bloom_compatibility(self):
102 |         # Create an rdb on a bloom-module-enabled server.
103 |         bf_client = self.server.get_new_client()
104 |         bf_client.execute_command("BF.ADD key val")
105 |         bf_client.execute_command("set string val")
106 |         assert self.server.num_keys(client=bf_client) == 2
107 |         assert bf_client.execute_command("del key") == 1
108 |         assert self.server.num_keys(client=bf_client) == 1
109 |         assert bf_client.get("string") == b"val"
110 |         bf_client.execute_command('BGSAVE')
111 |         self.server.wait_for_save_done()
112 |         rdb_file = self.server.args["dbfilename"]
113 | 
114 |         # Create a server without the bloom module.
115 |         new_server, new_client = self.create_server(testdir=self.testdir, server_path=self.server_path, args={"dbfilename": rdb_file})
116 |         assert new_server.is_alive()
117 |         wait_for_equal(lambda: new_server.is_rdb_done_loading(), True)
118 | 
119 |         # Verification
120 |         assert new_client.execute_command("info bf") == b''
121 |         try:
122 |             new_client.execute_command("bf.add test val")
123 |             assert False
124 |         except ResponseError as e:
125 |             assert "unknown command" in str(e)
126 | 
127 |         assert self.server.num_keys(client=new_client) == 1
128 |         assert new_client.get("string") == b"val"
129 | 
--------------------------------------------------------------------------------
/tests/test_bloom_valkeypy_compatibility.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import valkey
3 | from valkey.exceptions import ValkeyError
4 | from valkey_bloom_test_case import ValkeyBloomTestCaseBase
5 | from valkeytestframework.conftest import resource_port_tracker
6 | 
7 | class TestValkeyBloomCompatibility(ValkeyBloomTestCaseBase):
8 |     """
9 |     This test file verifies valkey-bloom's compatibility with redis-bloom.
10 |     All test cases and helper functions in this class are from https://github.com/valkey-io/valkey-py/blob/main/tests.
11 | """ 12 | 13 | def get_protocol_version(self, r): 14 | if isinstance(r, valkey.Valkey) or isinstance(r, valkey.asyncio.Valkey): 15 | return r.connection_pool.connection_kwargs.get("protocol") 16 | elif isinstance(r, valkey.cluster.AbstractValkeyCluster): 17 | return r.nodes_manager.connection_kwargs.get("protocol") 18 | 19 | def assert_resp_response(self, r, response, resp2_expected, resp3_expected): 20 | protocol = self.get_protocol_version(r) 21 | if protocol in [2, "2", None]: 22 | assert response == resp2_expected 23 | else: 24 | assert response == resp3_expected 25 | 26 | def intlist(self, obj): 27 | return [int(v) for v in obj] 28 | 29 | def test_create(self): 30 | """Test CREATE/RESERVE calls""" 31 | decoded_r = self.server.get_new_client() 32 | assert decoded_r.bf().create("bloom", 0.01, 1000) 33 | assert decoded_r.bf().create("bloom_e", 0.01, 1000, expansion=1) 34 | assert decoded_r.bf().create("bloom_ns", 0.01, 1000, noScale=True) 35 | 36 | # valkey-bloom start 37 | 38 | # def test_bf_add(self): 39 | # decoded_r = self.server.get_new_client() 40 | # assert decoded_r.bf().create("bloom", 0.01, 1000) 41 | # assert 1 == decoded_r.bf().add("bloom", "foo") 42 | # assert 0 == decoded_r.bf().add("bloom", "foo") 43 | # assert [0] == self.intlist(decoded_r.bf().madd("bloom", "foo")) 44 | # assert [0, 1] == decoded_r.bf().madd("bloom", "foo", "bar") 45 | # assert [0, 0, 1] == decoded_r.bf().madd("bloom", "foo", "bar", "baz") 46 | # assert 1 == decoded_r.bf().exists("bloom", "foo") 47 | # assert 0 == decoded_r.bf().exists("bloom", "noexist") 48 | # assert [1, 0] == self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist")) 49 | 50 | def test_bf_add(self): 51 | decoded_r = self.server.get_new_client() 52 | assert decoded_r.bf().create("bloom", 0.01, 1000) 53 | assert 0 == decoded_r.bf().exists("bloom", "noexist") 54 | assert 1 == decoded_r.bf().add("bloom", "foo") 55 | assert 0 == decoded_r.bf().add("bloom", "foo") 56 | assert [0] == self.intlist(decoded_r.bf().madd("bloom", "foo")) 57 | madd_return = decoded_r.bf().madd("bloom", "foo", "bar") 58 | assert 0 == madd_return[0] 59 | assert 2 == len(madd_return) 60 | self.check_return_of_multi_commands(madd_return) 61 | madd_return = decoded_r.bf().madd("bloom", "foo", "bar", "baz") 62 | assert 3 == len(madd_return) 63 | self.check_return_of_multi_commands(madd_return) 64 | assert 1 == decoded_r.bf().exists("bloom", "foo") 65 | mexists_return = self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist")) 66 | assert 1 == mexists_return[0] 67 | 68 | # def test_bf_insert(self): 69 | # decoded_r = self.server.get_new_client() 70 | # assert decoded_r.bf().create("bloom", 0.01, 1000) 71 | # assert [1] == self.intlist(decoded_r.bf().insert("bloom", ["foo"])) 72 | # assert [0, 1] == self.intlist(decoded_r.bf().insert("bloom", ["foo", "bar"])) 73 | # assert [1] == self.intlist(decoded_r.bf().insert("captest", ["foo"], capacity=10)) 74 | # assert [1] == self.intlist(decoded_r.bf().insert("errtest", ["foo"], error=0.01)) 75 | # assert 1 == decoded_r.bf().exists("bloom", "foo") 76 | # assert 0 == decoded_r.bf().exists("bloom", "noexist") 77 | # assert [1, 0] == self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist")) 78 | # info = decoded_r.bf().info("bloom") 79 | # self.assert_resp_response( 80 | # decoded_r, 81 | # 2, 82 | # info.get("insertedNum"), 83 | # info.get("Number of items inserted"), 84 | # ) 85 | # self.assert_resp_response( 86 | # decoded_r, 87 | # 1000, 88 | # info.get("capacity"), 89 | # info.get("Capacity"), 90 | # ) 91 
92 |     #         decoded_r,
93 |     #         1,
94 |     #         info.get("filterNum"),
95 |     #         info.get("Number of filters"),
96 |     #     )
97 |     def test_bf_insert(self):
98 |         decoded_r = self.server.get_new_client()
99 |         assert decoded_r.bf().create("bloom", 0.01, 1000)
100 |         assert [1] == self.intlist(decoded_r.bf().insert("bloom", ["foo"]))
101 |         bloom_insert_return = self.intlist(decoded_r.bf().insert("bloom", ["foo", "bar"]))
102 |         assert 2 == len(bloom_insert_return)
103 |         assert 0 == bloom_insert_return[0]
104 |         num_items_inserted = -1
105 |         if bloom_insert_return[1] == 1 or bloom_insert_return[1] == 0:
106 |             # We have inserted either 1 or 2 items. A return of 1 means "bar" was newly added (two items total);
107 |             # a return of 0 means it was a false positive and only one item was inserted.
108 |             num_items_inserted = bloom_insert_return[1] + 1
109 |         assert [1] == self.intlist(decoded_r.bf().insert("captest", ["foo"], capacity=10))
110 |         assert [1] == self.intlist(decoded_r.bf().insert("errtest", ["foo"], error=0.01))
111 |         assert 1 == decoded_r.bf().exists("bloom", "foo")
112 |         mexists_return = self.intlist(decoded_r.bf().mexists("bloom", "foo", "noexist"))
113 |         assert 1 == mexists_return[0]
114 |         info = decoded_r.bf().info("bloom")
115 |         self.assert_resp_response(
116 |             decoded_r,
117 |             num_items_inserted,
118 |             info.get("insertedNum"),
119 |             info.get("Number of items inserted"),
120 |         )
121 |         self.assert_resp_response(
122 |             decoded_r,
123 |             1000,
124 |             info.get("capacity"),
125 |             info.get("Capacity"),
126 |         )
127 |         self.assert_resp_response(
128 |             decoded_r,
129 |             1,
130 |             info.get("filterNum"),
131 |             info.get("Number of filters"),
132 |         )
133 | 
134 |     def check_return_of_multi_commands(self, returned_count):
135 |         for value in returned_count:
136 |             assert value in [0, 1], f"Returned Value: {value} is not 0 or 1"
137 | 
138 |     # valkey-bloom end
139 | 
140 |     def test_bf_info(self):
141 |         decoded_r = self.server.get_new_client()
142 |         expansion = 4
143 |         # Store a filter
144 |         decoded_r.bf().create("nonscaling", "0.0001", "1000", noScale=True)
145 |         info = decoded_r.bf().info("nonscaling")
146 |         self.assert_resp_response(
147 |             decoded_r,
148 |             None,
149 |             info.get("expansionRate"),
150 |             info.get("Expansion rate"),
151 |         )
152 | 
153 |         decoded_r.bf().create("expanding", "0.0001", "1000", expansion=expansion)
154 |         info = decoded_r.bf().info("expanding")
155 |         self.assert_resp_response(
156 |             decoded_r,
157 |             4,
158 |             info.get("expansionRate"),
159 |             info.get("Expansion rate"),
160 |         )
161 | 
162 |         try:
163 |             # noScale means no expansion
164 |             decoded_r.bf().create(
165 |                 "myBloom", "0.0001", "1000", expansion=expansion, noScale=True
166 |             )
167 |             assert False
168 |         except ValkeyError:
169 |             assert True
170 | 
171 |     def test_bf_card(self):
172 |         decoded_r = self.server.get_new_client()
173 |         # return 0 if the key does not exist
174 |         assert decoded_r.bf().card("not_exist") == 0
175 | 
176 |         # Store a filter
177 |         assert decoded_r.bf().add("bf1", "item_foo") == 1
178 |         assert decoded_r.bf().card("bf1") == 1
179 | 
180 |         # Error when the key is of a type other than Bloom filter.
181 |         with pytest.raises(valkey.ResponseError):
182 |             decoded_r.set("setKey", "value")
183 |             decoded_r.bf().card("setKey")
184 | 
185 |     """
186 |     This test is commented out in valkey-py/tests/test-bloom.py because pipeline
187 |     support has not yet been implemented in the valkey-py BFBloom class.
188 | """ 189 | # def test_pipeline(self): 190 | # decoded_r = self.server.get_new_client() 191 | # pipeline = decoded_r.bf().pipeline() 192 | # assert not decoded_r.bf().execute_command("get pipeline") 193 | # 194 | # assert decoded_r.bf().create("pipeline", 0.01, 1000) 195 | # for i in range(100): 196 | # pipeline.add("pipeline", i) 197 | # for i in range(100): 198 | # assert not (decoded_r.bf().exists("pipeline", i)) 199 | # 200 | # pipeline.execute() 201 | # 202 | # for i in range(100): 203 | # assert decoded_r.bf().exists("pipeline", i) 204 | -------------------------------------------------------------------------------- /tests/valkey_bloom_test_case.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from valkeytestframework.valkey_test_case import ValkeyTestCase 4 | from valkey import ResponseError 5 | import random 6 | import string 7 | import logging 8 | 9 | class ValkeyBloomTestCaseBase(ValkeyTestCase): 10 | 11 | # Global Parameterized Configs 12 | use_random_seed = 'no' 13 | 14 | @pytest.fixture(autouse=True) 15 | def setup_test(self, setup): 16 | args = {"enable-debug-command":"yes", 'loadmodule': os.getenv('MODULE_PATH'),'bf.bloom-use-random-seed': self.use_random_seed} 17 | server_path = f"{os.path.dirname(os.path.realpath(__file__))}/build/binaries/{os.environ['SERVER_VERSION']}/valkey-server" 18 | 19 | self.server, self.client = self.create_server(testdir = self.testdir, server_path=server_path, args=args) 20 | logging.info("startup args are: %s", args) 21 | 22 | @pytest.fixture(autouse=True) 23 | def use_random_seed_fixture(self, bloom_config_parameterization): 24 | if bloom_config_parameterization == "random-seed": 25 | self.use_random_seed = "yes" 26 | elif bloom_config_parameterization == "fixed-seed": 27 | self.use_random_seed = "no" 28 | 29 | def verify_error_response(self, client, cmd, expected_err_reply): 30 | try: 31 | client.execute_command(cmd) 32 | assert False 33 | except ResponseError as e: 34 | assert_error_msg = f"Actual error message: '{str(e)}' is different from expected error message '{expected_err_reply}'" 35 | assert str(e) == expected_err_reply, assert_error_msg 36 | return str(e) 37 | 38 | def verify_command_success_reply(self, client, cmd, expected_result): 39 | cmd_actual_result = client.execute_command(cmd) 40 | assert_error_msg = f"Actual command response '{cmd_actual_result}' is different from expected response '{expected_result}'" 41 | # For MEXISTS, MADD and INSERT due to false positives checking the return value can be flaky so instead we check that we get the correct 42 | # number of results returned for the command 43 | if cmd.upper().startswith("BF.M") or cmd.upper().startswith("BF.INSERT") : 44 | assert len(cmd_actual_result) == expected_result, assert_error_msg 45 | for value in cmd_actual_result: 46 | assert value in [0, 1], f"Returned value: {value} is not 0 or 1" 47 | else: 48 | assert cmd_actual_result == expected_result, assert_error_msg 49 | 50 | def verify_bloom_filter_item_existence(self, client, key, value, should_exist=True): 51 | if should_exist: 52 | assert client.execute_command(f'BF.EXISTS {key} {value}') == 1, f"Item {key} {value} doesn't exist" 53 | else: 54 | assert client.execute_command(f'BF.EXISTS {key} {value}') == 0, f"Item {key} {value} exists" 55 | 56 | def verify_server_key_count(self, client, expected_num_keys): 57 | actual_num_keys = self.server.num_keys() 58 | assert_num_key_error_msg = f"Actual key number {actual_num_keys} is different from 
59 |         assert actual_num_keys == expected_num_keys, assert_num_key_error_msg
60 | 
61 |     def create_bloom_filters_and_add_items(self, client, number_of_bf=5):
62 |         """ Creates the specified number of bloom filter objects (`number_of_bf`) and adds an item named FOO to each.
63 |         """
64 |         for i in range(number_of_bf):
65 |             assert client.execute_command(f'BF.ADD SAMPLE{i} FOO') == 1, f"Failed to insert bloom filter item SAMPLE{i} FOO"
66 | 
67 |     def generate_random_string(self, length=7):
68 |         """ Creates a random string with the specified length.
69 |         """
70 |         characters = string.ascii_letters + string.digits
71 |         random_string = ''.join(random.choice(characters) for _ in range(length))
72 |         return random_string
73 | 
74 |     def validate_nonscaling_failure(self, client, filter_name, item_prefix, new_item_idx):
75 |         """
76 |         Validate that the "non scaling filter is full" error is returned from all item-adding commands.
77 |         """
78 |         non_scaling_filter_full_err = "non scaling filter is full"
79 |         new_item = f"{item_prefix}{new_item_idx}"
80 |         # Adding a new item must fail; pytest.raises also fails the test
81 |         # if the expected error is never raised.
82 |         with pytest.raises(Exception, match=non_scaling_filter_full_err):
83 |             client.execute_command(f'BF.ADD {filter_name} {new_item}')
84 |         existing_item = f"{item_prefix}{new_item_idx - 1}"
85 |         multi_add_cmds = [f'BF.MADD {filter_name} {existing_item} {new_item} {new_item}', f'BF.INSERT {filter_name} ITEMS {existing_item} {new_item} {new_item}']
86 |         for cmd in multi_add_cmds:
87 |             response = client.execute_command(cmd)
88 |             assert len(response) == 2  # We expect commands to stop at the first error.
89 |             assert response[0] == 0
90 |             assert non_scaling_filter_full_err == str(response[1])
91 | 
92 |     def add_items_till_nonscaling_failure(self, client, filter_name, starting_item_idx, rand_prefix):
93 |         """
94 |         Adds items to the provided non-scaling bloom filter object (filter_name), which is expected to already be at capacity, until the filter-full error is returned.
95 |         Item names will start with the provided prefix (rand_prefix) followed by a counter (starting_item_idx onwards).
96 |         """
97 |         new_item_idx = starting_item_idx
98 |         try:
99 |             while True:
100 |                 item = f"{rand_prefix}{new_item_idx}"
101 |                 result = client.execute_command(f'BF.ADD {filter_name} {item}')
102 |                 if result == 1:
103 |                     raise RuntimeError("Unexpected return value 1 from BF.ADD on a full filter")
104 |                 new_item_idx += 1
105 |         except Exception as e:
106 |             if "non scaling filter is full" in str(e):
107 |                 return new_item_idx
108 |             else:
109 |                 raise RuntimeError(f"Unexpected error from BF.ADD: {e}")
110 | 
111 |     def add_items_till_capacity(self, client, filter_name, capacity_needed, starting_item_idx, rand_prefix, batch_size=1000):
112 |         """
113 |         Adds items to the provided bloom filter object (filter_name) until the specified capacity is reached.
114 |         Item names will start with the provided prefix (rand_prefix) followed by a counter (starting_item_idx onwards).
115 |         """
116 |         new_item_idx = starting_item_idx
117 |         fp_count = 0
118 |         cardinality = client.execute_command(f'BF.CARD {filter_name}')
119 |         while cardinality < capacity_needed:
120 |             # Calculate how many more items we need to add.
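            # Worked example: with capacity_needed=1000, cardinality=970 and batch_size=1000,
            # remaining_capacity=30 and batch_to_add=30, so the next BF.MADD sends exactly the
            # 30 items still needed rather than a full 1000-item batch.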
121 |             remaining_capacity = capacity_needed - cardinality
122 |             batch_to_add = min(batch_size, remaining_capacity)
123 |             # Prepare a batch of items
124 |             items = [f"{rand_prefix}{new_item_idx + i}" for i in range(batch_to_add)]
125 |             new_item_idx += batch_to_add
126 |             result = client.execute_command(f'BF.MADD {filter_name} ' + ' '.join(items))
127 |             # Process results
128 |             for res in result:
129 |                 if res == 0:
130 |                     fp_count += 1
131 |                 elif res == 1:
132 |                     cardinality += 1
133 |                 else:
134 |                     raise RuntimeError(f"Unexpected return value from add_item: {res}")
135 |         return fp_count, new_item_idx - 1
136 | 
137 |     def check_items_exist(self, client, filter_name, start_idx, end_idx, expected_result, rand_prefix, batch_size=1000):
138 |         """
139 |         Executes BF.MEXISTS on the given bloom filter for items named with rand_prefix followed by an index
140 |         from start_idx through end_idx. Each result is compared against expected_result, and the number of
141 |         mismatches is returned together with the number of lookups performed.
142 |         """
143 |         error_count = 0
144 |         num_operations = (end_idx - start_idx) + 1
145 |         # Check that items exist in batches.
146 |         for batch_start in range(start_idx, end_idx + 1, batch_size):
147 |             batch_end = min(batch_start + batch_size - 1, end_idx)
148 |             # Execute BF.MEXISTS with the batch of items
149 |             items = [f"{rand_prefix}{i}" for i in range(batch_start, batch_end + 1)]
150 |             result = client.execute_command(f'BF.MEXISTS {filter_name} ' + ' '.join(items))
151 |             # Check the results
152 |             for item_result in result:
153 |                 if item_result != expected_result:
154 |                     error_count += 1
155 |         return error_count, num_operations
156 | 
157 |     def fp_assert(self, error_count, num_operations, expected_fp_rate, fp_margin):
158 |         """
159 |         Asserts that the actual false positive rate is lower than the expected false positive rate
160 |         after accounting for the margin.
161 |         """
162 |         real_fp_rate = error_count / num_operations
163 |         fp_rate_with_margin = expected_fp_rate + fp_margin
164 |         assert real_fp_rate < fp_rate_with_margin, f"The actual fp_rate, {real_fp_rate}, is greater than the configured fp_rate with margin, {fp_rate_with_margin}."
165 | 
166 |     def validate_copied_bloom_correctness(self, client, original_filter_name, item_prefix, add_operation_idx, expected_fp_rate, fp_margin, original_info_dict):
167 |         """ Validate correctness on a copy of the provided bloom filter.
168 |         """
169 |         copy_filter_name = "filter_copy"
170 |         assert client.execute_command(f'COPY {original_filter_name} {copy_filter_name}') == 1
171 |         object_digest = client.execute_command(f'DEBUG DIGEST-VALUE {original_filter_name}')
172 |         copied_object_digest = client.execute_command(f'DEBUG DIGEST-VALUE {copy_filter_name}')
173 |         assert copied_object_digest == object_digest
174 |         assert client.execute_command('DBSIZE') == 2
175 |         copy_info = client.execute_command(f'BF.INFO {copy_filter_name}')
176 |         copy_it = iter(copy_info)
177 |         copy_info_dict = dict(zip(copy_it, copy_it))
178 |         assert copy_info_dict[b'Capacity'] == original_info_dict[b'Capacity']
179 |         assert copy_info_dict[b'Number of items inserted'] == original_info_dict[b'Number of items inserted']
180 |         assert copy_info_dict[b'Number of filters'] == original_info_dict[b'Number of filters']
181 |         assert copy_info_dict[b'Size'] == original_info_dict[b'Size']
182 |         assert copy_info_dict[b'Expansion rate'] == original_info_dict[b'Expansion rate']
183 |         # Items added to the original filter should still exist on the copy. False negatives are not possible.
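        # Hypothetical numbers for illustration: with add_operation_idx=7500, indices 1..7500
        # are checked with expected_result=True (a mismatch would be a false negative, hence
        # the error_count == 0 assertion below), while indices 7501..15000 are checked with
        # expected_result=False, whose mismatches are false positives that fp_assert compares
        # against expected_fp_rate + fp_margin.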
184 |         error_count, num_operations = self.check_items_exist(
185 |             client,
186 |             copy_filter_name,
187 |             1,
188 |             add_operation_idx,
189 |             True,
190 |             item_prefix,
191 |         )
192 |         assert error_count == 0
193 |         # Items not added to the original filter should not exist on the copy. False positives should be close to the configured fp_rate.
194 |         error_count, num_operations = self.check_items_exist(
195 |             client,
196 |             copy_filter_name,
197 |             add_operation_idx + 1,
198 |             add_operation_idx * 2,
199 |             False,
200 |             item_prefix,
201 |         )
202 |         self.fp_assert(error_count, num_operations, expected_fp_rate, fp_margin)
203 | 
204 |     def calculate_expected_capacity(self, initial_capacity, expansion, num_filters):
205 |         """
206 |         Accepts the capacity of the first filter, the expansion rate and the number of filters in the object,
207 |         and calculates the expected total capacity (summed across all filters) of the bloom object.
208 |         """
209 |         curr_filt_capacity = initial_capacity
210 |         total_capacity = curr_filt_capacity
211 |         for i in range(2, num_filters + 1):
212 |             new_filt_capacity = curr_filt_capacity * expansion
213 |             curr_filt_capacity = new_filt_capacity
214 |             total_capacity += curr_filt_capacity
215 |         return total_capacity
216 | 
217 |     def verify_bloom_metrics(self, info_response, expected_memory, expected_num_objects, expected_num_filters, expected_num_items, expected_sum_capacity):
218 |         """
219 |         Verify that the metric values are recorded properly. The expected values are:
220 |         expected_memory: the size of the memory used by the bloom objects
221 |         expected_num_objects: the number of bloom module objects stored
222 |         expected_num_filters, expected_num_items, expected_sum_capacity: filters, items and capacity summed across all objects
223 |         """
224 |         response_str = info_response.decode('utf-8')
225 |         lines = response_str.split('\r\n')
226 |         total_memory_bytes = -1
227 |         num_objects = -1
228 |         num_filters = -1
229 |         num_items = -1
230 |         sum_capacity = -1
231 |         for line in lines:
232 |             if line.startswith('bf_bloom_total_memory_bytes:'):
233 |                 total_memory_bytes = int(line.split(':')[1])
234 |             elif line.startswith('bf_bloom_num_objects:'):
235 |                 num_objects = int(line.split(':')[1])
236 |             elif line.startswith('bf_bloom_num_filters_across_objects:'):
237 |                 num_filters = int(line.split(':')[1])
238 |             elif line.startswith('bf_bloom_num_items_across_objects:'):
239 |                 num_items = int(line.split(':')[1])
240 |             elif line.startswith('bf_bloom_capacity_across_objects:'):
241 |                 sum_capacity = int(line.split(':')[1])
242 | 
243 |         assert total_memory_bytes == expected_memory
244 |         assert num_objects == expected_num_objects
245 |         assert num_filters == expected_num_filters
246 |         assert num_items == expected_num_items
247 |         assert sum_capacity == expected_sum_capacity
248 | 
249 |     """
250 |     This method will parse the return of an INFO command and return a python dict where each metric is a key-value pair.
251 |     We can pass in specific sections so the dict does not store irrelevant fields unrelated to what we want to check.
252 |     Example of parsing the returned dict:
253 |         stats = self.parse_valkey_info("STATS")
254 |         stats.get('active_defrag_misses')
255 |     """
256 |     def parse_valkey_info(self, section):
257 |         info_response = self.client.execute_command('INFO ' + section)
258 |         lines = info_response.decode('utf-8').split('\r\n')
259 |         stats_dict = {}
260 |         for line in lines:
261 |             if ':' in line:
262 |                 key, value = line.split(':', 1)
263 |                 stats_dict[key.strip()] = value.strip()
264 |         return stats_dict
--------------------------------------------------------------------------------