├── .github ├── release-drafter-config.yml └── workflows │ ├── check-pypi.yml │ ├── codeql-analysis.yml │ ├── publish-pypi.yml │ └── release-drafter.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── pyproject.toml ├── redisbloom ├── __init__.py └── client.py ├── rltest_commands.py ├── test_commands.py └── tox.ini /.github/release-drafter-config.yml: -------------------------------------------------------------------------------- 1 | name-template: 'Version $NEXT_PATCH_VERSION' 2 | tag-template: 'v$NEXT_PATCH_VERSION' 3 | categories: 4 | - title: 'Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: 'Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: 'Maintenance' 14 | label: 'chore' 15 | change-template: '- $TITLE (#$NUMBER)' 16 | exclude-labels: 17 | - 'skip-changelog' 18 | template: | 19 | ## Changes 20 | 21 | $CHANGES 22 | -------------------------------------------------------------------------------- /.github/workflows/check-pypi.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Check if required secrets are set to publish to Pypi 3 | 4 | on: push 5 | 6 | jobs: 7 | checksecret: 8 | name: check if PYPI_TOKEN and TESTPYPI_TOKEN are set in github secrets 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Check PYPI_TOKEN 12 | env: 13 | PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} 14 | run: | 15 | if ${{ env.PYPI_TOKEN == '' }} ; then 16 | echo "PYPI_TOKEN secret is not set" 17 | exit 1 18 | fi 19 | - name: Check TESTPYPI_TOKEN 20 | env: 21 | TESTPYPI_TOKEN: ${{ secrets.TESTPYPI_TOKEN }} 22 | run: | 23 | if ${{ env.TESTPYPI_TOKEN == '' }} ; then 24 | echo "TESTPYPI_TOKEN secret is not set" 25 | exit 1 26 | fi 27 | 28 | 29 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '37 7 * * 2' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'python' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v2 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v1 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 
46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v1 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v1 68 | -------------------------------------------------------------------------------- /.github/workflows/publish-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish Pypi 2 | on: 3 | release: 4 | types: [ published ] 5 | 6 | jobs: 7 | pytest: 8 | name: Publish to PyPi 9 | runs-on: ubuntu-latest 10 | env: 11 | ACTIONS_ALLOW_UNSECURE_COMMANDS: true 12 | steps: 13 | - uses: actions/checkout@master 14 | 15 | - name: get version from tag 16 | id: get_version 17 | run: | 18 | realversion="${GITHUB_REF/refs\/tags\//}" 19 | realversion="${realversion//v/}" 20 | echo "::set-output name=VERSION::$realversion" 21 | 22 | - name: Set the version for publishing 23 | uses: ciiiii/toml-editor@1.0.0 24 | with: 25 | file: "pyproject.toml" 26 | key: "tool.poetry.version" 27 | value: "${{ steps.get_version.outputs.VERSION }}" 28 | 29 | - name: Set up Python 3.7 30 | uses: actions/setup-python@v1 31 | with: 32 | python-version: 3.7 33 | 34 | - name: Install Poetry 35 | uses: dschep/install-poetry-action@v1.3 36 | 37 | - name: Cache Poetry virtualenv 38 | uses: actions/cache@v1 39 | id: cache 40 | with: 41 | path: ~/.virtualenvs 42 | key: poetry-${{ hashFiles('**/poetry.lock') }} 43 | restore-keys: | 44 | poetry-${{ hashFiles('**/poetry.lock') }} 45 | 46 | - name: Set Poetry config 47 | run: | 48 | poetry config virtualenvs.in-project false 49 | poetry config virtualenvs.path ~/.virtualenvs 50 | 51 | - name: Install Dependencies 52 | run: poetry install 53 | if: steps.cache.outputs.cache-hit != 'true' 54 | 55 | - name: Publish to PyPI 56 | if: github.event_name == 'release' 57 | run: | 58 | poetry publish -u __token__ -p ${{ secrets.PYPI_TOKEN }} --build 59 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | # branches to consider in the event; optional, defaults to all 6 | branches: 7 | - master 8 | 9 | jobs: 10 | update_release_draft: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Drafts your next Release notes as Pull Requests are merged into "master" 14 | - uses: release-drafter/release-drafter@v5 15 | with: 16 | # (Optional) specify config name to use, relative to .github/. 
Default: release-drafter.yml 17 | config-name: release-drafter-config.yml 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # Eclipse project settings 107 | .project 108 | .pydevproject 109 | 110 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM redislabs/rebloom:edge as builder 2 | 3 | RUN apt update && apt install -y python3 python3-pip 4 | ADD . /build 5 | WORKDIR /build 6 | RUN pip3 install poetry 7 | RUN poetry config virtualenvs.create false 8 | RUN poetry build 9 | 10 | ### clean docker stage 11 | FROM redislabs/redisbloom:edge as runner 12 | 13 | RUN apt update && apt install -y python3 python3-pip 14 | RUN rm -rf /var/cache/apt/ 15 | 16 | COPY --from=builder /build/dist/redisbloom*.tar.gz /tmp/ 17 | RUN pip3 install /tmp/redisbloom*.tar.gz 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, RedisBloom 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 |
2 | include requirements.txt
3 | include LICENSE
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![license](https://img.shields.io/github/license/RedisBloom/redisbloom-py.svg)](https://github.com/RedisBloom/redisbloom-py)
2 | [![PyPI version](https://badge.fury.io/py/redisbloom.svg)](https://badge.fury.io/py/redisbloom)
3 | [![Latest release](https://img.shields.io/github/release/RedisBloom/redisbloom-py.svg)](https://github.com/RedisBloom/redisbloom-py/releases/latest)
4 | [![Codecov](https://codecov.io/gh/RedisBloom/redisbloom-py/branch/master/graph/badge.svg)](https://codecov.io/gh/RedisBloom/redisbloom-py)
5 | [![Known Vulnerabilities](https://snyk.io/test/github/RedisBloom/redisbloom-py/badge.svg?targetFile=pyproject.toml)](https://snyk.io/test/github/RedisBloom/redisbloom-py?targetFile=pyproject.toml)
6 | [![Total alerts](https://img.shields.io/lgtm/alerts/g/RedisBloom/redisbloom-py.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/RedisBloom/redisbloom-py/alerts/)
7 |
8 | # Python client for RedisBloom
9 | [![Forum](https://img.shields.io/badge/Forum-RedisBloom-blue)](https://forum.redis.com/c/modules/redisbloom)
10 | [![Discord](https://img.shields.io/discord/697882427875393627?style=flat-square)](https://discord.gg/wXhwjCQ)
11 |
12 | ## Deprecation notice
13 |
14 | As of [redis-py 4.0.0](https://pypi.org/project/redis/4.0.0) this library is deprecated. Its features have been merged into redis-py. Please install redis-py either [from PyPI](https://pypi.org/project/redis) or [from the repo](https://github.com/redis/redis-py); a short migration sketch is shown below.
15 |
16 | --------------------------------
17 |
18 | redisbloom-py is a package that gives developers easy access to several probabilistic data structures. The package extends [redis-py](https://github.com/andymccurdy/redis-py)'s interface with RedisBloom's API.
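
### Migrating to redis-py

Since redisbloom-py is deprecated, new code should use redis-py 4.x, which bundles the RedisBloom commands. The snippet below is an illustrative sketch of the equivalent calls; method names follow redis-py's probabilistic data-structure command groups, so consult the redis-py documentation for the authoritative API:

```python
import redis

r = redis.Redis()
r.bf().create('bloom', 0.01, 1000)       # BF.RESERVE
r.bf().add('bloom', 'foo')               # BF.ADD    -> 1
r.bf().exists('bloom', 'foo')            # BF.EXISTS -> 1
r.topk().reserve('topk', 3, 50, 4, 0.9)  # TOPK.RESERVE
```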
19 |
20 | ### Installation
21 | ```
22 | $ pip install redisbloom
23 | ```
24 |
25 | ### Usage example
26 |
27 | ```python
28 | # Using Bloom Filter
29 | from redisbloom.client import Client
30 | rb = Client()
31 | rb.bfCreate('bloom', 0.01, 1000)
32 | rb.bfAdd('bloom', 'foo') # returns 1
33 | rb.bfAdd('bloom', 'foo') # returns 0
34 | rb.bfExists('bloom', 'foo') # returns 1
35 | rb.bfExists('bloom', 'noexist') # returns 0
36 |
37 | # Using Cuckoo Filter
38 | from redisbloom.client import Client
39 | rb = Client()
40 | rb.cfCreate('cuckoo', 1000)
41 | rb.cfAdd('cuckoo', 'filter') # returns 1
42 | rb.cfAddNX('cuckoo', 'filter') # returns 0
43 | rb.cfExists('cuckoo', 'filter') # returns 1
44 | rb.cfExists('cuckoo', 'noexist') # returns 0
45 |
46 | # Using Count-Min Sketch
47 | from redisbloom.client import Client
48 | rb = Client()
49 | rb.cmsInitByDim('dim', 1000, 5)
50 | rb.cmsIncrBy('dim', ['foo'], [5])
51 | rb.cmsIncrBy('dim', ['foo', 'bar'], [5, 15])
52 | rb.cmsQuery('dim', 'foo', 'bar') # returns [10, 15]
53 |
54 | # Using Top-K
55 | from redisbloom.client import Client
56 | rb = Client()
57 | rb.topkReserve('topk', 3, 20, 3, 0.9)
58 | rb.topkAdd('topk', 'A', 'B', 'C', 'D', 'E', 'A', 'A', 'B',
59 |            'C', 'G', 'D', 'B', 'D', 'A', 'E', 'E')
60 | rb.topkQuery('topk', 'A', 'B', 'C', 'D') # returns [1, 1, 0, 1]
61 | rb.topkCount('topk', 'A', 'B', 'C', 'D') # returns [4, 3, 2, 3]
62 | rb.topkList('topk') # returns ['A', 'B', 'E']
63 | rb.topkListWithCount('topk') # returns ['A', 4, 'B', 3, 'E', 3]
64 | ```
65 |
66 | ### API
67 | For complete documentation about RedisBloom's commands, refer to [RedisBloom's website](http://redisbloom.io).
68 |
69 | ### License
70 | [BSD 3-Clause](https://github.com/RedisBloom/redisbloom-py/blob/master/LICENSE)
71 |
72 | ### Development
73 |
74 | 1. Create a virtualenv to manage your Python dependencies, and ensure it's active.
75 | ```virtualenv -v venv```
76 | 2. Install [poetry](https://python-poetry.org/) to manage your dependencies.
77 | ```pip install poetry```
78 | 3. Install dependencies.
79 | ```poetry install```
80 |
81 | [tox](https://tox.readthedocs.io/en/latest/) runs all tests as its default target; running *tox* by itself runs the unit tests. Ensure you have a Redis server running with the RedisBloom module loaded (for example, built from the Dockerfile in this repo). A pipelining example is shown below.
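
### Pipelining

The client inherits redis-py's pipeline support through the `pipeline()` override in `redisbloom/client.py`, so module commands can be queued and sent in a single round trip. A minimal sketch, assuming a local Redis server with RedisBloom loaded:

```python
from redisbloom.client import Client

rb = Client()
rb.bfCreate('bloom', 0.01, 1000)

# Queue several BF.ADD calls and execute them together.
pipe = rb.pipeline()
for item in ('foo', 'bar', 'baz'):
    pipe.bfAdd('bloom', item)
print(pipe.execute())  # e.g. [1, 1, 1] on a freshly created filter
```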
82 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "redisbloom" 3 | version = "0.5.0" 4 | description = "RedisBloom Python Client" 5 | authors = ["Redis "] 6 | license = "BSD-3-Clause" 7 | readme = "README.md" 8 | 9 | packages = [ 10 | { include = 'redisbloom' }, 11 | ] 12 | 13 | classifiers = [ 14 | 'Topic :: Database', 15 | 'Programming Language :: Python', 16 | 'Intended Audience :: Developers', 17 | 'Programming Language :: Python :: 3.6', 18 | 'Programming Language :: Python :: 3.7', 19 | 'Programming Language :: Python :: 3.8', 20 | 'Programming Language :: Python :: 3.9', 21 | 'Programming Language :: Python :: 3.10', 22 | 'License :: OSI Approved :: BSD License', 23 | 'Development Status :: 7 - Inactive' 24 | ] 25 | keywords = ["Redis", "Extension", "Probabilistic", "TopK", "Cuckoo-Filter", "Bloom-Filter", "Count-min-sketch"] 26 | 27 | [tool.poetry.dependencies] 28 | python = "^3.6" 29 | hiredis = "^2.0.0" 30 | redis = "3.5.3" 31 | rmtest = "^0.7.0" 32 | six = "^1.16.0" 33 | 34 | 35 | [tool.poetry.urls] 36 | url = "https://redisbloom.io" 37 | repository = "https://github.com/RedisBloom/redisbloom-py" 38 | 39 | [tool.poetry.dev-dependencies] 40 | codecov = "^2.1.11" 41 | flake8 = "^3.9.2" 42 | tox = "^3.23.1" 43 | tox-poetry = "^0.3.0" 44 | bandit = "^1.7.0" 45 | vulture = "^2.3" 46 | 47 | [build-system] 48 | requires = ["poetry-core>=1.0.0"] 49 | build-backend = "poetry.core.masonry.api" 50 | -------------------------------------------------------------------------------- /redisbloom/__init__.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | 3 | warn( 4 | "Please upgrade to redis-py (https://pypi.org/project/redis/) " 5 | "This library is deprecated, and all features have been merged into redis-py.", 6 | DeprecationWarning, 7 | stacklevel=2, 8 | ) 9 | -------------------------------------------------------------------------------- /redisbloom/client.py: -------------------------------------------------------------------------------- 1 | import six 2 | from redis.client import Redis, Pipeline 3 | from redis._compat import nativestr 4 | 5 | 6 | def bool_ok(response): 7 | return nativestr(response) == 'OK' 8 | 9 | 10 | class BFInfo(object): 11 | capacity = None 12 | size = None 13 | filterNum = None 14 | insertedNum = None 15 | expansionRate = None 16 | 17 | def __init__(self, args): 18 | response = dict(zip(map(nativestr, args[::2]), args[1::2])) 19 | self.capacity = response['Capacity'] 20 | self.size = response['Size'] 21 | self.filterNum = response['Number of filters'] 22 | self.insertedNum = response['Number of items inserted'] 23 | self.expansionRate = response['Expansion rate'] 24 | 25 | 26 | class CFInfo(object): 27 | size = None 28 | bucketNum = None 29 | filterNum = None 30 | insertedNum = None 31 | deletedNum = None 32 | bucketSize = None 33 | expansionRate = None 34 | maxIteration = None 35 | 36 | def __init__(self, args): 37 | response = dict(zip(map(nativestr, args[::2]), args[1::2])) 38 | self.size = response['Size'] 39 | self.bucketNum = response['Number of buckets'] 40 | self.filterNum = response['Number of filters'] 41 | self.insertedNum = response['Number of items inserted'] 42 | self.deletedNum = response['Number of items deleted'] 43 | self.bucketSize = response['Bucket size'] 44 | self.expansionRate = response['Expansion rate'] 45 | 
self.maxIteration = response['Max iterations'] 46 | 47 | 48 | class CMSInfo(object): 49 | width = None 50 | depth = None 51 | count = None 52 | 53 | def __init__(self, args): 54 | response = dict(zip(map(nativestr, args[::2]), args[1::2])) 55 | self.width = response['width'] 56 | self.depth = response['depth'] 57 | self.count = response['count'] 58 | 59 | 60 | class TopKInfo(object): 61 | k = None 62 | width = None 63 | depth = None 64 | decay = None 65 | 66 | def __init__(self, args): 67 | response = dict(zip(map(nativestr, args[::2]), args[1::2])) 68 | self.k = response['k'] 69 | self.width = response['width'] 70 | self.depth = response['depth'] 71 | self.decay = response['decay'] 72 | 73 | 74 | def spaceHolder(response): 75 | return response 76 | 77 | 78 | def parseToList(response): 79 | res = [] 80 | for item in response: 81 | if isinstance(item, int): 82 | res.append(item) 83 | elif item is not None: 84 | res.append(nativestr(item)) 85 | else: 86 | res.append(None) 87 | return res 88 | 89 | 90 | class Client(Redis): # changed from StrictRedis 91 | """ 92 | This class subclasses redis-py's `Redis` and implements 93 | RedisBloom's commands. 94 | The client allows to interact with RedisBloom and use all of 95 | it's functionality. 96 | Prefix is according to the DS used. 97 | - BF for Bloom Filter 98 | - CF for Cuckoo Filter 99 | - CMS for Count-Min Sketch 100 | - TOPK for TopK Data Structure 101 | """ 102 | 103 | BF_RESERVE = 'BF.RESERVE' 104 | BF_ADD = 'BF.ADD' 105 | BF_MADD = 'BF.MADD' 106 | BF_INSERT = 'BF.INSERT' 107 | BF_EXISTS = 'BF.EXISTS' 108 | BF_MEXISTS = 'BF.MEXISTS' 109 | BF_SCANDUMP = 'BF.SCANDUMP' 110 | BF_LOADCHUNK = 'BF.LOADCHUNK' 111 | BF_INFO = 'BF.INFO' 112 | 113 | CF_RESERVE = 'CF.RESERVE' 114 | CF_ADD = 'CF.ADD' 115 | CF_ADDNX = 'CF.ADDNX' 116 | CF_INSERT = 'CF.INSERT' 117 | CF_INSERTNX = 'CF.INSERTNX' 118 | CF_EXISTS = 'CF.EXISTS' 119 | CF_DEL = 'CF.DEL' 120 | CF_COUNT = 'CF.COUNT' 121 | CF_SCANDUMP = 'CF.SCANDUMP' 122 | CF_LOADCHUNK = 'CF.LOADCHUNK' 123 | CF_INFO = 'CF.INFO' 124 | 125 | CMS_INITBYDIM = 'CMS.INITBYDIM' 126 | CMS_INITBYPROB = 'CMS.INITBYPROB' 127 | CMS_INCRBY = 'CMS.INCRBY' 128 | CMS_QUERY = 'CMS.QUERY' 129 | CMS_MERGE = 'CMS.MERGE' 130 | CMS_INFO = 'CMS.INFO' 131 | 132 | TOPK_RESERVE = 'TOPK.RESERVE' 133 | TOPK_ADD = 'TOPK.ADD' 134 | TOPK_QUERY = 'TOPK.QUERY' 135 | TOPK_COUNT = 'TOPK.COUNT' 136 | TOPK_LIST = 'TOPK.LIST' 137 | TOPK_INFO = 'TOPK.INFO' 138 | 139 | def __init__(self, *args, **kwargs): 140 | """ 141 | Creates a new RedisBloom client. 
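        All positional and keyword arguments are passed straight through to
        redis-py's ``Redis``; a typical call (illustrative only) is
        ``Client(host='localhost', port=6379)``.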
142 | """ 143 | Redis.__init__(self, *args, **kwargs) 144 | 145 | # Set the module commands' callbacks 146 | MODULE_CALLBACKS = { 147 | self.BF_RESERVE: bool_ok, 148 | #self.BF_ADD: spaceHolder, 149 | #self.BF_MADD: spaceHolder, 150 | #self.BF_INSERT: spaceHolder, 151 | #self.BF_EXISTS: spaceHolder, 152 | #self.BF_MEXISTS: spaceHolder, 153 | #self.BF_SCANDUMP: spaceHolder, 154 | #self.BF_LOADCHUNK: spaceHolder, 155 | self.BF_INFO: BFInfo, 156 | 157 | self.CF_RESERVE: bool_ok, 158 | #self.CF_ADD: spaceHolder, 159 | #self.CF_ADDNX: spaceHolder, 160 | #self.CF_INSERT: spaceHolder, 161 | #self.CF_INSERTNX: spaceHolder, 162 | #self.CF_EXISTS: spaceHolder, 163 | #self.CF_DEL: spaceHolder, 164 | #self.CF_COUNT: spaceHolder, 165 | #self.CF_SCANDUMP: spaceHolder, 166 | #self.CF_LOADCHUNK: spaceHolder, 167 | self.CF_INFO: CFInfo, 168 | 169 | 170 | self.CMS_INITBYDIM: bool_ok, 171 | self.CMS_INITBYPROB: bool_ok, 172 | #self.CMS_INCRBY: spaceHolder, 173 | #self.CMS_QUERY: spaceHolder, 174 | self.CMS_MERGE: bool_ok, 175 | self.CMS_INFO: CMSInfo, 176 | 177 | self.TOPK_RESERVE: bool_ok, 178 | self.TOPK_ADD: parseToList, 179 | #self.TOPK_QUERY: spaceHolder, 180 | #self.TOPK_COUNT: spaceHolder, 181 | self.TOPK_LIST: parseToList, 182 | self.TOPK_INFO: TopKInfo, 183 | 184 | } 185 | for k, v in six.iteritems(MODULE_CALLBACKS): 186 | self.set_response_callback(k, v) 187 | 188 | @staticmethod 189 | def appendItems(params, items): 190 | params.extend(['ITEMS']) 191 | params += items 192 | 193 | @staticmethod 194 | def appendError(params, error): 195 | if error is not None: 196 | params.extend(['ERROR', error]) 197 | 198 | @staticmethod 199 | def appendCapacity(params, capacity): 200 | if capacity is not None: 201 | params.extend(['CAPACITY', capacity]) 202 | 203 | @staticmethod 204 | def appendExpansion(params, expansion): 205 | if expansion is not None: 206 | params.extend(['EXPANSION', expansion]) 207 | 208 | @staticmethod 209 | def appendNoScale(params, noScale): 210 | if noScale is not None: 211 | params.extend(['NONSCALING']) 212 | 213 | @staticmethod 214 | def appendWeights(params, weights): 215 | if len(weights) > 0: 216 | params.append('WEIGHTS') 217 | params += weights 218 | 219 | @staticmethod 220 | def appendNoCreate(params, noCreate): 221 | if noCreate is not None: 222 | params.extend(['NOCREATE']) 223 | 224 | @staticmethod 225 | def appendItemsAndIncrements(params, items, increments): 226 | for i in range(len(items)): 227 | params.append(items[i]) 228 | params.append(increments[i]) 229 | 230 | @staticmethod 231 | def appendValuesAndWeights(params, items, weights): 232 | for i in range(len(items)): 233 | params.append(items[i]) 234 | params.append(weights[i]) 235 | 236 | @staticmethod 237 | def appendMaxIterations(params, max_iterations): 238 | if max_iterations is not None: 239 | params.extend(['MAXITERATIONS', max_iterations]) 240 | 241 | @staticmethod 242 | def appendBucketSize(params, bucket_size): 243 | if bucket_size is not None: 244 | params.extend(['BUCKETSIZE', bucket_size]) 245 | 246 | ################## Bloom Filter Functions ###################### 247 | def bfCreate(self, key, errorRate, capacity, expansion=None, noScale=None): 248 | """ 249 | Creates a new Bloom Filter ``key`` with desired probability of false 250 | positives ``errorRate`` expected entries to be inserted as ``capacity``. 251 | Default expansion value is 2. 252 | By default, filter is auto-scaling. 
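        Example (illustrative only):
            rb.bfCreate('bf', 0.01, 1000)                      # scaling filter
            rb.bfCreate('bf_fixed', 0.01, 1000, noScale=True)  # fixed-size filter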
253 | """ 254 | params = [key, errorRate, capacity] 255 | self.appendExpansion(params, expansion) 256 | self.appendNoScale(params, noScale) 257 | 258 | return self.execute_command(self.BF_RESERVE, *params) 259 | 260 | def bfAdd(self, key, item): 261 | """ 262 | Adds to a Bloom Filter ``key`` an ``item``. 263 | """ 264 | params = [key, item] 265 | 266 | return self.execute_command(self.BF_ADD, *params) 267 | 268 | def bfMAdd(self, key, *items): 269 | """ 270 | Adds to a Bloom Filter ``key`` multiple ``items``. 271 | """ 272 | params = [key] 273 | params += items 274 | 275 | return self.execute_command(self.BF_MADD, *params) 276 | 277 | def bfInsert(self, key, items, capacity=None, error=None, noCreate=None, expansion=None, noScale=None): 278 | """ 279 | Adds to a Bloom Filter ``key`` multiple ``items``. If ``nocreate`` 280 | remain ``None`` and ``key does not exist, a new Bloom Filter ``key`` 281 | will be created with desired probability of false positives ``errorRate`` 282 | and expected entries to be inserted as ``size``. 283 | """ 284 | params = [key] 285 | self.appendCapacity(params, capacity) 286 | self.appendError(params, error) 287 | self.appendExpansion(params, expansion) 288 | self.appendNoCreate(params, noCreate) 289 | self.appendNoScale(params, noScale) 290 | self.appendItems(params, items) 291 | 292 | return self.execute_command(self.BF_INSERT, *params) 293 | 294 | def bfExists(self, key, item): 295 | """ 296 | Checks whether an ``item`` exists in Bloom Filter ``key``. 297 | """ 298 | params = [key, item] 299 | 300 | return self.execute_command(self.BF_EXISTS, *params) 301 | 302 | def bfMExists(self, key, *items): 303 | """ 304 | Checks whether ``items`` exist in Bloom Filter ``key``. 305 | """ 306 | params = [key] 307 | params += items 308 | 309 | return self.execute_command(self.BF_MEXISTS, *params) 310 | 311 | def bfScandump(self, key, iter): 312 | """ 313 | Begins an incremental save of the bloom filter ``key``. This is useful 314 | for large bloom filters which cannot fit into the normal SAVE 315 | and RESTORE model. 316 | The first time this command is called, the value of ``iter`` should be 0. 317 | This command will return successive (iter, data) pairs until 318 | (0, NULL) to indicate completion. 319 | """ 320 | params = [key, iter] 321 | 322 | return self.execute_command(self.BF_SCANDUMP, *params) 323 | 324 | def bfLoadChunk(self, key, iter, data): 325 | """ 326 | Restores a filter previously saved using SCANDUMP. See the SCANDUMP 327 | command for example usage. 328 | This command will overwrite any bloom filter stored under key. 329 | Ensure that the bloom filter will not be modified between invocations. 330 | """ 331 | params = [key, iter, data] 332 | 333 | return self.execute_command(self.BF_LOADCHUNK, *params) 334 | 335 | def bfInfo(self, key): 336 | """ 337 | Returns capacity, size, number of filters, number of items inserted, and expansion rate. 338 | """ 339 | 340 | return self.execute_command(self.BF_INFO, key) 341 | 342 | 343 | ################## Cuckoo Filter Functions ###################### 344 | 345 | def cfCreate(self, key, capacity, expansion=None, bucket_size=None, max_iterations=None): 346 | """ 347 | Creates a new Cuckoo Filter ``key`` an initial ``capacity`` items. 
348 | """ 349 | params = [key, capacity] 350 | self.appendExpansion(params, expansion) 351 | self.appendBucketSize(params, bucket_size) 352 | self.appendMaxIterations(params, max_iterations) 353 | 354 | return self.execute_command(self.CF_RESERVE, *params) 355 | 356 | def cfAdd(self, key, item): 357 | """ 358 | Adds an ``item`` to a Cuckoo Filter ``key``. 359 | """ 360 | params = [key, item] 361 | 362 | return self.execute_command(self.CF_ADD, *params) 363 | 364 | def cfAddNX(self, key, item): 365 | """ 366 | Adds an ``item`` to a Cuckoo Filter ``key`` only if item does not yet exist. 367 | Command might be slower that ``cfAdd``. 368 | """ 369 | params = [key, item] 370 | 371 | return self.execute_command(self.CF_ADDNX, *params) 372 | 373 | def cfInsert(self, key, items, capacity=None, nocreate=None): 374 | """ 375 | Adds multiple ``items`` to a Cuckoo Filter ``key``, allowing the filter to be 376 | created with a custom ``capacity` if it does not yet exist. 377 | ``items`` must be provided as a list. 378 | """ 379 | params = [key] 380 | self.appendCapacity(params, capacity) 381 | self.appendNoCreate(params, nocreate) 382 | self.appendItems(params, items) 383 | 384 | return self.execute_command(self.CF_INSERT, *params) 385 | 386 | def cfInsertNX(self, key, items, capacity=None, nocreate=None): 387 | """ 388 | Adds multiple ``items`` to a Cuckoo Filter ``key`` only if they do not exist yet, 389 | allowing the filter to be created with a custom ``capacity` if it does not yet exist. 390 | ``items`` must be provided as a list. 391 | """ 392 | params = [key] 393 | self.appendCapacity(params, capacity) 394 | self.appendNoCreate(params, nocreate) 395 | self.appendItems(params, items) 396 | 397 | return self.execute_command(self.CF_INSERTNX, *params) 398 | 399 | def cfExists(self, key, item): 400 | """ 401 | Checks whether an ``item`` exists in Cuckoo Filter ``key``. 402 | """ 403 | params = [key, item] 404 | 405 | return self.execute_command(self.CF_EXISTS, *params) 406 | 407 | def cfDel(self, key, item): 408 | """ 409 | Deletes ``item`` from ``key``. 410 | """ 411 | params = [key, item] 412 | 413 | return self.execute_command(self.CF_DEL, *params) 414 | 415 | def cfCount(self, key, item): 416 | """ 417 | Returns the number of times an ``item`` may be in the ``key``. 418 | """ 419 | params = [key, item] 420 | 421 | return self.execute_command(self.CF_COUNT, *params) 422 | 423 | def cfScandump(self, key, iter): 424 | """ 425 | Begins an incremental save of the Cuckoo filter ``key``. This is useful 426 | for large Cuckoo filters which cannot fit into the normal SAVE 427 | and RESTORE model. 428 | The first time this command is called, the value of ``iter`` should be 0. 429 | This command will return successive (iter, data) pairs until 430 | (0, NULL) to indicate completion. 431 | """ 432 | params = [key, iter] 433 | 434 | return self.execute_command(self.CF_SCANDUMP, *params) 435 | 436 | def cfLoadChunk(self, key, iter, data): 437 | """ 438 | Restores a filter previously saved using SCANDUMP. See the SCANDUMP 439 | command for example usage. 440 | This command will overwrite any Cuckoo filter stored under key. 441 | Ensure that the Cuckoo filter will not be modified between invocations. 442 | """ 443 | params = [key, iter, data] 444 | 445 | return self.execute_command(self.CF_LOADCHUNK, *params) 446 | 447 | def cfInfo(self, key): 448 | """ 449 | Returns size, number of buckets, number of filter, number of items inserted, number of items deleted, 450 | bucket size, expansion rate, and max iteration. 
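        Example (illustrative only):
            info = rb.cfInfo('cuckoo')
            info.size, info.bucketNum, info.insertedNum   # parsed into a CFInfo object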
451 |         """
452 |
453 |         return self.execute_command(self.CF_INFO, key)
454 |
455 |     ################## Count-Min Sketch Functions ######################
456 |
457 |     def cmsInitByDim(self, key, width, depth):
458 |         """
459 |         Initializes a Count-Min Sketch ``key`` to dimensions
460 |         (``width``, ``depth``) specified by the user.
461 |         """
462 |         params = [key, width, depth]
463 |
464 |         return self.execute_command(self.CMS_INITBYDIM, *params)
465 |
466 |     def cmsInitByProb(self, key, error, probability):
467 |         """
468 |         Initializes a Count-Min Sketch ``key`` to characteristics
469 |         (``error``, ``probability``) specified by the user.
470 |         """
471 |         params = [key, error, probability]
472 |
473 |         return self.execute_command(self.CMS_INITBYPROB, *params)
474 |
475 |     def cmsIncrBy(self, key, items, increments):
476 |         """
477 |         Adds/increases ``items`` in a Count-Min Sketch ``key`` by ``increments``.
478 |         Both ``items`` and ``increments`` are lists.
479 |         Example - cmsIncrBy('A', ['foo'], [1])
480 |         """
481 |         params = [key]
482 |         self.appendItemsAndIncrements(params, items, increments)
483 |
484 |         return self.execute_command(self.CMS_INCRBY, *params)
485 |
486 |     def cmsQuery(self, key, *items):
487 |         """
488 |         Returns the count for an ``item`` from ``key``.
489 |         Multiple items can be queried with one call.
490 |         """
491 |         params = [key]
492 |         params += items
493 |
494 |         return self.execute_command(self.CMS_QUERY, *params)
495 |
496 |     def cmsMerge(self, destKey, numKeys, srcKeys, weights=[]):
497 |         """
498 |         Merges ``numKeys`` sketches, specified in ``srcKeys``, into ``destKey``.
499 |         All sketches must have identical width and depth.
500 |         ``weights`` can be used to multiply certain sketches. Default weight is 1.
501 |         Both ``srcKeys`` and ``weights`` are lists.
502 |         """
503 |         params = [destKey, numKeys]
504 |         params += srcKeys
505 |         self.appendWeights(params, weights)
506 |
507 |         return self.execute_command(self.CMS_MERGE, *params)
508 |
509 |     def cmsInfo(self, key):
510 |         """
511 |         Returns width, depth and total count of the sketch.
512 |         """
513 |
514 |         return self.execute_command(self.CMS_INFO, key)
515 |
516 |
517 |     ################## Top-K Functions ######################
518 |
519 |     def topkReserve(self, key, k, width, depth, decay):
520 |         """
521 |         Creates a new Top-K data structure ``key`` that tracks the ``k`` most
522 |         frequent items, using a sketch of the given ``width`` and ``depth`` and the given ``decay`` factor.
523 |         """
524 |         params = [key, k, width, depth, decay]
525 |
526 |         return self.execute_command(self.TOPK_RESERVE, *params)
527 |
528 |     def topkAdd(self, key, *items):
529 |         """
530 |         Adds one or more ``items`` to a Top-K data structure ``key``.
531 |         """
532 |         params = [key]
533 |         params += items
534 |
535 |         return self.execute_command(self.TOPK_ADD, *params)
536 |
537 |     def topkQuery(self, key, *items):
538 |         """
539 |         Checks whether one or more ``items`` are among the Top-K items at ``key``.
540 |         """
541 |         params = [key]
542 |         params += items
543 |
544 |         return self.execute_command(self.TOPK_QUERY, *params)
545 |
546 |     def topkCount(self, key, *items):
547 |         """
548 |         Returns the count for one or more ``items`` from ``key``.
549 |         """
550 |         params = [key]
551 |         params += items
552 |
553 |         return self.execute_command(self.TOPK_COUNT, *params)
554 |
555 |     def topkList(self, key):
556 |         """
557 |         Returns the full list of items in the Top-K structure at ``key``.
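        Example (illustrative only):
            rb.topkList('topk')   # e.g. ['A', 'B', 'E']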
558 | """ 559 | 560 | return self.execute_command(self.TOPK_LIST, key) 561 | 562 | def topkListWithCount(self, key): 563 | """ 564 | Return full list of items with probabilistic count in Top-K list of ``key```. 565 | """ 566 | 567 | return self.execute_command(self.TOPK_LIST, key, 'WITHCOUNT') 568 | 569 | def topkInfo(self, key): 570 | """ 571 | Returns k, width, depth and decay values of ``key``. 572 | """ 573 | 574 | return self.execute_command(self.TOPK_INFO, key) 575 | 576 | def pipeline(self, transaction=True, shard_hint=None): 577 | """ 578 | Return a new pipeline object that can queue multiple commands for 579 | later execution. ``transaction`` indicates whether all commands 580 | should be executed atomically. Apart from making a group of operations 581 | atomic, pipelines are useful for reducing the back-and-forth overhead 582 | between the client and server. 583 | Overridden in order to provide the right client through the pipeline. 584 | """ 585 | p = Pipeline( 586 | connection_pool=self.connection_pool, 587 | response_callbacks=self.response_callbacks, 588 | transaction=transaction, 589 | shard_hint=shard_hint) 590 | return p 591 | 592 | 593 | class Pipeline(Pipeline, Client): 594 | "Pipeline for RedisBloom Client" 595 | def __init__(self, connection_pool, response_callbacks, transaction, shard_hint): 596 | self.connection_pool = connection_pool 597 | self.connection = None 598 | self.response_callbacks = response_callbacks 599 | self.transaction = transaction 600 | self.shard_hint = shard_hint 601 | 602 | self.watching = False 603 | self.reset() 604 | -------------------------------------------------------------------------------- /rltest_commands.py: -------------------------------------------------------------------------------- 1 | # run using: 2 | # RLTest -t rltest_commands.py --module /rebloom.so -s 3 | 4 | from RLTest import Env 5 | from redisbloom.client import Client as RedisBloom 6 | 7 | ''' 8 | from time import sleep 9 | from unittest import TestCase 10 | ''' 11 | # def CreateConn(): 12 | # port = 6379 13 | # rb = RedisBloom(port=port) 14 | # rb.flushdb() 15 | # return rb 16 | 17 | i = lambda l: [int(v) for v in l] 18 | 19 | class TestRedisBloom(): 20 | def __init__(self): 21 | self.env = Env() 22 | self.rb = RedisBloom(port=6379) 23 | 24 | def testCreate(self): 25 | '''Test CREATE/RESERVE calls''' 26 | self.env.cmd("flushall") 27 | rb = self.rb 28 | self.env.assertTrue(rb.bfCreate('bloom', 0.01, 1000)) 29 | self.env.assertTrue(rb.cfCreate('cuckoo', 1000)) 30 | self.env.assertTrue(rb.cmsInitByDim('cmsDim', 100, 5)) 31 | self.env.assertTrue(rb.cmsInitByProb('cmsProb', 0.01, 0.01)) 32 | self.env.assertTrue(rb.topkReserve('topk', 5, 100, 5, 0.9)) 33 | 34 | ################### Test Bloom Filter ################### 35 | def testBFAdd(self): 36 | self.env.cmd("flushall") 37 | rb = self.rb 38 | self.env.assertTrue(rb.bfCreate('bloom', 0.01, 1000)) 39 | self.env.assertEqual(1, rb.bfAdd('bloom', 'foo')) 40 | self.env.assertEqual(0, rb.bfAdd('bloom', 'foo')) 41 | self.env.assertEqual([0], i(rb.bfMAdd('bloom', 'foo'))) 42 | self.env.assertEqual([0, 1], rb.bfMAdd('bloom', 'foo', 'bar')) 43 | self.env.assertEqual([0, 0, 1], rb.bfMAdd('bloom', 'foo', 'bar', 'baz')) 44 | self.env.assertEqual(1, rb.bfExists('bloom', 'foo')) 45 | self.env.assertEqual(0, rb.bfExists('bloom', 'noexist')) 46 | self.env.assertEqual([1, 0], i(rb.bfMExists('bloom', 'foo', 'noexist'))) 47 | 48 | def testBFInsert(self): 49 | self.env.cmd("flushall") 50 | rb = self.rb 51 | 
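        # bfInsert returns one 0/1 flag per item: 1 if the item was newly added,
        # 0 if it may already be present in the filter.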
self.env.assertTrue(rb.bfCreate('bloom', 0.01, 1000)) 52 | self.env.assertEqual([1], i(rb.bfInsert('bloom', ['foo']))) 53 | self.env.assertEqual([0, 1], i(rb.bfInsert('bloom', ['foo', 'bar']))) 54 | self.env.assertEqual([1], i(rb.bfInsert('captest', ['foo'], capacity=1000))) 55 | self.env.assertEqual([1], i(rb.bfInsert('errtest', ['foo'], error=0.01))) 56 | self.env.assertEqual(1, rb.bfExists('bloom', 'foo')) 57 | self.env.assertEqual(0, rb.bfExists('bloom', 'noexist')) 58 | self.env.assertEqual([1, 0], i(rb.bfMExists('bloom', 'foo', 'noexist'))) 59 | 60 | def testBFDumpLoad(self): 61 | self.env.cmd("flushall") 62 | rb = self.rb 63 | # Store a filter 64 | rb.bfCreate('myBloom', '0.0001', '1000') 65 | 66 | # test is probabilistic and might fail. It is OK to change variables if 67 | # certain to not break anything 68 | def do_verify(): 69 | res = 0 70 | for x in range(1000): 71 | rb.bfAdd('myBloom', x) 72 | rv = rb.bfExists('myBloom', x) 73 | self.env.assertTrue(rv) 74 | rv = rb.bfExists('myBloom', 'nonexist_{}'.format(x)) 75 | res += (rv == x) 76 | self.env.assertLess(res, 5) 77 | 78 | do_verify() 79 | cmds = [] 80 | cur = rb.bfScandump('myBloom', 0) 81 | first = cur[0] 82 | cmds.append(cur) 83 | 84 | while True: 85 | cur = rb.bfScandump('myBloom', first) 86 | first = cur[0] 87 | if first == 0: 88 | break 89 | else: 90 | cmds.append(cur) 91 | prev_info = rb.execute_command('bf.debug', 'myBloom') 92 | 93 | # Remove the filter 94 | rb.execute_command('del', 'myBloom') 95 | 96 | # Now, load all the commands: 97 | for cmd in cmds: 98 | rb.bfLoadChunk('myBloom', *cmd) 99 | 100 | cur_info = rb.execute_command('bf.debug', 'myBloom') 101 | self.env.assertEqual(prev_info, cur_info) 102 | do_verify() 103 | 104 | rb.execute_command('del', 'myBloom') 105 | rb.bfCreate('myBloom', '0.0001', '10000000') 106 | 107 | ################### Test Cuckoo Filter ################### 108 | def testCFAddInsert(self): 109 | self.env.cmd("flushall") 110 | rb = self.rb 111 | self.env.assertTrue(rb.cfCreate('cuckoo', 1000)) 112 | self.env.assertTrue(rb.cfAdd('cuckoo', 'filter')) 113 | self.env.assertFalse(rb.cfAddNX('cuckoo', 'filter')) 114 | self.env.assertEqual(1, rb.cfAddNX('cuckoo', 'newItem')) 115 | self.env.assertEqual([1], rb.cfInsert('captest', ['foo'])) 116 | self.env.assertEqual([1], rb.cfInsert('captest', ['foo'], capacity=1000)) 117 | self.env.assertEqual([1], rb.cfInsertNX('captest', ['bar'])) 118 | self.env.assertEqual([0, 0, 1], rb.cfInsertNX('captest', ['foo', 'bar', 'baz'])) 119 | self.env.assertEqual([0], rb.cfInsertNX('captest', ['bar'], capacity=1000)) 120 | self.env.assertEqual([1], rb.cfInsert('empty1', ['foo'], capacity=1000)) 121 | self.env.assertEqual([1], rb.cfInsertNX('empty2', ['bar'], capacity=1000)) 122 | 123 | def testCFExistsDel(self): 124 | self.env.cmd("flushall") 125 | rb = self.rb 126 | self.env.assertTrue(rb.cfCreate('cuckoo', 1000)) 127 | self.env.assertTrue(rb.cfAdd('cuckoo', 'filter')) 128 | self.env.assertTrue(rb.cfExists('cuckoo', 'filter')) 129 | self.env.assertFalse(rb.cfExists('cuckoo', 'notexist')) 130 | self.env.assertEqual(1, rb.cfCount('cuckoo', 'filter')) 131 | self.env.assertEqual(0, rb.cfCount('cuckoo', 'notexist')) 132 | self.env.assertTrue(rb.cfDel('cuckoo', 'filter')) 133 | self.env.assertEqual(0, rb.cfCount('cuckoo', 'filter')) 134 | 135 | ################### Test Count-Min Sketch ################### 136 | def testCMS(self): 137 | self.env.cmd("flushall") 138 | rb = self.rb 139 | self.env.assertTrue(rb.cmsInitByDim('dim', 1000, 5)) 140 | 
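        # CMS.INITBYPROB sizes the sketch from a target error rate and probability
        # instead of explicit width/depth dimensions.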
self.env.assertTrue(rb.cmsInitByProb('prob', 0.01, 0.01)) 141 | self.env.assertTrue(rb.cmsIncrBy('dim', ['foo'], [5])) 142 | self.env.assertEqual([0], rb.cmsQuery('dim', 'notexist')) 143 | self.env.assertEqual([5], rb.cmsQuery('dim', 'foo')) 144 | self.env.assertTrue(rb.cmsIncrBy('dim', ['foo', 'bar'], [5, 15])) 145 | self.env.assertEqual([10, 15], rb.cmsQuery('dim', 'foo', 'bar')) 146 | info = rb.cmsInfo('dim') 147 | self.env.assertEqual(1000, info.width) 148 | self.env.assertEqual(5, info.depth) 149 | self.env.assertEqual(25, info.count) 150 | 151 | def testCMSMerge(self): 152 | self.env.cmd("flushall") 153 | rb = self.rb 154 | self.env.assertTrue(rb.cmsInitByDim('A', 1000, 5)) 155 | self.env.assertTrue(rb.cmsInitByDim('B', 1000, 5)) 156 | self.env.assertTrue(rb.cmsInitByDim('C', 1000, 5)) 157 | self.env.assertTrue(rb.cmsIncrBy('A', ['foo', 'bar', 'baz'], [5, 3, 9])) 158 | self.env.assertTrue(rb.cmsIncrBy('B', ['foo', 'bar', 'baz'], [2, 3, 1])) 159 | self.env.assertEqual([5, 3, 9], rb.cmsQuery('A', 'foo', 'bar', 'baz')) 160 | self.env.assertEqual([2, 3, 1], rb.cmsQuery('B', 'foo', 'bar', 'baz')) 161 | self.env.assertTrue(rb.cmsMerge('C', 2, ['A', 'B'])) 162 | self.env.assertEqual([7, 6, 10], rb.cmsQuery('C', 'foo', 'bar', 'baz')) 163 | self.env.assertTrue(rb.cmsMerge('C', 2, ['A', 'B'], ['1', '2'])) 164 | self.env.assertEqual([9, 9, 11], rb.cmsQuery('C', 'foo', 'bar', 'baz')) 165 | self.env.assertTrue(rb.cmsMerge('C', 2, ['A', 'B'], ['2', '3'])) 166 | self.env.assertEqual([16, 15, 21], rb.cmsQuery('C', 'foo', 'bar', 'baz')) 167 | 168 | ################### Test Top-K ################### 169 | def testTopK(self): 170 | self.env.cmd("flushall") 171 | rb = self.rb 172 | # test list with empty buckets 173 | self.env.assertTrue(rb.topkReserve('topk', 10, 50, 3, 0.9)) 174 | self.env.assertTrue(rb.topkAdd('topk', 'A', 'B', 'C', 'D', 'E', 'A', 'A', 'B', 'C', 175 | 'G', 'D', 'B', 'D', 'A', 'E', 'E')) 176 | self.env.assertEqual([1, 1, 1, 1, 1, 0, 1], 177 | rb.topkQuery('topk', 'A', 'B', 'C', 'D', 'E', 'F', 'G')) 178 | self.env.assertEqual([4, 3, 2, 3, 3, 0, 1], 179 | rb.topkCount('topk', 'A', 'B', 'C', 'D', 'E', 'F', 'G')) 180 | 181 | # test full list 182 | self.env.assertTrue(rb.topkReserve('topklist', 3, 50, 3, 0.9)) 183 | self.env.assertTrue(rb.topkAdd('topklist', 'A', 'B', 'C', 'D', 'E','A', 'A', 'B', 'C', 184 | 'G', 'D', 'B', 'D', 'A', 'E', 'E')) 185 | self.env.assertEqual(['D', 'A', 'B'], rb.topkList('topklist')) -------------------------------------------------------------------------------- /test_commands.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from time import sleep 4 | from unittest import TestCase 5 | from redisbloom.client import Client as RedisBloom 6 | 7 | xrange = range 8 | rb = None 9 | port = 6379 10 | 11 | i = lambda l: [int(v) for v in l] 12 | 13 | 14 | # Can be used with assertRaises 15 | def run_func(func, *args, **kwargs): 16 | func(*args, **kwargs) 17 | 18 | 19 | class TestRedisBloom(TestCase): 20 | def setUp(self): 21 | global rb 22 | rb = RedisBloom(port=port) 23 | rb.flushdb() 24 | 25 | def testCreate(self): 26 | '''Test CREATE/RESERVE calls''' 27 | self.assertTrue(rb.bfCreate('bloom', 0.01, 1000)) 28 | self.assertTrue(rb.bfCreate('bloom_e', 0.01, 1000, expansion=1)) 29 | self.assertTrue(rb.bfCreate('bloom_ns', 0.01, 1000, noScale=True)) 30 | self.assertTrue(rb.cfCreate('cuckoo', 1000)) 31 | self.assertTrue(rb.cfCreate('cuckoo_e', 1000, expansion=1)) 32 | self.assertTrue(rb.cfCreate('cuckoo_bs', 1000, 
bucket_size=4)) 33 | self.assertTrue(rb.cfCreate('cuckoo_mi', 1000, max_iterations=10)) 34 | self.assertTrue(rb.cmsInitByDim('cmsDim', 100, 5)) 35 | self.assertTrue(rb.cmsInitByProb('cmsProb', 0.01, 0.01)) 36 | self.assertTrue(rb.topkReserve('topk', 5, 100, 5, 0.9)) 37 | 38 | ################### Test Bloom Filter ################### 39 | def testBFAdd(self): 40 | self.assertTrue(rb.bfCreate('bloom', 0.01, 1000)) 41 | self.assertEqual(1, rb.bfAdd('bloom', 'foo')) 42 | self.assertEqual(0, rb.bfAdd('bloom', 'foo')) 43 | self.assertEqual([0], i(rb.bfMAdd('bloom', 'foo'))) 44 | self.assertEqual([0, 1], rb.bfMAdd('bloom', 'foo', 'bar')) 45 | self.assertEqual([0, 0, 1], rb.bfMAdd('bloom', 'foo', 'bar', 'baz')) 46 | self.assertEqual(1, rb.bfExists('bloom', 'foo')) 47 | self.assertEqual(0, rb.bfExists('bloom', 'noexist')) 48 | self.assertEqual([1, 0], i(rb.bfMExists('bloom', 'foo', 'noexist'))) 49 | 50 | def testBFInsert(self): 51 | self.assertTrue(rb.bfCreate('bloom', 0.01, 1000)) 52 | self.assertEqual([1], i(rb.bfInsert('bloom', ['foo']))) 53 | self.assertEqual([0, 1], i(rb.bfInsert('bloom', ['foo', 'bar']))) 54 | self.assertEqual([1], i(rb.bfInsert('captest', ['foo'], capacity=1000))) 55 | self.assertEqual([1], i(rb.bfInsert('errtest', ['foo'], error=0.01))) 56 | self.assertEqual(1, rb.bfExists('bloom', 'foo')) 57 | self.assertEqual(0, rb.bfExists('bloom', 'noexist')) 58 | self.assertEqual([1, 0], i(rb.bfMExists('bloom', 'foo', 'noexist'))) 59 | info = rb.bfInfo('bloom') 60 | self.assertEqual(2, info.insertedNum) 61 | self.assertEqual(1000, info.capacity) 62 | self.assertEqual(1, info.filterNum) 63 | 64 | def testBFDumpLoad(self): 65 | # Store a filter 66 | rb.bfCreate('myBloom', '0.0001', '1000') 67 | 68 | # test is probabilistic and might fail. It is OK to change variables if 69 | # certain to not break anything 70 | def do_verify(): 71 | res = 0 72 | for x in xrange(1000): 73 | rb.bfAdd('myBloom', x) 74 | rv = rb.bfExists('myBloom', x) 75 | self.assertTrue(rv) 76 | rv = rb.bfExists('myBloom', 'nonexist_{}'.format(x)) 77 | res += (rv == x) 78 | self.assertLess(res, 5) 79 | 80 | do_verify() 81 | cmds = [] 82 | cur = rb.bfScandump('myBloom', 0) 83 | first = cur[0] 84 | cmds.append(cur) 85 | 86 | while True: 87 | cur = rb.bfScandump('myBloom', first) 88 | first = cur[0] 89 | if first == 0: 90 | break 91 | else: 92 | cmds.append(cur) 93 | prev_info = rb.execute_command('bf.debug', 'myBloom') 94 | 95 | # Remove the filter 96 | rb.execute_command('del', 'myBloom') 97 | 98 | # Now, load all the commands: 99 | for cmd in cmds: 100 | rb.bfLoadChunk('myBloom', *cmd) 101 | 102 | cur_info = rb.execute_command('bf.debug', 'myBloom') 103 | self.assertEqual(prev_info, cur_info) 104 | do_verify() 105 | 106 | rb.execute_command('del', 'myBloom') 107 | rb.bfCreate('myBloom', '0.0001', '10000000') 108 | 109 | def testBFInfo(self): 110 | expansion = 4 111 | # Store a filter 112 | rb.bfCreate('nonscaling', '0.0001', '1000', noScale=True) 113 | info = rb.bfInfo('nonscaling') 114 | self.assertEqual(info.expansionRate, None) 115 | 116 | rb.bfCreate('expanding', '0.0001', '1000', expansion=expansion) 117 | info = rb.bfInfo('expanding') 118 | self.assertEqual(info.expansionRate, 4) 119 | 120 | try: 121 | # noScale mean no expansion 122 | rb.bfCreate('myBloom', '0.0001', '1000', expansion=expansion, noScale=True) 123 | self.assertTrue(False) 124 | except: 125 | self.assertTrue(True) 126 | 127 | ################### Test Cuckoo Filter ################### 128 | def testCFAddInsert(self): 129 | 
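        # cfAddNX / cfInsertNX report 1 for newly added items and 0 for items
        # that already exist in the cuckoo filter.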
self.assertTrue(rb.cfCreate('cuckoo', 1000)) 130 | self.assertTrue(rb.cfAdd('cuckoo', 'filter')) 131 | self.assertFalse(rb.cfAddNX('cuckoo', 'filter')) 132 | self.assertEqual(1, rb.cfAddNX('cuckoo', 'newItem')) 133 | self.assertEqual([1], rb.cfInsert('captest', ['foo'])) 134 | self.assertEqual([1], rb.cfInsert('captest', ['foo'], capacity=1000)) 135 | self.assertEqual([1], rb.cfInsertNX('captest', ['bar'])) 136 | self.assertEqual([1], rb.cfInsertNX('captest', ['food'], nocreate='1')) 137 | self.assertEqual([0, 0, 1], rb.cfInsertNX('captest', ['foo', 'bar', 'baz'])) 138 | self.assertEqual([0], rb.cfInsertNX('captest', ['bar'], capacity=1000)) 139 | self.assertEqual([1], rb.cfInsert('empty1', ['foo'], capacity=1000)) 140 | self.assertEqual([1], rb.cfInsertNX('empty2', ['bar'], capacity=1000)) 141 | info = rb.cfInfo('captest') 142 | self.assertEqual(5, info.insertedNum) 143 | self.assertEqual(0, info.deletedNum) 144 | self.assertEqual(1, info.filterNum) 145 | 146 | def testCFExistsDel(self): 147 | self.assertTrue(rb.cfCreate('cuckoo', 1000)) 148 | self.assertTrue(rb.cfAdd('cuckoo', 'filter')) 149 | self.assertTrue(rb.cfExists('cuckoo', 'filter')) 150 | self.assertFalse(rb.cfExists('cuckoo', 'notexist')) 151 | self.assertEqual(1, rb.cfCount('cuckoo', 'filter')) 152 | self.assertEqual(0, rb.cfCount('cuckoo', 'notexist')) 153 | self.assertTrue(rb.cfDel('cuckoo', 'filter')) 154 | self.assertEqual(0, rb.cfCount('cuckoo', 'filter')) 155 | 156 | ################### Test Count-Min Sketch ################### 157 | def testCMS(self): 158 | self.assertTrue(rb.cmsInitByDim('dim', 1000, 5)) 159 | self.assertTrue(rb.cmsInitByProb('prob', 0.01, 0.01)) 160 | self.assertTrue(rb.cmsIncrBy('dim', ['foo'], [5])) 161 | self.assertEqual([0], rb.cmsQuery('dim', 'notexist')) 162 | self.assertEqual([5], rb.cmsQuery('dim', 'foo')) 163 | self.assertEqual([10, 15], rb.cmsIncrBy('dim', ['foo', 'bar'], [5, 15])) 164 | self.assertEqual([10, 15], rb.cmsQuery('dim', 'foo', 'bar')) 165 | info = rb.cmsInfo('dim') 166 | self.assertEqual(1000, info.width) 167 | self.assertEqual(5, info.depth) 168 | self.assertEqual(25, info.count) 169 | 170 | def testCMSMerge(self): 171 | self.assertTrue(rb.cmsInitByDim('A', 1000, 5)) 172 | self.assertTrue(rb.cmsInitByDim('B', 1000, 5)) 173 | self.assertTrue(rb.cmsInitByDim('C', 1000, 5)) 174 | self.assertTrue(rb.cmsIncrBy('A', ['foo', 'bar', 'baz'], [5, 3, 9])) 175 | self.assertTrue(rb.cmsIncrBy('B', ['foo', 'bar', 'baz'], [2, 3, 1])) 176 | self.assertEqual([5, 3, 9], rb.cmsQuery('A', 'foo', 'bar', 'baz')) 177 | self.assertEqual([2, 3, 1], rb.cmsQuery('B', 'foo', 'bar', 'baz')) 178 | self.assertTrue(rb.cmsMerge('C', 2, ['A', 'B'])) 179 | self.assertEqual([7, 6, 10], rb.cmsQuery('C', 'foo', 'bar', 'baz')) 180 | self.assertTrue(rb.cmsMerge('C', 2, ['A', 'B'], ['1', '2'])) 181 | self.assertEqual([9, 9, 11], rb.cmsQuery('C', 'foo', 'bar', 'baz')) 182 | self.assertTrue(rb.cmsMerge('C', 2, ['A', 'B'], ['2', '3'])) 183 | self.assertEqual([16, 15, 21], rb.cmsQuery('C', 'foo', 'bar', 'baz')) 184 | 185 | ################### Test Top-K ################### 186 | def testTopK(self): 187 | # test list with empty buckets 188 | self.assertTrue(rb.topkReserve('topk', 3, 50, 4, 0.9)) 189 | self.assertEqual([None, None, None, 'A', 'C', 'D', None, None, 'E', 190 | None, 'B', 'C', None, None, None, 'D', None], 191 | rb.topkAdd('topk', 'A', 'B', 'C', 'D', 'E', 'A', 'A', 'B', 'C', 192 | 'G', 'D', 'B', 'D', 'A', 'E', 'E', 1)) 193 | self.assertEqual([1, 1, 0, 0, 1, 0, 0], 194 | rb.topkQuery('topk', 'A', 'B', 'C', 'D', 
'E', 'F', 'G')) 195 | self.assertEqual([4, 3, 2, 3, 3, 0, 1], 196 | rb.topkCount('topk', 'A', 'B', 'C', 'D', 'E', 'F', 'G')) 197 | 198 | # test full list 199 | self.assertTrue(rb.topkReserve('topklist', 3, 50, 3, 0.9)) 200 | self.assertTrue(rb.topkAdd('topklist', 'A', 'B', 'C', 'D', 'E','A', 'A', 'B', 'C', 201 | 'G', 'D', 'B', 'D', 'A', 'E', 'E')) 202 | self.assertEqual(['A', 'B', 'E'], rb.topkList('topklist')) 203 | self.assertEqual(['A', 4, 'B', 3, 'E', 3], rb.topkListWithCount('topklist')) 204 | info = rb.topkInfo('topklist') 205 | self.assertEqual(3, info.k) 206 | self.assertEqual(50, info.width) 207 | self.assertEqual(3, info.depth) 208 | self.assertAlmostEqual(0.9, float(info.decay)) 209 | 210 | def test_pipeline(self): 211 | pipeline = rb.pipeline() 212 | 213 | self.assertFalse(rb.execute_command('get pipeline')) 214 | 215 | self.assertTrue(rb.bfCreate('pipeline', 0.01, 1000)) 216 | for i in range(100): 217 | pipeline.bfAdd('pipeline', i) 218 | for i in range(100): 219 | self.assertFalse(rb.bfExists('pipeline', i)) 220 | 221 | pipeline.execute() 222 | 223 | for i in range(100): 224 | self.assertTrue(rb.bfExists('pipeline', i)) 225 | 226 | 227 | if __name__ == '__main__': 228 | unittest.main() 229 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | skipsdist = True 3 | envlist = linters 4 | 5 | [flake8] 6 | max-complexity = 10 7 | ignore = E127,E265,E266,E301,E501 8 | srcdir = redisbloom 9 | show-source = true 10 | exclude =.git,.tox,dist,doc,*/__pycache__/*,*test*.py 11 | 12 | [testenv:cover] 13 | whitelist_externals = find 14 | commands_pre = 15 | pip install --upgrade pip 16 | setenv = 17 | REDIS_PORT = 6379 18 | commands = 19 | coverage run test_commands.py 20 | codecov 21 | 22 | [testenv:linters] 23 | commands = 24 | flake8 --show-source 25 | vulture redisbloom --min-confidence 80 26 | bandit redisbloom/** 27 | --------------------------------------------------------------------------------