├── .github └── workflows │ └── wheels.yml ├── .gitignore ├── .gitmodules ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── pyproject.toml ├── pyxorfilter ├── __init__.py ├── ffibuild.py └── pyxorfilter.py ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── test_fuse16.py ├── test_fuse8.py ├── test_xor16.py └── test_xor8.py /.github/workflows/wheels.yml: -------------------------------------------------------------------------------- 1 | name: Build and upload to PyPI 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 8 | 9 | name: Creating release 10 | 11 | env: 12 | CIBW_TEST_EXTRAS: test 13 | CIBW_TEST_COMMAND: "pytest {project}/tests" 14 | CIBW_TEST_SKIP: "*_arm64 *_universal2:arm64" 15 | CIBW_ARCHS_MACOS: "x86_64 universal2" 16 | 17 | jobs: 18 | sdist: 19 | name: Creating source release 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v2 23 | 24 | - name: Setting up Python 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: 3.8 28 | 29 | - name: Installing python build dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | python -m pip install --upgrade setuptools 33 | 34 | - name: Building source distribution 35 | run: | 36 | pip install -e ".[release]" 37 | python setup.py sdist 38 | 39 | - name: Ensuring documentation builds 40 | run: | 41 | cd docs && make clean && make html 42 | 43 | - uses: actions/upload-artifact@v2 44 | with: 45 | path: dist/*.tar.gz 46 | 47 | build_wheels: 48 | needs: [sdist] 49 | name: "[${{ strategy.job-index }}/${{ strategy.job-total }}] py${{ matrix.py }} on ${{ matrix.os }}" 50 | runs-on: ${{ matrix.os }} 51 | strategy: 52 | fail-fast: true 53 | matrix: 54 | os: [ubuntu-20.04, windows-2019, macos-11] 55 | py: ["cp39", "cp310", "cp311", "cp312", "pp37", "pp38", "pp39"] 56 | 57 | steps: 58 | - uses: actions/checkout@v2 59 | 60 | - uses: actions/setup-python@v2 61 | name: Setting up Python 62 | 
with: 63 | python-version: '3.8' 64 | 65 | - name: Set up QEMU 66 | if: runner.os == 'Linux' 67 | uses: docker/setup-qemu-action@v1 68 | with: 69 | platforms: all 70 | 71 | - name: Build & test wheels 72 | uses: pypa/cibuildwheel@v2.16.5 73 | env: 74 | CIBW_ARCHS_LINUX: auto aarch64 ppc64le 75 | CIBW_BUILD: "${{ matrix.py }}-*" 76 | 77 | - uses: actions/upload-artifact@v2 78 | with: 79 | path: ./wheelhouse/*.whl 80 | 81 | upload_all: 82 | needs: [build_wheels, sdist] 83 | name: Uploading built packages to pypi for release. 84 | runs-on: ubuntu-latest 85 | steps: 86 | - uses: actions/download-artifact@v2 87 | with: 88 | name: artifact 89 | path: dist 90 | 91 | - uses: pypa/gh-action-pypi-publish@v1.4.2 92 | with: 93 | user: ${{ secrets.PYPI_USERNAME }} 94 | password: ${{ secrets.PYPI_PASSWORD }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | *.so 3 | *.o 4 | __pycache__/ 5 | build/ 6 | env/ 7 | .eggs/ 8 | dist/ 9 | *.egg-info/ 10 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/xor_singleheader"] 2 | path = lib/xor_singleheader 3 | url = https://github.com/FastFilter/xor_singleheader 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | install: 2 | - if [ "$TRAVIS_OS_NAME" = "linux" ]; then 3 | pip install --upgrade nose cibuildwheel; 4 | elif [ "$TRAVIS_OS_NAME" = "osx" ]; then 5 | pip install --user --upgrade nose cibuildwheel; 6 | fi 7 | 8 | cache: 9 | - pip 10 | 11 | branches: 12 | only: 13 | - master 14 | 15 | env: 16 | global: 17 | - CIBW_BUILD_VERBOSITY=1 18 | - CIBW_TEST_REQUIRES=nose 19 | - CIBW_TEST_COMMAND="nosetests {project}/tests" 20 | 
- CIBW_BEFORE_BUILD="pip install cffi" 21 | - CIBW_BEFORE_ALL="python {project}/setup.py build" 22 | 23 | matrix: 24 | include: 25 | ##### Linux ##### 26 | ##### ARM ##### 27 | ### 3.6 ### 28 | - name: "Python 3.6 on Linux arm64" 29 | arch: 30 | - arm64 31 | env: 32 | - CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014 33 | - CIBW_BUILD="cp36-manylinux*" 34 | - CIBW_SKIP: "*-manylinux_i686" 35 | language: python 36 | dist: bionic 37 | python: "3.7" 38 | services: 39 | - docker 40 | ### 3.7 ### 41 | - name: "Python 3.7 on Linux arm64" 42 | arch: 43 | - arm64 44 | env: 45 | - CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014 46 | - CIBW_BUILD="cp37-manylinux*" 47 | - CIBW_SKIP: "*-manylinux_i686" 48 | language: python 49 | dist: bionic 50 | python: "3.7" 51 | services: 52 | - docker 53 | ### 3.8 ### 54 | - name: "Python 3.8 on Linux arm64" 55 | arch: 56 | - arm64 57 | env: 58 | - CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014 59 | - CIBW_BUILD="cp38-manylinux*" 60 | - CIBW_SKIP: "*-manylinux_i686" 61 | language: python 62 | dist: bionic 63 | python: "3.7" 64 | services: 65 | - docker 66 | ### 3.9 ### 67 | - name: "Python 3.9 on Linux arm64" 68 | arch: 69 | - arm64 70 | env: 71 | - CIBW_MANYLINUX_AARCH64_IMAGE: manylinux2014 72 | - CIBW_BUILD="cp39-manylinux*" 73 | - CIBW_SKIP: "*-manylinux_i686" 74 | language: python 75 | dist: bionic 76 | python: "3.7" 77 | services: 78 | - docker 79 | ##### x86_64 ##### 80 | ### 3.6 ### 81 | - name: "Python 3.6 on Linux amd64" 82 | arch: 83 | - amd64 84 | env: 85 | - CIBW_BUILD="cp36-manylinux*" 86 | - CIBW_SKIP: "*-manylinux_i686" 87 | language: python 88 | dist: bionic 89 | python: "3.7" 90 | services: 91 | - docker 92 | ### 3.7 ### 93 | - name: "Python 3.7 on Linux amd64" 94 | arch: 95 | - amd64 96 | env: 97 | - CIBW_BUILD="cp37-manylinux*" 98 | - CIBW_SKIP: "*-manylinux_i686" 99 | language: python 100 | dist: bionic 101 | python: "3.7" 102 | services: 103 | - docker 104 | ### 3.8 ### 105 | - name: "Python 3.8 on Linux amd64" 106 | arch: 107 | 
- amd64 108 | env: 109 | - CIBW_BUILD="cp38-manylinux*" 110 | - CIBW_SKIP: "*-manylinux_i686" 111 | language: python 112 | dist: bionic 113 | python: "3.7" 114 | services: 115 | - docker 116 | ### 3.9 ### 117 | - name: "Python 3.9 on Linux amd64" 118 | arch: 119 | - amd64 120 | env: 121 | - CIBW_BUILD="cp39-manylinux*" 122 | - CIBW_SKIP: "*-manylinux_i686" 123 | language: python 124 | dist: bionic 125 | python: "3.7" 126 | services: 127 | - docker 128 | 129 | ## Windows 130 | # - name: "Python 3.6 on Windows" 131 | # arch: 132 | # - amd64 133 | # env: 134 | # - CIBW_BUILD="cp36-win*" 135 | # os: windows 136 | # language: shell 137 | # before_install: 138 | # - choco install python --version=3.6.8 139 | # - python -m pip install --upgrade nose cibuildwheel 140 | # env: PATH=/c/Python36:/c/Python36/Scripts:$PATH 141 | # 142 | # - name: "Python 3.7 on Windows" 143 | # arch: 144 | # - amd64 145 | # env: 146 | # - CIBW_BUILD="cp37-win*" 147 | # os: windows 148 | # language: shell 149 | # before_install: 150 | # - choco install python --version=3.7.9 151 | # - python -m pip install --upgrade nose cibuildwheel 152 | # env: PATH=/c/Python37:/c/Python37/Scripts:$PATH 153 | # 154 | # - name: "Python 3.8 on Windows" 155 | # arch: 156 | # - amd64 157 | # env: 158 | # - CIBW_BUILD="cp38-win*" 159 | # os: windows 160 | # language: shell 161 | # before_install: 162 | # - choco install python --version=3.8.7 163 | # - python -m pip install --upgrade nose cibuildwheel 164 | # env: PATH=/c/Python38:/c/Python38/Scripts:$PATH 165 | # 166 | # - name: "Python 3.9 on Windows" 167 | # arch: 168 | # - amd64 169 | # env: 170 | # - CIBW_BUILD="cp39-win*" 171 | # os: windows 172 | # language: shell 173 | # before_install: 174 | # - choco install python --version=3.9.1 175 | # - python -m pip install --upgrade nose cibuildwheel 176 | # env: PATH=/c/Python39:/c/Python39/Scripts:$PATH 177 | 178 | script: 179 | - cibuildwheel --output-dir wheelhouse 180 | - ls wheelhouse 181 | - if [ 
"$TRAVIS_OS_NAME" = "linux" ]; then 182 | pip3 install --upgrade pip; 183 | pip3 install --upgrade --upgrade-strategy eager twine; 184 | elif [ "$TRAVIS_OS_NAME" = "osx" ]; then 185 | pip install --user twine; 186 | elif [ "$TRAVIS_OS_NAME" = "windows" ]; then 187 | python -m pip install twine; 188 | fi 189 | - if [ "$PYPI_UPLOAD" = "test" -a "$TRAVIS_OS_NAME" = "windows" ]; then 190 | python -m twine upload --skip-existing wheelhouse/* --repository-url https://test.pypi.org/legacy/ -u ${TEST_PYPI_USR} -p ${TEST_PYPI_PWD}; 191 | elif [ "$PYPI_UPLOAD" = "test" -a "$TRAVIS_OS_NAME" != "windows" ]; then 192 | twine upload --skip-existing wheelhouse/* --repository-url https://test.pypi.org/legacy/ -u ${TEST_PYPI_USR} -p ${TEST_PYPI_PWD}; 193 | elif [ "$PYPI_UPLOAD" = "release" -a "$TRAVIS_OS_NAME" = "windows" ]; then 194 | python -m twine upload --skip-existing wheelhouse/* -u ${PYPI_USR} -p ${PYPI_PWD}; 195 | elif [ "$PYPI_UPLOAD" = "release" -a "$TRAVIS_OS_NAME" != "windows" ]; then 196 | twine upload --skip-existing wheelhouse/* -u ${PYPI_USR} -p ${PYPI_PWD}; 197 | fi 198 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include lib/xor_singleheader/include/binaryfusefilter.h 2 | include lib/xor_singleheader/include/xorfilter.h 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyxorfilter 2 | 3 | Python bindings for [C](https://github.com/FastFilter/xor_singleheader) implementation of [Xor Filters: Faster and Smaller Than Bloom and Cuckoo Filters](https://arxiv.org/abs/1912.08258) 4 | and of [Binary Fuse Filters: Fast and Smaller Than Xor Filters](https://arxiv.org/abs/2201.01174). 
5 | ## Installation 6 | `pip install pyxorfilter` 7 | ### From Source 8 | ``` 9 | git clone --recurse-submodules https://github.com/glitzflitz/pyxorfilter 10 | cd pyxorfilter 11 | python setup.py build_ext 12 | python setup.py install 13 | ``` 14 | ## Usage 15 | ```py 16 | >>> from pyxorfilter import Xor8, Xor16, Fuse8, Fuse16 17 | >>> filter = Xor8(5) #or Xor16(size) 18 | >>> #Supports unicode strings and heterogeneous types 19 | >>> test_str = ["あ","अ", 51, 0.0, 12.3] 20 | >>> filter.populate(test_str) 21 | True 22 | >>> filter.contains("अ") 23 | True 24 | >>> filter[51] #You can use __getitem__ instead of contains 25 | True 26 | >>> filter["か"] 27 | False 28 | >>> filter.contains(150) 29 | False 30 | >>> filter.size_in_bytes() 31 | 60 32 | ``` 33 | 34 | You can serialize a filter with the `serialize()` method, which returns a buffer, and you can recover the filter with the `deserialize(buffer)` method, which returns a filter: 35 | 36 | ```py 37 | > f = open('/tmp/output', 'wb') 38 | > f.write(filter.serialize()) 39 | > f.close() 40 | > recoverfilter = Xor8.deserialize(open('/tmp/output', 'rb').read()) 41 | ``` 42 | 43 | ## Caveats 44 | ### Accuracy 45 | For more accuracy (fewer false positives), use the larger but more accurate Xor16 or Fuse16. 46 | 47 | For large sets (containing millions of keys), Fuse8/Fuse16 filters are faster and smaller than Xor8/Xor16. 48 | 49 | ```py 50 | >>> filter = Xor8(1000000) 51 | >>> filter.size_in_bytes() 52 | 1230054 53 | >>> filter = Fuse8(1000000) 54 | >>> filter.size_in_bytes() 55 | 1130536 56 | ``` 57 | 58 | ### TODO 59 | 60 | - [x] Add unit tests 61 | - [x] Add CI support for distributing pyxorfilter with PyPI. 
62 | 63 | ## Links 64 | * [C Implementation](https://github.com/FastFilter/xor_singleheader) 65 | * [Go Implementation](https://github.com/FastFilter/xorfilter) 66 | * [Erlang bindings](https://github.com/mpope9/exor_filter) 67 | * Rust Implementation: [1](https://github.com/bnclabs/xorfilter) and [2](https://github.com/codri/xorfilter-rs) 68 | * [C++ Implementation](https://github.com/FastFilter/fastfilter_cpp) 69 | * [Java Implementation](https://github.com/FastFilter/fastfilter_java) 70 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pyxorfilter" 3 | version = "1.1.2" 4 | description = "Python bindings for C implementation of xorfilter" 5 | authors = [ 6 | {name = "Amey Narkhede", email = "ameynarkhede02@gmail.com"}, 7 | ] 8 | dependencies = ["cffi", "xxhash"] 9 | readme = "README.md" 10 | license = {text = "Apache 2.0"} 11 | requires-python = ">=3.0" 12 | 13 | [project.urls] 14 | Homepage = "https://github.com/glitzflitz/pyxorfilter" 15 | 16 | [build-system] 17 | requires = ["setuptools", "wheel"] 18 | build-backend = "setuptools.build_meta" 19 | 20 | [tool.cibuildwheel] 21 | environment = { PIP_GLOBAL_OPTION="build_ext", PIP_NO_BUILD_ISOLATION="1" , CIBUILDWHEEL="1" } 22 | build-frontend = "pip" 23 | test-requires = "pytest" 24 | before-test = ["pip install -e {package}"] 25 | test-command = "pytest {project}/tests" 26 | test-skip = "cp37-* pp37-*" 27 | 28 | [tool.cibuildwheel.linux] 29 | before-all = "yum install -y libffi-devel" 30 | 31 | [tool.cibuildwheel.macos] 32 | before-all = "brew install libffi" 33 | 34 | [[tool.cibuildwheel.overrides]] 35 | select = "*-manylinux2_*" 36 | before-all = "apt-get -y install libffi-dev" 37 | 38 | [[tool.cibuildwheel.overrides]] 39 | select = "*-musllinux*" 40 | before-all = "apk add libffi-dev" 41 | 42 | [tool.setuptools] 43 | py-modules = [] 44 | 
-------------------------------------------------------------------------------- /pyxorfilter/__init__.py: -------------------------------------------------------------------------------- 1 | from pyxorfilter.pyxorfilter import Xor8, Xor16, Fuse8, Fuse16 2 | 3 | VERSION = "1.1.2" 4 | __all__ = ["Xor8", "Xor16", "Fuse8", "Fuse16"] 5 | -------------------------------------------------------------------------------- /pyxorfilter/ffibuild.py: -------------------------------------------------------------------------------- 1 | from cffi import FFI 2 | import os 3 | from sys import exit, platform 4 | 5 | 6 | ffi = FFI() 7 | cdef_from_file = None 8 | 9 | all_src = "" 10 | with open("lib/xor_singleheader/include/xorfilter.h", "r", encoding='utf-8') as src: 11 | all_src += src.read() 12 | 13 | with open("lib/xor_singleheader/include/binaryfusefilter.h", "r", encoding='utf-8') as src: 14 | all_src += src.read() 15 | 16 | 17 | ffi.set_source( 18 | "_xorfilter", all_src, 19 | ) 20 | 21 | ffi.cdef( 22 | """ 23 | static inline uint64_t xor_murmur64(uint64_t h) ; 24 | 25 | static inline uint64_t xor_mix_split(uint64_t key, uint64_t seed) ; 26 | 27 | static inline uint64_t xor_rotl64(uint64_t n, unsigned int c) ; 28 | 29 | static inline uint32_t xor_reduce(uint32_t hash, uint32_t n) ; 30 | 31 | static inline uint64_t xor_fingerprint(uint64_t hash) ; 32 | 33 | static inline uint64_t xor_rng_splitmix64(uint64_t *seed) ; 34 | 35 | 36 | typedef struct xor8_s { 37 | uint64_t seed; 38 | uint64_t blockLength; 39 | uint8_t 40 | *fingerprints; // after xor8_allocate, will point to 3*blockLength values 41 | } xor8_t; 42 | 43 | struct xor_xorset_s { 44 | uint64_t xormask; 45 | uint32_t count; 46 | }; 47 | 48 | typedef struct xor16_s { 49 | uint64_t seed; 50 | uint64_t blockLength; 51 | uint16_t 52 | *fingerprints; // after xor16_allocate, will point to 3*blockLength values 53 | } xor16_t; 54 | 55 | typedef struct binary_fuse8_s { 56 | uint64_t Seed; 57 | uint32_t SegmentLength; 58 | uint32_t 
SegmentLengthMask; 59 | uint32_t SegmentCount; 60 | uint32_t SegmentCountLength; 61 | uint32_t ArrayLength; 62 | uint8_t *Fingerprints; 63 | } binary_fuse8_t; 64 | 65 | typedef struct binary_fuse16_s { 66 | uint64_t Seed; 67 | uint32_t SegmentLength; 68 | uint32_t SegmentLengthMask; 69 | uint32_t SegmentCount; 70 | uint32_t SegmentCountLength; 71 | uint32_t ArrayLength; 72 | uint16_t *Fingerprints; 73 | } binary_fuse16_t; 74 | 75 | typedef struct xor_xorset_s xor_xorset_t; 76 | 77 | struct xor_hashes_s { 78 | uint64_t h; 79 | uint32_t h0; 80 | uint32_t h1; 81 | uint32_t h2; 82 | }; 83 | typedef struct xor_hashes_s xor_hashes_t; 84 | 85 | struct xor_h0h1h2_s { 86 | uint32_t h0; 87 | uint32_t h1; 88 | uint32_t h2; 89 | }; 90 | typedef struct xor_h0h1h2_s xor_h0h1h2_t; 91 | 92 | struct xor_keyindex_s { 93 | uint64_t hash; 94 | uint32_t index; 95 | }; 96 | 97 | typedef struct xor_keyindex_s xor_keyindex_t; 98 | 99 | struct xor_setbuffer_s { 100 | xor_keyindex_t *buffer; 101 | uint32_t *counts; 102 | int insignificantbits; 103 | uint32_t slotsize; // should be 1<< insignificantbits 104 | uint32_t slotcount; 105 | size_t originalsize; 106 | }; 107 | typedef struct xor_setbuffer_s xor_setbuffer_t; 108 | 109 | static inline bool xor8_contain(uint64_t key, const xor8_t *filter); 110 | static inline bool xor16_contain(uint64_t key, const xor16_t *filter); 111 | static inline bool binary_fuse8_contain(uint64_t key, const binary_fuse8_t *filter); 112 | static inline bool binary_fuse16_contain(uint64_t key, const binary_fuse16_t *filter); 113 | 114 | static inline bool xor8_allocate(uint32_t size, xor8_t *filter); 115 | static inline bool xor16_allocate(uint32_t size, xor16_t *filter); 116 | static inline bool binary_fuse8_allocate(uint32_t size, binary_fuse8_t *filter); 117 | static inline bool binary_fuse16_allocate(uint32_t size, binary_fuse16_t *filter); 118 | 119 | static inline size_t xor8_size_in_bytes(const xor8_t *filter); 120 | static inline size_t 
xor16_size_in_bytes(const xor16_t *filter); 121 | static inline size_t binary_fuse8_size_in_bytes(const binary_fuse8_t *filter); 122 | static inline size_t binary_fuse16_size_in_bytes(const binary_fuse16_t *filter); 123 | 124 | static inline void xor8_free(xor8_t *filter); 125 | static inline void xor16_free(xor16_t *filter); 126 | static inline void binary_fuse8_free(binary_fuse8_t *filter); 127 | static inline void binary_fuse16_free(binary_fuse16_t *filter); 128 | 129 | static inline xor_hashes_t xor8_get_h0_h1_h2(uint64_t k, const xor8_t *filter) ; 130 | static inline uint32_t xor8_get_h0(uint64_t hash, const xor8_t *filter) ; 131 | static inline uint32_t xor8_get_h1(uint64_t hash, const xor8_t *filter) ; 132 | static inline uint32_t xor8_get_h2(uint64_t hash, const xor8_t *filter) ; 133 | static inline uint32_t xor16_get_h0(uint64_t hash, const xor16_t *filter) ; 134 | static inline uint32_t xor16_get_h1(uint64_t hash, const xor16_t *filter) ; 135 | static inline uint32_t xor16_get_h2(uint64_t hash, const xor16_t *filter) ; 136 | static inline xor_hashes_t xor16_get_h0_h1_h2(uint64_t k, const xor16_t *filter) ; 137 | 138 | static inline bool xor_init_buffer(xor_setbuffer_t *buffer, size_t size) ; 139 | static inline void xor_free_buffer(xor_setbuffer_t *buffer) ; 140 | static inline void xor_buffered_increment_counter(uint32_t index, uint64_t hash, xor_setbuffer_t *buffer, xor_xorset_t *sets) ; 141 | static inline void xor_make_buffer_current(xor_setbuffer_t *buffer, xor_xorset_t *sets, uint32_t index, xor_keyindex_t *Q, size_t *Qsize) ; 142 | static inline void xor_buffered_decrement_counter(uint32_t index, uint64_t hash, xor_setbuffer_t *buffer, xor_xorset_t *sets, xor_keyindex_t *Q, size_t *Qsize) ; 143 | static inline void xor_flush_increment_buffer(xor_setbuffer_t *buffer, xor_xorset_t *sets) ; 144 | static inline void xor_flush_decrement_buffer(xor_setbuffer_t *buffer, xor_xorset_t *sets, xor_keyindex_t *Q, size_t *Qsize) ; 145 | static inline uint32_t 
xor_flushone_decrement_buffer(xor_setbuffer_t *buffer, xor_xorset_t *sets, xor_keyindex_t *Q, size_t *Qsize) ; 146 | 147 | bool xor8_buffered_populate(const uint64_t *keys, uint32_t size, xor8_t *filter) ; 148 | bool xor8_populate(const uint64_t *keys, uint32_t size, xor8_t *filter) ; 149 | bool binary_fuse8_populate(const uint64_t *keys, uint32_t size, binary_fuse8_t *filter) ; 150 | bool xor16_buffered_populate(const uint64_t *keys, uint32_t size, xor16_t *filter) ; 151 | bool xor16_populate(const uint64_t *keys, uint32_t size, xor16_t *filter) ; 152 | bool binary_fuse16_populate(const uint64_t *keys, uint32_t size, binary_fuse16_t *filter) ; 153 | 154 | 155 | size_t xor16_serialization_bytes(xor16_t *filter); 156 | size_t xor8_serialization_bytes(const xor8_t *filter); 157 | void xor16_serialize(const xor16_t *filter, char *buffer); 158 | void xor8_serialize(const xor8_t *filter, char *buffer); 159 | bool xor16_deserialize(xor16_t * filter, const char *buffer); 160 | bool xor8_deserialize(xor8_t * filter, const char *buffer); 161 | 162 | size_t binary_fuse16_serialization_bytes(binary_fuse16_t *filter); 163 | size_t binary_fuse8_serialization_bytes(const binary_fuse8_t *filter); 164 | void binary_fuse16_serialize(const binary_fuse16_t *filter, char *buffer); 165 | void binary_fuse8_serialize(const binary_fuse8_t *filter, char *buffer); 166 | bool binary_fuse16_deserialize(binary_fuse16_t * filter, const char *buffer); 167 | bool binary_fuse8_deserialize(binary_fuse8_t * filter, const char *buffer); 168 | 169 | """ 170 | ) 171 | 172 | if __name__ == "__main__": 173 | ffi.compile(verbose=True) 174 | -------------------------------------------------------------------------------- /pyxorfilter/pyxorfilter.py: -------------------------------------------------------------------------------- 1 | from ._xorfilter import lib, ffi 2 | from ctypes import c_ulonglong 3 | import xxhash 4 | import struct 5 | 6 | def hash(item): 7 | return xxhash.xxh64(str(item)).intdigest() 8 
class _FilterBase:
    """Shared Python wrapper for one family of C filter functions.

    A concrete filter class only declares which C struct and which C
    functions it maps to (the class attributes below); every method then
    resolves the matching function on ``lib`` by name. Keeping the logic in
    one place stops the four public classes from drifting apart.
    """

    _c_type = None      # C struct name handed to ffi.new(), e.g. "xor8_t"
    _c_prefix = None    # common prefix of the C functions, e.g. "xor8"
    _c_populate = None  # full name of the C function used by populate()
    _bits = None        # fingerprint width; only used in the error message

    def _call(self, suffix, *args):
        """Invoke the C function ``<prefix>_<suffix>`` with ``args``."""
        return getattr(lib, "{}_{}".format(self._c_prefix, suffix))(*args)

    def __init__(self, size):
        """Allocate a filter with room for ``size`` keys.

        Raises:
            MemoryError: if the C allocator reports failure. Previously this
                was only printed, leaving an object that could not be used.
        """
        self._filter = ffi.new(self._c_type + " *")
        if not self._call("allocate", size, self._filter):
            raise MemoryError(
                "Unable to allocate memory for {} bit filter".format(self._bits)
            )

    def __repr__(self):
        return "{} object with size(in bytes):{}".format(
            type(self).__name__, self.size_in_bytes()
        )

    def __getitem__(self, item):
        """Return whether ``item`` is (probably) in the filter."""
        return self.contains(item)

    def __contains__(self, item):
        """Support ``item in filter``.

        Without this method Python falls back to calling ``__getitem__``
        with 0, 1, 2, ... until IndexError, which this class never raises.
        """
        return bool(self.contains(item))

    def __del__(self):
        # The attribute is missing if construction failed before ffi.new()
        # returned, so look it up defensively before releasing C memory.
        handle = getattr(self, "_filter", None)
        if handle is not None:
            self._call("free", handle)

    def populate(self, data):
        """Build the filter from ``data`` and return the C status flag.

        ``data`` may be a list or any other iterable; it is consumed once.
        Every item is reduced to a 64-bit key with this module's ``hash``
        helper (the xxhash-based one, not the builtin).
        """
        keys = [c_ulonglong(hash(entry)).value for entry in data]
        return getattr(lib, self._c_populate)(keys, len(keys), self._filter)

    def contains(self, item):
        """Return the C membership result for ``item``."""
        key = c_ulonglong(hash(item)).value
        return self._call("contain", key, self._filter)

    def size_in_bytes(self):
        """Return the size reported by the C ``*_size_in_bytes`` function."""
        return self._call("size_in_bytes", self._filter)

    def serialize(self):
        """Return a buffer object holding the serialized filter."""
        raw = ffi.new("char[]", self._call("serialization_bytes", self._filter))
        self._call("serialize", self._filter, raw)
        return ffi.buffer(raw)

    @classmethod
    def deserialize(cls, buffer):
        """Rebuild a filter from the output of :meth:`serialize`.

        ``buffer`` must support the buffer protocol (e.g. ``bytes``).
        NOTE(review): the length of ``buffer`` is not validated here; only
        pass data produced by ``serialize`` of the same filter class.

        Raises:
            RuntimeError: if the C deserializer reports failure.
        """
        # Bypass __init__: the C deserializer fills in the struct itself.
        self = object.__new__(cls)
        self._filter = ffi.new(cls._c_type + " *")
        if not self._call("deserialize", self._filter, ffi.from_buffer(buffer)):
            raise RuntimeError(
                "Unable to deserialize {} filter".format(cls.__name__)
            )
        return self


class Xor8(_FilterBase):
    """Xor filter backed by the C ``xor8_t`` implementation."""

    _c_type = "xor8_t"
    _c_prefix = "xor8"
    _c_populate = "xor8_buffered_populate"
    _bits = 8


class Xor16(_FilterBase):
    """Xor filter backed by the C ``xor16_t`` implementation."""

    _c_type = "xor16_t"
    _c_prefix = "xor16"
    _c_populate = "xor16_buffered_populate"
    _bits = 16


class Fuse8(_FilterBase):
    """Binary fuse filter backed by the C ``binary_fuse8_t`` implementation."""

    _c_type = "binary_fuse8_t"
    _c_prefix = "binary_fuse8"
    _c_populate = "binary_fuse8_populate"
    _bits = 8


class Fuse16(_FilterBase):
    """Binary fuse filter backed by the C ``binary_fuse16_t`` implementation."""

    _c_type = "binary_fuse16_t"
    _c_prefix = "binary_fuse16"
    _c_populate = "binary_fuse16_populate"
    _bits = 16
description="Python bindings for C implementation of xorfilter", 8 | long_description=open("README.md", "r", encoding='utf-8').read(), 9 | long_description_content_type="text/markdown", 10 | author="Amey Narkhede", 11 | author_email="ameynarkhede02@gmail.com", 12 | url="https://github.com/glitzflitz/pyxorfilter", 13 | license="Apache 2.0", 14 | python_requires=">=3.0", 15 | packages=find_packages(), 16 | ext_package="pyxorfilter", 17 | install_requires=["cffi","xxhash"], 18 | setup_requires=["cffi"], 19 | cffi_modules=["pyxorfilter/ffibuild.py:ffi"], 20 | test_suite="nose.collector", 21 | tests_require=["nose"], 22 | ) 23 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/glitzflitz/pyxorfilter/ef4d1f778677697a42612632257b1afc41ccb5ff/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_fuse16.py: -------------------------------------------------------------------------------- 1 | from pyxorfilter import Fuse16 2 | from random import sample 3 | import tempfile, os 4 | 5 | 6 | def test_fuse16_int(): 7 | xor_filter = Fuse16(50) 8 | xor_filter.populate([_ for _ in range(50)]) 9 | for i in range(50): 10 | assert xor_filter.contains(i) == True 11 | 12 | def test_fuse16_int_iterable(): 13 | xor_filter = Fuse16(50) 14 | xor_filter.populate(range(50)) 15 | for i in range(50): 16 | assert xor_filter.contains(i) == True 17 | 18 | def test_fuse16_strings(): 19 | xor_filter = Fuse16(10) 20 | test_str = ["あ", "/dev/null; touch /tmp/blns.fail ; echo", "अ", "Normal", "122"] 21 | xor_filter.populate(test_str.copy()) 22 | for i in test_str: 23 | assert xor_filter.contains(i) == True 24 | 25 | 26 | def test_fuse16_floats(): 27 | xor_filter = Fuse16(10) 28 | test_floats = [1.23, 9999.1616, 323.43, 0.0] 29 | xor_filter.populate(test_floats.copy()) 30 | for i in 
test_floats: 31 | assert xor_filter.contains(i) == True 32 | 33 | 34 | def test_fuse16_all(): 35 | xor_filter = Fuse16(5) 36 | test_str = ["string", 51, 0.0, 12.3] 37 | xor_filter.populate(test_str.copy()) 38 | for i in test_str: 39 | assert xor_filter.contains(i) == True 40 | 41 | def test_fuse16_serialize(): 42 | xor_filter = Fuse16(5) 43 | test_str = ["string", 51, 0.0, 12.3] 44 | xor_filter.populate(test_str.copy()) 45 | serialized_filter = tempfile.NamedTemporaryFile(delete=False).name 46 | with open(serialized_filter, 'wb') as f: 47 | f.write(xor_filter.serialize()) 48 | 49 | with open(serialized_filter, 'rb') as f: 50 | recover_xor_filter = Fuse16.deserialize(f.read()) 51 | 52 | for i in test_str: 53 | assert recover_xor_filter.contains(i) 54 | 55 | os.remove(serialized_filter) 56 | -------------------------------------------------------------------------------- /tests/test_fuse8.py: -------------------------------------------------------------------------------- 1 | from pyxorfilter import Fuse8 2 | from random import sample 3 | import tempfile, os 4 | 5 | def test_fuse8_int(): 6 | xor_filter = Fuse8(50) 7 | xor_filter.populate([_ for _ in range(50)]) 8 | for i in range(50): 9 | assert xor_filter.contains(i) == True 10 | 11 | def test_fuse8_int_iterable(): 12 | xor_filter = Fuse8(50) 13 | xor_filter.populate(range(50)) 14 | for i in range(50): 15 | assert xor_filter.contains(i) == True 16 | 17 | def test_fuse8_strings(): 18 | xor_filter = Fuse8(10) 19 | test_str = ["あ", "/dev/null; touch /tmp/blns.fail ; echo", "अ", "Normal", "122"] 20 | xor_filter.populate(test_str.copy()) 21 | for i in test_str: 22 | assert xor_filter.contains(i) == True 23 | 24 | 25 | def test_fuse8_floats(): 26 | xor_filter = Fuse8(10) 27 | test_floats = [1.23, 9999.88, 323.43, 0.0] 28 | xor_filter.populate(test_floats.copy()) 29 | for i in test_floats: 30 | assert xor_filter.contains(i) == True 31 | 32 | 33 | def test_fuse8_all(): 34 | xor_filter = Fuse8(5) 35 | test_str = ["string", 
51, 0.0, 12.3] 36 | xor_filter.populate(test_str.copy()) 37 | for i in test_str: 38 | assert xor_filter.contains(i) == True 39 | 40 | def test_fuse8_serialize(): 41 | xor_filter = Fuse8(5) 42 | test_str = ["string", 51, 0.0, 12.3] 43 | xor_filter.populate(test_str.copy()) 44 | serialized_filter = tempfile.NamedTemporaryFile(delete=False).name 45 | with open(serialized_filter, 'wb') as f: 46 | f.write(xor_filter.serialize()) 47 | 48 | with open(serialized_filter, 'rb') as f: 49 | recover_xor_filter = Fuse8.deserialize(f.read()) 50 | 51 | for i in test_str: 52 | assert recover_xor_filter.contains(i) 53 | 54 | os.remove(serialized_filter) 55 | -------------------------------------------------------------------------------- /tests/test_xor16.py: -------------------------------------------------------------------------------- 1 | from pyxorfilter import Xor16 2 | import random 3 | import tempfile, os 4 | 5 | 6 | def test_xor16_int(): 7 | xor_filter = Xor16(100) 8 | test_lst = random.sample(range(0, 1000), 100) 9 | xor_filter.populate(test_lst.copy()) 10 | for i in test_lst: 11 | assert xor_filter.contains(i) == True 12 | for i in random.sample(range(1000, 3000), 500): 13 | assert xor_filter.contains(i) == False 14 | 15 | def test_xor16_int_iterable(): 16 | xor_filter = Xor16(100) 17 | xor_filter.populate(range(50)) 18 | for i in range(50): 19 | assert xor_filter.contains(i) == True 20 | 21 | def test_xor16_strings(): 22 | xor_filter = Xor16(10) 23 | test_str = ["あ", "/dev/null; touch /tmp/blns.fail ; echo", "अ", "Normal", "122"] 24 | xor_filter.populate(test_str.copy()) 25 | for test in test_str: 26 | assert xor_filter.contains(test) == True 27 | test_str2 = ["月", "क", "12", "delta"] 28 | for i in test_str2: 29 | assert xor_filter.contains(i) == False 30 | 31 | 32 | def test_xor16_floats(): 33 | xor_filter = Xor16(10) 34 | test_floats = [1.23, 9999.88, 323.43, 0.0] 35 | xor_filter.populate(test_floats.copy()) 36 | for i in test_floats: 37 | assert xor_filter.contains(i) 
== True 38 | test_floats2 = [-1.23, 1.0, 0.1, 676.5, 1.234] 39 | for i in test_floats2: 40 | assert xor_filter.contains(i) == False 41 | 42 | 43 | def test_xor16_all(): 44 | xor_filter = Xor16(5) 45 | test_str = ["string", 51, 0.0, 12.3] 46 | xor_filter.populate(test_str.copy()) 47 | for i in test_str: 48 | assert xor_filter.contains(i) == True 49 | test_str2 = [12, "४", 0.1] 50 | for i in test_str2: 51 | assert xor_filter.contains(i) == False 52 | 53 | def test_xor16_serialize(): 54 | xor_filter = Xor16(5) 55 | test_str = ["string", 51, 0.0, 12.3] 56 | xor_filter.populate(test_str.copy()) 57 | serialized_filter = tempfile.NamedTemporaryFile(delete=False).name 58 | with open(serialized_filter, 'wb') as f: 59 | f.write(xor_filter.serialize()) 60 | 61 | with open(serialized_filter, 'rb') as f: 62 | recover_xor_filter = Xor16.deserialize(f.read()) 63 | 64 | for i in test_str: 65 | assert recover_xor_filter.contains(i) 66 | 67 | os.remove(serialized_filter) 68 | -------------------------------------------------------------------------------- /tests/test_xor8.py: -------------------------------------------------------------------------------- 1 | from pyxorfilter import Xor8 2 | from random import sample 3 | import tempfile, os 4 | 5 | 6 | def test_xor8_int(): 7 | xor_filter = Xor8(50) 8 | xor_filter.populate([_ for _ in range(50)]) 9 | for i in range(50): 10 | assert xor_filter.contains(i) == True 11 | 12 | def test_xor8_int_iterable(): 13 | xor_filter = Xor8(50) 14 | xor_filter.populate(range(50)) 15 | for i in range(50): 16 | assert xor_filter.contains(i) == True 17 | 18 | def test_xor8_strings(): 19 | xor_filter = Xor8(10) 20 | test_str = ["あ", "/dev/null; touch /tmp/blns.fail ; echo", "अ", "Normal", "122"] 21 | xor_filter.populate(test_str.copy()) 22 | for i in test_str: 23 | assert xor_filter.contains(i) == True 24 | 25 | 26 | def test_xor8_floats(): 27 | xor_filter = Xor8(10) 28 | test_floats = [1.23, 9999.88, 323.43, 0.0] 29 | 
xor_filter.populate(test_floats.copy()) 30 | for i in test_floats: 31 | assert xor_filter.contains(i) == True 32 | 33 | 34 | def test_xor8_all(): 35 | xor_filter = Xor8(5) 36 | test_str = ["string", 51, 0.0, 12.3] 37 | xor_filter.populate(test_str.copy()) 38 | for i in test_str: 39 | assert xor_filter.contains(i) == True 40 | 41 | def test_xor8_serialize(): 42 | xor_filter = Xor8(5) 43 | test_str = ["string", 51, 0.0, 12.3] 44 | xor_filter.populate(test_str.copy()) 45 | serialized_filter = tempfile.NamedTemporaryFile(delete=False).name 46 | with open(serialized_filter, 'wb') as f: 47 | f.write(xor_filter.serialize()) 48 | 49 | with open(serialized_filter, 'rb') as f: 50 | recover_xor_filter = Xor8.deserialize(f.read()) 51 | 52 | for i in test_str: 53 | assert recover_xor_filter.contains(i) == True 54 | 55 | os.remove(serialized_filter) 56 | --------------------------------------------------------------------------------