├── .gitmodules ├── pyroaring ├── py.typed ├── version.pxi ├── croaring_version.pxi ├── frozen_bitmap.pxi ├── pyroaring.pyx ├── croaring.pxd ├── __init__.pyi ├── bitmap.pxi └── abstract_bitmap.pxi ├── docs ├── requirements.txt ├── index.rst ├── Makefile └── conf.py ├── .gitattributes ├── MANIFEST.in ├── Pipfile ├── .gitignore ├── download_amalgamation.py ├── LICENSE ├── .readthedocs.yaml ├── setup.cfg ├── tox.ini ├── .github └── workflows │ ├── test.yml │ └── buildwheels.yml ├── cydoctest.py ├── setup.py ├── test_state_machine.py ├── quick_bench.py ├── README.rst └── test.py /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyroaring/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | cython>=0.29.21 2 | -------------------------------------------------------------------------------- /pyroaring/version.pxi: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.3" 2 | -------------------------------------------------------------------------------- /pyroaring/croaring_version.pxi: -------------------------------------------------------------------------------- 1 | __croaring_version__ = "v4.2.3" -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | pyroaring/roaring.c -diff 2 | pyroaring/roaring.h -diff -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyroaring/* 2 | include LICENSE 3 | exclude 
pyproject.toml 4 | exclude pyroaring/pyroaring.cpp 5 | -------------------------------------------------------------------------------- /pyroaring/frozen_bitmap.pxi: -------------------------------------------------------------------------------- 1 | cdef class FrozenBitMap(AbstractBitMap): 2 | pass 3 | 4 | cdef class FrozenBitMap64(AbstractBitMap64): 5 | pass 6 | -------------------------------------------------------------------------------- /pyroaring/pyroaring.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # cython: binding = True, language_level=3 3 | 4 | include 'version.pxi' 5 | include 'croaring_version.pxi' 6 | include 'abstract_bitmap.pxi' 7 | include 'frozen_bitmap.pxi' 8 | include 'bitmap.pxi' 9 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | 8 | [dev-packages] 9 | Cython = ">=0.21" 10 | hypothesis = "<3.60" 11 | ipython = "*" 12 | twine = "*" 13 | 14 | [requires] 15 | python_version = "3.6" 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .hypothesis 3 | *.pyc 4 | *.tex 5 | *.aux 6 | *.log 7 | *.pickle 8 | *.pdf 9 | *.so 10 | .tox 11 | wheel 12 | build 13 | dist 14 | *.egg-info 15 | amalgamation_demo.* 16 | MANIFEST 17 | docs/_build 18 | Pipfile.lock 19 | .mypy_cache 20 | pyroaring/pyroaring.cpp 21 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | pyroaring API documentation 2 | =============================== 3 | .. 
"""
Helper script to download a specific release amalgamation file for CRoaring.

Usage: python download_amalgamation.py <version>

The version needs to be the specific release tag on github (e.g. "v4.2.3").

The script downloads roaring.c and roaring.h into the pyroaring/ directory and
records the tag in pyroaring/croaring_version.pxi.
"""
import os
import sys
from urllib.request import urlretrieve

# Fail with a usage hint rather than a bare IndexError when the tag is missing.
if len(sys.argv) < 2:
    sys.exit(f"Usage: python {sys.argv[0]} <version>")

version = sys.argv[1]

release = f"https://github.com/RoaringBitmap/CRoaring/releases/download/{version}/"

print(f"Downloading version {version} of the croaring amalgamation")

files = ["roaring.c", "roaring.h"]

for file in files:
    # Each amalgamation file is stored next to the Cython sources.
    urlretrieve(release + file, os.path.join("pyroaring", file))

# Record which CRoaring release the vendored sources correspond to.
with open(os.path.join("pyroaring", "croaring_version.pxi"), "w") as f:
    f.write(f"__croaring_version__ = \"{version}\"")
22 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | # You can also specify other tool versions: 13 | # nodejs: "20" 14 | # rust: "1.70" 15 | # golang: "1.20" 16 | 17 | # Build documentation in the "docs/" directory with Sphinx 18 | sphinx: 19 | configuration: docs/conf.py 20 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 21 | # builder: "dirhtml" 22 | # Fail on all warnings to avoid broken references 23 | # fail_on_warning: true 24 | 25 | # Optionally build your docs in additional formats such as PDF and ePub 26 | # formats: 27 | # - pdf 28 | # - epub 29 | 30 | # Optional but recommended, declare the Python requirements required 31 | # to build your documentation 32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 33 | python: 34 | install: 35 | - requirements: docs/requirements.txt 36 | - method: pip 37 | path: . 38 | extra_requirements: 39 | - docs 40 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | .eggs 4 | .git 5 | .pybuild 6 | .tox 7 | __pycache__ 8 | build 9 | dist 10 | ignore = 11 | # W503 and W504 conflict; ignore the one that disagrees with recent PEP8. 12 | W503 13 | 14 | # try to keep it reasonable, but this allows us to push it a bit when needed. 
15 | max_line_length = 150 16 | 17 | noqa-require-code = true 18 | 19 | 20 | [isort] 21 | atomic = True 22 | balanced_wrapping = True 23 | combine_as_imports = True 24 | include_trailing_comma = True 25 | length_sort = True 26 | multi_line_output = 3 27 | order_by_type = False 28 | 29 | default_section = THIRDPARTY 30 | sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER 31 | 32 | 33 | [mypy] 34 | disallow_any_explicit = True 35 | disallow_any_generics = True 36 | disallow_subclassing_any = True 37 | 38 | disallow_untyped_defs = True 39 | check_untyped_defs = True 40 | disallow_untyped_decorators = True 41 | 42 | no_implicit_optional = True 43 | strict_optional = True 44 | 45 | warn_redundant_casts = True 46 | warn_unused_ignores = True 47 | warn_return_any = True 48 | warn_unreachable = True 49 | 50 | implicit_reexport = False 51 | strict_equality = True 52 | 53 | scripts_are_modules = True 54 | warn_unused_configs = True 55 | 56 | show_error_codes = True 57 | enable_error_code = ignore-without-code 58 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | cython3 4 | test_wheel 5 | 6 | 7 | [testenv] 8 | setenv = 9 | PYTHONFAULTHANDLER=1 10 | 11 | 12 | [testenv:cython3] 13 | deps = 14 | hypothesis 15 | pytest 16 | cython>=3.0.2 17 | passenv = 18 | HYPOTHESIS_PROFILE 19 | ROARING_BITSIZE 20 | commands = 21 | py.test -v test.py test_state_machine.py 22 | python cydoctest.py 23 | 24 | 25 | [testenv:test_wheel] 26 | deps = 27 | hypothesis 28 | pytest 29 | wheel 30 | build 31 | twine 32 | skip_sdist = true 33 | skip_install = true 34 | passenv = 35 | HYPOTHESIS_PROFILE 36 | ROARING_BITSIZE 37 | allowlist_externals = 38 | rm 39 | mkdir 40 | commands = 41 | # Build the sdist and wheel into the dist directory 42 | python -m build 43 | # Install from the wheel in that directory 44 | pip install --only-binary ":all:" 
--find-links=dist --no-index pyroaring 45 | py.test -v test.py test_state_machine.py 46 | python cydoctest.py 47 | 48 | 49 | [testenv:linting] 50 | deps = 51 | flake8 52 | flake8-isort 53 | flake8-noqa 54 | flake8-pyi 55 | skip_sdist = true 56 | skip_install = true 57 | commands = 58 | flake8 59 | 60 | 61 | [testenv:type_check] 62 | deps = 63 | mypy 64 | hypothesis 65 | skip_sdist = true 66 | skip_install = true 67 | commands = 68 | # This serves to check that our stubs at least match the usages we test 69 | mypy test.py 70 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push,pull_request,workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | os: [ubuntu-latest, macos-latest, windows-latest] 12 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] 13 | steps: 14 | - name: Set up the repository 15 | uses: actions/checkout@v3 16 | with: 17 | submodules: recursive 18 | fetch-depth: 0 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5.1.1 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | allow-prereleases: true 24 | - name: Install dependencies 25 | run: | 26 | pip install --upgrade pip 27 | pip install tox 28 | - name: Local build and tests (32 bits roaring bitmaps) 29 | env: 30 | HYPOTHESIS_PROFILE: ci 31 | ROARING_BITSIZE: 32 32 | run: | 33 | tox 34 | - name: Local build and tests (64 bits roaring bitmaps) 35 | env: 36 | HYPOTHESIS_PROFILE: ci 37 | ROARING_BITSIZE: 64 38 | run: | 39 | tox 40 | -------------------------------------------------------------------------------- /cydoctest.py: -------------------------------------------------------------------------------- 1 | #!python 2 | """ 3 | Taken from 
"""
Cython-compatible wrapper for doctest.testmod().

Taken from
https://github.com/cython/cython/wiki/FAQ#how-can-i-run-doctests-in-cython-code-pyx-files

Usage example, assuming a Cython module mymod.pyx is compiled.
This is run from the command line, passing a command to Python:
python -c "import cydoctest, mymod; cydoctest.testmod(mymod)"

(This still won't let a Cython module run its own doctests
when called with "python mymod.py", but it's pretty close.
Further options can be passed to testmod() as desired, e.g.
verbose=True.)
"""

import sys
import doctest
import inspect


def _from_module(module, object):
    """
    Return true if the given object is defined in the given module.

    A ``module`` of None matches every object. Raises ValueError when the
    object carries no information that allows its origin to be determined.
    """
    if module is None:
        return True
    owner = inspect.getmodule(object)
    if owner is not None:
        return module is owner
    if inspect.isfunction(object):
        # Python 3 spells the function globals ``__globals__``; the Python 2
        # name ``func_globals`` no longer exists and raised AttributeError.
        return module.__dict__ is object.__globals__
    if inspect.isclass(object) or hasattr(object, '__module__'):
        return module.__name__ == object.__module__
    if isinstance(object, property):
        return True  # [XX] no way to be sure.
    raise ValueError("object must be a class or function")


def fix_module_doctest(module):
    """
    Extract docstrings from cython functions, that would be skipped by doctest
    otherwise.

    The docstrings of the module-level builtin callables defined in ``module``
    are stored in ``module.__test__`` (replacing any previous value), which is
    where doctest looks for additional tests.
    """
    module.__test__ = {}
    for name in dir(module):
        value = getattr(module, name)
        if inspect.isbuiltin(value) and isinstance(value.__doc__, str) and _from_module(module, value):
            module.__test__[name] = value.__doc__


def testmod(m=None, *args, **kwargs):
    """
    Fix a Cython module's doctests, then call doctest.testmod()

    All other arguments are passed directly to doctest.testmod().

    When ``m`` is None the ``__main__`` module is used, which is also what
    doctest.testmod() does. The process exits with an error message if at
    least one doctest failed; otherwise the doctest results are returned.
    """
    if m is None:
        # Resolve the default here: fix_module_doctest() needs a real module.
        m = sys.modules['__main__']
    fix_module_doctest(m)
    result = doctest.testmod(m, *args, **kwargs)
    if result.failed > 0:
        sys.exit('%d test(s) failed' % result.failed)
    return result


if __name__ == "__main__":
    import pyroaring
    testmod(pyroaring)
CIBW_SKIP: "pp* cp36-* cp37-*" 58 | CIBW_ARCHS_LINUX: ${{ runner.arch == 'X64' && 'auto' || 'auto armv7l' }} 59 | CIBW_ARCHS_MACOS: ${{ runner.arch == 'X64' && 'auto' || 'auto universal2' }} 60 | CIBW_ARCHS_WINDOWS: "auto ARM64" 61 | CIBW_TEST_SKIP: "*-win_arm64" 62 | CIBW_BUILD_FRONTEND: "build" 63 | 64 | - uses: actions/upload-artifact@v4 65 | with: 66 | path: ./wheelhouse/*.whl 67 | name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} 68 | 69 | upload_pypi: 70 | needs: [build_wheels, build_sdist] 71 | runs-on: ubuntu-latest 72 | steps: 73 | - uses: actions/download-artifact@v4 74 | with: 75 | pattern: cibw-* 76 | path: dist 77 | merge-multiple: true 78 | 79 | - uses: pypa/gh-action-pypi-publish@v1.5.0 80 | # Deploy releases to pypi. 81 | if: github.event_name == 'release' && github.event.action == 'released' 82 | with: 83 | user: __token__ 84 | password: ${{ secrets.PYPI_API_TOKEN }} 85 | 86 | - uses: pypa/gh-action-pypi-publish@v1.5.0 87 | # Deploy pre-releases to test pypi. 88 | if: github.event_name == 'release' && github.event.action == 'prereleased' 89 | with: 90 | user: __token__ 91 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 92 | repository_url: https://test.pypi.org/legacy/ 93 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #! 
# Build script for pyroaring: compiles the Cython extension together with the
# vendored CRoaring amalgamation (pyroaring/roaring.c).

import os
import platform
from distutils.sysconfig import get_config_vars

from setuptools import setup
from setuptools.extension import Extension

PKG_DIR = 'pyroaring'

PLATFORM_WINDOWS = (platform.system() == 'Windows')
PLATFORM_MACOSX = (platform.system() == 'Darwin')

# Read version file from the src. The file is executed in a private namespace
# so that it cannot leak names into (or clobber names of) this script.
_version_namespace = {}
with open("pyroaring/version.pxi", encoding="utf-8") as fp:
    exec(fp.read(), _version_namespace)
VERSION = _version_namespace['__version__']


# Remove -Wstrict-prototypes option
# See http://stackoverflow.com/a/29634231/4110059
if not PLATFORM_WINDOWS:
    cfg_vars = get_config_vars()
    for key, value in cfg_vars.items():
        if isinstance(value, str):
            cfg_vars[key] = value.replace("-Wstrict-prototypes", "")

# The long description is optional: fall back to an empty one when the README
# cannot be read.
try:
    with open('README.rst', encoding="utf-8") as f:
        long_description = f.read()
except OSError:
    print('Could not generate long description.')
    long_description = ''


if PLATFORM_WINDOWS:
    # On Windows roaring.c is compiled as part of the extension itself.
    pyroaring_module = Extension(
        'pyroaring',
        sources=[os.path.join(PKG_DIR, 'pyroaring.pyx'), os.path.join(PKG_DIR, 'roaring.c')],
        language='c++',
    )
    libraries = None
else:
    compile_args = ['-D__STDC_LIMIT_MACROS', '-D__STDC_CONSTANT_MACROS', '-D _GLIBCXX_ASSERTIONS']
    if PLATFORM_MACOSX:
        compile_args.append('-mmacosx-version-min=10.14')
    # DEBUG in the environment selects an unoptimized build with debug symbols.
    if 'DEBUG' in os.environ:
        compile_args.extend(['-O0', '-g'])
    else:
        compile_args.append('-O3')
    # The '-march=native' flag is not universally allowed. In particular, it
    # will systematically fail on aarch64 systems (like the new Apple M1 systems). It
    # also creates troubles under macOS with pip installs and requires ugly workarounds.
    # The best way to handle people who want to use -march=native is to ask them
    # to pass ARCHI=native to their build process; ARCHI=generic (or no ARCHI at
    # all) adds no -march flag.
    archi = os.environ.get('ARCHI')
    if archi is not None and archi != "generic":
        compile_args.append('-march=%s' % archi)

    pyroaring_module = Extension(
        'pyroaring',
        sources=[os.path.join(PKG_DIR, 'pyroaring.pyx')],
        extra_compile_args=compile_args + ["-std=c++11"],
        language='c++',
    )

    # Because we compile croaring with a c compiler with sometimes incompatible arguments,
    # define croaring compilation with an extra argument for the c11 standard, which is
    # required for atomic support.
    croaring = (
        'croaring',
        {
            'sources': [os.path.join(PKG_DIR, 'roaring.c')],
            "extra_compile_args": compile_args + ["-std=c11"],
        },
    )
    libraries = [croaring]

setup(
    name='pyroaring',
    ext_modules=[pyroaring_module],
    libraries=libraries,
    package_data={'pyroaring': ['py.typed', '__init__.pyi']},
    packages=['pyroaring'],
    version=VERSION,
    description='Library for handling efficiently sorted integer sets.',
    long_description=long_description,
    setup_requires=['cython>=3.0.2'],
    url='https://github.com/Ezibenroc/PyRoaringBitMap',
    author='Tom Cornebize',
    author_email='tom.cornebize@gmail.com',
    license='MIT',
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Intended Audience :: Developers',
        'Operating System :: POSIX :: Linux',
        'Operating System :: MacOS :: MacOS X',
        'Operating System :: Microsoft :: Windows',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Programming Language :: Python :: 3.13',
    ],
)
-------------------------------------------------------------------------------- 1 | from __future__ import annotations # for using set[int] in Python 3.8 2 | 3 | import hypothesis.strategies as st 4 | from hypothesis.database import DirectoryBasedExampleDatabase 5 | from hypothesis.stateful import Bundle, RuleBasedStateMachine, rule 6 | from hypothesis import settings 7 | from dataclasses import dataclass 8 | from pyroaring import BitMap, BitMap64 9 | from test import hyp_collection, uint32, uint64, is_32_bits 10 | 11 | if is_32_bits: 12 | BitMapClass = BitMap 13 | int_class = uint32 14 | large_val = 2**30 15 | else: 16 | BitMapClass = BitMap64 17 | int_class = uint64 18 | large_val = 2**40 19 | 20 | @dataclass 21 | class Collection: 22 | test: BitMapClass 23 | ref: set[int] 24 | 25 | def check(self): 26 | assert len(self.test) == len(self.ref) 27 | assert set(self.test) == self.ref 28 | 29 | def __post_init__(self): 30 | self.check() 31 | 32 | 33 | class SetComparison(RuleBasedStateMachine): 34 | collections = Bundle("collections") 35 | 36 | @rule(target=collections, val=hyp_collection) 37 | def init_collection(self, val): 38 | return Collection(test=BitMapClass(val), ref=set(val)) 39 | 40 | @rule(target=collections, col=collections) 41 | def copy(self, col): 42 | return Collection(test=BitMapClass(col.test), ref=set(col.ref)) 43 | 44 | @rule(col=collections, val=int_class) 45 | def add_elt(self, col, val): 46 | col.test.add(val) 47 | col.ref.add(val) 48 | col.check() 49 | 50 | @rule(col=collections, val=int_class) 51 | def remove_elt(self, col, val): 52 | col.test.discard(val) 53 | col.ref.discard(val) 54 | col.check() 55 | 56 | @rule(target=collections, col1=collections, col2=collections) 57 | def union(self, col1, col2): 58 | return Collection(test=col1.test | col2.test, ref=col1.ref | col2.ref) 59 | 60 | @rule(col1=collections, col2=collections) 61 | def union_inplace(self, col1, col2): 62 | col1.test |= col2.test 63 | col1.ref |= col2.ref 64 | col1.check() 
65 | 66 | @rule(target=collections, col1=collections, col2=collections) 67 | def intersection(self, col1, col2): 68 | return Collection(test=col1.test & col2.test, ref=col1.ref & col2.ref) 69 | 70 | @rule(col1=collections, col2=collections) 71 | def intersection_inplace(self, col1, col2): 72 | col1.test &= col2.test 73 | col1.ref &= col2.ref 74 | col1.check() 75 | 76 | @rule(target=collections, col1=collections, col2=collections) 77 | def difference(self, col1, col2): 78 | return Collection(test=col1.test - col2.test, ref=col1.ref - col2.ref) 79 | 80 | @rule(col1=collections, col2=collections) 81 | def difference_inplace(self, col1, col2): 82 | col1.test -= col2.test 83 | col1.ref -= col2.ref 84 | col1.check() 85 | 86 | @rule(target=collections, col1=collections, col2=collections) 87 | def symmetric_difference(self, col1, col2): 88 | return Collection(test=col1.test ^ col2.test, ref=col1.ref ^ col2.ref) 89 | 90 | @rule(col1=collections, col2=collections) 91 | def symmetric_difference_inplace(self, col1, col2): 92 | col1.test ^= col2.test 93 | col1.ref ^= col2.ref 94 | col1.check() 95 | 96 | @rule( 97 | target=collections, 98 | col=collections, 99 | start=st.integers(min_value=0, max_value=large_val), 100 | size=st.integers(min_value=0, max_value=2**18), 101 | ) 102 | def flip(self, col, start, size): 103 | stop = start + size 104 | return Collection( 105 | test=col.test.flip(start, stop), ref=col.ref ^ set(range(start, stop)) 106 | ) 107 | 108 | @rule( 109 | col=collections, 110 | start=st.integers(min_value=0, max_value=large_val), 111 | size=st.integers(min_value=0, max_value=2**18), 112 | ) 113 | def flip_inplace(self, col, start, size): 114 | stop = start + size 115 | col.test.flip_inplace(start, stop) 116 | col.ref ^= set(range(start, stop)) 117 | col.check() 118 | 119 | 120 | TestTrees = SetComparison.TestCase 121 | TestTrees.settings = settings(max_examples=100, stateful_step_count=100) 122 | 123 | if __name__ == "__main__": 124 | unittest.main() 125 | 
#! /usr/bin/env python3
"""Quick benchmark comparing pyroaring with other integer-set implementations.

Each experiment is a (setup, statement) pair timed with timeit for every
available class; the results are printed as a table, one row per experiment.
"""

import sys
import random
import timeit

from pandas import Series, DataFrame

try:
    import tabulate
    has_tabulate = True
except ImportError:
    has_tabulate = False
    sys.stderr.write('Warning: could not import tabulate\n')
    sys.stderr.write(' see https://bitbucket.org/astanin/python-tabulate\n')
from pyroaring import BitMap, BitMap64

# Benchmarked implementations, keyed by the label used in the result table.
classes = {'set': set, 'pyroaring (32 bits)': BitMap, 'pyroaring (64 bits)': BitMap64, }
nb_exp = 30  # number of timeit repetitions per experiment
size = int(1e6)  # number of elements in each benchmarked collection
density = 0.125
universe_size = int(size / density)

# The remaining implementations are optional: they are benchmarked only when
# they can be imported.
try:
    from roaringbitmap import RoaringBitmap
    classes['roaringbitmap'] = RoaringBitmap
except ImportError:
    sys.stderr.write('Warning: could not import roaringbitmap\n')
    sys.stderr.write(' see https://github.com/andreasvc/roaringbitmap/\n')

try:
    from sortedcontainers.sortedset import SortedSet
    classes['sortedcontainers'] = SortedSet
except ImportError:
    sys.stderr.write('Warning: could not import sortedcontainers\n')
    sys.stderr.write(' see https://github.com/grantjenks/sorted_containers\n')

try:
    from croaring import BitSet
    classes['python-croaring'] = BitSet
except ImportError:
    sys.stderr.write('Warning: could not import croaring\n')
    sys.stderr.write(' see https://github.com/sunzhaoping/python-croaring\n')

# Import statement prepended to every timeit setup: timeit runs its code in a
# separate namespace, so the helpers and classes are pulled from __main__.
import_str = 'import array, pickle; from __main__ import %s' % (','.join(
    ['get_list', 'get_range', 'random', 'size', 'universe_size']
    + [cls.__name__ for cls in classes.values() if cls is not set]))


def run_exp(stmt, setup, number):
    """Return the mean duration of ``stmt`` over ``number`` runs, or NaN on error.

    Any exception raised by the setup or the statement yields NaN instead of
    aborting the benchmark.
    """
    setup = '%s ; %s' % (import_str, setup)
    try:
        return timeit.timeit(stmt=stmt, setup=setup, number=number) / number
    except Exception:
        return float('nan')


def get_range():
    """Return the range from 0 to universe_size with a step of int(1 / density)."""
    # The former ``xrange`` fallback only mattered on Python 2; on Python 3 it
    # raised and swallowed a NameError on every call.
    return range(0, universe_size, int(1 / density))


def get_list():
    """Return ``size`` distinct random integers drawn from range(universe_size)."""
    return random.sample(range(universe_size), size)


# Templates of the timed code; {class_name} is substituted by run().
constructor = 'x={class_name}(values)'
simple_setup_constructor = 'x={class_name}(get_list());val=random.randint(0, universe_size)'
double_setup_constructor = 'x={class_name}(get_list()); y={class_name}(get_list())'
equal_setup_constructor = 'l=get_list();x={class_name}(l); y={class_name}(l)'
# Each entry is (experiment name, (setup code, timed statement)).
experiments = [
    # Constructors
    ('range constructor', ('values=get_range()', constructor)),
    ('ordered list constructor', ('values=get_list(); values.sort()', constructor)),
    ('list constructor', ('values=get_list()', constructor)),
    ('ordered array constructor', ('l=get_list(); l.sort(); values=array.array("I", l)', constructor)),
    ('array constructor', ('values=array.array("I", get_list())', constructor)),
    # Simple operations
    ('element addition', (simple_setup_constructor, 'x.add(val)')),
    ('element removal', (simple_setup_constructor, 'x.discard(val)')),
    ('membership test', (simple_setup_constructor, 'val in x')),
    # Binary operations
    ('union', (double_setup_constructor, 'z=x|y')),
    ('intersection', (double_setup_constructor, 'z=x&y')),
    ('difference', (double_setup_constructor, 'z=x-y')),
    ('symmetric diference', (double_setup_constructor, 'z=x^y')),
    ('equality test', (equal_setup_constructor, 'x==y')),
    ('subset test', (equal_setup_constructor, 'x<=y')),
    # Export
    ('conversion to list', (simple_setup_constructor, 'list(x)')),
    ('pickle dump & load', (simple_setup_constructor, 'pickle.loads(pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL))')),
    ('"naive" conversion to array', (simple_setup_constructor, 'array.array("I", x)')),
    ('"optimized" conversion to array', (simple_setup_constructor, 'x.to_array()')),
    # Items
    ('selection', (simple_setup_constructor, 'x[int(size/2)]')),
    ('contiguous slice', (simple_setup_constructor, 'x[int(size/4):int(3*size/4):1]')),
    ('slice', (simple_setup_constructor, 'x[int(size/4):int(3*size/4):3]')),
    ('small slice', (simple_setup_constructor, 'x[int(size/100):int(3*size/100):3]')),
]
exp_dict = dict(experiments)


def run(cls, op):
    """Time experiment ``op`` for the implementation registered under label ``cls``."""
    cls_name = classes[cls].__name__
    setup = exp_dict[op][0].format(class_name=cls_name)
    stmt = exp_dict[op][1].format(class_name=cls_name)
    result = run_exp(stmt=stmt, setup=setup, number=nb_exp)
    return result


def run_all():
    """Run every experiment for every class and return the timings as a DataFrame.

    The result has one row per experiment, with an 'operation' column and one
    column per class label; columns are sorted by name.
    """
    all_results = []
    for op, _ in experiments:
        sys.stderr.write('experiment: %s\n' % op)
        result = {'operation': op}
        # The classes are visited in a random order for each experiment.
        for cls in random.sample(list(classes), len(classes)):
            result[cls] = run(cls, op)
        all_results.append(result)
    return DataFrame(all_results).sort_index(axis=1)


if __name__ == '__main__':
    df = run_all()
    print()
    if has_tabulate:
        print(tabulate.tabulate(df, headers='keys', tablefmt='rst', showindex='never', floatfmt=".2e"))
    else:
        print(df)
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'sphinx.ext.coverage',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'pyroaring'
copyright = '2017, Tom Cornebize'  # name required by Sphinx; shadows the builtin on purpose
author = 'Tom Cornebize'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '1.0.3'
# The full version, including alpha/beta/rc tags.
release = version

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Sphinx 5+ rejects ``None`` here (it warns and falls back to 'en'), so the
# language code is given explicitly.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'pyroaringdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'pyroaring.tex', 'pyroaring Documentation',
     'Tom Cornebize', 'manual'),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'pyroaring', 'pyroaring Documentation',
     [author], 1)
]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'pyroaring', 'pyroaring Documentation',
     author, 'pyroaring', 'An efficient and light-weight ordered set of integers.',
     'Miscellaneous'),
]
code:: python 12 | 13 | from pyroaring import BitMap 14 | bm1 = BitMap() 15 | bm1.add(3) 16 | bm1.add(18) 17 | print("has 3:", 3 in bm1) 18 | print("has 4:", 4 in bm1) 19 | bm2 = BitMap([3, 27, 42]) 20 | print("bm1 = %s" % bm1) 21 | print("bm2 = %s" % bm2) 22 | print("bm1 & bm2 = %s" % (bm1&bm2)) 23 | print("bm1 | bm2 = %s" % (bm1|bm2)) 24 | 25 | Output: 26 | 27 | :: 28 | 29 | has 3: True 30 | has 4: False 31 | bm1 = BitMap([3, 18]) 32 | bm2 = BitMap([3, 27, 42]) 33 | bm1 & bm2 = BitMap([3]) 34 | bm1 | bm2 = BitMap([3, 18, 27, 42]) 35 | 36 | The class ``BitMap`` is for 32 bit integers, it supports values from 0 to 2**32-1 (included). 37 | 38 | For larger numbers, you can use the class ``BitMap64`` that supports values from 0 to 2**64-1 (included). 39 | 40 | Installation from Pypi 41 | ---------------------- 42 | 43 | Supported systems: Linux, MacOS or Windows, Python 3.8 or higher. Note that pyroaring might still work with older Python 44 | versions, but they are not tested anymore. 45 | 46 | To install pyroaring on your local account, use the following command: 47 | 48 | .. code:: bash 49 | 50 | pip install pyroaring --user 51 | 52 | For a system-wide installation, use the following command: 53 | 54 | .. code:: bash 55 | 56 | pip install pyroaring 57 | 58 | Naturally, the latter may require superuser rights (consider prefixing 59 | the commands by ``sudo``). 60 | 61 | If you want to use Python 3 and your system defaults on Python 2.7, you 62 | may need to adjust the above commands, e.g., replace ``pip`` by ``pip3``. 63 | 64 | Installation from conda-forge 65 | ----------------------------- 66 | 67 | Conda users can install the package from `conda-forge`: 68 | 69 | .. 
code:: bash 70 | 71 | conda install -c conda-forge pyroaring 72 | 73 | (Supports Python 3.6 or higher; Mac/Linux/Windows) 74 | 75 | Installation from Source 76 | --------------------------------- 77 | 78 | If you want to compile (and install) pyroaring by yourself, for instance 79 | to modify the Cython sources you can follow the following instructions. 80 | Note that these examples will install in your currently active python 81 | virtual environment. Installing this way will require an appropriate 82 | C compiler to be installed on your system. 83 | 84 | First clone this repository. 85 | 86 | .. code:: bash 87 | 88 | git clone https://github.com/Ezibenroc/PyRoaringBitMap.git 89 | 90 | To install from Cython via source, for example during development run the following from the root of the above repository: 91 | 92 | .. code:: bash 93 | 94 | python -m pip install . 95 | 96 | This will automatically install Cython if it is not present for the build, cythonise the source files and compile everything for you. 97 | 98 | If you just want to recompile the package in place for quick testing you can 99 | try the following: 100 | 101 | .. code:: bash 102 | 103 | python setup.py build_clib 104 | python setup.py build_ext -i 105 | 106 | Note that the build_clib compiles croaring only, and only needs to be run once. 107 | 108 | Then you can test the new code using tox - this will install all the other 109 | dependencies needed for testing and test in an isolated environment: 110 | 111 | .. code:: bash 112 | 113 | python -m pip install tox 114 | tox 115 | 116 | If you just want to run the tests directly from the root of the repository: 117 | 118 | .. code:: bash 119 | 120 | python -m pip install hypothesis pytest 121 | # This will test in three ways: via installation from source, 122 | # via cython directly, and creation of a wheel 123 | python -m pytest test.py 124 | 125 | 126 | Package pyroaring as an sdist and wheel.
Note that building wheels that have 127 | wide compatibility can be tricky - for releases we rely on `cibuildwheel `_ 128 | to do the heavy lifting across platforms. 129 | 130 | .. code:: bash 131 | 132 | python -m pip install build 133 | python -m build . 134 | 135 | For all the above commands, two environment variables can be used to control the compilation. 136 | 137 | - ``DEBUG=1`` to build pyroaring in debug mode. 138 | - ``ARCHI=`` to build pyroaring for the given platform. The platform may be any keyword 139 | given to the ``-march`` option of gcc (see the 140 | `documentation `__). 141 | Note that cross-compiling for a 32-bit architecture from a 64-bit architecture is not supported. 142 | 143 | Example of use: 144 | 145 | .. code:: bash 146 | 147 | DEBUG=1 ARCHI=x86-64 python setup.py build_ext 148 | 149 | 150 | Optimizing the builds for your machine (x64) 151 | -------------------------------------------- 152 | 153 | For recent Intel and AMD (x64) processors under Linux, you may get better performance by requesting that 154 | CRoaring be built for your machine, specifically, when building from source. 155 | Be mindful that when doing so, the generated binary may only run on your machine. 156 | 157 | 158 | .. code:: bash 159 | 160 | ARCHI=native pip install pyroaring --no-binary :all: 161 | 162 | This approach may not work under macOS. 163 | 164 | 165 | Development Notes 166 | ----------------- 167 | 168 | Updating CRoaring 169 | ================= 170 | 171 | The download_amalgamation.py script can be used to download a specific version 172 | of the official CRoaring amalgamation: 173 | 174 | .. code:: bash 175 | 176 | python download_amalgamation.py v0.7.2 177 | 178 | This will update roaring.c and roaring.h. This also means that the dependency 179 | is vendored in and tracked as part of the source repository now. Note that the 180 | __croaring_version__ in croaring_version.pxi will need to be updated to match the new 181 | version.
182 | 183 | 184 | Tracking Package and CRoaring versions 185 | ====================================== 186 | 187 | The package version is maintained in the file `pyroaring/version.pxi` - this 188 | can be manually incremented in preparation for releases. This file is read 189 | from in setup.py to specify the version. 190 | 191 | The croaring version is tracked in `pyroaring/croaring_version.pxi` - this is 192 | updated automatically when downloading a new amalgamation. 193 | 194 | 195 | Benchmark 196 | --------- 197 | 198 | ``Pyroaring`` is compared with the built-in ``set`` and the library ``sortedcontainers``. 199 | 200 | The script ``quick_bench.py`` measures the time of different set 201 | operations. It uses randomly generated sets of size 1e6 and density 202 | 0.125. For each operation, the average time (in seconds) of 30 tests 203 | is reported. 204 | 205 | The results have been obtained with: 206 | 207 | - CPU AMD Ryzen 7 5700X 208 | - CPython version 3.11.2 209 | - gcc version 12.2.0 210 | - Cython version 3.0.2 211 | - sortedcontainers version 2.4.0 212 | - pyroaring commit `b54769b `__ 213 | 214 | =============================== ===================== ===================== ========== ================== 215 | operation pyroaring (32 bits) pyroaring (64 bits) set sortedcontainers 216 | =============================== ===================== ===================== ========== ================== 217 | range constructor 3.03e-04 3.15e-04 4.09e-02 8.54e-02 218 | ordered list constructor 2.17e-02 3.06e-02 8.21e-02 2.67e-01 219 | list constructor 7.23e-02 6.38e-02 5.65e-02 2.34e-01 220 | ordered array constructor 4.50e-03 nan 6.53e-02 1.75e-01 221 | array constructor 6.51e-02 nan 8.98e-02 2.40e-01 222 | element addition 4.33e-07 2.19e-07 2.13e-07 3.82e-07 223 | element removal 2.69e-07 1.67e-07 2.33e-07 2.83e-07 224 | membership test 1.59e-07 1.33e-07 1.42e-07 3.22e-07 225 | union 1.07e-04 1.04e-04 1.06e-01 5.69e-01 226 | intersection 6.00e-04 6.26e-04 4.66e-02 
1.03e-01 227 | difference 7.24e-05 8.34e-05 7.94e-02 2.34e-01 228 | symmetric diference 8.32e-05 1.03e-04 1.31e-01 4.19e-01 229 | equality test 3.52e-05 3.21e-05 3.18e-02 3.29e-02 230 | subset test 4.15e-05 4.41e-05 3.20e-02 3.20e-02 231 | conversion to list 2.92e-02 3.08e-02 3.16e-02 3.53e-02 232 | pickle dump & load 1.64e-04 1.76e-04 1.37e-01 3.53e-01 233 | "naive" conversion to array 2.46e-02 2.57e-02 6.49e-02 5.73e-02 234 | "optimized" conversion to array 8.73e-04 1.45e-03 nan nan 235 | selection 8.83e-07 2.49e-06 nan 8.18e-06 236 | contiguous slice 3.31e-03 6.49e-03 nan 4.32e-03 237 | slice 1.58e-03 2.74e-03 nan 1.29e-01 238 | small slice 6.62e-05 1.15e-04 nan 5.43e-03 239 | =============================== ===================== ===================== ========== ================== 240 | 241 | Note: the timings are missing for pyroaring 64 bits with the array constructor. For simplicity reasons the Benchmark 242 | builds an array of 32 bit integers, which is not compatible with ``BitMap64``. 243 | 244 | .. 
# Cython declarations of the names from the CRoaring header "roaring.h" that
# the wrapper uses. The declarations fall into two families: the 32-bit API
# (`roaring_bitmap_*`, `roaring_uint32_iterator_*`) and the 64-bit API
# (`roaring64_*`).
cdef extern from "roaring.h":
    # ---- Struct declarations -------------------------------------------
    # Declared without fields (`pass`): no members are accessed from Cython.
    ctypedef struct roaring_array_t:
        pass
    ctypedef struct roaring_bitmap_t:
        roaring_array_t high_low_container
    # Iterator over a 32-bit bitmap; `current_value` and `has_value` are the
    # fields exposed for reading the iterator position.
    ctypedef struct roaring_uint32_iterator_t:
        const roaring_bitmap_t *parent
        int32_t container_index
        int32_t in_container_index
        int32_t run_index
        uint32_t in_run_index
        uint32_t current_value
        bool has_value
        const void *container
        uint8_t typecode
        uint32_t highbits
    # Output struct filled by `roaring_bitmap_statistics`.
    ctypedef struct roaring_statistics_t:
        uint32_t n_containers
        uint32_t n_array_containers
        uint32_t n_run_containers
        uint32_t n_bitset_containers
        uint32_t n_values_array_containers
        uint32_t n_values_run_containers
        uint32_t n_values_bitset_containers
        uint32_t n_bytes_array_containers
        uint32_t n_bytes_run_containers
        uint32_t n_bytes_bitset_containers
        uint32_t max_value
        uint32_t min_value
        uint64_t sum_value
        uint64_t cardinality
    # Output struct filled by `roaring64_bitmap_statistics`. Note: unlike the
    # 32-bit struct above, it declares no `sum_value` field.
    ctypedef struct roaring64_statistics_t:
        uint64_t n_containers
        uint64_t n_array_containers
        uint64_t n_run_containers
        uint64_t n_bitset_containers
        uint64_t n_values_array_containers
        uint64_t n_values_run_containers
        uint64_t n_values_bitset_containers
        uint64_t n_bytes_array_containers
        uint64_t n_bytes_run_containers
        uint64_t n_bytes_bitset_containers
        uint64_t max_value
        uint64_t min_value
        uint64_t cardinality

    # ---- 32-bit bitmaps: creation, element and range updates -----------
    # Values are uint32_t; range bounds are passed as uint64_t.
    roaring_bitmap_t *roaring_bitmap_create()
    bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r)
    void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow)
    void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x)
    bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x)
    void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals)
    void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max);
    void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x)
    void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max)
    bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x)
    void roaring_bitmap_clear(roaring_bitmap_t *r)
    bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val)
    bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end)
    roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r)
    bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, const roaring_bitmap_t *src)
    roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, uint32_t step)
    bool roaring_bitmap_run_optimize(roaring_bitmap_t *r)
    size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r)
    void roaring_bitmap_free(roaring_bitmap_t *r)
    roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals)
    # ---- 32-bit bitmaps: cardinality, comparison and export ------------
    uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r)
    uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end)
    bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra)
    bool roaring_bitmap_equals(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2)
    bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2)
    bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2)
    void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans)
    # ---- 32-bit bitmaps: binary operations ------------------------------
    # Each of or/and/xor/andnot is declared in three forms: one returning a
    # bitmap pointer, an `_inplace` form taking a non-const first operand,
    # and a `_cardinality` form returning a uint64_t.
    roaring_bitmap_t *roaring_bitmap_or_many(size_t number, const roaring_bitmap_t **x)
    roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2)
    # ---- 32-bit bitmaps: order statistics, flip, shift -------------------
    uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *r)
    uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r)
    uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x)
    roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, uint64_t range_start, uint64_t range_end)
    void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, uint64_t range_end)
    roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, int64_t offset)
    # `select` writes its result through the `element` out-pointer and
    # returns a bool status.
    bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, uint32_t *element)
    void roaring_bitmap_statistics(const roaring_bitmap_t *r, roaring_statistics_t *stat)
    # ---- 32-bit bitmaps: portable (de)serialization and validation -----
    size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra)
    size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf)
    roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf)
    roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes)
    bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason)
    # ---- 32-bit iterators -----------------------------------------------
    roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *ra)
    bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it)
    uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count)
    bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val)
    void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it)

    # 64-bit roaring bitmaps
    # Both 64-bit structs are opaque here (declared with `pass`), so the
    # iterator state is read through accessor functions rather than fields.
    ctypedef struct roaring64_bitmap_t:
        pass

    ctypedef struct roaring64_iterator_t:
        pass

    roaring64_bitmap_t *roaring64_bitmap_create()
    void roaring64_bitmap_free(roaring64_bitmap_t *r)
    roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r)
    roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, const uint64_t *vals)
    roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, uint64_t step)
    void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val)
    bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val)
    void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, const uint64_t *vals)
    void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, uint64_t max)
    void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val)
    bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val)
    void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, const uint64_t *vals)
    void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min, uint64_t max)
    bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val)
    bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, uint64_t max)
    bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, uint64_t *element)
    void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, roaring64_statistics_t *stat)
    uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val)
    roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, uint64_t min, uint64_t max)
    void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t max)
    bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, uint64_t *out_index)
    uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r)
    uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, uint64_t min, uint64_t max)
    bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r)
    uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r)
    uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r)
    bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r)
    size_t roaring64_bitmap_size_in_bytes(const roaring64_bitmap_t *r)
    bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, uint64_t *out)
    # Binary operations, declared in the same three forms as the 32-bit API
    # (bitmap-returning, `_inplace`, `_cardinality`).
    roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2)
    # Portable (de)serialization and validation.
    size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r)
    size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, char *buf)
    size_t roaring64_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes)
    roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes)
    bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, const char **reason)
    # 64-bit iterators.
    roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r)
    void roaring64_iterator_free(roaring64_iterator_t *it)
    bool roaring64_iterator_has_value(const roaring64_iterator_t *it)
    bool roaring64_iterator_advance(roaring64_iterator_t *it)
    uint64_t roaring64_iterator_value(const roaring64_iterator_t *it)
    bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint64_t val)
    uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint64_t count)
roaring64_iterator_has_value(const roaring64_iterator_t *it) 172 | bool roaring64_iterator_advance(roaring64_iterator_t *it) 173 | uint64_t roaring64_iterator_value(const roaring64_iterator_t *it) 174 | bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint64_t val) 175 | uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint64_t count) -------------------------------------------------------------------------------- /pyroaring/__init__.pyi: -------------------------------------------------------------------------------- 1 | import array 2 | from typing import overload, TypedDict 3 | from collections.abc import Iterable, Iterator 4 | 5 | from typing_extensions import Self 6 | 7 | __version__: str 8 | __croaring_version__: str 9 | 10 | 11 | class _Statistics(TypedDict): 12 | # Type as observed in the returned values. 13 | # Warning: This type does not exist at runtime. 14 | 15 | n_containers: int 16 | n_array_containers: int 17 | n_run_containers: int 18 | n_bitset_containers: int 19 | n_values_array_containers: int 20 | n_values_run_containers: int 21 | n_values_bitset_containers: int 22 | n_bytes_array_containers: int 23 | n_bytes_run_containers: int 24 | n_bytes_bitset_containers: int 25 | max_value: int 26 | min_value: int 27 | sum_value: int 28 | cardinality: int 29 | 30 | 31 | class AbstractBitMap: 32 | def __init__(self, values: Iterable[int] | None = None, copy_on_write: bool = False, optimize: bool = True) -> None: 33 | ... 34 | 35 | @property 36 | def copy_on_write(self) -> bool: 37 | ... 38 | 39 | def run_optimize(self) -> bool: 40 | ... 41 | 42 | def shrink_to_fit(self) -> int: 43 | ... 44 | 45 | def __contains__(self, value: int) -> bool: 46 | ... 47 | 48 | def __bool__(self) -> bool: 49 | ... 50 | 51 | def __len__(self) -> int: 52 | ... 53 | 54 | def __lt__(self, other: AbstractBitMap) -> bool: 55 | ... 56 | 57 | def __le__(self, other: AbstractBitMap) -> bool: 58 | ... 
59 | 60 | def __eq__(self, other: object) -> bool: 61 | ... 62 | 63 | def __ne__(self, other: object) -> bool: 64 | ... 65 | 66 | def __gt__(self, other: AbstractBitMap) -> bool: 67 | ... 68 | 69 | def __ge__(self, other: AbstractBitMap) -> bool: 70 | ... 71 | 72 | def contains_range(self, range_start: int, range_end: int) -> bool: 73 | ... 74 | 75 | def range_cardinality(self, range_start: int, range_end: int) -> int: 76 | ... 77 | 78 | def iter_equal_or_larger(self, val: int) -> Iterator[int]: 79 | ... 80 | 81 | def __iter__(self) -> Iterator[int]: 82 | ... 83 | 84 | def flip(self, start: int, end: int) -> Self: 85 | ... 86 | 87 | def shift(self, offset: int) -> Self: 88 | ... 89 | 90 | def copy(self) -> Self: 91 | ... 92 | 93 | def isdisjoint(self, other: AbstractBitMap) -> bool: 94 | ... 95 | 96 | def issubset(self, other: AbstractBitMap) -> bool: 97 | ... 98 | 99 | def issuperset(self, other: AbstractBitMap) -> bool: 100 | ... 101 | 102 | # Note: `difference` and others are sort-of set up like they're meant to be 103 | # static methods (accepting _only_ `*bitmaps` in the underlying Cython 104 | # code), however at runtime they require at least one argument and return an 105 | # instance of the same type as that value -- like instance methods. Typing 106 | # them as instances methods ensures that mypy matches this behaviour (other 107 | # type checkers untested), even when used statically as their docstrings 108 | # suggest. 109 | 110 | def difference(self, *bitmaps: AbstractBitMap) -> Self: 111 | ... 112 | 113 | def symmetric_difference(self, other: AbstractBitMap) -> Self: 114 | ... 115 | 116 | def union(self, *bitmaps: AbstractBitMap) -> Self: 117 | ... 118 | 119 | def intersection(self, *bitmaps: AbstractBitMap) -> Self: 120 | ... 121 | 122 | def __or__(self, other: AbstractBitMap) -> Self: 123 | ... 124 | 125 | def __and__(self, other: AbstractBitMap) -> Self: 126 | ... 127 | 128 | def __xor__(self, other: AbstractBitMap) -> Self: 129 | ... 
130 | 131 | def __sub__(self, other: AbstractBitMap) -> Self: 132 | ... 133 | 134 | def union_cardinality(self, other: AbstractBitMap) -> int: 135 | ... 136 | 137 | def intersection_cardinality(self, other: AbstractBitMap) -> int: 138 | ... 139 | 140 | def difference_cardinality(self, other: AbstractBitMap) -> int: 141 | ... 142 | 143 | def symmetric_difference_cardinality(self, other: AbstractBitMap) -> int: 144 | ... 145 | 146 | def intersect(self, other: AbstractBitMap) -> bool: 147 | ... 148 | 149 | def jaccard_index(self, other: AbstractBitMap) -> float: 150 | ... 151 | 152 | def get_statistics(self) -> _Statistics: 153 | ... 154 | 155 | def min(self) -> int: 156 | ... 157 | 158 | def max(self) -> int: 159 | ... 160 | 161 | def rank(self, value: int) -> int: 162 | ... 163 | 164 | def next_set_bit(self, value: int) -> int: 165 | ... 166 | 167 | @overload 168 | def __getitem__(self, value: int) -> int: 169 | ... 170 | 171 | @overload 172 | def __getitem__(self, value: slice) -> Self: 173 | ... 174 | 175 | def serialize(self) -> bytes: 176 | ... 177 | 178 | @classmethod 179 | def deserialize(cls, buff: bytes) -> Self: 180 | ... 181 | 182 | def __getstate__(self) -> bytes: 183 | ... 184 | 185 | def __setstate__(self, state: bytes) -> Self: 186 | ... 187 | 188 | def __sizeof__(self) -> int: 189 | ... 190 | 191 | def to_array(self) -> array.array[int]: 192 | ... 193 | 194 | 195 | class FrozenBitMap(AbstractBitMap): 196 | def __hash__(self) -> int: 197 | ... 198 | 199 | 200 | class BitMap(AbstractBitMap): 201 | def add(self, value: int) -> None: 202 | ... 203 | 204 | def add_checked(self, value: int) -> None: 205 | ... 206 | 207 | def update(self, *all_values: Iterable[int]) -> None: 208 | ... 209 | 210 | def discard(self, value: int) -> None: 211 | ... 212 | 213 | def remove(self, value: int) -> None: 214 | ... 215 | 216 | def __ior__(self, other: AbstractBitMap) -> Self: 217 | ... 218 | 219 | def __iand__(self, other: AbstractBitMap) -> Self: 220 | ... 

    def __ixor__(self, other: AbstractBitMap) -> Self:
        ...

    def __isub__(self, other: AbstractBitMap) -> Self:
        ...

    def intersection_update(self, *all_values: Iterable[int]) -> None:
        ...

    def difference_update(self, *others: AbstractBitMap) -> None:
        ...

    def symmetric_difference_update(self, other: AbstractBitMap) -> None:
        ...

    # Raises ValueError when `other` is this very bitmap.
    def overwrite(self, other: AbstractBitMap) -> None:
        ...

    def clear(self) -> None:
        ...

    # Raises KeyError when the bitmap is empty.
    def pop(self) -> int:
        ...

    def flip_inplace(self, start: int, end: int) -> None:
        ...

    # Range arguments: range_start is included, range_end is excluded.
    def add_range(self, range_start: int, range_end: int) -> None:
        ...

    def remove_range(self, range_start: int, range_end: int) -> None:
        ...

# 64-bit counterpart of AbstractBitMap; the declarations mirror the 32-bit
# class with AbstractBitMap64 operands.
class AbstractBitMap64:
    def __init__(self, values: Iterable[int] | None = None, copy_on_write: bool = False, optimize: bool = True) -> None:
        ...

    @property
    def copy_on_write(self) -> bool:
        ...

    def run_optimize(self) -> bool:
        ...

    def shrink_to_fit(self) -> int:
        ...

    def __contains__(self, value: int) -> bool:
        ...

    def __bool__(self) -> bool:
        ...

    def __len__(self) -> int:
        ...

    def __lt__(self, other: AbstractBitMap64) -> bool:
        ...

    def __le__(self, other: AbstractBitMap64) -> bool:
        ...

    def __eq__(self, other: object) -> bool:
        ...

    def __ne__(self, other: object) -> bool:
        ...

    def __gt__(self, other: AbstractBitMap64) -> bool:
        ...

    def __ge__(self, other: AbstractBitMap64) -> bool:
        ...

    def contains_range(self, range_start: int, range_end: int) -> bool:
        ...

    def range_cardinality(self, range_start: int, range_end: int) -> int:
        ...

    def iter_equal_or_larger(self, val: int) -> Iterator[int]:
        ...

    def __iter__(self) -> Iterator[int]:
        ...

    # The methods below that return Self are typed as producing a bitmap of the
    # receiver's own type.
    def flip(self, start: int, end: int) -> Self:
        ...

    def shift(self, offset: int) -> Self:
        ...

    def copy(self) -> Self:
        ...

    def isdisjoint(self, other: AbstractBitMap64) -> bool:
        ...

    def issubset(self, other: AbstractBitMap64) -> bool:
        ...

    def issuperset(self, other: AbstractBitMap64) -> bool:
        ...

    # Typed as instance methods, like their 32-bit counterparts in this stub.
    def difference(self, *bitmaps: AbstractBitMap64) -> Self:
        ...

    def symmetric_difference(self, other: AbstractBitMap64) -> Self:
        ...

    def union(self, *bitmaps: AbstractBitMap64) -> Self:
        ...

    def intersection(self, *bitmaps: AbstractBitMap64) -> Self:
        ...

    def __or__(self, other: AbstractBitMap64) -> Self:
        ...

    def __and__(self, other: AbstractBitMap64) -> Self:
        ...

    def __xor__(self, other: AbstractBitMap64) -> Self:
        ...

    def __sub__(self, other: AbstractBitMap64) -> Self:
        ...

    def union_cardinality(self, other: AbstractBitMap64) -> int:
        ...

    def intersection_cardinality(self, other: AbstractBitMap64) -> int:
        ...

    def difference_cardinality(self, other: AbstractBitMap64) -> int:
        ...

    def symmetric_difference_cardinality(self, other: AbstractBitMap64) -> int:
        ...

    def intersect(self, other: AbstractBitMap64) -> bool:
        ...

    def jaccard_index(self, other: AbstractBitMap64) -> float:
        ...

    def get_statistics(self) -> _Statistics:
        ...

    def min(self) -> int:
        ...

    def max(self) -> int:
        ...

    def rank(self, value: int) -> int:
        ...

    def next_set_bit(self, value: int) -> int:
        ...

    # Indexing with an int yields one element; slicing yields a bitmap of the
    # same type.
    @overload
    def __getitem__(self, value: int) -> int:
        ...

    @overload
    def __getitem__(self, value: slice) -> Self:
        ...

    def serialize(self) -> bytes:
        ...

    @classmethod
    def deserialize(cls, buff: bytes) -> Self:
        ...

    def __getstate__(self) -> bytes:
        ...

    # NOTE(review): pickle ignores the return value of __setstate__; confirm
    # that `Self` (rather than `None`) is really what the implementation returns.
    def __setstate__(self, state: bytes) -> Self:
        ...

    def __sizeof__(self) -> int:
        ...

    def to_array(self) -> array.array[int]:
        ...


# Immutable 64-bit variant: the only 64-bit class that declares __hash__.
class FrozenBitMap64(AbstractBitMap64):
    def __hash__(self) -> int:
        ...


# Mutable 64-bit variant.
class BitMap64(AbstractBitMap64):
    def add(self, value: int) -> None:
        ...

    # Raises KeyError if `value` is already present.
    def add_checked(self, value: int) -> None:
        ...

    def update(self, *all_values: Iterable[int]) -> None:
        ...

    # discard() is a no-op for a missing value; remove() raises KeyError.
    def discard(self, value: int) -> None:
        ...

    def remove(self, value: int) -> None:
        ...

    def __ior__(self, other: AbstractBitMap64) -> Self:
        ...

    def __iand__(self, other: AbstractBitMap64) -> Self:
        ...

    def __ixor__(self, other: AbstractBitMap64) -> Self:
        ...

    def __isub__(self, other: AbstractBitMap64) -> Self:
        ...

    def intersection_update(self, *all_values: Iterable[int]) -> None:
        ...

    def difference_update(self, *others: AbstractBitMap64) -> None:
        ...

    def symmetric_difference_update(self, other: AbstractBitMap64) -> None:
        ...

    # NOTE(review): the `cdef class BitMap64` body in pyroaring/bitmap.pxi
    # defines no `overwrite` method -- confirm that AbstractBitMap64 provides
    # it, otherwise this declaration promises a method that does not exist.
    def overwrite(self, other: AbstractBitMap64) -> None:
        ...

    def clear(self) -> None:
        ...

    # Raises KeyError when the bitmap is empty.
    def pop(self) -> int:
        ...

    def flip_inplace(self, start: int, end: int) -> None:
        ...

    # Range arguments: range_start is included, range_end is excluded.
    def add_range(self, range_start: int, range_end: int) -> None:
        ...

    def remove_range(self, range_start: int, range_end: int) -> None:
        ...
--------------------------------------------------------------------------------
/pyroaring/bitmap.pxi:
--------------------------------------------------------------------------------
# Mutable 32-bit bitmap: adds in-place mutators on top of AbstractBitMap.
cdef class BitMap(AbstractBitMap):

    cdef compute_hash(self):
        '''Unsupported method.'''
        # For some reason, if we directly override __hash__ (either in BitMap or in FrozenBitMap), the __richcmp__
        # method disappears.
        # Hashing is therefore refused here, in the hook, by always raising.
        raise TypeError('Cannot compute the hash of a %s.' % self.__class__.__name__)

    def add(self, uint32_t value):
        """
        Add an element to the bitmap. This has no effect if the element is already present.

        >>> bm = BitMap()
        >>> bm.add(42)
        >>> bm
        BitMap([42])
        >>> bm.add(42)
        >>> bm
        BitMap([42])
        """
        croaring.roaring_bitmap_add(self._c_bitmap, value)

    def add_checked(self, uint32_t value):
        """
        Add an element to the bitmap. This raises a KeyError exception if the element is already present.

        >>> bm = BitMap()
        >>> bm.add_checked(42)
        >>> bm
        BitMap([42])
        >>> bm.add_checked(42)
        Traceback (most recent call last):
        ...
        KeyError: 42
        """
        # A false result from the checked add is reported as KeyError, per the
        # contract documented above.
        cdef bool test = croaring.roaring_bitmap_add_checked(self._c_bitmap, value)
        if not test:
            raise KeyError(value)

    def update(self, *all_values): # FIXME could be more efficient
        """
        Add all the given values to the bitmap.

        >>> bm = BitMap([3, 12])
        >>> bm.update([8, 12, 55, 18])
        >>> bm
        BitMap([3, 8, 12, 18, 55])
        """
        cdef vector[uint32_t] buff_vect
        cdef unsigned[:] buff
        for values in all_values:
            if isinstance(values, AbstractBitMap):
                # Another bitmap: plain in-place union.
                self |= values
            elif isinstance(values, range):
                if len(values) == 0:
                    continue
                _, (start, stop, step) = values.__reduce__()
                if step == -1:
                    # A descending unit-step range covers the same integers as
                    # the ascending range [stop+1, start+1).
                    step = 1
                    start, stop = stop+1, start+1
                if step == 1:
                    self.add_range(start, stop)
                else:
                    # Any other step: build a temporary bitmap and union it in.
                    self |= AbstractBitMap(values, copy_on_write=self.copy_on_write)
            elif isinstance(values, array.array) and len(values) > 0:
                # Non-empty array: hand its buffer to the C bulk add. An empty
                # array falls through to the generic branch, which adds nothing.
                buff = values
                croaring.roaring_bitmap_add_many(self._c_bitmap, len(values), &buff[0])
            else:
                try:
                    size = len(values)
                except TypeError: # object has no length, creating a list
                    values = list(values)
                    size = len(values)
                if size > 0:
                    # Assignment converts the Python sequence into the C++ vector.
                    buff_vect = values
                    croaring.roaring_bitmap_add_many(self._c_bitmap, size, &buff_vect[0])

    def discard(self, uint32_t value):
        """
        Remove an element from the bitmap. This has no effect if the element is not present.

        >>> bm = BitMap([3, 12])
        >>> bm.discard(3)
        >>> bm
        BitMap([12])
        >>> bm.discard(3)
        >>> bm
        BitMap([12])
        """
        croaring.roaring_bitmap_remove(self._c_bitmap, value)

    def remove(self, uint32_t value):
        """
        Remove an element from the bitmap. This raises a KeyError exception if the element does not exist in the bitmap.

        >>> bm = BitMap([3, 12])
        >>> bm.remove(3)
        >>> bm
        BitMap([12])
        >>> bm.remove(3)
        Traceback (most recent call last):
        ...
103 | KeyError: 3 104 | """ 105 | cdef bool test = croaring.roaring_bitmap_remove_checked(self._c_bitmap, value) 106 | if not test: 107 | raise KeyError(value) 108 | 109 | cdef binary_iop(self, AbstractBitMap other, (void)func(croaring.roaring_bitmap_t*, const croaring.roaring_bitmap_t*) noexcept) noexcept: 110 | func(self._c_bitmap, other._c_bitmap) 111 | return self 112 | 113 | def __ior__(self, other): 114 | self._check_compatibility(other) 115 | if self._c_bitmap == (other)._c_bitmap: 116 | return self 117 | return (self).binary_iop(other, croaring.roaring_bitmap_or_inplace) 118 | 119 | def __iand__(self, other): 120 | self._check_compatibility(other) 121 | if self._c_bitmap == (other)._c_bitmap: 122 | return self 123 | return (self).binary_iop(other, croaring.roaring_bitmap_and_inplace) 124 | 125 | def __ixor__(self, other): 126 | self._check_compatibility(other) 127 | if self._c_bitmap == (other)._c_bitmap: 128 | self.clear() 129 | return self 130 | return (self).binary_iop(other, croaring.roaring_bitmap_xor_inplace) 131 | 132 | def __isub__(self, other): 133 | self._check_compatibility(other) 134 | if self._c_bitmap == (other)._c_bitmap: 135 | self.clear() 136 | return self 137 | return (self).binary_iop(other, croaring.roaring_bitmap_andnot_inplace) 138 | 139 | def intersection_update(self, *all_values): # FIXME could be more efficient 140 | """ 141 | Update the bitmap by taking its intersection with the given values. 142 | 143 | >>> bm = BitMap([3, 12]) 144 | >>> bm.intersection_update([8, 12, 55, 18]) 145 | >>> bm 146 | BitMap([12]) 147 | """ 148 | for values in all_values: 149 | if isinstance(values, AbstractBitMap): 150 | self &= values 151 | else: 152 | self &= AbstractBitMap(values, copy_on_write=self.copy_on_write) 153 | 154 | def difference_update(self, *others): 155 | """ 156 | Remove all elements of another set from this set. 

        >>> bm = BitMap([1, 2, 3, 4, 5])
        >>> bm.difference_update(BitMap([1, 2, 10]), BitMap([3, 4, 20]))
        >>> bm
        BitMap([5])
        """
        # Union all operands first, then subtract once.
        # NOTE(review): with no operands this calls union() with no arguments --
        # confirm how union() behaves in that case.
        self.__isub__(AbstractBitMap.union(*others))

    def symmetric_difference_update(self, other):
        """
        Update a set with the symmetric difference of itself and another.

        >>> bm = BitMap([1, 2, 3, 4])
        >>> bm.symmetric_difference_update(BitMap([1, 2, 10]))
        >>> bm
        BitMap([3, 4, 10])

        """
        self.__ixor__(other)

    def overwrite(self, AbstractBitMap other):
        """
        Clear the bitmap and overwrite it with another.

        >>> bm = BitMap([3, 12])
        >>> other = BitMap([4, 14])
        >>> bm.overwrite(other)
        >>> other.remove(4)
        >>> bm
        BitMap([4, 14])
        >>> other
        BitMap([14])
        """
        # Overwriting a bitmap with itself is refused outright.
        if self._c_bitmap == other._c_bitmap:
            raise ValueError('Cannot overwrite itself')
        croaring.roaring_bitmap_overwrite(self._c_bitmap, other._c_bitmap)

    def clear(self):
        """
        Remove all elements from this set.

        >>> bm = BitMap([1, 2, 3])
        >>> bm.clear()
        >>> bm
        BitMap([])
        """
        croaring.roaring_bitmap_clear(self._c_bitmap)

    def pop(self):
        """
        Remove and return an arbitrary set element.
        Raises KeyError if the set is empty.

        >>> bm = BitMap([1, 2])
        >>> a = bm.pop()
        >>> b = bm.pop()
        >>> bm
        BitMap([])
        >>> bm.pop()
        Traceback (most recent call last):
        ...
        KeyError: 'pop from an empty BitMap'

        """
        # The element removed is whatever self.min() returns; the ValueError it
        # raises is translated into the KeyError that set.pop() callers expect.
        try:
            value = self.min()
        except ValueError:
            raise KeyError('pop from an empty BitMap')
        self.remove(value)
        return value


    def flip_inplace(self, uint64_t start, uint64_t end):
        """
        Compute (in place) the negation of the bitmap within the specified interval.

        Areas outside the range are passed unchanged.

        >>> bm = BitMap([3, 12])
        >>> bm.flip_inplace(10, 15)
        >>> bm
        BitMap([3, 10, 11, 13, 14])
        """
        croaring.roaring_bitmap_flip_inplace(self._c_bitmap, start, end)

    def add_range(self, uint64_t range_start, uint64_t range_end):
        """
        Add a range of values from range_start (included) to range_end (excluded).

        >>> bm = BitMap([5, 7])
        >>> bm.add_range(6, 9)
        >>> bm
        BitMap([5, 6, 7, 8])
        """
        # Nothing to do for an empty range or one that starts at or above 2**32.
        if range_end <= range_start or range_end == 0 or range_start >= 2**32:
            return
        # Clamp the exclusive upper bound to 2**32.
        if range_end >= 2**32:
            range_end = 2**32
        croaring.roaring_bitmap_add_range(self._c_bitmap, range_start, range_end)

    def remove_range(self, uint64_t range_start, uint64_t range_end):
        """
        Remove a range of values from range_start (included) to range_end (excluded).

        >>> bm = BitMap([5, 6, 7, 8, 9, 10])
        >>> bm.remove_range(6, 9)
        >>> bm
        BitMap([5, 9, 10])
        """
        # Same early exit and clamping as add_range.
        if range_end <= range_start or range_end == 0 or range_start >= 2**32:
            return
        if range_end >= 2**32:
            range_end = 2**32
        croaring.roaring_bitmap_remove_range(self._c_bitmap, range_start, range_end)

# Mutable 64-bit bitmap: the 64-bit counterpart of BitMap.
cdef class BitMap64(AbstractBitMap64):

    cdef compute_hash(self):
        '''Unsupported method.'''
        # For some reason, if we directly override __hash__ (either in BitMap or in FrozenBitMap), the __richcmp__
        # method disappears.
        # Hashing is therefore refused here, in the hook, by always raising.
        raise TypeError('Cannot compute the hash of a %s.' % self.__class__.__name__)

    def add(self, uint64_t value):
        """
        Add an element to the bitmap. This has no effect if the element is already present.
283 | 284 | >>> bm = BitMap64() 285 | >>> bm.add(42) 286 | >>> bm 287 | BitMap64([42]) 288 | >>> bm.add(42) 289 | >>> bm 290 | BitMap64([42]) 291 | """ 292 | croaring.roaring64_bitmap_add(self._c_bitmap, value) 293 | 294 | def add_checked(self, uint64_t value): 295 | """ 296 | Add an element to the bitmap. This raises a KeyError exception if the element is already present. 297 | 298 | >>> bm = BitMap64() 299 | >>> bm.add_checked(42) 300 | >>> bm 301 | BitMap64([42]) 302 | >>> bm.add_checked(42) 303 | Traceback (most recent call last): 304 | ... 305 | KeyError: 42 306 | """ 307 | cdef bool test = croaring.roaring64_bitmap_add_checked(self._c_bitmap, value) 308 | if not test: 309 | raise KeyError(value) 310 | 311 | def update(self, *all_values): # FIXME could be more efficient 312 | """ 313 | Add all the given values to the bitmap. 314 | 315 | >>> bm = BitMap64([3, 12]) 316 | >>> bm.update([8, 12, 55, 18]) 317 | >>> bm 318 | BitMap64([3, 8, 12, 18, 55]) 319 | """ 320 | cdef vector[uint64_t] buff_vect 321 | cdef uint64_t[:] buff 322 | for values in all_values: 323 | if isinstance(values, AbstractBitMap64): 324 | self |= values 325 | elif isinstance(values, range): 326 | if len(values) == 0: 327 | continue 328 | _, (start, stop, step) = values.__reduce__() 329 | if step == -1: 330 | step = 1 331 | start, stop = stop+1, start+1 332 | if step == 1: 333 | self.add_range(start, stop) 334 | else: 335 | self |= AbstractBitMap64(values) 336 | elif isinstance(values, array.array) and len(values) > 0: 337 | buff = values 338 | croaring.roaring64_bitmap_add_many(self._c_bitmap, len(values), &buff[0]) 339 | else: 340 | try: 341 | size = len(values) 342 | except TypeError: # object has no length, creating a list 343 | values = list(values) 344 | size = len(values) 345 | if size > 0: 346 | buff_vect = values 347 | croaring.roaring64_bitmap_add_many(self._c_bitmap, size, &buff_vect[0]) 348 | 349 | def discard(self, uint64_t value): 350 | """ 351 | Remove an element from the bitmap. 
This has no effect if the element is not present. 352 | 353 | >>> bm = BitMap64([3, 12]) 354 | >>> bm.discard(3) 355 | >>> bm 356 | BitMap64([12]) 357 | >>> bm.discard(3) 358 | >>> bm 359 | BitMap64([12]) 360 | """ 361 | croaring.roaring64_bitmap_remove(self._c_bitmap, value) 362 | 363 | def remove(self, uint64_t value): 364 | """ 365 | Remove an element from the bitmap. This raises a KeyError exception if the element does not exist in the bitmap. 366 | 367 | >>> bm = BitMap64([3, 12]) 368 | >>> bm.remove(3) 369 | >>> bm 370 | BitMap64([12]) 371 | >>> bm.remove(3) 372 | Traceback (most recent call last): 373 | ... 374 | KeyError: 3 375 | """ 376 | cdef bool test = croaring.roaring64_bitmap_remove_checked(self._c_bitmap, value) 377 | if not test: 378 | raise KeyError(value) 379 | 380 | cdef binary_iop(self, AbstractBitMap64 other, (void)func(croaring.roaring64_bitmap_t*, const croaring.roaring64_bitmap_t*) noexcept) noexcept: 381 | func(self._c_bitmap, other._c_bitmap) 382 | return self 383 | 384 | def __ior__(self, other): 385 | if self._c_bitmap == (other)._c_bitmap: 386 | return self 387 | return (self).binary_iop(other, croaring.roaring64_bitmap_or_inplace) 388 | 389 | def __iand__(self, other): 390 | if self._c_bitmap == (other)._c_bitmap: 391 | return self 392 | return (self).binary_iop(other, croaring.roaring64_bitmap_and_inplace) 393 | 394 | def __ixor__(self, other): 395 | if self._c_bitmap == (other)._c_bitmap: 396 | self.clear() 397 | return self 398 | return (self).binary_iop(other, croaring.roaring64_bitmap_xor_inplace) 399 | 400 | def __isub__(self, other): 401 | if self._c_bitmap == (other)._c_bitmap: 402 | self.clear() 403 | return self 404 | return (self).binary_iop(other, croaring.roaring64_bitmap_andnot_inplace) 405 | 406 | def intersection_update(self, *all_values): # FIXME could be more efficient 407 | """ 408 | Update the bitmap by taking its intersection with the given values. 

        >>> bm = BitMap64([3, 12])
        >>> bm.intersection_update([8, 12, 55, 18])
        >>> bm
        BitMap64([12])
        """
        for values in all_values:
            if isinstance(values, AbstractBitMap64):
                self &= values
            else:
                # Anything else is first turned into a temporary bitmap.
                self &= AbstractBitMap64(values)

    def difference_update(self, *others):
        """
        Remove all elements of another set from this set.

        >>> bm = BitMap64([1, 2, 3, 4, 5])
        >>> bm.difference_update(BitMap64([1, 2, 10]), BitMap64([3, 4, 20]))
        >>> bm
        BitMap64([5])
        """
        # Union all operands first, then subtract once.
        # NOTE(review): with no operands this calls union() with no arguments --
        # confirm how union() behaves in that case.
        self.__isub__(AbstractBitMap64.union(*others))

    def symmetric_difference_update(self, other):
        """
        Update a set with the symmetric difference of itself and another.

        >>> bm = BitMap64([1, 2, 3, 4])
        >>> bm.symmetric_difference_update(BitMap64([1, 2, 10]))
        >>> bm
        BitMap64([3, 4, 10])

        """
        self.__ixor__(other)

    def clear(self):
        """
        Remove all elements from this set.

        >>> bm = BitMap64([1, 2, 3])
        >>> bm.clear()
        >>> bm
        BitMap64([])
        """
        # Emptied by intersecting with a freshly created empty bitmap.
        self.__iand__(BitMap64())

    def pop(self):
        """
        Remove and return an arbitrary set element.
        Raises KeyError if the set is empty.

        >>> bm = BitMap64([1, 2])
        >>> a = bm.pop()
        >>> b = bm.pop()
        >>> bm
        BitMap64([])
        >>> bm.pop()
        Traceback (most recent call last):
        ...
        KeyError: 'pop from an empty BitMap64'

        """
        # The element removed is whatever self.min() returns; the ValueError it
        # raises is translated into the KeyError that set.pop() callers expect.
        try:
            value = self.min()
        except ValueError:
            raise KeyError('pop from an empty BitMap64')
        self.remove(value)
        return value

    def add_range(self, uint64_t range_start, uint64_t range_end):
        """
        Add a range of values from range_start (included) to range_end (excluded).
481 | 482 | >>> bm = BitMap64([5, 7]) 483 | >>> bm.add_range(6, 9) 484 | >>> bm 485 | BitMap64([5, 6, 7, 8]) 486 | """ 487 | if range_end <= range_start or range_end == 0: 488 | return 489 | croaring.roaring64_bitmap_add_range(self._c_bitmap, range_start, range_end) 490 | 491 | def flip_inplace(self, uint64_t start, uint64_t end): 492 | """ 493 | Compute (in place) the negation of the bitmap within the specified interval. 494 | 495 | Areas outside the range are passed unchanged. 496 | 497 | >>> bm = BitMap64([3, 12]) 498 | >>> bm.flip_inplace(10, 15) 499 | >>> bm 500 | BitMap64([3, 10, 11, 13, 14]) 501 | """ 502 | croaring.roaring64_bitmap_flip_inplace(self._c_bitmap, start, end) 503 | 504 | def remove_range(self, uint64_t range_start, uint64_t range_end): 505 | """ 506 | Remove a range of values from range_start (included) to range_end (excluded). 507 | 508 | >>> bm = BitMap64([5, 6, 7, 8, 9, 10]) 509 | >>> bm.remove_range(6, 9) 510 | >>> bm 511 | BitMap64([5, 9, 10]) 512 | """ 513 | if range_end <= range_start or range_end == 0: 514 | return 515 | croaring.roaring64_bitmap_remove_range(self._c_bitmap, range_start, range_end) -------------------------------------------------------------------------------- /pyroaring/abstract_bitmap.pxi: -------------------------------------------------------------------------------- 1 | cimport croaring 2 | from libc.stdint cimport uint32_t, uint64_t, int64_t 3 | from libcpp cimport bool 4 | from libcpp.vector cimport vector 5 | from libc.stdlib cimport free, malloc 6 | 7 | from cpython cimport array 8 | import array 9 | 10 | try: 11 | range = xrange 12 | except NameError: # python 3 13 | pass 14 | 15 | 16 | cdef croaring.roaring_bitmap_t *deserialize_ptr(const unsigned char[:] buff): 17 | cdef croaring.roaring_bitmap_t *ptr 18 | cdef const char *reason_failure = NULL 19 | 20 | cdef char* buffer_ptr = &buff[0] 21 | 22 | buff_size = len(buff) 23 | ptr = croaring.roaring_bitmap_portable_deserialize_safe(buffer_ptr, buff_size) 24 | 
25 | if ptr == NULL: 26 | raise ValueError("Could not deserialize bitmap") 27 | # Validate the bitmap 28 | if not croaring.roaring_bitmap_internal_validate(ptr, &reason_failure): 29 | # If validation fails, free the bitmap and raise an exception 30 | croaring.roaring_bitmap_free(ptr) 31 | raise ValueError(f"Invalid bitmap after deserialization: {reason_failure.decode('utf-8')}") 32 | return ptr 33 | 34 | cdef croaring.roaring64_bitmap_t *deserialize64_ptr(const unsigned char[:] buff): 35 | cdef croaring.roaring64_bitmap_t *ptr 36 | cdef const char *reason_failure = NULL 37 | 38 | cdef char* buffer_ptr = &buff[0] 39 | 40 | buff_size = len(buff) 41 | ptr = croaring.roaring64_bitmap_portable_deserialize_safe(buffer_ptr, buff_size) 42 | if ptr == NULL: 43 | raise ValueError("Could not deserialize bitmap") 44 | # Validate the bitmap 45 | if not croaring.roaring64_bitmap_internal_validate(ptr, &reason_failure): 46 | # If validation fails, free the bitmap and raise an exception 47 | croaring.roaring64_bitmap_free(ptr) 48 | raise ValueError(f"Invalid bitmap after deserialization: {reason_failure.decode('utf-8')}") 49 | return ptr 50 | 51 | def _string_rep(bm): 52 | skip_rows = len(bm) > 500 #this is the cutoff number for the truncating to kick in. 
    table_max_width = 80 # this isn't the length of the entire output, it's only for the numeric part
    num_lines_if_skipping = 5 # the number of lines to show in the beginning and the end when output is being truncated

    head = bm.__class__.__name__ + '(['
    # Continuation rows are indented so the numbers line up under the first row.
    row_start_buffer = ' ' * len(head)
    tail = '])'

    try:
        maxval = bm.max()
    except ValueError:
        # empty bitmap
        return head + tail

    # Every column is as wide as the largest value, plus 2 for the ', ' separator.
    element_max_length = len(str(maxval))
    column_width = element_max_length + 2

    num_columns = table_max_width // column_width

    # Ceiling division: number of rows needed to show every element.
    num_rows = (len(bm) + num_columns - 1) // num_columns
    rows = []
    row_idx = 0
    skipped = False
    while row_idx < num_rows:
        row_ints = bm[row_idx * num_columns:(row_idx + 1) * num_columns]

        line = []
        for i in row_ints:
            s = str(i)
            if num_rows == 1:
                # no padding if all numbers fit on a single line
                line.append(s)
            else:
                # right-align the value within its column
                line.append(' ' * (element_max_length - len(s)) + s)

        if row_idx == 0:
            prefix = head
        else:
            prefix = row_start_buffer
        rows.append(prefix + ', '.join(line) + ',')
        row_idx += 1
        # When truncating, emit one '...' row after the first rows and jump
        # straight to the last `num_lines_if_skipping` rows (done at most once).
        if skip_rows and not skipped and row_idx >= num_lines_if_skipping:
            rows.append((' ' * ((table_max_width + len(head)) // 2)) + '...')
            skipped = True
            row_idx = num_rows - num_lines_if_skipping

    rows[-1] = rows[-1].rstrip(',') # remove trailing comma from the last line
    return '\n'.join(rows) + tail

cdef class AbstractBitMap:
    """
    An efficient and light-weight ordered set of 32-bit integers.
104 | """ 105 | cdef croaring.roaring_bitmap_t* _c_bitmap 106 | cdef int64_t _h_val 107 | 108 | def __cinit__(self, values=None, copy_on_write=False, optimize=True, no_init=False): 109 | if no_init: 110 | assert values is None and not copy_on_write 111 | return 112 | cdef vector[uint32_t] buff_vect 113 | cdef unsigned[:] buff 114 | if values is None: 115 | self._c_bitmap = croaring.roaring_bitmap_create() 116 | elif isinstance(values, AbstractBitMap): 117 | self._c_bitmap = croaring.roaring_bitmap_copy((values)._c_bitmap) 118 | self._h_val = (values)._h_val 119 | elif isinstance(values, range): 120 | _, (start, stop, step) = values.__reduce__() 121 | if step < 0: 122 | values = range(min(values), max(values)+1, -step) 123 | _, (start, stop, step) = values.__reduce__() 124 | if start >= stop: 125 | self._c_bitmap = croaring.roaring_bitmap_create() 126 | else: 127 | self._c_bitmap = croaring.roaring_bitmap_from_range(start, stop, step) 128 | elif isinstance(values, array.array): 129 | size = len(values) 130 | if size == 0: 131 | self._c_bitmap = croaring.roaring_bitmap_create() 132 | else: 133 | buff = values 134 | self._c_bitmap = croaring.roaring_bitmap_of_ptr(size, &buff[0]) 135 | else: 136 | try: 137 | size = len(values) 138 | except TypeError: # object has no length, creating a list 139 | values = list(values) 140 | size = len(values) 141 | self._c_bitmap = croaring.roaring_bitmap_create() 142 | if size > 0: 143 | buff_vect = values 144 | croaring.roaring_bitmap_add_many(self._c_bitmap, size, &buff_vect[0]) 145 | if not isinstance(values, AbstractBitMap): 146 | croaring.roaring_bitmap_set_copy_on_write(self._c_bitmap, copy_on_write) 147 | self._h_val = 0 148 | if optimize: 149 | self.run_optimize() 150 | self.shrink_to_fit() 151 | 152 | def __init__(self, values=None, copy_on_write=False, optimize=True): 153 | """ 154 | Construct a AbstractBitMap object, either empry or from an iterable. 155 | 156 | Copy on write can be enabled with the field copy_on_write. 
157 | 158 | >>> BitMap() 159 | BitMap([]) 160 | >>> BitMap([1, 123456789, 27]) 161 | BitMap([1, 27, 123456789]) 162 | >>> BitMap([1, 123456789, 27], copy_on_write=True) 163 | BitMap([1, 27, 123456789]) 164 | """ 165 | 166 | cdef from_ptr(self, croaring.roaring_bitmap_t *ptr) noexcept: 167 | """ 168 | Return an instance of AbstractBitMap (or one of its subclasses) initialized with the given pointer. 169 | 170 | FIXME: this should be a classmethod, but this is (currently) impossible for cdef methods. 171 | See https://groups.google.com/forum/#!topic/cython-users/FLHiLzzKqj4 172 | """ 173 | bm = self.__class__.__new__(self.__class__, no_init=True) 174 | (bm)._c_bitmap = ptr 175 | return bm 176 | 177 | @property 178 | def copy_on_write(self): 179 | """ 180 | True if and only if the bitmap has "copy on write" optimization enabled. 181 | 182 | >>> BitMap(copy_on_write=False).copy_on_write 183 | False 184 | >>> BitMap(copy_on_write=True).copy_on_write 185 | True 186 | """ 187 | return croaring.roaring_bitmap_get_copy_on_write(self._c_bitmap) 188 | 189 | def run_optimize(self): 190 | return croaring.roaring_bitmap_run_optimize(self._c_bitmap) 191 | 192 | def shrink_to_fit(self): 193 | return croaring.roaring_bitmap_shrink_to_fit(self._c_bitmap) 194 | 195 | def __dealloc__(self): 196 | if self._c_bitmap is not NULL: 197 | croaring.roaring_bitmap_free(self._c_bitmap) 198 | 199 | def _check_compatibility(self, AbstractBitMap other): 200 | if other is None: 201 | raise TypeError('Argument has incorrect type (expected pyroaring.AbstractBitMap, got None)') 202 | if self.copy_on_write != other.copy_on_write: 203 | raise ValueError('Cannot have interactions between bitmaps with and without copy_on_write.\n') 204 | 205 | def __contains__(self, uint32_t value): 206 | return croaring.roaring_bitmap_contains(self._c_bitmap, value) 207 | 208 | def __bool__(self): 209 | return not croaring.roaring_bitmap_is_empty(self._c_bitmap) 210 | 211 | def __len__(self): 212 | return 
croaring.roaring_bitmap_get_cardinality(self._c_bitmap) 213 | 214 | def __lt__(self, AbstractBitMap other): 215 | self._check_compatibility(other) 216 | return croaring.roaring_bitmap_is_strict_subset((self)._c_bitmap, (other)._c_bitmap) 217 | 218 | def __le__(self, AbstractBitMap other): 219 | self._check_compatibility(other) 220 | return croaring.roaring_bitmap_is_subset((self)._c_bitmap, (other)._c_bitmap) 221 | 222 | def __eq__(self, object other): 223 | if not isinstance(other, AbstractBitMap): 224 | return NotImplemented 225 | self._check_compatibility(other) 226 | return croaring.roaring_bitmap_equals((self)._c_bitmap, (other)._c_bitmap) 227 | 228 | def __ne__(self, object other): 229 | if not isinstance(other, AbstractBitMap): 230 | return NotImplemented 231 | self._check_compatibility(other) 232 | return not croaring.roaring_bitmap_equals((self)._c_bitmap, (other)._c_bitmap) 233 | 234 | def __gt__(self, AbstractBitMap other): 235 | self._check_compatibility(other) 236 | return croaring.roaring_bitmap_is_strict_subset((other)._c_bitmap, (self)._c_bitmap) 237 | 238 | def __ge__(self, AbstractBitMap other): 239 | self._check_compatibility(other) 240 | return croaring.roaring_bitmap_is_subset((other)._c_bitmap, (self)._c_bitmap) 241 | 242 | def contains_range(self, uint64_t range_start, uint64_t range_end): 243 | """ 244 | Check whether a range of values from range_start (included) to range_end (excluded) is present. 
def iter_equal_or_larger(self, uint32_t val):
    """
    Iterate over items in the bitmap equal or larger than a given value.

    The underlying C iterator is released when the generator finishes or is
    closed, including when no element is equal or larger than ``val``.

    >>> bm = BitMap([1, 2, 4])
    >>> list(bm.iter_equal_or_larger(2))
    [2, 4]
    """
    cdef croaring.roaring_uint32_iterator_t *iterator = croaring.roaring_iterator_create(self._c_bitmap)
    try:
        # The seek happens inside the try block so that the early exit below
        # still goes through the finally clause and frees the iterator.
        if not croaring.roaring_uint32_iterator_move_equalorlarger(iterator, val):
            return
        while iterator.has_value:
            yield iterator.current_value
            croaring.roaring_uint32_iterator_advance(iterator)
    finally:
        croaring.roaring_uint32_iterator_free(iterator)
def isdisjoint(self, other):
    """
    Return True if two sets have a null intersection.

    >>> BitMap([1, 2]).isdisjoint(BitMap([3, 4]))
    True

    >>> BitMap([1, 2, 3]).isdisjoint(BitMap([3, 4]))
    False

    """
    # intersect() answers the yes/no question directly; there is no need to
    # count every shared element just to compare the count with zero.
    return not self.intersect(other)
def union(*bitmaps):
    """
    Return the union of the bitmaps.

    All operands are taken positionally; the first one plays the role of
    ``self``. With a single bitmap a copy is returned, with two the ``|``
    operator is used, and with three or more every operand is passed to
    ``roaring_bitmap_or_many`` in one call. Calling it without any bitmap
    fails with IndexError, because ``bitmaps[0]`` is read unconditionally.

    >>> BitMap.union(BitMap([3, 12]), BitMap([5]), BitMap([0, 10, 12]))
    BitMap([0, 3, 5, 10, 12])
    """
    size = len(bitmaps)
    cdef croaring.roaring_bitmap_t *result
    cdef AbstractBitMap bm
    cdef vector[const croaring.roaring_bitmap_t*] buff
    if size <= 1:
        return bitmaps[0].copy()
    elif size == 2:
        return bitmaps[0] | bitmaps[1]
    else:
        # Check every operand against the first one and collect the raw C pointers.
        for bm in bitmaps:
            bitmaps[0]._check_compatibility(bm)
            buff.push_back(bm._c_bitmap)
        result = croaring.roaring_bitmap_or_many(size, &buff[0])
        # A throwaway instance of the first operand's class is built only to reach from_ptr().
        return (bitmaps[0].__class__()).from_ptr(result) # FIXME to change when from_ptr is a classmethod
def union_cardinality(self, AbstractBitMap other):
    """
    Count the elements that belong to at least one of the two bitmaps.

    Gives the same number as len(self | other), but faster.

    >>> BitMap([3, 12]).union_cardinality(AbstractBitMap([3, 5, 8]))
    4
    """
    self._check_compatibility(other)
    n_union = croaring.roaring_bitmap_or_cardinality(self._c_bitmap, other._c_bitmap)
    return n_union
def intersect(self, AbstractBitMap other):
    """
    Tell whether the two bitmaps share at least one element.

    Gives the same answer as len(self & other) > 0, but faster.

    >>> BitMap([3, 12]).intersect(BitMap([3, 18]))
    True
    >>> BitMap([3, 12]).intersect(BitMap([5, 18]))
    False
    """
    self._check_compatibility(other)
    has_common_element = croaring.roaring_bitmap_intersect(self._c_bitmap, other._c_bitmap)
    return has_common_element
def min(self):
    """
    Return the smallest element of the bitmap.

    Same result as min(self), but faster. Raises ValueError when the
    bitmap is empty.

    >>> BitMap([3, 12]).min()
    3
    """
    # Guard clause: an empty bitmap has no minimum to report.
    if len(self) == 0:
        raise ValueError('Empty roaring bitmap, there is no minimum.')
    return croaring.roaring_bitmap_minimum(self._c_bitmap)
def next_set_bit(self, uint32_t value):
    """
    Return the smallest element that is larger or equal to the given value.

    Raises ValueError when the bitmap holds no such element.

    >>> BitMap([1, 2, 4]).next_set_bit(1)
    1

    >>> BitMap([1, 2, 4]).next_set_bit(3)
    4

    >>> BitMap([1, 2, 4]).next_set_bit(5)
    Traceback (most recent call last):
    ValueError: No value larger or equal to specified value.
    """
    candidates = self.iter_equal_or_larger(value)
    try:
        # Only the first candidate matters; an exhausted generator means there is none.
        return next(candidates)
    except StopIteration:
        raise ValueError('No value larger or equal to specified value.')
cdef _get_slice(self, sl):
    """
    Return a new bitmap holding the elements selected by the slice ``sl``.

    For a faster computation, different methods, depending on the slice:
    a short contiguous slice is obtained by intersecting with a range of
    values, anything else goes through _generic_get_slice.
    """
    # Normalise the slice against the current cardinality.
    start, stop, step = sl.indices(len(self))
    sign = 1 if step > 0 else -1
    if (sign > 0 and start >= stop) or (sign < 0 and start <= stop): # empty chunk
        return self.__class__()
    r = range(start, stop, step)
    assert len(r) > 0
    # Elements found at the slice's start index and at the index just inside its stop.
    first_elt = self._get_elt(start)
    last_elt = self._get_elt(stop-sign)
    values = range(first_elt, last_elt+sign, step)
    if abs(step) == 1 and len(values) <= len(self) / 100: # contiguous and small chunk of the bitmap
        return self & self.__class__(values, copy_on_write=self.copy_on_write)
    else: # generic case
        # _generic_get_slice expects increasing indices, so a backward slice
        # is rewritten as the forward slice over the same indices.
        if step < 0:
            start = r[-1]
            stop = r[0] + 1
            step = -step
        else:
            start = r[0]
            stop = r[-1] + 1
        return self._generic_get_slice(start, stop, step)
def __getitem__(self, value):
    # Positional access: an integer yields the element at that rank
    # (negative ranks count from the end), a slice yields a new bitmap.
    if isinstance(value, int):
        return self._get_elt(value)
    elif isinstance(value, slice):
        return self._get_slice(value)
    else:
        # An unsupported index type is an error: the exception has to be
        # raised, not handed back to the caller as if it were an element.
        raise TypeError('Indices must be integers or slices, not %s' % type(value))
def __setstate__(self, state):
    # Restore the bitmap from the bytes produced by __getstate__ / serialize().
    cdef croaring.roaring_bitmap_t *restored
    try: # compatibility between Python2 and Python3 (see #27)
        restored = deserialize_ptr(state)
    except TypeError:
        restored = deserialize_ptr(state.encode())
    # The object may already own a bitmap (e.g. one allocated at construction);
    # release it only once the new one exists, so a failed deserialization
    # leaves the object unchanged and nothing is leaked.
    if self._c_bitmap is not NULL:
        croaring.roaring_bitmap_free(self._c_bitmap)
    self._c_bitmap = restored
    # Any cached hash described the previous content.
    self._h_val = 0
def __cinit__(self, values=None, copy_on_write=False, optimize=True, no_init=False):
    # Allocate the underlying 64-bit roaring bitmap from `values`, which may be
    # None (empty bitmap), another AbstractBitMap64 (copied, cached hash
    # included), a range, an array.array, or any other iterable of integers.
    # `no_init=True` skips allocation entirely; from_ptr() relies on it to
    # install an already-built C bitmap.
    cdef vector[uint64_t] buff_vect
    cdef uint64_t[:] buff
    cdef AbstractBitMap64 source
    if no_init:
        assert values is None
        return
    if values is None:
        self._c_bitmap = croaring.roaring64_bitmap_create()
    elif isinstance(values, AbstractBitMap64):
        # A typed local gives access to the C-level fields of the other bitmap.
        source = values
        self._c_bitmap = croaring.roaring64_bitmap_copy(source._c_bitmap)
        self._h_val = source._h_val
    elif isinstance(values, range):
        if not values:
            # Empty range, whatever the sign of its step.
            self._c_bitmap = croaring.roaring64_bitmap_create()
        else:
            if values.step < 0:
                # Same elements in increasing order. For a non-empty decreasing
                # range the last item is the smallest and the first the largest,
                # so no scan of the range is needed.
                values = range(values[-1], values[0] + 1, -values.step)
            self._c_bitmap = croaring.roaring64_bitmap_from_range(values.start, values.stop, values.step)
    elif isinstance(values, array.array):
        size = len(values)
        if size == 0:
            self._c_bitmap = croaring.roaring64_bitmap_create()
        else:
            buff = values
            self._c_bitmap = croaring.roaring64_bitmap_of_ptr(size, &buff[0])
    else:
        try:
            size = len(values)
        except TypeError:  # object has no length, creating a list
            values = list(values)
            size = len(values)
        self._c_bitmap = croaring.roaring64_bitmap_create()
        if size > 0:
            buff_vect = values
            croaring.roaring64_bitmap_add_many(self._c_bitmap, size, &buff_vect[0])
    if not isinstance(values, AbstractBitMap64):
        self._h_val = 0
    if optimize:
        self.run_optimize()

def __init__(self, values=None, copy_on_write=False, optimize=True):
    """
    Construct an AbstractBitMap64 object, either empty or from an iterable.

    The field copy_on_write has no effect (yet).

    >>> BitMap64()
    BitMap64([])
    >>> BitMap64([1, 123456789, 27])
    BitMap64([1, 27, 123456789])
    """
def contains_range(self, uint64_t range_start, uint64_t range_end):
    """
    Tell whether every value from range_start (included) to range_end (excluded) is in the bitmap.

    An empty range is reported as contained.

    >>> bm = BitMap64([5, 6, 7, 8, 9, 10])
    >>> bm.contains_range(6, 9)
    True
    >>> bm.contains_range(8, 12)
    False
    """
    if range_end > range_start and range_end != 0:
        return croaring.roaring64_bitmap_contains_range(self._c_bitmap, range_start, range_end)
    return True  # empty range
def iter_equal_or_larger(self, uint64_t val):
    """
    Iterate over items in the bitmap equal or larger than a given value.

    The underlying C iterator is released when the generator finishes or is
    closed, including when no element is equal or larger than ``val``.

    >>> bm = BitMap64([1, 2, 4])
    >>> list(bm.iter_equal_or_larger(2))
    [2, 4]
    """
    cdef croaring.roaring64_iterator_t *iterator = croaring.roaring64_iterator_create(self._c_bitmap)
    try:
        # Seeking inside the try block guarantees the iterator is freed even
        # when there is nothing to yield.
        valid = croaring.roaring64_iterator_move_equalorlarger(iterator, val)
        while valid:
            yield croaring.roaring64_iterator_value(iterator)
            valid = croaring.roaring64_iterator_advance(iterator)
    finally:
        croaring.roaring64_iterator_free(iterator)

def __iter__(self):
    # Yield the elements of the bitmap in the order given by the C iterator.
    cdef croaring.roaring64_iterator_t *iterator = croaring.roaring64_iterator_create(self._c_bitmap)
    try:
        # An empty bitmap simply skips the loop; the finally clause still
        # frees the iterator.
        valid = croaring.roaring64_iterator_has_value(iterator)
        while valid:
            yield croaring.roaring64_iterator_value(iterator)
            valid = croaring.roaring64_iterator_advance(iterator)
    finally:
        croaring.roaring64_iterator_free(iterator)
def max(self):
    """
    Return the largest element of the bitmap.

    Same result as max(self), but faster. Raises ValueError when the
    bitmap is empty.

    >>> BitMap64([3, 12]).max()
    12
    """
    # Guard clause: an empty bitmap has no maximum to report.
    if len(self) == 0:
        raise ValueError('Empty roaring bitmap, there is no maximum.')
    return croaring.roaring64_bitmap_maximum(self._c_bitmap)
cdef int64_t _shift_index(self, int64_t index) except -1:
    # Map a Python-style index (negative values count from the end) to a
    # non-negative position; anything outside [-size, size) is rejected.
    cdef int64_t size = len(self)
    if not (-size <= index < size):
        raise IndexError('Index out of bound')
    return index + size if index < 0 else index
cdef _get_slice(self, sl):
    """
    Return a new bitmap holding the elements selected by the slice ``sl``.

    For a faster computation, different methods, depending on the slice:
    a short contiguous slice is obtained by intersecting with a range of
    values, anything else goes through _generic_get_slice.
    """
    # Normalise the slice against the current cardinality.
    start, stop, step = sl.indices(len(self))
    sign = 1 if step > 0 else -1
    if (sign > 0 and start >= stop) or (sign < 0 and start <= stop): # empty chunk
        return self.__class__()
    r = range(start, stop, step)
    assert len(r) > 0
    # Elements found at the slice's start index and at the index just inside its stop.
    first_elt = self._get_elt(start)
    last_elt = self._get_elt(stop-sign)
    values = range(first_elt, last_elt+sign, step)
    if abs(step) == 1 and len(values) <= len(self) / 100: # contiguous and small chunk of the bitmap
        return self & self.__class__(values)
    else: # generic case
        # _generic_get_slice expects increasing indices, so a backward slice
        # is rewritten as the forward slice over the same indices.
        if step < 0:
            start = r[-1]
            stop = r[0] + 1
            step = -step
        else:
            start = r[0]
            stop = r[-1] + 1
        return self._generic_get_slice(start, stop, step)
def __getitem__(self, value):
    # Positional access: an integer yields the element at that rank
    # (negative ranks count from the end), a slice yields a new bitmap.
    if isinstance(value, int):
        return self._get_elt(value)
    elif isinstance(value, slice):
        return self._get_slice(value)
    else:
        # An unsupported index type is an error: the exception has to be
        # raised, not handed back to the caller as if it were an element.
        raise TypeError('Indices must be integers or slices, not %s' % type(value))
def __setstate__(self, state):
    # Restore the bitmap from the bytes produced by __getstate__ / serialize().
    cdef croaring.roaring64_bitmap_t *restored
    try: # compatibility between Python2 and Python3 (see #27)
        restored = deserialize64_ptr(state)
    except TypeError:
        restored = deserialize64_ptr(state.encode())
    # __cinit__ allocates a bitmap unless no_init is set; release whatever the
    # object currently owns, but only once the new bitmap exists, so a failed
    # deserialization leaves the object unchanged and nothing is leaked.
    if self._c_bitmap is not NULL:
        croaring.roaring64_bitmap_free(self._c_bitmap)
    self._c_bitmap = restored
    # Any cached hash described the previous content.
    self._h_val = 0
def difference(*bitmaps):
    """
    Return a new set holding the elements of the first bitmap that belong to none of the other bitmaps.

    >>> BitMap64.difference(BitMap64([1, 2, 3]), BitMap64([2, 20]), BitMap64([3, 30]))
    BitMap64([1])

    """
    cdef AbstractBitMap64 result, bm
    count = len(bitmaps)
    if count <= 1:
        # A single operand: nothing to subtract, hand back a copy of it.
        return bitmaps[0].copy()
    if count == 2:
        return bitmaps[0] - bitmaps[1]
    # Three or more operands: subtract in place from a mutable working copy,
    # then convert the result back to the class of the first operand.
    result = BitMap64(bitmaps[0])
    result._h_val = 0  # NOTE(review): presumably resets a cached hash value - confirm
    for bm in bitmaps[1:]:
        result -= bm
    return bitmaps[0].__class__(result)
def intersection(*bitmaps):
    """
    Return a new set holding the elements common to all the given bitmaps.

    >>> BitMap64.intersection(BitMap64(range(0, 15)), BitMap64(range(5, 20)), BitMap64(range(10, 25)))
    BitMap64([10, 11, 12, 13, 14])
    """
    cdef AbstractBitMap64 result, bm
    count = len(bitmaps)
    if count <= 1:
        # A single operand: hand back a copy of it.
        return bitmaps[0].copy()
    if count == 2:
        return bitmaps[0] & bitmaps[1]
    # Three or more operands: intersect in place on a mutable working copy,
    # then convert the result back to the class of the first operand.
    result = BitMap64(bitmaps[0])
    result._h_val = 0  # NOTE(review): presumably resets a cached hash value - confirm
    for bm in bitmaps[1:]:
        result &= bm
    return bitmaps[0].__class__(result)
def intersection_cardinality(self, AbstractBitMap64 other):
    """
    Count the elements that the two bitmaps have in common.

    Same value as len(self & other), obtained directly from CRoaring's
    and_cardinality routine.

    >>> BitMap64([3, 12]).intersection_cardinality(BitMap64([3, 5, 8]))
    1
    """
    shared = croaring.roaring64_bitmap_and_cardinality(self._c_bitmap, other._c_bitmap)
    return shared
def jaccard_index(self, AbstractBitMap64 other):
    """
    Return the Jaccard index of the two bitmaps.

    Same value as len(self&other)/len(self|other), obtained directly from
    CRoaring's jaccard_index routine.
    See https://en.wikipedia.org/wiki/Jaccard_index

    >>> BitMap64([3, 10, 12]).jaccard_index(BitMap64([3, 18]))
    0.25
    """
    index = croaring.roaring64_bitmap_jaccard_index(self._c_bitmap, other._c_bitmap)
    return index
# True when the suite runs against the 32-bit classes, False for the 64-bit ones.
is_32_bits = (bitsize=="32")

# Bind the implementation under test to the generic names BitMap, FrozenBitMap and
# AbstractBitMap, which the rest of this file uses.
if is_32_bits:
    from pyroaring import BitMap, FrozenBitMap, AbstractBitMap
else:
    from pyroaring import BitMap64 as BitMap, FrozenBitMap64 as FrozenBitMap, AbstractBitMap64 as AbstractBitMap  # type: ignore[assignment]
# Note: we could not find a way to type-check both the 32-bit and the 64-bit implementations from a single file.
# For simplicity, we therefore decided to type-check only the 32-bit version.
# To type-check the 64-bit version, remove the if statement above and keep only the body of its else branch
# (i.e. directly import BitMap64 as BitMap, etc.).
# Strategy building array.array instances from hyp_range: typecode 'I' when
# testing the 32-bit classes, 'Q' when testing the 64-bit ones.
hyp_array = st.builds(
    lambda x: array.array('I' if is_32_bits else 'Q', x),
    hyp_range,
)
value in bitmap 137 | else: 138 | assert value not in bitmap 139 | 140 | @staticmethod 141 | def bitmap_sample(bitmap: AbstractBitMap, size: int) -> list[int]: 142 | indices = random.sample(range(len(bitmap)), size) 143 | return [bitmap[i] for i in indices] 144 | 145 | def assert_is_not(self, bitmap1: AbstractBitMap, bitmap2: AbstractBitMap) -> None: 146 | add1 = remove1 = add2 = remove2 = -1 147 | if isinstance(bitmap1, BitMap): 148 | if bitmap1: 149 | remove1 = bitmap1[0] 150 | bitmap1.remove(remove1) 151 | else: 152 | add1 = 27 153 | bitmap1.add(add1) 154 | elif isinstance(bitmap2, BitMap): 155 | if bitmap2: 156 | remove2 = bitmap2[0] 157 | bitmap2.remove(remove2) 158 | else: 159 | add2 = 27 160 | bitmap2.add(add2) 161 | else: # The two are non-mutable, cannot do anything... 162 | return 163 | if bitmap1 == bitmap2: 164 | pytest.fail( 165 | 'The two bitmaps are identical (modifying one also modifies the other).') 166 | # Restore the bitmaps to their original point 167 | else: 168 | if add1 >= 0: 169 | bitmap1.remove(add1) 170 | if remove1 >= 0: 171 | bitmap1.add(remove1) 172 | if add2 >= 0: 173 | bitmap2.remove(add2) 174 | if remove2 >= 0: 175 | bitmap2.add(remove2) 176 | 177 | 178 | 179 | class TestBasic(Util): 180 | 181 | @given(hyp_collection, st.booleans()) 182 | @settings(deadline=None) 183 | def test_basic(self, values: HypCollection, cow: bool) -> None: 184 | bitmap = BitMap(copy_on_write=cow) 185 | if is_32_bits: 186 | assert bitmap.copy_on_write == cow 187 | expected_set: set[int] = set() 188 | self.compare_with_set(bitmap, expected_set) 189 | values = list(values) 190 | random.shuffle(values) 191 | size = len(values) 192 | for value in values[:size // 2]: 193 | bitmap.add(value) 194 | expected_set.add(value) 195 | self.compare_with_set(bitmap, expected_set) 196 | for value in values[size // 2:]: 197 | bitmap.add(value) 198 | with pytest.raises(KeyError): 199 | bitmap.add_checked(value) 200 | expected_set.add(value) 201 | self.compare_with_set(bitmap, 
@given(bitmap_cls, hyp_collection, st.booleans())
def test_constructor_values(
    self,
    cls: type[EitherBitMap],
    values: HypCollection,
    cow: bool,
) -> None:
    """A bitmap built from `values` must compare like the plain set of those values."""
    self.compare_with_set(cls(values, copy_on_write=cow), set(values))
def wrong_op(self, op: Callable[[BitMap, int], object]) -> None:
    """
    Check that `op` rejects invalid values when applied to an empty bitmap.

    A negative value and a value above the tested bit width (2**33 for 32 bits,
    2**65 for 64 bits) must raise OverflowError; a string must raise TypeError.
    """
    too_large = 2**33 if is_32_bits else 2**65
    bitmap = BitMap()
    for bad_value in (-3, too_large):
        with pytest.raises(OverflowError):
            op(bitmap, bad_value)
    with pytest.raises(TypeError):
        op(bitmap, 'bla')  # type: ignore[arg-type]
@given(bitmap_cls, hyp_collection, st.booleans())
def test_to_array(
    self,
    cls: type[EitherBitMap],
    values: HypCollection,
    cow: bool,
) -> None:
    """to_array() must equal an array of the sorted values, typecode 'I' (32 bits) or 'Q' (64 bits)."""
    typecode = 'I' if is_32_bits else 'Q'
    result = cls(values, copy_on_write=cow).to_array()
    assert result == array.array(typecode, sorted(values))
def check_slice(
    self,
    cls: type[EitherBitMap],
    values: HypCollection,
    start: int | None,
    stop: int | None,
    step: int | None,
    cow: bool,
) -> None:
    """
    Assert that slicing a bitmap matches slicing the list of its elements.

    The reference is the list obtained by iterating the bitmap, sliced with the
    same start/stop/step and then sorted; it is compared with the elements of
    the sliced bitmap, in iteration order.
    """
    bitmap = cls(values, copy_on_write=cow)
    ordered = list(bitmap)  # iteration order of the bitmap is the reference order
    expected = sorted(ordered[start:stop:step])
    observed = list(bitmap[start:stop:step])
    assert expected == observed
@given(bitmap_cls, hyp_collection, uint18, st.booleans())
def test_general_rank(
    self,
    cls: type[EitherBitMap],
    values: HypCollection,
    element: int,
    cow: bool,
) -> None:
    """rank(element) must equal the number of distinct values that are <= element."""
    bitmap = cls(values, copy_on_write=cow)
    observed_rank = bitmap.rank(element)
    expected_rank = sum(1 for n in set(values) if n <= element)
    assert expected_rank == observed_rank
@given(bitmap_cls, bitmap_cls, hyp_collection, hyp_collection, st.booleans())
def test_binary_op(
    self,
    cls1: type[EitherBitMap],
    cls2: type[EitherBitMap],
    values1: HypCollection,
    values2: HypCollection,
    cow: bool,
) -> None:
    """
    Check |, &, ^ and - between two bitmaps against the same operators on sets.

    For every operator: the operands must be left unchanged, the result must
    match the set result, and the result must have the type of the left operand.
    """
    for op in [operator.or_, operator.and_, operator.xor, operator.sub]:
        self.set1 = set(values1)
        self.set2 = set(values2)
        self.bitmap1 = cls1(values1, cow)
        self.bitmap2 = cls2(values2, cow)
        # Snapshots used to verify that the operator does not modify its operands.
        old_bitmap1 = cls1(self.bitmap1)
        old_bitmap2 = cls2(self.bitmap2)
        result_set = op(self.set1, self.set2)
        result_bitmap = op(self.bitmap1, self.bitmap2)
        assert self.bitmap1 == old_bitmap1
        assert self.bitmap2 == old_bitmap2
        self.compare_with_set(result_bitmap, result_set)
        # Exact type identity is intended here, hence `is` rather than `==`.
        assert type(self.bitmap1) is type(result_bitmap)
@given(bitmap_cls, bitmap_cls, hyp_collection, hyp_collection, st.booleans())
def test_comparison(
    self,
    cls1: type[EitherBitMap],
    cls2: type[EitherBitMap],
    values1: HypCollection,
    values2: HypCollection,
    cow: bool,
) -> None:
    """
    Check <=, >=, <, >, == and != between bitmaps against the same operators on sets.

    Each operator is tried on a bitmap with itself, on the two bitmaps, on their
    union against the second operand, and on the first operand against the union.
    """
    for op in [operator.le, operator.ge, operator.lt, operator.gt, operator.eq, operator.ne]:
        self.set1 = set(values1)
        self.set2 = set(values2)
        self.bitmap1 = cls1(values1, copy_on_write=cow)
        self.bitmap2 = cls2(values2, copy_on_write=cow)
        assert op(self.bitmap1, self.bitmap1) == \
            op(self.set1, self.set1)
        assert op(self.bitmap1, self.bitmap2) == \
            op(self.set1, self.set2)
        assert op(self.bitmap1 | self.bitmap2, self.bitmap2) == \
            op(self.set1 | self.set2, self.set2)
        # The left-hand side must exercise the bitmaps: it used to repeat the set
        # expression, which made this assertion always true.
        assert op(self.bitmap1, self.bitmap1 | self.bitmap2) == \
            op(self.set1, self.set1 | self.set2)
@given(bitmap_cls, st.booleans(), uint32, uint32)
def test_contains_range(self, cls: type[EitherBitMap], cow: bool, start: int, end: int) -> None:
    """
    Check contains_range on bitmaps built from [start, end) and close variants.

    The full range and its sub-ranges must be reported as contained; a bitmap
    missing the last, the first or a middle element must not contain the full range.
    """
    assume(start < end)
    full = range(start, end)
    assert cls(full).contains_range(start, end)
    assert cls(full).contains_range(start, end - 1)
    assert not cls(range(start, end - 1)).contains_range(start, end)
    assert cls(full).contains_range(start + 1, end)
    assert not cls(range(start + 1, end)).contains_range(start, end)
    try:
        middle = full[len(full) // 2]  # on 32bits systems, this call might fail when len(r) is too large
    except OverflowError:
        if sys.maxsize > 2**32:
            raise
        return
    punctured = cls(full) - cls([middle])
    assert not punctured.contains_range(start, end)
    assert punctured.contains_range(start, middle)
    assert punctured.contains_range(middle + 1, end)
@given(bitmap_cls, bitmap_cls, hyp_collection, hyp_collection, st.booleans())
def test_cardinality(
    self,
    cls1: type[EitherBitMap],
    cls2: type[EitherBitMap],
    values1: HypCollection,
    values2: HypCollection,
    cow: bool,
) -> None:
    """Each *_cardinality method must return the length of the corresponding operator's result."""
    checks = (
        (operator.or_, cls1.union_cardinality),
        (operator.and_, cls1.intersection_cardinality),
        (operator.sub, cls1.difference_cardinality),
        (operator.xor, cls1.symmetric_difference_cardinality),
    )
    for set_operator, cardinality_method in checks:
        self.bitmap1 = cls1(values1, copy_on_write=cow)
        self.bitmap2 = cls2(values2, copy_on_write=cow)
        real_value = len(set_operator(self.bitmap1, self.bitmap2))
        assert real_value == cardinality_method(self.bitmap1, self.bitmap2)
bool, 767 | ) -> None: 768 | assume(len(values1) > 0 or len(values2) > 0) 769 | self.bitmap1 = cls1(values1, copy_on_write=cow) 770 | self.bitmap2 = cls2(values2, copy_on_write=cow) 771 | real_value = float(len(self.bitmap1 & self.bitmap2)) / \ 772 | float(max(1, len(self.bitmap1 | self.bitmap2))) 773 | estimated_value = self.bitmap1.jaccard_index(self.bitmap2) 774 | assert real_value == pytest.approx(estimated_value) 775 | 776 | @given(bitmap_cls, hyp_collection, uint32, uint32) 777 | def test_range_cardinality( 778 | self, 779 | cls: type[EitherBitMap], 780 | values: HypCollection, 781 | a: int, 782 | b: int, 783 | ) -> None: 784 | bm = cls(values) 785 | start, end = sorted([a, b]) 786 | 787 | # make an intersection with the relevant range to test against 788 | test_bm = bm.intersection(BitMap(range(start, end))) 789 | 790 | assert len(test_bm) == bm.range_cardinality(start, end) 791 | 792 | 793 | class TestManyOperations(Util): 794 | all_bitmaps: Iterable[AbstractBitMap] 795 | 796 | @given(hyp_collection, hyp_many_collections, st.booleans()) 797 | def test_update( 798 | self, 799 | initial_values: HypCollection, 800 | all_values: list[HypCollection], 801 | cow: bool, 802 | ) -> None: 803 | self.initial_bitmap = BitMap(initial_values, copy_on_write=cow) 804 | self.all_bitmaps = [BitMap(values, copy_on_write=cow) 805 | for values in all_values] 806 | self.initial_bitmap.update(*all_values) 807 | expected_result = functools.reduce( 808 | lambda x, y: x | y, self.all_bitmaps + [self.initial_bitmap]) 809 | assert expected_result == self.initial_bitmap 810 | assert type(expected_result) == type(self.initial_bitmap) 811 | 812 | @given(hyp_collection, hyp_many_collections, st.booleans()) 813 | def test_intersection_update( 814 | self, 815 | initial_values: HypCollection, 816 | all_values: list[HypCollection], 817 | cow: bool, 818 | ) -> None: 819 | self.initial_bitmap = BitMap(initial_values, copy_on_write=cow) 820 | self.all_bitmaps = [BitMap(values, copy_on_write=cow) 
821 | for values in all_values] 822 | self.initial_bitmap.intersection_update(*all_values) 823 | expected_result = functools.reduce( 824 | lambda x, y: x & y, self.all_bitmaps + [self.initial_bitmap]) 825 | assert expected_result == self.initial_bitmap 826 | assert type(expected_result) == type(self.initial_bitmap) 827 | 828 | @given(bitmap_cls, st.data(), hyp_many_collections, st.booleans()) 829 | def test_union( 830 | self, 831 | cls: type[EitherBitMap], 832 | data: st.DataObject, 833 | all_values: list[HypCollection], 834 | cow: bool, 835 | ) -> None: 836 | classes = [data.draw(bitmap_cls) for _ in range(len(all_values))] 837 | self.all_bitmaps = [classes[i](values, copy_on_write=cow) 838 | for i, values in enumerate(all_values)] 839 | result = cls.union(*self.all_bitmaps) 840 | expected_result = functools.reduce( 841 | lambda x, y: x | y, self.all_bitmaps) 842 | assert expected_result == result 843 | 844 | @given(bitmap_cls, st.data(), hyp_many_collections, st.booleans()) 845 | def test_intersection( 846 | self, 847 | cls: type[EitherBitMap], 848 | data: st.DataObject, 849 | all_values: list[HypCollection], 850 | cow: bool, 851 | ) -> None: 852 | classes = [data.draw(bitmap_cls) for _ in range(len(all_values))] 853 | self.all_bitmaps = [classes[i](values, copy_on_write=cow) 854 | for i, values in enumerate(all_values)] 855 | result = cls.intersection(*self.all_bitmaps) 856 | expected_result = functools.reduce( 857 | lambda x, y: x & y, self.all_bitmaps) 858 | assert expected_result == result 859 | 860 | @given(bitmap_cls, st.data(), hyp_many_collections, st.booleans()) 861 | def test_difference( 862 | self, 863 | cls: type[EitherBitMap], 864 | data: st.DataObject, 865 | all_values: list[HypCollection], 866 | cow: bool, 867 | ) -> None: 868 | classes = [data.draw(bitmap_cls) for _ in range(len(all_values))] 869 | self.all_bitmaps = [classes[i](values, copy_on_write=cow) 870 | for i, values in enumerate(all_values)] 871 | result = 
cls.difference(*self.all_bitmaps) 872 | expected_result = functools.reduce( 873 | lambda x, y: x - y, self.all_bitmaps) 874 | assert expected_result == result 875 | 876 | 877 | class TestSerialization(Util): 878 | 879 | @given(bitmap_cls, bitmap_cls, hyp_collection) 880 | def test_serialization( 881 | self, 882 | cls1: type[EitherBitMap], 883 | cls2: type[EitherBitMap], 884 | values: HypCollection, 885 | ) -> None: 886 | old_bm = cls1(values) 887 | buff = old_bm.serialize() 888 | new_bm = cls2.deserialize(buff) 889 | assert old_bm == new_bm 890 | assert isinstance(new_bm, cls2) 891 | self.assert_is_not(old_bm, new_bm) 892 | 893 | @given(bitmap_cls, bitmap_cls, hyp_many_collections) 894 | def test_deserialization_from_memoryview( 895 | self, 896 | cls1: type[EitherBitMap], 897 | cls2: type[EitherBitMap], 898 | values: list[HypCollection] 899 | ) -> None: 900 | old_bms = [cls1(vals) for vals in values] 901 | 902 | # Create a memoryview with all of the items concatenated into a single bytes 903 | # object. 
904 | serialized = [bm.serialize() for bm in old_bms] 905 | sizes = [len(ser) for ser in serialized] 906 | starts = [0] 907 | for s in sizes: 908 | starts.append(s + starts[-1]) 909 | 910 | combined = b''.join(serialized) 911 | mutable_combined = bytearray(combined) 912 | 913 | for source in (combined, mutable_combined): 914 | with memoryview(source) as mv: 915 | new_bms = [cls2.deserialize(mv[start: start + size])for start, size in zip(starts, sizes)] 916 | for old_bm, new_bm in zip(old_bms, new_bms): 917 | assert old_bm == new_bm 918 | assert isinstance(new_bm, cls2) 919 | self.assert_is_not(old_bm, new_bm) 920 | 921 | @given(bitmap_cls, hyp_collection, st.integers(min_value=2, max_value=pickle.HIGHEST_PROTOCOL)) 922 | def test_pickle_protocol( 923 | self, 924 | cls: type[EitherBitMap], 925 | values: HypCollection, 926 | protocol: int, 927 | ) -> None: 928 | old_bm = cls(values) 929 | pickled = pickle.dumps(old_bm, protocol=protocol) 930 | new_bm = pickle.loads(pickled) 931 | assert old_bm == new_bm 932 | self.assert_is_not(old_bm, new_bm) 933 | 934 | @given(bitmap_cls) 935 | def test_impossible_deserialization( 936 | self, 937 | cls: type[EitherBitMap], 938 | ) -> None: 939 | wrong_input = base64.b64decode('aGVsbG8gd29ybGQ=') 940 | with pytest.raises(ValueError, match='Could not deserialize bitmap'): 941 | bitmap = cls.deserialize(wrong_input) 942 | 943 | @given(bitmap_cls) 944 | def test_invalid_deserialization( 945 | self, 946 | cls: type[EitherBitMap], 947 | ) -> None: 948 | wrong_input = base64.b64decode('aGVsbG8gd29ybGQ=') 949 | bm = cls(list(range(0, 1000000, 3))) 950 | bitmap_bytes = bm.serialize() 951 | bitmap_bytes = bitmap_bytes[:42] + wrong_input + bitmap_bytes[42:] 952 | with pytest.raises(ValueError, match='Invalid bitmap after deserialization|Could not deserialize bitmap'): 953 | bitmap = cls.deserialize(bitmap_bytes) 954 | 955 | 956 | class TestStatistics(Util): 957 | 958 | @given(bitmap_cls, hyp_collection, st.booleans()) 959 | def 
test_basic_properties( 960 | self, 961 | cls: type[EitherBitMap], 962 | values: HypCollection, 963 | cow: bool, 964 | ) -> None: 965 | bitmap = cls(values, copy_on_write=cow) 966 | stats = bitmap.get_statistics() 967 | assert stats['n_values_array_containers'] + stats['n_values_bitset_containers'] \ 968 | + stats['n_values_run_containers'] == len(bitmap) 969 | assert stats['n_bytes_array_containers'] == \ 970 | 2 * stats['n_values_array_containers'] 971 | assert stats['n_bytes_bitset_containers'] == \ 972 | 2**13 * stats['n_bitset_containers'] 973 | if len(values) > 0: 974 | assert stats['min_value'] == bitmap[0] 975 | assert stats['max_value'] == bitmap[len(bitmap) - 1] 976 | assert stats['cardinality'] == len(bitmap) 977 | 978 | @given(bitmap_cls) 979 | def test_implementation_properties_array(self, cls: type[EitherBitMap]) -> None: 980 | values = range(2**16 - 10, 2**16 + 10, 2) 981 | stats = cls(values).get_statistics() 982 | assert stats['n_array_containers'] == 2 983 | assert stats['n_bitset_containers'] == 0 984 | assert stats['n_run_containers'] == 0 985 | assert stats['n_values_array_containers'] == len(values) 986 | assert stats['n_values_bitset_containers'] == 0 987 | assert stats['n_values_run_containers'] == 0 988 | 989 | @given(bitmap_cls) 990 | def test_implementation_properties_bitset(self, cls: type[EitherBitMap]) -> None: 991 | values = range(2**0, 2**17, 2) 992 | stats = cls(values).get_statistics() 993 | assert stats['n_array_containers'] == 0 994 | assert stats['n_bitset_containers'] == 2 995 | assert stats['n_run_containers'] == 0 996 | assert stats['n_values_array_containers'] == 0 997 | assert stats['n_values_bitset_containers'] == len(values) 998 | assert stats['n_values_run_containers'] == 0 999 | 1000 | @given(bitmap_cls) 1001 | def test_implementation_properties_run(self, cls: type[EitherBitMap]) -> None: 1002 | values = range(2**0, 2**17, 1) 1003 | stats = cls(values).get_statistics() 1004 | assert stats['n_array_containers'] == 0 1005 
| assert stats['n_bitset_containers'] == 0 1006 | assert stats['n_run_containers'] == 2 1007 | assert stats['n_values_array_containers'] == 0 1008 | assert stats['n_values_bitset_containers'] == 0 1009 | assert stats['n_values_run_containers'] == len(values) 1010 | assert stats['n_bytes_run_containers'] == 12 1011 | 1012 | 1013 | class TestFlip(Util): 1014 | 1015 | def check_flip(self, bm_before: AbstractBitMap, bm_after: AbstractBitMap, start: int, end: int) -> None: 1016 | size = 100 1017 | iter_range = random.sample( 1018 | range(start, end), min(size, len(range(start, end)))) 1019 | iter_before = self.bitmap_sample(bm_before, min(size, len(bm_before))) 1020 | iter_after = self.bitmap_sample(bm_after, min(size, len(bm_after))) 1021 | for elt in iter_range: 1022 | if elt in bm_before: 1023 | assert elt not in bm_after 1024 | else: 1025 | assert elt in bm_after 1026 | for elt in iter_before: 1027 | if not (start <= elt < end): 1028 | assert elt in bm_after 1029 | for elt in iter_after: 1030 | if not (start <= elt < end): 1031 | assert elt in bm_before 1032 | 1033 | @given(bitmap_cls, hyp_collection, integer, integer, st.booleans()) 1034 | def test_flip_empty( 1035 | self, 1036 | cls: type[EitherBitMap], 1037 | values: HypCollection, 1038 | start: int, 1039 | end: int, 1040 | cow: bool, 1041 | ) -> None: 1042 | assume(start >= end) 1043 | bm_before = cls(values, copy_on_write=cow) 1044 | bm_copy = cls(bm_before) 1045 | bm_after = bm_before.flip(start, end) 1046 | assert bm_before == bm_copy 1047 | assert bm_before == bm_after 1048 | 1049 | @given(bitmap_cls, hyp_collection, integer, integer, st.booleans()) 1050 | def test_flip( 1051 | self, 1052 | cls: type[EitherBitMap], 1053 | values: HypCollection, 1054 | start: int, 1055 | end: int, 1056 | cow: bool, 1057 | ) -> None: 1058 | assume(start < end) 1059 | bm_before = cls(values, copy_on_write=cow) 1060 | bm_copy = cls(bm_before) 1061 | bm_after = bm_before.flip(start, end) 1062 | assert bm_before == bm_copy 1063 | 
self.check_flip(bm_before, bm_after, start, end) 1064 | 1065 | @given(hyp_collection, integer, integer, st.booleans()) 1066 | def test_flip_inplace_empty( 1067 | self, 1068 | values: HypCollection, 1069 | start: int, 1070 | end: int, 1071 | cow: bool, 1072 | ) -> None: 1073 | assume(start >= end) 1074 | bm_before = BitMap(values, copy_on_write=cow) 1075 | bm_after = BitMap(bm_before) 1076 | bm_after.flip_inplace(start, end) 1077 | assert bm_before == bm_after 1078 | 1079 | @given(hyp_collection, integer, integer, st.booleans()) 1080 | def test_flip_inplace( 1081 | self, 1082 | values: HypCollection, 1083 | start: int, 1084 | end: int, 1085 | cow: bool, 1086 | ) -> None: 1087 | assume(start < end) 1088 | bm_before = BitMap(values, copy_on_write=cow) 1089 | bm_after = BitMap(bm_before) 1090 | bm_after.flip_inplace(start, end) 1091 | self.check_flip(bm_before, bm_after, start, end) 1092 | 1093 | @pytest.mark.skipif(not is_32_bits, reason="not supported yet") 1094 | class TestShift(Util): 1095 | @given(bitmap_cls, hyp_collection, int64, st.booleans()) 1096 | def test_shift( 1097 | self, 1098 | cls: type[EitherBitMap], 1099 | values: HypCollection, 1100 | offset: int, 1101 | cow: bool, 1102 | ) -> None: 1103 | bm_before = cls(values, copy_on_write=cow) 1104 | bm_copy = cls(bm_before) 1105 | bm_after = bm_before.shift(offset) 1106 | assert bm_before == bm_copy 1107 | expected = cls([val + offset for val in values if val + offset in range(0, 2**32)], copy_on_write=cow) 1108 | assert bm_after == expected 1109 | 1110 | @pytest.mark.skipif(not is_32_bits, reason="not supported yet") 1111 | class TestIncompatibleInteraction(Util): 1112 | 1113 | def incompatible_op(self, op: Callable[[BitMap, BitMap], object]) -> None: 1114 | for cow1, cow2 in [(True, False), (False, True)]: 1115 | bm1 = BitMap(copy_on_write=cow1) 1116 | bm2 = BitMap(copy_on_write=cow2) 1117 | with pytest.raises(ValueError): 1118 | op(bm1, bm2) 1119 | 1120 | def test_incompatible_or(self) -> None: 1121 | 
self.incompatible_op(lambda x, y: x | y) 1122 | 1123 | def test_incompatible_and(self) -> None: 1124 | self.incompatible_op(lambda x, y: x & y) 1125 | 1126 | def test_incompatible_xor(self) -> None: 1127 | self.incompatible_op(lambda x, y: x ^ y) 1128 | 1129 | def test_incompatible_sub(self) -> None: 1130 | self.incompatible_op(lambda x, y: x - y) 1131 | 1132 | def test_incompatible_or_inplace(self) -> None: 1133 | self.incompatible_op(lambda x, y: x.__ior__(y)) 1134 | 1135 | def test_incompatible_and_inplace(self) -> None: 1136 | self.incompatible_op(lambda x, y: x.__iand__(y)) 1137 | 1138 | def test_incompatible_xor_inplace(self) -> None: 1139 | self.incompatible_op(lambda x, y: x.__ixor__(y)) 1140 | 1141 | def test_incompatible_sub_inplace(self) -> None: 1142 | self.incompatible_op(lambda x, y: x.__isub__(y)) 1143 | 1144 | def test_incompatible_eq(self) -> None: 1145 | self.incompatible_op(lambda x, y: x == y) 1146 | 1147 | def test_incompatible_neq(self) -> None: 1148 | self.incompatible_op(lambda x, y: x != y) 1149 | 1150 | def test_incompatible_le(self) -> None: 1151 | self.incompatible_op(lambda x, y: x <= y) 1152 | 1153 | def test_incompatible_lt(self) -> None: 1154 | self.incompatible_op(lambda x, y: x < y) 1155 | 1156 | def test_incompatible_ge(self) -> None: 1157 | self.incompatible_op(lambda x, y: x >= y) 1158 | 1159 | def test_incompatible_gt(self) -> None: 1160 | self.incompatible_op(lambda x, y: x > y) 1161 | 1162 | def test_incompatible_intersect(self) -> None: 1163 | self.incompatible_op(lambda x, y: x.intersect(y)) 1164 | 1165 | def test_incompatible_union(self) -> None: 1166 | self.incompatible_op(lambda x, y: BitMap.union(x, y)) 1167 | self.incompatible_op(lambda x, y: BitMap.union(x, x, y, y, x, x, y, y)) 1168 | 1169 | def test_incompatible_or_card(self) -> None: 1170 | self.incompatible_op(lambda x, y: x.union_cardinality(y)) 1171 | 1172 | def test_incompatible_and_card(self) -> None: 1173 | self.incompatible_op(lambda x, y: 
x.intersection_cardinality(y)) 1174 | 1175 | def test_incompatible_xor_card(self) -> None: 1176 | self.incompatible_op(lambda x, y: x.symmetric_difference_cardinality(y)) 1177 | 1178 | def test_incompatible_sub_card(self) -> None: 1179 | self.incompatible_op(lambda x, y: x.difference_cardinality(y)) 1180 | 1181 | def test_incompatible_jaccard(self) -> None: 1182 | self.incompatible_op(lambda x, y: x.jaccard_index(y)) 1183 | 1184 | 1185 | class TestBitMap: 1186 | @given(hyp_collection, uint32) 1187 | def test_iter_equal_or_larger(self, values: HypCollection, other_value: int) -> None: 1188 | bm = BitMap(values) 1189 | bm_iter = bm.iter_equal_or_larger(other_value) 1190 | expected = [i for i in values if i >= other_value] 1191 | expected.sort() 1192 | 1193 | observed = list(bm_iter) 1194 | assert expected == observed 1195 | 1196 | def test_unashability(self) -> None: 1197 | bm = BitMap() 1198 | with pytest.raises(TypeError): 1199 | hash(bm) 1200 | 1201 | 1202 | class TestFrozen: 1203 | 1204 | @given(hyp_collection, hyp_collection, integer) 1205 | def test_immutability(self, values: HypCollection, raw_other: HypCollection, number: int) -> None: 1206 | frozen = FrozenBitMap(values) 1207 | copy = FrozenBitMap(values) 1208 | other = BitMap(raw_other) 1209 | with pytest.raises(AttributeError): 1210 | frozen.clear() # type: ignore[attr-defined] 1211 | with pytest.raises(AttributeError): 1212 | frozen.pop() # type: ignore[attr-defined] 1213 | with pytest.raises(AttributeError): 1214 | frozen.add(number) # type: ignore[attr-defined] 1215 | with pytest.raises(AttributeError): 1216 | frozen.update(other) # type: ignore[attr-defined] 1217 | with pytest.raises(AttributeError): 1218 | frozen.discard(number) # type: ignore[attr-defined] 1219 | with pytest.raises(AttributeError): 1220 | frozen.remove(number) # type: ignore[attr-defined] 1221 | with pytest.raises(AttributeError): 1222 | frozen.intersection_update(other) # type: ignore[attr-defined] 1223 | with 
pytest.raises(AttributeError): 1224 | frozen.difference_update(other) # type: ignore[attr-defined] 1225 | with pytest.raises(AttributeError): 1226 | frozen.symmetric_difference_update(other) # type: ignore[attr-defined] 1227 | with pytest.raises(AttributeError): 1228 | frozen.update(number, number + 10) # type: ignore[attr-defined] 1229 | with pytest.raises(AttributeError): 1230 | frozen.overwrite(other) # type: ignore[attr-defined] 1231 | assert frozen == copy 1232 | 1233 | @given(hyp_collection, hyp_collection) 1234 | def test_hash_uneq(self, values1: HypCollection, values2: HypCollection) -> None: 1235 | """This test as a non null (but extremly low) probability to fail.""" 1236 | bitmap1 = FrozenBitMap(values1) 1237 | bitmap2 = FrozenBitMap(values2) 1238 | assume(bitmap1 != bitmap2) 1239 | h1 = hash(bitmap1) 1240 | h2 = hash(bitmap2) 1241 | hd = hash(bitmap1 ^ bitmap2) 1242 | hashes = [h1, h2, hd] 1243 | nb_collisions = len(hashes) - len(set(hashes)) 1244 | assert 1 >= nb_collisions 1245 | 1246 | @given(hyp_collection) 1247 | def test_hash_eq(self, values: HypCollection) -> None: 1248 | bitmap1 = FrozenBitMap(values) 1249 | bitmap2 = FrozenBitMap(values) 1250 | bitmap3 = FrozenBitMap(bitmap1) 1251 | assert hash(bitmap1) == hash(bitmap2) 1252 | assert hash(bitmap1) == hash(bitmap3) 1253 | 1254 | def test_hash_eq2(self) -> None: 1255 | """It can happen that two bitmaps hold the same values but have a different data structure. They should still 1256 | have a same hash. 
1257 | This test compares two bitmaps with the same values, one has a run container, the other has an array container.""" 1258 | n = 100 1259 | bm1 = FrozenBitMap(range(n)) 1260 | bm2 = BitMap() 1261 | for i in range(n): 1262 | bm2.add(i) 1263 | bm2 = FrozenBitMap(bm2, optimize=False) # type: ignore[assignment] 1264 | assert bm1 == bm2 1265 | assert bm1.get_statistics() != bm2.get_statistics() 1266 | assert hash(bm1) == hash(bm2) 1267 | 1268 | def test_hash_eq_after_operations(self) -> None: 1269 | """Testing that bitmaps have the same hash even when they have been obtained after some operations. 1270 | Test for issue #129. 1271 | """ 1272 | ref_hash = hash(FrozenBitMap([2, 3])) 1273 | assert ref_hash == hash(FrozenBitMap([1, 2, 3]) & FrozenBitMap([2, 3, 4])) 1274 | assert ref_hash == hash(FrozenBitMap([1, 2, 3]).intersection(FrozenBitMap([2, 3, 4]), FrozenBitMap([0, 2, 3]))) 1275 | assert ref_hash == hash(FrozenBitMap([2]) | FrozenBitMap([3])) 1276 | assert ref_hash == hash(FrozenBitMap([2]).union(FrozenBitMap([3]), FrozenBitMap())) 1277 | assert ref_hash == hash(FrozenBitMap([1, 2, 3, 4]) - FrozenBitMap([1, 4])) 1278 | assert ref_hash == hash(FrozenBitMap([1, 2, 3, 4]).difference(FrozenBitMap([1]), FrozenBitMap([4]))) 1279 | 1280 | class TestOptimization: 1281 | 1282 | @given(bitmap_cls) 1283 | def test_run_optimize(self, cls: type[EitherBitMap]) -> None: 1284 | bm1 = BitMap() 1285 | size = 1000 1286 | for i in range(size): 1287 | bm1.add(i) 1288 | bm2 = cls(bm1, optimize=False) 1289 | stats = bm2.get_statistics() 1290 | assert bm1.get_statistics() == stats 1291 | assert stats['n_containers'] == stats['n_array_containers'] 1292 | assert stats['n_values_array_containers'] == size 1293 | assert bm2.run_optimize() 1294 | stats = bm2.get_statistics() 1295 | assert stats['n_containers'] == stats['n_run_containers'] 1296 | assert stats['n_values_run_containers'] == size 1297 | bm3 = cls(bm1) # optimize is True by default 1298 | assert stats == bm3.get_statistics() 1299 
| 1300 | @pytest.mark.skipif(not is_32_bits, reason="not supported yet") 1301 | @given(bitmap_cls) 1302 | def test_shrink_to_fit(self, cls: type[EitherBitMap]) -> None: 1303 | bm1 = BitMap() 1304 | size = 1000 1305 | for i in range(size): 1306 | bm1.add(i) 1307 | bm2 = cls(bm1, optimize=False) 1308 | assert bm2.shrink_to_fit() > 0 1309 | assert bm2.shrink_to_fit() == 0 1310 | bm3 = cls(bm1, optimize=True) 1311 | assert bm3.shrink_to_fit() == 0 1312 | 1313 | 1314 | small_integer = st.integers(min_value=0, max_value=200) 1315 | small_integer_list = st.lists(min_size=0, max_size=2000, elements=small_integer) 1316 | 1317 | 1318 | class TestPythonSetEquivalent: 1319 | """ 1320 | The main goal of this class is to make sure the BitMap api is a superset of the python builtin set api. 1321 | """ 1322 | 1323 | @given(bitmap_cls, small_integer_list, st.booleans()) 1324 | def test_convert_to_set(self, BitMapClass: type[EitherBitMap], list1: list[int], cow: bool) -> None: 1325 | """ 1326 | Most of the tests depend on a working implementation for converting from BitMap to python set. 1327 | This test sanity checks it. 1328 | 1329 | This test should be modified or removed if you want to run PythonSetEquivalentTest with integers drawn from 1330 | a larger set than `small_integer`. It will become prohibitively time-consuming. 
1331 | """ 1332 | if BitMapClass == BitMap: 1333 | SetClass: type[EitherSet] = set 1334 | elif BitMapClass == FrozenBitMap: 1335 | SetClass = frozenset 1336 | else: 1337 | raise AssertionError() 1338 | 1339 | s1 = SetClass(list1) 1340 | b1 = BitMapClass(list1, copy_on_write=cow) 1341 | 1342 | converted_set = SetClass(b1) 1343 | 1344 | try: 1345 | min_value = min(s1) 1346 | except ValueError: 1347 | min_value = 0 1348 | 1349 | try: 1350 | max_value = max(s1) + 1 1351 | except ValueError: 1352 | max_value = 200 + 1 1353 | 1354 | for i in range(min_value, max_value): 1355 | assert (i in s1) == (i in converted_set) 1356 | 1357 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1358 | def test_difference(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1359 | if BitMapClass == BitMap: 1360 | SetClass: type[EitherSet] = set 1361 | elif BitMapClass == FrozenBitMap: 1362 | SetClass = frozenset 1363 | else: 1364 | raise AssertionError() 1365 | 1366 | s1 = SetClass(list1) 1367 | s2 = SetClass(list2) 1368 | 1369 | b1 = BitMapClass(list1, copy_on_write=cow) 1370 | b2 = BitMapClass(list2, copy_on_write=cow) 1371 | 1372 | assert s1.difference(s2) == set(b1.difference(b2)) 1373 | assert SetClass.difference(s1, s2) == set(BitMapClass.difference(b1, b2))# type: ignore[arg-type] 1374 | assert (s1 - s2) == set(b1 - b2) 1375 | assert b1 - b2 == b1.difference(b2) 1376 | 1377 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1378 | def test_symmetric_difference(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1379 | if BitMapClass == BitMap: 1380 | SetClass: type[EitherSet] = set 1381 | elif BitMapClass == FrozenBitMap: 1382 | SetClass = frozenset 1383 | else: 1384 | raise AssertionError() 1385 | 1386 | s1 = SetClass(list1) 1387 | s2 = SetClass(list2) 1388 | 1389 | b1 = BitMapClass(list1, copy_on_write=cow) 1390 | b2 = BitMapClass(list2, 
copy_on_write=cow) 1391 | 1392 | assert s1.symmetric_difference(s2) == set(b1.symmetric_difference(b2)) 1393 | assert SetClass.symmetric_difference(s1, s2) == set(BitMapClass.symmetric_difference(b1, b2))# type: ignore[arg-type] 1394 | 1395 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1396 | def test_union(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1397 | if BitMapClass == BitMap: 1398 | SetClass: type[EitherSet] = set 1399 | elif BitMapClass == FrozenBitMap: 1400 | SetClass = frozenset 1401 | else: 1402 | raise AssertionError() 1403 | 1404 | s1 = SetClass(list1) 1405 | s2 = SetClass(list2) 1406 | 1407 | b1 = BitMapClass(list1, copy_on_write=cow) 1408 | b2 = BitMapClass(list2, copy_on_write=cow) 1409 | 1410 | assert s1.union(s2) == set(b1.union(b2)) 1411 | assert SetClass.union(s1, s2) == set(BitMapClass.union(b1, b2))# type: ignore[arg-type] 1412 | assert (s1 | s2) == set(b1 | b2) 1413 | assert b1 | b2 == b1.union(b2) 1414 | 1415 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1416 | def test_issubset(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1417 | if BitMapClass == BitMap: 1418 | SetClass: type[EitherSet] = set 1419 | elif BitMapClass == FrozenBitMap: 1420 | SetClass = frozenset 1421 | else: 1422 | raise AssertionError() 1423 | 1424 | s1 = SetClass(list1) 1425 | s2 = SetClass(list2) 1426 | 1427 | b1 = BitMapClass(list1, copy_on_write=cow) 1428 | b2 = BitMapClass(list2, copy_on_write=cow) 1429 | 1430 | assert s1.issubset(s2) == b1.issubset(b2) 1431 | assert SetClass.issubset(s1, s2) == BitMapClass.issubset(b1, b2)# type: ignore[arg-type] 1432 | 1433 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1434 | def test_le(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1435 | if BitMapClass == BitMap: 1436 | SetClass: type[EitherSet] = 
set 1437 | elif BitMapClass == FrozenBitMap: 1438 | SetClass = frozenset 1439 | else: 1440 | raise AssertionError() 1441 | 1442 | s1 = SetClass(list1) 1443 | s2 = SetClass(list2) 1444 | 1445 | b1 = BitMapClass(list1, copy_on_write=cow) 1446 | b2 = BitMapClass(list2, copy_on_write=cow) 1447 | 1448 | assert s1.__le__(s2) == b1.__le__(b2) 1449 | assert SetClass.__le__(s1, s2) == BitMapClass.__le__(b1, b2)# type: ignore[operator] 1450 | assert (s1 <= s2) == (b1 <= b2) 1451 | 1452 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1453 | def test_ge(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1454 | if BitMapClass == BitMap: 1455 | SetClass: type[EitherSet] = set 1456 | elif BitMapClass == FrozenBitMap: 1457 | SetClass = frozenset 1458 | else: 1459 | raise AssertionError() 1460 | 1461 | s1 = SetClass(list1) 1462 | s2 = SetClass(list2) 1463 | 1464 | b1 = BitMapClass(list1, copy_on_write=cow) 1465 | b2 = BitMapClass(list2, copy_on_write=cow) 1466 | 1467 | assert s1.__ge__(s2) == b1.__ge__(b2) 1468 | assert SetClass.__ge__(s1, s2) == BitMapClass.__ge__(b1, b2)# type: ignore[operator] 1469 | assert (s1 >= s2) == (b1 >= b2) 1470 | 1471 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1472 | def test_eq(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1473 | if BitMapClass == BitMap: 1474 | SetClass: type[EitherSet] = set 1475 | elif BitMapClass == FrozenBitMap: 1476 | SetClass = frozenset 1477 | else: 1478 | raise AssertionError() 1479 | s1 = SetClass(list1) 1480 | s2 = SetClass(list2) 1481 | 1482 | b1 = BitMapClass(list1, copy_on_write=cow) 1483 | b2 = BitMapClass(list2, copy_on_write=cow) 1484 | 1485 | assert s1.__eq__(s2) == b1.__eq__(b2) 1486 | assert SetClass.__eq__(s1, s2) == BitMapClass.__eq__(b1, b2)# type: ignore[operator] 1487 | assert (s1 == s2) == (b1 == b2) 1488 | 1489 | @given(bitmap_cls, small_integer_list, 
small_integer_list, st.booleans()) 1490 | def test_issuperset(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1491 | if BitMapClass == BitMap: 1492 | SetClass: type[EitherSet] = set 1493 | elif BitMapClass == FrozenBitMap: 1494 | SetClass = frozenset 1495 | else: 1496 | raise AssertionError() 1497 | 1498 | s1 = SetClass(list1) 1499 | s2 = SetClass(list2) 1500 | 1501 | b1 = BitMapClass(list1, copy_on_write=cow) 1502 | b2 = BitMapClass(list2, copy_on_write=cow) 1503 | 1504 | assert s1.issuperset(s2) == b1.issuperset(b2) 1505 | assert SetClass.issuperset(s1, s2) == BitMapClass.issuperset(b1, b2)# type: ignore[arg-type] 1506 | 1507 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1508 | def test_isdisjoint(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1509 | if BitMapClass == BitMap: 1510 | SetClass: type[EitherSet] = set 1511 | elif BitMapClass == FrozenBitMap: 1512 | SetClass = frozenset 1513 | else: 1514 | raise AssertionError() 1515 | 1516 | s1 = SetClass(list1) 1517 | s2 = SetClass(list2) 1518 | 1519 | b1 = BitMapClass(list1, copy_on_write=cow) 1520 | b2 = BitMapClass(list2, copy_on_write=cow) 1521 | 1522 | assert s1.isdisjoint(s2) == b1.isdisjoint(b2) 1523 | assert SetClass.isdisjoint(s1, s2) == BitMapClass.isdisjoint(b1, b2)# type: ignore[arg-type] 1524 | 1525 | @given(small_integer_list, st.booleans()) 1526 | def test_clear(self, list1: list[int], cow: bool) -> None: 1527 | b1 = BitMap(list1, copy_on_write=cow) 1528 | b1.clear() 1529 | assert len(b1) == 0 1530 | 1531 | @given(small_integer_list, st.booleans()) 1532 | def test_pop(self, list1: list[int], cow: bool) -> None: 1533 | b1 = BitMap(list1, copy_on_write=cow) 1534 | starting_length = len(b1) 1535 | if starting_length >= 1: 1536 | popped_element = b1.pop() 1537 | assert len(b1) == starting_length - 1# length decreased by one 1538 | assert not popped_element in b1# and element isn't 
in the BitMap anymore 1539 | else: 1540 | with pytest.raises(KeyError): 1541 | b1.pop() 1542 | 1543 | @given(bitmap_cls, small_integer_list, st.booleans()) 1544 | def test_copy(self, BitMapClass: type[EitherBitMap], list1: list[int], cow: bool) -> None: 1545 | b1 = BitMapClass(list1, copy_on_write=cow) 1546 | b2 = b1.copy() 1547 | assert b2 == b1 1548 | 1549 | @given(small_integer_list, st.booleans()) 1550 | def test_copy_writable(self, list1: list[int], cow: bool) -> None: 1551 | b1 = BitMap(list1, copy_on_write=cow) 1552 | b2 = b1.copy() 1553 | 1554 | try: 1555 | new_element = max(b1) + 1 # doesn't exist in the set 1556 | except ValueError: 1557 | new_element = 1 1558 | 1559 | b2.add(new_element) 1560 | 1561 | assert new_element in b2 1562 | assert new_element not in b1 1563 | 1564 | @pytest.mark.skipif(not is_32_bits, reason="not supported yet") 1565 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1566 | def test_overwrite(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1567 | assume(set(list1) != set(list2)) 1568 | b1 = BitMap(list1, copy_on_write=cow) 1569 | orig1 = b1.copy() 1570 | b2 = BitMapClass(list2, copy_on_write=cow) 1571 | orig2 = b2.copy() 1572 | b1.overwrite(b2) 1573 | assert b1 == b2# the two bitmaps are now equal 1574 | assert b1 != orig1# the first bitmap has been modified 1575 | assert b2 == orig2# the second bitmap was left untouched 1576 | with pytest.raises(ValueError): 1577 | b1.overwrite(b1) 1578 | 1579 | @given(small_integer_list, small_integer_list, st.booleans()) 1580 | def test_difference_update(self, list1: list[int], list2: list[int], cow: bool) -> None: 1581 | s1 = set(list1) 1582 | s2 = set(list2) 1583 | s1.difference_update(s2) 1584 | 1585 | b1 = BitMap(list1, copy_on_write=cow) 1586 | b2 = BitMap(list2, copy_on_write=cow) 1587 | b1.difference_update(b2) 1588 | 1589 | assert s1 == set(b1) 1590 | 1591 | @given(small_integer_list, small_integer_list, 
st.booleans()) 1592 | def test_symmetric_difference_update(self, list1: list[int], list2: list[int], cow: bool) -> None: 1593 | s1 = set(list1) 1594 | s2 = set(list2) 1595 | s1.symmetric_difference_update(s2) 1596 | 1597 | b1 = BitMap(list1, copy_on_write=cow) 1598 | b2 = BitMap(list2, copy_on_write=cow) 1599 | b1.symmetric_difference_update(b2) 1600 | 1601 | assert s1 == set(b1) 1602 | 1603 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1604 | def test_dunder(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1605 | """ 1606 | Tests for &|^- 1607 | """ 1608 | if BitMapClass == BitMap: 1609 | SetClass: type[EitherSet] = set 1610 | elif BitMapClass == FrozenBitMap: 1611 | SetClass = frozenset 1612 | else: 1613 | raise AssertionError() 1614 | 1615 | s1 = SetClass(list1) 1616 | s2 = SetClass(list2) 1617 | 1618 | b1 = BitMapClass(list1, copy_on_write=cow) 1619 | b2 = BitMapClass(list2, copy_on_write=cow) 1620 | 1621 | assert s1.__and__(s2) == SetClass(b1.__and__(b2)) 1622 | assert s1.__or__(s2) == SetClass(b1.__or__(b2)) 1623 | assert s1.__xor__(s2) == SetClass(b1.__xor__(b2)) 1624 | assert s1.__sub__(s2) == SetClass(b1.__sub__(b2)) 1625 | 1626 | @given(small_integer_list, small_integer, st.booleans()) 1627 | def test_add(self, list1: list[int], value: int, cow: bool) -> None: 1628 | s1 = set(list1) 1629 | b1 = BitMap(list1, copy_on_write=cow) 1630 | assert s1 == set(b1) 1631 | 1632 | s1.add(value) 1633 | b1.add(value) 1634 | assert s1 == set(b1) 1635 | 1636 | @given(small_integer_list, small_integer, st.booleans()) 1637 | def test_discard(self, list1: list[int], value: int, cow: bool) -> None: 1638 | s1 = set(list1) 1639 | b1 = BitMap(list1, copy_on_write=cow) 1640 | assert s1 == set(b1) 1641 | 1642 | s1.discard(value) 1643 | b1.discard(value) 1644 | assert s1 == set(b1) 1645 | 1646 | @given(small_integer_list, small_integer, st.booleans()) 1647 | def test_remove(self, list1: list[int], value: 
int, cow: bool) -> None: 1648 | s1 = set(list1) 1649 | b1 = BitMap(list1, copy_on_write=cow) 1650 | assert s1 == set(b1) 1651 | 1652 | s1_raised = False 1653 | b1_raised = False 1654 | try: 1655 | s1.remove(value) 1656 | except KeyError: 1657 | s1_raised = True 1658 | 1659 | try: 1660 | b1.remove(value) 1661 | except KeyError: 1662 | b1_raised = True 1663 | 1664 | assert s1 == set(b1) 1665 | assert s1_raised == b1_raised# Either both raised exception or neither did 1666 | 1667 | @given(bitmap_cls, small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1668 | def test_nary_union(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1669 | if BitMapClass == BitMap: 1670 | SetClass: type[EitherSet] = set 1671 | elif BitMapClass == FrozenBitMap: 1672 | SetClass = frozenset 1673 | else: 1674 | raise AssertionError() 1675 | 1676 | s1 = SetClass(list1) 1677 | s2 = SetClass(list2) 1678 | s3 = SetClass(list3) 1679 | 1680 | b1 = BitMapClass(list1, copy_on_write=cow) 1681 | b2 = BitMapClass(list2, copy_on_write=cow) 1682 | b3 = BitMapClass(list3, copy_on_write=cow) 1683 | 1684 | assert SetClass.union(s1, s2, s3) == SetClass(BitMapClass.union(b1, b2, b3))# type: ignore[arg-type] 1685 | assert s1.union(s2, s3) == SetClass(b1.union(b2, b3)) 1686 | 1687 | @given(bitmap_cls, small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1688 | def test_nary_difference(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1689 | if BitMapClass == BitMap: 1690 | SetClass: type[EitherSet] = set 1691 | elif BitMapClass == FrozenBitMap: 1692 | SetClass = frozenset 1693 | else: 1694 | raise AssertionError() 1695 | 1696 | s1 = SetClass(list1) 1697 | s2 = SetClass(list2) 1698 | s3 = SetClass(list3) 1699 | 1700 | b1 = BitMapClass(list1, copy_on_write=cow) 1701 | b2 = BitMapClass(list2, copy_on_write=cow) 1702 | b3 = BitMapClass(list3, 
copy_on_write=cow) 1703 | 1704 | assert SetClass.difference(s1, s2, s3) == SetClass(BitMapClass.difference(b1, b2, b3))# type: ignore[arg-type] 1705 | assert s1.difference(s2, s3) == SetClass(b1.difference(b2, b3)) 1706 | 1707 | @given(bitmap_cls, small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1708 | def test_nary_intersection(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1709 | if BitMapClass == BitMap: 1710 | SetClass: type[EitherSet] = set 1711 | elif BitMapClass == FrozenBitMap: 1712 | SetClass = frozenset 1713 | else: 1714 | raise AssertionError() 1715 | 1716 | s1 = SetClass(list1) 1717 | s2 = SetClass(list2) 1718 | s3 = SetClass(list3) 1719 | 1720 | b1 = BitMapClass(list1, copy_on_write=cow) 1721 | b2 = BitMapClass(list2, copy_on_write=cow) 1722 | b3 = BitMapClass(list3, copy_on_write=cow) 1723 | 1724 | assert SetClass.intersection(s1, s2, s3) == SetClass(BitMapClass.intersection(b1, b2, b3))# type: ignore[arg-type] 1725 | assert s1.intersection(s2, s3) == SetClass(b1.intersection(b2, b3)) 1726 | 1727 | @given(small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1728 | def test_nary_intersection_update(self, list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1729 | s1 = set(list1) 1730 | s2 = set(list2) 1731 | s3 = set(list3) 1732 | 1733 | b1 = BitMap(list1, copy_on_write=cow) 1734 | b2 = BitMap(list2, copy_on_write=cow) 1735 | b3 = BitMap(list3, copy_on_write=cow) 1736 | 1737 | set.intersection_update(s1, s2, s3) 1738 | BitMap.intersection_update(b1, b2, b3) 1739 | assert s1 == set(b1) 1740 | 1741 | s1 = set(list1) 1742 | s2 = set(list2) 1743 | s3 = set(list3) 1744 | 1745 | b1 = BitMap(list1, copy_on_write=cow) 1746 | b2 = BitMap(list2, copy_on_write=cow) 1747 | b3 = BitMap(list3, copy_on_write=cow) 1748 | 1749 | s1.intersection_update(s2, s3) 1750 | b1.intersection_update(b2, b3) 1751 | 1752 | assert s1 == set(b1) 1753 
| 1754 | @given(small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1755 | def test_nary_difference_update(self, list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1756 | s1 = set(list1) 1757 | s2 = set(list2) 1758 | s3 = set(list3) 1759 | 1760 | b1 = BitMap(list1, copy_on_write=cow) 1761 | b2 = BitMap(list2, copy_on_write=cow) 1762 | b3 = BitMap(list3, copy_on_write=cow) 1763 | 1764 | set.difference_update(s1, s2, s3) 1765 | BitMap.difference_update(b1, b2, b3) 1766 | assert s1 == set(b1) 1767 | 1768 | s1 = set(list1) 1769 | s2 = set(list2) 1770 | s3 = set(list3) 1771 | 1772 | b1 = BitMap(list1, copy_on_write=cow) 1773 | b2 = BitMap(list2, copy_on_write=cow) 1774 | b3 = BitMap(list3, copy_on_write=cow) 1775 | 1776 | s1.difference_update(s2, s3) 1777 | b1.difference_update(b2, b3) 1778 | 1779 | assert s1 == set(b1) 1780 | 1781 | @given(small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1782 | def test_nary_update(self, list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1783 | s1 = set(list1) 1784 | s2 = set(list2) 1785 | s3 = set(list3) 1786 | 1787 | b1 = BitMap(list1, copy_on_write=cow) 1788 | b2 = BitMap(list2, copy_on_write=cow) 1789 | b3 = BitMap(list3, copy_on_write=cow) 1790 | 1791 | set.update(s1, s2, s3) 1792 | BitMap.update(b1, b2, b3) 1793 | assert s1 == set(b1) 1794 | 1795 | s1 = set(list1) 1796 | s2 = set(list2) 1797 | s3 = set(list3) 1798 | 1799 | b1 = BitMap(list1, copy_on_write=cow) 1800 | b2 = BitMap(list2, copy_on_write=cow) 1801 | b3 = BitMap(list3, copy_on_write=cow) 1802 | 1803 | s1.update(s2, s3) 1804 | b1.update(b2, b3) 1805 | 1806 | assert s1 == set(b1) 1807 | 1808 | 1809 | small_list_of_uin32 = st.lists(min_size=0, max_size=400, elements=uint32) 1810 | large_list_of_uin32 = st.lists(min_size=600, max_size=1000, elements=uint32, unique=True) 1811 | 1812 | 1813 | class TestString: 1814 | 1815 | @given(bitmap_cls, small_list_of_uin32) 1816 | def 
test_small_list(self, cls: type[EitherBitMap], collection: list[int]) -> None: 1817 | # test that repr for a small bitmap is equal to the original bitmap 1818 | bm = cls(collection) 1819 | string_repr = repr(bm) 1820 | if not is_32_bits: 1821 | string_repr = string_repr.replace("BitMap64", "BitMap") # we redefined BitMap64 to BitMap at the top of this file 1822 | assert bm == eval(string_repr) 1823 | 1824 | @settings(suppress_health_check=HealthCheck) 1825 | @given(bitmap_cls, large_list_of_uin32) 1826 | def test_large_list(self, cls: type[EitherBitMap], collection: list[int]) -> None: 1827 | # test that for a large bitmap the both the start and the end of the bitmap get printed 1828 | 1829 | bm = cls(collection) 1830 | s = repr(bm).replace(cls.__name__, " ") 1831 | nondigits = set(s) - set('0123456789\n.') 1832 | for x in nondigits: 1833 | s = s.replace(x, ' ') 1834 | 1835 | small, large = s.split('...') 1836 | small_ints = [int(i) for i in small.split()] 1837 | large_ints = [int(i) for i in large.split()] 1838 | 1839 | for i in small_ints: 1840 | assert i in bm 1841 | 1842 | for i in large_ints: 1843 | assert i in bm 1844 | 1845 | assert min(small_ints) == bm.min() 1846 | assert max(large_ints) == bm.max() 1847 | 1848 | 1849 | class TestVersion: 1850 | def assert_regex(self, pattern: str, text: str) -> None: 1851 | matches = re.findall(pattern, text) 1852 | if len(matches) != 1 or matches[0] != text: 1853 | pytest.fail('Regex "%s" does not match text "%s".' % (pattern, text)) 1854 | 1855 | def test_version(self) -> None: 1856 | self.assert_regex(r'\d+\.\d+\.\d+(?:\.dev\d+)?', pyroaring.__version__) 1857 | self.assert_regex(r'v\d+\.\d+\.\d+', pyroaring.__croaring_version__) 1858 | 1859 | 1860 | if __name__ == "__main__": 1861 | unittest.main() 1862 | --------------------------------------------------------------------------------