├── .gitattributes ├── .github └── workflows │ ├── buildwheels.yml │ └── test.yml ├── .gitignore ├── .gitmodules ├── .readthedocs.yaml ├── LICENSE ├── MANIFEST.in ├── Pipfile ├── README.rst ├── cydoctest.py ├── docs ├── Makefile ├── conf.py ├── index.rst └── requirements.txt ├── download_amalgamation.py ├── pyroaring ├── __init__.pyi ├── abstract_bitmap.pxi ├── bitmap.pxi ├── croaring.pxd ├── croaring_version.pxi ├── frozen_bitmap.pxi ├── py.typed ├── pyroaring.pyx ├── roaring.c ├── roaring.h └── version.pxi ├── quick_bench.py ├── setup.cfg ├── setup.py ├── test.py ├── test_state_machine.py └── tox.ini /.gitattributes: -------------------------------------------------------------------------------- 1 | pyroaring/roaring.c -diff 2 | pyroaring/roaring.h -diff -------------------------------------------------------------------------------- /.github/workflows/buildwheels.yml: -------------------------------------------------------------------------------- 1 | # This is based on the cibuildwheel example at 2 | # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml 3 | # 4 | # This workflow builds and tests wheels across multiple platforms using 5 | # cibuildwheel and creates the release sdist. 
Config not specified here can 6 | # be found in pyproject.toml 7 | 8 | name: Build and upload wheels 9 | 10 | on: 11 | workflow_dispatch: 12 | push: 13 | branches: 14 | - master 15 | release: 16 | types: 17 | - released 18 | - prereleased 19 | 20 | jobs: 21 | 22 | build_sdist: 23 | name: Build source distribution 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v3 27 | 28 | - name: Build sdist 29 | run: pipx run build --sdist 30 | 31 | - uses: actions/upload-artifact@v4 32 | with: 33 | path: dist/*.tar.gz 34 | name: cibw-sdist 35 | 36 | build_wheels: 37 | name: Build wheels on ${{ matrix.os }} 38 | runs-on: ${{ matrix.os }} 39 | strategy: 40 | fail-fast: false 41 | matrix: 42 | # macos-13 is an intel runner, macos-14 is apple silicon 43 | os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest, macos-13, macos-14] 44 | 45 | steps: 46 | - uses: actions/checkout@v3 47 | - name: Set up QEMU 48 | if: runner.os == 'Linux' && runner.arch == 'X64' 49 | uses: docker/setup-qemu-action@v3 50 | with: 51 | platforms: all 52 | - name: Build wheels 53 | uses: pypa/cibuildwheel@v2.23.2 54 | env: 55 | CIBW_TEST_REQUIRES: hypothesis pytest 56 | CIBW_TEST_COMMAND: "python {project}/cydoctest.py -v" # full test command: py.test {project}/test.py -v 57 | CIBW_SKIP: "pp* cp36-* cp37-*" 58 | CIBW_ARCHS_LINUX: ${{ runner.arch == 'X64' && 'auto' || 'auto armv7l' }} 59 | CIBW_ARCHS_MACOS: ${{ runner.arch == 'X64' && 'auto' || 'auto universal2' }} 60 | CIBW_ARCHS_WINDOWS: "auto ARM64" 61 | CIBW_TEST_SKIP: "*-win_arm64" 62 | CIBW_BUILD_FRONTEND: "build" 63 | 64 | - uses: actions/upload-artifact@v4 65 | with: 66 | path: ./wheelhouse/*.whl 67 | name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} 68 | 69 | upload_pypi: 70 | needs: [build_wheels, build_sdist] 71 | runs-on: ubuntu-latest 72 | steps: 73 | - uses: actions/download-artifact@v4 74 | with: 75 | pattern: cibw-* 76 | path: dist 77 | merge-multiple: true 78 | 79 | - uses: pypa/gh-action-pypi-publish@v1.5.0 80 | 
# Deploy releases to pypi. 81 | if: github.event_name == 'release' && github.event.action == 'released' 82 | with: 83 | user: __token__ 84 | password: ${{ secrets.PYPI_API_TOKEN }} 85 | 86 | - uses: pypa/gh-action-pypi-publish@v1.5.0 87 | # Deploy pre-releases to test pypi. 88 | if: github.event_name == 'release' && github.event.action == 'prereleased' 89 | with: 90 | user: __token__ 91 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 92 | repository_url: https://test.pypi.org/legacy/ 93 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push,pull_request,workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | os: [ubuntu-latest, macos-latest, windows-latest] 12 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] 13 | steps: 14 | - name: Set up the repository 15 | uses: actions/checkout@v3 16 | with: 17 | submodules: recursive 18 | fetch-depth: 0 19 | - name: Set up Python ${{ matrix.python-version }} 20 | uses: actions/setup-python@v5.1.1 21 | with: 22 | python-version: ${{ matrix.python-version }} 23 | allow-prereleases: true 24 | - name: Install dependencies 25 | run: | 26 | pip install --upgrade pip 27 | pip install tox 28 | - name: Local build and tests (32 bits roaring bitmaps) 29 | env: 30 | HYPOTHESIS_PROFILE: ci 31 | ROARING_BITSIZE: 32 32 | run: | 33 | tox 34 | - name: Local build and tests (64 bits roaring bitmaps) 35 | env: 36 | HYPOTHESIS_PROFILE: ci 37 | ROARING_BITSIZE: 64 38 | run: | 39 | tox 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .hypothesis 3 | *.pyc 4 | *.tex 5 | *.aux 6 | *.log 7 | *.pickle 8 | *.pdf 9 | *.so 10 | .tox 11 | wheel 12 | 
build 13 | dist 14 | *.egg-info 15 | amalgamation_demo.* 16 | MANIFEST 17 | docs/_build 18 | Pipfile.lock 19 | .mypy_cache 20 | pyroaring/pyroaring.cpp 21 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ezibenroc/PyRoaringBitMap/11683c024e4c10cefaaef7ca7d58878339c4dabb/.gitmodules -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | # You can also specify other tool versions: 13 | # nodejs: "20" 14 | # rust: "1.70" 15 | # golang: "1.20" 16 | 17 | # Build documentation in the "docs/" directory with Sphinx 18 | sphinx: 19 | configuration: docs/conf.py 20 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 21 | # builder: "dirhtml" 22 | # Fail on all warnings to avoid broken references 23 | # fail_on_warning: true 24 | 25 | # Optionally build your docs in additional formats such as PDF and ePub 26 | # formats: 27 | # - pdf 28 | # - epub 29 | 30 | # Optional but recommended, declare the Python requirements required 31 | # to build your documentation 32 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 33 | python: 34 | install: 35 | - requirements: docs/requirements.txt 36 | - method: pip 37 | path: . 
38 | extra_requirements: 39 | - docs 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Tom Cornebize 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyroaring/* 2 | include LICENSE 3 | exclude pyproject.toml 4 | exclude pyroaring/pyroaring.cpp 5 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | 8 | [dev-packages] 9 | Cython = ">=0.21" 10 | hypothesis = "<3.60" 11 | ipython = "*" 12 | twine = "*" 13 | 14 | [requires] 15 | python_version = "3.6" 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |Documentation Status| 2 | 3 | An efficient and light-weight ordered set of integers. 4 | This is a Python wrapper for the C library `CRoaring `__. 5 | 6 | Example 7 | ------- 8 | 9 | You can use a bitmap nearly as the classical Python set in your code: 10 | 11 | .. code:: python 12 | 13 | from pyroaring import BitMap 14 | bm1 = BitMap() 15 | bm1.add(3) 16 | bm1.add(18) 17 | print("has 3:", 3 in bm1) 18 | print("has 4:", 4 in bm1) 19 | bm2 = BitMap([3, 27, 42]) 20 | print("bm1 = %s" % bm1) 21 | print("bm2 = %s" % bm2) 22 | print("bm1 & bm2 = %s" % (bm1&bm2)) 23 | print("bm1 | bm2 = %s" % (bm1|bm2)) 24 | 25 | Output: 26 | 27 | :: 28 | 29 | has 3: True 30 | has 4: False 31 | bm1 = BitMap([3, 18]) 32 | bm2 = BitMap([3, 27, 42]) 33 | bm1 & bm2 = BitMap([3]) 34 | bm1 | bm2 = BitMap([3, 18, 27, 42]) 35 | 36 | The class ``BitMap`` is for 32 bit integers, it supports values from 0 to 2**32-1 (included). 37 | 38 | For larger numbers, you can use the class ``BitMap64`` that supports values from 0 to 2**64-1 (included). 
39 | 40 | Installation from Pypi 41 | ---------------------- 42 | 43 | Supported systems: Linux, MacOS or Windows, Python 3.8 or higher. Note that pyroaring might still work with older Python 44 | versions, but they are not tested anymore. 45 | 46 | To install pyroaring on your local account, use the following command: 47 | 48 | .. code:: bash 49 | 50 | pip install pyroaring --user 51 | 52 | For a system-wide installation, use the following command: 53 | 54 | .. code:: bash 55 | 56 | pip install pyroaring 57 | 58 | Naturally, the latter may require superuser rights (consider prefixing 59 | the commands by ``sudo``). 60 | 61 | If you want to use Python 3 and your system defaults to Python 2.7, you 62 | may need to adjust the above commands, e.g., replace ``pip`` by ``pip3``. 63 | 64 | Installation from conda-forge 65 | ----------------------------- 66 | 67 | Conda users can install the package from `conda-forge`: 68 | 69 | .. code:: bash 70 | 71 | conda install -c conda-forge pyroaring 72 | 73 | (Supports Python 3.6 or higher; Mac/Linux/Windows) 74 | 75 | Installation from Source 76 | --------------------------------- 77 | 78 | If you want to compile (and install) pyroaring by yourself, for instance 79 | to modify the Cython sources you can follow the following instructions. 80 | Note that these examples will install in your currently active python 81 | virtual environment. Installing this way will require an appropriate 82 | C compiler to be installed on your system. 83 | 84 | First clone this repository. 85 | 86 | .. code:: bash 87 | 88 | git clone https://github.com/Ezibenroc/PyRoaringBitMap.git 89 | 90 | To install from Cython via source, for example during development run the following from the root of the above repository: 91 | 92 | .. code:: bash 93 | 94 | python -m pip install . 95 | 96 | This will automatically install Cython if it is not present for the build, cythonise the source files and compile everything for you. 
97 | 98 | If you just want to recompile the package in place for quick testing you can 99 | try the following: 100 | 101 | .. code:: bash 102 | 103 | python setup.py build_clib 104 | python setup.py build_ext -i 105 | 106 | Note that the build_clib compiles croaring only, and only needs to be run once. 107 | 108 | Then you can test the new code using tox - this will install all the other 109 | dependencies needed for testing and test in an isolated environment: 110 | 111 | .. code:: bash 112 | 113 | python -m pip install tox 114 | tox 115 | 116 | If you just want to run the tests directly from the root of the repository: 117 | 118 | .. code:: bash 119 | 120 | python -m pip install hypothesis pytest 121 | # This will test in three ways: via installation from source, 122 | # via cython directly, and creation of a wheel 123 | python -m pytest test.py 124 | 125 | 126 | Package pyroaring as an sdist and wheel. Note that building wheels that have 127 | wide compatibility can be tricky - for releases we rely on `cibuildwheel `_ 128 | to do the heavy lifting across platforms. 129 | 130 | .. code:: bash 131 | 132 | python -m pip install build 133 | python -m build . 134 | 135 | For all the above commands, two environment variables can be used to control the compilation. 136 | 137 | - ``DEBUG=1`` to build pyroaring in debug mode. 138 | - ``ARCHI=`` to build pyroaring for the given platform. The platform may be any keyword 139 | given to the ``-march`` option of gcc (see the 140 | `documentation `__). 141 | Note that cross-compiling for a 32-bit architecture from a 64-bit architecture is not supported. 142 | 143 | Example of use: 144 | 145 | .. 
code:: bash 146 | 147 | DEBUG=1 ARCHI=x86-64 python setup.py build_ext 148 | 149 | 150 | Optimizing the builds for your machine (x64) 151 | -------------------------------------------- 152 | 153 | For recent Intel and AMD (x64) processors under Linux, you may get better performance by requesting that 154 | CRoaring be built for your machine, specifically, when building from source. 155 | Be mindful that when doing so, the generated binary may only run on your machine. 156 | 157 | 158 | .. code:: bash 159 | 160 | ARCHI=native pip install pyroaring --no-binary :all: 161 | 162 | This approach may not work under macOS. 163 | 164 | 165 | Development Notes 166 | ----------------- 167 | 168 | Updating CRoaring 169 | ================= 170 | 171 | The download_amalgamation.py script can be used to download a specific version 172 | of the official CRoaring amalgamation: 173 | 174 | .. code:: bash 175 | 176 | python download_amalgamation.py v0.7.2 177 | 178 | This will update roaring.c and roaring.h. This also means that the dependency 179 | is vendored in and tracked as part of the source repository now. Note that the 180 | __croaring_version__ in croaring_version.pxi is rewritten by the script to match 181 | the new version. 182 | 183 | 184 | Tracking Package and CRoaring versions 185 | ====================================== 186 | 187 | The package version is maintained in the file `pyroaring/version.pxi` - this 188 | can be manually incremented in preparation for releases. This file is read 189 | from in setup.py to specify the version. 190 | 191 | The croaring version is tracked in `pyroaring/croaring_version.pxi` - this is 192 | updated automatically when downloading a new amalgamation. 193 | 194 | 195 | Benchmark 196 | --------- 197 | 198 | ``Pyroaring`` is compared with the built-in ``set`` and the library ``sortedcontainers``. 199 | 200 | The script ``quick_bench.py`` measures the time of different set 201 | operations. 
It uses randomly generated sets of size 1e6 and density 202 | 0.125. For each operation, the average time (in seconds) of 30 tests 203 | is reported. 204 | 205 | The results have been obtained with: 206 | 207 | - CPU AMD Ryzen 7 5700X 208 | - CPython version 3.11.2 209 | - gcc version 12.2.0 210 | - Cython version 3.0.2 211 | - sortedcontainers version 2.4.0 212 | - pyroaring commit `b54769b `__ 213 | 214 | =============================== ===================== ===================== ========== ================== 215 | operation pyroaring (32 bits) pyroaring (64 bits) set sortedcontainers 216 | =============================== ===================== ===================== ========== ================== 217 | range constructor 3.03e-04 3.15e-04 4.09e-02 8.54e-02 218 | ordered list constructor 2.17e-02 3.06e-02 8.21e-02 2.67e-01 219 | list constructor 7.23e-02 6.38e-02 5.65e-02 2.34e-01 220 | ordered array constructor 4.50e-03 nan 6.53e-02 1.75e-01 221 | array constructor 6.51e-02 nan 8.98e-02 2.40e-01 222 | element addition 4.33e-07 2.19e-07 2.13e-07 3.82e-07 223 | element removal 2.69e-07 1.67e-07 2.33e-07 2.83e-07 224 | membership test 1.59e-07 1.33e-07 1.42e-07 3.22e-07 225 | union 1.07e-04 1.04e-04 1.06e-01 5.69e-01 226 | intersection 6.00e-04 6.26e-04 4.66e-02 1.03e-01 227 | difference 7.24e-05 8.34e-05 7.94e-02 2.34e-01 228 | symmetric difference 8.32e-05 1.03e-04 1.31e-01 4.19e-01 229 | equality test 3.52e-05 3.21e-05 3.18e-02 3.29e-02 230 | subset test 4.15e-05 4.41e-05 3.20e-02 3.20e-02 231 | conversion to list 2.92e-02 3.08e-02 3.16e-02 3.53e-02 232 | pickle dump & load 1.64e-04 1.76e-04 1.37e-01 3.53e-01 233 | "naive" conversion to array 2.46e-02 2.57e-02 6.49e-02 5.73e-02 234 | "optimized" conversion to array 8.73e-04 1.45e-03 nan nan 235 | selection 8.83e-07 2.49e-06 nan 8.18e-06 236 | contiguous slice 3.31e-03 6.49e-03 nan 4.32e-03 237 | slice 1.58e-03 2.74e-03 nan 1.29e-01 238 | small slice 6.62e-05 1.15e-04 nan 5.43e-03 239 | =============================== 
#!python
"""
Taken from https://github.com/cython/cython/wiki/FAQ#how-can-i-run-doctests-in-cython-code-pyx-files

Cython-compatible wrapper for doctest.testmod().

Usage example, assuming a Cython module mymod.pyx is compiled.
This is run from the command line, passing a command to Python:
python -c "import cydoctest, mymod; cydoctest.testmod(mymod)"

(This still won't let a Cython module run its own doctests
when called with "python mymod.py", but it's pretty close.
Further options can be passed to testmod() as desired, e.g.
verbose=True.)
"""

import sys
import doctest
import inspect


def _from_module(module, object):
    """
    Return true if the given object is defined in the given module.

    :param module: the module to test against; ``None`` matches everything.
    :param object: the candidate object (function, class, property, ...).
    :raises ValueError: if none of the checks below applies to ``object``.
    """
    if module is None:
        return True
    elif inspect.getmodule(object) is not None:
        return module is inspect.getmodule(object)
    elif inspect.isfunction(object):
        # Python 3 exposes a function's globals as ``__globals__``; the
        # Python 2 spelling ``func_globals`` no longer exists.
        return module.__dict__ is object.__globals__
    elif inspect.isclass(object):
        return module.__name__ == object.__module__
    elif hasattr(object, '__module__'):
        return module.__name__ == object.__module__
    elif isinstance(object, property):
        return True  # [XX] no way to be sure.
    else:
        raise ValueError("object must be a class or function")


def fix_module_doctest(module):
    """
    Extract docstrings from cython functions, that would be skipped by doctest
    otherwise.

    ``module.__test__`` is replaced by a fresh dict mapping the name of every
    builtin-like attribute that has a string docstring and belongs to
    ``module`` (according to ``_from_module``) to that docstring.
    """
    module.__test__ = {}
    for name in dir(module):
        value = getattr(module, name)
        if inspect.isbuiltin(value) and isinstance(value.__doc__, str) and _from_module(module, value):
            module.__test__[name] = value.__doc__


def testmod(m=None, *args, **kwargs):
    """
    Fix a Cython module's doctests, then call doctest.testmod()

    All other arguments are passed directly to doctest.testmod().

    :param m: the module to test; ``None`` means the ``__main__`` module.
    :returns: the result object returned by ``doctest.testmod()``.

    Exits the interpreter through ``sys.exit`` with a message when at least
    one doctest failed.
    """
    if m is None:
        # Resolve the advertised default here so that fix_module_doctest()
        # receives a real module instead of None.
        m = sys.modules['__main__']
    fix_module_doctest(m)
    result = doctest.testmod(m, *args, **kwargs)
    if result.failed > 0:
        sys.exit('%d test(s) failed' % result.failed)
    return result


if __name__ == "__main__":
    import pyroaring
    testmod(pyroaring)
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pyroaring documentation build configuration file, created by 5 | # sphinx-quickstart on Sun May 7 16:11:09 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | # import os 21 | # import sys 22 | # sys.path.insert(0, os.path.abspath('.')) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.doctest', 37 | 'sphinx.ext.coverage', 38 | 'sphinx.ext.mathjax', 39 | 'sphinx.ext.viewcode', 40 | ] 41 | 42 | # Add any paths that contain templates here, relative to this directory. 43 | templates_path = ['_templates'] 44 | 45 | # The suffix(es) of source filenames. 
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = 'pyroaring'
copyright = '2017, Tom Cornebize'
author = 'Tom Cornebize'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '1.0.1'
# The full version, including alpha/beta/rc tags.
release = version

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# An explicit language code is used instead of None: recent Sphinx releases
# reject None with a configuration warning before falling back to 'en'.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'pyroaringdoc'


# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'pyroaring.tex', 'pyroaring Documentation',
     'Tom Cornebize', 'manual'),
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'pyroaring', 'pyroaring Documentation',
     [author], 1)
]


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files.
"""
Helper script to download a specific release amalgamation file for CRoaring.

Usage: python download_amalgamation.py <version>

The version needs to be the specific release tag on github.

"""
import os
import sys
from urllib.request import urlretrieve

# Fail with a readable usage message instead of a bare IndexError when the
# release tag is missing from the command line.
if len(sys.argv) < 2:
    sys.exit("Usage: python download_amalgamation.py <version>")

version = sys.argv[1]

release = f"https://github.com/RoaringBitmap/CRoaring/releases/download/{version}/"

print(f"Downloading version {version} of the croaring amalgamation")

files = ["roaring.c", "roaring.h"]

# Each amalgamation file is fetched from the release URL and written over the
# copy of the same name in the pyroaring/ directory.
for file in files:
    urlretrieve(release + file, os.path.join("pyroaring", file))

# Record the downloaded tag in pyroaring/croaring_version.pxi.
with open(os.path.join("pyroaring", "croaring_version.pxi"), "w") as f:
    f.write(f"__croaring_version__ = \"{version}\"")
50 | 51 | def __len__(self) -> int: 52 | ... 53 | 54 | def __lt__(self, other: AbstractBitMap) -> bool: 55 | ... 56 | 57 | def __le__(self, other: AbstractBitMap) -> bool: 58 | ... 59 | 60 | def __eq__(self, other: object) -> bool: 61 | ... 62 | 63 | def __ne__(self, other: object) -> bool: 64 | ... 65 | 66 | def __gt__(self, other: AbstractBitMap) -> bool: 67 | ... 68 | 69 | def __ge__(self, other: AbstractBitMap) -> bool: 70 | ... 71 | 72 | def contains_range(self, range_start: int, range_end: int) -> bool: 73 | ... 74 | 75 | def range_cardinality(self, range_start: int, range_end: int) -> int: 76 | ... 77 | 78 | def iter_equal_or_larger(self, val: int) -> Iterator[int]: 79 | ... 80 | 81 | def __iter__(self) -> Iterator[int]: 82 | ... 83 | 84 | def flip(self, start: int, end: int) -> Self: 85 | ... 86 | 87 | def shift(self, offset: int) -> Self: 88 | ... 89 | 90 | def copy(self) -> Self: 91 | ... 92 | 93 | def isdisjoint(self, other: AbstractBitMap) -> bool: 94 | ... 95 | 96 | def issubset(self, other: AbstractBitMap) -> bool: 97 | ... 98 | 99 | def issuperset(self, other: AbstractBitMap) -> bool: 100 | ... 101 | 102 | # Note: `difference` and others are sort-of set up like they're meant to be 103 | # static methods (accepting _only_ `*bitmaps` in the underlying Cython 104 | # code), however at runtime they require at least one argument and return an 105 | # instance of the same type as that value -- like instance methods. Typing 106 | # them as instances methods ensures that mypy matches this behaviour (other 107 | # type checkers untested), even when used statically as their docstrings 108 | # suggest. 109 | 110 | def difference(self, *bitmaps: AbstractBitMap) -> Self: 111 | ... 112 | 113 | def symmetric_difference(self, other: AbstractBitMap) -> Self: 114 | ... 115 | 116 | def union(self, *bitmaps: AbstractBitMap) -> Self: 117 | ... 118 | 119 | def intersection(self, *bitmaps: AbstractBitMap) -> Self: 120 | ... 
121 | 122 | def __or__(self, other: AbstractBitMap) -> Self: 123 | ... 124 | 125 | def __and__(self, other: AbstractBitMap) -> Self: 126 | ... 127 | 128 | def __xor__(self, other: AbstractBitMap) -> Self: 129 | ... 130 | 131 | def __sub__(self, other: AbstractBitMap) -> Self: 132 | ... 133 | 134 | def union_cardinality(self, other: AbstractBitMap) -> int: 135 | ... 136 | 137 | def intersection_cardinality(self, other: AbstractBitMap) -> int: 138 | ... 139 | 140 | def difference_cardinality(self, other: AbstractBitMap) -> int: 141 | ... 142 | 143 | def symmetric_difference_cardinality(self, other: AbstractBitMap) -> int: 144 | ... 145 | 146 | def intersect(self, other: AbstractBitMap) -> bool: 147 | ... 148 | 149 | def jaccard_index(self, other: AbstractBitMap) -> float: 150 | ... 151 | 152 | def get_statistics(self) -> _Statistics: 153 | ... 154 | 155 | def min(self) -> int: 156 | ... 157 | 158 | def max(self) -> int: 159 | ... 160 | 161 | def rank(self, value: int) -> int: 162 | ... 163 | 164 | def next_set_bit(self, value: int) -> int: 165 | ... 166 | 167 | @overload 168 | def __getitem__(self, value: int) -> int: 169 | ... 170 | 171 | @overload 172 | def __getitem__(self, value: slice) -> Self: 173 | ... 174 | 175 | def serialize(self) -> bytes: 176 | ... 177 | 178 | @classmethod 179 | def deserialize(cls, buff: bytes) -> Self: 180 | ... 181 | 182 | def __getstate__(self) -> bytes: 183 | ... 184 | 185 | def __setstate__(self, state: bytes) -> Self: 186 | ... 187 | 188 | def __sizeof__(self) -> int: 189 | ... 190 | 191 | def to_array(self) -> array.array[int]: 192 | ... 193 | 194 | 195 | class FrozenBitMap(AbstractBitMap): 196 | def __hash__(self) -> int: 197 | ... 198 | 199 | 200 | class BitMap(AbstractBitMap): 201 | def add(self, value: int) -> None: 202 | ... 203 | 204 | def add_checked(self, value: int) -> None: 205 | ... 206 | 207 | def update(self, *all_values: Iterable[int]) -> None: 208 | ... 209 | 210 | def discard(self, value: int) -> None: 211 | ... 
212 | 213 | def remove(self, value: int) -> None: 214 | ... 215 | 216 | def __ior__(self, other: AbstractBitMap) -> Self: 217 | ... 218 | 219 | def __iand__(self, other: AbstractBitMap) -> Self: 220 | ... 221 | 222 | def __ixor__(self, other: AbstractBitMap) -> Self: 223 | ... 224 | 225 | def __isub__(self, other: AbstractBitMap) -> Self: 226 | ... 227 | 228 | def intersection_update(self, *all_values: Iterable[int]) -> None: 229 | ... 230 | 231 | def difference_update(self, *others: AbstractBitMap) -> None: 232 | ... 233 | 234 | def symmetric_difference_update(self, other: AbstractBitMap) -> None: 235 | ... 236 | 237 | def overwrite(self, other: AbstractBitMap) -> None: 238 | ... 239 | 240 | def clear(self) -> None: 241 | ... 242 | 243 | def pop(self) -> int: 244 | ... 245 | 246 | def flip_inplace(self, start: int, end: int) -> None: 247 | ... 248 | 249 | def add_range(self, range_start: int, range_end: int) -> None: 250 | ... 251 | 252 | def remove_range(self, range_start: int, range_end: int) -> None: 253 | ... 254 | 255 | class AbstractBitMap64: 256 | def __init__(self, values: Iterable[int] | None = None, copy_on_write: bool = False, optimize: bool = True) -> None: 257 | ... 258 | 259 | @property 260 | def copy_on_write(self) -> bool: 261 | ... 262 | 263 | def run_optimize(self) -> bool: 264 | ... 265 | 266 | def shrink_to_fit(self) -> int: 267 | ... 268 | 269 | def __contains__(self, value: int) -> bool: 270 | ... 271 | 272 | def __bool__(self) -> bool: 273 | ... 274 | 275 | def __len__(self) -> int: 276 | ... 277 | 278 | def __lt__(self, other: AbstractBitMap64) -> bool: 279 | ... 280 | 281 | def __le__(self, other: AbstractBitMap64) -> bool: 282 | ... 283 | 284 | def __eq__(self, other: object) -> bool: 285 | ... 286 | 287 | def __ne__(self, other: object) -> bool: 288 | ... 289 | 290 | def __gt__(self, other: AbstractBitMap64) -> bool: 291 | ... 292 | 293 | def __ge__(self, other: AbstractBitMap64) -> bool: 294 | ... 
295 | 296 | def contains_range(self, range_start: int, range_end: int) -> bool: 297 | ... 298 | 299 | def range_cardinality(self, range_start: int, range_end: int) -> int: 300 | ... 301 | 302 | def iter_equal_or_larger(self, val: int) -> Iterator[int]: 303 | ... 304 | 305 | def __iter__(self) -> Iterator[int]: 306 | ... 307 | 308 | def flip(self, start: int, end: int) -> Self: 309 | ... 310 | 311 | def shift(self, offset: int) -> Self: 312 | ... 313 | 314 | def copy(self) -> Self: 315 | ... 316 | 317 | def isdisjoint(self, other: AbstractBitMap64) -> bool: 318 | ... 319 | 320 | def issubset(self, other: AbstractBitMap64) -> bool: 321 | ... 322 | 323 | def issuperset(self, other: AbstractBitMap64) -> bool: 324 | ... 325 | 326 | def difference(self, *bitmaps: AbstractBitMap64) -> Self: 327 | ... 328 | 329 | def symmetric_difference(self, other: AbstractBitMap64) -> Self: 330 | ... 331 | 332 | def union(self, *bitmaps: AbstractBitMap64) -> Self: 333 | ... 334 | 335 | def intersection(self, *bitmaps: AbstractBitMap64) -> Self: 336 | ... 337 | 338 | def __or__(self, other: AbstractBitMap64) -> Self: 339 | ... 340 | 341 | def __and__(self, other: AbstractBitMap64) -> Self: 342 | ... 343 | 344 | def __xor__(self, other: AbstractBitMap64) -> Self: 345 | ... 346 | 347 | def __sub__(self, other: AbstractBitMap64) -> Self: 348 | ... 349 | 350 | def union_cardinality(self, other: AbstractBitMap64) -> int: 351 | ... 352 | 353 | def intersection_cardinality(self, other: AbstractBitMap64) -> int: 354 | ... 355 | 356 | def difference_cardinality(self, other: AbstractBitMap64) -> int: 357 | ... 358 | 359 | def symmetric_difference_cardinality(self, other: AbstractBitMap64) -> int: 360 | ... 361 | 362 | def intersect(self, other: AbstractBitMap64) -> bool: 363 | ... 364 | 365 | def jaccard_index(self, other: AbstractBitMap64) -> float: 366 | ... 367 | 368 | def get_statistics(self) -> _Statistics: 369 | ... 370 | 371 | def min(self) -> int: 372 | ... 
373 | 374 | def max(self) -> int: 375 | ... 376 | 377 | def rank(self, value: int) -> int: 378 | ... 379 | 380 | def next_set_bit(self, value: int) -> int: 381 | ... 382 | 383 | @overload 384 | def __getitem__(self, value: int) -> int: 385 | ... 386 | 387 | @overload 388 | def __getitem__(self, value: slice) -> Self: 389 | ... 390 | 391 | def serialize(self) -> bytes: 392 | ... 393 | 394 | @classmethod 395 | def deserialize(cls, buff: bytes) -> Self: 396 | ... 397 | 398 | def __getstate__(self) -> bytes: 399 | ... 400 | 401 | def __setstate__(self, state: bytes) -> Self: 402 | ... 403 | 404 | def __sizeof__(self) -> int: 405 | ... 406 | 407 | def to_array(self) -> array.array[int]: 408 | ... 409 | 410 | 411 | class FrozenBitMap64(AbstractBitMap64): 412 | def __hash__(self) -> int: 413 | ... 414 | 415 | 416 | class BitMap64(AbstractBitMap64): 417 | def add(self, value: int) -> None: 418 | ... 419 | 420 | def add_checked(self, value: int) -> None: 421 | ... 422 | 423 | def update(self, *all_values: Iterable[int]) -> None: 424 | ... 425 | 426 | def discard(self, value: int) -> None: 427 | ... 428 | 429 | def remove(self, value: int) -> None: 430 | ... 431 | 432 | def __ior__(self, other: AbstractBitMap64) -> Self: 433 | ... 434 | 435 | def __iand__(self, other: AbstractBitMap64) -> Self: 436 | ... 437 | 438 | def __ixor__(self, other: AbstractBitMap64) -> Self: 439 | ... 440 | 441 | def __isub__(self, other: AbstractBitMap64) -> Self: 442 | ... 443 | 444 | def intersection_update(self, *all_values: Iterable[int]) -> None: 445 | ... 446 | 447 | def difference_update(self, *others: AbstractBitMap64) -> None: 448 | ... 449 | 450 | def symmetric_difference_update(self, other: AbstractBitMap64) -> None: 451 | ... 452 | 453 | def overwrite(self, other: AbstractBitMap64) -> None: 454 | ... 455 | 456 | def clear(self) -> None: 457 | ... 458 | 459 | def pop(self) -> int: 460 | ... 461 | 462 | def flip_inplace(self, start: int, end: int) -> None: 463 | ... 
464 | 465 | def add_range(self, range_start: int, range_end: int) -> None: 466 | ... 467 | 468 | def remove_range(self, range_start: int, range_end: int) -> None: 469 | ... -------------------------------------------------------------------------------- /pyroaring/abstract_bitmap.pxi: -------------------------------------------------------------------------------- 1 | cimport croaring 2 | from libc.stdint cimport uint32_t, uint64_t, int64_t 3 | from libcpp cimport bool 4 | from libcpp.vector cimport vector 5 | from libc.stdlib cimport free, malloc 6 | 7 | from cpython cimport array 8 | import array 9 | 10 | try: 11 | range = xrange 12 | except NameError: # python 3 13 | pass 14 | 15 | cdef croaring.roaring_bitmap_t *deserialize_ptr(bytes buff): 16 | cdef croaring.roaring_bitmap_t *ptr 17 | cdef const char *reason_failure = NULL 18 | buff_size = len(buff) 19 | ptr = croaring.roaring_bitmap_portable_deserialize_safe(buff, buff_size) 20 | if ptr == NULL: 21 | raise ValueError("Could not deserialize bitmap") 22 | # Validate the bitmap 23 | if not croaring.roaring_bitmap_internal_validate(ptr, &reason_failure): 24 | # If validation fails, free the bitmap and raise an exception 25 | croaring.roaring_bitmap_free(ptr) 26 | raise ValueError(f"Invalid bitmap after deserialization: {reason_failure.decode('utf-8')}") 27 | return ptr 28 | 29 | cdef croaring.roaring64_bitmap_t *deserialize64_ptr(bytes buff): 30 | cdef croaring.roaring64_bitmap_t *ptr 31 | cdef const char *reason_failure = NULL 32 | buff_size = len(buff) 33 | ptr = croaring.roaring64_bitmap_portable_deserialize_safe(buff, buff_size) 34 | if ptr == NULL: 35 | raise ValueError("Could not deserialize bitmap") 36 | # Validate the bitmap 37 | if not croaring.roaring64_bitmap_internal_validate(ptr, &reason_failure): 38 | # If validation fails, free the bitmap and raise an exception 39 | croaring.roaring64_bitmap_free(ptr) 40 | raise ValueError(f"Invalid bitmap after deserialization: {reason_failure.decode('utf-8')}") 
41 | return ptr 42 | 43 | def _string_rep(bm): 44 | skip_rows = len(bm) > 500 #this is the cutoff number for the truncating to kick in. 45 | table_max_width = 80 # this isn't the length of the entire output, it's only for the numeric part 46 | num_lines_if_skipping = 5 # the number of lines to show in the beginning and the end when output is being truncated 47 | 48 | head = bm.__class__.__name__ + '([' 49 | row_start_buffer = ' ' * len(head) 50 | tail = '])' 51 | 52 | try: 53 | maxval = bm.max() 54 | except ValueError: 55 | # empty bitmap 56 | return head + tail 57 | 58 | element_max_length = len(str(maxval)) 59 | column_width = element_max_length + 2 60 | 61 | num_columns = table_max_width // column_width 62 | 63 | num_rows = len(bm) / float(num_columns) 64 | if not num_rows.is_integer(): 65 | num_rows += 1 66 | num_rows = int(num_rows) 67 | rows = [] 68 | row_idx = 0 69 | skipped = False 70 | while row_idx < num_rows: 71 | row_ints = bm[row_idx * num_columns:(row_idx + 1) * num_columns] 72 | 73 | line = [] 74 | for i in row_ints: 75 | s = str(i) 76 | if num_rows == 1: 77 | # no padding if all numbers fit on a single line 78 | line.append(s) 79 | else: 80 | line.append(' ' * (element_max_length - len(s)) + s) 81 | 82 | if row_idx == 0: 83 | prefix = head 84 | else: 85 | prefix = row_start_buffer 86 | rows.append(prefix + ', '.join(line) + ',') 87 | row_idx += 1 88 | if skip_rows and not skipped and row_idx >= num_lines_if_skipping: 89 | rows.append((' ' * ((table_max_width + len(head)) // 2)) + '...') 90 | skipped = True 91 | row_idx = num_rows - num_lines_if_skipping 92 | 93 | rows[-1] = rows[-1].rstrip(',') # remove trailing comma from the last line 94 | return '\n'.join(rows) + tail 95 | 96 | cdef class AbstractBitMap: 97 | """ 98 | An efficient and light-weight ordered set of 32 bits integers. 
99 | """ 100 | cdef croaring.roaring_bitmap_t* _c_bitmap 101 | cdef int64_t _h_val 102 | 103 | def __cinit__(self, values=None, copy_on_write=False, optimize=True, no_init=False): 104 | if no_init: 105 | assert values is None and not copy_on_write 106 | return 107 | cdef vector[uint32_t] buff_vect 108 | cdef unsigned[:] buff 109 | if values is None: 110 | self._c_bitmap = croaring.roaring_bitmap_create() 111 | elif isinstance(values, AbstractBitMap): 112 | self._c_bitmap = croaring.roaring_bitmap_copy((values)._c_bitmap) 113 | self._h_val = (values)._h_val 114 | elif isinstance(values, range): 115 | _, (start, stop, step) = values.__reduce__() 116 | if step < 0: 117 | values = range(min(values), max(values)+1, -step) 118 | _, (start, stop, step) = values.__reduce__() 119 | if start >= stop: 120 | self._c_bitmap = croaring.roaring_bitmap_create() 121 | else: 122 | self._c_bitmap = croaring.roaring_bitmap_from_range(start, stop, step) 123 | elif isinstance(values, array.array): 124 | size = len(values) 125 | if size == 0: 126 | self._c_bitmap = croaring.roaring_bitmap_create() 127 | else: 128 | buff = values 129 | self._c_bitmap = croaring.roaring_bitmap_of_ptr(size, &buff[0]) 130 | else: 131 | try: 132 | size = len(values) 133 | except TypeError: # object has no length, creating a list 134 | values = list(values) 135 | size = len(values) 136 | self._c_bitmap = croaring.roaring_bitmap_create() 137 | if size > 0: 138 | buff_vect = values 139 | croaring.roaring_bitmap_add_many(self._c_bitmap, size, &buff_vect[0]) 140 | if not isinstance(values, AbstractBitMap): 141 | croaring.roaring_bitmap_set_copy_on_write(self._c_bitmap, copy_on_write) 142 | self._h_val = 0 143 | if optimize: 144 | self.run_optimize() 145 | self.shrink_to_fit() 146 | 147 | def __init__(self, values=None, copy_on_write=False, optimize=True): 148 | """ 149 | Construct a AbstractBitMap object, either empry or from an iterable. 150 | 151 | Copy on write can be enabled with the field copy_on_write. 
152 | 153 | >>> BitMap() 154 | BitMap([]) 155 | >>> BitMap([1, 123456789, 27]) 156 | BitMap([1, 27, 123456789]) 157 | >>> BitMap([1, 123456789, 27], copy_on_write=True) 158 | BitMap([1, 27, 123456789]) 159 | """ 160 | 161 | cdef from_ptr(self, croaring.roaring_bitmap_t *ptr) noexcept: 162 | """ 163 | Return an instance of AbstractBitMap (or one of its subclasses) initialized with the given pointer. 164 | 165 | FIXME: this should be a classmethod, but this is (currently) impossible for cdef methods. 166 | See https://groups.google.com/forum/#!topic/cython-users/FLHiLzzKqj4 167 | """ 168 | bm = self.__class__.__new__(self.__class__, no_init=True) 169 | (bm)._c_bitmap = ptr 170 | return bm 171 | 172 | @property 173 | def copy_on_write(self): 174 | """ 175 | True if and only if the bitmap has "copy on write" optimization enabled. 176 | 177 | >>> BitMap(copy_on_write=False).copy_on_write 178 | False 179 | >>> BitMap(copy_on_write=True).copy_on_write 180 | True 181 | """ 182 | return croaring.roaring_bitmap_get_copy_on_write(self._c_bitmap) 183 | 184 | def run_optimize(self): 185 | return croaring.roaring_bitmap_run_optimize(self._c_bitmap) 186 | 187 | def shrink_to_fit(self): 188 | return croaring.roaring_bitmap_shrink_to_fit(self._c_bitmap) 189 | 190 | def __dealloc__(self): 191 | if self._c_bitmap is not NULL: 192 | croaring.roaring_bitmap_free(self._c_bitmap) 193 | 194 | def _check_compatibility(self, AbstractBitMap other): 195 | if other is None: 196 | raise TypeError('Argument has incorrect type (expected pyroaring.AbstractBitMap, got None)') 197 | if self.copy_on_write != other.copy_on_write: 198 | raise ValueError('Cannot have interactions between bitmaps with and without copy_on_write.\n') 199 | 200 | def __contains__(self, uint32_t value): 201 | return croaring.roaring_bitmap_contains(self._c_bitmap, value) 202 | 203 | def __bool__(self): 204 | return not croaring.roaring_bitmap_is_empty(self._c_bitmap) 205 | 206 | def __len__(self): 207 | return 
croaring.roaring_bitmap_get_cardinality(self._c_bitmap) 208 | 209 | def __lt__(self, AbstractBitMap other): 210 | self._check_compatibility(other) 211 | return croaring.roaring_bitmap_is_strict_subset((self)._c_bitmap, (other)._c_bitmap) 212 | 213 | def __le__(self, AbstractBitMap other): 214 | self._check_compatibility(other) 215 | return croaring.roaring_bitmap_is_subset((self)._c_bitmap, (other)._c_bitmap) 216 | 217 | def __eq__(self, object other): 218 | if not isinstance(other, AbstractBitMap): 219 | return NotImplemented 220 | self._check_compatibility(other) 221 | return croaring.roaring_bitmap_equals((self)._c_bitmap, (other)._c_bitmap) 222 | 223 | def __ne__(self, object other): 224 | if not isinstance(other, AbstractBitMap): 225 | return NotImplemented 226 | self._check_compatibility(other) 227 | return not croaring.roaring_bitmap_equals((self)._c_bitmap, (other)._c_bitmap) 228 | 229 | def __gt__(self, AbstractBitMap other): 230 | self._check_compatibility(other) 231 | return croaring.roaring_bitmap_is_strict_subset((other)._c_bitmap, (self)._c_bitmap) 232 | 233 | def __ge__(self, AbstractBitMap other): 234 | self._check_compatibility(other) 235 | return croaring.roaring_bitmap_is_subset((other)._c_bitmap, (self)._c_bitmap) 236 | 237 | def contains_range(self, uint64_t range_start, uint64_t range_end): 238 | """ 239 | Check whether a range of values from range_start (included) to range_end (excluded) is present. 
240 | 241 | >>> bm = BitMap([5, 6, 7, 8, 9, 10]) 242 | >>> bm.contains_range(6, 9) 243 | True 244 | >>> bm.contains_range(8, 12) 245 | False 246 | """ 247 | if range_end <= range_start or range_end == 0 or range_start >= 2**32: 248 | return True # empty range 249 | if range_end >= 2**32: 250 | range_end = 2**32 251 | return croaring.roaring_bitmap_contains_range(self._c_bitmap, range_start, range_end) 252 | 253 | def range_cardinality(self, uint64_t range_start, uint64_t range_end): 254 | """ 255 | Return cardinality from range_start (included) to range_end (excluded). 256 | 257 | >>> bm = BitMap(range(10)) 258 | >>> bm.range_cardinality(0, 10) 259 | 10 260 | >>> bm.range_cardinality(10, 100) 261 | 0 262 | """ 263 | if range_end < range_start: 264 | raise AssertionError('range_end must not be lower than range_start') 265 | return croaring.roaring_bitmap_range_cardinality(self._c_bitmap, range_start, range_end) 266 | 267 | cdef compute_hash(self): 268 | cdef int64_t h_val = 0 269 | cdef uint32_t i, count, max_count=256 270 | cdef croaring.roaring_uint32_iterator_t *iterator = croaring.roaring_iterator_create(self._c_bitmap) 271 | cdef uint32_t *buff = malloc(max_count*4) 272 | while True: 273 | count = croaring.roaring_uint32_iterator_read(iterator, buff, max_count) 274 | i = 0 275 | while i < count: 276 | h_val = (h_val << 2) + buff[i] + 1 277 | # TODO find a good hash formula 278 | # This one should be better, but is too long: 279 | # h_val = ((h_val<<16) + buff[i]) % 1748104473534059 280 | i += 1 281 | if count != max_count: 282 | break 283 | croaring.roaring_uint32_iterator_free(iterator) 284 | free(buff) 285 | if not self: 286 | return -1 287 | return h_val 288 | 289 | def __hash__(self): 290 | if self._h_val == 0: 291 | self._h_val = self.compute_hash() 292 | return self._h_val 293 | 294 | def iter_equal_or_larger(self, uint32_t val): 295 | """ 296 | Iterate over items in the bitmap equal or larger than a given value. 
297 | 298 | >>> bm = BitMap([1, 2, 4]) 299 | >>> list(bm.iter_equal_or_larger(2)) 300 | [2, 4] 301 | """ 302 | cdef croaring.roaring_uint32_iterator_t *iterator = croaring.roaring_iterator_create(self._c_bitmap) 303 | valid = croaring.roaring_uint32_iterator_move_equalorlarger(iterator, val) 304 | if not valid: 305 | return 306 | try: 307 | while iterator.has_value: 308 | yield iterator.current_value 309 | croaring.roaring_uint32_iterator_advance(iterator) 310 | finally: 311 | croaring.roaring_uint32_iterator_free(iterator) 312 | 313 | def __iter__(self): 314 | cdef croaring.roaring_uint32_iterator_t *iterator = croaring.roaring_iterator_create(self._c_bitmap) 315 | try: 316 | while iterator.has_value: 317 | yield iterator.current_value 318 | croaring.roaring_uint32_iterator_advance(iterator) 319 | finally: 320 | croaring.roaring_uint32_iterator_free(iterator) 321 | 322 | def __repr__(self): 323 | return str(self) 324 | 325 | def __str__(self): 326 | return _string_rep(self) 327 | 328 | def flip(self, uint64_t start, uint64_t end): 329 | """ 330 | Compute the negation of the bitmap within the specified interval. 331 | 332 | Areas outside the range are passed unchanged. 333 | 334 | >>> bm = BitMap([3, 12]) 335 | >>> bm.flip(10, 15) 336 | BitMap([3, 10, 11, 13, 14]) 337 | """ 338 | return self.from_ptr(croaring.roaring_bitmap_flip(self._c_bitmap, start, end)) 339 | 340 | def shift(self, int64_t offset): 341 | """ 342 | Add the value 'offset' to each and every value of the bitmap. 343 | 344 | If offset + element is outside of the range [0,2^32), that the element will be dropped. 345 | 346 | >>> bm = BitMap([3, 12]) 347 | >>> bm.shift(21) 348 | BitMap([24, 33]) 349 | """ 350 | return self.from_ptr(croaring.roaring_bitmap_add_offset(self._c_bitmap, offset)) 351 | 352 | def copy(self): 353 | """ 354 | Return a copy of a set. 
355 | 356 | >>> bm = BitMap([3, 12]) 357 | >>> bm2 = bm.copy() 358 | >>> bm == bm2 359 | True 360 | >>> bm.add(1) 361 | >>> bm == bm2 362 | False 363 | 364 | """ 365 | return self.__class__(self) 366 | 367 | def isdisjoint(self, other): 368 | """ 369 | Return True if two sets have a null intersection. 370 | 371 | >>> BitMap([1, 2]).isdisjoint(BitMap([3, 4])) 372 | True 373 | 374 | >>> BitMap([1, 2, 3]).isdisjoint(BitMap([3, 4])) 375 | False 376 | 377 | """ 378 | return self.intersection_cardinality(other) == 0 379 | 380 | def issubset(self, other): 381 | """ 382 | Report whether another set contains this set. 383 | 384 | >>> BitMap([1, 2]).issubset(BitMap([1, 2, 3, 4])) 385 | True 386 | 387 | >>> BitMap([1, 2]).issubset(BitMap([3, 4])) 388 | False 389 | 390 | """ 391 | return self <= other 392 | 393 | def issuperset(self, other): 394 | """ 395 | Report whether this set contains another set. 396 | 397 | >>> BitMap([1, 2, 3, 4]).issuperset(BitMap([1, 2])) 398 | True 399 | 400 | >>> BitMap([1, 2]).issuperset(BitMap([3, 4])) 401 | False 402 | 403 | """ 404 | return self >= other 405 | 406 | def difference(*bitmaps): 407 | """ 408 | Return the difference of two or more sets as a new set. 409 | 410 | (i.e. all elements that are in this set but not the others.) 411 | 412 | >>> BitMap.difference(BitMap([1, 2, 3]), BitMap([2, 20]), BitMap([3, 30])) 413 | BitMap([1]) 414 | 415 | """ 416 | size = len(bitmaps) 417 | cdef AbstractBitMap result, bm 418 | if size <= 1: 419 | return bitmaps[0].copy() 420 | elif size == 2: 421 | return bitmaps[0] - bitmaps[1] 422 | else: 423 | result = BitMap(bitmaps[0]) 424 | for bm in bitmaps[1:]: 425 | result -= bm 426 | return bitmaps[0].__class__(result) 427 | 428 | 429 | def symmetric_difference(self, other): 430 | """ 431 | Return the symmetric difference of two sets as a new set. 432 | 433 | (i.e. all elements that are in exactly one of the sets.) 
434 | 435 | >>> BitMap([1, 2, 3]).symmetric_difference(BitMap([2, 3, 4])) 436 | BitMap([1, 4]) 437 | """ 438 | return self.__xor__(other) 439 | 440 | def union(*bitmaps): 441 | """ 442 | Return the union of the bitmaps. 443 | 444 | >>> BitMap.union(BitMap([3, 12]), BitMap([5]), BitMap([0, 10, 12])) 445 | BitMap([0, 3, 5, 10, 12]) 446 | """ 447 | size = len(bitmaps) 448 | cdef croaring.roaring_bitmap_t *result 449 | cdef AbstractBitMap bm 450 | cdef vector[const croaring.roaring_bitmap_t*] buff 451 | if size <= 1: 452 | return bitmaps[0].copy() 453 | elif size == 2: 454 | return bitmaps[0] | bitmaps[1] 455 | else: 456 | for bm in bitmaps: 457 | bitmaps[0]._check_compatibility(bm) 458 | buff.push_back(bm._c_bitmap) 459 | result = croaring.roaring_bitmap_or_many(size, &buff[0]) 460 | return (bitmaps[0].__class__()).from_ptr(result) # FIXME to change when from_ptr is a classmethod 461 | 462 | def intersection(*bitmaps): # FIXME could be more efficient 463 | """ 464 | Return the intersection of the bitmaps. 
465 | 466 | >>> BitMap.intersection(BitMap(range(0, 15)), BitMap(range(5, 20)), BitMap(range(10, 25))) 467 | BitMap([10, 11, 12, 13, 14]) 468 | """ 469 | size = len(bitmaps) 470 | cdef AbstractBitMap result, bm 471 | if size <= 1: 472 | return bitmaps[0].copy() 473 | elif size == 2: 474 | return bitmaps[0] & bitmaps[1] 475 | else: 476 | result = BitMap(bitmaps[0]) 477 | for bm in bitmaps[1:]: 478 | result &= bm 479 | return bitmaps[0].__class__(result) 480 | 481 | cdef binary_op(self, AbstractBitMap other, (croaring.roaring_bitmap_t*)func(const croaring.roaring_bitmap_t*, const croaring.roaring_bitmap_t*) noexcept) noexcept: 482 | cdef croaring.roaring_bitmap_t *r = func(self._c_bitmap, other._c_bitmap) 483 | return self.from_ptr(r) 484 | 485 | def __or__(self, other): 486 | self._check_compatibility(other) 487 | return (self).binary_op(other, croaring.roaring_bitmap_or) 488 | 489 | def __and__(self, other): 490 | self._check_compatibility(other) 491 | return (self).binary_op(other, croaring.roaring_bitmap_and) 492 | 493 | def __xor__(self, other): 494 | self._check_compatibility(other) 495 | return (self).binary_op(other, croaring.roaring_bitmap_xor) 496 | 497 | def __sub__(self, other): 498 | self._check_compatibility(other) 499 | return (self).binary_op(other, croaring.roaring_bitmap_andnot) 500 | 501 | def union_cardinality(self, AbstractBitMap other): 502 | """ 503 | Return the number of elements in the union of the two bitmaps. 504 | 505 | It is equivalent to len(self | other), but faster. 506 | 507 | >>> BitMap([3, 12]).union_cardinality(AbstractBitMap([3, 5, 8])) 508 | 4 509 | """ 510 | self._check_compatibility(other) 511 | return croaring.roaring_bitmap_or_cardinality(self._c_bitmap, other._c_bitmap) 512 | 513 | def intersection_cardinality(self, AbstractBitMap other): 514 | """ 515 | Return the number of elements in the intersection of the two bitmaps. 516 | 517 | It is equivalent to len(self & other), but faster. 
518 | 519 | >>> BitMap([3, 12]).intersection_cardinality(BitMap([3, 5, 8])) 520 | 1 521 | """ 522 | self._check_compatibility(other) 523 | return croaring.roaring_bitmap_and_cardinality(self._c_bitmap, other._c_bitmap) 524 | 525 | def difference_cardinality(self, AbstractBitMap other): 526 | """ 527 | Return the number of elements in the difference of the two bitmaps. 528 | 529 | It is equivalent to len(self - other), but faster. 530 | 531 | >>> BitMap([3, 12]).difference_cardinality(BitMap([3, 5, 8])) 532 | 1 533 | """ 534 | self._check_compatibility(other) 535 | return croaring.roaring_bitmap_andnot_cardinality(self._c_bitmap, other._c_bitmap) 536 | 537 | def symmetric_difference_cardinality(self, AbstractBitMap other): 538 | """ 539 | Return the number of elements in the symmetric difference of the two bitmaps. 540 | 541 | It is equivalent to len(self ^ other), but faster. 542 | 543 | >>> BitMap([3, 12]).symmetric_difference_cardinality(BitMap([3, 5, 8])) 544 | 3 545 | """ 546 | self._check_compatibility(other) 547 | return croaring.roaring_bitmap_xor_cardinality(self._c_bitmap, other._c_bitmap) 548 | 549 | def intersect(self, AbstractBitMap other): 550 | """ 551 | Return True if and only if the two bitmaps have elements in common. 552 | 553 | It is equivalent to len(self & other) > 0, but faster. 554 | 555 | >>> BitMap([3, 12]).intersect(BitMap([3, 18])) 556 | True 557 | >>> BitMap([3, 12]).intersect(BitMap([5, 18])) 558 | False 559 | """ 560 | self._check_compatibility(other) 561 | return croaring.roaring_bitmap_intersect(self._c_bitmap, other._c_bitmap) 562 | 563 | def jaccard_index(self, AbstractBitMap other): 564 | """ 565 | Compute the Jaccard index of the two bitmaps. 566 | 567 | It is equivalent to len(self&other)/len(self|other), but faster. 
568 | See https://en.wikipedia.org/wiki/Jaccard_index 569 | 570 | >>> BitMap([3, 10, 12]).jaccard_index(BitMap([3, 18])) 571 | 0.25 572 | """ 573 | self._check_compatibility(other) 574 | return croaring.roaring_bitmap_jaccard_index(self._c_bitmap, other._c_bitmap) 575 | 576 | def get_statistics(self): 577 | """ 578 | Return relevant metrics about the bitmap. 579 | 580 | >>> stats = BitMap(range(18, 66000, 2)).get_statistics() 581 | >>> stats['cardinality'] 582 | 32991 583 | >>> stats['max_value'] 584 | 65998 585 | >>> stats['min_value'] 586 | 18 587 | >>> stats['n_array_containers'] 588 | 1 589 | >>> stats['n_bitset_containers'] 590 | 1 591 | >>> stats['n_bytes_array_containers'] 592 | 464 593 | >>> stats['n_bytes_bitset_containers'] 594 | 8192 595 | >>> stats['n_bytes_run_containers'] 596 | 0 597 | >>> stats['n_containers'] 598 | 2 599 | >>> stats['n_run_containers'] 600 | 0 601 | >>> stats['n_values_array_containers'] 602 | 232 603 | >>> stats['n_values_bitset_containers'] 604 | 32759 605 | >>> stats['n_values_run_containers'] 606 | 0 607 | >>> stats['sum_value'] 608 | 0 609 | """ 610 | cdef croaring.roaring_statistics_t stat 611 | croaring.roaring_bitmap_statistics(self._c_bitmap, &stat) 612 | return stat 613 | 614 | def min(self): 615 | """ 616 | Return the minimum element of the bitmap. 617 | 618 | It is equivalent to min(self), but faster. 619 | 620 | >>> BitMap([3, 12]).min() 621 | 3 622 | """ 623 | if len(self) == 0: 624 | raise ValueError('Empty roaring bitmap, there is no minimum.') 625 | else: 626 | return croaring.roaring_bitmap_minimum(self._c_bitmap) 627 | 628 | def max(self): 629 | """ 630 | Return the maximum element of the bitmap. 631 | 632 | It is equivalent to max(self), but faster. 
633 | 634 | >>> BitMap([3, 12]).max() 635 | 12 636 | """ 637 | if len(self) == 0: 638 | raise ValueError('Empty roaring bitmap, there is no maximum.') 639 | else: 640 | return croaring.roaring_bitmap_maximum(self._c_bitmap) 641 | 642 | def rank(self, uint32_t value): 643 | """ 644 | Return the rank of the element in the bitmap. 645 | 646 | >>> BitMap([3, 12]).rank(12) 647 | 2 648 | """ 649 | return croaring.roaring_bitmap_rank(self._c_bitmap, value) 650 | 651 | def next_set_bit(self, uint32_t value): 652 | """ 653 | Return the next set bit larger or equal to the given value. 654 | 655 | >>> BitMap([1, 2, 4]).next_set_bit(1) 656 | 1 657 | 658 | >>> BitMap([1, 2, 4]).next_set_bit(3) 659 | 4 660 | 661 | >>> BitMap([1, 2, 4]).next_set_bit(5) 662 | Traceback (most recent call last): 663 | ValueError: No value larger or equal to specified value. 664 | """ 665 | try: 666 | return next(self.iter_equal_or_larger(value)) 667 | except StopIteration: 668 | raise ValueError('No value larger or equal to specified value.') 669 | 670 | cdef int64_t _shift_index(self, int64_t index) except -1: 671 | cdef int64_t size = len(self) 672 | if index >= size or index < -size: 673 | raise IndexError('Index out of bound') 674 | if index < 0: 675 | return (index + size) 676 | else: 677 | return index 678 | 679 | cdef uint32_t _get_elt(self, int64_t index) except? 
0: 680 | cdef uint64_t s_index = self._shift_index(index) 681 | cdef uint32_t elt 682 | cdef bool valid = croaring.roaring_bitmap_select(self._c_bitmap, s_index, &elt) 683 | if not valid: 684 | raise ValueError('Invalid rank') 685 | return elt 686 | 687 | cdef _get_slice(self, sl): 688 | """For a faster computation, different methods, depending on the slice.""" 689 | start, stop, step = sl.indices(len(self)) 690 | sign = 1 if step > 0 else -1 691 | if (sign > 0 and start >= stop) or (sign < 0 and start <= stop): # empty chunk 692 | return self.__class__() 693 | r = range(start, stop, step) 694 | assert len(r) > 0 695 | first_elt = self._get_elt(start) 696 | last_elt = self._get_elt(stop-sign) 697 | values = range(first_elt, last_elt+sign, step) 698 | if abs(step) == 1 and len(values) <= len(self) / 100: # contiguous and small chunk of the bitmap 699 | return self & self.__class__(values, copy_on_write=self.copy_on_write) 700 | else: # generic case 701 | if step < 0: 702 | start = r[-1] 703 | stop = r[0] + 1 704 | step = -step 705 | else: 706 | start = r[0] 707 | stop = r[-1] + 1 708 | return self._generic_get_slice(start, stop, step) 709 | 710 | cdef _generic_get_slice(self, uint32_t start, uint32_t stop, uint32_t step): 711 | """Assume that start, stop and step > 0 and that the result will not be empty.""" 712 | cdef croaring.roaring_bitmap_t *result = croaring.roaring_bitmap_create() 713 | cdef croaring.roaring_uint32_iterator_t *iterator = croaring.roaring_iterator_create(self._c_bitmap) 714 | cdef uint32_t count, max_count=256 715 | cdef uint32_t *buff = malloc(max_count*4) 716 | cdef uint32_t i_loc=0, i_glob=start, i_buff=0 717 | croaring.roaring_bitmap_set_copy_on_write(result, self.copy_on_write) 718 | first_elt = self._get_elt(start) 719 | valid = croaring.roaring_uint32_iterator_move_equalorlarger(iterator, first_elt) 720 | assert valid 721 | while True: 722 | count = croaring.roaring_uint32_iterator_read(iterator, buff, max_count) 723 | while i_buff < 
max_count and i_glob < stop: 724 | buff[i_loc] = buff[i_buff] 725 | i_loc += 1 726 | i_buff += step 727 | i_glob += step 728 | croaring.roaring_bitmap_add_many(result, i_loc, buff) 729 | if count != max_count or i_glob >= stop: 730 | break 731 | i_loc = 0 732 | i_buff = i_buff % max_count 733 | croaring.roaring_uint32_iterator_free(iterator) 734 | free(buff) 735 | return self.from_ptr(result) 736 | 737 | def __getitem__(self, value): 738 | if isinstance(value, int): 739 | return self._get_elt(value) 740 | elif isinstance(value, slice): 741 | return self._get_slice(value) 742 | else: 743 | return TypeError('Indices must be integers or slices, not %s' % type(value)) 744 | 745 | def serialize(self): 746 | """ 747 | Return the serialization of the bitmap. See AbstractBitMap.deserialize for the reverse operation. 748 | 749 | >>> BitMap.deserialize(BitMap([3, 12]).serialize()) 750 | BitMap([3, 12]) 751 | """ 752 | cdef size_t size = croaring.roaring_bitmap_portable_size_in_bytes(self._c_bitmap) 753 | cdef char *buff = malloc(size) 754 | cdef real_size = croaring.roaring_bitmap_portable_serialize(self._c_bitmap, buff) 755 | result = buff[:size] 756 | free(buff) 757 | return result 758 | 759 | 760 | @classmethod 761 | def deserialize(cls, bytes buff): 762 | """ 763 | Generate a bitmap from the given serialization. See AbstractBitMap.serialize for the reverse operation. 
764 | 765 | >>> BitMap.deserialize(BitMap([3, 12]).serialize()) 766 | BitMap([3, 12]) 767 | """ 768 | return (cls()).from_ptr(deserialize_ptr(buff)) # FIXME to change when from_ptr is a classmethod 769 | 770 | def __getstate__(self): 771 | return self.serialize() 772 | 773 | def __setstate__(self, state): 774 | try: # compatibility between Python2 and Python3 (see #27) 775 | self._c_bitmap = deserialize_ptr(state) 776 | except TypeError: 777 | self._c_bitmap = deserialize_ptr(state.encode()) 778 | 779 | 780 | def __sizeof__(self): 781 | cdef size_t size = croaring.roaring_bitmap_portable_size_in_bytes(self._c_bitmap) 782 | return size 783 | 784 | 785 | def to_array(self): 786 | """ 787 | Return an array.array containing the elements of the bitmap, in increasing order. 788 | 789 | It is equivalent to array.array('I', self), but more efficient. 790 | 791 | >>> BitMap([3, 12]).to_array() 792 | array('I', [3, 12]) 793 | """ 794 | cdef int64_t size = len(self) 795 | if size == 0: 796 | return array.array('I', []) 797 | cdef array.array result = array.array('I') 798 | array.resize(result, size) 799 | cdef unsigned[:] buff = result 800 | croaring.roaring_bitmap_to_uint32_array(self._c_bitmap, &buff[0]) 801 | return result 802 | 803 | 804 | cdef class AbstractBitMap64: 805 | """ 806 | An efficient and light-weight ordered set of 64 bits integers. 
807 | """ 808 | cdef croaring.roaring64_bitmap_t* _c_bitmap 809 | cdef int64_t _h_val 810 | 811 | def __cinit__(self, values=None, copy_on_write=False, optimize=True, no_init=False): 812 | if no_init: 813 | assert values is None 814 | return 815 | cdef vector[uint64_t] buff_vect 816 | cdef uint64_t[:] buff 817 | if values is None: 818 | self._c_bitmap = croaring.roaring64_bitmap_create() 819 | elif isinstance(values, AbstractBitMap64): 820 | self._c_bitmap = croaring.roaring64_bitmap_copy((values)._c_bitmap) 821 | self._h_val = (values)._h_val 822 | elif isinstance(values, range): 823 | _, (start, stop, step) = values.__reduce__() 824 | if step < 0: 825 | values = range(min(values), max(values)+1, -step) 826 | _, (start, stop, step) = values.__reduce__() 827 | if start >= stop: 828 | self._c_bitmap = croaring.roaring64_bitmap_create() 829 | else: 830 | self._c_bitmap = croaring.roaring64_bitmap_from_range(start, stop, step) 831 | elif isinstance(values, array.array): 832 | size = len(values) 833 | if size == 0: 834 | self._c_bitmap = croaring.roaring64_bitmap_create() 835 | else: 836 | buff = values 837 | self._c_bitmap = croaring.roaring64_bitmap_of_ptr(size, &buff[0]) 838 | else: 839 | try: 840 | size = len(values) 841 | except TypeError: # object has no length, creating a list 842 | values = list(values) 843 | size = len(values) 844 | self._c_bitmap = croaring.roaring64_bitmap_create() 845 | if size > 0: 846 | buff_vect = values 847 | croaring.roaring64_bitmap_add_many(self._c_bitmap, size, &buff_vect[0]) 848 | if not isinstance(values, AbstractBitMap64): 849 | self._h_val = 0 850 | if optimize: 851 | self.run_optimize() 852 | 853 | def __init__(self, values=None, copy_on_write=False, optimize=True): 854 | """ 855 | Construct a AbstractBitMap64 object, either empry or from an iterable. 856 | 857 | The field copy_on_write has no effect (yet). 
858 | 859 | >>> BitMap64() 860 | BitMap64([]) 861 | >>> BitMap64([1, 123456789, 27]) 862 | BitMap64([1, 27, 123456789]) 863 | """ 864 | 865 | cdef from_ptr(self, croaring.roaring64_bitmap_t *ptr) noexcept: 866 | """ 867 | Return an instance of AbstractBitMap64 (or one of its subclasses) initialized with the given pointer. 868 | 869 | FIXME: this should be a classmethod, but this is (currently) impossible for cdef methods. 870 | See https://groups.google.com/forum/#!topic/cython-users/FLHiLzzKqj4 871 | """ 872 | bm = self.__class__.__new__(self.__class__, no_init=True) 873 | (bm)._c_bitmap = ptr 874 | return bm 875 | 876 | @property 877 | def copy_on_write(self): 878 | """ 879 | Always False, not implemented for 64 bits roaring bitmaps. 880 | 881 | >>> BitMap64(copy_on_write=False).copy_on_write 882 | False 883 | >>> BitMap64(copy_on_write=True).copy_on_write 884 | False 885 | """ 886 | return False 887 | 888 | def run_optimize(self): 889 | return croaring.roaring64_bitmap_run_optimize(self._c_bitmap) 890 | 891 | def __dealloc__(self): 892 | if self._c_bitmap is not NULL: 893 | croaring.roaring64_bitmap_free(self._c_bitmap) 894 | 895 | def _check_compatibility(self, AbstractBitMap64 other): 896 | if other is None: 897 | raise TypeError('Argument has incorrect type (expected pyroaring.AbstractBitMap64, got None)') 898 | if self.copy_on_write != other.copy_on_write: 899 | raise ValueError('Cannot have interactions between bitmaps with and without copy_on_write.\n') 900 | 901 | def __contains__(self, uint64_t value): 902 | return croaring.roaring64_bitmap_contains(self._c_bitmap, value) 903 | 904 | def __bool__(self): 905 | return not croaring.roaring64_bitmap_is_empty(self._c_bitmap) 906 | 907 | def __len__(self): 908 | return croaring.roaring64_bitmap_get_cardinality(self._c_bitmap) 909 | 910 | def __lt__(self, AbstractBitMap64 other): 911 | self._check_compatibility(other) 912 | return croaring.roaring64_bitmap_is_strict_subset((self)._c_bitmap, (other)._c_bitmap) 
913 | 914 | def __le__(self, AbstractBitMap64 other): 915 | self._check_compatibility(other) 916 | return croaring.roaring64_bitmap_is_subset((self)._c_bitmap, (other)._c_bitmap) 917 | 918 | def __eq__(self, object other): 919 | if not isinstance(other, AbstractBitMap64): 920 | return NotImplemented 921 | self._check_compatibility(other) 922 | return croaring.roaring64_bitmap_equals((self)._c_bitmap, (other)._c_bitmap) 923 | 924 | def __ne__(self, object other): 925 | if not isinstance(other, AbstractBitMap64): 926 | return NotImplemented 927 | self._check_compatibility(other) 928 | return not croaring.roaring64_bitmap_equals((self)._c_bitmap, (other)._c_bitmap) 929 | 930 | def __gt__(self, AbstractBitMap64 other): 931 | self._check_compatibility(other) 932 | return croaring.roaring64_bitmap_is_strict_subset((other)._c_bitmap, (self)._c_bitmap) 933 | 934 | def __ge__(self, AbstractBitMap64 other): 935 | self._check_compatibility(other) 936 | return croaring.roaring64_bitmap_is_subset((other)._c_bitmap, (self)._c_bitmap) 937 | 938 | def contains_range(self, uint64_t range_start, uint64_t range_end): 939 | """ 940 | Check whether a range of values from range_start (included) to range_end (excluded) is present. 941 | 942 | >>> bm = BitMap64([5, 6, 7, 8, 9, 10]) 943 | >>> bm.contains_range(6, 9) 944 | True 945 | >>> bm.contains_range(8, 12) 946 | False 947 | """ 948 | if range_end <= range_start or range_end == 0: 949 | return True # empty range 950 | return croaring.roaring64_bitmap_contains_range(self._c_bitmap, range_start, range_end) 951 | 952 | def range_cardinality(self, uint64_t range_start, uint64_t range_end): 953 | """ 954 | Return cardinality from range_start (included) to range_end (excluded). 
955 | 956 | >>> bm = BitMap64(range(10)) 957 | >>> bm.range_cardinality(0, 10) 958 | 10 959 | >>> bm.range_cardinality(10, 100) 960 | 0 961 | """ 962 | if range_end < range_start: 963 | raise AssertionError('range_end must not be lower than range_start') 964 | return croaring.roaring64_bitmap_range_cardinality(self._c_bitmap, range_start, range_end) 965 | 966 | cdef compute_hash(self): 967 | cdef int64_t h_val = 0 968 | cdef uint32_t i, count, max_count=256 969 | cdef croaring.roaring64_iterator_t *iterator = croaring.roaring64_iterator_create(self._c_bitmap) 970 | cdef uint64_t *buff = malloc(max_count*8) 971 | while True: 972 | count = croaring.roaring64_iterator_read(iterator, buff, max_count) 973 | i = 0 974 | while i < count: 975 | h_val += buff[i] 976 | # TODO find a good hash formula 977 | i += 1 978 | if count != max_count: 979 | break 980 | croaring.roaring64_iterator_free(iterator) 981 | free(buff) 982 | if not self: 983 | return -1 984 | return h_val 985 | 986 | def __hash__(self): 987 | if self._h_val == 0: 988 | self._h_val = self.compute_hash() 989 | return self._h_val 990 | 991 | def iter_equal_or_larger(self, uint64_t val): 992 | """ 993 | Iterate over items in the bitmap equal or larger than a given value. 
994 | 995 | >>> bm = BitMap64([1, 2, 4]) 996 | >>> list(bm.iter_equal_or_larger(2)) 997 | [2, 4] 998 | """ 999 | cdef croaring.roaring64_iterator_t *iterator = croaring.roaring64_iterator_create(self._c_bitmap) 1000 | valid = croaring.roaring64_iterator_move_equalorlarger(iterator, val) 1001 | if not valid: 1002 | return 1003 | try: 1004 | while valid: 1005 | yield croaring.roaring64_iterator_value(iterator) 1006 | valid = croaring.roaring64_iterator_advance(iterator) 1007 | finally: 1008 | croaring.roaring64_iterator_free(iterator) 1009 | 1010 | def __iter__(self): 1011 | cdef croaring.roaring64_iterator_t *iterator = croaring.roaring64_iterator_create(self._c_bitmap) 1012 | valid = croaring.roaring64_iterator_has_value(iterator) 1013 | if not valid: 1014 | return 1015 | try: 1016 | while valid: 1017 | yield croaring.roaring64_iterator_value(iterator) 1018 | valid = croaring.roaring64_iterator_advance(iterator) 1019 | finally: 1020 | croaring.roaring64_iterator_free(iterator) 1021 | 1022 | def __repr__(self): 1023 | return str(self) 1024 | 1025 | def __str__(self): 1026 | return _string_rep(self) 1027 | 1028 | def flip(self, uint64_t start, uint64_t end): 1029 | """ 1030 | Compute the negation of the bitmap within the specified interval. 1031 | 1032 | Areas outside the range are passed unchanged. 1033 | 1034 | >>> bm = BitMap64([3, 12]) 1035 | >>> bm.flip(10, 15) 1036 | BitMap64([3, 10, 11, 13, 14]) 1037 | """ 1038 | return self.from_ptr(croaring.roaring64_bitmap_flip(self._c_bitmap, start, end)) 1039 | 1040 | def get_statistics(self): 1041 | """ 1042 | Return relevant metrics about the bitmap. 
1043 | 1044 | >>> stats = BitMap64(range(18, 66000, 2)).get_statistics() 1045 | >>> stats['cardinality'] 1046 | 32991 1047 | >>> stats['max_value'] 1048 | 65998 1049 | >>> stats['min_value'] 1050 | 18 1051 | >>> stats['n_array_containers'] 1052 | 1 1053 | >>> stats['n_bitset_containers'] 1054 | 1 1055 | >>> stats['n_bytes_array_containers'] 1056 | 464 1057 | >>> stats['n_bytes_bitset_containers'] 1058 | 8192 1059 | >>> stats['n_bytes_run_containers'] 1060 | 0 1061 | >>> stats['n_containers'] 1062 | 2 1063 | >>> stats['n_run_containers'] 1064 | 0 1065 | >>> stats['n_values_array_containers'] 1066 | 232 1067 | >>> stats['n_values_bitset_containers'] 1068 | 32759 1069 | >>> stats['n_values_run_containers'] 1070 | 0 1071 | """ 1072 | cdef croaring.roaring64_statistics_t stat 1073 | croaring.roaring64_bitmap_statistics(self._c_bitmap, &stat) 1074 | return stat 1075 | 1076 | def min(self): 1077 | """ 1078 | Return the minimum element of the bitmap. 1079 | 1080 | It is equivalent to min(self), but faster. 1081 | 1082 | >>> BitMap64([3, 12]).min() 1083 | 3 1084 | """ 1085 | if len(self) == 0: 1086 | raise ValueError('Empty roaring bitmap, there is no minimum.') 1087 | else: 1088 | return croaring.roaring64_bitmap_minimum(self._c_bitmap) 1089 | 1090 | def max(self): 1091 | """ 1092 | Return the maximum element of the bitmap. 1093 | 1094 | It is equivalent to max(self), but faster. 1095 | 1096 | >>> BitMap64([3, 12]).max() 1097 | 12 1098 | """ 1099 | if len(self) == 0: 1100 | raise ValueError('Empty roaring bitmap, there is no maximum.') 1101 | else: 1102 | return croaring.roaring64_bitmap_maximum(self._c_bitmap) 1103 | 1104 | def rank(self, uint64_t value): 1105 | """ 1106 | Return the rank of the element in the bitmap. 
1107 | 1108 | >>> BitMap64([3, 12]).rank(12) 1109 | 2 1110 | """ 1111 | return croaring.roaring64_bitmap_rank(self._c_bitmap, value) 1112 | 1113 | def next_set_bit(self, uint64_t value): 1114 | """ 1115 | Return the next set bit larger or equal to the given value. 1116 | 1117 | >>> BitMap64([1, 2, 4]).next_set_bit(1) 1118 | 1 1119 | 1120 | >>> BitMap64([1, 2, 4]).next_set_bit(3) 1121 | 4 1122 | 1123 | >>> BitMap64([1, 2, 4]).next_set_bit(5) 1124 | Traceback (most recent call last): 1125 | ValueError: No value larger or equal to specified value. 1126 | """ 1127 | try: 1128 | return next(self.iter_equal_or_larger(value)) 1129 | except StopIteration: 1130 | raise ValueError('No value larger or equal to specified value.') 1131 | 1132 | cdef int64_t _shift_index(self, int64_t index) except -1: 1133 | cdef int64_t size = len(self) 1134 | if index >= size or index < -size: 1135 | raise IndexError('Index out of bound') 1136 | if index < 0: 1137 | return (index + size) 1138 | else: 1139 | return index 1140 | 1141 | cdef uint64_t _get_elt(self, int64_t index) except? 
0: 1142 | cdef uint64_t s_index = self._shift_index(index) 1143 | cdef uint64_t elt 1144 | cdef bool valid = croaring.roaring64_bitmap_select(self._c_bitmap, s_index, &elt) 1145 | if not valid: 1146 | raise ValueError('Invalid rank') 1147 | return elt 1148 | 1149 | cdef _get_slice(self, sl): 1150 | """For a faster computation, different methods, depending on the slice.""" 1151 | start, stop, step = sl.indices(len(self)) 1152 | sign = 1 if step > 0 else -1 1153 | if (sign > 0 and start >= stop) or (sign < 0 and start <= stop): # empty chunk 1154 | return self.__class__() 1155 | r = range(start, stop, step) 1156 | assert len(r) > 0 1157 | first_elt = self._get_elt(start) 1158 | last_elt = self._get_elt(stop-sign) 1159 | values = range(first_elt, last_elt+sign, step) 1160 | if abs(step) == 1 and len(values) <= len(self) / 100: # contiguous and small chunk of the bitmap 1161 | return self & self.__class__(values) 1162 | else: # generic case 1163 | if step < 0: 1164 | start = r[-1] 1165 | stop = r[0] + 1 1166 | step = -step 1167 | else: 1168 | start = r[0] 1169 | stop = r[-1] + 1 1170 | return self._generic_get_slice(start, stop, step) 1171 | 1172 | cdef _generic_get_slice(self, uint64_t start, uint64_t stop, uint64_t step): 1173 | """Assume that start, stop and step > 0 and that the result will not be empty.""" 1174 | cdef croaring.roaring64_bitmap_t *result = croaring.roaring64_bitmap_create() 1175 | cdef croaring.roaring64_iterator_t *iterator = croaring.roaring64_iterator_create(self._c_bitmap) 1176 | cdef uint64_t count, max_count=256 1177 | cdef uint64_t *buff = malloc(max_count*8) 1178 | cdef uint64_t i_loc=0, i_glob=start, i_buff=0 1179 | first_elt = self._get_elt(start) 1180 | valid = croaring.roaring64_iterator_move_equalorlarger(iterator, first_elt) 1181 | assert valid 1182 | while True: 1183 | count = croaring.roaring64_iterator_read(iterator, buff, max_count) 1184 | while i_buff < max_count and i_glob < stop: 1185 | buff[i_loc] = buff[i_buff] 1186 | i_loc 
+= 1 1187 | i_buff += step 1188 | i_glob += step 1189 | croaring.roaring64_bitmap_add_many(result, i_loc, buff) 1190 | if count != max_count or i_glob >= stop: 1191 | break 1192 | i_loc = 0 1193 | i_buff = i_buff % max_count 1194 | croaring.roaring64_iterator_free(iterator) 1195 | free(buff) 1196 | return self.from_ptr(result) 1197 | 1198 | def __getitem__(self, value): 1199 | if isinstance(value, int): 1200 | return self._get_elt(value) 1201 | elif isinstance(value, slice): 1202 | return self._get_slice(value) 1203 | else: 1204 | return TypeError('Indices must be integers or slices, not %s' % type(value)) 1205 | 1206 | def serialize(self): 1207 | """ 1208 | Return the serialization of the bitmap. See AbstractBitMap64.deserialize for the reverse operation. 1209 | 1210 | >>> BitMap64.deserialize(BitMap64([3, 12]).serialize()) 1211 | BitMap64([3, 12]) 1212 | """ 1213 | cdef size_t size = croaring.roaring64_bitmap_portable_size_in_bytes(self._c_bitmap) 1214 | cdef char *buff = malloc(size) 1215 | cdef real_size = croaring.roaring64_bitmap_portable_serialize(self._c_bitmap, buff) 1216 | result = buff[:size] 1217 | free(buff) 1218 | return result 1219 | 1220 | 1221 | @classmethod 1222 | def deserialize(cls, bytes buff): 1223 | """ 1224 | Generate a bitmap from the given serialization. See AbstractBitMap64.serialize for the reverse operation. 
1225 | 1226 | >>> BitMap64.deserialize(BitMap64([3, 12]).serialize()) 1227 | BitMap64([3, 12]) 1228 | """ 1229 | return (cls()).from_ptr(deserialize64_ptr(buff)) # FIXME to change when from_ptr is a classmethod 1230 | 1231 | def __getstate__(self): 1232 | return self.serialize() 1233 | 1234 | def __setstate__(self, state): 1235 | try: # compatibility between Python2 and Python3 (see #27) 1236 | self._c_bitmap = deserialize64_ptr(state) 1237 | except TypeError: 1238 | self._c_bitmap = deserialize64_ptr(state.encode()) 1239 | 1240 | 1241 | def __sizeof__(self): 1242 | cdef size_t size = croaring.roaring64_bitmap_portable_size_in_bytes(self._c_bitmap) 1243 | return size 1244 | 1245 | def to_array(self): 1246 | """ 1247 | Return an array.array containing the elements of the bitmap, in increasing order. 1248 | 1249 | It is equivalent to array.array('Q', self), but more efficient. 1250 | 1251 | >>> BitMap64([3, 12]).to_array() 1252 | array('Q', [3, 12]) 1253 | """ 1254 | cdef uint64_t size = len(self) 1255 | if size == 0: 1256 | return array.array('Q', []) 1257 | cdef array.array result = array.array('Q') 1258 | array.resize(result, size) 1259 | cdef uint64_t[:] buff = result 1260 | croaring.roaring64_bitmap_to_uint64_array(self._c_bitmap, &buff[0]) 1261 | return result 1262 | 1263 | def copy(self): 1264 | """ 1265 | Return a copy of a set. 1266 | 1267 | >>> bm = BitMap64([3, 12]) 1268 | >>> bm2 = bm.copy() 1269 | >>> bm == bm2 1270 | True 1271 | >>> bm.add(1) 1272 | >>> bm == bm2 1273 | False 1274 | 1275 | """ 1276 | return self.__class__(self) 1277 | 1278 | def isdisjoint(self, other): 1279 | """ 1280 | Return True if two sets have a null intersection. 
1281 | 1282 | >>> BitMap64([1, 2]).isdisjoint(BitMap64([3, 4])) 1283 | True 1284 | 1285 | >>> BitMap64([1, 2, 3]).isdisjoint(BitMap64([3, 4])) 1286 | False 1287 | 1288 | """ 1289 | return self.intersection_cardinality(other) == 0 1290 | 1291 | def issubset(self, other): 1292 | """ 1293 | Report whether another set contains this set. 1294 | 1295 | >>> BitMap64([1, 2]).issubset(BitMap64([1, 2, 3, 4])) 1296 | True 1297 | 1298 | >>> BitMap64([1, 2]).issubset(BitMap64([3, 4])) 1299 | False 1300 | 1301 | """ 1302 | return self <= other 1303 | 1304 | def issuperset(self, other): 1305 | """ 1306 | Report whether this set contains another set. 1307 | 1308 | >>> BitMap64([1, 2, 3, 4]).issuperset(BitMap64([1, 2])) 1309 | True 1310 | 1311 | >>> BitMap64([1, 2]).issuperset(BitMap64([3, 4])) 1312 | False 1313 | 1314 | """ 1315 | return self >= other 1316 | 1317 | def difference(*bitmaps): 1318 | """ 1319 | Return the difference of two or more sets as a new set. 1320 | 1321 | (i.e. all elements that are in this set but not the others.) 1322 | 1323 | >>> BitMap64.difference(BitMap64([1, 2, 3]), BitMap64([2, 20]), BitMap64([3, 30])) 1324 | BitMap64([1]) 1325 | 1326 | """ 1327 | size = len(bitmaps) 1328 | cdef AbstractBitMap64 result, bm 1329 | if size <= 1: 1330 | return bitmaps[0].copy() 1331 | elif size == 2: 1332 | return bitmaps[0] - bitmaps[1] 1333 | else: 1334 | result = BitMap64(bitmaps[0]) 1335 | for bm in bitmaps[1:]: 1336 | result -= bm 1337 | return bitmaps[0].__class__(result) 1338 | 1339 | 1340 | def symmetric_difference(self, other): 1341 | """ 1342 | Return the symmetric difference of two sets as a new set. 1343 | 1344 | (i.e. all elements that are in exactly one of the sets.) 1345 | 1346 | >>> BitMap64([1, 2, 3]).symmetric_difference(BitMap64([2, 3, 4])) 1347 | BitMap64([1, 4]) 1348 | """ 1349 | return self.__xor__(other) 1350 | 1351 | def union(*bitmaps): 1352 | """ 1353 | Return the union of the bitmaps. 
1354 | 1355 | >>> BitMap64.union(BitMap64([3, 12]), BitMap64([5]), BitMap64([0, 10, 12])) 1356 | BitMap64([0, 3, 5, 10, 12]) 1357 | """ 1358 | size = len(bitmaps) 1359 | cdef AbstractBitMap64 result, bm 1360 | if size <= 1: 1361 | return bitmaps[0].copy() 1362 | elif size == 2: 1363 | return bitmaps[0] | bitmaps[1] 1364 | else: 1365 | result = BitMap64(bitmaps[0]) 1366 | for bm in bitmaps[1:]: 1367 | result |= bm 1368 | return bitmaps[0].__class__(result) 1369 | 1370 | def intersection(*bitmaps): 1371 | """ 1372 | Return the intersection of the bitmaps. 1373 | 1374 | >>> BitMap64.intersection(BitMap64(range(0, 15)), BitMap64(range(5, 20)), BitMap64(range(10, 25))) 1375 | BitMap64([10, 11, 12, 13, 14]) 1376 | """ 1377 | size = len(bitmaps) 1378 | cdef AbstractBitMap64 result, bm 1379 | if size <= 1: 1380 | return bitmaps[0].copy() 1381 | elif size == 2: 1382 | return bitmaps[0] & bitmaps[1] 1383 | else: 1384 | result = BitMap64(bitmaps[0]) 1385 | for bm in bitmaps[1:]: 1386 | result &= bm 1387 | return bitmaps[0].__class__(result) 1388 | 1389 | cdef binary_op(self, AbstractBitMap64 other, (croaring.roaring64_bitmap_t*)func(const croaring.roaring64_bitmap_t*, const croaring.roaring64_bitmap_t*) noexcept) noexcept: 1390 | cdef croaring.roaring64_bitmap_t *r = func(self._c_bitmap, other._c_bitmap) 1391 | return self.from_ptr(r) 1392 | 1393 | def __or__(self, other): 1394 | return (self).binary_op(other, croaring.roaring64_bitmap_or) 1395 | 1396 | def __and__(self, other): 1397 | return (self).binary_op(other, croaring.roaring64_bitmap_and) 1398 | 1399 | def __xor__(self, other): 1400 | return (self).binary_op(other, croaring.roaring64_bitmap_xor) 1401 | 1402 | def __sub__(self, other): 1403 | return (self).binary_op(other, croaring.roaring64_bitmap_andnot) 1404 | 1405 | def union_cardinality(self, AbstractBitMap64 other): 1406 | """ 1407 | Return the number of elements in the union of the two bitmaps. 1408 | 1409 | It is equivalent to len(self | other), but faster. 
1410 | 1411 | >>> BitMap64([3, 12]).union_cardinality(BitMap64([3, 5, 8])) 1412 | 4 1413 | """ 1414 | return croaring.roaring64_bitmap_or_cardinality(self._c_bitmap, other._c_bitmap) 1415 | 1416 | def intersection_cardinality(self, AbstractBitMap64 other): 1417 | """ 1418 | Return the number of elements in the intersection of the two bitmaps. 1419 | 1420 | It is equivalent to len(self & other), but faster. 1421 | 1422 | >>> BitMap64([3, 12]).intersection_cardinality(BitMap64([3, 5, 8])) 1423 | 1 1424 | """ 1425 | return croaring.roaring64_bitmap_and_cardinality(self._c_bitmap, other._c_bitmap) 1426 | 1427 | def difference_cardinality(self, AbstractBitMap64 other): 1428 | """ 1429 | Return the number of elements in the difference of the two bitmaps. 1430 | 1431 | It is equivalent to len(self - other), but faster. 1432 | 1433 | >>> BitMap64([3, 12]).difference_cardinality(BitMap64([3, 5, 8])) 1434 | 1 1435 | """ 1436 | return croaring.roaring64_bitmap_andnot_cardinality(self._c_bitmap, other._c_bitmap) 1437 | 1438 | def symmetric_difference_cardinality(self, AbstractBitMap64 other): 1439 | """ 1440 | Return the number of elements in the symmetric difference of the two bitmaps. 1441 | 1442 | It is equivalent to len(self ^ other), but faster. 1443 | 1444 | >>> BitMap64([3, 12]).symmetric_difference_cardinality(BitMap64([3, 5, 8])) 1445 | 3 1446 | """ 1447 | return croaring.roaring64_bitmap_xor_cardinality(self._c_bitmap, other._c_bitmap) 1448 | 1449 | def intersect(self, AbstractBitMap64 other): 1450 | """ 1451 | Return True if and only if the two bitmaps have elements in common. 1452 | 1453 | It is equivalent to len(self & other) > 0, but faster. 
1454 | 1455 | >>> BitMap64([3, 12]).intersect(BitMap64([3, 18])) 1456 | True 1457 | >>> BitMap64([3, 12]).intersect(BitMap64([5, 18])) 1458 | False 1459 | """ 1460 | return croaring.roaring64_bitmap_intersect(self._c_bitmap, other._c_bitmap) 1461 | 1462 | def jaccard_index(self, AbstractBitMap64 other): 1463 | """ 1464 | Compute the Jaccard index of the two bitmaps. 1465 | 1466 | It is equivalent to len(self&other)/len(self|other), but faster. 1467 | See https://en.wikipedia.org/wiki/Jaccard_index 1468 | 1469 | >>> BitMap64([3, 10, 12]).jaccard_index(BitMap64([3, 18])) 1470 | 0.25 1471 | """ 1472 | return croaring.roaring64_bitmap_jaccard_index(self._c_bitmap, other._c_bitmap) -------------------------------------------------------------------------------- /pyroaring/bitmap.pxi: -------------------------------------------------------------------------------- 1 | cdef class BitMap(AbstractBitMap): 2 | 3 | cdef compute_hash(self): 4 | '''Unsupported method.''' 5 | # For some reason, if we directly override __hash__ (either in BitMap or in FrozenBitMap), the __richcmp__ 6 | # method disappears. 7 | raise TypeError('Cannot compute the hash of a %s.' % self.__class__.__name__) 8 | 9 | def add(self, uint32_t value): 10 | """ 11 | Add an element to the bitmap. This has no effect if the element is already present. 12 | 13 | >>> bm = BitMap() 14 | >>> bm.add(42) 15 | >>> bm 16 | BitMap([42]) 17 | >>> bm.add(42) 18 | >>> bm 19 | BitMap([42]) 20 | """ 21 | croaring.roaring_bitmap_add(self._c_bitmap, value) 22 | 23 | def add_checked(self, uint32_t value): 24 | """ 25 | Add an element to the bitmap. This raises a KeyError exception if the element is already present. 26 | 27 | >>> bm = BitMap() 28 | >>> bm.add_checked(42) 29 | >>> bm 30 | BitMap([42]) 31 | >>> bm.add_checked(42) 32 | Traceback (most recent call last): 33 | ... 
34 | KeyError: 42 35 | """ 36 | cdef bool test = croaring.roaring_bitmap_add_checked(self._c_bitmap, value) 37 | if not test: 38 | raise KeyError(value) 39 | 40 | def update(self, *all_values): # FIXME could be more efficient 41 | """ 42 | Add all the given values to the bitmap. 43 | 44 | >>> bm = BitMap([3, 12]) 45 | >>> bm.update([8, 12, 55, 18]) 46 | >>> bm 47 | BitMap([3, 8, 12, 18, 55]) 48 | """ 49 | cdef vector[uint32_t] buff_vect 50 | cdef unsigned[:] buff 51 | for values in all_values: 52 | if isinstance(values, AbstractBitMap): 53 | self |= values 54 | elif isinstance(values, range): 55 | if len(values) == 0: 56 | continue 57 | _, (start, stop, step) = values.__reduce__() 58 | if step == -1: 59 | step = 1 60 | start, stop = stop+1, start+1 61 | if step == 1: 62 | self.add_range(start, stop) 63 | else: 64 | self |= AbstractBitMap(values, copy_on_write=self.copy_on_write) 65 | elif isinstance(values, array.array) and len(values) > 0: 66 | buff = values 67 | croaring.roaring_bitmap_add_many(self._c_bitmap, len(values), &buff[0]) 68 | else: 69 | try: 70 | size = len(values) 71 | except TypeError: # object has no length, creating a list 72 | values = list(values) 73 | size = len(values) 74 | if size > 0: 75 | buff_vect = values 76 | croaring.roaring_bitmap_add_many(self._c_bitmap, size, &buff_vect[0]) 77 | 78 | def discard(self, uint32_t value): 79 | """ 80 | Remove an element from the bitmap. This has no effect if the element is not present. 81 | 82 | >>> bm = BitMap([3, 12]) 83 | >>> bm.discard(3) 84 | >>> bm 85 | BitMap([12]) 86 | >>> bm.discard(3) 87 | >>> bm 88 | BitMap([12]) 89 | """ 90 | croaring.roaring_bitmap_remove(self._c_bitmap, value) 91 | 92 | def remove(self, uint32_t value): 93 | """ 94 | Remove an element from the bitmap. This raises a KeyError exception if the element does not exist in the bitmap. 
95 | 96 | >>> bm = BitMap([3, 12]) 97 | >>> bm.remove(3) 98 | >>> bm 99 | BitMap([12]) 100 | >>> bm.remove(3) 101 | Traceback (most recent call last): 102 | ... 103 | KeyError: 3 104 | """ 105 | cdef bool test = croaring.roaring_bitmap_remove_checked(self._c_bitmap, value) 106 | if not test: 107 | raise KeyError(value) 108 | 109 | cdef binary_iop(self, AbstractBitMap other, (void)func(croaring.roaring_bitmap_t*, const croaring.roaring_bitmap_t*) noexcept) noexcept: 110 | func(self._c_bitmap, other._c_bitmap) 111 | return self 112 | 113 | def __ior__(self, other): 114 | self._check_compatibility(other) 115 | if self._c_bitmap == (other)._c_bitmap: 116 | return self 117 | return (self).binary_iop(other, croaring.roaring_bitmap_or_inplace) 118 | 119 | def __iand__(self, other): 120 | self._check_compatibility(other) 121 | if self._c_bitmap == (other)._c_bitmap: 122 | return self 123 | return (self).binary_iop(other, croaring.roaring_bitmap_and_inplace) 124 | 125 | def __ixor__(self, other): 126 | self._check_compatibility(other) 127 | if self._c_bitmap == (other)._c_bitmap: 128 | self.clear() 129 | return self 130 | return (self).binary_iop(other, croaring.roaring_bitmap_xor_inplace) 131 | 132 | def __isub__(self, other): 133 | self._check_compatibility(other) 134 | if self._c_bitmap == (other)._c_bitmap: 135 | self.clear() 136 | return self 137 | return (self).binary_iop(other, croaring.roaring_bitmap_andnot_inplace) 138 | 139 | def intersection_update(self, *all_values): # FIXME could be more efficient 140 | """ 141 | Update the bitmap by taking its intersection with the given values. 
142 | 143 | >>> bm = BitMap([3, 12]) 144 | >>> bm.intersection_update([8, 12, 55, 18]) 145 | >>> bm 146 | BitMap([12]) 147 | """ 148 | for values in all_values: 149 | if isinstance(values, AbstractBitMap): 150 | self &= values 151 | else: 152 | self &= AbstractBitMap(values, copy_on_write=self.copy_on_write) 153 | 154 | def difference_update(self, *others): 155 | """ 156 | Remove all elements of another set from this set. 157 | 158 | >>> bm = BitMap([1, 2, 3, 4, 5]) 159 | >>> bm.difference_update(BitMap([1, 2, 10]), BitMap([3, 4, 20])) 160 | >>> bm 161 | BitMap([5]) 162 | """ 163 | self.__isub__(AbstractBitMap.union(*others)) 164 | 165 | def symmetric_difference_update(self, other): 166 | """ 167 | Update a set with the symmetric difference of itself and another. 168 | 169 | >>> bm = BitMap([1, 2, 3, 4]) 170 | >>> bm.symmetric_difference_update(BitMap([1, 2, 10])) 171 | >>> bm 172 | BitMap([3, 4, 10]) 173 | 174 | """ 175 | self.__ixor__(other) 176 | 177 | def overwrite(self, AbstractBitMap other): 178 | """ 179 | Clear the bitmap and overwrite it with another. 180 | 181 | >>> bm = BitMap([3, 12]) 182 | >>> other = BitMap([4, 14]) 183 | >>> bm.overwrite(other) 184 | >>> other.remove(4) 185 | >>> bm 186 | BitMap([4, 14]) 187 | >>> other 188 | BitMap([14]) 189 | """ 190 | if self._c_bitmap == other._c_bitmap: 191 | raise ValueError('Cannot overwrite itself') 192 | croaring.roaring_bitmap_overwrite(self._c_bitmap, other._c_bitmap) 193 | 194 | def clear(self): 195 | """ 196 | Remove all elements from this set. 197 | 198 | >>> bm = BitMap([1, 2, 3]) 199 | >>> bm.clear() 200 | >>> bm 201 | BitMap([]) 202 | """ 203 | croaring.roaring_bitmap_clear(self._c_bitmap) 204 | 205 | def pop(self): 206 | """ 207 | Remove and return an arbitrary set element. 208 | Raises KeyError if the set is empty. 209 | 210 | >>> bm = BitMap([1, 2]) 211 | >>> a = bm.pop() 212 | >>> b = bm.pop() 213 | >>> bm 214 | BitMap([]) 215 | >>> bm.pop() 216 | Traceback (most recent call last): 217 | ... 
218 | KeyError: 'pop from an empty BitMap' 219 | 220 | """ 221 | try: 222 | value = self.min() 223 | except ValueError: 224 | raise KeyError('pop from an empty BitMap') 225 | self.remove(value) 226 | return value 227 | 228 | 229 | def flip_inplace(self, uint64_t start, uint64_t end): 230 | """ 231 | Compute (in place) the negation of the bitmap within the specified interval. 232 | 233 | Areas outside the range are passed unchanged. 234 | 235 | >>> bm = BitMap([3, 12]) 236 | >>> bm.flip_inplace(10, 15) 237 | >>> bm 238 | BitMap([3, 10, 11, 13, 14]) 239 | """ 240 | croaring.roaring_bitmap_flip_inplace(self._c_bitmap, start, end) 241 | 242 | def add_range(self, uint64_t range_start, uint64_t range_end): 243 | """ 244 | Add a range of values from range_start (included) to range_end (excluded). 245 | 246 | >>> bm = BitMap([5, 7]) 247 | >>> bm.add_range(6, 9) 248 | >>> bm 249 | BitMap([5, 6, 7, 8]) 250 | """ 251 | if range_end <= range_start or range_end == 0 or range_start >= 2**32: 252 | return 253 | if range_end >= 2**32: 254 | range_end = 2**32 255 | croaring.roaring_bitmap_add_range(self._c_bitmap, range_start, range_end) 256 | 257 | def remove_range(self, uint64_t range_start, uint64_t range_end): 258 | """ 259 | Remove a range of values from range_start (included) to range_end (excluded). 260 | 261 | >>> bm = BitMap([5, 6, 7, 8, 9, 10]) 262 | >>> bm.remove_range(6, 9) 263 | >>> bm 264 | BitMap([5, 9, 10]) 265 | """ 266 | if range_end <= range_start or range_end == 0 or range_start >= 2**32: 267 | return 268 | if range_end >= 2**32: 269 | range_end = 2**32 270 | croaring.roaring_bitmap_remove_range(self._c_bitmap, range_start, range_end) 271 | 272 | cdef class BitMap64(AbstractBitMap64): 273 | 274 | cdef compute_hash(self): 275 | '''Unsupported method.''' 276 | # For some reason, if we directly override __hash__ (either in BitMap or in FrozenBitMap), the __richcmp__ 277 | # method disappears. 278 | raise TypeError('Cannot compute the hash of a %s.' 
% self.__class__.__name__) 279 | 280 | def add(self, uint64_t value): 281 | """ 282 | Add an element to the bitmap. This has no effect if the element is already present. 283 | 284 | >>> bm = BitMap64() 285 | >>> bm.add(42) 286 | >>> bm 287 | BitMap64([42]) 288 | >>> bm.add(42) 289 | >>> bm 290 | BitMap64([42]) 291 | """ 292 | croaring.roaring64_bitmap_add(self._c_bitmap, value) 293 | 294 | def add_checked(self, uint64_t value): 295 | """ 296 | Add an element to the bitmap. This raises a KeyError exception if the element is already present. 297 | 298 | >>> bm = BitMap64() 299 | >>> bm.add_checked(42) 300 | >>> bm 301 | BitMap64([42]) 302 | >>> bm.add_checked(42) 303 | Traceback (most recent call last): 304 | ... 305 | KeyError: 42 306 | """ 307 | cdef bool test = croaring.roaring64_bitmap_add_checked(self._c_bitmap, value) 308 | if not test: 309 | raise KeyError(value) 310 | 311 | def update(self, *all_values): # FIXME could be more efficient 312 | """ 313 | Add all the given values to the bitmap. 
314 | 315 | >>> bm = BitMap64([3, 12]) 316 | >>> bm.update([8, 12, 55, 18]) 317 | >>> bm 318 | BitMap64([3, 8, 12, 18, 55]) 319 | """ 320 | cdef vector[uint64_t] buff_vect 321 | cdef uint64_t[:] buff 322 | for values in all_values: 323 | if isinstance(values, AbstractBitMap64): 324 | self |= values 325 | elif isinstance(values, range): 326 | if len(values) == 0: 327 | continue 328 | _, (start, stop, step) = values.__reduce__() 329 | if step == -1: 330 | step = 1 331 | start, stop = stop+1, start+1 332 | if step == 1: 333 | self.add_range(start, stop) 334 | else: 335 | self |= AbstractBitMap64(values) 336 | elif isinstance(values, array.array) and len(values) > 0: 337 | buff = values 338 | croaring.roaring64_bitmap_add_many(self._c_bitmap, len(values), &buff[0]) 339 | else: 340 | try: 341 | size = len(values) 342 | except TypeError: # object has no length, creating a list 343 | values = list(values) 344 | size = len(values) 345 | if size > 0: 346 | buff_vect = values 347 | croaring.roaring64_bitmap_add_many(self._c_bitmap, size, &buff_vect[0]) 348 | 349 | def discard(self, uint64_t value): 350 | """ 351 | Remove an element from the bitmap. This has no effect if the element is not present. 352 | 353 | >>> bm = BitMap64([3, 12]) 354 | >>> bm.discard(3) 355 | >>> bm 356 | BitMap64([12]) 357 | >>> bm.discard(3) 358 | >>> bm 359 | BitMap64([12]) 360 | """ 361 | croaring.roaring64_bitmap_remove(self._c_bitmap, value) 362 | 363 | def remove(self, uint64_t value): 364 | """ 365 | Remove an element from the bitmap. This raises a KeyError exception if the element does not exist in the bitmap. 366 | 367 | >>> bm = BitMap64([3, 12]) 368 | >>> bm.remove(3) 369 | >>> bm 370 | BitMap64([12]) 371 | >>> bm.remove(3) 372 | Traceback (most recent call last): 373 | ... 
374 | KeyError: 3 375 | """ 376 | cdef bool test = croaring.roaring64_bitmap_remove_checked(self._c_bitmap, value) 377 | if not test: 378 | raise KeyError(value) 379 | 380 | cdef binary_iop(self, AbstractBitMap64 other, (void)func(croaring.roaring64_bitmap_t*, const croaring.roaring64_bitmap_t*) noexcept) noexcept: 381 | func(self._c_bitmap, other._c_bitmap) 382 | return self 383 | 384 | def __ior__(self, other): 385 | if self._c_bitmap == (other)._c_bitmap: 386 | return self 387 | return (self).binary_iop(other, croaring.roaring64_bitmap_or_inplace) 388 | 389 | def __iand__(self, other): 390 | if self._c_bitmap == (other)._c_bitmap: 391 | return self 392 | return (self).binary_iop(other, croaring.roaring64_bitmap_and_inplace) 393 | 394 | def __ixor__(self, other): 395 | if self._c_bitmap == (other)._c_bitmap: 396 | self.clear() 397 | return self 398 | return (self).binary_iop(other, croaring.roaring64_bitmap_xor_inplace) 399 | 400 | def __isub__(self, other): 401 | if self._c_bitmap == (other)._c_bitmap: 402 | self.clear() 403 | return self 404 | return (self).binary_iop(other, croaring.roaring64_bitmap_andnot_inplace) 405 | 406 | def intersection_update(self, *all_values): # FIXME could be more efficient 407 | """ 408 | Update the bitmap by taking its intersection with the given values. 409 | 410 | >>> bm = BitMap64([3, 12]) 411 | >>> bm.intersection_update([8, 12, 55, 18]) 412 | >>> bm 413 | BitMap64([12]) 414 | """ 415 | for values in all_values: 416 | if isinstance(values, AbstractBitMap64): 417 | self &= values 418 | else: 419 | self &= AbstractBitMap64(values) 420 | 421 | def difference_update(self, *others): 422 | """ 423 | Remove all elements of another set from this set. 
424 | 425 | >>> bm = BitMap64([1, 2, 3, 4, 5]) 426 | >>> bm.difference_update(BitMap64([1, 2, 10]), BitMap64([3, 4, 20])) 427 | >>> bm 428 | BitMap64([5]) 429 | """ 430 | self.__isub__(AbstractBitMap64.union(*others)) 431 | 432 | def symmetric_difference_update(self, other): 433 | """ 434 | Update a set with the symmetric difference of itself and another. 435 | 436 | >>> bm = BitMap64([1, 2, 3, 4]) 437 | >>> bm.symmetric_difference_update(BitMap64([1, 2, 10])) 438 | >>> bm 439 | BitMap64([3, 4, 10]) 440 | 441 | """ 442 | self.__ixor__(other) 443 | 444 | def clear(self): 445 | """ 446 | Remove all elements from this set. 447 | 448 | >>> bm = BitMap64([1, 2, 3]) 449 | >>> bm.clear() 450 | >>> bm 451 | BitMap64([]) 452 | """ 453 | self.__iand__(BitMap64()) 454 | 455 | def pop(self): 456 | """ 457 | Remove and return an arbitrary set element. 458 | Raises KeyError if the set is empty. 459 | 460 | >>> bm = BitMap64([1, 2]) 461 | >>> a = bm.pop() 462 | >>> b = bm.pop() 463 | >>> bm 464 | BitMap64([]) 465 | >>> bm.pop() 466 | Traceback (most recent call last): 467 | ... 468 | KeyError: 'pop from an empty BitMap64' 469 | 470 | """ 471 | try: 472 | value = self.min() 473 | except ValueError: 474 | raise KeyError('pop from an empty BitMap64') 475 | self.remove(value) 476 | return value 477 | 478 | def add_range(self, uint64_t range_start, uint64_t range_end): 479 | """ 480 | Add a range of values from range_start (included) to range_end (excluded). 481 | 482 | >>> bm = BitMap64([5, 7]) 483 | >>> bm.add_range(6, 9) 484 | >>> bm 485 | BitMap64([5, 6, 7, 8]) 486 | """ 487 | if range_end <= range_start or range_end == 0: 488 | return 489 | croaring.roaring64_bitmap_add_range(self._c_bitmap, range_start, range_end) 490 | 491 | def flip_inplace(self, uint64_t start, uint64_t end): 492 | """ 493 | Compute (in place) the negation of the bitmap within the specified interval. 494 | 495 | Areas outside the range are passed unchanged. 
496 | 497 | >>> bm = BitMap64([3, 12]) 498 | >>> bm.flip_inplace(10, 15) 499 | >>> bm 500 | BitMap64([3, 10, 11, 13, 14]) 501 | """ 502 | croaring.roaring64_bitmap_flip_inplace(self._c_bitmap, start, end) 503 | 504 | def remove_range(self, uint64_t range_start, uint64_t range_end): 505 | """ 506 | Remove a range of values from range_start (included) to range_end (excluded). 507 | 508 | >>> bm = BitMap64([5, 6, 7, 8, 9, 10]) 509 | >>> bm.remove_range(6, 9) 510 | >>> bm 511 | BitMap64([5, 9, 10]) 512 | """ 513 | if range_end <= range_start or range_end == 0: 514 | return 515 | croaring.roaring64_bitmap_remove_range(self._c_bitmap, range_start, range_end) -------------------------------------------------------------------------------- /pyroaring/croaring.pxd: -------------------------------------------------------------------------------- 1 | from libc.stdint cimport uint8_t, int32_t, uint32_t, uint64_t, int64_t 2 | from libcpp cimport bool 3 | 4 | cdef extern from "roaring.h": 5 | ctypedef struct roaring_array_t: 6 | pass 7 | ctypedef struct roaring_bitmap_t: 8 | roaring_array_t high_low_container 9 | ctypedef struct roaring_uint32_iterator_t: 10 | const roaring_bitmap_t *parent 11 | int32_t container_index 12 | int32_t in_container_index 13 | int32_t run_index 14 | uint32_t in_run_index 15 | uint32_t current_value 16 | bool has_value 17 | const void *container 18 | uint8_t typecode 19 | uint32_t highbits 20 | ctypedef struct roaring_statistics_t: 21 | uint32_t n_containers 22 | uint32_t n_array_containers 23 | uint32_t n_run_containers 24 | uint32_t n_bitset_containers 25 | uint32_t n_values_array_containers 26 | uint32_t n_values_run_containers 27 | uint32_t n_values_bitset_containers 28 | uint32_t n_bytes_array_containers 29 | uint32_t n_bytes_run_containers 30 | uint32_t n_bytes_bitset_containers 31 | uint32_t max_value 32 | uint32_t min_value 33 | uint64_t sum_value 34 | uint64_t cardinality 35 | ctypedef struct roaring64_statistics_t: 36 | uint64_t n_containers 
37 | uint64_t n_array_containers 38 | uint64_t n_run_containers 39 | uint64_t n_bitset_containers 40 | uint64_t n_values_array_containers 41 | uint64_t n_values_run_containers 42 | uint64_t n_values_bitset_containers 43 | uint64_t n_bytes_array_containers 44 | uint64_t n_bytes_run_containers 45 | uint64_t n_bytes_bitset_containers 46 | uint64_t max_value 47 | uint64_t min_value 48 | uint64_t cardinality 49 | 50 | roaring_bitmap_t *roaring_bitmap_create() 51 | bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) 52 | void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow) 53 | void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x) 54 | bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x) 55 | void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, const uint32_t *vals) 56 | void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max); 57 | void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x) 58 | void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) 59 | bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x) 60 | void roaring_bitmap_clear(roaring_bitmap_t *r) 61 | bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) 62 | bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) 63 | roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) 64 | bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, const roaring_bitmap_t *src) 65 | roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, uint32_t step) 66 | bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) 67 | size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) 68 | void roaring_bitmap_free(roaring_bitmap_t *r) 69 | roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) 70 | uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) 71 | uint64_t 
roaring_bitmap_range_cardinality(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) 72 | bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) 73 | bool roaring_bitmap_equals(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2) 74 | bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2) 75 | bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1, const roaring_bitmap_t *r2) 76 | void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) 77 | roaring_bitmap_t *roaring_bitmap_or_many(size_t number, const roaring_bitmap_t **x) 78 | roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 79 | void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 80 | roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 81 | void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 82 | roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 83 | void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 84 | roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 85 | void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 86 | uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 87 | uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 88 | uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 89 | uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 90 | bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 91 | double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, const roaring_bitmap_t *x2) 92 | uint32_t roaring_bitmap_minimum(const 
roaring_bitmap_t *r) 93 | uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *r) 94 | uint64_t roaring_bitmap_rank(const roaring_bitmap_t *r, uint32_t x) 95 | roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, uint64_t range_start, uint64_t range_end) 96 | void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, uint64_t range_end) 97 | roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm, int64_t offset) 98 | bool roaring_bitmap_select(const roaring_bitmap_t *r, uint32_t rank, uint32_t *element) 99 | void roaring_bitmap_statistics(const roaring_bitmap_t *r, roaring_statistics_t *stat) 100 | size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) 101 | size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf) 102 | roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) 103 | roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) 104 | bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason) 105 | roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *ra) 106 | bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it) 107 | uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) 108 | bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) 109 | void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it) 110 | 111 | # 64-bit roaring bitmaps 112 | ctypedef struct roaring64_bitmap_t: 113 | pass 114 | 115 | ctypedef struct roaring64_iterator_t: 116 | pass 117 | 118 | roaring64_bitmap_t *roaring64_bitmap_create() 119 | void roaring64_bitmap_free(roaring64_bitmap_t *r) 120 | roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) 121 | roaring64_bitmap_t *roaring64_bitmap_of_ptr(size_t n_args, const uint64_t *vals) 122 | roaring64_bitmap_t 
*roaring64_bitmap_from_range(uint64_t min, uint64_t max, uint64_t step) 123 | void roaring64_bitmap_add(roaring64_bitmap_t *r, uint64_t val) 124 | bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) 125 | void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, const uint64_t *vals) 126 | void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, uint64_t max) 127 | void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) 128 | bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) 129 | void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, const uint64_t *vals) 130 | void roaring64_bitmap_remove_range(roaring64_bitmap_t *r, uint64_t min, uint64_t max) 131 | bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) 132 | bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, uint64_t max) 133 | bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, uint64_t *element) 134 | void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, roaring64_statistics_t *stat) 135 | uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) 136 | roaring64_bitmap_t *roaring64_bitmap_flip(const roaring64_bitmap_t *r, uint64_t min, uint64_t max) 137 | void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t max) 138 | bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, uint64_t *out_index) 139 | uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) 140 | uint64_t roaring64_bitmap_range_cardinality(const roaring64_bitmap_t *r, uint64_t min, uint64_t max) 141 | bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) 142 | uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) 143 | uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) 144 | bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) 145 | size_t 
roaring64_bitmap_size_in_bytes(const roaring64_bitmap_t *r) 146 | bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 147 | bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 148 | bool roaring64_bitmap_is_strict_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 149 | void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r, uint64_t *out) 150 | roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 151 | uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 152 | void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 153 | bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 154 | double roaring64_bitmap_jaccard_index(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 155 | roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 156 | uint64_t roaring64_bitmap_or_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 157 | void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 158 | roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 159 | uint64_t roaring64_bitmap_xor_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 160 | void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 161 | roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 162 | uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 163 | void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) 164 | size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) 165 | size_t 
roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, char *buf) 166 | size_t roaring64_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) 167 | roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) 168 | bool roaring64_bitmap_internal_validate(const roaring64_bitmap_t *r, const char **reason) 169 | roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r) 170 | void roaring64_iterator_free(roaring64_iterator_t *it) 171 | bool roaring64_iterator_has_value(const roaring64_iterator_t *it) 172 | bool roaring64_iterator_advance(roaring64_iterator_t *it) 173 | uint64_t roaring64_iterator_value(const roaring64_iterator_t *it) 174 | bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint64_t val) 175 | uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint64_t count) -------------------------------------------------------------------------------- /pyroaring/croaring_version.pxi: -------------------------------------------------------------------------------- 1 | __croaring_version__ = "v4.1.1" -------------------------------------------------------------------------------- /pyroaring/frozen_bitmap.pxi: -------------------------------------------------------------------------------- 1 | cdef class FrozenBitMap(AbstractBitMap): 2 | pass 3 | 4 | cdef class FrozenBitMap64(AbstractBitMap64): 5 | pass 6 | -------------------------------------------------------------------------------- /pyroaring/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ezibenroc/PyRoaringBitMap/11683c024e4c10cefaaef7ca7d58878339c4dabb/pyroaring/py.typed -------------------------------------------------------------------------------- /pyroaring/pyroaring.pyx: -------------------------------------------------------------------------------- 1 | # distutils: language = c++ 2 | # cython: binding = True, 
language_level=3 3 | 4 | include 'version.pxi' 5 | include 'croaring_version.pxi' 6 | include 'abstract_bitmap.pxi' 7 | include 'frozen_bitmap.pxi' 8 | include 'bitmap.pxi' 9 | -------------------------------------------------------------------------------- /pyroaring/version.pxi: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.1" 2 | -------------------------------------------------------------------------------- /quick_bench.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | import sys 4 | import random 5 | import timeit 6 | 7 | from pandas import Series, DataFrame 8 | 9 | try: 10 | import tabulate 11 | has_tabulate = True 12 | except ImportError: 13 | has_tabulate = False 14 | sys.stderr.write('Warning: could not import tabulate\n') 15 | sys.stderr.write(' see https://bitbucket.org/astanin/python-tabulate\n') 16 | from pyroaring import BitMap, BitMap64 17 | 18 | classes = {'set': set, 'pyroaring (32 bits)': BitMap, 'pyroaring (64 bits)': BitMap64, } 19 | nb_exp = 30 20 | size = int(1e6) 21 | density = 0.125 22 | universe_size = int(size / density) 23 | 24 | try: 25 | from roaringbitmap import RoaringBitmap 26 | classes['roaringbitmap'] = RoaringBitmap 27 | except ImportError: 28 | sys.stderr.write('Warning: could not import roaringbitmap\n') 29 | sys.stderr.write(' see https://github.com/andreasvc/roaringbitmap/\n') 30 | 31 | try: 32 | from sortedcontainers.sortedset import SortedSet 33 | classes['sortedcontainers'] = SortedSet 34 | except ImportError: 35 | sys.stderr.write('Warning: could not import sortedcontainers\n') 36 | sys.stderr.write(' see https://github.com/grantjenks/sorted_containers\n') 37 | 38 | try: 39 | from croaring import BitSet 40 | classes['python-croaring'] = BitSet 41 | except ImportError: 42 | sys.stderr.write('Warning: could not import croaring\n') 43 | sys.stderr.write(' see 
https://github.com/sunzhaoping/python-croaring\n') 44 | 45 | import_str = 'import array, pickle; from __main__ import %s' % (','.join( 46 | ['get_list', 'get_range', 'random', 'size', 'universe_size'] 47 | + [cls.__name__ for cls in classes.values() if cls is not set])) 48 | 49 | 50 | def run_exp(stmt, setup, number): 51 | setup = '%s ; %s' % (import_str, setup) 52 | try: 53 | return timeit.timeit(stmt=stmt, setup=setup, number=number) / number 54 | except Exception: 55 | return float('nan') 56 | 57 | 58 | def get_range(): 59 | r = (0, universe_size, int(1 / density)) 60 | try: 61 | return xrange(*r) 62 | except NameError: 63 | return range(*r) 64 | 65 | 66 | def get_list(): 67 | return random.sample(range(universe_size), size) 68 | 69 | 70 | constructor = 'x={class_name}(values)' 71 | simple_setup_constructor = 'x={class_name}(get_list());val=random.randint(0, universe_size)' 72 | double_setup_constructor = 'x={class_name}(get_list()); y={class_name}(get_list())' 73 | equal_setup_constructor = 'l=get_list();x={class_name}(l); y={class_name}(l)' 74 | experiments = [ 75 | # Constructors 76 | ('range constructor', ('values=get_range()', constructor)), 77 | ('ordered list constructor', ('values=get_list(); values.sort()', constructor)), 78 | ('list constructor', ('values=get_list()', constructor)), 79 | ('ordered array constructor', ('l=get_list(); l.sort(); values=array.array("I", l)', constructor)), 80 | ('array constructor', ('values=array.array("I", get_list())', constructor)), 81 | # Simple operations 82 | ('element addition', (simple_setup_constructor, 'x.add(val)')), 83 | ('element removal', (simple_setup_constructor, 'x.discard(val)')), 84 | ('membership test', (simple_setup_constructor, 'val in x')), 85 | # Binary operations 86 | ('union', (double_setup_constructor, 'z=x|y')), 87 | ('intersection', (double_setup_constructor, 'z=x&y')), 88 | ('difference', (double_setup_constructor, 'z=x-y')), 89 | ('symmetric diference', (double_setup_constructor, 'z=x^y')), 
90 | ('equality test', (equal_setup_constructor, 'x==y')), 91 | ('subset test', (equal_setup_constructor, 'x<=y')), 92 | # Export 93 | ('conversion to list', (simple_setup_constructor, 'list(x)')), 94 | ('pickle dump & load', (simple_setup_constructor, 'pickle.loads(pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL))')), 95 | ('"naive" conversion to array', (simple_setup_constructor, 'array.array("I", x)')), 96 | ('"optimized" conversion to array', (simple_setup_constructor, 'x.to_array()')), 97 | # Items 98 | ('selection', (simple_setup_constructor, 'x[int(size/2)]')), 99 | ('contiguous slice', (simple_setup_constructor, 'x[int(size/4):int(3*size/4):1]')), 100 | ('slice', (simple_setup_constructor, 'x[int(size/4):int(3*size/4):3]')), 101 | ('small slice', (simple_setup_constructor, 'x[int(size/100):int(3*size/100):3]')), 102 | ] 103 | exp_dict = dict(experiments) 104 | 105 | 106 | def run(cls, op): 107 | cls_name = classes[cls].__name__ 108 | setup = exp_dict[op][0].format(class_name=cls_name) 109 | stmt = exp_dict[op][1].format(class_name=cls_name) 110 | result = run_exp(stmt=stmt, setup=setup, number=nb_exp) 111 | return result 112 | 113 | 114 | def run_all(): 115 | all_results = [] 116 | for op, _ in experiments: 117 | sys.stderr.write('experiment: %s\n' % op) 118 | result = {'operation': op} 119 | for cls in random.sample(list(classes), len(classes)): 120 | result[cls] = run(cls, op) 121 | all_results.append(result) 122 | return DataFrame(all_results).sort_index(axis=1) 123 | 124 | 125 | if __name__ == '__main__': 126 | df = run_all() 127 | print() 128 | if has_tabulate: 129 | print(tabulate.tabulate(df, headers='keys', tablefmt='rst', showindex='never', floatfmt=".2e")) 130 | else: 131 | print(df) 132 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | .eggs 4 | .git 5 | .pybuild 6 | .tox 7 | __pycache__ 8 | 
build 9 | dist 10 | ignore = 11 | # W503 and W504 conflict; ignore the one that disagrees with recent PEP8. 12 | W503 13 | 14 | # try to keep it reasonable, but this allows us to push it a bit when needed. 15 | max_line_length = 150 16 | 17 | noqa-require-code = true 18 | 19 | 20 | [isort] 21 | atomic = True 22 | balanced_wrapping = True 23 | combine_as_imports = True 24 | include_trailing_comma = True 25 | length_sort = True 26 | multi_line_output = 3 27 | order_by_type = False 28 | 29 | default_section = THIRDPARTY 30 | sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER 31 | 32 | 33 | [mypy] 34 | disallow_any_explicit = True 35 | disallow_any_generics = True 36 | disallow_subclassing_any = True 37 | 38 | disallow_untyped_defs = True 39 | check_untyped_defs = True 40 | disallow_untyped_decorators = True 41 | 42 | no_implicit_optional = True 43 | strict_optional = True 44 | 45 | warn_redundant_casts = True 46 | warn_unused_ignores = True 47 | warn_return_any = True 48 | warn_unreachable = True 49 | 50 | implicit_reexport = False 51 | strict_equality = True 52 | 53 | scripts_are_modules = True 54 | warn_unused_configs = True 55 | 56 | show_error_codes = True 57 | enable_error_code = ignore-without-code 58 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | 3 | import os 4 | import platform 5 | from distutils.sysconfig import get_config_vars 6 | 7 | from setuptools import setup 8 | from setuptools.extension import Extension 9 | 10 | PKG_DIR = 'pyroaring' 11 | 12 | PLATFORM_WINDOWS = (platform.system() == 'Windows') 13 | PLATFORM_MACOSX = (platform.system() == 'Darwin') 14 | 15 | # Read version file from the src 16 | with open("pyroaring/version.pxi") as fp: 17 | exec(fp.read()) 18 | VERSION = __version__ # noqa: F821 19 | 20 | 21 | # Remove -Wstrict-prototypes option 22 | # See http://stackoverflow.com/a/29634231/4110059 23 | if not PLATFORM_WINDOWS: 24 | cfg_vars = get_config_vars() 25 | for key, value in cfg_vars.items(): 26 | if type(value) is str: 27 | cfg_vars[key] = value.replace("-Wstrict-prototypes", "") 28 | 29 | try: 30 | with open('README.rst') as f: 31 | long_description = ''.join(f.readlines()) 32 | except (IOError, ImportError, RuntimeError): 33 | print('Could not generate long description.') 34 | long_description = '' 35 | 36 | 37 | if PLATFORM_WINDOWS: 38 | pyroaring_module = Extension( 39 | 'pyroaring', 40 | sources=[os.path.join(PKG_DIR, 'pyroaring.pyx'), os.path.join(PKG_DIR, 'roaring.c')], 41 | language='c++', 42 | ) 43 | libraries = None 44 | else: 45 | compile_args = ['-D__STDC_LIMIT_MACROS', '-D__STDC_CONSTANT_MACROS', '-D _GLIBCXX_ASSERTIONS'] 46 | if PLATFORM_MACOSX: 47 | compile_args.append('-mmacosx-version-min=10.14') 48 | if 'DEBUG' in os.environ: 49 | compile_args.extend(['-O0', '-g']) 50 | else: 51 | compile_args.append('-O3') 52 | if 'ARCHI' in os.environ: 53 | if os.environ['ARCHI'] != "generic": 54 | compile_args.extend(['-march=%s' % os.environ['ARCHI']]) 55 | # The '-march=native' flag is not universally allowed. In particular, it 56 | # will systematically fail on aarch64 systems (like the new Apple M1 systems). It 57 | # also creates troubles under macOS with pip installs and requires ugly workarounds. 
58 | # The best way to handle people who want to use -march=native is to ask them 59 | # to pass ARCHI=native to their build process. 60 | # else: 61 | # compile_args.append('-march=native') 62 | 63 | pyroaring_module = Extension( 64 | 'pyroaring', 65 | sources=[os.path.join(PKG_DIR, 'pyroaring.pyx')], 66 | extra_compile_args=compile_args + ["-std=c++11"], 67 | language='c++', 68 | ) 69 | 70 | # Because we compile croaring with a c compiler with sometimes incompatible arguments, 71 | # define croaring compilation with an extra argument for the c11 standard, which is 72 | # required for atomic support. 73 | croaring = ( 74 | 'croaring', 75 | { 76 | 'sources': [os.path.join(PKG_DIR, 'roaring.c')], 77 | "extra_compile_args": compile_args + ["-std=c11"], 78 | }, 79 | ) 80 | libraries = [croaring] 81 | 82 | setup( 83 | name='pyroaring', 84 | ext_modules=[pyroaring_module], 85 | libraries=libraries, 86 | package_data={'pyroaring': ['py.typed', '__init__.pyi']}, 87 | packages=['pyroaring'], 88 | version=VERSION, 89 | description='Library for handling efficiently sorted integer sets.', 90 | long_description=long_description, 91 | setup_requires=['cython>=3.0.2'], 92 | url='https://github.com/Ezibenroc/PyRoaringBitMap', 93 | author='Tom Cornebize', 94 | author_email='tom.cornebize@gmail.com', 95 | license='MIT', 96 | classifiers=[ 97 | 'License :: OSI Approved :: MIT License', 98 | 'Intended Audience :: Developers', 99 | 'Operating System :: POSIX :: Linux', 100 | 'Operating System :: MacOS :: MacOS X', 101 | 'Operating System :: Microsoft :: Windows', 102 | 'Programming Language :: Python :: 3.8', 103 | 'Programming Language :: Python :: 3.9', 104 | 'Programming Language :: Python :: 3.10', 105 | 'Programming Language :: Python :: 3.11', 106 | 'Programming Language :: Python :: 3.12', 107 | 'Programming Language :: Python :: 3.13', 108 | ], 109 | ) 110 | -------------------------------------------------------------------------------- /test.py: 
-------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | 3 | from __future__ import annotations 4 | 5 | import pytest 6 | import os 7 | import re 8 | import sys 9 | import array 10 | import pickle 11 | import random 12 | import operator 13 | import unittest 14 | import functools 15 | import base64 16 | from typing import TYPE_CHECKING 17 | from collections.abc import Set, Callable, Iterable, Iterator 18 | 19 | import hypothesis.strategies as st 20 | from hypothesis import given, assume, errors, settings, Verbosity, HealthCheck 21 | 22 | import pyroaring 23 | 24 | 25 | settings.register_profile("ci", settings( 26 | max_examples=100, deadline=None)) 27 | settings.register_profile("dev", settings(max_examples=10, deadline=None)) 28 | settings.register_profile("debug", settings( 29 | max_examples=10, verbosity=Verbosity.verbose, deadline=None)) 30 | try: 31 | env = os.getenv('HYPOTHESIS_PROFILE', 'dev') 32 | settings.load_profile(env) 33 | except errors.InvalidArgument: 34 | sys.exit(f'Unknown hypothesis profile: {env}') 35 | 36 | bitsize = os.getenv('ROARING_BITSIZE', '32') 37 | if bitsize not in ('32', '64'): 38 | sys.exit(f'Unknown bit size: {bitsize}') 39 | is_32_bits = (bitsize=="32") 40 | 41 | if is_32_bits: 42 | from pyroaring import BitMap, FrozenBitMap, AbstractBitMap 43 | else: 44 | from pyroaring import BitMap64 as BitMap, FrozenBitMap64 as FrozenBitMap, AbstractBitMap64 as AbstractBitMap # type: ignore[assignment] 45 | # Note: we could not find a way to type-check both the 32-bit and the 64-bit implementations using the same file. 46 | # For simplicity, we therefore decided to only type-check the 32-bit version. 47 | # To type-check the 64-bit version, remove the above if statement to only keep the else part 48 | # (i.e. directly import BitMap64 as BitMap etc.)
49 | 50 | uint18 = st.integers(min_value=0, max_value=2**18) 51 | uint32 = st.integers(min_value=0, max_value=2**32 - 1) 52 | uint64 = st.integers(min_value=0, max_value=2**64 - 1) 53 | large_uint64 = st.integers(min_value=2**32, max_value=2**64 - 1) 54 | integer = st.integers(min_value=0, max_value=2**31 - 1) 55 | int64 = st.integers(min_value=-2**63, max_value=2**63 - 1) 56 | 57 | range_max_size = 2**18 58 | 59 | range_big_step = uint18.flatmap(lambda n: 60 | st.builds(range, st.just(n), 61 | st.integers( 62 | min_value=n + 1, max_value=n + range_max_size), 63 | st.integers(min_value=2**8, max_value=range_max_size // 8))) 64 | 65 | range_small_step = uint18.flatmap(lambda n: 66 | st.builds(range, st.just(n), 67 | st.integers( 68 | min_value=n + 1, max_value=n + range_max_size), 69 | st.integers(min_value=1, max_value=2**8))) 70 | 71 | range_power2_step = uint18.flatmap(lambda n: 72 | st.builds(range, st.just(n), 73 | st.integers( 74 | min_value=n + 1, max_value=n + range_max_size), 75 | st.integers(min_value=0, max_value=8).flatmap( 76 | lambda n: st.just(2**n), 77 | ))) 78 | 79 | range_huge_interval = uint18.flatmap(lambda n: 80 | st.builds(range, st.just(n), 81 | st.integers( 82 | min_value=n+2**52, max_value=n+2**63), 83 | st.integers(min_value=2**49, max_value=2**63))) 84 | 85 | # Build a list of values of the form a * 2**16 + b with b in [-2,+2] 86 | # In other words, numbers that are close (or equal) to a multiple of 2**16 87 | multiple_2p16 = st.sets( 88 | st.builds( 89 | int.__add__, st.builds( 90 | int.__mul__, 91 | st.integers(min_value=1, max_value=2**32), 92 | st.just(2**16) 93 | ), 94 | st.integers(min_value=-2, max_value=+2) 95 | ), 96 | max_size=100) 97 | 98 | hyp_range = range_big_step | range_small_step | range_power2_step | st.sampled_from( 99 | [range(0, 0)]) # last one is an empty range 100 | 101 | if not is_32_bits: 102 | hyp_range = hyp_range | range_huge_interval | multiple_2p16 103 | 104 | # would be great to build a true random set, but 
it takes too long and hypothesis does a timeout... 105 | hyp_set: st.SearchStrategy[set[int]] = st.builds(set, hyp_range) 106 | if is_32_bits: 107 | hyp_array = st.builds(lambda x: array.array('I', x), hyp_range) 108 | else: 109 | hyp_array = st.builds(lambda x: array.array('Q', x), hyp_range) 110 | hyp_collection = hyp_range | hyp_set | hyp_array 111 | hyp_many_collections = st.lists(hyp_collection, min_size=1, max_size=20) 112 | 113 | bitmap_cls = st.sampled_from([BitMap, FrozenBitMap]) 114 | 115 | if TYPE_CHECKING: 116 | from typing_extensions import TypeAlias 117 | 118 | HypCollection: TypeAlias = range | set[int] | array.array[int] | list[int] 119 | EitherBitMap = BitMap | FrozenBitMap 120 | EitherSet = set | frozenset # type: ignore[type-arg] 121 | 122 | 123 | class Util: 124 | 125 | comparison_set = random.sample( 126 | range(2**8), 100) + random.sample(range(2**31 - 1), 50) 127 | 128 | def compare_with_set(self, bitmap: AbstractBitMap, expected_set: set[int]) -> None: 129 | assert len(bitmap) == len(expected_set) 130 | assert bool(bitmap) == bool(expected_set) 131 | assert set(bitmap) == expected_set 132 | assert sorted(list(bitmap)) == sorted(list(expected_set)) 133 | assert BitMap(expected_set, copy_on_write=bitmap.copy_on_write) == bitmap 134 | for value in self.comparison_set: 135 | if value in expected_set: 136 | assert value in bitmap 137 | else: 138 | assert value not in bitmap 139 | 140 | @staticmethod 141 | def bitmap_sample(bitmap: AbstractBitMap, size: int) -> list[int]: 142 | indices = random.sample(range(len(bitmap)), size) 143 | return [bitmap[i] for i in indices] 144 | 145 | def assert_is_not(self, bitmap1: AbstractBitMap, bitmap2: AbstractBitMap) -> None: 146 | if isinstance(bitmap1, BitMap): 147 | if bitmap1: 148 | bitmap1.remove(bitmap1[0]) 149 | else: 150 | bitmap1.add(27) 151 | elif isinstance(bitmap2, BitMap): 152 | if bitmap2: 153 | bitmap2.remove(bitmap1[0]) 154 | else: 155 | bitmap2.add(27) 156 | else: # The two are non-mutable, 
cannot do anything... 157 | return 158 | if bitmap1 == bitmap2: 159 | pytest.fail( 160 | 'The two bitmaps are identical (modifying one also modifies the other).') 161 | 162 | 163 | class TestBasic(Util): 164 | 165 | @given(hyp_collection, st.booleans()) 166 | @settings(deadline=None) 167 | def test_basic(self, values: HypCollection, cow: bool) -> None: 168 | bitmap = BitMap(copy_on_write=cow) 169 | if is_32_bits: 170 | assert bitmap.copy_on_write == cow 171 | expected_set: set[int] = set() 172 | self.compare_with_set(bitmap, expected_set) 173 | values = list(values) 174 | random.shuffle(values) 175 | size = len(values) 176 | for value in values[:size // 2]: 177 | bitmap.add(value) 178 | expected_set.add(value) 179 | self.compare_with_set(bitmap, expected_set) 180 | for value in values[size // 2:]: 181 | bitmap.add(value) 182 | with pytest.raises(KeyError): 183 | bitmap.add_checked(value) 184 | expected_set.add(value) 185 | self.compare_with_set(bitmap, expected_set) 186 | for value in values[:size // 2]: 187 | bitmap.remove(value) 188 | expected_set.remove(value) 189 | with pytest.raises(KeyError): 190 | bitmap.remove(value) 191 | self.compare_with_set(bitmap, expected_set) 192 | for value in values[size // 2:]: 193 | bitmap.discard(value) 194 | # check that we can discard element not in the bitmap 195 | bitmap.discard(value) 196 | expected_set.discard(value) 197 | self.compare_with_set(bitmap, expected_set) 198 | 199 | @given(bitmap_cls, bitmap_cls, hyp_collection, st.booleans()) 200 | def test_bitmap_equality( 201 | self, 202 | cls1: type[EitherBitMap], 203 | cls2: type[EitherBitMap], 204 | values: HypCollection, 205 | cow: bool, 206 | ) -> None: 207 | bitmap1 = cls1(values, copy_on_write=cow) 208 | bitmap2 = cls2(values, copy_on_write=cow) 209 | assert bitmap1 == bitmap2 210 | 211 | @given(bitmap_cls, bitmap_cls, hyp_collection, hyp_collection, st.booleans()) 212 | def test_bitmap_unequality( 213 | self, 214 | cls1: type[EitherBitMap], 215 | cls2: 
type[EitherBitMap], 216 | values1: HypCollection, 217 | values2: HypCollection, 218 | cow: bool, 219 | ) -> None: 220 | assume(set(values1) != set(values2)) 221 | bitmap1 = cls1(values1, copy_on_write=cow) 222 | bitmap2 = cls2(values2, copy_on_write=cow) 223 | assert bitmap1 != bitmap2 224 | 225 | @given(bitmap_cls, hyp_collection, st.booleans()) 226 | def test_constructor_values( 227 | self, 228 | cls: type[EitherBitMap], 229 | values: HypCollection, 230 | cow: bool, 231 | ) -> None: 232 | bitmap = cls(values, copy_on_write=cow) 233 | expected_set = set(values) 234 | self.compare_with_set(bitmap, expected_set) 235 | 236 | @given(bitmap_cls, bitmap_cls, hyp_collection, uint32, st.booleans(), st.booleans()) 237 | def test_constructor_copy( 238 | self, 239 | cls1: type[EitherBitMap], 240 | cls2: type[EitherBitMap], 241 | values: HypCollection, 242 | other_value: int, 243 | cow1: bool, 244 | cow2: bool, 245 | ) -> None: 246 | bitmap1 = cls1(values, copy_on_write=cow1) 247 | # should be robust even if cow2 != cow1 248 | bitmap2 = cls2(bitmap1, copy_on_write=cow2) 249 | assert bitmap1 == bitmap2 250 | self.assert_is_not(bitmap1, bitmap2) 251 | 252 | @given(hyp_collection, hyp_collection, st.booleans()) 253 | def test_update(self, initial_values: HypCollection, new_values: HypCollection, cow: bool) -> None: 254 | bm = BitMap(initial_values, cow) 255 | expected = BitMap(bm) 256 | bm.update(new_values) 257 | expected |= BitMap(new_values, copy_on_write=cow) 258 | assert bm == expected 259 | 260 | @given(hyp_collection, hyp_collection, st.booleans()) 261 | def test_intersection_update(self, initial_values: HypCollection, new_values: HypCollection, cow: bool) -> None: 262 | bm = BitMap(initial_values, cow) 263 | expected = BitMap(bm) 264 | bm.intersection_update(new_values) 265 | expected &= BitMap(new_values, copy_on_write=cow) 266 | assert bm == expected 267 | 268 | def wrong_op(self, op: Callable[[BitMap, int], object]) -> None: 269 | bitmap = BitMap() 270 | with 
pytest.raises(OverflowError): 271 | op(bitmap, -3) 272 | with pytest.raises(OverflowError): 273 | if is_32_bits: 274 | op(bitmap, 2**33) 275 | else: 276 | op(bitmap, 2**65) 277 | with pytest.raises(TypeError): 278 | op(bitmap, 'bla') # type: ignore[arg-type] 279 | 280 | def test_wrong_add(self) -> None: 281 | self.wrong_op(lambda bitmap, value: bitmap.add(value)) 282 | 283 | def test_wrong_contain(self) -> None: 284 | self.wrong_op(lambda bitmap, value: bitmap.__contains__(value)) 285 | 286 | @given(bitmap_cls) 287 | def test_wrong_constructor_values(self, cls: type[EitherBitMap]) -> None: 288 | with pytest.raises(TypeError): # this should fire a type error! 289 | cls([3, 'bla', 3, 42]) # type: ignore[list-item] 290 | bad_range = range(-3, 0) 291 | with pytest.raises(OverflowError): 292 | cls(bad_range) 293 | 294 | @given(bitmap_cls, hyp_collection, st.booleans()) 295 | def test_to_array( 296 | self, 297 | cls: type[EitherBitMap], 298 | values: HypCollection, 299 | cow: bool, 300 | ) -> None: 301 | bitmap = cls(values, copy_on_write=cow) 302 | result = bitmap.to_array() 303 | if is_32_bits: 304 | expected = array.array('I', sorted(values)) 305 | else: 306 | expected = array.array('Q', sorted(values)) 307 | assert result == expected 308 | 309 | @given(bitmap_cls, st.booleans(), st.integers(min_value=0, max_value=100)) 310 | def test_constructor_generator(self, cls: type[EitherBitMap], cow: bool, size: int) -> None: 311 | def generator(n: int) -> Iterator[int]: 312 | for i in range(n): 313 | yield i 314 | bitmap = cls(generator(size), copy_on_write=cow) 315 | assert bitmap == cls(range(size), copy_on_write=cow) 316 | 317 | 318 | def slice_arg(n: int) -> st.SearchStrategy[int]: 319 | return st.integers(min_value=-n, max_value=n) 320 | 321 | 322 | class TestSelectRank(Util): 323 | 324 | @given(bitmap_cls, hyp_collection, st.booleans()) 325 | def test_simple_select( 326 | self, 327 | cls: type[EitherBitMap], 328 | values: HypCollection, 329 | cow: bool, 330 | ) -> None: 
331 | bitmap = cls(values, copy_on_write=cow) 332 | values = list(bitmap) # enforce sorted order 333 | for i in range(-len(values), len(values)): 334 | assert bitmap[i] == values[i] 335 | 336 | @given(bitmap_cls, hyp_collection, uint32, st.booleans()) 337 | def test_wrong_selection( 338 | self, 339 | cls: type[EitherBitMap], 340 | values: HypCollection, 341 | n: int, 342 | cow: bool, 343 | ) -> None: 344 | bitmap = cls(values, cow) 345 | with pytest.raises(IndexError): 346 | bitmap[len(values)] 347 | with pytest.raises(IndexError): 348 | bitmap[n + len(values)] 349 | with pytest.raises(IndexError): 350 | bitmap[-len(values) - 1] 351 | with pytest.raises(IndexError): 352 | bitmap[-n - len(values) - 1] 353 | 354 | def check_slice( 355 | self, 356 | cls: type[EitherBitMap], 357 | values: HypCollection, 358 | start: int | None, 359 | stop: int | None, 360 | step: int | None, 361 | cow: bool, 362 | ) -> None: 363 | bitmap = cls(values, copy_on_write=cow) 364 | values = list(bitmap) # enforce sorted order 365 | expected = values[start:stop:step] 366 | expected.sort() 367 | observed = list(bitmap[start:stop:step]) 368 | assert expected == observed 369 | 370 | @given(bitmap_cls, hyp_collection, slice_arg(2**12), slice_arg(2**12), slice_arg(2**5), st.booleans()) 371 | def test_slice_select_non_empty( 372 | self, 373 | cls: type[EitherBitMap], 374 | values: HypCollection, 375 | start: int, 376 | stop: int, 377 | step: int, 378 | cow: bool, 379 | ) -> None: 380 | assume(step != 0) 381 | assume(len(range(start, stop, step)) > 0) 382 | self.check_slice(cls, values, start, stop, step, cow) 383 | 384 | @given(bitmap_cls, hyp_collection, slice_arg(2**12), slice_arg(2**12), slice_arg(2**5), st.booleans()) 385 | def test_slice_select_empty( 386 | self, 387 | cls: type[EitherBitMap], 388 | values: HypCollection, 389 | start: int, 390 | stop: int, 391 | step: int, 392 | cow: bool, 393 | ) -> None: 394 | assume(step != 0) 395 | assume(len(range(start, stop, step)) == 0) 396 | 
self.check_slice(cls, values, start, stop, step, cow) 397 | 398 | @given(bitmap_cls, hyp_collection, slice_arg(2**12) | st.none(), slice_arg(2**12) | st.none(), slice_arg(2**5) | st.none(), st.booleans()) 399 | def test_slice_select_none( 400 | self, 401 | cls: type[EitherBitMap], 402 | values: HypCollection, 403 | start: int | None, 404 | stop: int | None, 405 | step: int | None, 406 | cow: bool, 407 | ) -> None: 408 | assume(step != 0) 409 | self.check_slice(cls, values, start, stop, step, cow) 410 | 411 | @given(bitmap_cls, hyp_collection, st.booleans()) 412 | def test_simple_rank( 413 | self, 414 | cls: type[EitherBitMap], 415 | values: HypCollection, 416 | cow: bool, 417 | ) -> None: 418 | bitmap = cls(values, copy_on_write=cow) 419 | for i, value in enumerate(sorted(values)): 420 | assert bitmap.rank(value) == i + 1 421 | 422 | @given(bitmap_cls, hyp_collection, uint18, st.booleans()) 423 | def test_general_rank( 424 | self, 425 | cls: type[EitherBitMap], 426 | values: HypCollection, 427 | element: int, 428 | cow: bool, 429 | ) -> None: 430 | bitmap = cls(values, copy_on_write=cow) 431 | observed_rank = bitmap.rank(element) 432 | expected_rank = len([n for n in set(values) if n <= element]) 433 | assert expected_rank == observed_rank 434 | 435 | @given(bitmap_cls, hyp_collection, st.booleans()) 436 | def test_min( 437 | self, 438 | cls: type[EitherBitMap], 439 | values: HypCollection, 440 | cow: bool, 441 | ) -> None: 442 | assume(len(values) > 0) 443 | bitmap = cls(values, copy_on_write=cow) 444 | assert bitmap.min() == min(values) 445 | 446 | @given(bitmap_cls) 447 | def test_wrong_min(self, cls: type[EitherBitMap]) -> None: 448 | bitmap = cls() 449 | with pytest.raises(ValueError): 450 | bitmap.min() 451 | 452 | @given(bitmap_cls, hyp_collection, st.booleans()) 453 | def test_max( 454 | self, 455 | cls: type[EitherBitMap], 456 | values: HypCollection, 457 | cow: bool, 458 | ) -> None: 459 | assume(len(values) > 0) 460 | bitmap = cls(values, 
copy_on_write=cow) 461 | assert bitmap.max() == max(values) 462 | 463 | @given(bitmap_cls) 464 | def test_wrong_max(self, cls: type[EitherBitMap]) -> None: 465 | bitmap = cls() 466 | with pytest.raises(ValueError): 467 | bitmap.max() 468 | 469 | @given(bitmap_cls, hyp_collection, uint32, st.booleans()) 470 | def test_next_set_bit( 471 | self, 472 | cls: type[EitherBitMap], 473 | values: HypCollection, 474 | other_value: int, 475 | cow: bool, 476 | ) -> None: 477 | assume(len(values) > 0) 478 | bitmap = cls(values, copy_on_write=cow) 479 | try: 480 | expected = next(i for i in sorted(values) if i >= other_value) 481 | assert bitmap.next_set_bit(other_value) == expected 482 | except StopIteration: 483 | with pytest.raises(ValueError): 484 | bitmap.next_set_bit(other_value) 485 | 486 | @given(bitmap_cls) 487 | def test_wrong_next_set_bit(self, cls: type[EitherBitMap]) -> None: 488 | bitmap = cls() 489 | with pytest.raises(ValueError): 490 | bitmap.next_set_bit(0) 491 | 492 | 493 | class TestBinaryOperations(Util): 494 | set1: Set[int] 495 | set2: Set[int] 496 | 497 | @given(bitmap_cls, bitmap_cls, hyp_collection, hyp_collection, st.booleans()) 498 | def test_binary_op( 499 | self, 500 | cls1: type[EitherBitMap], 501 | cls2: type[EitherBitMap], 502 | values1: HypCollection, 503 | values2: HypCollection, 504 | cow: bool, 505 | ) -> None: 506 | for op in [operator.or_, operator.and_, operator.xor, operator.sub]: 507 | self.set1 = set(values1) 508 | self.set2 = set(values2) 509 | self.bitmap1 = cls1(values1, cow) 510 | self.bitmap2 = cls2(values2, cow) 511 | old_bitmap1 = cls1(self.bitmap1) 512 | old_bitmap2 = cls2(self.bitmap2) 513 | result_set = op(self.set1, self.set2) 514 | result_bitmap = op(self.bitmap1, self.bitmap2) 515 | assert self.bitmap1 == old_bitmap1 516 | assert self.bitmap2 == old_bitmap2 517 | self.compare_with_set(result_bitmap, result_set) 518 | assert type(self.bitmap1) == type(result_bitmap) 519 | 520 | @given(bitmap_cls, hyp_collection, 
hyp_collection, st.booleans()) 521 | def test_binary_op_inplace( 522 | self, 523 | cls2: type[EitherBitMap], 524 | values1: HypCollection, 525 | values2: HypCollection, 526 | cow: bool, 527 | ) -> None: 528 | for op in [operator.ior, operator.iand, operator.ixor, operator.isub]: 529 | self.set1 = set(values1) 530 | self.set2 = set(values2) 531 | self.bitmap1 = BitMap(values1, cow) 532 | original = self.bitmap1 533 | self.bitmap2 = cls2(values2, cow) 534 | old_bitmap2 = cls2(self.bitmap2) 535 | op(self.set1, self.set2) 536 | op(self.bitmap1, self.bitmap2) 537 | assert original is self.bitmap1 538 | assert self.bitmap2 == old_bitmap2 539 | self.compare_with_set(self.bitmap1, self.set1) 540 | 541 | @given(hyp_collection, st.booleans()) 542 | def test_binary_op_inplace_self( 543 | self, 544 | values: HypCollection, 545 | cow: bool, 546 | ) -> None: 547 | for op in [operator.ior, operator.iand, operator.ixor, operator.isub]: 548 | self.set = set(values) 549 | self.bitmap = BitMap(values, cow) 550 | original = self.bitmap 551 | op(self.set, self.set) 552 | op(self.bitmap, self.bitmap) 553 | assert original is self.bitmap 554 | self.compare_with_set(self.bitmap, self.set) 555 | 556 | @given(bitmap_cls, hyp_collection, hyp_collection, st.booleans()) 557 | def test_binary_op_inplace_frozen( 558 | self, 559 | cls2: type[EitherBitMap], 560 | values1: HypCollection, 561 | values2: HypCollection, 562 | cow: bool, 563 | ) -> None: 564 | for op in [operator.ior, operator.iand, operator.ixor, operator.isub]: 565 | self.set1 = frozenset(values1) 566 | self.set2 = frozenset(values2) 567 | 568 | self.bitmap1 = FrozenBitMap(values1, cow) 569 | old_bitmap1 = FrozenBitMap(self.bitmap1) 570 | self.bitmap2 = cls2(values2, cow) 571 | old_bitmap2 = cls2(self.bitmap2) 572 | 573 | new_set = op(self.set1, self.set2) 574 | new_bitmap = op(self.bitmap1, self.bitmap2) 575 | 576 | assert self.bitmap1 == old_bitmap1 577 | assert self.bitmap2 == old_bitmap2 578 | 579 | 
self.compare_with_set(new_bitmap, new_set) 580 | 581 | 582 | class TestComparison(Util): 583 | 584 | @given(bitmap_cls, bitmap_cls, hyp_collection, hyp_collection, st.booleans()) 585 | def test_comparison( 586 | self, 587 | cls1: type[EitherBitMap], 588 | cls2: type[EitherBitMap], 589 | values1: HypCollection, 590 | values2: HypCollection, 591 | cow: bool, 592 | ) -> None: 593 | for op in [operator.le, operator.ge, operator.lt, operator.gt, operator.eq, operator.ne]: 594 | self.set1 = set(values1) 595 | self.set2 = set(values2) 596 | self.bitmap1 = cls1(values1, copy_on_write=cow) 597 | self.bitmap2 = cls2(values2, copy_on_write=cow) 598 | assert op(self.bitmap1, self.bitmap1) == \ 599 | op(self.set1, self.set1) 600 | assert op(self.bitmap1, self.bitmap2) == \ 601 | op(self.set1, self.set2) 602 | assert op(self.bitmap1 | self.bitmap2, self.bitmap2) == \ 603 | op(self.set1 | self.set2, self.set2) 604 | assert op(self.set1, self.set1 | self.set2) == \ 605 | op(self.set1, self.set1 | self.set2) 606 | 607 | @given(bitmap_cls, hyp_collection, st.booleans()) 608 | def test_comparison_other_objects(self, cls: type[EitherBitMap], values: HypCollection, cow: bool) -> None: 609 | for op in [operator.le, operator.ge, operator.lt, operator.gt]: 610 | bm = cls(values, copy_on_write=cow) 611 | with pytest.raises(TypeError): 612 | op(bm, 42) 613 | with pytest.raises(TypeError): 614 | op(bm, None) 615 | 616 | @given(bitmap_cls, bitmap_cls, hyp_collection, hyp_collection, st.booleans()) 617 | def test_intersect( 618 | self, 619 | cls1: type[EitherBitMap], 620 | cls2: type[EitherBitMap], 621 | values1: HypCollection, 622 | values2: HypCollection, 623 | cow: bool, 624 | ) -> None: 625 | bm1 = cls1(values1, copy_on_write=cow) 626 | bm2 = cls2(values2, copy_on_write=cow) 627 | assert (bm1.intersect(bm2)) == (len(bm1 & bm2) > 0) 628 | 629 | @given(bitmap_cls, hyp_collection, st.booleans()) 630 | def test_eq_other_objects(self, cls: type[EitherBitMap], values: HypCollection, cow: bool) 
-> None: 631 | bm = cls(values, copy_on_write=cow) 632 | 633 | assert not bm == 42 634 | assert cls.__eq__(bm, 42) is NotImplemented 635 | assert not bm == None# noqa: E711 636 | assert cls.__eq__(bm, None) is NotImplemented 637 | 638 | @given(bitmap_cls, hyp_collection, st.booleans()) 639 | def test_ne_other_objects(self, cls: type[EitherBitMap], values: HypCollection, cow: bool) -> None: 640 | bm = cls(values, copy_on_write=cow) 641 | 642 | assert bm != 42 643 | assert cls.__ne__(bm, 42) is NotImplemented 644 | assert bm != None# noqa: E711 645 | assert cls.__ne__(bm, None) is NotImplemented 646 | 647 | 648 | class TestRange(Util): 649 | @given(bitmap_cls, hyp_collection, st.booleans(), uint32, uint32) 650 | def test_contains_range_arbitrary( 651 | self, 652 | cls: type[EitherBitMap], 653 | values: HypCollection, 654 | cow: bool, 655 | start: int, 656 | end: int, 657 | ) -> None: 658 | bm = cls(values) 659 | expected = (cls(range(start, end)) <= bm) 660 | assert expected == bm.contains_range(start, end) 661 | 662 | @given(bitmap_cls, st.booleans(), uint32, uint32) 663 | def test_contains_range(self, cls: type[EitherBitMap], cow: bool, start: int, end: int) -> None: 664 | assume(start < end) 665 | assert cls(range(start, end)).contains_range(start, end) 666 | assert cls(range(start, end)).contains_range(start, end - 1) 667 | assert not cls(range(start, end - 1)).contains_range(start, end) 668 | assert cls(range(start, end)).contains_range(start + 1, end) 669 | assert not cls(range(start + 1, end)).contains_range(start, end) 670 | r = range(start, end) 671 | try: 672 | middle = r[len(r) // 2] # on 32bits systems, this call might fail when len(r) is too large 673 | except OverflowError: 674 | if sys.maxsize > 2**32: 675 | raise 676 | else: 677 | return 678 | bm = cls(range(start, end)) - cls([middle]) 679 | assert not bm.contains_range(start, end) 680 | assert bm.contains_range(start, middle) 681 | assert bm.contains_range(middle + 1, end) 682 | 683 | 
@given(hyp_collection, st.booleans(), uint32, uint32) 684 | def test_add_remove_range(self, values: HypCollection, cow: bool, start: int, end: int) -> None: 685 | assume(start < end) 686 | bm = BitMap(values, copy_on_write=cow) 687 | # Empty range 688 | original = BitMap(bm) 689 | bm.add_range(end, start) 690 | assert bm == original 691 | bm.remove_range(end, start) 692 | assert bm == original 693 | # Adding the range 694 | bm.add_range(start, end) 695 | assert bm.contains_range(start, end) 696 | assert bm.intersection_cardinality(BitMap(range(start, end), copy_on_write=cow)) == end - start 697 | # Empty range (again) 698 | original = BitMap(bm) 699 | bm.remove_range(end, start) 700 | assert bm == original 701 | assert bm.intersection_cardinality(BitMap(range(start, end), copy_on_write=cow)) == end - start 702 | # Removing the range 703 | bm.remove_range(start, end) 704 | assert not bm.contains_range(start, end) 705 | assert bm.intersection_cardinality(BitMap(range(start, end), copy_on_write=cow)) == 0 706 | 707 | @pytest.mark.skipif(not is_32_bits, reason="build a too large bitmap with 64 bits, blows up memory") 708 | @given(hyp_collection, st.booleans(), large_uint64, large_uint64) 709 | def test_large_values(self, values: HypCollection, cow: bool, start: int, end: int) -> None: 710 | bm = BitMap(values, copy_on_write=cow) 711 | original = BitMap(bm) 712 | bm.add_range(start, end) 713 | assert bm == original 714 | bm.remove_range(start, end) 715 | assert bm == original 716 | assert bm.contains_range(start, end) 717 | 718 | 719 | class TestCardinality(Util): 720 | 721 | @given(bitmap_cls, bitmap_cls, hyp_collection, hyp_collection, st.booleans()) 722 | def test_cardinality( 723 | self, 724 | cls1: type[EitherBitMap], 725 | cls2: type[EitherBitMap], 726 | values1: HypCollection, 727 | values2: HypCollection, 728 | cow: bool, 729 | ) -> None: 730 | 731 | for real_op, estimated_op in [ 732 | (operator.or_, cls1.union_cardinality), 733 | (operator.and_, 
cls1.intersection_cardinality), 734 | (operator.sub, cls1.difference_cardinality), 735 | (operator.xor, cls1.symmetric_difference_cardinality), 736 | ]: 737 | self.bitmap1 = cls1(values1, copy_on_write=cow) 738 | self.bitmap2 = cls2(values2, copy_on_write=cow) 739 | real_value = len(real_op(self.bitmap1, self.bitmap2)) 740 | estimated_value = estimated_op(self.bitmap1, self.bitmap2) 741 | assert real_value == estimated_value 742 | 743 | @given(bitmap_cls, bitmap_cls, hyp_collection, hyp_collection, st.booleans()) 744 | def test_jaccard_index( 745 | self, 746 | cls1: type[EitherBitMap], 747 | cls2: type[EitherBitMap], 748 | values1: HypCollection, 749 | values2: HypCollection, 750 | cow: bool, 751 | ) -> None: 752 | assume(len(values1) > 0 or len(values2) > 0) 753 | self.bitmap1 = cls1(values1, copy_on_write=cow) 754 | self.bitmap2 = cls2(values2, copy_on_write=cow) 755 | real_value = float(len(self.bitmap1 & self.bitmap2)) / \ 756 | float(max(1, len(self.bitmap1 | self.bitmap2))) 757 | estimated_value = self.bitmap1.jaccard_index(self.bitmap2) 758 | assert real_value == pytest.approx(estimated_value) 759 | 760 | @given(bitmap_cls, hyp_collection, uint32, uint32) 761 | def test_range_cardinality( 762 | self, 763 | cls: type[EitherBitMap], 764 | values: HypCollection, 765 | a: int, 766 | b: int, 767 | ) -> None: 768 | bm = cls(values) 769 | start, end = sorted([a, b]) 770 | 771 | # make an intersection with the relevant range to test against 772 | test_bm = bm.intersection(BitMap(range(start, end))) 773 | 774 | assert len(test_bm) == bm.range_cardinality(start, end) 775 | 776 | 777 | class TestManyOperations(Util): 778 | all_bitmaps: Iterable[AbstractBitMap] 779 | 780 | @given(hyp_collection, hyp_many_collections, st.booleans()) 781 | def test_update( 782 | self, 783 | initial_values: HypCollection, 784 | all_values: list[HypCollection], 785 | cow: bool, 786 | ) -> None: 787 | self.initial_bitmap = BitMap(initial_values, copy_on_write=cow) 788 | self.all_bitmaps = 
[BitMap(values, copy_on_write=cow) 789 | for values in all_values] 790 | self.initial_bitmap.update(*all_values) 791 | expected_result = functools.reduce( 792 | lambda x, y: x | y, self.all_bitmaps + [self.initial_bitmap]) 793 | assert expected_result == self.initial_bitmap 794 | assert type(expected_result) == type(self.initial_bitmap) 795 | 796 | @given(hyp_collection, hyp_many_collections, st.booleans()) 797 | def test_intersection_update( 798 | self, 799 | initial_values: HypCollection, 800 | all_values: list[HypCollection], 801 | cow: bool, 802 | ) -> None: 803 | self.initial_bitmap = BitMap(initial_values, copy_on_write=cow) 804 | self.all_bitmaps = [BitMap(values, copy_on_write=cow) 805 | for values in all_values] 806 | self.initial_bitmap.intersection_update(*all_values) 807 | expected_result = functools.reduce( 808 | lambda x, y: x & y, self.all_bitmaps + [self.initial_bitmap]) 809 | assert expected_result == self.initial_bitmap 810 | assert type(expected_result) == type(self.initial_bitmap) 811 | 812 | @given(bitmap_cls, st.data(), hyp_many_collections, st.booleans()) 813 | def test_union( 814 | self, 815 | cls: type[EitherBitMap], 816 | data: st.DataObject, 817 | all_values: list[HypCollection], 818 | cow: bool, 819 | ) -> None: 820 | classes = [data.draw(bitmap_cls) for _ in range(len(all_values))] 821 | self.all_bitmaps = [classes[i](values, copy_on_write=cow) 822 | for i, values in enumerate(all_values)] 823 | result = cls.union(*self.all_bitmaps) 824 | expected_result = functools.reduce( 825 | lambda x, y: x | y, self.all_bitmaps) 826 | assert expected_result == result 827 | 828 | @given(bitmap_cls, st.data(), hyp_many_collections, st.booleans()) 829 | def test_intersection( 830 | self, 831 | cls: type[EitherBitMap], 832 | data: st.DataObject, 833 | all_values: list[HypCollection], 834 | cow: bool, 835 | ) -> None: 836 | classes = [data.draw(bitmap_cls) for _ in range(len(all_values))] 837 | self.all_bitmaps = [classes[i](values, copy_on_write=cow) 
838 | for i, values in enumerate(all_values)] 839 | result = cls.intersection(*self.all_bitmaps) 840 | expected_result = functools.reduce( 841 | lambda x, y: x & y, self.all_bitmaps) 842 | assert expected_result == result 843 | 844 | @given(bitmap_cls, st.data(), hyp_many_collections, st.booleans()) 845 | def test_difference( 846 | self, 847 | cls: type[EitherBitMap], 848 | data: st.DataObject, 849 | all_values: list[HypCollection], 850 | cow: bool, 851 | ) -> None: 852 | classes = [data.draw(bitmap_cls) for _ in range(len(all_values))] 853 | self.all_bitmaps = [classes[i](values, copy_on_write=cow) 854 | for i, values in enumerate(all_values)] 855 | result = cls.difference(*self.all_bitmaps) 856 | expected_result = functools.reduce( 857 | lambda x, y: x - y, self.all_bitmaps) 858 | assert expected_result == result 859 | 860 | 861 | class TestSerialization(Util): 862 | 863 | @given(bitmap_cls, bitmap_cls, hyp_collection) 864 | def test_serialization( 865 | self, 866 | cls1: type[EitherBitMap], 867 | cls2: type[EitherBitMap], 868 | values: HypCollection, 869 | ) -> None: 870 | old_bm = cls1(values) 871 | buff = old_bm.serialize() 872 | new_bm = cls2.deserialize(buff) 873 | assert old_bm == new_bm 874 | assert isinstance(new_bm, cls2) 875 | self.assert_is_not(old_bm, new_bm) 876 | 877 | @given(bitmap_cls, hyp_collection, st.integers(min_value=2, max_value=pickle.HIGHEST_PROTOCOL)) 878 | def test_pickle_protocol( 879 | self, 880 | cls: type[EitherBitMap], 881 | values: HypCollection, 882 | protocol: int, 883 | ) -> None: 884 | old_bm = cls(values) 885 | pickled = pickle.dumps(old_bm, protocol=protocol) 886 | new_bm = pickle.loads(pickled) 887 | assert old_bm == new_bm 888 | self.assert_is_not(old_bm, new_bm) 889 | 890 | @given(bitmap_cls) 891 | def test_impossible_deserialization( 892 | self, 893 | cls: type[EitherBitMap], 894 | ) -> None: 895 | wrong_input = base64.b64decode('aGVsbG8gd29ybGQ=') 896 | with pytest.raises(ValueError, match='Could not deserialize 
bitmap'): 897 | bitmap = cls.deserialize(wrong_input) 898 | 899 | @given(bitmap_cls) 900 | def test_invalid_deserialization( 901 | self, 902 | cls: type[EitherBitMap], 903 | ) -> None: 904 | wrong_input = base64.b64decode('aGVsbG8gd29ybGQ=') 905 | bm = cls(list(range(0, 1000000, 3))) 906 | bitmap_bytes = bm.serialize() 907 | bitmap_bytes = bitmap_bytes[:42] + wrong_input + bitmap_bytes[42:] 908 | with pytest.raises(ValueError, match='Invalid bitmap after deserialization'): 909 | bitmap = cls.deserialize(bitmap_bytes) 910 | 911 | 912 | class TestStatistics(Util): 913 | 914 | @given(bitmap_cls, hyp_collection, st.booleans()) 915 | def test_basic_properties( 916 | self, 917 | cls: type[EitherBitMap], 918 | values: HypCollection, 919 | cow: bool, 920 | ) -> None: 921 | bitmap = cls(values, copy_on_write=cow) 922 | stats = bitmap.get_statistics() 923 | assert stats['n_values_array_containers'] + stats['n_values_bitset_containers'] \ 924 | + stats['n_values_run_containers'] == len(bitmap) 925 | assert stats['n_bytes_array_containers'] == \ 926 | 2 * stats['n_values_array_containers'] 927 | assert stats['n_bytes_bitset_containers'] == \ 928 | 2**13 * stats['n_bitset_containers'] 929 | if len(values) > 0: 930 | assert stats['min_value'] == bitmap[0] 931 | assert stats['max_value'] == bitmap[len(bitmap) - 1] 932 | assert stats['cardinality'] == len(bitmap) 933 | 934 | @given(bitmap_cls) 935 | def test_implementation_properties_array(self, cls: type[EitherBitMap]) -> None: 936 | values = range(2**16 - 10, 2**16 + 10, 2) 937 | stats = cls(values).get_statistics() 938 | assert stats['n_array_containers'] == 2 939 | assert stats['n_bitset_containers'] == 0 940 | assert stats['n_run_containers'] == 0 941 | assert stats['n_values_array_containers'] == len(values) 942 | assert stats['n_values_bitset_containers'] == 0 943 | assert stats['n_values_run_containers'] == 0 944 | 945 | @given(bitmap_cls) 946 | def test_implementation_properties_bitset(self, cls: type[EitherBitMap]) -> 
None: 947 | values = range(2**0, 2**17, 2) 948 | stats = cls(values).get_statistics() 949 | assert stats['n_array_containers'] == 0 950 | assert stats['n_bitset_containers'] == 2 951 | assert stats['n_run_containers'] == 0 952 | assert stats['n_values_array_containers'] == 0 953 | assert stats['n_values_bitset_containers'] == len(values) 954 | assert stats['n_values_run_containers'] == 0 955 | 956 | @given(bitmap_cls) 957 | def test_implementation_properties_run(self, cls: type[EitherBitMap]) -> None: 958 | values = range(2**0, 2**17, 1) 959 | stats = cls(values).get_statistics() 960 | assert stats['n_array_containers'] == 0 961 | assert stats['n_bitset_containers'] == 0 962 | assert stats['n_run_containers'] == 2 963 | assert stats['n_values_array_containers'] == 0 964 | assert stats['n_values_bitset_containers'] == 0 965 | assert stats['n_values_run_containers'] == len(values) 966 | assert stats['n_bytes_run_containers'] == 12 967 | 968 | 969 | class TestFlip(Util): 970 | 971 | def check_flip(self, bm_before: AbstractBitMap, bm_after: AbstractBitMap, start: int, end: int) -> None: 972 | size = 100 973 | iter_range = random.sample( 974 | range(start, end), min(size, len(range(start, end)))) 975 | iter_before = self.bitmap_sample(bm_before, min(size, len(bm_before))) 976 | iter_after = self.bitmap_sample(bm_after, min(size, len(bm_after))) 977 | for elt in iter_range: 978 | if elt in bm_before: 979 | assert elt not in bm_after 980 | else: 981 | assert elt in bm_after 982 | for elt in iter_before: 983 | if not (start <= elt < end): 984 | assert elt in bm_after 985 | for elt in iter_after: 986 | if not (start <= elt < end): 987 | assert elt in bm_before 988 | 989 | @given(bitmap_cls, hyp_collection, integer, integer, st.booleans()) 990 | def test_flip_empty( 991 | self, 992 | cls: type[EitherBitMap], 993 | values: HypCollection, 994 | start: int, 995 | end: int, 996 | cow: bool, 997 | ) -> None: 998 | assume(start >= end) 999 | bm_before = cls(values, 
copy_on_write=cow) 1000 | bm_copy = cls(bm_before) 1001 | bm_after = bm_before.flip(start, end) 1002 | assert bm_before == bm_copy 1003 | assert bm_before == bm_after 1004 | 1005 | @given(bitmap_cls, hyp_collection, integer, integer, st.booleans()) 1006 | def test_flip( 1007 | self, 1008 | cls: type[EitherBitMap], 1009 | values: HypCollection, 1010 | start: int, 1011 | end: int, 1012 | cow: bool, 1013 | ) -> None: 1014 | assume(start < end) 1015 | bm_before = cls(values, copy_on_write=cow) 1016 | bm_copy = cls(bm_before) 1017 | bm_after = bm_before.flip(start, end) 1018 | assert bm_before == bm_copy 1019 | self.check_flip(bm_before, bm_after, start, end) 1020 | 1021 | @given(hyp_collection, integer, integer, st.booleans()) 1022 | def test_flip_inplace_empty( 1023 | self, 1024 | values: HypCollection, 1025 | start: int, 1026 | end: int, 1027 | cow: bool, 1028 | ) -> None: 1029 | assume(start >= end) 1030 | bm_before = BitMap(values, copy_on_write=cow) 1031 | bm_after = BitMap(bm_before) 1032 | bm_after.flip_inplace(start, end) 1033 | assert bm_before == bm_after 1034 | 1035 | @given(hyp_collection, integer, integer, st.booleans()) 1036 | def test_flip_inplace( 1037 | self, 1038 | values: HypCollection, 1039 | start: int, 1040 | end: int, 1041 | cow: bool, 1042 | ) -> None: 1043 | assume(start < end) 1044 | bm_before = BitMap(values, copy_on_write=cow) 1045 | bm_after = BitMap(bm_before) 1046 | bm_after.flip_inplace(start, end) 1047 | self.check_flip(bm_before, bm_after, start, end) 1048 | 1049 | @pytest.mark.skipif(not is_32_bits, reason="not supported yet") 1050 | class TestShift(Util): 1051 | @given(bitmap_cls, hyp_collection, int64, st.booleans()) 1052 | def test_shift( 1053 | self, 1054 | cls: type[EitherBitMap], 1055 | values: HypCollection, 1056 | offset: int, 1057 | cow: bool, 1058 | ) -> None: 1059 | bm_before = cls(values, copy_on_write=cow) 1060 | bm_copy = cls(bm_before) 1061 | bm_after = bm_before.shift(offset) 1062 | assert bm_before == bm_copy 1063 | 
expected = cls([val + offset for val in values if val + offset in range(0, 2**32)], copy_on_write=cow) 1064 | assert bm_after == expected 1065 | 1066 | @pytest.mark.skipif(not is_32_bits, reason="not supported yet") 1067 | class TestIncompatibleInteraction(Util): 1068 | 1069 | def incompatible_op(self, op: Callable[[BitMap, BitMap], object]) -> None: 1070 | for cow1, cow2 in [(True, False), (False, True)]: 1071 | bm1 = BitMap(copy_on_write=cow1) 1072 | bm2 = BitMap(copy_on_write=cow2) 1073 | with pytest.raises(ValueError): 1074 | op(bm1, bm2) 1075 | 1076 | def test_incompatible_or(self) -> None: 1077 | self.incompatible_op(lambda x, y: x | y) 1078 | 1079 | def test_incompatible_and(self) -> None: 1080 | self.incompatible_op(lambda x, y: x & y) 1081 | 1082 | def test_incompatible_xor(self) -> None: 1083 | self.incompatible_op(lambda x, y: x ^ y) 1084 | 1085 | def test_incompatible_sub(self) -> None: 1086 | self.incompatible_op(lambda x, y: x - y) 1087 | 1088 | def test_incompatible_or_inplace(self) -> None: 1089 | self.incompatible_op(lambda x, y: x.__ior__(y)) 1090 | 1091 | def test_incompatible_and_inplace(self) -> None: 1092 | self.incompatible_op(lambda x, y: x.__iand__(y)) 1093 | 1094 | def test_incompatible_xor_inplace(self) -> None: 1095 | self.incompatible_op(lambda x, y: x.__ixor__(y)) 1096 | 1097 | def test_incompatible_sub_inplace(self) -> None: 1098 | self.incompatible_op(lambda x, y: x.__isub__(y)) 1099 | 1100 | def test_incompatible_eq(self) -> None: 1101 | self.incompatible_op(lambda x, y: x == y) 1102 | 1103 | def test_incompatible_neq(self) -> None: 1104 | self.incompatible_op(lambda x, y: x != y) 1105 | 1106 | def test_incompatible_le(self) -> None: 1107 | self.incompatible_op(lambda x, y: x <= y) 1108 | 1109 | def test_incompatible_lt(self) -> None: 1110 | self.incompatible_op(lambda x, y: x < y) 1111 | 1112 | def test_incompatible_ge(self) -> None: 1113 | self.incompatible_op(lambda x, y: x >= y) 1114 | 1115 | def test_incompatible_gt(self) -> 
None: 1116 | self.incompatible_op(lambda x, y: x > y) 1117 | 1118 | def test_incompatible_intersect(self) -> None: 1119 | self.incompatible_op(lambda x, y: x.intersect(y)) 1120 | 1121 | def test_incompatible_union(self) -> None: 1122 | self.incompatible_op(lambda x, y: BitMap.union(x, y)) 1123 | self.incompatible_op(lambda x, y: BitMap.union(x, x, y, y, x, x, y, y)) 1124 | 1125 | def test_incompatible_or_card(self) -> None: 1126 | self.incompatible_op(lambda x, y: x.union_cardinality(y)) 1127 | 1128 | def test_incompatible_and_card(self) -> None: 1129 | self.incompatible_op(lambda x, y: x.intersection_cardinality(y)) 1130 | 1131 | def test_incompatible_xor_card(self) -> None: 1132 | self.incompatible_op(lambda x, y: x.symmetric_difference_cardinality(y)) 1133 | 1134 | def test_incompatible_sub_card(self) -> None: 1135 | self.incompatible_op(lambda x, y: x.difference_cardinality(y)) 1136 | 1137 | def test_incompatible_jaccard(self) -> None: 1138 | self.incompatible_op(lambda x, y: x.jaccard_index(y)) 1139 | 1140 | 1141 | class TestBitMap: 1142 | @given(hyp_collection, uint32) 1143 | def test_iter_equal_or_larger(self, values: HypCollection, other_value: int) -> None: 1144 | bm = BitMap(values) 1145 | bm_iter = bm.iter_equal_or_larger(other_value) 1146 | expected = [i for i in values if i >= other_value] 1147 | expected.sort() 1148 | 1149 | observed = list(bm_iter) 1150 | assert expected == observed 1151 | 1152 | def test_unashability(self) -> None: 1153 | bm = BitMap() 1154 | with pytest.raises(TypeError): 1155 | hash(bm) 1156 | 1157 | 1158 | class TestFrozen: 1159 | 1160 | @given(hyp_collection, hyp_collection, integer) 1161 | def test_immutability(self, values: HypCollection, raw_other: HypCollection, number: int) -> None: 1162 | frozen = FrozenBitMap(values) 1163 | copy = FrozenBitMap(values) 1164 | other = BitMap(raw_other) 1165 | with pytest.raises(AttributeError): 1166 | frozen.clear() # type: ignore[attr-defined] 1167 | with pytest.raises(AttributeError): 
1168 | frozen.pop() # type: ignore[attr-defined] 1169 | with pytest.raises(AttributeError): 1170 | frozen.add(number) # type: ignore[attr-defined] 1171 | with pytest.raises(AttributeError): 1172 | frozen.update(other) # type: ignore[attr-defined] 1173 | with pytest.raises(AttributeError): 1174 | frozen.discard(number) # type: ignore[attr-defined] 1175 | with pytest.raises(AttributeError): 1176 | frozen.remove(number) # type: ignore[attr-defined] 1177 | with pytest.raises(AttributeError): 1178 | frozen.intersection_update(other) # type: ignore[attr-defined] 1179 | with pytest.raises(AttributeError): 1180 | frozen.difference_update(other) # type: ignore[attr-defined] 1181 | with pytest.raises(AttributeError): 1182 | frozen.symmetric_difference_update(other) # type: ignore[attr-defined] 1183 | with pytest.raises(AttributeError): 1184 | frozen.update(number, number + 10) # type: ignore[attr-defined] 1185 | with pytest.raises(AttributeError): 1186 | frozen.overwrite(other) # type: ignore[attr-defined] 1187 | assert frozen == copy 1188 | 1189 | @given(hyp_collection, hyp_collection) 1190 | def test_hash_uneq(self, values1: HypCollection, values2: HypCollection) -> None: 1191 | """This test has a non-null (but extremely low) probability of failing.""" 1192 | bitmap1 = FrozenBitMap(values1) 1193 | bitmap2 = FrozenBitMap(values2) 1194 | assume(bitmap1 != bitmap2) 1195 | h1 = hash(bitmap1) 1196 | h2 = hash(bitmap2) 1197 | hd = hash(bitmap1 ^ bitmap2) 1198 | hashes = [h1, h2, hd] 1199 | nb_collisions = len(hashes) - len(set(hashes)) 1200 | assert 1 >= nb_collisions 1201 | 1202 | @given(hyp_collection) 1203 | def test_hash_eq(self, values: HypCollection) -> None: 1204 | bitmap1 = FrozenBitMap(values) 1205 | bitmap2 = FrozenBitMap(values) 1206 | bitmap3 = FrozenBitMap(bitmap1) 1207 | assert hash(bitmap1) == hash(bitmap2) 1208 | assert hash(bitmap1) == hash(bitmap3) 1209 | 1210 | def test_hash_eq2(self) -> None: 1211 | """It can happen that two bitmaps hold the same values but have a 
different data structure. They should still 1212 | have the same hash. 1213 | This test compares two bitmaps with the same values, one has a run container, the other has an array container.""" 1214 | n = 100 1215 | bm1 = FrozenBitMap(range(n)) 1216 | bm2 = BitMap() 1217 | for i in range(n): 1218 | bm2.add(i) 1219 | bm2 = FrozenBitMap(bm2, optimize=False) # type: ignore[assignment] 1220 | assert bm1 == bm2 1221 | assert bm1.get_statistics() != bm2.get_statistics() 1222 | assert hash(bm1) == hash(bm2) 1223 | 1224 | 1225 | class TestOptimization: 1226 | 1227 | @given(bitmap_cls) 1228 | def test_run_optimize(self, cls: type[EitherBitMap]) -> None: 1229 | bm1 = BitMap() 1230 | size = 1000 1231 | for i in range(size): 1232 | bm1.add(i) 1233 | bm2 = cls(bm1, optimize=False) 1234 | stats = bm2.get_statistics() 1235 | assert bm1.get_statistics() == stats 1236 | assert stats['n_containers'] == stats['n_array_containers'] 1237 | assert stats['n_values_array_containers'] == size 1238 | assert bm2.run_optimize() 1239 | stats = bm2.get_statistics() 1240 | assert stats['n_containers'] == stats['n_run_containers'] 1241 | assert stats['n_values_run_containers'] == size 1242 | bm3 = cls(bm1) # optimize is True by default 1243 | assert stats == bm3.get_statistics() 1244 | 1245 | @pytest.mark.skipif(not is_32_bits, reason="not supported yet") 1246 | @given(bitmap_cls) 1247 | def test_shrink_to_fit(self, cls: type[EitherBitMap]) -> None: 1248 | bm1 = BitMap() 1249 | size = 1000 1250 | for i in range(size): 1251 | bm1.add(i) 1252 | bm2 = cls(bm1, optimize=False) 1253 | assert bm2.shrink_to_fit() > 0 1254 | assert bm2.shrink_to_fit() == 0 1255 | bm3 = cls(bm1, optimize=True) 1256 | assert bm3.shrink_to_fit() == 0 1257 | 1258 | 1259 | small_integer = st.integers(min_value=0, max_value=200) 1260 | small_integer_list = st.lists(min_size=0, max_size=2000, elements=small_integer) 1261 | 1262 | 1263 | class TestPythonSetEquivalent: 1264 | """ 1265 | The main goal of this class is to make sure 
the BitMap API is a superset of the Python builtin set API. 1266 | """ 1267 | 1268 | @given(bitmap_cls, small_integer_list, st.booleans()) 1269 | def test_convert_to_set(self, BitMapClass: type[EitherBitMap], list1: list[int], cow: bool) -> None: 1270 | """ 1271 | Most of the tests depend on a working implementation for converting from BitMap to Python set. 1272 | This test sanity-checks it. 1273 | 1274 | This test should be modified or removed if you want to run TestPythonSetEquivalent with integers drawn from 1275 | a larger set than `small_integer`. It would otherwise become prohibitively time-consuming. 1276 | """ 1277 | if BitMapClass == BitMap: 1278 | SetClass: type[EitherSet] = set 1279 | elif BitMapClass == FrozenBitMap: 1280 | SetClass = frozenset 1281 | else: 1282 | raise AssertionError() 1283 | 1284 | s1 = SetClass(list1) 1285 | b1 = BitMapClass(list1, copy_on_write=cow) 1286 | 1287 | converted_set = SetClass(b1) 1288 | 1289 | try: 1290 | min_value = min(s1) 1291 | except ValueError: 1292 | min_value = 0 1293 | 1294 | try: 1295 | max_value = max(s1) + 1 1296 | except ValueError: 1297 | max_value = 200 + 1 1298 | 1299 | for i in range(min_value, max_value): 1300 | assert (i in s1) == (i in converted_set) 1301 | 1302 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1303 | def test_difference(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1304 | if BitMapClass == BitMap: 1305 | SetClass: type[EitherSet] = set 1306 | elif BitMapClass == FrozenBitMap: 1307 | SetClass = frozenset 1308 | else: 1309 | raise AssertionError() 1310 | 1311 | s1 = SetClass(list1) 1312 | s2 = SetClass(list2) 1313 | 1314 | b1 = BitMapClass(list1, copy_on_write=cow) 1315 | b2 = BitMapClass(list2, copy_on_write=cow) 1316 | 1317 | assert s1.difference(s2) == set(b1.difference(b2)) 1318 | assert SetClass.difference(s1, s2) == set(BitMapClass.difference(b1, b2))# type: ignore[arg-type] 1319 | assert (s1 - s2) == set(b1 - b2) 
1320 | assert b1 - b2 == b1.difference(b2) 1321 | 1322 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1323 | def test_symmetric_difference(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1324 | if BitMapClass == BitMap: 1325 | SetClass: type[EitherSet] = set 1326 | elif BitMapClass == FrozenBitMap: 1327 | SetClass = frozenset 1328 | else: 1329 | raise AssertionError() 1330 | 1331 | s1 = SetClass(list1) 1332 | s2 = SetClass(list2) 1333 | 1334 | b1 = BitMapClass(list1, copy_on_write=cow) 1335 | b2 = BitMapClass(list2, copy_on_write=cow) 1336 | 1337 | assert s1.symmetric_difference(s2) == set(b1.symmetric_difference(b2)) 1338 | assert SetClass.symmetric_difference(s1, s2) == set(BitMapClass.symmetric_difference(b1, b2))# type: ignore[arg-type] 1339 | 1340 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1341 | def test_union(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1342 | if BitMapClass == BitMap: 1343 | SetClass: type[EitherSet] = set 1344 | elif BitMapClass == FrozenBitMap: 1345 | SetClass = frozenset 1346 | else: 1347 | raise AssertionError() 1348 | 1349 | s1 = SetClass(list1) 1350 | s2 = SetClass(list2) 1351 | 1352 | b1 = BitMapClass(list1, copy_on_write=cow) 1353 | b2 = BitMapClass(list2, copy_on_write=cow) 1354 | 1355 | assert s1.union(s2) == set(b1.union(b2)) 1356 | assert SetClass.union(s1, s2) == set(BitMapClass.union(b1, b2))# type: ignore[arg-type] 1357 | assert (s1 | s2) == set(b1 | b2) 1358 | assert b1 | b2 == b1.union(b2) 1359 | 1360 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1361 | def test_issubset(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1362 | if BitMapClass == BitMap: 1363 | SetClass: type[EitherSet] = set 1364 | elif BitMapClass == FrozenBitMap: 1365 | SetClass = frozenset 1366 | else: 1367 | raise 
AssertionError() 1368 | 1369 | s1 = SetClass(list1) 1370 | s2 = SetClass(list2) 1371 | 1372 | b1 = BitMapClass(list1, copy_on_write=cow) 1373 | b2 = BitMapClass(list2, copy_on_write=cow) 1374 | 1375 | assert s1.issubset(s2) == b1.issubset(b2) 1376 | assert SetClass.issubset(s1, s2) == BitMapClass.issubset(b1, b2)# type: ignore[arg-type] 1377 | 1378 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1379 | def test_le(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1380 | if BitMapClass == BitMap: 1381 | SetClass: type[EitherSet] = set 1382 | elif BitMapClass == FrozenBitMap: 1383 | SetClass = frozenset 1384 | else: 1385 | raise AssertionError() 1386 | 1387 | s1 = SetClass(list1) 1388 | s2 = SetClass(list2) 1389 | 1390 | b1 = BitMapClass(list1, copy_on_write=cow) 1391 | b2 = BitMapClass(list2, copy_on_write=cow) 1392 | 1393 | assert s1.__le__(s2) == b1.__le__(b2) 1394 | assert SetClass.__le__(s1, s2) == BitMapClass.__le__(b1, b2)# type: ignore[operator] 1395 | assert (s1 <= s2) == (b1 <= b2) 1396 | 1397 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1398 | def test_ge(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1399 | if BitMapClass == BitMap: 1400 | SetClass: type[EitherSet] = set 1401 | elif BitMapClass == FrozenBitMap: 1402 | SetClass = frozenset 1403 | else: 1404 | raise AssertionError() 1405 | 1406 | s1 = SetClass(list1) 1407 | s2 = SetClass(list2) 1408 | 1409 | b1 = BitMapClass(list1, copy_on_write=cow) 1410 | b2 = BitMapClass(list2, copy_on_write=cow) 1411 | 1412 | assert s1.__ge__(s2) == b1.__ge__(b2) 1413 | assert SetClass.__ge__(s1, s2) == BitMapClass.__ge__(b1, b2)# type: ignore[operator] 1414 | assert (s1 >= s2) == (b1 >= b2) 1415 | 1416 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1417 | def test_eq(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], 
cow: bool) -> None: 1418 | if BitMapClass == BitMap: 1419 | SetClass: type[EitherSet] = set 1420 | elif BitMapClass == FrozenBitMap: 1421 | SetClass = frozenset 1422 | else: 1423 | raise AssertionError() 1424 | s1 = SetClass(list1) 1425 | s2 = SetClass(list2) 1426 | 1427 | b1 = BitMapClass(list1, copy_on_write=cow) 1428 | b2 = BitMapClass(list2, copy_on_write=cow) 1429 | 1430 | assert s1.__eq__(s2) == b1.__eq__(b2) 1431 | assert SetClass.__eq__(s1, s2) == BitMapClass.__eq__(b1, b2)# type: ignore[operator] 1432 | assert (s1 == s2) == (b1 == b2) 1433 | 1434 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1435 | def test_issuperset(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1436 | if BitMapClass == BitMap: 1437 | SetClass: type[EitherSet] = set 1438 | elif BitMapClass == FrozenBitMap: 1439 | SetClass = frozenset 1440 | else: 1441 | raise AssertionError() 1442 | 1443 | s1 = SetClass(list1) 1444 | s2 = SetClass(list2) 1445 | 1446 | b1 = BitMapClass(list1, copy_on_write=cow) 1447 | b2 = BitMapClass(list2, copy_on_write=cow) 1448 | 1449 | assert s1.issuperset(s2) == b1.issuperset(b2) 1450 | assert SetClass.issuperset(s1, s2) == BitMapClass.issuperset(b1, b2)# type: ignore[arg-type] 1451 | 1452 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1453 | def test_isdisjoint(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1454 | if BitMapClass == BitMap: 1455 | SetClass: type[EitherSet] = set 1456 | elif BitMapClass == FrozenBitMap: 1457 | SetClass = frozenset 1458 | else: 1459 | raise AssertionError() 1460 | 1461 | s1 = SetClass(list1) 1462 | s2 = SetClass(list2) 1463 | 1464 | b1 = BitMapClass(list1, copy_on_write=cow) 1465 | b2 = BitMapClass(list2, copy_on_write=cow) 1466 | 1467 | assert s1.isdisjoint(s2) == b1.isdisjoint(b2) 1468 | assert SetClass.isdisjoint(s1, s2) == BitMapClass.isdisjoint(b1, b2)# type: ignore[arg-type] 
1469 | 1470 | @given(small_integer_list, st.booleans()) 1471 | def test_clear(self, list1: list[int], cow: bool) -> None: 1472 | b1 = BitMap(list1, copy_on_write=cow) 1473 | b1.clear() 1474 | assert len(b1) == 0 1475 | 1476 | @given(small_integer_list, st.booleans()) 1477 | def test_pop(self, list1: list[int], cow: bool) -> None: 1478 | b1 = BitMap(list1, copy_on_write=cow) 1479 | starting_length = len(b1) 1480 | if starting_length >= 1: 1481 | popped_element = b1.pop() 1482 | assert len(b1) == starting_length - 1# length decreased by one 1483 | assert not popped_element in b1# and element isn't in the BitMap anymore 1484 | else: 1485 | with pytest.raises(KeyError): 1486 | b1.pop() 1487 | 1488 | @given(bitmap_cls, small_integer_list, st.booleans()) 1489 | def test_copy(self, BitMapClass: type[EitherBitMap], list1: list[int], cow: bool) -> None: 1490 | b1 = BitMapClass(list1, copy_on_write=cow) 1491 | b2 = b1.copy() 1492 | assert b2 == b1 1493 | 1494 | @given(small_integer_list, st.booleans()) 1495 | def test_copy_writable(self, list1: list[int], cow: bool) -> None: 1496 | b1 = BitMap(list1, copy_on_write=cow) 1497 | b2 = b1.copy() 1498 | 1499 | try: 1500 | new_element = max(b1) + 1 # doesn't exist in the set 1501 | except ValueError: 1502 | new_element = 1 1503 | 1504 | b2.add(new_element) 1505 | 1506 | assert new_element in b2 1507 | assert new_element not in b1 1508 | 1509 | @pytest.mark.skipif(not is_32_bits, reason="not supported yet") 1510 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1511 | def test_overwrite(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1512 | assume(set(list1) != set(list2)) 1513 | b1 = BitMap(list1, copy_on_write=cow) 1514 | orig1 = b1.copy() 1515 | b2 = BitMapClass(list2, copy_on_write=cow) 1516 | orig2 = b2.copy() 1517 | b1.overwrite(b2) 1518 | assert b1 == b2# the two bitmaps are now equal 1519 | assert b1 != orig1# the first bitmap has been modified 1520 | 
assert b2 == orig2# the second bitmap was left untouched 1521 | with pytest.raises(ValueError): 1522 | b1.overwrite(b1) 1523 | 1524 | @given(small_integer_list, small_integer_list, st.booleans()) 1525 | def test_difference_update(self, list1: list[int], list2: list[int], cow: bool) -> None: 1526 | s1 = set(list1) 1527 | s2 = set(list2) 1528 | s1.difference_update(s2) 1529 | 1530 | b1 = BitMap(list1, copy_on_write=cow) 1531 | b2 = BitMap(list2, copy_on_write=cow) 1532 | b1.difference_update(b2) 1533 | 1534 | assert s1 == set(b1) 1535 | 1536 | @given(small_integer_list, small_integer_list, st.booleans()) 1537 | def test_symmetric_difference_update(self, list1: list[int], list2: list[int], cow: bool) -> None: 1538 | s1 = set(list1) 1539 | s2 = set(list2) 1540 | s1.symmetric_difference_update(s2) 1541 | 1542 | b1 = BitMap(list1, copy_on_write=cow) 1543 | b2 = BitMap(list2, copy_on_write=cow) 1544 | b1.symmetric_difference_update(b2) 1545 | 1546 | assert s1 == set(b1) 1547 | 1548 | @given(bitmap_cls, small_integer_list, small_integer_list, st.booleans()) 1549 | def test_dunder(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], cow: bool) -> None: 1550 | """ 1551 | Tests for &|^- 1552 | """ 1553 | if BitMapClass == BitMap: 1554 | SetClass: type[EitherSet] = set 1555 | elif BitMapClass == FrozenBitMap: 1556 | SetClass = frozenset 1557 | else: 1558 | raise AssertionError() 1559 | 1560 | s1 = SetClass(list1) 1561 | s2 = SetClass(list2) 1562 | 1563 | b1 = BitMapClass(list1, copy_on_write=cow) 1564 | b2 = BitMapClass(list2, copy_on_write=cow) 1565 | 1566 | assert s1.__and__(s2) == SetClass(b1.__and__(b2)) 1567 | assert s1.__or__(s2) == SetClass(b1.__or__(b2)) 1568 | assert s1.__xor__(s2) == SetClass(b1.__xor__(b2)) 1569 | assert s1.__sub__(s2) == SetClass(b1.__sub__(b2)) 1570 | 1571 | @given(small_integer_list, small_integer, st.booleans()) 1572 | def test_add(self, list1: list[int], value: int, cow: bool) -> None: 1573 | s1 = set(list1) 1574 | b1 = 
BitMap(list1, copy_on_write=cow) 1575 | assert s1 == set(b1) 1576 | 1577 | s1.add(value) 1578 | b1.add(value) 1579 | assert s1 == set(b1) 1580 | 1581 | @given(small_integer_list, small_integer, st.booleans()) 1582 | def test_discard(self, list1: list[int], value: int, cow: bool) -> None: 1583 | s1 = set(list1) 1584 | b1 = BitMap(list1, copy_on_write=cow) 1585 | assert s1 == set(b1) 1586 | 1587 | s1.discard(value) 1588 | b1.discard(value) 1589 | assert s1 == set(b1) 1590 | 1591 | @given(small_integer_list, small_integer, st.booleans()) 1592 | def test_remove(self, list1: list[int], value: int, cow: bool) -> None: 1593 | s1 = set(list1) 1594 | b1 = BitMap(list1, copy_on_write=cow) 1595 | assert s1 == set(b1) 1596 | 1597 | s1_raised = False 1598 | b1_raised = False 1599 | try: 1600 | s1.remove(value) 1601 | except KeyError: 1602 | s1_raised = True 1603 | 1604 | try: 1605 | b1.remove(value) 1606 | except KeyError: 1607 | b1_raised = True 1608 | 1609 | assert s1 == set(b1) 1610 | assert s1_raised == b1_raised# Either both raised exception or neither did 1611 | 1612 | @given(bitmap_cls, small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1613 | def test_nary_union(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1614 | if BitMapClass == BitMap: 1615 | SetClass: type[EitherSet] = set 1616 | elif BitMapClass == FrozenBitMap: 1617 | SetClass = frozenset 1618 | else: 1619 | raise AssertionError() 1620 | 1621 | s1 = SetClass(list1) 1622 | s2 = SetClass(list2) 1623 | s3 = SetClass(list3) 1624 | 1625 | b1 = BitMapClass(list1, copy_on_write=cow) 1626 | b2 = BitMapClass(list2, copy_on_write=cow) 1627 | b3 = BitMapClass(list3, copy_on_write=cow) 1628 | 1629 | assert SetClass.union(s1, s2, s3) == SetClass(BitMapClass.union(b1, b2, b3))# type: ignore[arg-type] 1630 | assert s1.union(s2, s3) == SetClass(b1.union(b2, b3)) 1631 | 1632 | @given(bitmap_cls, small_integer_list, small_integer_list, 
small_integer_list, st.booleans()) 1633 | def test_nary_difference(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1634 | if BitMapClass == BitMap: 1635 | SetClass: type[EitherSet] = set 1636 | elif BitMapClass == FrozenBitMap: 1637 | SetClass = frozenset 1638 | else: 1639 | raise AssertionError() 1640 | 1641 | s1 = SetClass(list1) 1642 | s2 = SetClass(list2) 1643 | s3 = SetClass(list3) 1644 | 1645 | b1 = BitMapClass(list1, copy_on_write=cow) 1646 | b2 = BitMapClass(list2, copy_on_write=cow) 1647 | b3 = BitMapClass(list3, copy_on_write=cow) 1648 | 1649 | assert SetClass.difference(s1, s2, s3) == SetClass(BitMapClass.difference(b1, b2, b3))# type: ignore[arg-type] 1650 | assert s1.difference(s2, s3) == SetClass(b1.difference(b2, b3)) 1651 | 1652 | @given(bitmap_cls, small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1653 | def test_nary_intersection(self, BitMapClass: type[EitherBitMap], list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1654 | if BitMapClass == BitMap: 1655 | SetClass: type[EitherSet] = set 1656 | elif BitMapClass == FrozenBitMap: 1657 | SetClass = frozenset 1658 | else: 1659 | raise AssertionError() 1660 | 1661 | s1 = SetClass(list1) 1662 | s2 = SetClass(list2) 1663 | s3 = SetClass(list3) 1664 | 1665 | b1 = BitMapClass(list1, copy_on_write=cow) 1666 | b2 = BitMapClass(list2, copy_on_write=cow) 1667 | b3 = BitMapClass(list3, copy_on_write=cow) 1668 | 1669 | assert SetClass.intersection(s1, s2, s3) == SetClass(BitMapClass.intersection(b1, b2, b3))# type: ignore[arg-type] 1670 | assert s1.intersection(s2, s3) == SetClass(b1.intersection(b2, b3)) 1671 | 1672 | @given(small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1673 | def test_nary_intersection_update(self, list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1674 | s1 = set(list1) 1675 | s2 = set(list2) 1676 | s3 = set(list3) 1677 | 1678 | 
b1 = BitMap(list1, copy_on_write=cow) 1679 | b2 = BitMap(list2, copy_on_write=cow) 1680 | b3 = BitMap(list3, copy_on_write=cow) 1681 | 1682 | set.intersection_update(s1, s2, s3) 1683 | BitMap.intersection_update(b1, b2, b3) 1684 | assert s1 == set(b1) 1685 | 1686 | s1 = set(list1) 1687 | s2 = set(list2) 1688 | s3 = set(list3) 1689 | 1690 | b1 = BitMap(list1, copy_on_write=cow) 1691 | b2 = BitMap(list2, copy_on_write=cow) 1692 | b3 = BitMap(list3, copy_on_write=cow) 1693 | 1694 | s1.intersection_update(s2, s3) 1695 | b1.intersection_update(b2, b3) 1696 | 1697 | assert s1 == set(b1) 1698 | 1699 | @given(small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1700 | def test_nary_difference_update(self, list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1701 | s1 = set(list1) 1702 | s2 = set(list2) 1703 | s3 = set(list3) 1704 | 1705 | b1 = BitMap(list1, copy_on_write=cow) 1706 | b2 = BitMap(list2, copy_on_write=cow) 1707 | b3 = BitMap(list3, copy_on_write=cow) 1708 | 1709 | set.difference_update(s1, s2, s3) 1710 | BitMap.difference_update(b1, b2, b3) 1711 | assert s1 == set(b1) 1712 | 1713 | s1 = set(list1) 1714 | s2 = set(list2) 1715 | s3 = set(list3) 1716 | 1717 | b1 = BitMap(list1, copy_on_write=cow) 1718 | b2 = BitMap(list2, copy_on_write=cow) 1719 | b3 = BitMap(list3, copy_on_write=cow) 1720 | 1721 | s1.difference_update(s2, s3) 1722 | b1.difference_update(b2, b3) 1723 | 1724 | assert s1 == set(b1) 1725 | 1726 | @given(small_integer_list, small_integer_list, small_integer_list, st.booleans()) 1727 | def test_nary_update(self, list1: list[int], list2: list[int], list3: list[int], cow: bool) -> None: 1728 | s1 = set(list1) 1729 | s2 = set(list2) 1730 | s3 = set(list3) 1731 | 1732 | b1 = BitMap(list1, copy_on_write=cow) 1733 | b2 = BitMap(list2, copy_on_write=cow) 1734 | b3 = BitMap(list3, copy_on_write=cow) 1735 | 1736 | set.update(s1, s2, s3) 1737 | BitMap.update(b1, b2, b3) 1738 | assert s1 == set(b1) 1739 | 1740 | s1 = 
set(list1) 1741 | s2 = set(list2) 1742 | s3 = set(list3) 1743 | 1744 | b1 = BitMap(list1, copy_on_write=cow) 1745 | b2 = BitMap(list2, copy_on_write=cow) 1746 | b3 = BitMap(list3, copy_on_write=cow) 1747 | 1748 | s1.update(s2, s3) 1749 | b1.update(b2, b3) 1750 | 1751 | assert s1 == set(b1) 1752 | 1753 | 1754 | small_list_of_uin32 = st.lists(min_size=0, max_size=400, elements=uint32) 1755 | large_list_of_uin32 = st.lists(min_size=600, max_size=1000, elements=uint32, unique=True) 1756 | 1757 | 1758 | class TestString: 1759 | 1760 | @given(bitmap_cls, small_list_of_uin32) 1761 | def test_small_list(self, cls: type[EitherBitMap], collection: list[int]) -> None: 1762 | # test that evaluating the repr of a small bitmap gives back the original bitmap 1763 | bm = cls(collection) 1764 | string_repr = repr(bm) 1765 | if not is_32_bits: 1766 | string_repr = string_repr.replace("BitMap64", "BitMap") # we redefined BitMap64 to BitMap at the top of this file 1767 | assert bm == eval(string_repr) 1768 | 1769 | @settings(suppress_health_check=HealthCheck) 1770 | @given(bitmap_cls, large_list_of_uin32) 1771 | def test_large_list(self, cls: type[EitherBitMap], collection: list[int]) -> None: 1772 | # test that for a large bitmap both the start and the end of the bitmap get printed 1773 | 1774 | bm = cls(collection) 1775 | s = repr(bm).replace(cls.__name__, " ") 1776 | nondigits = set(s) - set('0123456789\n.') 1777 | for x in nondigits: 1778 | s = s.replace(x, ' ') 1779 | 1780 | small, large = s.split('...') 1781 | small_ints = [int(i) for i in small.split()] 1782 | large_ints = [int(i) for i in large.split()] 1783 | 1784 | for i in small_ints: 1785 | assert i in bm 1786 | 1787 | for i in large_ints: 1788 | assert i in bm 1789 | 1790 | assert min(small_ints) == bm.min() 1791 | assert max(large_ints) == bm.max() 1792 | 1793 | 1794 | class TestVersion: 1795 | def assert_regex(self, pattern: str, text: str) -> None: 1796 | matches = re.findall(pattern, text) 1797 | if len(matches) != 1 or 
matches[0] != text: 1798 | pytest.fail('Regex "%s" does not match text "%s".' % (pattern, text)) 1799 | 1800 | def test_version(self) -> None: 1801 | self.assert_regex(r'\d+\.\d+\.\d+(?:\.dev\d+)?', pyroaring.__version__) 1802 | self.assert_regex(r'v\d+\.\d+\.\d+', pyroaring.__croaring_version__) 1803 | 1804 | 1805 | if __name__ == "__main__": 1806 | unittest.main() 1807 | -------------------------------------------------------------------------------- /test_state_machine.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations # for using set[int] in Python 3.8 2 | 3 | import hypothesis.strategies as st 4 | from hypothesis.database import DirectoryBasedExampleDatabase 5 | from hypothesis.stateful import Bundle, RuleBasedStateMachine, rule 6 | from hypothesis import settings 7 | from dataclasses import dataclass 8 | from pyroaring import BitMap, BitMap64 9 | from test import hyp_collection, uint32, uint64, is_32_bits 10 | 11 | if is_32_bits: 12 | BitMapClass = BitMap 13 | int_class = uint32 14 | large_val = 2**30 15 | else: 16 | BitMapClass = BitMap64 17 | int_class = uint64 18 | large_val = 2**40 19 | 20 | @dataclass 21 | class Collection: 22 | test: BitMapClass 23 | ref: set[int] 24 | 25 | def check(self): 26 | assert len(self.test) == len(self.ref) 27 | assert set(self.test) == self.ref 28 | 29 | def __post_init__(self): 30 | self.check() 31 | 32 | 33 | class SetComparison(RuleBasedStateMachine): 34 | collections = Bundle("collections") 35 | 36 | @rule(target=collections, val=hyp_collection) 37 | def init_collection(self, val): 38 | return Collection(test=BitMapClass(val), ref=set(val)) 39 | 40 | @rule(target=collections, col=collections) 41 | def copy(self, col): 42 | return Collection(test=BitMapClass(col.test), ref=set(col.ref)) 43 | 44 | @rule(col=collections, val=int_class) 45 | def add_elt(self, col, val): 46 | col.test.add(val) 47 | col.ref.add(val) 48 | col.check() 49 | 50 | 
@rule(col=collections, val=int_class) 51 | def remove_elt(self, col, val): 52 | col.test.discard(val) 53 | col.ref.discard(val) 54 | col.check() 55 | 56 | @rule(target=collections, col1=collections, col2=collections) 57 | def union(self, col1, col2): 58 | return Collection(test=col1.test | col2.test, ref=col1.ref | col2.ref) 59 | 60 | @rule(col1=collections, col2=collections) 61 | def union_inplace(self, col1, col2): 62 | col1.test |= col2.test 63 | col1.ref |= col2.ref 64 | col1.check() 65 | 66 | @rule(target=collections, col1=collections, col2=collections) 67 | def intersection(self, col1, col2): 68 | return Collection(test=col1.test & col2.test, ref=col1.ref & col2.ref) 69 | 70 | @rule(col1=collections, col2=collections) 71 | def intersection_inplace(self, col1, col2): 72 | col1.test &= col2.test 73 | col1.ref &= col2.ref 74 | col1.check() 75 | 76 | @rule(target=collections, col1=collections, col2=collections) 77 | def difference(self, col1, col2): 78 | return Collection(test=col1.test - col2.test, ref=col1.ref - col2.ref) 79 | 80 | @rule(col1=collections, col2=collections) 81 | def difference_inplace(self, col1, col2): 82 | col1.test -= col2.test 83 | col1.ref -= col2.ref 84 | col1.check() 85 | 86 | @rule(target=collections, col1=collections, col2=collections) 87 | def symmetric_difference(self, col1, col2): 88 | return Collection(test=col1.test ^ col2.test, ref=col1.ref ^ col2.ref) 89 | 90 | @rule(col1=collections, col2=collections) 91 | def symmetric_difference_inplace(self, col1, col2): 92 | col1.test ^= col2.test 93 | col1.ref ^= col2.ref 94 | col1.check() 95 | 96 | @rule( 97 | target=collections, 98 | col=collections, 99 | start=st.integers(min_value=0, max_value=large_val), 100 | size=st.integers(min_value=0, max_value=2**18), 101 | ) 102 | def flip(self, col, start, size): 103 | stop = start + size 104 | return Collection( 105 | test=col.test.flip(start, stop), ref=col.ref ^ set(range(start, stop)) 106 | ) 107 | 108 | @rule( 109 | col=collections, 110 | 
start=st.integers(min_value=0, max_value=large_val), 111 | size=st.integers(min_value=0, max_value=2**18), 112 | ) 113 | def flip_inplace(self, col, start, size): 114 | stop = start + size 115 | col.test.flip_inplace(start, stop) 116 | col.ref ^= set(range(start, stop)) 117 | col.check() 118 | 119 | 120 | TestTrees = SetComparison.TestCase 121 | TestTrees.settings = settings(max_examples=100, stateful_step_count=100) 122 | 123 | if __name__ == "__main__": 124 | unittest.main() 125 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | cython3 4 | test_wheel 5 | 6 | 7 | [testenv] 8 | setenv = 9 | PYTHONFAULTHANDLER=1 10 | 11 | 12 | [testenv:cython3] 13 | deps = 14 | hypothesis 15 | pytest 16 | cython>=3.0.2 17 | passenv = 18 | HYPOTHESIS_PROFILE 19 | ROARING_BITSIZE 20 | commands = 21 | py.test -v test.py test_state_machine.py 22 | python cydoctest.py 23 | 24 | 25 | [testenv:test_wheel] 26 | deps = 27 | hypothesis 28 | pytest 29 | wheel 30 | build 31 | twine 32 | skip_sdist = true 33 | skip_install = true 34 | passenv = 35 | HYPOTHESIS_PROFILE 36 | ROARING_BITSIZE 37 | allowlist_externals = 38 | rm 39 | mkdir 40 | commands = 41 | # Clear our prebuilt wheels so we have a fresh directory 42 | python -m build 43 | # Install from the wheel in that directory 44 | pip install --only-binary ":all:" --find-links=dist --no-index pyroaring 45 | py.test -v test.py test_state_machine.py 46 | python cydoctest.py 47 | 48 | 49 | [testenv:linting] 50 | deps = 51 | flake8 52 | flake8-isort 53 | flake8-noqa 54 | flake8-pyi 55 | skip_sdist = true 56 | skip_install = true 57 | commands = 58 | flake8 59 | 60 | 61 | [testenv:type_check] 62 | deps = 63 | mypy 64 | hypothesis 65 | skip_sdist = true 66 | skip_install = true 67 | commands = 68 | # This serves to check that our stubs at least match the usages we test 69 | mypy test.py 70 | 
--------------------------------------------------------------------------------