├── MANIFEST.in ├── .gitmodules ├── tox.ini ├── .coveragerc ├── Makefile ├── .travis.yml ├── setup.py ├── .gitignore ├── LICENSING ├── test ├── test_croaring.py └── test_generative.py ├── README.rst ├── README.md ├── croaring_build.py └── croaring └── __init__.py /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSING 2 | include croaring_build.py 3 | include croaring-src/roaring.c 4 | include croaring-src/roaring.h 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "croaring-src"] 2 | path = croaring-src 3 | url = https://github.com/RoaringBitmap/CRoaring.git 4 | ignore = dirty 5 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist=py27,py35,py36 3 | [testenv] 4 | passenv = CI TRAVIS TRAVIS_* NOSE_WITH_COVERAGE 5 | usedevelop=true 6 | deps= 7 | cffi 8 | codecov 9 | coverage 10 | nose 11 | commands= 12 | nosetests 13 | codecov 14 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = croaring 4 | 5 | [report] 6 | exclude_lines = 7 | if self.debug: 8 | if six.PY2: 9 | if six.PY3: 10 | pragma: no cover 11 | raise NotImplementedError 12 | if __name__ == .__main__.: 13 | ignore_errors = True 14 | omit = 15 | test/* 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: README.rst amalgamated 2 | 3 | README.rst: README.md 4 | cat $^ | egrep -v '^\[\!' 
| pandoc -f markdown -t rst > $@ 5 | 6 | amalgamated: croaring-src/roaring.h croaring-src/roaring.c 7 | 8 | croaring-src/roaring.h roaring-src/roaring.c: $@ 9 | cd croaring-src/; ./amalgamation.sh 10 | 11 | .PHONY: all amalgamated 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: trusty 3 | 4 | compiler: 5 | - clang 6 | - gcc 7 | 8 | matrix: 9 | include: 10 | - python: "2.7" 11 | env: TOXENV=py27 NOSE_WITH_COVERAGE=true 12 | - python: "3.6" 13 | env: TOXENV=py36 NOSE_WITH_COVERAGE=true 14 | - python: "pypy" 15 | env: TOXENV=py27 16 | - python: "pypy3" 17 | env: TOXENV=py35 18 | 19 | install: 20 | - make amalgamated 21 | - pip install tox 22 | 23 | script: tox 24 | 25 | notifications: 26 | email: false 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from codecs import open 2 | from os import path 3 | 4 | from setuptools import setup, find_packages 5 | 6 | 7 | here = path.abspath(path.dirname(__file__)) 8 | 9 | # Get the long description from the README file 10 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f: 11 | readme = f.read() 12 | 13 | 14 | setup( 15 | name='croaring', 16 | version='0.0.2', 17 | 18 | description='Compressed integer sets based on Roaring bitmaps.', 19 | long_description=readme, 20 | url='https://github.com/zacharyvoase/croaring.py', 21 | 22 | author='Zachary Voase', 23 | author_email='zack@meat.io', 24 | license='UNLICENSE', 25 | 26 | packages=find_packages(exclude=['test']), 27 | 28 | setup_requires=[ 29 | "cffi>=1.4.0", 30 | "six>=1.10.0", 31 | ], 32 | install_requires=[ 33 | "cffi>=1.4.0", 34 | "six>=1.10.0", 35 | ], 36 | 37 | cffi_modules=["croaring_build.py:FFI_BUILDER"], 38 | ) 39 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | croaring/_roaring.* 2 | 3 | # Finder files 4 | .DS_Store 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.pyc 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # IPython Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv/ 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | -------------------------------------------------------------------------------- /LICENSING: -------------------------------------------------------------------------------- 1 | The CRoaring library is distributed under the following terms: 2 | 3 | Copyright 2016 The CRoaring 
authors 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | 17 | All other code (particularly, the Python bindings) is released into the public 18 | domain under these terms: 19 | 20 | This is free and unencumbered software released into the public domain. 21 | 22 | Anyone is free to copy, modify, publish, use, compile, sell, or 23 | distribute this software, either in source code form or as a compiled 24 | binary, for any purpose, commercial or non-commercial, and by any 25 | means. 26 | 27 | In jurisdictions that recognize copyright laws, the author or authors 28 | of this software dedicate any and all copyright interest in the 29 | software to the public domain. We make this dedication for the benefit 30 | of the public at large and to the detriment of our heirs and 31 | successors. We intend this dedication to be an overt act of 32 | relinquishment in perpetuity of all present and future rights to this 33 | software under copyright law. 34 | 35 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 36 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 37 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 38 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 39 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 40 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 41 | OTHER DEALINGS IN THE SOFTWARE. 
42 | 43 | For more information, please refer to <http://unlicense.org/> 44 | -------------------------------------------------------------------------------- /test/test_croaring.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | from nose.tools import assert_raises 4 | import six 5 | 6 | from croaring import RoaringBitmap 7 | 8 | 9 | def test_smoke(): 10 | bm = RoaringBitmap() 11 | assert len(bm) == 0 12 | assert list(bm) == [] 13 | assert not bm 14 | assert 1 not in bm 15 | 16 | printed = repr(bm) 17 | assert isinstance(printed, six.string_types) 18 | 19 | bm.add(1) 20 | bm.add(6) 21 | assert len(bm) == 2 22 | assert list(bm) == [1, 6] 23 | assert bm 24 | assert 1 in bm 25 | assert 6 in bm 26 | assert 2 not in bm 27 | 28 | bm.discard(1) 29 | assert len(bm) == 1 30 | assert list(bm) == [6] 31 | assert bm 32 | assert 1 not in bm 33 | assert 6 in bm 34 | 35 | 36 | def test_invalid_args(): 37 | with assert_raises(TypeError): 38 | RoaringBitmap(123) 39 | with assert_raises(TypeError): 40 | RoaringBitmap('string') 41 | 42 | 43 | def test_minmax(): 44 | empty = RoaringBitmap([]) 45 | assert empty.minimum() is None 46 | assert empty.maximum() is None 47 | 48 | bm = RoaringBitmap([1, 2, 3, 4]) 49 | assert bm.minimum() == 1 50 | assert bm.maximum() == 4 51 | 52 | 53 | def test_indexing_empty(): 54 | empty = RoaringBitmap([]) 55 | with assert_raises(IndexError): 56 | empty[0] 57 | with assert_raises(IndexError): 58 | empty[-1] 59 | 60 | 61 | def test_indexing_nonempty(): 62 | bm = RoaringBitmap([1, 3, 5, 6]) 63 | assert bm[0] == 1 64 | assert bm[1] == 3 65 | assert bm[2] == 5 66 | assert bm[3] == 6 67 | with assert_raises(IndexError): 68 | bm[4] 69 | assert bm[-1] == 6 70 | assert bm[-2] == 5 71 | assert bm[-3] == 3 72 | assert bm[-4] == 1 73 | with assert_raises(IndexError): 74 | bm[-5] 75 | 76 | 77 | def test_copy(): 78 | bm = RoaringBitmap() 79 | bm.add(1) 80 | bm2 = bm.copy() 81 | bm2.add(6) 82 | assert len(bm) == 1 83 | assert 
list(bm) == [1] 84 | assert len(bm2) == 2 85 | assert list(bm2) == [1, 6] 86 | 87 | 88 | def test_isdisjoint(): 89 | empty = RoaringBitmap() 90 | assert empty.isdisjoint(empty) 91 | bm1 = RoaringBitmap([1, 2, 3, 4, 5]) 92 | bm2 = RoaringBitmap([6, 7, 8, 9, 10]) 93 | bm3 = RoaringBitmap([4, 5, 6, 7]) 94 | assert bm1.isdisjoint(empty) 95 | assert empty.isdisjoint(bm1) 96 | assert bm1.isdisjoint(bm2) 97 | assert not bm1.isdisjoint(bm3) 98 | assert not bm2.isdisjoint(bm3) 99 | assert not bm3.isdisjoint(bm1) 100 | assert not bm3.isdisjoint(bm2) 101 | 102 | 103 | def test_clear(): 104 | bm = RoaringBitmap([1, 2, 3, 4, 5]) 105 | bm.clear() 106 | assert len(bm) == 0 107 | assert list(bm) == [] 108 | -------------------------------------------------------------------------------- /test/test_generative.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import random 3 | import six 4 | 5 | from nose.tools import assert_equals 6 | from nose.tools import assert_raises 7 | 8 | from croaring import RoaringBitmap 9 | 10 | 11 | EMPTY = set() 12 | 13 | 14 | BINARY_OPERATORS = [ 15 | (operator.and_, "&"), 16 | (operator.or_, "|"), 17 | (operator.xor, "^"), 18 | (operator.sub, "-"), 19 | ] 20 | 21 | 22 | BOOLEAN_OPERATORS = [ 23 | (operator.eq, "=="), 24 | (operator.lt, "<"), 25 | (operator.gt, ">"), 26 | (operator.le, "<="), 27 | (operator.ge, ">="), 28 | ] 29 | 30 | 31 | INPLACE_OPERATORS = [ 32 | (operator.iand, "&="), 33 | (operator.ior, "|="), 34 | (operator.ixor, "^="), 35 | (operator.isub, "-="), 36 | ] 37 | 38 | 39 | def gen_random_set_pairs(): 40 | if six.PY2: 41 | range_ = xrange 42 | else: 43 | range_ = range 44 | 45 | initial = random.sample(range_(0, 1000), 50) 46 | equal = initial[:] 47 | disjoint = random.sample(range_(1000, 2000), 50) 48 | overlap = initial[:25] + random.sample(range_(1000, 2000), 25) 49 | proper_subset = initial[:25] 50 | proper_superset = initial + disjoint 51 | 52 | yield (EMPTY, EMPTY, 
"empty {} empty") 53 | yield (initial, EMPTY, "X {} empty") 54 | yield (initial, equal, "X {} X") 55 | yield (initial, disjoint, "X {} disjoint") 56 | yield (initial, overlap, "X {} overlap") 57 | yield (initial, proper_subset, "X {} proper_subset") 58 | yield (initial, proper_superset, "X {} proper_superset") 59 | 60 | 61 | def test_range(): 62 | yield check_range_same, 0 63 | yield check_range_same, 10 64 | yield check_range_same, 0, 10 65 | yield check_range_same, 0, 10, 1 66 | yield check_range_same, 0, 10, 2 67 | yield check_range_same, 0, 10, 3 68 | 69 | 70 | def test_invalid_range(): 71 | yield check_invalid_range, TypeError 72 | yield check_invalid_range, TypeError, 0, 1, 2, 3 73 | yield check_invalid_range, ValueError, -1 74 | yield check_invalid_range, ValueError, 5, 0, -1 75 | yield check_invalid_range, ValueError, 0, 5, -1 76 | yield check_invalid_range, ValueError, 0, 5, 0 77 | yield check_invalid_range, ValueError, -1, 2 78 | yield check_invalid_range, ValueError, 2, -1 79 | yield check_invalid_range, ValueError, -5, -2 80 | 81 | 82 | def test_binary_operators(): 83 | for op, op_name in BINARY_OPERATORS: 84 | for (s1, s2, name) in gen_random_set_pairs(): 85 | yield check_binary_same, op, s1, s2, name.format(op_name) 86 | 87 | 88 | def test_boolean_operators(): 89 | for op, op_name in BOOLEAN_OPERATORS: 90 | for (s1, s2, name) in gen_random_set_pairs(): 91 | yield check_boolean_same, op, s1, s2, name.format(op_name) 92 | 93 | 94 | def test_inplace_operators(): 95 | for op, op_name in INPLACE_OPERATORS: 96 | for (s1, s2, name) in gen_random_set_pairs(): 97 | yield check_inplace_same, op, s1, s2, name.format(op_name) 98 | 99 | 100 | def check_range_same(*range_args): 101 | range_ = six.PY2 and range or (lambda *a: list(range(*a))) 102 | assert_equals(list(RoaringBitmap.range(*range_args)), range_(*range_args)) 103 | 104 | 105 | def check_invalid_range(exc, *range_args): 106 | with assert_raises(exc): 107 | RoaringBitmap.range(*range_args) 108 | 109 | 110 
| def check_binary_same(op, set1, set2, name): 111 | expected = list(op(set(set1), set(set2))) 112 | expected.sort() 113 | actual = list(op(RoaringBitmap(set1), RoaringBitmap(set2))) 114 | assert_equals(actual, expected, name) 115 | with assert_raises(TypeError): 116 | op(RoaringBitmap(set1), set(set2)) 117 | 118 | 119 | def check_boolean_same(op, set1, set2, name): 120 | expected = op(set(set1), set(set2)) 121 | actual = op(RoaringBitmap(set1), RoaringBitmap(set2)) 122 | assert_equals(actual, expected, name) 123 | with assert_raises(TypeError): 124 | op(RoaringBitmap(set1), set(set2)) 125 | 126 | 127 | def check_inplace_same(op, set1, set2, name): 128 | s1, s2 = set(set1), set(set2) 129 | bm1, bm2 = RoaringBitmap(set1), RoaringBitmap(set2) 130 | op(s1, s2) 131 | op(bm1, bm2) 132 | expected = list(s1) 133 | expected.sort() 134 | actual = list(bm1) 135 | assert_equals(actual, expected, name) 136 | with assert_raises(TypeError): 137 | op(bm1, s2) 138 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | CRoaring.py: Fast, compact integer bitmap sets, based on CRoaring 2 | ================================================================= 3 | 4 | `Roaring bitmaps <http://roaringbitmap.org/>`__ are fast, compressed, 5 | and portable bitmaps, used to store unique sorted integer sets. These 6 | bitmaps offer better real-world space complexity and performance than 7 | typical hash sets (such as Python's built-in ``set``), and can be 8 | serialized into a portable format for storage and interop with the 9 | C/C++, Java and Go libraries. 10 | 11 | This library makes the 12 | `CRoaring <https://github.com/RoaringBitmap/CRoaring>`__ implementation 13 | available in Python 2.7 and 3.5+. It uses 14 | `CFFI <http://cffi.readthedocs.io/en/latest/>`__, so it works on both 15 | CPython and PyPy. The full Python ``set`` interface is implemented. 16 | Comprehensive tests are included. 
17 | 18 | Installation 19 | ------------ 20 | 21 | :: 22 | 23 | pip install croaring 24 | 25 | The CRoaring source is included with the Python library, so you don't 26 | need to install it from elsewhere (though you may need a C compiler 27 | available if a binary package is unavailable for your architecture). 28 | 29 | Usage 30 | ----- 31 | 32 | Instantiate a ``croaring.RoaringBitmap()``, and use it just like a 33 | normal ``set``: 34 | 35 | :: 36 | 37 | >>> import croaring 38 | >>> bitmap = croaring.RoaringBitmap() 39 | >>> bitmap 40 | RoaringBitmap([]) 41 | >>> bitmap.add(1) 42 | >>> bitmap.add(4572) 43 | >>> bitmap.add(326) 44 | >>> bitmap 45 | RoaringBitmap([1, 326, 4572]) 46 | 47 | You can use either binary operators (``|``, ``&``, ``^`` and ``-``) or 48 | their English names (``union``, ``intersection``, 49 | ``symmetric_difference`` and ``difference``): 50 | 51 | :: 52 | 53 | >>> bitmap | croaring.RoaringBitmap([50, 95]) 54 | RoaringBitmap([1, 50, 95, 326, 4572]) 55 | >>> bitmap & croaring.RoaringBitmap([200, 326]) 56 | RoaringBitmap([326]) 57 | >>> bitmap ^ croaring.RoaringBitmap([200, 326]) 58 | RoaringBitmap([1, 200, 4572]) 59 | 60 | Since the bitmaps are ordered, indexing (including negative) is 61 | supported: 62 | 63 | :: 64 | 65 | >>> bitmap[1] 66 | 326 67 | >>> bitmap[-1] 68 | 4572 69 | 70 | Finally, you can construct a bitmap from a range, similar to the 71 | arguments to Python's built-in ``range``: 72 | 73 | :: 74 | 75 | >>> croaring.RoaringBitmap.range(10) 76 | RoaringBitmap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 77 | >>> croaring.RoaringBitmap.range(2, 10) 78 | RoaringBitmap([2, 3, 4, 5, 6, 7, 8, 9]) 79 | >>> croaring.RoaringBitmap.range(2, 10, 3) 80 | RoaringBitmap([2, 5, 8]) 81 | 82 | License 83 | ------- 84 | 85 | CRoaring is licensed under the Apache License v2.0: 86 | 87 | Copyright 2016 The CRoaring authors 88 | 89 | Licensed under the Apache License, Version 2.0 (the "License"); you 90 | may not use this file except in compliance with the License. 
You may 91 | obtain a copy of the License at 92 | 93 | :: 94 | 95 | http://www.apache.org/licenses/LICENSE-2.0 96 | 97 | Unless required by applicable law or agreed to in writing, software 98 | distributed under the License is distributed on an "AS IS" BASIS, 99 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 100 | implied. See the License for the specific language governing 101 | permissions and limitations under the License. 102 | 103 | All other code is released under the Unlicense: 104 | 105 | This is free and unencumbered software released into the public 106 | domain. 107 | 108 | Anyone is free to copy, modify, publish, use, compile, sell, or 109 | distribute this software, either in source code form or as a 110 | compiled binary, for any purpose, commercial or non-commercial, and 111 | by any means. 112 | 113 | In jurisdictions that recognize copyright laws, the author or 114 | authors of this software dedicate any and all copyright interest in 115 | the software to the public domain. We make this dedication for the 116 | benefit of the public at large and to the detriment of our heirs and 117 | successors. We intend this dedication to be an overt act of 118 | relinquishment in perpetuity of all present and future rights to 119 | this software under copyright law. 120 | 121 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 122 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 123 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 124 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY 125 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 126 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 127 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
128 | 129 | For more information, please refer to http://unlicense.org/ 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CRoaring.py: Fast, compact integer bitmap sets, based on CRoaring 2 | 3 | [![Build Status](https://travis-ci.org/zacharyvoase/croaring.py.svg?branch=master)](https://travis-ci.org/zacharyvoase/croaring.py) 4 | [![codecov](https://codecov.io/gh/zacharyvoase/croaring.py/branch/master/graph/badge.svg)](https://codecov.io/gh/zacharyvoase/croaring.py) 5 | 6 | [Roaring bitmaps][] are fast, compressed, and portable bitmaps, used to store 7 | unique sorted integer sets. These bitmaps offer better real-world space 8 | complexity and performance than typical hash sets (such as Python's built-in 9 | `set`), and can be serialized into a portable format for storage and interop 10 | with the C/C++, Java and Go libraries. 11 | 12 | This library makes the [CRoaring][] implementation available in Python 2.7 and 13 | 3.5+. It uses [CFFI][], so it works on both CPython and PyPy. The full Python 14 | `set` interface is implemented. Comprehensive tests are included. 15 | 16 | [Roaring bitmaps]: http://roaringbitmap.org/ 17 | [CRoaring]: https://github.com/RoaringBitmap/CRoaring 18 | [CFFI]: http://cffi.readthedocs.io/en/latest/ 19 | 20 | 21 | ## Installation 22 | 23 | pip install croaring 24 | 25 | The CRoaring source is included with the Python library, so you don't need to 26 | install it from elsewhere (though you may need a C compiler available if a 27 | binary package is unavailable for your architecture). 
28 | 29 | 30 | ## Usage 31 | 32 | Instantiate a `croaring.RoaringBitmap()`, and use it just like a normal `set`: 33 | 34 | >>> import croaring 35 | >>> bitmap = croaring.RoaringBitmap() 36 | >>> bitmap 37 | RoaringBitmap([]) 38 | >>> bitmap.add(1) 39 | >>> bitmap.add(4572) 40 | >>> bitmap.add(326) 41 | >>> bitmap 42 | RoaringBitmap([1, 326, 4572]) 43 | 44 | You can use either binary operators (`|`, `&`, `^` and `-`) or their English 45 | names (`union`, `intersection`, `symmetric_difference` and `difference`): 46 | 47 | >>> bitmap | croaring.RoaringBitmap([50, 95]) 48 | RoaringBitmap([1, 50, 95, 326, 4572]) 49 | >>> bitmap & croaring.RoaringBitmap([200, 326]) 50 | RoaringBitmap([326]) 51 | >>> bitmap ^ croaring.RoaringBitmap([200, 326]) 52 | RoaringBitmap([1, 200, 4572]) 53 | 54 | Since the bitmaps are ordered, indexing (including negative) is supported: 55 | 56 | >>> bitmap[1] 57 | 326 58 | >>> bitmap[-1] 59 | 4572 60 | 61 | Finally, you can construct a bitmap from a range, similar to the arguments to 62 | Python's built-in `range`: 63 | 64 | >>> croaring.RoaringBitmap.range(10) 65 | RoaringBitmap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 66 | >>> croaring.RoaringBitmap.range(2, 10) 67 | RoaringBitmap([2, 3, 4, 5, 6, 7, 8, 9]) 68 | >>> croaring.RoaringBitmap.range(2, 10, 3) 69 | RoaringBitmap([2, 5, 8]) 70 | 71 | 72 | ## License 73 | 74 | CRoaring is licensed under the Apache License v2.0: 75 | 76 | > Copyright 2016 The CRoaring authors 77 | > 78 | > Licensed under the Apache License, Version 2.0 (the "License"); 79 | > you may not use this file except in compliance with the License. 80 | > You may obtain a copy of the License at 81 | > 82 | > http://www.apache.org/licenses/LICENSE-2.0 83 | > 84 | > Unless required by applicable law or agreed to in writing, software 85 | > distributed under the License is distributed on an "AS IS" BASIS, 86 | > WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 87 | > See the License for the specific language governing permissions and 88 | > limitations under the License. 
89 | 90 | All other code is released under the Unlicense: 91 | 92 | > This is free and unencumbered software released into the public domain. 93 | > 94 | > Anyone is free to copy, modify, publish, use, compile, sell, or 95 | > distribute this software, either in source code form or as a compiled 96 | > binary, for any purpose, commercial or non-commercial, and by any 97 | > means. 98 | > 99 | > In jurisdictions that recognize copyright laws, the author or authors 100 | > of this software dedicate any and all copyright interest in the 101 | > software to the public domain. We make this dedication for the benefit 102 | > of the public at large and to the detriment of our heirs and 103 | > successors. We intend this dedication to be an overt act of 104 | > relinquishment in perpetuity of all present and future rights to this 105 | > software under copyright law. 106 | > 107 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 108 | > EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 109 | > MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 110 | > IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 111 | > OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 112 | > ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 113 | > OTHER DEALINGS IN THE SOFTWARE. 114 | > 115 | > For more information, please refer to <http://unlicense.org/> 116 | -------------------------------------------------------------------------------- /croaring_build.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from cffi import FFI 4 | 5 | 6 | SRC_ROOT = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'croaring-src') 7 | 8 | FFI_BUILDER = FFI() 9 | 10 | # The following header code was adapted from the CRoaring amalgamated header, 11 | # which is released under the Apache License v2.0. See the LICENSING file for 12 | # details. 
13 | FFI_BUILDER.cdef(''' 14 | typedef struct roaring_bitmap_s { 15 | ...; 16 | } roaring_bitmap_t; 17 | 18 | roaring_bitmap_t *roaring_bitmap_create(void); 19 | roaring_bitmap_t *roaring_bitmap_from_range(uint32_t min, uint32_t max, 20 | uint32_t step); 21 | roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); 22 | 23 | roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, 24 | const roaring_bitmap_t *x2); 25 | void roaring_bitmap_and_inplace(roaring_bitmap_t *x1, 26 | const roaring_bitmap_t *x2); 27 | uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, 28 | const roaring_bitmap_t *x2); 29 | bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, 30 | const roaring_bitmap_t *x2); 31 | 32 | roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1, 33 | const roaring_bitmap_t *x2); 34 | void roaring_bitmap_or_inplace(roaring_bitmap_t *x1, 35 | const roaring_bitmap_t *x2); 36 | uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, 37 | const roaring_bitmap_t *x2); 38 | 39 | roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1, 40 | const roaring_bitmap_t *x2); 41 | void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1, 42 | const roaring_bitmap_t *x2); 43 | uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, 44 | const roaring_bitmap_t *x2); 45 | 46 | roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, 47 | const roaring_bitmap_t *x2); 48 | void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, 49 | const roaring_bitmap_t *x2); 50 | uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, 51 | const roaring_bitmap_t *x2); 52 | 53 | void roaring_bitmap_free(roaring_bitmap_t *r); 54 | void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); 55 | void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); 56 | void roaring_bitmap_clear(roaring_bitmap_t *ra); 57 | bool roaring_bitmap_contains(const roaring_bitmap_t *r, 58 | uint32_t val); 59 | uint64_t 
roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra); 60 | bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra); 61 | bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, const roaring_bitmap_t *ra2); 62 | bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, const roaring_bitmap_t *ra2); 63 | 64 | bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); 65 | size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); 66 | 67 | size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra); 68 | size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf); 69 | roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); 70 | 71 | bool roaring_bitmap_select(const roaring_bitmap_t *ra, uint32_t rank, 72 | uint32_t *element); 73 | uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x); 74 | 75 | uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm); 76 | uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm); 77 | 78 | typedef struct roaring_statistics_s { 79 | uint32_t n_containers; /* number of containers */ 80 | 81 | uint32_t n_array_containers; /* number of array containers */ 82 | uint32_t n_run_containers; /* number of run containers */ 83 | uint32_t n_bitset_containers; /* number of bitmap containers */ 84 | 85 | uint32_t 86 | n_values_array_containers; /* number of values in array containers */ 87 | uint32_t n_values_run_containers; /* number of values in run containers */ 88 | uint32_t 89 | n_values_bitset_containers; /* number of values in bitmap containers */ 90 | 91 | uint32_t n_bytes_array_containers; /* number of allocated bytes in array 92 | containers */ 93 | uint32_t n_bytes_run_containers; /* number of allocated bytes in run 94 | containers */ 95 | uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap 96 | containers */ 97 | 98 | uint32_t 99 | max_value; /* the maximal value, undefined if cardinality is zero */ 100 | uint32_t 101 | min_value; /* the 
minimal value, undefined if cardinality is zero */ 102 | uint64_t sum_value; /* the sum of all values (could be used to compute 103 | average) */ 104 | 105 | uint64_t cardinality; /* total number of values stored in the bitmap */ 106 | 107 | // and n_values_arrays, n_values_rle, n_values_bitmap 108 | } roaring_statistics_t; 109 | 110 | void roaring_bitmap_statistics(const roaring_bitmap_t *ra, 111 | roaring_statistics_t *stat); 112 | 113 | 114 | typedef struct roaring_uint32_iterator_s { 115 | const roaring_bitmap_t *parent; // owner 116 | ...; 117 | uint32_t current_value; 118 | bool has_value; 119 | } roaring_uint32_iterator_t; 120 | roaring_uint32_iterator_t * roaring_create_iterator(const roaring_bitmap_t *ra); 121 | bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); 122 | roaring_uint32_iterator_t * roaring_copy_uint32_iterator(const roaring_uint32_iterator_t * it); 123 | void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); 124 | ''') 125 | 126 | FFI_BUILDER.set_source( 127 | 'croaring._roaring', 128 | '#include "roaring.c"', 129 | include_dirs=[SRC_ROOT], 130 | extra_compile_args=['-std=c11', '-msse4.2']) 131 | 132 | if __name__ == '__main__': 133 | FFI_BUILDER.compile(verbose=True) 134 | -------------------------------------------------------------------------------- /croaring/__init__.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import functools 3 | 4 | import six 5 | 6 | from croaring._roaring import ffi, lib 7 | 8 | 9 | __all__ = ['RoaringBitmap'] 10 | 11 | 12 | def guard(func): 13 | """Decorator to ensure that both args to a method are RoaringBitmaps.""" 14 | @functools.wraps(func) 15 | def guarded(self, other): 16 | if not isinstance(other, RoaringBitmap): 17 | raise TypeError("Expected RoaringBitmap, got {!r}".format(other)) 18 | return func(self, other) 19 | return guarded 20 | 21 | 22 | def bitmap_operator(lib_func): 23 | """Simple wrapper for binary 
def bitmap_assign_operator(lib_func):
    """Build an in-place binary operator method from a CRoaring function.

    ``lib_func`` is invoked as ``lib_func(self._bitmap, other._bitmap)``; the
    generated method then returns ``self``, which is what Python's
    augmented-assignment protocol (``__iand__``, ``__ior__``, ...) expects.

    NOTE(review): ``guard`` is defined earlier in this module; presumably it
    rejects operands that are not RoaringBitmap instances -- confirm there.
    """
    @guard
    def operator(self, other):
        lib_func(self._bitmap, other._bitmap)
        return self
    return operator


# The container ABCs live in ``collections.abc`` since Python 3.3, and the
# legacy ``collections.MutableSet`` / ``collections.Iterable`` aliases were
# removed in Python 3.10.  Python 2 only exposes them on ``collections``.
try:
    from collections.abc import Iterable as _Iterable
    from collections.abc import MutableSet as _MutableSet
except ImportError:  # Python 2
    from collections import Iterable as _Iterable
    from collections import MutableSet as _MutableSet


class RoaringBitmap(_MutableSet):

    """An efficient integer bitmap set, based on CRoaring.

    Instances own a ``roaring_bitmap_t *`` (stored in ``_bitmap``) and free
    it when they are garbage collected.  Elements are read back from the
    library as ``uint32_t`` values.
    """

    __slots__ = ('_bitmap',)

    @classmethod
    def range(cls, *args):
        """Build a RoaringBitmap with arguments similar to range().

        Accepts ``(stop)``, ``(start, stop)`` or ``(start, stop, step)``.
        Unlike the builtin, negative arguments are rejected.

        Raises:
            TypeError: if called with zero or more than three arguments.
            ValueError: if any argument is negative, or ``step`` is zero.
        """
        if len(args) == 1:
            start, stop, step = 0, args[0], 1
        elif len(args) == 2:
            start, stop, step = args[0], args[1], 1
        elif len(args) == 3:
            start, stop, step = args
        else:
            raise TypeError("RoaringBitmap.range() expects exactly 1, 2, or 3 arguments")

        if start < 0 or stop < 0 or step < 0:
            raise ValueError("all arguments to range() must be positive")
        if step == 0:
            raise ValueError("step argument to range() cannot be zero")
        if start >= stop:
            # Empty set
            return cls()

        return cls(lib.roaring_bitmap_from_range(start, stop, step))

    def __init__(self, bitmap=None):
        """Create a bitmap.

        ``bitmap`` may be:

        * ``None`` -- start with an empty set;
        * a cffi ``struct roaring_bitmap_s *`` -- wrap it directly; this
          object then frees it in ``__del__``;
        * another ``RoaringBitmap`` -- its contents are copied;
        * any other iterable -- every item is passed to :meth:`add`.

        Raises:
            TypeError: if ``bitmap`` is none of the above.
        """
        if isinstance(bitmap, ffi.CData) and ffi.typeof(bitmap).cname == 'struct roaring_bitmap_s *':
            self._bitmap = bitmap
        elif bitmap is None:
            self._bitmap = lib.roaring_bitmap_create()
        elif isinstance(bitmap, RoaringBitmap):
            # Same result as iterating and re-adding, but done in one C call.
            self._bitmap = lib.roaring_bitmap_copy(bitmap._bitmap)
        elif isinstance(bitmap, _Iterable):
            self._bitmap = lib.roaring_bitmap_create()
            for value in bitmap:
                self.add(value)
        else:
            raise TypeError("Can't initialize RoaringBitmap from {!r}".format(bitmap))

    def __repr__(self):
        return 'RoaringBitmap([{}])'.format(', '.join(repr(num) for num in self))

    def __bool__(self):
        """Return True when the bitmap holds at least one element."""
        return not bool(lib.roaring_bitmap_is_empty(self._bitmap))

    if six.PY2:
        # Python 2 spells the truthiness hook ``__nonzero__``.
        __nonzero__ = __bool__
        del __bool__

    def __len__(self):
        """Return the cardinality reported by CRoaring."""
        return lib.roaring_bitmap_get_cardinality(self._bitmap)

    def __del__(self):
        # ``_bitmap`` is missing when __init__ raised before assigning it.
        if hasattr(self, '_bitmap') and self._bitmap is not None:
            lib.roaring_bitmap_free(self._bitmap)
            del self._bitmap

    def __contains__(self, value):
        """Return True if ``value`` is a member of the set.

        cffi raises ``TypeError`` / ``OverflowError`` when ``value`` cannot
        be converted to the C integer argument (a string, a negative number,
        a value that is too large).  Such a value cannot be an element, so
        the answer is simply False.  This also lets the inherited
        ``remove()`` raise ``KeyError`` for them, as the set contract says.
        """
        try:
            return bool(lib.roaring_bitmap_contains(self._bitmap, value))
        except (TypeError, OverflowError):
            return False

    def __iter__(self):
        """Yield the elements in the order produced by the CRoaring iterator."""
        iterator = lib.roaring_create_iterator(self._bitmap)
        try:
            while iterator.has_value:
                yield iterator.current_value
                lib.roaring_advance_uint32_iterator(iterator)
        finally:
            # Runs on exhaustion and when the generator is closed early.
            lib.roaring_free_uint32_iterator(iterator)

    @guard
    def __eq__(self, other):
        return bool(lib.roaring_bitmap_equals(self._bitmap, other._bitmap))

    # Binary operators returning a new bitmap.
    __and__ = bitmap_operator(lib.roaring_bitmap_and)
    __or__ = bitmap_operator(lib.roaring_bitmap_or)
    __xor__ = bitmap_operator(lib.roaring_bitmap_xor)
    __sub__ = bitmap_operator(lib.roaring_bitmap_andnot)

    # Augmented-assignment operators mutating ``self``.
    __iand__ = bitmap_assign_operator(lib.roaring_bitmap_and_inplace)
    __ior__ = bitmap_assign_operator(lib.roaring_bitmap_or_inplace)
    __ixor__ = bitmap_assign_operator(lib.roaring_bitmap_xor_inplace)
    __isub__ = bitmap_assign_operator(lib.roaring_bitmap_andnot_inplace)

    @guard
    def __lt__(self, other):
        """Proper subset: a subset with strictly fewer elements."""
        return len(self) < len(other) and self <= other

    @guard
    def __gt__(self, other):
        return other < self

    @guard
    def __le__(self, other):
        return bool(lib.roaring_bitmap_is_subset(self._bitmap, other._bitmap))

    @guard
    def __ge__(self, other):
        return other <= self

    def add(self, value):
        """Insert ``value``; conversion errors from cffi propagate."""
        lib.roaring_bitmap_add(self._bitmap, value)

    def discard(self, value):
        """Remove ``value`` if present; do nothing otherwise.

        A value cffi cannot convert to the C integer argument can never be a
        member, so discarding it is a no-op rather than an error.
        """
        try:
            lib.roaring_bitmap_remove(self._bitmap, value)
        except (TypeError, OverflowError):
            pass

    def clear(self):
        """Remove every element."""
        lib.roaring_bitmap_clear(self._bitmap)

    # Named aliases mirroring the builtin ``set`` API.
    union = __or__
    intersection = __and__
    difference = __sub__
    symmetric_difference = __xor__
    issubset = __le__
    issuperset = __ge__

    @guard
    def isdisjoint(self, other):
        """Return True when the two bitmaps share no element."""
        return not lib.roaring_bitmap_intersect(self._bitmap, other._bitmap)

    def copy(self):
        """Return a new RoaringBitmap with the same contents."""
        return RoaringBitmap(lib.roaring_bitmap_copy(self._bitmap))

    def minimum(self):
        """Return the smallest element, or None if the bitmap is empty."""
        if not self:
            return None
        return lib.roaring_bitmap_minimum(self._bitmap)

    def maximum(self):
        """Return the largest element, or None if the bitmap is empty."""
        if not self:
            return None
        return lib.roaring_bitmap_maximum(self._bitmap)

    def __getitem__(self, index):
        """Return the element at position ``index`` (negative counts from the end).

        Raises:
            IndexError: if the bitmap is empty or ``index`` is out of range.
        """
        if not self:
            raise IndexError("RoaringBitmap index out of range")
        # Cheap shortcuts that avoid computing the cardinality.
        if index == 0:
            return self.minimum()
        if index == -1:
            return self.maximum()

        size = len(self)
        if index < 0:
            index += size
        # Checking the bounds here keeps oversized indexes away from cffi,
        # which would raise OverflowError instead of IndexError.
        if not 0 <= index < size:
            raise IndexError("RoaringBitmap index out of range")

        elem = ffi.new('uint32_t *')
        if lib.roaring_bitmap_select(self._bitmap, index, elem):
            return elem[0]
        raise IndexError("RoaringBitmap index out of range")