├── MANIFEST.in
├── .gitmodules
├── tox.ini
├── .coveragerc
├── Makefile
├── .travis.yml
├── setup.py
├── .gitignore
├── LICENSING
├── test
│   ├── test_croaring.py
│   └── test_generative.py
├── README.rst
├── README.md
├── croaring_build.py
└── croaring
    └── __init__.py
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSING
2 | include croaring_build.py
3 | include croaring-src/roaring.c
4 | include croaring-src/roaring.h
5 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "croaring-src"]
2 | path = croaring-src
3 | url = https://github.com/RoaringBitmap/CRoaring.git
4 | ignore = dirty
5 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist=py27,py35,py36
3 | [testenv]
4 | passenv = CI TRAVIS TRAVIS_* NOSE_WITH_COVERAGE
5 | usedevelop=true
6 | deps=
7 | cffi
8 | codecov
9 | coverage
10 | nose
11 | commands=
12 | nosetests
13 | codecov
14 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source = croaring
4 |
5 | [report]
6 | exclude_lines =
7 | if self.debug:
8 | if six.PY2:
9 | if six.PY3:
10 | pragma: no cover
11 | raise NotImplementedError
12 | if __name__ == .__main__.:
13 | ignore_errors = True
14 | omit =
15 | test/*
16 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all: README.rst amalgamated
2 |
3 | README.rst: README.md
4 | cat $^ | egrep -v '^\[\!' | pandoc -f markdown -t rst > $@
5 |
6 | amalgamated: croaring-src/roaring.h croaring-src/roaring.c
7 |
# A single run of the upstream amalgamation script produces both files.
# The second target previously read "roaring-src/roaring.c" (missing the
# leading "c"), so the roaring.c prerequisite of `amalgamated` had no rule.
# The former "$@" prerequisite was dropped: automatic variables expand to
# nothing in a prerequisite list, so it had no effect.
croaring-src/roaring.h croaring-src/roaring.c:
	cd croaring-src/ && ./amalgamation.sh
10 |
11 | .PHONY: all amalgamated
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | dist: trusty
3 |
4 | compiler:
5 | - clang
6 | - gcc
7 |
8 | matrix:
9 | include:
10 | - python: "2.7"
11 | env: TOXENV=py27 NOSE_WITH_COVERAGE=true
12 | - python: "3.6"
13 | env: TOXENV=py36 NOSE_WITH_COVERAGE=true
14 | - python: "pypy"
15 | env: TOXENV=py27
16 | - python: "pypy3"
17 | env: TOXENV=py35
18 |
19 | install:
20 | - make amalgamated
21 | - pip install tox
22 |
23 | script: tox
24 |
25 | notifications:
26 | email: false
27 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from codecs import open
2 | from os import path
3 |
4 | from setuptools import setup, find_packages
5 |
6 |
here = path.abspath(path.dirname(__file__))

# The long description is read from the reStructuredText README that sits
# next to this script.
readme_path = path.join(here, 'README.rst')
with open(readme_path, encoding='utf-8') as readme_file:
    readme = readme_file.read()

# The same requirement list is used at build time and at install time.
requirements = [
    "cffi>=1.4.0",
    "six>=1.10.0",
]

setup(
    name='croaring',
    version='0.0.2',
    description='Compressed integer sets based on Roaring bitmaps.',
    long_description=readme,
    url='https://github.com/zacharyvoase/croaring.py',
    author='Zachary Voase',
    author_email='zack@meat.io',
    license='UNLICENSE',
    packages=find_packages(exclude=['test']),
    setup_requires=list(requirements),
    install_requires=list(requirements),
    # Build the extension module described by FFI_BUILDER in croaring_build.py.
    cffi_modules=["croaring_build.py:FFI_BUILDER"],
)
39 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | croaring/_roaring.*
2 |
3 | # Finder files
4 | .DS_Store
5 |
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.pyc
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | env/
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *,cover
51 | .hypothesis/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # IPython Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # dotenv
84 | .env
85 |
86 | # virtualenv
87 | .venv/
88 | venv/
89 | ENV/
90 |
91 | # Spyder project settings
92 | .spyderproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
--------------------------------------------------------------------------------
/LICENSING:
--------------------------------------------------------------------------------
1 | The CRoaring library is distributed under the following terms:
2 |
3 | Copyright 2016 The CRoaring authors
4 |
5 | Licensed under the Apache License, Version 2.0 (the "License");
6 | you may not use this file except in compliance with the License.
7 | You may obtain a copy of the License at
8 |
9 | http://www.apache.org/licenses/LICENSE-2.0
10 |
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | See the License for the specific language governing permissions and
15 | limitations under the License.
16 |
17 | All other code (particularly, the Python bindings) is released into the public
18 | domain under these terms:
19 |
20 | This is free and unencumbered software released into the public domain.
21 |
22 | Anyone is free to copy, modify, publish, use, compile, sell, or
23 | distribute this software, either in source code form or as a compiled
24 | binary, for any purpose, commercial or non-commercial, and by any
25 | means.
26 |
27 | In jurisdictions that recognize copyright laws, the author or authors
28 | of this software dedicate any and all copyright interest in the
29 | software to the public domain. We make this dedication for the benefit
30 | of the public at large and to the detriment of our heirs and
31 | successors. We intend this dedication to be an overt act of
32 | relinquishment in perpetuity of all present and future rights to this
33 | software under copyright law.
34 |
35 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
38 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
39 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
40 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
41 | OTHER DEALINGS IN THE SOFTWARE.
42 |
43 | For more information, please refer to <http://unlicense.org/>
44 |
--------------------------------------------------------------------------------
/test/test_croaring.py:
--------------------------------------------------------------------------------
1 | import operator
2 |
3 | from nose.tools import assert_raises
4 | import six
5 |
6 | from croaring import RoaringBitmap
7 |
8 |
def test_smoke():
    """Exercise len(), iteration, truthiness and membership end to end."""
    bitmap = RoaringBitmap()

    # A new bitmap starts out empty and falsy.
    assert len(bitmap) == 0
    assert list(bitmap) == []
    assert not bitmap
    assert 1 not in bitmap

    # repr() must return a native string type.
    assert isinstance(repr(bitmap), six.string_types)

    for value in (1, 6):
        bitmap.add(value)
    assert len(bitmap) == 2
    assert list(bitmap) == [1, 6]
    assert bitmap
    assert 1 in bitmap
    assert 6 in bitmap
    assert 2 not in bitmap

    # Discarding one member leaves the other in place.
    bitmap.discard(1)
    assert len(bitmap) == 1
    assert list(bitmap) == [6]
    assert bitmap
    assert 1 not in bitmap
    assert 6 in bitmap
34 |
35 |
def test_invalid_args():
    """The constructor raises TypeError for an int and for a string."""
    for bad_argument in (123, 'string'):
        with assert_raises(TypeError):
            RoaringBitmap(bad_argument)
41 |
42 |
def test_minmax():
    """minimum()/maximum() return None when empty, else the extreme values."""
    nothing = RoaringBitmap([])
    assert nothing.minimum() is None
    assert nothing.maximum() is None

    populated = RoaringBitmap([1, 2, 3, 4])
    assert populated.minimum() == 1
    assert populated.maximum() == 4
51 |
52 |
def test_indexing_empty():
    """Indexing an empty bitmap raises IndexError from either end."""
    nothing = RoaringBitmap([])
    for index in (0, -1):
        with assert_raises(IndexError):
            nothing[index]
59 |
60 |
def test_indexing_nonempty():
    """Positive and negative indexes address the sorted members."""
    members = [1, 3, 5, 6]
    bitmap = RoaringBitmap(members)

    # Forward indexing, then one step past the end.
    for index, expected in enumerate(members):
        assert bitmap[index] == expected
    with assert_raises(IndexError):
        bitmap[4]

    # Negative indexing counts back from the largest member.
    for offset, expected in enumerate(reversed(members), 1):
        assert bitmap[-offset] == expected
    with assert_raises(IndexError):
        bitmap[-5]
75 |
76 |
def test_copy():
    """Mutating a copy must not affect the bitmap it was copied from."""
    original = RoaringBitmap()
    original.add(1)

    duplicate = original.copy()
    duplicate.add(6)

    assert len(original) == 1
    assert list(original) == [1]
    assert len(duplicate) == 2
    assert list(duplicate) == [1, 6]
86 |
87 |
def test_isdisjoint():
    """isdisjoint() is True only when the two bitmaps share no member."""
    empty = RoaringBitmap()
    assert empty.isdisjoint(empty)

    low = RoaringBitmap([1, 2, 3, 4, 5])
    high = RoaringBitmap([6, 7, 8, 9, 10])
    straddling = RoaringBitmap([4, 5, 6, 7])

    # The empty bitmap is disjoint from everything, in both directions.
    assert low.isdisjoint(empty)
    assert empty.isdisjoint(low)

    assert low.isdisjoint(high)

    # Overlap is detected regardless of which side is the receiver.
    assert not low.isdisjoint(straddling)
    assert not high.isdisjoint(straddling)
    assert not straddling.isdisjoint(low)
    assert not straddling.isdisjoint(high)
101 |
102 |
def test_clear():
    """clear() removes every member."""
    bitmap = RoaringBitmap([1, 2, 3, 4, 5])
    bitmap.clear()
    assert len(bitmap) == 0
    assert list(bitmap) == []
108 |
--------------------------------------------------------------------------------
/test/test_generative.py:
--------------------------------------------------------------------------------
1 | import operator
2 | import random
3 | import six
4 |
5 | from nose.tools import assert_equals
6 | from nose.tools import assert_raises
7 |
8 | from croaring import RoaringBitmap
9 |
10 |
# Shared empty operand used by gen_random_set_pairs().
EMPTY = set()


# Each table pairs an operator function with the symbol that is substituted
# into the test-case name.

# Operators that produce a new set.
BINARY_OPERATORS = [
    (operator.and_, "&"),
    (operator.or_, "|"),
    (operator.xor, "^"),
    (operator.sub, "-"),
]


# Comparison operators that produce a boolean.
BOOLEAN_OPERATORS = [
    (operator.eq, "=="),
    (operator.lt, "<"),
    (operator.gt, ">"),
    (operator.le, "<="),
    (operator.ge, ">="),
]


# Augmented-assignment operators that update the left operand.
INPLACE_OPERATORS = [
    (operator.iand, "&="),
    (operator.ior, "|="),
    (operator.ixor, "^="),
    (operator.isub, "-="),
]
37 |
38 |
def gen_random_set_pairs():
    """Yield ``(left, right, name_template)`` operand pairs for operator tests.

    Each template contains a ``{}`` placeholder that callers fill with the
    operator symbol. The operands are randomly sampled on every call.
    """
    range_ = xrange if six.PY2 else range

    # 50 values below 1000; every other operand is derived from or compared
    # against this one.
    initial = random.sample(range_(0, 1000), 50)
    equal = list(initial)
    # Sampled from 1000-1999, so it cannot share members with ``initial``.
    disjoint = random.sample(range_(1000, 2000), 50)
    # Half of ``initial`` plus 25 values from the non-overlapping range.
    overlap = initial[:25] + random.sample(range_(1000, 2000), 25)
    proper_subset = initial[:25]
    proper_superset = initial + disjoint

    scenarios = [
        (EMPTY, EMPTY, "empty {} empty"),
        (initial, EMPTY, "X {} empty"),
        (initial, equal, "X {} X"),
        (initial, disjoint, "X {} disjoint"),
        (initial, overlap, "X {} overlap"),
        (initial, proper_subset, "X {} proper_subset"),
        (initial, proper_superset, "X {} proper_superset"),
    ]
    for scenario in scenarios:
        yield scenario
59 |
60 |
def test_range():
    """Yield one range-equivalence check per argument combination."""
    argument_sets = (
        (0,),
        (10,),
        (0, 10),
        (0, 10, 1),
        (0, 10, 2),
        (0, 10, 3),
    )
    for range_args in argument_sets:
        yield (check_range_same,) + range_args
68 |
69 |
def test_invalid_range():
    """Yield one check per rejected argument list with its expected error."""
    # Each entry is the expected exception followed by the range() arguments.
    cases = (
        (TypeError,),
        (TypeError, 0, 1, 2, 3),
        (ValueError, -1),
        (ValueError, 5, 0, -1),
        (ValueError, 0, 5, -1),
        (ValueError, 0, 5, 0),
        (ValueError, -1, 2),
        (ValueError, 2, -1),
        (ValueError, -5, -2),
    )
    for case in cases:
        yield (check_invalid_range,) + case
80 |
81 |
def test_binary_operators():
    """Yield a check for every binary operator over every operand pair."""
    for operation, symbol in BINARY_OPERATORS:
        for left, right, template in gen_random_set_pairs():
            label = template.format(symbol)
            yield check_binary_same, operation, left, right, label
86 |
87 |
def test_boolean_operators():
    """Yield a check for every comparison operator over every operand pair."""
    for operation, symbol in BOOLEAN_OPERATORS:
        for left, right, template in gen_random_set_pairs():
            label = template.format(symbol)
            yield check_boolean_same, operation, left, right, label
92 |
93 |
def test_inplace_operators():
    """Yield a check for every in-place operator over every operand pair."""
    for operation, symbol in INPLACE_OPERATORS:
        for left, right, template in gen_random_set_pairs():
            label = template.format(symbol)
            yield check_inplace_same, operation, left, right, label
98 |
99 |
def check_range_same(*range_args):
    """Assert that RoaringBitmap.range() matches the builtin range().

    :param range_args: positional arguments forwarded unchanged to both
        ``RoaringBitmap.range`` and the builtin ``range``.
    """
    # list(range(...)) gives a list on Python 2 and Python 3 alike, so no
    # version switch (previously an ``and``/``or`` expression) is needed.
    expected = list(range(*range_args))
    actual = list(RoaringBitmap.range(*range_args))
    assert_equals(actual, expected)
103 |
104 |
def check_invalid_range(exc, *range_args):
    """Assert that RoaringBitmap.range(*range_args) raises ``exc``."""
    expect_failure = assert_raises(exc)
    with expect_failure:
        RoaringBitmap.range(*range_args)
108 |
109 |
def check_binary_same(op, set1, set2, name):
    """Assert ``op`` gives the same members for bitmaps as for builtin sets.

    ``name`` is passed to the assertion as its failure message.
    """
    expected = sorted(op(set(set1), set(set2)))
    left, right = RoaringBitmap(set1), RoaringBitmap(set2)
    actual = list(op(left, right))
    assert_equals(actual, expected, name)

    # A bitmap combined with a builtin set must be rejected.
    with assert_raises(TypeError):
        op(RoaringBitmap(set1), set(set2))
117 |
118 |
def check_boolean_same(op, set1, set2, name):
    """Assert comparison ``op`` agrees between bitmaps and builtin sets.

    ``name`` is passed to the assertion as its failure message.
    """
    left, right = RoaringBitmap(set1), RoaringBitmap(set2)
    expected = op(set(set1), set(set2))
    actual = op(left, right)
    assert_equals(actual, expected, name)

    # A bitmap compared with a builtin set must be rejected.
    with assert_raises(TypeError):
        op(RoaringBitmap(set1), set(set2))
125 |
126 |
def check_inplace_same(op, set1, set2, name):
    """Assert in-place ``op`` updates a bitmap the way it updates a set.

    ``name`` is passed to the assertion as its failure message.
    """
    reference, reference_other = set(set1), set(set2)
    bitmap, bitmap_other = RoaringBitmap(set1), RoaringBitmap(set2)

    # Both left operands are mutated in place; the return values are ignored.
    op(reference, reference_other)
    op(bitmap, bitmap_other)

    expected = sorted(reference)
    actual = list(bitmap)
    assert_equals(actual, expected, name)

    # A bitmap updated from a builtin set must be rejected.
    with assert_raises(TypeError):
        op(bitmap, reference_other)
138 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | CRoaring.py: Fast, compact integer bitmap sets, based on CRoaring
2 | =================================================================
3 |
4 | `Roaring bitmaps <http://roaringbitmap.org/>`__ are fast, compressed,
5 | and portable bitmaps, used to store unique sorted integer sets. These
6 | bitmaps offer better real-world space complexity and performance than
7 | typical hash sets (such as Python's built-in ``set``), and can be
8 | serialized into a portable format for storage and interop with the
9 | C/C++, Java and Go libraries.
10 |
11 | This library makes the
12 | `CRoaring <https://github.com/RoaringBitmap/CRoaring>`__ implementation
13 | available in Python 2.7 and 3.5+. It uses
14 | `CFFI <http://cffi.readthedocs.io/en/latest/>`__, so it works on both
15 | CPython and PyPy. The full Python ``set`` interface is implemented.
16 | Comprehensive tests are included.
17 |
18 | Installation
19 | ------------
20 |
21 | ::
22 |
23 | pip install croaring
24 |
25 | The CRoaring source is included with the Python library, so you don't
26 | need to install it from elsewhere (though you may need a C compiler
27 | available if a binary package is unavailable for your architecture).
28 |
29 | Usage
30 | -----
31 |
32 | Instantiate a ``croaring.RoaringBitmap()``, and use it just like a
33 | normal ``set``:
34 |
35 | ::
36 |
37 | >>> import croaring
38 | >>> bitmap = croaring.RoaringBitmap()
39 | >>> bitmap
40 | RoaringBitmap([])
41 | >>> bitmap.add(1)
42 | >>> bitmap.add(4572)
43 | >>> bitmap.add(326)
44 | >>> bitmap
45 | RoaringBitmap([1, 326, 4572])
46 |
47 | You can use either binary operators (``|``, ``&``, ``^`` and ``-``) or
48 | their English names (``union``, ``intersection``,
49 | ``symmetric_difference`` and ``difference``):
50 |
51 | ::
52 |
53 | >>> bitmap | RoaringBitmap([50, 95])
54 | RoaringBitmap([1, 50, 95, 326, 4572])
55 | >>> bitmap & RoaringBitmap([200, 326])
56 | RoaringBitmap([326])
57 | >>> bitmap ^ RoaringBitmap([200, 326])
58 | RoaringBitmap([1, 200, 4572])
59 |
60 | Since the bitmaps are ordered, indexing (including negative) is
61 | supported:
62 |
63 | ::
64 |
65 | >>> bitmap[1]
66 | 326
67 | >>> bitmap[-1]
68 | 4572
69 |
70 | Finally, you can construct a bitmap from a range, similar to the
71 | arguments to Python's built-in ``range``:
72 |
73 | ::
74 |
75 | >>> RoaringBitmap.range(10)
76 | RoaringBitmap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
77 | >>> RoaringBitmap.range(2, 10)
78 | RoaringBitmap([2, 3, 4, 5, 6, 7, 8, 9])
79 | >>> RoaringBitmap.range(2, 10, 3)
80 | RoaringBitmap([2, 5, 8])
81 |
82 | License
83 | -------
84 |
85 | CRoaring is licensed under the Apache License v2.0:
86 |
87 | Copyright 2016 The CRoaring authors
88 |
89 | Licensed under the Apache License, Version 2.0 (the "License"); you
90 | may not use this file except in compliance with the License. You may
91 | obtain a copy of the License at
92 |
93 | ::
94 |
95 | http://www.apache.org/licenses/LICENSE-2.0
96 |
97 | Unless required by applicable law or agreed to in writing, software
98 | distributed under the License is distributed on an "AS IS" BASIS,
99 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
100 | implied. See the License for the specific language governing
101 | permissions and limitations under the License.
102 |
103 | All other code is released under the Unlicense:
104 |
105 | This is free and unencumbered software released into the public
106 | domain.
107 |
108 | Anyone is free to copy, modify, publish, use, compile, sell, or
109 | distribute this software, either in source code form or as a
110 | compiled binary, for any purpose, commercial or non-commercial, and
111 | by any means.
112 |
113 | In jurisdictions that recognize copyright laws, the author or
114 | authors of this software dedicate any and all copyright interest in
115 | the software to the public domain. We make this dedication for the
116 | benefit of the public at large and to the detriment of our heirs and
117 | successors. We intend this dedication to be an overt act of
118 | relinquishment in perpetuity of all present and future rights to
119 | this software under copyright law.
120 |
121 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
122 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
123 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
124 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
125 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
126 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
127 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
128 |
129 | For more information, please refer to http://unlicense.org/
130 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CRoaring.py: Fast, compact integer bitmap sets, based on CRoaring
2 |
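3 | [![Build Status](https://travis-ci.org/zacharyvoase/croaring.py.svg?branch=master)](https://travis-ci.org/zacharyvoase/croaring.py)
4 | [![codecov](https://codecov.io/gh/zacharyvoase/croaring.py/branch/master/graph/badge.svg)](https://codecov.io/gh/zacharyvoase/croaring.py)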
5 |
6 | [Roaring bitmaps][] are fast, compressed, and portable bitmaps, used to store
7 | unique sorted integer sets. These bitmaps offer better real-world space
8 | complexity and performance than typical hash sets (such as Python's built-in
9 | `set`), and can be serialized into a portable format for storage and interop
10 | with the C/C++, Java and Go libraries.
11 |
12 | This library makes the [CRoaring][] implementation available in Python 2.7 and
13 | 3.5+. It uses [CFFI][], so it works on both CPython and PyPy. The full Python
14 | `set` interface is implemented. Comprehensive tests are included.
15 |
16 | [Roaring bitmaps]: http://roaringbitmap.org/
17 | [CRoaring]: https://github.com/RoaringBitmap/CRoaring
18 | [CFFI]: http://cffi.readthedocs.io/en/latest/
19 |
20 |
21 | ## Installation
22 |
23 | pip install croaring
24 |
25 | The CRoaring source is included with the Python library, so you don't need to
26 | install it from elsewhere (though you may need a C compiler available if a
27 | binary package is unavailable for your architecture).
28 |
29 |
30 | ## Usage
31 |
32 | Instantiate a `croaring.RoaringBitmap()`, and use it just like a normal `set`:
33 |
34 | >>> import croaring
35 | >>> bitmap = croaring.RoaringBitmap()
36 | >>> bitmap
37 | RoaringBitmap([])
38 | >>> bitmap.add(1)
39 | >>> bitmap.add(4572)
40 | >>> bitmap.add(326)
41 | >>> bitmap
42 | RoaringBitmap([1, 326, 4572])
43 |
44 | You can use either binary operators (`|`, `&`, `^` and `-`) or their English
45 | names (`union`, `intersection`, `symmetric_difference` and `difference`):
46 |
47 | >>> bitmap | RoaringBitmap([50, 95])
48 | RoaringBitmap([1, 50, 95, 326, 4572])
49 | >>> bitmap & RoaringBitmap([200, 326])
50 | RoaringBitmap([326])
51 | >>> bitmap ^ RoaringBitmap([200, 326])
52 | RoaringBitmap([1, 200, 4572])
53 |
54 | Since the bitmaps are ordered, indexing (including negative) is supported:
55 |
56 | >>> bitmap[1]
57 | 326
58 | >>> bitmap[-1]
59 | 4572
60 |
61 | Finally, you can construct a bitmap from a range, similar to the arguments to
62 | Python's built-in `range`:
63 |
64 | >>> RoaringBitmap.range(10)
65 | RoaringBitmap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
66 | >>> RoaringBitmap.range(2, 10)
67 | RoaringBitmap([2, 3, 4, 5, 6, 7, 8, 9])
68 | >>> RoaringBitmap.range(2, 10, 3)
69 | RoaringBitmap([2, 5, 8])
70 |
71 |
72 | ## License
73 |
74 | CRoaring is licensed under the Apache License v2.0:
75 |
76 | > Copyright 2016 The CRoaring authors
77 | >
78 | > Licensed under the Apache License, Version 2.0 (the "License");
79 | > you may not use this file except in compliance with the License.
80 | > You may obtain a copy of the License at
81 | >
82 | > http://www.apache.org/licenses/LICENSE-2.0
83 | >
84 | > Unless required by applicable law or agreed to in writing, software
85 | > distributed under the License is distributed on an "AS IS" BASIS,
86 | > WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
87 | > See the License for the specific language governing permissions and
88 | > limitations under the License.
89 |
90 | All other code is released under the Unlicense:
91 |
92 | > This is free and unencumbered software released into the public domain.
93 | >
94 | > Anyone is free to copy, modify, publish, use, compile, sell, or
95 | > distribute this software, either in source code form or as a compiled
96 | > binary, for any purpose, commercial or non-commercial, and by any
97 | > means.
98 | >
99 | > In jurisdictions that recognize copyright laws, the author or authors
100 | > of this software dedicate any and all copyright interest in the
101 | > software to the public domain. We make this dedication for the benefit
102 | > of the public at large and to the detriment of our heirs and
103 | > successors. We intend this dedication to be an overt act of
104 | > relinquishment in perpetuity of all present and future rights to this
105 | > software under copyright law.
106 | >
107 | > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
108 | > EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
109 | > MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
110 | > IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
111 | > OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
112 | > ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
113 | > OTHER DEALINGS IN THE SOFTWARE.
114 | >
115 | > For more information, please refer to <http://unlicense.org/>
116 |
--------------------------------------------------------------------------------
/croaring_build.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from cffi import FFI
4 |
5 |
# Directory holding the CRoaring sources, resolved relative to this file.
_BUILD_DIR = os.path.abspath(os.path.dirname(__file__))
SRC_ROOT = os.path.join(_BUILD_DIR, 'croaring-src')

# Builder object that the declarations and source below are registered on.
FFI_BUILDER = FFI()
9 |
10 | # The following header code was adapted from the CRoaring amalgamated header,
11 | # which is released under the Apache License v2.0. See the LICENSING file for
12 | # details.
13 | FFI_BUILDER.cdef('''
14 | typedef struct roaring_bitmap_s {
15 | ...;
16 | } roaring_bitmap_t;
17 |
18 | roaring_bitmap_t *roaring_bitmap_create(void);
19 | roaring_bitmap_t *roaring_bitmap_from_range(uint32_t min, uint32_t max,
20 | uint32_t step);
21 | roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r);
22 |
23 | roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
24 | const roaring_bitmap_t *x2);
25 | void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
26 | const roaring_bitmap_t *x2);
27 | uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
28 | const roaring_bitmap_t *x2);
29 | bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
30 | const roaring_bitmap_t *x2);
31 |
32 | roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
33 | const roaring_bitmap_t *x2);
34 | void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
35 | const roaring_bitmap_t *x2);
36 | uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
37 | const roaring_bitmap_t *x2);
38 |
39 | roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
40 | const roaring_bitmap_t *x2);
41 | void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
42 | const roaring_bitmap_t *x2);
43 | uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
44 | const roaring_bitmap_t *x2);
45 |
46 | roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
47 | const roaring_bitmap_t *x2);
48 | void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
49 | const roaring_bitmap_t *x2);
50 | uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
51 | const roaring_bitmap_t *x2);
52 |
53 | void roaring_bitmap_free(roaring_bitmap_t *r);
54 | void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x);
55 | void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x);
56 | void roaring_bitmap_clear(roaring_bitmap_t *ra);
57 | bool roaring_bitmap_contains(const roaring_bitmap_t *r,
58 | uint32_t val);
59 | uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra);
60 | bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra);
61 | bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, const roaring_bitmap_t *ra2);
62 | bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, const roaring_bitmap_t *ra2);
63 |
64 | bool roaring_bitmap_run_optimize(roaring_bitmap_t *r);
65 | size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
66 |
67 | size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra);
68 | size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf);
69 | roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
70 |
71 | bool roaring_bitmap_select(const roaring_bitmap_t *ra, uint32_t rank,
72 | uint32_t *element);
73 | uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x);
74 |
75 | uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm);
76 | uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm);
77 |
78 | typedef struct roaring_statistics_s {
79 | uint32_t n_containers; /* number of containers */
80 |
81 | uint32_t n_array_containers; /* number of array containers */
82 | uint32_t n_run_containers; /* number of run containers */
83 | uint32_t n_bitset_containers; /* number of bitmap containers */
84 |
85 | uint32_t
86 | n_values_array_containers; /* number of values in array containers */
87 | uint32_t n_values_run_containers; /* number of values in run containers */
88 | uint32_t
89 | n_values_bitset_containers; /* number of values in bitmap containers */
90 |
91 | uint32_t n_bytes_array_containers; /* number of allocated bytes in array
92 | containers */
93 | uint32_t n_bytes_run_containers; /* number of allocated bytes in run
94 | containers */
95 | uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap
96 | containers */
97 |
98 | uint32_t
99 | max_value; /* the maximal value, undefined if cardinality is zero */
100 | uint32_t
101 | min_value; /* the minimal value, undefined if cardinality is zero */
102 | uint64_t sum_value; /* the sum of all values (could be used to compute
103 | average) */
104 |
105 | uint64_t cardinality; /* total number of values stored in the bitmap */
106 |
107 | // and n_values_arrays, n_values_rle, n_values_bitmap
108 | } roaring_statistics_t;
109 |
110 | void roaring_bitmap_statistics(const roaring_bitmap_t *ra,
111 | roaring_statistics_t *stat);
112 |
113 |
114 | typedef struct roaring_uint32_iterator_s {
115 | const roaring_bitmap_t *parent; // owner
116 | ...;
117 | uint32_t current_value;
118 | bool has_value;
119 | } roaring_uint32_iterator_t;
120 | roaring_uint32_iterator_t * roaring_create_iterator(const roaring_bitmap_t *ra);
121 | bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it);
122 | roaring_uint32_iterator_t * roaring_copy_uint32_iterator(const roaring_uint32_iterator_t * it);
123 | void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it);
124 | ''')
125 |
import platform

# -msse4.2 is an x86-only compiler option; passing it on other architectures
# (e.g. ARM) makes the compiler reject the command line, so it is only added
# when the build host reports an x86 machine type.
_X86_MACHINES = ('x86_64', 'amd64', 'i386', 'i686', 'x86')
_COMPILE_ARGS = ['-std=c11']
if platform.machine().lower() in _X86_MACHINES:
    _COMPILE_ARGS.append('-msse4.2')

# The extension is compiled from the amalgamated roaring.c found in SRC_ROOT.
FFI_BUILDER.set_source(
    'croaring._roaring',
    '#include "roaring.c"',
    include_dirs=[SRC_ROOT],
    extra_compile_args=_COMPILE_ARGS)

# Allow building the extension directly by running this file as a script.
if __name__ == '__main__':
    FFI_BUILDER.compile(verbose=True)
134 |
--------------------------------------------------------------------------------
/croaring/__init__.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import functools
3 |
4 | import six
5 |
6 | from croaring._roaring import ffi, lib
7 |
8 |
9 | __all__ = ['RoaringBitmap']
10 |
11 |
def guard(func):
    """Wrap a binary method so it only accepts RoaringBitmap operands.

    The returned method raises TypeError when ``other`` is not a
    RoaringBitmap instance and otherwise delegates to ``func``.
    """
    def guarded(self, other):
        if isinstance(other, RoaringBitmap):
            return func(self, other)
        raise TypeError("Expected RoaringBitmap, got {!r}".format(other))
    # Copy func's name and docstring onto the wrapper.
    return functools.wraps(func)(guarded)
20 |
21 |
def bitmap_operator(lib_func):
    """Build a guarded binary method that returns a new RoaringBitmap.

    ``lib_func`` is called with the two underlying C bitmaps and its result
    is wrapped in a RoaringBitmap.
    """
    def operator(self, other):
        result = lib_func(self._bitmap, other._bitmap)
        return RoaringBitmap(result)
    return guard(operator)
28 |
29 |
def bitmap_assign_operator(lib_func):
    """Build a guarded in-place binary method.

    ``lib_func`` is called with the two underlying C bitmaps; the method
    then returns ``self``, as augmented assignment requires.
    """
    def operator(self, other):
        lib_func(self._bitmap, other._bitmap)
        return self
    return guard(operator)
37 |
38 |
39 | class RoaringBitmap(collections.MutableSet):
40 |
41 | """An efficient integer bitmap set, based on CRoaring."""
42 |
43 | __slots__ = ('_bitmap',)
44 |
@classmethod
def range(cls, *args):
    """Build a RoaringBitmap with arguments similar to range().

    Accepts ``(stop)``, ``(start, stop)`` or ``(start, stop, step)``.

    :raises TypeError: if called with fewer than 1 or more than 3 arguments.
    :raises ValueError: if any argument is negative or ``step`` is zero.
    :returns: a new bitmap; empty when ``start >= stop``.
    """
    if len(args) == 1:
        start, stop, step = 0, args[0], 1
    elif len(args) == 2:
        start, stop, step = args[0], args[1], 1
    elif len(args) == 3:
        start, stop, step = args
    else:
        raise TypeError("RoaringBitmap.range() expects exactly 1, 2, or 3 arguments")

    # The C function takes uint32_t arguments, so negatives are rejected.
    # Zero is accepted for start and stop, hence "non-negative" rather than
    # the previous, inaccurate "positive".
    if start < 0 or stop < 0 or step < 0:
        raise ValueError("all arguments to range() must be non-negative")
    if step == 0:
        raise ValueError("step argument to range() cannot be zero")
    if start >= stop:
        # Empty set
        return cls()

    return cls(lib.roaring_bitmap_from_range(start, stop, step))
66 |
67 | def __init__(self, bitmap=None):
68 | if isinstance(bitmap, ffi.CData) and ffi.typeof(bitmap).cname == 'struct roaring_bitmap_s *':
69 | self._bitmap = bitmap
70 | elif bitmap is None:
71 | self._bitmap = lib.roaring_bitmap_create()
72 | elif isinstance(bitmap, collections.Iterable):
73 | self._bitmap, iterable = lib.roaring_bitmap_create(), bitmap
74 | for value in iterable:
75 | self.add(value)
76 | else:
77 | raise TypeError("Can't initialize RoaringBitmap from {!r}".format(bitmap))
78 |
79 | def __repr__(self):
80 | return 'RoaringBitmap([{}])'.format(', '.join(repr(num) for num in self))
81 |
82 | def __bool__(self):
83 | return not bool(lib.roaring_bitmap_is_empty(self._bitmap))
84 |
85 | if six.PY2:
86 | __nonzero__ = __bool__
87 | del __bool__
88 |
89 | def __len__(self):
90 | return lib.roaring_bitmap_get_cardinality(self._bitmap)
91 |
92 | def __del__(self):
93 | if hasattr(self, '_bitmap') and self._bitmap is not None:
94 | lib.roaring_bitmap_free(self._bitmap)
95 | del self._bitmap
96 |
97 | def __contains__(self, value):
98 | return bool(lib.roaring_bitmap_contains(self._bitmap, value))
99 |
100 | def __iter__(self):
101 | iterator = lib.roaring_create_iterator(self._bitmap)
102 | try:
103 | while iterator.has_value:
104 | yield iterator.current_value
105 | lib.roaring_advance_uint32_iterator(iterator)
106 | finally:
107 | lib.roaring_free_uint32_iterator(iterator)
108 |
109 | @guard
110 | def __eq__(self, other):
111 | return bool(lib.roaring_bitmap_equals(self._bitmap, other._bitmap))
112 |
113 | __and__ = bitmap_operator(lib.roaring_bitmap_and)
114 | __or__ = bitmap_operator(lib.roaring_bitmap_or)
115 | __xor__ = bitmap_operator(lib.roaring_bitmap_xor)
116 | __sub__ = bitmap_operator(lib.roaring_bitmap_andnot)
117 |
118 | __iand__ = bitmap_assign_operator(lib.roaring_bitmap_and_inplace)
119 | __ior__ = bitmap_assign_operator(lib.roaring_bitmap_or_inplace)
120 | __ixor__ = bitmap_assign_operator(lib.roaring_bitmap_xor_inplace)
121 | __isub__ = bitmap_assign_operator(lib.roaring_bitmap_andnot_inplace)
122 |
123 | @guard
124 | def __lt__(self, other):
125 | return len(self) < len(other) and self <= other
126 |
127 | @guard
128 | def __gt__(self, other):
129 | return other < self
130 |
131 | @guard
132 | def __le__(self, other):
133 | return bool(lib.roaring_bitmap_is_subset(self._bitmap, other._bitmap))
134 |
135 | @guard
136 | def __ge__(self, other):
137 | return other <= self
138 |
139 | def add(self, value):
140 | lib.roaring_bitmap_add(self._bitmap, value)
141 |
142 | def discard(self, value):
143 | lib.roaring_bitmap_remove(self._bitmap, value)
144 |
145 | def clear(self):
146 | lib.roaring_bitmap_clear(self._bitmap)
147 |
148 | union = __or__
149 | intersection = __and__
150 | difference = __sub__
151 | symmetric_difference = __xor__
152 | issubset = __le__
153 | issuperset = __ge__
154 |
155 | @guard
156 | def isdisjoint(self, other):
157 | return not lib.roaring_bitmap_intersect(self._bitmap, other._bitmap)
158 |
159 | def copy(self):
160 | return RoaringBitmap(lib.roaring_bitmap_copy(self._bitmap))
161 |
162 | def minimum(self):
163 | if not self:
164 | return None
165 | return lib.roaring_bitmap_minimum(self._bitmap)
166 |
167 | def maximum(self):
168 | if not self:
169 | return None
170 | return lib.roaring_bitmap_maximum(self._bitmap)
171 |
172 | def __getitem__(self, index):
173 | if not self:
174 | raise IndexError("RoaringBitmap index out of range")
175 | elif index == 0:
176 | return self.minimum()
177 | elif index == -1:
178 | return self.maximum()
179 | elif index < 0:
180 | if abs(index) <= len(self):
181 | return self[len(self) + index]
182 | raise IndexError("RoaringBitmap index out of range")
183 | elem = ffi.new('uint32_t *')
184 | found = lib.roaring_bitmap_select(self._bitmap, index, elem)
185 | if found:
186 | return elem[0]
187 | raise IndexError("RoaringBitmap index out of range")
188 |
--------------------------------------------------------------------------------