├── ordered_set ├── py.typed └── __init__.py ├── .flake8 ├── .coveragerc ├── .codecov.yml ├── pytest.ini ├── tox.ini ├── .gitignore ├── .mailmap ├── setup.py ├── MIT-LICENSE ├── pyproject.toml ├── CHANGELOG.md ├── README.md └── test └── test_ordered_set.py /ordered_set/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | source = ordered_set.py 4 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: "diff" 3 | require_changes: true 4 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --doctest-modules --doctest-glob=README.md --doctest-glob=*.py --ignore=setup.py 3 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = pypy3, py37, py38, py39, py310 3 | 4 | [testenv] 5 | deps = pytest 6 | commands = pytest 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *~ 3 | .DS_Store 4 | .*.swp 5 | *.egg-info 6 | dist/ 7 | .tox/ 8 | .coverage 9 | .pytest_cache 10 | htmlcov 11 | .eggs 12 | .venv 13 | 
-------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | # Elia has used different names and e-mail addresses in the course of this project. Map them all to her current name and e-mail. 2 | Elia Robyn Lake 3 | Elia Robyn Lake 4 | Elia Robyn Lake 5 | Elia Robyn Lake 6 | Elia Robyn Lake 7 | Elia Robyn Lake 8 | Elia Robyn Lake 9 | 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # This is a wrapper for environments that require a `setup.py`-based installation. 2 | # This is not the primary way of installing ordered-set. 3 | # 4 | # The primary setup is in pyproject.toml. You can install ordered-set as a 5 | # dependency using `poetry` or `pip`. 6 | 7 | from setuptools import setup 8 | 9 | packages = ['ordered_set'] 10 | 11 | setup_kwargs = { 12 | 'name': 'ordered-set', 13 | 'version': '4.1.0', 14 | 'description': 'A set that remembers its order, and allows looking up its items by their index in that order.', 15 | 'author': 'Elia Robyn Lake', 16 | 'author_email': 'gh@arborelia.net', 17 | 'url': 'https://github.com/rspeer/ordered-set', 18 | 'packages': packages, 19 | 'python_requires': '>=3.7', 20 | } 21 | 22 | 23 | setup(**setup_kwargs) 24 | 25 | -------------------------------------------------------------------------------- /MIT-LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2022 Elia Robyn Lake 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a 4 | copy of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to 
permit persons to whom the 8 | Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["flit_core >=3.2,<4"] 3 | build-backend = "flit_core.buildapi" 4 | 5 | [project] 6 | name = "ordered-set" 7 | authors = [{name = "Elia Robyn Lake", email = "gh@arborelia.net"}] 8 | readme = "README.md" 9 | license = {file = "MIT-LICENSE"} 10 | classifiers = [ 11 | "Development Status :: 5 - Production/Stable", 12 | "Intended Audience :: Developers", 13 | "License :: OSI Approved :: MIT License", 14 | "Programming Language :: Python", 15 | "Programming Language :: Python :: 3", 16 | "Programming Language :: Python :: 3.7", 17 | "Programming Language :: Python :: 3.8", 18 | "Programming Language :: Python :: 3.9", 19 | "Programming Language :: Python :: 3.10", 20 | "Programming Language :: Python :: Implementation :: CPython", 21 | "Programming Language :: Python :: Implementation :: PyPy", 22 | ] 23 | dynamic = ["version", "description"] 24 | requires-python = ">=3.7" 25 | 26 | [tool.flit.module] 27 | name = "ordered_set" 28 | 29 | [project.urls] 30 | Home = "https://github.com/rspeer/ordered-set" 31 | 32 | 
[project.optional-dependencies] 33 | dev = ["pytest", "black", "mypy"] 34 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Significant changes in major and minor releases of this library: 4 | 5 | ## Version 4.1 (January 2022) 6 | 7 | - Packaged using flit. Wheels now exist, and setuptools is no longer required. 8 | - This package now has a typical package structure, instead of being a single module. The code is in `ordered_set/__init__.py` instead of `ordered_set.py`. 9 | - There is an `ordered_set/py.typed` so that type checkers know about the types. 10 | - Use the type aliases `SetLike[T]` and `OrderedSetInitializer[T]` to simplify some types. 11 | - Updated the way overloaded type signatures are written to what MyPy currently expects. 12 | - Minimum Python version is 3.7. 13 | 14 | ## Version 4.0 (January 2020) 15 | 16 | - Added type signatures inline to the code, instead of using type stubs. 17 | - Dropped Python 2 support. The minimum supported Python version is 3.5. 18 | 19 | ## Version 3.1 (November 2018) 20 | 21 | - `__getitem__` accepts NumPy arrays of indices, and returns a list of elements with those indices. 22 | - Updated in-place operations that took O(N^2) time, such as .difference_update(), to take O(N) time. 23 | - Clarified whether various methods mutate or copy the OrderedSet. 24 | - Added `OrderedSet.get_loc` and `OrderedSet.get_indexer` as aliases for `OrderedSet.index`, for interoperability with `pandas.Index`. 25 | - Added type stubs in a .pyi file. 26 | 27 | ## Version 3.0 (June 2018) 28 | 29 | - Implemented the abstract base classes `collections.MutableSet` and `collections.Sequence`. 30 | - Changed the behavior of some methods to follow the MutableSet API. 31 | - Indexing an OrderedSet with `[:]` returns a copy, not the same object. 
32 | 33 | ## Version 2.0 (December 2015) 34 | 35 | - Tuples are allowable values in the set, and are not treated as "fancy indexing". 36 | - Added `update` and `pop` methods. 37 | 38 | ## Version 1.4 (September 2015) 39 | 40 | - Added `discard` and `clear` methods. 41 | 42 | ## Version 1.3 (April 2015) 43 | 44 | - Added support for pickling. 45 | 46 | ## Version 1.2 (May 2014) 47 | 48 | - First Python 3 support. 49 | 50 | ## Version 1.1 (August 2013) 51 | 52 | - Added tests. 53 | - Removed a broken implementation of `discard`. 54 | 55 | ## Version 1.0 (August 2012) 56 | 57 | - First release. 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Pypi](https://img.shields.io/pypi/v/ordered-set.svg)](https://pypi.python.org/pypi/ordered-set) 2 | 3 | An OrderedSet is a mutable data structure that is a hybrid of a list and a set. 4 | It remembers the order of its entries, and every entry has an index number that 5 | can be looked up. 6 | 7 | ## Installation 8 | 9 | `ordered_set` is available on PyPI and packaged as a wheel. You can list it 10 | as a dependency of your project, in whatever form that takes. 11 | 12 | To install it into your current Python environment: 13 | 14 | pip install ordered-set 15 | 16 | To install the code for development, after checking out the repository: 17 | 18 | pip install flit 19 | flit install 20 | 21 | ## Usage examples 22 | 23 | An OrderedSet is created and used like a set: 24 | 25 | >>> from ordered_set import OrderedSet 26 | 27 | >>> letters = OrderedSet('abracadabra') 28 | 29 | >>> letters 30 | OrderedSet(['a', 'b', 'r', 'c', 'd']) 31 | 32 | >>> 'r' in letters 33 | True 34 | 35 | It is efficient to find the index of an entry in an OrderedSet, or find an 36 | entry by its index. 
To help with this use case, the `.add()` method returns 37 | the index of the added item, whether it was already in the set or not. 38 | 39 | >>> letters.index('r') 40 | 2 41 | 42 | >>> letters[2] 43 | 'r' 44 | 45 | >>> letters.add('r') 46 | 2 47 | 48 | >>> letters.add('x') 49 | 5 50 | 51 | OrderedSets implement the union (`|`), intersection (`&`), and difference (`-`) 52 | operators like sets do. 53 | 54 | >>> letters |= OrderedSet('shazam') 55 | 56 | >>> letters 57 | OrderedSet(['a', 'b', 'r', 'c', 'd', 'x', 's', 'h', 'z', 'm']) 58 | 59 | >>> letters & set('aeiou') 60 | OrderedSet(['a']) 61 | 62 | >>> letters -= 'abcd' 63 | 64 | >>> letters 65 | OrderedSet(['r', 'x', 's', 'h', 'z', 'm']) 66 | 67 | The `__getitem__()` and `index()` methods have been extended to accept any 68 | iterable except a string (and, for `index()`, a tuple, which is looked up as a single entry), returning a list, to perform NumPy-like "fancy 69 | indexing". 70 | 71 | >>> letters = OrderedSet('abracadabra') 72 | 73 | >>> letters[[0, 2, 3]] 74 | ['a', 'r', 'c'] 75 | 76 | >>> letters.index(['a', 'r', 'c']) 77 | [0, 2, 3] 78 | 79 | OrderedSet implements `__getstate__` and `__setstate__` so it can be pickled, 80 | and implements the abstract base classes `collections.abc.MutableSet` and 81 | `collections.abc.Sequence`. 82 | 83 | OrderedSet can be used as a generic collection type, similar to the collections 84 | in the `typing` module like List, Dict, and Set. For example, you can annotate 85 | a variable as having the type `OrderedSet[str]` or `OrderedSet[Tuple[int, 86 | str]]`. 87 | 88 | 89 | ## OrderedSet in data science applications 90 | 91 | An OrderedSet can be used as a bi-directional mapping between a sparse 92 | vocabulary and dense index numbers. As of version 3.1, it accepts NumPy arrays 93 | of index numbers as well as lists. 94 | 95 | This combination of features makes OrderedSet a simple implementation of many 96 | of the things that `pandas.Index` is used for, and many of its operations are 97 | faster than the equivalent pandas operations. 
98 | 99 | For further compatibility with pandas.Index, `get_loc` (the pandas method for 100 | looking up a single index) and `get_indexer` (the pandas method for fancy 101 | indexing in reverse) are both aliases for `index` (which handles both cases 102 | in OrderedSet). 103 | 104 | 105 | ## Authors 106 | 107 | OrderedSet was implemented by Elia Robyn Lake (maiden name: Robyn Speer). 108 | Jon Crall contributed changes and tests to make it fit the Python set API. 109 | Roman Inflianskas added the original type annotations. 110 | 111 | 112 | ## Comparisons 113 | 114 | The original implementation of OrderedSet was a [recipe posted to ActiveState 115 | Recipes][recipe] by Raymond Hettinger, released under the MIT license. 116 | 117 | [recipe]: https://code.activestate.com/recipes/576694-orderedset/ 118 | 119 | Hettinger's implementation kept its content in a doubly-linked list referenced by a 120 | dict. As a result, looking up an item by its index was an O(N) operation, while 121 | deletion was O(1). 122 | 123 | This version makes different trade-offs for the sake of efficient lookups. Its 124 | content is a standard Python list instead of a doubly-linked list. This 125 | provides O(1) lookups by index at the expense of O(N) deletion, as well as 126 | slightly faster iteration. 127 | 128 | In Python 3.6 and later, the built-in `dict` type is inherently ordered. If you 129 | ignore the dictionary values, that also gives you a simple ordered set, with 130 | fast O(1) insertion, deletion, iteration and membership testing. However, `dict` 131 | does not provide the list-like random access features of OrderedSet. You 132 | would have to convert it to a list in O(N) to look up the index of an entry or 133 | look up an entry by its index. 
134 | -------------------------------------------------------------------------------- /test/test_ordered_set.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import itertools as it 3 | import operator 4 | import pickle 5 | import random 6 | import sys 7 | 8 | import pytest 9 | 10 | from ordered_set import OrderedSet 11 | 12 | 13 | def test_pickle(): 14 | set1 = OrderedSet("abracadabra") 15 | roundtrip = pickle.loads(pickle.dumps(set1)) 16 | assert roundtrip == set1 17 | 18 | 19 | def test_empty_pickle(): 20 | empty_oset = OrderedSet() 21 | empty_roundtrip = pickle.loads(pickle.dumps(empty_oset)) 22 | assert empty_roundtrip == empty_oset 23 | 24 | 25 | def test_order(): 26 | set1 = OrderedSet("abracadabra") 27 | assert len(set1) == 5 28 | assert set1 == OrderedSet(["a", "b", "r", "c", "d"]) 29 | assert list(reversed(set1)) == ["d", "c", "r", "b", "a"] 30 | 31 | 32 | def test_binary_operations(): 33 | set1 = OrderedSet("abracadabra") 34 | set2 = OrderedSet("simsalabim") 35 | assert set1 != set2 36 | 37 | assert set1 & set2 == OrderedSet(["a", "b"]) 38 | assert set1 | set2 == OrderedSet(["a", "b", "r", "c", "d", "s", "i", "m", "l"]) 39 | assert set1 - set2 == OrderedSet(["r", "c", "d"]) 40 | 41 | 42 | def test_indexing(): 43 | set1 = OrderedSet("abracadabra") 44 | assert set1[:] == set1 45 | assert set1.copy() == set1 46 | assert set1 is set1 47 | assert set1[:] is not set1 48 | assert set1.copy() is not set1 49 | 50 | assert set1[[1, 2]] == OrderedSet(["b", "r"]) 51 | assert set1[1:3] == OrderedSet(["b", "r"]) 52 | assert set1.index("b") == 1 53 | assert set1.index(["b", "r"]) == [1, 2] 54 | with pytest.raises(KeyError): 55 | set1.index("br") 56 | 57 | 58 | class FancyIndexTester: 59 | """ 60 | Make sure we can index by a NumPy ndarray, without having to import 61 | NumPy. 
def test_remove():
    """
    Removing items must keep the remaining items, their order and their
    indices consistent, and discarding absent items must be a silent no-op.
    """
    set1 = OrderedSet("abracadabra")

    set1.remove("a")
    set1.remove("b")

    assert set1 == OrderedSet("rcd")
    assert set1[0] == "r"
    assert set1[1] == "c"
    assert set1[2] == "d"

    assert set1.index("r") == 0
    assert set1.index("c") == 1
    assert set1.index("d") == 2

    assert "a" not in set1
    assert "b" not in set1
    assert "r" in set1

    # Make sure we can .discard() something that's already gone, plus
    # something that was never there
    set1.discard("a")
    set1.discard("z")

    # Neither no-op discard may disturb what is left in the set.
    assert set1 == OrderedSet("rcd")
def test_pop():
    """pop() hands items back last-to-first and raises KeyError on an empty set."""
    set1 = OrderedSet("ab")

    last = set1.pop()
    assert last == "b"

    first = set1.pop()
    assert first == "a"

    # Nothing is left, so one more pop has to fail.
    with pytest.raises(KeyError):
        set1.pop()
def test_unordered_equality():
    """Against anything that is not a Sequence, equality ignores order."""
    oset = OrderedSet([1, 2])

    same_contents = [
        # Plain sets have no order to compare.
        {1, 2},
        frozenset([2, 1]),
        # A dict is compared through its keys; views through what they yield.
        {1: "a", 2: "b"},
        {1: 1, 2: 2}.keys(),
        {1: 1, 2: 2}.values(),
        # Corner case: OrderedDict is not a Sequence, so we don't check for
        # order, even though it does have the concept of order.
        collections.OrderedDict([(2, 2), (1, 1)]),
        # Corner case: We have to treat iterators as unordered because there
        # is nothing to distinguish an ordered and unordered iterator
        iter([1, 2]),
        iter([2, 1]),
        iter([2, 1, 1]),
    ]
    for other in same_contents:
        assert oset == other
def allsame_(iterable, eq=operator.eq):
    """
    Return True if every item in `iterable` equals the first one.

    An empty iterable counts as "all the same". `eq` is the two-argument
    comparison to use; it is always called as `eq(first_item, other_item)`.
    """
    remaining = iter(iterable)
    nothing = object()
    reference = next(remaining, nothing)
    if reference is nothing:
        # Empty input: there is nothing that could differ.
        return True
    for candidate in remaining:
        if not eq(reference, candidate):
            return False
    return True
284 | """ 285 | # test case 1 286 | data1 = OrderedSet([5, 3, 1, 4]) 287 | data2 = OrderedSet([1, 4]) 288 | yield data1, data2 289 | 290 | # first set is empty 291 | data1 = OrderedSet([]) 292 | data2 = OrderedSet([3, 1, 2]) 293 | yield data1, data2 294 | 295 | # second set is empty 296 | data1 = OrderedSet([3, 1, 2]) 297 | data2 = OrderedSet([]) 298 | yield data1, data2 299 | 300 | # both sets are empty 301 | data1 = OrderedSet([]) 302 | data2 = OrderedSet([]) 303 | yield data1, data2 304 | 305 | # random test cases 306 | rng = random.Random(0) 307 | a, b = 20, 20 308 | for _ in range(10): 309 | data1 = OrderedSet(rng.randint(0, a) for _ in range(b)) 310 | data2 = OrderedSet(rng.randint(0, a) for _ in range(b)) 311 | yield data1, data2 312 | yield data2, data1 313 | 314 | 315 | def test_operator_consistency_isect(): 316 | for data1, data2 in _operator_consistency_testdata(): 317 | result1 = data1.copy() 318 | result1.intersection_update(data2) 319 | result2 = data1 & data2 320 | result3 = data1.intersection(data2) 321 | check_results_([result1, result2, result3], datas=(data1, data2), name="isect") 322 | 323 | 324 | def test_operator_consistency_difference(): 325 | for data1, data2 in _operator_consistency_testdata(): 326 | result1 = data1.copy() 327 | result1.difference_update(data2) 328 | result2 = data1 - data2 329 | result3 = data1.difference(data2) 330 | check_results_( 331 | [result1, result2, result3], datas=(data1, data2), name="difference" 332 | ) 333 | 334 | 335 | def test_operator_consistency_xor(): 336 | for data1, data2 in _operator_consistency_testdata(): 337 | result1 = data1.copy() 338 | result1.symmetric_difference_update(data2) 339 | result2 = data1 ^ data2 340 | result3 = data1.symmetric_difference(data2) 341 | check_results_([result1, result2, result3], datas=(data1, data2), name="xor") 342 | 343 | 344 | def test_operator_consistency_union(): 345 | for data1, data2 in _operator_consistency_testdata(): 346 | result1 = data1.copy() 347 | 
result1.update(data2) 348 | result2 = data1 | data2 349 | result3 = data1.union(data2) 350 | check_results_([result1, result2, result3], datas=(data1, data2), name="union") 351 | 352 | 353 | def test_operator_consistency_subset(): 354 | for data1, data2 in _operator_consistency_testdata(): 355 | result1 = data1 <= data2 356 | result2 = data1.issubset(data2) 357 | result3 = set(data1).issubset(set(data2)) 358 | check_results_([result1, result2, result3], datas=(data1, data2), name="subset") 359 | 360 | 361 | def test_operator_consistency_superset(): 362 | for data1, data2 in _operator_consistency_testdata(): 363 | result1 = data1 >= data2 364 | result2 = data1.issuperset(data2) 365 | result3 = set(data1).issuperset(set(data2)) 366 | check_results_( 367 | [result1, result2, result3], datas=(data1, data2), name="superset" 368 | ) 369 | 370 | 371 | def test_operator_consistency_disjoint(): 372 | for data1, data2 in _operator_consistency_testdata(): 373 | result1 = data1.isdisjoint(data2) 374 | result2 = len(data1.intersection(data2)) == 0 375 | check_results_([result1, result2], datas=(data1, data2), name="disjoint") 376 | 377 | 378 | def test_bitwise_and_consistency(): 379 | # Specific case that was failing without explicit __and__ definition 380 | data1 = OrderedSet([12, 13, 1, 8, 16, 15, 9, 11, 18, 6, 4, 3, 19, 17]) 381 | data2 = OrderedSet([19, 4, 9, 3, 2, 10, 15, 17, 11, 13, 20, 6, 14, 16, 8]) 382 | result1 = data1.copy() 383 | result1.intersection_update(data2) 384 | # This requires a custom & operation apparently 385 | result2 = data1 & data2 386 | result3 = data1.intersection(data2) 387 | check_results_([result1, result2, result3], datas=(data1, data2), name="isect") 388 | -------------------------------------------------------------------------------- /ordered_set/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | An OrderedSet is a custom MutableSet that remembers its order, so that every 3 | entry has an 
def _is_atomic(obj: object) -> bool:
    """
    Tell whether `obj` must be looked up as one single entry, even though it
    is iterable.

    Indexing an OrderedSet by an iterable normally means "look up each of
    these things". Strings and tuples are the exception for the .index()
    method: they are entries in their own right, not collections of entries.

    For instance, oset.index('hello') should find the entry 'hello' in an
    OrderedSet of strings, not the positions of 'h', 'e', 'l', 'l' and 'o'.
    """
    atomic_types = (str, tuple)
    return isinstance(obj, atomic_types)
    def __init__(self, initial: Union[OrderedSetInitializer[T], None] = None) -> None:
        """
        Create an OrderedSet, optionally filled from `initial`.

        `initial` may be a set, a sequence or any other iterable of items, or
        None (the default) for an empty OrderedSet. Its items are added with
        the in-place union operator (`|=`).
        """
        # The entries, in the order they were added.
        self.items: List[T] = []
        # Maps each entry to its position in `self.items`.
        self.map: Dict[T, int] = {}
        if initial is not None:
            # In terms of duck-typing, the default __ior__ is compatible with
            # the types we use, but it doesn't expect all the types we
            # support as values for `initial`.
            self |= initial  # type: ignore
111 | 112 | Example: 113 | >>> oset = OrderedSet([1, 2, 3]) 114 | >>> oset[1] 115 | 2 116 | """ 117 | if isinstance(index, slice) and index == SLICE_ALL: 118 | return self.copy() 119 | elif isinstance(index, Iterable): 120 | return [self.items[i] for i in index] 121 | elif isinstance(index, slice) or hasattr(index, "__index__"): 122 | result = self.items[index] 123 | if isinstance(result, list): 124 | return self.__class__(result) 125 | else: 126 | return result 127 | else: 128 | raise TypeError("Don't know how to index an OrderedSet by %r" % index) 129 | 130 | def copy(self) -> "OrderedSet[T]": 131 | """ 132 | Return a shallow copy of this object. 133 | 134 | Example: 135 | >>> this = OrderedSet([1, 2, 3]) 136 | >>> other = this.copy() 137 | >>> this == other 138 | True 139 | >>> this is other 140 | False 141 | """ 142 | return self.__class__(self) 143 | 144 | # Define the gritty details of how an OrderedSet is serialized as a pickle. 145 | # We leave off type annotations, because the only code that should interact 146 | # with these is a generalized tool such as pickle. 147 | def __getstate__(self): 148 | if len(self) == 0: 149 | # In pickle, the state can't be an empty list. 150 | # We need to return a truthy value, or else __setstate__ won't be run. 151 | # 152 | # This could have been done more gracefully by always putting the state 153 | # in a tuple, but this way is backwards- and forwards- compatible with 154 | # previous versions of OrderedSet. 155 | return (None,) 156 | else: 157 | return list(self) 158 | 159 | def __setstate__(self, state): 160 | if state == (None,): 161 | self.__init__([]) 162 | else: 163 | self.__init__(state) 164 | 165 | def __contains__(self, key: object) -> bool: 166 | """ 167 | Test if the item is in this ordered set. 
168 | 169 | Example: 170 | >>> 1 in OrderedSet([1, 3, 2]) 171 | True 172 | >>> 5 in OrderedSet([1, 3, 2]) 173 | False 174 | """ 175 | return key in self.map 176 | 177 | # Technically type-incompatible with MutableSet, because we return an 178 | # int instead of nothing. This is also one of the things that makes 179 | # OrderedSet convenient to use. 180 | def add(self, key: T) -> int: 181 | """ 182 | Add `key` as an item to this OrderedSet, then return its index. 183 | 184 | If `key` is already in the OrderedSet, return the index it already 185 | had. 186 | 187 | Example: 188 | >>> oset = OrderedSet() 189 | >>> oset.append(3) 190 | 0 191 | >>> print(oset) 192 | OrderedSet([3]) 193 | """ 194 | if key not in self.map: 195 | self.map[key] = len(self.items) 196 | self.items.append(key) 197 | return self.map[key] 198 | 199 | append = add 200 | 201 | def update(self, sequence: SetLike[T]) -> int: 202 | """ 203 | Update the set with the given iterable sequence, then return the index 204 | of the last element inserted. 205 | 206 | Example: 207 | >>> oset = OrderedSet([1, 2, 3]) 208 | >>> oset.update([3, 1, 5, 1, 4]) 209 | 4 210 | >>> print(oset) 211 | OrderedSet([1, 2, 3, 5, 4]) 212 | """ 213 | item_index = 0 214 | try: 215 | for item in sequence: 216 | item_index = self.add(item) 217 | except TypeError: 218 | raise ValueError(f"Argument needs to be an iterable, got {type(sequence)}") 219 | return item_index 220 | 221 | @overload 222 | def index(self, key: Sequence[T]) -> List[int]: 223 | ... 224 | 225 | @overload 226 | def index(self, key: T) -> int: 227 | ... 228 | 229 | # concrete implementation 230 | def index(self, key): 231 | """ 232 | Get the index of a given entry, raising an IndexError if it's not 233 | present. 234 | 235 | `key` can be an iterable of entries that is not a string, in which case 236 | this returns a list of indices. 
237 | 238 | Example: 239 | >>> oset = OrderedSet([1, 2, 3]) 240 | >>> oset.index(2) 241 | 1 242 | """ 243 | if isinstance(key, Iterable) and not _is_atomic(key): 244 | return [self.index(subkey) for subkey in key] 245 | return self.map[key] 246 | 247 | # Provide some compatibility with pd.Index 248 | get_loc = index 249 | get_indexer = index 250 | 251 | def pop(self, index: int = -1) -> T: 252 | """ 253 | Remove and return item at index (default last). 254 | 255 | Raises KeyError if the set is empty. 256 | Raises IndexError if index is out of range. 257 | 258 | Example: 259 | >>> oset = OrderedSet([1, 2, 3]) 260 | >>> oset.pop() 261 | 3 262 | """ 263 | if not self.items: 264 | raise KeyError("Set is empty") 265 | 266 | elem = self.items[index] 267 | del self.items[index] 268 | del self.map[elem] 269 | return elem 270 | 271 | def discard(self, key: T) -> None: 272 | """ 273 | Remove an element. Do not raise an exception if absent. 274 | 275 | The MutableSet mixin uses this to implement the .remove() method, which 276 | *does* raise an error when asked to remove a non-existent item. 277 | 278 | Example: 279 | >>> oset = OrderedSet([1, 2, 3]) 280 | >>> oset.discard(2) 281 | >>> print(oset) 282 | OrderedSet([1, 3]) 283 | >>> oset.discard(2) 284 | >>> print(oset) 285 | OrderedSet([1, 3]) 286 | """ 287 | if key in self: 288 | i = self.map[key] 289 | del self.items[i] 290 | del self.map[key] 291 | for k, v in self.map.items(): 292 | if v >= i: 293 | self.map[k] = v - 1 294 | 295 | def clear(self) -> None: 296 | """ 297 | Remove all items from this OrderedSet. 
298 | """ 299 | del self.items[:] 300 | self.map.clear() 301 | 302 | def __iter__(self) -> Iterator[T]: 303 | """ 304 | Example: 305 | >>> list(iter(OrderedSet([1, 2, 3]))) 306 | [1, 2, 3] 307 | """ 308 | return iter(self.items) 309 | 310 | def __reversed__(self) -> Iterator[T]: 311 | """ 312 | Example: 313 | >>> list(reversed(OrderedSet([1, 2, 3]))) 314 | [3, 2, 1] 315 | """ 316 | return reversed(self.items) 317 | 318 | def __repr__(self) -> str: 319 | if not self: 320 | return f"{self.__class__.__name__}()" 321 | return f"{self.__class__.__name__}({list(self)!r})" 322 | 323 | def __eq__(self, other: object) -> bool: 324 | """ 325 | Returns true if the containers have the same items. If `other` is a 326 | Sequence, then order is checked, otherwise it is ignored. 327 | 328 | Example: 329 | >>> oset = OrderedSet([1, 3, 2]) 330 | >>> oset == [1, 3, 2] 331 | True 332 | >>> oset == [1, 2, 3] 333 | False 334 | >>> oset == [2, 3] 335 | False 336 | >>> oset == OrderedSet([3, 2, 1]) 337 | False 338 | """ 339 | if isinstance(other, Sequence): 340 | # Check that this OrderedSet contains the same elements, in the 341 | # same order, as the other object. 342 | return list(self) == list(other) 343 | try: 344 | other_as_set = set(other) 345 | except TypeError: 346 | # If `other` can't be converted into a set, it's not equal. 347 | return False 348 | else: 349 | return set(self) == other_as_set 350 | 351 | def union(self, *sets: SetLike[T]) -> "OrderedSet[T]": 352 | """ 353 | Combines all unique items. 354 | Each items order is defined by its first appearance. 
355 | 356 | Example: 357 | >>> oset = OrderedSet.union(OrderedSet([3, 1, 4, 1, 5]), [1, 3], [2, 0]) 358 | >>> print(oset) 359 | OrderedSet([3, 1, 4, 5, 2, 0]) 360 | >>> oset.union([8, 9]) 361 | OrderedSet([3, 1, 4, 5, 2, 0, 8, 9]) 362 | >>> oset | {10} 363 | OrderedSet([3, 1, 4, 5, 2, 0, 10]) 364 | """ 365 | cls: type = OrderedSet 366 | if isinstance(self, OrderedSet): 367 | cls = self.__class__ 368 | containers = map(list, it.chain([self], sets)) 369 | items = it.chain.from_iterable(containers) 370 | return cls(items) 371 | 372 | def __and__(self, other: SetLike[T]) -> "OrderedSet[T]": 373 | # the parent implementation of this is backwards 374 | return self.intersection(other) 375 | 376 | def intersection(self, *sets: SetLike[T]) -> "OrderedSet[T]": 377 | """ 378 | Returns elements in common between all sets. Order is defined only 379 | by the first set. 380 | 381 | Example: 382 | >>> oset = OrderedSet.intersection(OrderedSet([0, 1, 2, 3]), [1, 2, 3]) 383 | >>> print(oset) 384 | OrderedSet([1, 2, 3]) 385 | >>> oset.intersection([2, 4, 5], [1, 2, 3, 4]) 386 | OrderedSet([2]) 387 | >>> oset.intersection() 388 | OrderedSet([1, 2, 3]) 389 | """ 390 | cls: type = OrderedSet 391 | items: OrderedSetInitializer[T] = self 392 | if isinstance(self, OrderedSet): 393 | cls = self.__class__ 394 | if sets: 395 | common = set.intersection(*map(set, sets)) 396 | items = (item for item in self if item in common) 397 | return cls(items) 398 | 399 | def difference(self, *sets: SetLike[T]) -> "OrderedSet[T]": 400 | """ 401 | Returns all elements that are in this set but not the others. 
402 | 403 | Example: 404 | >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2])) 405 | OrderedSet([1, 3]) 406 | >>> OrderedSet([1, 2, 3]).difference(OrderedSet([2]), OrderedSet([3])) 407 | OrderedSet([1]) 408 | >>> OrderedSet([1, 2, 3]) - OrderedSet([2]) 409 | OrderedSet([1, 3]) 410 | >>> OrderedSet([1, 2, 3]).difference() 411 | OrderedSet([1, 2, 3]) 412 | """ 413 | cls = self.__class__ 414 | items: OrderedSetInitializer[T] = self 415 | if sets: 416 | other = set.union(*map(set, sets)) 417 | items = (item for item in self if item not in other) 418 | return cls(items) 419 | 420 | def issubset(self, other: SetLike[T]) -> bool: 421 | """ 422 | Report whether another set contains this set. 423 | 424 | Example: 425 | >>> OrderedSet([1, 2, 3]).issubset({1, 2}) 426 | False 427 | >>> OrderedSet([1, 2, 3]).issubset({1, 2, 3, 4}) 428 | True 429 | >>> OrderedSet([1, 2, 3]).issubset({1, 4, 3, 5}) 430 | False 431 | """ 432 | if len(self) > len(other): # Fast check for obvious cases 433 | return False 434 | return all(item in other for item in self) 435 | 436 | def issuperset(self, other: SetLike[T]) -> bool: 437 | """ 438 | Report whether this set contains another set. 439 | 440 | Example: 441 | >>> OrderedSet([1, 2]).issuperset([1, 2, 3]) 442 | False 443 | >>> OrderedSet([1, 2, 3, 4]).issuperset({1, 2, 3}) 444 | True 445 | >>> OrderedSet([1, 4, 3, 5]).issuperset({1, 2, 3}) 446 | False 447 | """ 448 | if len(self) < len(other): # Fast check for obvious cases 449 | return False 450 | return all(item in self for item in other) 451 | 452 | def symmetric_difference(self, other: SetLike[T]) -> "OrderedSet[T]": 453 | """ 454 | Return the symmetric difference of two OrderedSets as a new set. 455 | That is, the new set will contain all elements that are in exactly 456 | one of the sets. 457 | 458 | Their order will be preserved, with elements from `self` preceding 459 | elements from `other`. 
460 | 461 | Example: 462 | >>> this = OrderedSet([1, 4, 3, 5, 7]) 463 | >>> other = OrderedSet([9, 7, 1, 3, 2]) 464 | >>> this.symmetric_difference(other) 465 | OrderedSet([4, 5, 9, 2]) 466 | """ 467 | cls: type = OrderedSet 468 | if isinstance(self, OrderedSet): 469 | cls = self.__class__ 470 | diff1 = cls(self).difference(other) 471 | diff2 = cls(other).difference(self) 472 | return diff1.union(diff2) 473 | 474 | def _update_items(self, items: list) -> None: 475 | """ 476 | Replace the 'items' list of this OrderedSet with a new one, updating 477 | self.map accordingly. 478 | """ 479 | self.items = items 480 | self.map = {item: idx for (idx, item) in enumerate(items)} 481 | 482 | def difference_update(self, *sets: SetLike[T]) -> None: 483 | """ 484 | Update this OrderedSet to remove items from one or more other sets. 485 | 486 | Example: 487 | >>> this = OrderedSet([1, 2, 3]) 488 | >>> this.difference_update(OrderedSet([2, 4])) 489 | >>> print(this) 490 | OrderedSet([1, 3]) 491 | 492 | >>> this = OrderedSet([1, 2, 3, 4, 5]) 493 | >>> this.difference_update(OrderedSet([2, 4]), OrderedSet([1, 4, 6])) 494 | >>> print(this) 495 | OrderedSet([3, 5]) 496 | """ 497 | items_to_remove = set() # type: Set[T] 498 | for other in sets: 499 | items_as_set = set(other) # type: Set[T] 500 | items_to_remove |= items_as_set 501 | self._update_items([item for item in self.items if item not in items_to_remove]) 502 | 503 | def intersection_update(self, other: SetLike[T]) -> None: 504 | """ 505 | Update this OrderedSet to keep only items in another set, preserving 506 | their order in this set. 
507 | 508 | Example: 509 | >>> this = OrderedSet([1, 4, 3, 5, 7]) 510 | >>> other = OrderedSet([9, 7, 1, 3, 2]) 511 | >>> this.intersection_update(other) 512 | >>> print(this) 513 | OrderedSet([1, 3, 7]) 514 | """ 515 | other = set(other) 516 | self._update_items([item for item in self.items if item in other]) 517 | 518 | def symmetric_difference_update(self, other: SetLike[T]) -> None: 519 | """ 520 | Update this OrderedSet to remove items from another set, then 521 | add items from the other set that were not present in this set. 522 | 523 | Example: 524 | >>> this = OrderedSet([1, 4, 3, 5, 7]) 525 | >>> other = OrderedSet([9, 7, 1, 3, 2]) 526 | >>> this.symmetric_difference_update(other) 527 | >>> print(this) 528 | OrderedSet([4, 5, 9, 2]) 529 | """ 530 | items_to_add = [item for item in other if item not in self] 531 | items_to_remove = set(other) 532 | self._update_items( 533 | [item for item in self.items if item not in items_to_remove] + items_to_add 534 | ) 535 | --------------------------------------------------------------------------------