├── .gitignore ├── README.md ├── pyproject.toml ├── src └── pyhashmaps │ ├── __init__.py │ ├── base.py │ ├── chains.py │ ├── open_addressing.py │ ├── py.typed │ └── separate_chaining.py └── tests ├── __init__.py ├── base_test_file.py ├── test_open_addressing_hashmap.py └── test_separate_chaining_hashmap.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs 
documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | .idea/ 107 | 108 | # handmade 109 | .DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | `pyhashmaps` is a fully annotated Python package which has several functional hashmap classes for educational purposes. 4 | 5 | All classes support common operations on [ `MutableMapping` ](https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping) type. 6 | 7 | Here are the relationships between components of this package: 8 | 9 | ```mermaid 10 | classDiagram 11 | class MutableMapping 12 | class BaseHM 13 | 14 | class OpenAddressingHM {list slots} 15 | class LinearProbingHM 16 | class QuadraticProbingHM 17 | class DoubleHashingHM 18 | 19 | class SeparateChainingHM {list[Chain] slots} 20 | class DynamicArrayHM {list[DynamicArray] slots} 21 | class LinkedListHM {list[LinkedList] slots} 22 | class BSTHM {list[BinarySearchTree] slots} 23 | 24 | MutableMapping <|-- BaseHM 25 | BaseHM <|-- OpenAddressingHM 26 | BaseHM <|-- SeparateChainingHM 27 | OpenAddressingHM <|-- LinearProbingHM 28 | OpenAddressingHM <|-- QuadraticProbingHM 29 | OpenAddressingHM <|-- DoubleHashingHM 30 | SeparateChainingHM <|-- DynamicArrayHM 31 | SeparateChainingHM <|-- LinkedListHM 32 | SeparateChainingHM <|-- BSTHM 33 | ``` 34 | 35 | ```mermaid 36 | classDiagram 37 | class Chain { 38 | <> 39 | find() 40 | insert() 41 | delete() 42 | } 43 | class DynamicArray 44 | class LinkedList 45 | class BinarySearchTree 46 | 47 | Chain <|-- DynamicArray 48 | Chain <|-- LinkedList 49 | Chain <|-- BinarySearchTree 50 | ``` 51 | 52 | # Requirements 53 | 54 | It's tested on Python 3.10 & 3.11. 
55 | 56 | # Installation 57 | 58 | If you have `git` installed: 59 | 60 | ```none 61 | pip install git+https://github.com/amirsoroush/Python_Hashmaps.git 62 | ``` 63 | 64 | Otherwise: 65 | 66 | ```none 67 | pip install https://github.com/amirsoroush/Python_Hashmaps/tarball/main 68 | ``` 69 | 70 | # Usage 71 | 72 | It has the same interface as the built-in `dict` class. 73 | 74 | ```python 75 | >>> from pyhashmaps import ( 76 | ... LinearProbingHashMap, 77 | ... QuadraticProbingHashMap, 78 | ... DoubleHashingHashMap, 79 | ... DynamicArrayHashMap, 80 | ... LinkedListHashMap, 81 | ... BSTHashMap, 82 | ... ) 83 | >>> 84 | >>> hashmap = LinearProbingHashMap() 85 | >>> hashmap = LinearProbingHashMap[str, int]() 86 | >>> hashmap["a"] = 10 87 | >>> hashmap 88 | LinearProbingHashMap({'a': 10}) 89 | >>> hashmap.update({"b": 20, "c": 30}) 90 | >>> len(hashmap) 91 | 3 92 | >>> for k, v in hashmap.items(): 93 | ... print(k, v) 94 | ... 95 | c 30 96 | a 10 97 | b 20 98 | >>> hashmap.clear() 99 | >>> 100 | ``` 101 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pyhashmaps" 3 | version = "1.0.0" 4 | readme = "README.md" 5 | authors = [{ name = "amirsoroush", email = "amirsoroush.dev@gmail.com" }] 6 | 7 | [tool.setuptools] 8 | package-dir = {"" = "src"} 9 | 10 | [tool.setuptools.package-data] 11 | "pyhashmaps" = ["py.typed"] 12 | 13 | [tool.setuptools.packages.find] 14 | where = ["src"] -------------------------------------------------------------------------------- /src/pyhashmaps/__init__.py: -------------------------------------------------------------------------------- 1 | from .open_addressing import ( 2 | DoubleHashingHashMap, 3 | LinearProbingHashMap, 4 | QuadraticProbingHashMap, 5 | ) 6 | from .separate_chaining import BSTHashMap, DynamicArrayHashMap, LinkedListHashMap 7 | 8 | __all__ = [ 9 | "DoubleHashingHashMap", 
10 | "LinearProbingHashMap", 11 | "QuadraticProbingHashMap", 12 | "BSTHashMap", 13 | "DynamicArrayHashMap", 14 | "LinkedListHashMap", 15 | ] 16 | -------------------------------------------------------------------------------- /src/pyhashmaps/base.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from collections.abc import Hashable, Iterable, Iterator, Mapping, MutableMapping 3 | from dataclasses import dataclass 4 | from typing import Any, Generic, Protocol, TypeVar 5 | 6 | 7 | class Comparable(Hashable, Protocol): 8 | """Protocol for annotating comparable and Hashable types.""" 9 | 10 | def __lt__(self, other: Any) -> bool: 11 | ... 12 | 13 | 14 | K = TypeVar("K", bound=Hashable) 15 | Comp_K = TypeVar("Comp_K", bound=Comparable) 16 | V = TypeVar("V") 17 | HashMapArgument = Mapping[K, V] | Iterable[tuple[K, V]] 18 | 19 | 20 | def is_same(k1: K, k2: K) -> bool: 21 | """ 22 | Check whether two keys are the same or not. 23 | 24 | It's consistent with how Python itself compares keys in hashtables. 25 | """ 26 | return k1 is k2 or k1 == k2 27 | 28 | 29 | @dataclass(slots=True) 30 | class HashEntry(Generic[K, V]): 31 | hash_value: int 32 | key: K 33 | value: V 34 | 35 | 36 | class Chain(Protocol[K, V]): 37 | """ 38 | A protocol for classes which are intended to be used as the underlying 39 | data structure for storing objects in 'separate chaining' method. 40 | """ 41 | 42 | def __iter__(self) -> Iterator[HashEntry[K, V]]: 43 | ... 44 | 45 | def __len__(self) -> int: 46 | ... 47 | 48 | def find(self, key: K, hash_: int) -> HashEntry[K, V]: 49 | ... 50 | 51 | def insert(self, item: HashEntry[K, V]) -> None: 52 | ... 53 | 54 | def delete(self, key: K, hash_: int) -> None: 55 | ... 56 | 57 | def append_at_end(self, item: HashEntry[K, V]) -> None: 58 | """ 59 | Append the `item` at the end. 
60 | 61 | `.append_at_end()` is a faster route than `.insert()` to insert items when 62 | we no longer have to check the existing items - when all items are different. 63 | This is the case when we resize the hashtable. 64 | """ 65 | ... 66 | 67 | 68 | class BaseHashMap(MutableMapping[K, V]): 69 | """ 70 | An abstract base class which is the parent of all classes implementing hashtables 71 | using either methods(open addressing, separete chaining) 72 | """ 73 | 74 | def __init__(self, initial_size: int) -> None: 75 | if not (isinstance(initial_size, int) and initial_size > 0): 76 | raise ValueError("initial_size must be a positive integer.") 77 | self.size = initial_size 78 | self._len = 0 79 | 80 | def __len__(self) -> int: 81 | return self._len 82 | 83 | def __repr__(self) -> str: 84 | class_name = self.__class__.__name__ 85 | items = ", ".join(f"{k!r}: {v!r}" for k, v in self.items()) 86 | return f"{class_name}({{{items}}})" 87 | 88 | def _hash_func(self, key: K) -> int: 89 | """ 90 | Hash function used for hashing keys. 91 | 92 | For simplicity, the built-in `hash()` function is used. 93 | """ 94 | return hash(key) 95 | 96 | @abstractmethod 97 | def _increase_size(self) -> None: 98 | """ 99 | increases the size of the hash table based on the criteria 100 | specified by subclasses. 
101 | """ 102 | pass 103 | -------------------------------------------------------------------------------- /src/pyhashmaps/chains.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass, field 4 | from typing import TYPE_CHECKING, Generic, cast 5 | 6 | from .base import Chain, Comp_K, HashEntry, K, V, is_same 7 | 8 | if TYPE_CHECKING: 9 | from collections.abc import Generator, Iterator 10 | 11 | 12 | @dataclass(slots=True) 13 | class LinkedListNode(Generic[K, V]): 14 | """Node implementation used in the LinkedList class""" 15 | 16 | data: HashEntry[K, V] 17 | next: LinkedListNode[K, V] | None = field(default=None, repr=False, compare=False) 18 | 19 | 20 | @dataclass(slots=True) 21 | class BSTNode(Generic[Comp_K, V]): 22 | """Node implementation used in the BinarySearchTree class""" 23 | 24 | data: HashEntry[Comp_K, V] 25 | left: BSTNode[Comp_K, V] | None = field(default=None, repr=False, compare=False) 26 | right: BSTNode[Comp_K, V] | None = field(default=None, repr=False, compare=False) 27 | parent: BSTNode[Comp_K, V] | None = field(default=None, repr=False, compare=False) 28 | 29 | 30 | class DynamicArray(Chain[K, V]): 31 | def __init__(self) -> None: 32 | self.lst: list[HashEntry[K, V]] = [] 33 | 34 | def __len__(self) -> int: 35 | return len(self.lst) 36 | 37 | def __iter__(self) -> Iterator[HashEntry[K, V]]: 38 | yield from self.lst 39 | 40 | def find(self, key: K, hash_: int) -> HashEntry[K, V]: 41 | for e in self.lst: 42 | if hash_ == e.hash_value and is_same(e.key, key): 43 | return e 44 | raise KeyError(repr(key)) 45 | 46 | def insert(self, item: HashEntry[K, V]) -> None: 47 | for idx, e in enumerate(self.lst): 48 | if item.hash_value == e.hash_value and is_same(item.key, e.key): 49 | self.lst[idx] = item 50 | break 51 | else: 52 | self.lst.append(item) 53 | 54 | def delete(self, key: K, hash_: int) -> None: 55 | for idx, e in enumerate(self.lst): 
# --- continuation of src/pyhashmaps/chains.py (DynamicArray, whose `def` lines
# --- are in the previous chunk); carried over verbatim as a comment so this
# --- chunk stays self-contained:
#
#             if hash_ == e.hash_value and is_same(key, e.key):
#                 del self.lst[idx]
#                 return
#         raise KeyError(repr(key))
#
#     def append_at_end(self, item: HashEntry[K, V]) -> None:
#         self.lst.append(item)


class LinkedList(Chain[K, V]):
    """Singly linked list of `HashEntry` items used as a separate-chaining bucket."""

    def __init__(self) -> None:
        self.head: LinkedListNode[K, V] | None = None
        self.tail: LinkedListNode[K, V] | None = None
        self.count: int = 0

    def __len__(self) -> int:
        return self.count

    def __iter__(self) -> Iterator[HashEntry[K, V]]:
        node = self.head
        while node is not None:
            yield node.data
            node = node.next

    def find(self, key: K, hash_: int) -> HashEntry[K, V]:
        """Return the entry stored under `key`; raise KeyError if absent."""
        node = self.head
        while node is not None:
            if hash_ == node.data.hash_value and is_same(node.data.key, key):
                return node.data
            node = node.next
        raise KeyError(repr(key))

    def insert(self, item: HashEntry[K, V]) -> None:
        """Replace the entry with the same key, or append `item` at the tail."""
        node = self.head
        while node is not None:
            if item.hash_value == node.data.hash_value and is_same(
                node.data.key, item.key
            ):
                node.data = item
                return
            node = node.next
        self.insert_tail(item)

    def delete(self, key: K, hash_: int) -> None:
        """Unlink the entry stored under `key`; raise KeyError if absent.

        BUGFIX: the original never updated `self.tail` when the removed node
        *was* the tail.  `self.tail` then kept pointing at the unlinked node,
        so a subsequent `insert_tail` appended after that dead node and the
        new entries became unreachable from `self.head`.
        """
        previous: LinkedListNode[K, V] | None = None
        current = self.head
        while current is not None:
            if hash_ == current.data.hash_value and is_same(current.data.key, key):
                if previous is None:
                    # Removing the head node.
                    self.head = current.next
                else:
                    previous.next = current.next
                if current is self.tail:
                    # Keep the tail pointer consistent (fix for the stale-tail bug).
                    self.tail = previous
                self.count -= 1
                return
            previous = current
            current = current.next
        raise KeyError(repr(key))

    def insert_tail(self, data: HashEntry[K, V]) -> None:
        """Append a new node at the tail without any key comparison."""
        node = LinkedListNode(data)
        if self.tail:
            self.tail.next = node
            self.tail = node
        else:
            self.head = self.tail = node
        self.count += 1

    def append_at_end(self, item: HashEntry[K, V]) -> None:
        self.insert_tail(item)


class BinarySearchTree(Chain[Comp_K, V]):
    """Binary search tree of `HashEntry` items (ordered by key) used as a bucket."""

    def __init__(self) -> None:
        self.root: BSTNode[Comp_K, V] | None = None
        self.count: int = 0

    def __len__(self) -> int:
        return self.count

    def __iter__(self) -> Iterator[HashEntry[Comp_K, V]]:
        yield from self.inorder_traversal(self.root)

    def find(self, key: Comp_K, hash_: int) -> HashEntry[Comp_K, V]:
        """Return the entry stored under `key`; raise KeyError if absent."""
        return self.find_node(key, hash_).data

    def insert(self, item: HashEntry[Comp_K, V]) -> None:
        """Replace the entry with the same key, or attach `item` as a new leaf."""
        new_node = BSTNode(item)

        if self.root is None:
            self.root = new_node
            self.count += 1
            return

        current = self.root
        while True:
            if item.hash_value == current.data.hash_value and is_same(
                item.key, current.data.key
            ):
                current.data = item
                return
            if item.key < current.data.key:
                if current.left is None:
                    current.left = new_node
                    new_node.parent = current
                    self.count += 1
                    return
                current = current.left
            else:
                if current.right is None:
                    current.right = new_node
                    new_node.parent = current
                    self.count += 1
                    return
                current = current.right

    def delete(self, key: Comp_K, hash_: int) -> None:
        """Standard BST deletion; raises KeyError (via `find_node`) if absent."""
        node = self.find_node(key, hash_)

        if node.left is None and node.right is None:
            # Leaf node: simply detach it.
            self.reassign_nodes(node, None)
        elif node.left is None:
            # Only a right child.
            self.reassign_nodes(node, node.right)
        elif node.right is None:
            # Only a left child.
            self.reassign_nodes(node, node.left)
        else:
            # Two children: replace the payload with the in-order predecessor's,
            # then remove the predecessor node (which has at most one child).
            temp_node = self.find_biggest_node(node.left)
            self.delete(temp_node.data.key, temp_node.data.hash_value)
            node.data = temp_node.data

    def inorder_traversal(
        self, node: BSTNode[Comp_K, V] | None
    ) -> Generator[HashEntry[Comp_K, V], None, None]:
        """Yield entries in ascending key order (recursive in-order walk)."""
        if node is None:
            return
        yield from self.inorder_traversal(node.left)
        yield node.data
        yield from self.inorder_traversal(node.right)

    def find_node(self, key: Comp_K, hash_: int) -> BSTNode[Comp_K, V]:
        """Return the node holding `key`; raise KeyError if absent.

        BUGFIX: when `key` compared neither smaller nor greater than the
        current node yet was not the same entry (incomparable values such as
        a distinct NaN object, or a broken __hash__/__eq__ contract), the
        original left `current_node` unchanged and spun forever.  That case
        is now reported as "not found".
        """
        current = self.root
        while current is not None:
            if hash_ == current.data.hash_value and is_same(current.data.key, key):
                return current
            if key < current.data.key:
                current = current.left
            elif key > current.data.key:
                current = current.right
            else:
                raise KeyError(repr(key))
        raise KeyError(repr(key))

    def is_right_child(self, node: BSTNode[Comp_K, V]) -> bool:
        """Whether `node` is its parent's right child.

        Relies on keys being unique within one tree, so comparing keys with
        `is_same` identifies the node itself.
        """
        node.parent = cast(BSTNode[Comp_K, V], node.parent)
        if node.parent.right is not None:
            return is_same(node.parent.right.data.key, node.data.key)
        return False

    def reassign_nodes(
        self, node: BSTNode[Comp_K, V], child: BSTNode[Comp_K, V] | None
    ) -> None:
        """Splice `child` (possibly None) into `node`'s position and drop `node`."""
        if child is not None:
            child.parent = node.parent
        if node.parent is not None:
            if self.is_right_child(node):
                node.parent.right = child
            else:
                node.parent.left = child
        else:
            self.root = child
        self.count -= 1

    def find_biggest_node(self, node: BSTNode[Comp_K, V]) -> BSTNode[Comp_K, V]:
        """Return the right-most (largest-key) node of the subtree at `node`."""
        while node.right is not None:
            node = node.right
        return node

    def append_at_end(self, item: HashEntry[Comp_K, V]) -> None:
        # Delegates to `insert` since there is no faster way of doing it except
        # removing the `is_same` from `.insert` which is a micro optimization.
        self.insert(item)
`is_same` from `.insert` which is a micro optimization. 250 | self.insert(item) 251 | -------------------------------------------------------------------------------- /src/pyhashmaps/open_addressing.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import ctypes 4 | from abc import abstractmethod 5 | from enum import Enum 6 | from typing import TYPE_CHECKING 7 | 8 | from .base import BaseHashMap, HashEntry, HashMapArgument, K, V, is_same 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import Generator, Iterator 12 | 13 | 14 | class NotExist(Enum): 15 | empty = 0 16 | deleted = 1 17 | 18 | def __str__(self) -> str: 19 | return self.name.upper() 20 | 21 | __repr__ = __str__ 22 | 23 | 24 | EMPTY = NotExist.empty 25 | DELETED = NotExist.deleted 26 | 27 | 28 | class OpenAddressingHashMap(BaseHashMap[K, V]): 29 | def __init__( 30 | self, 31 | mapping_or_iterable: HashMapArgument[K, V] | None = None, 32 | /, 33 | *, 34 | initial_size: int = 64, 35 | resize_factor: float = 0.7, 36 | ) -> None: 37 | super().__init__(initial_size) 38 | 39 | if not 0.0 < resize_factor < 1.0: 40 | raise ValueError("resize_factor must be between 0 and 1.") 41 | self.resize_factor = resize_factor 42 | self.slots: list[HashEntry[K, V] | NotExist] = [EMPTY] * self.size 43 | if mapping_or_iterable is not None: 44 | self.update(mapping_or_iterable) 45 | 46 | def __iter__(self) -> Iterator[K]: 47 | for item in self.slots: 48 | if isinstance(item, HashEntry): 49 | yield item.key 50 | 51 | def __getitem__(self, key: K) -> V: 52 | h = self._hash_func(key) 53 | prob_sequence_gen = self._probing_sequence(key, h, self.size) 54 | while True: 55 | idx = next(prob_sequence_gen) 56 | slot = self.slots[idx] 57 | if slot is EMPTY: 58 | raise KeyError(repr(key)) 59 | if ( 60 | isinstance(slot, HashEntry) 61 | and h == slot.hash_value 62 | and is_same(slot.key, key) 63 | ): 64 | return slot.value 65 | 66 | def __setitem__(self, 
key: K, value: V) -> None: 67 | h = self._hash_func(key) 68 | hash_entry = HashEntry(h, key, value) 69 | 70 | prob_sequence_gen = self._probing_sequence(key, h, self.size) 71 | while True: 72 | idx = next(prob_sequence_gen) 73 | slot = self.slots[idx] 74 | 75 | if ( 76 | isinstance(slot, HashEntry) 77 | and h == slot.hash_value 78 | and is_same(slot.key, key) 79 | ): 80 | slot.value = value 81 | break 82 | if slot is EMPTY: 83 | self.slots[idx] = hash_entry 84 | self._len += 1 85 | 86 | if self._need_increase(): 87 | self._increase_size() 88 | break 89 | 90 | def __delitem__(self, key: K) -> None: 91 | h = self._hash_func(key) 92 | prob_sequence_gen = self._probing_sequence(key, h, self.size) 93 | while True: 94 | idx = next(prob_sequence_gen) 95 | slot = self.slots[idx] 96 | if slot is EMPTY: 97 | raise KeyError(repr(key)) 98 | if ( 99 | isinstance(slot, HashEntry) 100 | and h == slot.hash_value 101 | and is_same(slot.key, key) 102 | ): 103 | self.slots[idx] = DELETED 104 | break 105 | 106 | self._len -= 1 107 | 108 | def __sizeof__(self) -> int: 109 | instance_size = super().__sizeof__() 110 | pointer_size = ctypes.sizeof(ctypes.c_void_p) 111 | items_size = len(self.slots) * (3 * pointer_size) 112 | return instance_size + items_size 113 | 114 | def _need_increase(self) -> bool: 115 | return len(self) / self.size >= self.resize_factor 116 | 117 | def _increase_size(self) -> None: 118 | new_size = self.size * 2 119 | new_slots: list[HashEntry[K, V] | NotExist] = [EMPTY] * new_size 120 | 121 | for item in self.slots: 122 | if isinstance(item, HashEntry): 123 | h = self._hash_func(item.key) 124 | for idx in self._probing_sequence(item.key, h, new_size): 125 | slot = new_slots[idx] 126 | if slot is EMPTY: 127 | new_slots[idx] = item 128 | break 129 | 130 | self.slots = new_slots 131 | self.size = new_size 132 | 133 | @abstractmethod 134 | def _probing_sequence( 135 | self, key: K, hash_: int, size: int 136 | ) -> Generator[int, None, None]: 137 | yield 0 138 | 139 | 
class LinearProbingHashMap(OpenAddressingHashMap[K, V]):
    """Open addressing probing consecutive slots: h, h+1, h+2, ... (mod size)."""

    def _probing_sequence(
        self, key: K, hash_: int, size: int
    ) -> Generator[int, None, None]:
        idx = hash_ % size
        while True:
            yield idx % size
            idx += 1


class QuadraticProbingHashMap(OpenAddressingHashMap[K, V]):
    """Open addressing probing with quadratically growing increments."""

    def _probing_sequence(
        self, key: K, hash_: int, size: int
    ) -> Generator[int, None, None]:
        idx = hash_ % size
        i = 0
        while True:
            yield idx % size
            idx += i**2
            i += 1


class DoubleHashingHashMap(OpenAddressingHashMap[K, V]):
    """Open addressing where a second hash function determines the probe stride."""

    def __init__(
        self,
        mapping_or_iterable: HashMapArgument[K, V] | None = None,
        /,
        *,
        initial_size: int = 64,
        resize_factor: float = 0.7,
        prime_number: int = 7,
    ) -> None:
        # `_prime` must be set before super().__init__, which may already
        # insert items from `mapping_or_iterable`.
        self._prime = prime_number
        super().__init__(
            mapping_or_iterable, initial_size=initial_size, resize_factor=resize_factor
        )

    def _hash_func2(self, h1_hash: int) -> int:
        """Secondary hash; always in [1, prime], hence never zero."""
        return self._prime - (h1_hash % self._prime)

    def _probing_sequence(
        self, key: K, hash_: int, size: int
    ) -> Generator[int, None, None]:
        """Yield the double-hashing probe sequence (h1 + i*h2) % size.

        BUGFIX: the original yielded ((h1 % size) + (h2 % size) + i) % size —
        a constant offset plus a step of 1, i.e. plain linear probing, not
        double hashing.  True double hashing advances by `h2` each probe.
        Because `h2` may share a common factor with `size` (the stride would
        then cycle through only a subset of slots), we fall back to a step of
        1 after `size` probes so every slot is guaranteed to be visited and
        the callers' probe loops always terminate.
        """
        h2 = self._hash_func2(hash_)
        for i in range(size):
            yield (hash_ + i * h2) % size
        # Fallback: linear scan guarantees full table coverage even when
        # gcd(h2, size) != 1.
        idx = hash_
        while True:
            yield idx % size
            idx += 1


# --- src/pyhashmaps/separate_chaining.py (header; the import that begins with
# --- `from` continues in the next chunk).  Carried over verbatim as a comment:
#
# from __future__ import annotations
#
# import ctypes
# from typing import TYPE_CHECKING
#
# from
# (This chunk begins mid-import: the previous chunk ends with a dangling
# `from `.  The full statement is reproduced here so the chunk is readable.)
from .base import BaseHashMap, Chain, Comp_K, HashEntry, HashMapArgument, K, V
from .chains import BinarySearchTree, DynamicArray, LinkedList

if TYPE_CHECKING:
    from collections.abc import Iterator


class SeparateChainingHashMap(BaseHashMap[K, V]):
    """Hashmap that keeps colliding entries together in per-slot buckets.

    Each slot holds one `Chain` (a dynamic array, linked list or BST);
    concrete subclasses pick the bucket type via the `chain` class attribute.
    """

    # Set by concrete subclasses to the bucket class they use.
    chain: type[Chain[K, V]]

    def __init__(
        self,
        mapping_or_iterable: HashMapArgument[K, V] | None = None,
        /,
        *,
        initial_size: int = 40,
        max_chain_size: int = 5,
    ) -> None:
        super().__init__(initial_size)
        self._max_chain_size = max_chain_size
        self.slots: list[Chain[K, V]] = [self.chain() for _ in range(self.size)]
        if mapping_or_iterable is not None:
            self.update(mapping_or_iterable)

    def __iter__(self) -> Iterator[K]:
        return (entry.key for bucket in self.slots for entry in bucket)

    def __getitem__(self, key: K) -> V:
        h = self._hash_func(key)
        bucket = self.slots[h % self.size]
        # `find` raises KeyError when the key is absent.
        return bucket.find(key, h).value

    def __setitem__(self, key: K, value: V) -> None:
        h = self._hash_func(key)
        bucket = self.slots[h % self.size]

        size_before = len(bucket)
        bucket.insert(HashEntry(h, key, value))
        size_after = len(bucket)

        # The bucket only grows for a brand-new key, not for a replacement.
        if size_before != size_after:
            self._len += 1

        if self._need_increase(size_after):
            self._increase_size()

    def __delitem__(self, key: K) -> None:
        h = self._hash_func(key)
        bucket = self.slots[h % self.size]

        size_before = len(bucket)
        bucket.delete(key, h)  # raises KeyError when the key is absent
        size_after = len(bucket)

        if size_before != size_after:
            self._len -= 1

    def __sizeof__(self) -> int:
        # Rough estimate: the instance itself plus, per bucket, one pointer
        # for the bucket and three pointers per stored HashEntry
        # (hash_value, key, value).
        instance_size = super().__sizeof__()
        pointer_size = ctypes.sizeof(ctypes.c_void_p)
        items_size = sum(
            pointer_size + sum(pointer_size * 3 for _hash_entry in bucket)
            for bucket in self.slots
        )
        return instance_size + items_size
_hash_entry in bucket) 77 | for bucket in self.slots 78 | ) 79 | return instance_size + items_size 80 | 81 | def _need_increase(self, chain_size: int) -> bool: 82 | return chain_size >= self._max_chain_size 83 | 84 | def _increase_size(self) -> None: 85 | new_size = self.size * 2 86 | new_slots = [self.chain() for _ in range(new_size)] 87 | 88 | for chain in self.slots: 89 | for item in chain: 90 | idx = item.hash_value % new_size 91 | new_slots[idx].append_at_end(item) 92 | 93 | self.slots = new_slots 94 | self.size = new_size 95 | 96 | 97 | class DynamicArrayHashMap(SeparateChainingHashMap[K, V]): 98 | chain: type[DynamicArray[K, V]] = DynamicArray 99 | 100 | 101 | class LinkedListHashMap(SeparateChainingHashMap[K, V]): 102 | chain: type[LinkedList[K, V]] = LinkedList 103 | 104 | 105 | class BSTHashMap(SeparateChainingHashMap[Comp_K, V]): 106 | chain: type[BinarySearchTree[Comp_K, V]] = BinarySearchTree 107 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirsoroush/Python_Hashmaps/db48dd538e9860a3257764db399fccefc99fc1dd/tests/__init__.py -------------------------------------------------------------------------------- /tests/base_test_file.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # ruff: noqa 3 | import unittest 4 | from typing import TYPE_CHECKING 5 | 6 | from src.pyhashmaps.base import BaseHashMap 7 | 8 | base = unittest.TestCase if TYPE_CHECKING else object 9 | 10 | 11 | class BaseTestCase(base): 12 | cls: BaseHashMap 13 | 14 | def test_constructor(self): 15 | hashmap1 = self.cls({"a": 10, "b": 20, "c": 30}) 16 | hashmap2 = self.cls([("a", 10), ("b", 20), ("c", 30)]) 17 | self.assertEqual(hashmap1.items(), {("a", 10), ("b", 20), ("c", 30)}) 18 | self.assertEqual(hashmap2.items(), {("a", 10), ("b", 20), ("c", 30)}) 19 | 20 | 
def test_add_item(self): 21 | hashmap = self.cls() 22 | hashmap["foo"] = 10 23 | hashmap["bar"] = 20 24 | self.assertEqual(hashmap["foo"], 10) 25 | self.assertEqual(hashmap["bar"], 20) 26 | with self.assertRaises(TypeError): 27 | hashmap[{}] = 10 28 | 29 | def test_not_existed_item(self): 30 | self.assertIsNone(self.cls().get("foo")) 31 | 32 | def test_delete_item(self): 33 | hashmap = self.cls() 34 | hashmap["foo"] = 10 35 | self.assertEqual(hashmap["foo"], 10) 36 | del hashmap["foo"] 37 | self.assertIsNone(hashmap.get("foo")) 38 | 39 | def test_len(self): 40 | hashmap = self.cls(initial_size=15) 41 | for i in range(10): 42 | hashmap[str(i)] = i 43 | self.assertEqual(len(hashmap), 10) 44 | del hashmap["4"] 45 | self.assertEqual(len(hashmap), 9) 46 | hashmap["5"] = None 47 | self.assertEqual(len(hashmap), 9) 48 | del hashmap["9"] 49 | self.assertEqual(len(hashmap), 8) 50 | del hashmap["0"] 51 | self.assertEqual(len(hashmap), 7) 52 | hashmap.clear() 53 | self.assertEqual(len(hashmap), 0) 54 | 55 | def test_same_key(self): 56 | hashmap = self.cls() 57 | hashmap["foo"] = 10 58 | self.assertEqual(hashmap["foo"], 10) 59 | hashmap["foo"] = 20 60 | self.assertEqual(hashmap["foo"], 20) 61 | self.assertEqual(len(hashmap), 1) 62 | 63 | def test_contain(self): 64 | hashmap = self.cls() 65 | hashmap["foo"] = 10 66 | self.assertTrue("foo" in hashmap) 67 | self.assertFalse("bar" in hashmap) 68 | 69 | def test_iteration(self): 70 | hashmap = self.cls() 71 | for i in range(10): 72 | hashmap[i] = i * 2 73 | self.assertEqual(set(hashmap), set(range(10))) 74 | self.assertEqual(set(hashmap.values()), set(i * 2 for i in range(10))) 75 | 76 | def test_large_number_of_items(self): 77 | hashmap = self.cls() 78 | n = 1000 79 | for i in range(n): 80 | hashmap[f"_{i}_"] = i 81 | self.assertEqual(len(hashmap), n) 82 | for i in range(0, n, 2): 83 | del hashmap[f"_{i}_"] 84 | self.assertEqual(len(hashmap), n // 2) 85 | 86 | def test_repr(self): 87 | hashmap = self.cls() 88 | class_name = 
hashmap.__class__.__name__ 89 | self.assertEqual(repr(hashmap), f"{class_name}({{}})") 90 | hashmap["A"] = 30 91 | self.assertEqual(repr(hashmap), f"{class_name}({{'A': 30}})") 92 | 93 | def test_equality(self): 94 | hashmap = self.cls({"a": 10, "b": 20, "c": 30}) 95 | dictionary = {"a": 10, "b": 20, "c": 30} 96 | self.assertEqual(hashmap, dictionary) 97 | 98 | def test_accidental_same_slot(self): 99 | """ 100 | hash functions can accidentally collide in a same bucket. This test 101 | reveals this situation specifically in LinearProbingHashMap and 102 | QuadraticProbingHashMap. 103 | """ 104 | 105 | class A: 106 | def __init__(self, var): 107 | self.var = var 108 | 109 | def __lt__(self, other): 110 | if isinstance(other, A): 111 | return self.var < other.var 112 | return NotImplemented 113 | 114 | def __hash__(self) -> int: 115 | return hash(self.var) 116 | 117 | def __eq__(self, other: object) -> bool: 118 | if isinstance(other, A): 119 | return self.var == other.var 120 | return NotImplemented 121 | 122 | hashmap = self.cls(initial_size=10) 123 | 124 | obj = A(11) 125 | # The A(11) object is placed into the second slot in the ten slots. 126 | hashmap[obj] = "something1" 127 | 128 | # A(21) also wants to go to the second slot in the ten slots. 
129 | obj.var = 21 130 | hashmap[obj] = "something2" 131 | self.assertEqual(len(hashmap), 2) 132 | -------------------------------------------------------------------------------- /tests/test_open_addressing_hashmap.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # ruff: noqa 3 | import unittest 4 | 5 | from src.pyhashmaps.open_addressing import ( 6 | DoubleHashingHashMap, 7 | LinearProbingHashMap, 8 | QuadraticProbingHashMap, 9 | ) 10 | 11 | from .base_test_file import BaseTestCase 12 | 13 | 14 | class TestOpenAddressingHashMap(BaseTestCase): 15 | cls = None 16 | 17 | def test_creation_valid(self): 18 | self.cls() 19 | self.cls(initial_size=100) 20 | self.cls(resize_factor=0.7) 21 | self.cls(initial_size=30, resize_factor=0.6) 22 | 23 | def test_creation_invalid(self): 24 | self.assertRaises(ValueError, self.cls, initial_size=-2) 25 | self.assertRaises(ValueError, self.cls, initial_size=0) 26 | self.assertRaises(ValueError, self.cls, resize_factor=1.1) 27 | 28 | def test_resize(self): 29 | hashmap = self.cls(initial_size=10, resize_factor=0.8) 30 | self.assertEqual(hashmap.size, 10) 31 | for i in range(8): 32 | hashmap[i] = None 33 | self.assertEqual(hashmap.size, 20) 34 | self.assertEqual(len(hashmap), 8) 35 | 36 | 37 | class TestLinearProbingHashMap(TestOpenAddressingHashMap, unittest.TestCase): 38 | cls = LinearProbingHashMap 39 | 40 | 41 | class TestQuadraticProbingHashMap(TestOpenAddressingHashMap, unittest.TestCase): 42 | cls = QuadraticProbingHashMap 43 | 44 | 45 | class TestDoubleHashingHashMap(TestOpenAddressingHashMap, unittest.TestCase): 46 | cls = DoubleHashingHashMap 47 | -------------------------------------------------------------------------------- /tests/test_separate_chaining_hashmap.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # ruff: noqa 3 | import unittest 4 | 5 | from src.pyhashmaps.separate_chaining import ( 6 | 
BSTHashMap, 7 | DynamicArrayHashMap, 8 | LinkedListHashMap, 9 | ) 10 | 11 | from .base_test_file import BaseTestCase 12 | 13 | 14 | class TestSeparateChainingHashmap(BaseTestCase): 15 | cls = None 16 | 17 | def test_creation_valid(self): 18 | self.cls() 19 | self.cls(initial_size=100) 20 | self.cls(initial_size=100, max_chain_size=5) 21 | self.cls(max_chain_size=5) 22 | 23 | def test_creation_invalid(self): 24 | self.assertRaises(ValueError, self.cls, initial_size=-2) 25 | 26 | def test_resize(self): 27 | class A: 28 | __hash__ = lambda self: 0 29 | 30 | hashmap = self.cls(initial_size=5, max_chain_size=3) 31 | self.assertEqual(hashmap.size, 5) 32 | for i in range(3): 33 | hashmap[A()] = i 34 | self.assertEqual(hashmap.size, 5 * 2) 35 | 36 | 37 | class TestDynamicArrayHashMap(TestSeparateChainingHashmap, unittest.TestCase): 38 | cls = DynamicArrayHashMap 39 | 40 | 41 | class TestLinkedListHashMap(TestSeparateChainingHashmap, unittest.TestCase): 42 | cls = LinkedListHashMap 43 | 44 | 45 | class TestBSTHashMap(TestSeparateChainingHashmap, unittest.TestCase): 46 | cls = BSTHashMap 47 | 48 | # This is an override since `BSTHashMap` needs `HashEntr`s to be comparable. 49 | def test_resize(self): 50 | class A: 51 | def __hash__(self) -> int: 52 | return 0 53 | 54 | def __lt__(self, item) -> bool: 55 | return True 56 | 57 | hashmap = self.cls(initial_size=5, max_chain_size=3) 58 | self.assertEqual(hashmap.size, 5) 59 | for i in range(3): 60 | hashmap[A()] = i 61 | self.assertEqual(hashmap.size, 5 * 2) 62 | --------------------------------------------------------------------------------