├── .gitignore ├── README.md ├── pyproject.toml ├── src └── pyhashmaps │ ├── __init__.py │ ├── base.py │ ├── chains.py │ ├── open_addressing.py │ ├── py.typed │ └── separate_chaining.py └── tests ├── __init__.py ├── base_test_file.py ├── test_open_addressing_hashmap.py └── test_separate_chaining_hashmap.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs 
documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | .idea/ 107 | 108 | # handmade 109 | .DS_Store -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Introduction 2 | 3 | `pyhashmaps` is a fully annotated Python package which has several functional hashmap classes for educational purposes. 4 | 5 | All classes support common operations on [ `MutableMapping` ](https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping) type. 6 | 7 | Here are the relationships between components of this package: 8 | 9 | ```mermaid 10 | classDiagram 11 | class MutableMapping 12 | class BaseHM 13 | 14 | class OpenAddressingHM {list slots} 15 | class LinearProbingHM 16 | class QuadraticProbingHM 17 | class DoubleHashingHM 18 | 19 | class SeparateChainingHM {list[Chain] slots} 20 | class DynamicArrayHM {list[DynamicArray] slots} 21 | class LinkedListHM {list[LinkedList] slots} 22 | class BSTHM {list[BinarySearchTree] slots} 23 | 24 | MutableMapping <|-- BaseHM 25 | BaseHM <|-- OpenAddressingHM 26 | BaseHM <|-- SeparateChainingHM 27 | OpenAddressingHM <|-- LinearProbingHM 28 | OpenAddressingHM <|-- QuadraticProbingHM 29 | OpenAddressingHM <|-- DoubleHashingHM 30 | SeparateChainingHM <|-- DynamicArrayHM 31 | SeparateChainingHM <|-- LinkedListHM 32 | SeparateChainingHM <|-- BSTHM 33 | ``` 34 | 35 | ```mermaid 36 | classDiagram 37 | class Chain { 38 | <> 39 | find() 40 | insert() 41 | delete() 42 | } 43 | class DynamicArray 44 | class LinkedList 45 | class BinarySearchTree 46 | 47 | Chain <|-- DynamicArray 48 | Chain <|-- LinkedList 49 | Chain <|-- BinarySearchTree 50 | ``` 51 | 52 | # Requirements 53 | 54 | It's tested on Python 3.10 & 3.11. 
55 | 56 | # Installation 57 | 58 | If you have `git` installed: 59 | 60 | ```none 61 | pip install git+https://github.com/amirsoroush/Python_Hashmaps.git 62 | ``` 63 | 64 | Otherwise: 65 | 66 | ```none 67 | pip install https://github.com/amirsoroush/Python_Hashmaps/tarball/main 68 | ``` 69 | 70 | # Usage 71 | 72 | It has the same interface as the built-in `dict` class. 73 | 74 | ```python 75 | >>> from pyhashmaps import ( 76 | ... LinearProbingHashMap, 77 | ... QuadraticProbingHashMap, 78 | ... DoubleHashingHashMap, 79 | ... DynamicArrayHashMap, 80 | ... LinkedListHashMap, 81 | ... BSTHashMap, 82 | ... ) 83 | >>> 84 | >>> hashmap = LinearProbingHashMap() 85 | >>> hashmap = LinearProbingHashMap[str, int]() 86 | >>> hashmap["a"] = 10 87 | >>> hashmap 88 | LinearProbingHashMap({'a': 10}) 89 | >>> hashmap.update({"b": 20, "c": 30}) 90 | >>> len(hashmap) 91 | 3 92 | >>> for k, v in hashmap.items(): 93 | ... print(k, v) 94 | ... 95 | c 30 96 | a 10 97 | b 20 98 | >>> hashmap.clear() 99 | >>> 100 | ``` 101 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pyhashmaps" 3 | version = "1.0.0" 4 | readme = "README.md" 5 | authors = [{ name = "amirsoroush", email = "amirsoroush.dev@gmail.com" }] 6 | 7 | [tool.setuptools] 8 | package-dir = {"" = "src"} 9 | 10 | [tool.setuptools.package-data] 11 | "pyhashmaps" = ["py.typed"] 12 | 13 | [tool.setuptools.packages.find] 14 | where = ["src"] -------------------------------------------------------------------------------- /src/pyhashmaps/__init__.py: -------------------------------------------------------------------------------- 1 | from .open_addressing import ( 2 | DoubleHashingHashMap, 3 | LinearProbingHashMap, 4 | QuadraticProbingHashMap, 5 | ) 6 | from .separate_chaining import BSTHashMap, DynamicArrayHashMap, LinkedListHashMap 7 | 8 | __all__ = [ 9 | "DoubleHashingHashMap", 
10 | "LinearProbingHashMap", 11 | "QuadraticProbingHashMap", 12 | "BSTHashMap", 13 | "DynamicArrayHashMap", 14 | "LinkedListHashMap", 15 | ] 16 | -------------------------------------------------------------------------------- /src/pyhashmaps/base.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from collections.abc import Hashable, Iterable, Iterator, Mapping, MutableMapping 3 | from dataclasses import dataclass 4 | from typing import Any, Generic, Protocol, TypeVar 5 | 6 | 7 | class Comparable(Hashable, Protocol): 8 | """Protocol for annotating comparable and Hashable types.""" 9 | 10 | def __lt__(self, other: Any) -> bool: 11 | ... 12 | 13 | 14 | K = TypeVar("K", bound=Hashable) 15 | Comp_K = TypeVar("Comp_K", bound=Comparable) 16 | V = TypeVar("V") 17 | HashMapArgument = Mapping[K, V] | Iterable[tuple[K, V]] 18 | 19 | 20 | def is_same(k1: K, k2: K) -> bool: 21 | """ 22 | Check whether two keys are the same or not. 23 | 24 | It's consistent with how Python itself compares keys in hashtables. 25 | """ 26 | return k1 is k2 or k1 == k2 27 | 28 | 29 | @dataclass(slots=True) 30 | class HashEntry(Generic[K, V]): 31 | hash_value: int 32 | key: K 33 | value: V 34 | 35 | 36 | class Chain(Protocol[K, V]): 37 | """ 38 | A protocol for classes which are intended to be used as the underlying 39 | data structure for storing objects in 'separate chaining' method. 40 | """ 41 | 42 | def __iter__(self) -> Iterator[HashEntry[K, V]]: 43 | ... 44 | 45 | def __len__(self) -> int: 46 | ... 47 | 48 | def find(self, key: K, hash_: int) -> HashEntry[K, V]: 49 | ... 50 | 51 | def insert(self, item: HashEntry[K, V]) -> None: 52 | ... 53 | 54 | def delete(self, key: K, hash_: int) -> None: 55 | ... 56 | 57 | def append_at_end(self, item: HashEntry[K, V]) -> None: 58 | """ 59 | Append the `item` at the end. 
60 | 61 | `.append_at_end()` is a faster route than `.insert()` to insert items when 62 | we no longer have to check the existing items - when all items are different. 63 | This is the case when we resize the hashtable. 64 | """ 65 | ... 66 | 67 | 68 | class BaseHashMap(MutableMapping[K, V]): 69 | """ 70 | An abstract base class which is the parent of all classes implementing hashtables 71 | using either methods(open addressing, separete chaining) 72 | """ 73 | 74 | def __init__(self, initial_size: int) -> None: 75 | if not (isinstance(initial_size, int) and initial_size > 0): 76 | raise ValueError("initial_size must be a positive integer.") 77 | self.size = initial_size 78 | self._len = 0 79 | 80 | def __len__(self) -> int: 81 | return self._len 82 | 83 | def __repr__(self) -> str: 84 | class_name = self.__class__.__name__ 85 | items = ", ".join(f"{k!r}: {v!r}" for k, v in self.items()) 86 | return f"{class_name}({{{items}}})" 87 | 88 | def _hash_func(self, key: K) -> int: 89 | """ 90 | Hash function used for hashing keys. 91 | 92 | For simplicity, the built-in `hash()` function is used. 93 | """ 94 | return hash(key) 95 | 96 | @abstractmethod 97 | def _increase_size(self) -> None: 98 | """ 99 | increases the size of the hash table based on the criteria 100 | specified by subclasses. 
101 | """ 102 | pass 103 | -------------------------------------------------------------------------------- /src/pyhashmaps/chains.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from dataclasses import dataclass, field 4 | from typing import TYPE_CHECKING, Generic, cast 5 | 6 | from .base import Chain, Comp_K, HashEntry, K, V, is_same 7 | 8 | if TYPE_CHECKING: 9 | from collections.abc import Generator, Iterator 10 | 11 | 12 | @dataclass(slots=True) 13 | class LinkedListNode(Generic[K, V]): 14 | """Node implementation used in the LinkedList class""" 15 | 16 | data: HashEntry[K, V] 17 | next: LinkedListNode[K, V] | None = field(default=None, repr=False, compare=False) 18 | 19 | 20 | @dataclass(slots=True) 21 | class BSTNode(Generic[Comp_K, V]): 22 | """Node implementation used in the BinarySearchTree class""" 23 | 24 | data: HashEntry[Comp_K, V] 25 | left: BSTNode[Comp_K, V] | None = field(default=None, repr=False, compare=False) 26 | right: BSTNode[Comp_K, V] | None = field(default=None, repr=False, compare=False) 27 | parent: BSTNode[Comp_K, V] | None = field(default=None, repr=False, compare=False) 28 | 29 | 30 | class DynamicArray(Chain[K, V]): 31 | def __init__(self) -> None: 32 | self.lst: list[HashEntry[K, V]] = [] 33 | 34 | def __len__(self) -> int: 35 | return len(self.lst) 36 | 37 | def __iter__(self) -> Iterator[HashEntry[K, V]]: 38 | yield from self.lst 39 | 40 | def find(self, key: K, hash_: int) -> HashEntry[K, V]: 41 | for e in self.lst: 42 | if hash_ == e.hash_value and is_same(e.key, key): 43 | return e 44 | raise KeyError(repr(key)) 45 | 46 | def insert(self, item: HashEntry[K, V]) -> None: 47 | for idx, e in enumerate(self.lst): 48 | if item.hash_value == e.hash_value and is_same(item.key, e.key): 49 | self.lst[idx] = item 50 | break 51 | else: 52 | self.lst.append(item) 53 | 54 | def delete(self, key: K, hash_: int) -> None: 55 | for idx, e in enumerate(self.lst): 
# --- continuation of src/pyhashmaps/chains.py (DynamicArray, whose `def` lines
# --- are in the previous chunk); carried over verbatim as a comment so this
# --- chunk stays self-contained:
#
#             if hash_ == e.hash_value and is_same(key, e.key):
#                 del self.lst[idx]
#                 return
#         raise KeyError(repr(key))
#
#     def append_at_end(self, item: HashEntry[K, V]) -> None:
#         self.lst.append(item)


class LinkedList(Chain[K, V]):
    """Singly linked list of `HashEntry` items used as a separate-chaining bucket."""

    def __init__(self) -> None:
        self.head: LinkedListNode[K, V] | None = None
        self.tail: LinkedListNode[K, V] | None = None
        self.count: int = 0

    def __len__(self) -> int:
        return self.count

    def __iter__(self) -> Iterator[HashEntry[K, V]]:
        node = self.head
        while node is not None:
            yield node.data
            node = node.next

    def find(self, key: K, hash_: int) -> HashEntry[K, V]:
        """Return the entry stored under `key`; raise KeyError if absent."""
        node = self.head
        while node is not None:
            if hash_ == node.data.hash_value and is_same(node.data.key, key):
                return node.data
            node = node.next
        raise KeyError(repr(key))

    def insert(self, item: HashEntry[K, V]) -> None:
        """Replace the entry with the same key, or append `item` at the tail."""
        node = self.head
        while node is not None:
            if item.hash_value == node.data.hash_value and is_same(
                node.data.key, item.key
            ):
                node.data = item
                return
            node = node.next
        self.insert_tail(item)

    def delete(self, key: K, hash_: int) -> None:
        """Unlink the entry stored under `key`; raise KeyError if absent.

        BUGFIX: the original never updated `self.tail` when the removed node
        *was* the tail.  `self.tail` then kept pointing at the unlinked node,
        so a subsequent `insert_tail` appended after that dead node and the
        new entries became unreachable from `self.head`.
        """
        previous: LinkedListNode[K, V] | None = None
        current = self.head
        while current is not None:
            if hash_ == current.data.hash_value and is_same(current.data.key, key):
                if previous is None:
                    # Removing the head node.
                    self.head = current.next
                else:
                    previous.next = current.next
                if current is self.tail:
                    # Keep the tail pointer consistent (fix for the stale-tail bug).
                    self.tail = previous
                self.count -= 1
                return
            previous = current
            current = current.next
        raise KeyError(repr(key))

    def insert_tail(self, data: HashEntry[K, V]) -> None:
        """Append a new node at the tail without any key comparison."""
        node = LinkedListNode(data)
        if self.tail:
            self.tail.next = node
            self.tail = node
        else:
            self.head = self.tail = node
        self.count += 1

    def append_at_end(self, item: HashEntry[K, V]) -> None:
        self.insert_tail(item)


class BinarySearchTree(Chain[Comp_K, V]):
    """Binary search tree of `HashEntry` items (ordered by key) used as a bucket."""

    def __init__(self) -> None:
        self.root: BSTNode[Comp_K, V] | None = None
        self.count: int = 0

    def __len__(self) -> int:
        return self.count

    def __iter__(self) -> Iterator[HashEntry[Comp_K, V]]:
        yield from self.inorder_traversal(self.root)

    def find(self, key: Comp_K, hash_: int) -> HashEntry[Comp_K, V]:
        """Return the entry stored under `key`; raise KeyError if absent."""
        return self.find_node(key, hash_).data

    def insert(self, item: HashEntry[Comp_K, V]) -> None:
        """Replace the entry with the same key, or attach `item` as a new leaf."""
        new_node = BSTNode(item)

        if self.root is None:
            self.root = new_node
            self.count += 1
            return

        current = self.root
        while True:
            if item.hash_value == current.data.hash_value and is_same(
                item.key, current.data.key
            ):
                current.data = item
                return
            if item.key < current.data.key:
                if current.left is None:
                    current.left = new_node
                    new_node.parent = current
                    self.count += 1
                    return
                current = current.left
            else:
                if current.right is None:
                    current.right = new_node
                    new_node.parent = current
                    self.count += 1
                    return
                current = current.right

    def delete(self, key: Comp_K, hash_: int) -> None:
        """Standard BST deletion; raises KeyError (via `find_node`) if absent."""
        node = self.find_node(key, hash_)

        if node.left is None and node.right is None:
            # Leaf node: simply detach it.
            self.reassign_nodes(node, None)
        elif node.left is None:
            # Only a right child.
            self.reassign_nodes(node, node.right)
        elif node.right is None:
            # Only a left child.
            self.reassign_nodes(node, node.left)
        else:
            # Two children: replace the payload with the in-order predecessor's,
            # then remove the predecessor node (which has at most one child).
            temp_node = self.find_biggest_node(node.left)
            self.delete(temp_node.data.key, temp_node.data.hash_value)
            node.data = temp_node.data

    def inorder_traversal(
        self, node: BSTNode[Comp_K, V] | None
    ) -> Generator[HashEntry[Comp_K, V], None, None]:
        """Yield entries in ascending key order (recursive in-order walk)."""
        if node is None:
            return
        yield from self.inorder_traversal(node.left)
        yield node.data
        yield from self.inorder_traversal(node.right)

    def find_node(self, key: Comp_K, hash_: int) -> BSTNode[Comp_K, V]:
        """Return the node holding `key`; raise KeyError if absent.

        BUGFIX: when `key` compared neither smaller nor greater than the
        current node yet was not the same entry (incomparable values such as
        a distinct NaN object, or a broken __hash__/__eq__ contract), the
        original left `current_node` unchanged and spun forever.  That case
        is now reported as "not found".
        """
        current = self.root
        while current is not None:
            if hash_ == current.data.hash_value and is_same(current.data.key, key):
                return current
            if key < current.data.key:
                current = current.left
            elif key > current.data.key:
                current = current.right
            else:
                raise KeyError(repr(key))
        raise KeyError(repr(key))

    def is_right_child(self, node: BSTNode[Comp_K, V]) -> bool:
        """Whether `node` is its parent's right child.

        Relies on keys being unique within one tree, so comparing keys with
        `is_same` identifies the node itself.
        """
        node.parent = cast(BSTNode[Comp_K, V], node.parent)
        if node.parent.right is not None:
            return is_same(node.parent.right.data.key, node.data.key)
        return False

    def reassign_nodes(
        self, node: BSTNode[Comp_K, V], child: BSTNode[Comp_K, V] | None
    ) -> None:
        """Splice `child` (possibly None) into `node`'s position and drop `node`."""
        if child is not None:
            child.parent = node.parent
        if node.parent is not None:
            if self.is_right_child(node):
                node.parent.right = child
            else:
                node.parent.left = child
        else:
            self.root = child
        self.count -= 1

    def find_biggest_node(self, node: BSTNode[Comp_K, V]) -> BSTNode[Comp_K, V]:
        """Return the right-most (largest-key) node of the subtree at `node`."""
        while node.right is not None:
            node = node.right
        return node

    def append_at_end(self, item: HashEntry[Comp_K, V]) -> None:
        # Delegates to `insert` since there is no faster way of doing it except
        # removing the `is_same` from `.insert` which is a micro optimization.
        self.insert(item)
`is_same` from `.insert` which is a micro optimization. 250 | self.insert(item) 251 | -------------------------------------------------------------------------------- /src/pyhashmaps/open_addressing.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import ctypes 4 | from abc import abstractmethod 5 | from enum import Enum 6 | from typing import TYPE_CHECKING 7 | 8 | from .base import BaseHashMap, HashEntry, HashMapArgument, K, V, is_same 9 | 10 | if TYPE_CHECKING: 11 | from collections.abc import Generator, Iterator 12 | 13 | 14 | class NotExist(Enum): 15 | empty = 0 16 | deleted = 1 17 | 18 | def __str__(self) -> str: 19 | return self.name.upper() 20 | 21 | __repr__ = __str__ 22 | 23 | 24 | EMPTY = NotExist.empty 25 | DELETED = NotExist.deleted 26 | 27 | 28 | class OpenAddressingHashMap(BaseHashMap[K, V]): 29 | def __init__( 30 | self, 31 | mapping_or_iterable: HashMapArgument[K, V] | None = None, 32 | /, 33 | *, 34 | initial_size: int = 64, 35 | resize_factor: float = 0.7, 36 | ) -> None: 37 | super().__init__(initial_size) 38 | 39 | if not 0.0 < resize_factor < 1.0: 40 | raise ValueError("resize_factor must be between 0 and 1.") 41 | self.resize_factor = resize_factor 42 | self.slots: list[HashEntry[K, V] | NotExist] = [EMPTY] * self.size 43 | if mapping_or_iterable is not None: 44 | self.update(mapping_or_iterable) 45 | 46 | def __iter__(self) -> Iterator[K]: 47 | for item in self.slots: 48 | if isinstance(item, HashEntry): 49 | yield item.key 50 | 51 | def __getitem__(self, key: K) -> V: 52 | h = self._hash_func(key) 53 | prob_sequence_gen = self._probing_sequence(key, h, self.size) 54 | while True: 55 | idx = next(prob_sequence_gen) 56 | slot = self.slots[idx] 57 | if slot is EMPTY: 58 | raise KeyError(repr(key)) 59 | if ( 60 | isinstance(slot, HashEntry) 61 | and h == slot.hash_value 62 | and is_same(slot.key, key) 63 | ): 64 | return slot.value 65 | 66 | def __setitem__(self, 
key: K, value: V) -> None: 67 | h = self._hash_func(key) 68 | hash_entry = HashEntry(h, key, value) 69 | 70 | prob_sequence_gen = self._probing_sequence(key, h, self.size) 71 | while True: 72 | idx = next(prob_sequence_gen) 73 | slot = self.slots[idx] 74 | 75 | if ( 76 | isinstance(slot, HashEntry) 77 | and h == slot.hash_value 78 | and is_same(slot.key, key) 79 | ): 80 | slot.value = value 81 | break 82 | if slot is EMPTY: 83 | self.slots[idx] = hash_entry 84 | self._len += 1 85 | 86 | if self._need_increase(): 87 | self._increase_size() 88 | break 89 | 90 | def __delitem__(self, key: K) -> None: 91 | h = self._hash_func(key) 92 | prob_sequence_gen = self._probing_sequence(key, h, self.size) 93 | while True: 94 | idx = next(prob_sequence_gen) 95 | slot = self.slots[idx] 96 | if slot is EMPTY: 97 | raise KeyError(repr(key)) 98 | if ( 99 | isinstance(slot, HashEntry) 100 | and h == slot.hash_value 101 | and is_same(slot.key, key) 102 | ): 103 | self.slots[idx] = DELETED 104 | break 105 | 106 | self._len -= 1 107 | 108 | def __sizeof__(self) -> int: 109 | instance_size = super().__sizeof__() 110 | pointer_size = ctypes.sizeof(ctypes.c_void_p) 111 | items_size = len(self.slots) * (3 * pointer_size) 112 | return instance_size + items_size 113 | 114 | def _need_increase(self) -> bool: 115 | return len(self) / self.size >= self.resize_factor 116 | 117 | def _increase_size(self) -> None: 118 | new_size = self.size * 2 119 | new_slots: list[HashEntry[K, V] | NotExist] = [EMPTY] * new_size 120 | 121 | for item in self.slots: 122 | if isinstance(item, HashEntry): 123 | h = self._hash_func(item.key) 124 | for idx in self._probing_sequence(item.key, h, new_size): 125 | slot = new_slots[idx] 126 | if slot is EMPTY: 127 | new_slots[idx] = item 128 | break 129 | 130 | self.slots = new_slots 131 | self.size = new_size 132 | 133 | @abstractmethod 134 | def _probing_sequence( 135 | self, key: K, hash_: int, size: int 136 | ) -> Generator[int, None, None]: 137 | yield 0 138 | 139 | 
class LinearProbingHashMap(OpenAddressingHashMap[K, V]):
    """Open addressing probing consecutive slots: h, h+1, h+2, ... (mod size)."""

    def _probing_sequence(
        self, key: K, hash_: int, size: int
    ) -> Generator[int, None, None]:
        idx = hash_ % size
        while True:
            yield idx % size
            idx += 1


class QuadraticProbingHashMap(OpenAddressingHashMap[K, V]):
    """Open addressing probing with quadratically growing increments."""

    def _probing_sequence(
        self, key: K, hash_: int, size: int
    ) -> Generator[int, None, None]:
        idx = hash_ % size
        i = 0
        while True:
            yield idx % size
            idx += i**2
            i += 1


class DoubleHashingHashMap(OpenAddressingHashMap[K, V]):
    """Open addressing where a second hash function determines the probe stride."""

    def __init__(
        self,
        mapping_or_iterable: HashMapArgument[K, V] | None = None,
        /,
        *,
        initial_size: int = 64,
        resize_factor: float = 0.7,
        prime_number: int = 7,
    ) -> None:
        # `_prime` must be set before super().__init__, which may already
        # insert items from `mapping_or_iterable`.
        self._prime = prime_number
        super().__init__(
            mapping_or_iterable, initial_size=initial_size, resize_factor=resize_factor
        )

    def _hash_func2(self, h1_hash: int) -> int:
        """Secondary hash; always in [1, prime], hence never zero."""
        return self._prime - (h1_hash % self._prime)

    def _probing_sequence(
        self, key: K, hash_: int, size: int
    ) -> Generator[int, None, None]:
        """Yield the double-hashing probe sequence (h1 + i*h2) % size.

        BUGFIX: the original yielded ((h1 % size) + (h2 % size) + i) % size —
        a constant offset plus a step of 1, i.e. plain linear probing, not
        double hashing.  True double hashing advances by `h2` each probe.
        Because `h2` may share a common factor with `size` (the stride would
        then cycle through only a subset of slots), we fall back to a step of
        1 after `size` probes so every slot is guaranteed to be visited and
        the callers' probe loops always terminate.
        """
        h2 = self._hash_func2(hash_)
        for i in range(size):
            yield (hash_ + i * h2) % size
        # Fallback: linear scan guarantees full table coverage even when
        # gcd(h2, size) != 1.
        idx = hash_
        while True:
            yield idx % size
            idx += 1


# --- src/pyhashmaps/separate_chaining.py (header; the import that begins with
# --- `from` continues in the next chunk).  Carried over verbatim as a comment:
#
# from __future__ import annotations
#
# import ctypes
# from typing import TYPE_CHECKING
#
# from
# (This chunk begins mid-import: the previous chunk ends with a dangling
# `from `.  The full statement is reproduced here so the chunk is readable.)
from .base import BaseHashMap, Chain, Comp_K, HashEntry, HashMapArgument, K, V
from .chains import BinarySearchTree, DynamicArray, LinkedList

if TYPE_CHECKING:
    from collections.abc import Iterator


class SeparateChainingHashMap(BaseHashMap[K, V]):
    """Hashmap that keeps colliding entries together in per-slot buckets.

    Each slot holds one `Chain` (a dynamic array, linked list or BST);
    concrete subclasses pick the bucket type via the `chain` class attribute.
    """

    # Set by concrete subclasses to the bucket class they use.
    chain: type[Chain[K, V]]

    def __init__(
        self,
        mapping_or_iterable: HashMapArgument[K, V] | None = None,
        /,
        *,
        initial_size: int = 40,
        max_chain_size: int = 5,
    ) -> None:
        super().__init__(initial_size)
        self._max_chain_size = max_chain_size
        self.slots: list[Chain[K, V]] = [self.chain() for _ in range(self.size)]
        if mapping_or_iterable is not None:
            self.update(mapping_or_iterable)

    def __iter__(self) -> Iterator[K]:
        return (entry.key for bucket in self.slots for entry in bucket)

    def __getitem__(self, key: K) -> V:
        h = self._hash_func(key)
        bucket = self.slots[h % self.size]
        # `find` raises KeyError when the key is absent.
        return bucket.find(key, h).value

    def __setitem__(self, key: K, value: V) -> None:
        h = self._hash_func(key)
        bucket = self.slots[h % self.size]

        size_before = len(bucket)
        bucket.insert(HashEntry(h, key, value))
        size_after = len(bucket)

        # The bucket only grows for a brand-new key, not for a replacement.
        if size_before != size_after:
            self._len += 1

        if self._need_increase(size_after):
            self._increase_size()

    def __delitem__(self, key: K) -> None:
        h = self._hash_func(key)
        bucket = self.slots[h % self.size]

        size_before = len(bucket)
        bucket.delete(key, h)  # raises KeyError when the key is absent
        size_after = len(bucket)

        if size_before != size_after:
            self._len -= 1

    def __sizeof__(self) -> int:
        # Rough estimate: the instance itself plus, per bucket, one pointer
        # for the bucket and three pointers per stored HashEntry
        # (hash_value, key, value).
        instance_size = super().__sizeof__()
        pointer_size = ctypes.sizeof(ctypes.c_void_p)
        items_size = sum(
            pointer_size + sum(pointer_size * 3 for _hash_entry in bucket)
            for bucket in self.slots
        )
        return instance_size + items_size
_hash_entry in bucket) 77 | for bucket in self.slots 78 | ) 79 | return instance_size + items_size 80 | 81 | def _need_increase(self, chain_size: int) -> bool: 82 | return chain_size >= self._max_chain_size 83 | 84 | def _increase_size(self) -> None: 85 | new_size = self.size * 2 86 | new_slots = [self.chain() for _ in range(new_size)] 87 | 88 | for chain in self.slots: 89 | for item in chain: 90 | idx = item.hash_value % new_size 91 | new_slots[idx].append_at_end(item) 92 | 93 | self.slots = new_slots 94 | self.size = new_size 95 | 96 | 97 | class DynamicArrayHashMap(SeparateChainingHashMap[K, V]): 98 | chain: type[DynamicArray[K, V]] = DynamicArray 99 | 100 | 101 | class LinkedListHashMap(SeparateChainingHashMap[K, V]): 102 | chain: type[LinkedList[K, V]] = LinkedList 103 | 104 | 105 | class BSTHashMap(SeparateChainingHashMap[Comp_K, V]): 106 | chain: type[BinarySearchTree[Comp_K, V]] = BinarySearchTree 107 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amirsoroush/Python_Hashmaps/db48dd538e9860a3257764db399fccefc99fc1dd/tests/__init__.py -------------------------------------------------------------------------------- /tests/base_test_file.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # ruff: noqa 3 | import unittest 4 | from typing import TYPE_CHECKING 5 | 6 | from src.pyhashmaps.base import BaseHashMap 7 | 8 | base = unittest.TestCase if TYPE_CHECKING else object 9 | 10 | 11 | class BaseTestCase(base): 12 | cls: BaseHashMap 13 | 14 | def test_constructor(self): 15 | hashmap1 = self.cls({"a": 10, "b": 20, "c": 30}) 16 | hashmap2 = self.cls([("a", 10), ("b", 20), ("c", 30)]) 17 | self.assertEqual(hashmap1.items(), {("a", 10), ("b", 20), ("c", 30)}) 18 | self.assertEqual(hashmap2.items(), {("a", 10), ("b", 20), ("c", 30)}) 19 | 20 | 
def test_add_item(self): 21 | hashmap = self.cls() 22 | hashmap["foo"] = 10 23 | hashmap["bar"] = 20 24 | self.assertEqual(hashmap["foo"], 10) 25 | self.assertEqual(hashmap["bar"], 20) 26 | with self.assertRaises(TypeError): 27 | hashmap[{}] = 10 28 | 29 | def test_not_existed_item(self): 30 | self.assertIsNone(self.cls().get("foo")) 31 | 32 | def test_delete_item(self): 33 | hashmap = self.cls() 34 | hashmap["foo"] = 10 35 | self.assertEqual(hashmap["foo"], 10) 36 | del hashmap["foo"] 37 | self.assertIsNone(hashmap.get("foo")) 38 | 39 | def test_len(self): 40 | hashmap = self.cls(initial_size=15) 41 | for i in range(10): 42 | hashmap[str(i)] = i 43 | self.assertEqual(len(hashmap), 10) 44 | del hashmap["4"] 45 | self.assertEqual(len(hashmap), 9) 46 | hashmap["5"] = None 47 | self.assertEqual(len(hashmap), 9) 48 | del hashmap["9"] 49 | self.assertEqual(len(hashmap), 8) 50 | del hashmap["0"] 51 | self.assertEqual(len(hashmap), 7) 52 | hashmap.clear() 53 | self.assertEqual(len(hashmap), 0) 54 | 55 | def test_same_key(self): 56 | hashmap = self.cls() 57 | hashmap["foo"] = 10 58 | self.assertEqual(hashmap["foo"], 10) 59 | hashmap["foo"] = 20 60 | self.assertEqual(hashmap["foo"], 20) 61 | self.assertEqual(len(hashmap), 1) 62 | 63 | def test_contain(self): 64 | hashmap = self.cls() 65 | hashmap["foo"] = 10 66 | self.assertTrue("foo" in hashmap) 67 | self.assertFalse("bar" in hashmap) 68 | 69 | def test_iteration(self): 70 | hashmap = self.cls() 71 | for i in range(10): 72 | hashmap[i] = i * 2 73 | self.assertEqual(set(hashmap), set(range(10))) 74 | self.assertEqual(set(hashmap.values()), set(i * 2 for i in range(10))) 75 | 76 | def test_large_number_of_items(self): 77 | hashmap = self.cls() 78 | n = 1000 79 | for i in range(n): 80 | hashmap[f"_{i}_"] = i 81 | self.assertEqual(len(hashmap), n) 82 | for i in range(0, n, 2): 83 | del hashmap[f"_{i}_"] 84 | self.assertEqual(len(hashmap), n // 2) 85 | 86 | def test_repr(self): 87 | hashmap = self.cls() 88 | class_name = 
hashmap.__class__.__name__ 89 | self.assertEqual(repr(hashmap), f"{class_name}({{}})") 90 | hashmap["A"] = 30 91 | self.assertEqual(repr(hashmap), f"{class_name}({{'A': 30}})") 92 | 93 | def test_equality(self): 94 | hashmap = self.cls({"a": 10, "b": 20, "c": 30}) 95 | dictionary = {"a": 10, "b": 20, "c": 30} 96 | self.assertEqual(hashmap, dictionary) 97 | 98 | def test_accidental_same_slot(self): 99 | """ 100 | hash functions can accidentally collide in a same bucket. This test 101 | reveals this situation specifically in LinearProbingHashMap and 102 | QuadraticProbingHashMap. 103 | """ 104 | 105 | class A: 106 | def __init__(self, var): 107 | self.var = var 108 | 109 | def __lt__(self, other): 110 | if isinstance(other, A): 111 | return self.var < other.var 112 | return NotImplemented 113 | 114 | def __hash__(self) -> int: 115 | return hash(self.var) 116 | 117 | def __eq__(self, other: object) -> bool: 118 | if isinstance(other, A): 119 | return self.var == other.var 120 | return NotImplemented 121 | 122 | hashmap = self.cls(initial_size=10) 123 | 124 | obj = A(11) 125 | # The A(11) object is placed into the second slot in the ten slots. 126 | hashmap[obj] = "something1" 127 | 128 | # A(21) also wants to go to the second slot in the ten slots. 
129 | obj.var = 21 130 | hashmap[obj] = "something2" 131 | self.assertEqual(len(hashmap), 2) 132 | -------------------------------------------------------------------------------- /tests/test_open_addressing_hashmap.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # ruff: noqa 3 | import unittest 4 | 5 | from src.pyhashmaps.open_addressing import ( 6 | DoubleHashingHashMap, 7 | LinearProbingHashMap, 8 | QuadraticProbingHashMap, 9 | ) 10 | 11 | from .base_test_file import BaseTestCase 12 | 13 | 14 | class TestOpenAddressingHashMap(BaseTestCase): 15 | cls = None 16 | 17 | def test_creation_valid(self): 18 | self.cls() 19 | self.cls(initial_size=100) 20 | self.cls(resize_factor=0.7) 21 | self.cls(initial_size=30, resize_factor=0.6) 22 | 23 | def test_creation_invalid(self): 24 | self.assertRaises(ValueError, self.cls, initial_size=-2) 25 | self.assertRaises(ValueError, self.cls, initial_size=0) 26 | self.assertRaises(ValueError, self.cls, resize_factor=1.1) 27 | 28 | def test_resize(self): 29 | hashmap = self.cls(initial_size=10, resize_factor=0.8) 30 | self.assertEqual(hashmap.size, 10) 31 | for i in range(8): 32 | hashmap[i] = None 33 | self.assertEqual(hashmap.size, 20) 34 | self.assertEqual(len(hashmap), 8) 35 | 36 | 37 | class TestLinearProbingHashMap(TestOpenAddressingHashMap, unittest.TestCase): 38 | cls = LinearProbingHashMap 39 | 40 | 41 | class TestQuadraticProbingHashMap(TestOpenAddressingHashMap, unittest.TestCase): 42 | cls = QuadraticProbingHashMap 43 | 44 | 45 | class TestDoubleHashingHashMap(TestOpenAddressingHashMap, unittest.TestCase): 46 | cls = DoubleHashingHashMap 47 | -------------------------------------------------------------------------------- /tests/test_separate_chaining_hashmap.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # ruff: noqa 3 | import unittest 4 | 5 | from src.pyhashmaps.separate_chaining import ( 6 | 
BSTHashMap, 7 | DynamicArrayHashMap, 8 | LinkedListHashMap, 9 | ) 10 | 11 | from .base_test_file import BaseTestCase 12 | 13 | 14 | class TestSeparateChainingHashmap(BaseTestCase): 15 | cls = None 16 | 17 | def test_creation_valid(self): 18 | self.cls() 19 | self.cls(initial_size=100) 20 | self.cls(initial_size=100, max_chain_size=5) 21 | self.cls(max_chain_size=5) 22 | 23 | def test_creation_invalid(self): 24 | self.assertRaises(ValueError, self.cls, initial_size=-2) 25 | 26 | def test_resize(self): 27 | class A: 28 | __hash__ = lambda self: 0 29 | 30 | hashmap = self.cls(initial_size=5, max_chain_size=3) 31 | self.assertEqual(hashmap.size, 5) 32 | for i in range(3): 33 | hashmap[A()] = i 34 | self.assertEqual(hashmap.size, 5 * 2) 35 | 36 | 37 | class TestDynamicArrayHashMap(TestSeparateChainingHashmap, unittest.TestCase): 38 | cls = DynamicArrayHashMap 39 | 40 | 41 | class TestLinkedListHashMap(TestSeparateChainingHashmap, unittest.TestCase): 42 | cls = LinkedListHashMap 43 | 44 | 45 | class TestBSTHashMap(TestSeparateChainingHashmap, unittest.TestCase): 46 | cls = BSTHashMap 47 | 48 | # This is an override since `BSTHashMap` needs `HashEntr`s to be comparable. 49 | def test_resize(self): 50 | class A: 51 | def __hash__(self) -> int: 52 | return 0 53 | 54 | def __lt__(self, item) -> bool: 55 | return True 56 | 57 | hashmap = self.cls(initial_size=5, max_chain_size=3) 58 | self.assertEqual(hashmap.size, 5) 59 | for i in range(3): 60 | hashmap[A()] = i 61 | self.assertEqual(hashmap.size, 5 * 2) 62 | --------------------------------------------------------------------------------