├── py_skiplist ├── __init__.py ├── iterators.py └── skiplist.py ├── tests ├── __init__.py ├── dev_tests.py └── functional_tests.py ├── .coveralls.yml ├── .travis.yml ├── LICENSE └── README.md /py_skiplist/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'azhukov' 2 | -------------------------------------------------------------------------------- /.coveralls.yml: -------------------------------------------------------------------------------- 1 | service_name: travis-pro 2 | repo_token: YA4qSAUPVb198pKbytduNmDxYvxAZLp4z -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | python: 4 | - "3.7" 5 | 6 | install: "pip install pep8 coveralls" 7 | before_script: 8 | - "pep8 --ignore=E501 py_skiplist/" 9 | # command to run tests 10 | script: coverage run --source py_skiplist -m unittest discover tests "*_tests.py" 11 | 12 | after_success: 13 | coveralls 14 | -------------------------------------------------------------------------------- /py_skiplist/iterators.py: -------------------------------------------------------------------------------- 1 | from itertools import dropwhile, count, repeat 2 | import random 3 | 4 | 5 | def geometric(p): 6 | return (next(dropwhile(lambda _: random.randint(1, int(1. / p)) == 1, count())) for _ in repeat(1)) 7 | 8 | 9 | # Simple deterministic distribution for testing internals of the skiplist. 10 | uniform = repeat 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2018 Alexander Zhukov 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 14 | -------------------------------------------------------------------------------- /tests/dev_tests.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | 4 | from py_skiplist.skiplist import Skiplist, NIL 5 | from py_skiplist.skiplist import geometric 6 | from py_skiplist.iterators import uniform 7 | 8 | 9 | class DistributionTestCase(unittest.TestCase): 10 | def test_geometric(self): 11 | p = 0.5 12 | g = geometric(p) 13 | expected = [p**i for i in range(1, 5)] 14 | sample = [next(g) for _ in range(10000)] 15 | actual = [float(sum(1 for n in sample if n == t)) / len(sample) for t in range(10)] 16 | self.assertAlmostEqual(1, sum(i for i in actual), delta=0.01) 17 | self.assertAlmostEqual(0, sum(i - j for i, j in zip(expected, actual)), delta=0.01) 18 | 19 | 20 | class DataStructTestCase(unittest.TestCase): 21 | def test_nil_always_false(self): 22 | self.assertFalse(NIL()) 23 | 24 | 25 | class PropertiesTestCase(unittest.TestCase): 26 | def test_sorted(self): 27 | sl = Skiplist() 28 | sl.distribution = uniform(2) 29 | import random 30 | l = [random.randint(1, 78) for i in range(10)] 31 | for i in l: 32 | sl[i] = i 33 | for level in range(len(sl.head.nxt)): 34 | self.assertEqual(sorted(set(l)), [node.key for node in sl._level(sl.head.nxt[-1], level)]) 35 | 36 | -------------------------------------------------------------------------------- /tests/functional_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import collections 3 | from py_skiplist.iterators import uniform 4 | 5 | from py_skiplist.skiplist import Skiplist 6 | 7 | 8 | class InterfaceTestCase(unittest.TestCase): 9 | def test_interface_methods_set(self): 10 | self.assertTrue(issubclass(Skiplist, collections.MutableMapping), 11 | msg='Skiplist should alway implement the MutableMapping interface') 12 | 13 | def test_get(self): 14 | sl = Skiplist(foo='bar') 15 | self.assertEqual(sl.get('foo'), 'bar') 16 | self.assertEqual(sl.get('None', 'baz'), 'baz') 17 | self.assertIsNone(sl.get('Nothing')) 18 | 19 | def test_contains(self): 20 | sl = Skiplist(one=1) 21 | self.assertIn('one', sl) 22 | self.assertNotIn('two', sl) 23 | 24 | def test_pop(self): 25 | sl = Skiplist(john='Snow') 26 | self.assertEqual(sl.pop('john'), 'Snow') 27 | self.assertRaises(KeyError, lambda: sl.pop('Sansa')) 28 | 29 | def test_iteritems(self): 30 | sl = Skiplist(one=1, two=2) 31 | self.assertListEqual(sorted([('one', 1), ('two', 2)]), 32 | sorted(sl.iteritems())) 33 | 34 | 35 | class SkipListTestCase(unittest.TestCase): 36 | 37 | def test_insert(self): 38 | sl = Skiplist() 39 | sl._insert(1, 1) 40 | e = sl[1] 41 | self.assertEqual(e, 1) 42 | 43 | def test_update(self): 44 | sl = Skiplist() 45 | sl['foo'] = 'bar' 46 | self.assertEqual(sl['foo'], 'bar') 47 | sl['foo'] = 'baz' 48 | self.assertEqual(sl['foo'], 'baz') 49 | 50 | def test_remove(self): 51 | sl = Skiplist() 52 | sl['what'] = 'that' 53 | self.assertTrue(sl['what']) 54 | del sl['what'] 55 | self.assertRaises(KeyError, lambda: sl['what']) 56 | self.assertRaises(KeyError, lambda: sl._remove('not here')) 57 | 58 | def test_init(self): 59 | sl = Skiplist(a=1, b=2) 60 | self.assertEqual(sl['a'], 1) 61 | self.assertEqual(sl['b'], 2) 62 | self.assertEqual(len(sl), 2) 63 | 64 | def test_str(self): 65 | sl = Skiplist() 66 | self.assertEqual('skiplist({})', str(sl)) 67 | sl['1'] = 1 68 | self.assertEqual('skiplist({1: 1})', str(sl)) 69 | 70 | def test_589(self): 71 | sl = Skiplist() 72 | sl.distribution = uniform(2) 73 | sl[10] = 10 74 | sl[2] = 2 75 | sl[3] = 3 76 | self.assertTrue(True) 77 | 78 | if __name__ == '__main__': 79 | unittest.main() 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | skiplist-python 2 | =============== 3 | 4 | Pure python implementation of a skiplist data structure. 5 | 6 | [![Build Status](https://travis-ci.org/ZhukovAlexander/py-skiplist.svg?branch=master)](https://travis-ci.org/ZhukovAlexander/py-skiplist) 7 | [![Coverage Status](https://coveralls.io/repos/ZhukovAlexander/py-skiplist/badge.svg?branch=master&service=github)](https://coveralls.io/github/ZhukovAlexander/py-skiplist?branch=master) 8 | 9 | Intro 10 | ----- 11 | Skip lists are a data structure that can be used in place 12 | of balanced trees. Skip lists use probabilistic balancing 13 | rather than strictly enforced balancing and as a result 14 | the algorithms for insertion and deletion in skip lists 15 | are much simpler and significantly faster than equivalent 16 | algorithms for balanced trees. 17 | 18 | Skip lists are balanced by consulting a random number 19 | generator. Although skip lists have bad worst-case 20 | performance, no input sequence consistently produces the 21 | worst-case performance (much like quicksort when the pivot 22 | element is chosen randomly). 23 | 24 | Example usage 25 | ------------- 26 | 27 | ```python 28 | >>>sl = Skiplist(foo='bar', 'spam'='eggs') 29 | >>>sl 30 | 'skiplist({"foo": "bar", "spam": "eggs"})' 31 | >>> 32 | >>>sl['foo'] 33 | 'bar' 34 | >>> 35 | >>>sl['foo'] = 'baz' 36 | >>>sl['foo'] 37 | 'baz' 38 | >>> 39 | >>>'spam' in sl 40 | True 41 | >>> 42 | >>>del sl['spam'] 43 | >>>sl 44 | 'skiplist({"foo": "bar"})' 45 | ``` 46 | 47 | Skip List Structure 48 | -------------------- 49 | Each element is represented by a node, the level of 50 | which is chosen randoml when the node is inserted 51 | without regard for the number of elements in the 52 | data structure. A `level i` node has `i` forward 53 | pointers, indexed 1 through `i`. There is no need 54 | to store the level of a node in the node. Levels 55 | are capped at some appropriate constant `MaxLevel`. 56 | The *level of a list* is the maximum level currently 57 | in the list (or 1 if the list if empty). The `header` 58 | of a list has forward pointers at levels one through 59 | `MaxLevel`. The forward pointers of the header at 60 | levels higher than the current maximum level of the 61 | list point to `NIL`. 62 | 63 | Skip List Algoritms 64 | =================== 65 | Skip list operations are analogous to that of a binary 66 | tree. They include: **search**, **insert**, 67 | and **delete**. Note that skip lists are easily 68 | extendable to support operations like "find the minimum key" or "find the next key". 69 | 70 | Initialization 71 | -------------- 72 | An element `NIL` is allocated and given a key 73 | greater than any legal key. All levels of all 74 | skip lists are terminated with `NIL`. A new list 75 | has level 1 and all forward pointers of the list's 76 | header point to `NIL`. 77 | 78 | Search Algorithm 79 | ----------------- 80 | Search works by traversing forward pointers 81 | that do not overshoot the node containing the element 82 | being searched for. When no more progress can be 83 | made at the current level of forward pointers, the 84 | search moves down to the next level. When we can make 85 | no more progress at level 1, we must be in front 86 | of the node that contains the desired element (if 87 | it is in the list). 88 | 89 | At what level should the search be started? William's 90 | analysis suggests that ideally we should start 91 | at level `L` where we expect `log_{1/p}n` where 92 | `n` is the number of elements in the list and 93 | `p` is the fraction of nodes in level `i` that 94 | also have level `i+1` pointers. Starting a search 95 | at the maximum level in the list does not add more 96 | than a small constant to the expected search time. 97 | 98 | Insertion and Deletion Algorithm 99 | -------------------------------- 100 | To insert or delete a node, we simply search and 101 | splice. A vector `update` is maintained so that when 102 | the search is complete, `update[i]` contains a pointer 103 | to the rightmost node of level `i`. The new node 104 | is of a random level. 105 | If the insertion generates a node with a greater level 106 | than the previous maximum, both *Maxlevel* 107 | and the appropriate portions of the update vector 108 | are updated. After each deletion, we check if we have 109 | deleted the maximum element of the list and if so, 110 | decrease the maximum level of the list. 111 | 112 | **References** 113 | 114 | * [*Skip list wikipedea article*](http://en.wikipedia.org/wiki/Skip_list), 115 | * [*A Skip List Cookbook*](http://cg.scs.carleton.ca/~morin/teaching/5408/refs/p90b.pdf) by William Pugh 116 | * [Skip List vs. Binary Search Tree on Stack Overflow 117 | ](http://stackoverflow.com/questions/256511/skip-list-vs-binary-tree) 118 | * [A Provably Correct Scalable Concurrent Skip 119 | List Whitepaper](http://www.cs.tau.ac.il/~shanir/nir-pubs-web/Papers/OPODIS2006-BA.pdf) 120 | * [Purely functional concurrent skip list 121 | on Stack Overflow](http://stackoverflow.com/questions/3489560/purely-functional-concurrent-skip-list) 122 | * [http://eternallyconfuzzled.com/tuts/datastructures/jsw_tut_skip.aspx](http://eternallyconfuzzled.com/tuts/datastructures/jsw_tut_skip.aspx) 123 | 124 | -------------------------------------------------------------------------------- /py_skiplist/skiplist.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod, abstractproperty 2 | from contextlib import contextmanager 3 | from math import log 4 | 5 | import collections 6 | from itertools import chain, takewhile, dropwhile 7 | from threading import Lock 8 | 9 | from .iterators import geometric 10 | 11 | 12 | class NIL(object): 13 | """Sentinel object that always compares greater than another object""" 14 | __slots__ = () 15 | 16 | def __cmp__(self, other): 17 | # NIL is always greater than the other 18 | return 1 19 | 20 | def __lt__(self, other): 21 | return False 22 | 23 | def __le__(self, other): 24 | return False 25 | 26 | def __ge__(self, other): 27 | return True 28 | 29 | def __str__(self): 30 | return 'NIL' 31 | 32 | def __nonzero__(self): 33 | return False 34 | 35 | def __bool__(self): 36 | return False 37 | 38 | 39 | class _Skipnode(object): 40 | __slots__ = ('data', 'nxt', 'key', 'prev', 'height') 41 | 42 | def __init__(self, key, data, nxt, prev): 43 | self.key = key 44 | self.data = data 45 | self.nxt = nxt 46 | self.prev = prev 47 | 48 | for level in range(len(prev)): 49 | prev[level].nxt[level] = self.nxt[level].prev[level] = self 50 | 51 | 52 | class LockableArray(list): 53 | def __init__(self, seq=()): 54 | super(LockableArray, self).__init__(seq) 55 | self._lock = Lock() 56 | 57 | @contextmanager 58 | def lock(self): 59 | try: 60 | yield self._lock.acquire() 61 | finally: 62 | self._lock.release() 63 | 64 | 65 | class SkiplistAbstractBase: 66 | __metaclass__ = ABCMeta 67 | """Class for randomized indexed skip list. The default 68 | distribution of node heights is geometric.""" 69 | 70 | distribution = geometric(0.5) 71 | 72 | @abstractproperty 73 | def head(self): 74 | raise NotImplementedError 75 | 76 | @abstractproperty 77 | def tail(self): 78 | raise NotImplementedError 79 | 80 | def _height(self): 81 | return len(self.head.nxt) 82 | 83 | def _level(self, start=None, level=0): 84 | node = start or self.head.nxt[level] 85 | while node is not self.tail: 86 | yield node 87 | node = node.nxt[level] 88 | 89 | def _scan(self, key): 90 | return_value = None 91 | height = len(self.head.nxt) 92 | prevs = LockableArray([self.head] * height) 93 | node = self.head.nxt[-1] 94 | for level in reversed(range(height)): 95 | node = next( 96 | dropwhile( 97 | lambda node_: node_.nxt[level].key <= key, 98 | chain([self.head], self._level(node, level)) 99 | ) 100 | ) 101 | if node.key == key: 102 | return_value = node 103 | else: 104 | prevs[level] = node 105 | 106 | return return_value, prevs 107 | 108 | def _insert(self, key, data): 109 | """Inserts data into appropriate position.""" 110 | 111 | node, update = self._scan(key) 112 | 113 | if node: 114 | node.data = data 115 | return 116 | 117 | node_height = next(self.distribution) + 1 # because height should be positive non-zero 118 | # if node's height is greater than number of levels 119 | # then add new levels, if not do nothing 120 | height = len(self.head.nxt) 121 | 122 | update.extend([self.head for _ in range(height, node_height)]) 123 | 124 | self.head.nxt.extend([self.tail for _ in range(height, node_height)]) 125 | 126 | self.tail.prev.extend([self.head for _ in range(height, node_height)]) 127 | 128 | new_node = _Skipnode(key, data, [update[l].nxt[l] for l in range(node_height)], [update[l] for l in range(node_height)]) 129 | 130 | def _remove(self, key): 131 | """Removes node with given data. Raises KeyError if data is not in list.""" 132 | 133 | node, update = self._scan(key) 134 | if not node: 135 | raise KeyError 136 | 137 | with update.lock(): 138 | for level in range(len(node.nxt)): 139 | update[level].nxt[level] = node.nxt[level] 140 | 141 | # trim not used head pointers 142 | for i in reversed(range(len(self.head.nxt))): 143 | if self.head.nxt[i] != self.tail: 144 | break 145 | elif i > 0: # at least one pointer 146 | head_node = self.head.nxt.pop() 147 | del head_node 148 | 149 | del node 150 | 151 | 152 | class Skiplist(SkiplistAbstractBase, collections.MutableMapping): 153 | 154 | def _remove(self, key): 155 | super(Skiplist, self)._remove(key) 156 | self._size -= 1 157 | 158 | def _insert(self, key, data): 159 | super(Skiplist, self)._insert(key, data) 160 | self._size += 1 161 | 162 | @property 163 | def head(self): 164 | return self._head 165 | 166 | @property 167 | def tail(self): 168 | return self._tail 169 | 170 | def __init__(self, **kwargs): 171 | super(Skiplist, self).__init__() 172 | 173 | self._tail = _Skipnode(NIL(), None, [], []) 174 | self._head = _Skipnode(None, 'HEAD', [self.tail], []) 175 | self._tail.prev.extend([self.head]) 176 | 177 | self._size = 0 178 | 179 | for k, v in kwargs.items(): 180 | self[k] = v 181 | 182 | def __len__(self): 183 | return self._size 184 | 185 | def __str__(self): 186 | return 'skiplist({{{}}})'.format( 187 | ', '.join('{key}: {value}'.format(key=node.key, value=node.data) for node in self._level()) 188 | ) 189 | 190 | def __getitem__(self, key): 191 | """Returns item with given index""" 192 | node, _ = self._scan(key) 193 | if node is None: 194 | raise KeyError('Key <{0}> not found'.format(key)) 195 | return node.data 196 | 197 | def __setitem__(self, key, value): 198 | return self._insert(key, value) 199 | 200 | def __delitem__(self, key): 201 | self._remove(key) 202 | 203 | def __iter__(self): 204 | """Iterate over keys in sorted order""" 205 | return (node.key for node in self._level()) 206 | 207 | def iteritems(self): 208 | return ((node.key, node.data) for node in self._level()) 209 | 210 | def iterkeys(self): 211 | return (item[0] for item in self.iteritems()) 212 | 213 | def itervalues(self): 214 | return (item[1] for item in self.iteritems()) 215 | --------------------------------------------------------------------------------