├── setup.cfg ├── MANIFEST.in ├── .gitignore ├── .travis.yml ├── SECURITY.md ├── LICENSE ├── README.md ├── .github └── workflows │ └── test.yml ├── test_pickleshare.py ├── setup.py └── pickleshare.py /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include test_pickleshare.py 3 | exclude *.md 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__/ 3 | /build/ 4 | /dist/ 5 | .pytest_cache/ 6 | *.egg-info/ 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | install: pip install -e . 3 | script: py.test 4 | 5 | python: 6 | - "pypy" 7 | - "pypy3" 8 | - "2.7" 9 | - "3.3" 10 | - "3.4" 11 | - "3.5" 12 | - "3.6" 13 | 14 | matrix: 15 | include: 16 | - python: "3.7" 17 | dist: xenial 18 | sudo: true 19 | - python: "3.8-dev" 20 | dist: xenial 21 | sudo: true 22 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Reporting a Vulnerability 4 | 5 | All PickleShare security reports are handled via the Jupyter/IPython security address, security@ipython.org. 6 | You can find more information on the Jupyter website: https://jupyter.org/security 7 | 8 | ## Tidelift 9 | 10 | You can report security concerns for IPython via the [Tidelift platform](https://tidelift.com/security). 
11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Ville Vainio 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PickleShare - a small 'shelve' like datastore with concurrency support 2 | 3 | Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike shelve, 4 | many processes can access the database simultaneously. Changing a value in 5 | database is immediately visible to other processes accessing the same database. 6 | 7 | Concurrency is possible because the values are stored in separate files. 
Hence 8 | the "database" is a directory where *all* files are governed by PickleShare. 9 | 10 | Both python2 and python3 are supported. 11 | 12 | Example usage: 13 | 14 | ```python 15 | 16 | from pickleshare import * 17 | db = PickleShareDB('~/testpickleshare') 18 | db.clear() 19 | print("Should be empty:", db.items()) 20 | db['hello'] = 15 21 | db['aku ankka'] = [1,2,313] 22 | db['paths/are/ok/key'] = [1,(5,46)] 23 | print(db.keys()) 24 | ``` 25 | 26 | This module is certainly not ZODB, but can be used for low-load 27 | (non-mission-critical) situations where tiny code size trumps the 28 | advanced features of a "real" object database. 29 | 30 | Installation guide: 31 | 32 | ```sh 33 | pip install pickleshare 34 | ``` 35 | 36 | Or, if installing from source 37 | 38 | ```sh 39 | pip install . 40 | ``` 41 | 42 | 43 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - '*.x' 8 | # Run weekly on Monday at 1:23 UTC 9 | 10 | 11 | jobs: 12 | test: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: [ubuntu-latest] 18 | python-version: ["3.12"] 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | cache: pip 26 | cache-dependency-path: | 27 | setup.cfg 28 | - name: Install and update Python dependencies (binary only) 29 | if: ${{ ! 
contains( matrix.python-version, 'dev' ) }} 30 | run: | 31 | python -m pip install --only-binary ':all:' --upgrade pip setuptools wheel build 32 | python -m pip install --only-binary ':all:' --upgrade check-manifest pytest-cov pytest-json-report 33 | - name: Try building with Python build 34 | run: | 35 | python -m build 36 | shasum -a 256 dist/* 37 | - name: Check manifest 38 | run: check-manifest 39 | - name: pytest 40 | env: 41 | COLUMNS: 120 42 | run: | 43 | pytest --color=yes -raXxs 44 | -------------------------------------------------------------------------------- /test_pickleshare.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | 4 | from pickleshare import PickleShareDB 5 | 6 | def test_pickleshare(tmpdir): 7 | db = PickleShareDB(tmpdir) 8 | db.clear() 9 | print("Should be empty:",db.items()) 10 | assert len(db) == 0 11 | db['hello'] = 15 12 | assert db['hello'] == 15 13 | db['aku ankka'] = [1,2,313] 14 | assert db['aku ankka'] == [1,2,313] 15 | db['paths/nest/ok/keyname'] = [1,(5,46)] 16 | assert db['paths/nest/ok/keyname'] == [1,(5,46)] 17 | 18 | db.hset('hash', 'aku', 12) 19 | db.hset('hash', 'ankka', 313) 20 | assert db.hget('hash', 'aku') == 12 21 | assert db.hget('hash', 'ankka') == 313 22 | 23 | print("all hashed",db.hdict('hash')) 24 | print(db.keys()) 25 | print(db.keys('paths/nest/ok/k*')) 26 | print(dict(db)) # snapshot of whole db 27 | db.uncache() # frees memory, causes re-reads later 28 | 29 | # shorthand for accessing deeply nested files 30 | lnk = db.getlink('myobjects/test') 31 | lnk.foo = 2 32 | lnk.bar = lnk.foo + 5 33 | assert lnk.bar == 7 34 | 35 | def test_stress(tmpdir): 36 | db = PickleShareDB(tmpdir) 37 | import time,sys 38 | for i in range(100): 39 | for j in range(500): 40 | if i % 15 == 0 and i < 70: 41 | if str(j) in db: 42 | del db[str(j)] 43 | continue 44 | 45 | if j%33 == 0: 46 | time.sleep(0.02) 47 | 48 | db[str(j)] = 
db.get(str(j), []) + [(i,j,"proc %d" % os.getpid())] 49 | db.hset('hash',j, db.hget('hash',j,15) + 1 ) 50 | 51 | print(i, end=' ') 52 | sys.stdout.flush() 53 | if i % 10 == 0: 54 | db.uncache() 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | # extract version from pickleshare.py 4 | # can't import because pickleshare may need pathlib2, which might not be installed yet 5 | with open('pickleshare.py') as f: 6 | for line in f: 7 | if line.startswith('__version__'): 8 | version = eval(line.split('=', 1)[1]) 9 | break 10 | 11 | setup( 12 | name="pickleshare", 13 | version=version, 14 | py_modules=['pickleshare'], 15 | author="Ville Vainio", 16 | author_email="vivainio@gmail.com", 17 | description="Tiny 'shelve'-like database with concurrency support", 18 | license="MIT", 19 | extras_require = { 20 | # Ugly, but we can't do < comparison here 21 | ':python_version in "2.6 2.7 3.2 3.3"': ['pathlib2'], 22 | }, 23 | url="https://github.com/ipython/pickleshare", 24 | keywords="database persistence pickle ipc shelve", 25 | long_description="""\ 26 | PickleShare - a small 'shelve' like datastore with concurrency support 27 | 28 | Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike shelve, 29 | many processes can access the database simultaneously. Changing a value in 30 | database is immediately visible to other processes accessing the same database. 31 | 32 | Concurrency is possible because the values are stored in separate files. Hence 33 | the "database" is a directory where *all* files are governed by PickleShare. 
34 | 35 | Example usage:: 36 | 37 | from pickleshare import * 38 | db = PickleShareDB('~/testpickleshare') 39 | db.clear() 40 | print("Should be empty:",db.items()) 41 | db['hello'] = 15 42 | db['aku ankka'] = [1,2,313] 43 | db['paths/are/ok/key'] = [1,(5,46)] 44 | print(db.keys()) 45 | 46 | This module is certainly not ZODB, but can be used for low-load 47 | (non-mission-critical) situations where tiny code size trumps the 48 | advanced features of a "real" object database. 49 | 50 | Installation guide: pip install pickleshare 51 | """, 52 | classifiers=[ 53 | 'License :: OSI Approved :: MIT License', 54 | 'Programming Language :: Python :: 2', 55 | 'Programming Language :: Python :: 2.7', 56 | 'Programming Language :: Python :: 3', 57 | ] 58 | ) 59 | -------------------------------------------------------------------------------- /pickleshare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ PickleShare - a small 'shelve' like datastore with concurrency support 4 | 5 | Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike 6 | shelve, many processes can access the database simultaneously. Changing a 7 | value in database is immediately visible to other processes accessing the 8 | same database. 9 | 10 | Concurrency is possible because the values are stored in separate files. Hence 11 | the "database" is a directory where *all* files are governed by PickleShare. 12 | 13 | Example usage:: 14 | 15 | from pickleshare import * 16 | db = PickleShareDB('~/testpickleshare') 17 | db.clear() 18 | print("Should be empty:",db.items()) 19 | db['hello'] = 15 20 | db['aku ankka'] = [1,2,313] 21 | db['paths/are/ok/key'] = [1,(5,46)] 22 | print(db.keys()) 23 | del db['aku ankka'] 24 | 25 | This module is certainly not ZODB, but can be used for low-load 26 | (non-mission-critical) situations where tiny code size trumps the 27 | advanced features of a "real" object database. 
Installation guide: pip install pickleshare

Author: Ville Vainio
License: MIT open source license.

"""

from __future__ import print_function


__version__ = "0.7.5"

try:
    from pathlib import Path
except ImportError:
    # Python 2 backport
    from pathlib2 import Path

import os
import stat
import time
try:
    import collections.abc as collections_abc
except ImportError:
    import collections as collections_abc
try:
    import cPickle as pickle
except ImportError:
    import pickle
import errno
import sys

if sys.version_info[0] >= 3:
    string_types = (str,)
else:
    string_types = (str, unicode)


def gethashfile(key):
    """ Return the bucket file name that hset/hget use for ``key``.

    The name is ``hash(key) % 256`` rendered as two lowercase hex digits
    ("00" .. "ff").

    NOTE(review): Python 3 salts str/bytes hashes per process unless
    PYTHONHASHSEED is fixed, so different processes can presumably pick
    different buckets for the same string key -- confirm whether callers
    rely on cross-process ``hget(..., fast_only=True)``.
    """
    # In Python the result of ``% 256`` is already within 0..255, so no
    # abs() or slicing is needed to get exactly two hex digits.
    return "%02x" % (hash(key) % 256)


_sentinel = object()


def _ensure_dir(path):
    """ Create directory ``path`` (with parents) unless it already exists.

    EEXIST is swallowed because another process may create the same folder
    between the ``is_dir`` check and ``mkdir``.  (``mkdir(exist_ok=True)``
    does the same, but only from Python 3.5.)
    """
    if path.is_dir():
        return
    try:
        path.mkdir(parents=True)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise


class PickleShareDB(collections_abc.MutableMapping):
    """ The main 'connection' object for PickleShare database

    Every key is a path relative to ``self.root``; every value is stored as
    a pickle in the file of that name.
    """

    def __init__(self, root):
        """ Return a db object that will manage the specified directory

        ``root`` may be a string or any object whose ``str()`` is a path;
        ``~`` is expanded and the directory is created when missing.
        """
        if not isinstance(root, string_types):
            root = str(root)
        root = os.path.abspath(os.path.expanduser(root))
        self.root = Path(root)
        _ensure_dir(self.root)
        # cache has { self.root / key : (obj, orig_mod_time) }
        self.cache = {}

    def __getitem__(self, key):
        """ db['key'] reading

        Raises KeyError when the file is missing or cannot be unpickled
        (for instance while another process is still writing it).

        SECURITY: values are loaded with pickle, so only point a
        PickleShareDB at directories whose contents you trust.
        """
        fil = self.root / key
        try:
            mtime = fil.stat()[stat.ST_MTIME]
        except OSError:
            raise KeyError(key)

        if fil in self.cache and mtime == self.cache[fil][1]:
            return self.cache[fil][0]
        try:
            # The cached item has expired, need to read
            with fil.open("rb") as f:
                obj = pickle.loads(f.read())
        except Exception:
            # Any read/unpickle failure means "no usable value"; unlike a
            # bare ``except:`` this lets KeyboardInterrupt/SystemExit through.
            raise KeyError(key)

        self.cache[fil] = (obj, mtime)
        return obj

    def __setitem__(self, key, value):
        """ db['key'] = 5 """
        fil = self.root / key
        _ensure_dir(fil.parent)
        # We specify protocol 2, so that we can mostly go between Python 2
        # and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete.
        with fil.open('wb') as f:
            pickle.dump(value, f, protocol=2)
        try:
            # NOTE(review): this stores the float ``st_mtime`` while
            # __getitem__ compares the integer ``stat()[ST_MTIME]``, so the
            # entry written here is normally treated as expired on the next
            # read.  Left unchanged; confirm before unifying the two.
            self.cache[fil] = (value, fil.stat().st_mtime)
        except OSError as e:
            # The file may already have been deleted by another process.
            if e.errno != errno.ENOENT:
                raise

    def hset(self, hashroot, key, value):
        """ hashed set

        Stores ``key: value`` in the bucket dict kept in the file
        ``hashroot/<gethashfile(key)>``.
        """
        hroot = self.root / hashroot
        _ensure_dir(hroot)
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, {})
        d[key] = value
        self[hfile] = d

    def hget(self, hashroot, key, default=_sentinel, fast_only=True):
        """ hashed get

        Looks ``key`` up in its bucket file under ``hashroot``.  When the
        bucket does not hold the key:

        * with ``fast_only`` true, ``default`` is returned, or KeyError is
          raised if no default was given;
        * otherwise the whole category is merged with ``hdict`` (slow, but
          works even after ``hcompress``) and searched.
        """
        hroot = self.root / hashroot
        hfile = hroot / gethashfile(key)

        d = self.get(hfile, _sentinel)
        if d is _sentinel or key not in d:
            if fast_only:
                if default is _sentinel:
                    raise KeyError(key)
                return default

            # slow mode ok, works even after hcompress()
            d = self.hdict(hashroot)

        value = d.get(key, default)
        if value is _sentinel:
            # Never leak the private sentinel to the caller.
            raise KeyError(key)
        return value

    def hdict(self, hashroot):
        """ Get all data contained in hashed category 'hashroot' as dict """
        hfiles = sorted(self.keys(hashroot + "/*"))
        if hfiles and hfiles[-1].endswith('xx'):
            # Read the compressed 'xx' file first so that the individual
            # bucket files read after it override its entries.
            hfiles = [hfiles[-1]] + hfiles[:-1]

        merged = {}
        for f in hfiles:
            try:
                merged.update(self[f])
            except KeyError:
                print("Corrupt", f, "deleted - hset is not threadsafe!")
                del self[f]

            self.uncache(f)

        return merged

    def hcompress(self, hashroot):
        """ Compress category 'hashroot', so hset is fast again

        All bucket files are merged into the single file ``hashroot/xx``
        and then removed.

        hget will fail if fast_only is True for compressed items (that were
        hset before hcompress).

        """
        hfiles = self.keys(hashroot + "/*")
        merged = {}
        for f in hfiles:
            merged.update(self[f])
            self.uncache(f)

        self[hashroot + '/xx'] = merged
        for f in hfiles:
            p = self.root / f
            if p.name == 'xx':
                continue
            p.unlink()

    def __delitem__(self, key):
        """ del db["key"] """
        fil = self.root / key
        self.cache.pop(fil, None)
        try:
            fil.unlink()
        except OSError:
            # notfound and permission denied are ok - we
            # lost, the other process wins the conflict
            pass

    def _normalized(self, p):
        """ Make a key suitable for user's eyes """
        return str(p.relative_to(self.root)).replace('\\', '/')

    def keys(self, globpat=None):
        """ All keys in DB, or all keys matching a glob

        Returns a list of '/'-separated keys, one per regular file found
        under the root directory.
        """
        if globpat is None:
            files = self.root.rglob('*')
        else:
            files = self.root.glob(globpat)
        return [self._normalized(p) for p in files if p.is_file()]

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def uncache(self, *items):
        """ Removes all, or specified items from cache

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.

        Items may be given as keys (as passed to ``db[key]``) or as the
        paths used internally as cache keys.
        """
        if not items:
            self.cache = {}
        for it in items:
            self.cache.pop(it, None)
            # The cache is keyed by ``self.root / key``, so a plain key has
            # to be resolved the same way before it can match an entry.
            try:
                self.cache.pop(self.root / it, None)
            except TypeError:
                # Not path-like; the direct pop above is all we can do.
                pass

    def waitget(self, key, maxwaittime=60):
        """ Wait (poll) for a key to get a value

        Will wait for `maxwaittime` seconds before raising a KeyError.
        The call exits normally if the `key` field in db gets a value
        within the timeout period.

        Use this for synchronizing different processes or for ensuring
        that an unfortunately timed "db['key'] = newvalue" operation
        in another process (which causes all 'get' operation to cause a
        KeyError for the duration of pickling) won't screw up your program
        logic.
        """
        # Poll quickly at first, then settle at one-second intervals.
        wtimes = [0.2] * 3 + [0.5] * 2 + [1]
        tries = 0
        waited = 0
        while True:
            try:
                return self[key]
            except KeyError:
                pass

            if waited > maxwaittime:
                raise KeyError(key)

            time.sleep(wtimes[tries])
            waited += wtimes[tries]
            if tries < len(wtimes) - 1:
                tries += 1

    def getlink(self, folder):
        """ Get a convenient link for accessing items """
        return PickleShareLink(self, folder)

    def __repr__(self):
        return "PickleShareDB('%s')" % self.root


class PickleShareLink:
    """ A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5

    """
    def __init__(self, db, keydir):
        # Write through __dict__ because __setattr__ stores into the db.
        self.__dict__.update(db=db, keydir=keydir)

    def __getattr__(self, key):
        return self.__dict__['db'][self.__dict__['keydir'] + '/' + key]

    def __setattr__(self, key, val):
        self.db[self.keydir + '/' + key] = val

    def __repr__(self):
        db = self.__dict__['db']
        keys = db.keys(self.__dict__['keydir'] + "/*")
        return "<PickleShareLink '%s': %s>" % (
            self.__dict__['keydir'],
            ";".join([Path(k).name for k in keys]))


def main():
    """ Command line entry point: dump, load or poll a PickleShare database.

    SECURITY: ``load`` feeds standard input to eval(), which executes
    arbitrary code; only load dumps that you produced yourself.
    """
    import textwrap
    usage = textwrap.dedent("""\
    pickleshare - manage PickleShare databases

    Usage:

        pickleshare dump /path/to/db > dump.txt
        pickleshare load /path/to/db < dump.txt
        pickleshare test /path/to/db
    """)
    DB = PickleShareDB
    if len(sys.argv) < 2:
        print(usage)
        return

    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd == 'dump':
        if not args:
            args = ['.']
        db = DB(args[0])
        import pprint
        # Materialize the items: on Python 3 ``db.items()`` is a view whose
        # repr does not show the contents.
        pprint.pprint(list(db.items()))
    elif cmd == 'load':
        if not args:
            print(usage)
            return
        cont = sys.stdin.read()
        db = DB(args[0])
        # SECURITY: eval() of external input, see the docstring above.
        data = eval(cont)
        db.clear()
        # dict() accepts both a mapping and a list of (key, value) pairs.
        for k, v in dict(data).items():
            db[k] = v
    elif cmd == 'testwait':
        if not args:
            print(usage)
            return
        db = DB(args[0])
        db.clear()
        print(db.waitget('250'))
    elif cmd == 'test':
        # The former in-module test()/stress() helpers no longer exist here.
        print("The test suite lives in test_pickleshare.py; run it with pytest.")


if __name__ == "__main__":
    main()