├── h5pyswmr
│   ├── test
│   │   ├── __init__.py
│   │   ├── test_parallel.py
│   │   ├── test_api.py
│   │   └── test_locks.py
│   ├── __init__.py
│   ├── exithandler.py
│   ├── h5pyswmr.py
│   └── locking.py
├── .gitignore
├── MANIFEST.in
├── util
│   ├── redis_delkeys.py
│   └── redis_showkeys.py
├── setup.py
├── CHANGES.txt
├── misc
│   └── sigterm.py
├── LICENSE
└── README.md
/h5pyswmr/test/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .*
2 | !/.gitignore
3 | *.pyc
4 | venv*
5 | build*
6 | dist*
7 | *.egg*
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.txt
2 | include *.md
3 | include *.rst
4 | recursive-include docs *.txt
--------------------------------------------------------------------------------
/h5pyswmr/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | A drop-in replacement for the h5py library. Allows "single write multiple
5 | read" (SWMR) access to hdf5 files.
6 | """
7 | 
8 | from __future__ import absolute_import
9 | 
10 | __version__ = "0.3.3"
11 | 
12 | try:
13 |     from h5pyswmr.h5pyswmr import File, Node, Dataset, Group
14 |     from h5pyswmr.test import test_api, test_locks, test_parallel
15 | except ImportError:
16 |     # imports fail during setup.py
17 |     pass
18 | 
19 | 
20 | def test():
21 |     test_locks.run()
22 |     test_parallel.run()
--------------------------------------------------------------------------------
/util/redis_delkeys.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | Deletes all redis keys and values. Use with care!! 
5 | """ 6 | 7 | import time 8 | import sys 9 | import os 10 | 11 | 12 | if __name__ == '__main__': 13 | # add parent directory to python path such that we can import modules 14 | HERE = os.path.dirname(os.path.realpath(__file__)) 15 | PROJ_PATH = os.path.abspath(os.path.join(HERE, '../')) 16 | sys.path.insert(0, PROJ_PATH) 17 | 18 | from h5pyswmr.locking import redis_conn as r 19 | 20 | for key in sorted(r.keys()): 21 | try: 22 | print('Deleting {0}'.format(key)) 23 | del r[key] 24 | except KeyError: 25 | pass 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Setup script 5 | """ 6 | import io 7 | import os 8 | 9 | try: 10 | from setuptools import setup 11 | except ImportError: 12 | from distutils.core import setup 13 | 14 | from h5pyswmr import __version__ 15 | 16 | here = os.path.abspath(os.path.dirname(__file__)) 17 | 18 | # Get the long description from the README file 19 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 20 | long_description = f.read() 21 | 22 | setup( 23 | name='h5pySWMR', 24 | version=__version__, 25 | author='Meteotest', 26 | packages=['h5pyswmr', 'h5pyswmr.test'], 27 | license='LICENSE.txt', 28 | long_description=long_description, 29 | install_requires=[ 30 | "cython>= 0.23.0", 31 | "h5py >= 2.5.0", 32 | "redis >= 2.10.3" 33 | ] 34 | ) 35 | -------------------------------------------------------------------------------- /util/redis_showkeys.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Display all redis keys and values 5 | """ 6 | 7 | import time 8 | import sys 9 | import os 10 | 11 | 12 | if __name__ == '__main__': 13 | # add parent directory to python path such that we can import modules 14 | HERE = os.path.dirname(os.path.realpath(__file__)) 15 | PROJ_PATH = os.path.abspath(os.path.join(HERE, '../')) 16 | sys.path.insert(0, PROJ_PATH) 17 | 18 | from h5pyswmr.locking import redis_conn as r 19 | 20 | while(True): 21 | sys.stderr.write("\x1b[2J\x1b[H") 22 | print("Redis server keys and values:") 23 | print("=============================") 24 | for key in sorted(r.keys()): 25 | try: 26 | print('{0}\t{1}'.format(key, r[key])) 27 | except KeyError: 28 | pass 29 | time.sleep(0.1) 30 | 31 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- 1 | Version 0.3.3 2 | ------------- 3 | 4 | * Fixed bug in Node.attrs.get() 5 | 6 | 7 | Version 0.3.2 8 | ------------- 9 | 10 | 2017-02-20 11 | 12 | Changes: 13 | 14 | * Allow multiple handlers for SIGTERM 15 | 16 | 17 | Version 0.3.1 18 | ------------- 19 | 20 | 2015-10-20 21 | 22 | Changes: 23 | 24 | * Minor modification to setup.py such that installation of requirements 25 | is done automatically. 26 | 27 | Version 0.3 28 | ----------- 29 | 30 | 2015-06-07 31 | 32 | Bug fixes: 33 | 34 | * Readers/writer synchronization is now more robust against unexpected process 35 | termination. 36 | 37 | Features: 38 | 39 | * improved documentation of readers/writer synchronization 40 | 41 | 42 | Version 0.2 43 | ----------- 44 | 45 | Bug fixes: 46 | 47 | * Library now processes SIGTERM signals to clean up locks. 48 | * Small improvements have been made to unit tests. 49 | * Code has been made more PEP8 compliant. 
50 | 51 | Features: 52 | 53 | None 54 | 55 | 56 | Version 0.1 57 | ----------- 58 | 59 | 2014-11-27 -- Initial release. 60 | -------------------------------------------------------------------------------- /misc/sigterm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import atexit 4 | import sys 5 | import time 6 | import os 7 | import signal 8 | 9 | # This is executed only with handle_exit() 10 | # @atexit.register 11 | # def cleanup(): 12 | # # ==== XXX ==== 13 | # # this never gets called 14 | # print "exiting" 15 | 16 | def main(): 17 | print("starting") 18 | time.sleep(1) 19 | print("committing suicide") 20 | os.kill(os.getpid(), signal.SIGTERM) 21 | print("not executed####") 22 | 23 | if __name__ == '__main__': 24 | # add ../.. directory to python path such that we can import the main 25 | # module 26 | HERE = os.path.dirname(os.path.realpath(__file__)) 27 | PROJ_PATH = os.path.abspath(os.path.join(HERE, '..')) 28 | sys.path.insert(0, PROJ_PATH) 29 | 30 | from h5pyswmr.exithandler import handle_exit 31 | 32 | with handle_exit(): 33 | try: 34 | main() 35 | except (KeyboardInterrupt, SystemExit): 36 | pass 37 | finally: 38 | # this gets called thanks to handle_exit() 39 | print("cleanup") 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 METEOTEST Corp. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /h5pyswmr/test/test_parallel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import unittest 4 | import sys 5 | import os 6 | import multiprocessing 7 | import tempfile 8 | import time 9 | import random 10 | 11 | import numpy as np 12 | 13 | if __name__ == '__main__': 14 | # add ../.. 
directory to python path such that we can import the main 15 | # module 16 | HERE = os.path.dirname(os.path.realpath(__file__)) 17 | PROJ_PATH = os.path.abspath(os.path.join(HERE, '../..')) 18 | sys.path.insert(0, PROJ_PATH) 19 | 20 | from h5pyswmr import File 21 | 22 | 23 | class TestHDF5(unittest.TestCase): 24 | 25 | def setUp(self): 26 | self.shape = (8000, 1500) 27 | 28 | def test_parallel(self): 29 | """ 30 | Test parallel read/write access 31 | """ 32 | 33 | tmpdir = tempfile.gettempdir() 34 | 35 | NO_WORKERS = 40 36 | filename = os.path.join(tmpdir, 'paralleltest827348723.h5') 37 | f = File(filename, 'w') 38 | # create some datasets (to test reading) 39 | for i in range(NO_WORKERS): 40 | f.create_dataset(name='/testgrp/dataset{}'.format(i), 41 | data=np.random.random(self.shape) 42 | .astype(np.float32)) 43 | 44 | def worker_read(i, hdf5file): 45 | """ reading worker """ 46 | time.sleep(random.random()) 47 | print("worker {0} is reading...".format(i)) 48 | data = hdf5file['/testgrp/dataset{}'.format(i)][:] 49 | print("worker {0} is done reading.".format(i)) 50 | self.assertEqual(data.shape, self.shape) 51 | 52 | def worker_write(i, hdf5file): 53 | """ writing worker """ 54 | # do some reading 55 | # print(hdf5file.keys()) 56 | # do some writing 57 | time.sleep(random.random()) 58 | data = np.empty((4, self.shape[0], self.shape[1]), dtype=np.int32) 59 | data[:] = i*100 60 | # modify existing dataset 61 | dst = hdf5file['/testgrp/dataset{}'.format(i)] 62 | print("worker {0} is writing...".format(i)) 63 | dst[0:50, ] = i 64 | print("worker {0} done writing.".format(i)) 65 | 66 | jobs = [] 67 | writers = [] 68 | print("") 69 | for i in range(NO_WORKERS): 70 | if i % 4 == 0: 71 | p = multiprocessing.Process(target=worker_write, args=(i, f)) 72 | writers.append(i) 73 | else: 74 | p = multiprocessing.Process(target=worker_read, args=(i, f)) 75 | jobs.append(p) 76 | p.start() 77 | # p.join() 78 | 79 | # wait until all processes have terminated 80 | while True: 81 | time.sleep(0.3) 82 | all_terminated = not max((job.is_alive() for job in jobs)) 83 | if all_terminated: 84 | break 85 | 86 | # then test if data was written correctly 87 | print("Testing if data was written correctly...") 88 | for i in writers: 89 | dst = f['/testgrp/dataset{}'.format(i)] 90 | self.assertTrue(np.all(dst[0:50, ] == i)) 91 | 92 | def tearDown(self): 93 | pass 94 | 95 | 96 | def run(): 97 | suite = unittest.TestLoader().loadTestsFromTestCase(TestHDF5) 98 | unittest.TextTestRunner(verbosity=2).run(suite) 99 | 100 | 101 | if __name__ == '__main__': 102 | run() 103 | -------------------------------------------------------------------------------- /h5pyswmr/test/test_api.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Unit test for .attrs wrapper. 5 | """ 6 | 7 | import unittest 8 | import sys 9 | import os 10 | import tempfile 11 | 12 | 13 | if __name__ == '__main__': 14 | # add ../.. 
directory to python path such that we can import the main 15 | # module 16 | HERE = os.path.dirname(os.path.realpath(__file__)) 17 | PROJ_PATH = os.path.abspath(os.path.join(HERE, '../..')) 18 | sys.path.insert(0, PROJ_PATH) 19 | 20 | from h5pyswmr import File 21 | 22 | 23 | class TestAPI(unittest.TestCase): 24 | """ 25 | Test h5pyswmr API 26 | """ 27 | 28 | def setUp(self): 29 | tmpdir = tempfile.gettempdir() 30 | self.filename = os.path.join(tmpdir, 'test_attrs.h5') 31 | with File(self.filename, 'w') as f: 32 | print("created {0}.".format(self.filename)) 33 | f.create_dataset(name='/bla', shape=(30, 30)) 34 | 35 | def test_attrs(self): 36 | """ 37 | Test .attrs property 38 | """ 39 | attrs = { 40 | 'bla': 3, 41 | 'blu': 'asasdfsa' 42 | } 43 | with File(self.filename, 'w') as f: 44 | dst = f.create_dataset(name='/testgrp/dataset', shape=(30, 30)) 45 | for key, value in attrs.items(): 46 | dst.attrs[key] = value 47 | 48 | with File(self.filename, 'r') as f: 49 | dst = f['/testgrp/dataset'] 50 | self.assertIn('bla', dst.attrs) 51 | self.assertEqual(dst.attrs['bla'], attrs['bla']) 52 | for key in dst.attrs: 53 | self.assertIn(key, attrs) 54 | 55 | # same test with a group 56 | with File(self.filename, 'a') as f: 57 | grp = f['/testgrp'] 58 | grp.attrs['bla'] = 3 59 | dst = grp.create_dataset(name='dataset2', shape=(30, 30)) 60 | self.assertIn('bla', grp.attrs) 61 | self.assertEqual(['bla'], grp.attrs.keys()) 62 | self.assertEqual(grp.attrs['bla'], 3) 63 | 64 | # def test_visit(self): 65 | # """ 66 | # Test visiting pattern 67 | # """ 68 | # # create some groups and datasets 69 | # with File(self.filename, 'a') as f: 70 | # g1 = f.create_group('/a/b/g1') 71 | # f.create_group('/a/b/g2') 72 | # f.create_group('/a/b/g3') 73 | # f.create_dataset(name='a/b/g1/dst1', shape=(30, 30)) 74 | # f.create_dataset(name='/a/b/g1/dst2', shape=(30, 30)) 75 | # f.create_dataset(name='/a/b/g2/dst1', shape=(30, 30)) 76 | 77 | # def foo(name): 78 | # print(name) 79 | 80 | # with File(self.filename, 'r') as f: 81 | # f.visit(foo) 82 | 83 | # def test_visititems(self): 84 | # """ 85 | # Test visititems() method 86 | # """ 87 | # # create some groups and datasets 88 | # with File(self.filename, 'a') as f: 89 | # g1 = f.create_group('/a/b/g1') 90 | # f.create_group('/a/b/g2') 91 | # f.create_group('/a/b/g3') 92 | # f.create_dataset(name='a/b/g1/dst1', shape=(30, 30)) 93 | # f.create_dataset(name='/a/b/g1/dst2', shape=(30, 30)) 94 | # f.create_dataset(name='/a/b/g2/dst1', shape=(30, 30)) 95 | 96 | # def foo(name, obj): 97 | # print(name) 98 | # print(obj) 99 | 100 | # with File(self.filename, 'r') as f: 101 | # f.visititems(foo) 102 | 103 | def test_items(self): 104 | """ 105 | Test items() method 106 | """ 107 | # create some groups and datasets 108 | with File(self.filename, 'a') as f: 109 | g1 = f.create_group('/a/b/g1') 110 | f.create_group('/a/b/g2') 111 | f.create_group('/a/b/g3') 112 | f.create_dataset(name='a/b/g1/dst1', shape=(30, 30)) 113 | f.create_dataset(name='/a/b/g1/dst2', shape=(30, 30)) 114 | f.create_dataset(name='/a/b/g2/dst1', shape=(30, 30)) 115 | 116 | for key, val in f.items(): 117 | print(key, val) 118 | 119 | def test_resize(self): 120 | ''' 121 | Test Dataset.resize() method 122 | ''' 123 | with File(self.filename, 'a') as f: 124 | start_size = (10,20) 125 | f.create_dataset(name='resizable',shape=start_size, maxshape=(50, 20)) 126 | dset = f['resizable'] 127 | new_size = (40, 20) 128 | dset.resize(new_size) 129 | self.assertEqual(dset.shape, new_size) 130 | 131 | 132 | def tearDown(self): 
133 |         # TODO remove self.filename
134 |         pass
135 | 
136 | 
137 | def run():
138 |     unittest.main()
139 | 
140 | 
141 | if __name__ == '__main__':
142 |     run()
143 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ! ! ! NO LONGER MAINTAINED ! ! !
2 | Check https://github.com/zarr-developers/zarr-python.
3 | 
4 | Parallel (single write multiple read) HDF5 for Python
5 | =====================================================
6 | 
7 | v0.3
8 | 
9 | h5pySWMR is a drop-in replacement for the [h5py](http://www.h5py.org) library.
10 | h5pySWMR synchronizes read and write access to HDF5 files. It allows parallel
11 | reading, but writing is serialized.
12 | With h5pySWMR, you can read and write HDF5 files from parallel
13 | processes (with threads, there are some limitations, see below) without having
14 | to fear data corruption. Note that, with plain h5py, concurrent reading and
15 | writing of the same file can result in data corruption.
16 | 
17 | Example:
18 | 
19 | ```python
20 | # replaces 'from h5py import File'
21 | from h5pyswmr import File
22 | 
23 | f = File('test.h5', 'w')
24 | # create a dataset containing a 500x700 random array
25 | f.create_dataset(name='/mygroup/mydataset', data=np.random.random((500, 700)))
26 | # read data back into memory
27 | data = f['/mygroup/mydataset'][:]
28 | # no need to explicitly close the file (files are opened/closed when accessed)
29 | ```
30 | 
31 | 
32 | 
33 | FAQ
34 | ---
35 | 
36 | #### When should I use h5pySWMR?
37 | 
38 | When you want to read and write hdf5 files at the same time, i.e.,
39 | from parallel processes.
40 | 
41 | #### Is h5pySWMR production ready?
42 | 
43 | Yes. Read section 'Limitations', though.
44 | 
45 | #### Is h5pySWMR Python 3 compatible?
46 | 
47 | Of course it is.
48 | 
49 | #### Does h5pySWMR require the MPI version of HDF5?
50 | 
51 | No.
52 | 
53 | #### Is h5pySWMR as fast as h5py?
54 | 
55 | Almost. There is a small overhead due to synchronization and because files
56 | must be opened/closed for every operation. This overhead is negligible,
57 | especially if you read/write large amounts of data.
58 | 
59 | #### What is HDF5 and what is h5py?
60 | 
61 | HDF5 (Hierarchical Data Format 5) is a binary file format designed to store
62 | large amounts of numerical raster data, i.e., arrays. It also allows storing
63 | data in so-called groups (hence the name "Hierarchical").
64 | h5py is a great library that provides Pythonic bindings to the HDF5 library.
65 | 
66 | #### How does h5pySWMR work?
67 | 
68 | Even though HDF5 (and h5py) does not allow parallel reading **and** writing,
69 | parallel reading is possible (with the restriction that files are opened
70 | only **after** processes are forked). This allows us — using appropriate
71 | synchronization techniques — to provide parallel reading and **serialized**
72 | writing, i.e., processes (reading or writing) are forced to wait while a file
73 | is being written to. This is sometimes called "single write multiple read"
74 | (SWMR). h5pySWMR implements a standard solution to the readers-writers problem,
75 | giving preference to writers. Check the 1971
76 | [paper](http://cs.nyu.edu/~lerner/spring10/MCP-S10-Read04-ReadersWriters.pdf)
77 | by Courtois, Heymans, and Parnas if you're interested.
78 | A [redis](http://www.redis.io)-server is used to implement inter-process locks
79 | and counters.
80 | 
81 | #### Why is it not on pypi?
82 | 
83 | It will be, soon...
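
#### Can you show a minimal parallel example?

The sketch below is illustrative only (file name, dataset path and number of
workers are arbitrary choices, and a running redis server is assumed); it
mirrors what `h5pyswmr/test/test_parallel.py` exercises. Readers run in
parallel, while the writer is serialized against them:

```python
# Illustrative sketch: parallel readers and one serialized writer.
import multiprocessing

import numpy as np

from h5pyswmr import File

FILENAME = 'swmr_example.h5'  # arbitrary example path


def read_worker(i):
    f = File(FILENAME, 'r')
    data = f['/grp/data'][:]      # parallel reads do not block each other
    print("reader {0} got shape {1}".format(i, data.shape))


def write_worker():
    f = File(FILENAME, 'r+')
    f['/grp/data'][0, :] = 42.0   # writes are serialized against all readers


if __name__ == '__main__':
    f = File(FILENAME, 'w')
    f.create_dataset(name='/grp/data', data=np.random.random((100, 200)))
    procs = [multiprocessing.Process(target=read_worker, args=(i,))
             for i in range(4)]
    procs.append(multiprocessing.Process(target=write_worker))
    for p in procs:
        p.start()
    for p in procs:
        p.join()
```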
84 | 85 | #### I found a bug, what should I do? 86 | 87 | Please open an issue on github. 88 | 89 | 90 | Limitations 91 | ----------- 92 | 93 | * True parallel reading can only be achieved with parallel processes. Thread 94 | concurrency is not supported. This is a limitation of h5py, which currently 95 | does not release the global interpreter lock (GIL) for I/O operations. 96 | * After a crash (or if the process is killed by sending a SIGKILL signal), the 97 | redis-based synchronization algorithm may end up in an inconsistent state. 98 | This can result in deadlocks or data corruption. 99 | Proper process termination (SIGTERM or pressing Ctrl+C) is fine, though. 100 | * Be careful when using h5pySWMR in a multithreaded environment. Signal 101 | handling does not work well with threads. Therefore, it is very likely that 102 | you end up with pending locks when you terminate threads during I/O 103 | operations. 104 | 105 | 106 | Differences between h5py and h5pySWMR 107 | ------------------------------------- 108 | 109 | In general, you could simply replace `import h5py` with `import h5pyswmr as h5py` 110 | and everything should work as expected. There are a few differences and 111 | limitations, though: 112 | 113 | * TODO 114 | 115 | 116 | Installation 117 | ------------ 118 | 119 | Using pip (globally or in a virtualenv): 120 | ``` 121 | $ pip install git+https://github.com/meteotest/h5pySWMR.git 122 | ``` 123 | 124 | Manually: 125 | ``` 126 | $ git clone https://github.com/meteotest/h5pySWMR.git 127 | $ python setup.py install 128 | ``` 129 | 130 | 131 | Running tests 132 | ------------- 133 | 134 | To make sure everything works as expected, run the following: 135 | 136 | ```python 137 | import h5pyswmr 138 | h5pyswmr.test() 139 | ``` 140 | 141 | Prerequisites 142 | ------------- 143 | 144 | It probably works with any recent version of Python, h5py, and redis. But I've 145 | only tested it with Python 2.7/3.4 and the following library versions: 146 | 147 | * h5py 2.3.1 to 2.5 148 | * redis 2.10.3 149 | 150 | See http://www.h5py.org for h5py requirements (basically NumPy, Cython and the HDF5 C-library). 151 | 152 | h5pyswmr also requires a running redis server (see below). 153 | 154 | 155 | Configuration of the redis server 156 | --------------------------------- 157 | 158 | Note that h5pyswmr is expecting a running redis server on 159 | `localhost:6379` (on Debian based systems, `apt-get install redis-server` is all you need to do). 160 | These settings are hard-coded but can be modified at run time 161 | (a more elegant solution will be provided in future versions): 162 | 163 | ```python 164 | import redis 165 | from h5pyswmr import locking 166 | 167 | # overwrite redis connection object 168 | locking.redis_conn = redis.StrictRedis(host='localhost', port=6666, db=0, 169 | decode_responses=True) 170 | ``` 171 | 172 | For performance reasons (after all, hdf5 is all about performance), 173 | you may want to keep the redis server on the same machine. 174 | -------------------------------------------------------------------------------- /h5pyswmr/test/test_locks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import print_function 4 | 5 | import unittest 6 | import sys 7 | import os 8 | from multiprocessing import Process 9 | import time 10 | import random 11 | import signal 12 | import uuid 13 | 14 | 15 | if __name__ == '__main__': 16 | # add ../.. 
directory to python path such that we can import the main 17 | # module 18 | HERE = os.path.dirname(os.path.realpath(__file__)) 19 | PROJ_PATH = os.path.abspath(os.path.join(HERE, '../..')) 20 | sys.path.insert(0, PROJ_PATH) 21 | 22 | from h5pyswmr.locking import reader, writer, redis_conn 23 | 24 | 25 | class DummyResource(object): 26 | """ 27 | Simulates reading and writing to a shared resource. 28 | """ 29 | 30 | def __init__(self, name): 31 | self.file = name 32 | 33 | @reader 34 | def read(self, worker_no, suicide=False): 35 | """ 36 | simulate reading 37 | 38 | Args: 39 | worker_no: worker number (for debugging) 40 | suicide: if True, then the current process will commit suicide 41 | while reading. This is useful for testing if the process 42 | does clean up its locks. 43 | """ 44 | pid = os.getpid() 45 | print(u"❤ {0}worker {1} (PID {2}) reading!" 46 | .format('suicidal ' if suicide else '', worker_no, pid)) 47 | if suicide: 48 | print(u"✟ Worker {0} (PID {1}) committing suicide..." 49 | .format(worker_no, pid)) 50 | os.kill(pid, signal.SIGTERM) 51 | else: 52 | time.sleep(random.random()) 53 | 54 | @writer 55 | def write(self, worker_no): 56 | """ 57 | simulate writing 58 | """ 59 | print(u"⚡ worker {0} writing!".format(worker_no)) 60 | time.sleep(random.random()) 61 | 62 | 63 | class TestLocks(unittest.TestCase): 64 | """ 65 | Unit test for locking module 66 | """ 67 | 68 | def test_locks(self): 69 | """ 70 | Test parallel read/write access 71 | """ 72 | res_name = 'test1234' 73 | resource = DummyResource(res_name) 74 | 75 | def worker_read(i, resource): 76 | """ reading worker """ 77 | pid = os.getpid() 78 | time.sleep(random.random() * 2) 79 | print(u"Worker {0}/{1} attempts to read...".format(i, pid)) 80 | if i % 13 == 1: 81 | resource.read(i, suicide=True) 82 | else: 83 | resource.read(i) 84 | 85 | def worker_write(i, resource): 86 | """ writing worker """ 87 | pid = os.getpid() 88 | time.sleep(random.random() * 2.4) 89 | print(u"Worker {0}/{1} tries to write...".format(i, pid)) 90 | resource.write(i) 91 | 92 | jobs = [] 93 | NO_WORKERS = 100 94 | for i in range(NO_WORKERS): 95 | if i % 6 == 1: 96 | p = Process(target=worker_write, args=(i, resource)) 97 | else: 98 | p = Process(target=worker_read, args=(i, resource)) 99 | p.start() 100 | jobs.append(p) 101 | 102 | # wait until all processes have terminated 103 | while True: 104 | time.sleep(0.3) 105 | all_terminated = not max((job.is_alive() for job in jobs)) 106 | if all_terminated: 107 | break 108 | 109 | # Verify if all locks have been released 110 | print("Testing if locks have been released...") 111 | for key in redis_conn.keys(): 112 | if res_name not in key: 113 | continue 114 | if (key == 'readcount__{0}'.format(res_name) 115 | or key == 'writecount__{0}'.format(res_name)): 116 | assert(redis_conn[key] == u'0') 117 | else: 118 | raise AssertionError("Lock '{0}' was not released!" 
119 | .format(key)) 120 | 121 | # def test_locks_manywriters(self): 122 | # """ 123 | # Test locking with many writers and only one reader 124 | # """ 125 | # res_name = 'testresource98352' 126 | # resource = DummyResource(res_name) 127 | 128 | # def worker_read(i, resource): 129 | # """ reading worker """ 130 | # print(u"Worker {0} attempts to read...".format(i)) 131 | # resource.read(i, suicide=True) 132 | 133 | # def worker_write(i, resource): 134 | # """ writing worker """ 135 | # print(u"Worker {0} tries to write...".format(i)) 136 | # resource.write(i) 137 | 138 | # pid = os.getpid() 139 | # print("\nMain process has PID {0}".format(pid)) 140 | # jobs = [] 141 | # NO_WORKERS = 30 142 | # for i in range(NO_WORKERS): 143 | # if i == 10: 144 | # p = Process(target=worker_read, args=(i, resource)) 145 | # else: 146 | # p = Process(target=worker_write, args=(i, resource)) 147 | # p.start() 148 | # jobs.append(p) 149 | 150 | # # wait until all processes have terminated 151 | # while True: 152 | # time.sleep(0.3) 153 | # all_terminated = not max((job.is_alive() for job in jobs)) 154 | # if all_terminated: 155 | # break 156 | 157 | # # Verify if all locks have been released 158 | # print("Testing if locks have been released...") 159 | # # TODO 160 | # for key in redis_conn.keys(): 161 | # if res_name not in key: 162 | # continue 163 | # if (key == 'readcount__{0}'.format(res_name) 164 | # or key == 'writecount__{0}'.format(res_name)): 165 | # assert(redis_conn[key] == u'0') 166 | # else: 167 | # raise AssertionError("Lock '{0}' has not been released!" 168 | # .format(key)) 169 | 170 | 171 | def run(): 172 | suite = unittest.TestLoader().loadTestsFromTestCase(TestLocks) 173 | unittest.TextTestRunner(verbosity=2).run(suite) 174 | 175 | 176 | if __name__ == '__main__': 177 | run() 178 | -------------------------------------------------------------------------------- /h5pyswmr/exithandler.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | SIGTERM handler. Makes sure that reader/writer synchronization remains in a 5 | consistent state after process termination. Note that deadlocks or data 6 | corruption may still occur if processes are killed (SIGKILL / kill -9). 7 | 8 | Special thanks to Giampaolo Rodola for this code: 9 | http://code.activestate.com/recipes/577997-handle-exit-context-manager/ 10 | """ 11 | 12 | import contextlib 13 | import signal 14 | import sys 15 | import threading 16 | import warnings 17 | 18 | 19 | def _sigterm_handler(signum, frame): 20 | sys.exit(0) 21 | 22 | _sigterm_handler.__enter_ctx__ = False 23 | 24 | 25 | @contextlib.contextmanager 26 | def handle_exit(callback=None, append=False): 27 | """A context manager which properly handles SIGTERM and SIGINT 28 | (KeyboardInterrupt) signals, registering a function which is 29 | guaranteed to be called after signals are received. 30 | Also, it makes sure to execute previously registered signal 31 | handlers as well (if any). 32 | 33 | >>> app = App() 34 | >>> with handle_exit(app.stop): 35 | ... app.start() 36 | ... 37 | >>> 38 | 39 | If append == False raise RuntimeError if there's already a handler 40 | registered for SIGTERM, otherwise both new and old handlers are 41 | executed in this order. 42 | """ 43 | t = threading.current_thread() 44 | if t.name != 'MainThread': 45 | warnings.warn("!!! h5pySWMR warning: SIGTERM handling does not (yet) " 46 | "work in a threaded environment. 
Locks may not be " 47 | "released after process termination.", UserWarning) 48 | yield 49 | return 50 | 51 | old_handler = signal.signal(signal.SIGTERM, _sigterm_handler) 52 | if old_handler != signal.SIG_DFL and old_handler != _sigterm_handler: 53 | if not append: 54 | raise RuntimeError("there is already a handler registered for " 55 | "SIGTERM: %r" % old_handler) 56 | 57 | def handler(signum, frame): 58 | try: 59 | _sigterm_handler(signum, frame) 60 | finally: 61 | old_handler(signum, frame) 62 | signal.signal(signal.SIGTERM, handler) 63 | 64 | if _sigterm_handler.__enter_ctx__: 65 | raise RuntimeError("can't use nested contexts") 66 | _sigterm_handler.__enter_ctx__ = True 67 | 68 | try: 69 | yield 70 | except KeyboardInterrupt: 71 | pass 72 | except SystemExit as err: 73 | # code != 0 refers to an application error (e.g. explicit 74 | # sys.exit('some error') call). 75 | # We don't want that to pass silently. 76 | # Nevertheless, the 'finally' clause below will always 77 | # be executed. 78 | if err.code != 0: 79 | raise 80 | finally: 81 | _sigterm_handler.__enter_ctx__ = False 82 | if callback is not None: 83 | callback() 84 | 85 | 86 | if __name__ == '__main__': 87 | # =============================================================== 88 | # --- test suite 89 | # =============================================================== 90 | 91 | import unittest 92 | import time 93 | import os 94 | 95 | class TestOnExit(unittest.TestCase): 96 | 97 | def setUp(self): 98 | # reset signal handlers 99 | signal.signal(signal.SIGTERM, signal.SIG_DFL) 100 | self.flag = None 101 | 102 | def tearDown(self): 103 | # make sure we exited the ctx manager 104 | self.assertTrue(self.flag is not None) 105 | 106 | def test_base(self): 107 | with handle_exit(): 108 | pass 109 | self.flag = True 110 | 111 | def test_callback(self): 112 | callback = [] 113 | with handle_exit(lambda: callback.append(None)): 114 | pass 115 | self.flag = True 116 | self.assertEqual(callback, [None]) 117 | 118 | def test_kinterrupt(self): 119 | with handle_exit(): 120 | raise KeyboardInterrupt 121 | self.flag = True 122 | 123 | def test_sigterm(self): 124 | with handle_exit(): 125 | os.kill(os.getpid(), signal.SIGTERM) 126 | self.flag = True 127 | 128 | def test_sigterm_complex(self): 129 | 130 | def handler(): 131 | print("handler") 132 | 133 | with handle_exit(handler): 134 | time.sleep(2) 135 | os.kill(os.getpid(), signal.SIGTERM) 136 | 137 | self.flag = True 138 | 139 | def test_sigint(self): 140 | with handle_exit(): 141 | os.kill(os.getpid(), signal.SIGINT) 142 | self.flag = True 143 | 144 | def test_sigterm_old(self): 145 | # make sure the old handler gets executed 146 | queue = [] 147 | signal.signal(signal.SIGTERM, lambda s, f: queue.append('old')) 148 | with handle_exit(lambda: queue.append('new'), append=True): 149 | os.kill(os.getpid(), signal.SIGTERM) 150 | self.flag = True 151 | self.assertEqual(queue, ['old', 'new']) 152 | 153 | def test_sigint_old(self): 154 | # make sure the old handler gets executed 155 | queue = [] 156 | signal.signal(signal.SIGINT, lambda s, f: queue.append('old')) 157 | with handle_exit(lambda: queue.append('new'), append=True): 158 | os.kill(os.getpid(), signal.SIGINT) 159 | self.flag = True 160 | self.assertEqual(queue, ['old', 'new']) 161 | 162 | def test_no_append(self): 163 | # make sure we can't use the context manager if there's 164 | # already a handler registered for SIGTERM 165 | signal.signal(signal.SIGTERM, lambda s, f: sys.exit(0)) 166 | try: 167 | with handle_exit(lambda: 
self.flag.append(None)): 168 | pass 169 | except RuntimeError: 170 | pass 171 | else: 172 | self.fail("exception not raised") 173 | finally: 174 | self.flag = True 175 | 176 | def test_nested_context(self): 177 | self.flag = True 178 | try: 179 | with handle_exit(): 180 | with handle_exit(): 181 | pass 182 | except RuntimeError: 183 | pass 184 | else: 185 | self.fail("exception not raised") 186 | 187 | unittest.main() 188 | -------------------------------------------------------------------------------- /h5pyswmr/h5pyswmr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Wrapper around h5py that synchronizes reading and writing of hdf5 files 5 | (parallel reading is possible, writing is serialized) 6 | 7 | Access to hdf5 files is synchronized by a solution to the readers/writers 8 | problem, 9 | cf. http://en.wikipedia.org/wiki/Readers%E2%80%93writers_problem 10 | #The_second_readers-writers_problem 11 | 12 | !!! IMPORTANT !!! 13 | Note that the locks used are not recursive/reentrant. Therefore, a synchronized 14 | method (decorated by @reader or @writer) must *not* call other synchronized 15 | methods, otherwise we get a deadlock! 16 | """ 17 | 18 | from __future__ import absolute_import 19 | 20 | import os 21 | 22 | import h5py 23 | 24 | from h5pyswmr.locking import reader, writer 25 | 26 | 27 | class Node(object): 28 | """ 29 | Wrapper for h5py.Node 30 | """ 31 | 32 | def __init__(self, file, path): 33 | """ 34 | Args: 35 | file: full path to hdf5 file 36 | path: full path to the hdf5 node (not to be confused with path of 37 | the file) 38 | """ 39 | self.file = file 40 | self._path = path 41 | self.attrs = AttributeManager(self.file, self._path) 42 | 43 | @reader 44 | def __getitem__(self, key): 45 | """ 46 | Raises: 47 | KeyError if object does not exist. 48 | """ 49 | # sometimes the underlying hdf5 C library writes errors to stdout, 50 | # e.g., if a path is not found in a file. 51 | # cf. http://stackoverflow.com/questions/15117128/ 52 | # h5py-in-memory-file-and-multiprocessing-error 53 | h5py._errors.silence_errors() 54 | 55 | if key.startswith('/'): # absolute path 56 | path = key 57 | else: # relative path 58 | path = os.path.join(self.path, key) 59 | 60 | with h5py.File(self.file, 'r') as f: 61 | node = f[path] 62 | return self._wrap_class(node) 63 | 64 | @property 65 | def path(self): 66 | """ 67 | wrapper 68 | """ 69 | return self._path 70 | 71 | def _wrap_class(self, node): 72 | """ 73 | Wraps h5py objects into h5pyswmr objects. 
74 | 75 | Args: 76 | node: instance of h5py.Group or h5py.Dataset 77 | 78 | Returns: 79 | Corresponding object as a h5pyswmr object 80 | 81 | Raises: 82 | TypeError if ``obj`` is of unknown type 83 | """ 84 | if isinstance(node, h5py.Group): 85 | return Group(file=self.file, path=node.name) 86 | elif isinstance(node, h5py.Dataset): 87 | return Dataset(file=self.file, path=node.name) 88 | else: 89 | raise TypeError('not implemented!') 90 | 91 | 92 | class Group(Node): 93 | """ 94 | Wrapper for h5py.Group 95 | """ 96 | 97 | def __init__(self, file, path): 98 | Node.__init__(self, file, path) 99 | 100 | def __repr__(self): 101 | return "".format(self.path) 102 | 103 | @writer 104 | def create_group(self, name): 105 | with h5py.File(self.file, 'r+') as f: 106 | group = f[self.path] 107 | created_group = group.create_group(name) 108 | path = created_group.name 109 | 110 | return Group(self.file, path=path) 111 | 112 | @writer 113 | def require_group(self, name): 114 | with h5py.File(self.file, 'r+') as f: 115 | group = f[self.path] 116 | created_group = group.require_group(name) 117 | path = created_group.name 118 | 119 | return Group(self.file, path=path) 120 | 121 | @writer 122 | def create_dataset(self, **kwargs): 123 | overwrite = kwargs.get('overwrite', False) 124 | name = kwargs['name'] 125 | # remove additional arguments because they are not supported by h5py 126 | try: 127 | del kwargs['overwrite'] 128 | except Exception: 129 | pass 130 | with h5py.File(self.file, 'r+') as f: 131 | group = f[self.path] 132 | if overwrite and name in group: 133 | del group[name] 134 | dst = group.create_dataset(**kwargs) 135 | path = dst.name 136 | 137 | return Dataset(self.file, path=path) 138 | 139 | @writer 140 | def require_dataset(self, **kwargs): 141 | with h5py.File(self.file, 'r+') as f: 142 | group = f[self.path] 143 | dst = group.require_dataset(**kwargs) 144 | path = dst.name 145 | return Dataset(self.file, path=path) 146 | 147 | @reader 148 | def keys(self): 149 | with h5py.File(self.file, 'r') as f: 150 | # w/o list() it does not work with py3 (returns a view on a closed 151 | # hdf5 file) 152 | return list(f[self.path].keys()) 153 | 154 | # TODO does not yet work because @reader methods are not reentrant! 155 | # @reader 156 | # def visit(self, func): 157 | # """ 158 | # Wrapper around h5py.Group.vist() 159 | 160 | # Args: 161 | # func: a unary function 162 | # """ 163 | # with h5py.File(self.file, 'r') as f: 164 | # return f[self.path].visit(func) 165 | 166 | # @reader 167 | # def visititems(self, func): 168 | # """ 169 | # Wrapper around h5py.Group.visititems() 170 | 171 | # Args: 172 | # func: a 2-ary function 173 | # """ 174 | # with h5py.File(self.file, 'r') as f: 175 | # grp = f[self.path] 176 | # def proxy(name): 177 | # obj = self._wrap_class(grp[name]) 178 | # return func(name, obj) 179 | # return self.visit(proxy) 180 | 181 | @reader 182 | def items(self): 183 | """ 184 | Returns a list of (name, object) pairs for objects directly 185 | attached to this group. Values for broken soft or external links 186 | show up as None. 187 | Note that this differs from h5py, where a list (Py2) or a 188 | "set-like object" (Py3) is returned. 
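        A rough usage sketch ('data.h5' is an arbitrary example file name):

            with File('data.h5', 'r') as f:
                for name, obj in f.items():
                    print(name, obj)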
189 | """ 190 | result = [] 191 | with h5py.File(self.file, 'r') as f: 192 | for name, obj in f[self.path].items(): 193 | result.append((name, self._wrap_class(obj))) 194 | 195 | return result 196 | 197 | @reader 198 | def __contains__(self, key): 199 | with h5py.File(self.file, 'r') as f: 200 | group = f[self.path] 201 | return key in group 202 | 203 | @writer 204 | def __delitem__(self, key): 205 | with h5py.File(self.file, 'r+') as f: 206 | group = f[self.path] 207 | del group[key] 208 | 209 | 210 | class File(Group): 211 | """ 212 | Wrapper for h5py.File 213 | """ 214 | 215 | def __init__(self, *args, **kwargs): 216 | """ 217 | try to open/create an h5py.File object 218 | note that this must be synchronized! 219 | """ 220 | # this is crucial for the @writer annotation 221 | self.file = args[0] 222 | 223 | # TODO this creates an exclusive lock every time the file is read!! 224 | 225 | @writer 226 | def init(self): 227 | with h5py.File(*args, **kwargs) as f: 228 | Group.__init__(self, f.filename, '/') 229 | init(self) 230 | 231 | def __enter__(self): 232 | """ 233 | simple context manager (so we can use 'with File() as f') 234 | """ 235 | return self 236 | 237 | def __exit__(self, type, value, tb): 238 | pass 239 | 240 | def __repr__(self): 241 | return "".format(self.file) 242 | 243 | 244 | class Dataset(Node): 245 | """ 246 | Wrapper for h5py.Dataset 247 | """ 248 | 249 | def __init__(self, file, path): 250 | Node.__init__(self, file, path) 251 | 252 | @reader 253 | def __getitem__(self, slice): 254 | """ 255 | implement multidimensional slicing for datasets 256 | """ 257 | with h5py.File(self.file, 'r') as f: 258 | return f[self.path][slice] 259 | 260 | @writer 261 | def __setitem__(self, slice, value): 262 | """ 263 | Broadcasting for datasets. Example: mydataset[0,:] = np.arange(100) 264 | """ 265 | with h5py.File(self.file, 'r+') as f: 266 | f[self.path][slice] = value 267 | 268 | @writer 269 | def resize(self, size, axis=None): 270 | with h5py.File(self.file, 'r+') as f: 271 | f[self.path].resize(size, axis) 272 | 273 | @property 274 | @reader 275 | def shape(self): 276 | with h5py.File(self.file, 'r') as f: 277 | return f[self.path].shape 278 | 279 | @property 280 | @reader 281 | def dtype(self): 282 | with h5py.File(self.file, 'r') as f: 283 | return f[self.path].dtype 284 | 285 | 286 | class AttributeManager(object): 287 | """ 288 | Provides same features as AttributeManager from h5py. 289 | """ 290 | 291 | def __init__(self, h5file, path): 292 | """ 293 | Args: 294 | h5file: file name of hdf5 file 295 | path: full path to hdf5 node 296 | """ 297 | self.file = h5file 298 | self.path = path 299 | 300 | @reader 301 | def __iter__(self): 302 | # In order to be compatible with h5py, we return a generator. 303 | # However, to preserve thread-safety, we must make sure that the hdf5 304 | # file is closed while the generator is being traversed. 
305 | with h5py.File(self.file, 'r') as f: 306 | node = f[self.path] 307 | keys = [key for key in node.attrs] 308 | 309 | return (key for key in keys) 310 | 311 | @reader 312 | def keys(self): 313 | """ 314 | Returns attribute keys (list) 315 | """ 316 | with h5py.File(self.file, 'r') as f: 317 | node = f[self.path] 318 | return list(node.attrs.keys()) 319 | 320 | @reader 321 | def __contains__(self, key): 322 | with h5py.File(self.file, 'r') as f: 323 | node = f[self.path] 324 | return key in node.attrs 325 | 326 | @reader 327 | def __getitem__(self, key): 328 | with h5py.File(self.file, 'r') as f: 329 | node = f[self.path] 330 | return node.attrs[key] 331 | 332 | @writer 333 | def __setitem__(self, key, value): 334 | with h5py.File(self.file, 'r+') as f: 335 | node = f[self.path] 336 | node.attrs[key] = value 337 | 338 | @writer 339 | def __delitem__(self, key): 340 | with h5py.File(self.file, 'r+') as f: 341 | node = f[self.path] 342 | del node.attrs[key] 343 | 344 | @reader 345 | def get(self, key, defaultvalue): 346 | """ 347 | Return attribute value or return a default value if key is missing. 348 | Args: 349 | key: attribute key 350 | defaultvalue: default value to be returned if key is missing 351 | """ 352 | with h5py.File(self.file, 'r') as f: 353 | node = f[self.path] 354 | return node.attrs.get(key, defaultvalue) 355 | -------------------------------------------------------------------------------- /h5pyswmr/locking.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | Cross-process readers/writer synchronization. 5 | The algorithm implemented is "Problem 2" in the following paper: 6 | http://cs.nyu.edu/~lerner/spring10/MCP-S10-Read04-ReadersWriters.pdf 7 | Note that the proposed solution works for threads accessing a shared resource. 8 | To get a working solution for process-based concurrency, one has to deal 9 | with (unexpected) process termination, which makes our solution slightly 10 | more involved. 11 | 12 | Lock/semaphore implementation based on redis server. 13 | Using redis allows locks to be shared among processes, even if processes are 14 | not forked from a common parent process. 15 | Redis locks inspired by: 16 | http://www.dr-josiah.com/2012/01/creating-lock-with-redis.html 17 | http://redis.io/topics/distlock 18 | 19 | Note that — in addition to a lock name — every acquire/release operation 20 | requires an identifier. This guarantees that a lock can only be released by 21 | the client (process/thread) that acquired it. Unless, of course, 22 | the identifier is known to other clients as well (which is also a reasonable 23 | use case). 24 | For example a client may acquire a lock and be busy with an expensive 25 | operation that takes longer than the lock's timeout. This causes the lock 26 | to be automatically released. After that happened, another client may 27 | acquire the same lock (with a different identifier). The different 28 | identifier now prohibits the first client from releasing the lock, which 29 | is good because the second client may be performing critical operations. 30 | This reduces (but does not eliminate) potential damage. Clearly, 31 | programmers should make sure that clients do not exceed lock timeouts. 
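
For illustration, a rough sketch of how such an identifier is used with the
helper functions defined below (the lock name and the identifier are arbitrary
examples, and a running redis server reachable via redis_conn is assumed):

    from h5pyswmr.locking import redis_conn, acquire_lock, release_lock

    ident = acquire_lock(redis_conn, 'mylock__example.h5', 'client-42')
    if ident:
        try:
            pass  # critical section
        finally:
            release_lock(redis_conn, 'mylock__example.h5', 'client-42')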
32 | """ 33 | 34 | import os 35 | import time 36 | import contextlib 37 | import uuid 38 | from functools import wraps 39 | 40 | import redis 41 | 42 | from .exithandler import handle_exit 43 | 44 | 45 | # we make sure that redis connections do not time out 46 | redis_conn = redis.StrictRedis(host='localhost', port=6379, db=0, 47 | decode_responses=True) # important for Python3 48 | 49 | 50 | APPEND_SIGHANDLER = True 51 | DEFAULT_TIMEOUT = 20 # seconds 52 | ACQ_TIMEOUT = 15 53 | 54 | 55 | # note that the process releasing the read/write lock may not be the 56 | # same as the one that acquired it, so the identifier may have 57 | # changed and the lock is never released! 58 | # => we use an identifier unique to all readers/writers. 59 | WRITELOCK_ID = 'id_reader' 60 | READLOCK_ID = 'id_writer' 61 | 62 | 63 | def reader(f): 64 | """ 65 | Decorates methods reading an HDF5 file. 66 | """ 67 | 68 | @wraps(f) 69 | def func_wrapper(self, *args, **kwargs): 70 | """ 71 | Wraps reading functions. 72 | """ 73 | # names of locks 74 | mutex3 = 'mutex3__{}'.format(self.file) 75 | mutex1 = 'mutex1__{}'.format(self.file) 76 | readcount = 'readcount__{}'.format(self.file) 77 | r = 'r__{}'.format(self.file) 78 | w = 'w__{}'.format(self.file) 79 | 80 | with handle_exit(append=APPEND_SIGHANDLER): 81 | # Note that try/finally must cover incrementing readcount as well 82 | # as acquiring w. Otherwise readcount/w cannot be 83 | # decremented/released if program execution ends, e.g., while 84 | # performing reading operation (because of a SIGTERM signal, for 85 | # example). 86 | readcount_val = None 87 | try: 88 | with redis_lock(redis_conn, mutex3): 89 | with redis_lock(redis_conn, r): 90 | # mutex1's purpose is to make readcount++ together with 91 | # the readcount == 1 check atomic 92 | with redis_lock(redis_conn, mutex1): 93 | readcount_val = redis_conn.incr(readcount, amount=1) 94 | 95 | # testing if locks/counters are cleaned up in case 96 | # of abrupt process termination 97 | # print("killing myself in 5 seconds...") 98 | # time.sleep(5) 99 | # os.kill(os.getpid(), signal.SIGTERM) 100 | 101 | # first reader sets the w lock to block writers 102 | if readcount_val == 1: 103 | if not acquire_lock(redis_conn, w, WRITELOCK_ID): 104 | raise LockException("could not acquire write lock " 105 | " {0}".format(w)) 106 | result = f(self, *args, **kwargs) # critical section 107 | return result 108 | finally: 109 | # if readcount was incremented above, we have to decrement it. 110 | # Also, if we are the last reader, we have to release w to open 111 | # the gate for writers. 112 | if readcount_val is not None: 113 | # again, mutex1's purpose is to make readcount-- and the 114 | # subsequent check atomic. 115 | with redis_lock(redis_conn, mutex1): 116 | readcount_val = redis_conn.decr(readcount, amount=1) 117 | if readcount_val == 0: 118 | if not release_lock(redis_conn, w, WRITELOCK_ID): 119 | # Note that it's possible that, even though 120 | # readcount was > 0, w was not set. This can 121 | # happen if – during execution of the code 122 | # above – a process terminated after 123 | # readcount++ but before acquiring w. 124 | # TODO what should we do? print a notification? 125 | print("Warning: {0} was lost or was not " 126 | "acquired in the first place".format(w)) 127 | 128 | return func_wrapper 129 | 130 | 131 | def writer(f): 132 | """ 133 | Decorates methods writing to an HDF5 file. 134 | """ 135 | 136 | @wraps(f) 137 | def func_wrapper(self, *args, **kwargs): 138 | """ 139 | Wraps writing functions. 
140 | """ 141 | # names of locks 142 | mutex2 = 'mutex2__{}'.format(self.file) 143 | # note that writecount may be > 1 as it also counts the waiting writers 144 | writecount = 'writecount__{}'.format(self.file) 145 | r = 'r__{}'.format(self.file) 146 | w = 'w__{}'.format(self.file) 147 | 148 | with handle_exit(append=APPEND_SIGHANDLER): 149 | writecount_val = None 150 | try: 151 | # mutex2's purpose is to make writecount++ together with 152 | # the writecount == 1 check atomic 153 | with redis_lock(redis_conn, mutex2): 154 | writecount_val = redis_conn.incr(writecount, amount=1) 155 | # first writer sets r to block readers 156 | if writecount_val == 1: 157 | if not acquire_lock(redis_conn, r, READLOCK_ID): 158 | raise LockException("could not acquire read lock {0}" 159 | .format(r)) 160 | 161 | with redis_lock(redis_conn, w): 162 | # perform writing operation 163 | return_val = f(self, *args, **kwargs) 164 | return return_val 165 | finally: 166 | # if writecount was incremented above, we have to decrement it. 167 | # Also, if we are the last writer, we have to release r to open 168 | # the gate for readers. 169 | if writecount_val is not None: 170 | with redis_lock(redis_conn, mutex2): 171 | writecount_val = redis_conn.decr(writecount, amount=1) 172 | if writecount_val == 0: 173 | if not release_lock(redis_conn, r, READLOCK_ID): 174 | # Note that it's possible that, even though 175 | # writecount was > 0, r was not set. This can 176 | # happen if – during execution of the code 177 | # above – a process terminated after 178 | # writecount++ but before acquiring w. 179 | # TODO what should we do? print a notification? 180 | print("Warning: {0} was lost or was not " 181 | "acquired in the first place".format(r)) 182 | 183 | return func_wrapper 184 | 185 | 186 | def acquire_lock(conn, lockname, identifier, acq_timeout=ACQ_TIMEOUT, 187 | timeout=DEFAULT_TIMEOUT): 188 | """ 189 | Wait for and acquire a lock. Returns identifier on success and False 190 | on failure. 191 | 192 | Args: 193 | conn: redis connection object 194 | lockname: name of the lock 195 | identifier: an identifier that will be required in order to release 196 | the lock. 197 | acq_timeout: timeout for acquiring the lock. If lock could not be 198 | acquired during *atime* seconds, False is returned. 199 | timeout: timeout of the lock in seconds. The lock is automatically 200 | released after *ltime* seconds. Make sure your operation does 201 | not take longer than the timeout! 202 | 203 | Returns: 204 | ``identifier`` on success or False on failure 205 | """ 206 | end = time.time() + acq_timeout 207 | while end > time.time(): 208 | if conn.setnx(lockname, identifier): 209 | conn.expire(lockname, timeout) 210 | return identifier 211 | elif not conn.ttl(lockname): 212 | conn.expire(lockname, timeout) 213 | # could not acquire lock, go to sleep and try again later... 214 | time.sleep(.001) 215 | 216 | return False 217 | 218 | 219 | def release_lock(conn, lockname, identifier): 220 | """ 221 | Signal/release a lock. 222 | 223 | Args: 224 | conn: redi connection 225 | lockname: name of the lock to be released 226 | identifier: lock will only be released if identifier matches the 227 | identifier that was provided when the lock was acquired. 
228 | 
229 |     Returns:
230 |         True on success, False on failure
231 |     """
232 | 
233 |     pipe = conn.pipeline(True)
234 |     while True:
235 |         try:
236 |             pipe.watch(lockname)
237 |             if pipe.get(lockname) == identifier:
238 |                 pipe.multi()
239 |                 pipe.delete(lockname)
240 |                 pipe.execute()
241 |                 return True
242 |             else:
243 |                 pipe.unwatch()
244 |                 return False  # we lost the lock
245 |         except redis.exceptions.WatchError as e:
246 |             raise e
247 | 
248 | 
249 | class LockException(Exception):
250 |     """
251 |     Raised when a lock could not be acquired or when a lock is lost.
252 |     """
253 |     pass
254 | 
255 | 
256 | @contextlib.contextmanager
257 | def redis_lock(conn, lockname, acq_timeout=DEFAULT_TIMEOUT,
258 |                timeout=DEFAULT_TIMEOUT):
259 |     """
260 |     Allows mutually exclusive execution of code blocks using 'with' syntax:
261 | 
262 |         with redis_lock(redis_conn, 'mylock'):
263 |             # critical section...
264 | 
265 |     Args:
266 |         conn: redis connection object
267 |         lockname: name of the lock
268 |         acq_timeout: timeout for acquiring the lock. If lock could not be
269 |             acquired during *acq_timeout* seconds, False is returned.
270 |         timeout: timeout of the lock in seconds. The lock is automatically
271 |             released after *timeout* seconds. Make sure your operation does
272 |             not take longer than the timeout!
273 |     """
274 | 
275 |     # generate (random) unique identifier, prefixed by current PID (allows
276 |     # cleaning up locks before process is being killed)
277 |     pid = os.getpid()
278 |     identifier = 'pid{0}_{1}'.format(pid, str(uuid.uuid4()))
279 |     if acquire_lock(conn, lockname, identifier, acq_timeout,
280 |                     timeout) != identifier:
281 |         raise LockException("could not acquire lock {0}".format(lockname))
282 |     try:
283 |         yield identifier
284 |     finally:
285 |         if not release_lock(conn, lockname, identifier):
286 |             raise LockException("lock {0} was lost".format(lockname))
287 | 
--------------------------------------------------------------------------------
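
A minimal usage sketch for the locking module above (the class and its method
names are illustrative, not part of the library; a redis server on
localhost:6379 is assumed): any object exposing a `file` attribute can have its
methods synchronized with the `reader`/`writer` decorators, the same pattern
the HDF5 wrappers in `h5pyswmr/h5pyswmr.py` and the test suite in
`h5pyswmr/test/test_locks.py` rely on.

```python
# Illustrative sketch: reader/writer synchronization on an arbitrary resource.
from h5pyswmr.locking import reader, writer


class SharedResource(object):
    """Lock names are derived from self.file by the decorators."""

    def __init__(self, name):
        self.file = name

    @reader
    def read(self):
        # many processes may run this section concurrently
        print("reading {0}".format(self.file))

    @writer
    def write(self):
        # writers run exclusively and block readers while active
        print("writing {0}".format(self.file))


if __name__ == '__main__':
    res = SharedResource('myresource')
    res.read()
    res.write()
```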