├── .gitignore ├── datafabric ├── tests │ ├── __init__.py │ └── test_datafabric.py ├── __init__.py └── yellow_pages.py ├── README.md ├── setup.py ├── LICENSE └── Description.md /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | -------------------------------------------------------------------------------- /datafabric/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datafabric 2 | A distributed in-memory fabric based on shared-memory blocks and datashape. Any language can operate on the data. 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup(name = 'datafabric', version='0.1', description='Python Distribution Utilities', 4 | packages=['datafabric', 'datafabric.tests']) 5 | -------------------------------------------------------------------------------- /datafabric/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | 4 | from yellow_pages import YellowPages 5 | 6 | def test(): 7 | testsuite = unittest.TestLoader().discover(os.path.join(os.path.dirname(__file__), 'tests')) 8 | unittest.TextTestRunner(verbosity=2).run(testsuite) 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Blaze 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of datafabric nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /datafabric/tests/test_datafabric.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import distributed 4 | import datafabric as df 5 | 6 | class TestDataFabric(unittest.TestCase): 7 | def test_allocate(self): 8 | executor = distributed.Executor('127.0.0.1:8786') 9 | 10 | yp = df.YellowPages(executor) 11 | yp.allocate(['block{}'.format(i) for i in range(10)], 1024) 12 | yp.allocate(['_block{}'.format(i) for i in range(5)], 2048) 13 | 14 | blocks = [block[1] for block in yp.blocks()] 15 | self.assertTrue(len(blocks) == 15) 16 | for i in range(10): 17 | self.assertTrue('block{}'.format(i) in blocks) 18 | for i in range(5): 19 | self.assertTrue('_block{}'.format(i) in blocks) 20 | 21 | blocks = [block[1:] for block in yp.blocks(ip_only = False)] 22 | self.assertTrue(len(blocks) == 15) 23 | for i in range(10): 24 | self.assertTrue(('block{}'.format(i), 1024, 0) in blocks) 25 | for i in range(5): 26 | self.assertTrue(('_block{}'.format(i), 2048, 0) in blocks) 27 | 28 | yp.clear() 29 | 30 | self.assertTrue(not yp.blocks()) 31 | 32 | def test_insert(self): 33 | executor = distributed.Executor('127.0.0.1:8786') 34 | 35 | yp = df.YellowPages(executor) 36 | yp.allocate(['block{}'.format(i) for i in range(10)], 1024) 37 | 38 | with self.assertRaises(LookupError): 39 | yp.find('x') 40 | 41 | yp.insert('x', 4) 42 | yp.find('x') # Should not raise an exception 43 | self.assertEqual((1024, 4, 4, 0), yp.find('x', ip_only = False)[2:]) 44 | 45 | with self.assertRaises(LookupError): 46 | yp.find('y') 47 | 48 | yp.insert('y', 8) 49 | yp.find('x') # Should not raise an exception 50 | self.assertEqual((1024, 12, 4, 0), yp.find('x', ip_only = False)[2:]) 51 | 52 | yp.find('y') # Should not raise an exception 53 | self.assertEqual((1024, 12, 8, 4), yp.find('y', ip_only = False)[2:]) 54 | 55 | with self.assertRaises(LookupError): 56 | yp.find('p') 57 | with self.assertRaises(LookupError): 58 | yp.find('q') 59 | 60 | with self.assertRaises(ValueError): 61 | yp.insert('z', 2048) 62 | 63 | yp.clear() 64 | 65 | yp.allocate(['block0'], 1024) 66 | 67 | yp.insert('x', 4) 68 | yp.insert('y', 8) 69 | with self.assertRaises(ValueError): 70 | yp.insert('z', 1013) 71 | 72 | yp.clear() 73 | 74 | def test_remove(self): 75 | executor = distributed.Executor('127.0.0.1:8786') 76 | 77 | yp = df.YellowPages(executor) 78 | yp.allocate(['block{}'.format(i) for i in range(10)], 1024) 79 | 80 | yp.insert('x', 4) 81 | yp.find('x') # Should not raise an exception 82 | self.assertEqual((1024, 4, 4, 0), yp.find('x', ip_only = False)[2:]) 83 | 84 | yp.remove('x') 85 | with self.assertRaises(LookupError): 86 | yp.find('x') 87 | 88 | yp.insert('x', 4) 89 | yp.insert('y', 8) 90 | yp.find('x') # Should not raise an exception 91 | self.assertEqual((1024, 12, 4, 4), yp.find('x', ip_only = False)[2:]) 92 | yp.find('y') # Should not raise an exception 93 | self.assertEqual((1024, 12, 8, 8), yp.find('y', ip_only = False)[2:]) 94 | 95 | yp.remove('x') 96 | self.assertEqual((1024, 8, 8, 8), yp.find('y', ip_only = False)[2:]) 97 | with self.assertRaises(LookupError): 98 | yp.find('x') 99 | 100 | yp.remove('y') 101 | with self.assertRaises(LookupError): 102 | yp.find('y') 103 | 104 | yp.clear() 105 | 106 | if __name__ == '__main__': 107 | unittest.main() 108 | -------------------------------------------------------------------------------- /datafabric/yellow_pages.py: -------------------------------------------------------------------------------- 1 | import atexit 2 | import itertools 3 | import os 4 | 5 | import distributed 6 | 7 | class YellowPages(object): 8 | """ A manager class for shared memory blocks. 9 | 10 | `YellowPages` is a simple interface for allocating and interfacing with shared 11 | memory blocks. It is built on top of an executor from the `distributed` module. 12 | """ 13 | 14 | class Block(object): 15 | def __init__(self, capacity): 16 | self.capacity = capacity 17 | self.size = 0 18 | self.offset = 0 19 | self.variables = {} 20 | 21 | def __contains__(self, name): 22 | return name in self.variables 23 | 24 | def __getitem__(self, name): 25 | return self.variables[name] 26 | 27 | def insert(self, name, size): 28 | if (self.offset + size > self.capacity): 29 | raise ValueError('block is at capacity') 30 | 31 | self.size += size 32 | self.variables[name] = size, self.offset 33 | self.offset += size 34 | 35 | def remove(self, name): 36 | size, offset = self.variables.pop(name) 37 | self.size -= size 38 | 39 | def __init__(self, executor): 40 | self._executor = executor 41 | self._blocks = {} 42 | 43 | atexit.register(self.clear) 44 | 45 | def allocate(self, names, size): 46 | """ Allocate shared memory blocks of the specified size. This method will find a worker 47 | to put each shared memory block. 48 | 49 | Parameters 50 | ---------- 51 | names: 52 | an iterable of names for the shared memory blocks 53 | size: 54 | the size of each shared memory block, which should be a single value not an iterable 55 | """ 56 | 57 | def func(name, size): 58 | import posix_ipc 59 | 60 | sm = posix_ipc.SharedMemory(name, flags = posix_ipc.O_CREAT | posix_ipc.O_EXCL, size = size) 61 | sm.close_fd() 62 | 63 | return YellowPages.Block(size) 64 | 65 | futures = self._executor.map(func, names, itertools.repeat(size)) 66 | self._executor.gather(futures) 67 | 68 | s = distributed.sync(self._executor.loop, self._executor.scheduler.who_has) 69 | for name, future in zip(names, futures): 70 | ip = itertools.chain(*s[future.key]).next() 71 | try: 72 | block = self._blocks[ip] 73 | except KeyError: 74 | block = {} 75 | self._blocks[ip] = block 76 | 77 | block[name] = future.result() 78 | 79 | def clear(self): 80 | """ Safely remove all shared memory blocks managed by this instace of `YellowPages`. 81 | 82 | Normally, this *must* be called by the user, as we cannot necessarily rely on `__del__` 83 | being called. We do, however, prevent never deallocating the shared memory blocks by 84 | registering this method with `atexit`. 85 | """ 86 | 87 | def func(blocks): 88 | import posix_ipc 89 | 90 | for name in blocks: 91 | sm = posix_ipc.SharedMemory(name) 92 | sm.close_fd() 93 | sm.unlink() 94 | 95 | for ip in self._blocks: 96 | self._executor.submit(func, self._blocks[ip], workers = (ip,)) 97 | 98 | self._blocks.clear() 99 | 100 | def ips(self): 101 | """ Return the IP addresses of all workers with shared memory blocks. 102 | """ 103 | 104 | return self._blocks.keys() 105 | 106 | def blocks(self, ip_only = True): 107 | """ Return a list containing the IP addresses and the name of each shared memory block. 108 | 109 | If `ip_only = True`, then each item in the list is a tuple of the form `(ip, block_name)`. 110 | If `ip_only` is `False`, then each item in the list if a tuple of the form `(ip, block_name, 111 | block_capacity, block_size)`. 112 | """ 113 | 114 | res = [] 115 | for ip, blocks in self._blocks.items(): 116 | for name in blocks: 117 | if ip_only: 118 | res.append((ip, name)) 119 | else: 120 | block = blocks[name] 121 | res.append((ip, name, block.capacity, block.size)) 122 | 123 | return res 124 | 125 | def insert(self, name, size): 126 | """ Insert a variable into a shared memory block with enough space. 127 | 128 | Parameters 129 | ---------- 130 | name: 131 | the variable name as a string 132 | size: 133 | the size of the variable in bytes 134 | """ 135 | 136 | for blocks in self._blocks.values(): 137 | for block in blocks.values(): 138 | try: 139 | block.insert(name, size) 140 | return 141 | except Exception: 142 | pass 143 | 144 | raise ValueError('there is not enough space to insert variable \'{}\' of size {} bytes'.format(name, size)) 145 | 146 | def remove(self, name): 147 | """ Remove a variable from a shared memory block. 148 | 149 | Parameters 150 | ---------- 151 | name: 152 | the variable name as a string 153 | """ 154 | 155 | for blocks in self._blocks.values(): 156 | for block in blocks.values(): 157 | if name in block: 158 | return block.remove(name) 159 | 160 | def find(self, name, ip_only = True): 161 | """ Find which shared memory block contains a variable. 162 | 163 | If `ip_only = True`, then a tuple of the form `(ip, block_name)` is returned. 164 | If `ip_only` is `False`, then a tuple of the form `(ip, block_name, block_capacity, block_size, variable_size, variable_offset)` 165 | is returned. 166 | 167 | Parameters 168 | ---------- 169 | name: 170 | the variable name as a string 171 | ip_only: 172 | a flag indicating whether only the IP address should be returned 173 | """ 174 | 175 | for ip, blocks in self._blocks.items(): 176 | for block_name, block in blocks.items(): 177 | if name in block: 178 | if ip_only: 179 | return (ip, block_name) 180 | else: 181 | return (ip, block_name, block.capacity, block.size) + block[name] 182 | 183 | raise LookupError('no variable with name \'{}\''.format(name)) 184 | -------------------------------------------------------------------------------- /Description.md: -------------------------------------------------------------------------------- 1 | # Data Fabric 2 | 3 | This is a design document for a distributed data warehouse that leverages the POSIX 4 | shared memory API -- see http://man7.org/linux/man-pages/man7/shm_overview.7.html -- to avoid unnecessary copies. Our intent is ultimately to provide typed binary storage, distributed across nodes and accessible by many languages. 5 | 6 | Interoperability across languages will require a C++ library -- potentially C++11 so as to make use of ``, as well as other asynchronous functionality, in the C++ standard library. An appropriate C API will also be required. 7 | 8 | However, our short term objective is to build a "minimal viable proof of concept" in Python. We will do 9 | that by leveraging what already exists in the [Blaze ecosystem](http://blaze.pydata.org/) as much as possible. From there we will work to ensure access from multiple languages and port potential bottlenecks to C++. 10 | 11 | 12 | Scope 13 | ----- 14 | 15 | The initial demo version of data fabric will simply support data allocation and access in 16 | a distributed setting. It will not provide compute functionality. Nor will it provide serialization functionality. It will provide CREATE/LOAD, READ, and DELETE functionality for names in the in-memory store and will return a memory address given the name 17 | 18 | The entire purpose of our "minimal viable demo" is to scope out an acceptable API for the 19 | data fabric. And, also, to educate the development team about how to build with distributed. 20 | 21 | The project will not involve distributed values or data-structures. Such a distributed data-structure could live on top of this data-fabric using sharding and multiple keys. 22 | 23 | 24 | Distributed Framework 25 | --------------------- 26 | 27 | For the proof of concept we will build our data fabric on top of the [Distributed](https://github.com/blaze/distributed) library in Python. We envision a simple key-value storage, where a key (some string that tags the distributed data) maps to a list of (node, shared-memory name, offset) containing the key. 28 | 29 | As part of the prototype, we will define a data-fabric yellow-pages (YP) class that will be the center-point of the data-fabric data fabric. It will learn heavily from the Center class in [Distributed](https://github.com/blaze/distributed). This YP class will be responsible for CREATING shared-memory blocks on all the nodes of the Distributed cluster. It will also keep track of where all the variables stored in the system are stored. 30 | 31 | Datafabric will be a script that connects to an already running distributed cluster, allocates a certain amount of total shared-memory evenly distributed across the nodes of the cluster starting with one block per node and keeps track of the names as they are created and deleted and will be the place to find out where specific names are listed 32 | 33 | MANAGE 34 | ====== 35 | 36 | allocate(num_megabytes) -> [(node, shared_id),...] 37 | 38 | Allocate new memory for the data-fabric with room for num_megabytes. This will create the 39 | shared-memory blocks owned by the system. 40 | 41 | 42 | CREATE (COPY) 43 | ============= 44 | 45 | There are two (three) ways to get data into shared memory: 46 | 47 | 1) Read from Disk 48 | 2) Copy from other memory 49 | 3) possibly memory mapping 50 | 51 | insert(variable_name, documentation, tp, bytes) 52 | 53 | variable_name -- string to key the variable 54 | documentation -- a small string describing the variable 55 | tp -- an ndt.type from [DyND](https://github.com/libdynd/libdynd). 56 | where bytes is an open-file, a memory-view, or a memory-mapped file. 57 | 58 | This command will find an empty place to put the bytes and track them in the YP class 59 | 60 | READ 61 | ==== 62 | 63 | get_list() 64 | get_locations(variable_name) --> list of (node, shared_id, offset) 65 | node is the ip address of the machine 66 | shared_id is the id of the shared-memory block on that machine 67 | offset is the byte offset into the shared-memory block where the start of the data is located 68 | 69 | get_ptr(node, shared_id, offset) -> ndt.type, memory pointer (ctype) 70 | 71 | Question? Should the ndt.type be stored in the YP Class or near the bytes themselves (i.e. at the front of them?) So that memory pointers are self-describing. 72 | 73 | To start let's store the ndt.type in the YP Class. 74 | 75 | DELETE 76 | ====== 77 | 78 | delete(variable_name) 79 | 80 | removes the variable name from the data-fabric and allows the memory it is using to be re-used. 81 | 82 | defragment() 83 | 84 | free up contiguous space by moving variables around in the shared-segments. 85 | 86 | deallocate(node_id, shared_id) 87 | 88 | Remove the shared_memory block from the system. 89 | 90 | 91 | Eventually, at the C++ stage, we could move away from the concept of a distributed center towards a model where the stored information is more distributed using things like the CRUSH algorithm. 92 | 93 | 94 | Possible data uses 95 | ========================== 96 | 97 | Data Abstraction 98 | ---------------- 99 | 100 | Variables in our data fabric will be write-once. Variables in the data-fabric can be represented in the YPClass with a lightweight class either in Python (using `ctypes`) 101 | or in Cython. An example of what variable could look like, in Cython, is below. 102 | 103 | ``` 104 | cdef class variable: 105 | const char *name # the name of the variable 106 | const key_t id # the key of the shared-memory 107 | 108 | type _tp # the ndt.type that describes the data 109 | char *_metadata # the low-level datashape, i.e. DyND metadata 110 | char *_data # a pointer to the data as return by the POSIX API function `mmap` 111 | 112 | def __cinit__(self, tp): 113 | # default-initialize the metadata (low-level datashape) 114 | # allocate the data in shared memory, retrieve the pointer via `mmap` 115 | 116 | property type: 117 | def __get__(self): 118 | # return the ndt.type 119 | 120 | property metadata: 121 | def __get__(self): 122 | # return a ctype pointer to metadata (low-level datashape) serialized as a char * 123 | 124 | property data: 125 | def __get__(self): 126 | # return a ctype pointer to data as a char * 127 | ``` 128 | 129 | We will pre-allocate shared-memory blocks avoid the problem of not ha limited number of shared-memory blocks are available in Linux. 130 | 131 | 132 | Example Workflow 133 | ---------------- 134 | 135 | Here is an example workflow of how the demo version of Data Fabric could work. 136 | 137 | - Setup distributed in Python. 138 | - Create a data fabric YP Class and allocate shared memory 139 | - Create a `ndt.type` that describes your data, e.g. `ndt.type("40 * 20 * float64")` 140 | - Store (or read from disk) the data into a variable in the fabric. 141 | - At this point, the `YP Class` takes care of the mapping to the local worker nodes, and each local 142 | worker takes care of the allocation. 143 | 144 | - Later on, we can ask the `YP Class` for a list of which nodes contain the object with name `x`. This will return a list of the nodes and their local names of `x`. 145 | 146 | It is then up to the user to apply operations to this read-only data and store intermediate results back into the fabric. 147 | --------------------------------------------------------------------------------