├── README.md └── fdb-nbd.py /README.md: -------------------------------------------------------------------------------- 1 | # A totally proof-of-concept FoundationDB based NBD backend 2 | 3 | I wanted to play around with [FoundationDB](https://www.foundationdb.org/) a bit more and 4 | building a [network block device (NBD)](https://nbd.sourceforge.io/) backend seemed like 5 | a good fit for that. 6 | 7 | **Do not use this in production**, unless you like data loss, kernel crashes and you don't 8 | mind if your house burns down. 9 | 10 | If you're interested in a more serious implementation, have a look at https://github.com/spullara/nbd. 11 | 12 | ## Installation 13 | 14 | * Install [FoundationDB](https://apple.github.io/foundationdb/local-dev.html). 15 | * Run `fdb-nbd.py`. It will start a local TCP server on port 10809 (default NBD port). 16 | * Load the `nbd` kernel module. 17 | * Run `nbd-client -N example 127.0.0.1 /dev/nbd0`. This will initialize the network block device `/dev/nbd0` and point it to the started python server. 18 | * You might now format `/dev/nbd0` with any filesystem and mount it. 19 | 20 | ## Cleaning up 21 | 22 | * Unmount your filesystem 23 | * Run `nbd-client -d /dev/nbd0` to disconnect the block device from the server 24 | 25 | ## Worth noting 26 | 27 | * The server can handle multiple block device "stores" at once. In the above example, `example` got selected with the `-N` argument of `nbd-client`. Have a look at the source code of `fdb-nbd.py` to see how this is initialized. 28 | * You can list all other available "stores" with `nbd-client -l 127.0.0.1` 29 | * The server is hardcoded to use a blocksize of `1024`. Each block is stored in its own FoundationDB key `('dev', 'example', 'blocks', block_nr)`. Partial reads or writes of blocks are not supported. 30 | * Since it's possible, I just compress/decompress each key before set/get. Yay. 31 | * It's interesting to see how block device caching works. File system actions often don't directly cause block device operations. Play around with `sync` and flushing the cache `echo 3 > /proc/sys/vm/drop_caches` for maximum effect. 32 | * Don't suddenly stop the server or disconnect with `nbd-client`. The kernel can be a bit sensitive about this. I've had unkillable processes as a result and a kernel OOPS. You have been warned. 33 | * Performance isn't too good, at least in my tests. It's around 10MB/s or so with a locally running FoundationDB. But hey: It works :-) 34 | -------------------------------------------------------------------------------- /fdb-nbd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Based on `swiftnbd`, server module 4 | https://github.com/reidrac/swift-nbd-server 5 | Copyright (C) 2013 by Juan J. Martinez 6 | 7 | Modifications for FoundationDB 8 | Copyright (C) 2018 by Florian Wesch 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in 18 | all copies or substantial portions of the Software. 19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 | THE SOFTWARE. 27 | """ 28 | 29 | from __future__ import print_function 30 | 31 | import struct 32 | import logging 33 | import signal 34 | import fdb 35 | import gevent 36 | from gevent.server import StreamServer 37 | 38 | if 1: 39 | import sys 40 | root = logging.getLogger() 41 | root.setLevel(logging.DEBUG) 42 | ch = logging.StreamHandler(sys.stdout) 43 | ch.setLevel(logging.DEBUG) 44 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 45 | ch.setFormatter(formatter) 46 | root.addHandler(ch) 47 | 48 | log = logging.getLogger('nbd-fdb') 49 | 50 | class Server(StreamServer): 51 | # NBD's magic 52 | NBD_HANDSHAKE = 0x49484156454F5054 53 | NBD_REPLY = 0x3e889045565a9 54 | 55 | NBD_REQUEST = 0x25609513 56 | NBD_RESPONSE = 0x67446698 57 | 58 | NBD_OPT_EXPORTNAME = 1 59 | NBD_OPT_ABORT = 2 60 | NBD_OPT_LIST = 3 61 | 62 | NBD_REP_ACK = 1 63 | NBD_REP_SERVER = 2 64 | NBD_REP_ERR_UNSUP = 2**31 + 1 65 | 66 | NBD_CMD_READ = 0 67 | NBD_CMD_WRITE = 1 68 | NBD_CMD_DISC = 2 69 | NBD_CMD_FLUSH = 3 70 | 71 | # fixed newstyle handshake 72 | NBD_HANDSHAKE_FLAGS = (1 << 0) 73 | 74 | # has flags, supports flush 75 | NBD_EXPORT_FLAGS = (1 << 0) ^ (1 << 2) 76 | NBD_RO_FLAG = (1 << 1) 77 | 78 | def __init__(self, listener, stores): 79 | super(Server, self).__init__(listener, handle=self.handler) 80 | self._stores = stores 81 | 82 | def nbd_response(self, fob, handle, error=0, data=None): 83 | fob.write(struct.pack('>LLQ', self.NBD_RESPONSE, error, handle)) 84 | if data: 85 | fob.write(data) 86 | fob.flush() 87 | 88 | def handler(self, socket, address): 89 | host, port = address 90 | store = None 91 | log.info("Incoming connection from %s:%s" % address) 92 | 93 | try: 94 | fob = socket.makefile() 95 | 96 | fob.write("NBDMAGIC" + struct.pack(">QH", self.NBD_HANDSHAKE, self.NBD_HANDSHAKE_FLAGS)) 97 | fob.flush() 98 | 99 | data = fob.read(4) 100 | try: 101 | client_flag = struct.unpack(">L", data)[0] 102 | except struct.error: 103 | raise IOError("Handshake failed, disconnecting") 104 | 105 | # we support both fixed and unfixed new-style handshake 106 | if client_flag == 0: 107 | fixed = False 108 | log.warning("Client using new-style non-fixed handshake") 109 | elif client_flag & 1 == 1: 110 | fixed = True 111 | else: 112 | raise IOError("Handshake failed, disconnecting") 113 | 114 | # negotiation phase 115 | while True: 116 | header = fob.read(16) 117 | try: 118 | magic, opt, length = struct.unpack(">QLL", header) 119 | except struct.error: 120 | raise IOError("Negotiation failed: Invalid request, disconnecting") 121 | 122 | if magic != self.NBD_HANDSHAKE: 123 | raise IOError("Negotiation failed: bad magic number: %s" % magic) 124 | 125 | if length: 126 | data = fob.read(length) 127 | if len(data) != length: 128 | raise IOError("Negotiation failed: %s bytes expected" % length) 129 | else: 130 | data = None 131 | 132 | log.debug("[%s:%s]: opt=%s, len=%s, data=%s" % (host, port, opt, length, data)) 133 | 134 | if opt == self.NBD_OPT_EXPORTNAME: 135 | if not data: 136 | raise IOError("Negotiation failed: no export name was provided") 137 | name = data 138 | store = self._stores.get(name) 139 | 140 | if not store: 141 | if not fixed: 142 | raise IOError("Negotiation failed: unknown export name") 143 | fob.write(struct.pack(">QLLL", self.NBD_REPLY, opt, self.NBD_REP_ERR_UNSUP, 0)) 144 | fob.flush() 145 | continue 146 | 147 | log.info("[%s:%s] Negotiated export: %s" % (host, port, name)) 148 | 149 | export_flags = self.NBD_EXPORT_FLAGS 150 | if store.read_only: 151 | export_flags ^= self.NBD_RO_FLAG 152 | log.info("[%s:%s] %s is read only" % (host, port, name)) 153 | fob.write(struct.pack('>QH', store.size, export_flags) + "\x00"*124) 154 | fob.flush() 155 | break 156 | elif opt == self.NBD_OPT_LIST: 157 | for name in sorted(self._stores.list()): 158 | fob.write(struct.pack(">QLLL", self.NBD_REPLY, opt, self.NBD_REP_SERVER, len(name) + 4)) 159 | fob.write(struct.pack(">L", len(name)) + name) 160 | fob.write(struct.pack(">QLLL", self.NBD_REPLY, opt, self.NBD_REP_ACK, 0)) 161 | fob.flush() 162 | elif opt == self.NBD_OPT_ABORT: 163 | fob.write(struct.pack(">QLLL", self.NBD_REPLY, opt, self.NBD_REP_ACK, 0)) 164 | fob.flush() 165 | raise IOError("Client aborted negotiation") 166 | else: 167 | # we don't support any other option 168 | if not fixed: 169 | raise IOError("Unsupported option") 170 | fob.write(struct.pack(">QLLL", self.NBD_REPLY, opt, self.NBD_REP_ERR_UNSUP, 0)) 171 | fob.flush() 172 | 173 | # operation phase 174 | while True: 175 | header = fob.read(28) 176 | try: 177 | (magic, cmd, handle, offset, length) = struct.unpack(">LLQQL", header) 178 | except struct.error: 179 | raise IOError("Invalid request, disconnecting") 180 | 181 | if magic != self.NBD_REQUEST: 182 | raise IOError("Bad magic number, disconnecting") 183 | 184 | log.debug("[%s:%s]: cmd=%s, handle=%s, offset=%s, len=%s" % (host, port, cmd, handle, offset, length)) 185 | 186 | if cmd == self.NBD_CMD_DISC: 187 | log.info("[%s:%s] disconnecting" % address) 188 | break 189 | elif cmd == self.NBD_CMD_WRITE: 190 | data = fob.read(length) 191 | if len(data) != length: 192 | raise IOError("%s bytes expected, disconnecting" % length) 193 | 194 | try: 195 | store.seek(offset) 196 | store.write(data) 197 | except IOError as ex: 198 | log.error("[%s:%s] %s" % (host, port, ex)) 199 | self.nbd_response(fob, handle, error=ex.errno) 200 | continue 201 | 202 | self.nbd_response(fob, handle) 203 | elif cmd == self.NBD_CMD_READ: 204 | try: 205 | store.seek(offset) 206 | data = store.read(length) 207 | except IOError as ex: 208 | log.error("[%s:%s] %s" % (host, port, ex)) 209 | self.nbd_response(fob, handle, error=ex.errno) 210 | continue 211 | 212 | self.nbd_response(fob, handle, data=data) 213 | elif cmd == self.NBD_CMD_FLUSH: 214 | self.nbd_response(fob, handle) 215 | else: 216 | log.warning("[%s:%s] Unknown cmd %s, disconnecting" % (host, port, cmd)) 217 | break 218 | 219 | except IOError as ex: 220 | log.error("[%s:%s] %s" % (host, port, ex)) 221 | finally: 222 | socket.close() 223 | 224 | BLOCK_SIZE = 1024 225 | 226 | class FDBStore(object): 227 | def __init__(self, db, name): 228 | self._db = db 229 | self._device = fdb.Subspace(('dev', name)) 230 | self._block_size = int(self._db[self._device['meta']['block_size']]) 231 | self._size = self._block_size * int(self._db[self._device['meta']['num_blocks']]) 232 | self._blocks = self._device['blocks'] 233 | self._empty = '\0' * self._block_size 234 | self._pos = 0 235 | 236 | @property 237 | def read_only(self): 238 | return False 239 | 240 | @property 241 | def size(self): 242 | return self._size 243 | 244 | def seek(self, pos): 245 | # print('seek', pos) 246 | self._pos = pos 247 | assert pos % self._block_size == 0, "misaligned seek" 248 | 249 | def write(self, data): 250 | # print('write', len(data)) 251 | assert len(data) % self._block_size == 0, "misaligned write" 252 | 253 | @fdb.transactional 254 | def transactional_write(tr): 255 | for relative_offset in xrange(0, len(data), self._block_size): 256 | block = (self._pos + relative_offset) / self._block_size 257 | tr[self._blocks[block]] = data[relative_offset:relative_offset+self._block_size].encode('zlib') 258 | transactional_write(self._db) 259 | 260 | def read(self, length): 261 | # print('read', length) 262 | assert length % self._block_size == 0, "misaligned read" 263 | start = self._pos / self._block_size 264 | end = (self._pos + length) / self._block_size 265 | 266 | @fdb.transactional 267 | def transactional_read(tr): 268 | blocks = {} 269 | for key, value in self._db[self._blocks[start]: self._blocks[end]]: 270 | blocks[self._blocks.unpack(key)[0]] = value.decode('zlib') 271 | return blocks 272 | 273 | blocks = transactional_read(self._db) 274 | out = [] 275 | for relative_offset in xrange(0, length, self._block_size): 276 | block = (self._pos + relative_offset) / self._block_size 277 | if block in blocks: 278 | out.append(blocks[block]) 279 | else: 280 | out.append(self._empty) 281 | return ''.join(out) 282 | 283 | class Stores(object): 284 | def __init__(self, db): 285 | self._db = db 286 | self._index = fdb.Subspace(('devices',)) 287 | 288 | def list(self): 289 | names = set() 290 | for key, value in self._db[self._index.range()]: 291 | names.add(self._index.unpack(key)[0]) 292 | return names 293 | 294 | def get(self, name): 295 | if self._db[self._index[name]] is not None: 296 | return FDBStore(self._db, name) 297 | 298 | def create(self, name, num_blocks, block_size=BLOCK_SIZE): 299 | @fdb.transactional 300 | def create(tr): 301 | tr.set(self._index[name], '') 302 | device = fdb.Subspace(('dev', name)) 303 | tr.set(device['meta']['block_size'], str(BLOCK_SIZE)) 304 | tr.set(device['meta']['num_blocks'], str(num_blocks)) 305 | create(self._db) 306 | 307 | def main(): 308 | fdb.api_version(510) 309 | db = fdb.open() 310 | 311 | stores = Stores(db) 312 | 313 | # del db[:] 314 | 315 | stores.create('foobar', 1000000) 316 | stores.create('example', 1000000) 317 | 318 | for name in stores.list(): 319 | print('store %s\n nbd-client -N %s 127.0.0.1 /dev/nbd0' % (name, name)) 320 | 321 | server = Server(('127.0.0.1', 10809), stores) 322 | gevent.signal(signal.SIGTERM, server.stop) 323 | gevent.signal(signal.SIGINT, server.stop) 324 | server.serve_forever() 325 | 326 | if __name__ == "__main__": 327 | main() 328 | --------------------------------------------------------------------------------