├── .gitignore ├── src ├── workers.cfg ├── lsddd.py ├── cptoddd.py ├── cpfromddd.py ├── worker.py ├── libtripled.py └── master.py ├── LICENSE └── README /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.cfg 3 | *~ 4 | -------------------------------------------------------------------------------- /src/workers.cfg: -------------------------------------------------------------------------------- 1 | workerA 2 | workerB 3 | workerC 4 | workerD 5 | -------------------------------------------------------------------------------- /src/lsddd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import logging, os, redis, sys 4 | 5 | # CONSTANTS 6 | REDIS_HOST = 'localhost' 7 | log = logging.getLogger('tripled.ls') 8 | 9 | if __name__ == '__main__': 10 | logging.basicConfig(level=logging.DEBUG) 11 | 12 | if len(sys.argv) < 2: 13 | print '%s ' % (sys.argv[0]) 14 | exit(-1) 15 | 16 | redis = redis.Redis(host=REDIS_HOST, port=6379, db=0) 17 | search_string = os.path.join(sys.argv[1], '*') 18 | log.info('searching: %s', search_string) 19 | print '\n'.join(redis.keys(search_string)) 20 | -------------------------------------------------------------------------------- /src/cptoddd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import libtripled, logging, sys, os 4 | 5 | # CONSTANTS 6 | log = logging.getLogger('tripled.cptoddd') 7 | CHUNK_SIZE = 64*1024**2 8 | 9 | def next_chunk(f): 10 | data = f.read(CHUNK_SIZE) 11 | while (data): 12 | yield data 13 | data = f.read(CHUNK_SIZE) 14 | 15 | if __name__ == '__main__': 16 | logging.basicConfig(level=logging.DEBUG) 17 | 18 | if len(sys.argv) < 4: 19 | print '%s ' % (sys.argv[0]) 20 | exit(-1) 21 | 22 | tripled = libtripled.tripled(sys.argv[1]) 23 | with open(sys.argv[2], 'r') as f: 24 | for i, chunk in enumerate(next_chunk(f)): 25 | tripled.write_block(sys.argv[3], i, chunk) 26 | -------------------------------------------------------------------------------- /src/cpfromddd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import libtripled, logging, sys, os 4 | 5 | # CONSTANTS 6 | log = logging.getLogger('tripled.cpfromddd') 7 | 8 | def next_chunk(tripled, path): 9 | chunks = tripled.read_file(path) 10 | for chunk in chunks: 11 | log.debug('reading from worker[%s] path[%s]' % (chunk[0], chunk[1])) 12 | yield tripled.read_block(chunk[0], chunk[1]) 13 | 14 | if __name__ == '__main__': 15 | logging.basicConfig(level=logging.DEBUG) 16 | 17 | if len(sys.argv) < 4: 18 | print '%s ' % (sys.argv[0]) 19 | exit(-1) 20 | 21 | tripled = libtripled.tripled(sys.argv[1]) 22 | try: os.makedirs(os.path.dirname(sys.argv[3])) 23 | except OSError: pass 24 | with open(sys.argv[3], 'w') as f: 25 | for chunk in next_chunk(tripled, sys.argv[2]): 26 | f.write(chunk) 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2011 Wolfgang Richter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/worker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import logging, os, zmq 4 | 5 | log = logging.getLogger('tripled.worker') 6 | 7 | class worker: 8 | def __init__(self): 9 | pass 10 | 11 | def client_read_block(self, client, path): 12 | log.info('worker reading block[%s]', path) 13 | with open(path, 'r') as f: 14 | client.send_pyobj(f.read(), protocol=0) 15 | 16 | def client_write_block(self, client, command): 17 | log.info('worker writing block[%s]', command[1]) 18 | try: os.makedirs(os.path.dirname(command[1])) 19 | except OSError: pass 20 | with open(command[1], 'w') as f: 21 | f.write(command[2]) 22 | client.send_pyobj(True, protocol=0) 23 | 24 | def parse_client_command(self, client, command): 25 | log.debug('command: %s' % (command[0:1])) 26 | if command[0] == 'read': 27 | self.client_read_block(client, command[1]) 28 | elif command[0] == 'write': 29 | self.client_write_block(client, command) 30 | else: 31 | log.error('Error parsing client command. Failing.') 32 | exit(-1) 33 | 34 | if __name__ == '__main__': 35 | logging.basicConfig(level=logging.DEBUG) 36 | 37 | context = zmq.Context() 38 | socket = context.socket(zmq.REP) 39 | socket.bind('tcp://*:8008') 40 | 41 | worker = worker() 42 | 43 | while True: 44 | worker.parse_client_command(socket, socket.recv()) 45 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Introduction: 2 | 3 | This is a very simple distributed file system borrowing many ideas from 4 | GFS. The idea is to focus on simplicity and performance. Not 5 | availability or fault tolerance (at all). 6 | 7 | Triple D: 8 | 1) Dead simple... 9 | 2) Done right... 10 | 3) Distributed file system 11 | 12 | Why: 13 | 14 | Tired of bullshit configurations and degraded performance? 15 | Want good performance? 16 | How about usable and understandable code? 17 | 18 | Requirements: 19 | 20 | zeromq-2.1.7 21 | redis-2.2.7 22 | 23 | Usage: 24 | master.py -- run on metadata master node 25 | worker.py -- run on worker nodes 26 | 27 | cptoddd.py -- use to copy files into TripleD 28 | cpfromddd.py -- use to copy files out of TripleD 29 | lsddd.py -- use to list files in TripleD 30 | 31 | Design: 32 | 33 | 1) Master handles all metadata 34 | 2) Workers are dumb trusting writes from clients 35 | 3) Clients do RPC with master and workers directly 36 | 4) Metadata comes from master, flows via client to workers 37 | 5) Assume write once, read many in future; no append; no seek 38 | 6) Client allowed to arbitrarily chunk (client knows best!) 39 | 7) No replication right now; client would get list from master 40 | 8) No multi-reading or multi-writing... 41 | 9) All workers store chunks in the same path on their system 42 | 10) Dumb round-robin assignment of chunks; worst case first node gets 43 | more data; last node gets least data 44 | 11) Variable-sized chunks allowed 45 | 12) No delete or rewrite 46 | 47 | 48 | TODO: 49 | 1) master should only write into Redis on receipt of write info from a 50 | worker 51 | 2) concurrent writing to multiple nodes, reading from multi nodes 52 | (readahead as well)---basically add threading with push/pull sockets 53 | 54 | Legal: 55 | 56 | TripleD is licensed under the MIT license. 57 | -------------------------------------------------------------------------------- /src/libtripled.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | 3 | import logging, zmq 4 | 5 | log = logging.getLogger('tripled.libtripled') 6 | 7 | class tripled: 8 | def __init__(self, master): 9 | self.context = zmq.Context() 10 | self.workers = {} 11 | self.master = self.get_master(master) 12 | 13 | def get_master(self, master): 14 | socket = self.context.socket(zmq.REQ) 15 | uri = 'tcp://%s:1337' % (master) 16 | log.debug('master connect string[%s]', uri) 17 | socket.connect(uri) 18 | return socket 19 | 20 | def get_worker(self, worker): 21 | if worker in self.workers: 22 | log.debug('using cached worker connection') 23 | worker = self.workers[worker] 24 | else: 25 | socket = self.context.socket(zmq.REQ) 26 | uri = 'tcp://%s:8008' % (worker) 27 | log.debug('worker connect string[%s]', uri) 28 | socket.connect(uri) 29 | self.workers[worker] = socket 30 | worker = self.workers[worker] 31 | return worker 32 | 33 | def read_file(self, path): 34 | self.master.send_pyobj(('read', path), protocol=0) 35 | blocks = self.master.recv() 36 | log.debug('blocks: %s', blocks) 37 | return blocks 38 | 39 | def write_block(self, path, block, data): 40 | self.master.send_pyobj(('write', path, block), protocol=0) 41 | details = self.master.recv() 42 | self.worker_write_block(details[0], details[1], data) 43 | 44 | def read_block(self, worker, path): 45 | worker = self.get_worker(worker) 46 | worker.send_pyobj(('read',path), protocol=0) 47 | return worker.recv() 48 | 49 | def worker_write_block(self, worker, path, data): 50 | log.debug('writing[%s] to worker[%s]' % (path, worker)) 51 | worker = self.get_worker(worker) 52 | log.debug('got socket...writing data[%d]' % (len(data))) 53 | worker.send_pyobj(('write', path, data), protocol=0) 54 | log.debug('sent message...') 55 | return worker.recv() 56 | 57 | if __name__ == '__main__': 58 | print 'This is a library of client functions.' 59 | -------------------------------------------------------------------------------- /src/master.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import fileinput, hashlib, logging, os, sys, redis, zmq 4 | 5 | # CONSTANTS 6 | log = logging.getLogger('tripled.master') 7 | REDIS_SERVER = 'localhost' 8 | CHUNK_DIR = '/tmp/tripled_chunks/' 9 | 10 | class master: 11 | def __init__(self): 12 | self.redis = redis.Redis(host=REDIS_SERVER, port=6379, db=0) 13 | self.workers = [] 14 | self.count = 0 15 | self.written_blocks = 0 16 | 17 | def add_worker(self, worker): 18 | self.workers.append(worker) 19 | self.count += 1 20 | 21 | def client_read_file(self, client, file): 22 | blocks = self.redis.lrange(file, 0, -1) 23 | client.send_pyobj(blocks, protocol=0) 24 | 25 | def client_write(self, client, file, block): 26 | worker = self.workers[self.written_blocks % self.count] 27 | directory = os.path.join(CHUNK_DIR, hashlib.sha256(file).hexdigest()) 28 | path = os.path.join(directory, str(block)) 29 | log.debug('writing to worker[%s] path[%s]'% (worker, path)) 30 | self.written_blocks += 1 31 | serialized = (worker, path) 32 | self.redis.rpush(file, serialized) 33 | client.send_pyobj(serialized, protocol=0) 34 | 35 | def parse_client_command(self, client, command): 36 | log.debug('command: %s', command) 37 | if command[0] == 'read': 38 | self.client_read_file(client, command[1]) 39 | elif command[0] == 'write': 40 | self.client_write(client, command[1], command[2]) 41 | else: 42 | log.error('Error parsing client command. Failing.') 43 | exit(-1) 44 | 45 | if __name__ == '__main__': 46 | logging.basicConfig(level=logging.DEBUG) 47 | 48 | if len(sys.argv) < 2: 49 | print '%s [ worker2.cfg ...]]>' % (sys.argv[0]) 50 | 51 | workers = [] 52 | for line in fileinput.input(): 53 | workers.append(line.strip()) 54 | log.debug('new worker: %s', workers[-1]) 55 | 56 | master = master() 57 | for worker in workers: 58 | master.add_worker(worker) 59 | 60 | context = zmq.Context() 61 | socket = context.socket(zmq.REP) 62 | socket.bind('tcp://*:1337') 63 | 64 | while True: 65 | master.parse_client_command(socket, socket.recv()) 66 | --------------------------------------------------------------------------------