├── .gitignore
├── network.py
├── settings.py
├── address.py
├── LICENSE
├── create_chord.py
├── README.md
├── test.py
├── remote.py
├── dht.py
├── dfs.py
├── chord.py
└── fuse_dfs.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# .pyc
*.pyc

--------------------------------------------------------------------------------
/network.py:
--------------------------------------------------------------------------------

# reads from the socket until the "\r\n" terminator
def read_from_socket(s):
    result = ""
    while 1:
        data = s.recv(256)
        if not data:
            # connection closed before the terminator arrived
            break
        result += data
        # check the accumulated buffer, so a terminator split across
        # recv() calls is still detected
        if result[-2:] == "\r\n":
            result = result[:-2]
            break
    # if result != "":
    #     print "read : %s" % result
    return result

# sends everything on the socket, adding the "\r\n" terminator
def send_to_socket(s, msg):
    # print "respond : %s" % msg
    s.sendall(str(msg) + "\r\n")

--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
# CONFIGURATION FILE

# log size of the ring
LOGSIZE = 8
SIZE = 1 << LOGSIZE

[... the rest of settings.py (including the daemon intervals STABILIZE_INT,
STABILIZE_RET and FIX_FINGERS_INT used by chord.py), address.py, LICENSE and
create_chord.py are truncated in the source dump ...]

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[... beginning of README.md truncated ...]

- `$>python test.py` to check consistency. Tests can fail because the network
  is not yet stable; they should pass if you increase the rate of updates.
- `$>python create_chord.py $N_CHORD_NODES` to run a DHT that lets you ask
  questions to random members.

## Distributed Hash Table
A distributed hash table implementation on top of Chord is available in
`dht.py`. It uses the overlay network provided by Chord's algorithms and adds
two more commands to the network: `set` and `get`.

After registering those commands with the appropriate callbacks, we have a
fairly simple DHT implementation that also rebalances keys as nodes join.

### To be implemented:
- Replication, to handle node failures/departures without losing information.

## Distributed File System
For this case we implemented a file system ... (to be continued)

### How to test?
- `$>python create_chord.py $N_CHORD_NODES`, followed by `$>python dfs.py
  $MOUNT_POINT`. Read the description in dfs.py to learn how to operate it.

### What's next?

- Add replication!
- Adaptive load balancing, based on [this paper](http://members.unine.ch/pascal.felber/publications/ICCCN-06.pdf).

**DISCLAIMER**
A pet project for fun, to learn about DHTs; not intended for real-life use.

Other projects:
- SOON: C++ implementation of the Raft consensus protocol.
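To make the DHT section above concrete, here is a minimal local session with
the `DHT` class from `dht.py`. This is a sketch: the ports are arbitrary, and
the cross-node `get` only succeeds once the ring has stabilized.

```python
from address import Address
from dht import DHT

# a bootstrap node, plus a peer that joins the ring through it
bootstrap = DHT(Address("127.0.0.1", 10000))
peer = DHT(Address("127.0.0.1", 10001), Address("127.0.0.1", 10000))

bootstrap.set("answer", 42)
print peer.get("answer")   # -> 42 once the ring has stabilized
                           #    (distribute_data migrates keys every 5s)

peer.shutdown()
bootstrap.shutdown()
```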
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
import os
import time
import socket
import random
from chord import *


def check_key_lookup(peers, hash_list):
    print "Running key lookup consistency test"
    for key in range(SIZE):
        # select a random node
        node = peers[random.randrange(len(peers))]
        # get the successor
        target = node.find_successor(key)
        for i in range(len(peers)):
            if inrange(key, hash_list[i]+1, hash_list[(i+1)%len(peers)]+1):
                tries = 1
                while 1:
                    try:
                        assert target.id() == hash_list[(i+1)%len(peers)]
                        break
                    except Exception, e:
                        print "Fail number %s, %s tries left before aborting" % (tries, 4-tries)
                        tries += 1
                        if tries > 4:
                            raise e
                        time.sleep(1.5 ** tries)
                        # the ring may have stabilized since; look the key up again
                        target = node.find_successor(key)
    print "Finished key lookup consistency test, all good"

"""
def data_fusser(peers):
    print "Running data fusser trying to detect failures"
    data = {}
    for i in range(1000):
        if random.random() < 0.4 and len(data.keys()):
            key = data.keys()[random.randrange(len(data.keys()))]
            tries = 0
            while 1:
                try:
                    assert peers[random.randrange(len(peers))].get(key) == data[key]
                    break
                except Exception, e:
                    time.sleep(1 << tries)

[... the rest of test.py is truncated in the source dump ...]

--------------------------------------------------------------------------------
/remote.py:
--------------------------------------------------------------------------------
[... the beginning of remote.py (imports, the requires_connection decorator,
and the start of the Remote class, including its send method) is truncated in
the source dump ...]

    def recv(self):
        # we used to have more complicated logic here,
        # and we might again, so I'm not getting rid of this yet
        return read_from_socket(self.socket_)

    def ping(self):
        try:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            s.connect((self.address_.ip, self.address_.port))
            s.sendall("\r\n")
            s.close()
            return True
        except socket.error:
            return False

    @requires_connection
    def command(self, msg):
        self.send(msg)
        response = self.recv()
        return response

    @requires_connection
    def get_successors(self):
        self.send('get_successors')

        response = self.recv()
        # if our next guy doesn't have successors, return an empty list
        if response == "":
            return []
        response = json.loads(response)
        return map(lambda address: Remote(Address(address[0], address[1])), response)

    @requires_connection
    def successor(self):
        self.send('get_successor')

        response = json.loads(self.recv())
        return Remote(Address(response[0], response[1]))

    @requires_connection
    def predecessor(self):
        self.send('get_predecessor')

        response = self.recv()
        if response == "":
            return None
        response = json.loads(response)
        return Remote(Address(response[0], response[1]))

    @requires_connection
    def find_successor(self, id):
        self.send('find_successor %s' % id)

        response = json.loads(self.recv())
        return Remote(Address(response[0], response[1]))

    @requires_connection
    def closest_preceding_finger(self, id):
        self.send('closest_preceding_finger %s' % id)

        response = json.loads(self.recv())
        return Remote(Address(response[0], response[1]))

    @requires_connection
    def notify(self, node):
        self.send('notify %s %s' % (node.address_.ip, node.address_.port))
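Every method on `Remote` follows the same pattern: write one CRLF-terminated
command line, read back one JSON-encoded `[ip, port]` pair. Stripped of the
class machinery, a `find_successor` call looks roughly like this on the wire
(a sketch; the node address is hypothetical):

```python
import json
import socket

from network import read_from_socket, send_to_socket

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(("127.0.0.1", 10000))          # hypothetical node address
send_to_socket(s, "find_successor 42")   # send_to_socket appends the "\r\n"
ip, port = json.loads(read_from_socket(s))
print "successor of 42 is at %s:%s" % (ip, port)
s.close()
```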
--------------------------------------------------------------------------------
/dht.py:
--------------------------------------------------------------------------------
from chord import Local, Daemon, repeat_and_sleep, inrange
from remote import Remote
from address import Address
from settings import SIZE
import json
import socket

# data structure that represents a distributed hash table
class DHT(object):
    def __init__(self, local_address, remote_address = None):
        self.local_ = Local(local_address, remote_address)
        def set_wrap(msg):
            return self._set(msg)
        def get_wrap(msg):
            return self._get(msg)

        self.data_ = {}
        self.shutdown_ = False

        self.local_.register_command("set", set_wrap)
        self.local_.register_command("get", get_wrap)

        self.daemons_ = {}
        self.daemons_['distribute_data'] = Daemon(self, 'distribute_data')
        self.daemons_['distribute_data'].start()

        self.local_.start()

    def shutdown(self):
        self.local_.shutdown()
        self.shutdown_ = True

    def _get(self, request):
        try:
            data = json.loads(request)
            # we have the key
            return json.dumps({'status':'ok', 'data':self.get(data['key'])})
        except Exception:
            # key not present
            return json.dumps({'status':'failed'})

    def _set(self, request):
        try:
            data = json.loads(request)
            key = data['key']
            value = data['value']
            self.set(key, value)
            return json.dumps({'status':'ok'})
        except Exception:
            # something is not working
            return json.dumps({'status':'failed'})

    def get(self, key):
        try:
            return self.data_[key]
        except Exception:
            # not in our range; map the key into the ring's id space
            suc = self.local_.find_successor(hash(key) % SIZE)
            if self.local_.id() == suc.id():
                # it's us but we don't have it
                return None
            try:
                response = suc.command('get %s' % json.dumps({'key':key}))
                if not response:
                    raise Exception
                value = json.loads(response)
                if value['status'] != 'ok':
                    raise Exception
                return value['data']
            except Exception:
                return None

    def set(self, key, value):
        # eventually distribute_data will migrate the key to its owner
        self.data_[key] = value

    @repeat_and_sleep(5)
    def distribute_data(self):
        to_remove = []
        # iterate over a snapshot of the keys, to avoid a RuntimeError in case
        # the dict gets updated by another thread
        keys = self.data_.keys()
        for key in keys:
            if self.local_.predecessor() and \
               not inrange(hash(key) % SIZE, self.local_.predecessor().id(1), self.local_.id(1)):
                try:
                    node = self.local_.find_successor(hash(key) % SIZE)
                    node.command("set %s" % json.dumps({'key':key, 'value':self.data_[key]}))
                    # print "moved %s into %s" % (key, node.id())
                    to_remove.append(key)
                    print "migrated"
                except socket.error:
                    # we'll migrate it next time
                    print "error migrating"
        # remove all the keys we no longer own
        for key in to_remove:
            del self.data_[key]
        # keep calling us
        return True

def create_dht(lport):
    laddress = map(lambda port: Address('127.0.0.1', port), lport)
    r = [DHT(laddress[0])]
    for address in laddress[1:]:
        r.append(DHT(address, laddress[0]))
    return r


if __name__ == "__main__":
    import sys
    if len(sys.argv) == 2:
        dht = DHT(Address("127.0.0.1", sys.argv[1]))
    else:
        dht = DHT(Address("127.0.0.1", sys.argv[1]), Address("127.0.0.1", sys.argv[2]))
    raw_input("Press any key to shutdown")
    print "shutting down.."
    dht.shutdown()
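The `set` and `get` handlers registered above ride on the same one-line
command protocol as the Chord internals: the command name, then a JSON
payload. The exchanges look like this (values are illustrative):

```python
# set: request line, then the response line
'set {"key": "answer", "value": 42}'
'{"status": "ok"}'

# get: request line, then the response line
'get {"key": "answer"}'
'{"status": "ok", "data": 42}'
```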
--------------------------------------------------------------------------------
/dfs.py:
--------------------------------------------------------------------------------
# read          : file, offset, size -> status b64_data
# write         : file, offset, size, buf
# truncate_size : file, new_size
# attr          : file -> dict
import json
import base64
import settings
from chord import Local
from address import Address

BLOCK_SIZE = 4096

# data structure that represents a distributed file system
class DFS(object):
    def __init__(self, local_address, remote_address = None):
        self.local_ = Local(local_address, remote_address)
        def read_wrap(msg):
            return self._read(msg)
        def write_wrap(msg):
            return self._write(msg)
        def attr_wrap(msg):
            return self._attr(msg)

        self.data_ = {}
        self.attr_ = {}

        self.shutdown_ = False

        self.local_.register_command("read", read_wrap)
        self.local_.register_command("write", write_wrap)
        self.local_.register_command("attr", attr_wrap)

        self.local_.start()

    # helper function to eliminate duplicated code
    def get_offsets(self, offset, size):
        block_offset = offset / BLOCK_SIZE
        start = offset % BLOCK_SIZE
        end = min(start + size, BLOCK_SIZE)
        return (block_offset, start, end)

    def get_id(self, file_name, offset):
        block_offset, start, end = self.get_offsets(offset, 0)
        return "%s:%s" % (file_name, block_offset)

    def get_hash(self, file_name, offset):
        return hash(self.get_id(file_name, offset)) % settings.SIZE

    def get_remote(self, file_name, offset):
        hs = self.get_hash(file_name, offset)
        suc = self.local_.find_successor(hs)
        return suc

    def _read(self, request):
        # request  = {'file_name':'my_file.txt', 'offset':<#NUMBER#>, 'size':<#NUMBER#>}
        # response = {'status':'failed'} |
        #            {'status':'failed','code':<#CODE ERROR#>} |
        #            {'status':'redirect'} |
        #            {'status':'ok','data':<#DATA READ AS B64#>}
        try:
            data = json.loads(request)
            if not self.local_.is_ours(self.get_hash(data['file_name'], data['offset'])):
                return json.dumps({'status':'redirect'})
            # otherwise continue; note read() takes (path, size, offset)
            result = self.read(data['file_name'], data['size'], data['offset'])
            if isinstance(result, int):
                # negative errno codes signal failure
                return json.dumps({'status':'failed', 'code':result})
            return json.dumps({'status':'ok', 'data':base64.b64encode(result)})
        except Exception:
            return json.dumps({'status':'failed'})

    def _write(self, request):
        # request  = {'file_name':'my_file.txt', 'offset':<#NUMBER#>, 'data':<#B64 ENCODED DATA#>}
        # response = {'status':'failed'} |
        #            {'status':'failed','code':<#CODE ERROR#>} |
        #            {'status':'redirect'} |
        #            {'status':'ok','bytes':<#BYTES WRITTEN#>}
        try:
            data = json.loads(request)
            if not self.local_.is_ours(self.get_hash(data['file_name'], data['offset'])):
                return json.dumps({'status':'redirect'})
            result = self.write(data['file_name'], base64.b64decode(data['data']), data['offset'])
            if result < 0:
                return json.dumps({'status':'failed', 'code':result})
            else:
                return json.dumps({'status':'ok', 'bytes':result})
        except Exception:
            return json.dumps({'status':'failed'})

    def _attr(self, request):
        # request  = {'file_name':'my_file.txt'[,'size':<#NEW VALUE#>|,'mode':<#NEW MODE#>]}
        # response = {'status':'failed'} |
        #            {'status':'failed','code':<#CODE ERROR#>} |
        #            {'status':'redirect'}
        try:
            data = json.loads(request)
            if not self.local_.is_ours(self.get_hash(data['file_name'], 0)):
                return json.dumps({'status':'redirect'})
            # TODO: return the attributes once attr() below is implemented
            return json.dumps({'status':'failed'})
        except Exception:
            return json.dumps({'status':'failed'})

    def trunc_(self, request):
        # TODO: not implemented yet
        pass

    def read(self, path, size, offset):
        # unfinished: depends on attr() below being implemented first
        attr = self.attr(path)

        if attr and offset > attr['size']:
            return ""

        block_offset, start, end = self.get_offsets(offset, size)
        block_id = self.get_id(path, offset)

        return self.data_[block_id][start:end]

    def write(self, path, buf, offset):
        # TODO: not implemented yet
        pass

    def attr(self, path):
        # TODO: not implemented yet
        pass

if __name__ == "__main__":
    import sys
    if len(sys.argv) == 2:
        dfs = DFS(Address("127.0.0.1", sys.argv[1]))
    else:
        dfs = DFS(Address("127.0.0.1", sys.argv[1]), Address("127.0.0.1", sys.argv[2]))
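The block arithmetic in `get_offsets` is easiest to check with concrete
numbers: with `BLOCK_SIZE = 4096`, a 3000-byte read at offset 5000 lands
entirely in block 1 of the file, 904 bytes in.

```python
BLOCK_SIZE = 4096

offset, size = 5000, 3000
block_offset = offset / BLOCK_SIZE   # 1 -> key "my_file.txt:1"
start = offset % BLOCK_SIZE          # 904
end = min(start + size, BLOCK_SIZE)  # 3904; reads never cross a block edge
```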
--------------------------------------------------------------------------------
/chord.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
import sys
import json
import socket
import threading
import random
import time
import mutex

from address import Address, inrange
from remote import Remote
from settings import *
from network import *

def repeat_and_sleep(sleep_time):
    def decorator(func):
        def inner(self, *args, **kwargs):
            while 1:
                time.sleep(sleep_time)
                if self.shutdown_:
                    return
                ret = func(self, *args, **kwargs)
                if not ret:
                    return
        return inner
    return decorator

def retry_on_socket_error(retry_limit):
    def decorator(func):
        def inner(self, *args, **kwargs):
            retry_count = 0
            while retry_count < retry_limit:
                try:
                    ret = func(self, *args, **kwargs)
                    return ret
                except socket.error:
                    # exponential backoff between retries
                    time.sleep(2 ** retry_count)
                    retry_count += 1
                    if retry_count == retry_limit:
                        print "Retry count limit reached, aborting.. (%s)" % func.__name__
                        self.shutdown_ = True
                        sys.exit(-1)
        return inner
    return decorator


# daemon thread that runs one of Local's methods
class Daemon(threading.Thread):
    def __init__(self, obj, method):
        threading.Thread.__init__(self)
        self.obj_ = obj
        self.method_ = method

    def run(self):
        getattr(self.obj_, self.method_)()
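# Usage sketch (not part of the original source): the pieces above combine
# into a self-healing periodic task, which is exactly how Local.start() wires
# up its maintenance loops below.
#
#     class Janitor(object):
#         def __init__(self):
#             self.shutdown_ = False       # both decorators expect this flag
#
#         @repeat_and_sleep(5)
#         @retry_on_socket_error(3)
#         def sweep(self):
#             do_work()        # hypothetical periodic task
#             return True      # a falsy return ends the loop
#
#     Daemon(Janitor(), 'sweep').start()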
# class representing a local peer
class Local(object):
    def __init__(self, local_address, remote_address = None):
        self.address_ = local_address
        print "self id = %s" % self.id()
        self.shutdown_ = False
        # list of successors
        self.successors_ = []
        # join the DHT
        self.join(remote_address)
        # we don't have daemons until we start
        self.daemons_ = {}
        # initially no commands
        self.command_ = []

    # is this id within our range?
    def is_ours(self, id):
        assert id >= 0 and id < SIZE
        return inrange(id, self.predecessor_.id(1), self.id(1))

    def shutdown(self):
        self.shutdown_ = True
        self.socket_.shutdown(socket.SHUT_RDWR)
        self.socket_.close()

    # logging function
    def log(self, info):
        f = open("/tmp/chord.log", "a+")
        f.write(str(self.id()) + " : " + info + "\n")
        f.close()
        # print str(self.id()) + " : " + info

    def start(self):
        # start the daemons
        self.daemons_['run'] = Daemon(self, 'run')
        self.daemons_['fix_fingers'] = Daemon(self, 'fix_fingers')
        self.daemons_['stabilize'] = Daemon(self, 'stabilize')
        self.daemons_['update_successors'] = Daemon(self, 'update_successors')
        for key in self.daemons_:
            self.daemons_[key].start()

        self.log("started")

    def ping(self):
        return True

    def join(self, remote_address = None):
        # initially just set the successor
        self.finger_ = map(lambda x: None, range(LOGSIZE))

        self.predecessor_ = None

        if remote_address:
            remote = Remote(remote_address)
            self.finger_[0] = remote.find_successor(self.id())
        else:
            self.finger_[0] = self

        self.log("joined")

    @repeat_and_sleep(STABILIZE_INT)
    @retry_on_socket_error(STABILIZE_RET)
    def stabilize(self):
        self.log("stabilize")
        suc = self.successor()
        # x is our new successor iff
        # - x = pred(suc(n))
        # - x exists
        # - x is in the range (n, suc(n))
        # - [n+1, suc(n)) is non-empty
        # fix finger_[0] if our successor failed
        if suc.id() != self.finger_[0].id():
            self.finger_[0] = suc
        x = suc.predecessor()
        if x != None and \
           inrange(x.id(), self.id(1), suc.id()) and \
           self.id(1) != suc.id() and \
           x.ping():
            self.finger_[0] = x
        # we notify our new successor about us
        self.successor().notify(self)
        # keep calling us
        return True

    def notify(self, remote):
        # someone thinks they are our predecessor; they are iff
        # - we don't have a predecessor, OR
        # - the new node r is in the range (pred(n), n), OR
        # - our previous predecessor is dead
        self.log("notify")
        if self.predecessor() == None or \
           inrange(remote.id(), self.predecessor().id(1), self.id()) or \
           not self.predecessor().ping():
            self.predecessor_ = remote

    @repeat_and_sleep(FIX_FINGERS_INT)
    def fix_fingers(self):
        # randomly select an entry in the finger_ table and update its value
        self.log("fix_fingers")
        i = random.randrange(LOGSIZE - 1) + 1
        self.finger_[i] = self.find_successor(self.id(1 << i))
        # keep calling us
        return True

[... the rest of chord.py (update_successors, the run loop, find_successor,
closest_preceding_finger, the command dispatch used by register_command, ...)
is truncated in the source dump ...]
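`fix_fingers` relies on the standard Chord invariant that finger `i` of node
`n` points at `successor((n + 2^i) mod SIZE)`; `self.id(1 << i)` computes that
target. With `LOGSIZE = 8` (so `SIZE = 256`) and a node with id 200, the
finger targets work out as follows:

```python
LOGSIZE = 8
SIZE = 1 << LOGSIZE   # 256

n = 200  # illustrative node id
print [(n + (1 << i)) % SIZE for i in range(LOGSIZE)]
# -> [201, 202, 204, 208, 216, 232, 8, 72]
```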
--------------------------------------------------------------------------------
/fuse_dfs.py:
--------------------------------------------------------------------------------
[... beginning of fuse_dfs.py truncated in the source dump ...]
# unmount with fusermount -u
#

# Brief Summary
# =============
# This is a FS implemented on top of the python-chord library. There are many
# things to experiment with (caching, adaptive load balancing, etc).
#
# I have not run the POSIX test suite yet, but I did verify that the MD5 sum
# of the copy of a 30 MB file matched the original.
#
# The structure is pretty simple; every item stored has the following fields:
# {'type': ('directory', 'file'),
#  'data': ___
# }
#
# - In the case of directories, 'data' contains a dictionary with the key
#   'files' that maps to a list of files in the FS.
# - In the case of files, 'data' contains a 'b64_data' field with bytes
#   encoded in base64.
#
#

import sys
import stat
import errno
import fuse
import socket
import dfs
from time import time
from subprocess import *

import chord
import json
import base64

# main() below uses these unqualified
from address import Address
from chord import Local

fuse.fuse_python_api = (0, 2)

# port of a chord node listening on <127.0.0.1:PORT>
PORT = 19308
BLOCK_SIZE = 4096


# default stat, not very useful
class MyStat(fuse.Stat):
    def __init__(self):
        self.st_mode = stat.S_IFDIR | 0755
        self.st_dev = 0
        self.st_ino = 0
        self.st_nlink = 1
        self.st_uid = 1000  # my uid
        self.st_gid = 1000  # my gid
        self.st_size = 4096
        self.st_atime = 0
        self.st_mtime = 0
        self.st_ctime = 0


# logging function
def log(info):
    f = open("/tmp/dfs.log", "a+")
    f.write(info + "\n")
    f.close()

# decorator to log every system call on our fs (strace equivalent)
def logtofile(func):
    def inner(self, *args, **kwargs):
        f = open("/tmp/dfs.log", "a+")
        f.write("Function %s called with parameters %s %s\n" % (func.__name__,
                args, kwargs))
        f.close()
        return func(self, *args, **kwargs)
    return inner


class FUSEDFS(fuse.Fuse):
    def __init__(self, local, *args, **kw):
        fuse.Fuse.__init__(self, *args, **kw)
        self.local_ = local

    # helper function to eliminate duplicated code
    def get_offsets(self, offset, size):
        block_offset = offset / BLOCK_SIZE
        start = offset % BLOCK_SIZE
        end = min(start + size, BLOCK_SIZE)
        return (block_offset, start, end)

    # this is the method responsible for making our FS visible to Linux
    @logtofile
    def getattr(self, path):
        st = MyStat()

        # TODO: this metadata should be stored in the :0 block
        st.st_atime = int(time())
        st.st_mtime = st.st_atime
        st.st_ctime = st.st_atime

        # if we are asked for the root, just get '/'; else drop the leading
        # '/' and ask for block :0
        if path == '/':
            obj = get('/')
        else:
            obj = get("%s:0" % path[1:])

        # if nothing was returned it can mean two things: the file doesn't
        # exist, or we need to bootstrap
        if obj == None:
            if path == '/':
                log("Creating empty root folder")
                obj = {'type':'directory', 'data':{'files':[]}}
                put(path, obj)
            else:
                log("File '%s' doesn't exist" % path)
                return -errno.ENOENT

        if obj['type'] == 'file':
            # if it's a file, set the file flag and get the size
            st.st_mode = stat.S_IFREG | 0666
            # we assume there's nothing bigger than 4 GB here. we do a binary
            # search to find the last block; this could be improved a lot with
            # different algorithms or ideas.
            left = 0
            right = (1 << 32) / BLOCK_SIZE
            while left + 1 < right:
                mid = (left + right) / 2
                offsets = self.get_offsets(mid * BLOCK_SIZE, 1)
                key = "%s:%s" % (path[1:], offsets[0])
                block = get(key)
                if block != None:
                    left = mid
                else:
                    right = mid
            # the total size is the sum of the previous blocks plus the data
            # stored at block 'left'
            key = "%s:%s" % (path[1:], left)
            block = get(key)
            size = left * BLOCK_SIZE + len(base64.b64decode(block['data']['b64_data']))

            st.st_size = size
        return st
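    # Worked example of the size search above (illustrative numbers): for a
    # 10,000-byte file, blocks 0 and 1 hold 4096 bytes each and block 2 holds
    # the remaining 1808. The binary search converges on left = 2 (the last
    # block that exists), so st_size = 2 * 4096 + 1808 = 10000. The search
    # costs O(log(2^32 / BLOCK_SIZE)) = 20 get() round trips per getattr.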
    @logtofile
    def readdir(self, path, offset):
        files = ["..", "."]
        # right now we only support '/', but this is general enough to
        # support folders in case we decide to implement mkdir
        directory = get(path)
        if directory != None and directory['type'] == 'directory':
            files.extend(directory['data']['files'])

        for r in files:
            yield fuse.Direntry(str(r))

    @logtofile
    def mknod(self, path, mode, dev):
        root = get('/')
        key = path[1:]
        # check whether it exists; we shouldn't create an already existing file
        if key in root['data']['files']:
            return -42
        # we are going to add it then!
        root['data']['files'].extend([key])
        put('/', root)

        # we only set the initial block
        key = "%s:0" % key
        obj = {'type': 'file',
               'data': {'b64_data': base64.b64encode("")}
              }
        put(key, obj)

        # logging
        log("New node created '%s'" % key)

        return 0

    @logtofile
    def unlink(self, path):
        # removing files is not possible yet
        return -42

    @logtofile
    def read(self, path, size, offset):
        # get rid of the leading '/'
        path = path[1:]

        # first we make sure it exists
        key = "%s:0" % path
        obj = get(key)
        if obj == None:
            return -errno.ENOENT

        # it exists, so calculate the key for the current block
        block_offset, start, end = self.get_offsets(offset, size)
        key = "%s:%s" % (path, block_offset)

        # get the file block
        obj = get(key)
        # if it doesn't exist, just return 0, because it means we are at the
        # end of the file
        if obj == None:
            return 0

        # we read
        data = base64.b64decode(obj['data']['b64_data'])[start:end]

        return data

    @logtofile
    def write(self, path, buf, offset):
        # get rid of the leading '/'
        path = path[1:]

        # first we make sure it exists
        key = "%s:0" % path
        obj = get(key)
        if obj == None:
            return -errno.ENOENT

        # it exists, so calculate the key for the current block
        block_offset, start, end = self.get_offsets(offset, len(buf))

        key = "%s:%s" % (path, block_offset)

        # get the file block
        obj = get(key)

        # if it doesn't exist, just create it
        if obj == None:
            obj = {'type':'file',
                   'data':{'b64_data': None}
                  }

            # pad with 0x00's up to the write position
            data = ("\00" * start) + buf[:end-start]
            obj['data']['b64_data'] = base64.b64encode(data)
        else:
            data = base64.b64decode(obj['data']['b64_data'])
            # this is the new data
            data = data[:start] + buf[:end-start] + data[end:]
            obj['data']['b64_data'] = base64.b64encode(data)

        # save into the DHT
        put(key, obj)
        return int(end-start)
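    # Note on the clipping above (illustrative numbers): a 6000-byte write at
    # offset 0 maps to block 0 with start = 0 and end = 4096, so only the
    # first 4096 bytes are stored and write() returns 4096; the caller is
    # expected to retry with the remaining 1904 bytes at offset 4096, which
    # then lands in block 1.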
    @logtofile
    def release(self, path, flags):
        return 0

    @logtofile
    def open(self, path, flags):
        return 0

    @logtofile
    def truncate(self, path, size):
        # get rid of the leading '/'
        path = path[1:]

        # first we make sure it exists
        key = "%s:0" % path
        obj = get(key)
        if obj == None:
            return -errno.ENOENT

        # it exists, so calculate the key for the block the new size falls in
        block_offset, start, end = self.get_offsets(size, 0)
        key = "%s:%s" % (path, block_offset)

        # get the file block
        obj = get(key)

        # if it doesn't exist, just return 0
        if obj == None:
            return 0

        # if it does exist, truncate it
        # TODO: blocks past block_offset are left behind, so getattr's size
        # search will still find them; this needs a delete operation
        data = base64.b64decode(obj['data']['b64_data'])
        obj['data']['b64_data'] = base64.b64encode(data[:end])

        put(key, obj)
        log("File %s truncated to %s" % (key, end))
        return 0

    @logtofile
    def utime(self, path, times):
        return 0

    @logtofile
    def mkdir(self, path, mode):
        return 0

    @logtofile
    def rmdir(self, path):
        return 0

    @logtofile
    def rename(self, pathfrom, pathto):
        return 0

    @logtofile
    def fsync(self, path, isfsyncfile):
        return 0

def main():
    usage = """
FUSEDFS: A filesystem implemented on top of a DHT.
""" + fuse.Fuse.fusage

    if len(sys.argv) == 2:
        local = Local(Address("127.0.0.1", sys.argv[1]))
    else:
        local = Local(Address("127.0.0.1", sys.argv[1]), Address("127.0.0.1", sys.argv[2]))

    local.start()
    server = FUSEDFS(local, version="%prog " + fuse.__version__,
                     usage=usage, dash_s_do='setsingle')
    server.parse(errex=1)
    server.main()

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
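One loose end in `fuse_dfs.py`: the visible portion calls module-level `get()`
and `put()` helpers that are never defined above, so they presumably live in
the truncated head of the file. A plausible sketch, assuming they wrap the
DHT's `get`/`set` commands against the chord node at `127.0.0.1:PORT`, using
the CRLF framing from `network.py` (the `dht_command` helper is hypothetical):

```python
import json
import socket

from network import read_from_socket, send_to_socket

def dht_command(msg):
    # hypothetical helper: one-shot command against the local node
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect(("127.0.0.1", PORT))
    send_to_socket(s, msg)
    response = read_from_socket(s)
    s.close()
    return response

def get(key):
    response = dht_command("get %s" % json.dumps({'key': key}))
    if not response:
        return None
    value = json.loads(response)
    if value['status'] != 'ok':
        return None
    return value['data']

def put(key, value):
    dht_command("set %s" % json.dumps({'key': key, 'value': value}))
```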