├── README.md ├── Vagrantfile ├── client ├── __init__.py └── ayfs.py ├── provisioners ├── etcd.sh └── node.sh ├── server └── __init__.py └── start_server.py /README.md: -------------------------------------------------------------------------------- 1 | # ayFS 2 | Are you fucking serious 3 | 4 | This is a filesystem where all the files exist on the network between machines so disk is not used. 5 | 6 | Requires an etcd server and at least 3 servers to form a chain of servers sending each other packets. 7 | 8 | I'll polish up the code and README later but the setup code is pretty straight forward. 9 | 10 | I made this at HackBeanpot 2015 so its a bit of a mess right now. 11 | 12 | And no, its not POSIX compliant. 13 | 14 | 15 | On the servers, just run `python start_server.py` after changing __main__ to point to the right etcd server. 16 | 17 | On your local client, run `python client/ayfs.py ` 18 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | Vagrant.configure(2) do |config| 5 | config.vm.synced_folder "./", "/vagrant" 6 | config.vm.box = "digital_ocean" 7 | 8 | config.ssh.private_key_path = "~/.ssh/digital_ocean" 9 | config.vm.provider :digital_ocean do |provider, override| 10 | override.ssh.private_key_path = '~/.ssh/digital_ocean' 11 | override.vm.box = 'digital_ocean' 12 | override.vm.box_url = "https://github.com/smdahlen/vagrant-digitalocean/raw/master/box/digital_ocean.box" 13 | provider.token = '06afa880e5c6696b4e9a7eebbd61a2e62d37d9b8f1df9183594e7fc812e4a6b4' 14 | provider.image = 'ubuntu-14-04-x64' 15 | provider.region = 'nyc2' 16 | provider.size = '512mb' 17 | end 18 | 19 | config.vm.define "node4" do |node4_config| 20 | node4_config.vm.network "private_network", ip: "10.0.0.2" 21 | node4_config.vm.hostname = "node4" 22 | node4_config.vm.provision "shell", path: "provisioners/node.sh" 23 | end 24 | 25 | config.vm.define "node6" do |node4_config| 26 | node4_config.vm.network "private_network", ip: "10.0.0.2" 27 | node4_config.vm.hostname = "node6" 28 | node4_config.vm.provision "shell", path: "provisioners/node.sh" 29 | end 30 | 31 | 32 | 33 | config.vm.define "node5" do |node5_config| 34 | node5_config.vm.network "private_network", ip: "10.0.0.3" 35 | node5_config.vm.hostname = "node5" 36 | node5_config.vm.provision "shell", path: "provisioners/node.sh" 37 | end 38 | 39 | config.vm.define "node3" do |node3_config| 40 | node3_config.vm.network "private_network", ip: "10.0.0.6" 41 | node3_config.vm.hostname = "node3" 42 | node3_config.vm.provision "shell", path: "provisioners/node.sh" 43 | end 44 | 45 | config.vm.define "node4" do |node4_config| 46 | node4_config.vm.network "private_network", ip: "10.0.0.7" 47 | node4_config.vm.hostname = "node4" 48 | node4_config.vm.provision "shell", path: "provisioners/node.sh" 49 | end 50 | 51 | 52 | config.vm.define "etcd" do |etcd_config| 53 | etcd_config.vm.network "private_network", ip: "10.0.0.4" 54 | etcd_config.vm.hostname = "etcd" 55 | etcd_config.vm.provision "shell", path: "provisioners/etcd.sh" 56 | end 57 | 58 | config.vm.define "client" do |client_config| 59 | client_config.vm.network "private_network", ip: "10.0.0.5" 60 | client_config.vm.hostname = "client" 61 | client_config.vm.provision "shell", path: "provisioners/node.sh" 62 | end 63 | 64 | end 65 | -------------------------------------------------------------------------------- /client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pranav/ayfs/b5820e3d07d77c3fd530b76aa803397142300747/client/__init__.py -------------------------------------------------------------------------------- /client/ayfs.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import socket 3 | import Queue 4 | import struct 5 | import sys 6 | import errno 7 | import time 8 | import json 9 | import etcd 10 | from pprint import pprint 11 | from fuse import FUSE, Operations, FuseOSError 12 | import logging 13 | 14 | logger = logging.getLogger(__name__) 15 | logger.setLevel(logging.INFO) 16 | 17 | 18 | class AYFS(Operations): 19 | """ 20 | /files/ 21 | value: json 22 | example: 23 | { 24 | "mode": "0700", 25 | "uid": "001", 26 | "gid": "002", 27 | } 28 | 29 | """ 30 | def __init__(self, etcd_host='node4'): 31 | self.FILE_PREFIX = '/files' 32 | self.DIR_INFO = '__ayfs_dir_info' 33 | self.s_receiver = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 34 | self.etcd = etcd.Client(host=etcd_host) 35 | self.add_test_data() 36 | self.receive_queue = Queue.Queue() 37 | self.start_receiver_thread() 38 | self.BLOCK_SIZE = 1000 39 | self.PACK = "II%ds" % self.BLOCK_SIZE 40 | 41 | def unpack(self, raw_block): 42 | block_id, block_size, data = struct.unpack(self.PACK, raw_block) 43 | return block_id, block_size, data[:block_size] 44 | 45 | def start_receiver_thread(self): 46 | t = threading.Thread(target=self.receiver_worker) 47 | t.daemon = True 48 | t.start() 49 | 50 | def get_etcd_tree(self, key): 51 | return self.etcd.read(key, recursive=True).children 52 | 53 | def receiver_worker(self): 54 | self.s_receiver.bind(('0.0.0.0', 4101)) 55 | self.s_receiver.listen(100) 56 | while True: 57 | client, addr = self.s_receiver.accept() 58 | data = client.recv(100000) 59 | self.receive_queue.put_nowait(data) 60 | 61 | 62 | def get_requested_block_ids(self): 63 | my_ip = self.get_my_ipaddr() 64 | block_ids = [] 65 | for node in self.get_etcd_tree('/wanted_blocks'): 66 | if str(node.value) == my_ip: 67 | block_ids.append(node.key.split('/')[2]) 68 | return block_ids 69 | 70 | 71 | def get_file(self, path): 72 | if path.endswith('/'): 73 | path += self.DIR_INFO 74 | if path.endswith('.'): 75 | path = path[:-1] + self.DIR_INFO 76 | logger.info("Got %s" % path) 77 | return json.loads(self.etcd.get(self.FILE_PREFIX + path).value) 78 | 79 | def set_file(self, path, file_dict): 80 | if path.endswith('/'): 81 | self.etcd.write(self.FILE_PREFIX + path + self.DIR_INFO, json.dumps(file_dict)) 82 | else: 83 | self.etcd.write(self.FILE_PREFIX + path, json.dumps(file_dict)) 84 | logger.debug("Added %s" % path) 85 | 86 | def delete_file(self, path): 87 | if path.endswith('/'): 88 | path += self.DIR_INFO 89 | self.etcd.delete(self.FILE_PREFIX + path) 90 | logger.info("Deleted %s" % path) 91 | 92 | def get_subfolder_paths(self, path): 93 | paths = [] 94 | try: 95 | for node in self.etcd.read(self.FILE_PREFIX + path, recursive=True).children: 96 | paths.append(node.key.split("/")[2]) 97 | except KeyError: 98 | pass 99 | return paths 100 | 101 | def chmod(self, path, mode): 102 | f = self.get_file(path) 103 | f['mode'] = mode 104 | self.set_file(path, f) 105 | logger.info("Set mode on %s to %s" % (path, mode)) 106 | return 0 107 | 108 | def chown(self, path, uid, gid): 109 | f = self.get_file(path) 110 | f['uid'] = uid 111 | f['gid'] = gid 112 | self.set_file(path, f) 113 | return 0 114 | 115 | def create(self, path, mode, fi=None): 116 | current_time = int(time.time()) 117 | #TODO: Fix uid/gid 118 | file_dict = { 119 | "mode": mode, 120 | "uid": 0, 121 | "gid": 0, 122 | "atime": current_time, 123 | "mtime": current_time, 124 | "size": 0, 125 | "blocks": ['0'] 126 | } 127 | self.set_file(path, file_dict) 128 | logging.info("Created ", path) 129 | return 0 130 | 131 | def destroy(self, path): 132 | try: 133 | self.delete_file(path) 134 | logger.info("Deleted file: %s" % path) 135 | except KeyError: 136 | pass 137 | return 0 138 | 139 | def getattr(self, path, fh=None): 140 | try: 141 | logger.info("Getting attrs for %s" % path) 142 | f = self.get_file(path) 143 | attrs = ['atime', 'gid', 'mode', 'mtime', 'size', 'uid'] 144 | attrs_dict = dict(("st_%s" % key, f[key]) for key in attrs) 145 | logger.info(attrs_dict) 146 | return attrs_dict 147 | except KeyError: 148 | raise FuseOSError(errno.ENOENT) 149 | 150 | def mkdir(self, path, mode): 151 | pass 152 | 153 | def read(self, path, size, offset, fh): 154 | blocks_ids = self.get_blocks_ids(path) 155 | for block_id in blocks_ids: 156 | self.etcd.write("/wanted_blocks/%d" % block_id, self.get_my_ipaddr()) 157 | received_blocks = 0 158 | f = self.get_file(path) 159 | blocks = {} 160 | while received_blocks < len(f['blocks']): 161 | #TODO: Put back block if not for me, block I need 162 | raw_block = self.receive_queue.get(True) 163 | received_block_id, size, data = self.unpack(raw_block) 164 | blocks[received_block_id] = data 165 | logger.info("Received block: %s" % received_block_id) 166 | received_blocks += 1 167 | whole_file = self.assemble_file(blocks, blocks_ids) 168 | return whole_file 169 | 170 | def assemble_file(self, blocks, block_ids): 171 | fbuffer = blocks[block_ids[0]] 172 | for block_id in block_ids[1:]: 173 | fbuffer += blocks[block_id] 174 | return fbuffer 175 | 176 | def get_blocks_ids(self, path): 177 | f = self.get_file(path) 178 | return map(int, f['blocks']) 179 | 180 | def readdir(self, path, fh): 181 | logger.info("Readdir path %s" % path) 182 | dirs = ['.'] 183 | logger.info("Found files %s" % dirs) 184 | dirs.extend(self.get_subfolder_paths(path)) 185 | 186 | return dirs 187 | 188 | def readlink(self, path): 189 | pass 190 | 191 | def rename(self, old, new): 192 | f_old = self.get_file(old) 193 | self.set_file(old, f_old) 194 | self.delete_file(old) 195 | return 0 196 | 197 | def rmdir(self, path): 198 | self.delete_file(path) 199 | return 0 200 | 201 | def symlink(self, target, source): 202 | pass 203 | 204 | def truncate(self, path, length, fh=None): 205 | pass 206 | 207 | def unlink(self, path): 208 | self.destroy(path) 209 | return 0 210 | 211 | def utimens(self, path, times=None): 212 | pass 213 | 214 | def write(self, path, data, offset, fh): 215 | logger.info("Offset: %s" % offset) 216 | f = self.get_file(path) 217 | if offset == 0: 218 | f['blocks'] = ['0'] 219 | f['size'] = 0 220 | for i in range(0, len(data), self.BLOCK_SIZE): 221 | block_id = self.get_new_block_id() 222 | data_size = len(data[i:i+self.BLOCK_SIZE]) 223 | block = struct.pack(self.PACK, block_id, data_size, data[i:i+self.BLOCK_SIZE]) 224 | f = self.upload_block(block, f, block_id) 225 | self.set_file(path, f) 226 | f['size'] += len(data) 227 | logger.info("Total Size: %d" % f['size']) 228 | self.set_file(path, f) 229 | return len(data) 230 | 231 | def upload_block(self, block, f, block_id): 232 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 233 | node = list(self.etcd.read('/nodes/', recursive=True).children)[0] 234 | host = str(node.key.split('/')[2]) 235 | sock.connect((host, 4100)) 236 | sock.sendall(block) # Pray 237 | sock.close() 238 | if f['blocks'] == ['0']: 239 | f['blocks'] = [block_id] 240 | else: 241 | f['blocks'].append(block_id) 242 | return f 243 | 244 | def get_new_block_id(self): 245 | """ 246 | Python ints are 4 bytes. 247 | :return: int 248 | """ 249 | block_id = 1 250 | for node in self.etcd.read("/files/", recursive=True).children: 251 | blocks = map(int, json.loads(node.value)['blocks']) 252 | block_id = max(block_id, max(blocks) + 1) 253 | return block_id 254 | 255 | def get_my_ipaddr(self): 256 | #TODO: Figure out the public ip differently 257 | return socket.gethostbyname(socket.gethostname()) 258 | 259 | def add_test_data(self): 260 | # 40755 261 | self.create("/", 16877) 262 | # 100644 263 | self.create("/test", 33188) 264 | 265 | 266 | 267 | 268 | if __name__ == '__main__': 269 | if len(sys.argv) != 3: 270 | print('usage: %s ' % sys.argv[0]) 271 | exit(1) 272 | 273 | fuse = FUSE(AYFS(sys.argv[1]), sys.argv[2], foreground=True, nothreads=True) -------------------------------------------------------------------------------- /provisioners/etcd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | apt-get update 4 | apt-get install -y docker.io 5 | docker run -d -p 4001:4001 quay.io/coreos/etcd 6 | -------------------------------------------------------------------------------- /provisioners/node.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | apt-get update 4 | apt-get install -y python-pip python-dev libffi-dev libssl-dev 5 | pip install python-etcd fusepy 6 | -------------------------------------------------------------------------------- /server/__init__.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import struct 4 | import etcd 5 | import socket 6 | import Queue 7 | import threading 8 | import logging 9 | 10 | logger = logging.getLogger(__name__) 11 | logger.setLevel(logging.INFO) 12 | logger.addHandler(logging.StreamHandler(sys.stderr)) 13 | 14 | 15 | class Server(): 16 | def __init__(self, etcd_host='127.0.0.1', etcd_port=4001): 17 | self.BUFFER_SIZE = 1000000 18 | self.WORKERS = 4 19 | self.received_queue = Queue.Queue() 20 | self.send_queue = Queue.Queue() 21 | self.client_queue = Queue.Queue() 22 | 23 | self.receive_port = 4100 24 | self.s_receiver = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 25 | self.recipient_host = None 26 | 27 | self.etcd_host = etcd_host 28 | self.etcd_port = etcd_port 29 | self.etcd = etcd.Client(host=self.etcd_host, port=self.etcd_port, read_timeout=5) 30 | self.hostname = socket.gethostname() 31 | 32 | def start(self): 33 | self.start_heartbeat_thread() 34 | self.start_receiver_thread() 35 | self.start_processor_workers() 36 | self.start_client_sender_thread() 37 | self.send() 38 | 39 | def start_client_sender_thread(self): 40 | t = threading.Thread(target=self.client_sender) 41 | t.daemon = True 42 | t.start() 43 | 44 | def client_sender(self): 45 | while True: 46 | block_ip_data = self.client_queue.get(True) 47 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 48 | sock.connect((block_ip_data[0], 4101)) 49 | sock.sendall(block_ip_data[1]) 50 | sock.close() 51 | 52 | @staticmethod 53 | def key2host(nodekey): 54 | if len(nodekey.split('/')) > 2: 55 | return nodekey.split('/')[2] 56 | 57 | def get_all_nodes(self): 58 | return [node for node in self.etcd.read("/nodes", recursive=True).children if len(node.key.split('/')) > 2] 59 | 60 | def get_active_nodes(self): 61 | return [node.key for node in self.etcd.read("/active_nodes", recursive=True).children] 62 | 63 | def register_etcd(self): 64 | all_nodes = self.get_all_nodes() 65 | if len(all_nodes) == 0: 66 | self.etcd.write("/nodes/%s" % self.hostname, 0, ttl=1) 67 | if len(all_nodes) == 1 and Server.key2host(all_nodes[0].key) != self.hostname: 68 | self.etcd.write("/nodes/%s" % self.hostname, Server.key2host(all_nodes[0].key), ttl=1) 69 | self.recipient_host = Server.key2host(all_nodes[0].key) 70 | logger.info("Connected to %s" % Server.key2host(all_nodes[0].key)) 71 | if len(all_nodes) > 1: 72 | if not (self.recipient_host is None): 73 | if self.recipient_host in map(lambda x: Server.key2host(x.key), all_nodes): 74 | self.etcd.write("/nodes/%s" % self.hostname, self.recipient_host, ttl=1) 75 | return 76 | if Server.key2host(all_nodes[-1].key) != self.hostname and str(all_nodes[-1].value) != self.hostname: 77 | self.recipient_host = Server.key2host(all_nodes[-1].key) 78 | self.etcd.write("/nodes/%s" % self.hostname, Server.key2host(all_nodes[-1].key), ttl=1) 79 | logger.info("Connected to %s" % self.recipient_host) 80 | return 81 | self.etcd.write("/nodes/%s" % self.hostname, 0, ttl=1) 82 | 83 | 84 | 85 | 86 | def deregister_etcd(self): 87 | self.etcd.delete("/nodes/%s") 88 | 89 | def start_heartbeat_thread(self): 90 | t = threading.Thread(target=self.heartbeat) 91 | t.daemon = True 92 | t.start() 93 | 94 | def heartbeat(self): 95 | while True: 96 | time.sleep(0.2) 97 | self.register_etcd() 98 | 99 | def start_receiver_thread(self): 100 | receiver_thread = threading.Thread(target=self._start_listening) 101 | receiver_thread.daemon = True 102 | receiver_thread.start() 103 | logger.debug("Started Receiver Thread") 104 | 105 | def _start_listening(self): 106 | self.s_receiver.bind(('0.0.0.0', self.receive_port)) 107 | self.s_receiver.listen(1000) 108 | while True: 109 | client, address = self.s_receiver.accept() 110 | data = client.recv(self.BUFFER_SIZE) 111 | logger.debug("Received %s bytes" % len(data)) 112 | self.received_queue.put_nowait(data) 113 | 114 | def start_processor_workers(self): 115 | for worker_id in range(0, self.WORKERS): 116 | t = threading.Thread(target=self.worker) 117 | t.daemon = True 118 | t.start() 119 | logger.debug("Started worker %s" % worker_id) 120 | 121 | def unpack(self, raw_block): 122 | """ 123 | Returns raw block typle 124 | :param raw_block: 125 | :return:tuple (block_id, data_size, data) 126 | """ 127 | block_id, block_size, data = struct.unpack("II1000s", raw_block) 128 | return block_id, block_size, data[:block_size] 129 | 130 | def worker(self): 131 | while True: 132 | block = self.received_queue.get(True, timeout=None) 133 | block_id, data_size, block_data = self.unpack(block) 134 | 135 | for block_ip_id in self.get_list_of_wanted_blocks(): 136 | if int(block_id) == int(block_ip_id[1]): 137 | self.client_queue.put_nowait((block_ip_id[0], block)) 138 | try: 139 | self.etcd.delete("/wanted_blocks/%d" % int(block_ip_id[1])) 140 | except KeyError: 141 | pass 142 | logger.info("Added %s to client_queue for %s" % (block_ip_id[1], block_ip_id[0])) 143 | self.send_queue.put_nowait(block) 144 | logger.debug("Put block in send_queue") 145 | 146 | def send(self): 147 | while True: 148 | block = self.send_queue.get(True, timeout=None) 149 | logger.debug("Got block from send_queue") 150 | s_send = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 151 | s_send.connect((self.recipient_host, self.receive_port)) 152 | sent_bytes = s_send.sendall(block) 153 | s_send.close() 154 | logger.debug("Sent %s bytes to %s:%s" % (sent_bytes, self.recipient_host, self.receive_port)) 155 | 156 | def get_list_of_wanted_blocks(self): 157 | """ 158 | :return:tuple (ip, block) 159 | """ 160 | block_ids = [] 161 | for node in self.etcd.read("/wanted_blocks", recursive=True).children: 162 | if len(node.key.split('/')) > 2: 163 | block_ids.append((node.value, node.key.split('/')[2])) 164 | return block_ids 165 | 166 | def __exit__(self, exc_type, exc_val, exc_tb): 167 | self.deregister_etcd() 168 | 169 | -------------------------------------------------------------------------------- /start_server.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | import server 4 | 5 | if __name__ == '__main__': 6 | s = server.Server(etcd_host='etcd') 7 | s.start() 8 | --------------------------------------------------------------------------------