├── .gitignore ├── doc ├── Beaver.pdf └── impl.md ├── tests ├── upload.sh ├── track.sh ├── info.sh └── upload.py ├── haystack.conf ├── README.md ├── geventsendfile.py ├── haystack_logging.py ├── haystack.py ├── haystack_track.py └── haystack_storage.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.log 4 | -------------------------------------------------------------------------------- /doc/Beaver.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/richmonkey/haystack/HEAD/doc/Beaver.pdf -------------------------------------------------------------------------------- /tests/upload.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | host=localhost:8080 3 | ab -k -c 100 -n 100000 -p ./send.data -T 'application/octet-stream' http://$host/upload -------------------------------------------------------------------------------- /tests/track.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | host=localhost:8000 3 | 4 | curl "http://$host/request_writable_node" 5 | echo "" 6 | curl "http://$host/request_readable_node?fileid=18014398509481985" 7 | echo "" -------------------------------------------------------------------------------- /haystack.conf: -------------------------------------------------------------------------------- 1 | listenip="127.0.0.1" 2 | listenport=8080 3 | 4 | #storage 5 | dbfilename="/tmp/test.hs" 6 | dbindexfilename="/tmp/test.hsi" 7 | groupid=1 8 | trackip="127.0.0.1" 9 | trackport=8000 10 | 11 | #slave 12 | #masterip=x.x.x.x 13 | #masterport=xxxx -------------------------------------------------------------------------------- /tests/info.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | host=localhost:8000 3 | echo "track info" 4 | curl "http://$host/info" 5 | 
host=localhost:8080 6 | echo "\nstorage1 info" 7 | curl "http://$host/info" 8 | host=localhost:8081 9 | echo "\nstorage2 info" 10 | curl "http://$host/info" 11 | echo "" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Introduction 2 | --------- 3 | A simple distributed filesystem for saving billions of small files. 4 | Haystack appends small files into a very large file to reduce the inode count. 5 | You can find a more detailed design in doc/Beaver.pdf 6 | 7 | Run 8 | --------- 9 | track 10 | python2.7 haystack_track.py xxx.conf 11 | 12 | storage 13 | python2.7 haystack_storage.py xxx.conf 14 | 15 | Client 16 | --------- 17 | Upload: 18 | - request writable storage node from track node 19 | - post file to the writable storage node 20 | 21 | Download: 22 | - request readable storage node by fileid from track node 23 | - get file from the readable storage node 24 | -------------------------------------------------------------------------------- /geventsendfile.py: -------------------------------------------------------------------------------- 1 | """An example how to use sendfile[1] with gevent. 
2 | 3 | [1] http://pypi.python.org/pypi/py-sendfile/ 4 | """ 5 | from sys import exc_info 6 | from errno import EAGAIN 7 | from sendfile import sendfile as original_sendfile 8 | from gevent.socket import wait_write 9 | 10 | def gevent_sendfile(out_fd, in_fd, offset, count): 11 | total_sent = 0 12 | while total_sent < count: 13 | try: 14 | sent = original_sendfile(out_fd, in_fd, offset + total_sent, count - total_sent) 15 | #print '%s: sent %s [%d%%]' % (out_fd, sent, 100*total_sent/count) 16 | total_sent += sent 17 | except OSError: 18 | ex = exc_info()[1] 19 | if ex[0] == EAGAIN: 20 | wait_write(out_fd) 21 | else: 22 | raise 23 | return offset + total_sent, total_sent 24 | 25 | def patch_sendfile(): 26 | import sendfile 27 | sendfile.sendfile = gevent_sendfile 28 | -------------------------------------------------------------------------------- /haystack_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.handlers 3 | import sys 4 | import fcntl 5 | import os 6 | 7 | def init_logger(appname, level): 8 | root_logger = logging.getLogger('') 9 | 10 | strm_out = logging.StreamHandler(sys.__stdout__) 11 | strm_out.setFormatter(logging.Formatter('%(name)s %(asctime)s %(filename)s %(lineno)s %(levelname)s:%(message)s')) 12 | root_logger.addHandler(strm_out) 13 | 14 | handler = logging.handlers.RotatingFileHandler(appname + "err.log", "ab", 50*1024*1024, 5) 15 | 16 | handler.setLevel(logging.ERROR) 17 | handler.setFormatter(logging.Formatter('%(asctime)s %(filename)s %(lineno)s %(levelname)s:%(message)s')) 18 | root_logger.addHandler(handler) 19 | 20 | handler = logging.handlers.RotatingFileHandler(appname + ".log", "ab", 50*1024*1024, 5) 21 | 22 | handler.setLevel(logging.DEBUG) 23 | handler.setFormatter(logging.Formatter('%(asctime)s %(filename)s %(lineno)s %(levelname)s:%(message)s')) 24 | root_logger.addHandler(handler) 25 | 26 | root_logger.setLevel(level) 27 | 28 | 
-------------------------------------------------------------------------------- /tests/upload.py: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/python 2 | # coding: utf-8 3 | import sys 4 | import urllib3 5 | import logging 6 | import json 7 | 8 | def upload(): 9 | upload_url = "http://127.0.0.1:8080/upload" 10 | url = urllib3.util.parse_url(upload_url) 11 | cb_url = url.request_uri 12 | 13 | if url.port is not None: 14 | server = "%s:%d"%(url.host, url.port) 15 | else: 16 | server = url.host 17 | 18 | conn = urllib3.connection_from_url(server) 19 | headers = urllib3.make_headers(keep_alive=True) 20 | content = "hello world" 21 | response = conn.urlopen("POST", cb_url, body=content, headers=headers) 22 | if response.status != 200: 23 | print "eeeeeeeeeeee" 24 | sys.exit(1) 25 | else: 26 | print response.getheaders() 27 | print response.read() 28 | print response.data 29 | fileid = json.loads(response.data)["fileid"] 30 | 31 | path = "/download?fileid=%d"%fileid 32 | print "download path:", path 33 | response = conn.urlopen("GET", path, headers=headers) 34 | if response.status != 200: 35 | print "download fail" 36 | sys.exit(1) 37 | else: 38 | print response.data 39 | 40 | def main(): 41 | for _ in range(1): 42 | upload() 43 | 44 | main() 45 | -------------------------------------------------------------------------------- /doc/impl.md: -------------------------------------------------------------------------------- 1 | Inroduction 2 | --------- 3 | Haystack has two type of nodes, tracker and storage. 4 | One tracker node connects with multiple storage nodes. 5 | 6 | Tracker 7 | --------- 8 | Tracker node manages all storage nodes information in memory. 9 | It exports two functions to client, request\_readable\_node and request\_writable\_node. 10 | Normally two tracker nodes are deployed in production environment. 
Keepalived service is used to make one of them the standby node to guard against a Single Point of Failure (SPOF). 11 | 12 | Storage 13 | --------- 14 | Storage nodes are managed in groups. 15 | The nodes in the same group have the same copies of files. 16 | One group has one master for write and read, and other slave nodes for read and replication only. 17 | Storage exports two functions: upload and download. 18 | Upload: 19 | - get an auto-incremented fileno 20 | - append the uploaded file content into the large file 21 | - send the fileid (groupid+fileno) to the client 22 | - synchronize the file content to the other slaves 23 | - report the new fileno to the tracker node 24 | 25 | Download: 26 | - find the file's offset and size in a Google Sparse hash 27 | - send the file content to the client 28 | 29 | Protocol 30 | --------- 31 | Http. 32 | Client with tracker, client with storage, storage with tracker, and storage with storage communicate with each other using the http protocol. 33 | 34 | Storage->Tracker 35 | --------- 36 | Storage reports its information to the tracker. 37 | The information contains the last fileno, groupid, listenip, listenport, disk available size, write priority, and read priority. 
38 | 39 | Storage->Storage 40 | --------- 41 | Replication steps: 42 | - slave node connects to master node of the group at startup 43 | - slave node posts its last fileno to master 44 | - master node sends all the files after the slave's last fileno 45 | - if socket is disconnected, goto first step 46 | 47 | 48 | Tracker Interface 49 | ========= 50 | 51 | storage report 52 | --------- 53 | **method:** POST 54 | **path:**/report 55 | **body:** 56 | ```js 57 | { 58 | "last_fileno":int, 59 | "groupid":int, 60 | "listenip":string, 61 | "listenport":int, 62 | "master":bool, 63 | "disk_available_size":int 64 | } 65 | ``` 66 | **response:**none 67 | 68 | request writable node 69 | --------- 70 | **method:**GET 71 | **path:**/request\_writable\_node 72 | **body:**none 73 | **response:** 74 | ```js 75 | { 76 | "ip":string, 77 | "port":int, 78 | } 79 | ``` 80 | 81 | request readable node 82 | -------- 83 | **method:**GET 84 | **path:**/request\_readable\_node?fileid=xxx 85 | **body:**none 86 | **response:** 87 | ```js 88 | { 89 | "ip":string, 90 | "port":int, 91 | } 92 | ``` 93 | 94 | Storage Interface 95 | ========= 96 | client uploads file to storage 97 | --------- 98 | **method:**POST 99 | **path:**/upload 100 | **body:**file binary content 101 | **response:** 102 | ```js 103 | { 104 | "fileid":int 105 | } 106 | ``` 107 | 108 | client downloads files from storage 109 | -------- 110 | **method:**GET 111 | **path:**/download?fileid=xxx 112 | **body:**none 113 | **response:**file binary content 114 | 115 | master storage replicates files to slave nodes 116 | -------- 117 | **method:**POST 118 | **path:**/sync_upload 119 | **body:**file binary content 120 | **response:**none 121 | 122 | slave sends sync request to master 123 | -------- 124 | **method:**POST 125 | **path:**/sync 126 | **body:** 127 | ```js 128 | { 129 | "last_fileno":int 130 | } 131 | ``` 132 | **response:**200 133 | 134 | 135 | -------------------------------------------------------------------------------- 
# /haystack.py -- append-only needle store primitives (Python 2 era code,
# kept Python 2/3 source-compatible where that is free to do).
import struct
import logging

# in-memory index: fileno -> (offset, size) of the needle in the store file
haystack_files = {}

# module-level handles/state, initialised by load()/create_*() below
haystack_file = None
haystack_index_file = None
haystack_last_fileno = 0

# The magic is compared against bytes read from files opened in binary mode,
# so declare it as a bytes literal (identical to str on Python 2).
haystack_magic = b"mofs"
haystack_version = 1<<16  # 1.0: major version in the high 16 bits

HEADER_SIZE = 8          # store/index file header: 4s magic + I version
NEEDLE_INDEX_SIZE = 21   # packed NeedleIndex: Q key + B deleted + Q offset + I size


class Needle:
    """One stored blob: header (magic, key, deleted, size), payload, footer."""

    HEADER_SIZE = 4+8+1+4   # !4sQBI
    FOOTER_SIZE = 8         # !4sI magic + crc

    def __init__(self):
        self.key = 0       # fileno
        self.deleted = 0   # tombstone flag
        self.data = ""     # payload

    @property
    def padding_size(self):
        """Zero padding that aligns the whole record to an 8-byte boundary.

        Requires self.data_size to be set (by unpack_header() or write()).
        """
        record = self.HEADER_SIZE + self.data_size + self.FOOTER_SIZE
        return (8 - record % 8) if record % 8 else 0

    def unpack_header(self, data):
        """Decode a packed record header; sets hmagic, key, deleted, data_size."""
        self.hmagic, self.key, self.deleted, self.data_size = \
            struct.unpack("!IQBI", data)

    def write(self):
        """Append this needle (header + payload + padded footer) to the store
        file, record it in the index file, and return the record's offset."""
        assert(self.data)
        haystack_file.seek(0, 2)  # append at EOF
        offset = haystack_file.tell()
        self.data_size = len(self.data)
        pk = struct.pack("!4sQBI", haystack_magic, self.key, self.deleted,
                         self.data_size)
        haystack_file.write(pk)
        haystack_file.write(self.data)

        crc = 0  # checksum not implemented; field reserved in the layout
        # footer, then zero padding so the next record starts 8-byte aligned
        # (same bytes the old "!4sI%ds" pack produced)
        haystack_file.write(struct.pack("!4sI", haystack_magic, crc))
        haystack_file.write(b"\x00" * self.padding_size)
        haystack_file.flush()

        index = NeedleIndex()
        index.key = self.key
        index.deleted = self.deleted
        index.offset = offset
        index.size = self.data_size
        haystack_index_file.write(index.pack())
        haystack_index_file.flush()
        return offset

class NeedleIndex:
    """Fixed-size index record mirroring one needle's location in the store."""

    def __init__(self):
        self.key = 0
        self.deleted = 0
        self.offset = 0
        self.size = 0

    def unpack(self, data):
        self.key, self.deleted, self.offset, self.size = \
            struct.unpack("!QBQI", data)

    def pack(self):
        return struct.pack("!QBQI", self.key, self.deleted, self.offset,
                           self.size)

def load(path, ipath): 78 | global haystack_last_fileno, haystack_file, haystack_index_file 79 | try: 80 | store_file = open(path, "rb") 81 | index_file = open(ipath, "rb") 82 | store_header = store_file.read(HEADER_SIZE) 83 | index_header = index_file.read(HEADER_SIZE) 84 | 85 | if len(store_header) != HEADER_SIZE: 86 | return False 87 | m, v = struct.unpack("!4sI", store_header) 88 | if m != haystack_magic or v != haystack_version: 89 | return False 90 | 91 | if len(index_header) != HEADER_SIZE: 92 | return False 93 | m, v = struct.unpack("!4sI", index_header) 94 | if m != haystack_magic or v != haystack_version: 95 | return False 96 | 97 | haystack_files.clear() 98 | nis = NEEDLE_INDEX_SIZE 99 | while True: 100 | data = index_file.read(nis*1024) 101 | if not data: 102 | break 103 | for i in range(0, len(data), nis): 104 | d = data[i:i+nis] 105 | if len(d) != nis: 106 | logging.warn("index file corrupted") 107 | return False 108 | index = NeedleIndex() 109 | index.unpack(d) 110 | haystack_last_fileno = index.key 111 | if not index.deleted: 112 | haystack_files[index.key] = (index.offset, index.size) 113 | elif haystack_files.has_key(index.key): 114 | haystack_files.pop(index.key) 115 | else: 116 | logging.warn("delete nonthing:%d", index.key) 117 | store_file.close() 118 | index_file.close() 119 | store_file = open(path, "ab+") 120 | index_file = open(ipath, "ab+") 121 | haystack_file = store_file 122 | haystack_index_file = index_file 123 | logging.debug("last fileno:%d", haystack_last_fileno) 124 | return True 125 | except IOError: 126 | return False 127 | 128 | def recover(path, ipath): 129 | try: 130 | store_file = open(path, "rb+") 131 | index_file = open(ipath, "rb+") 132 | store_header = store_file.read(HEADER_SIZE) 133 | index_header = index_file.read(HEADER_SIZE) 134 | 135 | if len(store_header) != HEADER_SIZE: 136 | logging.debug("111") 137 | return False 138 | m, v = struct.unpack("!4sI", store_header) 139 | if m != haystack_magic or v != 
haystack_version: 140 | logging.debug("222") 141 | return False 142 | 143 | if len(index_header) != HEADER_SIZE: 144 | return False 145 | m, v = struct.unpack("!4sI", index_header) 146 | if m != haystack_magic or v != haystack_version: 147 | return False 148 | 149 | index_file.seek(0, 2) 150 | fsize = index_file.tell() 151 | logging.debug("index file size:%d", fsize) 152 | index_size = fsize-HEADER_SIZE 153 | nis = NEEDLE_INDEX_SIZE 154 | if index_size%nis: 155 | logging.error("truncate index file:%d", index_size%nis) 156 | fsize -= index_size%nis 157 | index_size = fsize - HEADER_SIZE 158 | index_file.truncate(fsize) 159 | 160 | offset = 0 161 | if index_size: 162 | index_file.seek(fsize-nis) 163 | data = index_file.read(nis) 164 | assert(len(data) == nis) 165 | index = NeedleIndex() 166 | index.unpack(data) 167 | offset = index.offset 168 | if offset == 0: 169 | offset = HEADER_SIZE 170 | 171 | index_file.seek(0, 2) 172 | store_file.seek(0, 2) 173 | fsize = store_file.tell() 174 | store_file.seek(offset) 175 | while True: 176 | begin = store_file.tell() 177 | data = store_file.read(Needle.HEADER_SIZE) 178 | if len(data) != Needle.HEADER_SIZE: 179 | if data: 180 | logging.error("truncate store file:%d", fsize - begin) 181 | store_file.truncate(begin) 182 | store_file.close() 183 | index_file.close() 184 | return True 185 | needle = Needle() 186 | needle.unpack_header(data) 187 | 188 | index = NeedleIndex() 189 | index.key = needle.key 190 | index.deleted = needle.deleted 191 | index.offset = begin 192 | index.size = needle.data_size 193 | pk = index.pack() 194 | index_file.write(pk) 195 | store_file.seek(needle.data_size + needle.FOOTER_SIZE + needle.padding_size, 1) 196 | 197 | assert(False) 198 | except IOError: 199 | return False 200 | 201 | def _create(path): 202 | try: 203 | f = open(path, "ab") 204 | assert(f.tell() == 0) 205 | header = struct.pack("!4sI", haystack_magic, haystack_version) 206 | f.write(header) 207 | f.close() 208 | return True 209 | except 
IOError: 210 | return False 211 | 212 | def create_index(ipath): 213 | return _create(ipath) 214 | 215 | def create_store(path): 216 | return _create(path) 217 | 218 | -------------------------------------------------------------------------------- /haystack_track.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import gevent 3 | import gevent.socket as socket 4 | import gevent.select as select 5 | import json 6 | import logging 7 | import random 8 | try: 9 | from http_parser.parser import HttpParser 10 | except ImportError: 11 | from http_parser.pyparser import HttpParser 12 | 13 | try: 14 | import urlparse 15 | except ImportError: 16 | import urllib.parse as urlparse 17 | 18 | import haystack_logging 19 | 20 | groupnodes = {} 21 | masternodes = {} 22 | 23 | class HaystackNode: 24 | def __init__(self): 25 | self.disk_available_size = 0 26 | self.last_fileno = 0 27 | self.master = False 28 | self.groupid = 0 29 | self.listenip = "" 30 | self.listenport = 0 31 | self.read_priority = 0 32 | self.write_priority = 0 33 | 34 | 35 | def parse_fileid(fileid): 36 | fileno = fileid & ((1<<54)-1) 37 | groupid = fileid>>54 38 | return (groupid, fileno) 39 | 40 | def handle_report(sock, parser): 41 | logging.debug("handle report") 42 | 43 | body = parser.recv_body() 44 | if not body: 45 | return False 46 | 47 | obj = json.loads(body) 48 | 49 | if hasattr(sock, "node"): 50 | node = sock.node 51 | else: 52 | node = HaystackNode() 53 | sock.node = node 54 | if obj.has_key("last_fileno"): 55 | node.last_fileno = obj["last_fileno"] 56 | if obj.has_key("listenip"): 57 | node.listenip = obj["listenip"] 58 | if obj.has_key("listenport"): 59 | node.listenport = obj["listenport"] 60 | if obj.has_key("groupid"): 61 | assert(node.groupid == 0 or node.groupid == obj["groupid"]) 62 | node.groupid = obj["groupid"] 63 | if obj.has_key("master"): 64 | node.master = obj["master"] 65 | if obj.has_key("disk_available_size"): 66 | 
node.disk_available_size = obj["disk_available_size"] 67 | 68 | if not node.groupid: 69 | return False 70 | 71 | if node.master: 72 | if masternodes.has_key(node.groupid) and node != masternodes[node.groupid]: 73 | logging.error("group:%d already has master") 74 | else: 75 | masternodes[node.groupid] = node 76 | 77 | if not groupnodes.has_key(node.groupid): 78 | groupnodes[node.groupid] = [] 79 | 80 | if node not in groupnodes[node.groupid]: 81 | groupnodes[node.groupid].append(node) 82 | 83 | keepalived = parser.should_keep_alive() 84 | assert(keepalived) 85 | sock.send("HTTP/1.1 200 OK\r\n") 86 | sock.send("Content-Length: 0\r\n") 87 | sock.send("Connection: keep-alive\r\n") 88 | sock.send("\r\n") 89 | return bool(keepalived) 90 | 91 | def handle_request_readable_node(sock, parser): 92 | logging.debug("handle_request_readable_node") 93 | args = urlparse.parse_qs(parser.get_query_string()) 94 | if not args.has_key("fileid"): 95 | return False 96 | fileid = int(args["fileid"][0]) 97 | groupid, fileno = parse_fileid(fileid) 98 | if not groupnodes.has_key(groupid): 99 | return False 100 | 101 | rnodes = [] 102 | for node in groupnodes[groupid]: 103 | if node.last_fileno >= fileno: 104 | rnodes.append(node) 105 | if not rnodes and masternodes.has_key(groupid): 106 | rnodes.append(masternodes[groupid]) 107 | if not rnodes: 108 | return False 109 | i = random.randint(0, len(rnodes)-1) 110 | node = rnodes[i] 111 | obj = {"ip":node.listenip, "port":node.listenport} 112 | return obj 113 | 114 | def handle_request_writable_node(sock, parser): 115 | logging.debug("handle request writable node") 116 | wnodes = [] 117 | for groupid in masternodes: 118 | node = masternodes[groupid] 119 | GB = 1024*1024*1024 120 | if node.disk_available_size > GB: 121 | wnodes.append(node) 122 | 123 | if not wnodes: 124 | return False 125 | i = random.randint(0, len(wnodes)-1) 126 | node = wnodes[i] 127 | obj = {"ip":node.listenip, "port":node.listenport} 128 | return obj 129 | 130 | def 
handle_ping(sock, parser): 131 | logging.debug("handle ping") 132 | return "pong" 133 | 134 | def handle_info(sock, parser): 135 | logging.debug("handle info") 136 | obj = [] 137 | for groupid in groupnodes: 138 | group_nodes = groupnodes[groupid] 139 | for node in group_nodes: 140 | o = {} 141 | o["last_fileno"] = node.last_fileno 142 | o["role"] = "master" if node.master else "slave" 143 | o["groupid"] = node.groupid 144 | o["listenip"] = node.listenip 145 | o["listenport"] = node.listenport 146 | o["disk_free"] = node.disk_available_size 147 | obj.append(o) 148 | 149 | return obj 150 | 151 | def handle_request(sock): 152 | parser = HttpParser() 153 | while True: 154 | data = sock.recv(1024) 155 | if not data: 156 | logging.warn("client sock closed") 157 | return False 158 | recved = len(data) 159 | nparsed = parser.execute(data, recved) 160 | assert(nparsed == recved) 161 | if parser.is_message_complete(): 162 | break 163 | 164 | obj = None 165 | if parser.get_path() == "/request_readable_node": 166 | obj = handle_request_readable_node(sock, parser) 167 | elif parser.get_path() == "/request_writable_node": 168 | obj = handle_request_writable_node(sock, parser) 169 | elif parser.get_path() == "/ping": 170 | obj = handle_ping(sock, parser) 171 | elif parser.get_path() == "/report": 172 | obj = handle_report(sock, parser) 173 | elif parser.get_path() == "/info": 174 | obj = handle_info(sock, parser) 175 | else: 176 | logging.debug("unknown request path:%s", parser.get_path()) 177 | 178 | keepalived = parser.should_keep_alive() 179 | if obj is None: 180 | sock.send("HTTP/1.1 404 Not Found\r\n") 181 | sock.send("Content-Length: 0\r\n") 182 | if keepalived: 183 | sock.send("Connection: keep-alive\r\n") 184 | else: 185 | sock.send("Connection: close\r\n") 186 | sock.send("\r\n") 187 | return False 188 | 189 | if not isinstance(obj, bool): 190 | resp = json.dumps(obj) 191 | sock.send("HTTP/1.1 200 OK\r\n") 192 | sock.send("Content-Type: application/json\r\n") 193 | 
sock.send("Content-Length: %d\r\n"%len(resp)) 194 | if keepalived: 195 | sock.send("Connection: keep-alive\r\n") 196 | else: 197 | sock.send("Connection: close\r\n") 198 | sock.send("\r\n") 199 | sock.send(resp) 200 | return bool(keepalived) 201 | else: 202 | return obj 203 | 204 | def handle_client(sock): 205 | try: 206 | while True: 207 | rds, _, _ = select.select([sock], [], [], 60*5) 208 | if not rds: 209 | break 210 | keepalived = handle_request(sock) 211 | if not keepalived: 212 | break 213 | except socket.error, e: 214 | logging.debug("socket error:%r", e) 215 | finally: 216 | logging.debug("close client") 217 | if hasattr(sock, "node"): 218 | node = sock.node 219 | groupnodes[node.groupid].remove(node) 220 | if masternodes.has_key(node.groupid) and \ 221 | masternodes[node.groupid] == node: 222 | masternodes.pop(node.groupid) 223 | sock.node = None 224 | 225 | sock.close() 226 | 227 | def main(): 228 | haystack_logging.init_logger("track", logging.DEBUG) 229 | 230 | config = {} 231 | if len(sys.argv) == 1: 232 | logging.error("needs config file") 233 | return 234 | config_file = sys.argv[1] 235 | execfile(config_file, config) 236 | listenip = config["listenip"] 237 | listenport = config["listenport"] 238 | 239 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 240 | address = (listenip, listenport) 241 | s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 242 | s.bind(address) 243 | s.listen(5) 244 | 245 | while True: 246 | client_sock, address = s.accept() 247 | gevent.spawn(handle_client, client_sock) 248 | 249 | if __name__ == "__main__": 250 | main() 251 | 252 | -------------------------------------------------------------------------------- /haystack_storage.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import gevent 4 | import gevent.socket as socket 5 | import gevent.select as select 6 | import gevent.queue as queue 7 | from geventsendfile import gevent_sendfile as 
sendfile 8 | import json 9 | import logging 10 | try: 11 | from http_parser.parser import HttpParser 12 | except ImportError: 13 | from http_parser.pyparser import HttpParser 14 | 15 | try: 16 | import urlparse 17 | except ImportError: 18 | import urllib.parse as urlparse 19 | 20 | import haystack 21 | import haystack_logging 22 | 23 | haystack_path = "" 24 | haystack_index_path = "" 25 | groupid = 0 26 | listenip = "" 27 | listenport = 0 28 | master = False 29 | 30 | slaves = [] 31 | track = None 32 | 33 | masterip = "" 34 | masterport = 0 35 | 36 | TRACK_ONLINE = 1 37 | TRACK_OFFLINE = 2 38 | 39 | class HaystackTrack: 40 | def __init__(self): 41 | self.ip = "" 42 | self.port = 0 43 | self.channel = queue.Channel() 44 | self.state = TRACK_OFFLINE 45 | self.waiting = False 46 | 47 | class HaystackSlave: 48 | def __init__(self): 49 | self.channel = queue.Channel() 50 | self.last_fileno = 0 51 | self.waiting = False 52 | 53 | def make_fileid(groupid, fileno): 54 | return (groupid<<54)|fileno 55 | 56 | def parse_fileid(fileid): 57 | fileno = fileid & ((1<<54)-1) 58 | groupid = fileid>>54 59 | return (groupid, fileno) 60 | 61 | def handle_sync_upload(sock, parser): 62 | logging.debug("handle sync upload") 63 | body = parser.recv_body() 64 | if not body: 65 | return False 66 | headers = parser.get_headers() 67 | fileno = int(headers["FileNO"]) if headers.has_key("FileNO") else 0 68 | if fileno <= haystack.haystack_last_fileno: 69 | logging.error("fileno:%d less than %d", \ 70 | fileno, haystack.haystack_last_fileno) 71 | return False 72 | 73 | if fileno - haystack.haystack_last_fileno != 1: 74 | logging.error("fileno is't continuous fileno:%d, last_fileno:%d", \ 75 | fileno, haystack.haystack_last_fileno) 76 | 77 | needle = haystack.Needle() 78 | needle.data = body 79 | needle.key = fileno 80 | offset = needle.write() 81 | haystack.haystack_files[needle.key] = (offset, len(body)) 82 | haystack.haystack_last_fileno = fileno 83 | logging.debug("fileno:%d", fileno) 84 | 85 
| for slave in slaves: 86 | if slave.waiting: 87 | slave.channel.put(haystack.haystack_last_fileno) 88 | if track.waiting: 89 | track.channel.put(haystack.haystack_last_fileno) 90 | 91 | return True 92 | 93 | def handle_upload(sock, parser): 94 | body = parser.recv_body() 95 | if not body: 96 | return False 97 | 98 | needle = haystack.Needle() 99 | needle.data = body 100 | needle.key = haystack.haystack_last_fileno+1 101 | offset = needle.write() 102 | haystack.haystack_last_fileno += 1 103 | haystack.haystack_files[needle.key] = (offset, len(body)) 104 | fileid = make_fileid(groupid, haystack.haystack_last_fileno) 105 | logging.debug("fileid:%d fileno:%d", fileid, haystack.haystack_last_fileno) 106 | 107 | for slave in slaves: 108 | if slave.waiting: 109 | slave.channel.put(haystack.haystack_last_fileno) 110 | if track.waiting: 111 | track.channel.put(haystack.haystack_last_fileno) 112 | 113 | return {"fileid":fileid} 114 | 115 | def handle_download(sock, parser): 116 | keepalived = parser.should_keep_alive() 117 | args = urlparse.parse_qs(parser.get_query_string()) 118 | logging.debug("args:%r", args) 119 | fileid = int(args["fileid"][0]) if args.has_key("fileid") else 0 120 | _, fileid = parse_fileid(fileid) 121 | if not haystack.haystack_files.has_key(fileid): 122 | logging.debug("can't find file:%d", fileid) 123 | sock.send("HTTP/1.1 404 Not Found\r\n") 124 | sock.send("Content-Length: 0\r\n") 125 | if keepalived: 126 | sock.send("Connection: keep-alive\r\n") 127 | else: 128 | sock.send("Connection: close\r\n") 129 | sock.send("\r\n") 130 | else: 131 | sock.send("HTTP/1.1 200 OK\r\n") 132 | sock.send("Content-Type: application/octet-stream\r\n") 133 | offset, size = haystack.haystack_files[fileid] 134 | sock.send("Content-Length: %d\r\n"%size) 135 | if keepalived: 136 | sock.send("Connection: keep-alive\r\n") 137 | else: 138 | sock.send("Connection: close\r\n") 139 | sock.send("\r\n") 140 | logging.debug("offset:%d, size:%d\n", offset, size) 141 | 
sendfile(sock.fileno(), haystack.haystack_file.fileno(), offset+haystack.Needle.HEADER_SIZE, size) 142 | 143 | return bool(keepalived) 144 | 145 | def heartbeat(sock): 146 | ip, port = sock.getpeername() 147 | parser = HttpParser() 148 | sock.send("GET /ping HTTP/1.1\r\nHost: %s:%d\r\n\r\n"%(ip, port)) 149 | 150 | while True: 151 | data = sock.recv(1024) 152 | if not data: 153 | return False 154 | 155 | recved = len(data) 156 | nparsed = parser.execute(data, recved) 157 | assert(nparsed == recved) 158 | if parser.is_message_complete(): 159 | break 160 | 161 | return parser.get_status_code() == 200 162 | 163 | def post_file(sock, fileno): 164 | ip, port = sock.getpeername() 165 | if not haystack.haystack_files.has_key(fileno): 166 | logging.error("can't find file:%d", fileno) 167 | return False 168 | 169 | offset, size = haystack.haystack_files[fileno] 170 | logging.debug("post file:%d size:%d", fileno, size) 171 | 172 | sock.send("POST /sync_upload HTTP/1.1\r\n") 173 | sock.send("Host: %s:%d\r\n"%(ip, port)) 174 | sock.send("Content-Length: %d\r\n"%size) 175 | sock.send("Content-Type: application/octet-stream\r\n") 176 | sock.send("Connection: keep-alive\r\n") 177 | sock.send("FileNO: %d\r\n"%fileno) 178 | sock.send("\r\n") 179 | sendfile(sock.fileno(), haystack.haystack_file.fileno(), \ 180 | offset+haystack.Needle.HEADER_SIZE, size) 181 | return True 182 | 183 | def handle_sync(sock, parser): 184 | logging.debug("handle sync") 185 | body = parser.recv_body() 186 | if not body: 187 | return False 188 | obj = json.loads(body) 189 | last_fileno = obj["last_fileno"] 190 | keepalived = parser.should_keep_alive() 191 | assert(keepalived) 192 | sock.send("HTTP/1.1 200 OK\r\n") 193 | sock.send("Content-Length: 0\r\n") 194 | sock.send("Connection: keep-alive\r\n") 195 | sock.send("\r\n") 196 | 197 | slave = HaystackSlave() 198 | slave.last_fileno = last_fileno 199 | slaves.append(slave) 200 | 201 | try: 202 | while True: 203 | while slave.last_fileno < 
haystack.haystack_last_fileno: 204 | if not post_file(sock, slave.last_fileno+1): 205 | return False 206 | slave.last_fileno += 1 207 | try: 208 | slave.waiting = True 209 | slave.channel.get(timeout=5) 210 | except queue.Empty: 211 | if not heartbeat(sock): 212 | return False 213 | finally: 214 | slave.waiting = False 215 | finally: 216 | slaves.remove(slave) 217 | 218 | def handle_info(sock, parser): 219 | logging.debug("handle info") 220 | keepalived = parser.should_keep_alive() 221 | obj = {} 222 | obj["trackip"] = track.ip 223 | obj["trackport"] = track.port 224 | if track.state == TRACK_ONLINE: 225 | obj["trackstate"] = "online" 226 | elif track.state == TRACK_OFFLINE: 227 | obj["trackstate"] = "offline" 228 | 229 | if master: 230 | obj["role"] = "master" 231 | obj["nslaves"] = len(slaves) 232 | else: 233 | obj["role"] = "slave" 234 | obj["masterip"] = masterip 235 | obj["masterport"] = masterport 236 | obj["last_fileno"] = haystack.haystack_last_fileno 237 | return obj 238 | 239 | def handle_ping(sock, parser): 240 | logging.debug("handle ping") 241 | return "pong" 242 | 243 | def handle_request(sock, parser, preread): 244 | logging.debug("handle request") 245 | if parser: 246 | assert(parser.is_headers_complete()) 247 | headers = parser.get_headers() 248 | content_length = int(headers["Content-Length"]) if headers.has_key("Content-Length") else 0 249 | assert(content_length >= len(preread)) 250 | if content_length: 251 | if preread: 252 | nparsed = parser.execute(preread, len(preread)) 253 | assert(nparsed == len(preread)) 254 | content_length -= len(preread) 255 | while content_length: 256 | data = sock.recv(content_length) 257 | if not data: 258 | logging.warn("client sock closed") 259 | return False 260 | recved = len(data) 261 | content_length -= recved 262 | nparsed = parser.execute(data, recved) 263 | assert(nparsed == recved) 264 | if parser.is_message_complete(): 265 | break 266 | else: 267 | parser = HttpParser() 268 | while True: 269 | 
logging.debug("recv........") 270 | data = sock.recv(64*1024) 271 | if not data: 272 | logging.warn("client sock closed") 273 | return False 274 | recved = len(data) 275 | nparsed = parser.execute(data, recved) 276 | assert(nparsed == recved) 277 | if parser.is_message_complete(): 278 | break 279 | 280 | obj = None 281 | if parser.get_path() == "/upload": 282 | obj = handle_upload(sock, parser) 283 | elif parser.get_path() == "/sync_upload": 284 | obj = handle_sync_upload(sock, parser) 285 | elif parser.get_path() == "/download": 286 | obj = handle_download(sock, parser) 287 | elif parser.get_path() == "/sync": 288 | obj = handle_sync(sock, parser) 289 | elif parser.get_path() == "/ping": 290 | obj = handle_ping(sock, parser) 291 | elif parser.get_path() == "/info": 292 | obj = handle_info(sock, parser) 293 | else: 294 | logging.debug("unknown request path:%s", parser.get_path()) 295 | 296 | if obj is None: 297 | sock.send("HTTP/1.1 404 Not Found\r\n") 298 | sock.send("Content-Length: 0\r\n") 299 | if keepalived: 300 | sock.send("Connection: keep-alive\r\n") 301 | else: 302 | sock.send("Connection: close\r\n") 303 | sock.send("\r\n") 304 | return False 305 | 306 | if not isinstance(obj, bool): 307 | resp = json.dumps(obj) 308 | keepalived = parser.should_keep_alive() 309 | sock.send("HTTP/1.1 200 OK\r\n") 310 | sock.send("Content-Type: application/json\r\n") 311 | 312 | sock.send("Content-Length: %d\r\n"%len(resp)) 313 | if keepalived: 314 | sock.send("Connection: keep-alive\r\n") 315 | else: 316 | sock.send("Connection: close\r\n") 317 | sock.send("\r\n") 318 | sock.send(resp) 319 | return bool(keepalived) 320 | else: 321 | return obj 322 | 323 | def handle_client(sock): 324 | try: 325 | while True: 326 | rds, _, _ = select.select([sock], [], [], 60*5) 327 | if not rds: 328 | break 329 | 330 | keepalived = handle_request(sock, None, None) 331 | if not keepalived: 332 | break 333 | except socket.error, e: 334 | logging.debug("socket error:%r", e) 335 | finally: 336 
| logging.debug("close client") 337 | sock.close() 338 | 339 | def handle_batch_client(sock): 340 | recvbuf = "" 341 | while True: 342 | rds, _, _ = select.select([sock], [], [], 60*5) 343 | if not rds: 344 | break 345 | 346 | data = sock.recv(1024) 347 | if not data: 348 | break 349 | recvbuf += data 350 | 351 | pos = recvbuf.find("\r\n\r\n") 352 | if pos == -1: 353 | continue 354 | parser = HttpParser() 355 | nparsed = parser.execute(recvbuf, pos+4) 356 | if nparsed != pos+4: 357 | logging.debug("pos:%d, nparsed:%d, recvbuf:%r", pos, nparsed, recvbuf) 358 | assert(nparsed == pos+4) 359 | assert(parser.is_headers_complete()) 360 | headers = parser.get_headers() 361 | content_length = int(headers["Content-Length"]) if headers.has_key("Content-Length") else 0 362 | logging.debug("content length:%d", content_length) 363 | recvbuf = recvbuf[pos+4:] 364 | preread = recvbuf[:content_length] 365 | recvbuf = recvbuf[content_length:] 366 | keepalived = handle_request(sock, parser, preread) 367 | if not keepalived: 368 | break 369 | 370 | logging.debug("close client") 371 | sock.close() 372 | 373 | def post_sync(sock, masterip, masterport): 374 | obj = {"last_fileno":haystack.haystack_last_fileno} 375 | body = json.dumps(obj) 376 | sock.send("POST /sync HTTP/1.1\r\n") 377 | sock.send("Host: %s:%d\r\n"%(masterip, masterport)) 378 | sock.send("Content-Length: %d\r\n"%len(body)) 379 | sock.send("Content-Type: application/json\r\n") 380 | sock.send("Connection: keep-alive\r\n") 381 | sock.send("\r\n") 382 | sock.send(body) 383 | 384 | parser = HttpParser() 385 | while True: 386 | #!!!ugly prevent recveive next http request 387 | data = sock.recv(1) 388 | if not data: 389 | return False 390 | 391 | recved = len(data) 392 | nparsed = parser.execute(data, recved) 393 | assert(nparsed == recved) 394 | if parser.is_message_complete(): 395 | break 396 | 397 | return parser.get_status_code() == 200 398 | 399 | def _sync(masterip, masterport): 400 | sock = socket.socket(socket.AF_INET, 
socket.SOCK_STREAM) 401 | sock.connect((masterip, masterport)) 402 | if not post_sync(sock, masterip, masterport): 403 | return 404 | logging.debug("slave sync begin recv...") 405 | handle_batch_client(sock) 406 | 407 | def sync_with_master(masterip, masterport): 408 | while True: 409 | try: 410 | logging.debug("sync........") 411 | _sync(masterip, masterport) 412 | except socket.error, e: 413 | logging.debug("disconnect with master, exception:%r", e) 414 | gevent.sleep(5) 415 | except Exception, e: 416 | logging.debug("sync exception:%r", e) 417 | gevent.sleep(5) 418 | 419 | def post_report(sock): 420 | st = os.statvfs(haystack_path) 421 | available_size = st.f_bavail * st.f_frsize 422 | obj = {} 423 | obj["listenip"] = listenip 424 | obj["listenport"] = listenport 425 | obj["disk_available_size"] = available_size 426 | obj["master"] = master 427 | obj["groupid"] = groupid 428 | obj["last_fileno"] = haystack.haystack_last_fileno 429 | body = json.dumps(obj) 430 | sock.send("POST /report HTTP/1.1\r\n") 431 | sock.send("Host: %s:%d\r\n"%(track.ip, track.port)) 432 | sock.send("Content-Length: %d\r\n"%len(body)) 433 | sock.send("Content-Type: application/json\r\n") 434 | sock.send("Connection: keep-alive\r\n") 435 | sock.send("\r\n") 436 | sock.send(body) 437 | 438 | parser = HttpParser() 439 | while True: 440 | data = sock.recv(1024) 441 | if not data: 442 | return False 443 | 444 | recved = len(data) 445 | nparsed = parser.execute(data, recved) 446 | assert(nparsed == recved) 447 | if parser.is_message_complete(): 448 | break 449 | 450 | return parser.get_status_code() == 200 451 | 452 | #reconnect when 0,1,2,4,8,16. 
453 | def track_report(): 454 | nseconds = 0 455 | while True: 456 | try: 457 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 458 | sock.connect((track.ip, track.port)) 459 | while True: 460 | last_fileno = haystack.haystack_last_fileno 461 | post_report(sock) 462 | track.state = TRACK_ONLINE 463 | nseconds = 0 464 | if last_fileno != haystack.haystack_last_fileno: 465 | continue 466 | try: 467 | track.waiting = True 468 | track.channel.get(timeout=5) 469 | except queue.Empty: 470 | continue 471 | finally: 472 | track.waiting = False 473 | except socket.error, e: 474 | logging.debug("socket error:%r", e) 475 | except Exception, e: 476 | logging.debug("exception:%r", e) 477 | finally: 478 | track.state = TRACK_OFFLINE 479 | sock.close() 480 | if nseconds == 0: 481 | nseconds= 1 482 | else: 483 | gevent.sleep(nseconds) 484 | if nseconds < 10: 485 | nseconds *= 2 486 | logging.debug("disconnect with track") 487 | 488 | def file_exists(path): 489 | try: 490 | f = open(path, "rb") 491 | f.close() 492 | return True 493 | except IOError: 494 | return False 495 | 496 | def main(): 497 | global listenip, listenport 498 | global track, masterip, masterport 499 | global groupid, master 500 | global haystack_path, haystack_index_path 501 | haystack_logging.init_logger("storage", logging.DEBUG) 502 | 503 | config = {} 504 | if len(sys.argv) == 1: 505 | logging.error("needs config file") 506 | return 507 | config_file = sys.argv[1] 508 | execfile(config_file, config) 509 | haystack_path = config["dbfilename"] 510 | haystack_index_path = config["dbindexfilename"] 511 | groupid = config["groupid"] 512 | 513 | if not file_exists(haystack_path) and not haystack.create_store(haystack_path): 514 | logging.error("create store file fail") 515 | sys.exit(1) 516 | if not file_exists(haystack_index_path) and not haystack.create_index(haystack_index_path): 517 | logging.error("create index file fail") 518 | sys.exit(1) 519 | if not haystack.recover(haystack_path, 
haystack_index_path): 520 | logging.error("recover haystack store fail") 521 | sys.exit(1) 522 | if not haystack.load(haystack_path, haystack_index_path): 523 | logging.error("load haystack file fail") 524 | sys.exit(1) 525 | 526 | 527 | masterip = config["masterip"] if config.has_key("masterip") else "" 528 | masterport = config["masterport"] if config.has_key("masterport") else 0 529 | listenip = config["listenip"] 530 | listenport = config["listenport"] 531 | 532 | trackip = config["trackip"] 533 | trackport = config["trackport"] 534 | track = HaystackTrack() 535 | track.ip = trackip 536 | track.port = trackport 537 | assert(track.ip and track.port) 538 | gevent.spawn(track_report) 539 | if masterip and masterport: 540 | gevent.spawn(sync_with_master, masterip, masterport) 541 | master = False 542 | else: 543 | master = True 544 | 545 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 546 | address = (listenip, listenport) 547 | s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 548 | s.bind(address) 549 | s.listen(5) 550 | 551 | while True: 552 | client_sock, address = s.accept() 553 | gevent.spawn(handle_client, client_sock) 554 | 555 | if __name__ == "__main__": 556 | main() 557 | 558 | --------------------------------------------------------------------------------