├── test.py ├── BTL.py ├── README.md ├── client.py ├── bencode.py ├── torrent.py └── lightdht.py /test.py: -------------------------------------------------------------------------------- 1 | #This file has no purpose anymore -------------------------------------------------------------------------------- /BTL.py: -------------------------------------------------------------------------------- 1 | class BTFailure(Exception): 2 | pass 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | I was learning to code. No bully. 2 | 3 | ld project built sometime in 2012. Crawls the DHT network, downloads torrent metadata from peers and uploads that torrent information into a database. 4 | 5 | Main project: https://github.com/laino/shiny-adventure 6 | 7 | Lightdht: https://github.com/laino/lightdht 8 | -------------------------------------------------------------------------------- /client.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import hashlib 3 | import time 4 | import os 5 | import lightdht 6 | import torrent 7 | import json 8 | import urllib 9 | import urllib2 10 | import traceback 11 | import bencode 12 | import random 13 | 14 | #Maximal simultanous jobs 15 | MAX_JOBS = 30 16 | API_URL = "http://localhost:8080/mapi/"; 17 | API_PASS = "test" 18 | 19 | # Enable logging: 20 | lightdht.logger.setLevel(logging.ERROR) 21 | formatter = logging.Formatter("[%(levelname)s@%(created)s] %(message)s") 22 | stdout_handler = logging.StreamHandler() 23 | stdout_handler.setFormatter(formatter) 24 | lightdht.logger.addHandler(stdout_handler) 25 | 26 | # Create a DHT node. 
27 | dht = lightdht.DHT(port=8000) 28 | 29 | #Running torrents that are downloading metadata 30 | manager = torrent.TorrentManager(dht, 8000, None) 31 | found_torrents = set() 32 | 33 | def addHash(info_hash): 34 | if len(info_hash) == 20: 35 | found_torrents.add(info_hash) 36 | 37 | def makeRequest(method, body = None): 38 | data = {'method':method, 'password':API_PASS} 39 | if body != None: 40 | body = bencode.bencode(body).encode("base64") 41 | data['body'] = body 42 | data = urllib.urlencode(data) 43 | while True: 44 | try: 45 | req = urllib2.Request(API_URL,data) 46 | response = urllib2.urlopen(req).read() 47 | return bencode.bdecode(response.decode("base64")) 48 | except (urllib2.HTTPError, urllib2.URLError), e: 49 | print "Error while making requests: %s. Retrying in 10 seconds" % str(e) 50 | time.sleep(10) 51 | return None 52 | 53 | def sendFound(): 54 | if len(found_torrents) == 0: 55 | return 56 | to_send = list() 57 | while len(found_torrents) != 0: 58 | to_send.append(found_torrents.pop()) 59 | print("Sending %d info_hashes to server" % len(to_send)) 60 | makeRequest('put_hashes',to_send) 61 | 62 | def sendFinished(): 63 | ret = manager.fetchAndRemove() 64 | for torrent in ret: 65 | info_hash, peers, data = torrent 66 | processFinished(info_hash, peers, data) 67 | 68 | def getNewWork(): 69 | njobs = manager.count() 70 | if njobs < MAX_JOBS: 71 | jobs = get_work(MAX_JOBS - njobs) 72 | for work in jobs: 73 | if work['type'] == 'download_metadata': 74 | manager.addTorrent(work['info_hash']) 75 | elif work['type'] == 'check_peers': 76 | manager.addTorrent(work['info_hash'], metadata = False) 77 | 78 | def processFinished(info_hash, peers, data): 79 | req = {'info_hash':info_hash, 'peers':peers} 80 | if data != None: 81 | req['metadata'] = data 82 | print "Sending info of %s" % info_hash.encode("hex") 83 | makeRequest('update',req) 84 | 85 | def get_work(amount): 86 | jobs = [] 87 | for i in range(amount): 88 | jobs.append(makeRequest('get_work')) 89 | 
return jobs 90 | 91 | # handler 92 | def myhandler(rec, c): 93 | try: 94 | if "a" in rec: 95 | a = rec["a"] 96 | if "info_hash" in a: 97 | info_hash = a["info_hash"] 98 | addHash(info_hash) 99 | finally: 100 | dht.default_handler(rec,c) 101 | 102 | dht.handler = myhandler 103 | dht.active_discovery = True 104 | dht.self_find_delay = 30 105 | 106 | # Start it! 107 | with dht: 108 | print "Started" 109 | # Go to sleep and let the DHT service requests. 110 | while True: 111 | sendFound() 112 | sendFinished() 113 | getNewWork() 114 | time.sleep(10) 115 | -------------------------------------------------------------------------------- /bencode.py: -------------------------------------------------------------------------------- 1 | # The contents of this file are subject to the BitTorrent Open Source License 2 | # Version 1.1 (the License). You may not copy or use this file, in either 3 | # source code or executable form, except in compliance with the License. You 4 | # may obtain a copy of the License at http://www.bittorrent.com/license/. 5 | # 6 | # Software distributed under the License is distributed on an AS IS basis, 7 | # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 8 | # for the specific language governing rights and limitations under the 9 | # License. 
10 | 11 | # Written by Petru Paler 12 | 13 | from BTL import BTFailure 14 | import traceback as trace 15 | 16 | def decode_int(x, f): 17 | f += 1 18 | newf = x.index('e', f) 19 | n = int(x[f:newf]) 20 | if x[f] == '-': 21 | if x[f + 1] == '0': 22 | raise ValueError 23 | elif x[f] == '0' and newf != f+1: 24 | raise ValueError 25 | return (n, newf+1) 26 | 27 | def decode_string(x, f): 28 | colon = x.index(':', f) 29 | n = int(x[f:colon]) 30 | if x[f] == '0' and colon != f+1: 31 | raise ValueError 32 | colon += 1 33 | return (x[colon:colon+n], colon+n) 34 | 35 | def decode_list(x, f): 36 | r, f = [], f+1 37 | while x[f] != 'e': 38 | v, f = decode_func[x[f]](x, f) 39 | r.append(v) 40 | return (r, f + 1) 41 | 42 | def decode_dict(x, f): 43 | r, f = {}, f+1 44 | while x[f] != 'e': 45 | k, f = decode_string(x, f) 46 | r[k], f = decode_func[x[f]](x, f) 47 | return (r, f + 1) 48 | 49 | decode_func = {} 50 | decode_func['l'] = decode_list 51 | decode_func['d'] = decode_dict 52 | decode_func['i'] = decode_int 53 | decode_func['0'] = decode_string 54 | decode_func['1'] = decode_string 55 | decode_func['2'] = decode_string 56 | decode_func['3'] = decode_string 57 | decode_func['4'] = decode_string 58 | decode_func['5'] = decode_string 59 | decode_func['6'] = decode_string 60 | decode_func['7'] = decode_string 61 | decode_func['8'] = decode_string 62 | decode_func['9'] = decode_string 63 | 64 | def bdecode(x): 65 | r, l = bdecode_len(x) 66 | if l != len(x): 67 | raise BTFailure("invalid bencoded value (data after valid prefix)") 68 | return r 69 | 70 | def bdecode_len(x): 71 | try: 72 | return decode_func[x[0]](x, 0) 73 | except (IndexError, KeyError, ValueError): 74 | trace.print_exc() 75 | raise BTFailure("not a valid bencoded string") 76 | 77 | from types import StringType, IntType, LongType, DictType, ListType, TupleType 78 | 79 | class Bencached(object): 80 | 81 | __slots__ = ['bencoded'] 82 | 83 | def __init__(self, s): 84 | self.bencoded = s 85 | 86 | def 
encode_bencached(x,r): 87 | r.append(x.bencoded) 88 | 89 | def encode_int(x, r): 90 | r.extend(('i', str(x), 'e')) 91 | 92 | def encode_bool(x, r): 93 | if x: 94 | encode_int(1, r) 95 | else: 96 | encode_int(0, r) 97 | 98 | def encode_string(x, r): 99 | r.extend((str(len(x)), ':', x)) 100 | 101 | def encode_list(x, r): 102 | r.append('l') 103 | for i in x: 104 | encode_func[type(i)](i, r) 105 | r.append('e') 106 | 107 | def encode_dict(x,r): 108 | r.append('d') 109 | ilist = x.items() 110 | ilist.sort() 111 | for k, v in ilist: 112 | r.extend((str(len(k)), ':', k)) 113 | encode_func[type(v)](v, r) 114 | r.append('e') 115 | 116 | encode_func = {} 117 | encode_func[Bencached] = encode_bencached 118 | encode_func[IntType] = encode_int 119 | encode_func[LongType] = encode_int 120 | encode_func[StringType] = encode_string 121 | encode_func[ListType] = encode_list 122 | encode_func[TupleType] = encode_list 123 | encode_func[DictType] = encode_dict 124 | 125 | try: 126 | from types import BooleanType 127 | encode_func[BooleanType] = encode_bool 128 | except ImportError: 129 | pass 130 | 131 | def bencode(x): 132 | r = [] 133 | encode_func[type(x)](x, r) 134 | return ''.join(r) 135 | -------------------------------------------------------------------------------- /torrent.py: -------------------------------------------------------------------------------- 1 | from thread import start_new_thread 2 | import lightdht 3 | import struct 4 | import time 5 | import socket as pysocket 6 | import bencode 7 | import traceback 8 | import math 9 | import random 10 | import hashlib 11 | 12 | #Timeouts 13 | 14 | #Exceeding one of these will resilu in the peer being disconnected 15 | CONNECT_TIMEOUT = 10 #Maximum amount of time to wait for a connection to be established 16 | RECV_TIMEOUT = 60 #Maximum amount of time to wait for recv to get all the data 17 | PEER_TIMEOUT = 30 #Maximum amount of time to wait for a block of metadata 18 | #A torrent that takes longer than this will be 
canceled 19 | TORRENT_TIMEOUT = 600 20 | 21 | def recvAll(stream, l, timeout = RECV_TIMEOUT): 22 | data = "" 23 | start = time.time() 24 | while True: 25 | if time.time() >= start + timeout: 26 | raise PeerException, "Read timed out" 27 | try: 28 | data = data + stream.recv(l - len(data)) 29 | except pysocket.timeout: 30 | pass 31 | if len(data) < l: 32 | time.sleep(0.5) 33 | else: 34 | break 35 | return data 36 | 37 | class PeerException(Exception): 38 | pass 39 | 40 | class Peer: 41 | def __init__(self, socket, torrent = None, timeout = PEER_TIMEOUT): 42 | socket.setblocking(True) 43 | socket.settimeout(RECV_TIMEOUT) 44 | self.timeout = timeout 45 | self.socket = socket 46 | self.torrent = torrent 47 | self.handshakeSend = False 48 | self.handshakeReceived = False 49 | self.extensionHandshakeReceived = False 50 | self.closed = False 51 | 52 | def _receiveHandshake(self): 53 | pstr_len = ord(recvAll(self.socket,1)) 54 | pstr = recvAll(self.socket, pstr_len) 55 | if pstr != "BitTorrent protocol": 56 | self.close() 57 | raise PeerException, "Peer uses wrong protocol" 58 | 59 | self.reserved = recvAll(self.socket,8) 60 | #Check if the peer supports the extension protocol 61 | if ord(self.reserved[5]) & 0x10 != 0x10: 62 | self.socket.close() 63 | raise PeerException, "Not supporting extensions" 64 | 65 | self.info_hash = recvAll(self.socket,20) 66 | self.peer_id = recvAll(self.socket,20) 67 | self.handshakeReceived = True 68 | 69 | def _sendMessage(self, msgtype = None, contents = None): 70 | l = 0 71 | msg = "" 72 | if msgtype != None: 73 | l = l + 1 74 | msg = msg + chr(msgtype) 75 | if contents != None: 76 | l = l + len(contents) 77 | msg = msg + contents 78 | packed = struct.pack(">I",l) + msg 79 | self.socket.sendall(packed) 80 | 81 | #Returns tuple(length, msgtype, data) 82 | def _receiveMessage(self): 83 | socket = self.socket 84 | length = struct.unpack(">I",recvAll(socket,4))[0] 85 | msgtype = None 86 | content = None 87 | if length>0: 88 | msgtype = 
ord(recvAll(socket,1)) 89 | if length>1: 90 | content = recvAll(socket,length-1) 91 | 92 | return (length, msgtype, content) 93 | 94 | def _sendHandshake(self): 95 | #Send the handshake 96 | # 1 byte 8 byte 20byte 20byte 97 | #handshake: 98 | pstr = "BitTorrent protocol" 99 | pstr_len = len(pstr) 100 | reserved = [chr(0) for i in range(8)] 101 | reserved[5] = chr(0x10) 102 | reserved = ''.join(reserved) 103 | info_hash = self.torrent.info_hash 104 | _id = "-TI0002-TORRENTINDEX" 105 | packed = chr(pstr_len) + pstr + reserved + info_hash + _id 106 | self.socket.sendall(packed) 107 | self._sendExtensionHandshake() 108 | self.handshakeSend = True 109 | 110 | def _sendExtensionHandshake(self): 111 | contents = {'m': {'ut_metadata': 3}, 'metadata_size':0,'v':'DHT-Crawler-0.1'} 112 | self._sendExtensionMessage(0, contents) 113 | 114 | def _sendExtensionMessage(self, msg, contents, add = None): 115 | data = chr(msg) + bencode.bencode(contents) 116 | if add != None: 117 | data = data + add 118 | self._sendMessage(20, data) 119 | 120 | def doReceiveHandshake(self): 121 | if not self.handshakeReceived: 122 | self._receiveHandshake() 123 | 124 | def performHandshake(self): 125 | """ 126 | Performs a complete handshake with the peer 127 | """ 128 | while not self.handshakeSend or not self.handshakeReceived: 129 | if not self.handshakeSend and self.torrent != None: 130 | self._sendHandshake() 131 | if not self.handshakeReceived: 132 | self._receiveHandshake() 133 | time.sleep(0.1) 134 | 135 | 136 | def _requestPiece(self): 137 | if self.torrent.finished: 138 | return 139 | piece = self.torrent.getNeededPiece() 140 | self._sendExtensionMessage(self.metadata_id,{'msg_type':0,'piece':piece}) 141 | 142 | def resetTimeout(self): 143 | self.limit = time.time() + self.timeout 144 | 145 | #Mainloop 146 | def loop(self): 147 | self.resetTimeout() 148 | while not self.torrent.finished and not self.closed: 149 | if time.time() >= self.limit: 150 | raise PeerException, "Peer timed out" 151 
| length, msgtype, content = self._receiveMessage() 152 | if length > 0: 153 | if msgtype == 20: 154 | #extended 155 | self._extended(content) 156 | elif msgtype == 0: 157 | #Choke 158 | pass 159 | elif msgtype == 1: 160 | #unchoke 161 | pass 162 | elif msgtype == 2: 163 | #interested 164 | pass 165 | elif msgtype == 3: 166 | #not interested 167 | pass 168 | elif msgtype == 4: 169 | #have 170 | pass 171 | 172 | def _metadataExt(self, msg, extra): 173 | msg_type = msg['msg_type'] 174 | torrent = self.torrent 175 | if msg_type == 0: 176 | #request 177 | #currently we are rejeting all of them 178 | piece = msg['piece'] 179 | self.sendExtensionMessage(self.metadata_id,{'msg_type':2,'piece':piece}) 180 | elif msg_type == 1: 181 | #data 182 | self.resetTimeout() 183 | size = msg['total_size'] 184 | if size != self.torrent.metadataSize: 185 | raise PeerException, "Peer was reporting wrong metadata size during download" 186 | piece = msg['piece'] 187 | self.torrent.gotMetadata(piece, extra) 188 | self._requestPiece() 189 | elif msg_type == 2: 190 | #reject 191 | self.close() 192 | raise PeerException, "Peer is rejecting metadata requests" 193 | 194 | def _extended(self, data): 195 | msgtype = ord(data[0]) 196 | if msgtype == 0 and not self.extensionHandshakeReceived: 197 | #handshake 198 | payload = bencode.bdecode(data[1:]) 199 | if not "metadata_size" in payload or not "ut_metadata" in payload['m']: 200 | self.close() 201 | raise PeerException, "Not supporting ut_metadata extension" 202 | 203 | size = payload['metadata_size'] 204 | if size == 0: 205 | self.close() 206 | raise PeerException, "The peer does not appear to have any metadata" 207 | 208 | self.torrent.setMetadataSize(size) 209 | self.metadata_id = payload['m']['ut_metadata'] 210 | self.extensionHandshakeReceived = True 211 | #everything seems fine, go ahead an request the first bit of metadata 212 | self._requestPiece() 213 | self.resetTimeout() 214 | elif not self.extensionHandshakeReceived: 215 | 
self.close() 216 | raise PeerException, "Peer send extension messages before handshake" 217 | 218 | if msgtype == 3: 219 | #Got metadata extension message 220 | r, l = bencode.bdecode_len(data[1:]) 221 | self._metadataExt(r, data[l+1:]) 222 | 223 | def close(self): 224 | self.socket.close() 225 | self.closed = True 226 | 227 | class Torrent: 228 | def __init__(self, dht, info_hash, get_metadata): 229 | self.get_metadata = get_metadata 230 | self.dht = dht 231 | self.info_hash = info_hash 232 | self.metadata = {} 233 | self.metadataSize = -1 234 | self.metadataPieces = 0 235 | self.finished = False 236 | self.peer_list = set() 237 | self.peers = [] 238 | self.started = time.time() 239 | self.shutdown = False 240 | self.got_peers = False 241 | start_new_thread(self._run, tuple()) 242 | 243 | def gotMetadata(self, piece, content): 244 | length = len(content) 245 | slength = 16384 246 | if piece == self.metadataPieces -1: 247 | slength = self.metadataSize % 16384 248 | if length < slength : 249 | raise PeerException, "Received metadata piece of wrong length ("+str(length)+"/"+str(slength)+")" 250 | elif length > slength: 251 | content = content[0:slength] 252 | if not piece in self.metadata: 253 | self.metadata[piece] = content 254 | #Check if the torrent is finished 255 | if self.getNeededPiece() == -1: 256 | self.finished = True 257 | 258 | def peerCount(self): 259 | return len(self.peer_list) 260 | 261 | def disconnect(self): 262 | self.shutdown = True 263 | for peer in self.peers: 264 | try: 265 | peer.close() 266 | except Exception, e: 267 | print(str(e)) 268 | traceback.print_exc() 269 | finally: 270 | try: 271 | self.peers.remove(peer) 272 | except ValueError: 273 | #Was not in list 274 | pass 275 | 276 | def setMetadataSize(self, size): 277 | if size == 0: 278 | raise PeerException, "Metadata size cannot be 0" 279 | self.metadataSize = size 280 | self.metadataPieces = int(math.ceil(size / 16384.0)) 281 | self.log("Downloading "+str(self.metadataPieces)+" pieces 
of metadata ("+str(size)+" bytes)") 282 | 283 | def getNeededPiece(self): 284 | """ 285 | Returns a random metadata piece we still need 286 | """ 287 | piece = 0 288 | pieces = [] 289 | while piece < self.metadataPieces: 290 | if not piece in self.metadata: 291 | pieces.append(piece) 292 | piece += 1 293 | if len(pieces) == 0: 294 | return -1 295 | return random.choice(pieces) 296 | 297 | def openConnection(self, ip, port): 298 | socket = pysocket.create_connection((ip, port), CONNECT_TIMEOUT) 299 | peer = Peer(socket, self) 300 | peer.performHandshake() 301 | self._handlePeer(peer) 302 | 303 | def addPeer(self, peer): 304 | if not self.get_metadata: 305 | peer.close() 306 | raise Exception, "Not interested in metadata" 307 | peer.torrent = self 308 | peer.performHandshake() 309 | self._handlePeer(peer) 310 | 311 | def _handlePeer(self, peer): 312 | if peer.info_hash != self.info_hash: 313 | peer.close() 314 | raise PeerException, "Peer is serving the wrong torrent" 315 | self.peers.append(peer) 316 | try: 317 | peer.loop() 318 | finally: 319 | peer.close() 320 | try: 321 | self.peers.remove(peer) 322 | except ValueError: 323 | #was not on list 324 | pass 325 | 326 | def _updatePeers(self): 327 | peer_list = None 328 | try: 329 | peer_list = self.dht.get_peers(self.info_hash) 330 | except (lightdht.KRPCTimeout, lightdht.KRPCError, lightdht.NotFoundError), e: 331 | self.log("Problem getting peer list: "+str(e)) 332 | return 333 | 334 | if peer_list == None: 335 | return 336 | if type(peer_list) == str: 337 | peer_list = [peer_list] 338 | peer_list = filter(lambda x:len(x)==6, peer_list) 339 | self.got_peers = True 340 | self.peer_list = set(list(self.peer_list) + peer_list) 341 | 342 | def _run(self): 343 | 344 | tries = 0 345 | while not self.finished and not self.shutdown and tries <2: 346 | if tries != 0: 347 | time.sleep(10) 348 | tries += 1 349 | self._updatePeers() 350 | 351 | if not self.get_metadata: 352 | if len(self.peer_list) > 0: 353 | break 354 | else: 
355 | continue 356 | 357 | for peer in self.peer_list: 358 | if self.finished or self.shutdown: 359 | break 360 | if len(peer)!=6: 361 | continue 362 | data = struct.unpack('>BBBBH',peer) 363 | ip = '.'.join([str(d) for d in data[:4]]) 364 | port = data[4] 365 | try: 366 | self.openConnection(ip, port) 367 | except (PeerException, pysocket.error, pysocket.timeout), e: 368 | self.log("Error "+ip+": "+str(e)) 369 | #traceback.print_exc() 370 | self.finished = True 371 | 372 | def prepareData(self): 373 | if not self.get_metadata: 374 | return None 375 | num = len(self.metadata) 376 | if num != self.metadataPieces or num == 0: 377 | return None 378 | data = "" 379 | for i in range(num): 380 | data = data + self.metadata[i] 381 | sha = hashlib.sha1() 382 | sha.update(data) 383 | info_hash = sha.digest() 384 | if info_hash != self.info_hash: 385 | self.log("The hashes do not match! ("+info_hash.encode("hex")+") ") 386 | return None 387 | return data 388 | 389 | def log(self, what): 390 | print "Torrent "+(self.info_hash.encode("hex"))+": "+str(what) 391 | 392 | class TorrentManager: 393 | def __init__(self, dht, port, onfinish, timeout = TORRENT_TIMEOUT): 394 | self.timeout = timeout 395 | self.dht = dht 396 | self.port = port 397 | self.onfinish = onfinish 398 | self.running = {} 399 | #Don't handle incoming connections 400 | #start_new_thread(self._run,tuple()) 401 | 402 | def addTorrent(self, info_hash, metadata = True): 403 | if not info_hash in self.running: 404 | torrent = Torrent(self.dht, info_hash, metadata) 405 | self.running[info_hash] = torrent 406 | 407 | def count(self): 408 | return len(self.running) 409 | 410 | def fetchAndRemove(self): 411 | now = time.time() 412 | ret = [] 413 | for info_hash in self.running.keys(): 414 | torrent = self.running[info_hash] 415 | if torrent.finished: 416 | del self.running[info_hash] 417 | torrent.disconnect() 418 | data = torrent.prepareData() 419 | peers = torrent.peerCount() 420 | if data != None or torrent.got_peers: 
421 | ret.append((info_hash, peers, data)) 422 | elif now > torrent.started + self.timeout: 423 | del self.running[info_hash] 424 | torrent.log("Timeout") 425 | torrent.disconnect() 426 | return ret 427 | 428 | def _run(self): 429 | serversocket = pysocket.socket(pysocket.AF_INET, pysocket.SOCK_STREAM) 430 | serversocket.bind(('localhost', self.port)) 431 | serversocket.listen(10) 432 | while True: 433 | socket, address = serversocket.accept() 434 | start_new_thread(self._handlePeer, tuple(socket)) 435 | 436 | def _handlePeer(self, socket): 437 | try: 438 | peer = Peer(socket) 439 | peer.doReceiveHandshake() 440 | info_hash = peer.info_hash 441 | if info_hash in self.running: 442 | torrent = self.running[info_hash] 443 | torrent.addPeer(peer) 444 | else: 445 | peer.close() 446 | except Exception, e: 447 | print "Error while handling incoming connection: "+str(e) 448 | -------------------------------------------------------------------------------- /lightdht.py: -------------------------------------------------------------------------------- 1 | """ 2 | LightDHT - A lightweight python implementation of the Bittorrent distributed 3 | hashtable. 4 | 5 | 6 | The aim of LightDHT is to provide a simple, flexible implementation of the 7 | Bittorrent DHT for use in research applications. If you want to trade files, 8 | you have come to the wrong place. LightDHT does not implement the actual 9 | file transfer parts of the bittorrent protocol. It only takes part in the 10 | DHT. 11 | 12 | 13 | Philosophy: 14 | 15 | - Ease of use over performance 16 | - Adaptability over scalability 17 | 18 | In order to keep LightDHT easy to use, all DHT RPC calls are performed 19 | synchronously. This means that when you call a DHT method, your program will 20 | block until you have an answer to your request. That answer will be the 21 | return value of the function. This has the advantage that it keeps the 22 | logical program flow intact, and makes it more comfortable to use. 
23 | 24 | In order to maintain O(log N) scaling across the network, BEP0005 (the 25 | standard governing the DHT) mandates that implementations use a bucket-based 26 | approach to the routing table. This enables the node to fulfill all requests 27 | in constant time and (more or less) constant memory. In LightDHT, we throw 28 | that recommendation to the wind. 29 | 30 | Since the main focus of LightDHT is reseach, we are going to keep around all 31 | the data we can. This means that we keep around every single node we know 32 | about. Since in practice the number of nodes is limited and the request 33 | rates are rather low, we do not bother keeping the routing table organized 34 | in a tree structure for quick lookups. Instead we keep it in a dictionary 35 | and sort on-demand. The performance penalty is well worth the reduced 36 | complexity of the implementation, and the flexibility of having all nodes in 37 | an easy to use data structure. 38 | 39 | """ 40 | 41 | import socket 42 | import sys 43 | import os 44 | import time 45 | import hashlib 46 | import hmac 47 | import random 48 | import struct 49 | import threading 50 | import traceback 51 | import logging 52 | 53 | from bencode import bencode, bdecode 54 | from BTL import BTFailure 55 | 56 | # Logging is disabled by default. 57 | # See http://docs.python.org/library/logging.html 58 | logger = logging.getLogger(__name__) 59 | logger.addHandler(logging.NullHandler()) 60 | 61 | 62 | # Our version string! 
version = 'XN\x00\x00'


#
# Utility functions

def dottedQuadToNum(ip):
    "convert decimal dotted quad string to long integer"

    hexn = ''.join(["%02X" % long(i) for i in ip.split('.')])
    return long(hexn, 16)

def numToDottedQuad(n):
    "convert long int to dotted quad string"

    d = 256 * 256 * 256
    q = []
    while d > 0:
        m,n = divmod(n,d)
        q.append(str(m))
        d = d/256

    return '.'.join(q)

def strxor(a, b):
    """ xor two strings of different lengths """
    # The result is truncated to the length of the shorter input.
    if len(a) > len(b):
        return "".join([chr(ord(x) ^ ord(y)) for (x, y) in zip(a[:len(b)], b)])
    else:
        return "".join([chr(ord(x) ^ ord(y)) for (x, y) in zip(a, b[:len(a)])])

def decode_nodes(nodes):
    """ Decode node_info into a list of id, connect_info """
    # Compact node format (BEP 5): 20-byte id, 4-byte IP, 2-byte port.
    nrnodes = len(nodes)/26
    nodes = struct.unpack("!" + "20sIH"*nrnodes,nodes)
    for i in xrange(nrnodes):
        id_, ip, port = nodes[i*3], numToDottedQuad(nodes[i*3+1]), nodes[i*3+2]
        yield id_,(ip, port)

def encode_nodes(nodes):
    """ Encode a list of (id, connect_info) pairs into a node_info """
    # NOTE(review): node[1] is accessed as .c, so this actually expects
    # (id, Node) pairs rather than raw (ip, port) tuples -- confirm
    # against callers.
    n = []
    for node in nodes:
        n.extend([node[0], dottedQuadToNum(node[1].c[0]),node[1].c[1]])
    return struct.pack("!" + "20sIH"*len(nodes),*n)

class KRPCTimeout(RuntimeError):
    """
    This exception is raised whenever a KRPC request times out
    in synchronous mode.
    """
    pass

class KRPCError(RuntimeError):
    pass


class Node(object):
    # Per-node bookkeeping kept in the routing table.
    def __init__(self,c):
        self.c = c                   # connect_info: (ip, port)
        self.treq = 0                # time of the last request sent to it
        self.trep = 0                # time of the last reply received from it
        self.t = set()               # outstanding transaction ids
        self.useid = os.urandom(20)  # The id we pretend to be

class KRPCServer(object):
    """UDP server implementing the KRPC protocol (BEP 5)."""

    def __init__(self, port):
        self._port = port
        self._shutdown_flag = False
        self._thread = None
        self._sock = None
        self._transaction_id = 0
        # transaction id -> (callback-or-None, Node); guarded by the lock.
        self._transactions = {}
        self._transactions_lock = threading.Lock()
        # transaction id -> reply dict, for synchronous transactions.
        self._results = {}
        self.handler = self.default_handler

    def default_handler(self, req, c):
        """
        Default incoming KRPC request handler.
        Gets replaced by application specific code.
        """
        print req


    def start(self):
        """
        Start the KRPC server
        """
        self._sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        # Short socket timeout so the pump thread can notice shutdown.
        self._sock.settimeout(0.5)
        self._sock.bind( ("0.0.0.0",self._port) )
        self._thread = threading.Thread(target=self._pump)
        self._thread.daemon = True
        self._thread.start()

    def shutdown(self):
        """
        Shut down the KRPC server
        """
        self._shutdown_flag = True
        self._thread.join()

    def _pump(self):
        """
        Thread that processes incoming datagrams
        """
        # Listen and react
        while True:
            if self._shutdown_flag:
                break
            rec = {}
            try:
                rec,c = self._sock.recvfrom(4096)
                rec = bdecode(rec)
                if rec["y"] == "r":
                    # It's a reply.
                    # Remove the transaction id from the list of pending
                    # transactions and add the result to the result table.
                    # The client thread will take it from there.
                    t = rec["t"]
                    with self._transactions_lock:
                        if t in self._transactions:
                            node = self._transactions[t][1]
                            node.trep = time.time()
                            if t in node.t:
                                node.t.remove(t)
                            if self._transactions[t][0] is not None:
                                self._transactions[t][0](rec, node) # invoke the callback
                            else:
                                self._results[t] = rec # sync path
                            del self._transactions[t]
                elif rec["y"] == "q":
                    # It's a request, send it to the handler.
                    self.handler(rec,c)
                elif rec["y"] == "e":
                    # just post the error to the results array, but only if
                    # we have a transaction ID!
                    # Some software (e.g. LibTorrent) does not post the "t"
                    if "t" in rec:
                        t = rec["t"]
                        with self._transactions_lock:
                            if t in self._transactions:
                                del self._transactions[t]
                                self._results[t] = rec
                    else:
                        # log it
                        logger.warning("Node %r reported error %r, but did "
                                       "not specify a 't'" % (c,rec))
                else:
                    raise RuntimeError,"Unknown KRPC message %r from %r" % (rec,c)

                # Scrub the transaction list: drop transactions whose node
                # has not been asked anything for over 10 seconds.
                t1 = time.time()
                for tid,(cb,node) in self._transactions.items():
                    if t1-node.treq > 10.0:
                        with self._transactions_lock:
                            if tid in self._transactions:
                                del self._transactions[tid]


            except socket.timeout:
                # no packets, that's ok
                pass
            except BTFailure:
                # bdecode error, ignore the packet
                pass
            except:
                # Log and carry on to keep the packet pump alive.
                logger.critical("Exception while handling KRPC requests:\n\n"+traceback.format_exc()+("\n\n%r from %r" % (rec,c)))


    def send_krpc(self, req , node, callback=None):
        """
        Perform a KRPC request

        req      -- request dict (a "t" transaction id is added if missing)
        node     -- destination Node
        callback -- optional; invoked by the pump thread with the reply.
                    Without it the reply lands in the results table
                    (synchronous path, see _synctrans).
        Returns the transaction id.
        """
        logger.debug("KRPC request to %r", node.c)
        t = -1
        if "t" not in req:
            # add transaction id
            with self._transactions_lock:
                self._transaction_id += 1
                t = struct.pack("i",self._transaction_id)
            req["t"] = t
        else:
            t = req["t"]
        req["v"] = version
        data = bencode(req)
        self._transactions[t] = callback, node
        node.treq = time.time()
        node.t.add(t)

        self._sock.sendto(data, node.c)
        return t

    def send_krpc_reply(self, resp, connect_info):
        """
        Bencode and send a reply to a KRPC client
        """
        logger.info("REPLY: %r %r" % (connect_info, resp))

        data = bencode(resp)
        self._sock.sendto(data,connect_info)

    def _synctrans(self, q, node):
        """
        Perform a synchronous transaction.
        Used by the KRPC methods below
        """
        # We fake a syncronous transaction by sending
        # the request, then waiting for the server thread
        # to post the results of our transaction into
        # the results dict.
        t = self.send_krpc(q, node)
        dt = 0
        while t not in self._results:
            time.sleep(0.1)
            dt+=0.1
            if dt > 5.0:
                # No reply after ~5 seconds: give up.
                raise KRPCTimeout

        # Retrieve the result
        r = self._results[t]
        del self._results[t]

        if r["y"]=="e":
            # Error condition!
            raise KRPCError, "Error %r while processing transaction %r" % (r,q)

        return r["r"]


    def ping(self, id_, node):
        q = { "y":"q", "q":"ping", "a":{"id":id_}}
        return self._synctrans(q, node)

    def find_node(self, id_, node, target):
        q = { "y":"q", "q":"find_node", "a":{"id":id_,"target":target}}
        return self._synctrans(q, node)

    def get_peers(self, id_, node, info_hash):
        q = { "y":"q", "q":"get_peers", "a":{"id":id_,"info_hash":info_hash}}
        return self._synctrans(q, node)

    def announce_peer(self, id_,node, info_hash, port, token):
        # We ignore "name" and "seed" for now as they are not part of BEP0005
        q = {'a': {
                #'name': '',
                'info_hash': info_hash,
                'id': id_,
                'token': token,
                'port': port},
             'q': 'announce_peer', 'y': 'q'}
        return self._synctrans(q, node)


class NotFoundError(RuntimeError):
    pass

class DHT(object):
    def __init__(self, port):
        self._server = KRPCServer(port)

        # This is our routing table.
        # We don't do any bucketing or anything like that, we just
        # keep track of all the nodes we know about.
        # This gives us significant memory overhead over a bucketed
        # implementation and ruins the logN scaling behaviour of the DHT.
        # We don't care ;)

        self._nodes = {}
        self._nodes_lock = threading.Lock()
        self._bad = set()  # connect_infos of blacklisted (unresponsive) nodes

        # Thread details
        self._shutdown_flag = False
        self._thread = None

        # default handler
        self.handler = self.default_handler


        # Behaviour configuration
        # Am I actively seeking out other nodes?
        self.active_discovery = True
        # After how many seconds should i do another self-lookup?
        self.self_find_delay = 180.0
        # How many active node discovery attempts between self-lookups?
        self.active_discoveries = 10

        # Session key
        self._key = os.urandom(20) # 20 random bytes == 160 bits


    def start(self):
        """
        Start the DHT node
        """
        self._server.start()
        self._server.handler = self.handler

        # Add the default nodes
        # NOTE(review): gethostbyaddr is normally meant for reverse lookups;
        # gethostbyname would be the conventional call for resolving the
        # bootstrap hostname -- confirm before changing.
        DEFAULT_CONNECT_INFO = (socket.gethostbyaddr("router.bittorrent.com")[2][0], 6881)
        DEFAULT_NODE = Node(DEFAULT_CONNECT_INFO)
        DEFAULT_ID = self._server.ping(DEFAULT_NODE.useid,DEFAULT_NODE)['id']
        with self._nodes_lock:
            self._nodes[DEFAULT_ID] = DEFAULT_NODE

        # Start our event thread
        self._thread = threading.Thread(target=self._pump)
        self._thread.daemon = True
        self._thread.start()


    def shutdown(self):
        self._server.shutdown()

    # Context-manager support: "with dht:" starts and stops the node.
    def __enter__(self):
        self.start()

    def __exit__(self, type, value, traceback):
        self.shutdown()

    def _pump(self):
        """
        Thread that maintains DHT connectivity and does
        routing table housekeeping.
        Started by self.start()

        The very first thing this function does, is look up itself
        in the DHT. This connects it to neighbouring nodes and enables
        it to give reasonable answers to incoming queries.

        Afterward we look up random nodes to increase our connectedness
        and gather information about the DHT as a whole

        """
        # Try to establish links to close nodes
        logger.info("Establishing connections to DHT")
        for node in self._nodes.values():
            self.find_node(node.useid)

        delay = self.self_find_delay

        if self.active_discovery:
            delay /= (self.active_discoveries + 1)

        iteration = 0
        while True:
            try:
                time.sleep(delay)
                iteration += 1
                if self.active_discovery and iteration % (self.active_discoveries + 1) != 0:
                    # Active discovery: look up a random target id.
                    target = os.urandom(20)
                    self.find_node(target)
                    logger.info("Tracing done, routing table contains %d nodes", len(self._nodes))
                else:
                    # Regular maintenance:
                    # Find N random nodes. Execute a find_node() on them.
                    # toss them if they come up empty.
                    n = random.sample(self._nodes.items(),10)
                    for node_id, node in n:
                        try:
                            r = self._server.find_node(node.useid, node, node.useid)
                            if "nodes" in r:
                                self._process_incoming_nodes(r["nodes"])
                        except KRPCTimeout:
                            # The node did not reply.
                            # Blacklist it.
                            with self._nodes_lock:
                                self._bad.add(node.c)
                                if node_id in self._nodes:
                                    del self._nodes[node_id]
                    logger.info("Cleanup, routing table contains %d nodes", len(self._nodes))
            except:
                # This loop should run forever. If we get into trouble, log
                # the exception and carry on.
441 | logger.critical("Exception in DHT maintenance thread:\n\n"+traceback.format_exc()) 442 | 443 | def _process_incoming_nodes(self,bnodes): 444 | 445 | # Add them to the routing table 446 | for node_id,node_c in decode_nodes(bnodes): 447 | if node_c not in self._bad: 448 | with self._nodes_lock: 449 | self._nodes[node_id] = Node(node_c) 450 | 451 | def get_close_nodes(self,target, N=3): 452 | """ 453 | Find the N nodes in the routing table closest to target 454 | 455 | We do this by brute force: we compute the distance of the 456 | target node to all the nodes in the routing table. 457 | A bucketing system would speed things up considerably, and 458 | require less memory. 459 | However, we like to keep as many nodes as possible in our routing 460 | table for research purposes. 461 | """ 462 | 463 | # If we have no known nodes, exception! 464 | if len(self._nodes) == 0: 465 | raise RuntimeError, "No nodes in routing table!" 466 | 467 | # Sort the entire routing table by distance to the target 468 | # and return the top N matches 469 | with self._nodes_lock: 470 | nodes = [(node_id,self._nodes[node_id]) for node_id in self._nodes] 471 | nodes.sort(key=lambda x:strxor(target,x[0])) 472 | return nodes[:N] 473 | 474 | 475 | def _recurse(self, target, function, max_attempts=4, result_key=None): 476 | """ 477 | Recursively query the DHT, following "nodes" replies 478 | until we hit the desired key 479 | 480 | This is the workhorse function used by all recursive queries. 
481 | """ 482 | logger.debug("Recursing to target %r" % target.encode("hex")) 483 | attempts = 0 484 | while attempts < max_attempts: 485 | attempts += 1 486 | for id_, node in self.get_close_nodes(target): 487 | try: 488 | r = function(node.useid , node, target) 489 | logger.debug("Results from %r ", node.c)# d.encode("hex")) 490 | if result_key and result_key in r: 491 | return r[result_key] 492 | if "nodes" in r: 493 | self._process_incoming_nodes(r["nodes"]) 494 | except KRPCTimeout: 495 | # The node did not reply. 496 | # Blacklist it. 497 | with self._nodes_lock: 498 | self._bad.add(node) 499 | if id_ in self._nodes: 500 | del self._nodes[id_] 501 | except KRPCError: 502 | # Sometimes we just flake out due to UDP being unreliable 503 | # Don't sweat it, just log and carry on. 504 | logger.error("KRPC Error:\n\n"+traceback.format_exc()) 505 | 506 | 507 | if result_key: 508 | # We were expecting a result, but we did not find it! 509 | # Raise the NotFoundError exception instead of returning None 510 | raise NotFoundError, "No result!" 
511 | 512 | def find_node(self, target, attempts = 10): 513 | """ 514 | Recursively call the find_node function to get as 515 | close as possible to the target node 516 | """ 517 | 518 | logger.debug("Tracing to %r" % target.encode("hex")) 519 | self._recurse(target,self._server.find_node, max_attempts=attempts) 520 | 521 | def get_peers(self,info_hash,attempts=10): 522 | """ 523 | Recursively call the get_peers function to fidn peers 524 | for the given info_hash 525 | """ 526 | logger.debug("Finding peers for %r" % info_hash.encode("hex")) 527 | return self._recurse(info_hash,self._server.get_peers, result_key="values",max_attempts=attempts) 528 | 529 | def default_handler(self,rec,c): 530 | """ 531 | Process incoming requests 532 | """ 533 | logger.info("REQUEST: %r %r" % (c, rec)) 534 | # Use the request to update teh routing table 535 | n_id = rec["a"]["id"] 536 | with self._nodes_lock: 537 | self._nodes[n_id] = Node(c) 538 | node = self._nodes[n_id] 539 | # Skeleton response 540 | resp = {"y":"r","t":rec["t"],"r":{"id":node.useid}, "v":version} 541 | if rec["q"] == "ping": 542 | self._server.send_krpc_reply(resp,c) 543 | elif rec["q"] == "find_node": 544 | target = rec["a"]["target"] 545 | resp["r"]["nodes"] = encode_nodes(self.get_close_nodes(target)) 546 | self._server.send_krpc_reply(resp,c) 547 | elif rec["q"] == "get_peers": 548 | # Provide a token so we can receive announces 549 | # The token is generated using HMAC and a secret 550 | # session key, so we don't have to remember it. 551 | # Token is based on nodes id, connection details 552 | # torrent infohash to avoid clashes in NAT scenarios. 
553 | info_hash = rec["a"]["info_hash"] 554 | token = hmac.new(self._key,info_hash+n_id+str(c),hashlib.sha1).digest() 555 | resp["r"]["token"] = token 556 | # We dont actually keep any peer administration, so we 557 | # always send back the closest nodes 558 | resp["r"]["nodes"] = encode_nodes(self.get_close_nodes(info_hash)) 559 | self._server.send_krpc_reply(resp,c) 560 | elif rec["q"] == "announce_peer": 561 | # First things first, validate the token. 562 | info_hash = rec["a"]["info_hash"] 563 | token = hmac.new(self._key,info_hash+n_id+str(c),hashlib.sha1).digest() 564 | if token != rec["a"]["token"]: 565 | return # Ignore the request 566 | else: 567 | # We dont actually keep any peer administration, so we 568 | # just acknowledge. 569 | self._server.send_krpc_reply(resp,c) 570 | else: 571 | logger.error("Unknown request in query %r" % rec) 572 | 573 | if __name__ == "__main__": 574 | 575 | # Enable logging: 576 | # Tell the module's logger to log at level DEBUG 577 | logger.setLevel(logging.DEBUG) 578 | # Create a handler, tell it to log at level DEBUG on stdout 579 | handler = logging.StreamHandler() 580 | handler.setLevel(logging.DEBUG) 581 | # Add the handler 582 | logger.addHandler(handler) 583 | 584 | # Create a DHT node. 585 | dht1 = DHT(port=54767, id_=hashlib.sha1( 586 | "Change this to avoid getting ID clashes").digest()) 587 | # Start it! 588 | with dht1: 589 | # Look up peers that are sharing one of the Ubuntu 12.04 ISO torrents 590 | print dht1.get_peers("8ac3731ad4b039c05393b5404afa6e7397810b41".decode("hex")) 591 | # Go to sleep and let the DHT service requests. 592 | while True: 593 | time.sleep(1) 594 | 595 | 596 | --------------------------------------------------------------------------------