├── src ├── Platform │ ├── index.fio │ └── interface daemon.php ├── Classes │ ├── nodeExtract.php │ ├── node_holder.php │ ├── node.php │ └── bencoded.php ├── Tests │ └── dht.php ├── Client │ └── dht.class.php └── Server │ └── routing_table.php ├── README.md ├── LICENSE └── docs └── bep_0005.html /src/Platform/index.fio: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |

hey

8 | 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PHP-K-DHT 2 | ================== 3 | 4 | A PHP implementation of the Bittorrent DHT protocol. 5 | 6 | There are 2 parts to this project 7 | 8 | * A client part which focuses on Querys too a selected DHT Node e.g router.bitttorrent.com:6881 9 | * A server part which focuses on being a DHT Node and taking requests. 10 | 11 | Also these 2 parts will be merged together to create a platform which will be used 12 | as an interface into the Bittorrent DHT. 13 | 14 | 15 | Please feel free to modify the code. 16 | 17 | Email me if you have any questions AT fio DOT rutschmann AT gmail DOT com 18 | 19 | ^^ bloody spiders crawling for email addresse's 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/Classes/nodeExtract.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/Classes/node_holder.php: -------------------------------------------------------------------------------- 1 | $nodes = array(); 11 | } 12 | 13 | //adds node if not allready on list; 14 | public function add_nodes($array_nodes) 15 | { 16 | foreach ($array_nodes as $j) 17 | { 18 | $found = 0; 19 | foreach($this->nodes as $i) 20 | { 21 | if($j->return_node_id() === $i['obj']->return_node_id()) 22 | { 23 | $found = 1; 24 | } 25 | } 26 | 27 | if (found == 0) 28 | { 29 | $something = array( 30 | 'obj' => $j, 31 | 'cnt' => 0 32 | ); 33 | 34 | array_push($this->nodes, $something); 35 | } 36 | } 37 | 38 | return; 39 | } 40 | 41 | //returns the next unqueried node 42 | //return FALSE if finished array 43 | public function get_next_node() 44 | { 45 | //7 so we get reference/pointer to the node 46 | foreach($this->nodes as &$i) 47 | { 48 | if ($i['cnt'] == 0) 49 | { 
50 | $i['cnt'] = 1; 51 | return $i['obj']; 52 | } 53 | } 54 | 55 | return FALSE; 56 | } 57 | 58 | 59 | 60 | 61 | 62 | } 63 | 64 | 65 | 66 | 67 | ?> -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, fiorenzo-rutschmann 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /src/Platform/interface daemon.php: -------------------------------------------------------------------------------- 1 | " . $input . 
"\n"; 48 | 49 | } 50 | 51 | socket_write($client, $output); 52 | 53 | socket_close($client); 54 | } 55 | 56 | socket_close($sock); 57 | 58 | 59 | //HTTP HEADER 60 | /* $output = 'URL: http://ip-of-my-server:9000/ 61 | HTTP/1.1 200 OK 62 | Date: Tue, 10 Jul 2012 16:58:23 GMT 63 | Server: TestServer/1.0.0 (PHPServ) 64 | Last-Modified: Fri, 06 Jul 2012 14:29:58 GMT 65 | ETag: "13c008e-1b9-4c42a193de580" 66 | Accept-Ranges: bytes 67 | Content-Length: 441 68 | Vary: Accept-Encoding 69 | Content-Type: text/html 70 | 71 | '; */ 72 | ?> -------------------------------------------------------------------------------- /src/Classes/node.php: -------------------------------------------------------------------------------- 1 | compact = array(); 14 | $this->compact = unpack('C*', $compact); 15 | } 16 | else 17 | { 18 | throw new Exception("Input not Correct Format. \n"); 19 | } 20 | 21 | //initalise info_hash array 22 | $this->info_hash = Array(); 23 | } 24 | 25 | public function return_node_id() 26 | { 27 | return array_splice($this->compact,0,20); 28 | } 29 | 30 | public function return_ip() 31 | { 32 | $ip_string = sprintf("%d.%d.%d.%d", $this->compact[20], $this->compact[21], $this->compact[22], $this->compact[23]); 33 | return $ip_string; 34 | } 35 | 36 | public function return_port_string() 37 | { 38 | return sprintf("%d", $this->compact[24] << 8 | $this->compact[25]); 39 | } 40 | 41 | public function return_port() 42 | { 43 | return $this->compact[24] << 8 | $this->compact[25]; 44 | } 45 | 46 | public function return_compact_form() 47 | { 48 | return $this->$compact; 49 | } 50 | 51 | public function update_communication() 52 | { 53 | $this->last_communication = time(); 54 | $this->bad_counter = 0; 55 | } 56 | 57 | public function update_bad() 58 | { 59 | $this->bad_counter += 1; 60 | } 61 | 62 | public function get_info_hashes() 63 | { 64 | return $this->info_hash(); 65 | } 66 | 67 | public function add_info_hash($info_hash) 68 | { 69 | array_push($this->$info_hash, 
$info_hash); 70 | } 71 | } 72 | 73 | class node { 74 | private $compact; //4 x ip + 2 x port 75 | 76 | public function __construct($compact) 77 | { 78 | //make sure $compact is 6 bytes else throw exemption 79 | if (strlen($compact) == 6) 80 | { 81 | $this->compact = $compact; 82 | } 83 | else 84 | { 85 | throw new Exception("Input not Correct Format. \n"); 86 | } 87 | } 88 | 89 | public function return_ip() 90 | { 91 | return substr($this->compact,0, 4); 92 | } 93 | 94 | public function return_port() 95 | { 96 | return substr($this->compact,4, 2); 97 | } 98 | 99 | public function return_compact_form() 100 | { 101 | return $this->$compact; 102 | } 103 | } 104 | 105 | 106 | 107 | ?> -------------------------------------------------------------------------------- /src/Tests/dht.php: -------------------------------------------------------------------------------- 1 | $timeout, 'usec' => 0)); 12 | socket_connect($socket, $host, null); 13 | 14 | $ts = microtime(true); 15 | socket_send($socket, $package, strLen($package), 0); 16 | if (socket_read($socket, 255)) 17 | $result = microtime(true) - $ts; 18 | else $result = false; 19 | socket_close($socket); 20 | 21 | return $result; 22 | } 23 | 24 | //echo ping("google.com"); 25 | 26 | function dhtping() 27 | { 28 | $socket = socket_create(AF_INET, SOCK_DGRAM, SOL_UDP); 29 | //socket_bind($socket, '0.0.0.0', 6881); 30 | $packet = "d1:ad2:id20:abcdefghij0123456789e1:q4:ping1:t2:aa1:y1:qe"; 31 | //$packet = "{\"t\":\"aa\", \"y\":\"q\", \"q\":\"ping\", \"a\":{\"id\":\"abcdefghij0123456789\"}}"; 32 | $host = "router.bittorrent.com"; 33 | $port = 6881; 34 | socket_sendto($socket, $packet, strlen($packet), 0, $host, $port); 35 | 36 | try { 37 | socket_recvfrom($socket, $buf, 12000, 0, $host, $port); 38 | } 39 | catch (Exception $e) 40 | { 41 | echo ""; 42 | } 43 | 44 | 45 | socket_close($socket); 46 | echo $buf; 47 | } 48 | 49 | //dhtping(); 50 | 51 | function dhtlibping() 52 | { 53 | $lib = new phpdht(); 54 | 55 | //$lib->ping(); 
56 | $lib->get_peers(); 57 | 58 | } 59 | 60 | 61 | //TESTING FOR USER XJIOP, get_peers() 62 | function XJIOP() 63 | { 64 | echo "PHP K DHT: \n"; 65 | echo "Running get_peers \n"; 66 | echo "valid info_hash = 2E3781F347760F304B278B22AE4ADF9320AACE5E \n"; 67 | //$info_hash = readline("Enter a valid info_hash:"); 68 | $info_hash = "C797C6D270002A2D507447EEF2FBC4D271309E8C"; 69 | 70 | // // "dht.transmissionbt.com" , 6881 71 | // "router.utorrent.com" , 6881 72 | $lib = new phpdht(); 73 | $peers = $lib->get_peers($info_hash, "124.0.1.1" , 41353 ); 74 | 75 | 76 | //ok heres the tricky part, get_peers in the specification returns either nodes or bittorrent peers. 77 | 78 | //differentiate between returned nodes or peers or FALSE 79 | if ($peers == FALSE) 80 | { 81 | echo "-------- FUNCTION RETURNED FALSE -------------- \n"; 82 | 83 | XJIOP(); 84 | return; 85 | } 86 | else if ( is_a($peers[1], 'DHT_node')) 87 | { 88 | echo "----------- DHT NODES -------------------------- \n"; 89 | 90 | foreach($peers as $i) 91 | { 92 | echo "DHT_id: " . vsprintf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",$i->return_node_id()) . " ip:" . $i->return_ip() . "\tport:" . $i->return_port_string() . 
"\n"; 93 | } 94 | 95 | //XJIOP(); 96 | return; 97 | } 98 | else if ( is_a($peers[1], 'node')) 99 | { 100 | echo "----------- Bittorrent peers ------------------ \n"; 101 | 102 | foreach($peers as $i) 103 | { 104 | echo "ip: $i->return_ip() port: $i->return_port() \n"; 105 | } 106 | } 107 | else 108 | { 109 | echo "Function returned something random, please place an issue with the project and copy in the below data; \n "; 110 | print_r($peers); 111 | 112 | return; 113 | } 114 | 115 | } 116 | 117 | function get_peers_recursive() 118 | { 119 | //turn off those damn socket warnings 120 | error_reporting(E_ERROR | E_PARSE); 121 | 122 | echo "Getting peers()"; 123 | 124 | $info_hash = '31FE2672E754DDD7AC57543219329A95E61E0F77'; //most popular torrent on tpb atm 125 | 126 | $lib = new phpdht(); 127 | 128 | $lib->get_peers_for_info_hash_blocking($info_hash); 129 | 130 | return; 131 | 132 | 133 | } 134 | 135 | function readline( $prompt = '' ) 136 | { 137 | echo $prompt; 138 | return rtrim( fgets( STDIN ), "\n" ); 139 | } 140 | 141 | function quick() 142 | { 143 | echo hex2bin("2E3781F347760F204B278B22AE4ADF9320AACE5E"); 144 | } 145 | 146 | get_peers_recursive(); 147 | //XJIOP(); 148 | //quick(); 149 | //just to take away the socket notice 150 | //error_reporting(E_ALL ^ E_WARNING); 151 | 152 | //dhtlibping(); 153 | ?> 154 | 155 | -------------------------------------------------------------------------------- /src/Classes/bencoded.php: -------------------------------------------------------------------------------- 1 | $value) 20 | { 21 | $ret .= strlen($key) . ":" . $key . strlen($value) . ":" . $value; 22 | } 23 | $ret .= "e"; 24 | //dictionary 2 finish 25 | 26 | foreach($type as $key => $value) 27 | { 28 | $ret .= strlen($key) . ":" . $key . strlen($value) . ":" . 
$value; 29 | } 30 | 31 | $ret .= "e"; 32 | //dictionary 1 finish 33 | 34 | return $ret; 35 | } 36 | 37 | 38 | /** 39 | * Decodes an bencoded string to data 40 | * 41 | * @param string String to decode 42 | * @return mixed Outputted data 43 | */ 44 | public static function decode($string=NULL, &$i=0) 45 | { 46 | if (is_string($string)) 47 | { 48 | $string = str_split($string); 49 | } 50 | 51 | switch ($string[$i]) 52 | { 53 | case 'd': 54 | 55 | $dict = array(); 56 | 57 | while (isset($string[++$i])) 58 | { 59 | if ($string[$i] == 'e') 60 | { 61 | return $dict; 62 | } 63 | else 64 | { 65 | $key = self::decode($string, $i); 66 | 67 | if (isset($string[++$i])) 68 | { 69 | $dict[$key] = self::decode($string, $i); 70 | } 71 | else 72 | { 73 | throw new Exception('Dictionary key ('.$key.') without a value at index '.$i); 74 | } 75 | } 76 | } 77 | 78 | throw new Exception('Unterminated dictionary at index '.$i); 79 | break; 80 | 81 | case 'l': 82 | 83 | $list = array(); 84 | 85 | while (isset($string[++$i])) 86 | { 87 | if ($string[$i] == 'e') 88 | { 89 | return $list; 90 | } 91 | else 92 | { 93 | $list[] = self::decode($string, $i); 94 | } 95 | } 96 | 97 | throw new Exception('Unterminated list at index '.$i); 98 | break; 99 | 100 | case 'i': 101 | 102 | $buffer = ''; 103 | 104 | while (isset($string[++$i])) 105 | { 106 | if ($string[$i] == 'e') 107 | { 108 | return intval($buffer); 109 | } 110 | elseif (ctype_digit($string[$i])) 111 | { 112 | $buffer .= $string[$i]; 113 | } 114 | else 115 | { 116 | throw new Exception('Unexpected token while parsing integer at index '.$i.': '.$string[$i]); 117 | } 118 | } 119 | 120 | throw new Exception("Unterminated integer at index $i"); 121 | break; 122 | 123 | case ctype_digit($string[$i]): 124 | 125 | $length = $string[$i]; 126 | 127 | while (isset($string[++$i])) 128 | { 129 | if ($string[$i] == ':') 130 | { 131 | break; 132 | } 133 | elseif (ctype_digit($string[$i])) 134 | { 135 | $length .= $string[$i]; 136 | } 137 | else 138 | 
{ 139 | throw new Exception('Unexpected token while parsing string length at index '.$i.': '.$string[$i]); 140 | } 141 | } 142 | 143 | $end = $i + intval($length); 144 | 145 | $buffer = ''; 146 | 147 | while (isset($string[++$i])) 148 | { 149 | if ($i <= $end) 150 | { 151 | $buffer .= $string[$i]; 152 | if ($i == $end) 153 | { 154 | return $buffer; 155 | } 156 | } 157 | } 158 | throw new Exception('Unterminated string at index '.$i); 159 | } 160 | 161 | throw new Exception('Unexpected token at index '.$i.': '.$string[$i]); 162 | break; 163 | } 164 | 165 | private static function is_assoc($arr) 166 | { 167 | if ( ! is_array($arr)) 168 | { 169 | throw new InvalidArgumentException('The parameter must be an array.'); 170 | } 171 | 172 | $counter = 0; 173 | foreach ($arr as $key => $unused) 174 | { 175 | if ( ! is_int($key) or $key !== $counter++) 176 | { 177 | return true; 178 | } 179 | } 180 | return false; 181 | } 182 | 183 | } 184 | 185 | 186 | ?> -------------------------------------------------------------------------------- /src/Client/dht.class.php: -------------------------------------------------------------------------------- 1 | id = rand(3,1000); 16 | } 17 | 18 | public function ping() 19 | { 20 | //create socket 21 | $socket = socket_create(AF_INET, SOCK_DGRAM, SOL_UDP); 22 | //socket_bind($socket, '0.0.0.0', 6881); 23 | $packet = "d1:ad2:id20:abcdefghij0123456789e1:q4:ping1:t2:441:y1:qe"; 24 | //$packet = bencode::encode(array("id" => $this->get_unique_node_id()), array("q" => "ping", "t" => $this->unique_id(), "y" => "q")); 25 | 26 | echo "\n packet=" . 
$packet; 27 | 28 | $host = "router.bittorrent.com"; 29 | $port = 6881; 30 | socket_sendto($socket, $packet, strlen($packet), 0, $host, $port); 31 | 32 | socket_recvfrom($socket, $buf, 12000, 0, $host, $port); 33 | socket_close($socket); 34 | 35 | echo $buf; 36 | print_r( bencode::decode($buf)); 37 | } 38 | 39 | public function find_node() 40 | { 41 | 42 | } 43 | 44 | 45 | //blocking - returns array of dht nodes or peers 46 | private function get_peers_blocking($info_hash, $host = "router.bittorrent.com" , $port = 6881) 47 | { 48 | //create a UDP socket to send commands through 49 | $socket = socket_create(AF_INET, SOCK_DGRAM, SOL_UDP); 50 | 51 | //Create Command Packet 52 | $packet = bencode::encode(array("id" => $this->get_unique_node_id(), "info_hash" => hex2bin($info_hash)), array("q" => "get_peers", "t" => $this->unique_id(), "y" => "q" ) ); 53 | 54 | socket_sendto($socket, $packet, strlen($packet), 0, $host, $port); 55 | 56 | //set timeout 57 | $timeout = array('sec' => 5, 'usec' => 0); 58 | socket_set_option($socket, SOL_SOCKET, SO_RCVTIMEO, $timeout); 59 | 60 | $time = time(); 61 | //recieve data 62 | try { 63 | socket_recvfrom($socket, $buf, 12000, 0, $host, $port); 64 | } catch (Exception $e) { 65 | echo "Error"; 66 | return FALSE; 67 | } 68 | 69 | //have to manually do the timeout, cant seem to get info from this socket 70 | if ((time() - $time) >= 4) 71 | { 72 | socket_close($socket); 73 | return FALSE; 74 | } 75 | 76 | //close socket so bad shit don't happen 77 | socket_close($socket); 78 | 79 | return nodeExtract::return_nodes(bencode::decode($buf)); 80 | 81 | } 82 | 83 | //nonblocking returns socket 84 | private function get_peers_non_blocking($info_hash, $host = "router.bittorrent.com" , $port = 6881) 85 | { 86 | 87 | } 88 | 89 | public function get_peers_for_info_hash_blocking($info_hash) 90 | { 91 | //create starting output 92 | echo "\n"; 93 | echo "Collecting peers for " . $info_hash . "\n"; 94 | echo "\n"; 95 | 96 | //create class to hold nodes. 
97 | $nodes_holder = new node_holder(); 98 | 99 | //seed this : TODO fix this mess 100 | $peers = $this->get_peers_blocking($info_hash, "dht.transmissionbt.com" , 6881 ); 101 | 102 | //differentiate between returned nodes or peers or FALSE 103 | if ($peers == FALSE) 104 | { 105 | echo "-------- FUNCTION RETURNED FALSE -------------- \n"; 106 | return; 107 | } 108 | else if ( is_a($peers[1], 'DHT_node')) 109 | { 110 | echo "----------- DHT NODES -------------------------- \n"; 111 | $nodes_holder->add_nodes($peers); 112 | } 113 | else if ( is_a($peers[1], 'node')) 114 | { 115 | echo "----------- Bittorrent peers ------------------ \n"; 116 | 117 | foreach($peers as $i) 118 | { 119 | echo "ip: $i->return_ip() port: $i->return_port() \n"; 120 | } 121 | } 122 | else 123 | { 124 | echo "Function returned something random, please place an issue with the project and copy in the below data; \n "; 125 | print_r($peers); 126 | return; 127 | } 128 | 129 | 130 | while(($DHT_node = $nodes_holder->get_next_node()) != FALSE ) 131 | { 132 | $peers = $this->get_peers_blocking($info_hash, $DHT_node->return_ip() , $DHT_node->return_port()); 133 | 134 | //differentiate between returned nodes or peers or FALSE 135 | if ($peers == FALSE) 136 | { 137 | echo "-------- FUNCTION RETURNED FALSE -------------- \n"; 138 | } 139 | else if ( is_a($peers[1], 'DHT_node')) 140 | { 141 | echo "----------- DHT NODES -------------------------- \n"; 142 | $nodes_holder->add_nodes($peers); 143 | } 144 | else if ( is_a($peers[1], 'node')) 145 | { 146 | echo "----------- Bittorrent peers ------------------ \n"; 147 | 148 | foreach($peers as $i) 149 | { 150 | echo "ip: $i->return_ip() port: $i->return_port() \n"; 151 | } 152 | } 153 | else 154 | { 155 | echo "Function returned something random, please place an issue with the project and copy in the below data; \n "; 156 | print_r($peers); 157 | return; 158 | } 159 | 160 | } 161 | 162 | 163 | } 164 | 165 | public function announce_peer() 166 | { 167 | 168 | 
} 169 | 170 | //private functions 171 | //unique_id returns a two byte code to repersent the query (base 36) 172 | 173 | public function unique_id() 174 | { 175 | //loop back around 176 | if ($this->id >= 1290) 177 | { 178 | $this->id = 0; 179 | } 180 | 181 | //convert to base 36 182 | $ret = base_convert($this->id,10,36); 183 | 184 | //pad to 2 characters 185 | $ret = str_pad($ret, 2, "0", STR_PAD_LEFT); 186 | 187 | //increment id silly 188 | $this->id = $this->id + 1; 189 | 190 | return $ret; 191 | } 192 | 193 | 194 | private function get_unique_node_id() 195 | { 196 | //hack, scramble $node_id 197 | 198 | for ($i = 0; $i < strlen($this->node_id); $i++) 199 | { 200 | $random = rand(0,strlen($this->node_id) -1); 201 | 202 | $temp = $this->node_id[$i]; 203 | $this->node_id[$i] = $this->node_id[$random]; 204 | $this->node_id[$random] = $temp; 205 | 206 | } 207 | 208 | //just in case i want to use as return function 209 | 210 | return $this->node_id; 211 | } 212 | 213 | } 214 | ?> -------------------------------------------------------------------------------- /src/Server/routing_table.php: -------------------------------------------------------------------------------- 1 | buckets as &$i ) 22 | { 23 | if ($i->in_keyspace($node->return_node_id())) 24 | { 25 | //check to see if bucket full 26 | if ($i->return_nodes_count() >= 8) 27 | { 28 | //ifso then splitbucket 29 | $this->split_bucket($i); 30 | //bloody recursion better no loop forever. 
31 | return $this->add_node($compact); 32 | } 33 | else 34 | { 35 | // if not add node to bucket 36 | $error = $i->add_node($node); 37 | 38 | //check for error -> TODO implement code to handle error codes 1 to 4 39 | switch($error) 40 | { 41 | case 0: return true; break; 42 | default: return false; break; 43 | } 44 | } 45 | } 46 | 47 | } 48 | 49 | //hail mary 50 | return false; 51 | } 52 | 53 | //TODO: 54 | public function add_node($node_id, $ip, $port) 55 | { 56 | } 57 | 58 | //return node if found 59 | //return FALSE if not found 60 | public function find_node($node_id) 61 | { 62 | //& =we want reference so we can change values 63 | foreach( $this->buckets as &$i) 64 | { 65 | if ($i->in_keyspace($node_id)) 66 | { 67 | if ($i->in_bucket($node_id)) 68 | { 69 | //having trust in code not to put a if statement here, 70 | return $i->get_node($node_id); 71 | } 72 | } 73 | } 74 | 75 | return false; 76 | } 77 | 78 | public function get_eight_closest_nodes($node_id) 79 | { 80 | $ret = new array(); 81 | 82 | foreach($this->buckets as $key => $value) 83 | { 84 | if ($value->in_keyspace($node_id)) 85 | { 86 | 87 | } 88 | } 89 | 90 | 91 | } 92 | 93 | private function split_bucket(&$bucket) 94 | { 95 | $index = array_search($bucket, $this->buckets); 96 | 97 | if ($index == FALSE) 98 | { 99 | throw new Exception("\n Class: routing_table Function: split_bucket Cause: \$bucket not found."); 100 | return 0; 101 | } 102 | 103 | //split bucket 104 | $new_bucket = $bucket->split(); 105 | //add new bucket to the array 106 | array_splice($this->buckets, $index, 0, $new_bucket); 107 | } 108 | 109 | } 110 | 111 | class bucket 112 | { 113 | private $start; //keyspace 114 | private $finish; 115 | 116 | //upto 8 nodes 117 | private $elements; 118 | 119 | public function __construct($start,$finish) 120 | { 121 | $this->elements = new array(); 122 | $this->start = $start; 123 | $this->finish = $finish; 124 | } 125 | 126 | //return 0 for successful 127 | //return 1 for full ie 8 nodes 128 | 
//return 2 for element allready in bucket. 129 | //return 3 for when node_id is in the array 130 | //retunr -1 for unsuccessful - not implemented 131 | public function add_node($node) 132 | { 133 | //check for 8 nodes 134 | if (count($this->elements) >= 8) 135 | { 136 | return 1; 137 | } 138 | 139 | if (in_array($node, $this->elements )) 140 | { 141 | return 2; 142 | } 143 | 144 | foreach( $this->element as $i) 145 | { 146 | if ( $node->return_node_id() == $i->return_node_id()) 147 | { 148 | return 3; 149 | } 150 | } 151 | 152 | array_push($this->elements, $node); 153 | 154 | return 0; 155 | } 156 | 157 | public function in_keyspace($node_id) 158 | { 159 | if ( $node_id >= $this->start && $node_id <= $this->finish) 160 | { 161 | return true; 162 | } 163 | 164 | return false; 165 | } 166 | 167 | public function in_bucket($node_id) 168 | { 169 | foreach($elements as $i) 170 | { 171 | if ($i->return_node_id() == $node_id) 172 | { 173 | return true; 174 | } 175 | } 176 | 177 | return false; 178 | } 179 | 180 | public function return_nodes_count() 181 | { 182 | return count($this->elements); 183 | } 184 | 185 | public function get_node() 186 | { 187 | foreach($elements as $i) 188 | { 189 | if ($i->return_node_id() == $node_id) 190 | { 191 | return $i; 192 | } 193 | } 194 | 195 | return false; 196 | } 197 | 198 | public function return_nodes() 199 | { 200 | return $this->elements; 201 | } 202 | 203 | //-1 not found not removed 204 | // 0 A OK 205 | public function remove_node($node) 206 | { 207 | $found = array_search($node, $this->elements); 208 | 209 | if ($found == FALSE) 210 | { 211 | return -1; 212 | } 213 | 214 | unset($this->elements[$found]); 215 | } 216 | 217 | //this function will remove and return nodes not in keyspace of the bucket 218 | private function remove_nodes_not_in_range() 219 | { 220 | $ret = new array(); 221 | 222 | foreach($this->elements as $key => $i) 223 | { 224 | if ($i->get_node_id() > $this->finish()) 225 | { 226 | array_push($ret, $i); 227 
| unset($this->elements[$key]); 228 | } 229 | } 230 | 231 | return $ret; 232 | } 233 | 234 | //this is for when the bucket gets too big 235 | // bucket == A OK 236 | //return null if bucket too small to be split 237 | public function split_bucket() 238 | { 239 | //error checking 240 | if ( ($this->start - $this->finish) < 16 ) //hopefully 16 is correct 241 | { 242 | return null; 243 | } 244 | 245 | $finish = $this->finish; 246 | $this->finish = ($this->start - $this->finish) / 2; 247 | 248 | $new_bucket = new bucket($this->finish + 1,$finish); 249 | 250 | //full bucket with nodes 251 | $elements = $this->remove_nodes_not_in_range(); 252 | foreach($elements as $i) 253 | { 254 | $new_bucket->add_node($i); 255 | } 256 | 257 | //cleanup 258 | unset($elements); 259 | 260 | //return the new bucket we created 261 | return $new_bucket; 262 | } 263 | 264 | public function __toString() 265 | { 266 | return "bucket { \n\tStart=" . $this->start . " \n\tFinish=" . $this->finish "\n\t nodes =" . print_r($this) . "\n};"; 267 | } 268 | } 269 | 270 | ?> -------------------------------------------------------------------------------- /docs/bep_0005.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 |
14 |
15 | 18 | 28 | 29 |
30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 |
BEP:5
Title:DHT Protocol
Version:11031
Last-Modified:2008-02-28 16:43:58 -0800 (Thu, 28 Feb 2008)
Author:Andrew Loewenstern <drue at bittorrent.com>
Status:Draft
Type:Standards Track
Created:31-Jan-2008
Post-History:
57 |
58 |
59 |

Contents

60 | 82 |
83 |

BitTorrent uses a "distributed sloppy hash table" (DHT) for storing 84 | peer contact information for "trackerless" torrents. In effect, each 85 | peer becomes a tracker. The protocol is based on Kademlia [1] and is 86 | implemented over UDP.

87 |

Please note the terminology used in this document to avoid 88 | confusion. A "peer" is a client/server listening on a TCP port that 89 | implements the BitTorrent protocol. A "node" is a client/server 90 | listening on a UDP port implementing the distributed hash table 91 | protocol. The DHT is composed of nodes and stores the location of 92 | peers. BitTorrent clients include a DHT node, which is used to contact 93 | other nodes in the DHT to get the location of peers to download from 94 | using the BitTorrent protocol.

95 |
96 |

Overview

97 |

Each node has a globally unique identifier known as the "node ID." 98 | Node IDs are chosen at random from the same 160-bit space as 99 | BitTorrent infohashes [2]. A "distance metric" is used to 100 | compare two node IDs or a node ID and an infohash for "closeness." 101 | Nodes must maintain a routing table containing the contact information 102 | for a small number of other nodes. The routing table becomes more 103 | detailed as IDs get closer to the node's own ID. Nodes know about many 104 | other nodes in the DHT that have IDs that are "close" to their own but 105 | have only a handful of contacts with IDs that are very far away from 106 | their own.

107 |

In Kademlia, the distance metric is XOR and the result is interpreted 108 | as an unsigned integer. distance(A,B) = |A xor B| Smaller values 109 | are closer.

110 |

When a node wants to find peers for a torrent, it uses the distance 111 | metric to compare the infohash of the torrent with the IDs of the 112 | nodes in its own routing table. It then contacts the nodes it knows 113 | about with IDs closest to the infohash and asks them for the contact 114 | information of peers currently downloading the torrent. If a contacted 115 | node knows about peers for the torrent, the peer contact information 116 | is returned with the response. Otherwise, the contacted node must 117 | respond with the contact information of the nodes in its routing table 118 | that are closest to the infohash of the torrent. The original node 119 | iteratively queries nodes that are closer to the target infohash until 120 | it cannot find any closer nodes. After the search is exhausted, the 121 | client then inserts the peer contact information for itself onto the 122 | responding nodes with IDs closest to the infohash of the torrent.

123 |

The return value for a query for peers includes an opaque value known 124 | as the "token." For a node to announce that its controlling peer is 125 | downloading a torrent, it must present the token received from the 126 | same queried node in a recent query for peers. When a node attempts to 127 | "announce" a torrent, the queried node checks the token against the 128 | querying node's IP address. This is to prevent malicious hosts from 129 | signing up other hosts for torrents. Since the token is merely 130 | returned by the querying node to the same node it received the token 131 | from, the implementation is not defined. Tokens must be accepted for a 132 | reasonable amount of time after they have been distributed. The 133 | BitTorrent implementation uses the SHA1 hash of the IP address 134 | concatenated onto a secret that changes every five minutes and tokens 135 | up to ten minutes old are accepted.

136 |
137 |
138 |

Routing Table

139 |

Every node maintains a routing table of known good nodes. The nodes in 140 | the routing table are used as starting points for queries in the 141 | DHT. Nodes from the routing table are returned in response to queries 142 | from other nodes.

143 |

Not all nodes that we learn about are equal. Some are "good" and some 144 | are not. Many nodes using the DHT are able to send queries and receive 145 | responses, but are not able to respond to queries from other nodes. It 146 | is important that each node's routing table contain only known 147 | good nodes. A good node is a node that has responded to one of our queries 148 | within the last 15 minutes. A node is also good if it has ever 149 | responded to one of our queries and has sent us a query within the 150 | last 15 minutes. After 15 minutes of inactivity, a node becomes 151 | questionable. Nodes become bad when they fail to respond to multiple 152 | queries in a row. Nodes that we know are good are given priority over 153 | nodes with unknown status.

154 |

The routing table covers the entire node ID space from 0 to 155 | 2^160. The routing table is subdivided into "buckets" that 156 | each cover a portion of the space. An empty table has one bucket with 157 | an ID space range of min=0, max=2^160. When a node with ID 158 | "N" is inserted into the table, it is placed within the bucket that 159 | has min &lt;= N &lt; max. An empty table has only one bucket so any 160 | node must fit within it. Each bucket can only hold K nodes, currently 161 | eight, before becoming "full." When a bucket is full of known good 162 | nodes, no more nodes may be added unless our own node ID falls within 163 | the range of the bucket. In that case, the bucket is replaced by two 164 | new buckets each with half the range of the old bucket and the nodes 165 | from the old bucket are distributed among the two new ones. For a new 166 | table with only one bucket, the full bucket is always split into two 167 | new buckets covering the ranges 0..2^159 and 168 | 2^159..2^160.

169 |

When the bucket is full of good nodes, the new node is simply 170 | discarded. If any nodes in the bucket are known to have become bad, 171 | then one is replaced by the new node. If any questionable 172 | nodes in the bucket have not been seen in the last 15 minutes, the 173 | least recently seen node is pinged. If the pinged node responds then 174 | the next least recently seen questionable node is pinged until one 175 | fails to respond or all of the nodes in the bucket are known to be 176 | good. If a node in the bucket fails to respond to a ping, it is 177 | suggested to try once more before discarding the node and replacing it 178 | with a new good node. In this way, the table fills with stable long 179 | running nodes.

180 |

Each bucket should maintain a "last changed" property to 181 | indicate how "fresh" the contents are. When a node in a bucket is 182 | pinged and it responds, or a node is added to a bucket, or a node in a 183 | bucket is replaced with another node, the bucket's last changed 184 | property should be updated. Buckets that have not been changed in 15 185 | minutes should be "refreshed." This is done by picking a random ID in 186 | the range of the bucket and performing a find_node search on it. Nodes 187 | that are able to receive queries from other nodes usually do not need 188 | to refresh buckets often. Nodes that are not able to receive queries 189 | from other nodes usually will need to refresh all buckets periodically 190 | to ensure there are good nodes in their table when the DHT is needed.

191 |

Upon inserting the first node into its routing table and when starting 192 | up thereafter, the node should attempt to find the closest nodes in 193 | the DHT to itself. It does this by issuing find_node messages to 194 | closer and closer nodes until it cannot find any closer. The routing 195 | table should be saved between invocations of the client software.

196 |
197 |
198 |

BitTorrent Protocol Extension

199 |

The BitTorrent protocol has been extended to exchange node UDP port 200 | numbers between peers that are introduced by a tracker. In this way, 201 | clients can get their routing tables seeded automatically through the 202 | download of regular torrents. Newly installed clients who attempt to 203 | download a trackerless torrent on the first try will not have any 204 | nodes in their routing table and will need the contacts included in 205 | the torrent file.

206 |

Peers supporting the DHT set the last bit of the 8-byte reserved flags 207 | exchanged in the BitTorrent protocol handshake. Peers receiving a 208 | handshake indicating the remote peer supports the DHT should send a 209 | PORT message. It begins with byte 0x09 and has a two byte payload 210 | containing the UDP port of the DHT node in network byte order. Peers 211 | that receive this message should attempt to ping the node on the 212 | received port and IP address of the remote peer. If a response to the 213 | ping is received, the node should attempt to insert the new contact 214 | information into their routing table according to the usual rules.

215 |
216 |
217 |

Torrent File Extensions

218 |

A trackerless torrent dictionary does not have an "announce" key. 219 | Instead, a trackerless torrent has a "nodes" key. This key should be 220 | set to the K closest nodes in the torrent generating client's routing 221 | table. Alternatively, the key could be set to a known good node such 222 | as one operated by the person generating the torrent. Please do not 223 | automatically add "router.bittorrent.com" to torrent files or 224 | automatically add this node to clients' routing tables.

225 |
226 | nodes = [["<host>", <port>], ["<host>", <port>], ...]
227 | nodes = [["127.0.0.1", 6881], ["your.router.node", 4804]]
228 | 
229 |
230 |
231 |

KRPC Protocol

232 |

The KRPC protocol is a simple RPC mechanism consisting of bencoded 233 | dictionaries sent over UDP. A single query packet is sent out and a 234 | single packet is sent in response. There is no retry. There are three 235 | message types: query, response, and error. For the DHT protocol, there 236 | are four queries: ping, find_node, get_peers, and announce_peer.

237 |

A KRPC message is a single dictionary with two keys common to 238 | every message and additional keys depending on the type of message. 239 | Every message has a key "t" with a string value representing a transaction 240 | ID. This transaction ID is generated by the querying node and is echoed 241 | in the response, so responses may be correlated with multiple queries 242 | to the same node. The transaction ID should be encoded as a short string 243 | of binary numbers, typically 2 characters are enough as they cover 2^16 244 | outstanding queries. The other key contained in every KRPC message is "y" 245 | with a single character value describing the type of message. The value 246 | of the "y" key is one of "q" for query, "r" for response, or "e" for 247 | error.

248 |
249 |

Contact Encoding

250 |

Contact information for peers is encoded as a 6-byte string. Also 251 | known as "Compact IP-address/port info" the 4-byte IP address is in 252 | network byte order with the 2 byte port in network byte order 253 | concatenated onto the end.

254 |

Contact information for nodes is encoded as a 26-byte string. 255 | Also known as "Compact node info" the 20-byte Node ID in network byte 256 | order has the compact IP-address/port info concatenated to the end.

257 |
258 |
259 |

Queries

260 |

Queries, or KRPC message dictionaries with a "y" value of "q", 261 | contain two additional keys; "q" and "a". Key "q" has a string value 262 | containing the method name of the query. Key "a" has a dictionary value 263 | containing named arguments to the query.

264 |
265 |
266 |

Responses

267 |

Responses, or KRPC message dictionaries with a "y" value of "r", 268 | contain one additional key "r". The value of "r" is a dictionary 269 | containing named return values. Response messages are sent upon 270 | successful completion of a query.

271 |
272 |
273 |

Errors

274 |

Errors, or KRPC message dictionaries with a "y" value of "e", 275 | contain one additional key "e". The value of "e" is a list. The first 276 | element is an integer representing the error code. The second element 277 | is a string containing the error message. Errors are sent when a query 278 | cannot be fulfilled. The following table describes the possible error 279 | codes:

280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 298 | 299 | 300 | 301 | 302 | 303 |
CodeDescription
201Generic Error
202Server Error
203Protocol Error, such as a malformed 297 | packet, invalid arguments, or bad token
204Method Unknown
304 |

Example Error Packets:

305 |
306 | generic error = {"t":"aa", "y":"e", "e":[201, "A Generic Error Ocurred"]}
307 | bencoded = d1:eli201e23:A Generic Error Ocurrede1:t2:aa1:y1:ee
308 | 
309 |
310 |
311 |
312 |

DHT Queries

313 |

All queries have an "id" key and value containing the node ID of the 314 | querying node. All responses have an "id" key and value containing the 315 | node ID of the responding node.

316 |
317 |

ping

318 |

The most basic query is a ping. "q" = "ping" A ping query has a 319 | single argument, "id" the value is a 20-byte string containing the 320 | sender's node ID in network byte order. The appropriate response to a 321 | ping has a single key "id" containing the node ID of the responding 322 | node.

323 |
324 | arguments:  {"id"&nbsp;: "<querying nodes id>"}
325 | 
326 | response: {"id"&nbsp;: "<queried nodes id>"}
327 | 
328 |

Example Packets

329 |
330 | ping Query = {"t":"aa", "y":"q", "q":"ping", "a":{"id":"abcdefghij0123456789"}}
331 | bencoded = d1:ad2:id20:abcdefghij0123456789e1:q4:ping1:t2:aa1:y1:qe
332 | 
333 |
334 | Response = {"t":"aa", "y":"r", "r": {"id":"mnopqrstuvwxyz123456"}}
335 | bencoded = d1:rd2:id20:mnopqrstuvwxyz123456e1:t2:aa1:y1:re
336 | 
337 |
338 |
339 |

find_node

340 |

Find node is used to find the contact information for a node given 341 | its ID. "q" == "find_node" A find_node query has two arguments, "id" 342 | containing the node ID of the querying node, and "target" containing 343 | the ID of the node sought by the querier. When a node receives a 344 | find_node query, it should respond with a key "nodes" and value of a 345 | string containing the compact node info for the target node or the K 346 | (8) closest good nodes in its own routing table.

347 |
348 | arguments:  {"id"&nbsp;: "<querying nodes id>", "target"&nbsp;: "<id of target node>"}
349 | 
350 | response: {"id"&nbsp;: "<queried nodes id>", "nodes"&nbsp;: "<compact node info>"}
351 | 
352 |

Example Packets

353 |
354 | find_node Query = {"t":"aa", "y":"q", "q":"find_node", "a": {"id":"abcdefghij0123456789", "target":"mnopqrstuvwxyz123456"}}
355 | bencoded = d1:ad2:id20:abcdefghij01234567896:target20:mnopqrstuvwxyz123456e1:q9:find_node1:t2:aa1:y1:qe
356 | 
357 |
358 | Response = {"t":"aa", "y":"r", "r": {"id":"0123456789abcdefghij", "nodes": "def456..."}}
359 | bencoded = d1:rd2:id20:0123456789abcdefghij5:nodes9:def456...e1:t2:aa1:y1:re
360 | 
361 |
362 |
363 |

get_peers

364 |

Get peers associated with a torrent infohash. "q" = "get_peers" A 365 | get_peers query has two arguments, "id" containing the node ID of the 366 | querying node, and "info_hash" containing the infohash of the torrent. 367 | If the queried node has peers for the infohash, they are returned in a 368 | key "values" as a list of strings. Each string contains "compact" format 369 | peer information for a single peer. If the queried node has no 370 | peers for the infohash, a key "nodes" is returned containing the K 371 | nodes in the queried node's routing table closest to the infohash 372 | supplied in the query. In either case a "token" key is also included in 373 | the return value. The token value is a required argument for a future 374 | announce_peer query. The token value should be a short binary string.

375 |
376 | arguments:  {"id"&nbsp;: "<querying nodes id>", "info_hash"&nbsp;: "<20-byte infohash of target torrent>"}
377 | 
378 | response: {"id"&nbsp;: "<queried nodes id>", "token"&nbsp;:"<opaque write token>", "values"&nbsp;: ["<peer 1 info string>", "<peer 2 info string>"]}
379 | 
380 | or: {"id"&nbsp;: "<queried nodes id>", "token"&nbsp;:"<opaque write token>", "nodes"&nbsp;: "<compact node info>"}
381 | 
382 |

Example Packets:

383 |
384 | get_peers Query = {"t":"aa", "y":"q", "q":"get_peers", "a": {"id":"abcdefghij0123456789", "info_hash":"mnopqrstuvwxyz123456"}}
385 | bencoded = d1:ad2:id20:abcdefghij01234567899:info_hash20:mnopqrstuvwxyz123456e1:q9:get_peers1:t2:aa1:y1:qe
386 | 
387 |
388 | Response with peers = {"t":"aa", "y":"r", "r": {"id":"abcdefghij0123456789", "token":"aoeusnth", "values": ["axje.u", "idhtnm"]}}
389 | bencoded = d1:rd2:id20:abcdefghij01234567895:token8:aoeusnth6:valuesl6:axje.u6:idhtnmee1:t2:aa1:y1:re
390 | 
391 |
392 | Response with closest nodes = {"t":"aa", "y":"r", "r": {"id":"abcdefghij0123456789", "token":"aoeusnth", "nodes": "def456..."}}
393 | bencoded = d1:rd2:id20:abcdefghij01234567895:nodes9:def456...5:token8:aoeusnthe1:t2:aa1:y1:re
394 | 
395 |
396 |
397 |

announce_peer

398 |

Announce that the peer, controlling the querying node, is downloading 399 | a torrent on a port. announce_peer has four arguments: "id" containing the node ID of the 400 | querying node, "info_hash" containing the infohash of the torrent, 401 | "port" containing the port as an integer, and the "token" received in 402 | response to a previous get_peers query. The queried node must verify 403 | that the token was previously sent to the same IP address as the 404 | querying node. Then the queried node should store the IP address of the 405 | querying node and the supplied port number under the infohash in its 406 | store of peer contact information.

407 |
408 | arguments:  {"id" : "<querying nodes id>", "info_hash" : "<20-byte infohash of target torrent>", "port" : <port number>, "token" : "<opaque token>"}
409 | 
410 | response: {"id" : "<queried nodes id>"}
411 | 
412 |

Example Packets:

413 |
414 | announce_peer Query = {"t":"aa", "y":"q", "q":"announce_peer", "a": {"id":"abcdefghij0123456789", "info_hash":"mnopqrstuvwxyz123456", "port": 6881, "token": "aoeusnth"}}
415 | bencoded = d1:ad2:id20:abcdefghij01234567899:info_hash20:<br />
416 | mnopqrstuvwxyz1234564:porti6881e5:token8:aoeusnthe1:q13:announce_peer1:t2:aa1:y1:qe
417 | 
418 |
419 | Response = {"t":"aa", "y":"r", "r": {"id":"mnopqrstuvwxyz123456"}}
420 | bencoded = d1:rd2:id20:mnopqrstuvwxyz123456e1:t2:aa1:y1:re
421 | 
422 |
423 |
424 |
425 |

References

426 | 427 | 428 | 429 | 430 | 431 |
[1]Peter Maymounkov, David Mazieres, "Kademlia: A Peer-to-peer Information System Based on the XOR Metric", IPTPS 2002. http://www.cs.rice.edu/Conferences/IPTPS02/109.pdf
432 | 433 | 434 | 435 | 436 | 437 |
[2]Use SHA1 and plenty of entropy to ensure a unique ID.
438 |
439 | 450 | 451 | 452 |
453 | 456 | 457 |
458 | 459 | 460 | --------------------------------------------------------------------------------