├── .gitignore ├── LICENSE.md ├── README.md ├── _config.yml ├── config.js ├── images └── Captur2.JPG ├── package-lock.json ├── package.json ├── resources ├── countPerMinute.txt ├── lastInfohashIdIPs.txt └── lastInfohashIdMetadata.txt └── src ├── crawlEvaluation.js ├── crawlInfohashes.js ├── indexInfohashesManual.js ├── indexMetadata.js ├── indexPeers.js ├── lib ├── Crawling │ ├── DHTCrawler.js │ └── RoutingTable.js ├── Database │ ├── Categoriser.js │ ├── Elasticsearch.js │ └── createDatabase.js ├── Services │ ├── MetadataResolver.js │ ├── MetadataResolverService.js │ ├── PeerDiscovery.js │ └── PeerDiscoveryService.js └── utils.js └── test ├── test.js ├── testDHTCrawler.js ├── testMetadataResolver.js ├── testPeerDiscovery.js └── testTorcache.js /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Bb]in/ 22 | [Oo]bj/ 23 | [Ll]og/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | # Uncomment if you have tasks that create the project's static files in wwwroot 28 | #wwwroot/ 29 | 30 | # MSTest test Results 31 | [Tt]est[Rr]esult*/ 32 | [Bb]uild[Ll]og.* 33 | 34 | # NUNIT 35 | *.VisualState.xml 36 | TestResult.xml 37 | 38 | # Build Results of an ATL Project 39 | [Dd]ebugPS/ 40 | [Rr]eleasePS/ 41 | dlldata.c 42 | 43 | # DNX 44 | project.lock.json 45 | project.fragment.lock.json 46 | artifacts/ 47 | 48 | *_i.c 49 | *_p.c 50 | *_i.h 51 | *.ilk 52 | *.meta 53 | *.obj 54 | *.pch 55 | *.pdb 56 | *.pgc 57 | *.pgd 58 | *.rsp 59 | *.sbr 60 | *.tlb 61 | *.tli 62 | *.tlh 63 | *.tmp 64 | *.tmp_proj 65 | *.log 66 
| *.vspscc 67 | *.vssscc 68 | .builds 69 | *.pidb 70 | *.svclog 71 | *.scc 72 | 73 | # Chutzpah Test files 74 | _Chutzpah* 75 | 76 | # Visual C++ cache files 77 | ipch/ 78 | *.aps 79 | *.ncb 80 | *.opendb 81 | *.opensdf 82 | *.sdf 83 | *.cachefile 84 | *.VC.db 85 | *.VC.VC.opendb 86 | 87 | # Visual Studio profiler 88 | *.psess 89 | *.vsp 90 | *.vspx 91 | *.sap 92 | 93 | # TFS 2012 Local Workspace 94 | $tf/ 95 | 96 | # Guidance Automation Toolkit 97 | *.gpState 98 | 99 | # ReSharper is a .NET coding add-in 100 | _ReSharper*/ 101 | *.[Rr]e[Ss]harper 102 | *.DotSettings.user 103 | 104 | # JustCode is a .NET coding add-in 105 | .JustCode 106 | 107 | # TeamCity is a build add-in 108 | _TeamCity* 109 | 110 | # DotCover is a Code Coverage Tool 111 | *.dotCover 112 | 113 | # NCrunch 114 | _NCrunch_* 115 | .*crunch*.local.xml 116 | nCrunchTemp_* 117 | 118 | # MightyMoose 119 | *.mm.* 120 | AutoTest.Net/ 121 | 122 | # Web workbench (sass) 123 | .sass-cache/ 124 | 125 | # Installshield output folder 126 | [Ee]xpress/ 127 | 128 | # DocProject is a documentation generator add-in 129 | DocProject/buildhelp/ 130 | DocProject/Help/*.HxT 131 | DocProject/Help/*.HxC 132 | DocProject/Help/*.hhc 133 | DocProject/Help/*.hhk 134 | DocProject/Help/*.hhp 135 | DocProject/Help/Html2 136 | DocProject/Help/html 137 | 138 | # Click-Once directory 139 | publish/ 140 | 141 | # Publish Web Output 142 | *.[Pp]ublish.xml 143 | *.azurePubxml 144 | # TODO: Comment the next line if you want to checkin your web deploy settings 145 | # but database connection strings (with potential passwords) will be unencrypted 146 | #*.pubxml 147 | *.publishproj 148 | 149 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 150 | # checkin your Azure Web App publish settings, but sensitive information contained 151 | # in these scripts will be unencrypted 152 | PublishScripts/ 153 | 154 | # NuGet Packages 155 | *.nupkg 156 | # The packages folder can be ignored because of Package Restore 157 | **/packages/* 158 | # except build/, which is used as an MSBuild target. 159 | !**/packages/build/ 160 | # Uncomment if necessary however generally it will be regenerated when needed 161 | #!**/packages/repositories.config 162 | # NuGet v3's project.json files produces more ignoreable files 163 | *.nuget.props 164 | *.nuget.targets 165 | 166 | # Microsoft Azure Build Output 167 | csx/ 168 | *.build.csdef 169 | 170 | # Microsoft Azure Emulator 171 | ecf/ 172 | rcf/ 173 | 174 | # Windows Store app package directories and files 175 | AppPackages/ 176 | BundleArtifacts/ 177 | Package.StoreAssociation.xml 178 | _pkginfo.txt 179 | 180 | # Visual Studio cache files 181 | # files ending in .cache can be ignored 182 | *.[Cc]ache 183 | # but keep track of directories ending in .cache 184 | !*.[Cc]ache/ 185 | 186 | # Others 187 | ClientBin/ 188 | ~$* 189 | *~ 190 | *.dbmdl 191 | *.dbproj.schemaview 192 | *.jfm 193 | *.pfx 194 | *.publishsettings 195 | node_modules/ 196 | orleans.codegen.cs 197 | 198 | # Since there are multiple workflows, uncomment next line to ignore bower_components 199 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 200 | #bower_components/ 201 | 202 | # RIA/Silverlight projects 203 | Generated_Code/ 204 | 205 | # Backup & report files from converting an old project file 206 | # to a newer Visual Studio version. 
Backup files are not needed, 207 | # because we have git ;-) 208 | _UpgradeReport_Files/ 209 | Backup*/ 210 | UpgradeLog*.XML 211 | UpgradeLog*.htm 212 | 213 | # SQL Server files 214 | *.mdf 215 | *.ldf 216 | 217 | # Business Intelligence projects 218 | *.rdl.data 219 | *.bim.layout 220 | *.bim_*.settings 221 | 222 | # Microsoft Fakes 223 | FakesAssemblies/ 224 | 225 | # GhostDoc plugin setting file 226 | *.GhostDoc.xml 227 | 228 | # Node.js Tools for Visual Studio 229 | .ntvs_analysis.dat 230 | 231 | # Visual Studio 6 build log 232 | *.plg 233 | 234 | # Visual Studio 6 workspace options file 235 | *.opt 236 | 237 | # Visual Studio LightSwitch build output 238 | **/*.HTMLClient/GeneratedArtifacts 239 | **/*.DesktopClient/GeneratedArtifacts 240 | **/*.DesktopClient/ModelManifest.xml 241 | **/*.Server/GeneratedArtifacts 242 | **/*.Server/ModelManifest.xml 243 | _Pvt_Extensions 244 | 245 | # Paket dependency manager 246 | .paket/paket.exe 247 | paket-files/ 248 | 249 | # FAKE - F# Make 250 | .fake/ 251 | 252 | # JetBrains Rider 253 | .idea/ 254 | *.sln.iml 255 | 256 | # CodeRush 257 | .cr/ 258 | 259 | # Python Tools for Visual Studio (PTVS) 260 | __pycache__/ 261 | *.pyc 262 | /DHTIndexer/src/test/test.js 263 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Simion Robert George 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall 
be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BitInsight 2 | 3 | BitTorrent Nodejs indexer which leverages the DHT protocol for crawling infohashes, torrent metadata and IPs. 4 | 5 | The following solution presents an implementation which leverages the BitTorrent DHT Network in order to build a clean torrent search engine. Moreover, it can be used to monitor pirates' behaviour by tracking their torrent download history in preparation for legal attacks. It also provides in-depth analytics and data statistics about the BitTorrent network. In fact, this issue becomes more significant as the copyright and legal pressure from authorities systematically determines the closure of torrent hosting sites, futilely trying to stop the damage done to the creative industry. 6 | 7 | A tool was written to crawl the DHT Mainline for torrents using both the Sybil attack and the BEP 51 protocol extension. Following, the network was used to collect addresses of downloaders. The gathered data was then stored and analysed through Elasticsearch. On top of that, a web application was built to interface and query the related data. See [BitInsight-WebInterface](https://github.com/simionrobert/BitInsight-WebInterface.git), a statistical analyser and data visualisation tool. 
8 | 9 | Over half a million torrents have been captured in one hour and 61,315 IP addresses have also been tracked, downloading 7733 torrents over 14 hours. The subsequent analyses showed that the largest IP clusters were mainly located in Europe and Asia. All these results imply that the adoption of DHT indexing over centralised tracking servers will have mixed implications. While it will likely make illicit torrents harder to suppress, it will not help users hide their actions. 10 | 11 | ## Prerequisites 12 | 13 | 1. Download and install [https://www.elastic.co/downloads/elasticsearch](https://www.elastic.co/downloads/elasticsearch). Run bin/elasticsearch (or bin\elasticsearch.bat on Windows) 14 | 15 | 2. (Optionally) Download and install [https://www.elastic.co/downloads/kibana](https://www.elastic.co/downloads/kibana). Run bin/kibana (or bin\kibana.bat on Windows) 16 | 17 | ## Install 18 | 19 | Get the latest snapshot: 20 | 21 | ``` 22 | git clone https://github.com/simionrobert/BitInsight.git 23 | 24 | npm install 25 | ``` 26 | 27 | ## Usage 28 | 29 | ``` 30 | # Create elasticsearch database index mappings 31 | node createESDatabase.js 32 | 33 | # Choice 1. Crawling infohashes 34 | node crawlInfohashes.js 35 | 36 | # Choice 2. Download metadata for each infohash from the db (infohashes must be prior in db) 37 | node indexMetadata.js 38 | 39 | # Choice 3. 
Getting peers' IP addresses for each infohash from the db (infohashes must be prior in db) 40 | 41 | #You need to access http://localhost:5601/app/kibana#/dev_tools/console?_g=() and run this command 42 | PUT /_ingest/pipeline/geoip?pretty 43 | { 44 | "description" : "Add geoip information to the given IP address", 45 | "processors" : [ 46 | { 47 | "geoip" : { 48 | "field" : "IP" 49 | } 50 | } 51 | ] 52 | } 53 | 54 | #Then start indexing IPs 55 | node indexPeers.js 56 | ``` 57 | 58 | Visit http://localhost:5601/app/kibana to view your data 59 | 60 | ## Modules Description 61 | 62 | This repo contains the following modules: 63 | 64 | 1. BitTorrent DHT Indexer 65 | 2. BitTorrent Peer Discovery 66 | 3. BitTorrent Torrent Metadata 67 | 68 | ### BitTorrent DHT Indexer 69 | 70 | This module crawls the DHT Network for infohashes. It implements several functionalities from [BEP5](http://www.bittorrent.org/beps/bep_0005.html) 71 | 72 | - it uses the Sybil attack (horizontal and/or vertical) on other nodes' routing tables. 73 | - efficiently discovers infohashes on DHT 74 | - has a mechanism for avoiding the 'bad/questionable node' tag from other nodes 75 | - complete implementation of the [BEP51](http://www.bittorrent.org/beps/bep_0051.html) in JavaScript 76 | - follows [the spec](http://www.bittorrent.org/beps/bep_0051.html) 77 | 78 | The idea of a Sybil attack is to inject multiple fake identities into the system, and use them as a starting point to perform further attacks. 79 | 80 | ### BitTorrent Peer Discovery 81 | 82 | This module uses bittorrent-dht, a Node.js implementation of BEP5, for discovering BitTorrent peers. 83 | 84 | - finds peers from the DHT network based on an infohash 85 | - can start finding peers with just an infohash, before full metadata is available 86 | - automatically announces, so other peers can discover us 87 | 88 | ### BitTorrent Torrent Metadata 89 | 90 | This module uses bittorrent-protocol and ut_metadata for getting torrent metadata. 
91 | 92 | - allows a client to join a swarm and complete a download without a .torrent file 93 | - uses Bittorrent Peer Discovery for finding peers 94 | - gets torrent structure, file names and sizes 95 | 96 | ## Performance 97 | 98 | A crawling rate comparison was made between our algorithm, [AlphaReign](https://github.com/AlphaReign/scraper) and [simDHT](https://github.com/wuzhenda/simDHT). 99 | 100 | Also a paper has been published: `A BitTorrent DHT Crawler, IEEE 13th SACI, Simion Robert George`. 101 | 102 | ![](https://github.com/simionrobert/BitInsight/blob/master/images/Captur2.JPG) 103 | 104 | ## Further reading 105 | 106 | - [BitTorrent DHT protocol](http://www.bittorrent.org/beps/bep_0005.html) 107 | - [BEP51-DHT Infohash Indexing](http://www.bittorrent.org/beps/bep_0051.html) 108 | - [Crawling BitTorrent DHTs for Fun and Profit](https://www.usenix.org/legacy/event/woot10/tech/full_papers/Wolchok.pdf) 109 | - [Real-world sybil attacks in BitTorrent mainline DHT](https://www.researchgate.net/profile/Liang_Wang84/publication/261046350_Real-world_sybil_attacks_in_BitTorrent_mainline_DHT/links/550808160cf27e990e08c7bb/Real-world-sybil-attacks-in-BitTorrent-mainline-DHT.pdf) 110 | - [Kademlia: A Peer-to-peer Information System Based on the XOR Metric](http://www.ic.unicamp.br/~bit/ensino/mo809_1s13/papers/P2P/Kademlia-%20A%20Peer-to-Peer%20Information%20System%20Based%20on%20the%20XOR%20Metric%20.pdf) 111 | 112 | ## Thank You 113 | 114 | I really appreciate all kinds of feedback and contributions. 115 | 116 | **Additional notes:** 117 | If you have some issues referring to parts of code in the master branch add them in the issues section. 
118 | 119 | ## License 120 | 121 | This project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details 122 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /config.js: -------------------------------------------------------------------------------- 1 | const config = { 2 | 3 | DEFAULT_CRAWLER_OPTIONS: { 4 | address: '0.0.0.0', 5 | port: 6881, 6 | tableMaxSize: 256, 7 | dhtAnnouncingBootstrap: 1381, 8 | dhtAnnouncingTable: 1511, 9 | BEP51Mode: false, 10 | verticalAttackMode: false, 11 | verticalAttackNrNodes: 8, 12 | BOOTSTRAP_NODES: [ 13 | ['router.bittorrent.com', 6881], 14 | ['dht.transmissionbt.com', 6881] 15 | ] 16 | }, 17 | 18 | DEFAULT_PEER_DISCOVERY_OPTIONS: { 19 | port: 6880, 20 | timeout: 7 * 1000, //for rapid crawling put 2 21 | timeout_initial: 5 * 1000, 22 | dht: false 23 | }, 24 | 25 | DEFAULT_METADATA_FETCHER_OPTIONS: { 26 | timeout: 8 * 1000, 27 | socketTimeout: 5000, 28 | tracker: true, 29 | torcacheURL: "http://itorrents.org/torrent/" 30 | }, 31 | 32 | DEFAULT_ELASTIC_SEARCH_OPTIONS: { 33 | connection: { 34 | host: 'localhost:9200' 35 | }, 36 | batchSizeDHT: 5000, 37 | batchSizeTorrent: 10 38 | }, 39 | }; 40 | 41 | module.exports = config; -------------------------------------------------------------------------------- /images/Captur2.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simionrobert/BitInsight/1d1eae1234628ce1ab883562234cec4e2eccb669/images/Captur2.JPG -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dht-crawler", 3 | "version": "1.0.1", 4 | 
"lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "agentkeepalive": { 8 | "version": "3.5.2", 9 | "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-3.5.2.tgz", 10 | "integrity": "sha512-e0L/HNe6qkQ7H19kTlRRqUibEAwDK5AFk6y3PtMsuut2VAH6+Q4xZml1tNDJD7kSAyqmbG/K08K5WEJYtUrSlQ==", 11 | "requires": { 12 | "humanize-ms": "^1.2.1" 13 | } 14 | }, 15 | "ansi-regex": { 16 | "version": "2.1.1", 17 | "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-2.1.1.tgz", 18 | "integrity": "sha1-w7M6te42DYbg5ijwRorn7yfWVN8=" 19 | }, 20 | "ansi-styles": { 21 | "version": "2.2.1", 22 | "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-2.2.1.tgz", 23 | "integrity": "sha1-tDLdM1i2NM914eRmQ2gkBTPB3b4=" 24 | }, 25 | "bencode": { 26 | "version": "2.0.1", 27 | "resolved": "https://registry.npmjs.org/bencode/-/bencode-2.0.1.tgz", 28 | "integrity": "sha512-2uhEl8FdjSBUyb69qDTgOEeeqDTa+n3yMQzLW0cOzNf1Ow5bwcg3idf+qsWisIKRH8Bk8oC7UXL8irRcPA8ZEQ==", 29 | "requires": { 30 | "safe-buffer": "^5.1.1" 31 | } 32 | }, 33 | "bitfield": { 34 | "version": "3.0.0", 35 | "resolved": "https://registry.npmjs.org/bitfield/-/bitfield-3.0.0.tgz", 36 | "integrity": "sha512-hJmWKucJQfdSkQPDPBKmWogM9s8+NOSzDT9QVbJbjinXaQ0bJKPu/cn98qRWy3PDNWtKw4XaoUP3XruGRIKEgg==" 37 | }, 38 | "bittorrent-dht": { 39 | "version": "9.0.3", 40 | "resolved": "https://registry.npmjs.org/bittorrent-dht/-/bittorrent-dht-9.0.3.tgz", 41 | "integrity": "sha512-6FISjApL62THEMyptDm0kPTAnInBn8Sft3dK/JZcCI07LRIpIP+3Z6gle6xJUhyRVs6K5HmXAtaRatFsOEySOg==", 42 | "requires": { 43 | "bencode": "^2.0.0", 44 | "debug": "^4.1.1", 45 | "inherits": "^2.0.1", 46 | "k-bucket": "^5.0.0", 47 | "k-rpc": "^5.0.0", 48 | "last-one-wins": "^1.0.4", 49 | "lru": "^3.1.0", 50 | "randombytes": "^2.0.5", 51 | "record-cache": "^1.0.2", 52 | "simple-sha1": "^3.0.0" 53 | } 54 | }, 55 | "bittorrent-protocol": { 56 | "version": "3.1.1", 57 | "resolved": 
"https://registry.npmjs.org/bittorrent-protocol/-/bittorrent-protocol-3.1.1.tgz", 58 | "integrity": "sha512-kthSXghQ9DRQ4Lrjr1ceyIeEMeL5x9WiaSrQyR+5Nrr3g9QY6MvDeq+KLQz17R6094iDmT/LgFbQYAPj09/oUA==", 59 | "requires": { 60 | "bencode": "^2.0.0", 61 | "bitfield": "^3.0.0", 62 | "debug": "^4.1.1", 63 | "randombytes": "^2.0.5", 64 | "readable-stream": "^3.0.0", 65 | "speedometer": "^1.0.0", 66 | "unordered-array-remove": "^1.0.2" 67 | } 68 | }, 69 | "blob-to-buffer": { 70 | "version": "1.2.8", 71 | "resolved": "https://registry.npmjs.org/blob-to-buffer/-/blob-to-buffer-1.2.8.tgz", 72 | "integrity": "sha512-re0AIxakF504MgeMtIyJkVcZ8T5aUxtp/QmTMlmjyb3P44E1BEv5x3LATBGApWAJATyXHtkXRD+gWTmeyYLiQA==" 73 | }, 74 | "chalk": { 75 | "version": "1.1.3", 76 | "resolved": "https://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", 77 | "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", 78 | "requires": { 79 | "ansi-styles": "^2.2.1", 80 | "escape-string-regexp": "^1.0.2", 81 | "has-ansi": "^2.0.0", 82 | "strip-ansi": "^3.0.0", 83 | "supports-color": "^2.0.0" 84 | } 85 | }, 86 | "chrome-dgram": { 87 | "version": "3.0.4", 88 | "resolved": "https://registry.npmjs.org/chrome-dgram/-/chrome-dgram-3.0.4.tgz", 89 | "integrity": "sha512-G8rOANSvSRC4hGny/K/ec1gXtNuZGzryFeoev49u0J4g/qws7H25vMKQlbD9izuedFVHwXFTdKQG62Tf/7Cmwg==", 90 | "requires": { 91 | "inherits": "^2.0.1", 92 | "run-series": "^1.1.2" 93 | } 94 | }, 95 | "chrome-dns": { 96 | "version": "1.0.1", 97 | "resolved": "https://registry.npmjs.org/chrome-dns/-/chrome-dns-1.0.1.tgz", 98 | "integrity": "sha512-HqsYJgIc8ljJJOqOzLphjAs79EUuWSX3nzZi2LNkzlw3GIzAeZbaSektC8iT/tKvLqZq8yl1GJu5o6doA4TRbg==", 99 | "requires": { 100 | "chrome-net": "^3.3.2" 101 | } 102 | }, 103 | "chrome-net": { 104 | "version": "3.3.3", 105 | "resolved": "https://registry.npmjs.org/chrome-net/-/chrome-net-3.3.3.tgz", 106 | "integrity": "sha512-11jL8+Ogna8M5TEdyalE8IG6cpaFEU3YcaxAj3YjZKjRM/PeT70pZbrUY+xoGwqiEJZwJE4Td2CvGxUvS9ytKQ==", 107 | "requires": { 108 | "inherits": 
"^2.0.1" 109 | } 110 | }, 111 | "debug": { 112 | "version": "4.1.1", 113 | "resolved": "https://registry.npmjs.org/debug/-/debug-4.1.1.tgz", 114 | "integrity": "sha512-pYAIzeRo8J6KPEaJ0VWOh5Pzkbw/RetuzehGM7QRRX5he4fPHx2rdKMB256ehJCkX+XRQm16eZLqLNS8RSZXZw==", 115 | "requires": { 116 | "ms": "^2.1.1" 117 | } 118 | }, 119 | "decompress-response": { 120 | "version": "4.2.1", 121 | "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-4.2.1.tgz", 122 | "integrity": "sha512-jOSne2qbyE+/r8G1VU+G/82LBs2Fs4LAsTiLSHOCOMZQl2OKZ6i8i4IyHemTe+/yIXOtTcRQMzPcgyhoFlqPkw==", 123 | "requires": { 124 | "mimic-response": "^2.0.0" 125 | } 126 | }, 127 | "elasticsearch": { 128 | "version": "16.6.0", 129 | "resolved": "https://registry.npmjs.org/elasticsearch/-/elasticsearch-16.6.0.tgz", 130 | "integrity": "sha512-MhsdE2JaBJoV1EGzSkCqqhNGxafXJuhPr+eD3vbXmsk/QWhaiU12oyXF0VhjcL8+UlwTHv0CAUbyjtE1wqoIdw==", 131 | "requires": { 132 | "agentkeepalive": "^3.4.1", 133 | "chalk": "^1.0.0", 134 | "lodash": "^4.17.10" 135 | } 136 | }, 137 | "escape-string-regexp": { 138 | "version": "1.0.5", 139 | "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", 140 | "integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=" 141 | }, 142 | "get-stdin": { 143 | "version": "7.0.0", 144 | "resolved": "https://registry.npmjs.org/get-stdin/-/get-stdin-7.0.0.tgz", 145 | "integrity": "sha512-zRKcywvrXlXsA0v0i9Io4KDRaAw7+a1ZpjRwl9Wox8PFlVCCHra7E9c4kqXCoCM9nR5tBkaTTZRBoCm60bFqTQ==" 146 | }, 147 | "has-ansi": { 148 | "version": "2.0.0", 149 | "resolved": "https://registry.npmjs.org/has-ansi/-/has-ansi-2.0.0.tgz", 150 | "integrity": "sha1-NPUEnOHs3ysGSa8+8k5F7TVBbZE=", 151 | "requires": { 152 | "ansi-regex": "^2.0.0" 153 | } 154 | }, 155 | "humanize-ms": { 156 | "version": "1.2.1", 157 | "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", 158 | "integrity": "sha1-xG4xWaKT9riW2ikxbYtv6Lt5u+0=", 159 | "requires": { 160 | "ms": "^2.0.0" 
161 | } 162 | }, 163 | "inherits": { 164 | "version": "2.0.4", 165 | "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", 166 | "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" 167 | }, 168 | "k-bucket": { 169 | "version": "5.0.0", 170 | "resolved": "https://registry.npmjs.org/k-bucket/-/k-bucket-5.0.0.tgz", 171 | "integrity": "sha512-r/q+wV/Kde62/tk+rqyttEJn6h0jR7x+incdMVSYTqK73zVxVrzJa70kJL49cIKen8XjIgUZKSvk8ktnrQbK4w==", 172 | "requires": { 173 | "randombytes": "^2.0.3" 174 | } 175 | }, 176 | "k-rpc": { 177 | "version": "5.1.0", 178 | "resolved": "https://registry.npmjs.org/k-rpc/-/k-rpc-5.1.0.tgz", 179 | "integrity": "sha512-FGc+n70Hcjoa/X2JTwP+jMIOpBz+pkRffHnSl9yrYiwUxg3FIgD50+u1ePfJUOnRCnx6pbjmVk5aAeB1wIijuQ==", 180 | "requires": { 181 | "k-bucket": "^5.0.0", 182 | "k-rpc-socket": "^1.7.2", 183 | "randombytes": "^2.0.5" 184 | } 185 | }, 186 | "k-rpc-socket": { 187 | "version": "1.11.1", 188 | "resolved": "https://registry.npmjs.org/k-rpc-socket/-/k-rpc-socket-1.11.1.tgz", 189 | "integrity": "sha512-8xtA8oqbZ6v1Niryp2/g4GxW16EQh5MvrUylQoOG+zcrDff5CKttON2XUXvMwlIHq4/2zfPVFiinAccJ+WhxoA==", 190 | "requires": { 191 | "bencode": "^2.0.0", 192 | "chrome-dgram": "^3.0.2", 193 | "chrome-dns": "^1.0.0", 194 | "chrome-net": "^3.3.2" 195 | } 196 | }, 197 | "last-one-wins": { 198 | "version": "1.0.4", 199 | "resolved": "https://registry.npmjs.org/last-one-wins/-/last-one-wins-1.0.4.tgz", 200 | "integrity": "sha1-wb/Qy8tGeQ7JFWuNGu6Py4bNoio=" 201 | }, 202 | "lodash": { 203 | "version": "4.17.15", 204 | "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz", 205 | "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==" 206 | }, 207 | "lru": { 208 | "version": "3.1.0", 209 | "resolved": "https://registry.npmjs.org/lru/-/lru-3.1.0.tgz", 210 | "integrity": "sha1-6n+4VG2DczOWoTCR12z+tMBoN9U=", 211 | "requires": { 212 | 
"inherits": "^2.0.1" 213 | } 214 | }, 215 | "magnet-uri": { 216 | "version": "5.2.4", 217 | "resolved": "https://registry.npmjs.org/magnet-uri/-/magnet-uri-5.2.4.tgz", 218 | "integrity": "sha512-VYaJMxhr8B9BrCiNINUsuhaEe40YnG+AQBwcqUKO66lSVaI9I3A1iH/6EmEwRI8OYUg5Gt+4lLE7achg676lrg==", 219 | "requires": { 220 | "thirty-two": "^1.0.1", 221 | "uniq": "^1.0.1" 222 | } 223 | }, 224 | "mimic-response": { 225 | "version": "2.0.0", 226 | "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-2.0.0.tgz", 227 | "integrity": "sha512-8ilDoEapqA4uQ3TwS0jakGONKXVJqpy+RpM+3b7pLdOjghCrEiGp9SRkFbUHAmZW9vdnrENWHjaweIoTIJExSQ==" 228 | }, 229 | "ms": { 230 | "version": "2.1.2", 231 | "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", 232 | "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" 233 | }, 234 | "once": { 235 | "version": "1.4.0", 236 | "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", 237 | "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", 238 | "requires": { 239 | "wrappy": "1" 240 | } 241 | }, 242 | "parse-torrent": { 243 | "version": "7.0.1", 244 | "resolved": "https://registry.npmjs.org/parse-torrent/-/parse-torrent-7.0.1.tgz", 245 | "integrity": "sha512-FdF1kBImRLt+ICV4NTz8L+sI2hFlPXAq1tXuw21gKz8EuThyVUFJ/wPfBEyYQrvnBpmGf7cM/LVSOhMRe8MrKw==", 246 | "requires": { 247 | "bencode": "^2.0.0", 248 | "blob-to-buffer": "^1.2.6", 249 | "get-stdin": "^7.0.0", 250 | "magnet-uri": "^5.1.3", 251 | "simple-get": "^3.0.1", 252 | "simple-sha1": "^3.0.0", 253 | "uniq": "^1.0.1" 254 | } 255 | }, 256 | "queue-microtask": { 257 | "version": "1.1.2", 258 | "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.1.2.tgz", 259 | "integrity": "sha512-F9wwNePtXrzZenAB3ax0Y8TSKGvuB7Qw16J30hspEUTbfUM+H827XyN3rlpwhVmtm5wuZtbKIHjOnwDn7MUxWQ==" 260 | }, 261 | "randombytes": { 262 | "version": "2.1.0", 263 | "resolved": 
"https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", 264 | "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", 265 | "requires": { 266 | "safe-buffer": "^5.1.0" 267 | } 268 | }, 269 | "readable-stream": { 270 | "version": "3.5.0", 271 | "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.5.0.tgz", 272 | "integrity": "sha512-gSz026xs2LfxBPudDuI41V1lka8cxg64E66SGe78zJlsUofOg/yqwezdIcdfwik6B4h8LFmWPA9ef9X3FiNFLA==", 273 | "requires": { 274 | "inherits": "^2.0.3", 275 | "string_decoder": "^1.1.1", 276 | "util-deprecate": "^1.0.1" 277 | } 278 | }, 279 | "record-cache": { 280 | "version": "1.1.0", 281 | "resolved": "https://registry.npmjs.org/record-cache/-/record-cache-1.1.0.tgz", 282 | "integrity": "sha512-u8rbtLEJV7HRacl/ZYwSBFD8NFyB3PfTTfGLP37IW3hftQCwu6z4Q2RLyxo1YJUNRTEzJfpLpGwVuEYdaIkG9Q==" 283 | }, 284 | "run-series": { 285 | "version": "1.1.8", 286 | "resolved": "https://registry.npmjs.org/run-series/-/run-series-1.1.8.tgz", 287 | "integrity": "sha512-+GztYEPRpIsQoCSraWHDBs9WVy4eVME16zhOtDB4H9J4xN0XRhknnmLOl+4gRgZtu8dpp9N/utSPjKH/xmDzXg==" 288 | }, 289 | "rusha": { 290 | "version": "0.8.13", 291 | "resolved": "https://registry.npmjs.org/rusha/-/rusha-0.8.13.tgz", 292 | "integrity": "sha1-mghOe4YLF7/zAVuSxnpqM2GRUTo=" 293 | }, 294 | "safe-buffer": { 295 | "version": "5.2.0", 296 | "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.0.tgz", 297 | "integrity": "sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg==" 298 | }, 299 | "simple-concat": { 300 | "version": "1.0.0", 301 | "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.0.tgz", 302 | "integrity": "sha1-c0TLuLbib7J9ZrL8hvn21Zl1IcY=" 303 | }, 304 | "simple-get": { 305 | "version": "3.1.0", 306 | "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-3.1.0.tgz", 307 | "integrity": 
"sha512-bCR6cP+aTdScaQCnQKbPKtJOKDp/hj9EDLJo3Nw4y1QksqaovlW/bnptB6/c1e+qmNIDHRK+oXFDdEqBT8WzUA==", 308 | "requires": { 309 | "decompress-response": "^4.2.0", 310 | "once": "^1.3.1", 311 | "simple-concat": "^1.0.0" 312 | } 313 | }, 314 | "simple-sha1": { 315 | "version": "3.0.1", 316 | "resolved": "https://registry.npmjs.org/simple-sha1/-/simple-sha1-3.0.1.tgz", 317 | "integrity": "sha512-q7ehqWfHc1VhOm7sW099YDZ4I0yYX7rqyhqqhHV1IYeUTjPOhHyD3mXvv8k2P+rO7+7c8R4/D+8ffzC9BE7Cqg==", 318 | "requires": { 319 | "queue-microtask": "^1.1.2", 320 | "rusha": "^0.8.1" 321 | } 322 | }, 323 | "speedometer": { 324 | "version": "1.1.0", 325 | "resolved": "https://registry.npmjs.org/speedometer/-/speedometer-1.1.0.tgz", 326 | "integrity": "sha512-z/wAiTESw2XVPssY2XRcme4niTc4S5FkkJ4gknudtVoc33Zil8TdTxHy5torRcgqMqksJV2Yz8HQcvtbsnw0mQ==" 327 | }, 328 | "string_decoder": { 329 | "version": "1.3.0", 330 | "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", 331 | "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", 332 | "requires": { 333 | "safe-buffer": "~5.2.0" 334 | } 335 | }, 336 | "strip-ansi": { 337 | "version": "3.0.1", 338 | "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", 339 | "integrity": "sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=", 340 | "requires": { 341 | "ansi-regex": "^2.0.0" 342 | } 343 | }, 344 | "supports-color": { 345 | "version": "2.0.0", 346 | "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", 347 | "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=" 348 | }, 349 | "thirty-two": { 350 | "version": "1.0.2", 351 | "resolved": "https://registry.npmjs.org/thirty-two/-/thirty-two-1.0.2.tgz", 352 | "integrity": "sha1-TKL//AKlEpDSdEueP1V2k8prYno=" 353 | }, 354 | "uniq": { 355 | "version": "1.0.1", 356 | "resolved": "https://registry.npmjs.org/uniq/-/uniq-1.0.1.tgz", 357 | "integrity": "sha1-sxxa6CVIRKOoKBVBzisEuGWnNP8=" 358 | }, 359 | 
"unordered-array-remove": { 360 | "version": "1.0.2", 361 | "resolved": "https://registry.npmjs.org/unordered-array-remove/-/unordered-array-remove-1.0.2.tgz", 362 | "integrity": "sha1-xUbo+I4xegzyZEyX7LV9umbSUO8=" 363 | }, 364 | "ut_metadata": { 365 | "version": "3.5.0", 366 | "resolved": "https://registry.npmjs.org/ut_metadata/-/ut_metadata-3.5.0.tgz", 367 | "integrity": "sha512-eqiRaDYiDl94uVB9oR8Yor+xl0rSKUovsqDxMt+hXzJt1yLYAo0HksVLlSiPPwkXBGFpERQADPanCi0EGhixnw==", 368 | "requires": { 369 | "bencode": "^2.0.0", 370 | "bitfield": "^3.0.0", 371 | "debug": "^4.0.0", 372 | "simple-sha1": "^3.0.0" 373 | } 374 | }, 375 | "util-deprecate": { 376 | "version": "1.0.2", 377 | "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", 378 | "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" 379 | }, 380 | "wrappy": { 381 | "version": "1.0.2", 382 | "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", 383 | "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" 384 | } 385 | } 386 | } 387 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dht-crawler", 3 | "version": "1.0.1", 4 | "description": "A Distributed Hash Table Crawler for Node.js and IP Monitor", 5 | "main": "app.js", 6 | "author": { 7 | "name": "Simion Robert George" 8 | }, 9 | "dependencies": { 10 | "bencode": "^2.0.1", 11 | "bittorrent-dht": "^9.0.3", 12 | "bittorrent-protocol": "^3.1.1", 13 | "elasticsearch": "^16.5.0", 14 | "lodash": "^4.17.15", 15 | "magnet-uri": "^5.2.4", 16 | "parse-torrent": "^7.0.1", 17 | "ut_metadata": "^3.5.0" 18 | } 19 | } -------------------------------------------------------------------------------- /resources/countPerMinute.txt: -------------------------------------------------------------------------------- 1 | 42 2 | 305 3 | 793 4 | 1644 5 | 2644 6 | 4175 7 | 6427 8 | 9106 9 | 12996 10 | 15142 11 | 
'use strict'

const config = require('../config');
const DHTCrawler = require('./lib/Crawling/DHTCrawler');
const fs = require('fs');

// Evaluation harness: count infohashes discovered by the DHT crawler and
// periodically append per-interval totals to a statistics file.
var crawler = new DHTCrawler(config.DEFAULT_CRAWLER_OPTIONS);
var count = 0; // infohashes seen during the current interval (reset each period)
var id = 0;    // total infohashes seen since startup

/*
 * Truncate `file`, then append `count` every `periodTime` ms.  After
 * `endTime` ms a final summary line is appended, the crawler is stopped
 * and the process exits.
 */
function startRegistering(file, endTime, periodTime) {
    // Start from an empty statistics file.
    fs.writeFile(file, "", (err) => {
        if (err) {
            return console.log(err);
        }
        console.log("File cleaned");
    });

    // Periodic dump of the per-interval counter.
    const intervalHandle = setInterval(() => {
        fs.appendFile(file, count + "\n", (err) => {
            if (err) {
                return console.log(err);
            }
            console.log("Count registered");
        });
        count = 0;
    }, periodTime);

    // One-shot shutdown at the end of the measurement window.
    setTimeout(() => {
        clearInterval(intervalHandle);
        fs.appendFile(file, count + "\nTotal infohashes crawled in " + (endTime / 60000) + " minutes: " + id, (err) => {
            if (err) {
                return console.log(err);
            }
            console.log("Count registered");
            console.log("Crawling process done");
            crawler.end();
            process.exit();
        });
    }, endTime);
}

crawler.on('infohash', (listInfohash, rinfo) => {
    count++;
    id++;
});

crawler.start();
startRegistering("resources/countPerMinute.txt", 60 * 60 * 1000, 60 * 1000); // 1 h window, 1 min period
/*
 * Truncate `file`, then append the per-interval infohash count every
 * `periodTime` ms.  After `endTime` ms a final summary line is appended,
 * the crawler is stopped and the process exits.
 */
function startRegistering(file, endTime, periodTime) {
    // Start from an empty statistics file.
    fs.writeFile(file, "", (err) => {
        if (err) {
            return console.log(err);
        }
        console.log("File cleaned");
    });

    // Periodic dump of the per-interval counter.
    const intervalHandle = setInterval(() => {
        fs.appendFile(file, count + "\n", (err) => {
            if (err) {
                return console.log(err);
            }
            console.log("Count registered");
        });
        count = 0;
    }, periodTime);

    // One-shot shutdown at the end of the measurement window.
    setTimeout(() => {
        clearInterval(intervalHandle);
        fs.appendFile(file, count + "\nTotal infohashes crawled in " + (endTime / 60000) + " minutes: " + id, (err) => {
            if (err) {
                return console.log(err);
            }
            console.log("Count registered");
            console.log("Crawling process done");
            crawler.end();
            process.exit();
        });
    }, endTime);
}

// Begin crawling only once the Elasticsearch indexer is ready.
indexer.ready(function() {
    crawler.start();
    startRegistering("resources/countPerMinute.txt", 60 * 60 * 1000, 60 * 1000); // 1 h window, 1 min period
});
// Index torrent metadata documents as the resolver produces them.
metadataService.on('metadata', (torrent) => {
    console.log("Infohash: " + torrent.infohash.toString('hex'))
    console.log('Torrent sent to batch: ' + torrent.name);

    setImmediate((metadata) => {
        //Second, index torrent
        indexer.indexTorrent(metadata, null)
    }, torrent);

    console.log('/////////////////////////////////////////////////////');
});

// Infohashes whose metadata could not be fetched are only logged.
metadataService.on('metadataTimeout', (infohash) => {
    console.log("Infohash: " + infohash.toString('hex'));
    console.log('No metadata available');
    console.log('/////////////////////////////////////////////////////');
})

metadataService.on('cacheEmpty', () => {
    console.log("Metadata indexing Done")
})


// Index discovered peers (IP lists) per torrent.
peerDiscoveryService.on('ip', (torrent) => {
    console.log("Infohash: " + torrent.infohash.toString('hex'));
    console.log('List ip sent to batch ' + torrent.listIP.length);

    setImmediate((torrent) => {
        //Third, index IPs
        indexer.indexIP(torrent, null)
    }, torrent);
});

peerDiscoveryService.on('cacheEmpty', () => {
    console.log("IP indexing Done")
})
/*
 * Persist the id of the last infohash whose metadata was processed so a
 * later run can resume from the same position.
 *
 * Fix: fs.writeFile() only accepts string/Buffer-like data — passing the
 * raw number `lastInfohashIdMetadata` throws ERR_INVALID_ARG_TYPE, so the
 * checkpoint was never written.  Serialise the counter explicitly, and
 * surface write errors instead of half-ignoring them.
 */
function saveMetaIDCallback() {
    //periodically save to keep log of where i remained and to continue from
    fs.writeFile('resources/lastInfohashIdMetadata.txt', String(lastInfohashIdMetadata), function(err) {
        if (err) {
            // Losing a checkpoint only costs re-doing some work, but it
            // must not fail silently.
            return console.log(err);
        }
        console.log('File updated')
    });
}
// An infohash whose metadata could not be fetched still advances the
// checkpoint counter, otherwise it would be retried forever.
metadataService.on('metadataTimeout', (infohash) => {
    lastInfohashIdMetadata++;

    console.log('\n' + lastInfohashIdMetadata + ". Infohash: " + infohash.toString('hex'));
    console.log('No metadata available');
    console.log('/////////////////////////////////////////////////////');
})

// When the resolver drains its queue, refill it with the next batch of
// ten infohashes from the index and restart the service.
metadataService.on('cacheEmpty', () => {

    console.log('Cache Empty');

    indexer.getLastInfohashes(lastInfohashIdMetadata, lastInfohashIdMetadata + 9, (listInfohashes) => {
        if (listInfohashes.length != 0) {
            metadataService.addToCache(listInfohashes);
            metadataService.startService()
        }
    })
})

// Start resolving once the Elasticsearch indexer is ready.
indexer.ready(() => {
    metadataService.startService()
})
/*
 * Persist the id of the last infohash whose peers were processed so a
 * later run can resume from the same position.
 *
 * Fix: fs.writeFile() only accepts string/Buffer-like data — passing the
 * raw number `lastInfohashIdIPs` throws ERR_INVALID_ARG_TYPE, so the
 * checkpoint was never written.  Serialise the counter explicitly, and
 * surface write errors instead of ignoring them.  (Mirrors the same fix
 * in indexMetadata.js.)
 */
function saveInfohashIDCallback() {
    //periodically save to keep log of where i remained and to continue from
    fs.writeFile('resources/lastInfohashIdIPs.txt', String(lastInfohashIdIPs), function(err) {
        if (err) {
            return console.log(err);
        }
        console.log("File updated")
    });
}
this.verticalAttackNrNodes = options.verticalAttackNrNodes || 8; 23 | this.BEP51Mode = options.BEP51Mode || false; 24 | 25 | this.socket = dgram.createSocket('udp4'); 26 | this.routingTable = new RoutingTable(options.tableMaxSize || 128); 27 | } 28 | 29 | 30 | start() { 31 | this.socket.bind(this.port, this.address); 32 | 33 | this.socket.on('listening', function () { 34 | console.log('UDP Server listening on %s:%s', this.address, this.port); 35 | }.bind(this)); 36 | 37 | this.socket.on('message', function (msg, rinfo) { 38 | this.onMessage(msg, rinfo); 39 | }.bind(this)); 40 | 41 | this.socket.on('error', function (err) { 42 | console.error("UDP error: %s", err); 43 | }); 44 | 45 | setInterval(function () { 46 | this.contactBootstrapNodes(); 47 | }.bind(this), this.dhtAnnouncingBootstrap); 48 | 49 | setInterval(function () { 50 | this.horrizontalAttack(); 51 | 52 | if (this.verticalAttackMode == true) //TODO: Heuristically decide if vertical attack is feazible 53 | this.verticalAttack(); 54 | 55 | if (this.BEP51Mode == true) 56 | this.indexDHT(); 57 | 58 | this.routingTable.nodes = []; 59 | }.bind(this), this.dhtAnnouncingTable); 60 | } 61 | 62 | end() { 63 | this.socket.close(); 64 | } 65 | 66 | contactBootstrapNodes() { 67 | this.BOOTSTRAP_NODES.forEach(function (node) { 68 | this.sendFindNodeRequest({ address: node[0], port: node[1] }, this.routingTable.nid); 69 | }.bind(this)); 70 | } 71 | 72 | /* 73 | * Send KRPC find_node query to all nodes in the kbucket. The queryingNodeId is 74 | * calculated by using some high bytes of peer node's id, which makes our id close 75 | * to the peer node XOR wise. 76 | * After broadcasting, nodes in the kbucket are useless for crawler's sake. Just 77 | * empty the kbucket for future peer nodes. 
78 | */ 79 | horrizontalAttack() { 80 | 81 | // generateNeighborID(nid, this.routingTable.nid) to have greater chance that others store my id in their routing table (close to him) 82 | // this.routingTable.nid to have same id, if i send to him my id. Random or this? 83 | this.routingTable.nodes.forEach(function (node) { 84 | this.sendFindNodeRequest({ 85 | address: node.address, 86 | port: node.port 87 | }, utils.generateNeighborID(node.nid, this.routingTable.nid)) 88 | }.bind(this)); 89 | } 90 | 91 | verticalAttack() { 92 | this.routingTable.nodes.forEach(function (node) { 93 | for (var i = 0; i < this.verticalAttackNrNodes; i++) { 94 | utils.generateRandomIDAsync(node, null, function (node, x, randomID) { 95 | 96 | // We limit the number of outgoing UDP requests to 1000 packages per second. 97 | setTimeout(function () { 98 | this.sendFindNodeRequest({ 99 | address: node.address, 100 | port: node.port 101 | }, utils.generateNeighborID(node.nid, randomID)); 102 | }.bind(this), 0) 103 | 104 | }.bind(this)); 105 | } 106 | }.bind(this)); 107 | } 108 | 109 | indexDHT() { 110 | this.routingTable.nodes.forEach(function (node) { 111 | this.sendSampleInfohashesRequest({ 112 | address: node.address, 113 | port: node.port 114 | }, node.nid); 115 | }.bind(this)); 116 | } 117 | 118 | 119 | /* 120 | * The KRPC protocol is a simple RPC mechanism consisting of bencoded dictionaries sent 121 | * over UDP. A single query packet is sent out and a single packet is sent in response. 122 | * There is no retry. 123 | */ 124 | sendKRPC(msg, rinfo) { 125 | var buf = bencode.encode(msg); 126 | this.socket.send(buf, 0, buf.length, rinfo.port, rinfo.address); 127 | } 128 | 129 | /* 130 | * The KRPC find_node query lets other DHT peer nodes know us. Before having any peer 131 | * nodes, bootstrap nodes are used to query selfId. Then nodes from find_node responses 132 | * can be used to fill up the kbucket. 
Once there are some nodes in the kbucket, further 133 | * find_node queries can be made by using peer nodes' neighbor id. 134 | */ 135 | sendFindNodeRequest(rinfo, personalID) { 136 | utils.generateRandomIDAsync(rinfo, personalID, function (rinfo, personalID, targetID) { 137 | var msg = { 138 | t: targetID.slice(0, 4), 139 | y: 'q', 140 | q: 'find_node', 141 | a: { 142 | id: personalID, 143 | target: targetID 144 | } 145 | }; 146 | this.sendKRPC(msg, rinfo); 147 | 148 | }.bind(this)); 149 | } 150 | 151 | sendSampleInfohashesRequest(rinfo, nid) { 152 | utils.generateRandomIDAsync(rinfo, nid, function (rinfo, nid, targetID) { 153 | var msg = { 154 | t: targetID.slice(0, 4), 155 | y: 'q', 156 | q: 'sample_infohashes', 157 | a: { 158 | id: this.routingTable.nid, 159 | target: targetID //TODO:random or static->speed 160 | } 161 | }; 162 | 163 | this.sendKRPC(msg, rinfo); 164 | 165 | }.bind(this)); 166 | } 167 | 168 | 169 | 170 | ////////////////////////////////////////////////Messages got//////////////////////////////////// 171 | onMessage(data, rinfo) { 172 | try { 173 | var msg = bencode.decode(data); 174 | 175 | if (msg.y == 'r' && msg.r.samples !== undefined) { 176 | 177 | // BEP51 message received 178 | if (msg.r.nodes) { 179 | var listInfohash = []; 180 | for (let i = 0; i < msg.r.samples.length; i += 20) { 181 | 182 | let infohash = []; 183 | for (let j = i; j < i + 20; j++) { 184 | infohash[j % 20] = msg.r.samples[j]; 185 | } 186 | infohash = Buffer.from(infohash); 187 | 188 | listInfohash.push(infohash); 189 | } 190 | this.emit('listInfohash', listInfohash, rinfo); 191 | this.onFindNodeResponse(msg.r.nodes); 192 | } 193 | } 194 | else if (msg.y == 'r' && msg.r.nodes) { 195 | 196 | // List of nodes got 197 | this.onFindNodeResponse(msg.r.nodes); 198 | } 199 | else if (msg.y == 'q' && msg.q == 'ping') { 200 | 201 | // horrizontal attack: not to be deleted from nodes tables or Bootstrap node's protection 202 | this.onPingRequest(msg, rinfo); 203 | } 204 | else if 
(msg.y == 'q' && msg.q == 'find_node') { 205 | 206 | // horrizontal attack: not to be deleted from nodes tables or Bootstrap node's protection 207 | this.onFindNodeRequest(msg, rinfo); 208 | } 209 | else if (msg.y == 'q' && msg.q == 'get_peers') { 210 | 211 | // passively observe get_peers querries 212 | // infohash catched 213 | this.onGetPeersRequest(msg, rinfo); 214 | } 215 | else if (msg.y == 'q' && msg.q == 'announce_peer') { 216 | 217 | // infohash catched 218 | this.onAnnouncePeerRequest(msg, rinfo); 219 | } 220 | } 221 | catch (err) { 222 | 223 | } 224 | } 225 | 226 | onFindNodeResponse(data) { 227 | var nodes = utils.decodeNodes(data); 228 | nodes.forEach(function (node) { 229 | if (node.address != this.address && node.nid != this.routingTable.nid 230 | && node.port < 65536 && node.port > 0) { 231 | this.routingTable.push(node); 232 | } 233 | }.bind(this)); 234 | } 235 | 236 | onPingRequest(msg, rinfo) { 237 | //TODO: Verify if it's a bootstrap node. If it is, send only my id 238 | //QUestion: Does bootstrap verify its nodes? Maybe 239 | 240 | var tid = msg.t; 241 | var nid = msg.a.id; 242 | 243 | if (tid === undefined || nid.length != 20) { 244 | throw new Error("Invalid Ping RPC received"); 245 | } 246 | 247 | this.sendKRPC({ 248 | t: tid, 249 | y: 'r', 250 | r: { 251 | id: utils.generateNeighborID(nid, this.routingTable.nid) 252 | } 253 | }, rinfo); 254 | } 255 | 256 | 257 | onFindNodeRequest(msg, rinfo) { 258 | var tid = msg.t; 259 | var nid = msg.a.id; 260 | 261 | if (tid === undefined || nid.length != 20) { 262 | throw new Error("Invalid FindNode RPC received"); 263 | } 264 | 265 | this.sendKRPC({ 266 | t: tid, 267 | y: 'r', 268 | r: { 269 | id: utils.generateNeighborID(nid, this.routingTable.nid), 270 | nodes: utils.encodeNodes(this.routingTable.pop8()) //Previous: this.routingTable.nid 271 | } 272 | }, rinfo); 273 | } 274 | 275 | /* 276 | * The infohash in the message is not guaranteed to be legit. 
277 | * First 2 bytes of infohash are used to be the token. If the peer querying for 278 | * the infohash finds it in the future, it is supposed to send announce_peer to 279 | * us with the token, which can be used to verify the announce_peer packet. 280 | */ 281 | onGetPeersRequest(msg, rinfo) { 282 | var infohash = msg.a.info_hash; 283 | var tid = msg.t; 284 | var nid = msg.a.id; 285 | var token = infohash.slice(0, 2); 286 | 287 | if (tid === undefined || infohash.length != 20 || nid.length != 20) { 288 | throw new Error("Invalid GetPeers RPC received"); 289 | } 290 | 291 | this.sendKRPC({ 292 | t: tid, 293 | y: 'r', 294 | r: { 295 | id: utils.generateNeighborID(infohash, this.routingTable.nid), 296 | nodes: utils.encodeNodes(this.routingTable.pop8()), //Previous:'' 297 | token: token 298 | } 299 | }, rinfo); 300 | 301 | //this.emit('infohash', infohash, rinfo); 302 | } 303 | 304 | onAnnouncePeerRequest(msg, rinfo) { 305 | var port; 306 | var infohash = msg.a.info_hash; 307 | var token = msg.a.token; 308 | var nid = msg.a.id; 309 | var tid = msg.t; 310 | 311 | if (tid == undefined) { 312 | throw new Error("Invalid AnnouncePeer RPC received"); 313 | } 314 | 315 | if (infohash.slice(0, 2).toString() != token.toString()) { 316 | return; 317 | } 318 | 319 | 320 | /* There is an optional argument called implied_port which value is either 0 or 1. 321 | * If it is present and non-zero, the port argument should be ignored and the source 322 | * port of the UDP packet should be used as the peer's port instead. 
/*
 * Fixed-capacity list of DHT nodes plus this crawler's own node id.
 * Used as a disposable kbucket: the crawler fills it from find_node
 * responses and empties it after each broadcast round.
 */
class RoutingTable {
    constructor(maxsize) {
        // Our node id, fixed for the lifetime of the crawler.
        this.nid = utils.generateRandomIDSync();
        //this.nid = new Uint8Array([61, 67, 77, 134, 255, 61, 143, 59, 44, 118, 178, 114, 123, 212, 166, 73, 131, 27, 72, 240]);
        this.nodes = [];
        this.maxsize = maxsize;
    }

    // Add a node; silently drop it once the table is at capacity.
    push(node) {
        if (this.nodes.length < this.maxsize) {
            this.nodes.push(node);
        }
    }

    /*
     * Return exactly 8 nodes for a find_node/get_peers reply, or [] when
     * the table is empty.
     *
     * Fix: with 1-7 known nodes the old code returned eight copies of
     * nodes[0] (via a `new Array(8).join().split(',')` trick); now the
     * reply cycles through all known nodes so peers receive every contact
     * we actually have.
     */
    pop8() {
        if (this.nodes.length === 0) {
            return [];
        }
        if (this.nodes.length >= 8) {
            return this.nodes.slice(0, 8);
        }
        return Array.from({ length: 8 }, (_, i) => this.nodes[i % this.nodes.length]);
    }
}
'.m4p', '.m4v', '.mkv', '.mng', '.mov', '.mp2', '.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mxf', '.net', '.nsv', '.ogv', '.qt', '.rm', '.rmvb', '.roq', '.svi', '.vob', '.webm', '.wmv', '.yuv']; 13 | this.audioFormats = ['.aa', '.aac', '.aax', '.act', '.aiff', '.amr', '.ape', '.au', '.awb', '.dct', '.dss', '.dvf', '.flac', '.gsm', '.iklax', '.ivs', '.m4a', '.m4b', '.mmf', '.mp3', '.mpc', '.msv', '.ogg', '.opus', '.ra', '.raw', '.sln', '.tta', '.vox', '.wav', '.wma', '.wv']; 14 | this.documentFormats = ['.pptx', '.ppt', '.csv', '.txt', '.doc', '.docx', '.pdf', '.cbr', '.cbz', '.cb7', '.cbt', '.cba', 'djvu', '.epub', '.fb2', '.ibook', '.azw', '.lit', '.prc', '.mobi', '.pdb', '.pdb', '.oxps', '.xps', '.xls', '.xlsx']; 15 | this.mediaFormat = ['.bin', '.iso', '.dmg', '.cue', '.mdf', '.zip', '.rar', '.7z', '.tar.gz', '.deb']; 16 | this.photoFormat = ['.jpg','.png','.gif','.bmp','.psd','.tif','.tiff','.svg'] 17 | } 18 | 19 | parse(torrent) { 20 | let newTorrent = { 21 | ...torrent, 22 | categories: [], 23 | type: '' 24 | }; 25 | 26 | // Categorise by torrent Name 27 | this.getVideoCategories(newTorrent, newTorrent); 28 | this.getMediaCategories(newTorrent, newTorrent); 29 | 30 | // Categorise by file with maximum size 31 | const file = _.maxBy(torrent.files, 'size') 32 | this.getCategories(file, newTorrent); 33 | 34 | newTorrent.categories = _.uniq(newTorrent.categories); 35 | delete newTorrent.count; 36 | 37 | return newTorrent; 38 | } 39 | 40 | getCategories(file, torrent) { 41 | let newTorrent = torrent; 42 | const ext = `.${file.name.split('.')[file.name.split('.').length - 1]}`; 43 | 44 | if (this.videoFormats.indexOf(ext) > -1) { 45 | if (torrent.type === '') 46 | torrent.type = 'Video'; 47 | 48 | newTorrent = this.getVideoCategories(file, newTorrent); 49 | } else if (this.audioFormats.indexOf(ext) > -1) { 50 | if (torrent.type === '') 51 | torrent.type = 'Audio'; 52 | 53 | } else if (this.documentFormats.indexOf(ext) > -1) { 54 | if (torrent.type === '') 55 | 
torrent.type = 'Doc'; 56 | 57 | newTorrent = this.getDocCategories(file, newTorrent); 58 | } else if (this.mediaFormat.indexOf(ext) > -1) { 59 | if (torrent.type === '') 60 | torrent.type = 'Media'; 61 | 62 | newTorrent = this.getMediaCategories(file, newTorrent); 63 | } else if (this.photoFormat.indexOf(ext) > -1) { 64 | if (torrent.type === '') 65 | torrent.type = 'Picture'; 66 | } else { 67 | 68 | //default categorise by name if the extension is not recognised 69 | this.getVideoCategories(file, newTorrent); 70 | if (torrent.categories.length != 0) { 71 | torrent.type = 'Video'; 72 | return newTorrent; 73 | } 74 | 75 | this.getMediaCategories(file, newTorrent); 76 | if (torrent.categories.length != 0) { 77 | torrent.type = 'Media'; 78 | return newTorrent; 79 | } 80 | this.getDocCategories(file, newTorrent); 81 | if (torrent.categories.length != 0) { 82 | torrent.type = 'Doc'; 83 | return newTorrent; 84 | } 85 | 86 | torrent.type = 'Other'; 87 | } 88 | 89 | 90 | return newTorrent; 91 | } 92 | 93 | getVideoCategories (file, torrent) { 94 | if (file.name.toLowerCase().match(/season|episode|s[0-9]{2}e[0-9]{2}/i)) { 95 | torrent.categories.push('TVshow'); 96 | 97 | } else if (file.name.match(/[0-9]+x[0-9]+/i)) { 98 | torrent.categories.push('TVshow'); 99 | 100 | } else if (torrent.type == 'Video') { 101 | 102 | //default for video 103 | torrent.categories.push('Movie'); 104 | } 105 | 106 | if (file.name.toLowerCase().indexOf('1080') > -1) { 107 | torrent.categories.push('1080'); 108 | } 109 | if (file.name.toLowerCase().indexOf('720') > -1) { 110 | torrent.categories.push('720'); 111 | } 112 | if (file.name.toLowerCase().indexOf('hd') > -1) { 113 | torrent.categories.push('HD'); 114 | } 115 | if (file.name.toLowerCase().indexOf('sd') > -1) { 116 | torrent.categories.push('SD'); 117 | } 118 | if (file.name.toLowerCase().indexOf('bdrip') > -1) { 119 | torrent.categories.push('BDRIP'); 120 | } 121 | if (file.name.toLowerCase().indexOf('dvdrip') > -1) { 122 | 
torrent.categories.push('DVDRIP'); 123 | } 124 | if (file.name.toLowerCase().indexOf('xxx') > -1) { 125 | torrent.categories.push('XXX'); 126 | } else if (file.name.toLowerCase().indexOf('porn') > -1) { 127 | torrent.categories.push('XXX'); 128 | } else if (file.name.toLowerCase().indexOf('fuck') > -1) { 129 | torrent.categories.push('XXX'); 130 | } else if (file.name.toLowerCase().indexOf('sex') > -1) { 131 | torrent.categories.push('XXX'); 132 | } else if (file.name.toLowerCase().indexOf('censored') > -1) { 133 | torrent.categories.push('XXX'); 134 | } 135 | } 136 | 137 | getDocCategories (file, torrent) { 138 | if (file.name.indexOf('.epub') != -1) { 139 | torrent.categories.push('Ebook'); 140 | } 141 | if (file.name.indexOf('.mobi') != -1) { 142 | torrent.categories.push('Ebook'); 143 | } 144 | if (file.name.indexOf('.azw3') != -1) { 145 | torrent.categories.push('Ebook'); 146 | } 147 | } 148 | 149 | getMediaCategories(file, torrent) { 150 | if (file.name.toLowerCase().indexOf('codex') > -1) { 151 | torrent.categories.push('Game'); 152 | } else if (file.name.toLowerCase().indexOf('skidrow') > -1) { 153 | torrent.categories.push('Game'); 154 | } else if (file.name.toLowerCase().indexOf('reloaded') > -1) { 155 | torrent.categories.push('Game'); 156 | } else if (file.name.toLowerCase().indexOf('plaza') > -1) { 157 | torrent.categories.push('Game'); 158 | } else if (file.name.toLowerCase().indexOf('gog') > -1) { 159 | torrent.categories.push('Game'); 160 | } else if (file.name.toLowerCase().indexOf('razor1911') > -1) { 161 | torrent.categories.push('Game'); 162 | } else if (file.name.toLowerCase().indexOf('hi2u') > -1) { 163 | torrent.categories.push('Game'); 164 | } else if (file.name.toLowerCase().indexOf('tinyiso') > -1) { 165 | torrent.categories.push('Game'); 166 | } else if (file.name.toLowerCase().indexOf('postmortem') > -1) { 167 | torrent.categories.push('Game'); 168 | } else if (file.name.toLowerCase().indexOf('steam') > -1) { 169 | 
    /*
     * Batching wrapper around the Elasticsearch client.  Documents are
     * accumulated in paired action/document queues and flushed via bulk()
     * once a queue reaches its configured batch size.
     */
    constructor(opts) {
        // Vestigial guard from a pre-class factory style; an ES6 class
        // constructor cannot be entered without `new`, so this never fires.
        if (!(this instanceof ElasticSearch))
            return new ElasticSearch(opts)

        this.client = new elasticsearch.Client(opts.connection);
        this.batchSizeDHT = opts.batchSizeDHT;        // flush threshold for bare infohashes
        this.batchSizeTorrent = opts.batchSizeTorrent // flush threshold for metadata / IP batches
        // Each queue holds action/document pairs, i.e. 2 entries per record.
        this.recordInfohashQueue = [];
        this.recordTorrentQueue = [];
        this.recordIPQueue = [];
        this.recordRelationQueue = [];
    }

    // Resolve the last used numeric document id, then invoke `callback`.
    ready(callback) {

        // do something async and call the callback:
        this._getLastID(callback)
    }
this.recordTorrentQueue.push(jsonObject); 63 | this._queueMetadata(callback); 64 | } 65 | 66 | indexInfohash(infohash) { 67 | var index = { 68 | index: { 69 | _index: 'torrent', 70 | _type: 'doc', 71 | _id: infohash.toString('hex') 72 | } 73 | } 74 | var jsonObject = { 75 | ID: this._id++, 76 | Peers: 0 77 | }; 78 | 79 | this.recordInfohashQueue.push(index); 80 | this.recordInfohashQueue.push(jsonObject); 81 | this._queueInfohash(); 82 | } 83 | 84 | indexIP(torrent, callback) { 85 | if (torrent.listIP.length != 0) { 86 | 87 | //update torrent Peers value 88 | this._updateSizeTorrent(torrent) 89 | 90 | // Index relation first 91 | this._indexRelation(torrent); 92 | 93 | //Index every ip 94 | for (let i = 0; i < torrent.listIP.length; i++) { 95 | this.recordIPQueue.push({ 96 | index: { 97 | _index: 'ip', 98 | _type: 'doc', 99 | _id: torrent.listIP[i].host, //+torrent.listIP[i].port TODO: Discuss IP:port 100 | pipeline: 'geoip' 101 | } 102 | }); 103 | this.recordIPQueue.push({ 104 | IP: torrent.listIP[i].host, 105 | Port: torrent.listIP[i].port, 106 | Date: Date.now() 107 | }); 108 | } 109 | 110 | //Verify if it needs to be inserted 111 | this._queueIP(callback); 112 | } 113 | } 114 | 115 | _indexRelation(torrent) { 116 | var index = { 117 | index: { 118 | _index: 'relation', 119 | _type: 'doc', 120 | _id: torrent.infohash.toString('hex') 121 | } 122 | } 123 | 124 | var jsonObject = { 125 | IPs: [] 126 | }; 127 | 128 | for (let i = 0; i < torrent.listIP.length; i++) { 129 | jsonObject.IPs.push(torrent.listIP[i].host) //+torrent.listIP[i].port TODO: Discuss IP:port 130 | } 131 | 132 | this.recordRelationQueue.push(index); 133 | this.recordRelationQueue.push(jsonObject); 134 | } 135 | 136 | _updateSizeTorrent(torrent) { 137 | var update = { 138 | update: { 139 | _index: 'torrent', 140 | _type: 'doc', 141 | _id: torrent.infohash.toString('hex') 142 | } 143 | } 144 | 145 | var jsonObject = { 146 | doc: { 147 | Peers: torrent.listIP.length, 148 | Date: Date.now() 149 | } 
150 | }; 151 | 152 | this.recordTorrentQueue.push(update); 153 | this.recordTorrentQueue.push(jsonObject); 154 | } 155 | 156 | 157 | 158 | 159 | _queueInfohash() { 160 | if (this.recordInfohashQueue.length / 2 >= this.batchSizeDHT) { 161 | this.client.bulk({ 162 | body: this.recordInfohashQueue 163 | }, function (err, resp) { 164 | }); 165 | this.recordInfohashQueue = []; 166 | 167 | console.log('Elasticsearch Class: Infohash Indexed') 168 | } 169 | } 170 | 171 | _queueIP(callback) { 172 | if (this.recordRelationQueue.length / 2 >= this.batchSizeTorrent) { 173 | this.client.bulk({ 174 | body: this.recordRelationQueue 175 | }, function (err, resp) { 176 | }); 177 | this.recordRelationQueue = []; 178 | 179 | this.client.bulk({ 180 | body: this.recordIPQueue 181 | }, function (err, resp) { 182 | }); 183 | this.recordIPQueue = []; 184 | console.log('Elasticsearch Class: IP and Relation Indexed') 185 | 186 | this.client.bulk({ 187 | body: this.recordTorrentQueue 188 | }, function (err, resp) { 189 | }); 190 | 191 | this.recordTorrentQueue = []; 192 | console.log('Elasticsearch Class: Peers updated') 193 | 194 | if (callback != null) 195 | callback(); 196 | } 197 | } 198 | 199 | _queueMetadata(callback) { 200 | if (this.recordTorrentQueue.length / 2 >= this.batchSizeTorrent) { 201 | this.client.bulk({ 202 | body: this.recordTorrentQueue 203 | }, function (err, resp) { 204 | }); 205 | 206 | this.recordTorrentQueue = []; 207 | console.log('Elasticsearch Class: Metadata Indexed') 208 | if (callback != null) 209 | callback(); 210 | } 211 | } 212 | 213 | getLastInfohashes(min, max, callback) { 214 | this.client.search({ 215 | index: "torrent", 216 | body: { 217 | _source: false, 218 | from: min, 219 | size: max - min + 1, 220 | sort: [{ "ID": { "order": "asc" } }] 221 | } 222 | }, function (error, response) { 223 | if (error) { 224 | console.log("error GetLastInfohashes"); 225 | } else { 226 | callback(this._decodeGetLastInfohashes(response)); 227 | } 228 | }.bind(this)); 229 
| } 230 | 231 | _getLastID(callback) { 232 | this.client.search({ 233 | index: 'torrent', 234 | _source: false, 235 | size: 0, 236 | body: { 237 | "aggs": { 238 | "max_id": { 239 | "max": { 240 | "field": "ID" 241 | } 242 | } 243 | } 244 | } 245 | }, function (error, response) { 246 | if (error != undefined) { 247 | console.log("unexpected error from elasticsearch"); 248 | process.exit(0); 249 | } 250 | 251 | this._id = response.aggregations.max_id.value + 1; 252 | callback(); 253 | }.bind(this)) 254 | } 255 | 256 | _decodeGetLastInfohashes(response) { 257 | var listObjects = response.hits.hits; 258 | var listInfohashes = [] 259 | 260 | for (let i = 0; i < listObjects.length; i++) { 261 | listInfohashes.push(listObjects[i]._id) 262 | } 263 | 264 | return listInfohashes; 265 | } 266 | } 267 | 268 | 269 | module.exports = ElasticSearch -------------------------------------------------------------------------------- /src/lib/Database/createDatabase.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | 4 | var elasticsearch = require('elasticsearch'); 5 | var config = require('../../../config'); 6 | 7 | var client = new elasticsearch.Client(config.DEFAULT_ELASTIC_SEARCH_OPTIONS.connection); 8 | 9 | createTorrentIndex(); 10 | createIPIndex(); 11 | createRelationIndex(); 12 | 13 | 14 | function createTorrentIndex() { 15 | client.indices.create({ 16 | index: 'torrent', 17 | body: { 18 | "mappings": { 19 | "doc": { 20 | "properties": { 21 | "ID": { 22 | "type": "long" 23 | }, 24 | "Name": { 25 | "type": "text", 26 | "fields": { 27 | "keyword": { 28 | "type": "keyword" 29 | } 30 | } 31 | }, 32 | "Search": { 33 | "type": "text" 34 | }, 35 | "Type": { 36 | "type": "keyword" 37 | }, 38 | "Categories": { 39 | "type": "keyword" 40 | }, 41 | "Files": { 42 | "properties": { 43 | "Name": { 44 | "type": "text" 45 | }, 46 | "Size": { "type": "long" } 47 | } 48 | }, 49 | "Peers": { "type": "integer" }, 50 | "Size": { "type": 
"long" }, 51 | "Date": { "type": "date" } 52 | } 53 | } 54 | } 55 | } 56 | }, function (err, resp, status) { 57 | if (err) { 58 | console.log(err); 59 | } 60 | else { 61 | console.log("create", resp); 62 | } 63 | }); 64 | } 65 | 66 | function createIPIndex() { 67 | client.indices.create({ 68 | index: "ip", 69 | body: { 70 | "mappings": { 71 | "doc": { 72 | "properties": { 73 | "IP": { "type": "ip" }, 74 | "Port": { "type": "integer" }, 75 | "Date": { "type": "date" }, 76 | "geoip": { 77 | "properties": { 78 | "continent_name": { "type": "keyword" }, 79 | "city_name": { "type": "keyword" }, 80 | "country_iso_code": { "type": "keyword" }, 81 | "region_name": { "type": "keyword" }, 82 | "location": { "type": "geo_point" } 83 | } 84 | } 85 | } 86 | } 87 | } 88 | } 89 | }, function (err, resp, status) { 90 | if (err) { 91 | console.log(err); 92 | } 93 | else { 94 | console.log("create", resp); 95 | } 96 | }); 97 | } 98 | 99 | function createRelationIndex() { 100 | client.indices.create({ 101 | index: "relation", 102 | body: { 103 | "mappings": { 104 | "doc": { 105 | "properties": { 106 | "IPs": { "type": "ip" } 107 | } 108 | } 109 | } 110 | } 111 | }, function (err, resp, status) { 112 | if (err) { 113 | console.log(err); 114 | } 115 | else { 116 | console.log("create", resp); 117 | } 118 | }); 119 | } 120 | -------------------------------------------------------------------------------- /src/lib/Services/MetadataResolver.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var EventEmitter = require('events') 4 | const parseTorrent = require('parse-torrent') 5 | const Protocol = require('bittorrent-protocol'); 6 | const ut_metadata = require('ut_metadata'); 7 | 8 | const net = require('net'); 9 | var PeerDiscovery = require('./PeerDiscovery'); 10 | const utils = require('../utils'); 11 | 12 | 13 | class MetadataResolver extends EventEmitter { 14 | constructor(opts) { 15 | super(); 16 | if (!(this instanceof 
MetadataResolver)) 17 | return new MetadataResolver(opts); 18 | 19 | this.timeout = opts.timeout; 20 | this.socketTimeout = opts.socketTimeout; 21 | 22 | this.selfID = utils.generateRandomIDSync(); 23 | this.socketList = []; 24 | this.currentInfohash = null; 25 | this.remainingSec = 0; 26 | this.tracker = opts.tracker; 27 | this.torcacheURL = opts.torcacheURL; 28 | 29 | 30 | this._onDHTPeer = function (peer, infohash, from) { 31 | if (this.currentInfohash == infohash.toString('hex') && this.semaphore != 0) { 32 | this._downloadMetadataFromDHTPeer(peer, infohash) 33 | } 34 | } 35 | } 36 | 37 | start(infohash, peerDiscovery) { 38 | this.currentInfohash = infohash; 39 | this.peerDiscovery = peerDiscovery; 40 | this.semaphore = 1 41 | 42 | this._setMetadataTimeout(this.timeout); 43 | 44 | //conccurently set each other 45 | if (this.tracker == true) { 46 | this._downloadMetadataFromTracker(infohash)//try through torcache first(its faster) 47 | } 48 | this._downloadMetadataFromDHT(infohash); //try through DHT 49 | } 50 | 51 | 52 | _unregister() { 53 | clearTimeout(this.remainingSec); 54 | this.semaphore = 0; 55 | this.peerDiscovery.removeListener('peer', this._onDHTPeer); 56 | 57 | this.socketList.forEach(function (socket) { 58 | socket.destroy(); 59 | }) 60 | delete this.socketList 61 | this.socketList = [] 62 | } 63 | 64 | _setMetadataTimeout(timeout) { 65 | this.remainingSec = setTimeout(function () { 66 | this._unregister() 67 | this.emit('timeout', this.currentInfohash); 68 | }.bind(this), timeout) 69 | } 70 | 71 | _downloadMetadataFromDHT(infohash) { 72 | this.peerDiscovery.on('peer', this._onDHTPeer.bind(this)); 73 | this.peerDiscovery.lookup(infohash); 74 | } 75 | 76 | _downloadMetadataFromTracker(infohash) { 77 | parseTorrent.remote(this.torcacheURL + infohash + ".torrent", function (err, parsedTorrent) { 78 | if (err || typeof parsedTorrent === "undefined") { 79 | 80 | } else { 81 | if (this.semaphore!=0 && parsedTorrent.infoHash == this.currentInfohash) { 82 | 
this._unregister(); 83 | var torrent = utils.parseMetadataTracker(parsedTorrent) 84 | this.emit('metadata', torrent); 85 | } 86 | } 87 | }.bind(this)) 88 | } 89 | 90 | _downloadMetadataFromDHTPeer(peer, infohash) { 91 | var socket = new net.Socket(); 92 | socket.on('error', err => { socket.destroy(); }); 93 | socket.on('timeout', err => { socket.destroy(); }); 94 | 95 | socket.setTimeout(this.socketTimeout); 96 | this.socketList.push(socket); 97 | 98 | this._onPeerConnected = function () { 99 | if (this.semaphore != 0) { 100 | const wire = new Protocol(); 101 | 102 | socket.pipe(wire).pipe(socket); 103 | wire.use(ut_metadata()); 104 | 105 | wire.on('handshake', function (infohash, peerId) { 106 | if (this.semaphore != 0) 107 | wire.ut_metadata.fetch(); 108 | }); 109 | 110 | wire.ut_metadata.on('metadata', function (rawMetadata) { 111 | if (this.semaphore != 0) { 112 | this._unregister(); 113 | var torrent = utils.parseMetadataDHT(rawMetadata, this.currentInfohash); 114 | this.emit('metadata', torrent) 115 | } 116 | }.bind(this)); 117 | 118 | 119 | wire.handshake(infohash, this.selfID, { dht: true }); 120 | } 121 | }.bind(this); 122 | 123 | socket.connect(peer.port, peer.host, this._onPeerConnected); 124 | } 125 | 126 | } 127 | 128 | module.exports = MetadataResolver; -------------------------------------------------------------------------------- /src/lib/Services/MetadataResolverService.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const EventEmitter = require('events') 4 | const _ = require('lodash'); 5 | const MetadataResolver = require('./MetadataResolver'); 6 | const PeerDiscovery = require('./PeerDiscovery'); 7 | const Categoriser = require('../Database/Categoriser'); 8 | 9 | class MetadataService extends EventEmitter { 10 | constructor(opts) { 11 | super(); 12 | if (!(this instanceof MetadataService)) 13 | return new MetadataService(opts); 14 | 15 | this.opts = opts 16 | 17 | 
this.metadataFetcher = new MetadataResolver(opts.DEFAULT_METADATA_FETCHER_OPTIONS); 18 | this.categoriser = new Categoriser(); 19 | this.cache = []; 20 | this.peerDiscovery = null; 21 | 22 | this.onMetadata = function (torrent, remoteAddress) { 23 | this.emit("metadata", this.categoriser.parse(torrent)) 24 | setTimeout(function () { 25 | this._nextInfohash() 26 | }.bind(this), 1000); 27 | } 28 | 29 | this.onMetadataTimeout = function (infohash) { 30 | this.emit('metadataTimeout', infohash); 31 | this._nextInfohash() 32 | } 33 | 34 | this.metadataFetcher.on('metadata', this.onMetadata.bind(this)); 35 | this.metadataFetcher.on('timeout', this.onMetadataTimeout.bind(this)); 36 | } 37 | 38 | addToCache(infohash) { 39 | if (Array.isArray(infohash)) 40 | this.cache = this.cache.concat(infohash) 41 | else 42 | this.cache.push(infohash) 43 | } 44 | 45 | startService() { 46 | if (this.cache.length != 0) { 47 | 48 | //create new PeerDiscovery for each infohash 49 | this.peerDiscovery = new PeerDiscovery(this.opts.DEFAULT_PEER_DISCOVERY_OPTIONS); 50 | 51 | // Start metadata fetcher 52 | var infohash = this.cache.shift(); 53 | this.metadataFetcher.start(infohash, this.peerDiscovery) 54 | 55 | } else { 56 | this.emit("cacheEmpty"); 57 | } 58 | } 59 | 60 | _nextInfohash() { 61 | this.peerDiscovery.destroy(); 62 | 63 | setImmediate(function () { 64 | this.startService(); 65 | }.bind(this)); 66 | } 67 | } 68 | 69 | module.exports = MetadataService; -------------------------------------------------------------------------------- /src/lib/Services/PeerDiscovery.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var DHT = require('bittorrent-dht/client') // empty object in browser 4 | var EventEmitter = require('events') 5 | 6 | class PeerDiscovery extends EventEmitter { 7 | 8 | constructor(opts) { 9 | super(); 10 | if (!(this instanceof PeerDiscovery)) 11 | return new PeerDiscovery(opts) 12 | 13 | this._port = opts.port ? 
opts.port : 20000; // torrent port 14 | this.timeout = opts.timeout || 9000; 15 | this.timeout_initial = opts.timeout_initial || 2000; 16 | this.secRemaining = 0; 17 | this.currentInfohash = 0; 18 | this.semaphore = 0; 19 | 20 | this.dht = new DHT(opts) 21 | 22 | this._onDHTPeer = function (peer, infohash, from) { 23 | if (this.currentInfohash.equals(infohash)) { 24 | clearTimeout(this.secRemaining); 25 | this.emit('peer', peer, infohash, from); 26 | 27 | this._setInfohashTimeout(infohash, this.timeout) 28 | } 29 | } 30 | 31 | this.dht.on('peer', this._onDHTPeer.bind(this)) 32 | this.dht.listen(this._port) 33 | } 34 | 35 | lookup(infohash) { 36 | this.currentInfohash = Buffer.from(infohash, 'hex'); 37 | this._setInfohashTimeout(infohash, this.timeout_initial) 38 | 39 | this.dht.lookup(infohash); 40 | } 41 | 42 | _setInfohashTimeout(infohash, timeout) { 43 | this.secRemaining = setTimeout(function () { 44 | if (this.semaphore == 0) { 45 | this.semaphore = 1; 46 | this.dht.removeListener('peer', this._onDHTPeer) 47 | 48 | this.emit('timeout', infohash); 49 | } 50 | }.bind(this), timeout) 51 | } 52 | 53 | destroy(cb) { 54 | clearTimeout(this.secRemaining); 55 | 56 | if (this.semaphore == 0) { 57 | this.dht.removeListener('peer', this._onDHTPeer) 58 | } 59 | 60 | this.dht.destroy(cb) 61 | } 62 | } 63 | 64 | module.exports = PeerDiscovery 65 | 66 | 67 | -------------------------------------------------------------------------------- /src/lib/Services/PeerDiscoveryService.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const EventEmitter = require('events') 4 | const _ = require('lodash'); 5 | const PeerDiscovery = require('./PeerDiscovery'); 6 | 7 | 8 | class PeerDiscoveryService extends EventEmitter { 9 | constructor(opts) { 10 | super(); 11 | if (!(this instanceof PeerDiscoveryService)) 12 | return new PeerDiscoveryService(opts); 13 | 14 | this.opts = opts 15 | this.cache = []; 16 | this.listIP = []; 17 | 
18 | this.onPeer = function (peer, infohash, from) { 19 | this.listIP.push(peer); 20 | } 21 | 22 | this.onDiscoveryEnded = function (infohash) { 23 | var torrent = {}; 24 | torrent.infohash = infohash; 25 | torrent.listIP = _.uniqBy(this.listIP, function (e) { 26 | return e.host + e.port 27 | }) 28 | 29 | 30 | this.peerDiscovery.removeListener('peer', this.onPeer); 31 | this.peerDiscovery.removeListener('timeout', this.onDiscoveryEnded); 32 | this.peerDiscovery.destroy(); 33 | 34 | this.emit("ip", torrent) 35 | 36 | setImmediate(function () { 37 | this.startService(); 38 | }.bind(this)); 39 | } 40 | } 41 | 42 | addToCache(infohash) { 43 | if (Array.isArray(infohash)) 44 | this.cache = this.cache.concat(infohash) 45 | else 46 | this.cache.push(infohash) 47 | } 48 | 49 | startService() { 50 | //TODO:Use DHT not PeerDiscovery 51 | if (this.cache.length != 0) { 52 | var infohash = this.cache.shift(); 53 | delete this.listIP; 54 | this.listIP = []; 55 | 56 | //create new PeerDiscovery for each infohash 57 | this.peerDiscovery = new PeerDiscovery(this.opts.DEFAULT_PEER_DISCOVERY_OPTIONS); 58 | this.peerDiscovery.on('peer', this.onPeer.bind(this)); 59 | this.peerDiscovery.on('timeout', this.onDiscoveryEnded.bind(this)); 60 | 61 | //start getting metadata 62 | this.peerDiscovery.lookup(infohash); 63 | } else { 64 | delete this.listIP; 65 | this.emit("cacheEmpty"); 66 | } 67 | } 68 | } 69 | 70 | module.exports = PeerDiscoveryService; -------------------------------------------------------------------------------- /src/lib/utils.js: -------------------------------------------------------------------------------- 1 | var crypto = require('crypto'); 2 | const bencode = require('bencode'); 3 | 4 | exports.generateRandomIDAsync = function (rinfo, nodeID, cb) { 5 | 6 | crypto.randomBytes(20, (err, buf) => { 7 | if(err) throw err 8 | cb(rinfo, nodeID, buf) 9 | }) 10 | }; 11 | 12 | exports.generateRandomIDSync = function () { 13 | return crypto.randomBytes(20) 14 | }; 15 | 16 | 
exports.generateNeighborID = function (target, nid) { 17 | return Buffer.concat([target.slice(0, 10), nid.slice(10)]); 18 | }; 19 | 20 | exports.decodeNodes = function (data) { 21 | var nodes = []; 22 | 23 | for (var i = 0; i + 26 <= data.length; i += 26) { 24 | nodes.push({ 25 | nid: data.slice(i, i + 20), 26 | address: data[i + 20] + '.' + data[i + 21] + '.' + data[i + 22] + '.' + data[i + 23], 27 | port: data.readUInt16BE(i + 24) 28 | }); 29 | } 30 | return nodes; 31 | }; 32 | 33 | exports.encodeNodes = function (nodes) { 34 | 35 | return Buffer.concat(nodes.map((node) => Buffer.concat([node.nid, _encodeIP(node.address), _encodePort(node.port)]))) 36 | }; 37 | 38 | function _encodeIP(ip) { 39 | return Buffer.from(ip.split('.').map((i) => parseInt(i))) 40 | }; 41 | 42 | function _encodePort(port) { 43 | const data = Buffer.alloc(2) 44 | data.writeUInt16BE(port, 0) 45 | return data 46 | }; 47 | 48 | 49 | exports.parseMetadataTracker = function (parsedTorrent) { 50 | var files = []; 51 | 52 | if (parsedTorrent.hasOwnProperty('files')) { 53 | 54 | // multiple files 55 | var l = parsedTorrent.files.length; 56 | for (var i = 0; i < l; i++) { 57 | files.push( 58 | { 59 | name: parsedTorrent.files[i].path, 60 | size: parsedTorrent.files[i].length 61 | }); 62 | } 63 | } 64 | 65 | return { 66 | infohash: parsedTorrent.infoHash, 67 | name: parsedTorrent.name, 68 | files: files 69 | } 70 | } 71 | 72 | exports.parseMetadataDHT = function (rawMetadata, infohash) { 73 | var metadata = bencode.decode(rawMetadata).info; 74 | 75 | var torrentName = metadata.name.toString('utf-8'); 76 | var files = []; 77 | 78 | if (metadata.hasOwnProperty('files')) { 79 | 80 | // multiple files 81 | var l = metadata.files.length; 82 | for (var i = 0; i < l; i++) { 83 | files.push( 84 | { 85 | name: metadata.files[i].path.toString('utf-8'), 86 | size: metadata.files[i].length 87 | }); 88 | } 89 | } else { 90 | 91 | // single file 92 | files.push( 93 | { 94 | name: metadata.name.toString('utf-8'), 
95 | size: metadata.length 96 | }); 97 | } 98 | 99 | return { 100 | infohash: infohash, 101 | name: torrentName, 102 | files: files 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/test/test.js: -------------------------------------------------------------------------------- 1 | utils=require('../lib/utils') 2 | 3 | 4 | for (var i = 0; i < 100; i++) { 5 | utils.generateRandomID(i, function (i, buf) { 6 | console.log(i) 7 | }.bind(this)) 8 | } 9 | -------------------------------------------------------------------------------- /src/test/testDHTCrawler.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | var DHTCrawler = require('../lib/DHTCrawler'); 4 | var config = require('../../config'); 5 | 6 | var crawler = new DHTCrawler(config.DEFAULT_CRAWLER_OPTIONS); 7 | var count = 1; 8 | 9 | crawler.on('infohash', function (listInfohash,rinfo) { 10 | for (let i = 0; i < listInfohash.length; i++) { 11 | console.log((count++) + ". 
magnet:?xt=urn:btih:%s from %s:%s", listInfohash[i].toString("hex"), rinfo.address, rinfo.port); 12 | } 13 | }); 14 | 15 | crawler.start(); -------------------------------------------------------------------------------- /src/test/testMetadataResolver.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var MetadataResolver = require('../lib/MetadataResolver'); 4 | var PeerDiscovery = require('../lib/PeerDiscovery'); 5 | var config = require('../../config'); 6 | const INFO_HASH1 = '8CA378DBC8F62E04DF4A4A0114B66018666C17CD'; // ubuntu-16.04.1-server-amd64.iso 7 | const INFO_HASH2 = '726b4809351adf6fedc6ad779762829bf5512ae1' 8 | 9 | 10 | var metadataFetcher = new MetadataResolver(config.DEFAULT_METADATA_FETCHER_OPTIONS); 11 | var count = 1 12 | 13 | metadataFetcher.on('metadata', function (torrent) { 14 | console.log('\nTorrent found: ' + torrent.name); 15 | console.log('Files: ' ); 16 | 17 | for (let i = 0; i < torrent.files.length; i++) { 18 | console.log('\t' +torrent.files[i].name); 19 | } 20 | 21 | //reccursiveCall() 22 | 23 | }); 24 | 25 | metadataFetcher.on('timeout', function (infohash) { 26 | console.log("Metadata Timeout: " + infohash.toString('hex')) 27 | 28 | //reccursiveCall() 29 | }); 30 | 31 | function reccursiveCall() { 32 | if (count == 1) { 33 | count++ 34 | 35 | peerDiscovery.destroy(); 36 | peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 37 | metadataFetcher.register(INFO_HASH2, peerDiscovery) 38 | peerDiscovery.lookup(INFO_HASH2); 39 | } 40 | else 41 | if (count == 2) { 42 | count++ 43 | 44 | peerDiscovery.destroy(); 45 | peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 46 | metadataFetcher.register(INFO_HASH1, peerDiscovery) 47 | peerDiscovery.lookup(INFO_HASH1); 48 | } 49 | 50 | else 51 | if (count == 3) { 52 | count++ 53 | 54 | peerDiscovery.destroy(); 55 | peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 56 | 
metadataFetcher.register(INFO_HASH2, peerDiscovery) 57 | peerDiscovery.lookup(INFO_HASH2); 58 | } 59 | else 60 | if (count == 4) { 61 | count++ 62 | 63 | peerDiscovery.destroy(); 64 | peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 65 | metadataFetcher.register(INFO_HASH1, peerDiscovery) 66 | peerDiscovery.lookup(INFO_HASH1); 67 | } 68 | } 69 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 70 | var peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 71 | metadataFetcher.register(INFO_HASH1, peerDiscovery) 72 | //try through torcache (its faster) 73 | metadataFetcher.downloadMetadataFromTracker(INFO_HASH1) 74 | peerDiscovery.lookup(INFO_HASH1); 75 | -------------------------------------------------------------------------------- /src/test/testPeerDiscovery.js: -------------------------------------------------------------------------------- 1 | var PeerDiscovery = require('../lib/PeerDiscovery'); 2 | const INFO_HASH1 = '5636cd5dadf6672ae29e538e5c82ed5e4a2bd562'; // ubuntu-16.04.1-server-amd64.iso 3 | const INFO_HASH2 = 'a236f822243ac8356084b0d9f7a0c2a11c06b789' 4 | const INFO_HASH32 = '726b4809351adf6fedc6ad779762829bf5512ae1' 5 | var config = require('../../config'); 6 | 7 | 8 | var peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 9 | var count = 1; 10 | 11 | function onPeer(peer, infohash, from) { 12 | console.log('Infohash: ' + infohash.toString("hex") + ' found potential peer ' + peer.host + ':' + peer.port + ' through ' + from.address + ':' + from.port); 13 | } 14 | 15 | function onTimeout(infohash) { 16 | console.log('Discovery ended for ', infohash.toString('hex')); 17 | if (count == 1) { 18 | count++ 19 | 20 | this.destroy(); 21 | var peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 22 | peerDiscovery.addListener('peer', onPeer); 23 | peerDiscovery.addListener('timeout', onTimeout); 
24 | peerDiscovery.lookup(INFO_HASH2); 25 | } 26 | else 27 | if (count == 2) { 28 | count++ 29 | 30 | this.destroy(); 31 | var peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 32 | peerDiscovery.addListener('peer', onPeer); 33 | peerDiscovery.addListener('timeout', onTimeout); 34 | peerDiscovery.lookup(INFO_HASH1); 35 | } 36 | 37 | else 38 | if (count == 3) { 39 | count++ 40 | 41 | this.destroy(); 42 | var peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 43 | peerDiscovery.addListener('peer', onPeer); 44 | peerDiscovery.addListener('timeout', onTimeout); 45 | peerDiscovery.lookup(INFO_HASH2); 46 | } 47 | else 48 | if (count == 4) { 49 | count++ 50 | 51 | this.destroy(); 52 | var peerDiscovery = new PeerDiscovery(config.DEFAULT_PEER_DISCOVERY_OPTIONS); 53 | peerDiscovery.addListener('peer', onPeer); 54 | peerDiscovery.addListener('timeout', onTimeout); 55 | peerDiscovery.lookup(INFO_HASH32); 56 | } 57 | } 58 | 59 | peerDiscovery.addListener('peer', onPeer); 60 | peerDiscovery.addListener('timeout', onTimeout); 61 | peerDiscovery.lookup(INFO_HASH1); 62 | 63 | 64 | -------------------------------------------------------------------------------- /src/test/testTorcache.js: -------------------------------------------------------------------------------- 1 | var parseTorrent = require('parse-torrent') 2 | 3 | parseTorrent.remote("http://itorrents.org/torrent/B415C913643E5FF49FE37D304BBB5E6E11AD5101.torrent", function (err, parsedTorrent) { 4 | if (err) throw err 5 | 6 | var torrent = _parseMetadata(parsedTorrent) 7 | console.log(parsedTorrent) 8 | }) 9 | 10 | function _parseMetadata(parsedTorrent) { 11 | var files = []; 12 | 13 | if (parsedTorrent.hasOwnProperty('files')) { 14 | 15 | // multiple files 16 | var l = parsedTorrent.files.length; 17 | for (var i = 0; i < l; i++) { 18 | files.push( 19 | { 20 | name: parsedTorrent.files[i].path, 21 | size: parsedTorrent.files[i].length 22 | }); 23 | } 24 | } 25 | 26 | return { 27 
| infohash: parsedTorrent.infoHash, 28 | name: parsedTorrent.name, 29 | files: files 30 | } 31 | } --------------------------------------------------------------------------------