├── .gitmodules ├── data └── .gitkeep ├── logs └── .gitkeep ├── config ├── .gitkeep ├── aria2 │ └── aria2.conf └── database.js ├── torrent └── .gitkeep ├── .gitignore ├── TODO ├── export_csv.sh ├── Dockerfile ├── models ├── Classifier.js └── Torrent.js ├── package.json ├── docker-compose.yml ├── ecosystem.config.js ├── crawlDHT.js ├── LICENSE ├── trainer.js ├── loadDHT.js ├── classifier.js ├── loadTorrent.js ├── README.md ├── categorize.js └── documentation └── architecture.svg /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /torrent/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | logs/* 3 | !logs/.gitkeep 4 | torrent/* 5 | !torrent/.gitkeep 6 | data/* 7 | !data/.gitkeep 8 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | Improve categorize script 2 | => Can we have category based on torrent hash from an API??? 
3 | 4 | Improve code base 5 | => classifier + categorize helper -------------------------------------------------------------------------------- /export_csv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker exec -i mongo mongoexport --db bitcannon --collection torrents --type=csv --fields _id,title,category,details,size,files,imported,lastmod --out /export.csv 4 | docker cp mongo:/export.csv . 5 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM keymetrics/pm2:6-jessie 2 | 3 | RUN groupadd -r app && useradd -r -g app app 4 | 5 | ADD . /var/www 6 | WORKDIR /var/www 7 | 8 | RUN npm install --no-optional 9 | CMD [ "pm2-runtime", "start", "ecosystem.config.js" ] 10 | -------------------------------------------------------------------------------- /models/Classifier.js: -------------------------------------------------------------------------------- 1 | var mongoose = require('mongoose'); 2 | 3 | var ClassifierSchema = mongoose.Schema({ 4 | raw: { type: mongoose.Schema.Types.Mixed, default: {} }, 5 | updated : { type: Date, default: Date.now, index: true } 6 | }); 7 | var Classifier = mongoose.model('Classifier',ClassifierSchema); 8 | 9 | module.exports = Classifier; 10 | -------------------------------------------------------------------------------- /config/aria2/aria2.conf: -------------------------------------------------------------------------------- 1 | dir=/data 2 | save-session=/session 3 | check-integrity=true 4 | log=- 5 | log-level=error 6 | rpc-listen-all=true 7 | rpc-secure=false 8 | dht-listen-port=6900 9 | enable-rpc=true 10 | enable-dht=true 11 | enable-dht6=true 12 | bt-metadata-only=true 13 | bt-save-metadata=true 14 | bt-stop-timeout=120 15 | on-download-complete=/oncomplete.sh 16 | seed-ratio=0 17 | seed-time=0 18 | 
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dht-bay", 3 | "version": "1.0.1", 4 | "scripts": { 5 | "start": "pm2 start", 6 | "stop": "pm2 delete all" 7 | }, 8 | "dependencies": { 9 | "aria2": "^3.0.0", 10 | "bittorrent-dht": "^7.4.0", 11 | "bluebird": "^3.4.6", 12 | "bunyan": "^1.8.4", 13 | "chokidar": "^1.6.1", 14 | "cron": "^1.1.1", 15 | "glob": "^7.1.2", 16 | "magnet-uri": "^5.1.4", 17 | "minimist": "^1.2.0", 18 | "mongoose": "^4.6.5", 19 | "natural": "^0.4.0", 20 | "pm2": "^3.1.2", 21 | "read-torrent": "^1.3.0", 22 | "redis": "^2.6.2" 23 | }, 24 | "devDependencies": { 25 | "heapdump": "~0.3.7" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /models/Torrent.js: -------------------------------------------------------------------------------- 1 | var mongoose = require('mongoose'); 2 | 3 | var TorrentSchema = mongoose.Schema({ 4 | _id: { type: String, index: true }, 5 | title: { type: String, index: true }, 6 | category: { type: String, default: "Unknown", index: true }, 7 | details: { type: [String], default: [] }, 8 | size: { type: Number, default: 0 }, 9 | files: { type: [String], default: [] }, 10 | swarm: { 11 | seeders: { type: Number, default: 0, index: true }, 12 | leechers: { type: Number, default: 0 } 13 | }, 14 | imported: {type: Date, default: Date.now, index: true}, 15 | lastmod: {type: Date, default: Date.now, index: true} 16 | }); 17 | var Torrent = mongoose.model('Torrent',TorrentSchema); 18 | 19 | module.exports = Torrent; 20 | 21 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | dhtbay: 4 | build: . 
5 | image: dhtbay 6 | container_name: dhtbay 7 | restart: always 8 | links: 9 | - aria2 10 | - mongo 11 | - redis 12 | ports: 13 | - "6881:6881/udp" 14 | volumes: 15 | - ./torrent:/var/www/torrent 16 | aria2: 17 | image: flyersweb/aria2 18 | container_name: dhtaria2 19 | restart: always 20 | volumes: 21 | - ./torrent:/data 22 | - ./config/aria2:/etc/aria2 23 | ports: 24 | - "6900:6800" 25 | mongo: 26 | image: mongo:3.0 27 | container_name: mongo 28 | restart: always 29 | volumes: 30 | - ./data:/data/db 31 | redis: 32 | image: redis:3 33 | container_name: redis 34 | restart: always 35 | -------------------------------------------------------------------------------- /ecosystem.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | apps : [{ 3 | name: "loadDHT", 4 | script: "./loadDHT.js", 5 | env: { 6 | NODE_ENV: "development", 7 | }, 8 | env_production: { 9 | NODE_ENV: "production", 10 | } 11 | }, { 12 | name: "loadTorrent", 13 | script: "./loadTorrent.js", 14 | args: "--watch", 15 | env: { 16 | NODE_ENV: "development", 17 | }, 18 | env_production: { 19 | NODE_ENV: "production", 20 | } 21 | }, { 22 | name: "categorize", 23 | script: "./categorize.js", 24 | env: { 25 | NODE_ENV: "development", 26 | }, 27 | env_production: { 28 | NODE_ENV: "production", 29 | } 30 | }, { 31 | name: "crawlDHT", 32 | script: "./crawlDHT.js", 33 | env: { 34 | NODE_ENV: "development", 35 | }, 36 | env_production: { 37 | NODE_ENV: "production", 38 | } 39 | }] 40 | } -------------------------------------------------------------------------------- /crawlDHT.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const config = require('./config/database'); 4 | 5 | const DHT = require('bittorrent-dht'); 6 | 7 | const redis = require("redis"); 8 | const client = redis.createClient(config.redis.port, config.redis.host, config.redis.options); 9 | 10 | const bunyan = 
require("bunyan"); 11 | const logger = bunyan.createLogger({name: "crawler"}); 12 | 13 | const dht = new DHT(); 14 | 15 | dht.listen(6881, () => { 16 | logger.info('now listening'); 17 | logger.info(dht.address()); 18 | }); 19 | 20 | dht.on('ready', () => { 21 | logger.info('now ready'); 22 | }); 23 | 24 | dht.on('announce', (peer, infoHash) => { 25 | logger.info(`announce : ${peer.host}:${peer.port} : ${infoHash.toString('hex')}`); 26 | dht.lookup(infoHash); 27 | client.publish("DHTS", infoHash.toString('hex')); 28 | }); 29 | 30 | dht.on('peer', (peer, infoHash, from) => { 31 | logger.debug(`peer : ${peer.host}:${peer.port} : ${infoHash.toString('hex')}`); 32 | }); 33 | 34 | dht.on('error', (err) => { 35 | logger.error(err); 36 | dht.destroy(); 37 | }); 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | 26 | -------------------------------------------------------------------------------- /trainer.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const config = require('./config/database'); 4 | 5 | const Promise = require("bluebird"); 6 | 7 | const mongoose = require('mongoose'); 8 | mongoose.Promise = Promise; 9 | mongoose.connect(config.db.uri); 10 | 11 | const path = require('path'); 12 | 13 | const CronJob = require("cron").CronJob; 14 | 15 | const Torrent = require('./models/Torrent.js'); 16 | const Classifier = require('./models/Classifier.js'); 17 | 18 | const natural = require("natural"); 19 | let classifier = new natural.BayesClassifier(); 20 | 21 | const bunyan = require("bunyan"); 22 | const logger = bunyan.createLogger({name: "trainer"}); 23 | 24 | const filter = { $nor: [ { 'category' : /Unknown/ }, { 'category' : /Other/ } ] }; 25 | 26 | const job = new CronJob("* 30 * * * *", function() { // run each 30 min 27 | const cursor = Torrent.find(filter).sort({'imported': -1}).cursor(); 28 | cursor.eachAsync(torrent => { 29 | logger.info(`Adding ${torrent._id} training`); 30 | if(!torrent.files) return Promise.reject(`Torrent ${torrent._id} has no files!`); 31 | const exts = torrent.files 32 | .map(file => path.extname(file).toLowerCase()) 33 | .filter(ext => ext.length > 0) // no empty 34 | .filter(ext => ext.length < config.limitExt) // with min length 35 | return Promise.resolve(classifier.addDocument(exts, torrent.category)); 36 | }) 37 | .then(() => Promise.resolve(classifier.train())) 38 | .then(() => Classifier.findOneAndUpdate( {}, { $set : { 'raw' : JSON.stringify(classifier) } }, { 
upsert : true })) 39 | .catch(err => Promise.reject(logger.error(err))) 40 | }); 41 | job.start(); -------------------------------------------------------------------------------- /loadDHT.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const config = require('./config/database'); 4 | 5 | const Promise = require('bluebird'); 6 | 7 | const Aria2 = require('aria2'); 8 | const fs = require('fs'); 9 | 10 | const magnet = require('magnet-uri'); 11 | 12 | const redis = require('redis'); 13 | Promise.promisifyAll(redis.RedisClient.prototype); 14 | const client1 = redis.createClient(config.redis.port, config.redis.host, 15 | config.redis.options); 16 | const client2 = redis.createClient(config.redis.port, config.redis.host, 17 | config.redis.options); 18 | 19 | const bunyan = require('bunyan'); 20 | const logger = bunyan.createLogger({ name: 'loader' }); 21 | 22 | const MAGNET_TEMPLATE = 23 | magnet.encode({ xt: 'urn:btih:{DHTHASH}', tr: config.trackers }) 24 | 25 | const aria2 = new Aria2(config.aria2); 26 | 27 | const aria2Options = 28 | { 29 | 'bt-metadata-only': 'true', 30 | 'bt-save-metadata': 'true', 31 | 'follow-torrent': 'false', 32 | 'seed-time': 0 33 | } 34 | 35 | client1.on('subscribe', (channel, count) => { 36 | logger.info(`Subscribed : ${channel}`); 37 | }) 38 | 39 | client1.on('message', (channel, message) => { 40 | let magnetLink = MAGNET_TEMPLATE; 41 | return Promise.resolve(message) 42 | .then(hash => { 43 | return new Promise((resolve, reject) => { 44 | return client2.get(hash, (err, reply) => { 45 | if (err) reject(err); 46 | if (reply) reject(`${hash} already present`); 47 | return client2.setex(hash, 60, "OK", () => { 48 | resolve(hash) 49 | }) 50 | }) 51 | }) 52 | }) 53 | .then(hash => { 54 | return new Promise((resolve, reject) => { 55 | if (!hash) 56 | reject('No torrent in queue'); 57 | const filename = 58 | `${__dirname}/torrent/${hash.toString().toUpperCase()}.torrent`; 59 | 
magnetLink = MAGNET_TEMPLATE.replace( 60 | '{DHTHASH}', hash.toString().toUpperCase()); 61 | if (fs.existsSync(filename)) { 62 | reject(`File ${filename} already exists`); 63 | } 64 | resolve(magnetLink) 65 | }); 66 | }) 67 | .then(() => aria2.open()) 68 | .then(() => aria2.getVersion()) 69 | .then(() => aria2.addUri([magnetLink], aria2Options)) 70 | .then(res => Promise.resolve( 71 | logger.info(`Added : ${magnetLink} => ${res}`))) 72 | .then(() => aria2.close()) 73 | .catch((err) => logger.error(err)); 74 | }); 75 | 76 | client1.subscribe('DHTS'); 77 | -------------------------------------------------------------------------------- /classifier.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const config = require('./config/database'); 4 | 5 | const Promise = require("bluebird"); 6 | 7 | const mongoose = require('mongoose'); 8 | mongoose.Promise = Promise; 9 | mongoose.connect(config.db.uri); 10 | 11 | const path = require('path'); 12 | 13 | const CronJob = require("cron").CronJob; 14 | 15 | const natural = require('natural'); 16 | 17 | const Torrent = require('./models/Torrent.js'); 18 | const Classifier = require('./models/Classifier.js'); 19 | 20 | const bunyan = require("bunyan"); 21 | const logger = bunyan.createLogger({name: "classifier"}); 22 | 23 | function precision(a) { 24 | let e = 1; 25 | while (Math.round(a * e) / e !== a) e *= 10; 26 | return Math.round(Math.log(e) / Math.LN10); 27 | }; 28 | 29 | let category = "Unknown"; 30 | const filter = { 'category' : /Unknown/ }; 31 | 32 | 33 | const job = new CronJob("30 * * * * *", function() { // run each 30 seconds 34 | Classifier.findOne({}) 35 | .then(dbClassifier => { 36 | if(!dbClassifier) return Promise.reject("Unavailable classifier!"); 37 | return Promise.resolve(natural.BayesClassifier.restore(JSON.parse(dbClassifier.raw))); 38 | }) 39 | .then(classifier => { 40 | const cursor = Torrent.find(filter).sort({'imported': 
-1}).limit(100).cursor(); 41 | return cursor.eachAsync(torrent => { 42 | logger.info(`Treating ${torrent._id} categorization`) 43 | if(!torrent.files) return Promise.reject(`Torrent ${torrent._id} has no files!`); 44 | const exts = torrent.files 45 | .map(file => path.extname(file).toLowerCase()) 46 | .filter(ext => ext.length > 0) // no empty 47 | .filter(ext => ext.length < config.limitExt) // with min length 48 | 49 | if(!exts.length) return Promise.reject(`No extensions for torrent ${torrent._id}`); 50 | const classifications = classifier.getClassifications(exts); 51 | if(classifications.length && (classifications[0].value * Math.pow(10,8) > 1)) { 52 | const valA = classifications[0].value, 53 | valB = classifications[1].value; 54 | // Detect incertitude to limit classification 55 | const cprecision = precision(valA); 56 | const valAprecision = valA*Math.pow(10,cprecision), 57 | valBprecision = valB*Math.pow(10,cprecision); 58 | if( ((valAprecision-valBprecision)/valAprecision) > 0.4 ) { 59 | category=classifications[0].label; 60 | } 61 | } 62 | torrent.category = category; 63 | return torrent.save() 64 | }) 65 | }) 66 | .catch((err) => Promise.reject(logger.error(err))) 67 | }); 68 | job.start(); -------------------------------------------------------------------------------- /loadTorrent.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const config = require('./config/database'); 4 | 5 | const Promise = require('bluebird'); 6 | 7 | const mongoose = require('mongoose'); 8 | mongoose.Promise = Promise; 9 | mongoose.connect(config.db.uri); 10 | 11 | const fs = require('fs'); 12 | Promise.promisifyAll(fs); 13 | 14 | const path = require('path'); 15 | 16 | const minimist = require('minimist'); 17 | 18 | const rt = require('read-torrent'); 19 | 20 | const Torrent = require('./models/Torrent.js'); 21 | 22 | const bunyan = require('bunyan'); 23 | const logger = bunyan.createLogger({ 24 | name: 
'torrentLoader' 25 | }); 26 | 27 | const chokidar = require('chokidar'); 28 | const glob = require('glob'); 29 | 30 | const TORRENT_PATH = `${__dirname}/torrent`; 31 | const TORRENT_GLOB = `${TORRENT_PATH}/*.torrent`; 32 | 33 | function loadTorrent(fsfile) { 34 | logger.info(`File ${fsfile} treatment in progress...`); 35 | return new Promise((resolve, reject) => { 36 | rt(fsfile, (err, ftorrent) => { 37 | if (err) { 38 | reject(err) 39 | } 40 | resolve(ftorrent); 41 | }) 42 | }) 43 | .then(ftorrent => [ftorrent, Torrent.findById(ftorrent.infoHash).exec()]) 44 | .spread((ftorrent, res) => 45 | (res) ? Promise.reject('TEXISTS') : Promise.resolve(ftorrent)) 46 | .then(ftorrent => { 47 | return [ 48 | ftorrent, 49 | new Torrent({ 50 | '_id': ftorrent.infoHash, 51 | 'title': ftorrent.name, 52 | 'details': ftorrent.announce, 53 | 'size': ftorrent.length, 54 | 'files': ftorrent.files.map(f => f.path), 55 | 'imported': new Date() 56 | }).save() 57 | ] 58 | }) 59 | .spread((ftorrent, res) => Promise.resolve( 60 | logger.info(`File ${ftorrent.infoHash} added`))) 61 | .catch(err => (err === 'TEXISTS') ? 
Promise.resolve( 62 | logger.info(`File ${fsfile} already loaded`)) : Promise.reject(err)) 63 | .then(() => fs.unlinkAsync(fsfile)) 64 | .catch(err => Promise.reject(logger.error(err))) 65 | } 66 | 67 | const argv = minimist(process.argv.slice(2)); 68 | 69 | if (argv.f || argv.force) { 70 | return glob(TORRENT_GLOB, { 71 | silent: true, 72 | absolute: false 73 | }, (err, fsfiles) => 74 | Promise.all(fsfiles.map(fsfile => loadTorrent(fsfile)))); 75 | } else if (argv.w || argv.watch) { 76 | const watcher = chokidar.watch(TORRENT_GLOB); 77 | watcher.on('add', (fsfile) => loadTorrent(fsfile)); 78 | } else { 79 | process.stdout.write( 80 | `Usage : 81 | + Watch mode : loadTorrent --watch 82 | + Force load : loadTorrent --force` 83 | ); 84 | } 85 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dht-bay 2 | A DHT crawler, torrent indexer and search portal all in nodeJS 3 | 4 | DEPENDENCIES 5 | ------------ 6 | 7 | This project works fine with : 8 | - node 6.3.1 9 | - npm 3.10.3 10 | - redis-server 2.8.17 11 | - mongod 2.4.10 12 | - aria2 1.33.1 13 | 14 | INSTALL 15 | ------- 16 | 17 | #### Good to know 18 | 19 | You should open your 6881/udp port to allow the crawler to have access to DHT network. 20 | 21 | #### Use docker 22 | 23 | Or you can just use the docker project version and run it using : 24 | 25 | ``` 26 | docker-compose up -d 27 | ``` 28 | 29 | It will automatically launch redis, mongo and aria2 then start crawling and categorizing for you. You'll have your DHT Crawler up and running. Crawling may take some time so be patient. 30 | 31 | ARCHITECTURE 32 | ------------ 33 | 34 | This diagram presents an overview of the project architecture and how each piece communicate with each other. 
35 | 36 | ![DHTBay Architecture](./documentation/architecture.svg) 37 | 38 | CONTENT 39 | ------- 40 | 41 | The project is composed of 4 modules as presented. Each module is independent and can be used separately without problem. 42 | 43 | + **crawlDHT.js** is responsible for crawling hashes from the DHT network. It publishes hashes on a redis channel called *DHTS*. It also provides a routing table backup system saving it every 10 minutes in a mongo collection called table. 44 | + **loadDHT.js** is responsible for loading hashes from the redis channel *DHTS* and for downloading torrent metadata for indexing. It relies heavily on the *aria2* tool and tries to download it from torcache, torrage and through DHT. 45 | + **loadTorrent.js** is responsible for saving metadata into our mongo instance in the torrents collection. This will be our base data. 46 | + **categorize.js** will try to categorize crawled torrents depending on file extensions. Because this module only takes a limited number of extensions into account, you can use the classifier too. 47 | + **classifier.js** is a bayesian classifier that will classify torrents that couldn't be classified by the previous one. In order to work, the classifier needs to be trained first. 48 | + **trainer.js** is the bayesian classifier trainer; based on the categorize script's classification, it helps classify unknown torrents. 49 | 50 | You can use the bayesian classifier once you already have a bunch of torrents indexed. The more samples you have, the more accurate it will be. 51 | 52 | Please fork it, and use it everywhere you can. 53 | 54 | IMPROVEMENTS 55 | ------------ 56 | 57 | + Improve categorization to support more extensions. Use a [categorized API](https://github.com/FlyersWeb/file-extension-api). 58 | 59 | Have fun. 
60 | 61 | @flyersweb 62 | -------------------------------------------------------------------------------- /categorize.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const config = require('./config/database'); 4 | 5 | const Promise = require("bluebird"); 6 | 7 | const mongoose = require('mongoose'); 8 | mongoose.Promise = Promise; 9 | mongoose.connect(config.db.uri); 10 | 11 | const path = require('path'); 12 | 13 | const Torrent = require('./models/Torrent.js'); 14 | 15 | const bunyan = require("bunyan"); 16 | const logger = bunyan.createLogger({name: "categorize"}); 17 | 18 | const CronJob = require("cron").CronJob; 19 | 20 | const filter = { 'category' : /Unknown/ }; 21 | 22 | const dynamicToIgnore = () => { 23 | const pad = (n, width, z) => { 24 | z = z || '0'; 25 | n = n + ''; 26 | return n.length >= width ? n : new Array(width - n.length + 1).join(z) + n; 27 | } 28 | const result = []; 29 | // need to refactor this shit 30 | for(var i=2; i<100; i++) { 31 | result.push('.s'+pad(i,2)); 32 | result.push('.r'+pad(i,2)); 33 | result.push('.z'+pad(i,2)); 34 | result.push('.'+pad(i,2)); 35 | } 36 | for(var i=2; i<1000; i++) { 37 | result.push('.s'+pad(i,3)); 38 | result.push('.r'+pad(i,3)); 39 | result.push('.z'+pad(i,3)); 40 | result.push('.'+pad(i,3)); 41 | } 42 | return result; 43 | } 44 | 45 | function findCategoryBasedOnExtensions(exts) { 46 | return Object.keys(config.extToCateg) 47 | .map(categ => { // browse category extensions 48 | if ( config.extToCateg[categ].some(c => exts.includes(c)) ) 49 | return categ; 50 | }) 51 | .find((c) => c !== undefined); // find the first category 52 | } 53 | 54 | const job = new CronJob("30 * * * * *", function() { // run each 30 seconds 55 | const cursor = Torrent.find(filter).sort({'imported': -1}).limit(100).cursor(); 56 | cursor.eachAsync(torrent => { 57 | logger.info(`Treating ${torrent._id} categorization`); 58 | if(!torrent.files) { 59 | 
logger.info(`Torrent ${torrent._id} has no files!`); 60 | return Promise.resolve(torrent); 61 | } 62 | const exts = torrent.files 63 | .map(file => path.extname(file).toLowerCase()) 64 | .filter(ext => ext.length > 0) // no empty 65 | .filter(ext => !config.extToIgnore.includes(ext)) // no ignored 66 | .filter(ext => !dynamicToIgnore().includes(ext)) // no special ignored 67 | .filter(ext => ext.length < config.limitExt) // with min length 68 | .slice() // shallow copy 69 | .sort() // sort 70 | .reduce((p, c) => { 71 | if(p[0] !== c) return p.concat(c); 72 | return p; 73 | }, []) // deduplicate 74 | 75 | if(exts.length > 5) { 76 | return Promise.resolve(`Torrent ${torrent._id} has no too many extensions!`); 77 | } 78 | 79 | const category = findCategoryBasedOnExtensions(exts); 80 | torrent.category = category || "Unknown"; 81 | return torrent.save(); 82 | }) 83 | .then(() => Promise.resolve(logger.info(`All torrents treated`))) 84 | .catch(err => Promise.reject(logger.error(err))) 85 | }); 86 | job.start(); -------------------------------------------------------------------------------- /config/database.js: -------------------------------------------------------------------------------- 1 | var config = { 2 | db : { 3 | uri : 'mongodb://mongo:27017/bitcannon' 4 | }, 5 | redis : { 6 | port : 6379, 7 | host : 'redis', 8 | options : {} 9 | }, 10 | aria2 : { 11 | host: 'aria2', 12 | port: 6800, 13 | secure: false 14 | }, 15 | trackers: [ 16 | "udp://tracker.coppersurfer.tk:6969/announce", 17 | "udp://tracker.leechers-paradise.org:6969/announce", 18 | "udp://tracker.zer0day.to:1337/announce", 19 | "http://tracker.opentrackr.org:1337/announce", 20 | "udp://tracker.opentrackr.org:1337/announce", 21 | "udp://p4p.arenabg.com:1337/announce", 22 | "http://p4p.arenabg.com:1337/announce", 23 | "udp://9.rarbg.com:2710/announce", 24 | "http://explodie.org:6969/announce", 25 | "udp://explodie.org:6969/announce", 26 | "udp://public.popcorn-tracker.org:6969/announce", 27 | 
"udp://tracker.internetwarriors.net:1337/announce", 28 | "http://tracker.dler.org:6969/announce", 29 | "http://tracker1.wasabii.com.tw:6969/announce", 30 | "http://tracker.mg64.net:6881/announce", 31 | "http://mgtracker.org:6969/announce", 32 | "udp://tracker.mg64.net:6969/announce", 33 | "udp://mgtracker.org:2710/announce", 34 | "http://tracker2.wasabii.com.tw:6969/announce", 35 | "http://tracker.tiny-vps.com:6969/announce" 36 | ], 37 | limitExt : 8, 38 | extToIgnore : [ 39 | '.url', '.txt', '.ico', '.srt', '.gif', '.log', 40 | '.nfo', '.ass', '.lnk', '.rtf', '.bc!', 41 | '.bmp', '.m3u', '.mht', '.cue', '.sfv', '.diz', 42 | '.azw3', '.odt', '.chm', '.md5', '.idx', '.sub', 43 | '.ini', '.html', '.ssa', '.lit', '.xml', '.clpi', 44 | '.bup', '.ifo', '.htm', '.info', '.css', '.php', 45 | '.js', '.jar', '.json', '.sha', '.docx', '.csv', 46 | '.scr', '.inf', '.hdr', '.prq', '.isn', '.inx', '.tpl', 47 | '.aco', '.opa', '.dpc', '.qdl2', '.acf', '.cdx', 48 | '.iwd', '.ff', '.tmp', '.asi', '.flt', '.cfg', 49 | '.tdl', '.tta', '.ape', '.btn', '.sig', '.sql', '.db', 50 | '.zdct', '.bak', '.fxp', '.nxp', '.nsk', '.256', 51 | '.mpls', '.clpi', '.bdmv', '.cdd', '.dbf', 52 | '.vmx', '.vmsd', '.vmxf', '.nvram' 53 | ], 54 | extToCateg : { 55 | 'Picture' : ['.png', '.jpeg', '.jpg'], 56 | 'Program' : ['.exe', '.dll', '.msi', '.jar'], 57 | 'ISO' : ['.rar', '.01', '.001', 'r01', 'r001', 'z01', 'z001', '.iso', '.zip', '.dmg', '.tgz', '.gz', '.chd', '.7z', '.cab', '.apk', '.cdr', '.wbfs', '.dat', '.rar', '.lzma', '.mds', '.gho', '.ima', '.nrg', '.pkg', '.b5i', '.mdx', '.isz', '.vmdk'], 58 | 'Book' : ['.epub', '.pdf', '.cbz', '.cbr', '.cb7', '.cba', '.cbt', '.djvu', '.fb2', '.mobi', '.doc'], 59 | 'Audio' : ['.flac', '.mp3', '.m4p', '.m4r', '.m4a', '.m4b', '.ape', '.wma', '.ogg', '.dsf', '.wav'], 60 | 'Video' : ['.mp4', '.mkv', '.3gp', '.flv', '.f4v', '.avi', '.rm', '.rmvb', '.wmv', '.mov', '.mpg', '.mpeg', '.ts', '.m2ts', '.m4v', '.asf', '.vob', '.divx', '.srt'], 61 | } 62 | }; 63 | 64 | 
module.exports = config; 65 | -------------------------------------------------------------------------------- /documentation/architecture.svg: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------