├── .gitignore ├── package.json ├── downloader.js ├── feedbot.js ├── runner.js ├── feed.js ├── rss.json └── article.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bfclient", 3 | "version": "0.1.0", 4 | "author" : "Andris Reinman", 5 | "maintainers":[ 6 | { 7 | "name":"andris", 8 | "email":"andris@node.ee" 9 | } 10 | ], 11 | "dependencies": { 12 | "fetch": "*", 13 | "nodepie": "*", 14 | "node-gearman":"*", 15 | "redis": "*", 16 | "iconv": "*", 17 | "rai": "*", 18 | "node-ffi": "*" 19 | } 20 | } -------------------------------------------------------------------------------- /downloader.js: -------------------------------------------------------------------------------- 1 | 2 | // handle parent death 3 | /* Parent-death guard: binds prctl through node-ffi and calls prctl(1, 15) - per the inline note below, 1 is PR_SET_PDEATHSIG and 15 is SIGTERM - then installs a SIGTERM handler that exits with code 1. NOTE(review): the prctl result is stored in returned but never checked. */ (function(){ 4 | var FFI = require('node-ffi'); 5 | var current = new FFI.Library(null, {"prctl": ["int32", ["int32", "uint32"]]}) 6 | 7 | //1: PR_SET_PDEATHSIG, 15: SIGTERM 8 | var returned = current.prctl(1,15); 9 | 10 | process.on('SIGTERM',function(){ 11 | //do something interesting 12 | process.exit(1); 13 | }); 14 | })(); 15 | 16 | 17 | var redis = require("redis").createClient(), 18 | articlelib = require("./article"); 19 | 20 | mainloop(); 21 | 22 | /* mainloop: polling loop. Asks articlelib.fetchArticle for one article. On error it logs the error and retries in 300 ms; when no article is returned it retries in 1000 ms; otherwise it logs the title, publishes the article as JSON on the redis channel named article, and schedules the next poll in 300 ms. NOTE(review): redis.publish is called without a callback, so a failed publish is not observed here. */ function mainloop(){ 23 | 24 | articlelib.fetchArticle(function(err, article){ 25 | if(err){ 26 | console.log(err); 27 | setTimeout(mainloop, 300); 28 | return; 29 | } 30 | if(!article){ 31 | //nothing found 32 | setTimeout(mainloop, 1*1000); 33 | return; 34 | } 35 | 36 | console.log("Publishing "+article.title); 37 | redis.publish("article", JSON.stringify(article)); 38 | 39 | /* 40 | console.log(article.title); 41 | console.log(new Array(article.title.length+1).join("=")); 42 | console.log("["+article.url+"]"); 43 | console.log("["+article.domain+"]"); 
44 | console.log(article.content); 45 | console.log(article.lemma); 46 | */ 47 | 48 | setTimeout(mainloop, 300); 49 | }); 50 | 51 | } -------------------------------------------------------------------------------- /feedbot.js: -------------------------------------------------------------------------------- 1 | // handle parent death 2 | /* Parent-death guard, same code as in downloader.js: calls prctl(1, 15) through node-ffi and exits with code 1 on SIGTERM. NOTE(review): the prctl result is stored in returned but never checked. */ (function(){ 3 | var FFI = require('node-ffi'); 4 | var current = new FFI.Library(null, {"prctl": ["int32", ["int32", "uint32"]]}) 5 | 6 | //1: PR_SET_PDEATHSIG, 15: SIGTERM 7 | var returned = current.prctl(1,15); 8 | 9 | process.on('SIGTERM',function(){ 10 | //do something interesting 11 | process.exit(1); 12 | }); 13 | })(); 14 | 15 | var fs = require("fs"), 16 | feeder = require("./feed"), 17 | rssList = JSON.parse(fs.readFileSync(__dirname + "/rss.json")); 18 | 19 | // start the bot 20 | initialize(); 21 | 22 | /* NOTE(review): this listing is truncated inside the loop header of initialize below. The text resumes in the middle of a helper that builds a duration string from minutes and seconds, so the body of initialize and the head of that helper are not visible here. */ function initialize(){ 23 | // Initial start 24 | for(var i=0, len = rssList.length; i1?"s":"")); 56 | 57 | minutes = Math.floor(time / (60*1000)); 58 | time = time - minutes*(60*1000); 59 | if(minutes)resp.push(minutes+" minute"+(minutes>1?"s":"")); 60 | 61 | seconds = Math.floor(time / (1*1000)); 62 | time = time - seconds*(1*1000); 63 | resp.push(seconds+" second"+(!seconds || seconds>1?"s":"")); 64 | 65 | return resp.join(", "); 66 | } -------------------------------------------------------------------------------- /runner.js: -------------------------------------------------------------------------------- 1 | var spawn = require("child_process").spawn, 2 | RAIServer = require("rai").RAIServer; 3 | 4 | process.nextTick(createFeedbot); 5 | process.nextTick(createDownloader); 6 | process.nextTick(createStore); 7 | process.nextTick(createAPI); 8 | 9 | /* createFeedbot: spawns feedbot.js with /usr/local/bin/node (cwd = this directory), relays its stdout and stderr chunks to Tail() as trimmed UTF-8 text, and re-runs itself 1000 ms after the child exits. NOTE(review): the start message goes straight to console.log, whereas createDownloader records its start message through Tail(). */ function createFeedbot(){ 10 | console.log("Starting feedbot"); 11 | 12 | var feedbot = spawn('/usr/local/bin/node', [__dirname+'/feedbot.js'], {cwd: __dirname}); 13 | 14 | feedbot.stdout.on('data', function (data) { 15 | Tail('feedbot stdout: ' + (data || 
"").toString("utf-8").trim()) 16 | }); 17 | 18 | feedbot.stderr.on('data', function (data) { 19 | Tail('feedbot stderr: ' + (data || "").toString("utf-8").trim()); 20 | }); 21 | 22 | feedbot.on('exit', function (code) { 23 | Tail('feedbot exited with code ' + code); 24 | setTimeout(createFeedbot, 1000); 25 | }); 26 | } 27 | 28 | /* createDownloader: records a start message through Tail(), spawns downloader.js with /usr/local/bin/node (cwd = this directory), relays every stdout and stderr chunk to Tail() as trimmed UTF-8 text, and calls itself again 1000 ms after the child exits so the downloader is restarted whenever it stops. NOTE(review): the node binary path is hard-coded. */ function createDownloader(){ 29 | Tail("Starting downloader"); 30 | 31 | var downloader = spawn('/usr/local/bin/node', [__dirname+'/downloader.js'], {cwd: __dirname}); 32 | 33 | downloader.stdout.on('data', function (data) { 34 | Tail('downloader stdout: ' + (data || "").toString("utf-8").trim()); 35 | }); 36 | 37 | downloader.stderr.on('data', function (data) { 38 | Tail('downloader stderr: ' + (data || "").toString("utf-8").trim()); 39 | }); 40 | 41 | downloader.on('exit', function (code) { 42 | Tail('downloader exited with code ' + code); 43 | setTimeout(createDownloader, 1000); 44 | }); 45 | } 46 | 47 | /* createStore: same spawn, relay and respawn-after-1000-ms pattern as createDownloader, for ../bfstore/store.js run with cwd ../bfstore. Unlike createDownloader it records no start message. */ function createStore(){ 48 | var store = spawn('/usr/local/bin/node', [__dirname+'/../bfstore/store.js'], {cwd: __dirname+"/../bfstore"}); 49 | 50 | store.stdout.on('data', function (data) { 51 | Tail('store stdout: ' + (data || "").toString("utf-8").trim()); 52 | }); 53 | 54 | store.stderr.on('data', function (data) { 55 | Tail('store stderr: ' + (data || "").toString("utf-8").trim()); 56 | }); 57 | 58 | store.on('exit', function (code) { 59 | Tail('store exited with code ' + code); 60 | setTimeout(createStore, 1000); 61 | }); 62 | } 63 | 64 | /* createAPI: same spawn, relay and respawn-after-1000-ms pattern for the API process. NOTE(review): the script is loaded from ../bfstore/api.js while the working directory is ../bfapi - confirm which directory is intended. */ function createAPI(){ 65 | var api = spawn('/usr/local/bin/node', [__dirname+'/../bfstore/api.js'], {cwd: __dirname+"/../bfapi"}); 66 | 67 | api.stdout.on('data', function (data) { 68 | Tail('api stdout: ' + (data || "").toString("utf-8").trim()); 69 | }); 70 | 71 | api.stderr.on('data', function (data) { 72 | Tail('api stderr: ' + (data || "").toString("utf-8").trim()); 73 | }); 74 | 75 | api.on('exit', function (code) { 76 | Tail('api exited with code ' + code); 77 | setTimeout(createAPI, 1000); 
78 | }); 79 | } 80 | 81 | var tail = []; 82 | /* Tail: appends msg to the in-memory tail array, drops the oldest entry once more than 250 are held, and echoes msg to the console. */ function Tail(msg){ 83 | tail.push(msg); 84 | if(tail.length > 250){ 85 | tail.shift(); 86 | } 87 | console.log(msg); 88 | } 89 | 90 | /* Control server: listens on port 8082, greets each client with Hello! and waits for commands. NOTE(review): the listing is damaged inside the TAIL command handler below - the loop header runs straight into a text-cleaning chain that belongs to a different file, so the rest of this handler is not visible. */ var server = new RAIServer(); 91 | server.listen(8082); 92 | 93 | server.on("connect", function(client){ 94 | 95 | // Greet the client 96 | client.send("Hello!"); 97 | 98 | // Wait for a command 99 | client.on("command", function(command, payload){ 100 | 101 | command = (command || "").toString("utf-8").trim().toUpperCase(); 102 | 103 | if(command == "TAIL"){ 104 | for(var i=0, len=tail.length; i]*\>/g, " "). 64 | replace(/\ /g, " "). 65 | replace(/[^A-Za-z0-9õäöüšžÕÄÖÜŽŠ]/g," "). 66 | trim(). 67 | replace(/\s+/g, ", "). 68 | trim(). 69 | toLowerCase(); 70 | 71 | /* Submits the cleaned text to the gearman function named lemma, accumulates the reply chunks as a binary string and re-decodes them as UTF-8 on end. NOTE(review): the end, timeout and error handlers below each invoke callback and nothing guards against more than one of them firing - confirm node-gearman never emits end or error after the 10 s timeout. */ var article = "", 72 | job = gearman.submitJob("lemma", text); 73 | 74 | job.on("data", function(data){ 75 | article += data.toString("binary"); 76 | }); 77 | 78 | job.on("end", function(){ 79 | 80 | article = new Buffer(article, "binary").toString("utf-8"). 81 | replace(/[,\s]+/g, " "). 82 | trim(); 83 | 84 | process.nextTick(callback.bind(this, null, article)); 85 | }); 86 | 87 | job.setTimeout(10*1000, function(){ 88 | process.nextTick(callback.bind(this, new Error("Gearman worker timeout"))); 89 | }); 90 | 91 | job.on("error", function(err){ 92 | process.nextTick(callback.bind(this, err)); 93 | }); 94 | } 95 | 96 | /* NOTE(review): parseUrl is truncated by the listing inside its loop header; the text resumes inside a tag-stripping helper that marks block-level tags with U+0000 before decoding entities. */ function parseUrl(url){ 97 | var urlparts = urllib.parse(url, true, true), 98 | keys = Object.keys(urlparts.query || {}), 99 | key; 100 | 101 | for(var i=0, len = keys.length; i/g, ""). 175 | replace(/<\/?(?:p|h|ul|ol|div|br|hr|table|tr)\d?( [^>]*)?>/gi,"\u0000"). 176 | replace(/<[^>]*>(?!<)/g, ""). 177 | replace(/<[^>]*>/g, " "); 178 | 179 | html = decodeHTMLEntities(html). 180 | replace(//g, ">"). 182 | replace(/\s+/g, " "). 
// Listing residue kept verbatim: closing lines of the preceding tag-stripping helper, whose head lies outside this span.
// 183 | replace(/(\s*\u0000\s*)+/g, "\n").trim(); 184 | 185 | return html; 186 | } 187 |

/**
 * Replaces HTML character references in `text` with the characters they name.
 *
 * Handles decimal (`&#65;`), hexadecimal (`&#x41;` / `&#X41;`) and named
 * (`&amp;`) references. Named references are looked up in the file-level
 * `HTMLEntities` table, which maps an entity name to a numeric code point.
 *
 * Anything that is not a well-formed, resolvable reference is left exactly as
 * written. The previous version turned such input into U+0000, which the
 * calling tag-stripper uses as its paragraph marker, so stray ampersands and
 * unknown entities became spurious line breaks and swallowed the text up to
 * the next semicolon.
 *
 * @param {String} text Text that may contain HTML character references
 * @returns {String} Text with every resolvable reference decoded
 */
function decodeHTMLEntities(text){
    // Only match the three well-formed shapes; a lazy ".+?" would grab any
    // run between "&" and the next ";" (e.g. "AT&T is great;").
    return text.replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g, function(str, ent){
        var code;

        if(ent.charAt(0) === "#"){
            var marker = ent.charAt(1);
            code = (marker === "x" || marker === "X") ?
                parseInt(ent.substr(2), 16) :
                parseInt(ent.substr(1), 10);
        }else{
            // Own-property check keeps inherited keys such as "toString"
            // from being treated as entity names.
            code = Object.prototype.hasOwnProperty.call(HTMLEntities, ent) ?
                Number(HTMLEntities[ent]) : NaN;
        }

        // Unknown name, NUL, or outside the Unicode range: keep the source text.
        if(!(code >= 1 && code <= 0x10FFFF)){
            return str;
        }

        // String.fromCharCode truncates to 16 bits, so build the surrogate
        // pair by hand for supplementary-plane code points.
        if(code > 0xFFFF){
            code -= 0x10000;
            return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
        }

        return String.fromCharCode(code);
    });
}
// --------------------------------------------------------------------------------