├── .gitignore
├── defaults.json
├── .npmignore
├── demo.gif
├── index.js
├── test
├── basic.html
├── demo.js
└── test.js
├── README.md
├── package.json
└── src
├── ui.js
└── listeners.js
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules
3 |
--------------------------------------------------------------------------------
/defaults.json:
--------------------------------------------------------------------------------
1 | {
2 | "logger": {}
3 | }
4 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules
3 | .gitignore
4 | .npmignore
5 | test/
6 |
--------------------------------------------------------------------------------
/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/medialab/sandcrawler-dashboard/HEAD/demo.gif
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Sandcrawler Dashboard Public Interface
3 | * =======================================
4 | *
5 | * Just a matter of exporting the plugin function. Not something very fancy
6 | * as you might notice.
7 | */
8 | var UI = require('./src/ui.js'),
9 | listeners = require('./src/listeners.js'),
10 | defaults = require('./defaults.json'),
11 | _ = require('lodash');
12 |
13 | module.exports = function(opts) {
14 |
15 | return function(spider) {
16 | var ui = new UI();
17 |
18 | return listeners(spider, ui, _.extend({}, defaults, opts));
19 | };
20 | };
21 |
--------------------------------------------------------------------------------
/test/basic.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Basic
5 |
6 |
7 |
21 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # sandcrawler-dashboard
2 |
3 | A handy terminal dashboard displaying advanced information about one of your [sandcrawler](http://medialab.github.io/sandcrawler/) spiders.
4 |
5 | 
6 |
7 | ## Installation
8 |
9 | You can install **sandcrawler-dashboard** through npm:
10 |
11 | ```bash
12 | npm install sandcrawler-dashboard
13 | ```
14 |
15 | ## Usage
16 |
17 | ```js
18 | var sandcrawler = require('sandcrawler'),
19 | dashboard = require('sandcrawler-dashboard');
20 |
21 | var spider = sandcrawler.spider('MyFancySpider')
22 | .use(dashboard())
23 | .url('http://nicesite.org')
24 | .scraper(function($, done) {
25 | done(null, $('title').text());
26 | })
27 | .run();
28 | ```
29 |
30 | ## Options
31 |
32 | * **logger** *?object*: Any options to pass to the `sandcrawler-logger` used by the dashboard internally. Possible options can be found [here](https://github.com/Yomguithereal/sandcrawler-logger#options).
33 |
34 | *Example*
35 |
36 | ```js
37 | var sandcrawler = require('sandcrawler'),
38 | dashboard = require('sandcrawler-dashboard');
39 |
40 | var spider = sandcrawler.spider('MyFancySpider')
41 | .use(dashboard({logger: {color: 'red'}}));
42 | ```
43 |
44 | ## License
45 |
46 | MIT
47 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "sandcrawler-dashboard",
3 | "version": "0.1.1",
4 | "description": "A handy terminal dashboard plugin for sandcrawler.",
5 | "main": "index.js",
6 | "scripts": {
7 | "demo": "node ./test/demo.js",
8 | "test": "node ./test/test.js"
9 | },
10 | "repository": {
11 | "type": "git",
12 | "url": "https://github.com/medialab/sandcrawler-dashboard"
13 | },
14 | "keywords": [
15 | "sandcrawler-plugin",
16 | "dashboard"
17 | ],
18 | "contributors": [
19 | {
20 | "name": "Daniele Guido",
21 | "url": "https://github.com/danieleguido"
22 | },
23 | {
24 | "name": "Guillaume Plique",
25 | "url": "https://github.com/Yomguithereal"
26 | }
27 | ],
28 | "author": "yomguithereal ",
29 | "license": "MIT",
30 | "bugs": {
31 | "url": "https://github.com/medialab/sandcrawler-dashboard/issues"
32 | },
33 | "homepage": "https://github.com/medialab/sandcrawler-dashboard",
34 | "dependencies": {
35 | "blessed": "0.0.51",
36 | "blessed-contrib": "^1.0.11",
37 | "chalk": "^1.0.0",
38 | "lodash": "^3.1.0",
39 | "sandcrawler-logger": "0.1.1"
40 | },
41 | "devDependencies": {
42 | "express": "^4.11.2",
43 | "sandcrawler": "git+https://github.com/medialab/sandcrawler.git"
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/test/demo.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Sandcrawler Dashboard Test
3 | * ===========================
4 | *
5 | * Interactive dashboard demo (run it with `npm run demo`).
6 | */
7 | var express = require('express'),
8 | sandcrawler = require('sandcrawler'),
9 | logger = require('../index.js'),
10 | _ = require('lodash');
11 |
12 | // Helpers
/**
 * Draws a random integer between `min` and `max`, both bounds included.
 *
 * @param  {number} min - Lowest value that may be returned.
 * @param  {number} max - Highest value that may be returned.
 * @return {number}
 */
function randInt(min, max) {
  var span = max - min + 1;

  return min + Math.floor(Math.random() * span);
}
16 |
17 | // Server
// Static server exposing this directory to the spider
var app = express();
app.use('/', express.static(__dirname));

// Waits between 1 and 2.5 seconds, then rejects one job out of ten
function delayAndSometimesDiscard(req, next) {
  setTimeout(function() {
    var draw = randInt(1, 10);

    if (draw <= 9)
      return next();

    return next(new Error('discard'));
  }, randInt(2, 5) * 500);
}

// One url out of ten targets a page the static server does not have
function pickUrl(i) {
  var draw = randInt(1, 10);

  return draw > 9 ?
    'http://localhost:3002/not-found' :
    'http://localhost:3002/basic.html?' + (i + 1);
}

// Returns either a fixed object or the scraped links, on a coin flip
function scrape($, done) {
  if (Math.random() > 0.5)
    return done(null, {hello: 'world'});

  return done(null, $('.url-list a').scrape('href'));
}

// Flags one result out of eight as invalid
function sometimesInvalidate(req, res, next) {
  var draw = randInt(1, 8);

  if (draw <= 7)
    return next();

  return next(new Error('invalid-data'));
}

// Spider
var spider = sandcrawler.phantomJawa('MyJawa')
  .use(logger())
  .config({concurrency: 4, autoRetry: 'later', maxRetries: 3})
  .beforeScraping(delayAndSometimesDiscard)
  .urls(_.range(50).map(pickUrl))
  .scraper(scrape)
  .afterScraping(sometimesInvalidate);

// Listening
var server = app.listen(3002);

// The server is shut down once the spider has finished running
sandcrawler.run(spider, function(err) {
  server.close();
});
62 |
--------------------------------------------------------------------------------
/test/test.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Sandcrawler Dashboard Test
3 | * ===========================
4 | *
5 | * Actual dashboard test.
6 | */
7 | var express = require('express'),
8 | sandcrawler = require('sandcrawler'),
9 | logger = require('../index.js'),
10 | _ = require('lodash');
11 |
12 | // Helpers
/**
 * Picks a random integer in the closed interval [min, max].
 *
 * @param  {number} min - Lower bound (inclusive).
 * @param  {number} max - Upper bound (inclusive).
 * @return {number}
 */
function randInt(min, max) {
  var width = max - min + 1,
      offset = Math.floor(Math.random() * width);

  return offset + min;
}
16 |
17 | // Server
// Static server exposing this directory to the spider
var app = express();
app.use('/', express.static(__dirname));

// Waits between 1 and 5 seconds, then rejects one job out of ten
function delayAndSometimesDiscard(req, next) {
  setTimeout(function() {
    var draw = randInt(1, 10);

    if (draw <= 9)
      return next();

    return next(new Error('discard'));
  }, randInt(2, 10) * 500);
}

// One url out of ten is a very long, non-existent one
function pickUrl(i) {
  var draw = randInt(1, 10);

  return draw > 9 ?
    'http://localhost:3002/basic/this/is/an-insupportably-long-and-inexistant/url/just-for-thesakeofitandbecauseI/can.tm' :
    'http://localhost:3002/basic.html?' + (i + 1);
}

// Returns either a fixed object or the scraped links, on a coin flip
function scrape($, done) {
  if (Math.random() > 0.5)
    return done(null, {hello: 'world'});

  return done(null, $('.url-list a').scrape('href'));
}

// Flags one result out of eight as invalid
function sometimesInvalidate(req, res, next) {
  var draw = randInt(1, 8);

  if (draw <= 7)
    return next();

  return next(new Error('invalid-data'));
}

// Spider
var spider = sandcrawler.phantomJawa('MyJawa')
  .use(logger())
  .config({concurrency: 4, autoRetry: true, maxRetries: 3})
  .beforeScraping(delayAndSometimesDiscard)
  .urls(_.range(50).map(pickUrl))
  .scraper(scrape)
  .afterScraping(sometimesInvalidate);

// Listening
var server = app.listen(3002);

// The server is shut down once the spider has finished running
sandcrawler.run(spider, function(err) {
  server.close();
});
62 |
--------------------------------------------------------------------------------
/src/ui.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Sandcrawler Dashboard UI
3 | * =========================
4 | *
5 | * Defining the blessed UI used by the plugin.
6 | */
7 | var blessed = require('blessed'),
8 | contrib = require('blessed-contrib'),
9 | _ = require('lodash');
10 |
/**
 * Dashboard UI constructor.
 *
 * Creates a blessed screen and the seven widgets composing the dashboard,
 * exposes them on the instance (`log`, `request`, `response`, `jobTable`,
 * `progressBar`, `stats`, `info`, `screen`), appends them to the screen and
 * performs a first render.
 *
 * Layout: the log pane takes the top-left 60% x 70%; request and response
 * share the bottom-left; the job table, progress bar, stats and information
 * panes fill the right-hand 40%.
 */
function UI() {
  var self = this,
      screen = blessed.screen();

  // Every pane uses the same blue line border; a new object is handed to each
  // widget so that none of them shares its options with another.
  function frame() {
    return {type: 'line', fg: 'blue'};
  }

  // Log component
  self.log = blessed.box({
    label: 'Log',
    top: '0',
    left: '0',
    border: frame(),
    width: '60%',
    height: '70%',
    wrap: true
  });

  // Buffer of already wrapped log lines, filled by the listeners
  self.log.lines = [];

  // Request component
  self.request = blessed.box({
    label: 'Request',
    top: '70%',
    left: '0',
    border: frame(),
    width: '30%',
    height: '30%'
  });

  // Response component
  self.response = blessed.box({
    label: 'Response',
    top: '70%',
    left: '30%',
    border: frame(),
    width: '30%',
    height: '30%'
  });

  // Job table component
  var jobTable = contrib.table({
    label: 'Jobs',
    top: '0',
    left: '60%',
    border: frame(),
    width: '40%',
    height: '60%',
    columnSpacing: [6, 70, 20]
  });

  // Rows currently displayed, indexed by job id
  jobTable.jobs = {};

  // Registers (or overwrites) the row of a job
  jobTable.add = function(id, rows) {
    this.jobs[id] = rows;

    return this;
  };

  // Drops the row of a job
  jobTable.remove = function(id) {
    delete this.jobs[id];

    return this;
  };

  // Retrieves the row of a job, or undefined if there is none
  jobTable.find = function(id) {
    return this.jobs[id];
  };

  // Pushes the stored rows to the widget and selects the last one
  jobTable.update = function() {
    var rows = _.values(this.jobs);

    this.setData({
      headers: ['', 'Url', 'Error'],
      data: rows
    });

    this.rows.select(rows.length - 1);
  };

  // Getting my style
  var selectedStyle = jobTable.rows.style.selected;
  selectedStyle.bg = undefined;
  selectedStyle.fg = 'white';

  jobTable.focus();
  self.jobTable = jobTable;

  // Gauge component
  self.progressBar = blessed.ProgressBar({
    label: 'Progress - 0%',
    top: '60%',
    left: '60%',
    border: frame(),
    width: '40%',
    height: '10%',
    barBg: 'blue'
  });

  // Stats component
  self.stats = blessed.box({
    label: 'Stats',
    top: '70%',
    left: '60%',
    border: frame(),
    width: '20%',
    height: '30%'
  });

  // Information component
  self.info = blessed.box({
    label: 'Information',
    top: '70%',
    left: '80%',
    border: frame(),
    width: '20%',
    height: '30%'
  });

  // Rendering the UI
  [
    self.log,
    self.request,
    self.response,
    self.jobTable,
    self.progressBar,
    self.stats,
    self.info
  ].forEach(function(widget) {
    screen.append(widget);
  });

  screen.render();

  // Getting out of the dashboard (might get useful...)
  screen.key(['C-c'], function() {
    return process.exit(0);
  });

  self.screen = screen;
}
160 |
161 | module.exports = UI;
162 |
--------------------------------------------------------------------------------
/src/listeners.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Sandcrawler Dashboard Listeners
3 | * ================================
4 | *
5 | * Hooking on the spider to relay information through the dashboard's UI.
6 | */
7 | var logger = require('sandcrawler-logger'),
8 | blessed = require('blessed'),
9 | chalk = require('chalk'),
10 | util = require('util'),
11 | nodeUrl = require('url'),
12 | _ = require('lodash');
13 |
14 | // Helpers
/**
 * Left-pads a value with one zero so that it is at least two characters wide.
 *
 * @param  {number} nb - Value to format.
 * @return {string}    - E.g. 7 gives '07' while 12 gives '12'.
 */
function pad(nb) {
  var nbstr = '' + nb;

  return nbstr.length < 2 ? '0' + nbstr : nbstr;
}

/**
 * Formats a duration as `HH:MM:SS`.
 *
 * The duration is rounded to a whole number of seconds first and only then
 * split into fields. Rounding the seconds field on its own would let it wrap
 * to 00 without carrying into the minutes (59.6 would read 00:00:00).
 *
 * @param  {number} seconds - Duration, in seconds.
 * @return {string}
 */
function formatHMS(seconds) {
  var total = Math.round(seconds),
      hours = (total / 3600) | 0,
      minutes = ((total - (hours * 3600)) / 60) | 0;

  return pad(hours) + ':' + pad(minutes) + ':' + pad(total % 60);
}

/**
 * Formats a duration as `MM:SS`, the minutes not being capped at 59.
 *
 * As in formatHMS, the rounding is applied before the split so that the
 * seconds carry properly into the minutes.
 *
 * @param  {number} seconds - Duration, in seconds.
 * @return {string}
 */
function formatMS(seconds) {
  var total = Math.round(seconds),
      minutes = (total / 60) | 0;

  return pad(minutes) + ':' + pad(total % 60);
}
36 |
/**
 * Shortens a url so that it fits a fixed-width cell of the dashboard.
 *
 * - A url no longer than `pad` is returned untouched.
 * - If the root (`protocol//host`) alone nearly fills the cell, the root is
 *   cut from the right and suffixed with '...', the result being at most
 *   `pad` characters long.
 * - Otherwise the path is collapsed to `root/../<last segment>`, or to
 *   `root/../..` when even that is too long.
 *
 * Note that the `pad` parameter shadows the `pad` helper of this file.
 *
 * @param  {string} url   - The url to shorten.
 * @param  {number} [pad] - Maximum width, 45 when omitted or falsy.
 * @return {string}
 */
function formatUrl(url, pad) {
  pad = pad || 45;

  // Nothing to do when the url already fits
  if (url.length <= pad)
    return url;

  var parsed = nodeUrl.parse(url),
      root = parsed.protocol + '//' + parsed.host;

  // Keeping the beginning of the root and leaving room for the ellipsis
  if (root.length > pad - 5)
    return root.slice(0, Math.max(pad - 3, 0)) + '...';

  // `path` is null when the url has none
  var segments = (parsed.path || '').split('/'),
      collapsed = root + '/../' + segments[segments.length - 1];

  return collapsed.length > pad ? root + '/../..' : collapsed;
}
59 |
60 | // Exporting listeners
61 | module.exports = function(spider, ui, opts) {
62 |
63 | function render() {
64 | updateInformation();
65 | return ui.screen.render();
66 | }
67 |
68 | // Rendering every now and then...
69 | var renderInterval = setInterval(render, 30);
70 |
71 | function updateInformation() {
72 | var errors = _(spider.stats.errorIndex)
73 | .pairs()
74 | .sortBy(function(p) {
75 | return -p[1];
76 | })
77 | .map(function(p) {
78 | return ' ' + chalk.red(p[0]) + ' ' + p[1];
79 | })
80 | .value();
81 |
82 | ui.stats.setContent([
83 | chalk.grey.bold('Queued jobs ') + spider.stats.queued,
84 | chalk.grey.bold('In-progress jobs ') + spider.stats.doing,
85 | chalk.grey.bold('Done jobs ') + spider.stats.done,
86 | '',
87 | chalk.grey.bold('Successes ') + chalk.green(spider.stats.successes),
88 | chalk.grey.bold('Failures ') + chalk.red(spider.stats.failures),
89 | chalk.grey.bold('Success Rate ') + spider.stats.successRate + '%',
90 | '',
91 | chalk.grey.bold('Engine type ') + spider.type,
92 | chalk.grey.bold('Concurrency ') + spider.options.concurrency
93 | ].join('\n'));
94 |
95 | var elapsed = spider.stats.getElapsedTime(),
96 | estimate = spider.stats.getRemainingTimeEstimation(),
97 | average = spider.stats.averageTimePerJob;
98 |
99 | ui.info.setContent([
100 | chalk.grey.bold('Elapsed time ') + formatHMS(elapsed),
101 | chalk.grey.bold('Remaining time ') + (estimate ? formatHMS(estimate) : ' ~'),
102 | chalk.grey.bold('Time per job ') + ' ' + (average ? formatMS(average) : ' ~'),
103 | chalk.grey.bold('Errors ')
104 | ].concat(errors).join('\n'));
105 | }
106 |
107 | // Branching the logger
108 | spider.use(logger(_.extend({
109 | out: function(txt) {
110 | var lines = ui.log.lines,
111 | wrapped = ui.log._wrapContent(txt, ui.log.width - 2);
112 |
113 | wrapped.forEach(function(line) {
114 | lines.unshift(line);
115 | });
116 |
117 | lines = lines.slice(0, ui.log.height - 2);
118 |
119 | ui.log.setContent(lines.reverse().join('\n'));
120 |
121 | }
122 | }, opts.logger)));
123 |
124 | // On end
125 | spider.once('spider:teardown', function() {
126 | clearInterval(renderInterval);
127 | setTimeout(function() {
128 | spider.logger.info('Press Ctrl-c to exit...');
129 | render();
130 | }, 10);
131 | });
132 |
133 | // Progress bar & job table
134 | spider.on('job:start', function(job) {
135 | var j = ui.jobTable.find(job.id);
136 |
137 | var truncatedUrl = formatUrl(job.req.url);
138 |
139 | if (j)
140 | ui.jobTable.remove(job.id);
141 |
142 | ui.jobTable.add(job.id, [
143 | ' ' + chalk.bgBlue.bold.white(' ~ ') + ' ',
144 | chalk.bold.grey(truncatedUrl),
145 | chalk.bold.white('-')
146 | ]);
147 |
148 | ui.jobTable.update();
149 | });
150 |
151 | spider.on('job:success', function(job) {
152 | var rows = ui.jobTable.find(job.id);
153 |
154 | rows[0] = ' ' + chalk.bgGreen.bold.white(' ✓ ') + ' ';
155 | rows[1] = chalk.bold.grey(formatUrl(job.res.url || job.req.url));
156 |
157 | ui.jobTable.update();
158 | });
159 |
160 | spider.on('job:retry', function(job) {
161 | var rows = ui.jobTable.find(job.id);
162 |
163 | rows[0] = ' ' + chalk.bgMagenta.bold.white(' · ') + ' ';
164 |
165 | ui.jobTable.update();
166 | });
167 |
168 | spider.on('job:discard', function(err, job) {
169 | var j = ui.jobTable.find(job.id);
170 |
171 | if (j)
172 | ui.jobTable.remove(job.id);
173 | });
174 |
175 | spider.on('job:fail', function(err, job) {
176 | var rows = ui.jobTable.find(job.id),
177 | errMessage = err.message;
178 |
179 | if (errMessage.length > 12)
180 | errMessage = errMessage.slice(0, 9) + '...';
181 |
182 | rows[0] = ' ' + chalk.bgRed.bold.white(' ✗ ') + ' ';
183 | rows[1] = chalk.bold.grey(formatUrl(job.res.url || job.req.url));
184 | rows[2] = chalk.red(errMessage);
185 |
186 | ui.jobTable.update();
187 | });
188 |
189 | function updateReqRes(err, job) {
190 | if (!job) {
191 | job = err;
192 | err = null;
193 | }
194 |
195 | // Request
196 | var reqText = '';
197 | reqText += chalk.grey.bold('Url') + ' ' + formatUrl(job.req.url, ui.request.width - 2 - 4) + '\n';
198 |
199 | _(job.req)
200 | .omit(['url', 'retry', 'retryNow', 'retryLater'])
201 | .forIn(function(v, k) {
202 | reqText += chalk.grey.bold(_.capitalize(k)) + ' ' + util.inspect(v, {depth: 1}) + '\n';
203 | })
204 | .value();
205 |
206 | ui.request.setContent(reqText);
207 |
208 | // Response
209 | var resText = '';
210 |
211 | resText += chalk.grey.bold('Url') + ' ' + formatUrl(job.res.url || job.req.url, ui.response.width - 2 - 4) + '\n';
212 |
213 | if (err)
214 | resText += chalk.red.bold('Error') + ' ' + (err.message || err) + '\n';
215 |
216 | resText += chalk[err ? 'grey' : 'green'].bold('Data') + ' ' + util.inspect(job.res.data, {depth: 1}) + '\n';
217 |
218 | _(job.res)
219 | .omit(['url', 'body', 'data', 'error'])
220 | .forIn(function(v, k) {
221 | resText += chalk.grey.bold(_.capitalize(k)) + ' ' + util.inspect(v, {depth: 1}) + '\n';
222 | })
223 | .value();
224 |
225 | ui.response.setContent(resText);
226 | };
227 |
228 | spider.on('job:success', updateReqRes);
229 | spider.on('job:fail', updateReqRes);
230 |
231 | function updateCompletion() {
232 | var completion = spider.stats.completion;
233 |
234 | ui.progressBar.setProgress(completion);
235 | ui.progressBar.setLabel('Progress - ' + completion + '%');
236 | }
237 |
238 | spider.on('job:add', updateCompletion);
239 | spider.on('job:end', updateCompletion);
240 | };
241 |
--------------------------------------------------------------------------------