├── .gitignore ├── README.md ├── index.js ├── lib ├── storage │ ├── NullStorage.js │ └── MemoryStorage.js ├── merge.js ├── logger.js ├── impress │ ├── InstancePool.js │ ├── Parallel.js │ ├── InstancePortPool.js │ ├── Queue.js │ ├── Deferred.js │ └── Instance.js ├── html │ └── Filter.js ├── Application.js ├── phantom │ └── Instance.js └── Server.js ├── phantomjs ├── lib │ ├── inherit.js │ ├── jsonFileReader.js │ ├── HtmlSanitize.js │ ├── Shell.js │ ├── Application.js │ ├── EventEmitter.js │ ├── ResourceFilter.js │ ├── PageContentPerformer.js │ ├── Server.js │ └── Page.js └── impress.js ├── package.json └── bin └── impresser.js /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea/ 2 | 3 | node_modules 4 | *.log 5 | .DS_STORE 6 | 7 | /config.json -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Impresser 2 | 3 | ## Installation 4 | 5 | npm install -g impresser 6 | impresser -h 7 | 8 | Enjoy! 9 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | Application: require('./lib/Application'), 3 | Server: require('./lib/Server'), 4 | Queue: require('./lib/impress/Queue') 5 | }; 6 | -------------------------------------------------------------------------------- /lib/storage/NullStorage.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = NullStorage; 3 | 4 | function NullStorage(options) { 5 | this.options = options || {}; 6 | } 7 | 8 | NullStorage.prototype = { 9 | 10 | get: function(key, callback) { 11 | callback && callback(); 12 | }, 13 | 14 | put: function(value, callback) { 15 | callback && callback(null, value); 16 | } 17 | 18 | }; -------------------------------------------------------------------------------- /phantomjs/lib/inherit.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = inherit; 3 | 4 | function inherit(Constructor, Parent, methods, properties) { 5 | Constructor.prototype = Object.create(Parent.prototype, properties || {}); 6 | Object.keys(methods || {}) 7 | .forEach(function(name) { 8 | Constructor.prototype[name] = methods[name]; 9 | }); 10 | 11 | return Constructor; 12 | } 13 | -------------------------------------------------------------------------------- /phantomjs/lib/jsonFileReader.js: -------------------------------------------------------------------------------- 1 | var 2 | fs = require('fs'); 3 | 4 | module.exports = readJsonFile; 5 | 6 | function readJsonFile(filePath) { 7 | var 8 | stream, 9 | data = ''; 10 | try { 11 | stream = fs.open(filePath, 'r'); 12 | while(!stream.atEnd()) { 13 | data += stream.readLine(); 14 | } 15 | return JSON.parse(data); 16 | } 17 | catch(e) { 18 | throw new Error('Could not read file "' + filePath + '". Expected error: ' + String(e)); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "impresser", 3 | "description": "Impresser", 4 | "version": "0.3.8", 5 | "repository": { 6 | "url": "https://github.com/icons8/impresser" 7 | }, 8 | "license": "MIT", 9 | "tags": [ 10 | "prerender", 11 | "impress", 12 | "impresser" 13 | ], 14 | "bin": { 15 | "impresser": "./bin/impresser.js" 16 | }, 17 | "dependencies": { 18 | "yargs": "^3.6.0", 19 | "connect": "^3.3.5", 20 | "response-time": "^2.3.0", 21 | "qs": "^2.4.1", 22 | "http-proxy": "^1.10.1", 23 | "minimist": "^1.1.1" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /lib/merge.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = merge; 3 | 4 | function merge(base, object) { 5 | if (!object || typeof object != 'object') { 6 | return; 7 | } 8 | return _merge(base, object); 9 | 10 | function _merge(to, from) { 11 | if (!to || !from || typeof to != 'object' || typeof from != 'object' || Array.isArray(to) || Array.isArray(from)) { 12 | return from; 13 | } 14 | Object.keys(from).forEach(function(key) { 15 | if (to.hasOwnProperty(key)) { 16 | to[key] = _merge(to[key], from[key]); 17 | } 18 | else { 19 | to[key] = from[key]; 20 | } 21 | }); 22 | return to; 23 | } 24 | } -------------------------------------------------------------------------------- /lib/storage/MemoryStorage.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = MemoryStorage; 3 | 4 | function MemoryStorage(options) { 5 | this.options = options || {}; 6 | this._table = {}; 7 | } 8 | 9 | MemoryStorage.prototype = { 10 | 11 | get: function(key, callback) { 12 | var 13 | value; 14 | value = this._table.hasOwnProperty(key) 15 | ? this._table[key] 16 | : null; 17 | callback && callback(null, value); 18 | }, 19 | 20 | put: function(value, callback) { 21 | value = value || {}; 22 | value.url = value.url || ''; 23 | this._table[value.url] = value; 24 | callback && callback(null, value); 25 | } 26 | 27 | }; -------------------------------------------------------------------------------- /phantomjs/lib/HtmlSanitize.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = HtmlSanitize; 3 | 4 | function HtmlSanitize(content) { 5 | this.setContent(content); 6 | } 7 | 8 | HtmlSanitize.prototype = { 9 | 10 | setContent: function(content) { 11 | this.content = String(content || ''); 12 | this.apply(); 13 | }, 14 | 15 | getContent: function() { 16 | return this.content; 17 | }, 18 | 19 | apply: function() { 20 | this.removeScriptTags(); 21 | }, 22 | 23 | removeScriptTags: function() { 24 | this.content = this.content 25 | .replace(/[\S\s]*?<\/script\s*>/gi, function(match, script) { 26 | return script.indexOf('application/ld+json') != -1 27 | ? match 28 | : '' 29 | }); 30 | } 31 | 32 | }; 33 | -------------------------------------------------------------------------------- /phantomjs/lib/Shell.js: -------------------------------------------------------------------------------- 1 | const 2 | OK_EXIT_CODE = 0, 3 | ERROR_EXIT_CODE = 1; 4 | 5 | module.exports = Shell; 6 | 7 | function Shell() { 8 | } 9 | 10 | Shell.ExitCode = { 11 | OK: OK_EXIT_CODE, 12 | ERROR: ERROR_EXIT_CODE 13 | }; 14 | 15 | Shell.exit = function(code) { 16 | code = code || OK_EXIT_CODE; 17 | phantom.exit(code); 18 | }; 19 | 20 | Shell.exitWithError = function(/* ...errors */) { 21 | var 22 | args = Array.prototype.slice.call(arguments); 23 | if (args.length > 0) { 24 | console.log.apply(console, args); 25 | } 26 | Shell.exit(Shell.ExitCode.ERROR); 27 | }; 28 | 29 | Shell.log = function(/* ...value */) { 30 | console.log.apply(console, arguments); 31 | }; 32 | 33 | Shell.output = function(buffer) { 34 | console.log(buffer); 35 | }; 36 | -------------------------------------------------------------------------------- /lib/logger.js: -------------------------------------------------------------------------------- 1 | var 2 | logger; 3 | 4 | logger = getConsoleWriteDelegate('log'); 5 | 6 | ['debug', 'log', 'info', 'warn', 'error'].forEach(function(type) { 7 | logger[type] = getConsoleWriteDelegate(type); 8 | }); 9 | 10 | module.exports = logger; 11 | 12 | function getConsoleWriteDelegate(type) { 13 | return function() { 14 | var 15 | args, 16 | date; 17 | 18 | args = Array.prototype.slice.call(arguments); 19 | date = new Date(); 20 | args.unshift( 21 | [ 22 | date.getUTCFullYear(), 23 | ('0' + (date.getUTCMonth()+1)).slice(-2), 24 | ('0' + date.getUTCDate()).slice(-2), 25 | '-', 26 | ('0' + date.getUTCHours()).slice(-2), 27 | ':', 28 | ('0' + date.getUTCMinutes()).slice(-2), 29 | ':', 30 | ('0' + date.getUTCSeconds()).slice(-2) 31 | ] 32 | .join('') 33 | ); 34 | console[type].apply(console, args); 35 | } 36 | } 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /phantomjs/lib/Application.js: -------------------------------------------------------------------------------- 1 | var 2 | Shell = require('./Shell'), 3 | Page = require('./Page'), 4 | Server = require('./Server'), 5 | inherit = require('./inherit'), 6 | EventEmitter = require('./EventEmitter'); 7 | 8 | module.exports = Application; 9 | 10 | function Application(options) { 11 | EventEmitter.call(this); 12 | 13 | this.options = options || {}; 14 | this._init(); 15 | } 16 | 17 | inherit(Application, EventEmitter, { 18 | 19 | 20 | _initPage: function() { 21 | var 22 | page; 23 | 24 | page = new Page(this.options); 25 | page.on('exit', function(result) { 26 | Shell.output(JSON.stringify(result)); 27 | Shell.exit(); 28 | }); 29 | 30 | this.on('run', function() { 31 | page.open(); 32 | }); 33 | }, 34 | 35 | _initServer: function() { 36 | var 37 | server; 38 | 39 | server = new Server(this.options); 40 | this.on('run', function() { 41 | server.run(); 42 | }); 43 | }, 44 | 45 | _init: function() { 46 | if (this.options.url) { 47 | this._initPage(); 48 | } 49 | else { 50 | this._initServer(); 51 | } 52 | }, 53 | 54 | run: function() { 55 | this.emit('run'); 56 | } 57 | 58 | }); 59 | 60 | 61 | -------------------------------------------------------------------------------- /phantomjs/lib/EventEmitter.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = EventEmitter; 3 | 4 | function EventEmitter() { 5 | this._listeners = {}; 6 | } 7 | 8 | EventEmitter.prototype = { 9 | 10 | emit: function(name, data) { 11 | var 12 | args = Array.prototype.slice.call(arguments, 1), 13 | listeners, 14 | index; 15 | 16 | if (!this._listeners[name]) { 17 | return; 18 | } 19 | listeners = this._listeners[name].slice(); 20 | for (index = 0; index < listeners.length; index++) { 21 | listeners[index].apply(this, args); 22 | } 23 | }, 24 | 25 | on: function(name, fn) { 26 | var 27 | self = this; 28 | 29 | if (!this._listeners[name]) { 30 | this._listeners[name] = []; 31 | } 32 | this._listeners[name].push(fn); 33 | 34 | return function() { 35 | self.off(name, fn); 36 | }; 37 | }, 38 | 39 | off: function(name, fn) { 40 | var 41 | index; 42 | if (!this._listeners[name]) { 43 | return; 44 | } 45 | for (index = 0; index < this._listeners[name].length; ) { 46 | if (this._listeners[name][index] === fn) { 47 | this._listeners[name].splice(index, 1); 48 | } 49 | else { 50 | index ++; 51 | } 52 | } 53 | } 54 | 55 | }; 56 | -------------------------------------------------------------------------------- /lib/impress/InstancePool.js: -------------------------------------------------------------------------------- 1 | var 2 | Instance = require('./Instance'), 3 | InstancePortPool = require('./InstancePortPool') 4 | ; 5 | 6 | module.exports = InstancePool; 7 | 8 | function InstancePool(options, htmlFilters) { 9 | this.options = options || {}; 10 | this.pool = []; 11 | this.portPool = new InstancePortPool(options); 12 | this.htmlFilters = htmlFilters; 13 | } 14 | 15 | InstancePool.prototype = { 16 | 17 | prepareInstances: function(count) { 18 | var 19 | instance, 20 | index, 21 | self = this; 22 | 23 | count = count || 1; 24 | 25 | for (index = 0; index < count; index++) { 26 | instance = new Instance(self.portPool, self.htmlFilters, self.options); 27 | self.pool.push(instance); 28 | instance.prepare(); 29 | } 30 | }, 31 | 32 | getInstance: function() { 33 | var 34 | index, 35 | instance; 36 | for (index = 0; index < this.pool.length; index++) { 37 | instance = this.pool[index]; 38 | if (!instance || instance.destroyed) { 39 | this.pool.splice(index --, 1); 40 | } 41 | else if (!instance.pending) { 42 | return instance; 43 | } 44 | } 45 | instance = new Instance(this.portPool, this.htmlFilters, this.options); 46 | this.pool.push(instance); 47 | return instance; 48 | } 49 | 50 | }; 51 | -------------------------------------------------------------------------------- /phantomjs/impress.js: -------------------------------------------------------------------------------- 1 | #!phantomjs 2 | 3 | var 4 | system = require('system'), 5 | Application = require('./lib/Application'), 6 | minimist = require('../node_modules/minimist'), 7 | Shell = require('./lib/Shell'), 8 | jsonFileReader = require('./lib/jsonFileReader'), 9 | argv, 10 | options = {}, 11 | url, 12 | blockedResources; 13 | 14 | argv = minimist(system.args); 15 | 16 | if (argv.config) { 17 | options = jsonFileReader(argv.config) || {}; 18 | } 19 | 20 | url = argv._[1]; 21 | if (url && argv['url-base64-encoded']) { 22 | try { 23 | url = window.atob(String(url)); 24 | } 25 | catch(e) { 26 | Shell.exitWithError('Incorrect base64 formatted url', e); 27 | } 28 | } 29 | options.url = url || options.url; 30 | 31 | blockedResources = argv['blocked-resources']; 32 | if (!Array.isArray(blockedResources)) { 33 | blockedResources = [blockedResources]; 34 | } 35 | blockedResources = blockedResources 36 | .filter(function(url) { 37 | return url && typeof url == 'string'; 38 | }); 39 | 40 | if (argv['blocked-resources-base64-encoded']) { 41 | try { 42 | blockedResources = blockedResources.map(function(url) { 43 | return window.atob(String(url)); 44 | }); 45 | } 46 | catch(e) { 47 | Shell.exitWithError('Incorrect base64 formatted blocked resources', e); 48 | } 49 | } 50 | 51 | if (blockedResources.length) { 52 | options.blockedResources = blockedResources; 53 | } 54 | 55 | options.blockedResourcesConfig = argv['blocked-resources-config'] || options.blockedResourcesConfig; 56 | 57 | options.serverPort = argv['server-port'] || options.serverPort; 58 | options.notices = argv.notices || options.notices; 59 | options.warnings = argv.warnings || options.warnings; 60 | options.timeout = argv.timeout || options.timeout; 61 | options.resourcesLogging = argv['resources-logging'] || options.resourcesLogging; 62 | 63 | new Application(options).run(); 64 | -------------------------------------------------------------------------------- /lib/impress/Parallel.js: -------------------------------------------------------------------------------- 1 | var 2 | os = require('os'), 3 | InstancePool = require('./InstancePool'), 4 | logger = require('../logger') 5 | ; 6 | 7 | module.exports = Parallel; 8 | 9 | function Parallel(options, htmlFilters) { 10 | options = options || {}; 11 | 12 | this.options = options; 13 | this.maxSize = options.maxParallel || os.cpus().length * 2; 14 | 15 | this.pool = new InstancePool(options, htmlFilters); 16 | this.parallel = []; 17 | this.watchers = []; 18 | } 19 | 20 | Parallel.prototype = { 21 | 22 | prepare: function() { 23 | this.pool.prepareInstances(this.maxSize) 24 | }, 25 | 26 | hasLimit: function() { 27 | return this.parallel.length >= this.maxSize; 28 | }, 29 | 30 | add: function(deferred) { 31 | var 32 | self = this, 33 | parallel = this.parallel, 34 | promise, 35 | instance = this.pool.getInstance(); 36 | 37 | this.parallel.push(instance); 38 | instance.run(deferred); 39 | 40 | promise = deferred.promise; 41 | promise(function() { 42 | var 43 | position; 44 | while( (position = parallel.indexOf(instance)) != -1 ) { 45 | parallel.splice(position, 1); 46 | } 47 | self.digest(); 48 | }); 49 | 50 | return promise; 51 | }, 52 | 53 | digest: function() { 54 | var 55 | watchers = this.watchers; 56 | 57 | watchers.slice().forEach(function(fn) { 58 | try { 59 | fn && fn(); 60 | } 61 | catch(e) { 62 | logger.error(e); 63 | } 64 | }); 65 | 66 | }, 67 | 68 | watch: function(fn) { 69 | var 70 | watchers = this.watchers; 71 | 72 | if (typeof fn == 'function') { 73 | this.watchers.push(fn); 74 | } 75 | return function() { 76 | var 77 | position; 78 | while( (position = watchers.indexOf(fn)) != -1 ) { 79 | watchers.splice(position, 1); 80 | } 81 | } 82 | } 83 | 84 | }; 85 | -------------------------------------------------------------------------------- /lib/impress/InstancePortPool.js: -------------------------------------------------------------------------------- 1 | const 2 | DEFAULT_RELEASE_DELAY = 10000, 3 | DEFAULT_PORT_LOW = 8498, 4 | DEFAULT_PORT_HIGH = 8598; 5 | 6 | module.exports = InstancePortPool; 7 | 8 | function InstancePortPool(options) { 9 | this.options = options || {}; 10 | 11 | this.phantomPortLow = options.phantomPortLow || DEFAULT_PORT_LOW; 12 | this.phantomPortHigh = options.phantomPortHigh || DEFAULT_PORT_HIGH; 13 | this.phantomPortReleaseDelay = options.phantomPortReleaseDelay || DEFAULT_RELEASE_DELAY; 14 | 15 | this.used = {}; 16 | this.cursor = this.phantomPortLow; 17 | this.delayedTimeouts = {}; 18 | } 19 | 20 | InstancePortPool.prototype = { 21 | 22 | getUnusedCount: function() { 23 | return this.phantomPortHigh - this.phantomPortLow - Object.keys(this.used).length; 24 | }, 25 | 26 | getPort: function() { 27 | var 28 | cursor = this.cursor, 29 | inf; 30 | 31 | while(this.used[cursor]) { 32 | cursor ++; 33 | if (cursor > this.phantomPortHigh) { 34 | cursor = this.phantomPortLow; 35 | if (inf) { 36 | throw new Error('Unused port for phantomjs server not found'); 37 | } 38 | inf = true; 39 | } 40 | } 41 | this.used[cursor] = true; 42 | this.cursor = cursor; 43 | return cursor; 44 | }, 45 | 46 | releasePort: function(port) { 47 | if (this.used[port]) { 48 | this._cancelDelayedRelease(port); 49 | delete this.used[port]; 50 | } 51 | }, 52 | 53 | delayedReleasePort: function(port, delay) { 54 | var 55 | self = this; 56 | 57 | if (!this.used[port]) { 58 | return; 59 | } 60 | 61 | this._cancelDelayedRelease(port); 62 | this.delayedTimeouts[port] = setTimeout( 63 | function() { 64 | self.releasePort(port); 65 | }, 66 | delay || DEFAULT_RELEASE_DELAY 67 | ); 68 | }, 69 | 70 | _cancelDelayedRelease: function(port) { 71 | if (this.delayedTimeouts[port]) { 72 | clearTimeout(this.delayedTimeouts[port]); 73 | this.delayedTimeouts[port] = null; 74 | } 75 | } 76 | 77 | }; 78 | -------------------------------------------------------------------------------- /lib/html/Filter.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = Filter; 3 | 4 | function Filter(filters, options) { 5 | this.options = options || {}; 6 | 7 | this._filters = []; 8 | this._addFilters(filters); 9 | } 10 | 11 | Filter.prototype = { 12 | 13 | _addFilters: function(filters) { 14 | var 15 | self = this; 16 | 17 | if (!Array.isArray(filters)) { 18 | filters = [filters]; 19 | } 20 | filters.forEach(function(filter) { 21 | if (filter) { 22 | if (typeof filter == 'object' && typeof filter.apply == 'function') { 23 | self._filters.push(function(content, callback) { 24 | try { 25 | if (filter.async) { 26 | filter.apply(content, callback); 27 | } 28 | else { 29 | callback && callback(null, filter.apply(content)); 30 | } 31 | } 32 | catch(err) { 33 | callback && callback(err); 34 | } 35 | }); 36 | } 37 | if (typeof filter == 'function') { 38 | self._filters.push(function(content, callback) { 39 | try { 40 | if (filter.async) { 41 | filter(content, callback); 42 | } 43 | else { 44 | callback && callback(null, filter(content)); 45 | } 46 | } 47 | catch(err) { 48 | callback && callback(err); 49 | } 50 | }); 51 | } 52 | } 53 | }); 54 | }, 55 | 56 | apply: function(content, finish) { 57 | var 58 | fn; 59 | 60 | fn = this._filters.reduce( 61 | function(fn, filter) { 62 | return function(content, callback) { 63 | fn(content, function(err, content) { 64 | if (err) { 65 | callback(err); 66 | return; 67 | } 68 | filter(content, callback); 69 | }); 70 | }; 71 | }, 72 | function(content, callback) { 73 | callback(null, content); 74 | } 75 | ); 76 | 77 | fn(content, finish); 78 | 79 | } 80 | 81 | }; 82 | -------------------------------------------------------------------------------- /lib/impress/Queue.js: -------------------------------------------------------------------------------- 1 | var 2 | Parallel = require('./Parallel'), 3 | Deferred = require('./Deferred'), 4 | logger = require('../logger') 5 | ; 6 | 7 | module.exports = Queue; 8 | 9 | function Queue(options, htmlFilters) { 10 | options = options || {}; 11 | 12 | this.options = options; 13 | this.parallel = new Parallel(options, htmlFilters); 14 | this.maxSize = options.maxQueue || this.parallel.maxSize * 30; 15 | 16 | this.queue = []; 17 | this._registerPoolWatcher(); 18 | } 19 | 20 | Queue.prototype = { 21 | 22 | prepare: function() { 23 | this.parallel.prepare(); 24 | }, 25 | 26 | hasLimit: function() { 27 | return this.queue.length >= this.maxSize; 28 | }, 29 | 30 | isEmpty: function() { 31 | return this.queue.length == 0; 32 | }, 33 | 34 | add: function(url) { 35 | var 36 | deferred, 37 | queue = this.queue, 38 | parallel = this.parallel.parallel, 39 | index 40 | ; 41 | 42 | for (index = 0; index < queue.length; index++) { 43 | if (queue[index].url == url) { 44 | return queue[index]; 45 | } 46 | } 47 | for (index = 0; index < parallel.length; index++) { 48 | if (parallel[index].deferred.url == url) { 49 | return parallel[index].deferred; 50 | } 51 | } 52 | deferred = new Deferred(url, this.options); 53 | 54 | this.queue.push(deferred); 55 | this.parallel.digest(); 56 | 57 | return deferred; 58 | }, 59 | 60 | pull: function() { 61 | return this.queue.shift(); 62 | }, 63 | 64 | _registerPoolWatcher: function() { 65 | var 66 | queue = this, 67 | parallel = this.parallel; 68 | 69 | this.parallel.watch(function() { 70 | if (!parallel.hasLimit() && !queue.isEmpty()) { 71 | parallel.add(queue.pull()); 72 | logger.log( 73 | 'STATUS Queue size:', queue.queue.length, '>', 74 | 'Parallel:', parallel.parallel.length, 75 | 'Pool:', parallel.pool.pool.length, 76 | 'Unused ports', parallel.pool.portPool.getUnusedCount() 77 | ); 78 | } 79 | else { 80 | logger.log('STATUS Queue size:', queue.queue.length); 81 | } 82 | }); 83 | } 84 | 85 | }; 86 | -------------------------------------------------------------------------------- /phantomjs/lib/ResourceFilter.js: -------------------------------------------------------------------------------- 1 | var 2 | jsonFileReader = require('./jsonFileReader'); 3 | 4 | module.exports = ResourceFilter; 5 | 6 | function ResourceFilter(options) { 7 | this.options = options || {}; 8 | 9 | this.rules = []; 10 | this.blockedResources = options.blockedResources; 11 | this.blockedResourcesConfig = options.blockedResourcesConfig; 12 | 13 | this._addRules(); 14 | } 15 | 16 | ResourceFilter.prototype = { 17 | 18 | check: function(url) { 19 | var 20 | index, 21 | filters = this.rules; 22 | 23 | url = String(url || ''); 24 | for (index = 0; index < filters.length; index++) { 25 | if (!this.rules[index](url)) { 26 | return false; 27 | } 28 | } 29 | return true; 30 | }, 31 | 32 | _addRules: function() { 33 | this._addFontRule(); 34 | this._addBlockedResourcesRules(); 35 | }, 36 | 37 | _getBlockedResourcesFromConfig: function() { 38 | return jsonFileReader(this.blockedResourcesConfig); 39 | }, 40 | 41 | _getBlockedResources: function() { 42 | if (this.blockedResources) { 43 | return this.blockedResources; 44 | } 45 | if (this.blockedResourcesConfig) { 46 | return this._getBlockedResourcesFromConfig(); 47 | } 48 | return null; 49 | }, 50 | 51 | _addBlockedResourcesRules: function() { 52 | var 53 | resources, 54 | regExpBuilder = [], 55 | regExp; 56 | 57 | resources = this._getBlockedResources(); 58 | if (!resources) { 59 | return; 60 | } 61 | 62 | regExpBuilder.push( 63 | '^(?:https?:\\/\\/)?[^?/]*?', 64 | '(?:', 65 | resources 66 | .map(function(res) { 67 | return regExpQuote(res).trim(); 68 | }) 69 | .filter(function(res) { 70 | return res; 71 | }) 72 | .join('|'), 73 | ')' 74 | ); 75 | 76 | regExp = new RegExp(regExpBuilder.join(''), 'i'); 77 | 78 | this.rules.push(function(url) { 79 | return !regExp.test(url); 80 | }); 81 | 82 | function regExpQuote(str) { 83 | // @see http://phpjs.org/functions/preg_quote/ 84 | return String(str || '').replace(/[.\\+*?\[\^\]$(){}=!<>|:-]/g, '\\$&'); 85 | } 86 | }, 87 | 88 | _addFontRule: function() { 89 | this.rules.push(function(url) { 90 | return !/^(https?:\/\/)?(www)?[^?]+?\.(ttf|eot|woff|woff2)([?/]|$)/i.test(url); 91 | }); 92 | } 93 | 94 | }; 95 | -------------------------------------------------------------------------------- /phantomjs/lib/PageContentPerformer.js: -------------------------------------------------------------------------------- 1 | var 2 | HtmlSanitize = require('./HtmlSanitize') 3 | ; 4 | 5 | module.exports = PageContentPerformer; 6 | 7 | function PageContentPerformer(content) { 8 | this.metaHttpStatusCode = null; 9 | this.metaHttpHeders = {}; 10 | this.setContent(content); 11 | } 12 | 13 | PageContentPerformer.prototype = { 14 | 15 | setContent: function(content) { 16 | this.content = String(content || ''); 17 | this.apply(); 18 | }, 19 | 20 | getContent: function() { 21 | return this.content; 22 | }, 23 | 24 | apply: function() { 25 | this._htmlSanitize(); 26 | this._parseHttpStatusCode(); 27 | this._parseHttpHeaders(); 28 | }, 29 | 30 | _htmlSanitize: function() { 31 | this.content = new HtmlSanitize(this.content).getContent(); 32 | }, 33 | 34 | _parseHttpStatusCode: function() { 35 | var 36 | metaMatch, 37 | httpStatusCodeMatch, 38 | httpStatusCode; 39 | 40 | metaMatch = this.content.match(/]*?name\s*=\s*["']?(?:prerender|impress(?:er)?)-status-code[>'"\s][^>]*>/i); 41 | if (metaMatch) { 42 | httpStatusCodeMatch = metaMatch[0].match(/content\s*=\s*["']?\s*(\d+)/i); 43 | if (httpStatusCodeMatch) { 44 | httpStatusCode = parseInt(httpStatusCodeMatch[1]); 45 | if (httpStatusCode) { 46 | this.metaHttpStatusCode = httpStatusCode; 47 | } 48 | } 49 | } 50 | }, 51 | 52 | _parseHttpHeaders: function() { 53 | var 54 | headers, 55 | metaMatches; 56 | 57 | headers = this.metaHttpHeders; 58 | 59 | metaMatches = this.content.match(/]*?name\s*=\s*["']?(?:prerender|impress(?:er)?)-header[>'"\s][^>]*>/ig); 60 | if (metaMatches) { 61 | 62 | metaMatches.forEach(function(metaMatch) { 63 | var 64 | headerMatch; 65 | 66 | headerMatch = metaMatch.match(/content\s*=\s*(?:"([^"]+)"|'([^']+)'|([^'"\s>]+))/i); 67 | if (headerMatch) { 68 | headerMatch = (headerMatch[1] || headerMatch[2] || headerMatch[3] || '').split(':').map(function(part) { 69 | return part.trim(); 70 | }); 71 | if (headerMatch[0]) { 72 | headers[headerMatch[0]] = headerMatch.slice(1).join(':'); 73 | } 74 | } 75 | }); 76 | } 77 | }, 78 | 79 | hasMetaHttpStatusCode: function() { 80 | return this.metaHttpStatusCode != null; 81 | }, 82 | 83 | getMetaHttpStatusCode: function() { 84 | return this.metaHttpStatusCode; 85 | }, 86 | 87 | hasMetaHttpHeaders: function() { 88 | return Object.keys(this.metaHttpHeders).length > 0; 89 | }, 90 | 91 | getMetaHttpHeaders: function() { 92 | return this.metaHttpHeders; 93 | } 94 | 95 | }; 96 | -------------------------------------------------------------------------------- /phantomjs/lib/Server.js: -------------------------------------------------------------------------------- 1 | var 2 | Shell = require('./Shell'), 3 | webServer = require('webserver'), 4 | Page = require('./Page'), 5 | qs = require('../../node_modules/qs'); 6 | 7 | module.exports = Server; 8 | 9 | function Server(options) { 10 | this.options = options || {}; 11 | this.port = options.serverPort || 8498; 12 | } 13 | 14 | Server.prototype = { 15 | 16 | run: function() { 17 | this._create(); 18 | }, 19 | 20 | _create: function() { 21 | var 22 | server = webServer.create(), 23 | serverInstance, 24 | port = this.port, 25 | self = this; 26 | 27 | try { 28 | serverInstance = server.listen(port, function (req, res) { 29 | var 30 | query, 31 | url, 32 | blockedResources, 33 | page, 34 | pageOptions = {}; 35 | 36 | pageOptions.__proto__ = self.options; 37 | 38 | query = qs.parse(req.url.split('?')[1]); 39 | url = query.url; 40 | 41 | if (typeof query['blocked-resources'] != 'undefined') { 42 | blockedResources = query['blocked-resources']; 43 | if (!Array.isArray(blockedResources)) { 44 | blockedResources = []; 45 | } 46 | blockedResources = blockedResources.filter(function(resource) { 47 | return resource; 48 | }); 49 | 50 | pageOptions.blockedResources = blockedResources; 51 | } 52 | pageOptions.url = url; 53 | 54 | try { 55 | page = new Page(pageOptions); 56 | 57 | page.on('exit', function(content) { 58 | send(content); 59 | setTimeout(function() { 60 | try { 61 | page.destroy(); 62 | } 63 | catch(error) { 64 | Shell.exitWithError('Error: could not destroy page object', url, error); 65 | } 66 | }); 67 | }); 68 | page.open(); 69 | } 70 | catch(error) { 71 | send(500); 72 | setTimeout(function() { 73 | Shell.exitWithError('Error: could not open page', url, error); 74 | }); 75 | } 76 | 77 | function send(code, result) { 78 | if (typeof code != 'number') { 79 | result = code; 80 | code = 200; 81 | } 82 | res.statusCode = code; 83 | res.headers = { 84 | "Cache": 'no-cache', 85 | "Content-Type": 'text/html' 86 | }; 87 | if (result) { 88 | res.write(JSON.stringify(result)); 89 | } 90 | res.close(); 91 | } 92 | }); 93 | 94 | if (serverInstance) { 95 | Shell.log('Info: server running on port', port); 96 | } else { 97 | throw new Error('Server not created'); 98 | } 99 | } 100 | catch(error) { 101 | Shell.exitWithError('Error: Could not create web server listening on port', port, error); 102 | } 103 | 104 | 105 | } 106 | 107 | }; 108 | -------------------------------------------------------------------------------- /bin/impresser.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | 'use strict'; 4 | 5 | var 6 | Application = require('../lib/Application'), 7 | yargs = require('yargs'), 8 | 9 | argv = yargs 10 | .usage('Usage: $0 [config.json[, ...config.json]] [options]') 11 | .describe('base-url', 'Base url for relative addresses by default http://icons8.com') 12 | .describe('frontend', 'Use impresser as frontend server by default true') 13 | .describe('proxy', 'Use proxy server for resources and not GET requests by default true') 14 | .describe('server-port', 'Port of impresser server by default 8497') 15 | .describe('force-allowed', 'Allow force header or param for force reset stored page by default false') 16 | .describe('storage', 'Use storage for impressed pages by default true') 17 | .describe('content', 'Return impressed pages content by default true') 18 | .describe('max-parallel', 'Limit of parallel impress instances, by default 2 on each CPU cores') 19 | .describe('max-queue', 'Limit of impress queue, by default 30 * "Limit of parallel impress instances.') 20 | .describe('max-queue-timeout', 'Max timeout for deferring in queue by default 60000') 21 | .describe('logging-impress-notices', 'Logging impress notices by default false') 22 | .describe('logging-impress-warnings', 'Logging impress warnings by default false') 23 | .describe('phantom-binary', 'Path to phantomjs binary file by default phantomjs command') 24 | .describe('phantom-ttl', 'Time to live for phantomjs instance by default 1800000') 25 | .describe('phantom-args', 'Add or reassign args for phantomjs command line by default "ignore-ssl-errors=true ssl-protocol=tlsv1"') 26 | .describe('phantom-script', 'Path to phantomjs script file by default %IMPRESS_DIR%/phantomjs/impress.js') 27 | .describe('phantom-port-low', 'Low bound of phantomjs server port by default 8498') 28 | .describe('phantom-port-high', 'High bound of phantomjs server port by default 8598') 29 | .describe('phantom-port-release-delay', 'Delay of release phantomjs server port by default 10000') 30 | .describe('phantom-exec-timeout', 'Max phantomjs execution time by default 5000') 31 | .describe('phantom-impress-timeout', 'Max phantomjs impress time by default 19000') 32 | .describe('min-phantom-restart-interval', 'Min phantomjs restart interval by default 1000') 33 | .describe('impress-timeout', 'Max impress execution time by default 47000') 34 | .describe('impress-attempt-timeout', 'Max impress attempt time by default 20000') 35 | .describe('impress-notices', 'Add notices to impress report by default true') 36 | .describe('impress-resources-logging', 'Add logging for impress resources received by default false') 37 | .describe('impress-warnings', 'Add warnings to impress report by default true') 38 | .describe('config', 'Path to config file, can be multiple') 39 | .help('h') 40 | .alias('h', 'help') 41 | .epilog('impresser (https://github.com/icons8/impresser)') 42 | .argv, 43 | 44 | options; 45 | 46 | options = argv; 47 | options.config = argv._.concat(options.config); 48 | 49 | new Application(options).run(); 50 | -------------------------------------------------------------------------------- /lib/impress/Deferred.js: -------------------------------------------------------------------------------- 1 | const 2 | DEFAULT_TIMEOUT = 60000; 3 | 4 | var 5 | logger = require('../logger'); 6 | 7 | module.exports = Deferred; 8 | 9 | function Deferred(url, options) { 10 | options = options || {}; 11 | 12 | this.url = url; 13 | this.pending = true; 14 | this.watchers = []; 15 | this.postResolveListeners = []; 16 | this.error = null; 17 | this.result = null; 18 | 19 | this.createdTime = Date.now(); 20 | this.timeout = options.maxQueueTimeout || DEFAULT_TIMEOUT; 21 | 22 | this.startExecutionTime = null; 23 | this.executionTime = null; 24 | this.performTime = null; 25 | 26 | this._initPromise(); 27 | this._startTimeout(); 28 | } 29 | 30 | Deferred.prototype = { 31 | 32 | _startTimeout: function() { 33 | var 34 | self = this; 35 | 36 | if (!this.timeoutId) { 37 | this.timeoutId = setTimeout( 38 | function() { 39 | self.reject('FAIL page "' + self.url + '" deferred timeout ' + self.timeout); 40 | }, 41 | this.timeout 42 | ); 43 | } 44 | }, 45 | 46 | _stopTimeout: function() { 47 | this.timeoutId && clearTimeout(this.timeoutId); 48 | }, 49 | 50 | startExecution: function() { 51 | if (this.startExecutionTime) { 52 | return; 53 | } 54 | this.startExecutionTime = Date.now(); 55 | }, 56 | 57 | destroy: function() { 58 | this._stopTimeout(); 59 | this.watchers = undefined; 60 | this.promise = undefined; 61 | this.result = undefined; 62 | this.error = undefined; 63 | this.postResolveListeners = undefined; 64 | }, 65 | 66 | _initPromise: function() { 67 | var 68 | self = this, 69 | noop = function() {}; 70 | 71 | this.promise = function(fn) { 72 | if (!self.pending) { 73 | try { 74 | fn && fn(self.error, self.result); 75 | } 76 | catch(e) { 77 | logger.error(e); 78 | } 79 | return noop; 80 | } 81 | if (typeof fn == 'function') { 82 | self.watchers.push(fn); 83 | } 84 | return function() { 85 | var 86 | position; 87 | while( (position = self.watchers.indexOf(fn)) != -1 ) { 88 | self.watchers.splice(position, 1); 89 | } 90 | } 91 | }; 92 | }, 93 | 94 | getRemainedTimeout: function() { 95 | return this.timeout - (Date.now() - this.createdTime); 96 | }, 97 | 98 | resolve: function(result) { 99 | this.finish(null, result); 100 | }, 101 | 102 | reject: function(error) { 103 | this.finish(error); 104 | }, 105 | 106 | finish: function(error, result) { 107 | var 108 | watchers, 109 | postResolveListeners, 110 | time; 111 | 112 | if (!this.pending) { 113 | return; 114 | } 115 | 116 | time = Date.now(); 117 | 118 | this.executionTime = this.startExecutionTime 119 | ? time - this.startExecutionTime 120 | : 0; 121 | this.performTime = time - this.createdTime; 122 | 123 | this.pending = false; 124 | this._stopTimeout(); 125 | 126 | this.error = error; 127 | this.result = result; 128 | 129 | if (result && typeof result == 'object') { 130 | result.executionTime = this.executionTime; 131 | result.performTime = this.performTime; 132 | } 133 | 134 | watchers = this.watchers.slice(); 135 | this.watchers.length = 0; 136 | 137 | watchers.forEach(function(fn) { 138 | try { 139 | fn && fn(error, result); 140 | } 141 | catch(e) { 142 | logger.error(e); 143 | } 144 | }); 145 | 146 | postResolveListeners = this.postResolveListeners.slice(); 147 | this.postResolveListeners.length = 0; 148 | 149 | postResolveListeners.forEach(function(fn) { 150 | try { 151 | fn && fn(); 152 | } 153 | catch(e) { 154 | logger.error(e); 155 | } 156 | }); 157 | }, 158 | 159 | postResolve: function(fn) { 160 | var 161 | self = this; 162 | if (!this.pending) { 163 | fn && fn(); 164 | return; 165 | } 166 | if (typeof fn == 'function') { 167 | this.postResolveListeners.unshift(fn); 168 | } 169 | return function() { 170 | var 171 | position; 172 | while( (position = self.postResolveListeners.indexOf(fn)) != -1 ) { 173 | self.postResolveListeners.splice(position, 1); 174 | } 175 | } 176 | } 177 | 178 | }; 179 | -------------------------------------------------------------------------------- /lib/Application.js: -------------------------------------------------------------------------------- 1 | const 2 | DEFAULT_CONFIG_FILENAME = 'impressconfig.js'; 3 | 4 | var 5 | Server = require('./Server'), 6 | merge = require('./merge'), 7 | path = require('path'), 8 | fs = require('fs'), 9 | logger = require('./logger'); 10 | 11 | module.exports = Application; 12 | 13 | function Application(options) { 14 | this.options = options || {}; 15 | 16 | this._server = null; 17 | this._storage = null; 18 | this._htmlFilters = []; 19 | this._init(); 20 | } 21 | 22 | Application.prototype = { 23 | 24 | _init: function() { 25 | var 26 | filtered = {}, 27 | options = this.options; 28 | 29 | if (this.options.config) { 30 | Object.keys(options).forEach(function(key) { 31 | if (typeof options[key] != 'undefined') { 32 | filtered[key] = options[key]; 33 | } 34 | }); 35 | 36 | this.options = merge(this._getParsedConfig(this.options.config), filtered); 37 | } 38 | this.performConfigFile(DEFAULT_CONFIG_FILENAME, true); 39 | }, 40 | 41 | _getParsedConfig: function(/* ...configs */) { 42 | var 43 | optional = false, 44 | args, 45 | result = {}; 46 | 47 | args = Array.prototype.slice.apply(arguments); 48 | if (typeof args[args.length - 1] == 'boolean') { 49 | optional = args.pop(); 50 | } 51 | 52 | args.forEach(function(config) { 53 | if (!Array.isArray(config)) { 54 | config = [config]; 55 | } 56 | 57 | config.forEach(function(config) { 58 | if (!config) { 59 | return; 60 | } 61 | if (typeof config != 'string') { 62 | _merge(config); 63 | return; 64 | } 65 | try { 66 | _merge( 67 | JSON.parse( 68 | fs.readFileSync(config) 69 | ) 70 | ); 71 | } 72 | catch(error) { 73 | if ( !(optional && error instanceof Error && error.code == 'ENOENT') ) { 74 | logger.error('Could not parse config file "'+config+'"', error); 75 | } 76 | } 77 | }); 78 | }); 79 | 80 | function _merge(config) { 81 | if (config && typeof config == 'object' && !Array.isArray(config)) { 82 | merge(result, config); 83 | } 84 | else { 85 | logger.warn('Invalid config data:', JSON.stringify(config)); 86 | } 87 | } 88 | return result; 89 | }, 90 | 91 | performConfigFile: function(fileName, optional) { 92 | var 93 | configurator; 94 | try { 95 | configurator = require(path.resolve(fileName)) 96 | } 97 | catch(error) { 98 | if (!optional || error.code != 'MODULE_NOT_FOUND') { 99 | logger.error(error); 100 | } 101 | return; 102 | } 103 | try { 104 | configurator(this); 105 | } 106 | catch(error) { 107 | logger.error('Error in configuration file', error); 108 | } 109 | }, 110 | 111 | addConfig: function(/* ...configs */) { 112 | merge( 113 | this.options, 114 | this._getParsedConfig.apply( 115 | this, 116 | Array.prototype.slice.call(arguments) 117 | ) 118 | ); 119 | return this; 120 | }, 121 | 122 | addBlockedResources: function(/* ...blockedResources */) { 123 | var 124 | self = this, 125 | blockedResources; 126 | 127 | blockedResources = this.options.blockedResources = this.options.blockedResources || []; 128 | if (!Array.isArray(this.options.blockedResources)) { 129 | this.options.blockedResources = [this.options.blockedResources]; 130 | } 131 | 132 | Array.prototype.slice.apply(arguments) 133 | .forEach(function(resource) { 134 | if (typeof resource == 'string') { 135 | blockedResources.push(resource); 136 | } 137 | else if (Array.isArray(resource)) { 138 | self.addBlockedResources.apply( 139 | self, 140 | resource 141 | ); 142 | } 143 | else if (resource && typeof resource == 'object' && typeof resource.getResources == 'function') { 144 | self.addBlockedResources(resource.getResources()); 145 | } 146 | }); 147 | 148 | return this; 149 | }, 150 | 151 | addHtmlFilters: function(/* ...filters */) { 152 | 153 | Array.prototype.push.apply( 154 | this._htmlFilters, 155 | Array.prototype.slice.apply(arguments) 156 | ); 157 | 158 | return this; 159 | }, 160 | 161 | addStorage: function(storage) { 162 | this._storage = storage; 163 | }, 164 | 165 | run: function() { 166 | this._server = new Server( 167 | this.options, 168 | this._storage, 169 | this._htmlFilters 170 | ); 171 | this._server.run(); 172 | } 173 | 174 | }; -------------------------------------------------------------------------------- /lib/phantom/Instance.js: -------------------------------------------------------------------------------- 1 | const 2 | DEFAULT_EXEC_TIMEOUT = 5000, 3 | DEFAULT_PHANTOM_IMPRESS_TIMEOUT = 19000, 4 | INVOKE_READY_WAIT_TIME = 300 5 | ; 6 | 7 | var 8 | path = require('path'), 9 | spawn = require('child_process').spawn, 10 | EventEmitter = require('events').EventEmitter, 11 | logger = require('../logger') 12 | ; 13 | 14 | module.exports = Instance; 15 | 16 | function Instance(portPool, options) { 17 | options = options || {}; 18 | 19 | this.portPool = portPool; 20 | 21 | this.binary = options.phantomBinary || 'phantomjs'; 22 | this.scriptPath = options.phantomScript || path.join(__dirname, '../../phantomjs/impress.js'); 23 | this.notices = options.impressNotices || typeof options.impressNotices == 'undefined'; 24 | this.warnings = options.impressWarnings || typeof options.impressWarnings == 'undefined'; 25 | this.resourcesLogging = options.impressResourcesLogging; 26 | 27 | this.timeout = options.phantomImpressTimeout || DEFAULT_PHANTOM_IMPRESS_TIMEOUT; 28 | this.execTimeout = options.phantomExecTimeout || DEFAULT_EXEC_TIMEOUT; 29 | this.blockedResources = options.blockedResources; 30 | 31 | if (options.phantomArgs) { 32 | this._addArgs(options.phantomArgs); 33 | } 34 | 35 | this.args = { 36 | "--ignore-ssl-errors": 'true', 37 | "--ssl-protocol": 'tlsv1' 38 | }; 39 | 40 | this.serverPort = null; 41 | this.destroyed = false; 42 | 43 | this.startTime = null; 44 | 45 | this._em = new EventEmitter(); 46 | 47 | this._process = null; 48 | this._execTimeoutId = null; 49 | this._execStatusTimeoutId = null; 50 | 51 | this._killed = false; 52 | this._started = false; 53 | 54 | this._init(); 55 | } 56 | 57 | Instance.prototype = { 58 | 59 | _init: function() { 60 | this._initReadyPromise(); 61 | this._initClosePromise(); 62 | }, 63 | 64 | _initReadyPromise: function() { 65 | var 66 | self = this, 67 | noop = function() {}; 68 | 69 | this.readyPromise = function(fn) { 70 | if (self._killed) { 71 | logger.error('Error: Add ready listener for killed phantom instance'); 72 | return noop; 73 | } 74 | if (this._started) { 75 | try { 76 | fn && fn(); 77 | } 78 | catch(e) { 79 | logger.error(e); 80 | } 81 | } 82 | self._em.on('ready', fn); 83 | 84 | return function() { 85 | self._em.removeListener('ready', fn); 86 | } 87 | } 88 | }, 89 | 90 | _initClosePromise: function() { 91 | var 92 | self = this; 93 | 94 | this.closePromise = function(fn) { 95 | if (self._killed) { 96 | try { 97 | fn && fn(); 98 | } 99 | catch(e) { 100 | logger.error(e); 101 | } 102 | } 103 | self._em.once('close', fn); 104 | 105 | return function() { 106 | self._em.removeListener('close', fn); 107 | } 108 | } 109 | }, 110 | 111 | isReady: function() { 112 | return this._started && !this._killed; 113 | }, 114 | 115 | isKilled: function() { 116 | return this._killed; 117 | }, 118 | 119 | isStarting: function() { 120 | return !this._started && !this._killed; 121 | }, 122 | 123 | run: function() { 124 | if (!this._process) { 125 | this.startTime = Date.now(); 126 | this._exec(); 127 | } 128 | }, 129 | 130 | destroy: function() { 131 | this._kill(); 132 | this.destroyed = true; 133 | }, 134 | 135 | _addArgs: function(args) { 136 | var 137 | collection = {}, 138 | self = this 139 | ; 140 | 141 | if (!args) { 142 | return 143 | } 144 | 145 | if (typeof args == 'object') { 146 | if (Array.isArray(args)) { 147 | args.forEach(function(arg) { 148 | var 149 | parts = arg.split('='); 150 | collection[parts[0]] = String(parts[1] || ''); 151 | }) 152 | } 153 | else { 154 | Object.keys(args).forEach(function(key) { 155 | collection[key] = String(args[key]); 156 | }); 157 | } 158 | } 159 | else if (typeof args == 'string') { 160 | args = args.replace(/\s*=\s*/g, '='); 161 | args.split(/\s+/).forEach(function(arg) { 162 | var 163 | parts; 164 | if (arg) { 165 | parts = String(arg).split('='); 166 | collection[parts[0]] = String(parts[1] || ''); 167 | } 168 | }); 169 | } 170 | 171 | Object.keys(collection).forEach(function(key) { 172 | var 173 | arg = collection[key]; 174 | 175 | if (!/^--[^-]/.test(key)) { 176 | if (key[0] == '-') { 177 | key = key.slice(1); 178 | } 179 | key = '--' + key; 180 | } 181 | self.args[key] = arg; 182 | }); 183 | 184 | return this.args; 185 | }, 186 | 187 | _createInstancePort: function() { 188 | if (this.serverPort) { 189 | this._releaseInstancePort(); 190 | } 191 | this.serverPort = this.portPool.getPort(); 192 | return this.serverPort; 193 | }, 194 | 195 | _releaseInstancePort: function() { 196 | if (this.serverPort) { 197 | this.portPool.delayedReleasePort(this.serverPort); 198 | this.serverPort = null; 199 | } 200 | }, 201 | 202 | _getExecArgs: function() { 203 | var 204 | args = this.args, 205 | builder; 206 | 207 | builder = []; 208 | 209 | Array.prototype.push.apply(builder, 210 | Object.keys(this.args).map(function(key) { 211 | return key + '=' + args[key]; 212 | }) 213 | ); 214 | 215 | builder.push( 216 | this.scriptPath, 217 | '--server-port=' + this._createInstancePort(), 218 | '--timeout=' + this.timeout 219 | ); 220 | 221 | if (this.notices) { 222 | builder.push('--notices'); 223 | } 224 | if (this.warnings) { 225 | builder.push('--warnings'); 226 | } 227 | if (this.resourcesLogging) { 228 | builder.push('--resources-logging'); 229 | } 230 | if (Array.isArray(this.blockedResources) && this.blockedResources.length > 0) { 231 | Array.prototype.push.apply( 232 | builder, 233 | this.blockedResources.map(function(resource) { 234 | return '--blocked-resources=' + base64encode(resource); 235 | }) 236 | ); 237 | builder.push('--blocked-resources-base64-encoded'); 238 | } 239 | 240 | return builder; 241 | 242 | function base64encode(string) { 243 | return new Buffer(string || '').toString('base64'); 244 | } 245 | }, 246 | 247 | _exec: function() { 248 | var 249 | self = this, 250 | phantomjs, 251 | stdout, 252 | stderr, 253 | time = Date.now(); 254 | 255 | if (this._process) { 256 | logger.warn('Warn: Phantom process already executed'); 257 | return; 258 | } 259 | 260 | function errorHandler(error) { 261 | self._cancelExecTimeout(); 262 | if (self._killed) { 263 | return; 264 | } 265 | if (error instanceof Buffer) { 266 | error = error.toString(); 267 | } 268 | logger.error('Error: Phantom process error', error || ''); 269 | self._kill(); 270 | } 271 | 272 | function closeHandler(code) { 273 | if (code) { 274 | errorHandler('Exit with code: ' + code); 275 | return; 276 | } 277 | self._cancelExecTimeout(); 278 | logger.log('Info: Phantom process closed'); 279 | if (self._killed) { 280 | return; 281 | } 282 | self._kill(); 283 | } 284 | 285 | function ready() { 286 | self._cancelExecTimeout(); 287 | if (self._killed || self._started) { 288 | return; 289 | } 290 | self._started = true; 291 | logger.log('Ok: phantom process started in time', Date.now() - time, 'ms'); 292 | self._em.emit('ready'); 293 | } 294 | 295 | this._execTimeoutId = setTimeout( 296 | function() { 297 | errorHandler('Exec timeout', self.execTimeout); 298 | }, 299 | this.execTimeout 300 | ); 301 | 302 | try { 303 | phantomjs = spawn(this.binary, this._getExecArgs()); 304 | phantomjs.on('error', errorHandler); 305 | phantomjs.on('close', closeHandler); 306 | 307 | stdout = phantomjs.stdout; 308 | stdout.on('error', errorHandler); 309 | stdout.on('data', function(data) { 310 | if (data instanceof Buffer) { 311 | data = data.toString(); 312 | } 313 | logger.log('Info: Phantom process output:', data); 314 | 315 | if (!self._execStatusTimeoutId && !self._started) { 316 | self._execStatusTimeoutId = setTimeout( 317 | function() { 318 | ready(); 319 | }, 320 | INVOKE_READY_WAIT_TIME 321 | ); 322 | } 323 | 324 | }); 325 | 326 | stderr = phantomjs.stderr; 327 | stderr.on('error', errorHandler); 328 | stderr.on('data', errorHandler); 329 | 330 | this._process = phantomjs; 331 | } 332 | catch(e) { 333 | errorHandler(e); 334 | } 335 | 336 | }, 337 | 338 | _cancelExecStatusTimeout: function() { 339 | this._execStatusTimeoutId && clearTimeout(this._execStatusTimeoutId); 340 | this._execStatusTimeoutId = null; 341 | }, 342 | 343 | _cancelExecTimeout: function() { 344 | this._execTimeoutId && clearTimeout(this._execTimeoutId); 345 | this._execTimeoutId = null; 346 | this._cancelExecStatusTimeout(); 347 | }, 348 | 349 | _kill: function() { 350 | var 351 | killed = this._killed; 352 | 353 | if (!killed) { 354 | logger.log('Info: Phantom process kill on port', this.serverPort); 355 | } 356 | 357 | this._cancelExecTimeout(); 358 | try { 359 | this._process && this._process.kill(); 360 | this._releaseInstancePort(); 361 | } 362 | catch(e) { 363 | logger.error('Error: Phantom kill process error', e); 364 | } 365 | this._process = null; 366 | this._killed = true; 367 | 368 | if (!killed) { 369 | this._em.emit('close'); 370 | this._em.removeAllListeners(); 371 | } 372 | } 373 | 374 | 375 | }; 376 | -------------------------------------------------------------------------------- /lib/Server.js: -------------------------------------------------------------------------------- 1 | const 2 | DEFAULT_BASE_URL = 'http://icons8.com', 3 | DEFAULT_SERVER_PORT = 8497; 4 | 5 | var 6 | connect = require('connect'), 7 | responseTime = require('response-time'), 8 | http = require('http'), 9 | ImpressQueue = require('./impress/Queue'), 10 | MemoryStorage = require('./storage/MemoryStorage'), 11 | NullStorage = require('./storage/NullStorage'), 12 | path = require('path'), 13 | zlib = require('zlib'), 14 | stream = require('stream'), 15 | Readable = stream.Readable, 16 | urlLib = require('url'), 17 | qs = require('querystring'), 18 | httpProxy = require('http-proxy'), 19 | logger = require('./logger') 20 | ; 21 | 22 | module.exports = Server; 23 | 24 | function Server(options, storage, htmlFilters) { 25 | options = options || {}; 26 | 27 | this.options = options; 28 | this.setBaseUrl(options.baseUrl || DEFAULT_BASE_URL); 29 | this.port = options.serverPort || DEFAULT_SERVER_PORT; 30 | 31 | this.forceAllowed = options.forceAllowed; 32 | this.proxy = options.proxy || typeof options.proxy == 'undefined'; 33 | this.frontend = options.frontend || typeof options.frontend == 'undefined'; 34 | this.content = options.content || typeof options.content == 'undefined'; 35 | 36 | this.storage = storage; 37 | this.htmlFilters = htmlFilters; 38 | 39 | this._initStorage(); 40 | } 41 | 42 | Server.prototype = { 43 | 44 | setBaseUrl: function(baseUrl) { 45 | baseUrl = String(baseUrl || '').trim(); 46 | while( baseUrl.lastIndexOf('/') == baseUrl.length-1 ) { 47 | baseUrl = baseUrl.slice(0, -1); 48 | } 49 | this.baseUrl = baseUrl; 50 | }, 51 | 52 | run: function() { 53 | 54 | this.instance = connect(); 55 | 56 | this._registerServerExtension(); 57 | this._registerImpressPerformer(); 58 | this._registerProxyServer(); 59 | this._createQueue(); 60 | this._startServer(); 61 | }, 62 | 63 | _initStorage: function() { 64 | var 65 | storage = this.storage; 66 | 67 | if ( !(storage && typeof storage == 'object' && typeof storage.get == 'function' && typeof storage.put == 'function') ) { 68 | if (typeof this.options.storage == 'boolean' && !this.options.storage) { 69 | this.storage = new NullStorage(this.options); 70 | } 71 | else { 72 | this.storage = new MemoryStorage(this.options); 73 | } 74 | } 75 | 76 | }, 77 | 78 | _registerServerExtension: function() { 79 | var 80 | self = this; 81 | 82 | this.instance.use(responseTime()); 83 | this.instance.use(function(req, res, next) { 84 | var 85 | requestTime = Date.now(), 86 | acceptEncoding = req.headers['accept-encoding'] || ''; 87 | 88 | req.on('error', function(error) { 89 | logger.error('REQUEST ERROR', error); 90 | }); 91 | res.on('error', function(error) { 92 | logger.error('RESPONSE ERROR', error); 93 | }); 94 | 95 | res.send = function(code, content, headers, contentType) { 96 | var 97 | stream, 98 | deflated = /\bdeflate\b/i.test(acceptEncoding), 99 | gzipped = /\bgzip\b/i.test(acceptEncoding); 100 | 101 | if (typeof code != 'number') { 102 | headers = content; 103 | content = code; 104 | code = 200; 105 | } 106 | 107 | if (!self.content) { 108 | content = null; 109 | } 110 | 111 | if (typeof content == 'undefined' || content === null) { 112 | content = ''; 113 | } 114 | if (headers && typeof headers == 'object') { 115 | if (headers) { 116 | Object.keys(headers).forEach(function(header) { 117 | res.setHeader(header, headers[header]); 118 | }); 119 | } 120 | } 121 | if (typeof content != 'string') { 122 | content = JSON.stringify(content); 123 | res.setHeader('Content-Type', 'application/json'); 124 | } 125 | else if (/(text|xml|html)/i.test(contentType)) { 126 | res.setHeader('Content-Type', contentType); 127 | } 128 | else { 129 | res.setHeader('Content-Type', 'text/html; charset=utf-8'); 130 | } 131 | res.statusCode = code; 132 | 133 | if (!deflated && !gzipped || !content) { 134 | res.end(content); 135 | end(); 136 | } 137 | else { 138 | stream = new Readable; 139 | stream.on('end', end); 140 | stream.on('error', function(error) { 141 | logger.error('RESPONSE DATA ERROR', error); 142 | }); 143 | stream._read = function() { 144 | this.push(content); 145 | this.push(null); 146 | }; 147 | if (gzipped) { 148 | res.setHeader('Content-Encoding', 'gzip'); 149 | stream.pipe(zlib.createGzip()).pipe(res); 150 | } 151 | else { 152 | res.setHeader('Content-Encoding', 'deflate'); 153 | stream.pipe(zlib.createDeflate()).pipe(res); 154 | } 155 | } 156 | 157 | function end() { 158 | logger.log('Response Time:', Date.now() - requestTime, 'ms'); 159 | } 160 | }; 161 | next(); 162 | }); 163 | }, 164 | 165 | _registerImpressPerformer: function() { 166 | var 167 | self = this 168 | ; 169 | 170 | this.instance.use(function(req, res, next) { 171 | var 172 | deferred, 173 | url, 174 | message, 175 | absoluteUrl, 176 | baseUrl = self.baseUrl || '', 177 | parsedUrl, 178 | query, 179 | queryString, 180 | stored, 181 | time, 182 | force = false, 183 | fragment = null; 184 | 185 | if (req.method.toUpperCase() != 'GET') { 186 | next(); 187 | return; 188 | } 189 | 190 | parsedUrl = urlLib.parse(req.url); 191 | query = qs.parse(parsedUrl.query || ''); 192 | 193 | function performForce(value) { 194 | force = /^\s*(on|yes|y|true)\s*/i.test(value); 195 | } 196 | 197 | if (!self.frontend) { 198 | if (/^\/https?:\/\//.test(parsedUrl.pathname)) { 199 | performForce(query.force || query['impress-force'] || query['impress_force']); 200 | parsedUrl = urlLib.parse(decodeURIComponent(parsedUrl.pathname.slice(1))); 201 | baseUrl = parsedUrl.protocol + '//' + parsedUrl.host; 202 | if (query.hasOwnProperty('_escaped_fragment_')) { 203 | fragment = query._escaped_fragment_; 204 | } 205 | query = qs.parse(parsedUrl.query || ''); 206 | } 207 | else { 208 | if (parsedUrl.pathname != '/') { 209 | logger.error('Unresolved path', parsedUrl.pathname); 210 | res.send(404); 211 | return; 212 | } 213 | performForce(query.force || query['impress-force'] || query['impress_force']); 214 | 215 | parsedUrl = urlLib.parse(query.url || query.uri || '/'); 216 | if (parsedUrl.protocol && parsedUrl.host) { 217 | baseUrl = parsedUrl.protocol + ( parsedUrl.slashes ? '//' : '/' ) + parsedUrl.host; 218 | } 219 | query = qs.parse(parsedUrl.query || ''); 220 | } 221 | } 222 | else { 223 | performForce(req.headers['x-impress-force'] || req.headers['impress-force']); 224 | } 225 | 226 | url = path.normalize(parsedUrl.pathname); 227 | 228 | if (!/^[/\\]/.test(url)) { 229 | url = path.sep + url; 230 | } 231 | 232 | if (query.hasOwnProperty('_escaped_fragment_')) { 233 | fragment = query._escaped_fragment_; 234 | delete query._escaped_fragment_; 235 | } 236 | 237 | queryString = qs.stringify(query); 238 | if (queryString) { 239 | url += '?' + queryString; 240 | } 241 | 242 | if (fragment) { 243 | url += '#!' + fragment; 244 | } 245 | 246 | if (baseUrl.lastIndexOf('/') == baseUrl.length - 1) { 247 | baseUrl = baseUrl.slice(0, -1); 248 | } 249 | absoluteUrl = baseUrl + url; 250 | 251 | if (self.proxy) { 252 | if (/\.(json|js|css|xml|less|png|jpe?g|gif|svg|pdf|ico|mp3|wmv|avi|mpe?g|tiff?|wav|mov|mp4|m4a|swf|flv|m4v|ttf|woff2?|eot)$/i.test(parsedUrl.pathname)) { 253 | next(); 254 | return; 255 | } 256 | } 257 | 258 | if (self.forceAllowed && force) { 259 | perform(); 260 | } 261 | else { 262 | self.storage.get(absoluteUrl, function(err, result) { 263 | if (err) { 264 | logger.error('Error storage: Could not get stored page', absoluteUrl, err); 265 | } 266 | else if (result) { 267 | logger.log('CACHE', result.httpStatusCode, absoluteUrl); 268 | res.send(result.httpStatusCode, result.content, result.httpHeaders, result.contentType); 269 | return; 270 | } 271 | perform(); 272 | }); 273 | } 274 | 275 | function perform() { 276 | if (!self.queue.hasLimit()) { 277 | deferred = self.queue.add(absoluteUrl); 278 | 279 | deferred.promise(function(err, result) { 280 | if (err) { 281 | message = 'FAIL page "' + absoluteUrl + '" could not be impressed.'; 282 | logger.error(message, err); 283 | 284 | if (deferred.getRemainedTimeout() <= 0) { 285 | res.send(504); 286 | } 287 | else { 288 | res.send(503); 289 | } 290 | 291 | return; 292 | } 293 | res.send(result.httpStatusCode, result.content, result.httpHeaders, result.contentType); 294 | 295 | self.storage.put(result, function(err) { 296 | if (err) { 297 | logger.error('Error storage: Could not store page', absoluteUrl, err); 298 | } 299 | }); 300 | 301 | logger.log('Perform time:', result.performTime, 'ms', 'Execution time:', result.executionTime, 'ms'); 302 | }); 303 | deferred.postResolve(function() { 304 | deferred.destroy(); 305 | deferred = undefined; 306 | }); 307 | } 308 | else { 309 | message = 'QUEUE LIMIT "' + self.queue.maxSize + '" page "' + absoluteUrl + '" discarded.'; 310 | logger.error(message); 311 | res.send(502, message); 312 | } 313 | } 314 | 315 | }); 316 | }, 317 | 318 | _registerProxyServer: function() { 319 | var 320 | proxy; 321 | 322 | if (!this.proxy) { 323 | return; 324 | } 325 | 326 | proxy = httpProxy.createProxyServer({ 327 | target: this.baseUrl, 328 | changeOrigin: true 329 | }); 330 | 331 | this.instance.use(function(req, res) { 332 | logger.log('Proxy url:', req.url, 'method:', req.method); 333 | proxy.web(req, res, function(err) { 334 | logger.error('PROXY ERROR for url', req.url, 'and method', req.method, 'with message', err); 335 | }); 336 | }); 337 | proxy.on('error', function(error) { 338 | logger.error('PROXY ERROR', error); 339 | }); 340 | }, 341 | 342 | _startServer: function() { 343 | http.createServer(this.instance).listen(this.port); 344 | logger.log('Server created on port', this.port); 345 | this.instance.on('error', function(error) { 346 | logger.error('SERVER INSTANCE ERROR', error); 347 | }); 348 | }, 349 | 350 | _createQueue: function() { 351 | this.queue = new ImpressQueue(this.options, this.htmlFilters); 352 | this.queue.prepare(); 353 | } 354 | 355 | }; 356 | -------------------------------------------------------------------------------- /lib/impress/Instance.js: -------------------------------------------------------------------------------- 1 | const 2 | DEFAULT_PHANTOM_TTL = 1800000, 3 | DEFAULT_IMPRESS_ATTEMPT_TIMEOUT = 20000, 4 | DEFAULT_IMPRESS_TIMEOUT = 47000, 5 | PHANTOM_TTL_SPREAD_FACTOR = .2, 6 | TIMEOUT_APPENDIX = 100, 7 | MIN_PHANTOM_RESTART_INTERVAL = 1000 8 | ; 9 | 10 | var 11 | http = require('http'), 12 | qs = require('querystring'), 13 | HtmlFilter = require('../html/Filter'), 14 | PhantomInstance = require('../phantom/Instance'), 15 | logger = require('../logger') 16 | ; 17 | 18 | module.exports = Instance; 19 | 20 | function Instance(portPool, htmlFilters, options) { 21 | options = options || {}; 22 | 23 | this.options = options; 24 | 25 | this.attemptTimeout = options.impressAttemptTimeout || DEFAULT_IMPRESS_ATTEMPT_TIMEOUT; 26 | this.timeout = options.impressTimeout || DEFAULT_IMPRESS_TIMEOUT; 27 | this.ttl = options.phantomTtl || DEFAULT_PHANTOM_TTL; 28 | this.minPhantomRestartInterval = options.minPhantomRestartInterval || MIN_PHANTOM_RESTART_INTERVAL; 29 | this.notices = options.loggingImpressNotices; 30 | this.warnings = options.loggingImpressWarnings; 31 | 32 | this.htmlFilter = new HtmlFilter(htmlFilters, options); 33 | this.portPool = portPool; 34 | 35 | this.deferred = null; 36 | this.pending = false; 37 | this.destroyed = false; 38 | 39 | this._phantom = null; 40 | this._applyPending = false; 41 | this._impressTimeoutId = null; 42 | this._impressAttemptTimeoutId = null; 43 | this._phantomRestartDelayId = null; 44 | } 45 | 46 | Instance.prototype = { 47 | 48 | destroy: function() { 49 | if (this.deferred) { 50 | this.deferred.reject('Instance destroyed'); 51 | } 52 | this.deferred = null; 53 | this._phantom && this._phantom.destroy(); 54 | this._phantom = null; 55 | 56 | this._cancelPhantomRestartDelay(); 57 | this._cancelTtlTimeout(); 58 | this._cancelImpressTimeout(); 59 | this._abortImpressAttempt(); 60 | 61 | this.destroyed = true; 62 | }, 63 | 64 | prepare: function() { 65 | this._phantomStart(); 66 | }, 67 | 68 | run: function(deferred) { 69 | var 70 | self = this, 71 | timeout; 72 | 73 | if (this.pending) { 74 | deferred.reject('Fail: Impress process already pending. Page', deferred.url, 'rejected'); 75 | return; 76 | } 77 | if (this.destroyed) { 78 | deferred.reject('Fail: Impress instance already destroyed. Page', deferred.url, 'rejected'); 79 | return; 80 | } 81 | 82 | deferred.startExecution(); 83 | 84 | timeout = Math.min(deferred.getRemainedTimeout() - TIMEOUT_APPENDIX, this.timeout); 85 | timeout = Math.max(timeout, 0); 86 | 87 | this._impressTimeoutId = setTimeout( 88 | function() { 89 | deferred.reject('Fail: Page "' + deferred.url + '" impress timeout ' + timeout); 90 | }, 91 | timeout 92 | ); 93 | 94 | this.pending = true; 95 | this.deferred = deferred; 96 | 97 | this._phantomStart(); 98 | this._apply(); 99 | 100 | deferred.promise(function(err) { 101 | self._cancelImpressTimeout(); 102 | if (err) { 103 | self._phantomRestart(); 104 | } 105 | self.pending = false; 106 | self.deferred = null; 107 | }); 108 | }, 109 | 110 | _phantomStart: function() { 111 | if (!this._phantom) { 112 | this._phantomRestart(); 113 | } 114 | }, 115 | 116 | _phantomRestart: function() { 117 | var 118 | delay = null, 119 | self = this, 120 | killed, 121 | phantom; 122 | 123 | if (this._phantom) { 124 | if (this._phantom.isStarting()) { 125 | return; 126 | } 127 | 128 | delay = this._phantom.startTime && this.minPhantomRestartInterval - (Date.now() - this._phantom.startTime); 129 | killed = this._phantom.isKilled(); 130 | phantom = this._phantom; 131 | this._phantom = null; 132 | phantom.destroy(); 133 | if (!killed) { 134 | return; 135 | } 136 | } 137 | 138 | if (this._phantomRestartDelayId) { 139 | return; 140 | } 141 | 142 | if (delay && delay > 0) { 143 | this._phantomRestartDelayId = setTimeout(run, delay); 144 | } 145 | else { 146 | run(); 147 | } 148 | 149 | function run() { 150 | self._cancelPhantomRestartDelay(); 151 | self._phantom = new PhantomInstance(self.portPool, self.options); 152 | self._phantom.run(); 153 | self._phantom.readyPromise(function() { 154 | self._resetTtlTimeout(); 155 | self._apply(); 156 | }); 157 | self._phantom.closePromise(function() { 158 | logger.log('Info: Phantom instance restart'); 159 | self._cancelTtlTimeout(); 160 | self._phantomRestart(); 161 | }); 162 | } 163 | 164 | }, 165 | 166 | _cancelPhantomRestartDelay: function() { 167 | this._phantomRestartDelayId && clearTimeout(this._phantomRestartDelayId); 168 | this._phantomRestartDelayId = null; 169 | }, 170 | 171 | _resetTtlTimeout: function() { 172 | var 173 | self = this; 174 | 175 | this._cancelTtlTimeout(); 176 | this._ttlTimeoutId = setTimeout( 177 | function() { 178 | if (self.deferred) { 179 | self.deferred.promise(function() { 180 | self._phantomRestart(); 181 | }); 182 | } 183 | else { 184 | self._phantomRestart(); 185 | } 186 | }, 187 | this.ttl + Math.ceil(Math.random() * this.ttl * PHANTOM_TTL_SPREAD_FACTOR) 188 | ) 189 | }, 190 | 191 | _cancelTtlTimeout: function() { 192 | this._ttlTimeoutId && clearTimeout(this._ttlTimeoutId); 193 | this._ttlTimeoutId = null; 194 | }, 195 | 196 | 197 | _apply: function() { 198 | var 199 | self = this, 200 | deferred, 201 | url, 202 | finished = false, 203 | requestOptions, 204 | requestTime = Date.now(), 205 | responseTime = null, 206 | unlink, 207 | req 208 | ; 209 | 210 | if (!this.deferred || this._applyPending || !this._phantom || !this._phantom.isReady()) { 211 | return; 212 | } 213 | this._applyPending = true; 214 | 215 | deferred = this.deferred; 216 | url = deferred.url; 217 | 218 | unlink = deferred.promise(function() { 219 | finished = true; 220 | self._applyPending = false; 221 | self._abortImpressAttempt(); 222 | }); 223 | 224 | this._impressAttemptTimeoutId = setTimeout( 225 | function() { 226 | logger.error('Impress attempt timeout', self.attemptTimeout); 227 | retry(); 228 | }, 229 | this.attemptTimeout 230 | ); 231 | 232 | function retry() { 233 | self._applyPending = false; 234 | self._abortImpressAttempt(); 235 | if (finished) { 236 | return; 237 | } 238 | finished = true; 239 | self._phantomRestart(); 240 | unlink && unlink(); 241 | } 242 | 243 | function done(result) { 244 | self._abortImpressAttempt(); 245 | if (finished) { 246 | return; 247 | } 248 | self._performImpressReport(result); 249 | finished = true; 250 | 251 | if (!result.contentType || /html/i.test(result.contentType)) { 252 | self.htmlFilter.apply(result.content, function(err, content) { 253 | if (err) { 254 | logger.error('Error: Html filter error', err); 255 | } 256 | else { 257 | result.content = content; 258 | } 259 | deferred.finish(null, result); 260 | }); 261 | } 262 | else { 263 | deferred.finish(null, result); 264 | } 265 | } 266 | 267 | requestOptions = { 268 | port: this._phantom.serverPort, 269 | path: '/?' + qs.stringify({ url: url }) 270 | }; 271 | 272 | try { 273 | req = http.request(requestOptions, function(res) { 274 | var 275 | result = ''; 276 | if (finished) { 277 | return; 278 | } 279 | 280 | if (res.statusCode != 200) { 281 | logger.error('Error request to impress server with http status code', res.statusCode); 282 | retry(); 283 | return; 284 | } 285 | res.on('error', function(error) { 286 | if (finished) { 287 | return; 288 | } 289 | logger.error('Impress request error', error); 290 | retry(); 291 | }); 292 | 293 | res.on('data', function(chunk) { 294 | if (finished) { 295 | return; 296 | } 297 | if (!responseTime) { 298 | responseTime = Date.now() - requestTime; 299 | } 300 | result = result 301 | ? result + chunk 302 | : chunk; 303 | }); 304 | 305 | res.on('end', function() { 306 | var 307 | error = null; 308 | 309 | self._impressRequest = null; 310 | 311 | if (finished) { 312 | return; 313 | } 314 | try { 315 | result = JSON.parse(result); 316 | } 317 | catch(e) { 318 | error = 'Could not parse impress result. Expected error: ' + e; 319 | } 320 | if (!error && !result.ok) { 321 | logger.error('ERROR impress page "' + url + '":', (result.errors || []).join()); 322 | self._performImpressReport(result); 323 | retry(); 324 | return; 325 | } 326 | if (!error) { 327 | if (!result.contentType || /html/i.test(result.contentType)) { 328 | error = self._validateHtmlContentAndGetValidationError(result.content); 329 | } 330 | } 331 | 332 | if (error) { 333 | logger.error('ERROR page "' + url + '" could not be impressed. Try next attempt.', error); 334 | retry(); 335 | } 336 | else { 337 | logger.log('OK page', result.httpStatusCode, '"' + url + '"', 'in time', responseTime, 'ms'); 338 | done(result); 339 | } 340 | }); 341 | 342 | }); 343 | req.on('error', function(error) { 344 | if (finished) { 345 | return; 346 | } 347 | logger.error('Impress request error', error); 348 | retry(); 349 | }); 350 | req.end(); 351 | 352 | this._impressRequest = req; 353 | } 354 | catch(error) { 355 | logger.error('Impress request error', error); 356 | retry(); 357 | } 358 | 359 | }, 360 | 361 | _abortImpressAttempt: function() { 362 | try { 363 | this._impressRequest && this._impressRequest.abort(); 364 | } 365 | catch(e) { 366 | logger.error('Could not abort impress request', e); 367 | } 368 | this._impressRequest = null; 369 | this._impressAttemptTimeoutId && clearTimeout(this._impressAttemptTimeoutId); 370 | this._impressAttemptTimeoutId = null; 371 | }, 372 | 373 | _cancelImpressTimeout: function() { 374 | this._impressTimeoutId && clearTimeout(this._impressTimeoutId); 375 | this._impressTimeoutId = null; 376 | }, 377 | 378 | _validateHtmlContentAndGetValidationError: function(content) { 379 | if (!/^\s*(\s*$/i.test(content)) { 383 | return 'Could not found close html tag'; 384 | } 385 | return null; 386 | }, 387 | 388 | _performImpressReport: function(result) { 389 | var 390 | url = this.deferred.url; 391 | 392 | if (this.warnings && result.warnings && result.warnings.length > 0) { 393 | logger.warn('IMPRESS WARNINGS for page "' + (result.url || url) + '":\n', result.warnings.join('\n')); 394 | } 395 | if (this.notices && result.notices && result.notices.length > 0) { 396 | logger.info('IMPRESS NOTICES for page "' + (result.url || url) + '":\n', result.notices.join('\n')); 397 | } 398 | } 399 | 400 | 401 | 402 | }; 403 | -------------------------------------------------------------------------------- /phantomjs/lib/Page.js: -------------------------------------------------------------------------------- 1 | const 2 | DEFAULT_READY_CHECK_INTERVAL = 50, 3 | DEFAULT_TIMEOUT = 10000, 4 | AUTO_READY_CHECK_DELAY = 1000 5 | ; 6 | 7 | var 8 | EventEmitter = require('./EventEmitter'), 9 | PageContentPerformer = require('./PageContentPerformer'), 10 | ResourceFilter = require('./ResourceFilter'), 11 | inherit = require('./inherit'), 12 | webPage = require('webpage') 13 | ; 14 | 15 | module.exports = Page; 16 | 17 | function Page(options) { 18 | EventEmitter.call(this); 19 | 20 | options = options || {}; 21 | 22 | this.options = options; 23 | this.notices = options.notices; 24 | this.warnings = options.warnings; 25 | this.resourcesLogging = options.resourcesLogging; 26 | 27 | this._init(); 28 | 29 | this._startTime = null; 30 | this._finished = false; 31 | this._destroyed = false; 32 | 33 | this.url = options.url || ''; 34 | 35 | this._redirectUrlList = []; 36 | this._location = null; 37 | 38 | this._outputBuffer = ''; 39 | this._errorBuffer = []; 40 | this._warningBuffer = []; 41 | this._noticeBuffer = []; 42 | 43 | this._httpStatusCode = null; 44 | this._httpHeaders = null; 45 | this._contentType = null; 46 | this._ok = false; 47 | 48 | this._pageWindowLoaded = false; 49 | this._pageUrlMissedFragmentFixing = false; 50 | this._pageUrlMissedFragmentRedirectUrl = null; 51 | 52 | this._resourceResponses = {}; 53 | this._abortedResources = []; 54 | this._readyCheckInterval = DEFAULT_READY_CHECK_INTERVAL; 55 | this._timeout = options.timeout || DEFAULT_TIMEOUT; 56 | } 57 | 58 | inherit(Page, EventEmitter, { 59 | 60 | _setNetworkReplyErrorCode: function(errorCode) { 61 | // @see http://doc.qt.io/qt-5/qnetworkreply.html 62 | switch(errorCode) { 63 | case 203: 64 | this._httpStatusCode = 404; 65 | break; 66 | case 201: 67 | this._httpStatusCode = 401; 68 | break; 69 | case 0: 70 | this._httpStatusCode = 200; 71 | break; 72 | case 4: 73 | this._httpStatusCode = 504; 74 | break; 75 | default: 76 | this._httpStatusCode = 500; 77 | } 78 | }, 79 | 80 | _exit: function() { 81 | var 82 | finished = this._finished; 83 | this._finish(); 84 | if (!finished) { 85 | this._notice('Page execution time:', Date.now() - this._startTime, 'ms'); 86 | this.emit('exit', this.getResult()); 87 | } 88 | }, 89 | 90 | _exitOk: function() { 91 | if (this._finished) { 92 | return; 93 | } 94 | this._ok = true; 95 | if (!this._httpStatusCode) { 96 | this._httpStatusCode = 200; 97 | } 98 | this._exit(); 99 | }, 100 | 101 | _exitFail: function() { 102 | if (this._finished) { 103 | return; 104 | } 105 | this._ok = false; 106 | this._exit(); 107 | }, 108 | 109 | _output: function(/* ...values*/) { 110 | var 111 | args = Array.prototype.slice.call(arguments); 112 | this._outputBuffer += args.join(' '); 113 | }, 114 | 115 | _error: function(/* ...values*/) { 116 | var 117 | args = Array.prototype.slice.call(arguments); 118 | this._errorBuffer.push(args.join(' ')); 119 | }, 120 | 121 | _warning: function(/* ...values*/) { 122 | var 123 | args = Array.prototype.slice.call(arguments); 124 | this._warningBuffer.push(args.join(' ')); 125 | }, 126 | 127 | _notice: function(/* ...values*/) { 128 | var 129 | args = Array.prototype.slice.call(arguments); 130 | this._noticeBuffer.push(args.join(' ')); 131 | }, 132 | 133 | _init: function() { 134 | this._webPageInit(); 135 | this.resourceFilter = new ResourceFilter(this.options); 136 | }, 137 | 138 | _webPageInit: function() { 139 | this.page = webPage.create(); 140 | this._webPageConfigure(); 141 | this._webPageInitListeners(); 142 | }, 143 | 144 | _webPageConfigure: function() { 145 | this.page.settings.userAgent = 'Prerender Impress Impresser'; 146 | this.page.settings.loadImages = false; 147 | this.page.settings.clearMemoryCaches = true; 148 | }, 149 | 150 | _webPageInitListeners: function() { 151 | this._webPageAddErrorListener(); 152 | this._webPageAddResourceErrorListener(); 153 | this._webPageAddResourceRequestedListener(); 154 | this._webPageAddResourceReceivedListener(); 155 | this._webPageAddInitializedListener(); 156 | this._webPageAddCallbackListener(); 157 | this._webPageAddConsoleMessageListener(); 158 | }, 159 | 160 | _webPageAddErrorListener: function() { 161 | var 162 | self = this; 163 | 164 | this.page.onError = function(message, trace) { 165 | var 166 | messageBuilder = [ 167 | 'JS ERROR:', 168 | message 169 | ]; 170 | if (trace && trace.length) { 171 | messageBuilder.push('Trace:'); 172 | trace.forEach(function(step) { 173 | messageBuilder.push( 174 | ' -> ' 175 | + step.file 176 | + ': ' 177 | + step.line 178 | + (step.function 179 | ? ' (in function "' + step.function + '")' 180 | : '') 181 | ); 182 | }); 183 | } 184 | self._warning(messageBuilder.join('\n')); 185 | }; 186 | 187 | }, 188 | 189 | _webPageAddResourceErrorListener: function() { 190 | var 191 | self = this; 192 | 193 | this.page.onResourceError = function(resourceError) { 194 | var 195 | url = self.url; 196 | 197 | if (self._abortedResources.indexOf(resourceError.id) != -1 || self._pageUrlMissedFragmentFixing) { 198 | return; 199 | } 200 | 201 | self._warning( 202 | 'RESOURCE ERROR:', 203 | 'Unable to load resource (#' + resourceError.id, 204 | 'URL:' + resourceError.url + ')', 205 | 'Error code: ' + resourceError.errorCode + '.', 206 | 'Description: ' + resourceError.errorString 207 | ); 208 | if (resourceError.url == url || self._redirectUrlList.indexOf(resourceError.url) != -1) { 209 | self._setNetworkReplyErrorCode(resourceError.errorCode); 210 | } 211 | self._resourceResponses[resourceError.id] = resourceError; 212 | self._pageReadyCheck(); 213 | }; 214 | 215 | }, 216 | 217 | _webPageAddResourceRequestedListener: function() { 218 | var 219 | self = this; 220 | 221 | this.page.onResourceRequested = function(requestData, networkRequest) { 222 | var 223 | url = self.url; 224 | 225 | if ( (!self.resourceFilter.check(requestData.url)) && requestData.url != url && self._redirectUrlList.indexOf(requestData.url) == -1 ) { 226 | self._abortedResources.push(requestData.id); 227 | networkRequest.abort(); 228 | } 229 | else { 230 | self._resourceResponses[requestData.id] = null; 231 | self._pageReadyCheck(); 232 | } 233 | }; 234 | }, 235 | 236 | _webPageAddResourceReceivedListener: function() { 237 | var 238 | self = this; 239 | 240 | this.page.onResourceReceived = function(response) { 241 | var 242 | url = self.url, 243 | status; 244 | 245 | if (response.url == url || self._redirectUrlList.indexOf(response.url) != -1) { 246 | self._contentType = response.contentType; 247 | if (response.stage == 'start') { 248 | status = self._detectResourceUrlMissedFragment(response); 249 | if (response.redirectURL) { 250 | self._redirectUrlList.push( 251 | status.detected 252 | ? status.fixedRedirectUrl 253 | : response.redirectURL 254 | ); 255 | } 256 | if (status.detected) { 257 | self._resourceResponses[response.id] = response; 258 | self._fixPageUrlMissedFragment(status.fixedRedirectUrl); 259 | return; 260 | } 261 | } 262 | } 263 | 264 | if (response.stage == 'end') { 265 | self._resourceResponses[response.id] = response; 266 | if (self.resourcesLogging && response.url) { 267 | self._notice('Resource received:', response.id, response.url); 268 | } 269 | } 270 | self._pageReadyCheck(); 271 | }; 272 | }, 273 | 274 | _webPageAddInitializedListener: function() { 275 | var 276 | self = this; 277 | 278 | this.page.onInitialized = function() { 279 | self._pageWindowLoaded = false; 280 | self._webPageAddOnLoadCallback(); 281 | self._webPageClearPersistentData(); 282 | }; 283 | }, 284 | 285 | _webPageAddCallbackListener: function() { 286 | var 287 | self = this; 288 | 289 | this.page.onCallback = function(data) { 290 | data = data || {}; 291 | if (data.load) { 292 | self._pageWindowLoaded = true; 293 | self._pageReadyCheck(); 294 | } 295 | }; 296 | }, 297 | 298 | _webPageAddConsoleMessageListener: function() { 299 | var 300 | self = this; 301 | 302 | this.page.onConsoleMessage = function(message, line, sourceId) { 303 | self._notice( 304 | 'CONSOLE: ' + message + 305 | (line || line === 0 || sourceId 306 | ? ' (from line #' + line + ' in "' + sourceId + '")' 307 | : '') 308 | ); 309 | }; 310 | }, 311 | 312 | _webPageAddOnLoadCallback: function() { 313 | try { 314 | this.page.evaluate(function() { 315 | window.addEventListener('load', function() { 316 | if (typeof window.callPhantom === 'function') { 317 | window.callPhantom({ load: true }); 318 | } 319 | }, false); 320 | }); 321 | } 322 | catch(e) { 323 | this._error('Could not evaluate js on page', this.url, e); 324 | this._exitFail(); 325 | } 326 | }, 327 | 328 | _detectResourceUrlMissedFragment: function(resource) { 329 | var 330 | result = {}, 331 | position, 332 | fragment, 333 | url = resource.url, 334 | redirectUrl = resource.redirectURL, 335 | fixedRedirectUrl; 336 | 337 | if (url && redirectUrl) { 338 | position = url.indexOf('#'); 339 | if (position != -1) { 340 | fragment = url.slice(position); 341 | 342 | result.detected = fragment.length > 1 343 | ? redirectUrl.slice(-fragment.length) !== fragment 344 | : false; 345 | 346 | if (result.detected) { 347 | position = redirectUrl.indexOf('#'); 348 | fixedRedirectUrl = position != -1 349 | ? redirectUrl.slice(0, position) 350 | : redirectUrl; 351 | fixedRedirectUrl += fragment; 352 | 353 | result.fixedRedirectUrl = fixedRedirectUrl; 354 | result.redirectUrl = redirectUrl; 355 | } 356 | } 357 | } 358 | 359 | return result; 360 | }, 361 | 362 | _webPageClearPersistentData: function() { 363 | try { 364 | this.page.clearCookies(); 365 | this.page.evaluate(function() { 366 | try { 367 | localStorage.clear(); 368 | } 369 | catch(e) {} 370 | try { 371 | sessionStorage.clear(); 372 | } 373 | catch(e) {} 374 | }); 375 | } 376 | catch(e) { 377 | this._warning('Could not clear persistent data for page', this.url, e); 378 | } 379 | }, 380 | 381 | _pageReadyCheck: function() { 382 | var 383 | self = this, 384 | resourcesPending, 385 | cancelReadyDelayTimeout = true; 386 | 387 | if (this._finished || this._destroyed) { 388 | return; 389 | } 390 | 391 | if (!this._hasReadyFlag()) { 392 | if (this._pageWindowLoaded) { 393 | resourcesPending = false; 394 | Object.keys(this._resourceResponses) 395 | .forEach(function(key) { 396 | if (!self._resourceResponses[key]) { 397 | resourcesPending = true; 398 | } 399 | }); 400 | if (!resourcesPending) { 401 | cancelReadyDelayTimeout = false; 402 | if (!self._readyCheckDelayTimeoutId) { 403 | self._readyCheckDelayTimeoutId = setTimeout(function() { 404 | if (!self._hasReadyFlag()) { 405 | self._warning('WARNING: Prerender or impress ready flags not defined'); 406 | self._success(); 407 | } 408 | }, AUTO_READY_CHECK_DELAY) 409 | } 410 | } 411 | } 412 | } 413 | else if (this._getReadyFlag()) { 414 | this._success(); 415 | } 416 | 417 | if (cancelReadyDelayTimeout) { 418 | this._cancelReadyCheckDelayTimeout(); 419 | } 420 | }, 421 | 422 | _cancelReadyCheckDelayTimeout: function() { 423 | this._readyCheckDelayTimeoutId && clearTimeout(this._readyCheckDelayTimeoutId); 424 | this._readyCheckDelayTimeoutId = null; 425 | }, 426 | 427 | _startTimeout: function() { 428 | var 429 | self = this; 430 | 431 | if (this._finished || this._destroyed) { 432 | return; 433 | } 434 | 435 | this._timeoutId = setTimeout( 436 | function() { 437 | var 438 | resourceResponses = self._resourceResponses, 439 | pendingResourcesCount 440 | ; 441 | 442 | pendingResourcesCount = Object.keys(resourceResponses) 443 | .filter(function(key) { 444 | return !resourceResponses[key]; 445 | }) 446 | .length; 447 | 448 | self._error( 449 | 'TIMEOUT:', self._timeout, 450 | 'Has ready flag:', self._hasReadyFlag(), 451 | 'Ready flag value:', Boolean(self._getReadyFlag()), 452 | 'Page window loaded:', self._pageWindowLoaded, 453 | 'Pending resource count:', pendingResourcesCount 454 | ); 455 | self._exitFail(); 456 | }, 457 | self._timeout 458 | ); 459 | }, 460 | 461 | _cancelTimeout: function() { 462 | clearTimeout(this._timeoutId); 463 | }, 464 | 465 | _startReadyFlagChecker: function() { 466 | var 467 | self = this; 468 | 469 | if (this._finished || this._destroyed) { 470 | return; 471 | } 472 | 473 | this._checkerIntervalId = setInterval(function() { 474 | if (self._hasReadyFlag()) { 475 | self._pageReadyCheck(); 476 | } 477 | }, this._readyCheckInterval); 478 | }, 479 | 480 | _stopReadyFlagChecker: function() { 481 | clearInterval(this._checkerIntervalId); 482 | }, 483 | 484 | _success: function() { 485 | if (this._finished) { 486 | return; 487 | } 488 | this._performPageContent(); 489 | this._exitOk(); 490 | }, 491 | 492 | _performPageContent: function() { 493 | var 494 | performer; 495 | 496 | try { 497 | this._location = this._getLocation(); 498 | performer = new PageContentPerformer(this.page.content); 499 | this._output(performer.getContent()); 500 | if (performer.hasMetaHttpStatusCode()) { 501 | this._httpStatusCode = performer.getMetaHttpStatusCode(); 502 | } 503 | if (performer.hasMetaHttpHeaders()) { 504 | this._httpHeaders = performer.getMetaHttpHeaders(); 505 | } 506 | } 507 | catch(e) { 508 | this._error('Could not get page content', e); 509 | this._exitFail(); 510 | } 511 | }, 512 | 513 | _getReadyFlag: function() { 514 | try { 515 | return this.page.evaluate(function() { 516 | return window.prerenderReady || window.impressReady || window.impresserReady; 517 | }); 518 | } 519 | catch(e) { 520 | this._error('Could not get prerender or impress ready flags value from page', e); 521 | this._exitFail(); 522 | } 523 | return null; 524 | }, 525 | 526 | _hasReadyFlag: function() { 527 | try { 528 | return this.page.evaluate(function() { 529 | return typeof window.prerenderReady != 'undefined' 530 | || typeof window.impressReady != 'undefined' 531 | || typeof window.impresserReady != 'undefined'; 532 | }); 533 | } 534 | catch(e) { 535 | this._error('Could not get prerender or impress ready flags information from page', e); 536 | this._exitFail(); 537 | } 538 | return null; 539 | }, 540 | 541 | _getLocation: function() { 542 | try { 543 | return this.page.evaluate(function() { 544 | return window.location.href; 545 | }); 546 | } 547 | catch(e) { 548 | this._warning('Could not get page location', e); 549 | } 550 | return null; 551 | }, 552 | 553 | _fixPageUrlMissedFragment: function(fixedUrl) { 554 | this._pageUrlMissedFragmentFixing = true; 555 | this._pageUrlMissedFragmentRedirectUrl = fixedUrl; 556 | this._stop(); 557 | }, 558 | 559 | _finish: function() { 560 | this._finished = true; 561 | this._stop(); 562 | }, 563 | 564 | _stop: function() { 565 | this._cancelReadyCheckDelayTimeout(); 566 | this._cancelTimeout(); 567 | this._stopReadyFlagChecker(); 568 | this.page.stop(); 569 | }, 570 | 571 | open: function(url) { 572 | var 573 | self = this; 574 | 575 | url = url || this.url; 576 | this._startTime = Date.now(); 577 | 578 | try { 579 | this._startTimeout(); 580 | if (this.page.clearMemoryCache) { 581 | try { 582 | this.page.clearMemoryCache(); 583 | } 584 | catch(e) { 585 | this._warning('Could not get clear memory cache for page', url, e); 586 | } 587 | } 588 | this._notice('Open', url); 589 | this.page.open(url, function(status) { 590 | if (self._pageUrlMissedFragmentFixing) { 591 | self._pageUrlMissedFragmentFixing = false; 592 | self._pageWindowLoaded = false; 593 | self._resourceResponses = {}; 594 | self._abortedResources = []; 595 | self.open(self._pageUrlMissedFragmentRedirectUrl); 596 | return; 597 | } 598 | if (status !== 'success') { 599 | self._error('Fail to load page'); 600 | self._exitFail(); 601 | return; 602 | } 603 | 604 | self._startReadyFlagChecker(); 605 | }); 606 | } 607 | catch(e) { 608 | this._error(e); 609 | this._exitFail(); 610 | } 611 | }, 612 | 613 | getResult: function() { 614 | return { 615 | url: this.url, 616 | redirectUrlList: this._redirectUrlList, 617 | location: this._location || null, 618 | ok: this._ok, 619 | httpStatusCode: this._httpStatusCode, 620 | httpHeaders: this._httpHeaders, 621 | contentType: this._contentType, 622 | content: this._ok 623 | ? this._outputBuffer 624 | : undefined, 625 | errors: !this._ok 626 | ? this._errorBuffer 627 | : undefined, 628 | warnings: this.warnings && this._ok 629 | ? this._warningBuffer 630 | : undefined, 631 | notices: this.notices && this._ok 632 | ? this._noticeBuffer 633 | : undefined 634 | }; 635 | }, 636 | 637 | destroy: function() { 638 | this._destroyed = true; 639 | this._redirectUrlList = null; 640 | this._httpHeaders = null; 641 | this._httpStatusCode = null; 642 | this._outputBuffer = null; 643 | this._errorBuffer = null; 644 | this._warningBuffer = null; 645 | this._noticeBuffer = null; 646 | this._resourceResponses = null; 647 | this._abortedResources = null; 648 | this._pageUrlMissedFragmentRedirectUrl = null; 649 | this._pageUrlMissedFragmentFixing = false; 650 | this.page.close(); 651 | } 652 | 653 | 654 | }); 655 | --------------------------------------------------------------------------------