├── .editorconfig ├── .gitignore ├── .npmignore ├── .travis.yml ├── README.md ├── app.js ├── config.js ├── config.sample.yml ├── custom.js ├── default.yml ├── logs └── .gitkeep ├── package.json ├── phantom.js ├── phantom ├── querystring.js └── url.js ├── request.js ├── runner.js └── test ├── fixtures └── test_config.yml └── request_test.js /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | root = true 6 | 7 | 8 | [*] 9 | 10 | # Change these settings to your own preference 11 | indent_style = space 12 | indent_size = 2 13 | 14 | # It's recommended to keep these unchanged 15 | end_of_line = lf 16 | charset = utf-8 17 | trim_trailing_whitespace = true 18 | insert_final_newline = true 19 | 20 | [*.md] 21 | trim_trailing_whitespace = false -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | config.yml 3 | logs/phoenix-* 4 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | test/ 2 | node_modules/ 3 | logs/ 4 | .editorconfig 5 | .travis.yml 6 | config.yml -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '0.11' 4 | - '0.10' 5 | notifications: 6 | slack: 7 | secure: esKBfmWQDALSIPcQ++1HELpJL0CX3v7xc56Vz3KVLAS/7IWve5WWivjLamBqi4TWqGqJjU3lAYtSrDAX4fwtUK6yZRWVbz12xFPAnO+GHWXkxgSMYkNnR3V/PML6NokCLaJGtwZnYPJFl2BZdJDlcJEyni8CnRYNSSHH4FKlMiI= 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Phoenix [![Build Status](https://travis-ci.org/sdslabs/phoenix.svg?branch=master)](https://travis-ci.org/sdslabs/phoenix) 2 | 3 | Redis based phantomjs queue 4 | 5 | ## Instructions 6 | 7 | * Install `redis` (https://github.com/antirez/redis) 8 | * Install npm modules(`$ npm install`) 9 | * Run tests (`npm test`) 10 | 11 | ## Usage 12 | 13 | By default it subscribes to a single channel specified in the config 14 | Before you start using the application, assure that a redis server is running (`$ redis-server`). 15 | Phoenix will refuse to start if redis is down 16 | 17 | * Run app.js (`$ node app.js`) 18 | 19 | This trigger the node application to spawn a new `phantomjs` child process and opens the website. 20 | 21 | You can see the `config.sample.yml` for a sample configuration. 22 | Note that some options are incompatible with each other, for eg 23 | you can't send a request body in a GET request. 24 | 25 | Do not edit default.yml unless you are working on phoenix itself. 26 | 27 | ## Injection 28 | By setting the `js` key in config.yml you can run code on the loaded web page before the page is 29 | loaded itself. 30 | 31 | ## Logging 32 | phoenix logs every phantomjs instance. For every instance that is created, a new temporary 33 | directory is created in the logs directory. The config passed to phantomjs is stored in the `config.json` 34 | file. The output from the phantomjs instance is stored in two files: `browser.log` and `page.log`. 35 | 36 | `browser.log` is the higher level log file, which records events and errors made by the browser instance. 37 | Such as Injection events and final status of the page load. 38 | 39 | `page.log` holds the log from the page context. This includes any console.log statements made from the page 40 | and any alert/confirm/prompt calls as well. 41 | 42 | ## Usage 43 | Instead of cloning and running the entire repo for each project, you can use the npm package 44 | (called `phantom-phoenix`), which has its own binary called `phoenix` which you can run. 45 | 46 | To run `phoenix` in a directory, the following conditions must be true: 47 | 48 | - a valid `config.yml` file must exist in the directory 49 | 50 | After that you can append messages to the list specified in the config, and phoenix will 51 | start runners for each of your requests once you publish the id 52 | 53 | You can append two things to the list: 54 | 55 | - A complete valid http/https url. This replaces the url provided in the config 56 | - A partial querystring (such as `a=1&b=2`). This overrides and merges with existing query params in the config 57 | 58 | This way, you can send an `id=1` and phoenix will open the correct url. 59 | 60 | ## Redis 61 | 62 | Instead of just using a redis pubsub, we use a hybrid pubsub+list model as queue to store logs. Instead 63 | of directly publishing the request to the channel, you push it to a list, and then publish the index 64 | of the just pushed item on the list. The list is maintained at `$channel:queue`, where $channel is the queue 65 | name specified in the config. 66 | 67 | For, eg if the channel name is the default (`phoenix`), you do the following: 68 | 69 | - run `phoenix` 70 | - `redis-cli` 71 | 72 | ``` 73 | RPUSH phoenix:queue "http://google.com"` 74 | (integer) 8 75 | PUBLISH phoenix 7 76 | ``` 77 | 78 | phoenix will pick this up and give something like following as output: 79 | 80 | ``` 81 | START: phoenix-11634lNeXLbHqhRM3 82 | STOP : phoenix-11634lNeXLbHqhRM3 83 | ``` 84 | -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ':' //; exec "$(command -v nodejs || command -v node)" "$0" "$@" 3 | var listen = require('redis').createClient(); 4 | var redis = require('redis').createClient(); 5 | var config = require('./config')(); 6 | var request = require('./request') 7 | var settings = request(config.request); 8 | var clone = require('clone'); 9 | var runner = require('./runner'); 10 | 11 | listen.on('error', function(error) { 12 | // We die with redis 13 | console.log(error); 14 | process.exit(1); 15 | }) 16 | 17 | listen.on('message', function(channel, request_id) { 18 | 19 | redis.lrange(config.queue+':queue', request_id, request_id, function(err, message){ 20 | var phantomConfig = clone(settings); 21 | phantomConfig.url = request.updateUrl(phantomConfig, message[0]); 22 | runner(config, phantomConfig, function (err, phantomId){ 23 | if(err){ 24 | console.error(err); 25 | } 26 | else{ 27 | redis.set(config.queue+':log:'+request_id, phantomId); 28 | } 29 | }); 30 | }); 31 | 32 | }); 33 | 34 | // channel subscriptions 35 | listen.subscribe(config.queue); 36 | -------------------------------------------------------------------------------- /config.js: -------------------------------------------------------------------------------- 1 | var yaml = require('js-yaml'); 2 | var merge = require('merge'); 3 | var fs = require('fs'); 4 | var path = require('path'); 5 | var sh = require("shelljs"); 6 | var pathIsAbsolute = require('path-is-absolute'); 7 | 8 | var config = function(config_file) { 9 | var cwd = sh.pwd(); 10 | if(typeof config_file === "undefined"){ 11 | config_file = path.join(cwd, './config.yml'); 12 | } 13 | else if(pathIsAbsolute(config_file)){ 14 | config_file = config_file; 15 | } 16 | else{ 17 | config_file = path.join(cwd, config_file); 18 | } 19 | try { 20 | var defPath = path.join(__dirname, './default.yml'); 21 | var def = yaml.safeLoad(fs.readFileSync(defPath, 'utf8')); 22 | var patch = yaml.safeLoad(fs.readFileSync(config_file, 'utf8')); 23 | var conf = merge.recursive(def, patch); 24 | return conf; 25 | } catch (e) { 26 | console.log(e); 27 | console.log("YAML parser error"); 28 | process.exit(1); 29 | } 30 | }; 31 | 32 | module.exports = config; 33 | -------------------------------------------------------------------------------- /config.sample.yml: -------------------------------------------------------------------------------- 1 | # Name of the redis queue to listen to 2 | queue: phoenix 3 | request: 4 | url: "http://google.com/" 5 | # This is only for POST, PATCH, PUT requests 6 | body: "This is the complete body that you wish to send" 7 | data: 8 | this: is 9 | an: object 10 | which: will 11 | be: serialized 12 | # This is the query string appended to the url 13 | query: 14 | phantom: 1 15 | # removes the referer header when a redirect happens 16 | # Currently not implemented 17 | removeRefererHeader: false 18 | #In case you want to send a json body 19 | #This is automatically serialized and adds the content type headers for json 20 | json: 21 | a: b 22 | # This is just basic auth 23 | auth: 24 | username: username 25 | password: password 26 | headers: 27 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 28 | Accept-Encoding: gzip, deflate, sdch 29 | Accept-Language: en-US,en;q=0.8,hi;q=0.6,sv;q=0.4 30 | Cache-Control: max-age=0 31 | Connection: keep-alive 32 | DNT: 1 33 | User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2342.2 Safari/537.36 34 | # This is custom js to run in the proper context on the webpage _before_ it loads 35 | js: custom.js 36 | -------------------------------------------------------------------------------- /custom.js: -------------------------------------------------------------------------------- 1 | var runInContext = function(){ 2 | // This code will run in the page context 3 | // Before anything else is loaded 4 | navigator.platform = "phoenix"; 5 | } 6 | 7 | exports.run = function(){ 8 | return runInContext; 9 | }; 10 | -------------------------------------------------------------------------------- /default.yml: -------------------------------------------------------------------------------- 1 | request: 2 | method: "GET" 3 | encoding: "utf8" 4 | query: {} 5 | # removes the referer header when a redirect happens 6 | removeRefererHeader: false 7 | headers: 8 | Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" 9 | Accept-Language: "en-US,en;q=0.8,hi;q=0.6,sv;q=0.4" 10 | Cache-Control: "max-age=0" 11 | Connection: "keep-alive" 12 | DNT: "1" 13 | # Top one at https://techblog.willshouse.com/2012/01/03/most-common-user-agents/ 14 | # As on 26 Mar 2015 15 | User-Agent: "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.115 Safari/537.36" 16 | # This is custom js to run in the proper context on the webpage _before_ it loads 17 | js: custom.js 18 | -------------------------------------------------------------------------------- /logs/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sdslabs/phoenix/e57554d11e23e28ccf764d4e66e5403c72afae3a/logs/.gitkeep -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "phantom-phoenix", 3 | "description": "Redis based phantomjs queue and runner", 4 | "keywords": [ 5 | "phantom", 6 | "phantomjs", 7 | "queue", 8 | "pubsub", 9 | "tasks", 10 | "jobs", 11 | "runner", 12 | "task-runner" 13 | ], 14 | "homepage": "https://github.com/sdslabs/phoenix", 15 | "bugs": { 16 | "url" : "https://github.com/sdslabs/phoenix/issues" 17 | }, 18 | "author": { 19 | "name" : "Abhay Rana", 20 | "email" : "nemo@sdslabs.co.in", 21 | "url" : "https://captnemo.in" 22 | }, 23 | "preferGlobal": true, 24 | "repository": "sdslabs/phoenix", 25 | "license" : "MIT", 26 | "version": "1.4.6", 27 | "dependencies": { 28 | "clone": "^1.0.2", 29 | "js-yaml": "^3.2.7", 30 | "merge": "^1.2.0", 31 | "path-is-absolute": "^1.0.0", 32 | "phantomjs": "1.9.11", 33 | "redis": "0.12.1", 34 | "shelljs": "^0.4.0", 35 | "tmp": "0.0.25", 36 | "valid-url": "^1.0.9" 37 | }, 38 | "devDependencies": { 39 | "mocha": "^2.2.1" 40 | }, 41 | "scripts": { 42 | "test": "./node_modules/.bin/mocha --reporter spec" 43 | }, 44 | "bin": { 45 | "phoenix": "./app.js" 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /phantom.js: -------------------------------------------------------------------------------- 1 | var page = require('webpage').create(); 2 | var system = require('system'); 3 | var fs = require('fs'); 4 | var args = system.args; 5 | var url = require('./phantom/url.js'); 6 | var timeout; 7 | 8 | console.error = function (){ 9 | require("system").stderr.write(Array.prototype.join.call(arguments, ' ') + '\n'); 10 | }; 11 | 12 | if(args.length < 2) { 13 | phantom.exit(1); 14 | } 15 | 16 | var configFile = args[1]; 17 | var settings = JSON.parse(fs.read(configFile)); 18 | 19 | console.log("URL:\t\t" + settings.url); 20 | 21 | page.settings = settings.phantom; 22 | 23 | if(args[2]){ 24 | page.onInitialized = function (){ 25 | var pageUrl = url.format(url.parse(page.url)); 26 | if(pageUrl === settings.url){ 27 | console.log("INJECT:\t\t"+args[2]); 28 | var code = require(args[2]).run(); 29 | page.evaluate(function (code) { 30 | console.error(code()); 31 | }, code); 32 | } 33 | }; 34 | }; 35 | 36 | page.onConsoleMessage = function (msg){ 37 | console.error(msg); 38 | }; 39 | 40 | page.onAlert = function (msg){ 41 | console.error("ALERT:\t\t"+msg) 42 | } 43 | 44 | page.onPrompt = function(msg, defaultVal) { 45 | console.error("PROMPT:\t\t"+msg); 46 | return defaultVal || null; 47 | }; 48 | 49 | page.onConfirm = function(msg) { 50 | console.error("CONFIRM:\t\t"+msg); 51 | return false;//We always click the cancel button 52 | }; 53 | 54 | function exitNormal(){ 55 | console.log("EXIT:\t\tNormal"); 56 | phantom.exit(0); 57 | } 58 | 59 | // Lets quit by default in 0.5s 60 | var timeout = setTimeout(exitNormal, 500); 61 | 62 | // This method extends the timeout by 200ms 63 | var c4timer = function(time){ 64 | console.log("Extending timeout by " + time); 65 | if(timeout) 66 | clearTimeout(timeout); 67 | timeout = setTimeout(exitNormal, time); 68 | } 69 | 70 | page.onResourceRequested = function(requestData) { 71 | console.error("REQUEST:\t\t"+requestData.url); 72 | if(settings.url!==requestData.url) 73 | c4timer(4000); 74 | }; 75 | page.onResourceTimeout = function(requestData) { 76 | console.error("TIMEOUT:\t\t"+requestData.url); 77 | if(settings.url!==requestData.url) 78 | c4timer(500); 79 | }; 80 | 81 | page.onResourceError = function(resourceError) { 82 | console.log('Unable to load resource (#' + resourceError.id + 'URL:' + resourceError.url + ')'); 83 | console.log('Error code: ' + resourceError.errorCode + '. Description: ' + resourceError.errorString); 84 | }; 85 | 86 | page.onResourceReceived = function(res) { 87 | console.error("RESPONSE:\t\t"+res.url); 88 | // This needs to be the last chunk 89 | if(settings.url!==res.url && res.stage === 'end') 90 | c4timer(1000); 91 | }; 92 | 93 | page.onError = function(msg, trace) { 94 | var msgStack = ['ERROR: ' + msg]; 95 | if (trace && trace.length) { 96 | msgStack.push('TRACE:'); 97 | trace.forEach(function(t) { 98 | msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : '')); 99 | }); 100 | } 101 | console.error(msgStack.join('\n')); 102 | }; 103 | 104 | page.open(settings.url, settings, function (status){ 105 | console.log("STATUS:\t\t"+status); 106 | // This is the hard kill switch 107 | // We force quit after 5 seconds 108 | setTimeout(function(){ 109 | console.log("EXIT:\t\tForced"); 110 | phantom.exit(0); 111 | }, 10000); 112 | }); 113 | -------------------------------------------------------------------------------- /phantom/querystring.js: -------------------------------------------------------------------------------- 1 | const QueryString = exports; 2 | 3 | function charCode(c) { 4 | return c.charCodeAt(0); 5 | } 6 | 7 | 8 | // a safe fast alternative to decodeURIComponent 9 | QueryString.unescapeBuffer = function(s, decodeSpaces) { 10 | var out = new Buffer(s.length); 11 | var state = 'CHAR'; // states: CHAR, HEX0, HEX1 12 | var n, m, hexchar; 13 | 14 | for (var inIndex = 0, outIndex = 0; inIndex <= s.length; inIndex++) { 15 | var c = s.charCodeAt(inIndex); 16 | switch (state) { 17 | case 'CHAR': 18 | switch (c) { 19 | case charCode('%'): 20 | n = 0; 21 | m = 0; 22 | state = 'HEX0'; 23 | break; 24 | case charCode('+'): 25 | if (decodeSpaces) c = charCode(' '); 26 | // pass thru 27 | default: 28 | out[outIndex++] = c; 29 | break; 30 | } 31 | break; 32 | 33 | case 'HEX0': 34 | state = 'HEX1'; 35 | hexchar = c; 36 | if (charCode('0') <= c && c <= charCode('9')) { 37 | n = c - charCode('0'); 38 | } else if (charCode('a') <= c && c <= charCode('f')) { 39 | n = c - charCode('a') + 10; 40 | } else if (charCode('A') <= c && c <= charCode('F')) { 41 | n = c - charCode('A') + 10; 42 | } else { 43 | out[outIndex++] = charCode('%'); 44 | out[outIndex++] = c; 45 | state = 'CHAR'; 46 | break; 47 | } 48 | break; 49 | 50 | case 'HEX1': 51 | state = 'CHAR'; 52 | if (charCode('0') <= c && c <= charCode('9')) { 53 | m = c - charCode('0'); 54 | } else if (charCode('a') <= c && c <= charCode('f')) { 55 | m = c - charCode('a') + 10; 56 | } else if (charCode('A') <= c && c <= charCode('F')) { 57 | m = c - charCode('A') + 10; 58 | } else { 59 | out[outIndex++] = charCode('%'); 60 | out[outIndex++] = hexchar; 61 | out[outIndex++] = c; 62 | break; 63 | } 64 | out[outIndex++] = 16 * n + m; 65 | break; 66 | } 67 | } 68 | 69 | // TODO support returning arbitrary buffers. 70 | 71 | return out.slice(0, outIndex - 1); 72 | }; 73 | 74 | 75 | QueryString.unescape = function(s, decodeSpaces) { 76 | try { 77 | return decodeURIComponent(s); 78 | } catch (e) { 79 | return QueryString.unescapeBuffer(s, decodeSpaces).toString(); 80 | } 81 | }; 82 | 83 | 84 | var hexTable = new Array(256); 85 | for (var i = 0; i < 256; ++i) 86 | hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); 87 | QueryString.escape = function(str) { 88 | // replaces encodeURIComponent 89 | // http://www.ecma-international.org/ecma-262/5.1/#sec-15.1.3.4 90 | str = '' + str; 91 | var len = str.length; 92 | var out = ''; 93 | var i, c; 94 | 95 | if (len === 0) 96 | return str; 97 | 98 | for (i = 0; i < len; ++i) { 99 | c = str.charCodeAt(i); 100 | 101 | // These characters do not need escaping (in order): 102 | // ! - . _ ~ 103 | // ' ( ) * 104 | // digits 105 | // alpha (uppercase) 106 | // alpha (lowercase) 107 | if (c === 0x21 || c === 0x2D || c === 0x2E || c === 0x5F || c === 0x7E || 108 | (c >= 0x27 && c <= 0x2A) || 109 | (c >= 0x30 && c <= 0x39) || 110 | (c >= 0x41 && c <= 0x5A) || 111 | (c >= 0x61 && c <= 0x7A)) { 112 | out += str[i]; 113 | continue; 114 | } 115 | 116 | // Other ASCII characters 117 | if (c < 0x80) { 118 | out += hexTable[c]; 119 | continue; 120 | } 121 | 122 | // Multi-byte characters ... 123 | if (c < 0x800) { 124 | out += hexTable[0xC0 | (c >> 6)] + hexTable[0x80 | (c & 0x3F)]; 125 | continue; 126 | } 127 | if (c < 0xD800 || c >= 0xE000) { 128 | out += hexTable[0xE0 | (c >> 12)] + 129 | hexTable[0x80 | ((c >> 6) & 0x3F)] + 130 | hexTable[0x80 | (c & 0x3F)]; 131 | continue; 132 | } 133 | // Surrogate pair 134 | ++i; 135 | c = 0x10000 + (((c & 0x3FF) << 10) | (str.charCodeAt(i) & 0x3FF)); 136 | out += hexTable[0xF0 | (c >> 18)] + 137 | hexTable[0x80 | ((c >> 12) & 0x3F)] + 138 | hexTable[0x80 | ((c >> 6) & 0x3F)] + 139 | hexTable[0x80 | (c & 0x3F)]; 140 | } 141 | return out; 142 | }; 143 | 144 | var stringifyPrimitive = function(v) { 145 | if (typeof v === 'string') 146 | return v; 147 | if (typeof v === 'number' && isFinite(v)) 148 | return '' + v; 149 | if (typeof v === 'boolean') 150 | return v ? 'true' : 'false'; 151 | return ''; 152 | }; 153 | 154 | 155 | QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) { 156 | sep = sep || '&'; 157 | eq = eq || '='; 158 | 159 | var encode = QueryString.escape; 160 | if (options && typeof options.encodeURIComponent === 'function') { 161 | encode = options.encodeURIComponent; 162 | } 163 | 164 | if (obj !== null && typeof obj === 'object') { 165 | var keys = Object.keys(obj); 166 | var len = keys.length; 167 | var flast = len - 1; 168 | var fields = ''; 169 | for (var i = 0; i < len; ++i) { 170 | var k = keys[i]; 171 | var v = obj[k]; 172 | var ks = encode(stringifyPrimitive(k)) + eq; 173 | 174 | if (Array.isArray(v)) { 175 | var vlen = v.length; 176 | var vlast = vlen - 1; 177 | for (var j = 0; j < vlen; ++j) { 178 | fields += ks + encode(stringifyPrimitive(v[j])); 179 | if (j < vlast) 180 | fields += sep; 181 | } 182 | if (vlen && i < flast) 183 | fields += sep; 184 | } else { 185 | fields += ks + encode(stringifyPrimitive(v)); 186 | if (i < flast) 187 | fields += sep; 188 | } 189 | } 190 | return fields; 191 | } 192 | return ''; 193 | }; 194 | 195 | // Parse a key=val string. 196 | QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { 197 | sep = sep || '&'; 198 | eq = eq || '='; 199 | var obj = {}; 200 | 201 | if (typeof qs !== 'string' || qs.length === 0) { 202 | return obj; 203 | } 204 | 205 | var regexp = /\+/g; 206 | qs = qs.split(sep); 207 | 208 | var maxKeys = 1000; 209 | if (options && typeof options.maxKeys === 'number') { 210 | maxKeys = options.maxKeys; 211 | } 212 | 213 | var len = qs.length; 214 | // maxKeys <= 0 means that we should not limit keys count 215 | if (maxKeys > 0 && len > maxKeys) { 216 | len = maxKeys; 217 | } 218 | 219 | var decode = QueryString.unescape; 220 | if (options && typeof options.decodeURIComponent === 'function') { 221 | decode = options.decodeURIComponent; 222 | } 223 | 224 | var keys = []; 225 | for (var i = 0; i < len; ++i) { 226 | var x = qs[i].replace(regexp, '%20'), 227 | idx = x.indexOf(eq), 228 | k, v; 229 | 230 | if (idx >= 0) { 231 | k = decodeStr(x.substring(0, idx), decode); 232 | v = decodeStr(x.substring(idx + 1), decode); 233 | } else { 234 | k = decodeStr(x, decode); 235 | v = ''; 236 | } 237 | 238 | if (keys.indexOf(k) === -1) { 239 | obj[k] = v; 240 | keys.push(k); 241 | } else if (Array.isArray(obj[k])) { 242 | obj[k].push(v); 243 | } else { 244 | obj[k] = [obj[k], v]; 245 | } 246 | } 247 | 248 | return obj; 249 | }; 250 | 251 | 252 | function decodeStr(s, decoder) { 253 | try { 254 | return decoder(s); 255 | } catch (e) { 256 | return QueryString.unescape(s, true); 257 | } 258 | } 259 | -------------------------------------------------------------------------------- /phantom/url.js: -------------------------------------------------------------------------------- 1 | exports.parse = urlParse; 2 | exports.resolve = urlResolve; 3 | exports.resolveObject = urlResolveObject; 4 | exports.format = urlFormat; 5 | 6 | exports.Url = Url; 7 | 8 | function Url() { 9 | this.protocol = null; 10 | this.slashes = null; 11 | this.auth = null; 12 | this.host = null; 13 | this.port = null; 14 | this.hostname = null; 15 | this.hash = null; 16 | this.search = null; 17 | this.query = null; 18 | this.pathname = null; 19 | this.path = null; 20 | this.href = null; 21 | } 22 | 23 | // Reference: RFC 3986, RFC 1808, RFC 2396 24 | 25 | // define these here so at least they only have to be 26 | // compiled once on the first module load. 27 | const protocolPattern = /^([a-z0-9.+-]+:)/i; 28 | const portPattern = /:[0-9]*$/; 29 | 30 | // Special case for a simple path URL 31 | const simplePathPattern = /^(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?$/; 32 | 33 | // RFC 2396: characters reserved for delimiting URLs. 34 | // We actually just auto-escape these. 35 | const delims = ['<', '>', '"', '`', ' ', '\r', '\n', '\t']; 36 | 37 | // RFC 2396: characters not allowed for various reasons. 38 | const unwise = ['{', '}', '|', '\\', '^', '`'].concat(delims); 39 | 40 | // Allowed by RFCs, but cause of XSS attacks. Always escape these. 41 | const autoEscape = ['\''].concat(unwise); 42 | 43 | // Characters that are never ever allowed in a hostname. 44 | // Note that any invalid chars are also handled, but these 45 | // are the ones that are *expected* to be seen, so we fast-path them. 46 | const nonHostChars = ['%', '/', '?', ';', '#'].concat(autoEscape); 47 | const hostEndingChars = ['/', '?', '#']; 48 | const hostnameMaxLen = 255; 49 | const hostnamePartPattern = /^[+a-z0-9A-Z_-]{0,63}$/; 50 | const hostnamePartStart = /^([+a-z0-9A-Z_-]{0,63})(.*)$/; 51 | // protocols that can allow "unsafe" and "unwise" chars. 52 | const unsafeProtocol = { 53 | 'javascript': true, 54 | 'javascript:': true 55 | }; 56 | // protocols that never have a hostname. 57 | const hostlessProtocol = { 58 | 'javascript': true, 59 | 'javascript:': true 60 | }; 61 | // protocols that always contain a // bit. 62 | const slashedProtocol = { 63 | 'http': true, 64 | 'https': true, 65 | 'ftp': true, 66 | 'gopher': true, 67 | 'file': true, 68 | 'http:': true, 69 | 'https:': true, 70 | 'ftp:': true, 71 | 'gopher:': true, 72 | 'file:': true 73 | }; 74 | const querystring = require('./querystring.js'); 75 | 76 | function urlParse(url, parseQueryString, slashesDenoteHost) { 77 | if (url instanceof Url) return url; 78 | 79 | var u = new Url; 80 | u.parse(url, parseQueryString, slashesDenoteHost); 81 | return u; 82 | } 83 | 84 | Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) { 85 | if (typeof url !== 'string') { 86 | throw new TypeError("Parameter 'url' must be a string, not " + typeof url); 87 | } 88 | 89 | // Copy chrome, IE, opera backslash-handling behavior. 90 | // Back slashes before the query string get converted to forward slashes 91 | // See: https://code.google.com/p/chromium/issues/detail?id=25916 92 | var queryIndex = url.indexOf('?'), 93 | splitter = 94 | (queryIndex !== -1 && queryIndex < url.indexOf('#')) ? '?' : '#', 95 | uSplit = url.split(splitter), 96 | slashRegex = /\\/g; 97 | uSplit[0] = uSplit[0].replace(slashRegex, '/'); 98 | url = uSplit.join(splitter); 99 | 100 | var rest = url; 101 | 102 | // trim before proceeding. 103 | // This is to support parse stuff like " http://foo.com \n" 104 | rest = rest.trim(); 105 | 106 | if (!slashesDenoteHost && url.split('#').length === 1) { 107 | // Try fast path regexp 108 | var simplePath = simplePathPattern.exec(rest); 109 | if (simplePath) { 110 | this.path = rest; 111 | this.href = rest; 112 | this.pathname = simplePath[1]; 113 | if (simplePath[2]) { 114 | this.search = simplePath[2]; 115 | if (parseQueryString) { 116 | this.query = querystring.parse(this.search.substr(1)); 117 | } else { 118 | this.query = this.search.substr(1); 119 | } 120 | } else if (parseQueryString) { 121 | this.search = ''; 122 | this.query = {}; 123 | } 124 | return this; 125 | } 126 | } 127 | 128 | var proto = protocolPattern.exec(rest); 129 | if (proto) { 130 | proto = proto[0]; 131 | var lowerProto = proto.toLowerCase(); 132 | this.protocol = lowerProto; 133 | rest = rest.substr(proto.length); 134 | } 135 | 136 | // figure out if it's got a host 137 | // user@server is *always* interpreted as a hostname, and url 138 | // resolution will treat //foo/bar as host=foo,path=bar because that's 139 | // how the browser resolves relative URLs. 140 | if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/)) { 141 | var slashes = rest.substr(0, 2) === '//'; 142 | if (slashes && !(proto && hostlessProtocol[proto])) { 143 | rest = rest.substr(2); 144 | this.slashes = true; 145 | } 146 | } 147 | 148 | if (!hostlessProtocol[proto] && 149 | (slashes || (proto && !slashedProtocol[proto]))) { 150 | 151 | // there's a hostname. 152 | // the first instance of /, ?, ;, or # ends the host. 153 | // 154 | // If there is an @ in the hostname, then non-host chars *are* allowed 155 | // to the left of the last @ sign, unless some host-ending character 156 | // comes *before* the @-sign. 157 | // URLs are obnoxious. 158 | // 159 | // ex: 160 | // http://a@b@c/ => user:a@b host:c 161 | // http://a@b?@c => user:a host:c path:/?@c 162 | 163 | // v0.12 TODO(isaacs): This is not quite how Chrome does things. 164 | // Review our test case against browsers more comprehensively. 165 | 166 | // find the first instance of any hostEndingChars 167 | var hostEnd = -1; 168 | for (var i = 0; i < hostEndingChars.length; i++) { 169 | var hec = rest.indexOf(hostEndingChars[i]); 170 | if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) 171 | hostEnd = hec; 172 | } 173 | 174 | // at this point, either we have an explicit point where the 175 | // auth portion cannot go past, or the last @ char is the decider. 176 | var auth, atSign; 177 | if (hostEnd === -1) { 178 | // atSign can be anywhere. 179 | atSign = rest.lastIndexOf('@'); 180 | } else { 181 | // atSign must be in auth portion. 182 | // http://a@b/c@d => host:b auth:a path:/c@d 183 | atSign = rest.lastIndexOf('@', hostEnd); 184 | } 185 | 186 | // Now we have a portion which is definitely the auth. 187 | // Pull that off. 188 | if (atSign !== -1) { 189 | auth = rest.slice(0, atSign); 190 | rest = rest.slice(atSign + 1); 191 | this.auth = decodeURIComponent(auth); 192 | } 193 | 194 | // the host is the remaining to the left of the first non-host char 195 | hostEnd = -1; 196 | for (var i = 0; i < nonHostChars.length; i++) { 197 | var hec = rest.indexOf(nonHostChars[i]); 198 | if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) 199 | hostEnd = hec; 200 | } 201 | // if we still have not hit it, then the entire thing is a host. 202 | if (hostEnd === -1) 203 | hostEnd = rest.length; 204 | 205 | this.host = rest.slice(0, hostEnd); 206 | rest = rest.slice(hostEnd); 207 | 208 | // pull out port. 209 | this.parseHost(); 210 | 211 | // we've indicated that there is a hostname, 212 | // so even if it's empty, it has to be present. 213 | this.hostname = this.hostname || ''; 214 | 215 | // if hostname begins with [ and ends with ] 216 | // assume that it's an IPv6 address. 217 | var ipv6Hostname = this.hostname[0] === '[' && 218 | this.hostname[this.hostname.length - 1] === ']'; 219 | 220 | // validate a little. 221 | if (!ipv6Hostname) { 222 | var hostparts = this.hostname.split(/\./); 223 | for (var i = 0, l = hostparts.length; i < l; i++) { 224 | var part = hostparts[i]; 225 | if (!part) continue; 226 | if (!part.match(hostnamePartPattern)) { 227 | var newpart = ''; 228 | for (var j = 0, k = part.length; j < k; j++) { 229 | if (part.charCodeAt(j) > 127) { 230 | // we replace non-ASCII char with a temporary placeholder 231 | // we need this to make sure size of hostname is not 232 | // broken by replacing non-ASCII by nothing 233 | newpart += 'x'; 234 | } else { 235 | newpart += part[j]; 236 | } 237 | } 238 | // we test again with ASCII char only 239 | if (!newpart.match(hostnamePartPattern)) { 240 | var validParts = hostparts.slice(0, i); 241 | var notHost = hostparts.slice(i + 1); 242 | var bit = part.match(hostnamePartStart); 243 | if (bit) { 244 | validParts.push(bit[1]); 245 | notHost.unshift(bit[2]); 246 | } 247 | if (notHost.length) { 248 | rest = '/' + notHost.join('.') + rest; 249 | } 250 | this.hostname = validParts.join('.'); 251 | break; 252 | } 253 | } 254 | } 255 | } 256 | 257 | if (this.hostname.length > hostnameMaxLen) { 258 | this.hostname = ''; 259 | } else { 260 | // hostnames are always lower case. 261 | this.hostname = this.hostname.toLowerCase(); 262 | } 263 | 264 | var p = this.port ? ':' + this.port : ''; 265 | var h = this.hostname || ''; 266 | this.host = h + p; 267 | 268 | // strip [ and ] from the hostname 269 | // the host field still retains them, though 270 | if (ipv6Hostname) { 271 | this.hostname = this.hostname.substr(1, this.hostname.length - 2); 272 | if (rest[0] !== '/') { 273 | rest = '/' + rest; 274 | } 275 | } 276 | } 277 | 278 | // now rest is set to the post-host stuff. 279 | // chop off any delim chars. 280 | if (!unsafeProtocol[lowerProto]) { 281 | 282 | // First, make 100% sure that any "autoEscape" chars get 283 | // escaped, even if encodeURIComponent doesn't think they 284 | // need to be. 285 | for (var i = 0, l = autoEscape.length; i < l; i++) { 286 | var ae = autoEscape[i]; 287 | if (rest.indexOf(ae) === -1) 288 | continue; 289 | var esc = encodeURIComponent(ae); 290 | if (esc === ae) { 291 | esc = escape(ae); 292 | } 293 | rest = rest.split(ae).join(esc); 294 | } 295 | } 296 | 297 | 298 | // chop off from the tail first. 299 | var hash = rest.indexOf('#'); 300 | if (hash !== -1) { 301 | // got a fragment string. 302 | this.hash = rest.substr(hash); 303 | rest = rest.slice(0, hash); 304 | } 305 | var qm = rest.indexOf('?'); 306 | if (qm !== -1) { 307 | this.search = rest.substr(qm); 308 | this.query = rest.substr(qm + 1); 309 | if (parseQueryString) { 310 | this.query = querystring.parse(this.query); 311 | } 312 | rest = rest.slice(0, qm); 313 | } else if (parseQueryString) { 314 | // no query string, but parseQueryString still requested 315 | this.search = ''; 316 | this.query = {}; 317 | } 318 | if (rest) this.pathname = rest; 319 | if (slashedProtocol[lowerProto] && 320 | this.hostname && !this.pathname) { 321 | this.pathname = '/'; 322 | } 323 | 324 | //to support http.request 325 | if (this.pathname || this.search) { 326 | var p = this.pathname || ''; 327 | var s = this.search || ''; 328 | this.path = p + s; 329 | } 330 | 331 | // finally, reconstruct the href based on what has been validated. 332 | this.href = this.format(); 333 | return this; 334 | }; 335 | 336 | // format a parsed object into a url string 337 | function urlFormat(obj) { 338 | // ensure it's an object, and not a string url. 339 | // If it's an obj, this is a no-op. 340 | // this way, you can call url_format() on strings 341 | // to clean up potentially wonky urls. 342 | if (typeof obj === 'string') obj = urlParse(obj); 343 | 344 | else if (typeof obj !== 'object' || obj === null) 345 | throw new TypeError("Parameter 'urlObj' must be an object, not " + 346 | obj === null ? 'null' : typeof obj); 347 | 348 | else if (!(obj instanceof Url)) return Url.prototype.format.call(obj); 349 | 350 | return obj.format(); 351 | } 352 | 353 | Url.prototype.format = function() { 354 | var auth = this.auth || ''; 355 | if (auth) { 356 | auth = encodeURIComponent(auth); 357 | auth = auth.replace(/%3A/i, ':'); 358 | auth += '@'; 359 | } 360 | 361 | var protocol = this.protocol || '', 362 | pathname = this.pathname || '', 363 | hash = this.hash || '', 364 | host = false, 365 | query = ''; 366 | 367 | if (this.host) { 368 | host = auth + this.host; 369 | } else if (this.hostname) { 370 | host = auth + (this.hostname.indexOf(':') === -1 ? 371 | this.hostname : 372 | '[' + this.hostname + ']'); 373 | if (this.port) { 374 | host += ':' + this.port; 375 | } 376 | } 377 | 378 | if (this.query !== null && 379 | typeof this.query === 'object' && 380 | Object.keys(this.query).length) { 381 | query = querystring.stringify(this.query); 382 | } 383 | 384 | var search = this.search || (query && ('?' + query)) || ''; 385 | 386 | if (protocol && protocol.substr(-1) !== ':') protocol += ':'; 387 | 388 | // only the slashedProtocols get the //. Not mailto:, xmpp:, etc. 389 | // unless they had them to begin with. 390 | if (this.slashes || 391 | (!protocol || slashedProtocol[protocol]) && host !== false) { 392 | host = '//' + (host || ''); 393 | if (pathname && pathname.charAt(0) !== '/') pathname = '/' + pathname; 394 | } else if (!host) { 395 | host = ''; 396 | } 397 | 398 | if (hash && hash.charAt(0) !== '#') hash = '#' + hash; 399 | if (search && search.charAt(0) !== '?') search = '?' + search; 400 | 401 | pathname = pathname.replace(/[?#]/g, function(match) { 402 | return encodeURIComponent(match); 403 | }); 404 | search = search.replace('#', '%23'); 405 | 406 | return protocol + host + pathname + search + hash; 407 | }; 408 | 409 | function urlResolve(source, relative) { 410 | return urlParse(source, false, true).resolve(relative); 411 | } 412 | 413 | Url.prototype.resolve = function(relative) { 414 | return this.resolveObject(urlParse(relative, false, true)).format(); 415 | }; 416 | 417 | function urlResolveObject(source, relative) { 418 | if (!source) return relative; 419 | return urlParse(source, false, true).resolveObject(relative); 420 | } 421 | 422 | Url.prototype.resolveObject = function(relative) { 423 | if (typeof relative === 'string') { 424 | var rel = new Url(); 425 | rel.parse(relative, false, true); 426 | relative = rel; 427 | } 428 | 429 | var result = new Url(); 430 | var tkeys = Object.keys(this); 431 | for (var tk = 0; tk < tkeys.length; tk++) { 432 | var tkey = tkeys[tk]; 433 | result[tkey] = this[tkey]; 434 | } 435 | 436 | // hash is always overridden, no matter what. 437 | // even href="" will remove it. 438 | result.hash = relative.hash; 439 | 440 | // if the relative url is empty, then there's nothing left to do here. 441 | if (relative.href === '') { 442 | result.href = result.format(); 443 | return result; 444 | } 445 | 446 | // hrefs like //foo/bar always cut to the protocol. 447 | if (relative.slashes && !relative.protocol) { 448 | // take everything except the protocol from relative 449 | var rkeys = Object.keys(relative); 450 | for (var rk = 0; rk < rkeys.length; rk++) { 451 | var rkey = rkeys[rk]; 452 | if (rkey !== 'protocol') 453 | result[rkey] = relative[rkey]; 454 | } 455 | 456 | //urlParse appends trailing / to urls like http://www.example.com 457 | if (slashedProtocol[result.protocol] && 458 | result.hostname && !result.pathname) { 459 | result.path = result.pathname = '/'; 460 | } 461 | 462 | result.href = result.format(); 463 | return result; 464 | } 465 | 466 | if (relative.protocol && relative.protocol !== result.protocol) { 467 | // if it's a known url protocol, then changing 468 | // the protocol does weird things 469 | // first, if it's not file:, then we MUST have a host, 470 | // and if there was a path 471 | // to begin with, then we MUST have a path. 472 | // if it is file:, then the host is dropped, 473 | // because that's known to be hostless. 474 | // anything else is assumed to be absolute. 475 | if (!slashedProtocol[relative.protocol]) { 476 | var keys = Object.keys(relative); 477 | for (var v = 0; v < keys.length; v++) { 478 | var k = keys[v]; 479 | result[k] = relative[k]; 480 | } 481 | result.href = result.format(); 482 | return result; 483 | } 484 | 485 | result.protocol = relative.protocol; 486 | if (!relative.host && 487 | !/^file:?$/.test(relative.protocol) && 488 | !hostlessProtocol[relative.protocol]) { 489 | var relPath = (relative.pathname || '').split('/'); 490 | while (relPath.length && !(relative.host = relPath.shift())); 491 | if (!relative.host) relative.host = ''; 492 | if (!relative.hostname) relative.hostname = ''; 493 | if (relPath[0] !== '') relPath.unshift(''); 494 | if (relPath.length < 2) relPath.unshift(''); 495 | result.pathname = relPath.join('/'); 496 | } else { 497 | result.pathname = relative.pathname; 498 | } 499 | result.search = relative.search; 500 | result.query = relative.query; 501 | result.host = relative.host || ''; 502 | result.auth = relative.auth; 503 | result.hostname = relative.hostname || relative.host; 504 | result.port = relative.port; 505 | // to support http.request 506 | if (result.pathname || result.search) { 507 | var p = result.pathname || ''; 508 | var s = result.search || ''; 509 | result.path = p + s; 510 | } 511 | result.slashes = result.slashes || relative.slashes; 512 | result.href = result.format(); 513 | return result; 514 | } 515 | 516 | var isSourceAbs = (result.pathname && result.pathname.charAt(0) === '/'), 517 | isRelAbs = ( 518 | relative.host || 519 | relative.pathname && relative.pathname.charAt(0) === '/' 520 | ), 521 | mustEndAbs = (isRelAbs || isSourceAbs || 522 | (result.host && relative.pathname)), 523 | removeAllDots = mustEndAbs, 524 | srcPath = result.pathname && result.pathname.split('/') || [], 525 | relPath = relative.pathname && relative.pathname.split('/') || [], 526 | psychotic = result.protocol && !slashedProtocol[result.protocol]; 527 | 528 | // if the url is a non-slashed url, then relative 529 | // links like ../.. should be able 530 | // to crawl up to the hostname, as well. This is strange. 531 | // result.protocol has already been set by now. 532 | // Later on, put the first path part into the host field. 533 | if (psychotic) { 534 | result.hostname = ''; 535 | result.port = null; 536 | if (result.host) { 537 | if (srcPath[0] === '') srcPath[0] = result.host; 538 | else srcPath.unshift(result.host); 539 | } 540 | result.host = ''; 541 | if (relative.protocol) { 542 | relative.hostname = null; 543 | relative.port = null; 544 | if (relative.host) { 545 | if (relPath[0] === '') relPath[0] = relative.host; 546 | else relPath.unshift(relative.host); 547 | } 548 | relative.host = null; 549 | } 550 | mustEndAbs = mustEndAbs && (relPath[0] === '' || srcPath[0] === ''); 551 | } 552 | 553 | if (isRelAbs) { 554 | // it's absolute. 555 | result.host = (relative.host || relative.host === '') ? 556 | relative.host : result.host; 557 | result.hostname = (relative.hostname || relative.hostname === '') ? 558 | relative.hostname : result.hostname; 559 | result.search = relative.search; 560 | result.query = relative.query; 561 | srcPath = relPath; 562 | // fall through to the dot-handling below. 563 | } else if (relPath.length) { 564 | // it's relative 565 | // throw away the existing file, and take the new path instead. 566 | if (!srcPath) srcPath = []; 567 | srcPath.pop(); 568 | srcPath = srcPath.concat(relPath); 569 | result.search = relative.search; 570 | result.query = relative.query; 571 | } else if (relative.search !== null && relative.search !== undefined) { 572 | // just pull out the search. 573 | // like href='?foo'. 574 | // Put this after the other two cases because it simplifies the booleans 575 | if (psychotic) { 576 | result.hostname = result.host = srcPath.shift(); 577 | //occationaly the auth can get stuck only in host 578 | //this especialy happens in cases like 579 | //url.resolveObject('mailto:local1@domain1', 'local2@domain2') 580 | var authInHost = result.host && result.host.indexOf('@') > 0 ? 581 | result.host.split('@') : false; 582 | if (authInHost) { 583 | result.auth = authInHost.shift(); 584 | result.host = result.hostname = authInHost.shift(); 585 | } 586 | } 587 | result.search = relative.search; 588 | result.query = relative.query; 589 | //to support http.request 590 | if (result.pathname !== null || result.search !== null) { 591 | result.path = (result.pathname ? result.pathname : '') + 592 | (result.search ? result.search : ''); 593 | } 594 | result.href = result.format(); 595 | return result; 596 | } 597 | 598 | if (!srcPath.length) { 599 | // no path at all. easy. 600 | // we've already handled the other stuff above. 601 | result.pathname = null; 602 | //to support http.request 603 | if (result.search) { 604 | result.path = '/' + result.search; 605 | } else { 606 | result.path = null; 607 | } 608 | result.href = result.format(); 609 | return result; 610 | } 611 | 612 | // if a url ENDs in . or .., then it must get a trailing slash. 613 | // however, if it ends in anything else non-slashy, 614 | // then it must NOT get a trailing slash. 615 | var last = srcPath.slice(-1)[0]; 616 | var hasTrailingSlash = ( 617 | (result.host || relative.host || srcPath.length > 1) && 618 | (last === '.' || last === '..') || last === ''); 619 | 620 | // strip single dots, resolve double dots to parent dir 621 | // if the path tries to go above the root, `up` ends up > 0 622 | var up = 0; 623 | for (var i = srcPath.length; i >= 0; i--) { 624 | last = srcPath[i]; 625 | if (last === '.') { 626 | spliceOne(srcPath, i); 627 | } else if (last === '..') { 628 | spliceOne(srcPath, i); 629 | up++; 630 | } else if (up) { 631 | spliceOne(srcPath, i); 632 | up--; 633 | } 634 | } 635 | 636 | // if the path is allowed to go above the root, restore leading ..s 637 | if (!mustEndAbs && !removeAllDots) { 638 | for (; up--; up) { 639 | srcPath.unshift('..'); 640 | } 641 | } 642 | 643 | if (mustEndAbs && srcPath[0] !== '' && 644 | (!srcPath[0] || srcPath[0].charAt(0) !== '/')) { 645 | srcPath.unshift(''); 646 | } 647 | 648 | if (hasTrailingSlash && (srcPath.join('/').substr(-1) !== '/')) { 649 | srcPath.push(''); 650 | } 651 | 652 | var isAbsolute = srcPath[0] === '' || 653 | (srcPath[0] && srcPath[0].charAt(0) === '/'); 654 | 655 | // put the host back 656 | if (psychotic) { 657 | result.hostname = result.host = isAbsolute ? '' : 658 | srcPath.length ? srcPath.shift() : ''; 659 | //occationaly the auth can get stuck only in host 660 | //this especialy happens in cases like 661 | //url.resolveObject('mailto:local1@domain1', 'local2@domain2') 662 | var authInHost = result.host && result.host.indexOf('@') > 0 ? 663 | result.host.split('@') : false; 664 | if (authInHost) { 665 | result.auth = authInHost.shift(); 666 | result.host = result.hostname = authInHost.shift(); 667 | } 668 | } 669 | 670 | mustEndAbs = mustEndAbs || (result.host && srcPath.length); 671 | 672 | if (mustEndAbs && !isAbsolute) { 673 | srcPath.unshift(''); 674 | } 675 | 676 | if (!srcPath.length) { 677 | result.pathname = null; 678 | result.path = null; 679 | } else { 680 | result.pathname = srcPath.join('/'); 681 | } 682 | 683 | //to support request.http 684 | if (result.pathname !== null || result.search !== null) { 685 | result.path = (result.pathname ? result.pathname : '') + 686 | (result.search ? result.search : ''); 687 | } 688 | result.auth = relative.auth || result.auth; 689 | result.slashes = result.slashes || relative.slashes; 690 | result.href = result.format(); 691 | return result; 692 | }; 693 | 694 | Url.prototype.parseHost = function() { 695 | var host = this.host; 696 | var port = portPattern.exec(host); 697 | if (port) { 698 | port = port[0]; 699 | if (port !== ':') { 700 | this.port = port.substr(1); 701 | } 702 | host = host.substr(0, host.length - port.length); 703 | } 704 | if (host) this.hostname = host; 705 | }; 706 | 707 | // About 1.5x faster than the two-arg version of Array#splice(). 708 | function spliceOne(list, index) { 709 | for (var i = index, k = i + 1, n = list.length; k < n; i += 1, k += 1) 710 | list[i] = list[k]; 711 | list.pop(); 712 | } 713 | -------------------------------------------------------------------------------- /request.js: -------------------------------------------------------------------------------- 1 | var url = require('url'); 2 | var merge = require('merge'); 3 | var qs = require('querystring'); 4 | var validUrl = require('valid-url'); 5 | 6 | var auth = function(request){ 7 | if(!request.auth) 8 | return null; 9 | var auth = ""; 10 | if(request.auth.username) 11 | auth+=request.auth.username; 12 | auth+=":"; 13 | if(request.auth.password) 14 | auth+=request.auth.password; 15 | if(auth!==":") 16 | return auth; 17 | else 18 | return null; 19 | } 20 | 21 | var validBody = function(request){ 22 | var valid_body_methods = ['POST', 'PUT', 'PATCH']; 23 | var valid_no_body_methods = ['GET', 'TRACE', 'DELETE', 'HEAD', 'OPTIONS']; 24 | var method = request.method.toUpperCase(); 25 | var isBodySet = request.body || request.json || request.data; 26 | var canSendBody = valid_body_methods.indexOf(method) >=0; 27 | var cantSendBody = valid_no_body_methods.indexOf(method)>=0; 28 | return (isBodySet && canSendBody) || (!isBodySet && cantSendBody); 29 | } 30 | 31 | var defaultSettings = { 32 | "phantom": { 33 | "webSecurityEnabled": true, 34 | "javascriptEnabled": true, 35 | "userAgent": "Phoenix/1.3", 36 | "XSSAuditingEnabled": true, 37 | "localToRemoteUrlAccessEnabled": true, 38 | "loadImages": true, 39 | "resourceTimeout": 3000 40 | } 41 | }; 42 | 43 | // This parses the request object for phantom 44 | var req = function(request){ 45 | var settings = defaultSettings; 46 | var link = url.parse(request.url, true); 47 | delete link.search; 48 | link.auth = auth(request); 49 | link.query = merge(link.query, request.query); 50 | settings.url = url.format(link); 51 | settings.header = request.headers; 52 | if(!settings.header) 53 | settings.header = []; 54 | settings.operation = request.method; 55 | settings.encoding = request.encoding; 56 | // The priority order is BODY > JSON > POST 57 | if(validBody(request)){ 58 | if(request.data){ 59 | settings.body = qs.stringify(request.data); 60 | settings.header['Content-Type'] = 'application/x-www-form-urlencoded'; 61 | } 62 | if(request.json){ 63 | settings.body = JSON.stringify(request.json); 64 | settings.header['Content-Type'] = 'application/json'; 65 | } 66 | if(request.body){ 67 | settings.body = request.body; 68 | delete settings.header['Content-Type']; 69 | } 70 | } 71 | else{ 72 | throw new Error("Invalid Request Body"); 73 | } 74 | 75 | return settings; 76 | } 77 | 78 | req.updateUrl = function(settings, partial){ 79 | if(validUrl.isWebUri(partial)){ 80 | settings.url = partial; 81 | } 82 | else if(partial.length > 0){ 83 | var query = qs.parse(partial); 84 | var link = url.parse(settings.url, true); 85 | delete link.search; 86 | link.query = merge.recursive(link.query, query); 87 | settings.url = url.format(link); 88 | } 89 | return settings.url; 90 | } 91 | 92 | module.exports = req; 93 | -------------------------------------------------------------------------------- /runner.js: -------------------------------------------------------------------------------- 1 | var path = require('path') 2 | var childProcess = require('child_process'); 3 | var phantomjs = require('phantomjs'); 4 | var binPath = phantomjs.path; 5 | var tmp = require('tmp'); 6 | var fs = require('fs'); 7 | var sh = require("shelljs"); 8 | 9 | module.exports = function(config, phantomConfig, cb){ 10 | var cwd = sh.pwd(); 11 | tmp.dir({ 12 | dir: path.join(cwd, 'logs/'), 13 | prefix: 'phoenix-', 14 | postfix: '', 15 | keep: true 16 | },function (err, dir){ 17 | if(err){ 18 | cb(err); 19 | return; 20 | } 21 | 22 | var configPath = path.join(dir, './config.json'), 23 | id = path.basename(dir); 24 | 25 | // Write the request config to the tmp file 26 | fs.writeFile(configPath, JSON.stringify(phantomConfig), {mode: 0600}, function (err){ 27 | if(err){ 28 | cb(new Error("Couldn't write config")); 29 | return; 30 | } 31 | 32 | console.log('START: ' + id); 33 | 34 | var childArgs = [] 35 | 36 | if(config.phantom){ 37 | childArgs.push(path.join(cwd, config.phantom)); 38 | } 39 | else{ 40 | childArgs.push(path.join(__dirname, 'phantom.js')); 41 | } 42 | 43 | childArgs.push(configPath); 44 | 45 | if(config.js){ 46 | childArgs.push(path.join(cwd, config.js)); 47 | } 48 | 49 | childProcess.execFile(binPath, childArgs, function(err, stdout, stderr) { 50 | var logPath = path.join(dir, './browser.log'), 51 | errLogPath = path.join(dir, './page.log'); 52 | // done with the request 53 | fs.writeFile(logPath, stdout); 54 | fs.writeFile(errLogPath, stderr); 55 | console.log('STOP : ' + id); 56 | cb(null, id); 57 | }); 58 | }); 59 | }); 60 | } 61 | -------------------------------------------------------------------------------- /test/fixtures/test_config.yml: -------------------------------------------------------------------------------- 1 | # Name of the redis queue to listen to 2 | queue: phoenix 3 | request: 4 | url: "http://me.captnemo.in/" 5 | # This is the query string appended to the url 6 | query: 7 | phantom: 1 8 | # This is just basic auth 9 | auth: 10 | username: username 11 | password: password 12 | headers: 13 | Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" 14 | Accept-Encoding: "gzip, deflate, sdch" 15 | Accept-Language: "en-US,en;q=0.8,hi;q=0.6,sv;q=0.4" 16 | Cache-Control: "max-age=0" 17 | Connection: "keep-alive" 18 | DNT: 1 19 | User-Agent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2342.2 Safari/537.36" 20 | # This is custom js to run in the proper context on the webpage _before_ it loads 21 | js: custom.js 22 | -------------------------------------------------------------------------------- /test/request_test.js: -------------------------------------------------------------------------------- 1 | var request = require('../request'); 2 | var configParser = require('../config'); 3 | var assert = require('assert'); 4 | var config = configParser('test/fixtures/test_config.yml'); 5 | var url = require('url'); 6 | 7 | describe('Request', function(){ 8 | 9 | it('should parse request config properly', function(){ 10 | var settings = request(config.request); 11 | assert.equal(settings.header['DNT'], 1); 12 | }); 13 | 14 | it('should parse auth headers properly', function(){ 15 | var link = url.parse(request(config.request).url); 16 | assert.equal(link.auth, "username:password"); 17 | 18 | delete config.request.auth.password; 19 | link = url.parse(request(config.request).url); 20 | assert.equal(link.auth, "username:"); 21 | 22 | delete config.request.auth.username; 23 | link = url.parse(request(config.request).url); 24 | assert.equal(link.auth, null); 25 | 26 | config.request.auth.password = "password"; 27 | link = url.parse(request(config.request).url); 28 | assert.equal(link.auth, ":password"); 29 | 30 | delete config.request.auth; 31 | url.parse(request(config.request).url); 32 | }); 33 | 34 | describe('should raise error for invalid bodies', function(){ 35 | var req, fn; 36 | beforeEach(function(){ 37 | req = configParser('test/fixtures/test_config.yml').request; 38 | }); 39 | 40 | it('for GET + body', function(){ 41 | fn = function(){ 42 | req.body = "Hello"; 43 | }; 44 | }); 45 | 46 | it('for HEAD + json', function(){ 47 | fn = function(){ 48 | req.method = 'head'; 49 | req.json = {"phantom":"opera"}; 50 | } 51 | }); 52 | 53 | it('for OPTIONS + post', function(){ 54 | fn = function(){ 55 | req.method = 'OPTIONS'; 56 | req.body = {"phantom":"opera"}; 57 | } 58 | }); 59 | 60 | afterEach(function(){ 61 | assert.throws(function(){ 62 | fn(); 63 | request(req); 64 | }, /Invalid Request Body/); 65 | }); 66 | }); 67 | 68 | describe('should raise not error for valid bodies', function(){ 69 | var req; 70 | beforeEach(function(){ 71 | req = configParser('test/fixtures/test_config.yml').request; 72 | }); 73 | 74 | it('for GET + no body', function(){ 75 | req.method = 'GET'; 76 | }); 77 | 78 | it('for HEAD + no body', function(){ 79 | req.method = 'head'; 80 | }); 81 | 82 | it('for POST + direct body', function(){ 83 | req.method = 'POST'; 84 | req.body = "Hello"; 85 | }); 86 | 87 | it('for PUT + json body', function(){ 88 | req.method = 'PUT'; 89 | req.json = {a:1}; 90 | }); 91 | 92 | it('for PATCH + post body', function(){ 93 | req.method = 'PATCH'; 94 | req.body = {a:1}; 95 | }); 96 | 97 | afterEach(function(){ 98 | request(req); 99 | }); 100 | }); 101 | 102 | describe('should set correct Content-Type header for body requests', function(){ 103 | var req, expectedContentType; 104 | beforeEach(function(){ 105 | req = configParser('test/fixtures/test_config.yml').request; 106 | }); 107 | 108 | it('for POST + body', function(){ 109 | req.method = 'POST'; 110 | req.body = "Hello"; 111 | expectedContentType = undefined; 112 | }); 113 | 114 | it('for PUT + json', function(){ 115 | req.method = 'PUT'; 116 | req.json = {a:1}; 117 | expectedContentType = 'application/json'; 118 | }); 119 | 120 | it('for PATCH + post', function(){ 121 | req.method = 'PATCH'; 122 | req.data = {a:1}; 123 | expectedContentType = 'application/x-www-form-urlencoded'; 124 | }); 125 | 126 | afterEach(function(){ 127 | var settings = request(req); 128 | assert.equal(settings.header['Content-Type'], expectedContentType); 129 | }); 130 | }); 131 | }); 132 | --------------------------------------------------------------------------------