├── .gitignore ├── .npmignore ├── .travis.yml ├── Changes.md ├── Readme.md ├── benchmark ├── index.js ├── server │ ├── index.html │ └── index.js └── tests │ ├── osmosis.js │ └── x-ray.js ├── index.js ├── jsdoc.json ├── lib ├── Command.js ├── Data.js ├── Form.js ├── Queue.js ├── Request.js └── commands │ ├── click.js │ ├── config.js │ ├── contains.js │ ├── cookie.js │ ├── data.js │ ├── delay.js │ ├── do.js │ ├── done.js │ ├── fail.js │ ├── filter.js │ ├── find.js │ ├── follow.js │ ├── get.js │ ├── header.js │ ├── headers.js │ ├── if.js │ ├── learn.js │ ├── login.js │ ├── match.js │ ├── paginate.js │ ├── parse.js │ ├── pause.js │ ├── proxy.js │ ├── resume.js │ ├── rewrite.js │ ├── set.js │ ├── stop.js │ ├── submit.js │ ├── success.js │ ├── then.js │ ├── train.js │ ├── trigger.js │ ├── use.js │ └── using.js ├── package-lock.json ├── package.json └── test ├── click.js ├── config.js ├── do.js ├── filters.js ├── find.js ├── follow.js ├── get.js ├── internals.js ├── login.js ├── paginate.js ├── parse.js ├── process_response_option.js ├── proxy.js ├── resume.js ├── run.js ├── save.js ├── server └── index.js ├── set.js ├── stop.js ├── submit.js ├── then.js ├── user_agent_option.js └── z_close.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | docs/ 3 | npm-debug.log 4 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | # - "0.10" 4 | - 8 5 | -------------------------------------------------------------------------------- /Changes.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | #### TODO: 4 | 5 
| * Add `.learn()` to generate a selector for a selected node 6 | * Add `.listen()` for easily creating DOM event listeners 7 | * Add `.trigger()` for easily triggering DOM events 8 | * Add `.on()` for binding callback to a local-only event 9 | * Add `.url()` to set the current URL 10 | * Add `.params()` to set the current URL parameters 11 | * Add `.save()` to save response data to a file 12 | * Add `.add()`, `.remove()` for node creation/deletion? 13 | * Add `.scroll()` to scrape infinite scroll pages 14 | * Add warnings for parser errors? 15 | * Switch to semantic versioning? 16 | 17 | ## Next major release: 18 | 19 | * Event/error handling 20 | * Error.code = 404, 'timeout', etc. 21 | * Error.module = 'http', 'dom', etc. 22 | * return true = retry, false = stop, anything else = continue 23 | * Event for discontinued context/data 24 | * Module system using osmosis.require and modules prefixed with `osmosis-` 25 | * Way to trigger DOM 26 | * Throw unhandled errors? 27 | * `.while()` to do things more than once as long as they call next() 28 | 29 | ## 0.1.5 30 | 31 | * Fixed bug where .get() without `params` caused empty query string ('?') 32 | * Preserve sort order for `.follow()` results within `.set()` 33 | 34 | ## 0.1.4 35 | 36 | #### `get` 37 | 38 | * Removed `opts` and `callback` arguments 39 | 40 | #### `set` 41 | 42 | * Supports an array as the root data object 43 | * Fixed case where nested `.find` searches the entire document 44 | 45 | ## 0.1.3 46 | 47 | * parseHtml uses `huge` option by default 48 | * Fixed nested Osmosis instances inside `set` 49 | * Update to `libxmljs-dom` v0.0.5 50 | 51 | #### `set` 52 | 53 | * Fixed nested Osmosis instances inside `set` 54 | * Added tests for nested set data 55 | 56 | #### `submit` 57 | 58 | * Proper `submit` button handling 59 | * Accepts a `submit` button selector as the first argument 60 | * Supports `submit` button attributes: "form", "formaction", "formenctype" and "formmethod" 61 | * Added tests for `submit` 
button handling 62 | 63 | ## 0.1.2 64 | 65 | * Update to `libxmljs-dom` v0.0.4 66 | 67 | ## 0.1.1 68 | 69 | * `proxy` option can now be an array of multiple proxies 70 | 71 | #### `proxy` 72 | 73 | * Added `.proxy()` to easily set the `proxy` configuration option 74 | 75 | #### `then` 76 | 77 | * If the first argument's name is: 78 | * "document" - The callback is given the current document 79 | * "window" - The callback is given the Window object 80 | * "$" - The callback is given a jQuery object (if available) 81 | 82 | ### Internal changes: 83 | 84 | * Uses 'use strict' 85 | * Minimize use of array.forEach 86 | * Added libxml specific memoryUsage monitoring 87 | * Switched to static `libxmljs-dom` version 88 | 89 | ## 0.1.0 90 | 91 | * Added `ignore_http_errors` option 92 | * Added `:internal` for selecting internal links 93 | * Added `:external` for selecting external links 94 | * Added `:domain` for searching by domain name 95 | * Added `:path` for searching by path 96 | 97 | #### `config` 98 | 99 | * Configuration options are inherited down the chain 100 | 101 | #### `contains` 102 | 103 | * Added `.contains(string)` to discard nodes whose contents do not match `string` 104 | 105 | #### `do` 106 | 107 | * Added `.do()` to call one or more commands using the current context 108 | 109 | #### `failure` (or `fail`) 110 | 111 | * Added `.failure(selector)` to discard nodes that match the given selector 112 | 113 | #### `filter` (or `success`) 114 | 115 | * Added `.filter(selector)` to discard nodes that do not match the given selector 116 | 117 | #### `get` 118 | 119 | * Accepts a tokenized URL string 120 | * @{...} - Request info (url, method, params, headers, etc.) 
121 | * %{...} - `data` object 122 | * ${...} - `context` search 123 | 124 | #### `headers` (or `header`) 125 | 126 | * Added `headers({ key: value })` and `header(key, value)` to set HTTP headers 127 | 128 | #### `match` 129 | 130 | * Added `.match([selector], RegExp)` to discard nodes whose contents do not match 131 | 132 | #### `rewrite` 133 | 134 | * Added `.rewrite(callback)` to set a URL rewriting function for the preceding request 135 | 136 | ### Internal changes: 137 | 138 | * `promise.args` is now an object (used to be an array) 139 | * HTTP 400 errors are now logged and the requests are retried. 140 | 141 | ## 0.0.9 142 | 143 | * DOM and css2xpath functionality have been moved to `libxmljs-dom` 144 | * Added `keep_data` option to retain the original HTTP response 145 | * Added `process_response` option for processing data before parsing 146 | * Added test suite 147 | 148 | #### `click` 149 | 150 | * Added `.click()` for interacting with JS-only content 151 | 152 | #### `delay` 153 | 154 | * Added `.delay(n)` for waiting n seconds before calling next. Accepts a decimal value. 155 | 156 | #### `find` 157 | 158 | * Accepts an array of selectors as the first argument 159 | 160 | #### `follow` 161 | 162 | * Accepts second argument. Boolean (true = follow external links) or a URL rewriting function. 163 | 164 | #### `get` 165 | 166 | * Accepts `function(context, data)` as the first argument. The function must return a URL string. 
167 | 168 | #### `parse` 169 | 170 | * Added second argument to associate a base-url to the document 171 | 172 | #### `then` 173 | 174 | * Added optional `done` argument 175 | 176 | #### `select` 177 | 178 | * Added `.select` for finding elements within the current context 179 | 180 | #### `set` 181 | 182 | * Replaces previously set values 183 | 184 | ### Internal changes: 185 | 186 | * Enhanced stack counting 187 | * Added data object ref counting 188 | * Added domain specific cookie handling 189 | * Improved stability of deep instance nesting with `.set()` 190 | * Osmosis instances operate more independently 191 | * Request queues are now a single array for each instance 192 | * Promises must accept and call `done` if they asynchronously 193 | send more than one output context per input context 194 | * If `.then` sends more than one output context per input context, 195 | then it must accept `done()` as its last argument and 196 | call it after calling `next()` for the last time. 197 | 198 | ## 0.0.8 199 | 200 | #### `config` 201 | 202 | * Ensure non-default `needle` options propagate 203 | 204 | ## 0.0.7 205 | 206 | #### `paginate` 207 | 208 | * Added a more intuitive method for pagination 209 | 210 | #### `submit` 211 | 212 | * Added easy form submission 213 | 214 | #### `login` 215 | 216 | * Added easy login support 217 | 218 | #### `pause`, `resume`, `stop` 219 | 220 | * Added pause, resume, and stop functionality 221 | 222 | #### `find` 223 | 224 | * Searches the entire document by default 225 | 226 | #### `set` 227 | 228 | * Supports innerHTML using `:html` or `:source` in selectors 229 | * Supports deep JSON structures and nested Osmosis instances 230 | 231 | #### `data` 232 | 233 | * `.data(null)` clears the data object 234 | * `.data({})` appends keys to data object 235 | 236 | #### `dom` 237 | 238 | * `.dom()` is continuing progress and can now run jQuery 239 | -------------------------------------------------------------------------------- /Readme.md: 
-------------------------------------------------------------------------------- 1 | # Osmosis 2 | 3 | HTML/XML parser and web scraper for NodeJS. 4 | 5 | [![NPM](https://nodei.co/npm/osmosis.png)](https://www.npmjs.com/package/osmosis) 6 | 7 | [![Build Status](https://travis-ci.org/rchipka/node-osmosis.svg)](https://travis-ci.org/rchipka/node-osmosis) 8 | 9 | ![Downloads](https://img.shields.io/npm/dm/osmosis.svg) 10 | 11 | ## Features 12 | 13 | - Uses native libxml C bindings 14 | - Clean promise-like interface 15 | - Supports CSS 3.0 and XPath 1.0 selector hybrids 16 | - [Sizzle selectors](https://github.com/jquery/sizzle/wiki#other-selectors-and-conventions), 17 | [Slick selectors](http://mootools.net/core/docs/1.6.0/Slick/Slick), and 18 | [more](https://github.com/rchipka/node-osmosis/blob/master/docs/Selectors.md) 19 | - No large dependencies like jQuery, cheerio, or jsdom 20 | - Compose deep and complex data structures 21 | 22 | - HTML parser features 23 | - Fast parsing 24 | - Very fast searching 25 | - Small memory footprint 26 | 27 | - HTML DOM features 28 | - Load and search ajax content 29 | - DOM interaction and events 30 | - Execute embedded and remote scripts 31 | - Execute code in the DOM 32 | 33 | - HTTP request features 34 | - Logs urls, redirects, and errors 35 | - Cookie jar and custom cookies/headers/user agent 36 | - Login/form submission, session cookies, and basic auth 37 | - Single proxy or multiple proxies and handles proxy failure 38 | - Retries and redirect limits 39 | 40 | ## Example 41 | 42 | ```javascript 43 | var osmosis = require('osmosis'); 44 | 45 | osmosis 46 | .get('www.craigslist.org/about/sites') 47 | .find('h1 + div a') 48 | .set('location') 49 | .follow('@href') 50 | .find('header + div + div li > a') 51 | .set('category') 52 | .follow('@href') 53 | .paginate('.totallink + a.button.next:first') 54 | .find('p > a') 55 | .follow('@href') 56 | .set({ 57 | 'title': 'section > h2', 58 | 'description': '#postingbody', 59 | 
'subcategory': 'div.breadbox > span[4]', 60 | 'date': 'time@datetime', 61 | 'latitude': '#map@data-latitude', 62 | 'longitude': '#map@data-longitude', 63 | 'images': ['img@src'] 64 | }) 65 | .data(function(listing) { 66 | // do something with listing data 67 | }) 68 | .log(console.log) 69 | .error(console.log) 70 | .debug(console.log) 71 | ``` 72 | 73 | ## Documentation 74 | 75 | For documentation and examples check out [https://rchipka.github.io/node-osmosis/global.html](https://rchipka.github.io/node-osmosis/global.html) 76 | 77 | ## Dependencies 78 | 79 | - [libxmljs-dom](https://github.com/rchipka/node-libxmljs-dom) - DOM wrapper for [libxmljs](https://github.com/libxmljs/libxmljs) C bindings 80 | - [needle](https://github.com/tomas/needle) - Lightweight HTTP wrapper 81 | 82 | ## Donate 83 | 84 | Please consider a donation if you depend on web scraping and Osmosis makes your job a bit easier. 85 | Your contribution allows me to spend more time making this the best web scraper for Node. 
86 | 87 | [![Donate](https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif)](https://www.paypal.com/cgi-bin/webscr?item_name=node-osmosis&cmd=_donations&business=NAXMWBMWKUWUU) 88 | -------------------------------------------------------------------------------- /benchmark/index.js: -------------------------------------------------------------------------------- 1 | var fs = require('fs'), 2 | testPath = __dirname + '/tests/', 3 | tests = fs.readdirSync(testPath), 4 | server = require('./server'), 5 | cycles = 500; 6 | 7 | function Timer(callback) { 8 | this.events = []; 9 | this.callback = callback; 10 | 11 | return this; 12 | } 13 | 14 | Timer.prototype.start = function () { 15 | this.events.push({ 16 | name: 'start', 17 | time: ms(), 18 | duration: 0 19 | }); 20 | }; 21 | 22 | Timer.prototype.done = function (name) { 23 | var event = {}, 24 | length = this.events.length, 25 | prev = this.events[length - 1]; 26 | 27 | event.name = name; 28 | event.time = ms(); 29 | event.duration = event.time - prev.time; 30 | 31 | this.events.push(event); 32 | }; 33 | 34 | Timer.prototype.stop = function () { 35 | var event = { 36 | name: 'stop', 37 | time: ms() 38 | }, 39 | stop = this.events[0]; 40 | 41 | event.duration = event.time - stop.time; 42 | 43 | this.events.push(event); 44 | this.callback.call(this); 45 | }; 46 | 47 | function ms() { 48 | return (new Date()).getTime(); 49 | } 50 | 51 | (function loadTest(i) { 52 | var test = require(testPath + tests[i]); 53 | 54 | loadBenchmark(test, Object.keys(test), 0, function () { 55 | if (++i < tests.length) { 56 | loadTest(i); 57 | } else { 58 | testsFinished(); 59 | } 60 | }); 61 | })(0); 62 | 63 | function loadBenchmark(benchmark, keys, index, done) { 64 | var name = keys[index]; 65 | 66 | console.log("Starting " + name + " - " + cycles + " cycles"); 67 | runBenchmark(name, benchmark[name], function () { 68 | if (keys.length < ++index) { 69 | loadBenchmark(benchmark, keys, index, done); 70 | } else { 71 | done(); 72 | 
} 73 | }, []); 74 | } 75 | 76 | function runBenchmark(name, start, done, array) { 77 | start(new Timer(function () { 78 | var total = 0; 79 | 80 | array.push(this.events[this.events.length - 1].duration); 81 | 82 | if (array.length === cycles) { 83 | array.forEach(function (duration) { 84 | total += duration; 85 | }); 86 | console.log("Timing (" + name + "): " + (total / array.length) + 'ms'); 87 | done(); 88 | } else { 89 | runBenchmark(name, start, done, array); 90 | } 91 | }), server.url); 92 | } 93 | 94 | function testsFinished() { 95 | server.close(); 96 | } 97 | -------------------------------------------------------------------------------- /benchmark/server/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Home 5 | 6 | 7 | 13 |
14 | 15 | 16 |
17 | 18 | 19 | -------------------------------------------------------------------------------- /benchmark/server/index.js: -------------------------------------------------------------------------------- 1 | var http = require('http'), 2 | URL = require('url'), 3 | qs = require('querystring'), 4 | fs = require('fs'), 5 | file = fs.readFileSync(__dirname + '/index.html'), 6 | host = 'localhost', 7 | port = 1337, 8 | server; 9 | 10 | server = http.createServer(function (req, res) { 11 | var url = URL.parse(req.url); 12 | 13 | res.writeHead(200, { 'Content-Type': 'text/html' }); 14 | res.end(fs.readFileSync(__dirname + url.pathname)); 15 | }); 16 | 17 | server.on('error', function () { 18 | console.log('ERROR:', error); 19 | }); 20 | 21 | server.listen(port); 22 | 23 | 24 | module.exports.host = host; 25 | module.exports.port = port; 26 | module.exports.url = 'http://' + host + ':' + port + '/index.html'; 27 | module.exports.close = function () { 28 | server.close(); 29 | }; 30 | -------------------------------------------------------------------------------- /benchmark/tests/osmosis.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var osmosis = require('../../'); 4 | 5 | module.exports.osmosis = function (timer, url) { 6 | timer.start(); 7 | 8 | osmosis(url) 9 | .set({ 10 | links: osmosis.follow('a').find('title') 11 | }) 12 | .done(function () { 13 | timer.stop(); 14 | }); 15 | }; 16 | -------------------------------------------------------------------------------- /benchmark/tests/x-ray.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var Xray = require('x-ray'); 4 | 5 | module.exports.xray = function (timer, url) { 6 | var x = Xray(), total = 0; 7 | 8 | timer.start(); 9 | 10 | x(url, ['a'])(function (err, arr) { 11 | var i = arr.length + 1; 12 | 13 | while (--i) { 14 | x(url, { title: 'title' })(function () { 15 | if (++total === arr.length) 
{ 16 | timer.stop(); 17 | } 18 | }); 19 | } 20 | }); 21 | }; 22 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var Command = require('./lib/Command.js'), 4 | Queue = require('./lib/Queue.js'), 5 | request = require('./lib/Request.js'), 6 | libxml = require('libxmljs-dom'), 7 | instanceId = 0, 8 | memoryUsage = 0, 9 | cachedSelectors = {}, 10 | toMB = function (size, num) { 11 | return (size / 1024 / 1024).toFixed(num || 2) + 'Mb'; 12 | }, 13 | 14 | extend = function (object, donor) { 15 | var key, keys = Object.keys(donor), 16 | i = keys.length; 17 | 18 | while (i--) { 19 | key = keys[i]; 20 | object[key] = donor[key]; 21 | } 22 | 23 | return object; 24 | }; 25 | 26 | /** 27 | * 28 | * Unless called with `new`, Osmosis will start automatically. 29 | * To start an instance created with `new`, use {@link Osmosis.run}. 30 | * 31 | * @constructor Osmosis 32 | * 33 | * @param {(string|contextCallback)} url - A URL 34 | * @param {object} [params] - GET query parameters 35 | * @returns Command 36 | * @see {@link Command.run} 37 | * 38 | * @example {@lang javascript} 39 | * 40 | * // These instances start immediately 41 | * osmosis.get('http://example.com'); 42 | * osmosis('http://example.com'); 43 | * 44 | * // These instances need started 45 | * instance = new osmosis.get('http://example.com'); 46 | * instance.run(); 47 | * 48 | * instance = new osmosis('http://example.com'); 49 | * instance.run(); 50 | */ 51 | 52 | function Osmosis(url, params) { 53 | if (url !== undefined) { 54 | if (this instanceof Osmosis) { 55 | return new Osmosis.get(url, params); 56 | } 57 | 58 | return Osmosis.get(url, params); 59 | } 60 | 61 | this.queue = new Queue(this); 62 | this.command = new Command(this); 63 | this.id = ++instanceId; 64 | } 65 | 66 | 67 | /** 68 | * @name options 69 | * 70 | * Osmosis and {@link 
https://github.com/tomas/needle|needle} options. 71 | * 72 | * @property {string} accept - HTTP Accept header 73 | * @property {bool} compressed - Compress HTTP requests 74 | * @property {number} concurrency - Number of simultaneous HTTP requests 75 | * @property {bool} decode_response - Decode compressed HTTP responses 76 | * @property {number} follow - Number of redirects to follow 77 | * @property {bool} follow_set_cookies - Set cookies for redirects 78 | * @property {bool} follow_set_referer - Set referer header for redirects 79 | * @property {bool} keep_data - Keep raw HTTP data in 80 | context.response.data 81 | * @property {number} timeout - HTTP request timeout (default `30 * 1000`) 82 | * @property {number} tries - HTTP request attempts 83 | * @property {string|function} user_agent - HTTP user agent (a function is called to produce it) 84 | * @memberof Osmosis 85 | * @instance 86 | * @default 87 | */ 88 | 89 | Osmosis.prototype.opts = { 90 | accept: 'text/html,application/xhtml+xml,' + 91 | 'application/xml;q=0.9,*/*;q=0.8', 92 | compressed: true, 93 | concurrency: 5, 94 | decode_response: true, 95 | follow: 3, 96 | follow_set_cookies: true, 97 | follow_set_referer: true, 98 | keep_data: false, 99 | parse_cookies: true, // Parse "Set-Cookie" header 100 | parse_response: false, 101 | rejectUnauthorized: false, 102 | statsThreshold: 25, 103 | timeout: 30 * 1000, 104 | tries: 3, 105 | user_agent: 'Mozilla/5.0 (Windows NT x.y; rv:10.0) ' + 106 | 'Gecko/20100101 Firefox/10.0' 107 | }; 108 | 109 | /** 110 | * Configure global Osmosis options. 111 | * 112 | * @function config 113 | * @memberof Osmosis 114 | * @param {string|object} option - A string `key` or an object of 115 | * { key: value } pairs.
116 | * @param {any} [value] - A value for the `key` 117 | * @instance 118 | * @see {@link Command.config} 119 | * @see {@link Osmosis.options} 120 | */ 121 | 122 | Osmosis.config = 123 | Osmosis.prototype.config = function (option, value) { 124 | var hasPrototype = (this.prototype !== undefined), 125 | opts, key; 126 | 127 | if (hasPrototype === true) { // `this` is the constructor: edit the shared prototype defaults 128 | opts = this.prototype.opts; 129 | } else if (this.opts === undefined) { 130 | opts = this.opts = {}; 131 | } else { 132 | opts = this.opts; 133 | } 134 | 135 | if (option === undefined) { // no arguments: return the current options object 136 | return opts; 137 | } 138 | 139 | if (value !== undefined) { 140 | opts[option] = value; 141 | } else if (option !== undefined) { 142 | for (key in option) { 143 | opts[key] = option[key]; 144 | } 145 | } 146 | }; 147 | 148 | /** 149 | * Run (or re-run) an Osmosis instance. 150 | * 151 | * If you frequently use the same Osmosis instance 152 | * (such as in an Express server), it's much more efficient to 153 | * initialize the instance once and repeatedly use `run` as needed. 154 | * 155 | * @borrows Command.run 156 | * @see {@link Command.run} 157 | */ 158 | Osmosis.prototype.run = function () { 159 | var self = this; 160 | 161 | process.nextTick(function () { 162 | self.started = true; 163 | self.command.start(); 164 | }); 165 | }; 166 | 167 | /** 168 | * Make an HTTP request.
169 | * 170 | * @private 171 | */ 172 | 173 | Osmosis.prototype.request = function (url, opts, callback, tries) { 174 | var self = this, 175 | href = url.href, 176 | method = url.method, 177 | params = url.params; 178 | 179 | this.requests++; 180 | this.queue.requests++; 181 | this.queue.push(); 182 | 183 | if (typeof opts.user_agent === 'function') { 184 | opts.user_agent = opts.user_agent(); 185 | } 186 | 187 | request(url.method, 188 | url, 189 | url.params, 190 | opts, 191 | tries, 192 | function (err, res, data) { 193 | var proxies = opts.proxies; 194 | 195 | self.queue.requests--; 196 | 197 | if ((res === undefined || res.statusCode !== 404) && 198 | proxies !== undefined) { 199 | self.command.error('proxy ' + (proxies.index + 1) + 200 | '/' + proxies.length + 201 | ' failed (' + opts.proxy + ')'); 202 | 203 | // remove the failing proxy 204 | if (proxies.length > 1) { 205 | opts.proxies.splice(proxies.index, 1); 206 | opts.proxy = proxies[proxies.index]; 207 | } 208 | } 209 | 210 | if (err !== null && ++tries < opts.tries) { 211 | self.queueRequest(url, opts, callback, tries); 212 | 213 | if (self.opts.log === true) { 214 | self.command.error(err + ', retrying ' + 215 | url.href + ' (' + 216 | (tries + 1) + '/' + 217 | opts.tries + ')'); 218 | } 219 | } else { 220 | callback(err, res, data); 221 | } 222 | 223 | self.dequeueRequest(); 224 | self.queue.pop(); 225 | }) 226 | .on('redirect', function (new_url) { 227 | if (self.opts.log === true) { 228 | self.command.log('[redirect] ' + 229 | href + ' -> ' + new_url); 230 | } 231 | }); 232 | }; 233 | 234 | /** 235 | * Add a request to the queue. 
236 | * 237 | * @param {object} url - Request descriptor; `href`, `method` and `params` are read from it 238 | * @param {object} opts - HTTP request options 239 | * @param {function} callback - Function to call when done 240 | * @param {number} [tries=0] - Number of attempts already made 241 | * 242 | * @private 243 | */ 244 | 245 | Osmosis.prototype.queueRequest = function (url, 246 | opts, 247 | callback, 248 | tries) { 249 | if (tries === undefined) { 250 | tries = 0; 251 | } 252 | 253 | if (this.queue.requests < this.opts.concurrency) { 254 | this.request(url, opts, callback, tries); 255 | } else { 256 | this.queue.enqueue([url, opts, callback, tries]); 257 | } 258 | }; 259 | 260 | Osmosis.prototype.dequeueRequest = function () { 261 | var arr, length = this.queue.length; 262 | 263 | if (length === 0 || this.queue.requests >= this.opts.concurrency) { 264 | return; 265 | } 266 | 267 | arr = this.queue.dequeue(); 268 | 269 | this.request(arr[0], arr[1], arr[2], arr[3]); 270 | }; 271 | 272 | /** 273 | * Parse XML/HTML data. 274 | * 275 | * @param {string|buffer} data - The data to parse 276 | * @param {object} opts - libxmljs parse options 277 | * @private 278 | * @see Command.parse 279 | */ 280 | 281 | Osmosis.prototype.parse = function (data, opts) { 282 | /* 283 | * We only use `parseHtml` because we need to 284 | * avoid libxml namespaces when searching the document. 285 | */ 286 | 287 | var document = libxml.parseHtml(data, opts); 288 | 289 | if (opts !== undefined && opts.baseUrl !== undefined) { 290 | document.location = opts.baseUrl; 291 | } 292 | 293 | return document; 294 | }; 295 | 296 | /** 297 | * Print Node.JS process statistics via {@link Command.debug}.
298 | * 299 | * @private 300 | */ 301 | 302 | Osmosis.prototype.resources = function () { 303 | var mem = process.memoryUsage(), 304 | memDiff = toMB(mem.rss - memoryUsage), 305 | libxml_mem = libxml.memoryUsage(), 306 | nodes = libxml.nodeCount(); 307 | 308 | if (this.opts.debug !== true) { 309 | this.resources = null; 310 | 311 | return; 312 | } 313 | 314 | if (nodes >= 1000) { 315 | nodes = (nodes / 1000).toFixed(0) + 'k'; 316 | } 317 | 318 | if (memDiff.charAt(0) !== '-') { 319 | memDiff = '+' + memDiff; 320 | } 321 | 322 | this.command.debug( 323 | 'stack: ' + this.queue.count + ', ' + 324 | 325 | 'requests: ' + this.requests + 326 | ' (' + this.queue.requests + ' queued), ' + 327 | 328 | 'RAM: ' + toMB(mem.rss) + ' (' + memDiff + '), ' + 329 | 330 | 'libxml: ' + ((libxml_mem / mem.rss) * 100).toFixed(1) + 331 | '% (' + nodes + ' nodes), ' + 332 | 333 | 'heap: ' + ((mem.heapUsed / mem.heapTotal) * 100) 334 | .toFixed(0) + '% of ' + 335 | toMB(mem.heapTotal) 336 | ); 337 | 338 | memoryUsage = mem.rss; 339 | }; 340 | 341 | /** 342 | * Set the parent instance for this instance. 343 | * 344 | * Inherit the parent's queue and options. 345 | * 346 | * @private 347 | * @param {Command} parent - The parent Command. 348 | */ 349 | 350 | Osmosis.prototype.setParent = function (parent) { 351 | this.parent = parent; 352 | this.queue = parent.instance.queue; 353 | this.opts = parent.instance.opts; 354 | }; 355 | 356 | /** 357 | * Resume the current instance. 
358 | * 359 | * @param {function} callback - A function to call when resuming 360 | * @borrows Command.resume 361 | * @private 362 | */ 363 | 364 | Osmosis.prototype.resume = function (arg) { 365 | var length, i; 366 | 367 | if (typeof arg === 'function') { 368 | if (this.resumeQueue === undefined) { 369 | this.resumeQueue = []; 370 | } 371 | 372 | this.resumeQueue.push(arg); 373 | } else { 374 | length = this.resumeQueue.length; 375 | 376 | for (i = 0; i < length; ++i) { 377 | this.resumeQueue[i](); 378 | } 379 | 380 | this.dequeueRequest(); 381 | } 382 | }; 383 | 384 | Osmosis.prototype.requests = 0; 385 | Osmosis.prototype.paused = false; 386 | Osmosis.prototype.stopped = false; 387 | Osmosis.prototype.inspect = function () { 388 | return 'Osmosis:' + this.id; 389 | }; 390 | 391 | // Allow use of commands without creating a new instance: 392 | 393 | Object.keys(Command.prototype).forEach(function (name) { 394 | if (Osmosis[name] !== undefined) { 395 | return; 396 | } 397 | 398 | Osmosis[name] = function StartingFunction(arg1, arg2, arg3) { 399 | var instance = new Osmosis(), 400 | command = instance.command; 401 | 402 | instance.calledWithNew = (this instanceof StartingFunction); 403 | 404 | return command[name](arg1, arg2, arg3); 405 | }; 406 | }); 407 | 408 | // libxmljs overrides: 409 | 410 | libxml.Document.prototype.findXPath = libxml.Document.prototype.find; 411 | libxml.Element.prototype.findXPath = libxml.Element.prototype.find; 412 | 413 | libxml.Document.prototype.find = function (selector, cache) { 414 | return this.root().find(selector, cache); 415 | }; 416 | 417 | libxml.Element.prototype.find = function (selector) { 418 | if (selector.charAt(1) === '/' || 419 | selector.charAt(0) === '/' || 420 | selector.charAt(0) === '(') { 421 | return this.findXPath(selector); 422 | } else if (cachedSelectors[selector] === undefined) { 423 | cachedSelectors[selector] = libxml.css2xpath(selector); 424 | } 425 | 426 | return 
this.findXPath(cachedSelectors[selector]) || []; 427 | }; 428 | 429 | /** 430 | * @typedef {object} context 431 | * 432 | * An XML/HTML DOM object representing a Document, Element, Attribute 433 | * or other Node. 434 | */ 435 | 436 | /** 437 | * @typedef {object} data 438 | * 439 | * An object containing values set by `.set` 440 | * @see {@link Command.set} 441 | */ 442 | 443 | /** 444 | * @typedef {string} Selector 445 | * 446 | * A CSS/XPath selector 447 | * @see {@link https://github.com/css2xpath/css2xpath|Selectors} 448 | */ 449 | 450 | /** 451 | * A callback function that returns the desired value. 452 | * 453 | * @callback middlewareCallback 454 | * @param {context} context - The current XML/HTML context node. 455 | * @param {data} data - The current data object. 456 | */ 457 | 458 | Osmosis.libxmljs = libxml; 459 | 460 | module.exports = Osmosis; 461 | -------------------------------------------------------------------------------- /jsdoc.json: -------------------------------------------------------------------------------- 1 | { 2 | "source": { 3 | "include": [ "index.js", "lib" ] 4 | }, 5 | "opts": { 6 | "destination": "docs", 7 | "recurse": true, 8 | "template": "node_modules/ink-docstrap/template" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /lib/Command.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var Data = require('./Data.js'), 4 | URL = require('url'), 5 | fs = require('fs'), 6 | qs = require('querystring'), 7 | formFunctions = require('./Form.js'), 8 | cmdDir = __dirname + '/commands/'; 9 | 10 | /** 11 | * An Osmosis command.
/**
 * An Osmosis command.
 *
 * A command is one link of a chain: `prev` is the preceding command and
 * `next` the following one. The first command of a chain stores the Osmosis
 * instance itself; every later command reads and writes `instance` through
 * its predecessor (see `getInstance` / `setInstance`).
 *
 * @constructor Command
 * @protected
 * @param {object} [parent] - The preceding Command or the owning instance
 * @returns Command
 */

function Command(parent) {
    if (parent === undefined || parent === null) {
        // Detached command: there is nothing to link to, and
        // `Object.getPrototypeOf` would throw on these values.
        return this;
    }

    if (Object.getPrototypeOf(parent) === Command.prototype) {
        // `parent` is a Command
        this.prev = parent;
        Object.defineProperty(this, 'instance', {
            get: Command.prototype.getInstance,
            set: Command.prototype.setInstance
        });
    } else {
        // `parent` is an Osmosis instance
        this.instance = parent;
        // Call `process.nextTick` so other instances can initialize
        process.nextTick(function () {
            // Attempt to auto-run the instance only IF:
            //  * Not created using `new`
            //  * Not a child instance
            if (parent.calledWithNew !== true &&
                    parent.parent === undefined) {
                process.nextTick(function () {
                    // Run on nextTick to allow any
                    // runtimeCommands to finish first
                    parent.run();
                });
            }
        });
    }

    return this;
}

Command.prototype = {
    isCommand: true,

    /**
     * Change context to the current Document.
     *
     * @property document
     * @see {@link Command.doc}
     */

    get document() {
        return this.doc();
    },

    /**
     * Else.
     *
     * NOTE(review): this getter reads `this.else`, i.e. itself, so accessing
     * it recurses until the stack overflows unless an own `else` property
     * shadows it. Behaviour is left untouched; confirm the intended target.
     *
     * @property else
     * @private
     * @see {@link Command.if}
     */

    get else() {
        return this.else();
    },

    /**
     * Copy this command's instance onto another command.
     *
     * @param {Command} command - Command that should share the instance
     * @returns {Command} The command that was passed in
     */

    inherit: function (command) {
        command.instance = this.instance;

        return command;
    },

    /**
     * Change context to the current Window.
     *
     * @property window
     */

    get window() {
        return this.getWindow();
    },

    // Accessor pair installed as the `instance` property of chained
    // commands: both simply delegate to the preceding command.
    getInstance: function () {
        return this.prev.instance;
    },

    setInstance: function (val) {
        return (this.prev.instance = val);
    }
};

/**
 * Run the owning instance.
 *
 * @param {context} context - HTML/XML context
 * @param {data} data - User defined Data
 */

Command.prototype.run = function (context, data) {
    return this.instance.run(context, data);
};

/**
 * Start a Command.
 *
 * Does nothing for a `null` context or a stopped instance, and re-schedules
 * itself through `instance.resume` while the instance is paused. A command
 * without a callback simply forwards to the next command (or to `end`).
 * Otherwise the callback is invoked with `(context, data, next, done)`.
 *
 * @private
 * @function start
 * @param {context} context - HTML/XML context
 * @param {data} data - User defined Data
 * @memberof Command
 */

Command.prototype.start = function (context, data) {
    var self = this,
        next = this.next,
        instance = this.instance,
        callback = this.cb,
        calledNext = false;

    if (context === null) {
        return;
    }

    if (instance.stopped === true) {
        return;
    }

    if (instance.paused === true) {
        instance.resume(function () {
            self.start(context, data);
        });

        return;
    }

    if (callback === undefined) {
        if (next === undefined) {
            this.end(context, data);
        } else {
            next.start(context, data);
        }

        return;
    }

    instance.queue.push();

    if (data === undefined) {
        data = (new Data());
    }

    data.ref();

    return callback.call(this, context, data, function (c, d, index) {
        if (calledNext === true) {
            // If `next` is called more than once,
            // then we need to clone the data
            next.start(c, d.clone().setSortIndex(index).ref());
        } else {
            calledNext = true;
            next.start(c, d.setSortIndex(index));
        }
    }, function (err) {
        data.unref();

        if (calledNext !== true) {
            // Nothing was passed down the chain, so finish here
            self.end(context, data);
        }

        if (err !== undefined) {
            self.error(err);
        }

        instance.queue.pop();
    });
};

/**
 * Called when we reach the end of the command chain.
 *
 * Closes the window that belongs to `context` (if any), counts the finished
 * context on the queue and merges `data` into its parent Data object.
 *
 * @private
 * @param {context} [context] - HTML/XML context
 * @param {data} [data] - User defined Data
 */

Command.prototype.end = function (context, data) {
    var window, parent;

    // We're on the "sentinel node", meaning
    // We've reached the end of the command chain
    if (context !== undefined) {
        if (context.doc === undefined) {
            window = context.window;
        } else if (context.doc().__window !== undefined) {
            window = context.doc().defaultView;
        }

        if (window !== undefined) {
            // close `window` when it reaches the last command
            window.close();
        }

        this.instance.queue.done++;
    }

    if (data !== undefined) {
        parent = data.parent;

        if (parent !== undefined) {
            if (data.isEmpty()) {
                data = data.clone();

                // Fall back to the node's own content, but only when there
                // is a context to read it from.
                if (context !== undefined) {
                    if (context.text !== undefined) {
                        data.setObject(context.text());
                    } else if (context.value !== undefined) {
                        data.setObject(context.value());
                    }
                }
            }

            parent.merge(data);
            data.unref();
        }
    }
};

/**
 * Get the current options and inherit previous options.
 *
 * The options object is created lazily and uses the previous command's
 * options (or the instance options for the first command) as its prototype.
 *
 * @private
 * @returns {object} Options of this command
 */

Command.prototype.getOpts = function () {
    var proto;

    if (this.opts !== undefined) {
        return this.opts;
    }

    if (this.prev !== undefined) {
        proto = this.prev.getOpts();
    } else if (this.instance !== undefined) {
        proto = this.instance.opts;
    }

    // `Object.create(undefined)` throws, so fall back to a plain object
    this.opts = Object.create(proto === undefined ? Object.prototype : proto);

    return this.opts;
};
/**
 * Set an option for the current command.
 *
 * When both `value` and the current (possibly inherited) option are objects,
 * the keys of the current option are copied onto `value`, which then becomes
 * the command's own option. Existing keys therefore take precedence and
 * `value` itself is modified.
 *
 * @private
 * @param {string} name - Option name
 * @param {any} value - Option value
 * @returns {object} Options of this command
 */

Command.prototype.setOpt = function (name, value) {
    var opts = this.getOpts();

    if (value !== null &&
            value instanceof Object &&
            opts[name] !== null &&
            opts[name] instanceof Object) {
        opts[name] = extend(value, opts[name]);
    } else {
        opts[name] = value;
    }

    return opts;
};

/**
 * Internal HTTP request function.
 *
 * The arity of `callback` selects how the result is delivered:
 * `(document)`, `(err, document)` or `(err, res, document)`. A three
 * argument callback also disables parsing (`opts.parse = false`).
 *
 * @param {string} method - Request method
 * @param {context} [context] - Context used to resolve `href`, referer and cookies
 * @param {string} href - URL to load
 * @param {object} params - GET query parameters or POST data
 * @param {function} callback - Callback function
 * @param {number} [sortIndex] - Stored on the loaded document as `_dataSortIndex`
 * @private
 */

Command.prototype.request = function (method, context, href, params, callback, sortIndex) {
    var self = this,
        length = callback.length,
        instance = self.instance,
        opts = Object.create(this.getOpts()),
        url, document, key, proxies;

    if (!href || href.length === 0) {
        callback("Invalid URL");
        return;
    }

    if (length === 3) {
        opts.parse = false;
    }

    if (context !== undefined) {
        document = context.doc();

        url = URL.parse(document.location.resolve(href), true);

        if (opts.follow_set_referer !== false) {
            if (opts.headers === undefined) {
                opts.headers = {};
            }

            opts.headers.referer = document.location.href;
        }

        if (opts.cookies !== undefined) {
            if (document.cookies === undefined) {
                document.cookies = {};
            }

            opts.cookies = extend(document.cookies, opts.cookies);
        } else {
            opts.cookies = document.cookies;
        }

        if (method === 'post') {
            // Check the enctype if submitting a form
            if (formFunctions.isMultipart(context)) {
                opts.multipart = true;
            }
        }
    } else if (href.substr(0, 2) === '//') {
        // Protocol-relative URL. Two characters have to be compared;
        // a one character prefix can never equal '//'.
        url = URL.parse('http:' + href, true);
    } else if (href.substr(0, 4) !== 'http') {
        url = URL.parse('http://' + href, true);
    } else {
        url = URL.parse(href, true);
    }

    url.method = method;
    url.params = params;

    if (method === 'get' && params instanceof Object && params !== null) {
        // Merge the GET parameters into the query string
        for (key in params) {
            url.query[key] = params[key];
        }

        url.params = url.query;
        url.search = qs.stringify(url.query);
        url.href = URL.format(url);
    }

    if (Array.isArray(opts.proxy)) {
        opts.proxies = opts.proxy;
    }

    if (opts.proxies !== undefined) {
        proxies = opts.proxies;

        // Rotate through the proxy list; the position is kept on the array
        if (proxies.index === undefined || ++proxies.index >= proxies.length) {
            proxies.index = 0;
        }

        opts.proxy = proxies[proxies.index];
    }

    instance.queueRequest(url, opts,
        function (err, res, document) {
            if (err !== null) {
                self.error((self.name !== method ?
                    '[' + method + '] ' :
                    '') + (url.href) + ' - ' + err);

                // A one argument callback has no error slot and is not called
                if (length === 2) {
                    callback(err, document);
                } else if (length === 3) {
                    callback(err, res, document);
                }
            } else {
                self.log('loaded [' + method + '] ' + url.href + ' ' +
                    (params ?
                        JSON.stringify(params) :
                        '') +
                    (opts.proxy ?
                        ' via ' + opts.proxy :
                        '')
                );

                if (document instanceof Object && document !== null) {
                    document._dataSortIndex = sortIndex;
                }

                if (length === 1) {
                    callback(document);
                } else if (length === 2) {
                    callback(null, document);
                } else {
                    callback(null, res, document);
                }
            }
        });
};
/**
 * Call a callback when log, error, or debug messages are received.
 *
 * Called with a function, the function replaces the handler on this command
 * and the matching instance option is switched on. Called with a message,
 * the message travels down the chain (prefixed once with the name of the
 * command that emitted it) and finally up to the parent instance.
 *
 * @name log/error/debug
 * @memberof Osmosis
 * @param {function} callback - Callback
 */

['log', 'error', 'debug'].forEach(function (name) {
    Command.prototype[name] = function (msg, prefixed) {
        if (msg instanceof Function) {
            this[name] = msg;
            this.instance.config(name, true);
        } else if (this.next !== undefined) {
            if (prefixed === undefined) {
                this.next[name]('(' + this.name + ') ' + msg, '');
            } else {
                this.next[name](msg, '');
            }
        } else if (this.instance.parent !== undefined) {
            this.instance.parent[name](msg, true);
        }

        return this;
    };
});

/**
 * Copy the own enumerable keys of `donor` onto `object`.
 *
 * @param {object} [object] - Target; a new object is created when undefined
 * @param {object} donor - Source of the keys
 * @returns {object} The target object
 */

function extend(object, donor) {
    var key, keys = Object.keys(donor),
        i = keys.length;

    if (object === undefined) {
        object = {};
    }

    while (i--) {
        key = keys[i];
        object[key] = donor[key];
    }

    return object;
}

/**
 * Register a command that runs while the chain is being built.
 *
 * Without arguments `func` runs immediately and its result is returned.
 * With arguments it is deferred to the next tick and the command itself is
 * returned so the chain can continue.
 *
 * @param {string} name - Method name on `Command.prototype`
 * @param {function} func - Implementation
 */

function runtimeCommand(name, func) {
    Command.prototype[name] = (function () {
        var length = arguments.length,
            self = this, args, i;

        if (length === 0) {
            // Allow `.config()`, etc. to get configuration
            // options during command chain compile time
            return func.call(self);
        }

        args = new Array(length);

        // Copying stops at the first `undefined` argument
        for (i = 0; i < length && arguments[i] !== undefined; i++) {
            args[i] = arguments[i];
        }

        process.nextTick(function () {
            if (self.next !== undefined) {
                // We're NOT on the last command, so we call `func` in the
                // context of the PRECEDING command
                func.apply(self.prev, args);
            } else {
                // We're on the last command, so we call `func` in the
                // context of the FIRST command
                func.apply(self.instance.command, args);
            }
        });

        return self;
    });
}

/**
 * Register a command that runs for every context passed down the chain.
 *
 * A four-parameter `func` is used as the command callback directly; any
 * other `func` is treated as a factory that is called with the command
 * arguments and returns the callback.
 *
 * @param {string} name - Method name on `Command.prototype`
 * @param {function} func - Callback or callback factory
 */

function contextCommand(name, func) {
    Command.prototype[name] = (function () {
        var length = arguments.length,
            self, i, args;

        if (this.name === undefined) {
            // Re-use the still unnamed trailing command
            self = this;
        } else {
            self = new Command(this);
        }

        self.name = name;

        args = new Array(length);

        // Copying stops at the first `undefined` argument
        for (i = 0; i < length && arguments[i] !== undefined; i++) {
            args[i] = arguments[i];

            // `typeof null` is 'object' too, but `null` has nothing to scan
            if (typeof args[i] === 'object' && args[i] !== null) {
                args[i] = this.findCommandArg(args[i]);
            }
        }

        self.args = args;

        if (func.length === 4) {
            self.cb = func;
        } else {
            self.cb = func.apply(self, self.args);
        }

        self.next = new Command(self);

        return self.next;
    });
}

/**
 * Replace nested command chains and functions inside an argument.
 *
 * A Command argument is attached to this command as a child instance and
 * replaced by that instance's first command. Objects are scanned key by
 * key; function values are wrapped with `.then`.
 *
 * @param {object} obj - Argument to scan (modified in place)
 * @returns {object} The first command of a nested chain, or `obj`
 */

Command.prototype.findCommandArg = function (obj) {
    var keys, key, length, i = 0;

    if (obj instanceof Command) {
        obj.instance.setParent(this);
        return obj.instance.command;
    }

    keys = Object.keys(obj);
    length = keys.length;

    for (; i < length; i++) {
        key = keys[i];
        switch (typeof obj[key]) {
            case 'object':
                if (obj[key] !== null) {
                    obj[key] = this.findCommandArg(obj[key]);
                }

                break;
            case 'function':
                obj[key] = this.findCommandArg(this.then(obj[key]));
        }
    }

    return obj;
};

// Load every file of the commands directory: modules exporting an object
// contribute context commands (one per key), modules exporting a function
// become a runtime command named after the file.
fs.readdirSync(cmdDir).forEach(function (file) {
    var command = require(cmdDir + file);

    if (typeof command === 'object') {
        Object.keys(command).forEach(function (name) {
            contextCommand(name, command[name]);
        });
    } else {
        runtimeCommand(file.substr(0, file.length - 3), command);
    }
});

module.exports = Command;

// --------------------------------------------------------------------------------
// /lib/Data.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * @constructor Data
 * @param {Data} [parent] - Parent Data object
 * @property {object} stack - Reference counter shared between clones (`count`, `done`)
 * @property {object} object - Key/value data storage
 * @property {Data} parent - Parent Data object
 * @private
 */

function Data(parent) {
    this.stack = { count: 0 };

    if (parent) {
        this.parent = parent;
    }

    return this;
}

/**
 * Create an empty child Data object for the parent.
 *
 * @returns {Data} New Data object whose parent is this object
 */

Data.prototype.child = function () {
    return new Data(this);
};

/**
 * Clone a Data object.
 *
 * The clone gets a shallow copy of the stored object.
 *
 * @returns {Data} The clone
 */

Data.prototype.clone = function () {
    var clone = this.next();

    clone.object = this.copy();

    return clone;
};
/**
 * Call callback when `Data.stack.count` === 0.
 *
 * @param {function} cb - Invoked by `unref` once the count drops to zero
 * @returns {Data} This object
 */

Data.prototype.done = function (cb) {
    this.stack.done = cb;
    return this;
};

/**
 * Get the raw data object.
 *
 * The storage is created on first access: an array for array-like Data,
 * a plain object otherwise.
 *
 * @returns {object|Array} The stored object
 */

Data.prototype.getObject = function () {
    if (this.object === undefined) {
        if (this.isArray() === true) {
            this.toArray();
        } else {
            this.setObject({});
        }
    }

    return this.object;
};

/**
 * Set the raw data object.
 *
 * @param {any} object - New stored value
 * @returns {Data} This object
 */

Data.prototype.setObject = function (object) {
    this.object = object;

    return this;
};

/**
 * Create a new Data object to pass to the next Command.
 *
 * The new object has the same parent, sort index, index and array flag,
 * and shares both the reference counter and the stored object.
 *
 * @returns {Data} The new Data object
 */

Data.prototype.next = function () {
    var clone = new Data(this.parent)
        .setSortIndex(this.getSortIndex())
        .setIndex(this.getIndex())
        .isArray(this.isArray());

    clone.stack = this.stack;
    clone.object = this.object;
    return clone;
};

/**
 * Increase the reference count of the shared stack.
 *
 * @returns {Data} This object
 */

Data.prototype.ref = function () {
    this.stack.count++;
    return this;
};

/**
 * Decrease the reference count of the shared stack and call the `done`
 * callback (with this object as `this`) when it reaches zero.
 *
 * @returns {Data} This object, like the other mutators
 */

Data.prototype.unref = function () {
    if (--this.stack.count === 0) {
        if (this.stack.done !== undefined) {
            this.stack.done.call(this);
        }
    }

    return this;
};
/**
 * Set a key/value in {@link Data.object}.
 *
 * Array-like Data pushes the value instead. Setting a key that already has
 * a value turns that key into an array holding both values.
 *
 * @param {string|object} key - A key or { key: val } object
 * @param {any} val - A value
 * @returns {Data} This object
 */

Data.prototype.set = function (key, val) {
    var object, currentVal;

    if (val === undefined) {
        return this;
    }

    if (this.isArray() === true) {
        return this.push(val);
    }

    object = this.getObject();
    currentVal = object[key];

    if (currentVal !== undefined) {
        // If the key being set already has a value,
        // then convert it to an Array.
        if (currentVal instanceof Array) {
            currentVal.push(val);
        } else {
            object[key] = [currentVal, val];
        }
    } else {
        object[key] = val;
    }

    return this;
};

/**
 * Push a value onto {@link Data.object} array.
 *
 * The stored object is converted to an array first, even when `val` is
 * undefined and nothing is pushed.
 *
 * @param {any} val - A value
 * @returns {Data} This object
 */

Data.prototype.push = function (val) {
    var object = this.toArray();

    if (val === undefined) {
        return this;
    }

    object.push(val);

    return this;
};

/**
 * Create a shallow copy of the stored object.
 *
 * @returns {any} Copied array or object; other values are returned as-is
 */

Data.prototype.copy = function () {
    var obj = this.object,
        data, i, keys, key;

    // Test the value itself: the array flag may be set while nothing
    // (or a non-array) is stored, and only real arrays can be sliced.
    if (obj instanceof Array) {
        data = obj.slice(0);
    } else if (obj instanceof Object) {
        data = {};

        for (i = 0, keys = Object.keys(obj); i < keys.length; i++) {
            key = keys[i];
            data[key] = obj[key];
        }
    } else {
        data = obj;
    }

    return data;
};

/**
 * Get or set the array flag.
 *
 * @param {boolean} [val] - When given, stores the flag and returns `this`
 * @returns {boolean|Data} Whether the Data is array-like, or `this` when setting
 */

Data.prototype.isArray = function (val) {
    if (val !== undefined) {
        this._isArray = val === true;
        return this;
    }

    return (this._isArray === true || this.object instanceof Array);
};

/**
 * @returns {boolean} True when nothing is stored or the stored object has no keys
 */

Data.prototype.isEmpty = function () {
    return (this.object === undefined ||
            (this.object instanceof Object &&
             Object.keys(this.object).length === 0)
           );
};

/**
 * @returns {string} Key under which this object is merged into its parent
 */

Data.prototype.getIndex = function () {
    return this._index;
};

/**
 * Set the key used when merging into the parent; ignored for arrays.
 *
 * @param {string} index - Key in the parent object
 * @returns {Data} This object
 */

Data.prototype.setIndex = function (index) {
    if (this.isArray() !== true) {
        this._index = index;
    }

    return this;
};

/**
 * Set the sort index; `undefined` leaves the current value untouched.
 *
 * @param {number} index - Sort position
 * @returns {Data} This object
 */

Data.prototype.setSortIndex = function (index) {
    if (index !== undefined) {
        this.sortIndex = index;
    }

    return this;
};

/**
 * @returns {number} Sort position of this object
 */

Data.prototype.getSortIndex = function () {
    return this.sortIndex;
};

/**
 * Keep the array stored under `key` ordered by sort index.
 *
 * A parallel array of sort indices is kept per key in `this.sortArray`.
 *
 * @param {string} key - Key whose value was just set
 * @param {number} sortIndex - Sort index of the value that was just added
 */

Data.prototype.sortKey = function (key, sortIndex) {
    var object = this.getObject(),
        currentVal = object[key],
        sortArray, diff, i;

    if (!this.sortArray) {
        this.sortArray = {};
    }

    sortArray = this.sortArray[key];

    if (sortArray === undefined) {
        if (currentVal instanceof Array && currentVal.length > 0) {
            // No indices were recorded for the values already stored, so
            // they all get the current one. `new Array(length)` would only
            // contain holes, which `map` skips, dropping every value.
            sortArray = [];

            for (i = 0; i < currentVal.length; i++) {
                sortArray.push(sortIndex);
            }
        } else {
            sortArray = [sortIndex];
        }

        this.sortArray[key] = sortArray;
    }

    if (currentVal instanceof Array) {
        diff = currentVal.length - sortArray.length;

        while (diff > 0) {
            sortArray.push(sortIndex + (--diff));
        }

        object[key] = sortArray.map(function (v, i) {
            return {
                value: v,
                index: i
            };
        }).sort(function (a, b) {
            // Equal sort indices keep their insertion order
            return (a.value - b.value) || (a.index - b.index);
        }).map(function (v, i) {
            sortArray[i] = v.value;

            return currentVal[v.index];
        });
    }
};

/**
 * Merge a child Data object into this object.
 *
 * Arrays receive the child's object as a new element; a child with an index
 * is stored under that key; any other object has its keys copied over.
 *
 * @param {Data} child - Data object to merge
 */

Data.prototype.merge = function (child) {
    var object = child.object,
        index = child.getIndex(),
        sortIndex = child.getSortIndex();

    if (object === undefined) {
        return;
    }

    if (this.isArray() === true) {
        this.push(object);
    } else if (index !== undefined) {
        this.set(index, object);
    } else if (object instanceof Object) {
        this.extend(object);
    }

    if (sortIndex !== undefined) {
        this.sortKey(index, sortIndex);
    }
};

/**
 * Convert the stored object to an array.
 *
 * An empty value becomes `[]`; anything else becomes a one element array.
 *
 * @returns {Array} The stored array
 */

Data.prototype.toArray = function () {
    var object = this.object;

    if (object instanceof Array) {
        return object;
    }

    if (this.isEmpty()) {
        this.setObject([]);
    } else {
        this.setObject([ object ]);
    }

    return this.getObject();
};

/**
 * Copy every key of `object` into this Data object (last key first).
 *
 * @param {object} object - Source of the keys
 * @returns {object} The object that was passed in
 */

Data.prototype.extend = function (object) {
    var key, keys = Object.keys(object),
        isArray = this.isArray(),
        i = keys.length;

    while (i--) {
        key = keys[i];

        if (isArray) {
            this.push(object[key]);
        } else {
            this.set(key, object[key]);
        }
    }

    return object;
};

module.exports = Data;

// --------------------------------------------------------------------------------
// /lib/Form.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

var form = {};

form.submit = function () {

};

/**
 * @param {object} node - DOM node
 * @returns {boolean} True when the node is a `form` element
 */

form.isForm = function (node) {
    return node.nodeName === 'form';
};

/**
 * Find the form a node belongs to: the node itself, the form named by its
 * `form` attribute, or its closest `form` ancestor.
 *
 * @param {object} node - DOM node
 * @returns {object} The form node
 */

form.getForm = function (node) {
    if (form.isForm(node)) {
        return node;
    } else if (node.hasAttribute('form')) {
        return node.doc().getElementById(node.getAttribute('form'));
    } else {
        return node.get('ancestor-or-self::form');
    }
};

/**
 * @param {object} node - Form or a node belonging to a form
 * @returns {Array} Named, enabled, non-submit nodes of the form
 */

form.getInputs = function (node) {
    return form.getForm(node).find('[@name ' +
                    'and not(@disabled) ' +
                    'and not(@type="submit")]');
};

/**
 * @param {object} node - Form or submit control
 * @returns {object|null} The submit control, or null when `node` is neither
 *                        a form nor a submit input/button
 */

form.getSubmitButton = function (node) {
    if (form.isForm(node)) {
        return node.get('[@type="submit" and not(@disabled) and ' +
                            '(not(@form) or @form="' +
                                node.getAttribute('id') + '"' +
                        ')][1]');
    } else if ((node.nodeName === 'input' || node.nodeName === 'button') &&
                node.getAttribute('type') === 'submit') {
        return node;
    }

    return null;
};

/**
 * @param {object} node - Form or submit control
 * @returns {string} Absolute URL from `action`/`formaction`, else the document URL
 */

form.getAction = function (node) {
    var document = node.doc();

    if (node.hasAttribute('action')) {
        return document.location.resolve(node.getAttribute('action'));
    } else if (node.hasAttribute('formaction')) {
        return document.location.resolve(node.getAttribute('formaction'));
    } else {
        return document.location.href;
    }
};

/**
 * @param {object} node - Form or submit control
 * @returns {string} `enctype`/`formenctype`, else the URL-encoded default
 */

form.getEnctype = function (node) {
    if (node.hasAttribute('enctype')) {
        return node.getAttribute('enctype');
    } else if (node.hasAttribute('formenctype')) {
        return node.getAttribute('formenctype');
    }

    return 'application/x-www-form-urlencoded';
};

/**
 * @param {object} node - Any context node
 * @returns {boolean} True when the node's enctype starts with "multi"
 */

form.isMultipart = function (node) {
    if (node.hasAttribute === undefined) {
        return false;
    }

    return (form.getEnctype(node).substr(0, 5) === 'multi');
};

/**
 * @param {object} node - Form or submit control
 * @returns {string} Lower-cased `method`/`formmethod`, else 'get'
 */

form.getMethod = function (node) {
    if (node.hasAttribute('method')) {
        return node.getAttribute('method').toLowerCase();
    } else if (node.hasAttribute('formmethod')) {
        return node.getAttribute('formmethod').toLowerCase();
    } else {
        return 'get';
    }
};

/**
 * Collect the parameters a form would submit.
 *
 * Repeated names are collected into arrays. A trailing `[]` is removed from
 * names; other bracketed names (`user[name]`) are kept as they are. Radio
 * buttons and checkboxes only contribute when checked; image inputs
 * contribute `name.x` / `name.y` coordinates of 0.
 *
 * @param {object} node - Form or submit control
 * @returns {object} Parameter names mapped to a value or an array of values
 */

form.getParams = function (node) {
    var params = {},
        submit = form.getSubmitButton(node),
        inputs = form.getInputs(node),
        length = inputs.length,
        i, input, name, option, type, value;

    for (i = 0; i < length; i++) {
        input = inputs[i];
        // Only an array suffix is stripped; cutting two characters off
        // every name ending in "]" would mangle names like "user[name]"
        name = input.getAttribute('name').replace(/\[\]$/, '');
        type = input.getAttribute('type');
        value = null;

        if (type) {
            type = type.toLowerCase();
        }

        switch (input.nodeName) {
            case 'select':
                option = input.get('option[selected]') ||
                         input.get('option:first');

                if (option !== null && option !== undefined) {
                    if (option.hasAttribute('value')) {
                        value = option.getAttribute('value');
                    } else {
                        value = option.textContent;
                    }
                }

                break;
            case 'textarea':
                value = input.textContent;
                break;
            case 'input':
                switch (type) {
                    case 'image':
                        addImageCoordinates(params, name);
                        break;
                    case 'radio':
                    case 'checkbox':
                        if (input.hasAttribute('checked')) {
                            value = input.getAttribute('value') || 'on';
                        }

                        break;
                    default:
                        value = input.getAttribute('value');
                        break;
                }

                break;
        }

        if (value !== null) {
            if (params[name] instanceof Array) {
                params[name].push(value);
            } else if (params[name] !== undefined) {
                params[name] = [params[name], value];
            } else {
                params[name] = value;
            }
        }
    }

    if (submit !== null && submit !== undefined) {
        if (submit.hasAttribute('name')) {
            params[submit.getAttribute('name')] =
                submit.getAttribute('value') || 'Submit Query';
        }
    }

    return params;
};

// Add the click coordinates an image input submits (`name.x`, `name.y`,
// or just `x` / `y` for an empty name), both set to 0.
function addImageCoordinates(params, name) {
    ['x', 'y'].forEach(function (p) {
        var array = [];

        if (name) {
            array.push(name);
        }

        array.push(p);

        params[array.join('.')] = 0;
    });
}

module.exports = form;

// --------------------------------------------------------------------------------
// /lib/Queue.js:
// --------------------------------------------------------------------------------
'use strict';
5 | * 6 | * @constructor Queue 7 | * @protected 8 | * @param {object} instance - parent instance 9 | * @returns Command 10 | */ 11 | 12 | function Queue(instance) { 13 | this.instance = instance; 14 | this.opts = instance.opts; 15 | this.queue = []; 16 | } 17 | 18 | Queue.prototype = { 19 | change: 0, 20 | count: 0, 21 | done: 0, 22 | requests: 0, 23 | length: 0, 24 | enqueue: function (object) { 25 | this.queue[this.length++] = object; 26 | }, 27 | dequeue: function () { 28 | var object = this.queue[--this.length]; 29 | 30 | this.queue[this.length] = null; 31 | 32 | return object; 33 | }, 34 | push: function () { 35 | if (++this.change >= 25) { 36 | if (this.instance.resources !== null) { 37 | this.instance.resources(); 38 | } 39 | 40 | this.change = 0; 41 | } 42 | 43 | return ++this.count; 44 | }, 45 | pop: function () { 46 | var self = this; 47 | 48 | if (--self.count === 0) { 49 | process.nextTick(function () { 50 | var instance; 51 | 52 | if (self.count === 0) { 53 | instance = self.instance; 54 | instance.command.done(); 55 | 56 | if (instance.opts.debug === true) { 57 | instance.resources(); 58 | } 59 | } 60 | }); 61 | } 62 | 63 | this.change++; 64 | 65 | return this.count; 66 | } 67 | }; 68 | 69 | module.exports = Queue; 70 | -------------------------------------------------------------------------------- /lib/Request.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var needle = require('needle'), 4 | URL = require('url'), 5 | libxml = require('libxmljs-dom'); 6 | 7 | /** 8 | * Make an HTTP request. 
/**
 * Make an HTTP request.
 *
 * The response body is optionally passed through `opts.process_response`,
 * parsed as HTML (unless `opts.parse` is false) and handed to `callback`
 * as `(err, res, document)`. Errors are reported as strings.
 *
 * @private
 * @param {string} method - Request method
 * @param {object} url - Parsed URL; updated in place when redirected
 * @param {object} params - Request data
 * @param {object} opts - Request options
 * @param {number} tries - Not used by this function
 * @param {function} callback - Called with (err, res, document)
 */

function Request(method, url, params, opts, tries, callback) {
    var location = url;

    return needle.request(method,
        url.href,
        params,
        opts,
        function (err, res, data) {
            var processed;

            if (!(url.params instanceof Object) || url.params === null) {
                url.params = url.query;
            }

            if (err !== null) {
                callback(err.message);
                return;
            }

            // NOTE(review): status codes above 500 are not treated as
            // errors here - confirm whether that is intended.
            if (opts.ignore_http_errors !== true &&
                    res !== undefined &&
                    res.statusCode >= 400 &&
                    res.statusCode <= 500) {
                // HTTP error
                callback(res.statusCode + ' ' + res.statusMessage);
                return;
            }

            if (method !== 'head' && (!data || data.length === 0)) {
                callback('Data is empty');
                return;
            }

            // Parse the (possibly pre-processed) body and attach the
            // request/response details to the resulting document.
            function next(document) {
                if (opts.parse === false) {
                    callback(null, res, document);
                    return;
                }

                document = libxml.parseHtml(document,
                            { baseUrl: location.href, huge: true });

                if (document === null) {
                    callback('Couldn\'t parse response');
                    return;
                }

                if (document.errors[0] !== undefined &&
                        document.errors[0].code === 4) {
                    callback('Document is empty');
                    return;
                }

                if (document.root() === null) {
                    callback('Document has no root');
                    return;
                }

                // Prefer the public accessor; `_headers` is a deprecated
                // internal property that is kept only as a fallback.
                location.headers = typeof res.req.getHeaders === 'function' ?
                    res.req.getHeaders() :
                    res.req._headers;
                location.proxy = opts.proxy;
                location.user_agent = opts.user_agent;

                document.location = location;
                document.request = location;

                setResponseMeta(document, res, data.length);
                setCookies(document, res.cookies);
                setCookies(document, opts.cookies);

                if (opts.keep_data === true) {
                    document.response.data = data;
                }

                callback(null, res, document);
            }

            if (opts.process_response !== undefined &&
                    typeof opts.process_response === 'function') {
                if (opts.process_response.length > 2) {
                    // Asynchronous processor: it calls `next` itself
                    opts.process_response(data, res, next, callback);
                    return;
                }

                processed = opts.process_response(data, res);
                next(processed);
            } else {
                next(data);
            }
        })
        .on('redirect', function (href) {
            extend(location, URL.parse(URL.resolve(location.href, href)));
        });
}

/**
 * Store status, headers and sizes of a response on `document.response`.
 *
 * @param {object} document - Parsed document
 * @param {object} res - HTTP response
 * @param {number} size - Length of the response body
 */

function setResponseMeta(document, res, size) {
    var response = {
        type: getResponseType(res.headers['content-type']),
        statusCode: res.statusCode,
        statusMessage: res.statusMessage,
        headers: res.headers,
        size: {
            body: size
        }
    };

    if (res.socket !== undefined) {
        response.size.total = res.socket.bytesRead;
        response.size.headers = res.socket.bytesRead - size;
    }

    document.response = response;
}

/**
 * Reduce a content type to 'xml' or 'html' where possible.
 *
 * @param {string} [contentType] - Content-Type header value
 * @returns {string|null} 'xml', 'html', the original value, or null when missing
 */

function getResponseType(contentType) {
    if (contentType === undefined) {
        return null;
    }

    if (contentType.indexOf('xml') !== -1) {
        return 'xml';
    }

    if (contentType.indexOf('html') !== -1) {
        return 'html';
    }

    return contentType;
}

/**
 * Copy cookies onto `document.cookies`, creating it when needed.
 *
 * @param {object} document - Parsed document
 * @param {object} [cookies] - Cookie names mapped to values
 */

function setCookies(document, cookies) {
    var key, keys, length;

    // `Object.keys` throws for null as well as undefined
    if (cookies === undefined || cookies === null) {
        return;
    }

    keys = Object.keys(cookies);
    length = keys.length;

    if (length === 0) {
        return;
    }

    if (document.cookies === undefined) {
        document.cookies = {};
    }

    while (length--) {
        key = keys[length];
        document.cookies[key] = cookies[key];
    }
}

/**
 * Copy the own enumerable keys of `donor` onto `object`.
 *
 * @param {object} object - Target
 * @param {object} donor - Source of the keys
 * @returns {object} The target object
 */

function extend(object, donor) {
    var key, keys = Object.keys(donor), i = keys.length;

    while (i--) {
        key = keys[i];
        object[key] = donor[key];
    }

    return object;
}

module.exports = Request;

// --------------------------------------------------------------------------------
// /lib/commands/click.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * Click an HTML element and continue after all events finish.
 *
 * Once the window reports `done`, a click is dispatched on every matching
 * node; the chain continues when `done` fires again for the last node.
 *
 * @function click
 * @param {Selector} selector - Node(s) to click
 * @memberof Command
 * @instance
 * @example {@lang javascript}
 * .click('#nav > a')
 * .then(function(window) {
 *      var ajax = window.document.querySelector("#ajaxContent");
 *
 *      if (ajax.textContent.length > 0) {
 *          this.log("ajax loaded");
 *      }
 * })
 */

function Click(context, data, next, done) {
    var selector = this.args[0],
        targets = context.find(selector),
        lastIndex = targets.length - 1,
        view;

    if (targets.length === 0) {
        if (this.getOpts().debug === true) {
            this.debug('no results for "' + selector + '"');
        }

        return done();
    }

    view = context.doc().defaultView;

    // Dispatch the click and wait for the window to settle again; only the
    // listener registered for the last node moves the chain forward.
    function clickTarget(target, position) {
        target.dispatchEvent('click');

        view.addEventListener('done', function () {
            if (position !== lastIndex) {
                return;
            }

            next(context, data);
            done();
        });
    }

    view.addEventListener('done', function () {
        targets.forEach(clickTarget);
    });
}

module.exports.click = Click;

// --------------------------------------------------------------------------------
// /lib/commands/config.js:
// --------------------------------------------------------------------------------
3 | * 4 | * @function config 5 | * @param {string|object} option - A `key` string or { key: value } object 6 | * @param {string} [value] - A value for the given `key` 7 | * @memberof Command 8 | * @instance 9 | * @see Osmosis.options 10 | * @see Osmosis.config 11 | */ 12 | 13 | module.exports = function (key, val) { 14 | var self = this, opts; 15 | 16 | if (self.name === undefined && self.prev !== undefined) { 17 | self = self.prev; 18 | } 19 | 20 | opts = self.getOpts(); 21 | 22 | if (key === undefined) { 23 | return opts; 24 | } 25 | 26 | if (typeof key === 'object') { 27 | extend(opts, key, true); 28 | } else if (typeof key === 'function') { 29 | key(opts); 30 | } else { 31 | opts[key] = val; 32 | } 33 | 34 | return this; 35 | }; 36 | 37 | function extend(object, donor) { 38 | var key, keys = Object.keys(donor), 39 | i = keys.length; 40 | 41 | while (i--) { 42 | key = keys[i]; 43 | object[key] = donor[key]; 44 | } 45 | 46 | return object; 47 | }; 48 | -------------------------------------------------------------------------------- /lib/commands/contains.js: -------------------------------------------------------------------------------- 1 | /*jslint node: true */ 2 | 'use strict'; 3 | 4 | /** 5 | * Continue if the context node contains the given string. 6 | * 7 | * @function follow 8 | * @memberof Command 9 | * @param {string|RegExp} match - A string to match. 
10 | * @instance 11 | */ 12 | 13 | function Contains(context, data, next, done) { 14 | if (getContent(context).indexOf(this.string) !== -1) { 15 | next(context, data); 16 | } else { 17 | this.debug('"' + this.string + '" not found'); 18 | } 19 | 20 | done(); 21 | } 22 | 23 | function getContent(node) { 24 | if (node.text !== undefined) { 25 | return node.text(); 26 | } else if (node.value !== undefined) { 27 | return node.value(); 28 | } 29 | } 30 | 31 | module.exports.contains = function (string) { 32 | this.string = string; 33 | return Contains; 34 | }; 35 | -------------------------------------------------------------------------------- /lib/commands/cookie.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Set a cookie. Short for `.config({ cookies: ... })`. 3 | * 4 | * Note: Setting a cookie to `null` will delete the cookie. 5 | * 6 | * @function cookie 7 | * @param {string} name - Cookie name 8 | * @param {string} value - Cookie value 9 | * @memberof Command 10 | * @instance 11 | * @see {@link Osmosis.config} 12 | * @see {@link Command.config} 13 | */ 14 | 15 | module.exports = function (name, value) { 16 | var opts = this.getOpts(); 17 | 18 | if (!opts.hasOwnProperty('cookies')) { 19 | if (opts.cookies !== undefined) { 20 | opts.cookies = extend({}, opts.cookies); 21 | } else { 22 | opts.cookies = {}; 23 | } 24 | } 25 | 26 | if (value === null) { 27 | delete opts.cookies[name]; 28 | } else { 29 | opts.cookies[name] = value; 30 | } 31 | 32 | return this; 33 | }; 34 | 35 | function extend(object, donor) { 36 | var key, keys = Object.keys(donor), 37 | i = keys.length; 38 | 39 | while (i--) { 40 | key = keys[i]; 41 | object[key] = donor[key]; 42 | } 43 | 44 | return object; 45 | } 46 | -------------------------------------------------------------------------------- /lib/commands/data.js: -------------------------------------------------------------------------------- 1 | /*jslint node: true */ 2 | 'use strict'; 3 | 
/**
 * Hand the current {@data} object to a callback, then continue.
 *
 * Note: Don't use this command to modify the {@data} object. Please use
 * {@link Command.then} instead.
 *
 * @function data
 * @param {function} callback - A callback with an argument for {@data}
 * @memberof Command
 * @instance
 */

function Data(context, data, next, done) {
    var snapshot = data.getObject();

    this.args[0](snapshot);
    next(context, data);
    done();
}

module.exports.data = Data;

// --------------------------------------------------------------------------------
// /lib/commands/delay.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * Delay each context before continuing down the chain.
 *
 * The command keeps a running `timeout`: every context that arrives is
 * scheduled at the current value, which is then raised by one delay and
 * lowered again once that context's timer fires.
 *
 * @function delay
 * @param {number} delay - A number of milliseconds or a float of seconds.
 * @memberof Command
 * @instance
 */

function Delay(context, data, next, done) {
    var command = this,
        step = command.delay;

    if (command.timeout === undefined) {
        command.timeout = step;
    }

    setTimeout(function release() {
        command.timeout -= step;
        next(context, data);
        done();
    }, command.timeout);

    command.timeout += step;
}


module.exports.delay = function (delay) {
    // A value with a fractional part is read as seconds.
    var isFractional = delay % 1 !== 0;

    this.delay = isFractional ? delay * 1000 : delay;

    return Delay;
};

// --------------------------------------------------------------------------------
// /lib/commands/do.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * Execute each argument asynchronously using the current context and data.
 *
 * After each argument has finished, {@link Command.do} will continue to the
 * immediately following command using the original {@link context}.
 */
/**
 * Execute each argument asynchronously using the current context and data.
 *
 * After each argument has finished, {@link Command.do} will continue to the
 * immediately following command using the original {@link context}.
 *
 * With no arguments there is nothing to wait for, so the chain continues
 * immediately.
 *
 * @function do
 * @memberof Command
 * @param {...(Osmosis|middlewareCallback)} function - Callbacks or instances
 * @instance
 */

var Do = function (context, data, next, done) {
    var args = this.args,
        length = args.length,
        pending = length,
        finish = function () {
            next(context, data);
            done();
        },
        dataDone = function () {
            pending -= 1;

            // Only the completion that brings the counter to zero continues.
            if (pending === 0) {
                finish();
            }
        }, i;

    // Without this guard `dataDone` would never run and neither `next` nor
    // `done` would ever be called.
    if (length === 0) {
        finish();
        return;
    }

    for (i = 0; i < length; i++) {
        // Each argument runs against its own child data object.
        args[i].start(context, data.child().done(dataDone));
    }
};

module.exports.do = Do;

// --------------------------------------------------------------------------------
// /lib/commands/done.js:
// --------------------------------------------------------------------------------
/**
 * Call a callback when the Osmosis instance has completely finished.
 *
 * Given a function, it is stored as this command's `done` handler. Otherwise
 * the call is forwarded to the `done` method of the next command, if any.
 *
 * @function done
 * @memberof Command
 * @param {function} function - Callback function
 * @instance
 */

function Done(cb) {
    if (typeof cb === 'function') {
        this.done = cb;
    } else if (this.next !== undefined) {
        this.next.done();
    }

    return this;
}

module.exports = Done;

// --------------------------------------------------------------------------------
// /lib/commands/fail.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * Continue if the given selector does NOT match any nodes.
 *
 * If a node is found, a {@link Command.error} message will be sent.
 *
 * @function fail
 * @memberof Command
 * @param {Selector} selector - A selector to match.
 */
/**
 * Continue if the given selector does NOT match any nodes.
 *
 * If a node is found, a {@link Command.error} message will be sent.
 *
 * @function fail
 * @memberof Command
 * @param {Selector} selector - A selector to match.
 * @instance
 * @see {@link Command.login}
 * @see {@link Command.filter}
 */

function Fail(context, data, next, done) {
    var found = context.find(this.selector);

    if (found.length > 0) {
        // A match means failure: report it and stop this context here.
        this.error('found ' + this.selector);
        done();
        return;
    }

    next(context, data);
    done();
}

module.exports.fail = function (selector) {
    this.selector = selector;
    return Fail;
};

// --------------------------------------------------------------------------------
// /lib/commands/filter.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * Check that the context node matches the given selector.
 *
 * The context is passed on only when the selector finds at least one node.
 *
 * @function filter
 * @memberof Command
 * @param {Selector} match - A Selector to match
 * @instance
 */


function Filter(context, data, next, done) {
    var found = context.find(this.selector);

    if (!(found.length > 0)) {
        done();
        return;
    }

    next(context, data);
    done();
}

module.exports.filter = function (selector) {
    this.selector = selector;
    return Filter;
};

// --------------------------------------------------------------------------------
// /lib/commands/find.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * Search for nodes in the current Document.
 *
 * @function find
 * @param {Selector|contextCallback|Command.learn} selector
 * @memberof Command
 * @see {@link Command.select}
 * @instance
 */

/**
 * Search for nodes in the current context.
 */
/**
 * Search for nodes in the current context.
 *
 * @function select
 * @param {Selector|contextCallback|Command.learn} selector - A selector
 * @memberof Command
 * @see {@link Command.find}
 * @instance
 */

var Find = function (context, data, next, done) {
    // The selector is either fixed or produced per context by a callback.
    var selector = (this.selector !== undefined) ?
                    this.selector : this.contextCallback(context, data),
        // Relative searches start at the context node, others at its document.
        root = (this.relative === true) ? context : context.doc(),
        nodes = root.find(selector),
        total = nodes.length,
        index, node;

    if (total === 0) {
        done('no results for "' + selector + '"');
        return;
    }

    if (this.getOpts().log === true) {
        this.log('found ' + total + ' results for "' + selector + '"');
    }

    for (index = 0; index < total; index += 1) {
        node = nodes[index];
        // Tag each result with its position in the result set.
        node.last = (index === total - 1);
        node.index = index;
        next(node, data, index);
    }

    done();
};

module.exports.find =
module.exports.select = function (selector) {
    var command = this;

    if (typeof selector === 'function') {
        command.contextCallback = selector;
    } else {
        // An array of selectors is merged into one comma separated selector.
        command.selector = (selector instanceof Array) ?
                            selector.join(', ') : selector;
    }

    if (command.name === 'select') {
        // Search relative to the context node
        command.relative = true;
        return Find;
    }

    // Wait to see if we're a nested instance
    process.nextTick(function () {
        if (command.instance.parent !== undefined) {
            command.relative = true;
        }
    });

    return Find;
};

// --------------------------------------------------------------------------------
// /lib/commands/follow.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * Follow a url.
 */
/**
 * Follow a url.
 *
 * Every node found by the selector yields one GET request; each successful
 * response is passed down the chain as the new context.
 *
 * @function follow
 * @memberof Command
 * @param {Selector} selector - A selector string for link nodes
 * @instance
 */

module.exports.follow = function (context, data, next, done) {
    var self = this,
        selector = this.args[0],
        links = context.find(selector),
        document = context.doc(),
        pending = 0,
        total, index, link, url,
        onResponse = function (err, document) {
            if (err === null) {
                next(document, data, document._dataSortIndex);
            }

            pending -= 1;

            // The last outstanding response finishes this context.
            if (pending === 0) {
                done();
            }
        };

    // Work out the URL a node points at: its value, its raw `href`
    // attribute, or its text content - in that order.
    function extractURL(node) {
        var attr;

        if (node.value !== undefined) {
            return node.value();
        }

        attr = node.attr('href');

        if (attr) {
            // Don't use Attribute.text() or Attribute.value()
            // in order to keep URL encoding
            attr = attr.toString();
            return attr.substring(attr.indexOf('"') + 1,
                                  attr.lastIndexOf('"'));
        }

        return node.text();
    }

    if (links === undefined || links.length === 0) {
        done('no results for "' + selector +
             '" in ' + document.location.href);
        return;
    }

    total = links.length;

    for (index = 0; index < total; index += 1) {
        link = links[index];
        url = extractURL(link);

        if (url !== null && url.length > 0) {
            pending += 1;

            self.log("url: " + url);
            self.request('get', link, url, null, onResponse, index);
        }
    }

    // No request was issued at all, so nothing will call `onResponse`.
    if (pending === 0) {
        done();
    }
};

// --------------------------------------------------------------------------------
// /lib/commands/get.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

var externalURLRegex = /^((http:|https:)?\/\/|[^\/\.])/;

/**
 * Make an HTTP GET request.
 *
 * @function get
 * @param {(string|contextCallback)} url - An absolute or relative URL or a
 *                                         contextCallback that calls a URL.
 */
12 | * @param {object|contextCallback} [params] - HTTP GET query parameters 13 | * @memberof Command 14 | * @instance 15 | * @see {@link Command.post} 16 | */ 17 | 18 | /** 19 | * Make an HTTP POST request. 20 | * @function post 21 | * @param {(string|contextCallback)} url - An absolute or relative URL or a 22 | * contextCallback that calls a URL. 23 | * @param {object|contextCallback} [data] - HTTP POST data 24 | * @memberof Command 25 | * @instance 26 | * @see {@link Command.get} 27 | */ 28 | 29 | function Get(context, data, next, done) { 30 | this.request(this.name, 31 | context, 32 | this.getURL(this.url, context, data), 33 | this.getParam(this.params, context, data), 34 | function (err, context) { 35 | if (err === null) { 36 | next(context, data); 37 | } 38 | 39 | done(); 40 | }); 41 | } 42 | 43 | function getParamArg(url) { 44 | return url; 45 | } 46 | 47 | function getParamFunction(func, context, data) { 48 | var res = func(context, data.getObject()); 49 | 50 | return res; 51 | } 52 | 53 | function getURLArg(url) { 54 | return url; 55 | } 56 | 57 | function getURLFunction(func, context, data) { 58 | var res = func(context, data.getObject()); 59 | 60 | if (res.nodeType !== undefined) { 61 | res = getURLContext(res); 62 | } 63 | 64 | return res; 65 | } 66 | 67 | function getURLContext(context) { 68 | if (context.getAttribute('href')) { 69 | return context.getAttribute('href'); 70 | } 71 | 72 | if (context.text !== undefined) { 73 | return context.text(); 74 | } else if (context.value !== undefined) { 75 | return context.value(); 76 | } 77 | } 78 | 79 | module.exports.get = 80 | module.exports.post = function (url, query) { 81 | var args = this.args, 82 | urlIsFunction = typeof url === 'function', 83 | queryIsFunction = typeof query === 'function'; 84 | 85 | if (typeof args[3] === 'object' || typeof args[4] === 'object') { 86 | console.error("GET/POST: `opts` argument deprecated." 
+ 87 | "Use `.config` instead."); 88 | } 89 | 90 | if (typeof args[3] === 'function' || typeof args[4] === 'function') { 91 | console.error("GET/POST: `callback` argument deprecated." + 92 | "Use `.then` instead."); 93 | } 94 | 95 | if (urlIsFunction === true) { 96 | this.getURL = getURLFunction; 97 | } else { 98 | this.getURL = getURLArg; 99 | } 100 | 101 | if (queryIsFunction === true) { 102 | this.getParam = getParamFunction; 103 | } else { 104 | this.getParam = getParamArg; 105 | } 106 | 107 | this.url = url; 108 | this.params = query; 109 | 110 | return Get; 111 | }; 112 | -------------------------------------------------------------------------------- /lib/commands/header.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Set an HTTP header. Short for `.config({ headers: ... })` 3 | * 4 | * @function header 5 | * @param {string} name - Header name 6 | * @param {string} value - Header value 7 | * @memberof Command 8 | * @instance 9 | * @see Osmosis.headers 10 | * @see Osmosis.config 11 | */ 12 | 13 | module.exports = function (name, value) { 14 | var opts = this.getOpts(), headers; 15 | 16 | if (opts.hasOwnProperty('headers')) { 17 | opts.headers[name] = value; 18 | } else { 19 | headers = {}; 20 | headers[name] = value; 21 | this.setOpt('headers', headers); 22 | } 23 | 24 | return this; 25 | }; 26 | -------------------------------------------------------------------------------- /lib/commands/headers.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Set multiple HTTP headers. Short for `.config({ headers: ... })`. 3 | * 4 | * @function headers 5 | * @param {object} headers - { headerName: headerValue, ... 
} 6 | * @memberof Command 7 | * @instance 8 | * @see Osmosis.header 9 | * @see Osmosis.config 10 | */ 11 | 12 | module.exports = function (headers) { 13 | var opts = this.getOpts(), key; 14 | 15 | if (opts.hasOwnProperty('headers')) { 16 | for (key in headers) { 17 | opts.headers[key] = headers[key]; 18 | } 19 | } else { 20 | this.setOpt('headers', headers); 21 | } 22 | 23 | return this; 24 | }; 25 | -------------------------------------------------------------------------------- /lib/commands/if.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Execute the immediately following command if each argument is true. 3 | * 4 | * An argument is considered to be `true` IF: 5 | * - a {@link Selector} argument finds at least one node 6 | * - a nested {@link Osmosis} instance: 7 | * - Successfully {@link Command.set}s some data OR 8 | * - There is at least one {@link context} 9 | * - a {@link contextCallback} doesn't return false, null, or undefined 10 | * 11 | * @function if 12 | * @private 13 | * @param {Selector|Osmosis|contextCallback} [conditions] 14 | * @memberof Command 15 | * @instance 16 | * @see {@link Command.else} 17 | */ 18 | 19 | var If = function () { 20 | 21 | }; 22 | 23 | If.compile = function (command) { 24 | var args = command.args; 25 | 26 | length = args.length; 27 | }; 28 | 29 | module.exports.if = If; 30 | -------------------------------------------------------------------------------- /lib/commands/learn.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Osmosis learns to find dynamic content via static selectors. 
/**
 * Osmosis learns to find dynamic content via static selectors.
 *
 * Nodes found through the looked-up selector are accumulated in
 * `this.nodeSet`, and a selector describing what they have in common is
 * stored in `this.selector`.
 *
 * @function learn
 * @memberof Command
 * @param {string} name - The name of the runtime variable
 * @instance
 * @see {@link Command.use}
 */

// Works on arrays and on array-like node/class lists alike.
var indexOf = Array.prototype.indexOf;

var Learn = function (context) {
    var name = this.args[0],
        // Look the definition up by the variable name given to `.learn()`.
        selector = this.lookup(name),
        nodes, i;

    if (selector === undefined) {
        // No definition, use the learned selector.
        return;
    }

    nodes = context.find(selector);

    for (i = 0; i < nodes.length; i++) {
        this.nodeSet.push(nodes[i]);
    }

    this.selector = getSelector(this.nodeSet);
};

/**
 * Build a selector from the features shared by every node in `nodes`:
 * a common id, else the common node name, common class names and (for the
 * nodes themselves, not their ancestors) a common position, prefixed with
 * the selector built the same way for the nodes' parents.
 *
 * @private
 * @param {Array} nodes - Nodes to describe
 * @param {boolean} [isParent] - true while describing ancestors
 * @return {string} A selector, or '' for an empty node list
 */
function getSelector(nodes, isParent) {
    var selector = '',
        shared = [],
        node, classes, i, parentSelector;

    // Must come first: everything below reads from nodes[0].
    if (!nodes || nodes.length === 0) {
        return '';
    }

    node = nodes[0];

    // Only a real id counts; nodes that all lack an id also "agree".
    if (node.id && match(nodes, nodeId)) {
        return '#' + node.id;
    }

    if (match(nodes, nodeName)) {
        selector += node.nodeName;
    }

    // Find common class names
    classes = node.classList || [];

    for (i = 0; i < classes.length; i++) {
        if (match(nodes, nodeHasClass, classes[i])) {
            shared.push(classes[i]);
        }
    }

    if (shared.length > 0) {
        selector += '.' + shared.join('.');
    }

    if (node.parentNode && isParent !== true && match(nodes, nodePosition)) {
        selector += ':nth-of-type(' + nodePosition(node) + ')';
    }

    // Nothing in common: still emit a valid step after the ' > ' combinator.
    if (selector === '') {
        selector = '*';
    }

    parentSelector = getSelector(parents(nodes), true);

    if (parentSelector.length > 0) {
        return parentSelector + ' > ' + selector;
    }

    return selector;
}

/**
 * Check whether `cb(node, arg)` yields the same value for every node.
 *
 * @private
 */
function match(nodes, cb, arg) {

    var value = cb(nodes[0], arg), i;

    for (i = 1; i < nodes.length; i++) {
        if (cb(nodes[i], arg) !== value) {
            return false;
        }
    }

    return true;
}

/**
 * Collect the parent of every node that has one.
 *
 * @private
 */
function parents(nodes) {
    var arr = [],
        i, parent;

    for (i = 0; i < nodes.length; i++) {
        parent = nodes[i].parentNode;

        // Stop below the document node (DOM nodeType 9): it cannot be part
        // of a selector. NOTE(review): assumes standard DOM nodeType values.
        if (parent && parent.nodeType !== 9) {
            arr.push(parent);
        }
    }

    return arr;
}

function nodeName(node) {
    return node.nodeName;
}

function nodeId(node) {
    return node.id;
}

function nodeHasClass(node, className) {
    return indexOf.call(node.classList || [], className) !== -1;
}

/**
 * 1-based position of `node` among the siblings that share its node name,
 * which is how `:nth-of-type()` counts. Returns -1 for a detached node.
 *
 * @private
 */
function nodePosition(node) {
    var parent = node.parentNode,
        position = 0,
        siblings, i;

    if (!parent) {
        return -1;
    }

    siblings = parent.childNodes;

    for (i = 0; i < siblings.length; i++) {
        if (siblings[i].nodeName === node.nodeName) {
            position++;
        }

        if (siblings[i] === node) {
            return position;
        }
    }

    return -1;
}

module.exports.learn = function () {
    this.nodeSet = [];
    return Learn;
};

// --------------------------------------------------------------------------------
// /lib/commands/login.js:
// --------------------------------------------------------------------------------
'use strict';

/**
 * Log in using a web page's login form.
 */
/**
 * Log in using a web page's login form.
 *
 * The form is the first one containing a password input. The user field is
 * the last named text input before that password input. All other form
 * parameters are submitted as `Form.getParams` returns them.
 *
 * @function login
 * @memberof Command
 * @param {string} username - Username or email address
 * @param {string} password - Password
 * @instance
 * @see {@link Command.success}
 * @see {@link Command.fail}
 */

var form = require('../Form.js');

function Login(context, data, next, done) {
    var user = this.args[0],
        pass = this.args[1],
        loginForm = context.get('form:has(input[type="password"])'),
        params, userInput, passInput, method, url;

    if (!loginForm) {
        this.error('No login form found');
        // Always finish the context, as the other failure paths below do;
        // returning without `done()` would leave it pending.
        done();
        return;
    }

    userInput = loginForm.get('input[(not(@type) or @type="text") and @name]' +
                              ':before(input[type="password"]):last');

    if (!userInput) {
        done('No user field found');
        return;
    }

    passInput = userInput.get('following::input[type="password"]');

    if (!passInput) {
        done('No password field found');
        return;
    }

    // Start from the form's own parameters and fill in the credentials.
    params = form.getParams(loginForm);
    params[userInput.getAttribute('name')] = user;
    params[passInput.getAttribute('name')] = pass;

    url = form.getAction(loginForm);
    method = form.getMethod(loginForm);

    this.debug(method + ' ' + url + ' ' + JSON.stringify(params));

    this.request(method,
                 loginForm,
                 url,
                 params,
                 function (err, document) {
                     if (err === null) {
                         next(document, data);
                     }

                     done();
                 });
}

module.exports.login = function (username, password) {
    this.username = username;
    this.password = password;
    return Login;
};

// --------------------------------------------------------------------------------
// /lib/commands/match.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * Continue if the context node innerText matches a RegExp.
 */
/**
 * Continue if the context node innerText matches a RegExp.
 *
 * A string argument is compiled into a RegExp.
 *
 * @function match
 * @memberof Command
 * @param {string|RegExp} match - A RegExp to match.
 * @instance
 */

function Match(context, data, next, done) {
    var regex = this.regex;

    // Global/sticky expressions resume from `lastIndex`; every context must
    // be tested from the start.
    regex.lastIndex = 0;

    if (regex.test(getContent(context))) {
        next(context, data);
    } else {
        this.debug('"' + regex.toString() + '" not found');
    }

    done();
}

/**
 * Read the textual content of a node via `text()` or, failing that,
 * `value()`.
 *
 * @private
 * @param {object} node - Context node
 * @return {string} The content, or '' when the node exposes neither accessor
 */
function getContent(node) {
    var content;

    if (node.text !== undefined) {
        content = node.text();
    } else if (node.value !== undefined) {
        content = node.value();
    }

    // `RegExp.test(undefined)` would test the literal text "undefined".
    if (content === undefined || content === null) {
        return '';
    }

    return String(content);
}

module.exports.match = function (regex) {
    // The documented signature allows a string pattern as well.
    this.regex = (regex instanceof RegExp) ? regex : new RegExp(regex);
    return Match;
};

// --------------------------------------------------------------------------------
// /lib/commands/paginate.js:
// --------------------------------------------------------------------------------
/*jslint node: true */
'use strict';

/**
 * Loads multiple pages.
 *
 * The first argument can alternatively be an object representing
 * HTTP GET/POST parameters to modify.
 *
 * If the first argument is an object, numeric values will
 * increment the existing parameter value by that amount.
 *
 * String values are treated as selectors and each corresponding
 * parameter's value will be replaced with the content of the selected node.
 *
 * @function paginate
 * @memberof Command
 * @param {selector} selector - A link or form to the next page.
 * @param {number|Selector|middlewareCallback} [limit] -
 *                                  Total number of pages to load.
 */
21 | * @instance 22 | */ 23 | 24 | var form = require('../Form.js'); 25 | 26 | function Paginate(context, data, next, done) { 27 | var selector = this.selector, 28 | limit = this.getLimit(this.limit, context, data), 29 | document = context.doc(), 30 | count = document.request.count || 1, 31 | self = this, 32 | params = {}, 33 | method, url, param, node = context, name, value; 34 | 35 | next(context, data, count); 36 | 37 | if (limit !== undefined && count > limit) { 38 | return done(); 39 | } 40 | 41 | method = document.location.url.method || 'get'; 42 | url = document.location.href; 43 | params = {}; 44 | 45 | if (selector instanceof Function) { 46 | var ret = selector(context, data); 47 | 48 | if (typeof ret === 'string') { 49 | url = document.location.resolve(ret); 50 | } else { 51 | params = ret; 52 | } 53 | } else if (selector instanceof Object) { 54 | for (param in selector) { 55 | value = selector[param]; 56 | 57 | if (typeof value !== 'number') { 58 | params[param] = getContent(context.get(value)); 59 | } else { 60 | params[param] = (parseFloat(document.request.params[param]) || 61 | 0) + 62 | value; 63 | } 64 | } 65 | } else { 66 | node = document.get(selector); 67 | 68 | if (!node) { 69 | return done('no results for "' + selector + '" in ' + url); 70 | } else if (node.nodeName === 'form') { 71 | url = form.getAction(node); 72 | method = form.getMethod(node); 73 | params = form.getParams(node); 74 | } else if (node.hasAttribute('href')) { 75 | url = node.getAttribute('href'); 76 | } else { 77 | name = node.getAttribute('name'); 78 | 79 | if (name !== null) { 80 | name = name.value(); 81 | value = node.getAttribute('value'); 82 | 83 | if (value === null) { 84 | value = getContent(node); 85 | } 86 | 87 | params[name] = value; 88 | } else { 89 | return done('no URL found in ' + selector); 90 | } 91 | } 92 | } 93 | 94 | self.log('loading page ' + count + (limit ? 
95 | '/' + limit : 96 | '') + ' - ' + url); 97 | 98 | self.request(method, node, url, params, function (document) { 99 | document.request.count = count + 1; 100 | self.start(document, data); 101 | }); 102 | 103 | done(); 104 | } 105 | 106 | function getLimitArg(limit) { 107 | return limit; 108 | } 109 | 110 | function getLimitFunction(callback, context, data) { 111 | var value = callback(context, data.getObject()); 112 | 113 | if (value === false) { 114 | return 0; 115 | } else if (value === true) { 116 | return undefined; 117 | } else { 118 | return value; 119 | } 120 | } 121 | 122 | function getLimitSelector(selector, context) { 123 | var node = context.get(selector), value; 124 | 125 | if (!node) { 126 | return 0; 127 | } 128 | 129 | value = getContent(node); 130 | 131 | if (!value) { 132 | return 0; 133 | } 134 | 135 | value = parseInt(value.replace(/[^0-9\.]+/g, '')); 136 | 137 | return value || 0; 138 | } 139 | 140 | function getContent(node) { 141 | if (node.text !== undefined) { 142 | return node.text(); 143 | } else if (node.value !== undefined) { 144 | return node.value(); 145 | } 146 | } 147 | 148 | module.exports.paginate = function (selector, limit) { 149 | this.selector = selector; 150 | this.limit = limit; 151 | 152 | switch (typeof limit) { 153 | case 'string': 154 | this.getLimit = getLimitSelector; 155 | break; 156 | case 'function': 157 | this.getLimit = getLimitFunction; 158 | break; 159 | default: 160 | this.getLimit = getLimitArg; 161 | break; 162 | } 163 | 164 | return Paginate; 165 | }; 166 | -------------------------------------------------------------------------------- /lib/commands/parse.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Parse HTML or XML data 3 | 4 | * @function parse 5 | * @param {string|buffer} data - XML/HTML data 6 | * @param {object} options - Parse options 7 | * @memberof Command 8 | * @instance 9 | * @see Osmosis.parse 10 | */ 11 | 12 | module.exports.parse = 
function (context, data, next, done) { 13 | var args = this.args; 14 | 15 | next(this.instance.parse(args[0], args[1]), data); 16 | done(); 17 | return this; 18 | }; 19 | -------------------------------------------------------------------------------- /lib/commands/pause.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Pause an Osmosis instance. 3 | * 4 | * @function pause 5 | * @memberof Command 6 | * @instance 7 | */ 8 | 9 | module.exports = function () { 10 | this.instance.queue.push(); 11 | this.prev.debug('pausing'); 12 | this.instance.paused = true; 13 | }; 14 | -------------------------------------------------------------------------------- /lib/commands/proxy.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Set a proxy. Short for `.config({ proxy: ... })` 3 | * 4 | * @function proxy 5 | * @memberof Command 6 | * @param {string|array} proxy - A string or array of HTTP proxy URL(s) 7 | * @instance 8 | * @see Osmosis.config 9 | */ 10 | 11 | module.exports = function (value) { 12 | this.getOpts().proxy = value; 13 | return this; 14 | }; 15 | -------------------------------------------------------------------------------- /lib/commands/resume.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Resume an Osmosis instance. 3 | * 4 | * @function resume 5 | * @memberof Command 6 | * @instance 7 | */ 8 | 9 | module.exports = function () { 10 | this.instance.queue.pop(); 11 | var instance = this.instance; 12 | 13 | this.prev.debug('resuming'); 14 | this.instance.paused = false; 15 | this.instance.resume(); 16 | }; 17 | -------------------------------------------------------------------------------- /lib/commands/rewrite.js: -------------------------------------------------------------------------------- 1 | module.exports.rewrite = function (context, data, next, done) { 2 | console.error('DEPRECATED. 
Use .find(selector).get(callback) instead.'); 3 | next(context, data); 4 | done(); 5 | }; 6 | -------------------------------------------------------------------------------- /lib/commands/set.js: -------------------------------------------------------------------------------- 1 | /*jslint node: true */ 2 | 'use strict'; 3 | 4 | var sourceSelectorRegexp = /:source$/, 5 | innerHTMLSelectorRegexp = /:html$/; 6 | 7 | /** 8 | * Set values in the {@link data} object. 9 | * 10 | * Note: Also accepts set(key, selector) as parameters 11 | * 12 | * @function set 13 | * @memberof Command 14 | * @param {object} data - Key/selector pairs to set. 15 | * @instance 16 | */ 17 | 18 | module.exports.set = function (key, val) { 19 | var args = key, 20 | isArray = args instanceof Array; 21 | 22 | if (val !== undefined) { 23 | args = {}; 24 | args[key] = val; 25 | } else if (typeof key === 'string') { 26 | args = {}; 27 | args[key] = null; 28 | } 29 | 30 | return setObject(loopObject(args, isArray), isArray); 31 | }; 32 | 33 | function loopObject(obj) { 34 | var keys = Object.keys(obj), 35 | length = keys.length, 36 | isArray = obj instanceof Array, 37 | arr = new Array(length * 3), 38 | i = 0, 39 | ai = 0, 40 | key, val, valIsArray, func, isObject; 41 | 42 | for (; i < length; i++) { 43 | key = keys[i]; 44 | val = obj[key]; 45 | valIsArray = val instanceof Array; 46 | isObject = false; 47 | 48 | if (typeof val === 'object' && val !== null) { 49 | isObject = true; 50 | 51 | if (val.isCommand === true) { 52 | func = setInstance(val, key); 53 | } else if (!valIsArray || val.length > 0) { 54 | func = setObject(loopObject(val), valIsArray, key); 55 | } 56 | } else { 57 | if (val === null) { 58 | func = setContextNull; 59 | } else if (typeof val === 'function') { 60 | func = setContextFunc(val); 61 | } else if (isArray) { 62 | func = setContextArray(val); 63 | } else if (sourceSelectorRegexp.test(val)) { 64 | func = setContextSource(val); 65 | } else if (innerHTMLSelectorRegexp.test(val)) 
{ 66 | func = setContextInnerHTML(val); 67 | } else { 68 | func = setContextVal(val); 69 | } 70 | } 71 | 72 | arr[ai++] = key; 73 | arr[ai++] = func; 74 | arr[ai++] = isObject; 75 | } 76 | 77 | return arr; 78 | } 79 | 80 | function setObject(arr, isArray, index) { 81 | var length = arr.length, 82 | total = length / 3, 83 | isNested = index !== undefined; 84 | 85 | return function (context, data, next, done) { 86 | var count = total, 87 | dataDone = function () { 88 | if (--count !== 0) { 89 | return false; 90 | } 91 | 92 | 93 | if (isNested && data.parent !== undefined) { 94 | data.parent.merge(data); 95 | } 96 | 97 | next(context, data); 98 | 99 | // done will be undefined if setObject is called by setObject 100 | if (done !== undefined) { 101 | done(); 102 | } 103 | 104 | return true; 105 | }, 106 | 107 | key, val, isObject, i; 108 | 109 | if (context === undefined) { 110 | done("No context"); 111 | return; 112 | } 113 | 114 | if (done !== undefined) { 115 | data = data.clone(); 116 | } 117 | 118 | if (isNested === true) { 119 | data = data.child() 120 | .setIndex(index) 121 | .isArray(isArray) 122 | .done(dataDone) 123 | .ref(); 124 | } 125 | 126 | if (isArray === true) { 127 | setArray(context, data, dataDone, arr, 0); 128 | return; 129 | } 130 | 131 | for (i = 0; i < length; i++) { 132 | key = arr[i]; 133 | val = arr[++i]; 134 | isObject = arr[++i]; 135 | 136 | if (isObject === true) { 137 | val(context, data, dataDone); 138 | } else { 139 | data.set(key, val(context, data)); 140 | dataDone(); 141 | } 142 | } 143 | }; 144 | } 145 | 146 | // Call in serial to preserve array order 147 | function setArray(context, data, done, arr, i) { 148 | var key = arr[i++], 149 | val = arr[i++], 150 | isObject = arr[i++]; 151 | 152 | data.toArray(); 153 | 154 | if (isObject === true) { 155 | val(context, data, function () { 156 | if (done() === false) { 157 | setArray(context, data, done, arr, i); 158 | } 159 | }); 160 | } else { 161 | data.push(val(context, data)); 162 | 163 
| if (done() === false) { 164 | setArray(context, data, done, arr, i); 165 | } 166 | } 167 | } 168 | 169 | function setInstance(instance, index) { 170 | return function (context, data, done) { 171 | instance.start(context, 172 | data.child() 173 | .setIndex(index) 174 | .done(done) 175 | .ref()); 176 | }; 177 | } 178 | 179 | function setContextNull(context) { 180 | return getContent(context); 181 | } 182 | 183 | function setContextVal(selector) { 184 | return function (context) { 185 | return getContent(context.get(selector)); 186 | }; 187 | } 188 | 189 | function setContextArray(selector) { 190 | return function (context, data) { 191 | var nodes = context.find(selector), 192 | length = nodes.length - 1, 193 | i; 194 | 195 | for (i = 0; i < length; i++) { 196 | data.push(getContent(nodes[i])); 197 | } 198 | 199 | return getContent(nodes[length]); 200 | }; 201 | } 202 | 203 | function setContextFunc(cb) { 204 | return function (context, data) { 205 | 206 | var val = cb(context, data), 207 | content = getContent(val); 208 | 209 | if (content !== undefined) { 210 | return content; 211 | } 212 | 213 | return val; 214 | 215 | }; 216 | } 217 | 218 | function setContextSource(s) { 219 | var selector = s.replace(sourceSelectorRegexp, ''); 220 | 221 | return function (context) { 222 | var node = context.get(selector); 223 | 224 | if (!node) { 225 | return; 226 | } 227 | 228 | return node.toString(); 229 | }; 230 | } 231 | 232 | function setContextInnerHTML(s) { 233 | var selector = s.replace(innerHTMLSelectorRegexp, ''); 234 | 235 | return function (context) { 236 | var node = context.get(selector); 237 | 238 | if (!node) { 239 | return; 240 | } 241 | 242 | return node.innerHTML; 243 | }; 244 | } 245 | 246 | function getContent(node) { 247 | if (!node) { 248 | return; 249 | } 250 | 251 | if (node.text !== undefined) { 252 | return node.text().trim(); 253 | } else if (node.value !== undefined) { 254 | return node.value().trim(); 255 | } 256 | 257 | return; 258 | } 259 | 
-------------------------------------------------------------------------------- /lib/commands/stop.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Stop an Osmosis instance. 3 | * 4 | * @function stop 5 | * @memberof Command 6 | * @instance 7 | */ 8 | 9 | module.exports = function () { 10 | this.instance.queue.pop(); 11 | this.pause(); 12 | this.instance.stopped = true; 13 | this.instance.paused = true; 14 | this.debug('stopping'); 15 | }; 16 | -------------------------------------------------------------------------------- /lib/commands/submit.js: -------------------------------------------------------------------------------- 1 | /*jslint node: true */ 2 | 'use strict'; 3 | 4 | /** 5 | * Submit a form. 6 | * 7 | * @function submit 8 | * @memberof Command 9 | * @param {Selector} selector - A selector for a
or submit button. 10 | * @param {object|contextCallback} params - Keys/values for the form's inputs. 11 | * @instance 12 | */ 13 | 14 | var form = require('../Form.js'); 15 | 16 | function Submit(context, data, next, done) { 17 | var node = context.get(this.selector), 18 | method, url, params, param; 19 | 20 | if (node === null) { 21 | return done('No results for ' + this.selector); 22 | } 23 | 24 | method = form.getMethod(node); 25 | url = form.getAction(node); 26 | params = form.getParams(node); 27 | 28 | if (typeof this.params === 'function') { 29 | this.params = this.params(context, data.getObject()); 30 | } 31 | 32 | for (param in this.params) { 33 | params[param] = this.params[param]; 34 | } 35 | 36 | this.request(method, node, url, params, function (err, document) { 37 | if (err === null) { 38 | next(document, data); 39 | } 40 | 41 | done(); 42 | }); 43 | } 44 | 45 | module.exports.submit = function (selector, params) { 46 | this.selector = selector; 47 | this.params = params; 48 | return Submit; 49 | }; 50 | -------------------------------------------------------------------------------- /lib/commands/success.js: -------------------------------------------------------------------------------- 1 | /*jslint node: true */ 2 | 'use strict'; 3 | 4 | /** 5 | * Continue if the given selector matches any nodes. 6 | * 7 | * If no nodes are found, a {@link Command.error} message will be sent. 8 | * 9 | * @function success 10 | * @memberof Command 11 | * @param {Selector} selector - A selector to match. 
12 | * @instance 13 | * @see {@link Command.login} 14 | * @see {@link Command.filter} 15 | */ 16 | 17 | function Success(context, data, next, done) { 18 | if (context.find(this.selector).length > 0) { 19 | next(context, data); 20 | } else { 21 | this.error(this.selector + ' not found'); 22 | } 23 | 24 | done(); 25 | } 26 | 27 | module.exports.success = function (selector) { 28 | this.selector = selector; 29 | return Success; 30 | }; 31 | -------------------------------------------------------------------------------- /lib/commands/then.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Execute a given {@link callback}. 3 | * 4 | * @function then 5 | * @memberof Command 6 | * @param {callback} callback 7 | * @instance 8 | */ 9 | 10 | /** 11 | * The next function is used to send a {@link context} and 12 | * a {@link data} object to the immediately following Command. 13 | * 14 | * The {@link next} function must be called if you want to change 15 | * the context or data object. 16 | * 17 | * @callback next 18 | * @param {context} context - The context to send to the following command 19 | * @param {data} data - The data object to send to the following command 20 | * @see done 21 | * @see callback 22 | * @see {@link Command.then} 23 | */ 24 | 25 | /** 26 | * The done function is used to tell Osmosis that a 27 | * callback has finished **asynchronous** execution. 28 | * 29 | * The {@link done} function is required if the 30 | * callback function calls {@link next} asynchronously. 31 | * 32 | * The {@link done} function MUST be called if it is included 33 | * as an argument to the callback function. 34 | * 35 | * Note: You must not call {@link next} after calling done. 
36 | * 37 | * @callback done 38 | * @see next 39 | * @see callback 40 | * @see {@link Command.then} 41 | */ 42 | 43 | /** 44 | * A callback function can be used to access and modify 45 | * the {@link context} and {@link data} object at the current 46 | * point in the command chain. 47 | * 48 | * @callback callback 49 | * @param {context} context - The current HTML/XML context 50 | * @param {data} data - The current data object 51 | * @param {next} [next] - Continue a context and data down the chain 52 | * @param {done} [done] - Called when finished calling {@link next} 53 | * @this Command 54 | * @see {@link Command.then} 55 | */ 56 | 57 | 58 | var regexp_function_arg = /^\s*(function\s*)?\(?([^\s\,\)]+)/; 59 | 60 | function Then(callback, getContext) { 61 | var length = callback.length; 62 | 63 | return function (context, data, next, done) { 64 | var self = this, calledDone = false; 65 | 66 | getContext(context, function (context) { 67 | callback.call(self, context, data.getObject(), function (c, d) { 68 | next(c, data.setObject(d)); 69 | 70 | if (length === 3 && calledDone === false) { 71 | process.nextTick(done); 72 | calledDone = true; 73 | } 74 | }, done); 75 | 76 | if (length <= 2) { 77 | next(context, data); 78 | done(); 79 | } 80 | }); 81 | }; 82 | } 83 | 84 | function getContextArg(context, callback) { 85 | callback(context); 86 | } 87 | 88 | function getDocumentArg(context, callback) { 89 | callback(context.document || context.doc()); 90 | } 91 | 92 | function getWindowArg(context, callback) { 93 | context = context.window || context.doc().defaultView; 94 | context.addEventListener('done', function () { 95 | callback(context); 96 | }); 97 | } 98 | 99 | function getJQueryArg(context, callback) { 100 | getWindowArg(context, function (context) { 101 | if (context.jQuery !== undefined) { 102 | callback(context.jQuery); 103 | } else { 104 | callback(context.$); 105 | } 106 | }); 107 | } 108 | 109 | module.exports.then = function (callback) { 110 | var 
getContext = getContextArg, contextArg; 111 | 112 | if (callback.length > 0) { 113 | contextArg = callback.toString().match(regexp_function_arg)[2]; 114 | } 115 | 116 | if (contextArg === '$') { 117 | getContext = getJQueryArg; 118 | } else if (contextArg === 'window') { 119 | getContext = getWindowArg; 120 | } else if (contextArg === 'document') { 121 | getContext = getDocumentArg; 122 | } 123 | 124 | return Then(callback, getContext); 125 | }; 126 | -------------------------------------------------------------------------------- /lib/commands/train.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Teach Osmosis how to find content. 3 | * 4 | * @function train 5 | * @memberof Command 6 | * @param {object} object - { name: selector } pairs of training variables 7 | * @instance 8 | * @see {@link Command.learn} 9 | */ 10 | 11 | var Train = function () { 12 | }; 13 | 14 | module.exports.train = function () { 15 | return Train; 16 | }; 17 | -------------------------------------------------------------------------------- /lib/commands/trigger.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Trigger a DOM event and continue once it completes. 3 | * 4 | * Note: If no selector is specified, the default event target will be 5 | * the Window object. 6 | * 7 | * @function trigger 8 | * @param {string} event - The name of the event to trigger. 9 | * @param {Selector} [selector] - Nodes to trigger the event on. 
10 | * @memberof Command 11 | * @instance 12 | */ 13 | 14 | function Trigger(context, data, next, done) { 15 | var event = this.event, window = context.defaultView; 16 | 17 | window.addEventListener('done', function () { 18 | window.dispatchEvent(event); 19 | next(context, data); 20 | done(); 21 | }); 22 | } 23 | 24 | module.exports.trigger = function (event, selector) { 25 | this.event = event; 26 | this.selector = selector; 27 | return Trigger; 28 | }; 29 | -------------------------------------------------------------------------------- /lib/commands/use.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Use a runtime defined variable 3 | * 4 | * @function use 5 | * @memberof Command 6 | * @instance 7 | * @see {@link Command.run} 8 | * @see {@link Command.learn} 9 | */ 10 | 11 | var Use = function () { 12 | return this.lookup(this.args[0]); 13 | }; 14 | 15 | module.exports.use = function () { 16 | 17 | }; 18 | -------------------------------------------------------------------------------- /lib/commands/using.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Set variables to use. 
3 | * 4 | * @function using 5 | * @memberof Command 6 | * @param {object} object - { key: value } pairs of variables 7 | * @instance 8 | * @see {@link Command.use} 9 | */ 10 | 11 | var Using = function () { 12 | }; 13 | 14 | module.exports.using = function () { 15 | return Using; 16 | }; 17 | -------------------------------------------------------------------------------- /package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "osmosis", 3 | "version": "1.1.10", 4 | "lockfileVersion": 1, 5 | "dependencies": { 6 | "abbrev": { 7 | "version": "1.1.0", 8 | "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.0.tgz", 9 | "integrity": "sha1-0FVMIlZjbi9W58LlrRg/hZQo2B8=" 10 | }, 11 | "ajv": { 12 | "version": "4.11.8", 13 | "resolved": "https://registry.npmjs.org/ajv/-/ajv-4.11.8.tgz", 14 | "integrity": "sha1-gv+wKynmYq5TvcIK8VlHcGc5xTY=" 15 | }, 16 | "ansi-regex": { 17 | "version": "2.1.1", 18 | "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-2.1.1.tgz", 19 | "integrity": "sha1-w7M6te42DYbg5ijwRorn7yfWVN8=" 20 | }, 21 | "aproba": { 22 | "version": "1.1.2", 23 | "resolved": "https://registry.npmjs.org/aproba/-/aproba-1.1.2.tgz", 24 | "integrity": "sha512-ZpYajIfO0j2cOFTO955KUMIKNmj6zhX8kVztMAxFsDaMwz+9Z9SV0uou2pC9HJqcfpffOsjnbrDMvkNy+9RXPw==" 25 | }, 26 | "are-we-there-yet": { 27 | "version": "1.1.4", 28 | "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-1.1.4.tgz", 29 | "integrity": "sha1-u13KOCu5TwXhUZQ3PRb9O6HKEQ0=" 30 | }, 31 | "asn1": { 32 | "version": "0.2.3", 33 | "resolved": "https://registry.npmjs.org/asn1/-/asn1-0.2.3.tgz", 34 | "integrity": "sha1-2sh4dxPJlmhJ/IGAd36+nB3fO4Y=" 35 | }, 36 | "assert-plus": { 37 | "version": "0.2.0", 38 | "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-0.2.0.tgz", 39 | "integrity": "sha1-104bh+ev/A24qttwIfP+SBAasjQ=" 40 | }, 41 | "asynckit": { 42 | "version": "0.4.0", 43 | "resolved": 
"https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", 44 | "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=" 45 | }, 46 | "aws-sign2": { 47 | "version": "0.6.0", 48 | "resolved": "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.6.0.tgz", 49 | "integrity": "sha1-FDQt0428yU0OW4fXY81jYSwOeU8=" 50 | }, 51 | "aws4": { 52 | "version": "1.6.0", 53 | "resolved": "https://registry.npmjs.org/aws4/-/aws4-1.6.0.tgz", 54 | "integrity": "sha1-g+9cqGCysy5KDe7e6MdxudtXRx4=" 55 | }, 56 | "balanced-match": { 57 | "version": "1.0.0", 58 | "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", 59 | "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=" 60 | }, 61 | "bcrypt-pbkdf": { 62 | "version": "1.0.1", 63 | "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.1.tgz", 64 | "integrity": "sha1-Y7xdy2EzG5K8Bf1SiVPDNGKgb40=", 65 | "optional": true 66 | }, 67 | "bindings": { 68 | "version": "1.2.1", 69 | "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.2.1.tgz", 70 | "integrity": "sha1-FK1hE4EtLTfXLme0ystLtyZQXxE=" 71 | }, 72 | "block-stream": { 73 | "version": "0.0.9", 74 | "resolved": "https://registry.npmjs.org/block-stream/-/block-stream-0.0.9.tgz", 75 | "integrity": "sha1-E+v+d4oDIFz+A3UUgeu0szAMEmo=" 76 | }, 77 | "boom": { 78 | "version": "2.10.1", 79 | "resolved": "https://registry.npmjs.org/boom/-/boom-2.10.1.tgz", 80 | "integrity": "sha1-OciRjO/1eZ+D+UkqhI9iWt0Mdm8=" 81 | }, 82 | "brace-expansion": { 83 | "version": "1.1.8", 84 | "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.8.tgz", 85 | "integrity": "sha1-wHshHHyVLsH479Uad+8NHTmQopI=" 86 | }, 87 | "caseless": { 88 | "version": "0.12.0", 89 | "resolved": "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz", 90 | "integrity": "sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw=" 91 | }, 92 | "co": { 93 | "version": "4.6.0", 94 | "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", 95 | "integrity": "sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ=" 96 | 
}, 97 | "code-point-at": { 98 | "version": "1.1.0", 99 | "resolved": "https://registry.npmjs.org/code-point-at/-/code-point-at-1.1.0.tgz", 100 | "integrity": "sha1-DQcLTQQ6W+ozovGkDi7bPZpMz3c=" 101 | }, 102 | "combined-stream": { 103 | "version": "1.0.5", 104 | "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.5.tgz", 105 | "integrity": "sha1-k4NwpXtKUd6ix3wV1cX9+JUWQAk=" 106 | }, 107 | "concat-map": { 108 | "version": "0.0.1", 109 | "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", 110 | "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=" 111 | }, 112 | "console-control-strings": { 113 | "version": "1.1.0", 114 | "resolved": "https://registry.npmjs.org/console-control-strings/-/console-control-strings-1.1.0.tgz", 115 | "integrity": "sha1-PXz0Rk22RG6mRL9LOVB/mFEAjo4=" 116 | }, 117 | "core-util-is": { 118 | "version": "1.0.2", 119 | "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", 120 | "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" 121 | }, 122 | "cryptiles": { 123 | "version": "2.0.5", 124 | "resolved": "https://registry.npmjs.org/cryptiles/-/cryptiles-2.0.5.tgz", 125 | "integrity": "sha1-O9/s3GCBR8HGcgL6KR59ylnqo7g=" 126 | }, 127 | "css2xpath": { 128 | "version": "0.0.1", 129 | "resolved": "https://registry.npmjs.org/css2xpath/-/css2xpath-0.0.1.tgz", 130 | "integrity": "sha1-8QUC1kkzDWz/toK7IbO7VFVDw2U=" 131 | }, 132 | "dashdash": { 133 | "version": "1.14.1", 134 | "resolved": "https://registry.npmjs.org/dashdash/-/dashdash-1.14.1.tgz", 135 | "integrity": "sha1-hTz6D3y+L+1d4gMmuN1YEDX24vA=", 136 | "dependencies": { 137 | "assert-plus": { 138 | "version": "1.0.0", 139 | "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", 140 | "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=" 141 | } 142 | } 143 | }, 144 | "debug": { 145 | "version": "2.6.8", 146 | "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.8.tgz", 147 | "integrity": 
"sha1-5zFTHKLt4n0YgiJCfaF4IdaP9Pw=" 148 | }, 149 | "deep-extend": { 150 | "version": "0.4.2", 151 | "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.4.2.tgz", 152 | "integrity": "sha1-SLaZwn4zS/ifEIkr5DL25MfTSn8=" 153 | }, 154 | "delayed-stream": { 155 | "version": "1.0.0", 156 | "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", 157 | "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=" 158 | }, 159 | "delegates": { 160 | "version": "1.0.0", 161 | "resolved": "https://registry.npmjs.org/delegates/-/delegates-1.0.0.tgz", 162 | "integrity": "sha1-hMbhWbgZBP3KWaDvRM2HDTElD5o=" 163 | }, 164 | "ecc-jsbn": { 165 | "version": "0.1.1", 166 | "resolved": "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.1.tgz", 167 | "integrity": "sha1-D8c6ntXw1Tw4GTOYUj735UN3dQU=", 168 | "optional": true 169 | }, 170 | "extend": { 171 | "version": "3.0.1", 172 | "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.1.tgz", 173 | "integrity": "sha1-p1Xqe8Gt/MWjHOfnYtuq3F5jZEQ=" 174 | }, 175 | "extsprintf": { 176 | "version": "1.0.2", 177 | "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.0.2.tgz", 178 | "integrity": "sha1-4QgOBljjALBilJkMxw4VAiNf1VA=" 179 | }, 180 | "forever-agent": { 181 | "version": "0.6.1", 182 | "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", 183 | "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=" 184 | }, 185 | "form-data": { 186 | "version": "2.1.4", 187 | "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.1.4.tgz", 188 | "integrity": "sha1-M8GDrPGTJ27KqYFDpp6Uv+4XUNE=" 189 | }, 190 | "fs.realpath": { 191 | "version": "1.0.0", 192 | "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", 193 | "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=" 194 | }, 195 | "fstream": { 196 | "version": "1.0.11", 197 | "resolved": "https://registry.npmjs.org/fstream/-/fstream-1.0.11.tgz", 198 | "integrity": "sha1-XB+x8RdHcRTwYyoOtLcbPLD9MXE=" 199 
| }, 200 | "fstream-ignore": { 201 | "version": "1.0.5", 202 | "resolved": "https://registry.npmjs.org/fstream-ignore/-/fstream-ignore-1.0.5.tgz", 203 | "integrity": "sha1-nDHa40dnAY/h0kmyTa2mfQktoQU=" 204 | }, 205 | "gauge": { 206 | "version": "2.7.4", 207 | "resolved": "https://registry.npmjs.org/gauge/-/gauge-2.7.4.tgz", 208 | "integrity": "sha1-LANAXHU4w51+s3sxcCLjJfsBi/c=" 209 | }, 210 | "getpass": { 211 | "version": "0.1.7", 212 | "resolved": "https://registry.npmjs.org/getpass/-/getpass-0.1.7.tgz", 213 | "integrity": "sha1-Xv+OPmhNVprkyysSgmBOi6YhSfo=", 214 | "dependencies": { 215 | "assert-plus": { 216 | "version": "1.0.0", 217 | "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", 218 | "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=" 219 | } 220 | } 221 | }, 222 | "glob": { 223 | "version": "7.1.2", 224 | "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.2.tgz", 225 | "integrity": "sha512-MJTUg1kjuLeQCJ+ccE4Vpa6kKVXkPYJ2mOCQyUuKLcLQsdrMCpBPUi8qVE6+YuaJkozeA9NusTAw3hLr8Xe5EQ==" 226 | }, 227 | "graceful-fs": { 228 | "version": "4.1.11", 229 | "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.11.tgz", 230 | "integrity": "sha1-Dovf5NHduIVNZOBOp8AOKgJuVlg=" 231 | }, 232 | "har-schema": { 233 | "version": "1.0.5", 234 | "resolved": "https://registry.npmjs.org/har-schema/-/har-schema-1.0.5.tgz", 235 | "integrity": "sha1-0mMTX0MwfALGAq/I/pWXDAFRNp4=" 236 | }, 237 | "har-validator": { 238 | "version": "4.2.1", 239 | "resolved": "https://registry.npmjs.org/har-validator/-/har-validator-4.2.1.tgz", 240 | "integrity": "sha1-M0gdDxu/9gDdID11gSpqX7oALio=" 241 | }, 242 | "has-unicode": { 243 | "version": "2.0.1", 244 | "resolved": "https://registry.npmjs.org/has-unicode/-/has-unicode-2.0.1.tgz", 245 | "integrity": "sha1-4Ob+aijPUROIVeCG0Wkedx3iqLk=" 246 | }, 247 | "hawk": { 248 | "version": "3.1.3", 249 | "resolved": "https://registry.npmjs.org/hawk/-/hawk-3.1.3.tgz", 250 | "integrity": 
"sha1-B4REvXwWQLD+VA0sm3PVlnjo4cQ=" 251 | }, 252 | "hoek": { 253 | "version": "2.16.3", 254 | "resolved": "https://registry.npmjs.org/hoek/-/hoek-2.16.3.tgz", 255 | "integrity": "sha1-ILt0A9POo5jpHcRxCo/xuCdKJe0=" 256 | }, 257 | "http-signature": { 258 | "version": "1.1.1", 259 | "resolved": "https://registry.npmjs.org/http-signature/-/http-signature-1.1.1.tgz", 260 | "integrity": "sha1-33LiZwZs0Kxn+3at+OE0qPvPkb8=" 261 | }, 262 | "iconv-lite": { 263 | "version": "0.4.18", 264 | "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.18.tgz", 265 | "integrity": "sha512-sr1ZQph3UwHTR0XftSbK85OvBbxe/abLGzEnPENCQwmHf7sck8Oyu4ob3LgBxWWxRoM+QszeUyl7jbqapu2TqA==" 266 | }, 267 | "inflight": { 268 | "version": "1.0.6", 269 | "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", 270 | "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=" 271 | }, 272 | "inherits": { 273 | "version": "2.0.3", 274 | "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", 275 | "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" 276 | }, 277 | "ini": { 278 | "version": "1.3.4", 279 | "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.4.tgz", 280 | "integrity": "sha1-BTfLedr1m1mhpRff9wbIbsA5Fi4=" 281 | }, 282 | "is-fullwidth-code-point": { 283 | "version": "1.0.0", 284 | "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-1.0.0.tgz", 285 | "integrity": "sha1-754xOG8DGn8NZDr4L95QxFfvAMs=" 286 | }, 287 | "is-typedarray": { 288 | "version": "1.0.0", 289 | "resolved": "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz", 290 | "integrity": "sha1-5HnICFjfDBsR3dppQPlgEfzaSpo=" 291 | }, 292 | "isarray": { 293 | "version": "1.0.0", 294 | "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", 295 | "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=" 296 | }, 297 | "isstream": { 298 | "version": "0.1.2", 299 | "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", 300 | "integrity": 
"sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo=" 301 | }, 302 | "jsbn": { 303 | "version": "0.1.1", 304 | "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", 305 | "integrity": "sha1-peZUwuWi3rXyAdls77yoDA7y9RM=", 306 | "optional": true 307 | }, 308 | "jscs": { 309 | "version": "3.0.7", 310 | "resolved": "https://registry.npmjs.org/jscs/-/jscs-3.0.7.tgz", 311 | "integrity": "sha1-cUG03/W4bjLQ6Z12S4NnZ8MNIBo=", 312 | "dev": true, 313 | "requires": { 314 | "chalk": "1.1.3", 315 | "cli-table": "0.3.1", 316 | "commander": "2.9.0", 317 | "cst": "0.4.10", 318 | "estraverse": "4.2.0", 319 | "exit": "0.1.2", 320 | "glob": "5.0.15", 321 | "htmlparser2": "3.8.3", 322 | "js-yaml": "3.4.6", 323 | "jscs-jsdoc": "2.0.0", 324 | "jscs-preset-wikimedia": "1.0.1", 325 | "jsonlint": "1.6.3", 326 | "lodash": "3.10.1", 327 | "minimatch": "3.0.4", 328 | "natural-compare": "1.2.2", 329 | "pathval": "0.1.1", 330 | "prompt": "0.2.14", 331 | "reserved-words": "0.1.2", 332 | "resolve": "1.10.0", 333 | "strip-bom": "2.0.0", 334 | "strip-json-comments": "1.0.4", 335 | "to-double-quotes": "2.0.0", 336 | "to-single-quotes": "2.0.1", 337 | "vow": "0.4.19", 338 | "vow-fs": "0.3.6", 339 | "xmlbuilder": "3.1.0" 340 | }, 341 | "dependencies": { 342 | "glob": { 343 | "version": "5.0.15", 344 | "resolved": "https://registry.npmjs.org/glob/-/glob-5.0.15.tgz", 345 | "integrity": "sha1-G8k2ueAvSmA/zCIuz3Yz0wuLk7E=", 346 | "dev": true, 347 | "requires": { 348 | "inflight": "1.0.6", 349 | "inherits": "2.0.3", 350 | "minimatch": "3.0.4", 351 | "once": "1.4.0", 352 | "path-is-absolute": "1.0.1" 353 | } 354 | }, 355 | "strip-json-comments": { 356 | "version": "1.0.4", 357 | "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-1.0.4.tgz", 358 | "integrity": "sha1-HhX7ysl9Pumb8tc7TGVrCCu6+5E=", 359 | "dev": true 360 | } 361 | } 362 | }, 363 | "json-schema": { 364 | "version": "0.2.3", 365 | "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", 366 | 
"integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=" 367 | }, 368 | "json-stable-stringify": { 369 | "version": "1.0.1", 370 | "resolved": "https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.0.1.tgz", 371 | "integrity": "sha1-mnWdOcXy/1A/1TAGRu1EX4jE+a8=" 372 | }, 373 | "json-stringify-safe": { 374 | "version": "5.0.1", 375 | "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz", 376 | "integrity": "sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus=" 377 | }, 378 | "jsonify": { 379 | "version": "0.0.0", 380 | "resolved": "https://registry.npmjs.org/jsonify/-/jsonify-0.0.0.tgz", 381 | "integrity": "sha1-LHS27kHZPKUbe1qu6PUDYx0lKnM=" 382 | }, 383 | "jsprim": { 384 | "version": "1.4.0", 385 | "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.0.tgz", 386 | "integrity": "sha1-o7h+QCmNjDgFUtjMdiigu5WiKRg=", 387 | "dependencies": { 388 | "assert-plus": { 389 | "version": "1.0.0", 390 | "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", 391 | "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=" 392 | } 393 | } 394 | }, 395 | "libxmljs": { 396 | "version": "0.19.5", 397 | "resolved": "https://registry.npmjs.org/libxmljs/-/libxmljs-0.18.4.tgz", 398 | "integrity": "sha1-JTAoklkA6fjUVEGANKJbivThUH8=" 399 | }, 400 | "libxmljs-dom": { 401 | "version": "0.0.17", 402 | "resolved": "https://registry.npmjs.org/libxmljs-dom/-/libxmljs-dom-0.0.8.tgz", 403 | "integrity": "sha1-IN7NJkNoArXTuHQJ8yKZEmy4pA8=" 404 | }, 405 | "mime-db": { 406 | "version": "1.27.0", 407 | "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.27.0.tgz", 408 | "integrity": "sha1-gg9XIpa70g7CXtVeW13oaeVDbrE=" 409 | }, 410 | "mime-types": { 411 | "version": "2.1.15", 412 | "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.15.tgz", 413 | "integrity": "sha1-pOv1BkCUVpI3uM9wBGd20J/JKu0=" 414 | }, 415 | "minimatch": { 416 | "version": "3.0.4", 417 | "resolved": 
"https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", 418 | "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==" 419 | }, 420 | "minimist": { 421 | "version": "0.0.8", 422 | "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", 423 | "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=" 424 | }, 425 | "mkdirp": { 426 | "version": "0.5.1", 427 | "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", 428 | "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=" 429 | }, 430 | "ms": { 431 | "version": "2.0.0", 432 | "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", 433 | "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" 434 | }, 435 | "nan": { 436 | "version": "2.5.1", 437 | "resolved": "https://registry.npmjs.org/nan/-/nan-2.5.1.tgz", 438 | "integrity": "sha1-1bAWkSUzJql6K77p5hxV2NYDUeI=" 439 | }, 440 | "needle": { 441 | "version": "1.6.0", 442 | "resolved": "https://registry.npmjs.org/needle/-/needle-1.6.0.tgz", 443 | "integrity": "sha1-9SpYWJchIWGOAC+OY4TK2sItYk8=" 444 | }, 445 | "node-pre-gyp": { 446 | "version": "0.6.36", 447 | "resolved": "https://registry.npmjs.org/node-pre-gyp/-/node-pre-gyp-0.6.36.tgz", 448 | "integrity": "sha1-22BBEst04NR3VU6bUFsXq936t4Y=" 449 | }, 450 | "nodeunit": { 451 | "version": "0.11.3", 452 | "resolved": "https://registry.npmjs.org/nodeunit/-/nodeunit-0.11.3.tgz", 453 | "integrity": "sha512-gDNxrDWpx07BxYNO/jn1UrGI1vNhDQZrIFphbHMcTCDc5mrrqQBWfQMXPHJ5WSgbFwD1D6bv4HOsqtTrPG03AA==", 454 | "dev": true, 455 | "requires": { 456 | "ejs": "2.6.1", 457 | "tap": "12.5.3" 458 | } 459 | }, 460 | "nopt": { 461 | "version": "4.0.1", 462 | "resolved": "https://registry.npmjs.org/nopt/-/nopt-4.0.1.tgz", 463 | "integrity": "sha1-0NRoWv1UFRk8jHUFYC0NF81kR00=" 464 | }, 465 | "npmlog": { 466 | "version": "4.1.0", 467 | "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-4.1.0.tgz", 468 | "integrity": 
"sha512-ocolIkZYZt8UveuiDS0yAkkIjid1o7lPG8cYm05yNYzBn8ykQtaiPMEGp8fY9tKdDgm8okpdKzkvu1y9hUYugA==" 469 | }, 470 | "number-is-nan": { 471 | "version": "1.0.1", 472 | "resolved": "https://registry.npmjs.org/number-is-nan/-/number-is-nan-1.0.1.tgz", 473 | "integrity": "sha1-CXtgK1NCKlIsGvuHkDGDNpQaAR0=" 474 | }, 475 | "oauth-sign": { 476 | "version": "0.8.2", 477 | "resolved": "https://registry.npmjs.org/oauth-sign/-/oauth-sign-0.8.2.tgz", 478 | "integrity": "sha1-Rqarfwrq2N6unsBWV4C31O/rnUM=" 479 | }, 480 | "object-assign": { 481 | "version": "4.1.1", 482 | "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", 483 | "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" 484 | }, 485 | "once": { 486 | "version": "1.4.0", 487 | "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", 488 | "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=" 489 | }, 490 | "os-homedir": { 491 | "version": "1.0.2", 492 | "resolved": "https://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", 493 | "integrity": "sha1-/7xJiDNuDoM94MFox+8VISGqf7M=" 494 | }, 495 | "os-tmpdir": { 496 | "version": "1.0.2", 497 | "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", 498 | "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=" 499 | }, 500 | "osenv": { 501 | "version": "0.1.4", 502 | "resolved": "https://registry.npmjs.org/osenv/-/osenv-0.1.4.tgz", 503 | "integrity": "sha1-Qv5tWVPfBsgGS+bxdsPQWqqjRkQ=" 504 | }, 505 | "path-is-absolute": { 506 | "version": "1.0.1", 507 | "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", 508 | "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=" 509 | }, 510 | "performance-now": { 511 | "version": "0.2.0", 512 | "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-0.2.0.tgz", 513 | "integrity": "sha1-M+8wxcd9TqIcWlOGnZG1bY8lVeU=" 514 | }, 515 | "process-nextick-args": { 516 | "version": "1.0.7", 517 | "resolved": 
"https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-1.0.7.tgz", 518 | "integrity": "sha1-FQ4gt1ZZCtP5EJPyWk8q2L/zC6M=" 519 | }, 520 | "punycode": { 521 | "version": "1.4.1", 522 | "resolved": "https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", 523 | "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=" 524 | }, 525 | "qs": { 526 | "version": "6.4.0", 527 | "resolved": "https://registry.npmjs.org/qs/-/qs-6.4.0.tgz", 528 | "integrity": "sha1-E+JtKK1rD/qpExLNO/cI7TUecjM=" 529 | }, 530 | "rc": { 531 | "version": "1.2.1", 532 | "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.1.tgz", 533 | "integrity": "sha1-LgPo5C7kULjLPc5lvhv4l04d/ZU=", 534 | "dependencies": { 535 | "minimist": { 536 | "version": "1.2.0", 537 | "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", 538 | "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=" 539 | } 540 | } 541 | }, 542 | "readable-stream": { 543 | "version": "2.2.11", 544 | "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.2.11.tgz", 545 | "integrity": "sha512-h+8+r3MKEhkiVrwdKL8aWs1oc1VvBu33ueshOvS26RsZQ3Amhx/oO3TKe4lApSV9ueY6as8EAh7mtuFjdlhg9Q==" 546 | }, 547 | "request": { 548 | "version": "2.81.0", 549 | "resolved": "https://registry.npmjs.org/request/-/request-2.81.0.tgz", 550 | "integrity": "sha1-xpKJRqDgbF+Nb4qTM0af/aRimKA=" 551 | }, 552 | "rimraf": { 553 | "version": "2.6.1", 554 | "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.1.tgz", 555 | "integrity": "sha1-wjOOxkPfeht/5cVPqG9XQopV8z0=" 556 | }, 557 | "safe-buffer": { 558 | "version": "5.0.1", 559 | "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.0.1.tgz", 560 | "integrity": "sha1-0mPKVGls2KMGtcplUekt5XkY++c=" 561 | }, 562 | "semver": { 563 | "version": "5.3.0", 564 | "resolved": "https://registry.npmjs.org/semver/-/semver-5.3.0.tgz", 565 | "integrity": "sha1-myzl094C0XxgEq0yaqa00M9U+U8=" 566 | }, 567 | "set-blocking": { 568 | "version": "2.0.0", 569 | "resolved": 
"https://registry.npmjs.org/set-blocking/-/set-blocking-2.0.0.tgz", 570 | "integrity": "sha1-BF+XgtARrppoA93TgrJDkrPYkPc=" 571 | }, 572 | "signal-exit": { 573 | "version": "3.0.2", 574 | "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.2.tgz", 575 | "integrity": "sha1-tf3AjxKH6hF4Yo5BXiUTK3NkbG0=" 576 | }, 577 | "sntp": { 578 | "version": "1.0.9", 579 | "resolved": "https://registry.npmjs.org/sntp/-/sntp-1.0.9.tgz", 580 | "integrity": "sha1-ZUEYTMkK7qbG57NeJlkIJEPGYZg=" 581 | }, 582 | "sshpk": { 583 | "version": "1.13.1", 584 | "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.13.1.tgz", 585 | "integrity": "sha1-US322mKHFEMW3EwY/hzx2UBzm+M=", 586 | "dependencies": { 587 | "assert-plus": { 588 | "version": "1.0.0", 589 | "resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz", 590 | "integrity": "sha1-8S4PPF13sLHN2RRpQuTpbB5N1SU=" 591 | } 592 | } 593 | }, 594 | "string_decoder": { 595 | "version": "1.0.2", 596 | "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.0.2.tgz", 597 | "integrity": "sha1-sp4fThEl+pehA4K4pTNze3SR4Xk=" 598 | }, 599 | "string-width": { 600 | "version": "1.0.2", 601 | "resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", 602 | "integrity": "sha1-EYvfW4zcUaKn5w0hHgfisLmxB9M=" 603 | }, 604 | "stringstream": { 605 | "version": "0.0.5", 606 | "resolved": "https://registry.npmjs.org/stringstream/-/stringstream-0.0.5.tgz", 607 | "integrity": "sha1-TkhM1N5aC7vuGORjB3EKioFiGHg=" 608 | }, 609 | "strip-ansi": { 610 | "version": "3.0.1", 611 | "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", 612 | "integrity": "sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=" 613 | }, 614 | "strip-json-comments": { 615 | "version": "2.0.1", 616 | "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", 617 | "integrity": "sha1-PFMZQukIwml8DsNEhYwobHygpgo=" 618 | }, 619 | "tar": { 620 | "version": "2.2.1", 621 | "resolved": 
"https://registry.npmjs.org/tar/-/tar-2.2.1.tgz", 622 | "integrity": "sha1-jk0qJWwOIYXGsYrWlK7JaLg8sdE=" 623 | }, 624 | "tar-pack": { 625 | "version": "3.4.0", 626 | "resolved": "https://registry.npmjs.org/tar-pack/-/tar-pack-3.4.0.tgz", 627 | "integrity": "sha1-I74tf2cagzk3bL2wuP4/3r8xeYQ=" 628 | }, 629 | "tough-cookie": { 630 | "version": "2.3.2", 631 | "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-2.3.2.tgz", 632 | "integrity": "sha1-8IH3bkyFcg5sN6X6ztc3FQ2EByo=" 633 | }, 634 | "tunnel-agent": { 635 | "version": "0.6.0", 636 | "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", 637 | "integrity": "sha1-J6XeoGs2sEoKmWZ3SykIaPD8QP0=" 638 | }, 639 | "tweetnacl": { 640 | "version": "0.14.5", 641 | "resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz", 642 | "integrity": "sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q=", 643 | "optional": true 644 | }, 645 | "uid-number": { 646 | "version": "0.0.6", 647 | "resolved": "https://registry.npmjs.org/uid-number/-/uid-number-0.0.6.tgz", 648 | "integrity": "sha1-DqEOgDXo61uOREnwbaHHMGY7qoE=" 649 | }, 650 | "util-deprecate": { 651 | "version": "1.0.2", 652 | "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", 653 | "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" 654 | }, 655 | "uuid": { 656 | "version": "3.1.0", 657 | "resolved": "https://registry.npmjs.org/uuid/-/uuid-3.1.0.tgz", 658 | "integrity": "sha512-DIWtzUkw04M4k3bf1IcpS2tngXEL26YUD2M0tMDUpnUrz2hgzUBlD55a4FjdLGPvfHxS6uluGWvaVEqgBcVa+g==" 659 | }, 660 | "verror": { 661 | "version": "1.3.6", 662 | "resolved": "https://registry.npmjs.org/verror/-/verror-1.3.6.tgz", 663 | "integrity": "sha1-z/XfEpRtKX0rqu+qJoniW+AcAFw=" 664 | }, 665 | "wide-align": { 666 | "version": "1.1.2", 667 | "resolved": "https://registry.npmjs.org/wide-align/-/wide-align-1.1.2.tgz", 668 | "integrity": "sha512-ijDLlyQ7s6x1JgCLur53osjm/UXUYD9+0PbYKrBsYisYXzCxN+HC3mYDNy/dWdmf3AwqwU3CXwDCvsNgGK1S0w==" 669 | }, 670 
| "wrappy": { 671 | "version": "1.0.2", 672 | "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", 673 | "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" 674 | } 675 | } 676 | } 677 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "osmosis", 3 | "version": "1.1.10", 4 | "description": "Web scraper for NodeJS", 5 | "keywords": [ 6 | "web", 7 | "scraper", 8 | "crawler", 9 | "html", 10 | "xml", 11 | "dom", 12 | "parser" 13 | ], 14 | "repository": { 15 | "type": "git", 16 | "url": "https://github.com/rchipka/node-osmosis.git" 17 | }, 18 | "author": { 19 | "name": "rchipka", 20 | "email": "chipka01@email.franklin.edu" 21 | }, 22 | "dependencies": { 23 | "libxmljs-dom": "~0.0.17", 24 | "needle": "^1.6.0" 25 | }, 26 | "devDependencies": { 27 | "jscs": ">=3.0.2", 28 | "nodeunit": "0.11.3" 29 | }, 30 | "scripts": { 31 | "test": "node ./node_modules/.bin/nodeunit test" 32 | }, 33 | "license": "MIT", 34 | "main": "index", 35 | "engines": { 36 | "node": ">= 0.8.0" 37 | }, 38 | "readmeFilename": "Readme.md", 39 | "bugs": { 40 | "url": "https://github.com/rchipka/node-osmosis/issues" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /test/click.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = require('./server'), 3 | URL = require('url'), 4 | url = server.host + ':' + server.port; 5 | 6 | process.on('uncaughtException', function (err) { 7 | console.error(err.stack); 8 | }); 9 | 10 | module.exports.ajax = function (assert) { 11 | osmosis.get(url) 12 | .click('.ajax') 13 | .then(function (context) { 14 | assert.ok(context.get('.ajax').text() == 'loaded'); 15 | }) 16 | .done(function () { 17 | assert.done(); 18 | }); 19 | }; 20 | 21 | module.exports.script = function (assert) { 22 | 
osmosis.get(url) 23 | .click('div') 24 | .then(function (context) { 25 | assert.ok(context.get('div').text() === 'clicked'); 26 | }) 27 | .done(function () { 28 | assert.done(); 29 | }); 30 | }; 31 | 32 | server('/', function (url, req, res) { 33 | res.setHeader("Content-Type", "text/html"); 34 | res.write('
not clicked

'); 35 | res.end(); 36 | }); 37 | 38 | server('/ajax', function (url, req, res) { 39 | res.setHeader("Content-Type", "text/html"); 40 | res.write('loaded'); 41 | res.end(); 42 | }); 43 | 44 | 45 | server('/click.js', function (url, req, res) { 46 | res.setHeader("Content-Type", "text/javascript"); 47 | res.write('(' + (function () { 48 | var div = document.querySelector('div'); 49 | 50 | div.addEventListener('click', function () { 51 | div.innerHTML = 'clicked'; 52 | }); 53 | 54 | var xmlhttp = new XMLHttpRequest(); 55 | var ajax = document.querySelector('.ajax'); 56 | 57 | ajax.addEventListener('click', function () { 58 | xmlhttp.open("GET", "/ajax", true); 59 | xmlhttp.send(); 60 | }); 61 | 62 | xmlhttp.onreadystatechange = function () { 63 | if (xmlhttp.readyState == 4 && xmlhttp.status == 200) { 64 | ajax.innerHTML = xmlhttp.responseText; 65 | } 66 | }; 67 | }).toString() + ')(window)'); 68 | res.end(); 69 | }); 70 | -------------------------------------------------------------------------------- /test/config.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var osmosis = require('../index'), 4 | server = require('./server'), 5 | url = server.host + ':' + server.port, 6 | html = '' + 7 | 'test' + 8 | '
'; 9 | 10 | module.exports.config = function (assert) { 11 | osmosis 12 | .config({ 13 | 'ext': true, 14 | 'one': 1 15 | }); 16 | 17 | osmosis.parse(html) 18 | .config('proxy', 'localhost') 19 | .then(function () { 20 | var opts = this.getOpts(); 21 | 22 | assert.equal(opts.one, 1); 23 | assert.equal(opts.ext, true); 24 | assert.equal(opts.proxy, 'localhost'); 25 | }) 26 | .config('test', true) 27 | .then(function () { 28 | var opts = this.getOpts(); 29 | 30 | assert.equal(opts.one, 1); 31 | assert.equal(opts.ext, true); 32 | assert.equal(opts.test, true); 33 | assert.equal(opts.proxy, 'localhost'); 34 | }) 35 | .done(function () { 36 | var opts = osmosis.getOpts(); 37 | 38 | assert.equal(opts.one, 1); 39 | assert.equal(opts.ext, true); 40 | assert.equal(opts.test, undefined); 41 | assert.equal(opts.proxy, undefined); 42 | assert.done(); 43 | }); 44 | }; 45 | 46 | module.exports.global_cookies = function (assert) { 47 | osmosis.config('cookies', { gc1: 'overwriteMe', fake: true }); 48 | osmosis.config('cookies', { gc1: 'set' }); 49 | 50 | assert.deepEqual(osmosis.getOpts().cookies, 51 | { gc1: 'set' }); 52 | 53 | osmosis(url + '/headers') 54 | .then(function (context) { 55 | assert.ok(context.querySelector('cookie')); 56 | assert.equal(context.querySelector('cookie').textContent, 57 | 'gc1=set'); 58 | }) 59 | .post(url + '/headers') 60 | .cookie('c1', 'yes') 61 | .then(function (context) { 62 | assert.ok(context.querySelector('cookie')); 63 | assert.equal(context.querySelector('cookie').textContent, 64 | 'gc1=set; c1=yes'); 65 | }) 66 | .done(function () { 67 | osmosis.config('cookies', {}); 68 | assert.done(); 69 | }); 70 | }; 71 | 72 | 73 | module.exports.instance_cookies = function (assert) { 74 | var instance, 75 | expected = { 76 | gc1: 'true', 77 | cookie1: 'true', 78 | cookie2: 'true' 79 | }; 80 | 81 | osmosis.config('cookies', { gc1: true }); 82 | 83 | instance = new osmosis(url + '/headers') 84 | .cookie('cookie1', true) 85 | .then(function (context) { 86 
| assert.ok(context.querySelector('cookie')); 87 | assert.deepEqual( 88 | parseCookies( 89 | context.querySelector('cookie').textContent), 90 | expected); 91 | }) 92 | .set({ 93 | 'get_cookies': 'cookie', 94 | 'post_cookies': osmosis.post(url + '/set-cookie-redirect') 95 | .follow('a').find('cookie'), 96 | 'follow_cookies': osmosis.follow('a') 97 | .find('cookie') 98 | .then(function (node, data, next) { 99 | next(node, parseCookies(node.textContent)); 100 | }), 101 | 'set_cookies': osmosis.get('/set-cookie-redirect') 102 | .follow('a') 103 | .find('cookie') 104 | .then(function (node, data, next) { 105 | next(node, parseCookies(node.textContent)); 106 | }) 107 | }) 108 | .data(function (data) { 109 | assert.deepEqual(parseCookies(data.get_cookies), expected); 110 | assert.deepEqual(data.follow_cookies, expected); 111 | expected.testSetCookie1 = 'true'; 112 | expected.testSetCookie2 = 'true'; 113 | assert.deepEqual(parseCookies(data.post_cookies), expected); 114 | assert.deepEqual(data.set_cookies, expected); 115 | }) 116 | .done(function () { 117 | osmosis.config('cookies', {}); 118 | assert.done(); 119 | }); 120 | 121 | instance.cookie('cookie2', true); 122 | 123 | instance.run(); 124 | }; 125 | 126 | module.exports.headers = function (assert) { 127 | var calledThen = false; 128 | 129 | osmosis 130 | .get(url + '/headers') 131 | .header('one', 1) 132 | .headers({ 'test': true }) 133 | .then(function (context) { 134 | var opts = this.getOpts(); 135 | 136 | calledThen = true; 137 | 138 | assert.equal(opts.headers.one, 1); 139 | assert.equal(opts.headers.test, true); 140 | assert.equal(context.get('one').text(), '1'); 141 | assert.equal(context.get('test').text(), 'true'); 142 | }) 143 | .done(function () { 144 | assert.ok(calledThen); 145 | assert.done(); 146 | }); 147 | }; 148 | 149 | module.exports.rewrite = function (assert) { 150 | var calledThen = false; 151 | 152 | osmosis 153 | .get(function () { 154 | return url + '/headers'; 155 | }) 156 | .then(function 
(context) { 157 | assert.equal(context.find('host').length, 1); 158 | calledThen = true; 159 | }) 160 | .done(function () { 161 | assert.ok(calledThen); 162 | assert.done(); 163 | }); 164 | }; 165 | 166 | server('/headers', function (url, req, res) { 167 | var key; 168 | 169 | res.setHeader("Content-Type", "text/html"); 170 | 171 | for (key in req.headers) { 172 | res.write('<' + key + '>' + req.headers[key] + ''); 173 | } 174 | 175 | res.write('test'); 176 | 177 | res.end(); 178 | }); 179 | 180 | /* TODO: Save redirect cookies once Needle is capable. */ 181 | server('/set-cookie-redirect', function (href, req, res) { 182 | res.writeHead(301, { Location: 'http://' + url + '/set-cookie', 183 | 'Set-Cookie': 'testSetCookie1=true' }); 184 | res.end(); 185 | }); 186 | 187 | server('/set-cookie', function (url, req, res) { 188 | res.writeHead(200, { 'Content-Type': 'text/html', 189 | 'Set-Cookie': 'testSetCookie2=true' }); 190 | res.end(''); 191 | }); 192 | 193 | function parseCookies(str) { 194 | var cookies = {}; 195 | 196 | str.split('; ').forEach(function (c) { 197 | var arr = c.split('='); 198 | 199 | cookies[arr[0]] = arr[1]; 200 | }); 201 | 202 | return cookies; 203 | } 204 | -------------------------------------------------------------------------------- /test/do.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | html = '' + 3 | 'test' + 4 | '' + 5 | '' + 6 | '' + 7 | '', 8 | expected = { 9 | title: 'test', 10 | links: ['/rel'] 11 | }; 12 | 13 | module.exports.multiple = function (assert) { 14 | osmosis.parse(html) 15 | .do( 16 | osmosis.set({ 'title': 'title' }), 17 | osmosis.find('body').set('name', 'true').find('none'), // fails 18 | osmosis.set({ 'links': ['a@href'] }) 19 | ) 20 | .data(function (data) { 21 | assert.deepEqual(data, expected); 22 | }) 23 | .done(function () { 24 | assert.done(); 25 | }); 26 | }; 27 | 
-------------------------------------------------------------------------------- /test/filters.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | html = '' + 3 | 'test' + 4 | '' + 5 | '123' + 6 | ''; 7 | 8 | module.exports.contains = function (assert) { 9 | var count = 0; 10 | 11 | osmosis 12 | .parse(html) 13 | .find("b") 14 | .contains('1') 15 | .then(function () { 16 | count++; 17 | }) 18 | .done(function () { 19 | assert.equal(count, 1); 20 | assert.done(); 21 | }); 22 | }; 23 | 24 | module.exports.fail = function (assert) { 25 | var count = 0, errored = false; 26 | 27 | osmosis 28 | .parse(html) 29 | .find("b") 30 | .fail("node():contains('1')") 31 | .then(function () { 32 | count++; 33 | }) 34 | .error(function (msg) { 35 | if (msg.indexOf('node():contains') > -1) { 36 | errored = true; 37 | } 38 | }) 39 | .done(function () { 40 | assert.ok(errored); 41 | assert.equal(count, 2); 42 | assert.done(); 43 | }); 44 | }; 45 | 46 | module.exports.filter = function (assert) { 47 | var count = 0; 48 | 49 | osmosis 50 | .parse(html) 51 | .find("b") 52 | .filter("node():not(:contains('1'))") 53 | .then(function () { 54 | count++; 55 | }) 56 | .done(function () { 57 | assert.ok(count === 2); 58 | assert.done(); 59 | }); 60 | }; 61 | 62 | module.exports.match = function (assert) { 63 | var count = 0; 64 | 65 | osmosis 66 | .parse(html) 67 | .find("b") 68 | .match(/[1-2]/) 69 | .then(function () { 70 | count++; 71 | }) 72 | .done(function () { 73 | assert.ok(count === 2); 74 | assert.done(); 75 | }); 76 | }; 77 | -------------------------------------------------------------------------------- /test/find.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = require('./server'), 3 | URL = require('url'), 4 | url = server.host + ':' + server.port; 5 | 6 | module.exports.selector_array = function (assert) { 7 | var 
count = 0; 8 | 9 | osmosis.get(url + '/find') 10 | .find(['img', 'b']) 11 | .then(function () { 12 | count++; 13 | }) 14 | .done(function () { 15 | assert.equal(count, 7); 16 | assert.done(); 17 | }); 18 | }; 19 | 20 | module.exports.selector_css = function (assert) { 21 | var count = 0; 22 | 23 | osmosis.get(url + '/find') 24 | .find('.content ul:not([name]) li[2] b:last img') 25 | .then(function (context) { 26 | assert.ok(++count == context.getAttribute('src')); 27 | }) 28 | .done(function () { 29 | assert.equal(count, 3); 30 | assert.done(); 31 | }); 32 | }; 33 | 34 | module.exports.nested = function (assert) { 35 | var calledThen = true; 36 | 37 | osmosis.get(url + '/find') 38 | .find('ul:last') 39 | .set({ 40 | 'b': osmosis.find('b') 41 | }) 42 | .then(function (context, data) { 43 | calledThen = true; 44 | assert.equal(data.b.length, 3); 45 | }) 46 | .done(function () { 47 | assert.ok(calledThen); 48 | assert.done(); 49 | }); 50 | }; 51 | 52 | module.exports.select = function (assert) { 53 | var count = 0; 54 | 55 | osmosis.get(url + '/find') 56 | .find('ul:last > li:last') 57 | .select('b') 58 | .then(function () { 59 | count++; 60 | }) 61 | .done(function () { 62 | assert.equal(count, 2); 63 | assert.done(); 64 | }); 65 | }; 66 | 67 | module.exports.xpath = function (assert) { 68 | var count = 0; 69 | 70 | osmosis.get(url + '/find') 71 | .find('//div[@class]/ul[2]/li') 72 | .then(function () { 73 | count++; 74 | }) 75 | .done(function () { 76 | assert.equal(count, 2); 77 | assert.done(); 78 | }); 79 | }; 80 | 81 | module.exports.both = function (assert) { 82 | var count = 0; 83 | 84 | osmosis.get(url + '/find') 85 | .find('.content//preceding::[@name]') 86 | .then(function () { 87 | count++; 88 | }) 89 | .done(function () { 90 | assert.equal(count, 1); 91 | assert.done(); 92 | }); 93 | }; 94 | 95 | server('/find', function (url, req, res) { 96 | res.setHeader("Content-Type", "text/html"); 97 | res.write('\ 98 |
\ 99 | \ 102 | \ 108 |
\ 109 | '); 110 | res.end(); 111 | }); 112 | -------------------------------------------------------------------------------- /test/follow.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = require('./server'), 3 | URL = require('url'), 4 | fs = require('fs'), 5 | url = server.host + ':' + server.port; 6 | 7 | module.exports.href = function (assert) { 8 | var count = 0; 9 | 10 | osmosis.get(url + '/follow') 11 | .follow('li:skip-last > a') 12 | .then(function (context) { 13 | assert.ok(context.request.headers.referer); 14 | assert.ok(context.request.params.page == context.get('div').text()); 15 | count++; 16 | }) 17 | .done(function () { 18 | assert.ok(count == 5); 19 | assert.done(); 20 | }); 21 | }; 22 | 23 | 24 | module.exports.delay = function (assert) { 25 | var count = 0; 26 | 27 | osmosis.get(url + '/follow') 28 | .find('li:skip-last > a') 29 | .delay(0.2) 30 | .follow('@href') 31 | .then(function (context) { 32 | count++; 33 | assert.ok(context.request.headers.referer); 34 | assert.equal(context.request.params.page, context.get('div').text()); 35 | }) 36 | .done(function () { 37 | assert.equal(count, 5); 38 | assert.done(); 39 | }); 40 | }; 41 | 42 | /* 43 | module.exports.not_found = function(assert) { 44 | var count = 0; 45 | osmosis.get(url + '/follow') 46 | .follow('a@href', false, function(url + '/follow') { 47 | return '/404' 48 | }) 49 | .then(function(context, data) { 50 | count++; 51 | }) 52 | .done(function() { 53 | assert.ok(count == 5); 54 | assert.done(); 55 | }) 56 | } 57 | */ 58 | 59 | module.exports.internal = function (assert) { 60 | var count = 0; 61 | 62 | osmosis.get(url + '/follow') 63 | .follow('li > a:internal') 64 | .then(function (context) { 65 | count++; 66 | assert.ok(context.request.headers.referer); 67 | assert.ok(context.request.params.page == context.get('div').text()); 68 | }) 69 | .done(function () { 70 | assert.ok(count == 5); 71 | 
assert.done(); 72 | }); 73 | }; 74 | 75 | module.exports.unicode = function (assert) { 76 | var calledThen = false; 77 | 78 | osmosis.get(url + '/follow-utf8') 79 | .follow('a') 80 | .then(function (context) { 81 | assert.equal(context.get('div').textContent, 'true'); 82 | calledThen = true; 83 | }) 84 | .done(function () { 85 | assert.ok(calledThen); 86 | assert.done(); 87 | }) 88 | } 89 | 90 | /* 91 | * DEPRECATED. Use .find(selector).get(callback) instead. 92 | 93 | module.exports.rewrite = function (assert) { 94 | var count = 0; 95 | 96 | osmosis.get(url + '/follow') 97 | .follow('a:internal') 98 | .rewrite(function () { 99 | return '/?page=1'; 100 | }) 101 | .then(function (context) { 102 | assert.ok(context.request.headers.referer); 103 | assert.ok(1 == context.get('div').text()); 104 | }) 105 | .done(function () { 106 | assert.done(); 107 | }); 108 | }; 109 | */ 110 | 111 | // TODO: actually save 112 | /* 113 | module.exports.save = function (assert) { 114 | osmosis.get(url + '/follow') 115 | .follow('a:last') 116 | .then(function () { 117 | assert.ok(true); 118 | }) 119 | .done(function () { 120 | assert.done(); 121 | }); 122 | };*/ 123 | 124 | server('/follow-utf8', function (url, req, res) { 125 | res.setHeader("Content-Type", "text/html; charset=utf-8"); 126 | res.write('समाज-विश्व'); 127 | res.end(); 128 | }); 129 | 130 | server('/समाज-विश्व/test/test test test', function (url, req, res) { 131 | res.setHeader("Content-Type", "text/html"); 132 | res.write('
true
'); 133 | res.end(); 134 | }); 135 | 136 | server('/follow', function (url, req, res) { 137 | res.setHeader("Content-Type", "text/html"); 138 | 139 | if (url.query.page) { 140 | res.write('
' + url.query.page + '
'); 141 | } else { 142 | res.write(''); 150 | } 151 | 152 | res.end(); 153 | }); 154 | 155 | 156 | server('/404', function (url, req, res) { 157 | res.writeHead(404, { "Content-Type": "text/html" }); 158 | res.write(''); 159 | res.end(); 160 | }); 161 | -------------------------------------------------------------------------------- /test/get.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = require('./server'), 3 | URL = require('url'), 4 | fs = require('fs'), 5 | url = server.host + ':' + server.port; 6 | 7 | 8 | module.exports.function_url = function (assert) { 9 | osmosis.get(url + '/get') 10 | .then(function (context, data, next) { 11 | data.name = 'test'; 12 | next(context, data); 13 | }) 14 | .get(function (context, data) { 15 | return data.name + '-' + context.querySelector('p').innerText; 16 | }) 17 | .then(function (context) { 18 | assert.ok(context.get('p').text().indexOf('success') !== -1); 19 | }) 20 | .done(function () { 21 | assert.done(); 22 | }); 23 | }; 24 | 25 | module.exports.function_params = function (assert) { 26 | osmosis.get(url + '/test-test') 27 | .then(function (context, data, next) { 28 | data.name = 'test'; 29 | next(context, data); 30 | }) 31 | .get(url + '/get', function (context, data) { 32 | var params = {}; 33 | params[data.name] = context.get('p').text(); 34 | return params; 35 | }) 36 | .then(function (context) { 37 | assert.ok(context.get('div').text().indexOf('success') !== -1); 38 | }) 39 | .done(function () { 40 | assert.done(); 41 | }); 42 | }; 43 | 44 | module.exports.redirect = function (assert) { 45 | var calledThen = false, 46 | logged = false; 47 | 48 | osmosis.get(url + '/get?redirect=true') 49 | .then(function (context) { 50 | calledThen = true; 51 | assert.ok(context.request.headers.referer.length > 0); 52 | assert.equal(context.get('div').text(), 53 | context.location.pathname); 54 | 
assert.ok(context.get('div').text().indexOf('redirect') !== -1); 55 | }) 56 | .log(function (msg) { 57 | if (msg.indexOf('[redirect]') > -1) { 58 | logged = true; 59 | } 60 | }) 61 | .done(function () { 62 | assert.ok(calledThen); 63 | assert.ok(logged); 64 | assert.done(); 65 | }); 66 | }; 67 | 68 | module.exports.error_404 = function (assert) { 69 | var tries = 5, tried = 0; 70 | 71 | osmosis.get(url + '/get-404') 72 | .config('ignore_http_errors', false) 73 | .config('tries', tries) 74 | .error(function (msg) { 75 | if (msg.indexOf('404') > -1) { 76 | tried++; 77 | } 78 | }) 79 | .done(function () { 80 | assert.strictEqual(tries, tried); 81 | assert.done(); 82 | }); 83 | }; 84 | 85 | module.exports.error_redirect = function (assert) { 86 | var max = 4, logged = 0, errored = 0; 87 | 88 | osmosis.get(url + '/error-redirect') 89 | .config('follow', max) 90 | .config('tries', 1) 91 | .log(function (msg) { 92 | if (msg.indexOf('redirect') > -1) { 93 | logged++; 94 | } 95 | }) 96 | .error(function (msg) { 97 | if (msg.indexOf('Max redirects') > -1) { 98 | errored++; 99 | } 100 | }) 101 | .done(function () { 102 | assert.strictEqual(logged, max); 103 | assert.strictEqual(errored, 1); 104 | assert.done(); 105 | }); 106 | }; 107 | 108 | module.exports.error_parse = function (assert) { 109 | var tries = 4; 110 | 111 | osmosis.get(url + '/error-parse') 112 | .config('tries', tries) 113 | .error(function (msg) { 114 | if (msg.indexOf('empty') > -1) { 115 | tries--; 116 | } 117 | }) 118 | .done(function () { 119 | assert.strictEqual(tries, 0); 120 | assert.done(); 121 | }); 122 | }; 123 | 124 | module.exports.multiple = function (assert) { 125 | var totalRequests = 15, 126 | requests = totalRequests, 127 | results = [], 128 | done = false, 129 | timeout; 130 | 131 | while (requests--) { 132 | osmosis.get(url + '/get?count=' + requests) 133 | .set('div', 'div') 134 | .data(function (data) { 135 | var key = JSON.parse(data.div).count; 136 | 137 | if (results.indexOf(key) === 
-1) { 138 | results.push(key); 139 | } 140 | }) 141 | .done(function () { 142 | if (results.length === totalRequests) { 143 | clearTimeout(timeout); 144 | if (done === false) { 145 | assert.done(); 146 | done = true; 147 | } 148 | } 149 | }); 150 | } 151 | 152 | timeout = setTimeout(function () { 153 | console.log(results); 154 | assert.equal(results.length, totalRequests); 155 | if (done === false) { 156 | assert.done(); 157 | done = true; 158 | } 159 | }, 5000); 160 | } 161 | 162 | module.exports.absentQueryString = function (assert) { 163 | var found = false; 164 | osmosis.get(url + '/test-query-string') 165 | .then(function (document) { 166 | assert.strictEqual(document.location.href, 'http://' + server.host + ':' + server.port + '/test-query-string'); 167 | }) 168 | .find('div') 169 | .set({ content: 'p' }) 170 | .data(function (data) { 171 | found = true 172 | }) 173 | .done(function () { 174 | assert.ok(found); 175 | assert.done(); 176 | }); 177 | }; 178 | 179 | server('/get', function (url, req, res) { 180 | if (url.query.redirect !== undefined) { 181 | res.writeHead(301, { Location: '/redirect' }); 182 | res.end(); 183 | return; 184 | } 185 | 186 | res.write('

test

' + JSON.stringify(url.query) + '
'); 187 | res.end(); 188 | }); 189 | 190 | server('/get-404', function (url, req, res) { 191 | res.writeHead(404); 192 | res.end(); 193 | }); 194 | 195 | server('/error-redirect', function (url, req, res) { 196 | res.writeHead(301, { Location: '/error-redirect' }); 197 | res.end(); 198 | }); 199 | 200 | server('/error-parse', function (url, req, res) { 201 | res.writeHead(200); 202 | res.end(); 203 | }); 204 | 205 | 206 | server('/redirect', function (url, req, res) { 207 | res.write('
/redirect
'); 208 | res.end(); 209 | }); 210 | 211 | server('/test-test', function (url, req, res) { 212 | res.write('

success

'); 213 | res.end(); 214 | }); 215 | 216 | server('/test-query-string', function (url, req, res) { 217 | if (url.path === '/test-query-string?') { 218 | res.writeHead(404); 219 | res.end(); 220 | return; 221 | } 222 | 223 | res.write('

test

'); 224 | res.end(); 225 | }); 226 | -------------------------------------------------------------------------------- /test/internals.js: -------------------------------------------------------------------------------- 1 | module.exports.Data = function (assert) { 2 | var Data = require('../lib/Data.js'), 3 | parent, child; 4 | 5 | // Data root can be an array 6 | parent = (new Data()).isArray(true); 7 | child = parent.child().set('key', 'value'); 8 | parent.merge(child); 9 | assert.deepEqual(parent.getObject(), [{ key: 'value' }]); 10 | 11 | parent = (new Data()).isArray(true); 12 | 13 | // Child objects are pushed to an array 14 | parent.merge(parent.child().set('key', 'value')); 15 | 16 | // Child arrays are pushed to an array 17 | parent.merge(parent.child() 18 | .isArray(true) 19 | .push('val1') 20 | .push('val2')); 21 | 22 | // Arrays should ignore keys for `.set` 23 | parent.merge(parent.child() 24 | .isArray(true) 25 | .set('key', { 'nested': 'value' })); 26 | 27 | // Arrays shouldn't set based on an index 28 | parent.merge(child.child() 29 | .isArray(true) 30 | .setIndex('noIndexForArrays') 31 | .push('testIndex')); 32 | 33 | // Empty arrays shouldn't be merged 34 | parent.merge(child.child() 35 | .isArray(true)); 36 | 37 | // Empty objects shouldn't be merged 38 | parent.merge(child.child()); 39 | 40 | // Calling `.push` should cast an object to an array 41 | parent.merge(child.child().set('key', 'value').push('convertedToArray')); 42 | 43 | assert.deepEqual( 44 | [ 45 | { 'key': 'value' }, 46 | ['val1', 'val2'], 47 | [ 48 | { 'nested': 'value' } 49 | ], 50 | [ 51 | 'testIndex' 52 | ], 53 | [ 54 | { key: 'value' }, 55 | 'convertedToArray' 56 | ] 57 | ], 58 | parent.getObject()); 59 | 60 | 61 | assert.done(); 62 | }; 63 | -------------------------------------------------------------------------------- /test/login.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = 
require('./server'), 3 | URL = require('url'), 4 | fs = require('fs'), 5 | url = server.host + ':' + server.port, 6 | user = 'user', 7 | pass = 'pass'; 8 | 9 | module.exports.form = function (assert) { 10 | var errors = 0; 11 | 12 | osmosis 13 | .get(url + '/form') 14 | .login(user, pass) 15 | .success('div:contains("authenticated")') 16 | .then(function (context) { 17 | assert.equal(context.get('div').text(), 'authenticated'); 18 | }) 19 | .follow('a') 20 | .then(function (context) { 21 | var div = context.get('div'); 22 | 23 | assert.ok(div); 24 | assert.equal(div.text(), 'done'); 25 | }) 26 | .error(function () { 27 | errors++; 28 | }) 29 | .done(function () { 30 | assert.equal(errors, 0); 31 | assert.done(); 32 | }); 33 | }; 34 | 35 | module.exports.basic_auth = function (assert) { 36 | var errors = 0; 37 | 38 | osmosis 39 | .get(url + '/basic_auth') 40 | .config({ username: user, password: pass }) 41 | .then(function (context) { 42 | assert.equal(context.get('div').text(), 'authenticated'); 43 | }) 44 | .follow('a') 45 | .then(function (context) { 46 | assert.equal(context.get('div').text(), 'done'); 47 | }) 48 | .error(function () { 49 | errors++; 50 | }) 51 | .done(function () { 52 | assert.equal(errors, 0); 53 | assert.done(); 54 | }); 55 | }; 56 | 57 | server('/basic_auth', function (url, req, res) { 58 | var base64, arr; 59 | 60 | if (req.headers.authorization) { 61 | base64 = new Buffer( 62 | req.headers.authorization.replace('Basic ', ''), 63 | 'base64'); 64 | arr = base64.toString().split(':'); 65 | 66 | if (arr[0] != user || arr[1] != pass) { 67 | res.write('
Invalid username or password
'); 68 | } else { 69 | if (url.query.next) { 70 | res.write('
done
'); 71 | } else { 72 | res.write('
authenticated
'); 73 | } 74 | } 75 | } else { 76 | res.writeHead(401, { "Content-Type": "text/html", 77 | "Authorization": 'Basic realm="login"' }); 78 | res.write('
unauthenticated
'); 79 | } 80 | 81 | res.end(); 82 | }); 83 | 84 | server('/form', function (url, req, res, data) { 85 | res.setHeader('Content-Type', 'text/html'); 86 | 87 | if (req.method === 'GET') { 88 | if (url.query.next && req.headers.cookie == 'auth=true') { 89 | res.write('
done
'); 90 | } else { 91 | res.write('' + 92 | '' + 93 | '' + 94 | '' + 95 | '' + 96 | '
'); 97 | } 98 | } else { 99 | if (data.user == user && data.pass == pass) { 100 | res.setHeader('Set-Cookie', 'auth=true; Domain=.yahoo.com'); 101 | res.write('
authenticated
unauthenticated'); 104 | } 105 | } 106 | 107 | res.end(); 108 | }); 109 | -------------------------------------------------------------------------------- /test/paginate.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'); 2 | var server = require('./server'); 3 | var URL = require('url'); 4 | 5 | var url = server.host + ':' + server.port; 6 | 7 | module.exports.link = function (assert) { 8 | var count = 0; 9 | 10 | osmosis.get(url + '/paginate') 11 | .paginate('a[rel="next"]', 3) 12 | .set('page', 'div') 13 | .then(function (context, data) { 14 | var params = context.request.params; 15 | var page = (params && params.page) || 1; 16 | 17 | assert.equal(page, data.page); 18 | assert.equal(page, ++count); 19 | }) 20 | .done(function () { 21 | assert.ok(count > 1); 22 | assert.done(); 23 | }); 24 | }; 25 | 26 | module.exports.param = function (assert) { 27 | var count = 0; 28 | 29 | osmosis.get(url + '/paginate', { page: 1 }) 30 | .paginate({ page: +1 }, 3) 31 | .set('page', 'div') 32 | .then(function (context, data) { 33 | var params = context.request.params; 34 | var page = (params && params.page) || 1; 35 | 36 | assert.equal(page, data.page); 37 | assert.equal(page, ++count); 38 | }) 39 | .done(function () { 40 | assert.ok(count > 1); 41 | assert.done(); 42 | }); 43 | }; 44 | 45 | module.exports.form = function (assert) { 46 | var count = 0; 47 | 48 | osmosis.get(url + '/paginate') 49 | .paginate('form', 3) 50 | .set('page', 'div') 51 | .then(function (context, data) { 52 | var params = context.request.params; 53 | var page = (params && params.page) || 1; 54 | 55 | assert.ok(page == data.page); 56 | assert.ok(page == ++count); 57 | }) 58 | .done(function () { 59 | assert.ok(count > 1); 60 | assert.done(); 61 | }); 62 | }; 63 | 64 | module.exports.func_url = function (assert) { 65 | var count = 0; 66 | 67 | osmosis.get(url + '/paginate', { page: 1 }) 68 | .paginate(function (document, data) { 69 | 
return document.request.pathname + '?page=' + 70 | (parseInt(document.request.query.page, 10) + 1); 71 | }, 3) 72 | .set('page', 'div') 73 | .then(function (context, data) { 74 | var params = context.request.params; 75 | var page = (params && params.page) || 1; 76 | 77 | assert.equal(page, data.page); 78 | assert.equal(page, ++count); 79 | }) 80 | .done(function () { 81 | assert.ok(count > 1); 82 | assert.done(); 83 | }); 84 | }; 85 | 86 | module.exports.func_obj = function (assert) { 87 | var count = 0; 88 | 89 | osmosis.get(url + '/paginate', { page: 1 }) 90 | .paginate(function (document, data) { 91 | return { 92 | page: (parseInt(document.request.query.page, 10) + 1) 93 | }; 94 | }, 3) 95 | .set('page', 'div') 96 | .then(function (context, data) { 97 | var params = context.request.params; 98 | var page = (params && params.page) || 1; 99 | 100 | assert.equal(page, data.page); 101 | assert.equal(page, ++count); 102 | }) 103 | .done(function () { 104 | assert.ok(count > 1); 105 | assert.done(); 106 | }); 107 | }; 108 | 109 | server('/paginate', function (url, req, res, data) { 110 | res.setHeader("Content-Type", "text/html"); 111 | var page = 1; 112 | 113 | if (data && data.page) 114 | page = data.page; 115 | else if (url.query.page) 116 | page = url.query.page; 117 | res.write('
' + page + '
\ 118 |
'); 119 | res.end(); 120 | }); 121 | -------------------------------------------------------------------------------- /test/parse.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | html = ''; 3 | 4 | module.exports.html = function (assert) { 5 | osmosis.parse(html) 6 | .then(function (context) { 7 | assert.equal(context.find('body').length, 1); 8 | }) 9 | .done(function () { 10 | assert.done(); 11 | }); 12 | }; 13 | 14 | module.exports.base_url = function (assert) { 15 | osmosis.parse(html, { baseUrl: 'test.com' }) 16 | .then(function (document) { 17 | assert.ok(document.location.href); 18 | }) 19 | .done(function () { 20 | assert.done(); 21 | }); 22 | }; 23 | -------------------------------------------------------------------------------- /test/process_response_option.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = require('./server'), 3 | url = server.host + ':' + server.port; 4 | 5 | 6 | module.exports.process_response_default_none = function (assert) { 7 | test_process_response( 8 | '/response-code-200', 'hi', undefined, assert, 9 | false 10 | ); 11 | }; 12 | 13 | module.exports.process_response_fail_on_200 = function (assert) { 14 | test_process_response( 15 | '/response-code-200', undefined, '200-die', assert, 16 | function(d, r, n, c) { r.statusCode === 200 ? c('200-die') : n(d); } 17 | ); 18 | }; 19 | module.exports.process_response_fail_on_incomplete_html = function (assert) { 20 | test_process_response( 21 | '/response-code-no-body-end', undefined, 'no-body-end', assert, 22 | function(d, r, n, c) { d.toString('utf8').indexOf('') === -1 ? 
c('no-body-end') : n(d); } 23 | ); 24 | }; 25 | module.exports.process_response_bold_to_italic = function (assert) { 26 | test_process_response( 27 | '/response-bold-hi', 'hi', undefined, assert, 28 | function(d, r, n) { n(d.toString('utf8').replace(/b>/g, 'i>')); } 29 | ); 30 | }; 31 | module.exports.process_response_bold_to_italic_sync = function (assert) { 32 | test_process_response( 33 | '/response-bold-hi', 'hi', undefined, assert, 34 | function(d) { return d.toString('utf8').replace(/b>/g, 'i>'); } 35 | ); 36 | }; 37 | 38 | function test_process_response(req_url, expected_data, expected_error, assert, process_response_option) { 39 | var result_data, result_error; 40 | var opts = {parse: false}; 41 | if (process_response_option) { 42 | opts.process_response = process_response_option; 43 | } 44 | osmosis 45 | .get(url + req_url) 46 | .config(opts) 47 | .then(function (data) { 48 | result_data = data.toString('utf8'); 49 | }) 50 | .error(function (error) { 51 | result_error = error; 52 | }) 53 | .done(function () { 54 | assert.equal(result_data, expected_data); 55 | assert.ok(result_error == expected_error || result_error.indexOf(expected_error) > -1); 56 | assert.ok(true); 57 | assert.done(); 58 | }); 59 | } 60 | 61 | server('/response-code-200', function (url, req, res) { 62 | res.writeHead(200); 63 | res.end('hi'); 64 | }); 65 | server('/response-bold-hi', function (url, req, res) { 66 | res.writeHead(200); 67 | res.end('hi'); 68 | }); 69 | server('/response-code-no-body-end', function (url, req, res) { 70 | res.writeHead(200); 71 | res.end('but no end body'); 72 | }); 73 | -------------------------------------------------------------------------------- /test/proxy.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'); 2 | var server = require('./server'); 3 | var http = require('http'); 4 | 5 | 6 | var url = server.host + ':' + server.port; 7 | 8 | var proxy = function (request, response) { 
9 | var req = http.request(request.url + (request.url.indexOf('?') == -1 ? '?' : '&') + 'proxy=' + request.socket.localPort); 10 | 11 | req.addListener('response', function (res) { 12 | res.addListener('data', function (chunk) { 13 | response.write(chunk, 'binary'); 14 | }); 15 | res.addListener('end', function () { 16 | response.end(); 17 | }); 18 | response.writeHead(res.statusCode, res.headers); 19 | }); 20 | request.addListener('data', function (chunk) { 21 | req.write(chunk, 'binary'); 22 | }); 23 | request.addListener('end', function () { 24 | req.end(); 25 | }); 26 | }; 27 | 28 | var proxies = []; 29 | 30 | for (var port = 8080; port < 8090; port++) { 31 | proxies.push(http.createServer(proxy).listen(port)); 32 | } 33 | 34 | module.exports.config = function (assert) { 35 | osmosis.get(url + '/proxy') 36 | .config('proxy', '127.0.0.1:8080') 37 | .then(function (context) { 38 | assert.ok(context.get('div').text() == '8080'); 39 | }) 40 | .done(function () { 41 | assert.done(); 42 | }); 43 | }; 44 | 45 | module.exports.macro = function (assert) { 46 | osmosis.get(url + '/proxy') 47 | .proxy('127.0.0.1:8080') 48 | .then(function (context) { 49 | assert.ok(context.get('div').text() == '8080'); 50 | }) 51 | .done(function () { 52 | assert.done(); 53 | }); 54 | }; 55 | 56 | module.exports.multiple = function (assert) { 57 | var p = []; 58 | 59 | proxies.forEach(function (proxy) { 60 | p.push('localhost:' + proxy.address().port); 61 | }); 62 | 63 | osmosis.get(url + '/proxy') 64 | .config('tries', p.length) 65 | .proxy(p) 66 | .then(function (context) { 67 | assert.equal(context.get('div').text(), '8080'); 68 | }) 69 | .get('/proxy?err=true') 70 | .done(function () { 71 | assert.equal(p.length, 1); 72 | proxies.forEach(function (proxy) { 73 | proxy.close(); 74 | }); 75 | assert.done(); 76 | }); 77 | }; 78 | 79 | server('/proxy', function (url, req, res) { 80 | if (url.query.err !== undefined) { 81 | res.writeHead(500); 82 | res.end(); 83 | return; 84 | } 85 | 86 | 
res.setHeader("Content-Type", "text/html"); 87 | res.write('
' + url.query.proxy + '
'); 88 | res.end(); 89 | }); 90 | -------------------------------------------------------------------------------- /test/resume.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = require('./server'), 3 | url = server.host + ':' + server.port, 4 | pages = 50; 5 | 6 | module.exports.pause = function (assert) { 7 | var paused = false, 8 | count = 0, 9 | instance = 10 | new osmosis.get(url + '/pause') 11 | .follow('a') 12 | .then(function () { 13 | assert.ok(!paused); 14 | count++; 15 | }) 16 | .done(function () { 17 | assert.equal(count, pages); 18 | assert.ok(!paused); 19 | assert.done(); 20 | }); 21 | 22 | instance.run(); 23 | 24 | setTimeout(function () { 25 | paused = true; 26 | assert.ok(count > 0); 27 | assert.ok(count < pages); 28 | instance.pause(); 29 | 30 | setTimeout(function () { 31 | paused = false; 32 | instance.resume(); 33 | }, 300); 34 | }, 300); 35 | }; 36 | 37 | server('/pause', function (url, req, res) { 38 | var i = 0, out = ''; 39 | 40 | res.setHeader("Content-Type", "text/html"); 41 | 42 | for (; i < pages; i++) { 43 | out += ''; 44 | } 45 | 46 | res.write(out); 47 | 48 | setTimeout(function () { 49 | res.end(); 50 | }, 50); 51 | }); 52 | -------------------------------------------------------------------------------- /test/run.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = require('./server'), 3 | url = server.host + ':' + server.port; 4 | 5 | var name = function () { 6 | return true; 7 | }; 8 | 9 | module.exports.immediate = function (assert) { 10 | var calledThen = false; 11 | 12 | new osmosis(url + '/run') 13 | .then(function (context, data, next, done) { 14 | assert.equal(context.get('div').textContent, 'loaded'); 15 | calledThen = true; 16 | next(context, data); 17 | done(); 18 | }) 19 | .done(function () { 20 | assert.ok(calledThen); 21 | assert.done(); 22 | 
}).run(); 23 | }; 24 | 25 | module.exports.multiple = function (assert) { 26 | var count = 0, r1, r2, 27 | instance = 28 | new osmosis.get(url + '/run') 29 | .then(function () { 30 | count++; 31 | }) 32 | .done(function () { 33 | if (count === 2) { 34 | assert.done(); 35 | } 36 | }); 37 | 38 | r1 = instance.run(); 39 | r2 = instance.run(); 40 | }; 41 | 42 | module.exports.new_instance_command = function (assert) { 43 | var calledThen = false, 44 | calledCB = false, 45 | instance = 46 | new osmosis.get(url + '/run') 47 | .then(function (context, data, next, done) { 48 | assert.equal(context.get('div').textContent, 'loaded'); 49 | calledThen = true; 50 | next(context, data); 51 | done(); 52 | }) 53 | .done(function () { 54 | assert.ok(calledCB); 55 | assert.ok(calledThen); 56 | assert.done(); 57 | }); 58 | 59 | setTimeout(function () { 60 | calledCB = true; 61 | instance.run(); 62 | }, 500); 63 | }; 64 | 65 | module.exports.new_instance_get = function (assert) { 66 | var calledThen = false, 67 | calledCB = false, 68 | instance = 69 | new osmosis(url + '/run') 70 | .then(function (context, data, next, done) { 71 | assert.equal(context.get('div').textContent, 'loaded'); 72 | calledThen = true; 73 | next(context, data); 74 | done(); 75 | }) 76 | .done(function () { 77 | assert.ok(calledCB); 78 | assert.ok(calledThen); 79 | assert.done(); 80 | }); 81 | 82 | setTimeout(function () { 83 | calledCB = true; 84 | instance.run(); 85 | }, 500); 86 | }; 87 | 88 | server('/run', function (url, req, res) { 89 | res.setHeader("Content-Type", "text/html"); 90 | res.write('
loaded
'); 91 | res.end(); 92 | }); 93 | -------------------------------------------------------------------------------- /test/save.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rchipka/node-osmosis/baed7239fc5c22ea8d00a5d2dc45f97b2d64b5c5/test/save.js -------------------------------------------------------------------------------- /test/server/index.js: -------------------------------------------------------------------------------- 1 | var http = require('http'), 2 | URL = require("url"), 3 | qs = require("querystring"), 4 | host = 'localhost', 5 | port = 1337, 6 | paths = {}, 7 | server; 8 | 9 | server = http.createServer(function (req, res) { 10 | var url = URL.parse(req.url, true), 11 | uri = decodeURIComponent(url.pathname), 12 | postData = ''; 13 | 14 | if (paths[uri] !== undefined) { 15 | if (req.method === 'POST') { 16 | req.on('data', function (chunk) { 17 | postData += chunk.toString(); 18 | }); 19 | req.on('end', function () { 20 | if (!req.headers['content-type'] || req.headers['content-type'].indexOf('multipart') !== 0) 21 | postData = qs.parse(postData); 22 | paths[uri](url, req, res, postData); 23 | }); 24 | } else { 25 | paths[uri](url, req, res); 26 | } 27 | } else { 28 | res.writeHead(404); 29 | res.end(); 30 | } 31 | }); 32 | 33 | server.on('error', function () { 34 | console.log("ERROR:", error); 35 | }); 36 | 37 | server.listen(port); 38 | 39 | module.exports = function (path, cb) { 40 | if (paths[path]) { 41 | throw new Error("Path " + path + " exists"); 42 | } 43 | 44 | paths[path] = cb; 45 | }; 46 | 47 | module.exports.host = host; 48 | module.exports.port = port; 49 | module.exports.close = function () { 50 | server.close(); 51 | }; 52 | -------------------------------------------------------------------------------- /test/set.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = 
require('./server'), 3 | URL = require('url'), 4 | fs = require('fs'), 5 | expected = { 6 | title: "TITLE", 7 | content: "CONTENT", 8 | innerHTML: 'TITLE', 10 | source: 'TITLE', 11 | object: { 12 | id: 'content' 13 | }, 14 | array: 15 | ['TITLE', 16 | { first_link: '/1' }, 17 | 'TITLE', 18 | 'TITLE', 19 | { all_links: ['/1', '/2'] }, 20 | { title: 'TITLE' }], 21 | find: 'CONTENT', 22 | find_arr: ['/1', '/2'], 23 | get: { 24 | title: "1" 25 | }, 26 | follow: [ 27 | { title: "1" }, 28 | { title: "2" } 29 | ], 30 | follow_array: [ 31 | "/1", 32 | "/2", 33 | { title: "1" }, 34 | { title: "2" } 35 | ], 36 | get_follow: [ 37 | { page: "2", 38 | title: "1" }, 39 | { page: "3", 40 | title: "1" } 41 | ], 42 | get_nested_follow: { 43 | pages: [ 44 | { page: "2" }, 45 | { page: "3" }], 46 | title: "1" 47 | }, 48 | then: { called: true }, 49 | then_multiple: [1, 2, 3], 50 | then_none: {}, 51 | //then_none_done: [{}, {}], 52 | then_new_context: 'TITLE', 53 | then_new_data: [1, 2, 3] 54 | }, 55 | expected_array_root = [ 56 | '/1', 57 | '/2', 58 | { href: '/1', name: '1' }, 59 | { href: '/2', name: '2' }, 60 | [[['/1']]] 61 | ], 62 | expected_callbacks = { 63 | links: [ 64 | { url: '/1', link: 1 }, 65 | { url: '/2', link: 2 } 66 | ], 67 | page2: { title: 2 } 68 | }, 69 | url = server.host + ':' + server.port; 70 | 71 | module.exports.array_root = function (assert) { 72 | var calledThen = false, calledData = false; 73 | 74 | osmosis.get(url + '/set') 75 | .set([ 76 | 'a@href', 77 | osmosis.find('a').set('name').set('href', '@href'), 78 | [[['a:first@href']]] 79 | ]) 80 | .then(function (context, data) { 81 | calledThen = true; 82 | assert.ok(Array.isArray(data)); 83 | }) 84 | .data(function (data) { 85 | calledData = true; 86 | assert.deepEqual(data, expected_array_root); 87 | }) 88 | .done(function () { 89 | assert.ok(calledThen); 90 | assert.ok(calledData); 91 | assert.done(); 92 | }); 93 | }; 94 | 95 | module.exports.callbacks = function (assert) { 96 | var calledThen = false, 
calledData = false; 97 | 98 | osmosis.get(url + '/set') 99 | .set({ 100 | links: osmosis.find('a') 101 | .set('link', function (link) { 102 | return parseInt(link.innerHTML); 103 | }) 104 | .set('url', function (link) { 105 | return link.getAttribute("href"); 106 | }), 107 | page2: osmosis.get(function (doc) { 108 | return doc.querySelector('a:last'); 109 | }).set('title', 'title') 110 | }) 111 | .then(function () { 112 | calledThen = true; 113 | }) 114 | .data(function (data) { 115 | calledData = true; 116 | assert.deepEqual(data, expected_callbacks); 117 | }) 118 | .done(function () { 119 | assert.ok(calledThen); 120 | assert.ok(calledData); 121 | assert.done(); 122 | }); 123 | }; 124 | 125 | module.exports.nested = function (assert) { 126 | var calledThen = false, calledData = false; 127 | 128 | osmosis.get(url + '/set') 129 | .set({ 130 | title: 'title', 131 | content: '#content', 132 | fake: 'fake-selector', 133 | innerHTML: 'head:html', 134 | source: 'title:source', 135 | object: { 136 | id: 'div@id', 137 | fake: 'fake-selector' 138 | }, 139 | array: [ 140 | 'title', 141 | { first_link: 'a:first@href' }, 142 | osmosis.find('title'), 143 | osmosis.then(function (context, data, next) { 144 | next(context.get('title'), data); 145 | }), 146 | { all_links: ['a@href'] }, 147 | osmosis.find('title').set('title') 148 | ], 149 | find: osmosis.find('div'), 150 | find_arr: osmosis.find('a@href'), 151 | get: osmosis.get('/1').set({ title: 'title' }), 152 | get_fail: osmosis.get('/notfound').set({ title: 'title' }), 153 | follow: osmosis.follow('a').set({ title: 'title' }), 154 | follow_fail: osmosis.follow('fake-selector').set({ title: 'title' }), 155 | follow_array: [ 156 | 'a@href', 157 | 'fake-selector', 158 | osmosis.follow('a').set({ title: 'title' }) 159 | ], 160 | get_follow: 161 | osmosis('/1') 162 | .set({ title: 'title' }) 163 | .follow('a') 164 | .set({ page: 'title' }), 165 | get_nested_follow: osmosis.get('/1').set({ 166 | title: 'title', 167 | pages: 
osmosis.follow('a').set({ page: 'title' }) 168 | }), 169 | then: osmosis.then(function (context, data, next) { 170 | data.called = true; 171 | next(context, data); 172 | }), 173 | then_multiple: osmosis.then(function (context, data, next, done) { 174 | var i = 1; 175 | 176 | data.called = true; 177 | 178 | for (; i <= 3; i++) { 179 | next(context, i); 180 | } 181 | 182 | done(); 183 | }), 184 | then_new_data: osmosis.then(function (context, data, next) { 185 | next(context, [1, 2, 3]); 186 | }), 187 | then_new_context: osmosis.then(function (context, data, next) { 188 | next(context.get('title'), data); 189 | }), 190 | then_none: osmosis.then(function () { 191 | }) 192 | /*then_done_none: osmosis.then(function(context, data, next, done) { 193 | setTimeout(function() { 194 | next(context, data); 195 | setTimeout(function() { 196 | next(context, data); 197 | done(); 198 | }, 200); 199 | }, 350) 200 | }),*/ 201 | }) 202 | .then(function (context, data, next) { 203 | calledThen = true; 204 | assert.equal(context, context.doc()); 205 | next(context, data); 206 | }) 207 | .data(function (data) { 208 | calledData = true; 209 | assert.deepEqual(data, expected); 210 | }) 211 | .done(function () { 212 | assert.ok(calledThen); 213 | assert.ok(calledData); 214 | assert.done(); 215 | }); 216 | }; 217 | 218 | server('/set', function (url, req, res) { 219 | res.setHeader("Content-Type", "text/html"); 220 | res.write('TITLE' + 221 | '
CONTENT
' + 222 | '12'); 223 | res.end(); 224 | }); 225 | 226 | server('/1', function (url, req, res) { 227 | res.setHeader("Content-Type", "text/html"); 228 | res.write('1' + 229 | ''); 230 | setTimeout(function () { 231 | res.end(); 232 | }, 500); 233 | }); 234 | 235 | server('/2', function (url, req, res) { 236 | res.setHeader("Content-Type", "text/html"); 237 | res.write('2'); 238 | setTimeout(function () { 239 | res.end(); 240 | }, 250); 241 | }); 242 | 243 | server('/3', function (url, req, res) { 244 | res.setHeader("Content-Type", "text/html"); 245 | res.write('3'); 246 | res.end(); 247 | }); 248 | -------------------------------------------------------------------------------- /test/stop.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'), 2 | server = require('./server'), 3 | url = server.host + ':' + server.port; 4 | 5 | module.exports.stop = function (assert) { 6 | var error = false, count = 0, instance = 7 | osmosis.get(url + '/delay-response') 8 | .follow('a') 9 | .follow('a') 10 | .follow('a') 11 | .log(function (msg) { 12 | if (msg.indexOf('loaded') > -1) { 13 | if (++count === 2) { 14 | instance.stop(); 15 | } 16 | } 17 | }) 18 | .then(function () { 19 | error = true; 20 | }) 21 | .done(function () { 22 | assert.equal(count, 2); 23 | assert.equal(error, false); 24 | assert.ok(true); 25 | assert.done(); 26 | }); 27 | }; 28 | 29 | 30 | server('/delay-response', function (url, req, res) { 31 | res.setHeader("Content-Type", "text/html"); 32 | res.end(''); 33 | }); 34 | -------------------------------------------------------------------------------- /test/submit.js: -------------------------------------------------------------------------------- 1 | var osmosis = require('../index'); 2 | var server = require('./server'); 3 | var fs = require('fs'); 4 | var URL = require('url'); 5 | 6 | var url = server.host + ':' + server.port; 7 | 8 | /* 9 | * TODO: Add radio button tests 10 | * Add 
input[name] case-insensitivity tests 11 | */ 12 | 13 | module.exports.form1 = function (assert) { 14 | var calledThen = false; 15 | 16 | osmosis.get(url + '/submit-form') 17 | .submit('form') 18 | .then(function (context) { 19 | calledThen = true; 20 | assert.deepEqual(JSON.parse(context.get('#data').text()), getInputs(1, 'sub1')); 21 | }) 22 | .done(function () { 23 | assert.ok(calledThen); 24 | assert.done(); 25 | }); 26 | }; 27 | 28 | module.exports.form2 = function (assert) { 29 | var calledThen = false; 30 | 31 | osmosis.get(url + '/submit-form') 32 | .submit('form[2]') 33 | .then(function (context) { 34 | calledThen = true; 35 | assert.deepEqual(JSON.parse(context.get('#data').text()), getInputs(2, 'sub1')); 36 | }) 37 | .done(function () { 38 | assert.ok(calledThen); 39 | assert.done(); 40 | }); 41 | }; 42 | 43 | module.exports.button = function (assert) { 44 | var calledThen = false; 45 | 46 | osmosis.get(url + '/submit-form') 47 | .submit('form:first [name="sub2"]') 48 | .then(function (context) { 49 | calledThen = true; 50 | assert.deepEqual(JSON.parse(context.get('#data').text()), getInputs(1, 'sub2')); 51 | }) 52 | .done(function () { 53 | assert.ok(calledThen); 54 | assert.done(); 55 | }); 56 | }; 57 | 58 | module.exports.form_attr = function (assert) { 59 | var calledThen = false; 60 | var inputs = getInputs(1); 61 | 62 | inputs['sub2'] = 'Submit Query'; 63 | osmosis.get(url + '/submit-form') 64 | .submit('form[2] [name="sub2"]') 65 | .then(function (context) { 66 | calledThen = true; 67 | assert.deepEqual(JSON.parse(context.get('#data').text()), inputs); 68 | }) 69 | .done(function () { 70 | assert.ok(calledThen); 71 | assert.done(); 72 | }); 73 | }; 74 | 75 | module.exports.context_data = function (assert) { 76 | var calledThen = false; 77 | var inputs = getInputs(2, 'sub1'); 78 | 79 | inputs['it1'] = 'success'; 80 | osmosis.get(url + '/submit-form') 81 | .submit('form[2]', function(context) { 82 | return {it1: context.get('#dynamic-data').text()}; 
83 | }) 84 | .then(function (context) { 85 | calledThen = true; 86 | assert.deepEqual(JSON.parse(context.get('#data').text()), inputs); 87 | }) 88 | .done(function () { 89 | assert.ok(calledThen); 90 | assert.done(); 91 | }); 92 | }; 93 | 94 | module.exports.multipart = function (assert) { 95 | var calledThen = false; 96 | 97 | osmosis.get(url + '/submit-form') 98 | .submit('form[2] [name="sub3"]', { image: { file: __dirname + '/submit.js', content_type: 'application/javascript' } }) 99 | .then(function (context) { 100 | calledThen = true; 101 | assert.equal(context.get('div').text(), 'success'); 102 | }) 103 | .done(function () { 104 | assert.ok(calledThen); 105 | assert.done(); 106 | }); 107 | }; 108 | 109 | function getInputs(form, submit) { 110 | var obj = {}, 111 | input, 112 | exclude = exclude || []; 113 | 114 | inputs = (form === 2) ? 115 | inputs2 : 116 | inputs1; 117 | 118 | for (input in inputs) { 119 | if (input.substr(0, 3) === 'sub' && input !== submit) { 120 | continue; 121 | } 122 | 123 | if (inputs[input].value === undefined) { 124 | continue; 125 | } 126 | 127 | obj[input] = inputs[input].value; 128 | } 129 | 130 | return obj; 131 | } 132 | 133 | var inputs1 = { 134 | 's1': { 135 | html: '', 136 | value: '2' 137 | }, 138 | 's2': { 139 | html: '', 140 | value: 'two' 141 | }, 142 | 'cb1': { 143 | html: '', 144 | value: undefined 145 | }, 146 | 'cb2': { 147 | html: '', 148 | value: 'two' 149 | }, 150 | 'cb3[0]': { 151 | html: '', 152 | value: 'one' 153 | }, 154 | 'cb3[1]': { 155 | html: '', 156 | value: 'on' 157 | }, 158 | 'cb3[2]': { 159 | html: '', 160 | value: 'on' 161 | }, 162 | 'it': { 163 | html: '', 164 | value: undefined 165 | }, 166 | 'ta': { 167 | html: '', 168 | value: 'text area test' 169 | }, 170 | 'sub1': { 171 | html: '', 172 | value: 'submit' 173 | }, 174 | 'sub2': { 175 | html: '', 176 | value: 'Submit 2' 177 | } 178 | }; 179 | 180 | var inputs2 = { 181 | 'it1': { 182 | html: '', 183 | value: 'test' 184 | }, 185 | 'sub2': { 186 | 
html: '', 187 | value: 'Submit Query' 188 | }, 189 | 'sub1': { 190 | html: '