├── .gitignore
├── .npmignore
├── .travis.yml
├── Changes.md
├── Readme.md
├── benchmark
├── index.js
├── server
│ ├── index.html
│ └── index.js
└── tests
│ ├── osmosis.js
│ └── x-ray.js
├── index.js
├── jsdoc.json
├── lib
├── Command.js
├── Data.js
├── Form.js
├── Queue.js
├── Request.js
└── commands
│ ├── click.js
│ ├── config.js
│ ├── contains.js
│ ├── cookie.js
│ ├── data.js
│ ├── delay.js
│ ├── do.js
│ ├── done.js
│ ├── fail.js
│ ├── filter.js
│ ├── find.js
│ ├── follow.js
│ ├── get.js
│ ├── header.js
│ ├── headers.js
│ ├── if.js
│ ├── learn.js
│ ├── login.js
│ ├── match.js
│ ├── paginate.js
│ ├── parse.js
│ ├── pause.js
│ ├── proxy.js
│ ├── resume.js
│ ├── rewrite.js
│ ├── set.js
│ ├── stop.js
│ ├── submit.js
│ ├── success.js
│ ├── then.js
│ ├── train.js
│ ├── trigger.js
│ ├── use.js
│ └── using.js
├── package-lock.json
├── package.json
└── test
├── click.js
├── config.js
├── do.js
├── filters.js
├── find.js
├── follow.js
├── get.js
├── internals.js
├── login.js
├── paginate.js
├── parse.js
├── process_response_option.js
├── proxy.js
├── resume.js
├── run.js
├── save.js
├── server
└── index.js
├── set.js
├── stop.js
├── submit.js
├── then.js
├── user_agent_option.js
└── z_close.js
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | docs/
3 | npm-debug.log
4 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | node_modules
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js:
3 | # - "0.10"
4 | - 8
5 |
--------------------------------------------------------------------------------
/Changes.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | #### TODO:
4 |
5 | * Add `.learn()` to generate a selector for a selected node
6 | * Add `.listen()` for easily creating DOM event listeners
7 | * Add `.trigger()` for easily triggering DOM events
8 | * Add `.on()` for binding callback to a local-only event
9 | * Add `.url()` to set the current URL
10 | * Add `.params()` to set the current URL parameters
11 | * Add `.save()` to save response data to a file
12 | * Add `.add()`, `.remove()` for node creation/deletion?
13 | * Add `.scroll()` to scrape infinite scroll pages
14 | * Add warnings for parser errors?
15 | * Switch to semantic versioning?
16 |
17 | ## Next major release:
18 |
19 | * Event/error handling
20 | * Error.code = 404, 'timeout', etc.
21 | * Error.module = 'http', 'dom', etc.
22 | * return true = retry, false = stop, anything else = continue
23 | * Event for discontinued context/data
24 | * Module system using osmosis.require and modules prefixed with `osmosis-`
25 | * Way to trigger DOM
26 | * Throw unhandled errors?
27 | * `.while()` to do things more than once as long as they call next()
28 |
29 | ## 0.1.5
30 |
31 | * Fixed bug where .get() without `params` caused empty query string ('?')
32 | * Preserve sort order for `.follow()` results within `.set()`
33 |
34 | ## 0.1.4
35 |
36 | #### `get`
37 |
38 | * Removed `opts` and `callback` arguments
39 |
40 | #### `set`
41 |
42 | * Supports an array as the root data object
43 | * Fixed case where nested `.find` searches the entire document
44 |
45 | ## 0.1.3
46 |
47 | * parseHtml uses `huge` option by default
48 | * Fixed nested Osmosis instances inside `set`
49 | * Update to `libxmljs-dom` v0.0.5
50 |
51 | #### `set`
52 |
53 | * Fixed nested Osmosis instances inside `set`
54 | * Added tests for nested set data
55 |
56 | #### `submit`
57 |
58 | * Proper `submit` button handling
59 | * Accepts a `submit` button selector as the first argument
60 | * Supports `submit` button attributes: "form", "formaction", "formenctype" and "formmethod"
61 | * Added tests for `submit` button handling
62 |
63 | ## 0.1.2
64 |
65 | * Update to `libxmljs-dom` v0.0.4
66 |
67 | ## 0.1.1
68 |
69 | * `proxy` option can now be an array of multiple proxies
70 |
71 | #### `proxy`
72 |
73 | * Added `.proxy()` to easily set the `proxy` configuration option
74 |
75 | #### `then`
76 |
77 | * If the first argument's name is:
78 | * "document" - The callback is given the current document
79 | * "window" - The callback is given the Window object
80 | * "$" - The callback is given a jQuery object (if available)
81 |
82 | ### Internal changes:
83 |
84 | * Uses 'use strict'
85 | * Minimize use of array.forEach
86 | * Added libxml specific memoryUsage monitoring
87 | * Switched to static `libxmljs-dom` version
88 |
89 | ## 0.1.0
90 |
91 | * Added `ignore_http_errors` option
92 | * Added `:internal` for selecting internal links
93 | * Added `:external` for selecting external links
94 | * Added `:domain` for searching by domain name
95 | * Added `:path` for searching by path
96 |
97 | #### `config`
98 |
99 | * Configuration options are inherited down the chain
100 |
101 | #### `contains`
102 |
103 | * Added `.contains(string)` to discard nodes whose contents do not match `string`
104 |
105 | #### `do`
106 |
107 | * Added `.do()` to call one or more commands using the current context
108 |
109 | #### `failure` (or `fail`)
110 |
111 | * Added `.failure(selector)` to discard nodes that match the given selector
112 |
113 | #### `filter` (or `success`)
114 |
115 | * Added `.filter(selector)` to discard nodes that do not match the given selector
116 |
117 | #### `get`
118 |
119 | * Accepts a tokenized URL string
120 | * @{...} - Request info (url, method, params, headers, etc.)
121 | * %{...} - `data` object
122 | * ${...} - `context` search
123 |
124 | #### `headers` (or `header`)
125 |
126 | * Added `headers({ key: value })` and `header(key, value)` to set HTTP headers
127 |
128 | #### `match`
129 |
130 | * Added `.match([selector], RegExp)` to discard nodes whose contents do not match
131 |
132 | #### `rewrite`
133 |
134 | * Added `.rewrite(callback)` to set a URL rewriting function for the preceding request
135 |
136 | ### Internal changes:
137 |
138 | * `promise.args` is now an object (used to be an array)
139 | * HTTP 400 errors are now logged and the requests are retried.
140 |
141 | ## 0.0.9
142 |
143 | * DOM and css2xpath functionality have been moved to `libxmljs-dom`
144 | * Added `keep_data` option to retain the original HTTP response
145 | * Added `process_response` option for processing data before parsing
146 | * Added test suite
147 |
148 | #### `click`
149 |
150 | * Added `.click()` for interacting with JS-only content
151 |
152 | #### `delay`
153 |
154 | * Added `.delay(n)` for waiting n seconds before calling next. Accepts a decimal value.
155 |
156 | #### `find`
157 |
158 | * Accepts an array of selectors as the first argument
159 |
160 | #### `follow`
161 |
162 | * Accepts second argument. Boolean (true = follow external links) or a URL rewriting function.
163 |
164 | #### `get`
165 |
166 | * Accepts `function(context, data)` as the first argument. The function must return a URL string.
167 |
168 | #### `parse`
169 |
170 | * Added second argument to associate a base-url to the document
171 |
172 | #### `then`
173 |
174 | * Added optional `done` argument
175 |
176 | #### `select`
177 |
178 | * Added `.select` for finding elements within the current context
179 |
180 | #### `set`
181 |
182 | * Replaces previously set values
183 |
184 | ### Internal changes:
185 |
186 | * Enhanced stack counting
187 | * Added data object ref counting
188 | * Added domain specific cookie handling
189 | * Improved stability of deep instance nesting with `.set()`
190 | * Osmosis instances operate more independently
191 | * Request queues are now a single array for each instance
192 | * Promises must accept and call `done` if they asynchronously
193 | send more than one output context per input context
194 | * If `.then` sends more than one output context per input context,
195 | then it must accept `done()` as its last argument and
196 | call it after calling `next()` for the last time.
197 |
198 | ## 0.0.8
199 |
200 | #### `config`
201 |
202 | * Ensure non-default `needle` options propagate
203 |
204 | ## 0.0.7
205 |
206 | #### `paginate`
207 |
208 | * Added a more intuitive method for pagination
209 |
210 | #### `submit`
211 |
212 | * Added easy form submission
213 |
214 | #### `login`
215 |
216 | * Added easy login support
217 |
218 | #### `pause`, `resume`, `stop`
219 |
220 | * Added pause, resume, and stop functionality
221 |
222 | #### `find`
223 |
224 | * Searches the entire document by default
225 |
226 | #### `set`
227 |
228 | * Supports innerHTML using `:html` or `:source` in selectors
229 | * Supports deep JSON structures and nested Osmosis instances
230 |
231 | #### `data`
232 |
233 | * `.data(null)` clears the data object
234 | * `.data({})` appends keys to data object
235 |
236 | #### `dom`
237 |
238 | * `.dom()` is still a work in progress, but it can now run jQuery
239 |
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 | # Osmosis
2 |
3 | HTML/XML parser and web scraper for NodeJS.
4 |
5 | [npm package](https://www.npmjs.com/package/osmosis)
6 |
7 | [Build status](https://travis-ci.org/rchipka/node-osmosis)
8 |
9 | 
10 |
11 | ## Features
12 |
13 | - Uses native libxml C bindings
14 | - Clean promise-like interface
15 | - Supports CSS 3.0 and XPath 1.0 selector hybrids
16 | - [Sizzle selectors](https://github.com/jquery/sizzle/wiki#other-selectors-and-conventions),
17 | [Slick selectors](http://mootools.net/core/docs/1.6.0/Slick/Slick), and
18 | [more](https://github.com/rchipka/node-osmosis/blob/master/docs/Selectors.md)
19 | - No large dependencies like jQuery, cheerio, or jsdom
20 | - Compose deep and complex data structures
21 |
22 | - HTML parser features
23 | - Fast parsing
24 | - Very fast searching
25 | - Small memory footprint
26 |
27 | - HTML DOM features
28 | - Load and search ajax content
29 | - DOM interaction and events
30 | - Execute embedded and remote scripts
31 | - Execute code in the DOM
32 |
33 | - HTTP request features
34 | - Logs urls, redirects, and errors
35 | - Cookie jar and custom cookies/headers/user agent
36 | - Login/form submission, session cookies, and basic auth
37 | - Single proxy or multiple proxies and handles proxy failure
38 | - Retries and redirect limits
39 |
40 | ## Example
41 |
42 | ```javascript
43 | var osmosis = require('osmosis');
44 |
45 | osmosis
46 | .get('www.craigslist.org/about/sites')
47 | .find('h1 + div a')
48 | .set('location')
49 | .follow('@href')
50 | .find('header + div + div li > a')
51 | .set('category')
52 | .follow('@href')
53 | .paginate('.totallink + a.button.next:first')
54 | .find('p > a')
55 | .follow('@href')
56 | .set({
57 | 'title': 'section > h2',
58 | 'description': '#postingbody',
59 | 'subcategory': 'div.breadbox > span[4]',
60 | 'date': 'time@datetime',
61 | 'latitude': '#map@data-latitude',
62 | 'longitude': '#map@data-longitude',
63 | 'images': ['img@src']
64 | })
65 | .data(function(listing) {
66 | // do something with listing data
67 | })
68 | .log(console.log)
69 | .error(console.log)
70 | .debug(console.log)
71 | ```
72 |
73 | ## Documentation
74 |
75 | For documentation and examples check out [https://rchipka.github.io/node-osmosis/global.html](https://rchipka.github.io/node-osmosis/global.html)
76 |
77 | ## Dependencies
78 |
79 | - [libxmljs-dom](https://github.com/rchipka/node-libxmljs-dom) - DOM wrapper for [libxmljs](https://github.com/libxmljs/libxmljs) C bindings
80 | - [needle](https://github.com/tomas/needle) - Lightweight HTTP wrapper
81 |
82 | ## Donate
83 |
84 | Please consider a donation if you depend on web scraping and Osmosis makes your job a bit easier.
85 | Your contribution allows me to spend more time making this the best web scraper for Node.
86 |
87 | [Donate via PayPal](https://www.paypal.com/cgi-bin/webscr?item_name=node-osmosis&cmd=_donations&business=NAXMWBMWKUWUU)
88 |
--------------------------------------------------------------------------------
/benchmark/index.js:
--------------------------------------------------------------------------------
1 | var fs = require('fs'),
2 | testPath = __dirname + '/tests/',
3 | tests = fs.readdirSync(testPath),
4 | server = require('./server'),
5 | cycles = 500;
6 |
/**
 * Collects timestamped events for one benchmark cycle.
 *
 * @constructor
 * @param {function} callback - Called by `stop` with the Timer as `this`
 */
function Timer(callback) {
    this.callback = callback;
    this.events = [];

    return this;
}

/**
 * Record the `start` event; `stop` measures its duration against
 * the first recorded event.
 */
Timer.prototype.start = function () {
    var now = ms();

    this.events.push({ name: 'start', time: now, duration: 0 });
};

/**
 * Record an intermediate event whose duration is the time elapsed
 * since the previously recorded event.
 *
 * @param {string} name - Label for the event
 */
Timer.prototype.done = function (name) {
    var previous = this.events[this.events.length - 1],
        now = ms();

    this.events.push({
        name: name,
        time: now,
        duration: now - previous.time
    });
};

/**
 * Record the `stop` event (duration measured from the first event)
 * and invoke the callback given to the constructor.
 */
Timer.prototype.stop = function () {
    var now = ms(),
        first = this.events[0];

    this.events.push({
        name: 'stop',
        time: now,
        duration: now - first.time
    });

    this.callback.call(this);
};

/**
 * @returns {number} Current time in milliseconds since the epoch
 */
function ms() {
    return Date.now();
}
50 |
/**
 * Load the benchmark module at position `i` of `tests`, run every
 * benchmark it exports, then move on to the next module. Once the
 * last module has finished, `testsFinished` is called.
 *
 * @param {number} i - Index into the `tests` file list
 */
function loadTest(i) {
    var suite = require(testPath + tests[i]),
        names = Object.keys(suite);

    loadBenchmark(suite, names, 0, function () {
        i += 1;

        if (i >= tests.length) {
            testsFinished();
            return;
        }

        loadTest(i);
    });
}

loadTest(0);
62 |
/**
 * Run the benchmarks exported by one module, one after another.
 *
 * @param {object} benchmark - Module exports, mapping name -> start function
 * @param {string[]} keys - Names of the benchmarks to run
 * @param {number} index - Position in `keys` of the benchmark to run now
 * @param {function} done - Called once every benchmark in `keys` has run
 */
function loadBenchmark(benchmark, keys, index, done) {
    var name = keys[index];

    // Nothing (left) to run, e.g. a module that exports no benchmarks.
    if (index >= keys.length) {
        done();

        return;
    }

    console.log("Starting " + name + " - " + cycles + " cycles");
    runBenchmark(name, benchmark[name], function () {
        // Continue while there are more keys. The previous comparison was
        // inverted, so every benchmark after the first was skipped.
        if (++index < keys.length) {
            loadBenchmark(benchmark, keys, index, done);
        } else {
            done();
        }
    }, []);
}
75 |
/**
 * Run one benchmark repeatedly until `cycles` durations have been
 * collected, print the mean duration, then call `done`.
 *
 * @param {string} name - Benchmark name used in the report line
 * @param {function} start - Benchmark entry point, called as start(timer, url)
 * @param {function} done - Called after the final cycle
 * @param {number[]} array - Durations collected so far (mutated)
 */
function runBenchmark(name, start, done, array) {
    var timer = new Timer(function onCycleEnd() {
        var lastEvent = this.events[this.events.length - 1],
            sum;

        array.push(lastEvent.duration);

        if (array.length !== cycles) {
            runBenchmark(name, start, done, array);

            return;
        }

        sum = array.reduce(function (acc, duration) {
            return acc + duration;
        }, 0);

        console.log("Timing (" + name + "): " + (sum / array.length) + 'ms');
        done();
    });

    start(timer, server.url);
}
93 |
/**
 * Called after the last benchmark module has finished; closes the
 * local benchmark server.
 */
function testsFinished() {
    server.close();
}
97 |
--------------------------------------------------------------------------------
/benchmark/server/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Home
5 |
6 |
7 |
13 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/benchmark/server/index.js:
--------------------------------------------------------------------------------
1 | var http = require('http'),
2 | URL = require('url'),
3 | qs = require('querystring'),
4 | fs = require('fs'),
5 | file = fs.readFileSync(__dirname + '/index.html'),
6 | host = 'localhost',
7 | port = 1337,
8 | server;
9 |
// Serve files that live next to this script.
//
// Reads are asynchronous and failures answer with a 404, so a request
// for a missing file no longer throws inside the handler and takes the
// whole benchmark server down. Paths containing `..` are rejected so a
// request cannot escape this directory.
server = http.createServer(function (req, res) {
    var pathname = URL.parse(req.url).pathname || '/';

    if (pathname.indexOf('..') !== -1 || pathname.indexOf('\0') !== -1) {
        res.writeHead(403, { 'Content-Type': 'text/plain' });
        res.end('Forbidden');

        return;
    }

    fs.readFile(__dirname + pathname, function (err, body) {
        if (err) {
            res.writeHead(404, { 'Content-Type': 'text/plain' });
            res.end('Not found');

            return;
        }

        res.writeHead(200, { 'Content-Type': 'text/html' });
        res.end(body);
    });
});
16 |
// Log server-level errors (e.g. the port already being in use).
// The handler must declare its `error` parameter: referencing an
// undeclared `error` here would itself throw a ReferenceError.
server.on('error', function (error) {
    console.log('ERROR:', error);
});
20 |
21 | server.listen(port);
22 |
23 |
// Connection details for benchmark modules, plus a `close` hook so the
// runner can shut the server down when all benchmarks have finished.
module.exports.host = host;
module.exports.port = port;
module.exports.url = 'http://' + host + ':' + port + '/index.html';
module.exports.close = function () {
    server.close();
};
30 |
--------------------------------------------------------------------------------
/benchmark/tests/osmosis.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | var osmosis = require('../../');
4 |
/**
 * Osmosis benchmark: request `url`, follow every `a` link and find the
 * `title` of each followed page, stopping the timer when the instance
 * is done.
 *
 * @param {Timer} timer - Benchmark timer (`start` / `stop`)
 * @param {string} url - URL of the page to scrape
 */
module.exports.osmosis = function (timer, url) {
    timer.start();

    osmosis(url)
    .set({
        links: osmosis.follow('a').find('title')
    })
    .done(function () {
        timer.stop();
    });
};
16 |
--------------------------------------------------------------------------------
/benchmark/tests/x-ray.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | var Xray = require('x-ray');
4 |
/**
 * x-ray benchmark: collect every `a` element of `url`, then request the
 * page title once per collected element. The timer is stopped when the
 * last title request has called back.
 *
 * @param {Timer} timer - Benchmark timer (`start` / `stop`)
 * @param {string} url - URL of the page to scrape
 */
module.exports.xray = function (timer, url) {
    var x = Xray(), total = 0;

    timer.start();

    x(url, ['a'])(function (err, arr) {
        var i;

        // Surface the real failure instead of a TypeError on `arr.length`.
        if (err) {
            throw err;
        }

        // With no links, no inner callback would ever stop the timer and
        // the benchmark run would hang.
        if (!arr || arr.length === 0) {
            timer.stop();

            return;
        }

        for (i = 0; i < arr.length; i++) {
            x(url, { title: 'title' })(function () {
                if (++total === arr.length) {
                    timer.stop();
                }
            });
        }
    });
};
22 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | var Command = require('./lib/Command.js'),
4 | Queue = require('./lib/Queue.js'),
5 | request = require('./lib/Request.js'),
6 | libxml = require('libxmljs-dom'),
7 | instanceId = 0,
8 | memoryUsage = 0,
9 | cachedSelectors = {},
// Format a byte count as megabytes with `num` decimals. A falsy `num`
// (including 0) falls back to 2 decimals.
toMB = function (size, num) {
    return (size / 1024 / 1024).toFixed(num || 2) + 'Mb';
},

// Shallow-copy the own enumerable properties of `donor` onto `object`
// and return `object`.
extend = function (object, donor) {
    var key, keys = Object.keys(donor),
        i = keys.length;

    while (i--) {
        key = keys[i];
        object[key] = donor[key];
    }

    return object;
};
25 |
26 | /**
27 | *
28 | * Unless called with `new`, Osmosis will start automatically.
29 | * To start an instance created with `new`, use {@link Osmosis.run}.
30 | *
31 | * @constructor Osmosis
32 | *
33 | * @param {(string|contextCallback)} url - A URL
34 | * @param {object} [params] - GET query parameters
35 | * @returns Command
36 | * @see {@link Command.run}
37 | *
38 | * @example {@lang javascript}
39 | *
40 | * // These instances start immediately
41 | * osmosis.get('http://example.com');
42 | * osmosis('http://example.com');
43 | *
44 | * // These instances need to be started
45 | * instance = new osmosis.get('http://example.com');
46 | * instance.run();
47 | *
48 | * instance = new osmosis('http://example.com');
49 | * instance.run();
50 | */
51 |
function Osmosis(url, params) {
    var calledWithNew = this instanceof Osmosis;

    // Without a URL this is a plain constructor call: set up a bare
    // instance with its own queue, root command and id.
    if (url === undefined) {
        this.queue = new Queue(this);
        this.command = new Command(this);
        this.id = ++instanceId;

        return;
    }

    // With a URL, delegate to the `get` starting command, preserving
    // whether the caller used `new`.
    return calledWithNew
        ? new Osmosis.get(url, params)
        : Osmosis.get(url, params);
}
65 |
66 |
67 | /**
68 | * @name options
69 | *
70 | * Osmosis and {@link https://github.com/tomas/needle|needle} options.
71 | *
72 | * @property {string} accept - HTTP Accept header
73 | * @property {bool} compressed - Compress HTTP requests
74 | * @property {number} concurrency - Number of simultaneous HTTP requests
75 | * @property {bool} decode_response - Decode compressed HTTP responses
76 | * @property {number} follow - Number of redirects to follow
77 | * @property {bool} follow_set_cookies - Set cookies for redirects
78 | * @property {bool} follow_set_referer - Set referer header for redirects
79 | * @property {bool} keep_data - Keep raw HTTP data in
80 | context.response.data
81 | * @property {number} timeout - HTTP request timeout
82 | * @property {number} tries - HTTP request attempts
83 | * @property {(string|function)} user_agent - HTTP user agent
84 | * @memberof Osmosis
85 | * @instance
86 | * @default
87 | */
88 |
// Default options. This object lives on the prototype, so it is shared
// by every instance that has not been given its own `opts`.
Osmosis.prototype.opts = {
    accept: 'text/html,application/xhtml+xml,' +
            'application/xml;q=0.9,*/*;q=0.8',
    compressed: true,
    concurrency: 5, // compared against queue.requests in queueRequest
    decode_response: true,
    follow: 3,
    follow_set_cookies: true,
    follow_set_referer: true,
    keep_data: false,
    parse_cookies: true, // Parse "Set-Cookie" header
    parse_response: false,
    // NOTE(review): presumably passed through to the HTTP client and
    // disables TLS certificate verification - confirm this is intended.
    rejectUnauthorized: false,
    statsThreshold: 25,
    timeout: 30 * 1000,
    tries: 3, // upper bound for the retry counter used in `request`
    user_agent: 'Mozilla/5.0 (Windows NT x.y; rv:10.0) ' +
                'Gecko/20100101 Firefox/10.0'
};
108 |
109 | /**
110 | * Configure global Osmosis options.
111 | *
112 | * @function config
113 | * @memberof Osmosis
114 | * @param {string|object} option - A string `key` or an object of
115 | * { key: value } pairs.
116 | * @param {any} [value] - A value for the `key`
117 | * @instance
118 | * @see {@link Command.config}
119 | * @see {@link Osmosis.options}
120 | */
121 |
// Called as `Osmosis.config(...)` this edits the shared prototype
// defaults; called on an instance it edits that instance's `opts`
// (creating the object if the instance has none).
//
//   config()            -> returns the options object
//   config(key, value)  -> sets one option
//   config({ k: v })    -> copies every pair onto the options
//   config(key)         -> returns the current value of `key`
Osmosis.config =
Osmosis.prototype.config = function (option, value) {
    var hasPrototype = (this.prototype !== undefined),
        opts, key;

    if (hasPrototype === true) {
        opts = this.prototype.opts;
    } else if (this.opts === undefined) {
        opts = this.opts = {};
    } else {
        opts = this.opts;
    }

    if (option === undefined) {
        return opts;
    }

    if (value !== undefined) {
        opts[option] = value;

        return;
    }

    // A lone string key is a read. Iterating it with `for..in` would
    // copy its characters into the options as keys '0', '1', ...
    if (typeof option === 'string') {
        return opts[option];
    }

    for (key in option) {
        opts[key] = option[key];
    }
};
147 |
148 | /**
149 | * Run (or re-run) an Osmosis instance.
150 | *
151 | * If you frequently use the same Osmosis instance
152 | * (such as in an Express server), it's much more efficient to
153 | * initialize the instance once and repeatedly use `run` as needed.
154 | *
155 | * @borrows Command.run
156 | * @see {@link Command.run}
157 | */
Osmosis.prototype.run = function () {
    // Defer to the next tick; only then mark the instance as started
    // and kick off its root command.
    process.nextTick(function startInstance() {
        this.started = true;
        this.command.start();
    }.bind(this));
};
166 |
167 | /**
168 | * Make an HTTP request.
169 | *
170 | * @private
171 | */
172 |
// Performs one HTTP attempt for `url` (an object carrying at least
// `href`, `method` and `params`) and, once the response arrives, either
// re-queues the request for another try or hands the result to
// `callback(err, res, data)`.
Osmosis.prototype.request = function (url, opts, callback, tries) {
    // NOTE(review): `method` and `params` are never read below; the call
    // uses `url.method` / `url.params` directly.
    var self = this,
        href = url.href,
        method = url.method,
        params = url.params;

    // Bookkeeping: lifetime request counter, in-flight counter, and one
    // queue.push() that is balanced by queue.pop() in the response handler.
    this.requests++;
    this.queue.requests++;
    this.queue.push();

    // A function-valued user agent is resolved once and the result is
    // stored back on `opts`.
    if (typeof opts.user_agent === 'function') {
        opts.user_agent = opts.user_agent();
    }

    request(url.method,
            url,
            url.params,
            opts,
            tries,
            function (err, res, data) {
                var proxies = opts.proxies;

                self.queue.requests--;

                // NOTE(review): this condition does not look at `err`, so
                // with proxies configured it is true for every response
                // whose status is not 404, including successful ones.
                // Confirm whether a proxy failure should require an error.
                if ((res === undefined || res.statusCode !== 404) &&
                    proxies !== undefined) {
                    self.command.error('proxy ' + (proxies.index + 1) +
                                        '/' + proxies.length +
                                        ' failed (' + opts.proxy + ')');

                    // remove the failing proxy
                    // NOTE(review): after the splice, `proxies.index` may
                    // equal `proxies.length`, leaving `opts.proxy`
                    // undefined - verify how `index` is maintained.
                    if (proxies.length > 1) {
                        opts.proxies.splice(proxies.index, 1);
                        opts.proxy = proxies[proxies.index];
                    }
                }

                // Retry on error while attempts remain; otherwise deliver
                // the result (or the final error) to the caller.
                if (err !== null && ++tries < opts.tries) {
                    self.queueRequest(url, opts, callback, tries);

                    if (self.opts.log === true) {
                        self.command.error(err + ', retrying ' +
                                         url.href + ' (' +
                                         (tries + 1) + '/' +
                                         opts.tries + ')');
                    }
                } else {
                    callback(err, res, data);
                }

                // A slot was freed above: start the next queued request,
                // then release the queue entry taken at the top.
                self.dequeueRequest();
                self.queue.pop();
            })
            .on('redirect', function (new_url) {
                if (self.opts.log === true) {
                    self.command.log('[redirect] ' +
                                     href + ' -> ' + new_url);
                }
            });
};
233 |
234 | /**
235 | * Add a request to the queue.
236 | *
237 | * @param {object} url - Request descriptor (`href`, `method`, `params`)
238 | * @param {object} opts - HTTP request options
239 | * @param {function} callback - Function to call when done
240 | * @param {number} [tries] - Attempts made so far (defaults to 0)
242 | * @private
243 | */
244 |
Osmosis.prototype.queueRequest = function (url, opts, callback, tries) {
    var attempt = (tries === undefined) ? 0 : tries,
        atCapacity = !(this.queue.requests < this.opts.concurrency);

    // With every concurrency slot taken the request is parked on the
    // queue; otherwise it is sent straight away.
    if (atCapacity) {
        this.queue.enqueue([url, opts, callback, attempt]);

        return;
    }

    this.request(url, opts, callback, attempt);
};
259 |
// Start the next queued request, provided something is queued and a
// concurrency slot is free.
Osmosis.prototype.dequeueRequest = function () {
    var queue = this.queue,
        pending = queue.length,
        entry;

    if (pending !== 0 && !(queue.requests >= this.opts.concurrency)) {
        // Entries are the [url, opts, callback, tries] arrays stored by
        // queueRequest.
        entry = queue.dequeue();

        this.request(entry[0], entry[1], entry[2], entry[3]);
    }
};
271 |
272 | /**
273 | * Parse XML/HTML data.
274 | *
275 | * @param {string|buffer} data - The data to parse
276 | * @param {object} opts - libxmljs parse options
277 | * @private
278 | * @see Command.parse
279 | */
280 |
Osmosis.prototype.parse = function (data, opts) {
    // Always parse as HTML: this avoids libxml namespaces, which would
    // get in the way when searching the document.
    var parsed = libxml.parseHtml(data, opts),
        hasBaseUrl = opts !== undefined && opts.baseUrl !== undefined;

    if (hasBaseUrl) {
        parsed.location = opts.baseUrl;
    }

    return parsed;
};
295 |
296 | /**
297 | * Print Node.JS process statistics via {@link Command.debug}.
298 | *
299 | * @private
300 | */
301 |
Osmosis.prototype.resources = function () {
    var usage = process.memoryUsage(),
        delta = toMB(usage.rss - memoryUsage),
        libxmlBytes = libxml.memoryUsage(),
        nodeCount = libxml.nodeCount(),
        libxmlShare, heapShare;

    // Without the `debug` option the method replaces itself with `null`
    // on this instance and reports nothing.
    if (this.opts.debug !== true) {
        this.resources = null;

        return;
    }

    if (nodeCount >= 1000) {
        nodeCount = (nodeCount / 1000).toFixed(0) + 'k';
    }

    // Give non-negative deltas an explicit sign.
    if (delta.charAt(0) !== '-') {
        delta = '+' + delta;
    }

    libxmlShare = ((libxmlBytes / usage.rss) * 100).toFixed(1);
    heapShare = ((usage.heapUsed / usage.heapTotal) * 100).toFixed(0);

    this.command.debug([
        'stack: ' + this.queue.count,
        'requests: ' + this.requests +
            ' (' + this.queue.requests + ' queued)',
        'RAM: ' + toMB(usage.rss) + ' (' + delta + ')',
        'libxml: ' + libxmlShare + '% (' + nodeCount + ' nodes)',
        'heap: ' + heapShare + '% of ' + toMB(usage.heapTotal)
    ].join(', '));

    // Remember the resident set size so the next report shows a delta.
    memoryUsage = usage.rss;
};
340 |
341 | /**
342 | * Set the parent instance for this instance.
343 | *
344 | * Inherit the parent's queue and options.
345 | *
346 | * @private
347 | * @param {Command} parent - The parent Command.
348 | */
349 |
Osmosis.prototype.setParent = function (parent) {
    var owner;

    this.parent = parent;

    // Share the queue and options of the instance that owns the parent
    // command, rather than keeping separate ones.
    owner = parent.instance;
    this.queue = owner.queue;
    this.opts = owner.opts;
};
355 |
356 | /**
357 | * Resume the current instance.
358 | *
359 | * @param {function} callback - A function to call when resuming
360 | * @borrows Command.resume
361 | * @private
362 | */
363 |
// Two modes:
//   resume(fn) - remember `fn` so it can be run on the next resume
//   resume()   - run every remembered function once, then start the
//                next queued request
Osmosis.prototype.resume = function (arg) {
    var pending, i;

    if (typeof arg === 'function') {
        if (this.resumeQueue === undefined) {
            this.resumeQueue = [];
        }

        this.resumeQueue.push(arg);

        return;
    }

    // Take ownership of the queued callbacks and reset the queue before
    // running them. This keeps callbacks from an earlier pause from being
    // run again by a later resume, and collects anything queued while
    // they run for the next resume. Resuming with nothing queued is a
    // no-op instead of a TypeError.
    pending = this.resumeQueue || [];
    this.resumeQueue = [];

    for (i = 0; i < pending.length; ++i) {
        pending[i]();
    }

    this.dequeueRequest();
};
383 |
// Prototype-level defaults; an instance gets its own copy of a value
// the first time it assigns to the property.
Osmosis.prototype.requests = 0;
Osmosis.prototype.paused = false;
Osmosis.prototype.stopped = false;

// Short label identifying the instance by its numeric id.
Osmosis.prototype.inspect = function () {
    return 'Osmosis:' + this.id;
};
390 |
391 | // Allow use of commands without creating a new instance:
392 |
// For every enumerable own property of Command.prototype that Osmosis
// does not already define, expose a static function of the same name
// that creates a fresh instance and calls the command on its root command.
Object.keys(Command.prototype).forEach(function (name) {
    if (Osmosis[name] !== undefined) {
        return;
    }

    // NOTE(review): only the first three arguments are forwarded; any
    // command taking more would lose the rest when started this way.
    Osmosis[name] = function StartingFunction(arg1, arg2, arg3) {
        var instance = new Osmosis(),
            command = instance.command;

        // Records whether the caller used `new`; the Command constructor
        // checks this flag before auto-running the instance.
        instance.calledWithNew = (this instanceof StartingFunction);

        return command[name](arg1, arg2, arg3);
    };
});
407 |
408 | // libxmljs overrides:
409 |
// Keep the original `find` implementations reachable as `findXPath`
// before `find` is replaced below.
libxml.Document.prototype.findXPath = libxml.Document.prototype.find;
libxml.Element.prototype.findXPath = libxml.Element.prototype.find;

// Searching a document delegates to its root element.
// NOTE(review): `cache` is passed along, but Element.prototype.find
// declares only `selector` - looks unused; confirm.
libxml.Document.prototype.find = function (selector, cache) {
    return this.root().find(selector, cache);
};
416 |
// Find nodes by XPath or CSS selector.
//
// Selectors starting with `/` or `(`, or having `/` as their second
// character, are passed to `findXPath` unchanged. Anything else is
// converted with `libxml.css2xpath`; conversions are memoised in
// `cachedSelectors`.
libxml.Element.prototype.find = function (selector) {
    if (selector.charAt(1) === '/' ||
        selector.charAt(0) === '/' ||
        selector.charAt(0) === '(') {
        return this.findXPath(selector);
    }

    // Own-property check: a plain `=== undefined` lookup would treat
    // selectors such as `constructor` or `toString` as already cached and
    // hand an inherited Object.prototype member to `findXPath`.
    if (!Object.prototype.hasOwnProperty.call(cachedSelectors, selector)) {
        cachedSelectors[selector] = libxml.css2xpath(selector);
    }

    return this.findXPath(cachedSelectors[selector]) || [];
};
428 |
429 | /**
430 | * @typedef {object} context
431 | *
432 | * An XML/HTML DOM object representing a Document, Element, Attribute
433 | * or other Node.
434 | */
435 |
436 | /**
437 | * @typedef {object} data
438 | *
439 | * An object containing values set by `.set`
440 | * @see {@link Command.set}
441 | */
442 |
443 | /**
444 | * @typedef {string} Selector
445 | *
446 | * A CSS/XPath selector
447 | * @see {@link https://github.com/css2xpath/css2xpath|Selectors}
448 | */
449 |
450 | /**
451 | * A callback function that returns the desired value.
452 | *
453 | * @callback middlewareCallback
454 | * @param {context} context - The current XML/HTML context node.
455 | * @param {data} data - The current data object.
456 | */
457 |
458 | Osmosis.libxmljs = libxml;
459 |
460 | module.exports = Osmosis;
461 |
--------------------------------------------------------------------------------
/jsdoc.json:
--------------------------------------------------------------------------------
1 | {
2 | "source": {
3 | "include": [ "index.js", "lib" ]
4 | },
5 | "opts": {
6 | "destination": "docs",
7 | "recurse": true,
8 | "template": "node_modules/ink-docstrap/template"
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/lib/Command.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | var Data = require('./Data.js'),
4 | URL = require('url'),
5 | fs = require('fs'),
6 | qs = require('querystring'),
7 | formFunctions = require('./Form.js'),
8 | cmdDir = __dirname + '/commands/';
9 |
10 | /**
11 | * An Osmosis command.
12 | *
13 | * @constructor Command
14 | * @protected
15 | * @param {object} parent - parent instance
16 | * @returns Command
17 | */
18 |
function Command(parent) {
    // `instanceof` is used rather than Object.getPrototypeOf(parent),
    // because the latter throws a TypeError when `parent` is undefined,
    // a case the `parent !== undefined` branch below allows for.
    if (parent instanceof Command) {
        // parent is a Command: chain to it and resolve `instance`
        // through the previous command instead of storing it locally.
        this.prev = parent;
        Object.defineProperty(this, 'instance', {
            get: Command.prototype.getInstance,
            set: Command.prototype.setInstance
        });
    } else if (parent !== undefined) {
        // `parent` is an Osmosis instance
        this.instance = parent;
        // Call `process.nextTick` so other instances can initialize
        process.nextTick(function () {
            // Attempt to auto-run the instance only IF:
            //  * Not created using `new`
            //  * Not a child instance
            if (parent.calledWithNew !== true &&
                parent.parent === undefined) {
                process.nextTick(function () {
                    // Run on nextTick to allow any
                    // runtimeCommands to finish first
                    parent.run();
                });
            }
        });
    }

    return this;
}
51 |
// Base prototype shared by all commands: marker flag, context-switching
// accessors, and the `instance` accessors installed by the constructor
// on commands that have a previous command.
Command.prototype = {
    isCommand: true,

    /**
     * Change context to the current Document.
     *
     * @property document
     * @see {@link Command.doc}
     */

    get document() {
        return this.doc();
    },

    /**
     * Else.
     *
     * @property else
     * @private
     * @see {@link Command.if}
     */

    // NOTE(review): as written, `this.else` inside this getter resolves
    // to this same accessor, so reading it looks self-recursive unless
    // `else` is redefined elsewhere - confirm.
    get else() {
        return this.else();
    },

    // Give `command` the same Osmosis instance as this command and
    // return it.
    inherit: function (command) {
        command.instance = this.instance;

        return command;
    },

    /**
     * Change context to the current Window.
     *
     * @property window
     */

    get window() {
        return this.getWindow();
    },

    // Accessor pair used for `instance` on chained commands: reads and
    // writes are forwarded to the previous command in the chain.
    getInstance: function () {
        return this.prev.instance;
    },

    setInstance: function (val) {
        return (this.prev.instance = val);
    }
};
102 |
/**
 * Run the instance this command belongs to.
 *
 * @param {context} context - HTML/XML context
 * @param {data} data - User defined Data
 * @returns whatever the instance's `run` returns
 */
Command.prototype.run = function (context, data) {
    var owner = this.instance;

    return owner.run(context, data);
};
106 |
/**
 * Start a Command.
 *
 * Does nothing for a `null` context or a stopped instance, re-schedules
 * itself through `instance.resume` while the instance is paused, and
 * otherwise invokes the command callback (`this.cb`) with `next`/`done`
 * continuations. A command without a callback hands over to the next
 * command, or to `end` when there is no next command.
 *
 * @private
 * @function start
 * @param {context} context - HTML/XML context
 * @param {data} data - User defined Data
 * @memberof Command
 */

Command.prototype.start = function (context, data) {
    var command = this,
        following = this.next,
        osmosis = this.instance,
        handler = this.cb,
        forwarded = false,
        forward, finish;

    if (context === null || osmosis.stopped === true) {
        return;
    }

    if (osmosis.paused === true) {
        osmosis.resume(function () {
            command.start(context, data);
        });

        return;
    }

    if (handler === undefined) {
        if (following !== undefined) {
            following.start(context, data);
        } else {
            this.end(context, data);
        }

        return;
    }

    osmosis.queue.push();

    if (data === undefined) {
        data = new Data();
    }

    data.ref();

    forward = function (c, d, index) {
        if (forwarded !== true) {
            forwarded = true;
            following.start(c, d.setSortIndex(index));
            return;
        }

        // `next` was already called once, so every further
        // call continues with its own clone of the data
        following.start(c, d.clone().setSortIndex(index).ref());
    };

    finish = function (err) {
        data.unref();

        if (forwarded !== true) {
            command.end(context, data);
        }

        if (err !== undefined) {
            command.error(err);
        }

        osmosis.queue.pop();
    };

    return handler.call(this, context, data, forward, finish);
};
182 |
/**
 * Called when we reach the end of the command chain.
 *
 * Closes the window that belongs to `context` (if any), counts the
 * finished context on the instance queue and merges `data` into its
 * parent Data object. Empty data is replaced by a clone holding the
 * text/value of the context node before it is merged.
 *
 * @private
 * @param {context} [context] - HTML/XML context
 * @param {data} [data] - User defined Data
 */

Command.prototype.end = function (context, data) {
    var window, parent;

    // We're on the "sentinel node", meaning
    // We've reached the end of the command chain
    if (context !== undefined) {
        if (context.doc === undefined) {
            window = context.window;
        } else if (context.doc().__window !== undefined) {
            window = context.doc().defaultView;
        }

        if (window !== undefined) {
            // close `window` when it reaches the last command
            window.close();
        }

        this.instance.queue.done++;
    }

    if (data === undefined) {
        return;
    }

    parent = data.parent;

    if (parent === undefined) {
        return;
    }

    if (data.isEmpty()) {
        data = data.clone();

        // `context` may be undefined here, so it has to be checked
        // before its text/value accessors are looked up
        if (context !== undefined && context !== null) {
            if (context.text !== undefined) {
                data.setObject(context.text());
            } else if (context.value !== undefined) {
                data.setObject(context.value());
            }
        }
    }

    parent.merge(data);
    data.unref();
};
228 |
/**
 * Get the current options and inherit previous options.
 *
 * The options object is created lazily on first use; its prototype is
 * the options of the previous command or, for the first command,
 * the options of the instance.
 *
 * @private
 */

Command.prototype.getOpts = function () {
    var inherited;

    if (this.opts === undefined) {
        if (this.prev !== undefined) {
            inherited = this.prev.getOpts();
        } else if (this.instance !== undefined) {
            inherited = this.instance.opts;
        }

        this.opts = Object.create(inherited);
    }

    return this.opts;
};
252 |
253 |
/**
 * Set an option for the current command.
 *
 * When both the new and the inherited value are objects, the inherited
 * value is cloned and the new value is applied on top of the clone, so
 * neither the inherited object nor the caller's `value` is modified and
 * the keys being set take precedence. Any other value (including
 * functions) is assigned as is.
 *
 * @private
 * @param {string} name - Option name
 * @param {any} value - Option value
 * @returns {object} the options object of this command
 */

Command.prototype.setOpt = function (name, value) {
    var opts = this.getOpts(),
        inherited = opts[name],
        base;

    if (value !== null && typeof value === 'object' &&
        inherited !== null && typeof inherited === 'object') {
        base = Array.isArray(inherited) ?
            inherited.slice() :
            extend({}, inherited);
        opts[name] = extend(base, value);
    } else {
        opts[name] = value;
    }

    return opts;
};
276 |
/**
 * Internal HTTP request function.
 *
 * The number of parameters declared by `callback` selects its calling
 * convention: `(document)`, `(err, document)` or `(err, res, data)`.
 * The three-parameter form also turns response parsing off. A
 * one-parameter callback is not invoked when the request fails; the
 * failure is only reported through `this.error`.
 *
 * @param {string} method - Request method
 * @param {context} [context] - Node used to resolve `href` and to
 *                              supply referer and cookies
 * @param {string} href - URL to load
 * @param {object} params - GET query parameters or POST data
 * @param {function} callback - Callback function
 * @param {number} [sortIndex] - stored on the document as `_dataSortIndex`
 * @private
 */

Command.prototype.request = function (method, context, href, params, callback, sortIndex) {
    var self = this,
        length = callback.length,
        instance = self.instance,
        opts = Object.create(this.getOpts()),
        url, document, key, proxies;

    if (!href || href.length === 0) {
        callback("Invalid URL");
        return;
    }

    if (length === 3) {
        opts.parse = false;
    }

    if (context !== undefined) {
        document = context.doc();

        url = URL.parse(document.location.resolve(href), true);

        if (opts.follow_set_referer !== false) {
            // `opts.headers` is inherited through the prototype chain, so
            // copy it before adding the referer; otherwise the referer of
            // this document leaks into the shared options.
            opts.headers = opts.headers instanceof Object ?
                extend({}, opts.headers) :
                {};
            opts.headers.referer = document.location.href;
        }

        if (opts.cookies !== undefined) {
            if (document.cookies === undefined) {
                document.cookies = {};
            }

            opts.cookies = extend(document.cookies, opts.cookies);
        } else {
            opts.cookies = document.cookies;
        }

        if (method === 'post') {
            // Check the enctype if submitting a form
            if (formFunctions.isMultipart(context)) {
                opts.multipart = true;
            }
        }
    } else if (href.slice(0, 2) === '//') {
        // Protocol-relative URL
        url = URL.parse('http:' + href, true);
    } else if (href.substr(0, 4) !== 'http') {
        url = URL.parse('http://' + href, true);
    } else {
        url = URL.parse(href, true);
    }

    url.method = method;
    url.params = params;

    if (method === 'get' && params instanceof Object) {
        for (key in params) {
            url.query[key] = params[key];
        }

        url.params = url.query;
        url.search = qs.stringify(url.query);
        url.href = URL.format(url);
    }

    if (Array.isArray(opts.proxy)) {
        opts.proxies = opts.proxy;
    }

    if (opts.proxies !== undefined) {
        proxies = opts.proxies;

        // Rotate through the proxy list, one proxy per request
        if (proxies.index === undefined || ++proxies.index >= proxies.length) {
            proxies.index = 0;
        }

        opts.proxy = proxies[proxies.index];
    }

    instance.queueRequest(url, opts,
        function (err, res, document) {
            if (err !== null) {
                self.error((self.name !== method ?
                            '[' + method + '] ' :
                            '') + (url.href) + ' - ' + err);

                if (length === 2) {
                    callback(err, document);
                } else if (length === 3) {
                    callback(err, res, document);
                }
            } else {
                self.log('loaded [' + method + '] ' + url.href + ' ' +
                         (params ?
                          JSON.stringify(params) :
                          '') +
                         (opts.proxy ?
                          ' via ' + opts.proxy :
                          '')
                        );

                if (document instanceof Object && document !== null) {
                    document._dataSortIndex = sortIndex;
                }

                if (length === 1) {
                    callback(document);
                } else if (length === 2) {
                    callback(null, document);
                } else {
                    callback(null, res, document);
                }
            }
        });
};
403 |
/**
 * Call a callback when log, error, or debug messages are received.
 *
 * Given a function, the function becomes the handler on this command
 * and the matching instance config flag is switched on. Given a message,
 * the message is passed down the chain (prefixed once with the name of
 * the originating command) or, at the end of the chain, up to the
 * parent instance.
 *
 * @name log/error/debug
 * @memberof Osmosis
 * @param {function} callback - Callback
 */

['log', 'error', 'debug'].forEach(function (level) {
    Command.prototype[level] = function (msg, prefixed) {
        var text;

        if (msg instanceof Function) {
            this[level] = msg;
            this.instance.config(level, true);

            return this;
        }

        if (this.next !== undefined) {
            text = prefixed === undefined ?
                '(' + this.name + ') ' + msg :
                msg;
            this.next[level](text, '');
        } else if (this.instance.parent !== undefined) {
            this.instance.parent[level](msg, true);
        }

        return this;
    };
});
430 |
/**
 * Copy the own enumerable properties of `donor` onto `object`.
 *
 * @param {object} [object] - target; a new object is created when missing
 * @param {object} [donor] - source; a missing donor copies nothing
 * @returns {object} the target object
 */
function extend(object, donor) {
    var key, keys, i;

    if (object === undefined || object === null) {
        object = {};
    }

    if (donor === undefined || donor === null) {
        // Nothing to copy; `Object.keys` would throw on these values
        return object;
    }

    keys = Object.keys(donor);
    i = keys.length;

    while (i--) {
        key = keys[i];
        object[key] = donor[key];
    }

    return object;
}
446 |
/**
 * Register a command that is applied on the next tick rather than being
 * added to the command chain.
 *
 * Called without arguments the command runs `func` immediately on the
 * current command and returns its result (used to read configuration
 * while the chain is being built).
 *
 * @param {string} name - method name added to `Command.prototype`
 * @param {function} func - implementation of the command
 */
function runtimeCommand(name, func) {
    Command.prototype[name] = function () {
        var command = this,
            count = arguments.length,
            args, i;

        if (count === 0) {
            // Allow `.config()`, etc. to get configuration
            // options during command chain compile time
            return func.call(command);
        }

        args = new Array(count);
        i = 0;

        // Copying stops at the first `undefined` argument
        while (i < count && arguments[i] !== undefined) {
            args[i] = arguments[i];
            i++;
        }

        process.nextTick(function () {
            // Not on the last command: apply to the PRECEDING command.
            // On the last command: apply to the FIRST command.
            var target = command.next !== undefined ?
                command.prev :
                command.instance.command;

            func.apply(target, args);
        });

        return command;
    };
}
479 |
/**
 * Register a command that becomes part of the command chain.
 *
 * Calling the generated method fills in the current (still unnamed)
 * command, or a new command linked to the current one, with the name,
 * arguments and callback, then appends and returns a fresh empty
 * command so that further calls can be chained.
 *
 * A `func` declaring exactly four parameters is used as the command
 * callback itself; any other `func` is called with the arguments and
 * must return the callback.
 *
 * @param {string} name - method name added to `Command.prototype`
 * @param {function} func - command callback or callback factory
 */
function contextCommand(name, func) {
    Command.prototype[name] = (function () {
        var length = arguments.length,
            self, i, args;

        if (this.name === undefined) {
            self = this;
        } else {
            self = new Command(this);
        }

        self.name = name;

        args = new Array(length);

        for (i = 0; i < length && arguments[i] !== undefined; i++) {
            args[i] = arguments[i];

            // `typeof null` is 'object' too, but `null` has no keys to scan
            if (args[i] !== null && typeof args[i] === 'object') {
                args[i] = this.findCommandArg(args[i]);
            }
        }

        self.args = args;

        if (func.length === 4) {
            self.cb = func;
        } else {
            self.cb = func.apply(self, self.args);
        }

        self.next = new Command(self);

        return self.next;
    });
}
516 |
/**
 * Resolve commands embedded in a command argument.
 *
 * A Command argument is attached to this command through
 * `instance.setParent` and replaced by the first command of its
 * instance. Any other object is walked in place: nested objects are
 * resolved recursively and functions are wrapped with `this.then`
 * before being resolved.
 *
 * @param {object} obj - argument to scan
 * @returns {object} the resolved command, or `obj` itself
 */
Command.prototype.findCommandArg = function (obj) {
    var command = this;

    if (obj instanceof Command) {
        obj.instance.setParent(this);
        return obj.instance.command;
    }

    Object.keys(obj).forEach(function (key) {
        var kind = typeof obj[key];

        if (kind === 'object') {
            if (obj[key] !== null) {
                obj[key] = command.findCommandArg(obj[key]);
            }
        } else if (kind === 'function') {
            obj[key] = command.findCommandArg(command.then(obj[key]));
        }
    });

    return obj;
};
544 |
// Load every file in the commands directory. A module that exports an
// object contributes one chained command per key; any other export is
// registered as a runtime command named after the file (its last three
// characters, the extension, are dropped).
fs.readdirSync(cmdDir).forEach(function (file) {
    var exported = require(cmdDir + file);

    if (typeof exported !== 'object') {
        runtimeCommand(file.substr(0, file.length - 3), exported);
        return;
    }

    Object.keys(exported).forEach(function (name) {
        contextCommand(name, exported[name]);
    });
});
556 |
557 | module.exports = Command;
558 |
--------------------------------------------------------------------------------
/lib/Data.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
/**
 * Container for the data collected while a command chain runs.
 *
 * @constructor Data
 * @param {Data} [parent] - Parent Data object
 * @property {object} stack - Reference counter (`count`) and optional
 *                            `done` callback; shared with clones
 * @property {object} object - Raw data storage
 * @property {Data} parent - Parent Data object
 * @private
 */

function Data(parent) {
    this.stack = { count: 0 };

    if (parent) {
        this.parent = parent;
    }

    return this;
}

/**
 * Create an empty child Data object for the parent.
 *
 */

Data.prototype.child = function () {
    return new Data(this);
};

/**
 * Clone a Data object.
 *
 * The clone gets a shallow copy of the raw object and shares `stack`.
 */

Data.prototype.clone = function () {
    var clone = this.next();

    clone.object = this.copy();

    return clone;
};

/**
 * Call callback when `Data.stack.count` === 0.
 */

Data.prototype.done = function (cb) {
    this.stack.done = cb;
    return this;
};

/**
 * Get the raw data object, creating an empty array or object on demand.
 *
 */

Data.prototype.getObject = function () {
    if (this.object === undefined) {
        if (this.isArray() === true) {
            this.toArray();
        } else {
            this.setObject({});
        }
    }

    return this.object;
};

/**
 * Set the raw data object.
 *
 */

Data.prototype.setObject = function (object) {
    this.object = object;

    return this;
};

/**
 * Create a new Data object to pass to the next Command.
 *
 * The new object has the same parent, indexes, array flag, `stack` and
 * raw object (not copied) as this one.
 */

Data.prototype.next = function () {
    var clone = new Data(this.parent)
                    .setSortIndex(this.getSortIndex())
                    .setIndex(this.getIndex())
                    .isArray(this.isArray());

    clone.stack = this.stack;
    clone.object = this.object;
    return clone;
};

/**
 * Increase the shared reference count.
 *
 */

Data.prototype.ref = function () {
    this.stack.count++;
    return this;
};

/**
 * Decrease the shared reference count and call the `done` callback
 * (if one is set) when it reaches zero.
 *
 */

Data.prototype.unref = function () {
    if (--this.stack.count === 0) {
        if (this.stack.done !== undefined) {
            this.stack.done.call(this);
        }
    }
};

/**
 * Set a key/value in {@link Data.object}.
 *
 * Setting a key that already has a value turns the value into an array.
 * On array data the key is ignored and the value is pushed.
 *
 * @param {string} key - A key
 * @param {any} val - A value; `undefined` is ignored
 */

Data.prototype.set = function (key, val) {
    var object, currentVal;

    if (val === undefined) {
        return this;
    }

    if (this.isArray() === true) {
        return this.push(val);
    }

    object = this.getObject();
    currentVal = object[key];

    if (currentVal !== undefined) {
        // If the key being set already has a value,
        // then convert it to an Array.
        if (currentVal instanceof Array) {
            currentVal.push(val);
        } else {
            object[key] = [currentVal, val];
        }
    } else {
        object[key] = val;
    }

    return this;
};

/**
 * Push a value onto {@link Data.object} array.
 */

Data.prototype.push = function (val) {
    var object = this.toArray();

    if (val === undefined) {
        return this;
    }

    object.push(val);

    return this;
};

/**
 * Shallow-copy the raw data object.
 *
 * @returns a new array/object with the same entries, or the raw value
 *          itself when it is neither
 */

Data.prototype.copy = function () {
    var obj = this.object,
        data, i, keys, key;

    // Test the value itself rather than the array flag: flagged data
    // may not hold an array yet, and `slice` only exists on arrays.
    if (obj instanceof Array) {
        data = obj.slice(0);
    } else if (obj instanceof Object) {
        data = {};

        for (i = 0, keys = Object.keys(obj); i < keys.length; i++) {
            key = keys[i];
            data[key] = obj[key];
        }
    } else {
        data = obj;
    }

    return data;
};

/**
 * Get or set the array flag.
 *
 * @param {bool} [val] - when given, sets the flag and returns `this`
 * @returns {bool|Data} whether the data is flagged as, or holds, an array
 */

Data.prototype.isArray = function (val) {
    if (val !== undefined) {
        this._isArray = val === true;
        return this;
    }

    return (this._isArray === true || this.object instanceof Array);
};

/**
 * Is the raw object unset, or an object/array without any keys?
 */

Data.prototype.isEmpty = function () {
    return (this.object === undefined ||
                (this.object instanceof Object &&
                Object.keys(this.object).length === 0)
           );
};

Data.prototype.getIndex = function () {
    return this._index;
};

/**
 * Set the key under which this object is stored in its parent.
 * Ignored for array data.
 */

Data.prototype.setIndex = function (index) {
    if (this.isArray() !== true) {
        this._index = index;
    }

    return this;
};

Data.prototype.setSortIndex = function (index) {
    if (index !== undefined) {
        this.sortIndex = index;
    }

    return this;
};

Data.prototype.getSortIndex = function () {
    return this.sortIndex;
};

/**
 * Record `sortIndex` for the newest value stored under `key` and, when
 * the key holds an array, reorder that array by the recorded indexes.
 *
 * NOTE(review): when the key already holds an array the first time a
 * sort index is recorded, `new Array(n)` contains only holes, which
 * `map` skips - confirm the resulting array is what is intended.
 */

Data.prototype.sortKey = function (key, sortIndex) {
    var object = this.getObject(),
        currentVal = object[key],
        sortArray, diff;

    if (!this.sortArray) {
        this.sortArray = {};
    }

    sortArray = this.sortArray[key];

    if (sortArray === undefined) {
        if (currentVal instanceof Array && currentVal.length > 0) {
            sortArray = new Array(currentVal.length);
        } else {
            sortArray = [sortIndex];
        }

        this.sortArray[key] = sortArray;
    }

    if (currentVal instanceof Array) {
        diff = currentVal.length - sortArray.length;

        while (diff > 0) {
            sortArray.push(sortIndex + (--diff));
        }

        object[key] = sortArray.map(function (v, i) {
            return {
                value: v,
                index: i
            };
        }).sort(function (a, b) {
            return a.value - b.value;
        }).map(function (v, i) {
            sortArray[i] = v.value;

            return currentVal[v.index];
        });
    }
};

/**
 * Merge the raw object of a child Data object into this one.
 *
 * Array data receives the child as a new element, a child with an index
 * is stored under that key and any other object child is merged key by
 * key.
 */

Data.prototype.merge = function (child) {
    var object = child.object,
        index = child.getIndex(),
        sortIndex = child.getSortIndex();

    if (object === undefined) {
        return;
    }

    if (this.isArray() === true) {
        this.push(object);
    } else if (index !== undefined) {
        this.set(index, object);
    } else if (object instanceof Object) {
        this.extend(object);
    }

    if (sortIndex !== undefined) {
        this.sortKey(index, sortIndex);
    }
};

/**
 * Turn the raw object into an array (wrapping a non-empty value)
 * and return it.
 */

Data.prototype.toArray = function () {
    var object = this.object;

    if (object instanceof Array) {
        return object;
    }

    if (this.isEmpty()) {
        this.setObject([]);
    } else {
        this.setObject([ object ]);
    }

    return this.getObject();
};


/**
 * Add every key of `object` using `set` (or `push` for array data).
 *
 * @returns {object} the `object` argument
 */

Data.prototype.extend = function (object) {
    var key, keys = Object.keys(object),
        isArray = this.isArray(),
        i = keys.length;

    while (i--) {
        key = keys[i];

        if (isArray) {
            this.push(object[key]);
        } else {
            this.set(key, object[key]);
        }
    }

    return object;
};
338 |
339 | module.exports = Data;
340 |
--------------------------------------------------------------------------------
/lib/Form.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
// Helpers for reading HTML forms: locating the form, its inputs and
// submit button, and collecting the parameters to submit.
var form = {};

// Placeholder; intentionally empty.
form.submit = function () {

};

// Is `node` a <form> element?
form.isForm = function (node) {
    return node.nodeName === 'form';
};

/**
 * Find the form a node belongs to: the node itself, the form referenced
 * by its `form` attribute, or its closest ancestor form.
 */
form.getForm = function (node) {
    if (form.isForm(node)) {
        return node;
    } else if (node.hasAttribute('form')) {
        return node.doc().getElementById(node.getAttribute('form'));
    } else {
        return node.get('ancestor-or-self::form');
    }
};

// All named, enabled, non-submit descendants of the node's form.
form.getInputs = function (node) {
    return form.getForm(node).find('[@name ' +
                                    'and not(@disabled) ' +
                                    'and not(@type="submit")]');
};

/**
 * Get the submit button for a node: the first enabled submit button of
 * a form, the node itself when it is a submit input/button, else `null`.
 */
form.getSubmitButton = function (node) {
    if (form.isForm(node)) {
        return node.get('[@type="submit" and not(@disabled) and ' +
                            '(not(@form) or @form="' +
                                node.getAttribute('id') + '"' +
                        ')][1]');
    } else if ((node.nodeName === 'input' || node.nodeName === 'button') &&
                node.getAttribute('type') === 'submit') {
        return node;
    }

    return null;
};

// Resolved `action`/`formaction` URL, defaulting to the document URL.
form.getAction = function (node) {
    var document = node.doc();

    if (node.hasAttribute('action')) {
        return document.location.resolve(node.getAttribute('action'));
    } else if (node.hasAttribute('formaction')) {
        return document.location.resolve(node.getAttribute('formaction'));
    } else {
        return document.location.href;
    }
};

// `enctype`/`formenctype`, defaulting to URL-encoded.
form.getEnctype = function (node) {
    if (node.hasAttribute('enctype')) {
        return node.getAttribute('enctype');
    } else if (node.hasAttribute('formenctype')) {
        return node.getAttribute('formenctype');
    }

    return 'application/x-www-form-urlencoded';
};

// Does the node ask for a multipart encoding? Nodes without
// `hasAttribute` are never multipart.
form.isMultipart = function (node) {
    if (node.hasAttribute === undefined) {
        return false;
    }

    return (form.getEnctype(node).substr(0, 5) === 'multi');
};

// Lower-cased `method`/`formmethod`, defaulting to 'get'.
form.getMethod = function (node) {
    if (node.hasAttribute('method')) {
        return node.getAttribute('method').toLowerCase();
    } else if (node.hasAttribute('formmethod')) {
        return node.getAttribute('formmethod').toLowerCase();
    } else {
        return 'get';
    }
};

/**
 * Collect the parameters a form would submit.
 *
 * A trailing `[]` is removed from input names; a name that occurs more
 * than once yields an array of values. Radio buttons and checkboxes
 * contribute only when checked, image inputs contribute `.x`/`.y`
 * coordinates of 0, and a named submit button contributes its value.
 *
 * @param {object} node - form node or a node inside a form
 * @returns {object} name/value parameters
 */
form.getParams = function (node) {
    var params = {},
        submit = form.getSubmitButton(node),
        inputs = form.getInputs(node),
        length = inputs.length,
        i, input, name, nodeName, type, value;

    for (i = 0; i < length; i++) {
        input = inputs[i];
        // Only the array marker `[]` is stripped; names such as
        // `user[name]` must be kept intact
        name = input.getAttribute('name').replace(/\[\]$/, '');
        type = input.getAttribute('type');
        nodeName = input.nodeName;
        value = null;

        if (type) {
            type = type.toLowerCase();
        }

        switch (nodeName) {
            case 'select':
                input = input.get('option[selected]') ||
                        input.get('option:first');

                if (input !== null) {
                    if (input.hasAttribute('value')) {
                        value = input.getAttribute('value');
                    } else {
                        value = input.textContent;
                    }
                }

                break;
            case 'textarea':
                value = input.textContent;
                break;
            case 'input':
                switch (type) {
                    case 'image':
                        // Image inputs submit click coordinates
                        params[name ? name + '.x' : 'x'] = 0;
                        params[name ? name + '.y' : 'y'] = 0;
                        break;
                    case 'radio':
                    case 'checkbox':
                        if (!input.hasAttribute('checked')) {
                            break;
                        }

                        value = input.getAttribute('value') || 'on';

                        break;
                    default:
                        value = input.getAttribute('value');
                        break;

                }
                break;
        }

        if (value !== null) {
            if (params[name] instanceof Array) {
                params[name].push(value);
            } else if (params[name] !== undefined) {
                params[name] = [params[name], value];
            } else {
                params[name] = value;
            }
        }
    }

    if (submit !== null) {
        if (submit.hasAttribute('name')) {
            params[submit.getAttribute('name')] =
                submit.getAttribute('value') || 'Submit Query';
        }
    }

    return params;
};
175 |
176 | module.exports = form;
177 |
--------------------------------------------------------------------------------
/lib/Queue.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
/**
 * An Osmosis request queue.
 *
 * Holds queued objects (last in, first out) together with the counters
 * used to detect when an instance has finished.
 *
 * @constructor Queue
 * @protected
 * @param {object} instance - parent instance
 * @returns Queue
 */

function Queue(instance) {
    this.instance = instance;
    this.opts = instance.opts;
    this.queue = [];
}

Queue.prototype = {
    change: 0,
    count: 0,
    done: 0,
    requests: 0,
    length: 0,

    // Add an object to the queue.
    enqueue: function (object) {
        this.queue[this.length++] = object;
    },

    // Remove and return the most recently queued object
    // (`undefined` when the queue is empty).
    dequeue: function () {
        var object;

        if (this.length <= 0) {
            // Never let `length` drop below zero; later
            // `enqueue` calls index the array with it
            return undefined;
        }

        object = this.queue[--this.length];

        this.queue[this.length] = null;

        return object;
    },

    // Count a started command; every 25th change also triggers the
    // instance's `resources` hook when there is one.
    push: function () {
        if (++this.change >= 25) {
            if (typeof this.instance.resources === 'function') {
                this.instance.resources();
            }

            this.change = 0;
        }

        return ++this.count;
    },

    // Count a finished command. When the count is zero, and still zero
    // on the next tick, the instance's first command is told it is done.
    pop: function () {
        var self = this;

        if (--self.count === 0) {
            process.nextTick(function () {
                var instance;

                if (self.count === 0) {
                    instance = self.instance;
                    instance.command.done();

                    if (instance.opts.debug === true &&
                        typeof instance.resources === 'function') {
                        instance.resources();
                    }
                }
            });
        }

        this.change++;

        return this.count;
    }
};
68 |
69 | module.exports = Queue;
70 |
--------------------------------------------------------------------------------
/lib/Request.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | var needle = require('needle'),
4 | URL = require('url'),
5 | libxml = require('libxmljs-dom');
6 |
/**
 * Make an HTTP request.
 *
 * The callback receives `(err)` with a message string on failure and
 * `(null, res, document)` on success. `document` is the parsed document,
 * or the (optionally pre-processed) response body when `opts.parse` is
 * `false`.
 *
 * @private
 * @param {string} method - HTTP method
 * @param {object} url - parsed URL object; updated on redirects
 * @param {object} params - request data
 * @param {object} opts - request options
 * @param {number} tries - not used by this function
 * @param {function} callback - `(err, res, document)`
 */

function Request(method, url, params, opts, tries, callback) {
    var location = url;

    return needle.request(method,
                            url.href,
                            params,
                            opts,
                            function (err, res, data) {

        if (!(url.params instanceof Object) || url.params === null) {
            url.params = url.query;
        }

        if (err !== null) {
            callback(err.message);
            return;
        }

        if (opts.ignore_http_errors !== true &&
            res !== undefined &&
            res.statusCode >= 400 &&
            res.statusCode < 600
        ) {
            // HTTP error (any 4xx or 5xx status)
            callback(res.statusCode + ' ' + res.statusMessage);
            return;
        }

        if (method !== 'head' && (!data || data.length === 0)) {
            callback('Data is empty');
            return;
        }

        function next(document) {
            if (opts.parse === false) {
                callback(null, res, document);
                return;
            }

            document = libxml.parseHtml(document,
                        { baseUrl: location.href, huge: true });

            if (document === null) {
                callback('Couldn\'t parse response');
                return;
            }

            if (document.errors[0] !== undefined &&
                document.errors[0].code === 4) {
                callback('Document is empty');
                return;
            }

            if (document.root() === null) {
                callback('Document has no root');
                return;
            }

            // Prefer the public accessor; `_headers` is deprecated
            location.headers = typeof res.req.getHeaders === 'function' ?
                res.req.getHeaders() :
                res.req._headers;
            location.proxy = opts.proxy;
            location.user_agent = opts.user_agent;

            document.location = location;
            document.request = location;

            setResponseMeta(document, res, data.length);
            setCookies(document, res.cookies);
            setCookies(document, opts.cookies);

            if (opts.keep_data === true) {
                document.response.data = data;
            }

            callback(null, res, document);
        }

        if (
            opts.process_response !== undefined &&
            typeof opts.process_response === 'function'
        ) {
            if (opts.process_response.length > 2) {
                // Asynchronous hook: it calls `next` or `callback` itself
                opts.process_response(data, res, next, callback);
                return;
            }

            next(opts.process_response(data, res));
        } else {
            next(data);
        }

    })
    .on('redirect', function (href) {
        // Keep `location` pointing at the URL that is finally loaded
        extend(location, URL.parse(URL.resolve(location.href, href)));
    });
}
107 |
/**
 * Attach a `response` summary (type, status, headers and sizes)
 * to a document.
 *
 * @param {object} document - document that receives `response`
 * @param {object} res - HTTP response
 * @param {number} size - size of the response body
 */
function setResponseMeta(document, res, size) {
    var sizes = { body: size },
        socket = res.socket;

    if (socket !== undefined) {
        sizes.total = socket.bytesRead;
        sizes.headers = socket.bytesRead - size;
    }

    document.response = {
        type: getResponseType(res.headers['content-type']),
        statusCode: res.statusCode,
        statusMessage: res.statusMessage,
        headers: res.headers,
        size: sizes
    };
}
127 |
/**
 * Reduce a Content-Type header to 'xml' or 'html' when it mentions one
 * of them ('xml' is checked first).
 *
 * @param {string} [contentType] - Content-Type header value
 * @returns {string|null} 'xml', 'html', the header unchanged, or `null`
 *                        when there is no header
 */
function getResponseType(contentType) {
    var known = ['xml', 'html'],
        i;

    if (contentType === undefined) {
        return null;
    }

    for (i = 0; i < known.length; i++) {
        if (contentType.indexOf(known[i]) !== -1) {
            return known[i];
        }
    }

    return contentType;
}
143 |
144 |
/**
 * Copy cookies onto `document.cookies`, creating that object on demand.
 *
 * @param {object} document - document that receives the cookies
 * @param {object} [cookies] - name/value cookies; a missing, `null` or
 *                             empty object leaves the document untouched
 */
function setCookies(document, cookies) {
    var key, keys, length;

    if (cookies === undefined || cookies === null) {
        return;
    }

    keys = Object.keys(cookies);
    length = keys.length;

    if (length === 0) {
        return;
    }

    if (document.cookies === undefined) {
        document.cookies = {};
    }

    while (length--) {
        key = keys[length];
        document.cookies[key] = cookies[key];
    }
}
168 |
/**
 * Copy the own enumerable properties of `donor` onto `object`.
 *
 * @param {object} object - target
 * @param {object} donor - source
 * @returns {object} the target object
 */
function extend(object, donor) {
    var names = Object.keys(donor),
        idx;

    // Walk the keys from last to first
    for (idx = names.length - 1; idx >= 0; idx--) {
        object[names[idx]] = donor[names[idx]];
    }

    return object;
}
179 |
180 | module.exports = Request;
181 |
--------------------------------------------------------------------------------
/lib/commands/click.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
/**
 * Click an HTML element and continue after all events finish.
 *
 * Once the window reports `done`, every matching node receives a click;
 * the chain continues on the following `done` event registered for the
 * last node. Without any matching node the command finishes at once.
 *
 * @function click
 * @param {Selector} selector - Node(s) to click
 * @memberof Command
 * @instance
 * @example {@lang javascript}
 * .click('#nav > a')
 * .then(function(window) {
 *      var ajax = window.document.querySelector("#ajaxContent");
 *
 *      if (ajax.textContent.length > 0) {
 *          this.log("ajax loaded");
 *      }
 * })
 */

function Click(context, data, next, done) {
    var selector = this.args[0],
        targets = context.find(selector),
        view;

    if (targets.length === 0) {
        if (this.getOpts().debug === true) {
            this.debug('no results for "' + selector + '"');
        }

        return done();
    }

    view = context.doc().defaultView;

    view.addEventListener('done', function () {
        targets.forEach(function (target, position) {
            target.dispatchEvent('click');

            view.addEventListener('done', function () {
                if (position !== targets.length - 1) {
                    return;
                }

                next(context, data);
                done();
            });
        });
    });
}
52 |
53 | module.exports.click = Click;
54 |
--------------------------------------------------------------------------------
/lib/commands/config.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Set configuration options for the **preceeding** command on down the chain.
3 | *
4 | * @function config
5 | * @param {string|object} option - A `key` string or { key: value } object
6 | * @param {string} [value] - A value for the given `key`
7 | * @memberof Command
8 | * @instance
9 | * @see Osmosis.options
10 | * @see Osmosis.config
11 | */
12 |
13 | module.exports = function (key, val) {
14 | var self = this, opts;
15 |
16 | if (self.name === undefined && self.prev !== undefined) {
17 | self = self.prev;
18 | }
19 |
20 | opts = self.getOpts();
21 |
22 | if (key === undefined) {
23 | return opts;
24 | }
25 |
26 | if (typeof key === 'object') {
27 | extend(opts, key, true);
28 | } else if (typeof key === 'function') {
29 | key(opts);
30 | } else {
31 | opts[key] = val;
32 | }
33 |
34 | return this;
35 | };
36 |
/**
 * Copy the own enumerable properties of `donor` onto `object`.
 *
 * @param {object} object - target
 * @param {object} donor - source
 * @returns {object} the target object
 */
function extend(object, donor) {
    var names = Object.keys(donor),
        idx;

    // Walk the keys from last to first
    for (idx = names.length - 1; idx >= 0; idx--) {
        object[names[idx]] = donor[names[idx]];
    }

    return object;
}
48 |
--------------------------------------------------------------------------------
/lib/commands/contains.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
/**
 * Continue if the context node contains the given string or matches the
 * given regular expression.
 *
 * @function contains
 * @memberof Command
 * @param {string|RegExp} match - A string or pattern to look for.
 * @instance
 */

function Contains(context, data, next, done) {
    var match = this.string,
        content = getContent(context),
        found;

    // A node without text/value content is treated as empty
    if (content === undefined || content === null) {
        content = '';
    } else if (typeof content !== 'string') {
        content = String(content);
    }

    if (match instanceof RegExp) {
        // Global/sticky patterns keep state between calls
        match.lastIndex = 0;
        found = match.test(content);
    } else {
        found = content.indexOf(match) !== -1;
    }

    if (found) {
        next(context, data);
    } else {
        this.debug('"' + this.string + '" not found');
    }

    done();
}
22 |
23 | function getContent(node) {
24 | if (node.text !== undefined) {
25 | return node.text();
26 | } else if (node.value !== undefined) {
27 | return node.value();
28 | }
29 | }
30 |
// Store the search string on the command, then hand back the handler
// that performs the actual check.
module.exports.contains = function (string) {
    this.string = string;

    return Contains;
};
35 |
--------------------------------------------------------------------------------
/lib/commands/cookie.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Set a cookie. Short for `.config({ cookies: ... })`.
3 | *
4 | * Note: Setting a cookie to `null` will delete the cookie.
5 | *
6 | * @function cookie
7 | * @param {string} name - Cookie name
8 | * @param {string} value - Cookie value
9 | * @memberof Command
10 | * @instance
11 | * @see {@link Osmosis.config}
12 | * @see {@link Command.config}
13 | */
14 |
15 | module.exports = function (name, value) {
16 | var opts = this.getOpts();
17 |
18 | if (!opts.hasOwnProperty('cookies')) {
19 | if (opts.cookies !== undefined) {
20 | opts.cookies = extend({}, opts.cookies);
21 | } else {
22 | opts.cookies = {};
23 | }
24 | }
25 |
26 | if (value === null) {
27 | delete opts.cookies[name];
28 | } else {
29 | opts.cookies[name] = value;
30 | }
31 |
32 | return this;
33 | };
34 |
/**
 * Shallow-copy `donor`'s own enumerable properties onto `object`,
 * assigning them from the last key to the first.
 *
 * @param {object} object - Destination object
 * @param {object} donor - Source object
 * @return {object} The destination object
 */
function extend(object, donor) {
    Object.keys(donor).reverse().forEach(function (name) {
        object[name] = donor[name];
    });

    return object;
}
46 |
--------------------------------------------------------------------------------
/lib/commands/data.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Calls a callback with the current {@data} object.
6 | *
7 | * Note: Don't use this command to modify the {@data} object. Please use
8 | * {@link Command.then} instead.
9 | *
10 | * @function data
11 | * @param {function} callback - A callback with an argument for {@data}
12 | * @memberof Command
13 | * @instance
14 | */
15 |
function Data(context, data, next, done) {
    var snapshot = data.getObject();

    // Called through this.args so the callback's `this` binding is unchanged.
    this.args[0](snapshot);

    next(context, data);
    done();
}
21 |
22 | module.exports.data = Data;
23 |
--------------------------------------------------------------------------------
/lib/commands/delay.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Delay each context before continuing down the chain.
6 | *
7 | * @function delay
8 | * @param {number} delay - A number of milliseconds or a float of seconds.
9 | * @memberof Command
10 | * @instance
11 | */
12 |
function Delay(context, data, next, done) {
    var command = this,
        step = command.delay,
        wait;

    // The first context through this command waits exactly one step.
    if (command.timeout === undefined) {
        command.timeout = step;
    }

    // Each queued context waits one step longer than the one before it.
    wait = command.timeout;
    command.timeout = wait + step;

    setTimeout(function () {
        command.timeout -= step;
        next(context, data);
        done();
    }, wait);
}
28 |
29 |
module.exports.delay = function (delay) {
    // Whole numbers are kept as milliseconds; values with a fractional
    // part are read as seconds and converted.
    this.delay = (delay % 1 !== 0) ? delay * 1000 : delay;

    return Delay;
};
39 |
--------------------------------------------------------------------------------
/lib/commands/do.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Execute each argument asynchronously using the current context and data.
6 | *
7 | * After each argument has finished, {@link Command.do} will continue to the
8 | * immediately following command using the original {@link context}.
9 | *
10 | * @function do
11 | * @memberof Command
12 | * @param {...(Osmosis|middlewareCallback)} function - Callbacks or instances
13 | * @instance
14 | */
15 |
/**
 * Start every argument with the current context and a child of the current
 * data, then continue down the chain with the original context once all of
 * them have reported back.
 *
 * @param {object} context - Current context, passed on unchanged
 * @param {object} data - Current data; each argument gets `data.child()`
 * @param {function} next - Continues to the following command
 * @param {function} done - Signals that this command has finished
 */
var Do = function (context, data, next, done) {
    var args = this.args,
        length = args.length,
        pending = length,
        finish = function () {
            next(context, data);
            done();
        },
        dataDone = function () {
            if (--pending === 0) {
                finish();
            }
        }, i;

    // With no arguments nothing would ever call dataDone, so the chain
    // would stall. Continue right away instead.
    if (length === 0) {
        finish();
        return;
    }

    for (i = 0; i < length; i++) {
        args[i].start(context, data.child().done(dataDone));
    }
};
33 |
34 | module.exports.do = Do;
35 |
--------------------------------------------------------------------------------
/lib/commands/done.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Call a callback when the Osmosis instance has completely finished.
3 | *
4 | * @function done
5 | * @memberof Command
6 | * @param {function} function - Callback function
7 | * @instance
8 | */
9 |
function Done(cb) {
    var isCallback = typeof cb === 'function';

    // Registering: remember the callback on this command.
    if (isCallback) {
        this.done = cb;
        return this;
    }

    // Otherwise pass the call on to the next command, if there is one.
    if (this.next !== undefined) {
        this.next.done();
    }

    return this;
}
19 |
20 | module.exports = Done;
21 |
--------------------------------------------------------------------------------
/lib/commands/fail.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Continue if the given selector does NOT match any nodes.
6 | *
7 | * If a node is found, a {@link Command.error} message will be sent.
8 | *
9 | * @function fail
10 | * @memberof Command
11 | * @param {Selector} selector - A selector to match.
12 | * @instance
13 | * @see {@link Command.login}
14 | * @see {@link Command.filter}
15 | */
16 |
function Fail(context, data, next, done) {
    var matched = context.find(this.selector).length > 0;

    // Only continue when the selector found nothing.
    if (!matched) {
        next(context, data);
    } else {
        this.error('found ' + this.selector);
    }

    done();
}
26 |
// Keep the selector on the command and return the handler that checks it.
module.exports.fail = function (selector) {
    this.selector = selector;

    return Fail;
};
31 |
--------------------------------------------------------------------------------
/lib/commands/filter.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Check that the context node matches the given selector.
6 | *
7 | * @function filter
8 | * @memberof Command
9 | * @param {Selector} match - A Selector to match
10 | * @instance
11 | */
12 |
13 |
function Filter(context, data, next, done) {
    var hits = context.find(this.selector);

    // Contexts without a match are dropped; done() is signalled either way.
    if (hits.length > 0) {
        next(context, data);
    }

    done();
}
21 |
// Record the selector on the command; Filter reads it when it runs.
module.exports.filter = function (selector) {
    this.selector = selector;

    return Filter;
};
26 |
--------------------------------------------------------------------------------
/lib/commands/find.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Search for nodes in the current Document.
6 | *
7 | * @function find
8 | * @param {Selector|contextCallback|Command.learn} selector
9 | * @memberof Command
10 | * @see {@link Command.select}
11 | * @instance
12 | */
13 |
14 | /**
15 | * Search for nodes in the current context.
16 | *
17 | * @function select
18 | * @param {Selector|contextCallback|Command.learn} selector - A selector
19 | * @memberof Command
20 | * @see {@link Command.find}
21 | * @instance
22 | */
23 |
var Find = function (context, data, next, done) {
    // A fixed selector wins; otherwise the callback computes one per context.
    var selector = (this.selector !== undefined) ?
                   this.selector : this.contextCallback(context, data),
        // Relative searches start at the context node, others at its document.
        root = (this.relative === true) ? context : context.doc(),
        nodes = root.find(selector),
        total = nodes.length,
        lastIndex = total - 1,
        index, node;

    if (total === 0) {
        done('no results for "' + selector + '"');
        return;
    }

    if (this.getOpts().log === true) {
        this.log('found ' + total + ' results for "' + selector + '"');
    }

    // Tag each node with its position before handing it down the chain.
    for (index = 0; index < total; index++) {
        node = nodes[index];
        node.last = (index === lastIndex);
        node.index = index;
        next(node, data, index);
    }

    done();
};
59 |
module.exports.find =
module.exports.select = function (selector) {
    var command = this;

    // A function is kept as a per-context callback; an array of selectors
    // is folded into one comma-separated selector.
    if (typeof selector === 'function') {
        command.contextCallback = selector;
    } else {
        command.selector = (selector instanceof Array) ?
                           selector.join(', ') : selector;
    }

    // `select` always searches relative to the context node.
    if (command.name === 'select') {
        command.relative = true;
        return Find;
    }

    // For `find`, wait a tick to see whether the instance has a parent
    // (i.e. is nested); if so, search relative to the context as well.
    process.nextTick(function () {
        if (command.instance.parent !== undefined) {
            command.relative = true;
        }
    });

    return Find;
};
87 |
--------------------------------------------------------------------------------
/lib/commands/follow.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Follow a url.
6 | *
7 | * @function follow
8 | * @memberof Command
9 | * @param {Selector} selector - A selector string for link nodes
10 | * @instance
11 | */
12 |
module.exports.follow = function (context, data, next, done) {
    var self = this,
        selector = self.args[0],
        nodes = context.find(selector),
        document = context.doc(),
        pending = 0,
        total, index, node, url;

    // Shared by every request: pass loaded documents on, and signal done()
    // once the last outstanding request has answered.
    function requestDone(err, document) {
        if (err === null) {
            next(document, data, document._dataSortIndex);
        }

        pending -= 1;

        if (pending === 0) {
            done();
        }
    }

    if (nodes === undefined || nodes.length === 0) {
        done('no results for "' + selector +
             '" in ' + document.location.href);
        return;
    }

    total = nodes.length;

    for (index = 0; index < total; index++) {
        node = nodes[index];

        if (node.value !== undefined) {
            url = node.value();
        } else {
            url = node.attr('href');

            if (url) {
                // Don't use Attribute.text() or Attribute.value()
                // in order to keep URL encoding
                url = url.toString();
                url = url.substring(url.indexOf('"') + 1,
                                    url.lastIndexOf('"'));
            } else {
                url = node.text();
            }
        }

        if (url !== null && url.length > 0) {
            pending++;

            self.log("url: " + url);
            self.request('get', node, url, null, requestDone, index);
        }
    }

    // Nothing was requested, so nothing will call requestDone.
    if (pending === 0) {
        done();
    }
};
67 |
--------------------------------------------------------------------------------
/lib/commands/get.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | var externalURLRegex = /^((http:|https:)?\/\/|[^\/\.])/;
5 |
6 | /**
7 | * Make an HTTP GET request.
8 | *
9 | * @function get
10 | * @param {(string|contextCallback)} url - An absolute or relative URL or a
11 | * contextCallback that calls a URL.
12 | * @param {object|contextCallback} [params] - HTTP GET query parameters
13 | * @memberof Command
14 | * @instance
15 | * @see {@link Command.post}
16 | */
17 |
18 | /**
19 | * Make an HTTP POST request.
20 | * @function post
21 | * @param {(string|contextCallback)} url - An absolute or relative URL or a
22 | * contextCallback that calls a URL.
23 | * @param {object|contextCallback} [data] - HTTP POST data
24 | * @memberof Command
25 | * @instance
26 | * @see {@link Command.get}
27 | */
28 |
function Get(context, data, next, done) {
    var url = this.getURL(this.url, context, data),
        params = this.getParam(this.params, context, data);

    // Continue with the loaded document on success; done() is signalled
    // whether or not the request failed.
    function onResponse(err, context) {
        if (err === null) {
            next(context, data);
        }

        done();
    }

    this.request(this.name, context, url, params, onResponse);
}
42 |
// Static parameters are used exactly as they were given.
function getParamArg(url) {
    return url;
}

// Callback parameters are computed from the context and a plain-object
// view of the data.
function getParamFunction(func, context, data) {
    return func(context, data.getObject());
}
52 |
// A static URL is used exactly as it was given.
function getURLArg(url) {
    return url;
}

// A callback URL is computed per context. If the callback returns a node
// (anything with a nodeType) the URL is read from that node.
function getURLFunction(func, context, data) {
    var result = func(context, data.getObject());

    return (result.nodeType !== undefined) ? getURLContext(result) : result;
}
66 |
/**
 * Extract a URL from a node: its `href` attribute if present, otherwise
 * its text, otherwise its value.
 *
 * @param {object} context - A node returned by a URL callback
 * @return {string|undefined} The URL, or undefined if none could be read
 */
function getURLContext(context) {
    var href;

    // Nodes that only expose text()/value() may not implement
    // getAttribute; guard the call so the fallbacks stay reachable.
    if (typeof context.getAttribute === 'function') {
        href = context.getAttribute('href');

        if (href) {
            return href;
        }
    }

    if (context.text !== undefined) {
        return context.text();
    } else if (context.value !== undefined) {
        return context.value();
    }
}
78 |
module.exports.get =
module.exports.post = function (url, query) {
    var args = this.args,
        extraTypes = [typeof args[3], typeof args[4]];

    // Warn about argument forms that are no longer supported.
    if (extraTypes.indexOf('object') !== -1) {
        console.error("GET/POST: `opts` argument deprecated." +
                      "Use `.config` instead.");
    }

    if (extraTypes.indexOf('function') !== -1) {
        console.error("GET/POST: `callback` argument deprecated." +
                      "Use `.then` instead.");
    }

    // Choose resolvers once here so Get does not re-check types per context.
    this.getURL = (typeof url === 'function') ? getURLFunction : getURLArg;
    this.getParam = (typeof query === 'function') ?
                    getParamFunction : getParamArg;

    this.url = url;
    this.params = query;

    return Get;
};
112 |
--------------------------------------------------------------------------------
/lib/commands/header.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Set an HTTP header. Short for `.config({ headers: ... })`
3 | *
4 | * @function header
5 | * @param {string} name - Header name
6 | * @param {string} value - Header value
7 | * @memberof Command
8 | * @instance
9 | * @see Osmosis.headers
10 | * @see Osmosis.config
11 | */
12 |
13 | module.exports = function (name, value) {
14 | var opts = this.getOpts(), headers;
15 |
16 | if (opts.hasOwnProperty('headers')) {
17 | opts.headers[name] = value;
18 | } else {
19 | headers = {};
20 | headers[name] = value;
21 | this.setOpt('headers', headers);
22 | }
23 |
24 | return this;
25 | };
26 |
--------------------------------------------------------------------------------
/lib/commands/headers.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Set multiple HTTP headers. Short for `.config({ headers: ... })`.
3 | *
4 | * @function headers
5 | * @param {object} headers - { headerName: headerValue, ... }
6 | * @memberof Command
7 | * @instance
8 | * @see Osmosis.header
9 | * @see Osmosis.config
10 | */
11 |
12 | module.exports = function (headers) {
13 | var opts = this.getOpts(), key;
14 |
15 | if (opts.hasOwnProperty('headers')) {
16 | for (key in headers) {
17 | opts.headers[key] = headers[key];
18 | }
19 | } else {
20 | this.setOpt('headers', headers);
21 | }
22 |
23 | return this;
24 | };
25 |
--------------------------------------------------------------------------------
/lib/commands/if.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Execute the immediately following command if each argument is true.
3 | *
4 | * An argument is considered to be `true` IF:
5 | * - a {@link Selector} argument finds at least one node
6 | * - a nested {@link Osmosis} instance:
7 | * - Successfully {@link Command.set}s some data OR
8 | * - There is at least one {@link context}
9 | * - a {@link contextCallback} doesn't return false, null, or undefined
10 | *
11 | * @function if
12 | * @private
13 | * @param {Selector|Osmosis|contextCallback} [conditions]
14 | * @memberof Command
15 | * @instance
16 | * @see {@link Command.else}
17 | */
18 |
// Placeholder handler: the command has no runtime behaviour yet.
var If = function () {

};

/**
 * Compile step for the `if` command. Currently a stub that only reads the
 * argument count.
 *
 * @param {object} command - The command being compiled; `args` is read
 */
If.compile = function (command) {
    // `length` is declared locally; assigning it undeclared would create a
    // global in sloppy mode and throw in strict mode.
    var args = command.args,
        length = args.length;
};
28 |
29 | module.exports.if = If;
30 |
--------------------------------------------------------------------------------
/lib/commands/learn.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Osmosis learns to find dynamic content via static selectors.
3 | *
4 | * @function learn
5 | * @memberof Command
6 | * @param {string} name - The name of the runtime variable
7 | * @instance
8 | * @see {@link Command.use}
9 | */
10 |
/**
 * Look up the selector registered under the command's first argument,
 * collect the nodes it matches in the context, and derive a selector from
 * all nodes collected so far.
 *
 * @param {object} context - Node to search in
 */
var Learn = function (context) {
    var name = this.args[0],
        // Look up by the variable name; `selector` itself is not yet
        // assigned at this point.
        selector = this.lookup(name),
        nodes, i;

    if (selector === undefined) {
        // No definition, use the learned selector.
        return;
    }

    nodes = context.find(selector);

    for (i = 0; i < nodes.length; i++) {
        this.nodeSet.push(nodes[i]);
    }

    this.selector = getSelector(this.nodeSet);
};
30 |
/**
 * Build a selector from what all given nodes have in common: a shared id,
 * or else the shared node name and class names, prefixed by the selector
 * of their parents.
 *
 * @param {object[]} nodes - Nodes to describe
 * @param {boolean} [isParent] - True when describing ancestors; skips the
 *                               position suffix
 * @return {string} The selector, or '' for an empty node list
 */
function getSelector(nodes, isParent) {
    var selector = '',
        shared = [],
        node, classes, i, parentSelector, position;

    // Must come before any access to nodes[0]: the recursion over parents
    // below ends on an empty list.
    if (nodes.length === 0) {
        return '';
    }

    node = nodes[0];
    classes = node.classList || [];

    // Only a non-empty id shared by every node identifies the set.
    if (node.id && match(nodes, nodeId)) {
        return '#' + node.id;
    }

    if (match(nodes, nodeName)) {
        selector += node.nodeName;
    }

    // Collect the class names of the first node that every node carries.
    for (i = 0; i < classes.length; i++) {
        if (match(nodes, nodeHasClass, classes[i])) {
            shared.push(classes[i]);
        }
    }

    if (shared.length > 0) {
        selector += '.' + shared.join('.');
    }

    parentSelector = getSelector(parents(nodes), true);

    if (node.parentNode && isParent !== true) {
        position = nodePosition(node);

        // NOTE(review): `position` is a zero-based childNodes index while
        // :nth-of-type() is one-based and counts per element type;
        // confirm the intended mapping.
        if (match(nodes, nodePosition)) {
            selector += ':nth-of-type(' + position + ')';
        }
    }

    if (parentSelector.length > 0) {
        return parentSelector + ' > ' + selector;
    }

    return selector;
}

// True when `cb` returns the same value for every node.
function match(nodes, cb, arg) {
    var value = cb(nodes[0], arg), i;

    for (i = 1; i < nodes.length; i++) {
        if (cb(nodes[i], arg) !== value) {
            return false;
        }
    }

    return true;
}

// Parent of each node, in order; nodes without a parent are left out.
function parents(nodes) {
    var arr = [],
        i, parent;

    for (i = 0; i < nodes.length; i++) {
        parent = nodes[i].parentNode;

        if (parent) {
            arr.push(parent);
        }
    }

    return arr;
}

function nodeName(node) {
    return node.nodeName;
}

function nodeId(node) {
    return node.id;
}

function nodeHasClass(node, className) {
    return node.classList.indexOf(className) !== -1;
}

// Index of the node among its parent's childNodes, or -1 without a parent.
function nodePosition(node) {
    return node.parentNode ? node.parentNode.childNodes.indexOf(node) : -1;
}
125 | }
126 |
// Start each learn command with an empty node collection, then return the
// handler that fills it.
module.exports.learn = function () {
    this.nodeSet = [];

    return Learn;
};
131 |
--------------------------------------------------------------------------------
/lib/commands/login.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | /**
4 | * Log in using a web page's login form.
5 | *
6 | * @function login
7 | * @memberof Command
8 | * @param {string} username - Username or email address
9 | * @param {string} password - Password
10 | * @instance
11 | * @see {@link Command.success}
12 | * @see {@link Command.fail}
13 | */
14 |
15 | var form = require('../Form.js');
16 |
/**
 * Find the login form in the context, fill in the user and password
 * fields, submit it, and continue with the resulting document.
 *
 * @param {object} context - Node searched for a form with a password input
 * @param {object} data - Current data, passed on unchanged
 * @param {function} next - Continues with the document of the response
 * @param {function} done - Signals completion, optionally with a message
 */
function Login(context, data, next, done) {
    var user = this.args[0],
        pass = this.args[1],
        loginForm = context.get('form:has(input[type="password"])'),
        params, userInput, passInput, method, url;

    if (!loginForm) {
        this.error('No login form found');
        // Signal completion like every other exit path does; otherwise
        // the command is never reported as finished.
        done();
        return;
    }

    userInput = loginForm.get('input[(not(@type) or @type="text") and @name]' +
                              ':before(input[type="password"]):last');

    if (!userInput) {
        done('No user field found');
        return;
    }

    passInput = userInput.get('following::input[type="password"]');

    if (!passInput) {
        done('No password field found');
        return;
    }

    // Start from the form's own parameters, then overwrite the
    // credential fields.
    params = form.getParams(loginForm);
    params[userInput.getAttribute('name')] = user;
    params[passInput.getAttribute('name')] = pass;

    url = form.getAction(loginForm);
    method = form.getMethod(loginForm);

    this.debug(method + ' ' + url + ' ' + JSON.stringify(params));

    this.request(method,
                 loginForm,
                 url,
                 params,
                 function (err, document) {
                     if (err === null) {
                         next(document, data);
                     }

                     done();
                 });
}
67 |
// Keep the credentials on the command and return the Login handler.
module.exports.login = function (username, password) {
    this.username = username;
    this.password = password;

    return Login;
};
73 |
--------------------------------------------------------------------------------
/lib/commands/match.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Continue if the context node innerText matches a RegExp.
6 | *
7 | * @function match
8 | * @memberof Command
9 | * @param {string|RegExp} match - A RegExp to match.
10 | * @instance
11 | */
12 |
/**
 * Continue down the chain when the context's content matches the
 * command's pattern.
 *
 * @param {object} context - Node whose content is tested
 * @param {object} data - Current data, passed on unchanged
 * @param {function} next - Continues to the following command
 * @param {function} done - Signals that this command has finished
 */
function Match(context, data, next, done) {
    // The command accepts a string as well as a RegExp; compile strings.
    var regex = (this.regex instanceof RegExp) ?
                this.regex : new RegExp(this.regex),
        content = getContent(context);

    // A global or sticky regex remembers where its last match ended;
    // reset it so each context is tested from the start.
    regex.lastIndex = 0;

    // A node without content never matches. Testing `undefined` directly
    // would match it against the text "undefined".
    if (content !== undefined && regex.test(content)) {
        next(context, data);
    } else {
        this.debug('"' + this.regex.toString() + '" not found');
    }

    done();
}

/**
 * Read the textual content of a node.
 *
 * @param {object} node - A node exposing `text()` or `value()`
 * @return {string|undefined} The content, or undefined if the node has
 *                            neither accessor
 */
function getContent(node) {
    if (node.text !== undefined) {
        return node.text();
    } else if (node.value !== undefined) {
        return node.value();
    }
}
30 |
// Store the pattern on the command; Match reads it for every context.
module.exports.match = function (regex) {
    this.regex = regex;

    return Match;
};
35 |
--------------------------------------------------------------------------------
/lib/commands/paginate.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Loads multiple pages.
6 | *
7 | * The first argument can alternatively be an object representing
8 | * HTTP GET/POST parameters to modify.
9 | *
10 | * If the first argument is an object, numeric values will
11 | * increment the existing parameter value by that amount.
12 | *
13 | * String values are treated as selectors and each corresponding
14 | * parameter's value will be replaced with the content of the selected node.
15 | *
16 | * @function paginate
17 | * @memberof Command
18 | * @param {selector} selector - A link or form to the next page.
19 | * @param {number|Selector|middlewareCallback} [limit] -
20 | Total number of pages to load.
21 | * @instance
22 | */
23 |
24 | var form = require('../Form.js');
25 |
/**
 * Pass the current page down the chain, then request the following page
 * and restart this command with it until the limit is exceeded.
 *
 * The target of the next request comes from `this.selector`:
 * - a function returning a URL string or a parameter object,
 * - an object mapping parameter names to a numeric increment or to a
 *   selector whose content becomes the value,
 * - a selector for a form, a link, or a named input.
 *
 * @param {object} context - Current page context
 * @param {object} data - Current data, passed on unchanged
 * @param {function} next - Continues to the following command
 * @param {function} done - Signals completion, optionally with a message
 */
function Paginate(context, data, next, done) {
    var selector = this.selector,
        limit = this.getLimit(this.limit, context, data),
        document = context.doc(),
        count = document.request.count || 1,
        self = this,
        params = {},
        node = context,
        method, url, param, name, value, ret;

    next(context, data, count);

    if (limit !== undefined && count > limit) {
        return done();
    }

    method = document.location.url.method || 'get';
    url = document.location.href;

    if (selector instanceof Function) {
        ret = selector(context, data);

        if (typeof ret === 'string') {
            url = document.location.resolve(ret);
        } else {
            params = ret;
        }
    } else if (selector instanceof Object) {
        for (param in selector) {
            value = selector[param];

            if (typeof value !== 'number') {
                params[param] = getContent(context.get(value));
            } else {
                // Numeric values increment the parameter of the current
                // request (missing or non-numeric counts as 0).
                params[param] = (parseFloat(document.request.params[param]) ||
                                 0) +
                                value;
            }
        }
    } else {
        node = document.get(selector);

        if (!node) {
            return done('no results for "' + selector + '" in ' + url);
        } else if (node.nodeName === 'form') {
            url = form.getAction(node);
            method = form.getMethod(node);
            params = form.getParams(node);
        } else if (node.hasAttribute('href')) {
            url = node.getAttribute('href');
        } else {
            name = node.getAttribute('name');

            if (name !== null && name !== undefined) {
                // Other callers treat getAttribute() results as plain
                // strings; only unwrap when an attribute object with a
                // value() accessor is returned.
                if (typeof name.value === 'function') {
                    name = name.value();
                }

                value = node.getAttribute('value');

                if (value === null) {
                    value = getContent(node);
                }

                params[name] = value;
            } else {
                return done('no URL found in ' + selector);
            }
        }
    }

    self.log('loading page ' + count + (limit ?
                                        '/' + limit :
                                        '') + ' - ' + url);

    // Request callbacks receive (err, document), as in the other commands;
    // only restart with the next page when the request succeeded.
    self.request(method, node, url, params, function (err, document) {
        if (err === null) {
            document.request.count = count + 1;
            self.start(document, data);
        }
    });

    done();
}
105 |
// A fixed limit is used exactly as it was given.
function getLimitArg(limit) {
    return limit;
}

// A callback limit: `false` becomes 0, `true` becomes undefined (no
// limit), and any other return value is used as the limit itself.
function getLimitFunction(callback, context, data) {
    var value = callback(context, data.getObject());

    switch (value) {
    case false:
        return 0;
    case true:
        return undefined;
    default:
        return value;
    }
}
121 |
// A selector limit: read the selected node's content, strip everything
// except digits and dots, and parse the rest as an integer. Any failure
// along the way yields 0.
function getLimitSelector(selector, context) {
    var node = context.get(selector),
        text;

    if (!node) {
        return 0;
    }

    text = getContent(node);

    if (!text) {
        return 0;
    }

    return parseInt(text.replace(/[^0-9\.]+/g, ''), 10) || 0;
}
139 |
// Read a node's content through text() when available, else value();
// returns undefined when the node has neither.
function getContent(node) {
    var accessor = null;

    if (node.text !== undefined) {
        accessor = 'text';
    } else if (node.value !== undefined) {
        accessor = 'value';
    }

    if (accessor !== null) {
        return node[accessor]();
    }
}
147 |
module.exports.paginate = function (selector, limit) {
    var kind = typeof limit;

    this.selector = selector;
    this.limit = limit;

    // Choose how the limit is resolved per page: strings are selectors,
    // functions are callbacks, anything else is used as given.
    if (kind === 'string') {
        this.getLimit = getLimitSelector;
    } else if (kind === 'function') {
        this.getLimit = getLimitFunction;
    } else {
        this.getLimit = getLimitArg;
    }

    return Paginate;
};
166 |
--------------------------------------------------------------------------------
/lib/commands/parse.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Parse HTML or XML data
3 |
4 | * @function parse
5 | * @param {string|buffer} data - XML/HTML data
6 | * @param {object} options - Parse options
7 | * @memberof Command
8 | * @instance
9 | * @see Osmosis.parse
10 | */
11 |
module.exports.parse = function (context, data, next, done) {
    var source = this.args[0],
        options = this.args[1],
        // The parsed result replaces the incoming context.
        parsed = this.instance.parse(source, options);

    next(parsed, data);
    done();

    return this;
};
19 |
--------------------------------------------------------------------------------
/lib/commands/pause.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Pause an Osmosis instance.
3 | *
4 | * @function pause
5 | * @memberof Command
6 | * @instance
7 | */
8 |
9 | module.exports = function () {
10 | this.instance.queue.push();
11 | this.prev.debug('pausing');
12 | this.instance.paused = true;
13 | };
14 |
--------------------------------------------------------------------------------
/lib/commands/proxy.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Set a proxy. Short for `.config({ proxy: ... })`
3 | *
4 | * @function proxy
5 | * @memberof Command
6 | * @param {string|array} proxy - A string or array of HTTP proxy URL(s)
7 | * @instance
8 | * @see Osmosis.config
9 | */
10 |
11 | module.exports = function (value) {
12 | this.getOpts().proxy = value;
13 | return this;
14 | };
15 |
--------------------------------------------------------------------------------
/lib/commands/resume.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Resume an Osmosis instance.
3 | *
4 | * @function resume
5 | * @memberof Command
6 | * @instance
7 | */
8 |
9 | module.exports = function () {
10 | this.instance.queue.pop();
11 | var instance = this.instance;
12 |
13 | this.prev.debug('resuming');
14 | this.instance.paused = false;
15 | this.instance.resume();
16 | };
17 |
--------------------------------------------------------------------------------
/lib/commands/rewrite.js:
--------------------------------------------------------------------------------
// Deprecated command: prints a notice, then passes the context through
// unchanged.
module.exports.rewrite = function (context, data, next, done) {
    console.error('DEPRECATED. Use .find(selector).get(callback) instead.');

    next(context, data);
    done();
};
6 |
--------------------------------------------------------------------------------
/lib/commands/set.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | var sourceSelectorRegexp = /:source$/,
5 | innerHTMLSelectorRegexp = /:html$/;
6 |
7 | /**
8 | * Set values in the {@link data} object.
9 | *
10 | * Note: Also accepts set(key, selector) as parameters
11 | *
12 | * @function set
13 | * @memberof Command
14 | * @param {object} data - Key/selector pairs to set.
15 | * @instance
16 | */
17 |
module.exports.set = function (key, val) {
    // Whether the first argument is an array is decided before it may be
    // wrapped into an object below.
    var isArray = key instanceof Array,
        args;

    if (val !== undefined) {
        // set(key, selector)
        args = {};
        args[key] = val;
    } else if (typeof key === 'string') {
        // set(key): a null value stands for "no selector given"
        args = {};
        args[key] = null;
    } else {
        // set({ key: selector, ... }) or set([ ... ])
        args = key;
    }

    return setObject(loopObject(args, isArray), isArray);
};
32 |
/**
 * Compile a key/selector object (or array) into a flat array of triples:
 * `[key, setterFunction, isObject, key, setterFunction, isObject, ...]`.
 *
 * `isObject` is true when the setter takes `(context, data, done)` and
 * reports completion through `done`; otherwise the setter is called as
 * `(context, data)` and its return value is used directly.
 *
 * @param {object|Array} obj - Values to compile
 * @return {Array} Flat array holding three entries per key of `obj`
 */
function loopObject(obj) {
    var keys = Object.keys(obj),
        length = keys.length,
        isArray = obj instanceof Array,
        arr = new Array(length * 3),
        i = 0,
        ai = 0,
        key, val, valIsArray, func, isObject;

    for (; i < length; i++) {
        key = keys[i];
        val = obj[key];
        valIsArray = val instanceof Array;
        isObject = false;
        // Reset per key so a setter never leaks over from the previous key.
        func = undefined;

        if (typeof val === 'object' && val !== null) {
            if (val.isCommand === true) {
                isObject = true;
                func = setInstance(val, key);
            } else if (!valIsArray || val.length > 0) {
                isObject = true;
                func = setObject(loopObject(val), valIsArray, key);
            } else {
                // An empty array has nothing to compile; set it directly
                // as a plain value.
                func = setEmptyArray;
            }
        } else {
            if (val === null) {
                func = setContextNull;
            } else if (typeof val === 'function') {
                func = setContextFunc(val);
            } else if (isArray) {
                func = setContextArray(val);
            } else if (sourceSelectorRegexp.test(val)) {
                func = setContextSource(val);
            } else if (innerHTMLSelectorRegexp.test(val)) {
                func = setContextInnerHTML(val);
            } else {
                func = setContextVal(val);
            }
        }

        arr[ai++] = key;
        arr[ai++] = func;
        arr[ai++] = isObject;
    }

    return arr;
}

// Setter for empty-array values: yields a fresh empty array each time.
function setEmptyArray() {
    return [];
}
79 |
/**
 * Build a command handler from a flat array of
 * `[key, setter, isObject, ...]` triples.
 *
 * @param {Array} arr - Flat array, three entries per key
 * @param {boolean} isArray - True when the values go into an array
 * @param {string} [index] - Key in the parent data; when given, the handler
 *                           runs as a nested setter
 * @return {function} Handler taking `(context, data, next, done)`
 */
function setObject(arr, isArray, index) {
    var length = arr.length,
        // Number of triples, i.e. how many setters must finish.
        total = length / 3,
        isNested = index !== undefined;

    return function (context, data, next, done) {
        var count = total,
            // Called once per finished setter. Returns false while setters
            // are still outstanding and true after the last one.
            dataDone = function () {
                if (--count !== 0) {
                    return false;
                }


                // A nested result is merged into its parent data.
                if (isNested && data.parent !== undefined) {
                    data.parent.merge(data);
                }

                next(context, data);

                // done will be undefined if setObject is called by setObject
                if (done !== undefined) {
                    done();
                }

                return true;
            },

            key, val, isObject, i;

        if (context === undefined) {
            done("No context");
            return;
        }

        // Only when a `done` callback was supplied: continue on a clone of
        // the data.
        if (done !== undefined) {
            data = data.clone();
        }

        // Nested setters work on a child of the data, registered under
        // `index`.
        if (isNested === true) {
            data = data.child()
                       .setIndex(index)
                       .isArray(isArray)
                       .done(dataDone)
                       .ref();
        }

        // Arrays are filled one element after another by setArray.
        if (isArray === true) {
            setArray(context, data, dataDone, arr, 0);
            return;
        }

        // Walk the triples; `i` is advanced twice more inside the body.
        for (i = 0; i < length; i++) {
            key = arr[i];
            val = arr[++i];
            isObject = arr[++i];

            if (isObject === true) {
                // The setter reports completion itself via dataDone.
                val(context, data, dataDone);
            } else {
                data.set(key, val(context, data));
                dataDone();
            }
        }
    };
}
145 |
/**
 * Process the triple starting at offset `i` of `arr`, then move on to the
 * next triple. Entries are handled one after another so that values are
 * pushed in array order.
 *
 * @param {object} context - Current context passed to each getter.
 * @param {object} data - Data object receiving the pushed values.
 * @param {function} done - Completion counter; returns false while entries
 *                          remain.
 * @param {Array} arr - Flat list of (key, getter, isObject) triples.
 * @param {number} i - Offset of the triple to process.
 */
function setArray(context, data, done, arr, i) {
    var getter = arr[i + 1];
    var reportsItself = arr[i + 2] === true;
    var nextOffset = i + 3;

    // Continue with the following triple unless `done` says we are finished.
    function advance() {
        if (done() === false) {
            setArray(context, data, done, arr, nextOffset);
        }
    }

    data.toArray();

    if (reportsItself) {
        getter(context, data, advance);
        return;
    }

    data.push(getter(context, data));
    advance();
}
168 |
/**
 * Wrap a command instance so it can be used as an object-valued getter.
 *
 * @param {object} instance - Object exposing `start(context, data)`.
 * @param {*} index - Index given to the child Data object.
 * @returns {function} Getter taking (context, data, done).
 */
function setInstance(instance, index) {
    return function (context, data, done) {
        var childData = data.child()
                            .setIndex(index)
                            .done(done)
                            .ref();

        instance.start(context, childData);
    };
}
178 |
/**
 * Getter used for `null` values: reads the content of the context itself.
 *
 * @param {object} context - Current context node.
 * @returns {string|undefined} Result of getContent for the context.
 */
function setContextNull(context) {
    var content = getContent(context);

    return content;
}
182 |
/**
 * Create a getter that looks up `selector` with `context.get` and returns
 * the content of the result.
 *
 * @param {string} selector - Selector passed to `context.get`.
 * @returns {function} Getter taking (context).
 */
function setContextVal(selector) {
    return function (context) {
        var node = context.get(selector);

        return getContent(node);
    };
}
188 |
/**
 * Create a getter for array members: the content of every match except the
 * last is pushed onto `data`, and the content of the last match is returned
 * to the caller.
 *
 * @param {string} selector - Selector passed to `context.find`.
 * @returns {function} Getter taking (context, data).
 */
function setContextArray(selector) {
    return function (context, data) {
        var matches = context.find(selector);
        var lastIndex = matches.length - 1;
        var position = 0;

        while (position < lastIndex) {
            data.push(getContent(matches[position]));
            position += 1;
        }

        return getContent(matches[lastIndex]);
    };
}
202 |
/**
 * Create a getter that runs a user callback. When getContent can extract
 * content from the callback's result that content is returned; otherwise
 * the raw result is returned.
 *
 * @param {function} cb - Callback invoked with (context, data).
 * @returns {function} Getter taking (context, data).
 */
function setContextFunc(cb) {
    return function (context, data) {
        var result = cb(context, data);
        var extracted = getContent(result);

        return extracted !== undefined ? extracted : result;
    };
}
217 |
/**
 * Create a getter returning `toString()` of the node matched by the
 * selector that remains after removing the sourceSelectorRegexp match.
 *
 * @param {string} s - Selector including the source marker.
 * @returns {function} Getter taking (context); yields undefined when
 *                     nothing matches.
 */
function setContextSource(s) {
    var selector = s.replace(sourceSelectorRegexp, '');

    return function (context) {
        var node = context.get(selector);

        return node ? node.toString() : undefined;
    };
}
231 |
/**
 * Create a getter returning the `innerHTML` of the node matched by the
 * selector that remains after removing the innerHTMLSelectorRegexp match.
 *
 * @param {string} s - Selector including the inner-HTML marker.
 * @returns {function} Getter taking (context); yields undefined when
 *                     nothing matches.
 */
function setContextInnerHTML(s) {
    var selector = s.replace(innerHTMLSelectorRegexp, '');

    return function (context) {
        var node = context.get(selector);

        return node ? node.innerHTML : undefined;
    };
}
245 |
/**
 * Extract trimmed text content from a node-like object.
 *
 * `text()` is preferred and `value()` is the fallback. Only callable
 * members are used, so arbitrary values returned by user callbacks (plain
 * objects that happen to carry a `text` or `value` property, strings,
 * numbers) yield `undefined` instead of raising a TypeError.
 *
 * @param {*} node - Node-like object, or any other value.
 * @returns {string|undefined} Trimmed content, or undefined when the value
 *                             offers neither method.
 */
function getContent(node) {
    if (!node) {
        return;
    }

    if (typeof node.text === 'function') {
        return node.text().trim();
    }

    if (typeof node.value === 'function') {
        return node.value().trim();
    }

    return;
}
259 |
--------------------------------------------------------------------------------
/lib/commands/stop.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Stop an Osmosis instance.
3 | *
4 | * @function stop
5 | * @memberof Command
6 | * @instance
7 | */
8 |
9 | module.exports = function () {
10 | this.instance.queue.pop();
11 | this.pause();
12 | this.instance.stopped = true;
13 | this.instance.paused = true;
14 | this.debug('stopping');
15 | };
16 |
--------------------------------------------------------------------------------
/lib/commands/submit.js:
--------------------------------------------------------------------------------
1 | /*jslint node: true */
2 | 'use strict';
3 |
4 | /**
5 | * Submit a form.
6 | *
7 | * @function submit
8 | * @memberof Command
9 | * @param {Selector} selector - A selector for a ');
97 | }
98 | } else {
99 | if (data.user == user && data.pass == pass) {
100 | res.setHeader('Set-Cookie', 'auth=true; Domain=.yahoo.com');
101 | res.write('authenticated
unauthenticated');
104 | }
105 | }
106 |
107 | res.end();
108 | });
109 |
--------------------------------------------------------------------------------
/test/paginate.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index');
2 | var server = require('./server');
3 | var URL = require('url');
4 |
5 | var url = server.host + ':' + server.port;
6 |
// Paginate via the rel="next" link and check the page value on every page.
module.exports.link = function (assert) {
    var visited = 0;

    osmosis.get(url + '/paginate')
    .paginate('a[rel="next"]', 3)
    .set('page', 'div')
    .then(function (context, data) {
        var params = context.request.params;
        var page = 1;

        // The first request carries no page parameter.
        if (params && params.page) {
            page = params.page;
        }

        assert.equal(page, data.page);
        visited += 1;
        assert.equal(page, visited);
    })
    .done(function () {
        assert.ok(visited > 1);
        assert.done();
    });
};
25 |
// Paginate by passing a parameter object ({ page: +1 }) to paginate().
module.exports.param = function (assert) {
    var visited = 0;

    osmosis.get(url + '/paginate', { page: 1 })
    .paginate({ page: +1 }, 3)
    .set('page', 'div')
    .then(function (context, data) {
        var params = context.request.params;
        var page = 1;

        if (params && params.page) {
            page = params.page;
        }

        assert.equal(page, data.page);
        visited += 1;
        assert.equal(page, visited);
    })
    .done(function () {
        assert.ok(visited > 1);
        assert.done();
    });
};
44 |
// Paginate by passing a form selector to paginate().
module.exports.form = function (assert) {
    var count = 0;

    osmosis.get(url + '/paginate')
    .paginate('form', 3)
    .set('page', 'div')
    .then(function (context, data) {
        var params = context.request.params;
        var page = (params && params.page) || 1;

        // assert.equal performs the same loose comparison as
        // assert.ok(a == b) but reports both values on failure, matching
        // the other pagination tests in this file.
        assert.equal(page, data.page);
        assert.equal(page, ++count);
    })
    .done(function () {
        assert.ok(count > 1);
        assert.done();
    });
};
63 |
// Paginate with a callback that returns the URL of the next page.
module.exports.func_url = function (assert) {
    var visited = 0;

    osmosis.get(url + '/paginate', { page: 1 })
    .paginate(function (document, data) {
        var request = document.request;
        var path = request.pathname;
        var following = parseInt(request.query.page, 10) + 1;

        return path + '?page=' + following;
    }, 3)
    .set('page', 'div')
    .then(function (context, data) {
        var params = context.request.params;
        var page = 1;

        if (params && params.page) {
            page = params.page;
        }

        assert.equal(page, data.page);
        visited += 1;
        assert.equal(page, visited);
    })
    .done(function () {
        assert.ok(visited > 1);
        assert.done();
    });
};
85 |
// Paginate with a callback that returns a parameter object for the next page.
module.exports.func_obj = function (assert) {
    var visited = 0;

    osmosis.get(url + '/paginate', { page: 1 })
    .paginate(function (document, data) {
        var following = parseInt(document.request.query.page, 10) + 1;

        return { page: following };
    }, 3)
    .set('page', 'div')
    .then(function (context, data) {
        var params = context.request.params;
        var page = 1;

        if (params && params.page) {
            page = params.page;
        }

        assert.equal(page, data.page);
        visited += 1;
        assert.equal(page, visited);
    })
    .done(function () {
        assert.ok(visited > 1);
        assert.done();
    });
};
108 |
109 | server('/paginate', function (url, req, res, data) {
110 | res.setHeader("Content-Type", "text/html");
111 | var page = 1;
112 |
113 | if (data && data.page)
114 | page = data.page;
115 | else if (url.query.page)
116 | page = url.query.page;
117 | res.write('' + page + '
Next\
118 | ');
119 | res.end();
120 | });
121 |
--------------------------------------------------------------------------------
/test/parse.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index'),
2 | html = '';
3 |
// Parsing the module-level `html` string must yield exactly one <body>.
module.exports.html = function (assert) {
    var parsed = osmosis.parse(html);

    parsed
    .then(function (context) {
        var bodies = context.find('body');

        assert.equal(bodies.length, 1);
    })
    .done(function () {
        assert.done();
    });
};
13 |
// Parsing with a `baseUrl` option must give the document a location href.
module.exports.base_url = function (assert) {
    var options = { baseUrl: 'test.com' };

    osmosis.parse(html, options)
    .then(function (document) {
        assert.ok(document.location.href);
    })
    .done(function () {
        assert.done();
    });
};
23 |
--------------------------------------------------------------------------------
/test/process_response_option.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index'),
2 | server = require('./server'),
3 | url = server.host + ':' + server.port;
4 |
5 |
// Runs the shared helper against /response-code-200 with `false` as the
// last argument.
// NOTE(review): test_process_response is not visible here; the last
// argument is presumably the process_response option — confirm.
module.exports.process_response_default_none = function (assert) {
    var processResponse = false;

    test_process_response('/response-code-200', 'hi', undefined, assert, processResponse);
};
12 |
// Runs the shared helper with a callback that calls its fourth argument
// with '200-die' when the status code is 200 and otherwise passes the first
// argument to its third argument.
module.exports.process_response_fail_on_200 = function (assert) {
    var failOn200 = function(d, r, n, c) {
        if (r.statusCode === 200) {
            c('200-die');
        } else {
            n(d);
        }
    };

    test_process_response('/response-code-200', undefined, '200-die', assert, failOn200);
};
19 | module.exports.process_response_fail_on_incomplete_html = function (assert) {
20 | test_process_response(
21 | '/response-code-no-body-end', undefined, 'no-body-end', assert,
22 | function(d, r, n, c) { d.toString('utf8').indexOf('hihihibut no end body');
72 | });
73 |
--------------------------------------------------------------------------------
/test/proxy.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index');
2 | var server = require('./server');
3 | var http = require('http');
4 |
5 |
6 | var url = server.host + ':' + server.port;
7 |
// Request handler for the local proxy servers: re-issues the incoming
// request with an extra `proxy=<local port>` query parameter and pipes the
// upstream status, headers and body back to the client.
var proxy = function (request, response) {
    var separator = request.url.indexOf('?') == -1 ? '?' : '&';
    var target = request.url + separator + 'proxy=' + request.socket.localPort;
    var req = http.request(target);

    req.on('response', function (res) {
        res.on('data', function (chunk) {
            response.write(chunk, 'binary');
        });
        res.on('end', function () {
            response.end();
        });
        response.writeHead(res.statusCode, res.headers);
    });

    // Forward the incoming request body to the upstream request.
    request.on('data', function (chunk) {
        req.write(chunk, 'binary');
    });
    request.on('end', function () {
        req.end();
    });
};
27 |
// Start one proxy server on each port from 8080 to 8089.
var proxies = [];
var port = 8080;

while (port < 8090) {
    proxies.push(http.createServer(proxy).listen(port));
    port++;
}
33 |
// Set the proxy through .config('proxy', ...) and check the port echoed
// back in the page.
module.exports.config = function (assert) {
    osmosis.get(url + '/proxy')
    .config('proxy', '127.0.0.1:8080')
    .then(function (context) {
        var reported = context.get('div').text();

        assert.ok(reported == '8080');
    })
    .done(function () {
        assert.done();
    });
};
44 |
// Set the proxy through the .proxy() command and check the port echoed
// back in the page.
module.exports.macro = function (assert) {
    osmosis.get(url + '/proxy')
    .proxy('127.0.0.1:8080')
    .then(function (context) {
        var reported = context.get('div').text();

        assert.ok(reported == '8080');
    })
    .done(function () {
        assert.done();
    });
};
55 |
// Pass every local proxy to .proxy(), then request a failing URL and check
// that a single entry is left in the list. Closes all proxy servers when
// finished.
module.exports.multiple = function (assert) {
    // This same array is handed to .proxy() and inspected again in done().
    var p = proxies.map(function (proxy) {
        return 'localhost:' + proxy.address().port;
    });

    osmosis.get(url + '/proxy')
    .config('tries', p.length)
    .proxy(p)
    .then(function (context) {
        var reported = context.get('div').text();

        assert.equal(reported, '8080');
    })
    .get('/proxy?err=true')
    .done(function () {
        assert.equal(p.length, 1);

        proxies.forEach(function (proxy) {
            proxy.close();
        });

        assert.done();
    });
};
78 |
79 | server('/proxy', function (url, req, res) {
80 | if (url.query.err !== undefined) {
81 | res.writeHead(500);
82 | res.end();
83 | return;
84 | }
85 |
86 | res.setHeader("Content-Type", "text/html");
87 | res.write('' + url.query.proxy + '
');
88 | res.end();
89 | });
90 |
--------------------------------------------------------------------------------
/test/resume.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index'),
2 | server = require('./server'),
3 | url = server.host + ':' + server.port,
4 | pages = 50;
5 |
// Pause a running instance after 300ms, resume it 300ms later, and check
// that no `then` callback fires while it is paused.
module.exports.pause = function (assert) {
    var paused = false;
    var count = 0;
    var instance =
        new osmosis.get(url + '/pause')
        .follow('a')
        .then(function () {
            assert.ok(!paused);
            count++;
        })
        .done(function () {
            assert.equal(count, pages);
            assert.ok(!paused);
            assert.done();
        });

    function resumeInstance() {
        paused = false;
        instance.resume();
    }

    function pauseInstance() {
        paused = true;
        // Some, but not all, pages must have been processed by now.
        assert.ok(count > 0);
        assert.ok(count < pages);
        instance.pause();

        setTimeout(resumeInstance, 300);
    }

    instance.run();

    setTimeout(pauseInstance, 300);
};
36 |
// Handler for /pause: appends one chunk per page to the body, writes it,
// and ends the response after a 50ms delay.
server('/pause', function (url, req, res) {
    var out = '';
    var remaining = pages;

    res.setHeader("Content-Type", "text/html");

    while (remaining > 0) {
        out += '';
        remaining -= 1;
    }

    res.write(out);

    setTimeout(function () {
        res.end();
    }, 50);
});
52 |
--------------------------------------------------------------------------------
/test/run.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index'),
2 | server = require('./server'),
3 | url = server.host + ':' + server.port;
4 |
// Helper that always returns true.
var name = function () {
    var result = true;

    return result;
};
8 |
// Build an instance with `new osmosis(url)` and run it straight away.
module.exports.immediate = function (assert) {
    var thenRan = false;

    new osmosis(url + '/run')
    .then(function (context, data, next, done) {
        var text = context.get('div').textContent;

        assert.equal(text, 'loaded');
        thenRan = true;
        next(context, data);
        done();
    })
    .done(function () {
        assert.ok(thenRan);
        assert.done();
    }).run();
};
24 |
// Run the same instance twice; the test finishes once `then` has fired for
// both runs.
module.exports.multiple = function (assert) {
    var runs = 0;
    var instance =
        new osmosis.get(url + '/run')
        .then(function () {
            runs++;
        })
        .done(function () {
            if (runs === 2) {
                assert.done();
            }
        });

    instance.run();
    instance.run();
};
41 |
// An instance built with `new osmosis.get(...)` must not start until run()
// is called, which here happens 500ms later.
module.exports.new_instance_command = function (assert) {
    var thenRan = false;
    var timerFired = false;
    var instance =
        new osmosis.get(url + '/run')
        .then(function (context, data, next, done) {
            var text = context.get('div').textContent;

            assert.equal(text, 'loaded');
            thenRan = true;
            next(context, data);
            done();
        })
        .done(function () {
            assert.ok(timerFired);
            assert.ok(thenRan);
            assert.done();
        });

    function start() {
        timerFired = true;
        instance.run();
    }

    setTimeout(start, 500);
};
64 |
// An instance built with `new osmosis(url)` must not start until run() is
// called, which here happens 500ms later.
module.exports.new_instance_get = function (assert) {
    var thenRan = false;
    var timerFired = false;
    var instance =
        new osmosis(url + '/run')
        .then(function (context, data, next, done) {
            var text = context.get('div').textContent;

            assert.equal(text, 'loaded');
            thenRan = true;
            next(context, data);
            done();
        })
        .done(function () {
            assert.ok(timerFired);
            assert.ok(thenRan);
            assert.done();
        });

    function start() {
        timerFired = true;
        instance.run();
    }

    setTimeout(start, 500);
};
87 |
88 | server('/run', function (url, req, res) {
89 | res.setHeader("Content-Type", "text/html");
90 | res.write('loaded
');
91 | res.end();
92 | });
93 |
--------------------------------------------------------------------------------
/test/save.js:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rchipka/node-osmosis/baed7239fc5c22ea8d00a5d2dc45f97b2d64b5c5/test/save.js
--------------------------------------------------------------------------------
/test/server/index.js:
--------------------------------------------------------------------------------
1 | var http = require('http'),
2 | URL = require("url"),
3 | qs = require("querystring"),
4 | host = 'localhost',
5 | port = 1337,
6 | paths = {},
7 | server;
8 |
// Dispatch each request to the handler registered for its decoded path.
// Unknown paths get a 404. POST bodies are buffered and, unless the content
// type starts with "multipart", parsed as a query string before the handler
// is called.
server = http.createServer(function (req, res) {
    var url = URL.parse(req.url, true);
    var uri = decodeURIComponent(url.pathname);
    var postData = '';

    if (paths[uri] === undefined) {
        res.writeHead(404);
        res.end();
        return;
    }

    if (req.method !== 'POST') {
        paths[uri](url, req, res);
        return;
    }

    req.on('data', function (chunk) {
        postData += chunk.toString();
    });

    req.on('end', function () {
        var contentType = req.headers['content-type'];

        if (!contentType || contentType.indexOf('multipart') !== 0) {
            postData = qs.parse(postData);
        }

        paths[uri](url, req, res, postData);
    });
});
32 |
// Log server errors. The listener must declare the `error` parameter;
// without it the log statement itself raised a ReferenceError and hid the
// original failure.
server.on('error', function (error) {
    console.log("ERROR:", error);
});
36 |
37 | server.listen(port);
38 |
39 | module.exports = function (path, cb) {
40 | if (paths[path]) {
41 | throw new Error("Path " + path + " exists");
42 | }
43 |
44 | paths[path] = cb;
45 | };
46 |
// Expose the server address and a way to shut the server down.
module.exports.host = host;
module.exports.port = port;
module.exports.close = function closeServer() {
    server.close();
};
52 |
--------------------------------------------------------------------------------
/test/set.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index'),
2 | server = require('./server'),
3 | URL = require('url'),
4 | fs = require('fs'),
5 | expected = {
6 | title: "TITLE",
7 | content: "CONTENT",
8 | innerHTML: 'TITLE',
10 | source: 'TITLE',
11 | object: {
12 | id: 'content'
13 | },
14 | array:
15 | ['TITLE',
16 | { first_link: '/1' },
17 | 'TITLE',
18 | 'TITLE',
19 | { all_links: ['/1', '/2'] },
20 | { title: 'TITLE' }],
21 | find: 'CONTENT',
22 | find_arr: ['/1', '/2'],
23 | get: {
24 | title: "1"
25 | },
26 | follow: [
27 | { title: "1" },
28 | { title: "2" }
29 | ],
30 | follow_array: [
31 | "/1",
32 | "/2",
33 | { title: "1" },
34 | { title: "2" }
35 | ],
36 | get_follow: [
37 | { page: "2",
38 | title: "1" },
39 | { page: "3",
40 | title: "1" }
41 | ],
42 | get_nested_follow: {
43 | pages: [
44 | { page: "2" },
45 | { page: "3" }],
46 | title: "1"
47 | },
48 | then: { called: true },
49 | then_multiple: [1, 2, 3],
50 | then_none: {},
51 | //then_none_done: [{}, {}],
52 | then_new_context: 'TITLE',
53 | then_new_data: [1, 2, 3]
54 | },
55 | expected_array_root = [
56 | '/1',
57 | '/2',
58 | { href: '/1', name: '1' },
59 | { href: '/2', name: '2' },
60 | [[['/1']]]
61 | ],
62 | expected_callbacks = {
63 | links: [
64 | { url: '/1', link: 1 },
65 | { url: '/2', link: 2 }
66 | ],
67 | page2: { title: 2 }
68 | },
69 | url = server.host + ':' + server.port;
70 |
// Pass an array as the root argument of set() and compare the collected
// data with expected_array_root.
module.exports.array_root = function (assert) {
    var thenRan = false;
    var dataRan = false;

    osmosis.get(url + '/set')
    .set([
        'a@href',
        osmosis.find('a').set('name').set('href', '@href'),
        [[['a:first@href']]]
    ])
    .then(function (context, data) {
        thenRan = true;
        assert.ok(Array.isArray(data));
    })
    .data(function (data) {
        dataRan = true;
        assert.deepEqual(data, expected_array_root);
    })
    .done(function () {
        assert.ok(thenRan);
        assert.ok(dataRan);
        assert.done();
    });
};
94 |
// Use callbacks as set() values and as the get() target, then compare the
// collected data with expected_callbacks.
module.exports.callbacks = function (assert) {
    var thenRan = false;
    var dataRan = false;

    osmosis.get(url + '/set')
    .set({
        links: osmosis.find('a')
                .set('link', function (link) {
                    var label = link.innerHTML;

                    return parseInt(label);
                })
                .set('url', function (link) {
                    var href = link.getAttribute("href");

                    return href;
                }),
        page2: osmosis.get(function (doc) {
                    var lastLink = doc.querySelector('a:last');

                    return lastLink;
                }).set('title', 'title')
    })
    .then(function () {
        thenRan = true;
    })
    .data(function (data) {
        dataRan = true;
        assert.deepEqual(data, expected_callbacks);
    })
    .done(function () {
        assert.ok(thenRan);
        assert.ok(dataRan);
        assert.done();
    });
};
124 |
// Exercise set() with nested objects, arrays and sub-instances (find, get,
// follow, then) and compare the collected data with `expected`.
module.exports.nested = function (assert) {
    var thenRan = false;
    var dataRan = false;

    osmosis.get(url + '/set')
    .set({
        title: 'title',
        content: '#content',
        fake: 'fake-selector',
        innerHTML: 'head:html',
        source: 'title:source',
        object: {
            id: 'div@id',
            fake: 'fake-selector'
        },
        array: [
            'title',
            { first_link: 'a:first@href' },
            osmosis.find('title'),
            osmosis.then(function (context, data, next) {
                var titleNode = context.get('title');

                next(titleNode, data);
            }),
            { all_links: ['a@href'] },
            osmosis.find('title').set('title')
        ],
        find: osmosis.find('div'),
        find_arr: osmosis.find('a@href'),
        get: osmosis.get('/1').set({ title: 'title' }),
        get_fail: osmosis.get('/notfound').set({ title: 'title' }),
        follow: osmosis.follow('a').set({ title: 'title' }),
        follow_fail: osmosis.follow('fake-selector').set({ title: 'title' }),
        follow_array: [
            'a@href',
            'fake-selector',
            osmosis.follow('a').set({ title: 'title' })
        ],
        get_follow:
            osmosis('/1')
            .set({ title: 'title' })
            .follow('a')
            .set({ page: 'title' }),
        get_nested_follow: osmosis.get('/1').set({
            title: 'title',
            pages: osmosis.follow('a').set({ page: 'title' })
        }),
        then: osmosis.then(function (context, data, next) {
            data.called = true;
            next(context, data);
        }),
        then_multiple: osmosis.then(function (context, data, next, done) {
            var value;

            data.called = true;

            for (value = 1; value <= 3; value++) {
                next(context, value);
            }

            done();
        }),
        then_new_data: osmosis.then(function (context, data, next) {
            next(context, [1, 2, 3]);
        }),
        then_new_context: osmosis.then(function (context, data, next) {
            var titleNode = context.get('title');

            next(titleNode, data);
        }),
        then_none: osmosis.then(function () {
        })
        /*then_done_none: osmosis.then(function(context, data, next, done) {
            setTimeout(function() {
                next(context, data);
                setTimeout(function() {
                    next(context, data);
                    done();
                }, 200);
            }, 350)
        }),*/
    })
    .then(function (context, data, next) {
        thenRan = true;
        // After set() the context must still be the document itself.
        assert.equal(context, context.doc());
        next(context, data);
    })
    .data(function (data) {
        dataRan = true;
        assert.deepEqual(data, expected);
    })
    .done(function () {
        assert.ok(thenRan);
        assert.ok(dataRan);
        assert.done();
    });
};
217 |
218 | server('/set', function (url, req, res) {
219 | res.setHeader("Content-Type", "text/html");
220 | res.write('TITLE' +
221 | 'CONTENT
' +
222 | '12');
223 | res.end();
224 | });
225 |
// Handler for /1: writes the body immediately and ends the response after
// 500ms.
server('/1', function (url, req, res) {
    var body = '1' +
               '';

    res.setHeader("Content-Type", "text/html");
    res.write(body);

    setTimeout(function () {
        res.end();
    }, 500);
});
234 |
// Handler for /2: writes the body immediately and ends the response after
// 250ms.
server('/2', function (url, req, res) {
    var body = '2';

    res.setHeader("Content-Type", "text/html");
    res.write(body);

    setTimeout(function () {
        res.end();
    }, 250);
});
242 |
// Handler for /3: writes the body and ends the response immediately.
server('/3', function (url, req, res) {
    var body = '3';

    res.setHeader("Content-Type", "text/html");
    res.write(body);
    res.end();
});
248 |
--------------------------------------------------------------------------------
/test/stop.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index'),
2 | server = require('./server'),
3 | url = server.host + ':' + server.port;
4 |
// Stop the instance once two "loaded" log messages have been seen; the
// `then` callback at the end of the chain must never run.
module.exports.stop = function (assert) {
    var thenReached = false;
    var loaded = 0;
    var instance =
        osmosis.get(url + '/delay-response')
        .follow('a')
        .follow('a')
        .follow('a')
        .log(function (msg) {
            if (msg.indexOf('loaded') > -1) {
                loaded += 1;

                if (loaded === 2) {
                    instance.stop();
                }
            }
        })
        .then(function () {
            thenReached = true;
        })
        .done(function () {
            assert.equal(loaded, 2);
            assert.equal(thenReached, false);
            assert.ok(true);
            assert.done();
        });
};
28 |
29 |
// Handler for /delay-response: sends the body in a single end() call.
server('/delay-response', function (url, req, res) {
    var body = '';

    res.setHeader("Content-Type", "text/html");
    res.end(body);
});
34 |
--------------------------------------------------------------------------------
/test/submit.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index');
2 | var server = require('./server');
3 | var fs = require('fs');
4 | var URL = require('url');
5 |
6 | var url = server.host + ':' + server.port;
7 |
8 | /*
9 | * TODO: Add radio button tests
10 | * Add input[name] case-insensitivity tests
11 | */
12 |
// Submit the first form and compare the echoed data with the expected
// inputs of form 1 using submit button `sub1`.
module.exports.form1 = function (assert) {
    var thenRan = false;

    osmosis.get(url + '/submit-form')
    .submit('form')
    .then(function (context) {
        var submitted;

        thenRan = true;
        submitted = JSON.parse(context.get('#data').text());
        assert.deepEqual(submitted, getInputs(1, 'sub1'));
    })
    .done(function () {
        assert.ok(thenRan);
        assert.done();
    });
};
27 |
// Submit the second form and compare the echoed data with the expected
// inputs of form 2 using submit button `sub1`.
module.exports.form2 = function (assert) {
    var thenRan = false;

    osmosis.get(url + '/submit-form')
    .submit('form[2]')
    .then(function (context) {
        var submitted;

        thenRan = true;
        submitted = JSON.parse(context.get('#data').text());
        assert.deepEqual(submitted, getInputs(2, 'sub1'));
    })
    .done(function () {
        assert.ok(thenRan);
        assert.done();
    });
};
42 |
// Submit through a specific button (`sub2`) of the first form.
module.exports.button = function (assert) {
    var thenRan = false;

    osmosis.get(url + '/submit-form')
    .submit('form:first [name="sub2"]')
    .then(function (context) {
        var submitted;

        thenRan = true;
        submitted = JSON.parse(context.get('#data').text());
        assert.deepEqual(submitted, getInputs(1, 'sub2'));
    })
    .done(function () {
        assert.ok(thenRan);
        assert.done();
    });
};
57 |
// Submit through the `sub2` element selected under form[2]; the echoed
// data is expected to be form 1's inputs plus sub2 = 'Submit Query'.
module.exports.form_attr = function (assert) {
    var thenRan = false;
    var wanted = getInputs(1);

    wanted.sub2 = 'Submit Query';

    osmosis.get(url + '/submit-form')
    .submit('form[2] [name="sub2"]')
    .then(function (context) {
        var submitted;

        thenRan = true;
        submitted = JSON.parse(context.get('#data').text());
        assert.deepEqual(submitted, wanted);
    })
    .done(function () {
        assert.ok(thenRan);
        assert.done();
    });
};
74 |
// Supply form data through a callback that reads a value from the page.
module.exports.context_data = function (assert) {
    var thenRan = false;
    var wanted = getInputs(2, 'sub1');

    wanted.it1 = 'success';

    osmosis.get(url + '/submit-form')
    .submit('form[2]', function(context) {
        var dynamic = context.get('#dynamic-data').text();

        return {it1: dynamic};
    })
    .then(function (context) {
        var submitted;

        thenRan = true;
        submitted = JSON.parse(context.get('#data').text());
        assert.deepEqual(submitted, wanted);
    })
    .done(function () {
        assert.ok(thenRan);
        assert.done();
    });
};
93 |
// Submit a file field (this test file itself) and expect the page to
// report 'success'.
module.exports.multipart = function (assert) {
    var thenRan = false;
    var upload = {
        image: {
            file: __dirname + '/submit.js',
            content_type: 'application/javascript'
        }
    };

    osmosis.get(url + '/submit-form')
    .submit('form[2] [name="sub3"]', upload)
    .then(function (context) {
        thenRan = true;
        assert.equal(context.get('div').text(), 'success');
    })
    .done(function () {
        assert.ok(thenRan);
        assert.done();
    });
};
108 |
/**
 * Build the name/value map a form submission is expected to produce.
 *
 * Inputs whose name starts with "sub" are skipped unless the name equals
 * `submit`, and inputs whose `value` is undefined are skipped as well.
 *
 * @param {number} form - 2 selects `inputs2`; any other value selects
 *                        `inputs1`.
 * @param {string} [submit] - Name of the "sub*" input to keep.
 * @returns {Object} Map of input name to expected value.
 */
function getInputs(form, submit) {
    var obj = {},
        // Declared locally: the previous undeclared assignment created an
        // implicit global named `inputs`.
        inputs = (form === 2) ? inputs2 : inputs1,
        input;

    for (input in inputs) {
        if (input.substr(0, 3) === 'sub' && input !== submit) {
            continue;
        }

        if (inputs[input].value === undefined) {
            continue;
        }

        obj[input] = inputs[input].value;
    }

    return obj;
}
132 |
133 | var inputs1 = {
134 | 's1': {
135 | html: '',
136 | value: '2'
137 | },
138 | 's2': {
139 | html: '',
140 | value: 'two'
141 | },
142 | 'cb1': {
143 | html: '',
144 | value: undefined
145 | },
146 | 'cb2': {
147 | html: '',
148 | value: 'two'
149 | },
150 | 'cb3[0]': {
151 | html: '',
152 | value: 'one'
153 | },
154 | 'cb3[1]': {
155 | html: '',
156 | value: 'on'
157 | },
158 | 'cb3[2]': {
159 | html: '',
160 | value: 'on'
161 | },
162 | 'it': {
163 | html: '',
164 | value: undefined
165 | },
166 | 'ta': {
167 | html: '',
168 | value: 'text area test'
169 | },
170 | 'sub1': {
171 | html: '',
172 | value: 'submit'
173 | },
174 | 'sub2': {
175 | html: '',
176 | value: 'Submit 2'
177 | }
178 | };
179 |
180 | var inputs2 = {
181 | 'it1': {
182 | html: '',
183 | value: 'test'
184 | },
185 | 'sub2': {
186 | html: '',
187 | value: 'Submit Query'
188 | },
189 | 'sub1': {
190 | html: '',
191 | value: 'button'
192 | },
193 | 'sub3': {
194 | html: '',
195 | value: '3'
196 | }
197 | };
198 |
199 | server('/submit-form', function (url, req, res, data) {
200 | res.setHeader("Content-Type", "text/html");
201 | var out = '';
202 |
203 | if (data || Object.keys(url.query).length !== 0) {
204 | out += '' + url.href + '
';
205 | out += '' + req.method + '
';
206 | out += '' + JSON.stringify(data || url.query) + '
';
207 | } else {
208 | out += '';
215 | out += '';
222 | out += 'success
';
223 | }
224 |
225 | res.end(out);
226 | });
227 |
228 | server('/form-multipart', function (url, req, res, data) {
229 | res.setHeader("Content-Type", "text/html");
230 | var out = 'success';
231 |
232 | if (req.method !== 'POST')
233 | out = req.method;
234 | else if (req.headers['content-type'].indexOf('multipart/form-data') !== 0)
235 | out = JSON.stringify(req.headers);
236 | else if (!data)
237 | out = 'no data';
238 | else if (data.toString().indexOf('Content-Disposition: form-data') === -1)
239 | out = data;
240 | res.end('' + out + '
');
241 | });
242 |
--------------------------------------------------------------------------------
/test/then.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index'),
2 | html = '123';
3 |
// `then` callback declaring a single parameter: it is invoked once per
// matched <b>, and the chain continues to data() and done() although the
// callback never calls next or done.
module.exports.two_args = function (assert) {
    var count = 0,
        calledThen = false,
        // Declared locally so each test checks its own flag; it used to be
        // an implicit global shared by every test in this file.
        calledData = false;

    osmosis.parse(html)
    .find('b')
    .then(function (context) {
        assert.equal(++count, context.text());
        calledThen = true;
    })
    .data(function () {
        calledData = true;
    })
    .done(function () {
        assert.equal(count, 3);
        assert.ok(calledThen);
        assert.ok(calledData);
        assert.done();
    });
};
25 |
// `then` callbacks declaring three parameters: `next` may be called later
// (first callback) or several times (second callback).
module.exports.three_args = function (assert) {
    var count = 0,
        calledThen1 = false,
        calledThen2 = false,
        // Declared locally so each test checks its own flag; it used to be
        // an implicit global shared by every test in this file.
        calledData = false,
        i;

    osmosis.parse(html)
    .find('b')
    .then(function (context, data, next) {
        assert.equal(++count, context.text());
        setTimeout(function () {
            next(context, data);
        }, 200);
        calledThen1 = true;
    })
    .then(function (context, data, next) {
        for (i = 0; i < 3; i++) {
            next(context, data);
        }

        calledThen2 = true;
    })
    .data(function () {
        calledData = true;
    })
    .done(function () {
        assert.equal(count, 3);
        assert.ok(calledThen1);
        assert.ok(calledThen2);
        assert.ok(calledData);
        assert.done();
    });
};
60 |
// `then` callback declaring four parameters: it calls `next` three times
// on timers and calls `done` after the last one.
module.exports.four_args = function (assert) {
    var count = 0,
        calledThen = false,
        // Declared locally so each test checks its own flag; it used to be
        // an implicit global shared by every test in this file.
        calledData = false,
        i;

    osmosis.parse(html)
    .find('b')
    .then(function (context, data, next, done) {
        assert.equal(++count, context.text());

        for (i = 0; i < 3; i++) {
            // The third setTimeout argument is handed to the callback as
            // `last`; it is true only for the final timer.
            setTimeout(function (last) {
                next(context, data);

                if (last) {
                    done();
                }
            }, i * 200, i == 2);
        }

        calledThen = true;
    })
    .data(function () {
        calledData = true;
    })
    .done(function () {
        assert.equal(count, 3);
        assert.ok(calledThen);
        assert.ok(calledData);
        assert.done();
    });
};
94 |
// A `then` callback whose parameter is named `document` must receive an
// object exposing `documentElement`.
// NOTE(review): the parameter name looks significant to osmosis — keep it.
module.exports.document = function (assert) {
    var parsed = osmosis.parse(html);

    parsed
    .then(function (document) {
        assert.ok(document.documentElement);
    })
    .done(function () {
        assert.done();
    });
};
104 |
105 | module.exports.window = function (assert) { // Checks a .then() callback whose only parameter is named `window`
106 | osmosis.parse(html)
107 | .then(function (window) { // NOTE(review): presumably osmosis chooses what to pass from this parameter name; confirm in lib/commands/then.js before renaming
108 | assert.ok(window.window); // the received object must expose a truthy `window` property
109 | })
110 | .done(function () {
111 | assert.done();
112 | });
113 | };
114 |
115 | /*
116 | module.exports.jquery = function(assert) {
117 | osmosis.parse(html)
118 | .then(function($, data) {
119 | assert.ok(typeof $ === "function")
120 | })
121 | .done(function() {
122 | assert.done();
123 | })
124 | }
125 | */
126 |
--------------------------------------------------------------------------------
/test/user_agent_option.js:
--------------------------------------------------------------------------------
1 | var osmosis = require('../index'),
2 | server = require('./server'),
3 | url = server.host + ':' + server.port;
4 |
5 | module.exports.user_agent_as_function = function (assert) {
6 | var testUserAgent = function () {return 'UserAgent As Function';};
7 | test_user_agent(testUserAgent, testUserAgent(), assert);
8 | };
9 |
10 | module.exports.user_agent_as_string = function (assert) {
11 | var testUserAgent = 'UserAgent As String';
12 | test_user_agent(testUserAgent, testUserAgent, assert);
13 | };
14 | function test_user_agent(ua_req, ua_test, assert) {
15 | var ua_via_response = false;
16 | osmosis
17 | .get(url + '/return-user-agent')
18 | .config({user_agent: ua_req})
19 | .find('b')
20 | .then(function (context) {
21 | ua_via_response = context.textContent;
22 | })
23 | .done(function () {
24 | assert.equal(ua_test, ua_via_response);
25 | assert.ok(true);
26 | assert.done();
27 | });
28 | }
29 |
30 | server('/return-user-agent', function (url, req, res) {
31 | res.end('' + req.headers["user-agent"] + '');
32 | });
33 |
--------------------------------------------------------------------------------
/test/z_close.js:
--------------------------------------------------------------------------------
1 | var server = require('./server');
2 |
3 | module.exports.server = function(assert) {
4 | server.close();
5 | assert.done();
6 | }
7 |
--------------------------------------------------------------------------------