├── package.json ├── spawn.js ├── LICENSE ├── useragents.js ├── errors.js ├── README.md └── index.js /package.json: -------------------------------------------------------------------------------- 1 | { "name" : "curlrequest", 2 | "description" : "A curl wrapper for node", 3 | "version" : "1.0.1", 4 | "homepage" : "https://github.com/node-js-libs/curlrequest", 5 | "author" : "Chris O'Hara ", 6 | "main" : "index", 7 | "repository": { 8 | "type": "git", 9 | "url": "http://github.com/node-js-libs/curlrequest.git" 10 | }, 11 | "engines": { "node": ">= 0.4.0" }, 12 | "licenses": [{ 13 | "type": "MIT", 14 | "url": "http://github.com/node-js-libs/curlrequest/raw/master/LICENSE" 15 | }] 16 | } 17 | -------------------------------------------------------------------------------- /spawn.js: -------------------------------------------------------------------------------- 1 | var child = require('child_process'); 2 | 3 | /** 4 | * Limit the amount of processes that can be spawned per tick. 5 | */ 6 | 7 | var spawned = 0 8 | , max_per_tick = 10 9 | , resetting = false; 10 | 11 | /** 12 | * See `child_process.spawn()`. 
13 | */ 14 | 15 | module.exports = function (cmd, args, options, callback) { 16 | var args = Array.prototype.slice.call(arguments); 17 | if (spawned < max_per_tick) { 18 | spawned++; 19 | callback(child.spawn.apply(child, args.slice(0, -1))); 20 | } else { 21 | if (!resetting) { 22 | resetting = true; 23 | process.nextTick(function () { 24 | spawned = 0; 25 | resetting = false; 26 | }); 27 | } 28 | process.nextTick(function () { 29 | module.exports.apply(null, args); 30 | }); 31 | } 32 | }; 33 | 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Chris O'Hara 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /useragents.js: -------------------------------------------------------------------------------- 1 | var userAgents = [ 2 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.905.0 Safari/535.7', 3 | 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)', 4 | 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)', 5 | 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 6.0)', 6 | 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13', 7 | 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6', 8 | 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)', 9 | 'Opera/9.20 (Windows NT 6.0; U; en)', 10 | 'Mozilla/5.0 (Windows; U; Windows NT 6.1; ru; rv:1.9.2) Gecko/20100115 Firefox/3.6', 11 | 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; MS-RTC LM 8)', 12 | 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/6.0', 13 | 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_7; en-us) AppleWebKit/533.4 (KHTML, like Gecko) Version/4.1 Safari/533.4', 14 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.22 (KHTML, like Gecko) Chrome/11.0.683.0 Safari/534.22', 15 | 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)', 16 | 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.12 Safari/535.11', 17 | 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2', 18 | 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.809.0 Safari/535.1', 19 | 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:5.0) Gecko/20110619 Firefox/5.0', 20 | 'Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20110814 Firefox/6.0', 21 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0) 
Gecko/20100101 Firefox/9.0', 22 | 'Mozilla/5.0 (Windows; U; Windows NT 6.1; ru; rv:1.9.2.3) Gecko/20100401 Firefox/4.0 (.NET CLR 3.5.30729)', 23 | 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)', 24 | 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0', 25 | 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; chromeframe/11.0.696.57)', 26 | 'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', 27 | 'Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00', 28 | 'Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00' 29 | ]; 30 | 31 | module.exports = userAgents; 32 | -------------------------------------------------------------------------------- /errors.js: -------------------------------------------------------------------------------- 1 | module.exports = [ 2 | null, 3 | "Unsupported protocol. This build of curl has no support for this protocol.", 4 | "Failed to initialize.", 5 | "URL malformed. The syntax was not correct.", 6 | null, 7 | "Couldn't resolve proxy. The given proxy host could not be resolved.", 8 | "Couldn't resolve host. The given remote host was not resolved.", 9 | "Failed to connect to host.", 10 | "FTP weird server reply. The server sent data curl couldn't parse.", 11 | "FTP access denied. The server denied login or denied access to the particular resource or directory you wanted to reach. Most often you tried to change to a directory that doesn't exist on the server.", 12 | null, 13 | "FTP weird PASS reply. Curl couldn't parse the reply sent to the PASS request.", 14 | null, 15 | "FTP weird PASV reply, Curl couldn't parse the reply sent to the PASV request.", 16 | "FTP weird 227 format. Curl couldn't parse the 227-line the server sent.", 17 | "FTP can't get host. Couldn't resolve the host IP we got in the 227-line.", 18 | null, 19 | "FTP couldn't set binary. Couldn't change transfer method to binary.", 20 | "Partial file. 
Only a part of the file was transferred.", 21 | "FTP couldn't download/access the given file, the RETR (or similar) command failed.", 22 | null, 23 | "FTP quote error. A quote command returned error from the server.", 24 | "HTTP page not retrieved. The requested url was not found or returned another error with the HTTP error code being 400 or above. This return code only appears if -f/--fail is used.", 25 | "Write error. Curl couldn't write data to a local filesystem or similar.", 26 | null, 27 | "FTP couldn't STOR file. The server denied the STOR operation, used for FTP uploading.", 28 | "Read error. Various reading problems.", 29 | "Out of memory. A memory allocation request failed.", 30 | "Operation timeout. The specified time-out period was reached according to the conditions.", 31 | null, 32 | "FTP PORT failed. The PORT command failed. Not all FTP servers support the PORT command, try doing a transfer using PASV instead!", 33 | "FTP couldn't use REST. The REST command failed. This command is used for resumed FTP transfers.", 34 | null, 35 | "HTTP range error. The range command didn't work.", 36 | "HTTP post error. Internal post-request generation error.", 37 | "SSL connect error. The SSL handshaking failed.", 38 | "FTP bad download resume. Couldn't continue an earlier aborted download.", 39 | "FILE couldn't read file. Failed to open the file. Permissions?", 40 | "LDAP cannot bind. LDAP bind operation failed.", 41 | "LDAP search failed.", 42 | null, 43 | "Function not found. A required LDAP function was not found.", 44 | "Aborted by callback. An application told curl to abort the operation.", 45 | "Internal error. A function was called with a bad parameter.", 46 | null, 47 | "Interface error. A specified outgoing interface could not be used.", 48 | null, 49 | "Too many redirects. 
When following redirects, curl hit the maximum amount.", 50 | "Unknown TELNET option specified.", 51 | "Malformed telnet option.", 52 | null, 53 | "The peer's SSL certificate or SSH MD5 fingerprint was not ok.", 54 | "The server didn't reply anything, which here is considered an error.", 55 | "SSL crypto engine not found.", 56 | "Cannot set SSL crypto engine as default.", 57 | "Failed sending network data.", 58 | "Failure in receiving network data.", 59 | null, 60 | "Problem with the local certificate.", 61 | "Couldn't use specified SSL cipher.", 62 | "Peer certificate cannot be authenticated with known CA certificates.", 63 | "Unrecognized transfer encoding.", 64 | "Invalid LDAP URL.", 65 | "Maximum file size exceeded.", 66 | "Requested FTP SSL level failed.", 67 | "Sending the data requires a rewind that failed.", 68 | "Failed to initialise SSL Engine.", 69 | "The user name, password, or similar was not accepted and curl failed to log in.", 70 | "File not found on TFTP server.", 71 | "Permission problem on TFTP server.", 72 | "Out of disk space on TFTP server.", 73 | "Illegal TFTP operation.", 74 | "Unknown TFTP transfer ID.", 75 | "File already exists (TFTP).", 76 | "No such user (TFTP).", 77 | "Character conversion failed.", 78 | "Character conversion functions required.", 79 | "Problem with reading the SSL CA cert (path access rights).", 80 | "The resource referenced in the URL does not exist.", 81 | "An unspecified error occurred during the SSH session.", 82 | "Failed to shut down the SSL connection.", 83 | null, 84 | "Could not load CRL file, missing or wrong format.", 85 | "Issuer check failed." 86 | ]; 87 | 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **curlrequest** is a node wrapper for the command line **curl(1)** 2 | 3 | ```bash 4 | $ npm install curlrequest 5 | ``` 6 | 7 | ## Why? 
8 | 9 | Curl has significantly more features than any of the node.js request 10 | libraries. A lot of the logic that you would build on top of a node.js 11 | request library (rate limiting, URL globbing, uploading/downloading 12 | file, better proxy support) already exists in curl. 13 | 14 | - Curl is mature, stable, and fast 15 | - Separate requests/processes take advantage of all CPUs 16 | - Spawning processes is relatively cheap and non-blocking 17 | - Better control over connect, request, and retry timeouts. If a request has hung just kill the process 18 | 19 | Note: don't use this if you need to stream the response - use 20 | [mikeal/request](https://github.com/mikeal/request) instead 21 | 22 | ## Usage 23 | 24 | Make a request with curl - callback receives `(err, stdout, meta)` on request 25 | completion 26 | 27 | ```javascript 28 | var curl = require('curlrequest'); 29 | 30 | curl.request(options, callback); 31 | ``` 32 | 33 | Note that you can also call `curl.request(url, callback)` which is 34 | shorthand for `curl.request({ url: url }, callback)`. 35 | 36 | To set up default options and return a function that can be used later 37 | 38 | ```javascript 39 | var request = curl.request(default_options); 40 | 41 | request([options ,] callback); 42 | ``` 43 | 44 | ## Options 45 | 46 | `url` 47 | 48 | The request url. 49 | 50 | `method` - *default: GET* 51 | 52 | The request method. 53 | 54 | `encoding` - *default: utf8* 55 | 56 | Encode the response body as either `utf8` or `ascii`. Set to `null` to return a 57 | buffer. 58 | 59 | `headers` - *default: {}* 60 | 61 | Set request headers, e.g. `headers: { accept: 'text/*' }` 62 | 63 | `data` - *default: false* 64 | 65 | An object containing data to urlencode and then POST. 66 | 67 | `useragent` - *default: random* 68 | 69 | Set the request user-agent. When omitted, one is picked at random from a built-in list. 70 | 71 | `location` - *default: true* 72 | 73 | Whether to follow 30x redirects or not. 
74 | 75 | `redirects` - *default: 3* 76 | 77 | The maximum number of redirects to follow before failing with a "Too many redirects" error. 78 | 79 | `retries` - *default: 0* 80 | 81 | How many times to retry the request in the case of failure. 82 | 83 | `timeout` - *default: false* 84 | 85 | The maximum number of seconds the request can take before failing with 86 | error "timeout". 87 | 88 | `scope` - *default: {}* 89 | 90 | The scope to call the callback in. 91 | 92 | `require` - *default: null* 93 | 94 | Pass a string or regular expression to search for in the response body. If 95 | there's no match, fail the request with "response does not contain required string". You 96 | can also pass an array of strings / regexps to search for where only one 97 | has to match. 98 | 99 | `require_not` 100 | 101 | The inverse of `require` - fail with "response contains bad string" if the response contains a string. 102 | 103 | `process` - *default: false* 104 | 105 | Pass in a function which modifies the response body before sending it to 106 | the callback. Useful if you need to modify the response in some way before 107 | a higher level library has the chance to modify it. 108 | 109 | `file` - *default: false* 110 | 111 | Open a file and process it like a request response, useful if using 112 | temporary files. 113 | 114 | `stderr` - *default: false* 115 | 116 | Set this to `true` to pipe the stderr of each curl process to the main process. Set this to a 117 | string to write stderr to a file. 118 | 119 | `pretend` - *default: false* 120 | 121 | Useful if you want to see what curl command is to be executed without actually 122 | making the request. 123 | 124 | `fail` - *default: false* 125 | 126 | When set to true, curl is run with `--fail` and the error message it prints for a failing request is returned as the first 127 | parameter of the callback. 
128 | 129 | ```javascript 130 | curl.request({ url: 'http://google.com', pretend: true }, function (err, stdout, meta) { 131 | console.log('%s %s', meta.cmd, meta.args.join(' ')); 132 | }); 133 | ``` 134 | 135 | `curl_path` - *default: 'curl'* 136 | 137 | Use this to specify an alternative path for curl. 138 | 139 | ### Passing options directly to curl 140 | 141 | Any additional options are sent as command line options to curl. See `man 142 | curl` or `curl --manual` for a detailed description of options and usage. 143 | 144 | **Example 1**. Include response headers in the output 145 | 146 | ```javascript 147 | var options = { url: 'google.com', include: true }; 148 | 149 | curl.request(options, function (err, parts) { 150 | parts = parts.split('\r\n'); 151 | var data = parts.pop() 152 | , head = parts.pop(); 153 | }); 154 | ``` 155 | 156 | **Example 2**. Limit the download speed of a transfer 157 | 158 | ```javascript 159 | var options = { 160 | url: 'example.com/some/large/file.zip' 161 | , 'limit-rate': '500k' 162 | , encoding: null 163 | }; 164 | 165 | curl.request(options, function (err, file) { 166 | //file is a Buffer 167 | }); 168 | ``` 169 | 170 | **Example 3**. See what's going on under the hood 171 | 172 | ```javascript 173 | var options = { 174 | url: 'google.com' 175 | , verbose: true 176 | , stderr: true 177 | }; 178 | 179 | curl.request(options, function (err, data) { 180 | //.. 
181 | }); 182 | ``` 183 | 184 | ## License 185 | 186 | Copyright (c) 2016 Chris O'Hara 187 | 188 | Permission is hereby granted, free of charge, to any person obtaining 189 | a copy of this software and associated documentation files (the 190 | "Software"), to deal in the Software without restriction, including 191 | without limitation the rights to use, copy, modify, merge, publish, 192 | distribute, sublicense, and/or sell copies of the Software, and to 193 | permit persons to whom the Software is furnished to do so, subject to 194 | the following conditions: 195 | 196 | The above copyright notice and this permission notice shall be 197 | included in all copies or substantial portions of the Software. 198 | 199 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 200 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 201 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 202 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 203 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 204 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 205 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 206 | 207 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var util = require('util') 2 | , fs = require('fs') 3 | , spawn = require('./spawn') 4 | , errors = require('./errors') 5 | , cwd = process.cwd(); 6 | 7 | /** 8 | * Make some curl opts friendlier. 9 | */ 10 | 11 | var curl_map = { 12 | timeout: 'max-time' 13 | , redirects: 'max-redirs' 14 | , method: 'request' 15 | , useragent: 'user-agent' 16 | }; 17 | 18 | /** 19 | * Default user-agents. 20 | */ 21 | 22 | var user_agents = require('./useragents.js') 23 | , user_agents_len = user_agents.length; 24 | 25 | /** 26 | * Default request headers. 
27 | */ 28 | 29 | var default_headers = { 30 | 'Accept': '*/*' 31 | , 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3' 32 | , 'Accept-Language': 'en-US,en;q=0.8' 33 | }; 34 | 35 | /** 36 | * Make a request with cURL. 37 | * 38 | * @param {Object|String} options (optional) - see `man curl` 39 | * @param {Function} callback (optional) 40 | * @api public 41 | */ 42 | 43 | exports.request = function (options, callback) { 44 | if (arguments.length === 1) { 45 | var defaults = options; 46 | return function (options, callback) { 47 | if (typeof options === 'function') { 48 | callback = options; 49 | options = {}; 50 | } else if (typeof options === 'string') { 51 | options = { url: options }; 52 | } 53 | for (var key in defaults) { 54 | if (typeof options[key] === 'undefined') { 55 | options[key] = defaults[key]; 56 | } 57 | } 58 | exports.request.call(this, options, callback); 59 | }; 60 | } 61 | 62 | if (options.retries) { 63 | var remaining = options.retries; 64 | delete options.retries; 65 | return (function curl() { 66 | exports.request(options, function (err) { 67 | if (!err || !--remaining) { 68 | return callback.apply(this, arguments); 69 | } 70 | process.nextTick(curl); 71 | }); 72 | })(); 73 | } 74 | 75 | if (typeof options === 'string') { 76 | options = { url: options }; 77 | } else { 78 | options = exports.copy(options); 79 | } 80 | 81 | for (var key in curl_map) { 82 | if (typeof options[key] !== 'undefined') { 83 | options[curl_map[key]] = options[key]; 84 | delete options[key]; 85 | } 86 | } 87 | 88 | var curl 89 | , curl_path = 'curl' 90 | , args = ['--silent', '--show-error', '--no-buffer', '--globoff'] 91 | , start = new Date 92 | , err 93 | , stderr = '' 94 | , stdoutlen 95 | , stdout = new Buffer(stdoutlen = 0) 96 | , encoding 97 | , complete 98 | , cleanup 99 | , postprocess 100 | , require_str 101 | , require_not_str 102 | , scope = {} 103 | , cmd = 'curl' 104 | , timeout; 105 | 106 | function finish() { 107 | if (options.fail && stderr) { 108 | err 
= String(stderr).replace(/^curl: \(\d+\) /, ''); // "curl: (22) The requested URL returned error..." => "The requested URL returned error..." 109 | } else if (err in errors) { 110 | err = errors[err]; 111 | } 112 | callback.call(scope, err, stdout, { 113 | cmd: cmd 114 | , args: args 115 | , time: (new Date().getTime() - start.getTime()) 116 | }); 117 | complete = true; 118 | } 119 | 120 | //Allow for a custom curl path 121 | if (options.curl_path) { 122 | curl_path = options.curl_path; 123 | delete options.curl_path; 124 | cmd = curl_path 125 | } 126 | 127 | //Follow location by default 128 | if ('max-redirs' in options) { 129 | options.location = !!options['max-redirs']; 130 | } else { 131 | options.location = true; 132 | options['max-redirs'] = 3; 133 | } 134 | 135 | //Add an additional setTimeout for max-time 136 | if (options['max-time']) { 137 | timeout = setTimeout(function () { 138 | if (complete) return; 139 | stderr = 'timeout', stdout = null; 140 | finish(); 141 | if (curl && curl.kill) curl.kill('SIGKILL'); 142 | }, 1000 * options['max-time']); 143 | } 144 | 145 | //Default encoding is utf8. 
Set encoding = null to get a buffer 146 | if (!options.encoding && options.encoding !== null) { 147 | options.encoding = 'utf8'; 148 | } 149 | encoding = options.encoding; 150 | if (encoding === 'ascii') { 151 | options['use-ascii'] = true; 152 | } 153 | delete options.encoding; 154 | 155 | //Parse POST data 156 | if (options.data && typeof options.data === 'object') { 157 | var data = []; 158 | for (var key in options.data) { 159 | data.push(encodeURIComponent(key) + '=' + encodeURIComponent(options.data[key])); 160 | } 161 | options.data = data.join('&'); 162 | } 163 | 164 | //Check for the occurrence of a string and fail if not found 165 | if (options.require) { 166 | require_str = options.require; 167 | if (!Array.isArray(require_str)) { 168 | require_str = [require_str]; 169 | } 170 | delete options.require; 171 | } 172 | 173 | //Check for the occurrence of a string and fail if found 174 | if (options.require_not) { 175 | require_not_str = options.require_not; 176 | if (!Array.isArray(require_not_str)) { 177 | require_not_str = [require_not_str]; 178 | } 179 | delete options.require_not; 180 | } 181 | 182 | //Call the callback in a custom scope 183 | if (options.scope) { 184 | scope = options.scope; 185 | delete options.scope; 186 | } 187 | 188 | //Apply a post-processing function? 
189 | if (options.process) { 190 | postprocess = options.process; 191 | delete options.process; 192 | } 193 | 194 | //Setup default headers 195 | var key, headers = {}; 196 | for (key in default_headers) { 197 | headers[key] = default_headers[key]; 198 | } 199 | if (options.headers) { 200 | var normalised_key; 201 | for (key in options.headers) { 202 | normalised_key = key.replace(/[_-]/g, ' ').split(' ').map(function (str) { 203 | if (str.length) { 204 | str = str[0].toUpperCase() + str.substr(1); 205 | } 206 | return str; 207 | }).join('-'); 208 | headers[normalised_key] = options.headers[key]; 209 | } 210 | delete options.headers; 211 | } 212 | options.header = options.header || []; 213 | for (key in headers) { 214 | options.header.push(key + ': ' + headers[key]); 215 | } 216 | 217 | //Select a random user agent if one wasn't provided 218 | if (!headers['User-Agent'] && !options['user-agent']) { 219 | options['user-agent'] = user_agents[Math.random() * user_agents_len | 0]; 220 | } 221 | 222 | //Prepare curl args 223 | var key, values; 224 | for (key in options) { 225 | if (key === 'pretend') { 226 | continue; 227 | } 228 | values = Array.isArray(options[key]) ? options[key] : [options[key]]; 229 | values.forEach(function (value) { 230 | args.push('--' + key); 231 | if (true !== value) { 232 | args.push(value); 233 | } 234 | }); 235 | } 236 | 237 | if (options.file) { 238 | cmd = 'cat'; 239 | args = [options.file]; 240 | } 241 | 242 | //Simulate the spawn? 
243 | if (options.pretend) { 244 | return finish(); 245 | } 246 | 247 | //Spawn the process 248 | var child = spawn(cmd, args, { cwd: options.cwd || cwd }, function (curl) { 249 | 250 | //Collect stdout 251 | curl.stdout.on('data', function (data) { 252 | if (complete) return; 253 | var len = data.length, prev = stdout; 254 | stdout = new Buffer(len + stdoutlen); 255 | prev.copy(stdout, 0, 0, stdoutlen); 256 | data.copy(stdout, stdoutlen, 0, len); 257 | stdoutlen += len; 258 | }); 259 | 260 | //Pipe stderr to the current process? 261 | if (options.stderr) { 262 | if (options.stderr === true) { 263 | curl.stderr.pipe(process.stderr); 264 | delete options.stderr 265 | } 266 | } 267 | 268 | curl.stderr.on('data', function (data) { 269 | if (complete) return; 270 | stderr += data; 271 | }); 272 | 273 | //Handle curl exit 274 | curl.on('close', function (code) { 275 | try { 276 | err = code; 277 | if (complete) return; 278 | if (encoding) { 279 | stdout = stdout.toString(encoding); 280 | } 281 | if (postprocess && stdout) { 282 | stdout = postprocess(stdout); 283 | } 284 | if (require_str) { 285 | var valid = false; 286 | if (!encoding) { 287 | stdout = stdout.toString(); 288 | } 289 | var str; 290 | for (var i = 0, l = require_str.length; i < l; i++) { 291 | str = require_str[i]; 292 | if ((util.isRegExp(str) && str.test(stdout)) || stdout.indexOf(str) !== -1) { 293 | valid = true; 294 | break; 295 | } 296 | } 297 | if (!valid) { 298 | err = 'response does not contain required string: ' + str; 299 | stdout = null 300 | } else if (!encoding) { 301 | stdout = new Buffer(stdout); 302 | } 303 | } 304 | if (require_not_str) { 305 | var valid = true; 306 | if (!encoding) { 307 | stdout = stdout.toString(); 308 | } 309 | var str; 310 | for (var i = 0, l = require_not_str.length; i < l; i++) { 311 | str = require_not_str[i]; 312 | if ((util.isRegExp(str) && str.test(stdout)) || stdout.indexOf(str) !== -1) { 313 | valid = false; 314 | break; 315 | } 316 | } 317 | if (!valid) { 
318 | err = 'response contains bad string: ' + str; 319 | stdout = null 320 | } else if (!encoding) { 321 | stdout = new Buffer(stdout); 322 | } 323 | } 324 | } catch (e) { 325 | err = typeof e === 'object' ? e.message || '' : e; 326 | } 327 | finish(); 328 | if (timeout) clearTimeout(timeout); 329 | }); 330 | }); 331 | }; 332 | 333 | /** 334 | * Expose a helper for scraping urls from a page. 335 | */ 336 | 337 | var urls = /(?:href|src|HREF|SRC)=["']?([^"' >]+)/g; 338 | 339 | exports.urls = function (data, regex) { 340 | var match, matches = []; 341 | while (match = urls.exec(data)) { 342 | if (regex && !regex.test(match[1])) { 343 | continue; 344 | } 345 | matches.push(match[1].replace(/[\r\n\t\s]/g, '')); 346 | } 347 | return matches; 348 | }; 349 | 350 | /** 351 | * A helper for handling async concurrency. 352 | */ 353 | 354 | exports.concurrent = function (input, concurrency, fn) { 355 | if (arguments.length === 3) { 356 | var len = input.length, pos = 0, remaining = concurrency; 357 | for (var i = 0; i < concurrency; i++) { 358 | (function exec() { 359 | if (pos >= len) { 360 | if (!--remaining) { 361 | fn(null, function () {}); 362 | } 363 | } else { 364 | fn(input[pos++], function () { 365 | process.nextTick(exec); 366 | }); 367 | } 368 | })(); 369 | } 370 | } else { 371 | fn = concurrency; 372 | concurrency = input; 373 | for (var i = 0; i < concurrency; i++) { 374 | (function exec() { 375 | fn(function () { 376 | process.nextTick(exec); 377 | }); 378 | })(); 379 | } 380 | } 381 | }; 382 | 383 | /** 384 | * A helper for copying an object. 385 | */ 386 | 387 | exports.copy = function (obj) { 388 | var copy = {}; 389 | for (var i in obj) { 390 | if (Array.isArray(obj[i])) { 391 | copy[i] = obj[i].map(function (item) { 392 | return item; 393 | }); 394 | } else if (typeof obj[i] === 'object') { 395 | copy[i] = obj[i] ? 
exports.copy(obj[i]) : null; 396 | } else { 397 | copy[i] = obj[i]; 398 | } 399 | } 400 | return copy; 401 | }; 402 | 403 | --------------------------------------------------------------------------------