├── .gitignore ├── test ├── functional-request-test.js ├── helpers.js ├── complex-request-test.js └── http-agent-test.js ├── package.json ├── README.md └── lib └── http-agent.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | node_modules/* 3 | npm-debug.log -------------------------------------------------------------------------------- /test/functional-request-test.js: -------------------------------------------------------------------------------- 1 | /* 2 | * functional-request-test.js: Tests for functional requests (i.e. requests that generate 3 | * their own ClientRequest) using HttpAgent 4 | * 5 | * (C) 2010 Charlie Robbins 6 | * MIT LICENSE 7 | * 8 | */ 9 | 10 | var path = require('path'), 11 | sys = require('sys'), 12 | http = require('http'), 13 | events = require('events'), 14 | assert = require('assert'), 15 | net = require('net'), 16 | vows = require('vows'), 17 | httpAgent = require('../lib/http-agent'); 18 | 19 | vows.describe('httpAgent').addBatch({ 20 | 21 | }).export(module); -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "http-agent", 3 | "description": "A simple agent for performing a sequence of http requests in node.js", 4 | "version": "0.1.2", 5 | "author": "Charlie Robbins ", 6 | "repository": { 7 | "type": "git", 8 | "url": "http://github.com/indexzero/http-agent.git" 9 | }, 10 | "keywords": ["http-agent", "iterator", "http", "webcrawler"], 11 | "dependencies": { 12 | "request": "1.9.x" 13 | }, 14 | "devDependencies": { 15 | "vows": "0.5.x" 16 | }, 17 | "main": "./lib/http-agent", 18 | "scripts": { "test": "vows test/*-test.js --spec" }, 19 | "engines": { "node": ">= 0.2.0" } 20 | } -------------------------------------------------------------------------------- /test/helpers.js: 
-------------------------------------------------------------------------------- 1 | /* 2 | * index.js: Tests helpers for http-agent tests 3 | * 4 | * (C) 2010 Charlie Robbins 5 | * MIT LICENSE 6 | * 7 | */ 8 | 9 | var http = require('http'), 10 | path = require('path'), 11 | httpAgent = require('../lib/http-agent'); 12 | 13 | var helpers = exports; 14 | 15 | helpers.createAgent = function (options) { 16 | options = options || {}; 17 | var host = options.host || 'graph.facebook.com'; 18 | var urls = options.urls || ['barackobama', 'facebook', 'google']; 19 | 20 | return httpAgent.create(host, urls); 21 | }; 22 | 23 | helpers.createServer = function (options) { 24 | options = options || {}; 25 | var port = options.port || 8080; 26 | 27 | http.createServer(function (req, res) { 28 | res.sendHeader(200, {'Content-Type': 'text/plain'}); 29 | res.end(); 30 | }).listen(port); 31 | }; -------------------------------------------------------------------------------- /test/complex-request-test.js: -------------------------------------------------------------------------------- 1 | /* 2 | * complex-request-test.js: Tests for complex requests using HttpAgent 3 | * 4 | * (C) 2010 Charlie Robbins 5 | * MIT LICENSE 6 | * 7 | */ 8 | 9 | var path = require('path'), 10 | sys = require('sys'), 11 | http = require('http'), 12 | events = require('events'), 13 | assert = require('assert'), 14 | net = require('net'), 15 | vows = require('vows'), 16 | httpAgent = require('../lib/http-agent'), 17 | helpers = require('./helpers'); 18 | 19 | var complexUrls = [ 20 | { 21 | method: 'GET', 22 | uri: 'barackobama' 23 | }, 24 | { 25 | method: 'GET', 26 | uri: 'facebook' 27 | }, 28 | { 29 | method: 'GET', 30 | uri: 'google' 31 | } 32 | ]; 33 | 34 | vows.describe('httpAgent/object-request').addBatch({ 35 | "When using an httpAgent": { 36 | "to browse an undefined url": { 37 | topic: function () { 38 | var agent = helpers.createAgent({ urls: [undefined] }); 39 | agent.addListener('next', 
this.callback); 40 | agent.start(); 41 | }, 42 | "should throw an error": function (err, agent) { 43 | assert.isNotNull(err); 44 | } 45 | }, 46 | "to browse a path of complex urls": { 47 | "the next event": { 48 | topic: function () { 49 | var agent = helpers.createAgent({ urls: complexUrls }); 50 | agent.addListener('next', this.callback); 51 | agent.start(); 52 | }, 53 | "should be raised after start": function (e, agent) { 54 | assert.instanceOf(agent, httpAgent.HttpAgent); 55 | assert.isNotNull(agent.response); 56 | } 57 | } 58 | } 59 | } 60 | }).export(module); -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # http-agent 2 | 3 | A simple agent for performing a sequence of http requests in node.js 4 | 5 | ## Installation 6 | 7 | ### Installing npm (node package manager) 8 |
  9 |   curl http://npmjs.org/install.sh | sh
 10 | 
11 | 12 | ### Installing http-agent 13 |
 14 |   npm install http-agent
 15 | 
16 | 17 | ## Usage 18 | 19 | There are several ways to use http-agent: 20 | 21 | 1. Simple: Pass it a host and an array of strings to visit all of those URLs. 22 | 2. Complex: Pass it a host and an array of JSON objects representing all relevant parameters (method, request body, etc.). 23 | 3. Iterator: Each time the 'next' event is raised by an agent, you have the opportunity to add or remove URLs you wish to visit. In this sense, the agent acts as an iterator over the pages it visits. 24 | 25 | ### Using http-agent to visit a set of URLs on a single host with 'GET' 26 |
 27 |   var util = require('util'),
 28 |       httpAgent = require('path/to/http-agent/lib');
 29 |   
 30 |   var agent = httpAgent.create('graph.facebook.com', ['apple', 'facebook', 'google']);
 31 |   
 32 |   agent.addListener('next', function (e, agent) {
 33 |     // Simple usage: Just output the raw
 34 |     // HTML returned from each request
 35 |     util.puts(agent.body);
 36 |     agent.next();
 37 |   });
 38 |   
 39 |   agent.addListener('stop', function (e, agent) {
 40 |     util.puts('Agent has completed visiting all urls');
 41 |   });
 42 |   
 43 |   // Start the agent
 44 |   agent.start();
 45 | 
46 | 47 | ### Using http-agent to visit a set of URLs on a single host with complex parameters 48 | Since http-agent is built on top of request, it can take an array of JSON objects, each describing one request. For more documentation about which parameters are relevant, see [request][0]. 49 | 50 |
 51 |   var util = require('util'),
 52 |       httpAgent = require('path/to/http-agent/lib');
 53 |   
 54 |   var options = [
 55 |     {
 56 |       method: 'GET',
 57 |       uri: 'apple'
 58 |     },
 59 |     {
 60 |       method: 'GET',
 61 |       uri: 'facebook'
 62 |     },
 63 |     {
 64 |       method: 'GET',
 65 |       uri: 'http://google.com/'
 66 |     }
 67 |   ];
 68 |   var agent = httpAgent.create('graph.facebook.com', options);
 69 |   
 70 |   agent.addListener('next', function (e, agent) {
 71 |     // Simple usage: Just output the raw
 72 |     // HTML returned from each request
 73 |     util.puts(agent.body);
 74 |     agent.next();
 75 |   });
 76 |   
 77 |   agent.addListener('stop', function (e, agent) {
 78 |     util.puts('Agent has completed visiting all urls');
 79 |   });
 80 |   
 81 |   // Start the agent
 82 |   agent.start();
 83 | 
84 | 85 | ### Using http-agent as an iterator over webpages 86 | Each time an instance of http-agent raises the 'next' event, the agent is passed back as a parameter. That allows us to change the control flow of pages each time a page is visited. The agent is also passed back with the other events it raises, 'start' and 'stop'. 87 |
 88 |   var util = require('util'),
 89 |       httpAgent = require('path/to/http-agent/lib');
 90 |   
 91 |   var agent = httpAgent.create('graph.facebook.com', ['apple', 'facebook', 'google']),
 92 |       addPage = true;
 93 |   
 94 |   agent.addListener('next', function (e, agent) {
 95 |     if (addPage) {
 96 |       // The agent will now also visit 'http://graph.facebook.com/yahoo'
 97 |       agent.addUrl('yahoo');
 98 |       addPage = false;
 99 |     }
100 | 
101 |     // Simple usage: Just output the raw
102 |     // HTML returned from each request
103 |     util.puts(agent.body);
104 |     agent.next();
105 |   });
106 |   
107 |   agent.addListener('stop', function (e, agent) {
108 |     util.puts('Agent has completed visiting all urls');
109 |   });
110 |   
111 |   // Start the agent
112 |   agent.start();
113 | 
114 | 115 | ## Run Tests 116 |
117 |   vows test/*-test.js --spec
118 | 
119 | 120 | #### Author: [Charlie Robbins](http://www.charlierobbins.com); 121 | 122 | [0]: https://github.com/mikeal/request 123 | -------------------------------------------------------------------------------- /test/http-agent-test.js: -------------------------------------------------------------------------------- 1 | /* 2 | * http-agent-test.js: Tests for simple HttpAgent usage 3 | * 4 | * (C) 2010 Charlie Robbins 5 | * MIT LICENSE 6 | * 7 | */ 8 | 9 | var path = require('path'), 10 | sys = require('sys'), 11 | events = require('events'), 12 | assert = require('assert'), 13 | vows = require('vows'), 14 | httpAgent = require('../lib/http-agent'), 15 | helpers = require('./helpers'); 16 | 17 | vows.describe('httpAgent').addBatch({ 18 | "When using an httpAgent": { 19 | "to browse a path of urls": { 20 | "the next event": { 21 | topic: function () { 22 | var agent = helpers.createAgent(); 23 | agent.addListener('next', this.callback); 24 | agent.start(); 25 | }, 26 | "should be raised after start": function (e, agent) { 27 | assert.instanceOf(agent, httpAgent.HttpAgent); 28 | assert.isNotNull(agent.response); 29 | } 30 | }, 31 | "the next() method": { 32 | topic: function () { 33 | var agent = helpers.createAgent(); 34 | agent.addListener('next', this.callback); 35 | agent.start(); 36 | }, 37 | "should emit the next event": function (e, agent) { 38 | assert.instanceOf(agent, httpAgent.HttpAgent); 39 | } 40 | } 41 | } 42 | } 43 | }).addBatch({ 44 | "When using an httpAgent": { 45 | "simple usage of": { 46 | "the create() method": { 47 | topic: helpers.createAgent(), 48 | "should return a valid httpAgent": function (agent) { 49 | assert.instanceOf(agent, httpAgent.HttpAgent) 50 | assert.equal(agent.nextUrls.length, 3); 51 | assert.equal(agent.nextUrls[0], 'graph.facebook.com/barackobama'); 52 | assert.equal(agent.prevUrls.length, 0); 53 | assert.equal(agent.host, 'graph.facebook.com'); 54 | }, 55 | "should return a valid event emitter": function (agent) { 56 | 
assert.isFunction(agent.addListener); 57 | assert.isFunction(agent.removeListener); 58 | assert.isFunction(agent.listeners); 59 | assert.isFunction(agent.emit); 60 | }, 61 | }, 62 | "the stop() method": { 63 | topic: function () { 64 | var agent = helpers.createAgent(); 65 | agent.addListener('stop', this.callback); 66 | agent.start(); 67 | agent.stop(); 68 | }, 69 | "should emit the stopped event when previously started": function (e, agent) { 70 | assert.instanceOf(agent, httpAgent.HttpAgent); 71 | } 72 | }, 73 | "the start() method": { 74 | topic: function () { 75 | var agent = helpers.createAgent(); 76 | agent.addListener('start', this.callback); 77 | agent.start(); 78 | }, 79 | "should emit the started event": function (e, agent) { 80 | assert.instanceOf(agent, httpAgent.HttpAgent); 81 | } 82 | }, 83 | "the next() method": { 84 | topic: function () { 85 | var agent = helpers.createAgent(); 86 | agent.addListener('next', this.callback); 87 | agent.start(); 88 | }, 89 | "should emit the next event": function (e, agent) { 90 | assert.instanceOf(agent, httpAgent.HttpAgent); 91 | assert.equal(agent.nextUrls.length, 2); 92 | assert.equal(agent.nextUrls[0], 'graph.facebook.com/facebook'); 93 | } 94 | }, 95 | "the next() method when passed a url parameter": { 96 | topic: function () { 97 | var agent = helpers.createAgent(); 98 | self = this; 99 | 100 | // Remark: This is a bit of a hack, vows should support 101 | // async topic callbacks for multiple event chains. 
102 | var nextCallback = function (e,agent) { 103 | agent.removeListener('next', nextCallback); 104 | agent.addListener('next', self.callback); 105 | agent.next("yahoo"); 106 | }; 107 | 108 | agent.addListener('next', nextCallback); 109 | agent.start(); 110 | }, 111 | "should emit the next event": function (e, agent) { 112 | assert.instanceOf(agent, httpAgent.HttpAgent); 113 | assert.equal(agent.nextUrls.length, 2); 114 | assert.equal(agent.prevUrls.length, 2); 115 | assert.equal(agent.prevUrls[0], "graph.facebook.com/yahoo"); 116 | assert.equal(agent.nextUrls[0], 'graph.facebook.com/facebook'); 117 | } 118 | }, 119 | "the addUrl() method": { 120 | topic: helpers.createAgent(), 121 | "should append a url to the set of nextUrls": function (agent) { 122 | agent.addUrl('apple'); 123 | assert.equal(agent.nextUrls.length, 4); 124 | assert.equal(agent.nextUrls[3], 'graph.facebook.com/apple'); 125 | } 126 | } 127 | } 128 | } 129 | }).addBatch({ 130 | "When using an httpAgent": { 131 | "the back() method": { 132 | "when called before start": { 133 | topic: function () { 134 | var agent = helpers.createAgent(); 135 | agent.addListener('next', this.callback); 136 | 137 | // Remark: Never mess with agent._running when consuming httpAgent. 138 | agent._running = true; 139 | agent.back(); 140 | }, 141 | "should raise the next event with an error": function (e, agent) { 142 | assert.isNotNull(e); 143 | } 144 | }, 145 | "when called after start": { 146 | topic: function () { 147 | var agent = helpers.createAgent(); 148 | self = this; 149 | 150 | // Remark: This is a bit of a hack, vows should support 151 | // async topic callbacks for multiple event chains. 
152 | var nextCallback = function (e,agent) { 153 | agent.removeListener('next', nextCallback); 154 | agent.addListener('next', self.callback); 155 | agent.back(); 156 | }; 157 | 158 | agent.addListener('next', nextCallback); 159 | agent.start(); 160 | }, 161 | "should emit the next event": function (e, agent) { 162 | assert.instanceOf(agent, httpAgent.HttpAgent); 163 | assert.equal(agent.nextUrls.length, 2); 164 | assert.equal(agent.prevUrls.length, 2); 165 | assert.equal(agent.prevUrls[0], "graph.facebook.com/barackobama"); 166 | assert.equal(agent.nextUrls[0], 'graph.facebook.com/facebook'); 167 | } 168 | } 169 | } 170 | } 171 | }).export(module); 172 | -------------------------------------------------------------------------------- /lib/http-agent.js: -------------------------------------------------------------------------------- 1 | /* 2 | * http-agent.js: A simple agent for performing a sequence of http requests in node.js 3 | * 4 | * (C) 2010 Charlie Robbins 5 | * MIT LICENSE 6 | * 7 | */ 8 | 9 | var events = require('events'), 10 | http = require('http'), 11 | path = require('path'), 12 | url = require('url'), 13 | util = require('util'), 14 | request = require('request'); 15 | 16 | exports.create = function (host, urls, options) { 17 | return new HttpAgent(host, urls, options); 18 | }; 19 | 20 | var HttpAgent = exports.HttpAgent = function (host, urls, options) { 21 | events.EventEmitter.call(this); 22 | 23 | // 24 | // Arguments parsings. 
Valid usage: 25 | // 26 | // new HttpAgent('nodejitsu.com', ['/', 'platform', 'pricing']); 27 | // new HttpAgent('nodejitsu.com', ['/', 'platform', 'pricing'], { encoding: 'utf8' }); 28 | // new HttpAgent({ host: 'nodejitsu.com', urls: ['/', 'platform', 'pricing'], encoding: 'utf8' }); 29 | // new HttpAgent({ host: 'nodejitsu.com', encoding: 'utf8' }, ['/', 'platform', 'pricing']); 30 | // 31 | options = options || {}; 32 | 33 | if (typeof host === 'object') { 34 | options = host; 35 | } 36 | else if (typeof host === 'string') { 37 | options.host = host; 38 | } 39 | 40 | if (urls && Array.isArray(urls)) { 41 | options.urls = urls; 42 | } 43 | 44 | // 45 | // Setup some intelligent defaults 46 | // 47 | this.url = ''; 48 | this.body = ''; 49 | this.port = 80; 50 | this.host = options.host || 'localhost'; 51 | this.options = {}; 52 | 53 | // 54 | // Extract the `request` options which persist across 55 | // all HTTP requests made by this instance. 56 | // 57 | var self = this; 58 | ['headers', 'json', 'followRedirect', 'maxRedirects', 'encoding', 'timeout'].forEach(function (opt) { 59 | if (typeof(options[opt]) != 'undefined') { 60 | self.options[opt] = options[opt]; 61 | } 62 | }); 63 | 64 | // 65 | // Configure "private" variables for internal 66 | // state management in `HttpAgent` 67 | // 68 | this._running = false; 69 | this._visited = []; 70 | this._unvisited = options.urls || []; 71 | 72 | this.addListener('error', function (e) { 73 | // 74 | // Suppress `uncaughtException` errors from 75 | // this instance. 76 | // 77 | }); 78 | }; 79 | 80 | // 81 | // Inherit from `events.EventEmitter`. 
//
util.inherits(HttpAgent, events.EventEmitter);

//
// ### property prevUrls
// Read-only view of the entries already visited, most recent first
// (entries are `unshift`ed onto `_visited`). Each entry is joined onto
// `this.host` with `path.join`.
//
Object.defineProperty(HttpAgent.prototype, 'prevUrls', {
  enumerable: true,
  configurable: true,
  get: function () {
    var host = this.host;
    return this._visited.map(function (visited) {
      return path.join(host, visited);
    });
  }
});

//
// ### property nextUrls
// Read-only view of the entries still waiting to be visited, in the
// order they will be visited. Each entry is joined onto `this.host`
// with `path.join`.
//
Object.defineProperty(HttpAgent.prototype, 'nextUrls', {
  enumerable: true,
  configurable: true,
  get: function () {
    var host = this.host;
    return this._unvisited.map(function (pending) {
      return path.join(host, pending);
    });
  }
});

//
// ### function addUrl (url)
// #### @url {string|Object|function|Array} Entry (or array of entries) to visit.
// Appends `url` to the end of the pending queue. Falsy values are ignored.
// Because `concat` is used, passing an array appends each of its elements.
//
HttpAgent.prototype.addUrl = function (url) {
  if (url) {
    this._unvisited = this._unvisited.concat(url);
  }
};

//
// ### function start ()
// Marks the agent as running, emits `start` (null, agent) and requests
// the first pending entry. Does nothing if the agent is already running.
//
HttpAgent.prototype.start = function () {
  if (!this._running) {
    this._running = true;
    this.emit('start', null, this);
    this.next();
  }
};

//
// ### function stop ()
// Marks the agent as stopped and emits `stop` (null, agent).
// Does nothing if the agent is not running.
//
HttpAgent.prototype.stop = function () {
  if (this._running) {
    this._running = false;
    this.emit('stop', null, this);
  }
};

//
// ### function back ()
// Re-requests the most recently visited entry. If nothing has been
// visited yet, emits `next` with an Error instead. Does nothing if the
// agent is not running.
//
HttpAgent.prototype.back = function () {
  if (this._running) {
    return this._visited.length === 0
      ? this.emit('next', new Error('Cannot go back to nothing. Did you forget to call next()?'), this)
      : this.next(this._visited[0]);
  }
};

//
// ### function next ([url])
// #### @url {string|Object|function} **Optional** Entry to visit immediately.
// Visits `url` when given, otherwise the head of the pending queue.
// When there is nothing left to visit the agent is stopped.
// Does nothing if the agent is not running.
//
HttpAgent.prototype.next = function (url) {
  if (this._running) {
    //
    // If an explicit URL was passed in and it is still pending, remove
    // it from the queue so it is not visited a second time. `splice`
    // mutates in place and returns the *removed* items, so its result
    // must not be assigned back to `_unvisited`. The lookup is skipped
    // when no URL was passed so that an `undefined` entry sitting in the
    // queue is still shifted off below and reported as an error.
    //
    if (url) {
      var index = this._unvisited.indexOf(url);
      if (index !== -1) {
        this._unvisited.splice(index, 1);
      }
    }

    var shouldVisit = url || this._unvisited.length > 0;

    // TODO: Be more robust than just 'GET'
    if (shouldVisit) {
      this.url = url || this._unvisited.shift();
      this._makeRequest(this.url);
    }
    else {
      this.stop();
    }
  }
};

//
// ### @private function _makeRequest (url)
// #### @url {string|Object|function} Entry to request.
// Builds the `request` options for `url` and performs the request.
// On success the entry is pushed to the front of `_visited`, the
// `current`, `response` and `body` properties are updated and `next`
// (null, agent) is emitted. On failure `next` (err, agent) is emitted;
// when the options cannot even be built, `stop` (err, agent) is emitted
// as well (the `_running` flag is left untouched in that case).
//
HttpAgent.prototype._makeRequest = function (url) {
  var self = this,
      options;

  this.body = '';

  // Try to create the request options or dispatch the error
  try {
    options = this._createOptions(url);
  }
  catch (createErr) {
    this.emit('next', createErr, this);
    this.emit('stop', createErr, this);
    return;
  }

  try {
    request(options, function (err, response, body) {
      if (err) {
        return self.emit('next', err, self);
      }

      self.current = options;
      self._visited.unshift(url);
      self.response = response;
      self.body = body;
      self.emit('next', null, self);
    });
  }
  catch (requestErr) {
    this.emit('next', requestErr, this);
  }
};

//
// ### @private function _createOptions (url)
// #### @url {string|Object|function} Entry to build options for.
// Returns the options object handed to `request`:
//   * string:   requested as `http://<host>/<url>`
//   * object:   normalized through `_createComplexOptions`
//   * function: called with the agent as `this`; its return value is
//               used as-is (the instance-wide options are not mixed in)
// String and object results are merged over the instance-wide options.
// Throws an Error for `undefined` and for any other type.
//
HttpAgent.prototype._createOptions = function (url) {
  var options;

  switch (typeof(url)) {
    case 'string':    options = { uri: 'http://' + this.host + '/' + url }; break;
    case 'object':    options = this._createComplexOptions(url); break;
    case 'function':  return url.call(this);
    case 'undefined': throw new Error('Cannot request undefined URL');
    default:          throw new Error('Argument Error');
  }

  return mixin({}, this.options, options);
};

//
// ### @private function _createComplexOptions (options)
// #### @options {Object} Object based url; must carry a `uri` property.
// Normalizes `options.uri` into an absolute URI. Any protocol or host
// missing from the uri falls back to `http:` and `this.host`, the path
// is forced to start with '/', and a non-empty query string is kept.
// The passed object is updated in place and returned.
// Throws an Error when `options.uri` is undefined.
//
HttpAgent.prototype._createComplexOptions = function (options) {
  if (typeof options.uri === 'undefined') {
    throw new Error('uri is required on object based urls.');
  }

  //
  // `url.parse` reports missing components as `null`, so every
  // component is defaulted before it is used.
  //
  var parsedUri = url.parse(options.uri),
      protocol = parsedUri.protocol || 'http:',
      host = parsedUri.host || this.host,
      pathname = parsedUri.pathname || '';

  if (pathname.charAt(0) !== '/') {
    pathname = '/' + pathname;
  }

  options.uri = protocol + '//' + host + pathname;

  if (typeof parsedUri.query === 'string' && parsedUri.query.length > 0) {
    options.uri = options.uri + '?' + parsedUri.query;
  }

  return options;
};

//
// ### @private function mixin (target, [source1, source2, ...])
// #### @target {Object} Object that receives the properties.
// Copies the own enumerable properties of every source onto `target`,
// left to right (later sources win), and returns `target`. Properties
// defined with a getter are skipped; `null` / `undefined` sources are
// ignored.
//
function mixin (target) {
  var sources = Array.prototype.slice.call(arguments, 1);

  sources.forEach(function (source) {
    if (source === null || typeof source === 'undefined') {
      return;
    }

    Object.keys(source).forEach(function (key) {
      // Inspect the own descriptor directly so prototype-less objects work too.
      var descriptor = Object.getOwnPropertyDescriptor(source, key);
      if (!descriptor.get) {
        target[key] = source[key];
      }
    });
  });

  return target;
}