├── .gitignore
├── .eslintrc
├── .travis.yml
├── example
├── pipe.js
├── fetchurl.js
└── fetchstream.js
├── lib
├── cookiejar.js
└── fetch.js
├── Gruntfile.js
├── caching.md
├── package.json
├── LICENSE
├── README.md
└── test
└── fetch-test.js
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules
3 |
--------------------------------------------------------------------------------
/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "nodemailer"
3 | }
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | sudo: false
3 | node_js:
4 | - 6
5 | - 8
6 | notifications:
7 | email:
8 | - andris@kreata.ee
9 |
--------------------------------------------------------------------------------
/example/pipe.js:
--------------------------------------------------------------------------------
1 | /* eslint no-console:0 */
2 |
3 | 'use strict';
4 |
5 | // pipe to file
6 |
7 | const FetchStream = require('../lib/fetch').FetchStream;
8 | const fs = require('fs');
9 |
const inp = new FetchStream('http://google.com');
const out = fs.createWriteStream('google.html');

// Without 'error' listeners a failed request or a failed write would surface
// as an unhandled 'error' event and crash the process.
inp.on('error', err => {
    console.log('ERROR', (err && err.message) || err);
});

out.on('error', err => {
    console.log('ERROR', (err && err.message) || err);
});

// 'finish' fires once all data has been flushed to the file; 'end' on the
// input only means that the response has been read completely.
out.on('finish', () => {
    console.log('downloaded!');
});

inp.pipe(out);
18 |
--------------------------------------------------------------------------------
/example/fetchurl.js:
--------------------------------------------------------------------------------
1 | /* eslint no-console:0 */
2 |
3 | 'use strict';
4 |
5 | // fetch url and update charset to utf-8
6 | const fetchUrl = require('../lib/fetch').fetchUrl;
7 |
// Download the page, then print the response metadata followed by the body.
fetchUrl('http://kreata.ee/iso-8859-15.php', (error, meta, body) => {
    if (!error) {
        console.log('META INFO');
        console.log(meta);

        console.log('BODY');
        console.log(body.toString('utf-8'));
        return;
    }

    console.log('ERROR', error.message || error);
});
19 |
--------------------------------------------------------------------------------
/example/fetchstream.js:
--------------------------------------------------------------------------------
1 | /* eslint no-console:0 */
2 |
3 | 'use strict';
4 |
5 | const FetchStream = require('../lib/fetch').FetchStream;
6 |
// Open the stream with an (empty) custom header set and log every event.
const fetch = new FetchStream('http://google.com', { headers: {} });

fetch
    .on('data', chunk => {
        console.log(chunk);
    })
    .on('meta', meta => {
        console.log(meta);
    })
    .on('end', () => {
        console.log('END');
    })
    .on('error', e => {
        console.log('ERROR: ' + ((e && e.message) || e));
    });
26 |
--------------------------------------------------------------------------------
/lib/cookiejar.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const Biskviit = require('biskviit');
4 |
/**
 * Thin layer around biskviit to keep API compatibility.
 */
class CookieJar {
    /**
     * @param {Object} [options] Jar settings; only `sessionTimeout` is read here
     */
    constructor(options) {
        this.options = options || {};

        // expire cookies after 30 minutes by default
        const sessionTimeout = this.options.sessionTimeout || 1800;
        this.biskviit = new Biskviit({ sessionTimeout });
    }

    /**
     * @param {String} url URL the cookies are requested for
     * @returns whatever the wrapped biskviit instance's `get` returns for `url`
     */
    getCookies(url) {
        const jar = this.biskviit;
        return jar.get(url);
    }

    /**
     * Passes the cookie definition on to the wrapped biskviit instance.
     *
     * @param {String} cookieStr Cookie definition string
     * @param {String} url URL the cookie is associated with
     */
    setCookie(cookieStr, url) {
        const jar = this.biskviit;
        jar.set(cookieStr, url);
    }
}

module.exports.CookieJar = CookieJar;
24 |
--------------------------------------------------------------------------------
/Gruntfile.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | module.exports = function(grunt) {
4 | // Project configuration.
5 | grunt.initConfig({
6 | eslint: {
7 | all: ['lib/*.js', 'test/*.js', 'example/*.js', 'Gruntfile.js']
8 | },
9 |
10 | mochaTest: {
11 | all: {
12 | options: {
13 | reporter: 'spec'
14 | },
15 | src: ['test/*-test.js']
16 | }
17 | }
18 | });
19 |
20 | // Load the plugin(s)
21 | grunt.loadNpmTasks('grunt-eslint');
22 | grunt.loadNpmTasks('grunt-mocha-test');
23 |
24 | // Tasks
25 | grunt.registerTask('default', ['eslint', 'mochaTest']);
26 | };
27 |
--------------------------------------------------------------------------------
/caching.md:
--------------------------------------------------------------------------------
1 | Caching in HTTP
2 | ===============
3 |
4 | ETAG
5 | ----
6 |
7 | Res 200:
8 |
9 | ETag: "fa6e-3e3073913b100"
10 |
11 | Req:
12 |
13 | If-None-Match: "fa6e-3e3073913b100"
14 |
15 | Res 304:
16 |
17 | ETag: "fa6e-3e3073913b100"
18 |
19 |
20 | LAST MODIFIED
21 | -------------
22 |
23 | Res 200:
24 |
25 | Last-Modified: Mon, 28 Jan 2013 22:29:45 GMT
26 |
27 | Req:
28 |
29 | If-Modified-Since: Mon, 28 Jan 2013 22:29:45 GMT
30 |
31 | Res 304:
32 |
33 | Last-Modified: Mon, 28 Jan 2013 22:29:45 GMT
34 |
35 |
36 | EXPIRES
37 | -------
38 |
39 | Res 200:
40 |
41 | Expires: Tue, 19 Mar 2013 11:17:57 GMT
42 |
43 | Do not try again before Tue, 19 Mar 2013 11:17:57 GMT
44 |
45 | CACHE-CONTROL
46 | -------------
47 |
48 | Res 200:
49 |
50 | Cache-Control: max-age=300
51 |
52 | Do not try again until 5 minutes from now
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "httpfetch",
3 | "description": "Fetch URL contents",
4 | "version": "2.0.0",
5 | "author": "Andris Reinman",
6 | "homepage": "http://github.com/andris9/fetch",
7 | "repository": {
8 | "type": "git",
9 | "url": "git://github.com/andris9/fetch.git"
10 | },
11 | "scripts": {
12 | "test": "grunt"
13 | },
14 | "main": "./lib/fetch",
15 | "license": "MIT",
16 | "dependencies": {
17 | "biskviit": "2.0.0",
18 | "encoding": "0.1.12",
19 | "iconv-lite": "^0.4.18"
20 | },
21 | "devDependencies": {
22 | "chai": "^4.1.1",
23 | "eslint-config-nodemailer": "^1.2.0",
24 | "grunt": "^1.0.1",
25 | "grunt-cli": "^1.2.0",
26 | "grunt-eslint": "^20.0.0",
27 | "grunt-mocha-test": "^0.13.2",
28 | "mocha": "^3.5.0"
29 | },
30 | "keywords": ["url"]
31 | }
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2011 Andris Reinman
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
12 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
13 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
14 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
15 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
16 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # fetch
2 |
3 | Fetch url contents. Supports gzipped content for quicker download, redirects (with automatic cookie handling, so no eternal redirect loops), streaming and piping etc.
4 |
5 | [Build status on Travis CI](https://travis-ci.org/andris9/fetch)
6 |
7 | ## Install
8 |
9 | npm install fetch
10 |
11 | ## Usage
12 |
13 | See examples folder for a complete example
14 |
15 | ## Fetch from URL
16 |
17 | `fetch.fetchUrl(url [, options], callback)`
18 |
19 | Where
20 |
21 | * **url** is the url to fetch
22 | * **options** is an optional options object
23 | * **callback** is the callback to run - `callback(error, meta, body)`
24 |
25 | Example
26 |
27 | var fetchUrl = require("fetch").fetchUrl;
28 |
29 | // source file is iso-8859-15 but it is converted to utf-8 automatically
30 | fetchUrl("http://kreata.ee/iso-8859-15.php", function(error, meta, body){
31 | console.log(body.toString());
32 | });
33 |
34 | **NB** If the file has been marked with charset other than utf-8, it is converted automatically.
35 |
36 | By default `iconv-lite` is used for charset conversion. If you want to use `node-iconv` module instead,
37 | add `"iconv": "*"` to your package.json file, it will be picked up by `fetch` automatically.
38 |
39 | ## Streaming
40 |
41 | `fetch.FetchStream(url [, options]) -> Stream`
42 |
43 | Where
44 |
45 | * **url** is the url to fetch
46 | * **options** is an optional options object
47 |
48 | With events:
49 |
50 | * **data** with a data chunk - `function(chunk){}`
51 | * **meta** with some information about the response `function(meta){}`
52 | * **end** when the receiving is ready
53 | * **error**
54 |
55 | Example
56 |
57 | var FetchStream = require("fetch").FetchStream;
58 |
59 | var fetch = new FetchStream("http://google.com");
60 |
61 | fetch.on("data", function(chunk){
62 | console.log(chunk);
63 | });
64 |
65 | ## Options
66 |
67 | Possible option values
68 |
69 | * **maxRedirects** how many redirects allowed, defaults to 10
70 | * **disableRedirects** set to true if redirects are not allowed, defaults to false
71 | * **headers** optional header fields, in the form of `{'Header-Field':'value'}`
72 | * **maxResponseLength** maximum allowed length for the file, the remainder is cut off. Defaults to `Infinity`
73 | * **method** defaults to GET
74 | * **payload** request body
75 | * **disableGzip** set to true to disable content gzipping, needed for Node v0.5.9 which has buggy zlib
76 | * **cookies** an array of cookie definitions in the form of `['name=val']`
77 | * **cookieJar** for sharing cookies between requests, see below
78 | * **outputEncoding** valid for `fetchUrl`
79 | * **disableDecoding** valid for `fetchUrl`, set to true to disable automatic charset decoding to utf-8
80 | * **overrideCharset** valid for `fetchUrl`, set input encoding
81 | * **asyncDnsLoookup** (note the spelling of the option name) use high performance asynchronous DNS resolution based on c-ares instead of a thread pool calling getaddrinfo(3)
82 | * **timeout** set a timeout in ms
83 | * **agentHttps** pass-through http.request agent parameter for https
84 | * **agentHttp** pass-through http.request agent parameter for http
85 | * **agent** pass-through http.request agent parameter as fallback, if agentHttps or agentHttp are not specified
86 | * **rejectUnauthorized** whether to reject self-signed certificates (`true`, default behavior), or ignore and allow them (`false`)
87 | * **user** is the username for Basic auth
88 | * **pass** is the password for Basic auth
89 |
90 | ## Meta object
91 |
92 | Meta object contains following fields:
93 |
94 | * **status** HTTP status code
95 | * **responseHeaders** response headers
96 | * **finalUrl** last url value, useful with redirects
97 | * **redirectCount** how many redirects happened
98 | * **cookieJar** CookieJar object for sharing/retrieving cookies
99 |
100 | ## Headers
101 |
102 | Request headers can be set with `options.headers`
103 |
104 | options = {
105 | headers:{
106 | "X-My-Header": "This is a custom header field"
107 | }
108 | }
109 |
110 | ## User-Agent
111 | User-Agent value can be set with `options.headers['User-Agent']` value. Defaults to `"FetchStream"`
112 |
113 | options = {
114 | headers: {
115 | "User-Agent": "MyUseragent/1.0"
116 | }
117 | }
118 |
119 | ## Cookies
120 | Cookies can be set with `options.cookies` which takes an array with cookie definitions
121 |
122 | options = {
123 | cookies: ["name=value", "key=value; path=/; secure"]
124 | }
125 |
126 | Paths, domain, expire and other cookie settings are honored, so try not to set cookies with expire dates in the past. If domain is not set, any domain will pass, same for paths.
127 |
128 | **NB** Do not set cookie field directly in request header as it will be overwritten.
129 |
130 | ## Cookie sharing
131 |
132 | Cookies can be shared between different requests, this can be achieved with `CookieJar`
133 |
134 | var fetch = require("fetch");
135 |
136 | var cookies = new fetch.CookieJar();
137 |
138 | // add one cookie for testing
139 | cookies.setCookie('alfa=beta; path=/;');
140 |
141 | // create a FetchStream with custom CookieJar
142 | var f = new fetch.FetchStream("http://www.example.com/page1",{cookieJar: cookies});
143 |
144 | f.on("end", function(){
145 | // if cookies were set with the previous request, the data is
146 | // saved in 'cookieJar' and passed to the next request
147 | new fetch.FetchStream("http://www.example.com/page1",{cookieJar: cookies});
148 | });
149 |
150 |
151 | ## Redirects
152 |
153 | Redirects are on by default, use `options.disableRedirects` to disable. Maximum redirect count can be set with `options.maxRedirects` (defaults to 10)
154 |
155 | options = {
156 | disableRedirects: true
157 | }
158 |
159 | options = {
160 | maxRedirects: 100
161 | }
162 |
163 | ## Disable Gzip support
164 |
165 | Gzip and Deflate support is on by default. This is problematic in Node v0.5.9 and below, since Zlib support in these versions is buggy when unpacking and tends to produce errors.
166 |
167 | options = {
168 | disableGzip: true
169 | }
170 |
171 | ## Piping to file
172 |
173 | `FetchStream` is a readable Stream object and thus can be piped. For example stream URL contents directly to a file:
174 |
175 | var FetchStream = require("fetch").FetchStream,
176 | fs = require("fs"),
177 | out;
178 |
179 | out = fs.createWriteStream('file.html');
180 | new FetchStream("http://www.example.com/index.php").pipe(out);
181 |
182 | ## License
183 |
184 | MIT
185 |
--------------------------------------------------------------------------------
/test/fetch-test.js:
--------------------------------------------------------------------------------
1 | /* eslint no-unused-expressions:0 */
2 | /* globals afterEach, beforeEach, describe, it */
3 |
4 | 'use strict';
5 |
6 | const chai = require('chai');
7 | const expect = chai.expect;
8 |
9 | //var http = require('http');
10 | const fetch = require('../lib/fetch');
11 | const http = require('http');
12 | const https = require('https');
13 |
14 | chai.config.includeStack = true;
15 |
16 | const HTTP_PORT = 9998;
17 | const HTTPS_PORT = 9993;
18 |
19 | let httpsOptions = {
20 | key:
21 | '-----BEGIN RSA PRIVATE KEY-----\n' +
22 | 'MIIEpAIBAAKCAQEA6Z5Qqhw+oWfhtEiMHE32Ht94mwTBpAfjt3vPpX8M7DMCTwHs\n' +
23 | '1xcXvQ4lQ3rwreDTOWdoJeEEy7gMxXqH0jw0WfBx+8IIJU69xstOyT7FRFDvA1yT\n' +
24 | 'RXY2yt9K5s6SKken/ebMfmZR+03ND4UFsDzkz0FfgcjrkXmrMF5Eh5UXX/+9YHeU\n' +
25 | 'xlp0gMAt+/SumSmgCaysxZLjLpd4uXz+X+JVxsk1ACg1NoEO7lWJC/3WBP7MIcu2\n' +
26 | 'wVsMd2XegLT0gWYfT1/jsIH64U/mS/SVXC9QhxMl9Yfko2kx1OiYhDxhHs75RJZh\n' +
27 | 'rNRxgfiwgSb50Gw4NAQaDIxr/DJPdLhgnpY6UQIDAQABAoIBAE+tfzWFjJbgJ0ql\n' +
28 | 's6Ozs020Sh4U8TZQuonJ4HhBbNbiTtdDgNObPK1uNadeNtgW5fOeIRdKN6iDjVeN\n' +
29 | 'AuXhQrmqGDYVZ1HSGUfD74sTrZQvRlWPLWtzdhybK6Css41YAyPFo9k4bJ2ZW2b/\n' +
30 | 'p4EEQ8WsNja9oBpttMU6YYUchGxo1gujN8hmfDdXUQx3k5Xwx4KA68dveJ8GasIt\n' +
31 | 'd+0Jd/FVwCyyx8HTiF1FF8QZYQeAXxbXJgLBuCsMQJghlcpBEzWkscBR3Ap1U0Zi\n' +
32 | '4oat8wrPZGCblaA6rNkRUVbc/+Vw0stnuJ/BLHbPxyBs6w495yBSjBqUWZMvljNz\n' +
33 | 'm9/aK0ECgYEA9oVIVAd0enjSVIyAZNbw11ElidzdtBkeIJdsxqhmXzeIFZbB39Gd\n' +
34 | 'bjtAVclVbq5mLsI1j22ER2rHA4Ygkn6vlLghK3ZMPxZa57oJtmL3oP0RvOjE4zRV\n' +
35 | 'dzKexNGo9gU/x9SQbuyOmuauvAYhXZxeLpv+lEfsZTqqrvPUGeBiEQcCgYEA8poG\n' +
36 | 'WVnykWuTmCe0bMmvYDsWpAEiZnFLDaKcSbz3O7RMGbPy1cypmqSinIYUpURBT/WY\n' +
37 | 'wVPAGtjkuTXtd1Cy58m7PqziB7NNWMcsMGj+lWrTPZ6hCHIBcAImKEPpd+Y9vGJX\n' +
38 | 'oatFJguqAGOz7rigBq6iPfeQOCWpmprNAuah++cCgYB1gcybOT59TnA7mwlsh8Qf\n' +
39 | 'bm+tSllnin2A3Y0dGJJLmsXEPKtHS7x2Gcot2h1d98V/TlWHe5WNEUmx1VJbYgXB\n' +
40 | 'pw8wj2ACxl4ojNYqWPxegaLd4DpRbtW6Tqe9e47FTnU7hIggR6QmFAWAXI+09l8y\n' +
41 | 'amssNShqjE9lu5YDi6BTKwKBgQCuIlKGViLfsKjrYSyHnajNWPxiUhIgGBf4PI0T\n' +
42 | '/Jg1ea/aDykxv0rKHnw9/5vYGIsM2st/kR7l5mMecg/2Qa145HsLfMptHo1ZOPWF\n' +
43 | '9gcuttPTegY6aqKPhGthIYX2MwSDMM+X0ri6m0q2JtqjclAjG7yG4CjbtGTt/UlE\n' +
44 | 'WMlSZwKBgQDslGeLUnkW0bsV5EG3AKRUyPKz/6DVNuxaIRRhOeWVKV101claqXAT\n' +
45 | 'wXOpdKrvkjZbT4AzcNrlGtRl3l7dEVXTu+dN7/ZieJRu7zaStlAQZkIyP9O3DdQ3\n' +
46 | 'rIcetQpfrJ1cAqz6Ng0pD0mh77vQ13WG1BBmDFa2A9BuzLoBituf4g==\n' +
47 | '-----END RSA PRIVATE KEY-----',
48 | cert:
49 | '-----BEGIN CERTIFICATE-----\n' +
50 | 'MIICpDCCAYwCCQCuVLVKVTXnAjANBgkqhkiG9w0BAQsFADAUMRIwEAYDVQQDEwls\n' +
51 | 'b2NhbGhvc3QwHhcNMTUwMjEyMTEzMjU4WhcNMjUwMjA5MTEzMjU4WjAUMRIwEAYD\n' +
52 | 'VQQDEwlsb2NhbGhvc3QwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDp\n' +
53 | 'nlCqHD6hZ+G0SIwcTfYe33ibBMGkB+O3e8+lfwzsMwJPAezXFxe9DiVDevCt4NM5\n' +
54 | 'Z2gl4QTLuAzFeofSPDRZ8HH7wgglTr3Gy07JPsVEUO8DXJNFdjbK30rmzpIqR6f9\n' +
55 | '5sx+ZlH7Tc0PhQWwPOTPQV+ByOuReaswXkSHlRdf/71gd5TGWnSAwC379K6ZKaAJ\n' +
56 | 'rKzFkuMul3i5fP5f4lXGyTUAKDU2gQ7uVYkL/dYE/swhy7bBWwx3Zd6AtPSBZh9P\n' +
57 | 'X+OwgfrhT+ZL9JVcL1CHEyX1h+SjaTHU6JiEPGEezvlElmGs1HGB+LCBJvnQbDg0\n' +
58 | 'BBoMjGv8Mk90uGCeljpRAgMBAAEwDQYJKoZIhvcNAQELBQADggEBABXm8GPdY0sc\n' +
59 | 'mMUFlgDqFzcevjdGDce0QfboR+M7WDdm512Jz2SbRTgZD/4na42ThODOZz9z1AcM\n' +
60 | 'zLgx2ZNZzVhBz0odCU4JVhOCEks/OzSyKeGwjIb4JAY7dh+Kju1+6MNfQJ4r1Hza\n' +
61 | 'SVXH0+JlpJDaJ73NQ2JyfqELmJ1mTcptkA/N6rQWhlzycTBSlfogwf9xawgVPATP\n' +
62 | '4AuwgjHl12JI2HVVs1gu65Y3slvaHRCr0B4+Kg1GYNLLcbFcK+NEHrHmPxy9TnTh\n' +
63 | 'Zwp1dsNQU+Xkylz8IUANWSLHYZOMtN2e5SKIdwTtl5C8YxveuY8YKb1gDExnMraT\n' +
64 | 'VGXQDqPleug=\n' +
65 | '-----END CERTIFICATE-----'
66 | };
67 |
describe('fetch tests', function() {
    this.timeout(10000); // eslint-disable-line
    let httpServer, httpsServer;

    // Redirect chain served by the HTTP test server: every key answers with a
    // 302 pointing at its value, so '/redirectN' needs N hops to reach '/'.
    const REDIRECTS = {
        '/redirect6': '/redirect5',
        '/redirect5': '/redirect4',
        '/redirect4': '/redirect3',
        '/redirect3': '/redirect2',
        '/redirect2': '/redirect1',
        '/redirect1': '/'
    };

    // Collects every 'data' chunk of `req` and hands the concatenated body to
    // `callback` as a string once 'end' fires.
    const readBody = (req, callback) => {
        const chunks = [];
        req.on('data', chunk => {
            chunks.push(chunk);
        });
        req.on('end', () => {
            callback(Buffer.concat(chunks).toString());
        });
    };

    beforeEach(done => {
        httpServer = http.createServer((req, res) => {
            if (Object.prototype.hasOwnProperty.call(REDIRECTS, req.url)) {
                res.writeHead(302, {
                    Location: REDIRECTS[req.url]
                });
                res.end();
                return;
            }

            switch (req.url) {
                case '/gzip': {
                    res.writeHead(200, {
                        'Content-Type': 'text/plain',
                        'Content-Encoding': 'gzip'
                    });
                    // pre-compressed payload; the gzip test expects it to
                    // decode to 'Hello World HTTP\n'
                    let str = 'H4sIAAAAAAAAA/NIzcnJVwjPL8pJUfAICQngAgCwsOrsEQAAAA==';
                    let strBuf = Buffer.from(str, 'base64');
                    res.end(strBuf);
                    break;
                }

                case '/invalid':
                    res.writeHead(500, {
                        'Content-Type': 'text/plain'
                    });
                    res.end('Hello World HTTP\n');
                    break;

                case '/auth': {
                    // last whitespace separated token of the Authorization header
                    let auth = (req.headers.authorization || '').toString().split(' ').pop().trim();
                    if (Buffer.from(auth, 'base64').toString() === 'user:pass') {
                        res.writeHead(200, {
                            'Content-Type': 'text/plain'
                        });
                        res.end(Buffer.from(auth, 'base64'));
                    } else {
                        res.writeHead(401, {
                            'Content-Type': 'text/plain',
                            'WWW-Authenticate': 'Basic realm="User Visible Realm"'
                        });
                        res.end('Authentication required');
                    }

                    break;
                }

                default:
                    res.writeHead(200, {
                        'Content-Type': 'text/plain'
                    });
                    res.end('Hello World HTTP\n');
            }
        });

        httpsServer = https.createServer(httpsOptions, (req, res) => {
            res.writeHead(200, {
                'Content-Type': 'text/plain'
            });
            res.end('Hello World HTTPS\n');
        });

        httpServer.listen(HTTP_PORT, () => {
            httpsServer.listen(HTTPS_PORT, done);
        });
    });

    afterEach(done => {
        httpServer.close(() => {
            httpsServer.close(done);
        });
    });

    it('should fetch HTTP data', done => {
        const req = new fetch.FetchStream('http://localhost:' + HTTP_PORT);
        readBody(req, body => {
            expect(body).to.equal('Hello World HTTP\n');
            done();
        });
    });

    it('should fetch HTTPS data', done => {
        const req = new fetch.FetchStream('https://localhost:' + HTTPS_PORT, {
            rejectUnauthorized: false
        });
        readBody(req, body => {
            expect(body).to.equal('Hello World HTTPS\n');
            done();
        });
    });

    it('should fail on self signed HTTPS certificate', done => {
        const req = new fetch.FetchStream('https://localhost:' + HTTPS_PORT);
        req.on('error', err => {
            expect(err).to.exist;
            done();
        });
        req.on('data', () => {
            expect(false).to.be.true;
        });
        req.on('end', () => {
            expect(false).to.be.true;
        });
    });

    it('should fetch HTTP data with redirects', done => {
        const req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/redirect3');
        readBody(req, body => {
            expect(body).to.equal('Hello World HTTP\n');
            done();
        });
    });

    it('should not follow too many redirects', done => {
        const req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/redirect6', {
            maxRedirects: 5
        });

        let status;
        req.on('meta', meta => {
            status = meta.status;
        });

        readBody(req, () => {
            // Asserting here rather than inside the 'meta' handler makes the
            // test fail if 'meta' is never emitted at all.
            expect(status).to.equal(302);
            done();
        });
    });

    it('should unzip compressed HTTP data', done => {
        const req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/gzip');
        readBody(req, body => {
            expect(body).to.equal('Hello World HTTP\n');
            done();
        });
    });

    it('should return error for unresolved host', done => {
        const req = new fetch.FetchStream('http://asfhaskhhgbjdsfhgbsdjgk');
        req.on('error', err => {
            expect(err).to.exist;
            done();
        });
        // keep 'data' and 'end' listeners attached, nothing to verify in them
        readBody(req, () => {});
    });

    it('should fail basic HTTP auth', done => {
        const req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/auth');

        let status;
        let failed = false;

        req.on('meta', meta => {
            status = meta.status;
        });
        req.on('error', err => {
            failed = true;
            expect(err).to.exist;
            done();
        });
        readBody(req, () => {
            if (failed) {
                // 'error' already finished the test, do not call done() twice
                return;
            }
            expect(status).to.equal(401);
            done();
        });
    });

    it('should handle basic HTTP auth', done => {
        const req = new fetch.FetchStream('http://user:pass@localhost:' + HTTP_PORT + '/auth');
        readBody(req, body => {
            expect(body).to.equal('user:pass');
            done();
        });
    });

    it('should handle basic HTTP auth from options', done => {
        const req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/auth', {
            user: 'user',
            pass: 'pass'
        });
        readBody(req, body => {
            expect(body).to.equal('user:pass');
            done();
        });
    });

    it('should return error for invalid protocol', done => {
        // plain HTTP request against the HTTPS port
        const req = new fetch.FetchStream('http://localhost:' + HTTPS_PORT, {
            timeout: 1000
        });
        req.on('error', err => {
            expect(err).to.exist;
            done();
        });
        // keep 'data' and 'end' listeners attached, nothing to verify in them
        readBody(req, () => {});
    });
});
334 |
--------------------------------------------------------------------------------
/lib/fetch.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 |
3 | const http = require('http');
4 | const https = require('https');
5 | const urllib = require('url');
6 | const zlib = require('zlib');
7 | const dns = require('dns');
8 | const Stream = require('stream').Readable;
9 | const CookieJar = require('./cookiejar').CookieJar;
10 | const iconv = require('iconv-lite');
11 | const net = require('net');
12 |
13 | class FetchStream extends Stream {
14 | constructor(url, options) {
15 | super();
16 |
17 | options = options || {};
18 |
19 | this.url = url;
20 | if (!this.url) {
21 | return this.emit('error', new Error('url not defined'));
22 | }
23 |
24 | this.userAgent = options.userAgent || 'FetchStream';
25 |
26 | this._redirectCount = 0;
27 |
28 | this.options = options || {};
29 | this.normalizeOptions();
30 |
31 | // prevent errors before 'error' handler is set by defferring actions
32 | if (typeof setImmediate !== 'undefined') {
33 | setImmediate(this.runStream.bind(this, url));
34 | } else {
35 | process.nextTick(this.runStream.bind(this, url));
36 | }
37 | this.responseBuffer = Buffer.alloc(0);
38 | this.ended = false;
39 | this.readyToRead = 0;
40 | }
41 |
42 | _read(size) {
43 | if (this.ended && this.responseBuffer.length === 0) {
44 | this.push(null);
45 | return;
46 | }
47 | this.readyToRead += size;
48 | this.drainBuffer();
49 | }
50 |
51 | drainBuffer() {
52 | if (this.readyToRead === 0) {
53 | return;
54 | }
55 | if (this.responseBuffer.length === 0) {
56 | return;
57 | }
58 | let push;
59 | let rest;
60 | let restSize;
61 |
62 | if (this.responseBuffer.length > this.readyToRead) {
63 | push = Buffer.alloc(this.readyToRead);
64 | this.responseBuffer.copy(push, 0, 0, this.readyToRead);
65 | restSize = this.responseBuffer.length - this.readyToRead;
66 | rest = Buffer.alloc(restSize);
67 | this.responseBuffer.copy(rest, 0, this.readyToRead);
68 | } else {
69 | push = this.responseBuffer;
70 | rest = Buffer.alloc(0);
71 | }
72 | this.responseBuffer = rest;
73 | this.readyToRead = 0;
74 | if (this.options.encoding) {
75 | this.push(push, this.options.encoding);
76 | } else {
77 | this.push(push);
78 | }
79 | }
80 |
/**
 * Emits a 'destroy' event carrying `ex`. Nothing else happens here: the
 * method does not call the base class implementation.
 * NOTE(review): this replaces stream.Readable#destroy - confirm that
 * skipping the built-in teardown is intended.
 *
 * @param {*} ex Value passed through to the 'destroy' listeners
 */
destroy(ex) {
    this.emit('destroy', ex);
}
84 |
85 | normalizeOptions() {
86 | // cookiejar
87 | this.cookieJar = this.options.cookieJar || new CookieJar();
88 |
89 | // default redirects - 10
90 | // if disableRedirect is set, then 0
91 | if (!this.options.disableRedirect && typeof this.options.maxRedirects !== 'number' && !(this.options.maxRedirects instanceof Number)) {
92 | this.options.maxRedirects = 10;
93 | } else if (this.options.disableRedirects) {
94 | this.options.maxRedirects = 0;
95 | }
96 |
97 | // normalize header keys
98 | // HTTP and HTTPS takes in key names in case insensitive but to find
99 | // an exact value from an object key name needs to be case sensitive
100 | // so we're just lowercasing all input keys
101 | this.options.headers = this.options.headers || {};
102 |
103 | let keys = Object.keys(this.options.headers);
104 | let newheaders = {};
105 | let i;
106 |
107 | for (i = keys.length - 1; i >= 0; i--) {
108 | newheaders[keys[i].toLowerCase().trim()] = this.options.headers[keys[i]];
109 | }
110 |
111 | this.options.headers = newheaders;
112 |
113 | if (!this.options.headers['user-agent']) {
114 | this.options.headers['user-agent'] = this.userAgent;
115 | }
116 |
117 | if (!this.options.headers.pragma) {
118 | this.options.headers.pragma = 'no-cache';
119 | }
120 |
121 | if (!this.options.headers['cache-control']) {
122 | this.options.headers['cache-control'] = 'no-cache';
123 | }
124 |
125 | if (!this.options.disableGzip) {
126 | this.options.headers['accept-encoding'] = 'gzip, deflate';
127 | } else {
128 | delete this.options.headers['accept-encoding'];
129 | }
130 |
131 | // max length for the response,
132 | // if not set, default is Infinity
133 | if (!this.options.maxResponseLength) {
134 | this.options.maxResponseLength = Infinity;
135 | }
136 |
137 | // method:
138 | // defaults to GET, or when payload present to POST
139 | if (!this.options.method) {
140 | this.options.method = this.options.payload || this.options.payloadSize ? 'POST' : 'GET';
141 | }
142 |
143 | // set cookies
144 | // takes full cookie definition strings as params
145 | if (this.options.cookies) {
146 | for (i = 0; i < this.options.cookies.length; i++) {
147 | this.cookieJar.setCookie(this.options.cookies[i], this.url);
148 | }
149 | }
150 |
151 | // rejectUnauthorized
152 | if (typeof this.options.rejectUnauthorized === 'undefined') {
153 | this.options.rejectUnauthorized = true;
154 | }
155 | }
156 |
157 | parseUrl(url) {
158 | let urlparts = urllib.parse(url, false, true);
159 | let transport;
160 | let urloptions = {
161 | host: urlparts.hostname || urlparts.host,
162 | port: urlparts.port,
163 | path: urlparts.pathname + (urlparts.search || '') || '/',
164 | method: this.options.method,
165 | rejectUnauthorized: this.options.rejectUnauthorized
166 | };
167 |
168 | switch (urlparts.protocol) {
169 | case 'https:':
170 | transport = https;
171 | break;
172 | case 'http:':
173 | default:
174 | transport = http;
175 | break;
176 | }
177 |
178 | if (transport === https) {
179 | if ('agentHttps' in this.options) {
180 | urloptions.agent = this.options.agentHttps;
181 | } else if ('agent' in this.options) {
182 | urloptions.agent = this.options.agent;
183 | }
184 | } else if ('agentHttp' in this.options) {
185 | urloptions.agent = this.options.agentHttp;
186 | } else if ('agent' in this.options) {
187 | urloptions.agent = this.options.agent;
188 | }
189 |
190 | if (!urloptions.port) {
191 | switch (urlparts.protocol) {
192 | case 'https:':
193 | urloptions.port = 443;
194 | break;
195 | case 'http:':
196 | default:
197 | urloptions.port = 80;
198 | break;
199 | }
200 | }
201 |
202 | if (this.options.localAddress) {
203 | urloptions.localAddress = this.options.localAddress;
204 | }
205 |
206 | urloptions.headers = this.options.headers || {};
207 |
208 | if (this.options.user) {
209 | let buf = Buffer.from([].concat(this.options.user).concat(this.options.pass || []).join(':'));
210 | urloptions.headers.Authorization = 'Basic ' + buf.toString('base64');
211 | } else if (urlparts.auth) {
212 | let buf = Buffer.from(urlparts.auth);
213 | urloptions.headers.Authorization = 'Basic ' + buf.toString('base64');
214 | }
215 |
216 | return {
217 | urloptions,
218 | transport
219 | };
220 | }
221 |
/**
 * Stores `encoding` in `this.options.encoding`; drainBuffer() passes that
 * value along with every chunk it pushes.
 * NOTE(review): this replaces stream.Readable#setEncoding without calling
 * the base implementation - confirm that this is intended.
 *
 * @param {String} encoding Encoding name to remember
 */
setEncoding(encoding) {
    this.options.encoding = encoding;
}
225 |
226 | runStream(url) {
227 | let urlData = this.parseUrl(url);
228 | let cookies = this.cookieJar.getCookies(url);
229 |
230 | if (cookies) {
231 | urlData.urloptions.headers.cookie = cookies;
232 | } else {
233 | delete urlData.urloptions.headers.cookie;
234 | }
235 |
236 | if (this.options.payload) {
237 | urlData.urloptions.headers['content-length'] = Buffer.byteLength(this.options.payload || '', 'utf-8');
238 | }
239 |
240 | if (this.options.payloadSize) {
241 | urlData.urloptions.headers['content-length'] = this.options.payloadSize;
242 | }
243 |
244 | if (this.options.asyncDnsLoookup) {
245 | let dnsCallback = function(err, addresses) {
246 | if (err) {
247 | this.emit('error', err);
248 | return;
249 | }
250 |
251 | urlData.urloptions.headers.host = urlData.urloptions.hostname || urlData.urloptions.host;
252 | urlData.urloptions.hostname = addresses[0];
253 | urlData.urloptions.host = urlData.urloptions.headers.host + (urlData.urloptions.port ? ':' + urlData.urloptions.port : '');
254 |
255 | this._runStream(urlData, url);
256 | }.bind(this);
257 |
258 | if (net.isIP(urlData.urloptions.host)) {
259 | dnsCallback(null, [urlData.urloptions.host]);
260 | } else {
261 | dns.resolve4(urlData.urloptions.host, dnsCallback);
262 | }
263 | } else {
264 | this._runStream(urlData, url);
265 | }
266 | }
267 |
268 | _runStream(urlData, url) {
269 | let req = urlData.transport.request(urlData.urloptions, res => {
270 | // catch new cookies before potential redirect
271 | if (Array.isArray(res.headers['set-cookie'])) {
272 | for (let i = 0; i < res.headers['set-cookie'].length; i++) {
273 | this.cookieJar.setCookie(res.headers['set-cookie'][i], url);
274 | }
275 | }
276 |
277 | if ([301, 302, 303, 307, 308].includes(res.statusCode)) {
278 | if (!this.options.disableRedirects && this.options.maxRedirects > this._redirectCount && res.headers.location) {
279 | this._redirectCount++;
280 | req.destroy();
281 | this.runStream(urllib.resolve(url, res.headers.location));
282 | return;
283 | }
284 | }
285 |
286 | this.meta = {
287 | status: res.statusCode,
288 | responseHeaders: res.headers,
289 | finalUrl: url,
290 | redirectCount: this._redirectCount,
291 | cookieJar: this.cookieJar
292 | };
293 |
294 | let curlen = 0;
295 | let maxlen;
296 |
297 | let receive = chunk => {
298 | if (curlen + chunk.length > this.options.maxResponseLength) {
299 | maxlen = this.options.maxResponseLength - curlen;
300 | } else {
301 | maxlen = chunk.length;
302 | }
303 |
304 | if (maxlen <= 0) {
305 | return;
306 | }
307 |
308 | curlen += Math.min(maxlen, chunk.length);
309 | if (maxlen >= chunk.length) {
310 | if (this.responseBuffer.length === 0) {
311 | this.responseBuffer = chunk;
312 | } else {
313 | this.responseBuffer = Buffer.concat([this.responseBuffer, chunk]);
314 | }
315 | } else {
316 | this.responseBuffer = Buffer.concat([this.responseBuffer, chunk], this.responseBuffer.length + maxlen);
317 | }
318 | this.drainBuffer();
319 | };
320 |
321 | let error = err => {
322 | this.ended = true;
323 | this.emit('error', err);
324 | this.drainBuffer();
325 | };
326 |
327 | let end = () => {
328 | this.ended = true;
329 | if (this.responseBuffer.length === 0) {
330 | this.push(null);
331 | }
332 | };
333 |
334 | let unpack = (type, res) => {
335 | let z = zlib['create' + type]();
336 | z.on('data', receive);
337 | z.on('error', error);
338 | z.on('end', end);
339 | res.pipe(z);
340 | };
341 |
342 | this.emit('meta', this.meta);
343 |
344 | if (res.headers['content-encoding']) {
345 | switch (res.headers['content-encoding'].toLowerCase().trim()) {
346 | case 'gzip':
347 | return unpack('Gunzip', res);
348 | case 'deflate':
349 | return unpack('InflateRaw', res);
350 | }
351 | }
352 |
353 | res.on('data', receive);
354 | res.on('end', end);
355 | });
356 |
357 | req.on('error', e => {
358 | this.emit('error', e);
359 | });
360 |
361 | if (this.options.timeout) {
362 | req.setTimeout(this.options.timeout, req.abort.bind(req));
363 | }
364 | this.on('destroy', req.abort.bind(req));
365 |
366 | if (this.options.payload) {
367 | req.end(this.options.payload);
368 | } else if (this.options.payloadStream) {
369 | this.options.payloadStream.pipe(req);
370 | this.options.payloadStream.resume();
371 | } else {
372 | req.end();
373 | }
374 | }
375 | }
376 |
/**
 * Fetches `url` through a FetchStream, collects the complete response body and
 * reports the outcome through `callback`, which is invoked at most once.
 *
 * If both `options.disableDecoding` and `options.outputEncoding` are unset the
 * collected Buffer is passed on untouched. Otherwise the charset is taken from
 * `options.overrideCharset`, a charset found in the HTML itself (for
 * `text/html` responses) or the `content-type` header, in that order, and the
 * body is decoded with iconv unless decoding is disabled or the charset
 * already is UTF-8. With `options.outputEncoding` set, the body is passed on
 * as a string.
 *
 * @param {String} url URL to fetch
 * @param {Object} [options] Options passed to FetchStream and read here as described above
 * @param {Function} callback Called as `callback(error)` or `callback(null, meta, body)`
 */
function fetchUrl(url, options, callback) {
    // support the fetchUrl(url, callback) call form
    if (!callback && typeof options === 'function') {
        callback = options;
        options = {};
    } else if (!options) {
        options = {};
    }

    const stream = new FetchStream(url, options);
    const parts = [];
    let totalLength = 0;
    let meta;
    let contentType;
    let done = false;

    stream.on('meta', info => {
        meta = info;
        contentType = _parseContentType(info.responseHeaders['content-type']);
    });

    stream.on('data', chunk => {
        if (!chunk) {
            return;
        }
        parts.push(chunk);
        totalLength += chunk.length;
    });

    stream.on('error', err => {
        // invalid formatting errors (HPE_INVALID_CONSTANT) are skipped
        const isFormattingError = err && err.code === 'HPE_INVALID_CONSTANT';
        if (isFormattingError || done) {
            return;
        }
        done = true;
        callback(err);
    });

    stream.on('end', () => {
        if (done) {
            return;
        }
        done = true;

        let body = Buffer.concat(parts, totalLength);
        const disableDecoding = options.disableDecoding;
        const outputEncoding = options.outputEncoding;

        if (!(disableDecoding || outputEncoding)) {
            return callback(null, meta, body);
        }

        // charset priority: explicit override, charset found in the HTML, header value, utf-8
        const htmlCharset = contentType.mimeType === 'text/html' ? _findHTMLCharset(body) : undefined;
        const charset = (options.overrideCharset || htmlCharset || contentType.charset || 'utf-8').trim().toLowerCase();

        if (!disableDecoding && !/^utf-?8$/i.test(charset)) {
            try {
                body = iconv.decode(body, charset);
                if (outputEncoding && ['base64', 'hex'].includes(outputEncoding.toLowerCase())) {
                    // binary output encodings are produced from a Buffer again
                    body = Buffer.from(body);
                }
            } catch (E) {
                // failed decoding
            }
        }

        if (!outputEncoding) {
            return callback(null, meta, body);
        }

        return callback(null, meta, typeof body === 'string' ? body : body.toString(outputEncoding));
    });
}
452 |
/**
 * Parses a Content-Type header value into its mime type and charset.
 *
 * The charset value may be quoted (`charset="utf-8"`); surrounding quotes are
 * removed so that the result is a bare charset name. If several charset
 * parameters are present the last one wins.
 *
 * @param {String} str Content-Type header value, e.g. `text/html; charset=utf-8`
 * @returns {Object} `{}` for an empty/missing value, otherwise `{mimeType, charset}`, both trimmed and lowercased; charset defaults to `utf-8`
 */
function _parseContentType(str) {
    if (!str) {
        return {};
    }

    const [mimeType, ...params] = str.split(';');
    let charset;

    for (const param of params) {
        const [name, value] = param.split('=');
        if (value !== undefined && name.trim().toLowerCase() === 'charset') {
            charset = value;
        }
    }

    // parameter values may be sent as quoted strings, keep only the bare name
    const bareCharset = (charset || '')
        .trim()
        .replace(/^(["'])(.*)\1$/, '$2')
        .trim();

    return {
        mimeType: (mimeType || '').trim().toLowerCase(),
        charset: (bareCharset || 'UTF-8').toLowerCase() // defaults to UTF-8
    };
}
476 |
477 | function _findHTMLCharset(htmlbuffer) {
478 | let body = htmlbuffer.toString('ascii'),
479 | input,
480 | meta,
481 | charset;
482 |
483 | if ((meta = body.match(/]*?>/i))) {
484 | input = meta[0];
485 | }
486 |
487 | if (input) {
488 | charset = input.match(/charset\s?=\s?([a-zA-Z\-0-9]*);?/);
489 | if (charset) {
490 | charset = (charset[1] || '').trim().toLowerCase();
491 | }
492 | }
493 |
494 | if (!charset && (meta = body.match(/