├── .gitignore ├── .eslintrc ├── .travis.yml ├── example ├── pipe.js ├── fetchurl.js └── fetchstream.js ├── lib ├── cookiejar.js └── fetch.js ├── Gruntfile.js ├── caching.md ├── package.json ├── LICENSE ├── README.md └── test └── fetch-test.js /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "nodemailer" 3 | } 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | sudo: false 3 | node_js: 4 | - 6 5 | - 8 6 | notifications: 7 | email: 8 | - andris@kreata.ee 9 | -------------------------------------------------------------------------------- /example/pipe.js: -------------------------------------------------------------------------------- 1 | /* eslint no-console:0 */ 2 | 3 | 'use strict'; 4 | 5 | // pipe to file 6 | 7 | const FetchStream = require('../lib/fetch').FetchStream; 8 | const fs = require('fs'); 9 | 10 | const inp = new FetchStream('http://google.com'); 11 | const out = fs.createWriteStream('google.html'); 12 | 13 | inp.on('end', () => { 14 | console.log('downloaded!'); 15 | }); 16 | 17 | inp.pipe(out); 18 | -------------------------------------------------------------------------------- /example/fetchurl.js: -------------------------------------------------------------------------------- 1 | /* eslint no-console:0 */ 2 | 3 | 'use strict'; 4 | 5 | // fetch url and update charset to utf-8 6 | const fetchUrl = require('../lib/fetch').fetchUrl; 7 | 8 | fetchUrl('http://kreata.ee/iso-8859-15.php', (error, meta, body) => { 9 | if (error) { 10 | return console.log('ERROR', error.message || error); 11 | } 12 | 13 | console.log('META INFO'); 14 | console.log(meta); 15 | 16 | console.log('BODY'); 17 | console.log(body.toString('utf-8')); 18 | }); 19 | -------------------------------------------------------------------------------- /example/fetchstream.js: -------------------------------------------------------------------------------- 1 | /* eslint no-console:0 */ 2 | 3 | 'use strict'; 4 | 5 | const FetchStream = require('../lib/fetch').FetchStream; 6 | 7 | let fetch = new FetchStream('http://google.com', { 8 | headers: {} 9 | }); 10 | 11 | fetch.on('data', chunk => { 12 | console.log(chunk); 13 | }); 14 | 15 | fetch.on('meta', meta => { 16 | console.log(meta); 17 | }); 18 | 19 | fetch.on('end', () => { 20 | console.log('END'); 21 | }); 22 | 23 | fetch.on('error', e => { 24 | console.log('ERROR: ' + ((e && e.message) || e)); 25 | }); 26 | -------------------------------------------------------------------------------- /lib/cookiejar.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const Biskviit = require('biskviit'); 4 | 5 | // Thin layer around biskviit to keep API compatibility 6 | class CookieJar { 7 | constructor(options) { 8 | this.options = options || {}; 9 | this.biskviit = new Biskviit({ 10 | sessionTimeout: this.options.sessionTimeout || 1800 // expire cookies after 30 minutes by default 11 | }); 12 | } 13 | 14 | getCookies(url) { 15 | return this.biskviit.get(url); 16 | } 17 | 18 | setCookie(cookieStr, url) { 19 | this.biskviit.set(cookieStr, url); 20 | } 21 | } 22 | 23 | module.exports.CookieJar = CookieJar; 24 | -------------------------------------------------------------------------------- /Gruntfile.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | module.exports = function(grunt) { 4 | // Project configuration. 5 | grunt.initConfig({ 6 | eslint: { 7 | all: ['lib/*.js', 'test/*.js', 'example/*.js', 'Gruntfile.js'] 8 | }, 9 | 10 | mochaTest: { 11 | all: { 12 | options: { 13 | reporter: 'spec' 14 | }, 15 | src: ['test/*-test.js'] 16 | } 17 | } 18 | }); 19 | 20 | // Load the plugin(s) 21 | grunt.loadNpmTasks('grunt-eslint'); 22 | grunt.loadNpmTasks('grunt-mocha-test'); 23 | 24 | // Tasks 25 | grunt.registerTask('default', ['eslint', 'mochaTest']); 26 | }; 27 | -------------------------------------------------------------------------------- /caching.md: -------------------------------------------------------------------------------- 1 | Caching in HTTP 2 | =============== 3 | 4 | ETAG 5 | ---- 6 | 7 | Res 200: 8 | 9 | ETag: "fa6e-3e3073913b100" 10 | 11 | Req: 12 | 13 | If-None-Match: "fa6e-3e3073913b100" 14 | 15 | Res 304: 16 | 17 | ETag: "fa6e-3e3073913b100" 18 | 19 | 20 | LAST MODIFIED 21 | ------------- 22 | 23 | Res 200: 24 | 25 | Last-Modified: Mon, 28 Jan 2013 22:29:45 GMT 26 | 27 | Req: 28 | 29 | If-Modified-Since: Mon, 28 Jan 2013 22:29:45 GMT 30 | 31 | Res 304: 32 | 33 | Last-Modified:Mon, 28 Jan 2013 22:29:45 GMT 34 | 35 | 36 | EXPIRES 37 | ------- 38 | 39 | Res 200: 40 | 41 | Expires: Tue, 19 Mar 2013 11:17:57 GMT 42 | 43 | Do not try again before Tue, 19 Mar 2013 11:17:57 GMT 44 | 45 | CACHE-CONTROL 46 | ------------- 47 | 48 | Res 200: 49 | 50 | Cache-Control: max-age=300 51 | 52 | Do not try again until 5 minutes from now -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "httpfetch", 3 | "description": "Fetch URL contents", 4 | "version": "2.0.0", 5 | "author": "Andris Reinman", 6 | "homepage": "http://github.com/andris9/fetch", 7 | "repository": { 8 | "type": "git", 9 | "url": "git://github.com/andris9/fetch.git" 10 | }, 11 | "scripts": { 12 | "test": "grunt" 13 | }, 14 | "main": "./lib/fetch", 15 | "license": "MIT", 16 | "dependencies": { 17 | "biskviit": "2.0.0", 18 | "encoding": "0.1.12", 19 | "iconv-lite": "^0.4.18" 20 | }, 21 | "devDependencies": { 22 | "chai": "^4.1.1", 23 | "eslint-config-nodemailer": "^1.2.0", 24 | "grunt": "^1.0.1", 25 | "grunt-cli": "^1.2.0", 26 | "grunt-eslint": "^20.0.0", 27 | "grunt-mocha-test": "^0.13.2", 28 | "mocha": "^3.5.0" 29 | }, 30 | "keywords": ["url"] 31 | } 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Andris Reinman 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 11 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 12 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 14 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 15 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 16 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fetch 2 | 3 | Fetch url contents. Supports gzipped content for quicker download, redirects (with automatic cookie handling, so no eternal redirect loops), streaming and piping etc. 4 | 5 | [![Build Status](https://travis-ci.org/andris9/fetch.svg?branch=master)](https://travis-ci.org/andris9/fetch) 6 | 7 | ## Install 8 | 9 | npm install fetch 10 | 11 | ## Usage 12 | 13 | See examples folder for a complete example 14 | 15 | ## Fetch from URL 16 | 17 | `fetch.fetchUrl(url [, options], callback)` 18 | 19 | Where 20 | 21 | * **url** is the url to fetch 22 | * **options** is an optional options object 23 | * **callback** is the callback to run - `callback(error, meta, body)` 24 | 25 | Example 26 | 27 | var fetchUrl = require("fetch").fetchUrl; 28 | 29 | // source file is iso-8859-15 but it is converted to utf-8 automatically 30 | fetchUrl("http://kreata.ee/iso-8859-15.php", function(error, meta, body){ 31 | console.log(body.toString()); 32 | }); 33 | 34 | **NB** If the file has been marked with charset other than utf-8, it is converted automatically. 35 | 36 | By default `iconv-lite` is used for charset conversion. If you want to use `node-iconv` module instead, 37 | add `"iconv": "*"` to your package.json file, it will be picked up by `fetch` automatically. 38 | 39 | ## Streaming 40 | 41 | `fetch.FetchStream(url [, options]) -> Stream` 42 | 43 | Where 44 | 45 | * **url** is the url to fetch 46 | * **options** is an optional options object 47 | 48 | With events: 49 | 50 | * **data** with a data chunk - `function(chunk){}` 51 | * **meta** with some information about the response `function(meta){}` 52 | * **end** when the receiving is ready 53 | * **error** 54 | 55 | Example 56 | 57 | var FetchStream = require("fetch").FetchStream; 58 | 59 | var fetch = new FetchStream("http://google.com"); 60 | 61 | fetch.on("data", function(chunk){ 62 | console.log(chunk); 63 | }); 64 | 65 | ## Options 66 | 67 | Possible option values 68 | 69 | * **maxRedirects** how many redirects allowed, defaults to 10 70 | * **disableRedirects** set to true if redirects are not allowed, defaults to false 71 | * **headers** optional header fields, in the form of `{'Header-Field':'value'}` 72 | * **maxResponseLength** maximum allowd length for the file, the remainder is cut off. Defaults to `Infinity` 73 | * **method** defaults to GET 74 | * **payload** request body 75 | * **disableGzip** set to false, to disable content gzipping, needed for Node v0.5.9 which has buggy zlib 76 | * **cookies** an array of cookie definitions in the form of `['name=val']` 77 | * **cookieJar** for sharing cookies between requests, see below 78 | * **outputEncoding** valid for `fetchUrl` 79 | * **disableDecoding** valid for `fetchUrl`, set to true to disable automatic charset decoding to utf-8 80 | * **overrideCharset** valid for `fetchUrl`, set input encoding 81 | * **asyncDnsLoookup** use high performance asyncronous DNS resolution based on c-ares instead of a thread pool calling getaddrinfo(3) 82 | * **timeout** set a timeout in ms 83 | * **agentHttps** pass-through http.request agent parameter for https 84 | * **agentHttp** pass-through http.request agent parameter for http 85 | * **agent** pass-through http.request agent parameter as fallback, if agentHttps or agentHttp are not specified 86 | * **rejectUnauthorized** whether to reject self-signed certificates (`true`, default behavior), or ignore and allow them (`false`) 87 | * **user** is the username for Basic auth 88 | * **pass** is the password for Basic auth 89 | 90 | ## Meta object 91 | 92 | Meta object contains following fields: 93 | 94 | * **status** HTTP status code 95 | * **responseHeaders** response headers 96 | * **finalUrl** last url value, useful with redirects 97 | * **redirectCount** how many redirects happened 98 | * **cookieJar** CookieJar object for sharing/retrieving cookies 99 | 100 | ## Headers 101 | 102 | Request headers can be set with `options.headers` 103 | 104 | options = { 105 | headers:{ 106 | "X-My-Header": "This is a custom header field" 107 | } 108 | } 109 | 110 | ## User-Agent 111 | User-Agent value can be set with `options.headers['User-Agent']` value. Defaults to `"FetchStream"` 112 | 113 | options = { 114 | headers: { 115 | "User-Agent": "MyUseragent/1.0" 116 | } 117 | } 118 | 119 | ## Cookies 120 | Cookies can be set with `options.cookies` which takes an array with cookie definitions 121 | 122 | options = { 123 | cookies: ["name=value", "key=value; path=/; secure"] 124 | } 125 | 126 | Paths, domain, expire and other cookie settings are honored, so try not to set cookies with expire dates in the past. If domain is not set, any domain will pass, same for paths. 127 | 128 | **NB** Do not set cookie field directly in request header as it will be overwritten. 129 | 130 | ## Cookie sharing 131 | 132 | Cookies can be shared between different requests, this can be achieved with `CookieJar` 133 | 134 | var fetch = require("fetch"); 135 | 136 | var cookies = new fetch.CookieJar(); 137 | 138 | // add one cookie for testing 139 | cookies.setCookie('alfa=beta; path=/;'); 140 | 141 | // create a FetchStream with custom CookieJar 142 | var f = fetch.FetchStream("http://www.example.com/page1",{cookieJar: cookies}); 143 | 144 | f.on("end", function(){ 145 | // if cookies were set with the previos request, the data is 146 | // saved in 'cookieJar' and passed to the next request 147 | fetch.FetchStream("http://www.example.com/page1",{cookieJar: cookies}); 148 | }); 149 | 150 | 151 | ## Redirects 152 | 153 | Redirects are on by default, use `options.disableRedirects` to disable. Maximum redirect count can be set with `options.maxRedirects` (defaults to 10) 154 | 155 | options = { 156 | disableRedirects: true 157 | } 158 | 159 | options = { 160 | maxRedirects: 100 161 | } 162 | 163 | ## Disable Gzip support 164 | 165 | Gzip and Deflate support is automatically on. This is problematic in Node v0.5.9 and below since Zlib support on these versions is buggy with unpacking and tends to yield in error. 166 | 167 | options = { 168 | disableGzip: true 169 | } 170 | 171 | ## Piping to file 172 | 173 | `FetchStream` is a readable Stream object and thus can be piped. For example stream URL contents directly to a file: 174 | 175 | var FetchStream = require("fetch").FetchStream, 176 | fs = require("fs"), 177 | out; 178 | 179 | out = fs.createWriteStream('file.html'); 180 | new FetchStream("http://www.example.com/index.php").pipe(out); 181 | 182 | ## License 183 | 184 | BSD 185 | -------------------------------------------------------------------------------- /test/fetch-test.js: -------------------------------------------------------------------------------- 1 | /* eslint no-unused-expressions:0 */ 2 | /* globals afterEach, beforeEach, describe, it */ 3 | 4 | 'use strict'; 5 | 6 | const chai = require('chai'); 7 | const expect = chai.expect; 8 | 9 | //var http = require('http'); 10 | const fetch = require('../lib/fetch'); 11 | const http = require('http'); 12 | const https = require('https'); 13 | 14 | chai.config.includeStack = true; 15 | 16 | const HTTP_PORT = 9998; 17 | const HTTPS_PORT = 9993; 18 | 19 | let httpsOptions = { 20 | key: 21 | '-----BEGIN RSA PRIVATE KEY-----\n' + 22 | 'MIIEpAIBAAKCAQEA6Z5Qqhw+oWfhtEiMHE32Ht94mwTBpAfjt3vPpX8M7DMCTwHs\n' + 23 | '1xcXvQ4lQ3rwreDTOWdoJeEEy7gMxXqH0jw0WfBx+8IIJU69xstOyT7FRFDvA1yT\n' + 24 | 'RXY2yt9K5s6SKken/ebMfmZR+03ND4UFsDzkz0FfgcjrkXmrMF5Eh5UXX/+9YHeU\n' + 25 | 'xlp0gMAt+/SumSmgCaysxZLjLpd4uXz+X+JVxsk1ACg1NoEO7lWJC/3WBP7MIcu2\n' + 26 | 'wVsMd2XegLT0gWYfT1/jsIH64U/mS/SVXC9QhxMl9Yfko2kx1OiYhDxhHs75RJZh\n' + 27 | 'rNRxgfiwgSb50Gw4NAQaDIxr/DJPdLhgnpY6UQIDAQABAoIBAE+tfzWFjJbgJ0ql\n' + 28 | 's6Ozs020Sh4U8TZQuonJ4HhBbNbiTtdDgNObPK1uNadeNtgW5fOeIRdKN6iDjVeN\n' + 29 | 'AuXhQrmqGDYVZ1HSGUfD74sTrZQvRlWPLWtzdhybK6Css41YAyPFo9k4bJ2ZW2b/\n' + 30 | 'p4EEQ8WsNja9oBpttMU6YYUchGxo1gujN8hmfDdXUQx3k5Xwx4KA68dveJ8GasIt\n' + 31 | 'd+0Jd/FVwCyyx8HTiF1FF8QZYQeAXxbXJgLBuCsMQJghlcpBEzWkscBR3Ap1U0Zi\n' + 32 | '4oat8wrPZGCblaA6rNkRUVbc/+Vw0stnuJ/BLHbPxyBs6w495yBSjBqUWZMvljNz\n' + 33 | 'm9/aK0ECgYEA9oVIVAd0enjSVIyAZNbw11ElidzdtBkeIJdsxqhmXzeIFZbB39Gd\n' + 34 | 'bjtAVclVbq5mLsI1j22ER2rHA4Ygkn6vlLghK3ZMPxZa57oJtmL3oP0RvOjE4zRV\n' + 35 | 'dzKexNGo9gU/x9SQbuyOmuauvAYhXZxeLpv+lEfsZTqqrvPUGeBiEQcCgYEA8poG\n' + 36 | 'WVnykWuTmCe0bMmvYDsWpAEiZnFLDaKcSbz3O7RMGbPy1cypmqSinIYUpURBT/WY\n' + 37 | 'wVPAGtjkuTXtd1Cy58m7PqziB7NNWMcsMGj+lWrTPZ6hCHIBcAImKEPpd+Y9vGJX\n' + 38 | 'oatFJguqAGOz7rigBq6iPfeQOCWpmprNAuah++cCgYB1gcybOT59TnA7mwlsh8Qf\n' + 39 | 'bm+tSllnin2A3Y0dGJJLmsXEPKtHS7x2Gcot2h1d98V/TlWHe5WNEUmx1VJbYgXB\n' + 40 | 'pw8wj2ACxl4ojNYqWPxegaLd4DpRbtW6Tqe9e47FTnU7hIggR6QmFAWAXI+09l8y\n' + 41 | 'amssNShqjE9lu5YDi6BTKwKBgQCuIlKGViLfsKjrYSyHnajNWPxiUhIgGBf4PI0T\n' + 42 | '/Jg1ea/aDykxv0rKHnw9/5vYGIsM2st/kR7l5mMecg/2Qa145HsLfMptHo1ZOPWF\n' + 43 | '9gcuttPTegY6aqKPhGthIYX2MwSDMM+X0ri6m0q2JtqjclAjG7yG4CjbtGTt/UlE\n' + 44 | 'WMlSZwKBgQDslGeLUnkW0bsV5EG3AKRUyPKz/6DVNuxaIRRhOeWVKV101claqXAT\n' + 45 | 'wXOpdKrvkjZbT4AzcNrlGtRl3l7dEVXTu+dN7/ZieJRu7zaStlAQZkIyP9O3DdQ3\n' + 46 | 'rIcetQpfrJ1cAqz6Ng0pD0mh77vQ13WG1BBmDFa2A9BuzLoBituf4g==\n' + 47 | '-----END RSA PRIVATE KEY-----', 48 | cert: 49 | '-----BEGIN CERTIFICATE-----\n' + 50 | 'MIICpDCCAYwCCQCuVLVKVTXnAjANBgkqhkiG9w0BAQsFADAUMRIwEAYDVQQDEwls\n' + 51 | 'b2NhbGhvc3QwHhcNMTUwMjEyMTEzMjU4WhcNMjUwMjA5MTEzMjU4WjAUMRIwEAYD\n' + 52 | 'VQQDEwlsb2NhbGhvc3QwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDp\n' + 53 | 'nlCqHD6hZ+G0SIwcTfYe33ibBMGkB+O3e8+lfwzsMwJPAezXFxe9DiVDevCt4NM5\n' + 54 | 'Z2gl4QTLuAzFeofSPDRZ8HH7wgglTr3Gy07JPsVEUO8DXJNFdjbK30rmzpIqR6f9\n' + 55 | '5sx+ZlH7Tc0PhQWwPOTPQV+ByOuReaswXkSHlRdf/71gd5TGWnSAwC379K6ZKaAJ\n' + 56 | 'rKzFkuMul3i5fP5f4lXGyTUAKDU2gQ7uVYkL/dYE/swhy7bBWwx3Zd6AtPSBZh9P\n' + 57 | 'X+OwgfrhT+ZL9JVcL1CHEyX1h+SjaTHU6JiEPGEezvlElmGs1HGB+LCBJvnQbDg0\n' + 58 | 'BBoMjGv8Mk90uGCeljpRAgMBAAEwDQYJKoZIhvcNAQELBQADggEBABXm8GPdY0sc\n' + 59 | 'mMUFlgDqFzcevjdGDce0QfboR+M7WDdm512Jz2SbRTgZD/4na42ThODOZz9z1AcM\n' + 60 | 'zLgx2ZNZzVhBz0odCU4JVhOCEks/OzSyKeGwjIb4JAY7dh+Kju1+6MNfQJ4r1Hza\n' + 61 | 'SVXH0+JlpJDaJ73NQ2JyfqELmJ1mTcptkA/N6rQWhlzycTBSlfogwf9xawgVPATP\n' + 62 | '4AuwgjHl12JI2HVVs1gu65Y3slvaHRCr0B4+Kg1GYNLLcbFcK+NEHrHmPxy9TnTh\n' + 63 | 'Zwp1dsNQU+Xkylz8IUANWSLHYZOMtN2e5SKIdwTtl5C8YxveuY8YKb1gDExnMraT\n' + 64 | 'VGXQDqPleug=\n' + 65 | '-----END CERTIFICATE-----' 66 | }; 67 | 68 | describe('fetch tests', function() { 69 | this.timeout(10000); // eslint-disable-line 70 | let httpServer, httpsServer; 71 | 72 | beforeEach(done => { 73 | httpServer = http.createServer((req, res) => { 74 | switch (req.url) { 75 | case '/redirect6': 76 | res.writeHead(302, { 77 | Location: '/redirect5' 78 | }); 79 | res.end(); 80 | break; 81 | 82 | case '/redirect5': 83 | res.writeHead(302, { 84 | Location: '/redirect4' 85 | }); 86 | res.end(); 87 | break; 88 | 89 | case '/redirect4': 90 | res.writeHead(302, { 91 | Location: '/redirect3' 92 | }); 93 | res.end(); 94 | break; 95 | 96 | case '/redirect3': 97 | res.writeHead(302, { 98 | Location: '/redirect2' 99 | }); 100 | res.end(); 101 | break; 102 | 103 | case '/redirect2': 104 | res.writeHead(302, { 105 | Location: '/redirect1' 106 | }); 107 | res.end(); 108 | break; 109 | 110 | case '/redirect1': 111 | res.writeHead(302, { 112 | Location: '/' 113 | }); 114 | res.end(); 115 | break; 116 | 117 | case '/gzip': { 118 | res.writeHead(200, { 119 | 'Content-Type': 'text/plain', 120 | 'Content-Encoding': 'gzip' 121 | }); 122 | let str = 'H4sIAAAAAAAAA/NIzcnJVwjPL8pJUfAICQngAgCwsOrsEQAAAA=='; 123 | let strBuf = Buffer.from(str, 'base64'); 124 | res.end(strBuf); 125 | break; 126 | } 127 | 128 | case '/invalid': 129 | res.writeHead(500, { 130 | 'Content-Type': 'text/plain' 131 | }); 132 | res.end('Hello World HTTP\n'); 133 | break; 134 | 135 | case '/auth': { 136 | let auth = (req.headers.authorization || '').toString().split(' ').pop().trim(); 137 | if (Buffer.from(auth, 'base64').toString() === 'user:pass') { 138 | res.writeHead(200, { 139 | 'Content-Type': 'text/plain' 140 | }); 141 | res.end(Buffer.from(auth, 'base64')); 142 | } else { 143 | res.writeHead(401, { 144 | 'Content-Type': 'text/plain', 145 | 'WWW-Authenticate': 'Basic realm="User Visible Realm"' 146 | }); 147 | res.end('Authentication required'); 148 | } 149 | 150 | break; 151 | } 152 | default: 153 | res.writeHead(200, { 154 | 'Content-Type': 'text/plain' 155 | }); 156 | res.end('Hello World HTTP\n'); 157 | } 158 | }); 159 | 160 | httpsServer = https.createServer(httpsOptions, (req, res) => { 161 | res.writeHead(200, { 162 | 'Content-Type': 'text/plain' 163 | }); 164 | res.end('Hello World HTTPS\n'); 165 | }); 166 | 167 | httpServer.listen(HTTP_PORT, () => { 168 | httpsServer.listen(HTTPS_PORT, done); 169 | }); 170 | }); 171 | 172 | afterEach(done => { 173 | httpServer.close(() => { 174 | httpsServer.close(done); 175 | }); 176 | }); 177 | 178 | it('should fetch HTTP data', done => { 179 | let req = new fetch.FetchStream('http://localhost:' + HTTP_PORT); 180 | let buf = []; 181 | req.on('data', chunk => { 182 | buf.push(chunk); 183 | }); 184 | req.on('end', () => { 185 | expect(Buffer.concat(buf).toString()).to.equal('Hello World HTTP\n'); 186 | done(); 187 | }); 188 | }); 189 | 190 | it('should fetch HTTPS data', done => { 191 | let req = new fetch.FetchStream('https://localhost:' + HTTPS_PORT, { 192 | rejectUnauthorized: false 193 | }); 194 | let buf = []; 195 | req.on('data', chunk => { 196 | buf.push(chunk); 197 | }); 198 | req.on('end', () => { 199 | expect(Buffer.concat(buf).toString()).to.equal('Hello World HTTPS\n'); 200 | done(); 201 | }); 202 | }); 203 | 204 | it('should fail on self signed HTTPS certificate', done => { 205 | let req = new fetch.FetchStream('https://localhost:' + HTTPS_PORT); 206 | req.on('error', err => { 207 | expect(err).to.exist; 208 | done(); 209 | }); 210 | req.on('data', () => { 211 | expect(false).to.be.true; 212 | }); 213 | req.on('end', () => { 214 | expect(false).to.be.true; 215 | }); 216 | }); 217 | 218 | it('should fetch HTTP data with redirects', done => { 219 | let req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/redirect3'); 220 | let buf = []; 221 | req.on('data', chunk => { 222 | buf.push(chunk); 223 | }); 224 | req.on('end', () => { 225 | expect(Buffer.concat(buf).toString()).to.equal('Hello World HTTP\n'); 226 | done(); 227 | }); 228 | }); 229 | 230 | it('should not follow too many redirects', done => { 231 | let req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/redirect6', { 232 | maxRedirects: 5 233 | }); 234 | 235 | req.on('meta', meta => { 236 | expect(meta.status).to.equal(302); 237 | }); 238 | 239 | let buf = []; 240 | req.on('data', chunk => { 241 | buf.push(chunk); 242 | }); 243 | req.on('end', () => { 244 | done(); 245 | }); 246 | }); 247 | 248 | it('should unzip compressed HTTP data', done => { 249 | let req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/gzip'); 250 | let buf = []; 251 | req.on('data', chunk => { 252 | buf.push(chunk); 253 | }); 254 | req.on('end', () => { 255 | expect(Buffer.concat(buf).toString()).to.equal('Hello World HTTP\n'); 256 | done(); 257 | }); 258 | }); 259 | 260 | it('should return error for unresolved host', done => { 261 | let req = new fetch.FetchStream('http://asfhaskhhgbjdsfhgbsdjgk'); 262 | let buf = []; 263 | req.on('data', chunk => { 264 | buf.push(chunk); 265 | }); 266 | req.on('error', err => { 267 | expect(err).to.exist; 268 | done(); 269 | }); 270 | req.on('end', () => {}); 271 | }); 272 | 273 | it('should fail basic HTTP auth', done => { 274 | let req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/auth'); 275 | let buf = []; 276 | req.on('data', chunk => { 277 | buf.push(chunk); 278 | }); 279 | req.on('error', err => { 280 | expect(err).to.exist; 281 | done(); 282 | }); 283 | req.on('meta', meta => { 284 | expect(meta.status).to.equal(401); 285 | }); 286 | req.on('end', () => { 287 | done(); 288 | }); 289 | }); 290 | 291 | it('should handle basic HTTP auth', done => { 292 | let req = new fetch.FetchStream('http://user:pass@localhost:' + HTTP_PORT + '/auth'); 293 | let buf = []; 294 | req.on('data', chunk => { 295 | buf.push(chunk); 296 | }); 297 | req.on('end', () => { 298 | expect(Buffer.concat(buf).toString()).to.equal('user:pass'); 299 | done(); 300 | }); 301 | }); 302 | 303 | it('should handle basic HTTP auth from options', done => { 304 | let req = new fetch.FetchStream('http://localhost:' + HTTP_PORT + '/auth', { 305 | user: 'user', 306 | pass: 'pass' 307 | }); 308 | let buf = []; 309 | req.on('data', chunk => { 310 | buf.push(chunk); 311 | }); 312 | req.on('end', () => { 313 | expect(Buffer.concat(buf).toString()).to.equal('user:pass'); 314 | done(); 315 | }); 316 | }); 317 | 318 | it('should return error for invalid protocol', done => { 319 | let req = new fetch.FetchStream('http://localhost:' + HTTPS_PORT, { 320 | timeout: 1000 321 | }); 322 | let buf = []; 323 | 324 | req.on('data', chunk => { 325 | buf.push(chunk); 326 | }); 327 | req.on('error', err => { 328 | expect(err).to.exist; 329 | done(); 330 | }); 331 | req.on('end', () => {}); 332 | }); 333 | }); 334 | -------------------------------------------------------------------------------- /lib/fetch.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const http = require('http'); 4 | const https = require('https'); 5 | const urllib = require('url'); 6 | const zlib = require('zlib'); 7 | const dns = require('dns'); 8 | const Stream = require('stream').Readable; 9 | const CookieJar = require('./cookiejar').CookieJar; 10 | const iconv = require('iconv-lite'); 11 | const net = require('net'); 12 | 13 | class FetchStream extends Stream { 14 | constructor(url, options) { 15 | super(); 16 | 17 | options = options || {}; 18 | 19 | this.url = url; 20 | if (!this.url) { 21 | return this.emit('error', new Error('url not defined')); 22 | } 23 | 24 | this.userAgent = options.userAgent || 'FetchStream'; 25 | 26 | this._redirectCount = 0; 27 | 28 | this.options = options || {}; 29 | this.normalizeOptions(); 30 | 31 | // prevent errors before 'error' handler is set by defferring actions 32 | if (typeof setImmediate !== 'undefined') { 33 | setImmediate(this.runStream.bind(this, url)); 34 | } else { 35 | process.nextTick(this.runStream.bind(this, url)); 36 | } 37 | this.responseBuffer = Buffer.alloc(0); 38 | this.ended = false; 39 | this.readyToRead = 0; 40 | } 41 | 42 | _read(size) { 43 | if (this.ended && this.responseBuffer.length === 0) { 44 | this.push(null); 45 | return; 46 | } 47 | this.readyToRead += size; 48 | this.drainBuffer(); 49 | } 50 | 51 | drainBuffer() { 52 | if (this.readyToRead === 0) { 53 | return; 54 | } 55 | if (this.responseBuffer.length === 0) { 56 | return; 57 | } 58 | let push; 59 | let rest; 60 | let restSize; 61 | 62 | if (this.responseBuffer.length > this.readyToRead) { 63 | push = Buffer.alloc(this.readyToRead); 64 | this.responseBuffer.copy(push, 0, 0, this.readyToRead); 65 | restSize = this.responseBuffer.length - this.readyToRead; 66 | rest = Buffer.alloc(restSize); 67 | this.responseBuffer.copy(rest, 0, this.readyToRead); 68 | } else { 69 | push = this.responseBuffer; 70 | rest = Buffer.alloc(0); 71 | } 72 | this.responseBuffer = rest; 73 | this.readyToRead = 0; 74 | if (this.options.encoding) { 75 | this.push(push, this.options.encoding); 76 | } else { 77 | this.push(push); 78 | } 79 | } 80 | 81 | destroy(ex) { 82 | this.emit('destroy', ex); 83 | } 84 | 85 | normalizeOptions() { 86 | // cookiejar 87 | this.cookieJar = this.options.cookieJar || new CookieJar(); 88 | 89 | // default redirects - 10 90 | // if disableRedirect is set, then 0 91 | if (!this.options.disableRedirect && typeof this.options.maxRedirects !== 'number' && !(this.options.maxRedirects instanceof Number)) { 92 | this.options.maxRedirects = 10; 93 | } else if (this.options.disableRedirects) { 94 | this.options.maxRedirects = 0; 95 | } 96 | 97 | // normalize header keys 98 | // HTTP and HTTPS takes in key names in case insensitive but to find 99 | // an exact value from an object key name needs to be case sensitive 100 | // so we're just lowercasing all input keys 101 | this.options.headers = this.options.headers || {}; 102 | 103 | let keys = Object.keys(this.options.headers); 104 | let newheaders = {}; 105 | let i; 106 | 107 | for (i = keys.length - 1; i >= 0; i--) { 108 | newheaders[keys[i].toLowerCase().trim()] = this.options.headers[keys[i]]; 109 | } 110 | 111 | this.options.headers = newheaders; 112 | 113 | if (!this.options.headers['user-agent']) { 114 | this.options.headers['user-agent'] = this.userAgent; 115 | } 116 | 117 | if (!this.options.headers.pragma) { 118 | this.options.headers.pragma = 'no-cache'; 119 | } 120 | 121 | if (!this.options.headers['cache-control']) { 122 | this.options.headers['cache-control'] = 'no-cache'; 123 | } 124 | 125 | if (!this.options.disableGzip) { 126 | this.options.headers['accept-encoding'] = 'gzip, deflate'; 127 | } else { 128 | delete this.options.headers['accept-encoding']; 129 | } 130 | 131 | // max length for the response, 132 | // if not set, default is Infinity 133 | if (!this.options.maxResponseLength) { 134 | this.options.maxResponseLength = Infinity; 135 | } 136 | 137 | // method: 138 | // defaults to GET, or when payload present to POST 139 | if (!this.options.method) { 140 | this.options.method = this.options.payload || this.options.payloadSize ? 'POST' : 'GET'; 141 | } 142 | 143 | // set cookies 144 | // takes full cookie definition strings as params 145 | if (this.options.cookies) { 146 | for (i = 0; i < this.options.cookies.length; i++) { 147 | this.cookieJar.setCookie(this.options.cookies[i], this.url); 148 | } 149 | } 150 | 151 | // rejectUnauthorized 152 | if (typeof this.options.rejectUnauthorized === 'undefined') { 153 | this.options.rejectUnauthorized = true; 154 | } 155 | } 156 | 157 | parseUrl(url) { 158 | let urlparts = urllib.parse(url, false, true); 159 | let transport; 160 | let urloptions = { 161 | host: urlparts.hostname || urlparts.host, 162 | port: urlparts.port, 163 | path: urlparts.pathname + (urlparts.search || '') || '/', 164 | method: this.options.method, 165 | rejectUnauthorized: this.options.rejectUnauthorized 166 | }; 167 | 168 | switch (urlparts.protocol) { 169 | case 'https:': 170 | transport = https; 171 | break; 172 | case 'http:': 173 | default: 174 | transport = http; 175 | break; 176 | } 177 | 178 | if (transport === https) { 179 | if ('agentHttps' in this.options) { 180 | urloptions.agent = this.options.agentHttps; 181 | } else if ('agent' in this.options) { 182 | urloptions.agent = this.options.agent; 183 | } 184 | } else if ('agentHttp' in this.options) { 185 | urloptions.agent = this.options.agentHttp; 186 | } else if ('agent' in this.options) { 187 | urloptions.agent = this.options.agent; 188 | } 189 | 190 | if (!urloptions.port) { 191 | switch (urlparts.protocol) { 192 | case 'https:': 193 | urloptions.port = 443; 194 | break; 195 | case 'http:': 196 | default: 197 | urloptions.port = 80; 198 | break; 199 | } 200 | } 201 | 202 | if (this.options.localAddress) { 203 | urloptions.localAddress = this.options.localAddress; 204 | } 205 | 206 | urloptions.headers = this.options.headers || {}; 207 | 208 | if (this.options.user) { 209 | let buf = Buffer.from([].concat(this.options.user).concat(this.options.pass || []).join(':')); 210 | urloptions.headers.Authorization = 'Basic ' + buf.toString('base64'); 211 | } else if (urlparts.auth) { 212 | let buf = Buffer.from(urlparts.auth); 213 | urloptions.headers.Authorization = 'Basic ' + buf.toString('base64'); 214 | } 215 | 216 | return { 217 | urloptions, 218 | transport 219 | }; 220 | } 221 | 222 | setEncoding(encoding) { 223 | this.options.encoding = encoding; 224 | } 225 | 226 | runStream(url) { 227 | let urlData = this.parseUrl(url); 228 | let cookies = this.cookieJar.getCookies(url); 229 | 230 | if (cookies) { 231 | urlData.urloptions.headers.cookie = cookies; 232 | } else { 233 | delete urlData.urloptions.headers.cookie; 234 | } 235 | 236 | if (this.options.payload) { 237 | urlData.urloptions.headers['content-length'] = Buffer.byteLength(this.options.payload || '', 'utf-8'); 238 | } 239 | 240 | if (this.options.payloadSize) { 241 | urlData.urloptions.headers['content-length'] = this.options.payloadSize; 242 | } 243 | 244 | if (this.options.asyncDnsLoookup) { 245 | let dnsCallback = function(err, addresses) { 246 | if (err) { 247 | this.emit('error', err); 248 | return; 249 | } 250 | 251 | urlData.urloptions.headers.host = urlData.urloptions.hostname || urlData.urloptions.host; 252 | urlData.urloptions.hostname = addresses[0]; 253 | urlData.urloptions.host = urlData.urloptions.headers.host + (urlData.urloptions.port ? ':' + urlData.urloptions.port : ''); 254 | 255 | this._runStream(urlData, url); 256 | }.bind(this); 257 | 258 | if (net.isIP(urlData.urloptions.host)) { 259 | dnsCallback(null, [urlData.urloptions.host]); 260 | } else { 261 | dns.resolve4(urlData.urloptions.host, dnsCallback); 262 | } 263 | } else { 264 | this._runStream(urlData, url); 265 | } 266 | } 267 | 268 | _runStream(urlData, url) { 269 | let req = urlData.transport.request(urlData.urloptions, res => { 270 | // catch new cookies before potential redirect 271 | if (Array.isArray(res.headers['set-cookie'])) { 272 | for (let i = 0; i < res.headers['set-cookie'].length; i++) { 273 | this.cookieJar.setCookie(res.headers['set-cookie'][i], url); 274 | } 275 | } 276 | 277 | if ([301, 302, 303, 307, 308].includes(res.statusCode)) { 278 | if (!this.options.disableRedirects && this.options.maxRedirects > this._redirectCount && res.headers.location) { 279 | this._redirectCount++; 280 | req.destroy(); 281 | this.runStream(urllib.resolve(url, res.headers.location)); 282 | return; 283 | } 284 | } 285 | 286 | this.meta = { 287 | status: res.statusCode, 288 | responseHeaders: res.headers, 289 | finalUrl: url, 290 | redirectCount: this._redirectCount, 291 | cookieJar: this.cookieJar 292 | }; 293 | 294 | let curlen = 0; 295 | let maxlen; 296 | 297 | let receive = chunk => { 298 | if (curlen + chunk.length > this.options.maxResponseLength) { 299 | maxlen = this.options.maxResponseLength - curlen; 300 | } else { 301 | maxlen = chunk.length; 302 | } 303 | 304 | if (maxlen <= 0) { 305 | return; 306 | } 307 | 308 | curlen += Math.min(maxlen, chunk.length); 309 | if (maxlen >= chunk.length) { 310 | if (this.responseBuffer.length === 0) { 311 | this.responseBuffer = chunk; 312 | } else { 313 | this.responseBuffer = Buffer.concat([this.responseBuffer, chunk]); 314 | } 315 | } else { 316 | this.responseBuffer = Buffer.concat([this.responseBuffer, chunk], this.responseBuffer.length + maxlen); 317 | } 318 | this.drainBuffer(); 319 | }; 320 | 321 | let error = err => { 322 | this.ended = true; 323 | this.emit('error', err); 324 | this.drainBuffer(); 325 | }; 326 | 327 | let end = () => { 328 | this.ended = true; 329 | if (this.responseBuffer.length === 0) { 330 | this.push(null); 331 | } 332 | }; 333 | 334 | let unpack = (type, res) => { 335 | let z = zlib['create' + type](); 336 | z.on('data', receive); 337 | z.on('error', error); 338 | z.on('end', end); 339 | res.pipe(z); 340 | }; 341 | 342 | this.emit('meta', this.meta); 343 | 344 | if (res.headers['content-encoding']) { 345 | switch (res.headers['content-encoding'].toLowerCase().trim()) { 346 | case 'gzip': 347 | return unpack('Gunzip', res); 348 | case 'deflate': 349 | return unpack('InflateRaw', res); 350 | } 351 | } 352 | 353 | res.on('data', receive); 354 | res.on('end', end); 355 | }); 356 | 357 | req.on('error', e => { 358 | this.emit('error', e); 359 | }); 360 | 361 | if (this.options.timeout) { 362 | req.setTimeout(this.options.timeout, req.abort.bind(req)); 363 | } 364 | this.on('destroy', req.abort.bind(req)); 365 | 366 | if (this.options.payload) { 367 | req.end(this.options.payload); 368 | } else if (this.options.payloadStream) { 369 | this.options.payloadStream.pipe(req); 370 | this.options.payloadStream.resume(); 371 | } else { 372 | req.end(); 373 | } 374 | } 375 | } 376 | 377 | function fetchUrl(url, options, callback) { 378 | if (!callback && typeof options === 'function') { 379 | callback = options; 380 | options = undefined; 381 | } 382 | options = options || {}; 383 | 384 | let fetchstream = new FetchStream(url, options); 385 | let responseData; 386 | let chunks = []; 387 | let chunklen = 0; 388 | let buffer; 389 | let contentType; 390 | let callbackFired = false; 391 | 392 | fetchstream.on('meta', meta => { 393 | responseData = meta; 394 | contentType = _parseContentType(meta.responseHeaders['content-type']); 395 | }); 396 | 397 | fetchstream.on('data', chunk => { 398 | if (chunk) { 399 | chunks.push(chunk); 400 | chunklen += chunk.length; 401 | } 402 | }); 403 | 404 | fetchstream.on('error', error => { 405 | if (error && error.code === 'HPE_INVALID_CONSTANT') { 406 | // skip invalid formatting errors 407 | return; 408 | } 409 | if (callbackFired) { 410 | return; 411 | } 412 | callbackFired = true; 413 | callback(error); 414 | }); 415 | 416 | fetchstream.on('end', () => { 417 | if (callbackFired) { 418 | return; 419 | } 420 | callbackFired = true; 421 | 422 | buffer = Buffer.concat(chunks, chunklen); 423 | 424 | if (!options.disableDecoding && !options.outputEncoding) { 425 | return callback(null, responseData, buffer); 426 | } 427 | 428 | if (contentType.mimeType === 'text/html') { 429 | contentType.charset = _findHTMLCharset(buffer) || contentType.charset; 430 | } 431 | 432 | contentType.charset = (options.overrideCharset || contentType.charset || 'utf-8').trim().toLowerCase(); 433 | 434 | if (!options.disableDecoding && !contentType.charset.match(/^utf-?8$/i)) { 435 | try { 436 | buffer = iconv.decode(buffer, contentType.charset); 437 | if (options.outputEncoding && ['base64', 'hex'].includes(options.outputEncoding.toLowerCase())) { 438 | buffer = Buffer.from(buffer); 439 | } 440 | } catch (E) { 441 | // failed decoding 442 | } 443 | } 444 | 445 | if (options.outputEncoding) { 446 | return callback(null, responseData, typeof buffer === 'string' ? buffer : buffer.toString(options.outputEncoding)); 447 | } else { 448 | return callback(null, responseData, buffer); 449 | } 450 | }); 451 | } 452 | 453 | function _parseContentType(str) { 454 | if (!str) { 455 | return {}; 456 | } 457 | let parts = str.split(';'), 458 | mimeType = parts.shift(), 459 | charset, 460 | chparts; 461 | 462 | for (let i = 0, len = parts.length; i < len; i++) { 463 | chparts = parts[i].split('='); 464 | if (chparts.length > 1) { 465 | if (chparts[0].trim().toLowerCase() === 'charset') { 466 | charset = chparts[1]; 467 | } 468 | } 469 | } 470 | 471 | return { 472 | mimeType: (mimeType || '').trim().toLowerCase(), 473 | charset: (charset || 'UTF-8').trim().toLowerCase() // defaults to UTF-8 474 | }; 475 | } 476 | 477 | function _findHTMLCharset(htmlbuffer) { 478 | let body = htmlbuffer.toString('ascii'), 479 | input, 480 | meta, 481 | charset; 482 | 483 | if ((meta = body.match(/]*?>/i))) { 484 | input = meta[0]; 485 | } 486 | 487 | if (input) { 488 | charset = input.match(/charset\s?=\s?([a-zA-Z\-0-9]*);?/); 489 | if (charset) { 490 | charset = (charset[1] || '').trim().toLowerCase(); 491 | } 492 | } 493 | 494 | if (!charset && (meta = body.match(/