├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── index.js ├── package.json └── test ├── index.js └── urls.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .*.sw? 2 | .DS_Store 3 | node_modules 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "0.12" 4 | - "0.11" 5 | - "0.10" 6 | - "0.8" 7 | - "0.6" 8 | - "iojs" 9 | - "iojs-v1.0.4" 10 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Welcome! 2 | 3 | We're so glad you're thinking about contributing to an 18F open source project! If you're unsure or afraid of anything, just ask or submit the issue or pull request anyways. The worst that can happen is that you'll be politely asked to change something. We appreciate any sort of contribution, and don't want a wall of rules to get in the way of that. 4 | 5 | Before contributing, we encourage you to read our CONTRIBUTING policy (you are here), our LICENSE, and our README, all of which should be in this repository. If you have any questions, or want to read more about our underlying policies, you can consult the 18F Open Source Policy GitHub repository at https://github.com/18f/open-source-policy, or just shoot us an email/official government letterhead note to [18f@gsa.gov](mailto:18f@gsa.gov). 6 | 7 | ## Public domain 8 | 9 | This project is in the public domain within the United States, and 10 | copyright and related rights in the work worldwide are waived through 11 | the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/). 12 | 13 | All contributions to this project will be released under the CC0 14 | dedication. By submitting a pull request, you are agreeing to comply 15 | with this waiver of copyright interest. 16 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | As a work of the United States Government, this project is in the 2 | public domain within the United States. 3 | 4 | Additionally, we waive copyright and related rights in the work 5 | worldwide through the CC0 1.0 Universal public domain dedication. 6 | 7 | ## CC0 1.0 Universal Summary 8 | 9 | This is a human-readable summary of the [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode). 10 | 11 | ### No Copyright 12 | 13 | The person who associated a work with this deed has dedicated the work to 14 | the public domain by waiving all of his or her rights to the work worldwide 15 | under copyright law, including all related and neighboring rights, to the 16 | extent allowed by law. 17 | 18 | You can copy, modify, distribute and perform the work, even for commercial 19 | purposes, all without asking permission. 20 | 21 | ### Other Information 22 | 23 | In no way are the patent or trademark rights of any person affected by CC0, 24 | nor are the rights that other persons may have in the work or in how the 25 | work is used, such as publicity or privacy rights. 26 | 27 | Unless expressly stated otherwise, the person who associated a work with 28 | this deed makes no warranties about the work, and disclaims liability for 29 | all uses of the work, to the fullest extent permitted by applicable law. 30 | When using or citing the work, you should not imply endorsement by the 31 | author or the affirmer. 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # urlsize 2 | **urlsize** is a [Node](http://nodejs.org/)-powered command-line 3 | utility for getting the file sizes of one or more URLs. 4 | 5 | You can install it with `npm install -g urlsize`. 6 | 7 | ## Usage 8 | ``` 9 | urlsize [options] [...] 10 | 11 | Options: 12 | --file, -f read URLs from a text file (one per line) 13 | -d sort URLs by size descending (default: ascending) 14 | --csv, -c output comma-separated values 15 | --tsv, -t output tab-separated values 16 | --help, -h show this helpful message 17 | -v print more helpful messages to stderr 18 | ``` 19 | 20 | ### Examples 21 | Just get the size of a single URL: 22 | ```sh 23 | $ urlsize google.com 24 | 50.8K http://google.com 25 | ``` 26 | 27 | Get the size of multiple URLs: 28 | ```sh 29 | $ urlsize google.com yahoo.com 30 | 50.8K http://google.com 31 | 286.1K http://yahoo.com 32 | ``` 33 | 34 | Read the list of URLs from a text file: 35 | ```sh 36 | $ echo "usa.gov\ncensus.gov" > urls.txt 37 | $ urlsize --file urls.txt 38 | 36.3K http://usa.gov 39 | 182.7K http://census.gov 40 | ``` 41 | 42 | Output the sizes as tab-separated values, where the `length` column is the size in bytes: 43 | ```sh 44 | $ urlsize --tsv census.gov usa.gov 45 | url size length 46 | http://usa.gov 36.3K 37126 47 | http://census.gov 182.7K 187063 48 | ``` 49 | 50 | By default, URLs are sorted in the output by size ascending. You can sort them in descending 51 | order with the `-d` flag: 52 | ```sh 53 | $ urlsize -d census.gov usa.gov 54 | 182.7K http://census.gov 55 | 36.3K http://usa.gov 56 | ``` 57 | 58 | ### Public domain 59 | 60 | This project is in the worldwide [public domain](LICENSE.md). As stated in 61 | [CONTRIBUTING](CONTRIBUTING.md): 62 | 63 | > This project is in the public domain within the United States, and copyright 64 | > and related rights in the work worldwide are waived through the [CC0 1.0 65 | > Universal public domain 66 | > dedication](https://creativecommons.org/publicdomain/zero/1.0/). 67 | > 68 | > All contributions to this project will be released under the CC0 dedication. 69 | > By submitting a pull request, you are agreeing to comply with this waiver of 70 | > copyright interest. 71 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | var filesize = require('filesize'), 3 | request = require('request'), 4 | async = require('async'), 5 | fs = require('fs'), 6 | rw = require('rw'), 7 | csv = require('fast-csv'), 8 | yargs = require('yargs') 9 | .usage('$0 [options] [...]') 10 | .describe('file', 'read URLs from a text file (one per line)') 11 | .alias('file', 'f') 12 | .describe('d', 'sort URLs by size descending (default: ascending)') 13 | .boolean('d') 14 | .describe('csv', 'output comma-separated values') 15 | .boolean('csv') 16 | .alias('csv', 'c') 17 | .describe('tsv', 'output tab-separated values') 18 | .boolean('tsv') 19 | .alias('tsv', 't') 20 | .describe('help', 'show this helpful message') 21 | .describe('v', 'print more helpful messages to stderr') 22 | .alias('help', 'h'), 23 | options = yargs.argv, 24 | fopts = { 25 | unix: true 26 | }, 27 | urls = options._, 28 | sort = options.d 29 | ? function(a, b) { return b - a; } 30 | : function(a, b) { return a - b; }, 31 | help = options.help; 32 | 33 | if (!options.file && !urls.length) { 34 | help = true; 35 | } 36 | 37 | if (help) { 38 | yargs.showHelp(); 39 | return process.exit(1); 40 | } 41 | 42 | if (options.file) { 43 | var src = (options.file === '-' || options.file === true) 44 | ? '/dev/stdin' 45 | : options.file; 46 | LOG('reading URLs from %s ...', src); 47 | rw.readFile(src, {}, function(error, buffer) { 48 | if (error) return ERROR('unable to read from %s: %s', src, error); 49 | urls = buffer.toString() 50 | .split(/[\r\n]+/) 51 | .filter(notEmpty); 52 | LOG('read %d URLs from %s', urls.length, src); 53 | main(urls); 54 | }); 55 | } else { 56 | main(urls); 57 | } 58 | 59 | function main(urls) { 60 | async.map(urls, getFileSize, done); 61 | } 62 | 63 | function getFileSize(url, next) { 64 | if (!url.match(/^https?:\/\//)) { 65 | url = 'http://' + url; 66 | } 67 | LOG('getting %s ...', url); 68 | var length = 0, 69 | status, 70 | stream; 71 | stream = request(url) 72 | .on('error', done) 73 | .on('response', function onResponse(res) { 74 | status = res.statusCode; 75 | if ('content-length' in res.headers) { 76 | LOG('got content-length header from %s', url); 77 | length = res.headers['content-length']; 78 | stream.end(); 79 | } else { 80 | LOG('reading %s ...', url); 81 | res.on('data', function onData(chunk) { 82 | length += chunk.length; 83 | }); 84 | } 85 | }) 86 | .on('end', function() { 87 | var size = filesize(length, fopts); 88 | next(null, { 89 | url: url, 90 | length: length, 91 | size: size 92 | }); 93 | }); 94 | } 95 | 96 | function done(error, urls) { 97 | if (error) return ERROR('error:', error); 98 | 99 | // sort the URLs by length 100 | urls.sort(function(a, b) { 101 | return sort(a.length, b.length); 102 | }); 103 | 104 | if (options.csv || options.tsv) { 105 | var opts = { 106 | delimiter: options.tsv ? '\t' : ',', 107 | headers: ['url', 'size', 'length'] 108 | }; 109 | var out = options.out 110 | ? fs.createWriteStream(out) 111 | : process.stdout, 112 | dsv = csv.createWriteStream(opts); 113 | dsv.pipe(out); 114 | urls.forEach(function(d) { 115 | dsv.write(d); 116 | }); 117 | } else { 118 | urls.forEach(function(d) { 119 | console.log([d.size, d.url].join('\t') + '\t'); 120 | }); 121 | } 122 | } 123 | 124 | function notEmpty(str) { 125 | return str && str.length; 126 | } 127 | 128 | function LOG() { 129 | options.v && console.log.apply(console, arguments); 130 | } 131 | 132 | function ERROR() { 133 | console.error.apply(console, arguments); 134 | } 135 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "urlsize", 3 | "version": "1.0.2", 4 | "description": "get the human-readable size of a URL", 5 | "main": "index.js", 6 | "bin": { 7 | "urlsize": "index.js" 8 | }, 9 | "scripts": { 10 | "test": "mocha" 11 | }, 12 | "keywords": [ 13 | "url", 14 | "size" 15 | ], 16 | "author": { 17 | "name": "Shawn Allen", 18 | "url": "https://github.com/shawnbot" 19 | }, 20 | "license": "CC0", 21 | "dependencies": { 22 | "async": "^0.9.0", 23 | "fast-csv": "^0.5.6", 24 | "filesize": "^3.1.1", 25 | "request": "^2.53.0", 26 | "rw": "^0.1.4", 27 | "yargs": "^3.3.1" 28 | }, 29 | "devDependencies": { 30 | "mocha": "^2.1.0" 31 | }, 32 | "directories": { 33 | "test": "test" 34 | }, 35 | "repository": { 36 | "type": "git", 37 | "url": "https://github.com/18F/urlsize.git" 38 | }, 39 | "bugs": { 40 | "url": "https://github.com/18F/urlsize/issues" 41 | }, 42 | "homepage": "https://github.com/18F/urlsize" 43 | } 44 | -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | var cmd = './index.js', 2 | fs = require('fs'), 3 | path = require('path'), 4 | assert = require('assert'), 5 | child = require('child_process'), 6 | csv = require('fast-csv'); 7 | 8 | describe('cli', function() { 9 | // we need to give these commands lots of time to run 10 | this.timeout(10000); 11 | 12 | var testFilename = path.join(__dirname, 'urls.txt'), 13 | testURLs = splitLines(fs.readFileSync(testFilename).toString()); 14 | 15 | it('complains when it gets too few args', function(done) { 16 | var proc = run([]); 17 | assertExitCode(proc, 1, done); 18 | }); 19 | 20 | it('exits 0 when it gets enough args', function(done) { 21 | var proc = run(['-']); 22 | assertExitCode(proc, 0, done); 23 | }); 24 | 25 | it('takes a single URL', function(done) { 26 | var proc = run(['google.com']); 27 | assertIO(proc, function(output) { 28 | assert.ok(output, 'no output!'); 29 | assert.ok(output.indexOf('google.com') > -1, 'google.com not in the output: ' + output); 30 | done(); 31 | }); 32 | }); 33 | 34 | it('takes multiple URLs', function(done) { 35 | var proc = run(['google.com', 'yahoo.com']); 36 | assertIO(proc, function(stdout) { 37 | assert.ok(stdout, 'no output!'); 38 | assert.ok(stdout.indexOf('google.com') > -1, 'google.com not in stdout: ' + stdout); 39 | assert.ok(stdout.indexOf('yahoo.com') > -1, 'yahoo.com not in stdout: ' + stdout); 40 | var lines = splitLines(stdout); 41 | assert.equal(lines.length, 2, 'expected 2 lines of output, got ' + lines.length); 42 | done(); 43 | }); 44 | }); 45 | 46 | it('reads URLs from a file', function(done) { 47 | var proc = run(['--file', testFilename]); 48 | assertIO(proc, function(stdout) { 49 | assert.ok(stdout, 'no output!'); 50 | testURLs.forEach(function(url) { 51 | assert.ok(stdout.indexOf(url) > -1, url + 'not present in stdout: ' + stdout); 52 | }); 53 | done(); 54 | }); 55 | }); 56 | 57 | it('reads URLs from stdin', function(done) { 58 | var proc = run(['--file', '-']); 59 | assertIO(proc, 'google.com\nyahoo.com', function(stdout) { 60 | assert.ok(stdout, 'no output!'); 61 | assert.ok(stdout.indexOf('google.com') > -1, 'google.com not in stdout: ' + stdout); 62 | assert.ok(stdout.indexOf('yahoo.com') > -1, 'yahoo.com not in stdout: ' + stdout); 63 | done(); 64 | }); 65 | }); 66 | 67 | it('sorts sizes ascending', function(done) { 68 | var proc = run(['--file', testFilename]); 69 | assertIO(proc, function(stdout) { 70 | var lines = splitLines(stdout), 71 | sizes = lines.map(function(line) { 72 | var size = line.split('\t').shift(); 73 | return +size.match(/^(\d+)/)[0]; 74 | }), 75 | sorted = sizes.slice().sort(ascending); 76 | assert.deepEqual(sizes, sorted, 'bad sort order: ' + sizes + ', expected ' + sorted); 77 | done(); 78 | }); 79 | }); 80 | 81 | it('sorts sizes descending', function(done) { 82 | var proc = run(['-d', '--file', testFilename]); 83 | assertIO(proc, function(stdout) { 84 | var lines = splitLines(stdout), 85 | sizes = lines.map(function(line) { 86 | var size = line.split('\t').shift(); 87 | return +size.match(/^(\d+)/)[0]; 88 | }), 89 | sorted = sizes.slice().sort(descending); 90 | assert.deepEqual(sizes, sorted, 'bad sort order: ' + sizes + ', expected ' + sorted); 91 | done(); 92 | }); 93 | }); 94 | 95 | it('formats csv', function(done) { 96 | var proc = run(['--csv', 'google.com']); 97 | assertIO(proc, function(stdout) { 98 | parseCSV(stdout, ',', function(error, rows) { 99 | assert.ok(!error, 'csv parse error: ' + error); 100 | assert.equal(rows.length, 1, 'expected 1 row, got ' + rows.length); 101 | assert.deepEqual(Object.keys(rows[0]), ['url', 'size', 'length']); 102 | assert.equal(rows[0].url, 'http://google.com', 'bad row 0: ' + JSON.stringify(rows[0])); 103 | done(); 104 | }); 105 | }); 106 | }); 107 | 108 | it('formats tsv', function(done) { 109 | var proc = run(['--tsv', 'google.com']); 110 | assertIO(proc, function(stdout) { 111 | parseCSV(stdout, '\t', function(error, rows) { 112 | assert.ok(!error, 'tsv parse error: ' + error); 113 | assert.equal(rows.length, 1, 'expected 1 row, got ' + rows.length); 114 | assert.deepEqual(Object.keys(rows[0]), ['url', 'size', 'length']); 115 | assert.equal(rows[0].url, 'http://google.com', 'bad row 0: ' + JSON.stringify(rows[0])); 116 | done(); 117 | }); 118 | }); 119 | }); 120 | 121 | }); 122 | 123 | function run(args) { 124 | return child.spawn(cmd, args, { 125 | stdio: 'pipe' 126 | }); 127 | } 128 | 129 | function assertExitCode(process, code, done) { 130 | process.on('close', function(c, signal) { 131 | assert.equal(code, c, 'exit code mismatch: expected ' + code + ', got ' + c); 132 | done(); 133 | }); 134 | } 135 | 136 | function assertIO(process, stdin, check) { 137 | if (arguments.length < 3) { 138 | check = stdin; 139 | stdin = null; 140 | } 141 | 142 | var stdout = []; 143 | process.stdout 144 | .on('data', function(chunk) { 145 | stdout.push(chunk); 146 | }); 147 | 148 | process.on('exit', done); 149 | 150 | if (stdin) { 151 | // console.log('writing:', stdin); 152 | process.stdin.write(stdin); 153 | process.stdin.end(); 154 | } 155 | 156 | function done() { 157 | stdout = stdout.join(''); 158 | if (typeof check === 'function') { 159 | check(stdout); 160 | } else { 161 | assert.equal(stdout, check, 'i/o mismatch: ' + stdout); 162 | } 163 | } 164 | } 165 | 166 | function splitLines(str) { 167 | return str.trim().split('\n').filter(function(line) { 168 | return line; 169 | }); 170 | } 171 | 172 | function parseCSV(str, delimiter, done) { 173 | // XXX csv.fromString() wasn't working for me, 174 | // but this won't parse quotes 175 | var lines = splitLines(str), 176 | cols = lines.shift().split(delimiter), 177 | rows = lines.map(function(line) { 178 | var row = {}; 179 | line.split(delimiter).forEach(function(d, i) { 180 | row[cols[i]] = d; 181 | }); 182 | return row; 183 | }); 184 | done(null, rows); 185 | } 186 | 187 | function ascending(a, b) { 188 | return a - b; 189 | } 190 | 191 | function descending(a, b) { 192 | return b - a; 193 | } 194 | -------------------------------------------------------------------------------- /test/urls.txt: -------------------------------------------------------------------------------- 1 | http://cdn.leafletjs.com/leaflet-0.7.3/leaflet.js 2 | http://code.jquery.com/jquery.min.js 3 | http://d3js.org/d3.v3.min.js 4 | http://github.com/DmitryBaranovskiy/raphael/raw/master/raphael-min.js 5 | --------------------------------------------------------------------------------