├── .gitignore
├── .travis.yml
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── index.js
├── package.json
└── test
    ├── index.js
    └── urls.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | .*.sw?
2 | .DS_Store
3 | node_modules
4 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: node_js
 2 | node_js:
 3 | - "0.12"
 4 | - "0.11"
 5 | - "0.10"
 6 | - "0.8"
 7 | - "0.6"
 8 | - "iojs"
 9 | - "iojs-v1.0.4" 
10 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | ## Welcome!
 2 | 
 3 | We're so glad you're thinking about contributing to an 18F open source project! If you're unsure or afraid of anything, just ask or submit the issue or pull request anyways. The worst that can happen is that you'll be politely asked to change something. We appreciate any sort of contribution, and don't want a wall of rules to get in the way of that.
 4 | 
 5 | Before contributing, we encourage you to read our CONTRIBUTING policy (you are here), our LICENSE, and our README, all of which should be in this repository. If you have any questions, or want to read more about our underlying policies, you can consult the 18F Open Source Policy GitHub repository at https://github.com/18f/open-source-policy, or just shoot us an email/official government letterhead note to [18f@gsa.gov](mailto:18f@gsa.gov).
 6 | 
 7 | ## Public domain
 8 | 
 9 | This project is in the public domain within the United States, and
10 | copyright and related rights in the work worldwide are waived through
11 | the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/).
12 | 
13 | All contributions to this project will be released under the CC0
14 | dedication. By submitting a pull request, you are agreeing to comply
15 | with this waiver of copyright interest.
16 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | As a work of the United States Government, this project is in the
 2 | public domain within the United States.
 3 | 
 4 | Additionally, we waive copyright and related rights in the work
 5 | worldwide through the CC0 1.0 Universal public domain dedication.
 6 | 
 7 | ## CC0 1.0 Universal Summary
 8 | 
 9 | This is a human-readable summary of the [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode).
10 | 
11 | ### No Copyright
12 | 
13 | The person who associated a work with this deed has dedicated the work to
14 | the public domain by waiving all of his or her rights to the work worldwide
15 | under copyright law, including all related and neighboring rights, to the
16 | extent allowed by law.
17 | 
18 | You can copy, modify, distribute and perform the work, even for commercial
19 | purposes, all without asking permission.
20 | 
21 | ### Other Information
22 | 
23 | In no way are the patent or trademark rights of any person affected by CC0,
24 | nor are the rights that other persons may have in the work or in how the
25 | work is used, such as publicity or privacy rights.
26 | 
27 | Unless expressly stated otherwise, the person who associated a work with
28 | this deed makes no warranties about the work, and disclaims liability for
29 | all uses of the work, to the fullest extent permitted by applicable law.
30 | When using or citing the work, you should not imply endorsement by the
31 | author or the affirmer.
32 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # urlsize
 2 | **urlsize** is a [Node](http://nodejs.org/)-powered command-line
 3 | utility for getting the file sizes of one or more URLs.
 4 | 
 5 | You can install it with `npm install -g urlsize`.
 6 | 
 7 | ## Usage
 8 | ```
 9 | urlsize [options] [<url>...]
10 | 
11 | Options:
12 |   --file, -f  read URLs from a text file (one per line)
13 |   -d          sort URLs by size descending (default: ascending)
14 |   --csv, -c   output comma-separated values
15 |   --tsv, -t   output tab-separated values
16 |   --help, -h  show this helpful message
17 |   -v          print more helpful messages to stderr
18 | ```
19 | 
20 | ### Examples
21 | Just get the size of a single URL:
22 | ```sh
23 | $ urlsize google.com
24 | 50.8K   http://google.com
25 | ```
26 | 
27 | Get the size of multiple URLs:
28 | ```sh
29 | $ urlsize google.com yahoo.com
30 | 50.8K   http://google.com
31 | 286.1K  http://yahoo.com
32 | ```
33 | 
34 | Read the list of URLs from a text file:
35 | ```sh
36 | $ printf "usa.gov\ncensus.gov\n" > urls.txt
37 | $ urlsize --file urls.txt
38 | 36.3K   http://usa.gov  
39 | 182.7K  http://census.gov
40 | ```
41 | 
42 | Output the sizes as tab-separated values, where the `length` column is the size in bytes:
43 | ```sh
44 | $ urlsize --tsv census.gov usa.gov
45 | url     size    length
46 | http://usa.gov  36.3K   37126
47 | http://census.gov       182.7K  187063
48 | ```
49 | 
50 | By default, URLs are sorted in the output by size ascending. You can sort them in descending
51 | order with the `-d` flag:
52 | ```sh
53 | $ urlsize -d census.gov usa.gov
54 | 182.7K  http://census.gov
55 | 36.3K   http://usa.gov  
56 | ```
57 | 
58 | ### Public domain
59 | 
60 | This project is in the worldwide [public domain](LICENSE.md). As stated in
61 | [CONTRIBUTING](CONTRIBUTING.md):
62 | 
63 | > This project is in the public domain within the United States, and copyright
64 | > and related rights in the work worldwide are waived through the [CC0 1.0
65 | > Universal public domain
66 | > dedication](https://creativecommons.org/publicdomain/zero/1.0/).
67 | >
68 | > All contributions to this project will be released under the CC0 dedication.
69 | > By submitting a pull request, you are agreeing to comply with this waiver of
70 | > copyright interest.
71 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env node
  2 | var filesize = require('filesize'),
  3 |     request = require('request'),
  4 |     async = require('async'),
  5 |     fs = require('fs'),
  6 |     rw = require('rw'),
  7 |     csv = require('fast-csv'),
  8 |     yargs = require('yargs')
  9 |       .usage('$0 [options] [<url>...]')
 10 |       .describe('file', 'read URLs from a text file (one per line)')
 11 |         .alias('file', 'f')
 12 |       .describe('d', 'sort URLs by size descending (default: ascending)')
 13 |         .boolean('d')
 14 |       .describe('csv', 'output comma-separated values')
 15 |         .boolean('csv')
 16 |         .alias('csv', 'c')
 17 |       .describe('tsv', 'output tab-separated values')
 18 |         .boolean('tsv')
 19 |         .alias('tsv', 't')
 20 |       .describe('help', 'show this helpful message')
 21 |       .describe('v', 'print more helpful messages to stderr')
 22 |       .alias('help', 'h'),
 23 |     options = yargs.argv,
 24 |     fopts = {
 25 |       unix: true
 26 |     },
 27 |     urls = options._,
 28 |     sort = options.d
 29 |       ? function(a, b) { return b - a; }
 30 |       : function(a, b) { return a - b; },
 31 |     help = options.help;
 32 | 
 33 | if (!options.file && !urls.length) {
 34 |   help = true;
 35 | }
 36 | 
 37 | if (help) {
 38 |   yargs.showHelp();
 39 |   return process.exit(1);
 40 | }
 41 | 
 42 | if (options.file) {
 43 |   var src = (options.file === '-' || options.file === true)
 44 |     ? '/dev/stdin'
 45 |     : options.file;
 46 |   LOG('reading URLs from %s ...', src);
 47 |   rw.readFile(src, {}, function(error, buffer) {
 48 |     if (error) return ERROR('unable to read from %s: %s', src, error);
 49 |     urls = buffer.toString()
 50 |       .split(/[\r\n]+/)
 51 |       .filter(notEmpty);
 52 |     LOG('read %d URLs from %s', urls.length, src);
 53 |     main(urls);
 54 |   });
 55 | } else {
 56 |   main(urls);
 57 | }
 58 | 
 59 | function main(urls) {
 60 |   async.map(urls, getFileSize, done);
 61 | }
 62 | 
 63 | function getFileSize(url, next) {
 64 |   if (!url.match(/^https?:\/\//)) {
 65 |     url = 'http://' + url;
 66 |   }
 67 |   LOG('getting %s ...', url);
 68 |   var length = 0,
 69 |       status,
 70 |       stream;
 71 |   stream = request(url)
 72 |     .on('error', done)
 73 |     .on('response', function onResponse(res) {
 74 |       status = res.statusCode;
 75 |       if ('content-length' in res.headers) {
 76 |         LOG('got content-length header from %s', url);
 77 |         length = res.headers['content-length'];
 78 |         stream.end();
 79 |       } else {
 80 |         LOG('reading %s ...', url);
 81 |         res.on('data', function onData(chunk) {
 82 |           length += chunk.length;
 83 |         });
 84 |       }
 85 |     })
 86 |     .on('end', function() {
 87 |       var size = filesize(length, fopts);
 88 |       next(null, {
 89 |         url: url,
 90 |         length: length,
 91 |         size: size
 92 |       });
 93 |     });
 94 | }
 95 | 
 96 | function done(error, urls) {
 97 |   if (error) return ERROR('error:', error);
 98 | 
 99 |   // sort the URLs by length
100 |   urls.sort(function(a, b) {
101 |     return sort(a.length, b.length);
102 |   });
103 | 
104 |   if (options.csv || options.tsv) {
105 |     var opts = {
106 |       delimiter: options.tsv ? '\t' : ',',
107 |       headers: ['url', 'size', 'length']
108 |     };
109 |     var out = options.out
110 |           ? fs.createWriteStream(out)
111 |           : process.stdout,
112 |         dsv = csv.createWriteStream(opts);
113 |     dsv.pipe(out);
114 |     urls.forEach(function(d) {
115 |       dsv.write(d);
116 |     });
117 |   } else {
118 |     urls.forEach(function(d) {
119 |       console.log([d.size, d.url].join('\t') + '\t');
120 |     });
121 |   }
122 | }
123 | 
/**
 * Filter predicate that keeps non-empty strings.
 *
 * @param {String} str the value to test
 * @return {*} the falsy input itself, or the (truthy) length of the string
 */
function notEmpty(str) {
  if (!str) {
    return str;
  }
  return str.length;
}
127 | 
/**
 * Print a diagnostic message when the -v flag is set.
 *
 * Messages go to stderr, as the -v help text promises, so that they never
 * mix with the results (plain, CSV or TSV) written to stdout. Arguments are
 * the same as for console.error(): a format string followed by its values.
 */
function LOG() {
  if (options.v) {
    console.error.apply(console, arguments);
  }
}
131 | 
/**
 * Print an error message to stderr.
 *
 * Accepts the same arguments as console.error(): a format string followed
 * by the values to substitute into it.
 */
function ERROR() {
  var parts = Array.prototype.slice.call(arguments);
  console.error.apply(console, parts);
}
135 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "urlsize",
 3 |   "version": "1.0.2",
 4 |   "description": "get the human-readable size of a URL",
 5 |   "main": "index.js",
 6 |   "bin": {
 7 |     "urlsize": "index.js"
 8 |   },
 9 |   "scripts": {
10 |     "test": "mocha"
11 |   },
12 |   "keywords": [
13 |     "url",
14 |     "size"
15 |   ],
16 |   "author": {
17 |     "name": "Shawn Allen",
18 |     "url": "https://github.com/shawnbot"
19 |   },
20 |   "license": "CC0-1.0",
21 |   "dependencies": {
22 |     "async": "^0.9.0",
23 |     "fast-csv": "^0.5.6",
24 |     "filesize": "^3.1.1",
25 |     "request": "^2.53.0",
26 |     "rw": "^0.1.4",
27 |     "yargs": "^3.3.1"
28 |   },
29 |   "devDependencies": {
30 |     "mocha": "^2.1.0"
31 |   },
32 |   "directories": {
33 |     "test": "test"
34 |   },
35 |   "repository": {
36 |     "type": "git",
37 |     "url": "https://github.com/18F/urlsize.git"
38 |   },
39 |   "bugs": {
40 |     "url": "https://github.com/18F/urlsize/issues"
41 |   },
42 |   "homepage": "https://github.com/18F/urlsize"
43 | }
44 | 


--------------------------------------------------------------------------------
/test/index.js:
--------------------------------------------------------------------------------
  1 | var cmd = './index.js',
  2 |     fs = require('fs'),
  3 |     path = require('path'),
  4 |     assert = require('assert'),
  5 |     child = require('child_process'),
  6 |     csv = require('fast-csv');
  7 | 
  8 | describe('cli', function() {
  9 |   // we need to give these commands lots of time to run
 10 |   this.timeout(10000);
 11 | 
 12 |   var testFilename = path.join(__dirname, 'urls.txt'),
 13 |       testURLs = splitLines(fs.readFileSync(testFilename).toString());
 14 | 
 15 |   it('complains when it gets too few args', function(done) {
 16 |     var proc = run([]);
 17 |     assertExitCode(proc, 1, done);
 18 |   });
 19 | 
 20 |   it('exits 0 when it gets enough args', function(done) {
 21 |     var proc = run(['-']);
 22 |     assertExitCode(proc, 0, done);
 23 |   });
 24 | 
 25 |   it('takes a single URL', function(done) {
 26 |     var proc = run(['google.com']);
 27 |     assertIO(proc, function(output) {
 28 |       assert.ok(output, 'no output!');
 29 |       assert.ok(output.indexOf('google.com') > -1, 'google.com not in the output: ' + output);
 30 |       done();
 31 |     });
 32 |   });
 33 | 
 34 |   it('takes multiple URLs', function(done) {
 35 |     var proc = run(['google.com', 'yahoo.com']);
 36 |     assertIO(proc, function(stdout) {
 37 |       assert.ok(stdout, 'no output!');
 38 |       assert.ok(stdout.indexOf('google.com') > -1, 'google.com not in stdout: ' + stdout);
 39 |       assert.ok(stdout.indexOf('yahoo.com') > -1, 'yahoo.com not in stdout: ' + stdout);
 40 |       var lines = splitLines(stdout);
 41 |       assert.equal(lines.length, 2, 'expected 2 lines of output, got ' + lines.length);
 42 |       done();
 43 |     });
 44 |   });
 45 | 
 46 |   it('reads URLs from a file', function(done) {
 47 |     var proc = run(['--file', testFilename]);
 48 |     assertIO(proc, function(stdout) {
 49 |       assert.ok(stdout, 'no output!');
 50 |       testURLs.forEach(function(url) {
 51 |         assert.ok(stdout.indexOf(url) > -1, url + 'not present in stdout: ' + stdout);
 52 |       });
 53 |       done();
 54 |     });
 55 |   });
 56 | 
 57 |   it('reads URLs from stdin', function(done) {
 58 |     var proc = run(['--file', '-']);
 59 |     assertIO(proc, 'google.com\nyahoo.com', function(stdout) {
 60 |       assert.ok(stdout, 'no output!');
 61 |       assert.ok(stdout.indexOf('google.com') > -1, 'google.com not in stdout: ' + stdout);
 62 |       assert.ok(stdout.indexOf('yahoo.com') > -1, 'yahoo.com not in stdout: ' + stdout);
 63 |       done();
 64 |     });
 65 |   });
 66 | 
 67 |   it('sorts sizes ascending', function(done) {
 68 |     var proc = run(['--file', testFilename]);
 69 |     assertIO(proc, function(stdout) {
 70 |       var lines = splitLines(stdout),
 71 |           sizes = lines.map(function(line) {
 72 |             var size = line.split('\t').shift();
 73 |             return +size.match(/^(\d+)/)[0];
 74 |           }),
 75 |           sorted = sizes.slice().sort(ascending);
 76 |       assert.deepEqual(sizes, sorted, 'bad sort order: ' + sizes + ', expected ' + sorted);
 77 |       done();
 78 |     });
 79 |   });
 80 | 
 81 |   it('sorts sizes descending', function(done) {
 82 |     var proc = run(['-d', '--file', testFilename]);
 83 |     assertIO(proc, function(stdout) {
 84 |       var lines = splitLines(stdout),
 85 |           sizes = lines.map(function(line) {
 86 |             var size = line.split('\t').shift();
 87 |             return +size.match(/^(\d+)/)[0];
 88 |           }),
 89 |           sorted = sizes.slice().sort(descending);
 90 |       assert.deepEqual(sizes, sorted, 'bad sort order: ' + sizes + ', expected ' + sorted);
 91 |       done();
 92 |     });
 93 |   });
 94 | 
 95 |   it('formats csv', function(done) {
 96 |     var proc = run(['--csv', 'google.com']);
 97 |     assertIO(proc, function(stdout) {
 98 |       parseCSV(stdout, ',', function(error, rows) {
 99 |         assert.ok(!error, 'csv parse error: ' + error);
100 |         assert.equal(rows.length, 1, 'expected 1 row, got ' + rows.length);
101 |         assert.deepEqual(Object.keys(rows[0]), ['url', 'size', 'length']);
102 |         assert.equal(rows[0].url, 'http://google.com', 'bad row 0: ' + JSON.stringify(rows[0]));
103 |         done();
104 |       });
105 |     });
106 |   });
107 | 
108 |   it('formats tsv', function(done) {
109 |     var proc = run(['--tsv', 'google.com']);
110 |     assertIO(proc, function(stdout) {
111 |       parseCSV(stdout, '\t', function(error, rows) {
112 |         assert.ok(!error, 'tsv parse error: ' + error);
113 |         assert.equal(rows.length, 1, 'expected 1 row, got ' + rows.length);
114 |         assert.deepEqual(Object.keys(rows[0]), ['url', 'size', 'length']);
115 |         assert.equal(rows[0].url, 'http://google.com', 'bad row 0: ' + JSON.stringify(rows[0]));
116 |         done();
117 |       });
118 |     });
119 |   });
120 | 
121 | });
122 | 
/**
 * Spawn the command-line script with the given arguments.
 *
 * @param {Array} args command-line arguments for the script
 * @return {ChildProcess} the spawned process, with piped stdio
 */
function run(args) {
  var spawnOptions = {stdio: 'pipe'};
  return child.spawn(cmd, args, spawnOptions);
}
128 | 
/**
 * Assert that a child process finishes with the expected exit code.
 *
 * @param {ChildProcess} process the process to watch
 * @param {Number} code the expected exit code
 * @param {Function} done called after the assertion has passed
 */
function assertExitCode(process, code, done) {
  function onClose(actual) {
    var message = 'exit code mismatch: expected ' + code + ', got ' + actual;
    assert.equal(code, actual, message);
    done();
  }

  process.on('close', onClose);
}
135 | 
/**
 * Collect everything a child process writes to stdout and check it once the
 * process has finished.
 *
 * May be called as assertIO(process, check) or as
 * assertIO(process, stdin, check).
 *
 * @param {ChildProcess} process the process to observe
 * @param {String} [stdin] text written to the process's stdin, which is
 *   then closed; an empty string just closes it
 * @param {Function|String} check either a function called with the collected
 *   output, or the exact output expected
 */
function assertIO(process, stdin, check) {
  if (arguments.length < 3) {
    check = stdin;
    stdin = null;
  }

  var chunks = [];
  process.stdout
    .on('data', function(chunk) {
      chunks.push(chunk);
    });

  // 'close' fires only after the stdio streams have ended, whereas 'exit'
  // may fire while output is still buffered; this also matches
  // assertExitCode()
  process.on('close', onClose);

  if (stdin !== null && stdin !== undefined) {
    process.stdin.write(stdin);
    process.stdin.end();
  }

  function onClose() {
    var stdout = chunks.join('');
    if (typeof check === 'function') {
      check(stdout);
    } else {
      assert.equal(stdout, check, 'i/o mismatch: ' + stdout);
    }
  }
}
165 | 
/**
 * Split a block of text into its non-empty lines.
 *
 * Leading and trailing whitespace of the whole text is removed first; the
 * text is then split on "\n" and empty lines are discarded.
 *
 * @param {String} str the text to split
 * @return {Array} the non-empty lines, in order
 */
function splitLines(str) {
  var pieces = str.trim().split('\n'),
      kept = [];
  for (var i = 0; i < pieces.length; i++) {
    if (pieces[i]) {
      kept.push(pieces[i]);
    }
  }
  return kept;
}
171 | 
/**
 * Minimal delimited-text parser used by the tests.
 *
 * The first non-empty line supplies the column names; every following line
 * becomes an object keyed by those names. Quoted fields are not understood.
 *
 * @param {String} str the delimited text
 * @param {String} delimiter the field separator
 * @param {Function} done called synchronously with (null, rows)
 */
function parseCSV(str, delimiter, done) {
  // XXX csv.fromString() wasn't working for me,
  // but this won't parse quotes
  var lines = splitLines(str),
      header = lines.shift().split(delimiter),
      rows = [];
  lines.forEach(function(line) {
    var fields = line.split(delimiter),
        row = {};
    for (var i = 0; i < fields.length; i++) {
      row[header[i]] = fields[i];
    }
    rows.push(row);
  });
  done(null, rows);
}
186 | 
/**
 * Comparator that orders numbers from smallest to largest.
 *
 * @param {Number} a
 * @param {Number} b
 * @return {Number} negative when a sorts first, positive when b does
 */
function ascending(a, b) {
  var difference = a - b;
  return difference;
}
190 | 
/**
 * Comparator that orders numbers from largest to smallest.
 *
 * @param {Number} a
 * @param {Number} b
 * @return {Number} negative when a sorts first, positive when b does
 */
function descending(a, b) {
  var difference = b - a;
  return difference;
}
194 | 


--------------------------------------------------------------------------------
/test/urls.txt:
--------------------------------------------------------------------------------
1 | http://cdn.leafletjs.com/leaflet-0.7.3/leaflet.js
2 | http://code.jquery.com/jquery.min.js
3 | http://d3js.org/d3.v3.min.js
4 | http://github.com/DmitryBaranovskiy/raphael/raw/master/raphael-min.js
5 | 


--------------------------------------------------------------------------------