├── .gitignore ├── .npmignore ├── .npmrc ├── .travis.yml ├── bin └── getcss ├── index.js ├── license ├── package.json ├── readme.md ├── test ├── results.json ├── test.js └── utils │ ├── create-link-test.js │ ├── html-test.js │ └── resolve-url-test.js └── utils ├── create-link.js ├── get-link-contents.js └── resolve-url.js /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | package-lock.json 3 | .DS_Store -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | test/results.json 2 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '7' 4 | - '6' 5 | -------------------------------------------------------------------------------- /bin/getcss: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var fs = require('fs'); 4 | var program = require('commander'); 5 | var normalizeUrl = require('normalize-url'); 6 | var getCss = require('..'); 7 | 8 | program 9 | .version('0.0.2') 10 | .usage('[options] ') 11 | .option("-j, --json","output downloaded html and css as json") 12 | .option("-t, --timeout [ms]","timeout in ms to wait for responses", 30000) 13 | .option("-s, --strip_wayback_css","remove wayback toolbar css from internet archive captures") 14 | .action(function(url, options) { 15 | if(url) { 16 | url = normalizeUrl(url, { stripWWW: false }); 17 | getCss(url, { verbose: true, timeout: options.timeout, stripWayback: options.strip_wayback_css }).then(function(css) { 18 
| if (options.json){ 19 | console.log(JSON.stringify(css)); 20 | } 21 | else { 22 | console.log(css.css); 23 | } 24 | }); 25 | } 26 | }); 27 | 28 | program.parse(process.argv); 29 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var q = require('q') 2 | var isCss = require('is-css') 3 | var isPresent = require('is-present') 4 | var isBlank = require('is-blank') 5 | var isUrl = require('is-url-superb') 6 | var request = require('requestretry') 7 | var cheerio = require('cheerio') 8 | var normalizeUrl = require('normalize-url') 9 | var stripHtmlComments = require('strip-html-comments') 10 | var stripWaybackToolbar = require('strip-wayback-toolbar') 11 | var resolveCssImportUrls = require('resolve-css-import-urls') 12 | var ua = require('ua-string') 13 | 14 | var getLinkContents = require('./utils/get-link-contents') 15 | var createLink = require('./utils/create-link') 16 | 17 | module.exports = function(url, options, html) { 18 | var deferred = q.defer() 19 | var options = options || {} 20 | options.headers = options.headers || {} 21 | options.headers['User-Agent'] = options.headers['User-Agent'] || ua 22 | options.timeout = options.timeout || 5000 23 | options.stripWayback = options.stripWayback || false 24 | options.gzip = true 25 | 26 | if (typeof url !== 'string' || isBlank(url) || !isUrl(url)) { 27 | throw new TypeError('get-css expected a url as a string') 28 | } 29 | 30 | url = normalizeUrl(url, { stripWWW: false }) 31 | options.url = url 32 | 33 | if (options.ignoreCerts) { 34 | process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0' 35 | } 36 | 37 | var status = { 38 | parsed: 0, 39 | total: 0 40 | } 41 | 42 | var result = { 43 | links: [], 44 | styles: [], 45 | css: '' 46 | } 47 | 48 | function handleResolve() { 49 | if (status.parsed >= status.total) { 50 | deferred.resolve(result) 51 | } 52 | } 53 | 54 | function parseHtml(html) { 55 | 
if (options.stripWayback) { 56 | html = stripWaybackToolbar(html) 57 | } 58 | var $ = cheerio.load(html) 59 | result.pageTitle = $('head > title').text() 60 | result.html = html 61 | 62 | $('[rel=stylesheet]').each(function() { 63 | var link = $(this).attr('href') 64 | if (isPresent(link)) { 65 | result.links.push(createLink(link, url)) 66 | } else { 67 | result.styles.push(stripHtmlComments($(this).text())) 68 | } 69 | }) 70 | 71 | $('style').each(function() { 72 | result.styles.push(stripHtmlComments($(this).text())) 73 | }) 74 | 75 | status.total = result.links.length + result.styles.length 76 | if (!status.total) { 77 | deferred.resolve(false) 78 | } 79 | 80 | result.links.forEach(function(link) { 81 | getLinkContents(link.url, options) 82 | .then(function(css) { 83 | handleCssFromLink(link, css) 84 | }) 85 | .catch(function(error) { 86 | link.error = error 87 | status.parsed++ 88 | handleResolve() 89 | }) 90 | }) 91 | 92 | result.styles.forEach(function(css) { 93 | result.css += css 94 | status.parsed++ 95 | handleResolve() 96 | }) 97 | } 98 | 99 | function handleCssFromLink(link, css) { 100 | link.css += css 101 | 102 | parseCssForImports(link, css) 103 | 104 | status.parsed++ 105 | handleResolve() 106 | } 107 | 108 | // Handle potential @import url(foo.css) statements in the CSS. 
109 | function parseCssForImports(link, css) { 110 | link.imports = resolveCssImportUrls(link.url, css) 111 | status.total += link.imports.length 112 | result.css += css 113 | 114 | link.imports.forEach(function(importUrl) { 115 | var importLink = createLink(importUrl, importUrl) 116 | result.links.push(importLink) 117 | 118 | getLinkContents(importLink.url, options) 119 | .then(function(css) { 120 | handleCssFromLink(importLink, css) 121 | }) 122 | .catch(function(error) { 123 | link.error = error 124 | status.parsed++ 125 | handleResolve() 126 | }) 127 | }) 128 | } 129 | 130 | function handleBody(body) { 131 | if (isCss(url)) { 132 | var link = createLink(url, url) 133 | result.links.push(link) 134 | handleCssFromLink(link, body) 135 | } else { 136 | parseHtml(body) 137 | } 138 | } 139 | 140 | if (html) { 141 | handleBody(html) 142 | } else { 143 | request(options, function(error, response, body) { 144 | if (error) { 145 | if (options.verbose) console.log('Error from ' + url + ' ' + error) 146 | deferred.reject(error) 147 | return 148 | } 149 | 150 | if (response && response.statusCode != 200) { 151 | if (options.verbose) 152 | console.log('Received a ' + response.statusCode + ' from: ' + url) 153 | deferred.reject({ url: url, statusCode: response.code }) 154 | return 155 | } 156 | 157 | handleBody(body) 158 | }) 159 | } 160 | 161 | return deferred.promise 162 | } 163 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 @jxnblk, @mrmrs_, @4lpine 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, 
and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "get-css", 3 | "version": "1.2.7-2", 4 | "description": "Get CSS from a URL", 5 | "main": "index.js", 6 | "scripts": { 7 | "format": "prettier --no-semi --single-quote --write {test,utils,bin}/**/*.js index.js", 8 | "test": "node test/test && mocha test 'test/**/*.js'" 9 | }, 10 | "bin": { 11 | "getcss": "./bin/getcss" 12 | }, 13 | "author": "Brent Jackson", 14 | "license": "MIT", 15 | "dependencies": { 16 | "cheerio": "^0.22.0", 17 | "commander": "^2.9.0", 18 | "is-blank": "^1.1.0", 19 | "is-css": "^2.0.0", 20 | "is-present": "^1.0.0", 21 | "is-url-superb": "^2.0.0", 22 | "normalize-url": "^1.8.0", 23 | "q": "^1.4.1", 24 | "request": "^2.79.0", 25 | "requestretry": "^1.13.0", 26 | "resolve-css-import-urls": "1.0.0", 27 | "strip-html-comments": "1.0.0", 28 | "strip-wayback-toolbar": "^1.0.4", 29 | "ua-string": "^1.0.0" 30 | }, 31 | "devDependencies": { 32 | "mocha": "^3.2.0", 33 | "prettier": "^1.11.1" 34 | }, 35 | "repository": { 36 | "type": "git", 37 | "url": "https://github.com/cssstats/get-css.git" 38 | }, 39 | "keywords": [ 40 
| "CSS", 41 | "request", 42 | "parse", 43 | "get", 44 | "getcss" 45 | ], 46 | "bugs": { 47 | "url": "https://github.com/cssstats/get-css/issues" 48 | }, 49 | "homepage": "https://github.com/cssstats/get-css" 50 | } 51 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # get-css [![Build Status](https://travis-ci.org/cssstats/get-css.svg?branch=master)](https://travis-ci.org/cssstats/get-css) 2 | 3 | Node module to get CSS from a URL. 4 | 5 | Returns a promise for an object with details about a document's CSS, used in [cssstats.com](http://cssstats.com). 6 | 7 | ## Installation 8 | 9 | ```sh 10 | npm i --save get-css 11 | ``` 12 | 13 | For the CLI 14 | 15 | ```sh 16 | npm i -g get-css 17 | ``` 18 | 19 | ## Usage 20 | 21 | ```js 22 | var getCss = require('get-css'); 23 | 24 | var options = { 25 | timeout: 5000 26 | }; 27 | 28 | getCss('http://github.com', options) 29 | .then(function(response) { 30 | console.log(response); 31 | }) 32 | .catch(function(error) { 33 | console.error(error); 34 | }); 35 | ``` 36 | 37 | ### Using the CLI 38 | 39 | ``` 40 | npm i -g get-css 41 | getcss google.com > google.css 42 | ``` 43 | 44 | ## Response 45 | 46 | ### `links` 47 | An array of objects based on `rel=stylesheet` links found in the document. 48 | 49 | Each object has the following: 50 | 51 | - `link` - the value from the `href` attribute for each link tag 52 | - `url` - an absolute url representation of the link 53 | - `css` - the contents of the file in the link 54 | - `imports` - an array of urls for `@import` rules 55 | 56 | ### `styles` 57 | An array of contents from `style` tags found in the document. 58 | 59 | ### `css` 60 | A concatenated string of all css found in links and styles. 61 | 62 | ### `pageTitle` 63 | The contents of the `title` tag in the document. 64 | 65 | ## Options 66 | 67 | ### `timeout` 68 | An integer to reflect the timeout for the request. 
Default: `5000` 69 | 70 | ### `ignoreCerts` 71 | A boolean to determine whether invalid certificates are ignored. Default: `false` 72 | 73 | ### `verbose` 74 | A boolean to determine whether errors should be `console.log`ged. Default: `false` 75 | 76 | ## License 77 | 78 | MIT 79 | 80 | ## Contributing 81 | 82 | 1. Fork it 83 | 2. Create your feature branch (`git checkout -b my-new-feature`) 84 | 3. Commit your changes (`git commit -am 'Add some feature'`) 85 | 4. Push to the branch (`git push origin my-new-feature`) 86 | 5. Create new Pull Request 87 | -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | 2 | var fs = require('fs'); 3 | var getCss = require('../'); 4 | 5 | var results = []; 6 | 7 | function writeLog () { 8 | fs.writeFileSync('./test/results.json', JSON.stringify(results, null, 2)); 9 | } 10 | 11 | function logResults (response) { 12 | results.push(response); 13 | console.log('Results from: ', response.pageTitle); 14 | console.log(response.styles.length + ' Style Tags'); 15 | console.log(response.links.length + ' Stylesheets'); 16 | response.links.forEach(function (link) { 17 | console.log(link.url); 18 | }); 19 | writeLog(); 20 | } 21 | 22 | getCss('http://google.com') 23 | .then(logResults) 24 | .catch(function (err) { 25 | console.error(err); 26 | }); 27 | 28 | getCss('http://amazon.com') 29 | .then(logResults) 30 | .catch(function (err) { 31 | console.error(err); 32 | }); 33 | 34 | getCss('http://twitter.com/jxnblk') 35 | .then(logResults) 36 | .catch(function (err) { 37 | console.error(err); 38 | }); 39 | 40 | getCss('http://facebook.com') 41 | .then(logResults) 42 | .catch(function (err) { 43 | console.error(err); 44 | }); 45 | 46 | getCss('http://johnotander.com/public/css/c.min.css') 47 | .then(logResults) 48 | .catch(function (err) { 49 | console.error(err); 50 | }); 51 | 
-------------------------------------------------------------------------------- /test/utils/create-link-test.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert') 2 | var createLink = require('../../utils/create-link') 3 | 4 | describe('create-link', function() { 5 | it('should create the correct link object', function() { 6 | assert.deepEqual( 7 | createLink('../bar.css', 'http://foo.com/css/my-css.css'), 8 | { link: '../bar.css', url: 'http://foo.com/bar.css', css: '' } 9 | ) 10 | }) 11 | 12 | it('should correctly resolve full url links', function() { 13 | assert.deepEqual( 14 | createLink('http://foo.com/bar.css', 'http://foo.com/css/my-css.css'), 15 | { link: 'http://foo.com/bar.css', url: 'http://foo.com/bar.css', css: '' } 16 | ) 17 | }) 18 | }) 19 | -------------------------------------------------------------------------------- /test/utils/html-test.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert') 2 | var getCss = require('../../') 3 | 4 | var css = 'h1 { color: tomato; }' 5 | var html = '

Hello, world!

' 6 | 7 | describe('html', function() { 8 | it('should correctly extract css from raw html', function() { 9 | getCss('http://example.com/', null, html).then(function(response) { 10 | asset.deepEqual(css, response.css) 11 | }) 12 | }) 13 | }) 14 | -------------------------------------------------------------------------------- /test/utils/resolve-url-test.js: -------------------------------------------------------------------------------- 1 | var assert = require('assert') 2 | var resolveUrl = require('../../utils/resolve-url') 3 | 4 | describe('resolve-url', function() { 5 | it('should correctly resolve a .. relative link', function() { 6 | assert.equal( 7 | resolveUrl('http://foo.com/some/path', '../bar.css'), 8 | 'http://foo.com/some/bar.css' 9 | ) 10 | }) 11 | 12 | it('should correctly resolve a .. relative link when the url has a trailing /', function() { 13 | assert.equal( 14 | resolveUrl('http://foo.com/some/path/', '../bar.css'), 15 | 'http://foo.com/some/bar.css' 16 | ) 17 | }) 18 | 19 | it('should correctly resolve a relative link', function() { 20 | assert.equal( 21 | resolveUrl('http://foo.com/some/path', 'bar.css'), 22 | 'http://foo.com/some/path/bar.css' 23 | ) 24 | }) 25 | 26 | it('should correctly return a full link', function() { 27 | assert.equal( 28 | resolveUrl('http://foo.com', 'http://foo.com/some/path/bar.css'), 29 | 'http://foo.com/some/path/bar.css' 30 | ) 31 | }) 32 | 33 | it('should correctly resolve an absolute link', function() { 34 | assert.equal( 35 | resolveUrl('http://foo.com/some/path', '/bar.css'), 36 | 'http://foo.com/bar.css' 37 | ) 38 | }) 39 | 40 | it('should correctly resolve a relative url from an html file', function() { 41 | assert.equal( 42 | resolveUrl('http://foo.bar/awesome/baz.html', 'baz.css'), 43 | 'http://foo.bar/awesome/baz.css' 44 | ) 45 | }) 46 | 47 | it('should correctly resolve an absolute url from an html file', function() { 48 | assert.equal( 49 | resolveUrl('http://foo.bar/awesome/baz.html', '/baz.css'), 50 
| 'http://foo.bar/baz.css' 51 | ) 52 | }) 53 | }) 54 | -------------------------------------------------------------------------------- /utils/create-link.js: -------------------------------------------------------------------------------- 1 | var resolveUrl = require('./resolve-url') 2 | 3 | module.exports = function createLink(link, url) { 4 | return { 5 | link: link, 6 | url: resolveUrl(url, link), 7 | css: '' 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /utils/get-link-contents.js: -------------------------------------------------------------------------------- 1 | var q = require('q') 2 | var request = require('request') 3 | 4 | module.exports = function getLinkContents(linkUrl, options) { 5 | var d = q.defer() 6 | 7 | // expect linked css content 8 | if (!/\.css$/i.test(linkUrl)) { 9 | d.resolve('') 10 | return d.promise 11 | } 12 | 13 | request({ url: linkUrl, timeout: options.timeout, gzip: true }, function( 14 | error, 15 | response, 16 | body 17 | ) { 18 | if (error || response.statusCode !== 200) { 19 | d.reject(error) 20 | } 21 | 22 | d.resolve(body) 23 | }) 24 | 25 | return d.promise 26 | } 27 | -------------------------------------------------------------------------------- /utils/resolve-url.js: -------------------------------------------------------------------------------- 1 | var urlResolver = require('url').resolve 2 | 3 | module.exports = function resolveUrl(url, link) { 4 | if (link.match(/^(http|https)/g)) { 5 | return link 6 | } else { 7 | if (isCssFile(url)) { 8 | removeExtension(url) 9 | } else if (!endsInForwardSlash(url)) { 10 | if (!isHtmlUrl(url)) { 11 | url += '/' 12 | } 13 | } 14 | 15 | return urlResolver(url, link) 16 | } 17 | } 18 | 19 | function endsInForwardSlash(url) { 20 | return url.indexOf('/', url.length - 1) != -1 21 | } 22 | 23 | function isCssFile(url) { 24 | return url.indexOf('.css', url.length - 4) != -1 25 | } 26 | 27 | function isHtmlUrl(url) { 28 | return 
url.indexOf('.html', url.length - 5) != -1 29 | } 30 | 31 | function removeExtension(url) { 32 | url.replace(/\.[^/.]+$/, '') 33 | } 34 | --------------------------------------------------------------------------------