├── .npmignore
├── .npmrc
├── .gitignore
├── .travis.yml
├── utils
│   ├── create-link.js
│   ├── get-link-contents.js
│   └── resolve-url.js
├── test
│   ├── utils
│   │   ├── html-test.js
│   │   ├── create-link-test.js
│   │   └── resolve-url-test.js
│   └── test.js
├── bin
│   └── getcss
├── license
├── package.json
├── readme.md
└── index.js
/.npmignore:
--------------------------------------------------------------------------------
1 | test/results.json
2 |
--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
1 | package-lock=false
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | package-lock.json
3 | .DS_Store
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js:
3 | - '7'
4 | - '6'
5 |
--------------------------------------------------------------------------------
/utils/create-link.js:
--------------------------------------------------------------------------------
1 | var resolveUrl = require('./resolve-url')
2 |
3 | module.exports = function createLink(link, url) {
4 | return {
5 | link: link,
6 | url: resolveUrl(url, link),
7 | css: ''
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/test/utils/html-test.js:
--------------------------------------------------------------------------------
1 | var assert = require('assert')
2 | var getCss = require('../../')
3 |
var css = 'h1 { color: tomato; }'
// Minimal document whose only styling is an inline <style> tag holding `css`.
var html =
  '<html><head><style>' + css + '</style></head>' +
  '<body><h1>Hello, world!</h1></body></html>'

describe('html', function() {
  it('should correctly extract css from raw html', function() {
    // Return the promise so mocha waits for it; otherwise the assertion runs
    // after the test has already been reported as passing.
    return getCss('http://example.com/', null, html).then(function(response) {
      assert.equal(response.css, css)
    })
  })
})
14 |
--------------------------------------------------------------------------------
/utils/get-link-contents.js:
--------------------------------------------------------------------------------
1 | var q = require('q')
2 | var request = require('request')
3 |
4 | module.exports = function getLinkContents(linkUrl, options) {
5 | var d = q.defer()
6 |
7 | // expect linked css content
8 | if (!/\.css$/i.test(linkUrl)) {
9 | d.resolve('')
10 | return d.promise
11 | }
12 |
13 | request({ url: linkUrl, timeout: options.timeout, gzip: true }, function(
14 | error,
15 | response,
16 | body
17 | ) {
18 | if (error || response.statusCode !== 200) {
19 | d.reject(error)
20 | }
21 |
22 | d.resolve(body)
23 | })
24 |
25 | return d.promise
26 | }
27 |
--------------------------------------------------------------------------------
/test/utils/create-link-test.js:
--------------------------------------------------------------------------------
1 | var assert = require('assert')
2 | var createLink = require('../../utils/create-link')
3 |
// Checks that createLink pairs the raw href with its resolved URL.
describe('create-link', function() {
  var baseUrl = 'http://foo.com/css/my-css.css'

  it('should create the correct link object', function() {
    var expected = {
      link: '../bar.css',
      url: 'http://foo.com/bar.css',
      css: ''
    }

    assert.deepEqual(createLink('../bar.css', baseUrl), expected)
  })

  it('should correctly resolve full url links', function() {
    var fullLink = 'http://foo.com/bar.css'
    var expected = { link: fullLink, url: fullLink, css: '' }

    assert.deepEqual(createLink(fullLink, baseUrl), expected)
  })
})
19 |
--------------------------------------------------------------------------------
/utils/resolve-url.js:
--------------------------------------------------------------------------------
1 | var urlResolver = require('url').resolve
2 |
3 | module.exports = function resolveUrl(url, link) {
4 | if (link.match(/^(http|https)/g)) {
5 | return link
6 | } else {
7 | if (isCssFile(url)) {
8 | removeExtension(url)
9 | } else if (!endsInForwardSlash(url)) {
10 | if (!isHtmlUrl(url)) {
11 | url += '/'
12 | }
13 | }
14 |
15 | return urlResolver(url, link)
16 | }
17 | }
18 |
// True when the last character of `url` is a forward slash.
function endsInForwardSlash(url) {
  var lastChar = url.charAt(url.length - 1)
  return lastChar === '/'
}
22 |
// True when `url` ends with the literal, case-sensitive suffix ".css".
function isCssFile(url) {
  var suffix = url.slice(-4)
  return suffix === '.css'
}
26 |
// True when `url` ends with the literal, case-sensitive suffix ".html".
function isHtmlUrl(url) {
  var suffix = url.slice(-5)
  return suffix === '.html'
}
30 |
/**
 * Strip a trailing ".ext" from `url`.
 *
 * Strings are immutable, so the stripped value has to be returned; calling
 * `replace` and discarding the result leaves the input untouched.
 *
 * @param {string} url - The URL or path to strip.
 * @returns {string} `url` without its final extension, or `url` unchanged when
 *   the last path segment has none.
 */
function removeExtension(url) {
  return url.replace(/\.[^/.]+$/, '')
}
34 |
--------------------------------------------------------------------------------
/bin/getcss:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | var fs = require('fs');
4 | var program = require('commander');
5 | var normalizeUrl = require('normalize-url');
6 | var getCss = require('..');
7 |
// Report the package's real version instead of a hard-coded string.
var pkg = require('../package.json');

// Fallback used when --timeout is missing or is not a number.
var DEFAULT_TIMEOUT_MS = 30000;

// Print a failure on stderr and make the process exit non-zero.
function fail(error) {
  var message = error instanceof Error ? error.message : JSON.stringify(error);
  console.error(message);
  process.exitCode = 1;
}

program
  .version(pkg.version)
  .usage('[options] <url>')
  .option("-j, --json","output downloaded html and css as json")
  .option("-t, --timeout [ms]","timeout in ms to wait for responses", DEFAULT_TIMEOUT_MS)
  .option("-s, --strip_wayback_css","remove wayback toolbar css from internet archive captures")
  .action(function(url, options) {
    if (!url) {
      return;
    }

    // Command-line values arrive as strings (or `true` for a bare -t).
    var timeout = parseInt(options.timeout, 10);
    if (isNaN(timeout)) {
      timeout = DEFAULT_TIMEOUT_MS;
    }

    try {
      // getCss throws synchronously when the url is invalid.
      url = normalizeUrl(url, { stripWWW: false });
      getCss(url, { verbose: true, timeout: timeout, stripWayback: options.strip_wayback_css })
        .then(function(css) {
          // getCss resolves with `false` when the page has no css at all.
          if (!css) {
            fail(new Error('No CSS found at ' + url));
            return;
          }

          if (options.json) {
            console.log(JSON.stringify(css));
          } else {
            console.log(css.css);
          }
        })
        .catch(fail);
    } catch (error) {
      fail(error);
    }
  });

program.parse(process.argv);
29 |
--------------------------------------------------------------------------------
/license:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 @jxnblk, @mrmrs_, @4lpine
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/test/test.js:
--------------------------------------------------------------------------------
1 |
2 | var fs = require('fs');
3 | var getCss = require('../');
4 |
5 | var results = [];
6 |
// Overwrite ./test/results.json with everything collected in `results`.
function writeLog () {
  var serialized = JSON.stringify(results, null, 2);
  fs.writeFileSync('./test/results.json', serialized);
}
10 |
// Record one response, print a short summary of it, and rewrite the log file.
function logResults (response) {
  var stylesheets = response.links;

  results.push(response);
  console.log('Results from: ', response.pageTitle);
  console.log(response.styles.length + ' Style Tags');
  console.log(stylesheets.length + ' Stylesheets');
  for (var i = 0; i < stylesheets.length; i++) {
    console.log(stylesheets[i].url);
  }
  writeLog();
}
21 |
// Live smoke test: fetch each site, log what was found, and report failures.
var smokeTestUrls = [
  'http://google.com',
  'http://amazon.com',
  'http://twitter.com/jxnblk',
  'http://facebook.com',
  'http://johnotander.com/public/css/c.min.css'
];

smokeTestUrls.forEach(function (siteUrl) {
  getCss(siteUrl)
    .then(logResults)
    .catch(function (err) {
      console.error(err);
    });
});
51 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "get-css",
3 | "version": "1.2.7-2",
4 | "description": "Get CSS from a URL",
5 | "main": "index.js",
6 | "scripts": {
7 | "format": "prettier --no-semi --single-quote --write {test,utils,bin}/**/*.js index.js",
8 | "test": "node test/test && mocha test 'test/**/*.js'"
9 | },
10 | "bin": {
11 | "getcss": "./bin/getcss"
12 | },
13 | "author": "Brent Jackson",
14 | "license": "MIT",
15 | "dependencies": {
16 | "cheerio": "^0.22.0",
17 | "commander": "^2.9.0",
18 | "is-blank": "^1.1.0",
19 | "is-css": "^2.0.0",
20 | "is-present": "^1.0.0",
21 | "is-url-superb": "^2.0.0",
22 | "normalize-url": "^1.8.0",
23 | "q": "^1.4.1",
24 | "request": "^2.79.0",
25 | "requestretry": "^1.13.0",
26 | "resolve-css-import-urls": "1.0.0",
27 | "strip-html-comments": "1.0.0",
28 | "strip-wayback-toolbar": "^1.0.4",
29 | "ua-string": "^1.0.0"
30 | },
31 | "devDependencies": {
32 | "mocha": "^3.2.0",
33 | "prettier": "^1.11.1"
34 | },
35 | "repository": {
36 | "type": "git",
37 | "url": "https://github.com/cssstats/get-css.git"
38 | },
39 | "keywords": [
40 | "CSS",
41 | "request",
42 | "parse",
43 | "get",
44 | "getcss"
45 | ],
46 | "bugs": {
47 | "url": "https://github.com/cssstats/get-css/issues"
48 | },
49 | "homepage": "https://github.com/cssstats/get-css"
50 | }
51 |
--------------------------------------------------------------------------------
/test/utils/resolve-url-test.js:
--------------------------------------------------------------------------------
1 | var assert = require('assert')
2 | var resolveUrl = require('../../utils/resolve-url')
3 |
// Table-driven checks: each row is one (base url, href) pair and the URL that
// resolveUrl is expected to produce for it.
describe('resolve-url', function() {
  var cases = [
    {
      name: 'should correctly resolve a .. relative link',
      url: 'http://foo.com/some/path',
      link: '../bar.css',
      expected: 'http://foo.com/some/bar.css'
    },
    {
      name: 'should correctly resolve a .. relative link when the url has a trailing /',
      url: 'http://foo.com/some/path/',
      link: '../bar.css',
      expected: 'http://foo.com/some/bar.css'
    },
    {
      name: 'should correctly resolve a relative link',
      url: 'http://foo.com/some/path',
      link: 'bar.css',
      expected: 'http://foo.com/some/path/bar.css'
    },
    {
      name: 'should correctly return a full link',
      url: 'http://foo.com',
      link: 'http://foo.com/some/path/bar.css',
      expected: 'http://foo.com/some/path/bar.css'
    },
    {
      name: 'should correctly resolve an absolute link',
      url: 'http://foo.com/some/path',
      link: '/bar.css',
      expected: 'http://foo.com/bar.css'
    },
    {
      name: 'should correctly resolve a relative url from an html file',
      url: 'http://foo.bar/awesome/baz.html',
      link: 'baz.css',
      expected: 'http://foo.bar/awesome/baz.css'
    },
    {
      name: 'should correctly resolve an absolute url from an html file',
      url: 'http://foo.bar/awesome/baz.html',
      link: '/baz.css',
      expected: 'http://foo.bar/baz.css'
    }
  ]

  cases.forEach(function(testCase) {
    it(testCase.name, function() {
      assert.equal(
        resolveUrl(testCase.url, testCase.link),
        testCase.expected
      )
    })
  })
})
54 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # get-css [![Build Status](https://travis-ci.org/cssstats/get-css.svg?branch=master)](https://travis-ci.org/cssstats/get-css)
2 |
3 | Node module to get CSS from a URL.
4 |
5 | Returns a promise for an object with details about a document's CSS, used in <http://cssstats.com>.
6 |
7 | ## Installation
8 |
9 | ```sh
10 | npm i --save get-css
11 | ```
12 |
13 | For the CLI
14 |
15 | ```sh
16 | npm i -g get-css
17 | ```
18 |
19 | ## Usage
20 |
21 | ```js
22 | var getCss = require('get-css');
23 |
24 | var options = {
25 | timeout: 5000
26 | };
27 |
28 | getCss('http://github.com', options)
29 | .then(function(response) {
30 | console.log(response);
31 | })
32 | .catch(function(error) {
33 | console.error(error);
34 | });
35 | ```
36 |
37 | ### Using the CLI
38 |
39 | ```
40 | npm i -g get-css
41 | getcss google.com > google.css
42 | ```
43 |
44 | ## Response
45 |
46 | ### `links`
47 | An array of objects based on `rel=stylesheet` links found in the document.
48 |
49 | Each object has the following:
50 |
51 | - `link` - the value from the `href` attribute for each link tag
52 | - `url` - an absolute url representation of the link
53 | - `css` - the contents of the file in the link
54 | - `imports` - an array of urls for `@import` rules
55 |
56 | ### `styles`
57 | An array of contents from `style` tags found in the document.
58 |
59 | ### `css`
60 | A concatenated string of all css found in links and styles
61 |
62 | ### `pageTitle`
63 | The contents of the `title` tag in the document.
64 |
65 | ## Options
66 |
67 | ### `timeout`
68 | An integer to reflect the timeout for the request. Default: `5000`
69 |
70 | ### `ignoreCerts`
71 | A boolean to determine whether invalid certificates are ignored. Default: `false`
72 |
73 | ### `verbose`
74 | A boolean to determine whether errors should be `console.log`ged. Default: `false`
75 |
76 | ## License
77 |
78 | MIT
79 |
80 | ## Contributing
81 |
82 | 1. Fork it
83 | 2. Create your feature branch (`git checkout -b my-new-feature`)
84 | 3. Commit your changes (`git commit -am 'Add some feature'`)
85 | 4. Push to the branch (`git push origin my-new-feature`)
86 | 5. Create new Pull Request
87 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | var q = require('q')
2 | var isCss = require('is-css')
3 | var isPresent = require('is-present')
4 | var isBlank = require('is-blank')
5 | var isUrl = require('is-url-superb')
6 | var request = require('requestretry')
7 | var cheerio = require('cheerio')
8 | var normalizeUrl = require('normalize-url')
9 | var stripHtmlComments = require('strip-html-comments')
10 | var stripWaybackToolbar = require('strip-wayback-toolbar')
11 | var resolveCssImportUrls = require('resolve-css-import-urls')
12 | var ua = require('ua-string')
13 |
14 | var getLinkContents = require('./utils/get-link-contents')
15 | var createLink = require('./utils/create-link')
16 |
17 | module.exports = function(url, options, html) {
18 | var deferred = q.defer()
19 | var options = options || {}
20 | options.headers = options.headers || {}
21 | options.headers['User-Agent'] = options.headers['User-Agent'] || ua
22 | options.timeout = options.timeout || 5000
23 | options.stripWayback = options.stripWayback || false
24 | options.gzip = true
25 |
26 | if (typeof url !== 'string' || isBlank(url) || !isUrl(url)) {
27 | throw new TypeError('get-css expected a url as a string')
28 | }
29 |
30 | url = normalizeUrl(url, { stripWWW: false })
31 | options.url = url
32 |
33 | if (options.ignoreCerts) {
34 | process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0'
35 | }
36 |
37 | var status = {
38 | parsed: 0,
39 | total: 0
40 | }
41 |
42 | var result = {
43 | links: [],
44 | styles: [],
45 | css: ''
46 | }
47 |
48 | function handleResolve() {
49 | if (status.parsed >= status.total) {
50 | deferred.resolve(result)
51 | }
52 | }
53 |
54 | function parseHtml(html) {
55 | if (options.stripWayback) {
56 | html = stripWaybackToolbar(html)
57 | }
58 | var $ = cheerio.load(html)
59 | result.pageTitle = $('head > title').text()
60 | result.html = html
61 |
62 | $('[rel=stylesheet]').each(function() {
63 | var link = $(this).attr('href')
64 | if (isPresent(link)) {
65 | result.links.push(createLink(link, url))
66 | } else {
67 | result.styles.push(stripHtmlComments($(this).text()))
68 | }
69 | })
70 |
71 | $('style').each(function() {
72 | result.styles.push(stripHtmlComments($(this).text()))
73 | })
74 |
75 | status.total = result.links.length + result.styles.length
76 | if (!status.total) {
77 | deferred.resolve(false)
78 | }
79 |
80 | result.links.forEach(function(link) {
81 | getLinkContents(link.url, options)
82 | .then(function(css) {
83 | handleCssFromLink(link, css)
84 | })
85 | .catch(function(error) {
86 | link.error = error
87 | status.parsed++
88 | handleResolve()
89 | })
90 | })
91 |
92 | result.styles.forEach(function(css) {
93 | result.css += css
94 | status.parsed++
95 | handleResolve()
96 | })
97 | }
98 |
99 | function handleCssFromLink(link, css) {
100 | link.css += css
101 |
102 | parseCssForImports(link, css)
103 |
104 | status.parsed++
105 | handleResolve()
106 | }
107 |
108 | // Handle potential @import url(foo.css) statements in the CSS.
109 | function parseCssForImports(link, css) {
110 | link.imports = resolveCssImportUrls(link.url, css)
111 | status.total += link.imports.length
112 | result.css += css
113 |
114 | link.imports.forEach(function(importUrl) {
115 | var importLink = createLink(importUrl, importUrl)
116 | result.links.push(importLink)
117 |
118 | getLinkContents(importLink.url, options)
119 | .then(function(css) {
120 | handleCssFromLink(importLink, css)
121 | })
122 | .catch(function(error) {
123 | link.error = error
124 | status.parsed++
125 | handleResolve()
126 | })
127 | })
128 | }
129 |
130 | function handleBody(body) {
131 | if (isCss(url)) {
132 | var link = createLink(url, url)
133 | result.links.push(link)
134 | handleCssFromLink(link, body)
135 | } else {
136 | parseHtml(body)
137 | }
138 | }
139 |
140 | if (html) {
141 | handleBody(html)
142 | } else {
143 | request(options, function(error, response, body) {
144 | if (error) {
145 | if (options.verbose) console.log('Error from ' + url + ' ' + error)
146 | deferred.reject(error)
147 | return
148 | }
149 |
150 | if (response && response.statusCode != 200) {
151 | if (options.verbose)
152 | console.log('Received a ' + response.statusCode + ' from: ' + url)
153 | deferred.reject({ url: url, statusCode: response.code })
154 | return
155 | }
156 |
157 | handleBody(body)
158 | })
159 | }
160 |
161 | return deferred.promise
162 | }
163 |
--------------------------------------------------------------------------------