├── check_mixedcontent.sh
├── README.md
├── LICENSE.md
├── report-mixed-content.js
└── require-http-200.py


/check_mixedcontent.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Run the PhantomJS mixed content report over every *.html file below a directory.
 4 | #
 5 | # Usage: ./check_mixedcontent.sh [root directory for static html] [debug]
 6 | #   $1  directory to search, e.g. _site for a jekyll site (the current
 7 | #       directory is searched when it is omitted)
 8 | #   $2  the literal word "debug" turns on PhantomJS --debug output
 9 | 
10 | root="${1:-.}"
11 | 
12 | # Compare as a string: -eq is an arithmetic test and does not compare the
13 | # text of $2 with "debug".
14 | if [[ $# -eq 2 && $2 == "debug" ]];
15 | then
16 |     debug="true"
17 | else
18 |     debug="false"
19 | fi
20 | echo "Running mixed content report"
21 | # NUL-delimited names keep paths containing spaces or quotes in one piece.
22 | find "$root" -name "*.html" -print0 | xargs -0 phantomjs --debug="$debug" --local-to-remote-url-access=true --web-security=false --disk-cache=true --ignore-ssl-errors=false --load-images=true --output-encoding=utf-8  report-mixed-content.js ;
23 | echo "report finished."
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Report mixed content on web pages using PhantomJS
 2 | 
 3 | Script adapted from [acdha's phantomjs-mixed-content-scan](https://github.com/acdha/phantomjs-mixed-content-scan).
 4 | 
 5 | This script is intended to be added to a Travis build to check static, Jekyll-generated HTML for possible mixed content warnings.
 6 | 
 7 | 
 8 | ## Requirements
 9 | 
10 | * PhantomJS 2.0
11 | 
12 | ```npm install -g phantomjs```
13 | 
14 | Note: travis build machines [trusty](https://docs.travis-ci.com/user/reference/trusty/#Headless-Browser-Testing-Tools) and [precise](https://docs.travis-ci.com/user/reference/precise/#Headless-Browser-Testing-Tools) come with phantomjs pre-installed.
15 | 
16 | ## Usage
17 | 
18 | ```
19 |     ./check_mixedcontent.sh [root directory for static html] [debug]
20 | ```
21 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | As a work of the United States Government, this project is in the
 2 | public domain within the United States.
 3 | 
 4 | Additionally, we waive copyright and related rights in the work
 5 | worldwide through the CC0 1.0 Universal public domain dedication.
 6 | 
 7 | ## CC0 1.0 Universal Summary
 8 | 
 9 | This is a human-readable summary of the
10 | [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode).
11 | 
12 | ### No Copyright
13 | 
14 | The person who associated a work with this deed has dedicated the work to
15 | the public domain by waiving all of his or her rights to the work worldwide
16 | under copyright law, including all related and neighboring rights, to the
17 | extent allowed by law.
18 | 
19 | You can copy, modify, distribute and perform the work, even for commercial
20 | purposes, all without asking permission.
21 | 
22 | ### Other Information
23 | 
24 | In no way are the patent or trademark rights of any person affected by CC0,
25 | nor are the rights that other persons may have in the work or in how the
26 | work is used, such as publicity or privacy rights.
27 | 
28 | Unless expressly stated otherwise, the person who associated a work with
29 | this deed makes no warranties about the work, and disclaims liability for
30 | all uses of the work, to the fullest extent permitted by applicable law.
31 | When using or citing the work, you should not imply endorsement by the
32 | author or the affirmer.
33 | 


--------------------------------------------------------------------------------
/report-mixed-content.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env phantomjs --web-security=false --disk-cache=true --ignore-ssl-errors=false --load-images=true --output-encoding=utf-8 
 2 | 'use strict';
 3 | 
 4 | var system = require('system'),
 5 |     webpage = require('webpage');
 6 | 
 7 | var args = system.args,
 8 |     URLs = [];
 9 | 
10 | args.slice(1).forEach(function(url) {
11 |     if (url.substr(0, 8) !== 'https://') {
12 |         //console.debug('Rewriting HTTP URL to use HTTPS:', url);
13 |         url = url.replace('http:', 'https:');
14 |     }
15 | 
16 |     URLs.push(url);
17 | });
18 | 
19 | if (URLs.length < 1) {
20 |     console.log('Usage:', args[0], 'URL [URL2]');
21 |     phantom.exit(1);
22 | }
23 | 
24 | function initPage() {
25 |     var page = new WebPage();
26 | 
27 |     page.onResourceRequested = function(requestData, networkRequest) {
28 |         var originalURL = currentURL = requestData.url;
29 | 
30 |         var currentPageURL = page.url || page.originalURL;
31 | 
32 |         if (currentURL.substr(0, 8) !== 'https://' && currentURL.substr(0, 5) !== 'data:' && currentURL.substr(0, 5) !== 'file:') {
33 |             console.log('❗️ ', currentPageURL, 'loaded an insecure resource:', originalURL);
34 |         }
35 |     };
36 | 
37 |     page.onError = function (msg, trace) {
38 |         logError('🌋 Page error:', msg);
39 |         trace.forEach(function(item) {
40 |             logError('  ', item.file, ':', item.line);
41 |         });
42 |     };
43 | 
44 |     page.onConsoleMessage = function(msg) {
45 |         if (msg == 'GOTO_NEXT_PAGE') {
46 |             page.close();
47 |             //crawlNextPage();
48 |         } else if (msg.indexOf('insecure content from') >= 0) {
49 |             // We can format WebKit's native error messages nicely:
50 |             console.log('❕ ', msg.trim().replace('The page at ', ''));
51 |         } else {
52 |             console.log('\t💻', msg);
53 |         }
54 |     };
55 | 
56 |     return page;
57 | }
58 | 
59 | function crawlNextPage() {
60 |     if (URLs.length < 1) {
61 |         console.log('… done!');
62 |         phantom.exit();
63 |     }
64 | 
65 |     var url = URLs.shift();
66 |     console.log('Checking ' + url);
67 |     var page = initPage();
68 | 
69 |     page.originalURL = url;
70 | 
71 |     page.open(url, function (status) {
72 |         if (status === 'success') {
73 |             //console.log('✅ ', url);
74 |         } else {
75 |             console.log('❌ ', url);
76 |         }
77 |         page.close()
78 |         crawlNextPage();
79 |     });
80 | }
81 | 
82 | crawlNextPage();
83 | 


--------------------------------------------------------------------------------
/require-http-200.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python -u
 2 | # encoding: utf-8
 3 | """
 4 | Filter URLs for those which return HTTP 200
 5 | 
 6 | Provide URLs as arguments or lines on standard input.
 7 | 
 8 | All URLs which returned an HTTP 200 will be written to standard output.
 9 | Any errors or non-200 HTTP status will be logged to standard error.
10 | """
11 | 
12 | from __future__ import (absolute_import, division, print_function,
13 |                         unicode_literals)
14 | 
15 | import argparse
16 | import sys
17 | from itertools import chain
18 | 
19 | import requests
20 | 
21 | parser = argparse.ArgumentParser(description=__doc__.strip())
22 | parser.add_argument('--follow-redirects', action='store_true', default=False,
23 |                     help='Attempt to follow redirect chains, confirming that the final page returns HTTP 200')
24 | parser.add_argument('--rewrite-insecure-redirects', action='store_true', default=False,
25 |                     help='Instead of failing when a secure URL redirects to an insecure target, '
26 |                          'see whether it will work using HTTPS')
27 | args, unknown = parser.parse_known_args()
28 | 
29 | all_urls = [i.strip() for i in unknown]
30 | if not sys.stdin.isatty():
31 |     all_urls = chain(all_urls, sys.stdin.readlines())
32 | 
33 | with requests.session() as s:
34 |     for original_url in all_urls:
35 |         url = original_url = original_url.strip()
36 | 
37 |         while url:
38 |             try:
39 |                 resp = s.get(url, allow_redirects=False)
40 |             except IOError as exc:
41 |                 print('Error checking {original_url}: {url} raised {exc}'.format(original_url=original_url,
42 |                                                                                  url=url,
43 |                                                                                  exc=exc),
44 |                       file=sys.stderr)
45 |                 continue
46 | 
47 |             if resp.status_code == 200:
48 |                 print(original_url)
49 |             elif resp.is_redirect and args.follow_redirects:
50 |                 url = resp.headers['Location']
51 | 
52 |                 if original_url.startswith('https') and url.startswith('http:'):
53 |                     if args.rewrite_insecure_redirects:
54 |                         print('{0} redirects to insecure {1}: rewriting'.format(original_url, url),
55 |                               file=sys.stderr)
56 |                         url = url.replace('http:', 'https:')
57 |                         continue
58 |                     else:
59 |                         print('{0} redirects to insecure {1}: aborting'.format(original_url, url),
60 |                               file=sys.stderr)
61 |                         break
62 |             else:
63 |                 print(resp.status_code, url, file=sys.stderr)
64 | 
65 |             break
66 | 


--------------------------------------------------------------------------------