├── check_mixedcontent.sh ├── README.md ├── LICENSE.md ├── report-mixed-content.js └── require-http-200.py /check_mixedcontent.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# -eq 2 && $2 -eq "debug" ]]; 4 | then 5 | debug="true" 6 | else 7 | debug="false" 8 | fi 9 | echo "Running mixed content report" 10 | # e.g. arg could be _site for a jekyll site 11 | find $1 -name "*.html" | xargs phantomjs --debug=$debug --local-to-remote-url-access=true --web-security=false --disk-cache=true --ignore-ssl-errors=false --load-images=true --output-encoding=utf-8 report-mixed-content.js ; 12 | echo "report finsihed." 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Report mixed content on web pages using PhantomJS 2 | 3 | Script edited [acdha](https://github.com/acdha/phantomjs-mixed-content-scan). 4 | 5 | This script is intended to be added to a travis build to check static jekyll generated HTML for possible mixed content warnings. 6 | 7 | 8 | ## Requirements 9 | 10 | * PhantomJS 2.0 11 | 12 | ```npm -g phantomjs``` 13 | 14 | Note: travis build machines [trusty](https://docs.travis-ci.com/user/reference/trusty/#Headless-Browser-Testing-Tools) and [precise](https://docs.travis-ci.com/user/reference/precise/#Headless-Browser-Testing-Tools) come with phantomjs pre-installed. 15 | 16 | ## Usage 17 | 18 | ``` 19 | ./check_mixedcontent.sh [root directory for static html] 20 | ``` 21 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | As a work of the United States Government, this project is in the 2 | public domain within the United States. 3 | 4 | Additionally, we waive copyright and related rights in the work 5 | worldwide through the CC0 1.0 Universal public domain dedication. 6 | 7 | ## CC0 1.0 Universal Summary 8 | 9 | This is a human-readable summary of the 10 | [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode). 11 | 12 | ### No Copyright 13 | 14 | The person who associated a work with this deed has dedicated the work to 15 | the public domain by waiving all of his or her rights to the work worldwide 16 | under copyright law, including all related and neighboring rights, to the 17 | extent allowed by law. 18 | 19 | You can copy, modify, distribute and perform the work, even for commercial 20 | purposes, all without asking permission. 21 | 22 | ### Other Information 23 | 24 | In no way are the patent or trademark rights of any person affected by CC0, 25 | nor are the rights that other persons may have in the work or in how the 26 | work is used, such as publicity or privacy rights. 27 | 28 | Unless expressly stated otherwise, the person who associated a work with 29 | this deed makes no warranties about the work, and disclaims liability for 30 | all uses of the work, to the fullest extent permitted by applicable law. 31 | When using or citing the work, you should not imply endorsement by the 32 | author or the affirmer. 33 | -------------------------------------------------------------------------------- /report-mixed-content.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env phantomjs --web-security=false --disk-cache=true --ignore-ssl-errors=false --load-images=true --output-encoding=utf-8 2 | 'use strict'; 3 | 4 | var system = require('system'), 5 | webpage = require('webpage'); 6 | 7 | var args = system.args, 8 | URLs = []; 9 | 10 | args.slice(1).forEach(function(url) { 11 | if (url.substr(0, 8) !== 'https://') { 12 | //console.debug('Rewriting HTTP URL to use HTTPS:', url); 13 | url = url.replace('http:', 'https:'); 14 | } 15 | 16 | URLs.push(url); 17 | }); 18 | 19 | if (URLs.length < 1) { 20 | console.log('Usage:', args[0], 'URL [URL2]'); 21 | phantom.exit(1); 22 | } 23 | 24 | function initPage() { 25 | var page = new WebPage(); 26 | 27 | page.onResourceRequested = function(requestData, networkRequest) { 28 | var originalURL = currentURL = requestData.url; 29 | 30 | var currentPageURL = page.url || page.originalURL; 31 | 32 | if (currentURL.substr(0, 8) !== 'https://' && currentURL.substr(0, 5) !== 'data:' && currentURL.substr(0, 5) !== 'file:') { 33 | console.log('❗️ ', currentPageURL, 'loaded an insecure resource:', originalURL); 34 | } 35 | }; 36 | 37 | page.onError = function (msg, trace) { 38 | logError('🌋 Page error:', msg); 39 | trace.forEach(function(item) { 40 | logError(' ', item.file, ':', item.line); 41 | }); 42 | }; 43 | 44 | page.onConsoleMessage = function(msg) { 45 | if (msg == 'GOTO_NEXT_PAGE') { 46 | page.close(); 47 | //crawlNextPage(); 48 | } else if (msg.indexOf('insecure content from') >= 0) { 49 | // We can format WebKit's native error messages nicely: 50 | console.log('❕ ', msg.trim().replace('The page at ', '')); 51 | } else { 52 | console.log('\t💻', msg); 53 | } 54 | }; 55 | 56 | return page; 57 | } 58 | 59 | function crawlNextPage() { 60 | if (URLs.length < 1) { 61 | console.log('… done!'); 62 | phantom.exit(); 63 | } 64 | 65 | var url = URLs.shift(); 66 | console.log('Checking ' + url); 67 | var page = initPage(); 68 | 69 | page.originalURL = url; 70 | 71 | page.open(url, function (status) { 72 | if (status === 'success') { 73 | //console.log('✅ ', url); 74 | } else { 75 | console.log('❌ ', url); 76 | } 77 | page.close() 78 | crawlNextPage(); 79 | }); 80 | } 81 | 82 | crawlNextPage(); 83 | -------------------------------------------------------------------------------- /require-http-200.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python -u 2 | # encoding: utf-8 3 | """ 4 | Filter URLs for those which return HTTP 200 5 | 6 | Provide URLs as arguments or lines on standard input. 7 | 8 | All URLs which returned an HTTP 200 will be written to standard output. 9 | Any errors or non-200 HTTP status will be logged to standard error. 10 | """ 11 | 12 | from __future__ import (absolute_import, division, print_function, 13 | unicode_literals) 14 | 15 | import argparse 16 | import sys 17 | from itertools import chain 18 | 19 | import requests 20 | 21 | parser = argparse.ArgumentParser(description=__doc__.strip()) 22 | parser.add_argument('--follow-redirects', action='store_true', default=False, 23 | help='Attempt to follow redirect chains, confirming that the final page returns HTTP 200') 24 | parser.add_argument('--rewrite-insecure-redirects', action='store_true', default=False, 25 | help='Instead of failing when a secure URL redirects to an insecure target, ' 26 | 'see whether it will work using HTTPS') 27 | args, unknown = parser.parse_known_args() 28 | 29 | all_urls = [i.strip() for i in unknown] 30 | if not sys.stdin.isatty(): 31 | all_urls = chain(all_urls, sys.stdin.readlines()) 32 | 33 | with requests.session() as s: 34 | for original_url in all_urls: 35 | url = original_url = original_url.strip() 36 | 37 | while url: 38 | try: 39 | resp = s.get(url, allow_redirects=False) 40 | except IOError as exc: 41 | print('Error checking {original_url}: {url} raised {exc}'.format(original_url=original_url, 42 | url=url, 43 | exc=exc), 44 | file=sys.stderr) 45 | continue 46 | 47 | if resp.status_code == 200: 48 | print(original_url) 49 | elif resp.is_redirect and args.follow_redirects: 50 | url = resp.headers['Location'] 51 | 52 | if original_url.startswith('https') and url.startswith('http:'): 53 | if args.rewrite_insecure_redirects: 54 | print('{0} redirects to insecure {1}: rewriting'.format(original_url, url), 55 | file=sys.stderr) 56 | url = url.replace('http:', 'https:') 57 | continue 58 | else: 59 | print('{0} redirects to insecure {1}: aborting'.format(original_url, url), 60 | file=sys.stderr) 61 | break 62 | else: 63 | print(resp.status_code, url, file=sys.stderr) 64 | 65 | break 66 | --------------------------------------------------------------------------------