├── LICENSE.md ├── README.md └── articles ├── README.md ├── analyzing-one-million-robots-txt-files ├── README.md ├── download-robots-txt.py ├── download-top-1m.sh ├── rule-parser.py └── summarize-data.py ├── aopic-algorithm ├── .babelrc ├── .eslintrc ├── README.md ├── miscellaneous.py ├── package.json ├── src │ ├── components │ │ ├── AOPICPlayer.jsx │ │ └── styles.css │ ├── graph.js │ ├── index.html │ └── index.jsx ├── webpack.config.js └── yarn.lock ├── chrome-extensions-with-selenium ├── README.md ├── extension │ ├── content.js │ └── manifest.json └── launch_chrome.py ├── circleci-artifacts ├── README.md ├── app.js ├── circleci-artifacts-policy.json ├── circleci-artifacts-role-policy-document.json ├── deploy-app.sh ├── lambda.js ├── package.json └── yarn.lock ├── clear-the-chrome-browser-cache ├── README.md └── clear_chrome_cache.py ├── clear-the-firefox-browser-cache ├── README.md ├── clear_firefox_57_cache.py ├── clear_firefox_61_cache.py ├── clear_firefox_cache.py └── evaluate-clear-cache.py ├── email-spy └── README.md ├── fantasy-football-for-hackers ├── README.md ├── points.py ├── scrape-projections.py └── simulation.py ├── firefox-extensions-with-selenium ├── README.md └── firefox_profile.py ├── javascript-injection ├── README.md ├── extension │ ├── injected-javascript.js │ └── manifest.json ├── marionette-execute-async-script.py ├── mitm-injector.py ├── puppeteer-evaluate-on-new-document.js ├── selenium-custom-web-extension.py ├── selenium-execute-async-script.py ├── selenium-mitmproxy.py └── test-page.html ├── keras-weight-transfer ├── README.md ├── frontend │ ├── .babelrc │ ├── .eslintrc │ ├── package.json │ ├── src │ │ ├── index.html │ │ ├── index.js │ │ └── sample.js │ ├── webpack.config.js │ └── yarn.lock └── neural-net │ ├── download-encoder.sh │ ├── mnist-cnn.py │ └── requirements.txt ├── making-chrome-headless-undetectable ├── README.md ├── chrome-headless-test.html ├── chrome-headless-test.js ├── inject.py ├── injected-test-bypasses.js └── 
test-headless.js ├── neural-network-initialization ├── README.md ├── images │ ├── relu-output-progression-violinplot.png │ └── training-losses.png ├── plot-activation-layers.py ├── plot-loss-progression.py ├── requirements.txt └── utils.py ├── nightmare-network-idle ├── README.md ├── package.json ├── test.js └── waitUntilNetworkIdle.js ├── node-package-manager-benchmarks └── README.md ├── not-possible-to-block-chrome-headless ├── README.md ├── chrome-headless-test.html ├── chrome-headless-test.js ├── img │ ├── headless-final-results.png │ └── headless-initial-results.png ├── package.json ├── test-headless-final.js └── test-headless-initial.js ├── power-assert ├── .babelrc ├── README.md ├── package.json ├── test │ └── test-assertion-errors.js └── yarn.lock ├── python-slicing-in-javascript ├── README.md ├── fizz-buzz.py ├── primitive-negative-indexing.js ├── slice-array.js ├── slice-probe.py └── slice.js ├── running-selenium-with-headless-chrome-in-ruby ├── README.md └── take-screenshot.rb ├── running-selenium-with-headless-chrome ├── README.md └── scrape-facebook-posts.py ├── sandbox-breakout ├── README.md ├── extension │ ├── manifest.json │ └── sandbox-breakout.js └── language-test.html ├── scrape-infinite-scroll ├── README.md ├── package.json └── scrape-infinite-scroll.js ├── steam-scraper └── README.md ├── webpack-markdown-setup ├── .babelrc ├── .eslintrc ├── README.md ├── package.json ├── src │ ├── article.md │ ├── components │ │ └── Markdown │ │ │ ├── gruvbox-dark.css │ │ │ ├── index.jsx │ │ │ └── markdown.css │ ├── index.html │ └── index.jsx ├── webpack.config.js └── yarn.lock └── youtube-mp3-downloader ├── .gitignore ├── README.md ├── app.js ├── deploy-everything.sh ├── download.html ├── lambda.js ├── package.json ├── transcoder.js └── yarn.lock /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2-Clause BSD License 2 | ====================== 3 | 4 | *Copyright © `2017-present`, `Intoli, LLC`* 5 | 6 | 
*https://github.com/intoli/intoli-article-materials/* 7 | 8 | *https://intoli.com/blog/* 9 | 10 | *All rights reserved.* 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are met: 14 | 15 | 1. Redistributions of source code must retain the above copyright notice, this 16 | list of conditions and the following disclaimer. 17 | 2. Redistributions in binary form must reproduce the above copyright notice, 18 | this list of conditions and the following disclaimer in the documentation 19 | and/or other materials provided with the distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 28 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | 32 | The views and conclusions contained in the software and documentation are those 33 | of the authors and should not be interpreted as representing official policies, 34 | either expressed or implied, of the FreeBSD Project. 
35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intoli Article Materials 2 | 3 | This repository holds supplementary article materials, such as code files, for posts on the [Intoli Blog](https://intoli.com/blog/); basically any stuff that doesn't quite warrant the creation of its own repository. 4 | These materials are often also available for download on [intoli.com](https://intoli.com), but the repository offers an alternative mechanism to browse through the files. 5 | Additionally, you can watch or star this repository to be made aware of new updates on our blog. 6 | 7 | 8 | ## Our Latest Article 9 | 10 | This is our most recent article; we hope that you'll enjoy it! 11 | 12 | - [Performing Efficient Broad Crawls with the AOPIC Algorithm](articles/aopic-algorithm/) - Learn how to efficiently allocate your bandwidth to the most important pages encountered during a broad crawl. 13 | 14 | 15 | ## Articles 16 | 17 | - [Analyzing One Million robots.txt Files](articles/analyzing-one-million-robots-txt-files) - Explores downloading and analyzing the `robots.txt` files for the Alexa top one million websites. 18 | - [Breaking Out of the Chrome/WebExtension Sandbox](articles/sandbox-breakout/) - Uses some JavaScript trickery to break out of the browser extension context to directly modify webpage native properties. 19 | - [Email Spy](articles/email-spy) - An open source Chrome/Firefox Web Extension that lets you find contact emails for any domain with a single click. 20 | - [Extending CircleCI's API with a Custom Microservice on AWS Lambda](articles/circleci-artifacts) - Explains how to deploy a Node.js Express app as a microservice on AWS Lambda. 
21 | - [Fantasy Football for Hackers](articles/fantasy-football-for-hackers) - Scrapes Fantasy Football projections and uses them to simulate league dynamics and calculate baseline subtracted values for players to use as a draft strategy. 22 | - [How to Clear the Chrome Browser Cache With Selenium WebDriver/ChromeDriver](articles/clear-the-chrome-browser-cache) - Demonstrates how to clear the Chrome browser cache with Selenium. 23 | - [How to Clear the Firefox Browser Cache With Selenium WebDriver/geckodriver](articles/clear-the-firefox-browser-cache) - Demonstrates how to clear the Firefox browser cache with Selenium. 24 | - [How to Run a Keras Model in the Browser with Keras.js](articles/keras-weight-transfer) - Shows how to export a Keras neural network model and use it in the browser with `keras-js`. 25 | - [Implementing a Custom Waiting Action in Nightmare JS](articles/nightmare-network-idle/) - Learn how to have your Nightmare JS tests wait until the network is silent. 26 | - [It is *not* possible to detect and block Chrome headless](articles/not-possible-to-block-chrome-headless) - An updated exploration of techniques to avoid detection. 27 | - [JavaScript Injection with Selenium, Puppeteer, and Marionette in Chrome and Firefox](articles/javascript-injection) - An exploration of different browser automation methods to inject JavaScript into webpages. 28 | - [Making a YouTube MP3 Downloader with Exodus, FFmpeg, and AWS Lambda](articles/youtube-mp3-downloader) - Walks through the process of building a YouTube MP3 bookmarklet using AWS Lambda. 29 | - [Making Chrome Headless Undetectable](articles/making-chrome-headless-undetectable) - Bypasses some common Chrome Headless tests by injecting JavaScript into pages before the test code has a chance to run. 
30 | - [No API Is the Best API — The elegant power of Power Assert](articles/power-assert) - Looks at how Power Assert can be used to get rich contextual error messages without the need to use a specialized assertion API. 31 | - [Performing Efficient Broad Crawls with the AOPIC Algorithm](articles/aopic-algorithm/) - Learn how to efficiently allocate your bandwidth to the most important pages encountered during a broad crawl. 32 | - [Recreating Python's Slice Syntax in JavaScript Using ES6 Proxies](articles/python-slicing-in-javascript) - Explores how Proxies work in JavaScript, and walks through the process of building the [Slice](https://github.com/intoli/slice) package for negative indexing and extended slicing in JavaScript. 33 | - [Running FFmpeg on AWS Lambda for 1.9% the cost of AWS Elastic Transcoder](articles/youtube-mp3-downloader) - Develops an AWS Lambda function that can transcode video and audio on the fly. 34 | - [Running Selenium with Headless Chrome](articles/running-selenium-with-headless-chrome) - Demonstrates how to run Google Chrome in headless mode using Selenium in Python. 35 | - [Running Selenium with Headless Chrome in Ruby](articles/running-selenium-with-headless-chrome-in-ruby) - A Ruby flavored version of our headless Chrome setup guide. 36 | - [Scraping User-Submitted Reviews from the Steam Store](articles/steam-scraper) - Walks through the process of building an advanced Scrapy spider for the purpose of scraping user reviews from the Steam Store. 37 | - [Understanding Neural Network Weight Initialization](articles/neural-network-initialization/) - Explores the effects of neural network weight initialization strategies. 38 | - [Using Firefox WebExtensions with Selenium](articles/firefox-extensions-with-selenium) - A guide to launching Firefox with extensions preloaded using Selenium. 
39 | - [Using Google Chrome Extensions with Selenium](articles/chrome-extensions-with-selenium) - A simple guide to launching Google Chrome with extensions preloaded using Selenium. 40 | - [Using Puppeteer to Scrape Websites with Infinite Scrolling](articles/scrape-infinite-scroll/) - Learn how to scrape an infinitely scrolling data feed with a headless browser. 41 | - [Using Webpack to Render Markdown in React Apps](articles/webpack-markdown-setup/) - A short tutorial about automatically rendering Markdown documents for usage in React apps. 42 | - [Why I Still Don't Use Yarn](articles/node-package-manager-benchmarks) - Benchmarks `npm`, `pnpm`, and `yarn` for installation time and storage space given a few common project configurations. 43 | 44 | 45 | ## Honorable Mentions 46 | 47 | These are articles where we don't have any supplementary materials available, but that we still highly recommend. 48 | 49 | - [A Brief Tour of Grouping and Aggregating in Pandas](https://intoli.com/blog/pandas-aggregation/) - Learn how to use pandas to easily slice up a dataset and quickly extract useful statistics. 50 | - [Building Data Science Pipelines with Luigi and Jupyter Notebooks](https://intoli.com/blog/luigi-jupyter-notebooks/) - Learn about the Luigi task runner and how to use Jupyter notebooks in your Luigi workflows. 51 | - [Dangerous Pickles — Malicious Python Serialization](https://intoli.com/blog/dangerous-pickles/) - A light introduction to the Python pickle protocol, the Pickle Machine, and constructing malicious pickles. 52 | - [Designing The Wayback Machine Loading Animation](https://intoli.com/blog/designing-the-wayback-machine-loading-animation/) - A walkthrough of how we helped The Internet Archive design a new loading animation for the Wayback Machine. 53 | - [Fantasy Football for Hackers II](https://intoli.com/blog/average-draft-position-vs-season-projections/) — An interactive visualization of Average Draft Position vs Season Projections. 
54 | - [Finding Pareto Optimal Blogs on Hacker News](https://intoli.com/blog/pareto-optimal-blogs/) - An analysis of submissions on Hacker News for the purpose of identifying high-quality technology blogs. 55 | - [How Are Principal Component Analysis and Singular Value Decomposition Related?](https://intoli.com/blog/pca-and-svd/) - Explores the relationship between singular value decomposition and principal component analysis. 56 | - [How F5Bot Slurps All of Reddit](https://intoli.com/blog/f5bot/) - A guest post in which the creator of F5Bot explains in detail how it works, and how it's able to scrape millions of Reddit comments per day in real time. 57 | - [How to Test If Your Website Logs Errors to the Console](https://intoli.com/blog/nightmare-console-errors/) - Use Nightmare JS to write useful Mocha-based console output tests. 58 | - [Markov's and Chebyshev's Inequalities Explained](https://intoli.com/blog/chebyshevs-inequality/) - A look at why Chebyshev's Inequality holds true and some potential applications. 59 | - [Patching a Linux Kernel Module](https://intoli.com/blog/patching-a-linux-kernel-module/) - A case study in debugging and patching kernel-level issues on Linux. 60 | - [Predicting Hacker News article success with neural networks and TensorFlow](https://intoli.com/blog/hacker-news-title-tool/) - An interactive tool that uses TensorFlow to predict how well submissions will do on Hacker News based on their titles. 61 | - [Running Selenium with Headless Firefox](https://intoli.com/blog/running-selenium-with-headless-firefox/) - A look at connecting Selenium WebDriver to Firefox's headless mode. 62 | - [Saving Images from a Headless Browser](https://intoli.com/blog/saving-images/) - Learn how to save any image from a headless browser in this Puppeteer tutorial. 63 | - [Terminal Recorders: A Comprehensive Guide](https://intoli.com/blog/terminal-recorders/) - An in-depth comparison of different methods to record animations of terminal sessions. 
64 | -------------------------------------------------------------------------------- /articles/README.md: -------------------------------------------------------------------------------- 1 | # Intoli Articles 2 | 3 | This is just a subdirectory to house all of the article directories and keep the top-level directory from getting too cluttered. 4 | You can visit the main [README.md](../README.md) file to see an annotated index of the articles. 5 | -------------------------------------------------------------------------------- /articles/analyzing-one-million-robots-txt-files/README.md: -------------------------------------------------------------------------------- 1 | # Analyzing One Million robots.txt Files 2 | 3 | [Analyzing One Million robots.txt Files](https://intoli.com/blog/analyzing-one-million-robots-txt-files/) involves downloading and analyzing the `robots.txt` files for the Alexa top one million websites. This directory contains the scripts used to do so: 4 | 5 | - [download-top-1m.sh](download-top-1m.sh) - Downloads and extracts the list of Alexa top one million websites into a CSV file called `top-1m.csv`. 6 | - [download-robots-txt.py](download-robots-txt.py) - Reads in `top-1m.csv` and downloads the `robots.txt` file for each listed domain. 7 | The results are written out into a JSON Lines formatted file called `robots-txt.jl`. 8 | Note that it takes an extremely long time to download all of the `robots.txt` files (on the order of weeks). 9 | - [rule-parser.py](rule-parser.py) - Defines a `RulesParser` class that extends `RobotExclusionRulesParser` from the [robotexclusionrulesparser](http://nikitathespider.com/python/rerp/) Python package. 10 | The main addition is a `line_count` attribute that can be used to determine the size of a file. 11 | - [summarize-data.py](summarize-data.py) - Uses `RulesParser` to analyze the `robots-txt.jl` file and prints out some basic summary statistics. 12 | 13 | The analysis in the article goes into far more depth than this and performs a t-SNE dimensionality reduction of the dataset based on the Levenshtein distance between files. 
14 | That's a little out of scope for the supplementary materials, but you can [give it a read](https://intoli.com/blog/analyzing-one-million-robots-txt-files/) to learn more about those components of the analysis. 15 | -------------------------------------------------------------------------------- /articles/analyzing-one-million-robots-txt-files/download-robots-txt.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | import json 4 | import sys 5 | from urllib import request 6 | 7 | from tqdm import tqdm 8 | 9 | output_filename = 'robots-txt.jl' if len(sys.argv) != 2 else sys.argv[1] 10 | 11 | with open('top-1m.csv', 'r') as f_in: 12 | with open(output_filename, 'a') as f_out: 13 | for line in tqdm(f_in, total=10**6): 14 | rank, domain = line.strip().split(',') 15 | url = f'http://{domain}/robots.txt' 16 | try: 17 | with request.urlopen(url, timeout=10) as r: 18 | data = r.read() 19 | text = data.decode() 20 | url = r.geturl() 21 | except: 22 | text = None 23 | 24 | f_out.write(json.dumps({ 25 | 'rank': int(rank), 26 | 'domain': domain, 27 | 'url': url, 28 | 'robots_txt': text, 29 | }) + '\n') 30 | -------------------------------------------------------------------------------- /articles/analyzing-one-million-robots-txt-files/download-top-1m.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | # Download and extract the Alexa top one million sites. 4 | wget http://s3.amazonaws.com/alexa-static/top-1m.csv.zip 5 | unzip top-1m.csv.zip 6 | 7 | # Preview the top ten websites on the list. 
8 | echo "The top ten website on the list are:" 9 | head top-1m.csv.zip 10 | -------------------------------------------------------------------------------- /articles/analyzing-one-million-robots-txt-files/rule-parser.py: -------------------------------------------------------------------------------- 1 | from robotexclusionrulesparser import RobotExclusionRulesParser, _end_of_line_regex 2 | 3 | 4 | class RulesParser(RobotExclusionRulesParser): 5 | def __init__(self, domain, rank, url, robots_txt): 6 | super().__init__() 7 | self.domain = domain 8 | self.rank = rank 9 | self.url = url 10 | 11 | self.lines = [] 12 | self.comments = [] 13 | self.line_count = 0 14 | self.missing = False 15 | self.html = False 16 | 17 | self.parse(robots_txt) 18 | 19 | def parse(self, text): 20 | if not text: 21 | self.missing = True 22 | return 23 | elif ' 0: 22 | totals['sitemaps'] += 1 23 | totals['all'] += 1 24 | 25 | print('Total Counts:') 26 | print(json.dumps(totals, indent=2)) 27 | 28 | sitemap_counter = Counter() 29 | for robots_txt in load_robots_txt(): 30 | sitemap_paths = map(lambda url: urlparse(url).path, robots_txt.sitemaps) 31 | sitemap_counter.update(sitemap_paths) 32 | 33 | print('Most Common Sitemaps:') 34 | print(json.dumps(sitemap_counter.most_common(10), indent=2)) 35 | -------------------------------------------------------------------------------- /articles/aopic-algorithm/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | "env", 4 | "react", 5 | "stage-2" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /articles/aopic-algorithm/.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "babel-eslint", 3 | "extends": "airbnb", 4 | "env": { 5 | "browser": true, 6 | "es6": true, 7 | "node": true 8 | }, 9 | "settings": { 10 | "import/resolver": { 11 | "webpack": { 12 | "config": 
"./webpack.config.js" 13 | } 14 | } 15 | }, 16 | "rules": { 17 | "class-methods-use-this": "off", 18 | "function-paren-newline": "off", 19 | "object-curly-newline": ["error", { 20 | "consistent": true, 21 | "minProperties": 5 22 | }] 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /articles/aopic-algorithm/README.md: -------------------------------------------------------------------------------- 1 | # Performing Efficient Broad Crawls with the AOPIC Algorithm 2 | 3 | [Performing Efficient Broad Crawls with the AOPIC Algorithm](https://intoli.com/blog/aopic-algorithm/) article explains how the the Adaptive On-Line Page Importance Computation (AOPIC) algorithm for performing efficient broad crawls works. 4 | AOPIC is similar to Google's PageRank in that it iteratively estimates page importance based on links between pages, but it's pretty simple to understand and implement, and produces good results in practice. 5 | 6 | This folder contains the code used to generate the AOPIC simulation widgets and plots in the article. 7 | 8 | 9 | ## Widget Development 10 | 11 | To get started, clone this repository and navigate to this article's directory: 12 | 13 | ```bash 14 | git clone https://github.com/intoli/intoli-article-materials.git 15 | cd intoli-article-materials/articles/aopic-algorithm 16 | ``` 17 | 18 | Then, install the project's dependencies via Yarn 19 | 20 | ```bash 21 | yarn install 22 | ``` 23 | 24 | This will also copy the required [Cytoscape](http://js.cytoscape.org/) global dependency into `./public/`, to be served with the application. 25 | With the basic setup out of the way, you can start the development app with hot reloading with 26 | 27 | ```bash 28 | yarn start 29 | ``` 30 | 31 | You can view the app at `http://localhost:3000`, and build with `yarn build`. 
32 | 33 | 34 | ## Plot Generation 35 | 36 | The scale-free network graph example and dynamic history update plots from the article were generated using code in [miscellaneous.py](./miscellaneous.py). 37 | -------------------------------------------------------------------------------- /articles/aopic-algorithm/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from pprint import pprint 3 | 4 | import networkx as nx 5 | import numpy as np 6 | 7 | from matplotlib import pyplot as plt 8 | from matplotlib import collections as mc 9 | 10 | 11 | # Generate second example graph 12 | # ----------------------------- 13 | 14 | # delta_in (float) – Bias for choosing nodes from in-degree distribution. 15 | # delta_out (float) – Bias for choosing nodes from out-degree distribution. 16 | alpha = 0.05 17 | beta = 0.5 18 | D = nx.scale_free_graph( 19 | 24, 20 | delta_in=0.5, 21 | delta_out=0.2, 22 | ) 23 | plt.figure(figsize=(10, 10)) 24 | nx.draw(D, node_size=10) 25 | 26 | 27 | # Print out graph for copy-pasting to JavaScript. 28 | js_graph = [] 29 | for node in D.nodes(): 30 | adjacencies = list(D.neighbors(node)) 31 | js_graph.append({ 'id': node + 1, 'links': list(map(lambda x: x+1, adjacencies)) }) 32 | pprint(js_graph) 33 | 34 | 35 | # Generate plots explaining linear credit accumulation formulas 36 | # ------------------------------------------------------------- 37 | 38 | # S > T case 39 | # ++++++++++ 40 | 41 | plt.xkcd() 42 | 43 | fig = plt.figure(figsize=(10,6)) 44 | ax = fig.add_subplot(1, 1, 1) 45 | ax.spines['right'].set_color('none') 46 | ax.spines['top'].set_color('none') 47 | plt.xticks([]) 48 | plt.yticks([]) 49 | 50 | m = 1.2 51 | T = 50 52 | t0 = 20 53 | t1 = 30 54 | t2 = t1 + 50 55 | 56 | shift = 0.3 57 | 58 | # Plot the horiizontnal line of length S. 
# Continuation of the "S > T" figure.  `m`, `t0`, `t1`, `t2` and `shift` are
# assigned earlier in this script; `m` is the slope of the linear credit line.
# The S segment is drawn at height m*t0, starting at t0 and offset by `shift`
# (presumably so that it does not sit exactly on top of neighbouring segments).
xS = t0 + np.arange(t2-t0) + shift
plt.plot(xS, [t0*m]*len(xS), color='xkcd:orange')

plt.text(t0 + (t2 - t0)/2.0, m*t0 - 10, '$S$', color='xkcd:orange', fontsize=20)

# Plot the horizontal line of length T.
xT = t1 + np.arange(t2-t1) - shift
plt.plot(xT, [t1*m]*len(xT), color='xkcd:ocean blue')

plt.text(t1 + (t2 - t1)/2.0, m*t1 + 5, '$T$', color='xkcd:ocean blue', fontsize=20)


# Plot vertical line corresponding to S.
yHS = m*t0 + np.arange(m*(t2+shift) - m*t0)
xHS = len(yHS) * [t2+shift]
plt.plot(xHS, yHS, color='xkcd:orange')

plt.text(t2+2, m*t0 + m*(t2-t0)/2.0 - 5, '$C(i)$', color='xkcd:orange', fontsize=20)

# Plot vertical line corresponding to T.
yHT = m*t1 + np.arange(m*(t2 - t1)) - shift
# One x value per y sample; the length must come from `yHT`, the array
# assigned on the line above (no `yHt` is defined anywhere in this script).
xHT = len(yHT) * [t2 - shift]
plt.plot(xHT, yHT, color='xkcd:ocean blue')

plt.text(t2-6, m*t1 + m*(t2-t1)/2.0 - 5, '$H_t$', color='xkcd:ocean blue', fontsize=20)


# Plot the linear history accumulation line.
x = np.arange(100)
y = x * m
plt.plot(x, y, color='xkcd:light red')

# Plot the visits.
plt.scatter([t0, t2], [m*t0, m*t2], s=100, zorder=3, color='xkcd:light red')


plt.xlabel('time')
plt.ylabel('credits')
plt.title('T < S')
plt.show()


# S < T case
# ++++++++++

plt.xkcd()

fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(1, 1, 1)
# Hide the right and top spines and all tick marks.
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
plt.xticks([])
plt.yticks([])

m = 1.2

# Two overlapping windows of length T whose starts are S apart:
# the previous one spans [t0, t2] and the current one spans [t1, t3].
T = 50
S = 30
t0 = 10
t1 = t0 + S
t2 = t0 + T
t3 = t1 + T

shift = 0.4


# Plot the horizontal line for previous T.
xTp = t0 + np.arange(t2-t0)
plt.plot(xTp, [t0*m]*len(xTp), color='xkcd:slate')

plt.text(t0 + (t2 - t0)/2.0, m*t0 + 5, '$T$', color='xkcd:slate', fontsize=20)

# Plot vertical line corresponding to previous T.
yHTp = m*t0 + np.arange(m*(t2 - t0))
xHTp = len(yHTp) * [t2]
plt.plot(xHTp, yHTp, color='xkcd:slate')

plt.text(t2-10, m*t0 + m*(t2-t0)/2.0 - 10, '$H_{t-S}$', color='xkcd:slate', fontsize=20)


# Plot the horizontal line for current T.
xT = t1 + np.arange(t3-t1)
plt.plot(xT, [t1*m]*len(xT), color='xkcd:ocean blue')

plt.text(t1 + (t3 - t1)/2.0, m*t1 + 5, '$T$', color='xkcd:ocean blue', fontsize=20)

# Plot vertical line corresponding to current T.
yHT = m*t1 + np.arange(m*(t3 - t1)) + shift
# As in the first figure, size the x list from `yHT` so that the x and y
# sequences passed to plt.plot have equal length.
xHT = len(yHT) * [t3 + shift]
plt.plot(xHT, yHT, color='xkcd:ocean blue')

plt.text(t3+2, m*t1 + m*(t3-t1)/2.0 - 5, '$H_t$', color='xkcd:ocean blue', fontsize=20)


# Plot horizontal line corresponding to S.
xS = t2 + np.arange(t3-t2)
yS = len(xS) * [m*t2 - shift]
plt.plot(xS, yS, color='xkcd:orange')

plt.text(t2 + (t3-t2)/2.0 - 2, m*t2 - 10, '$S$', fontsize=20, color='xkcd:orange')

# Plot vertical line corresponding to C(i).
ySh = m*t2 + np.arange(m*(t3 - t2))
xSh = len(ySh) * [t3 - shift]
plt.plot(xSh, ySh, color='xkcd:orange')

plt.text(t3-8, m*t2 + m*(t3-t2)/2.0 - 5, '$C(i)$', color='xkcd:orange', fontsize=20)



# Plot the linear history accumulation line.
170 | x = np.arange(100) 171 | y = x * m 172 | plt.plot(x, y, color='xkcd:light red') 173 | 174 | # Plot the visits 175 | plt.scatter([t2, t3], [m*t2, m*t3], s=100, zorder=3, color='xkcd:light red') 176 | 177 | plt.xlabel('time') 178 | plt.ylabel('credits') 179 | plt.title('S < T') 180 | plt.show() 181 | 182 | -------------------------------------------------------------------------------- /articles/aopic-algorithm/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "build": "rimraf ./build && NODE_ENV=production webpack --config webpack.config.js", 4 | "postinstall": "mkdir -p ./public && cp ./node_modules/cytoscape/dist/cytoscape.min.js ./public/", 5 | "lint": "eslint --ignore-path .gitignore --ext js,jsx src", 6 | "start": "NODE_ENV=development webpack-dev-server --config webpack.config.js --hot" 7 | }, 8 | "devDependencies": { 9 | "babel-core": "^6.26.0", 10 | "babel-eslint": "^8.2.2", 11 | "babel-loader": "^7.1.3", 12 | "babel-preset-env": "^1.6.1", 13 | "babel-preset-react": "^6.24.1", 14 | "babel-preset-stage-2": "^6.24.1", 15 | "eslint": "^4.18.2", 16 | "eslint-config-airbnb": "^16.1.0", 17 | "eslint-loader": "^2.0.0", 18 | "eslint-plugin-import": "^2.9.0", 19 | "eslint-plugin-jsx-a11y": "^6.0.3", 20 | "eslint-plugin-react": "^7.7.0", 21 | "html-webpack-plugin": "^3.0.4", 22 | "rimraf": "^2.6.2", 23 | "uglifyjs-webpack-plugin": "1.3.0", 24 | "webpack": "^3.10.0", 25 | "webpack-cli": "^2.0.10", 26 | "webpack-dev-server": "^2.11.2" 27 | }, 28 | "dependencies": { 29 | "babel-polyfill": "^6.26.0", 30 | "classnames": "^2.2.6", 31 | "css-loader": "^1.0.0", 32 | "cytoscape": "^3.2.16", 33 | "prop-types": "^15.6.2", 34 | "react": "^16.3.2", 35 | "react-dom": "^16.3.2", 36 | "style-loader": "^0.23.0" 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /articles/aopic-algorithm/src/components/styles.css: 
// NOTE: the next two comment lines are residue of the surrounding multi-file dump, kept verbatim
// (tail of the preceding stylesheet section and the header of this file). They are not part of graph.js.
// -------------------------------------------------------------------------------- 1 | .first-step { 2 | margin-top: 30px; 3 | } 4 | 5 | .step-list { 6 | margin: 10px 0; 7 | } 8 | 9 | .step-list li { 10 | border-width: 0; 11 | } 12 | 13 | .control-wrapper { 14 | display: flex; 15 | padding-bottom: 20px; 16 | } 17 | 18 | .control-wrapper > * { 19 | margin-right: 6px; 20 | flex: 1; 21 | } 22 | 23 | .control-wrapper :last-child { 24 | margin-right: 0; 25 | } 26 |
// -------------------------------------------------------------------------------- /articles/aopic-algorithm/src/graph.js: --------------------------------------------------------------------------------

/**
 * Draws a node-link graph with cytoscape and keeps track of which nodes/edges are highlighted.
 *
 * Node labels are derived from the supplied crawler: either its cash/history values or its
 * importance estimate, depending on `labelType`.
 */
export class GraphRenderer {
  /**
   * @param {Array} graph - Iterable (via `forEach`) of `{ id, links }` entries.
   * @param {object} crawler - Object exposing `cash`, `history` and `importance(id)`.
   * @param {*} container - Passed through to cytoscape as its `container` option.
   * @param {object} [options]
   * @param {object} [options.layout] - Passed through to cytoscape as its `layout` option.
   * @param {string} [options.labelType='cash-history'] - `'cash-history'` selects the
   *   cash/history label; any other value selects the importance label.
   */
  constructor(
    graph,
    crawler,
    container,
    {
      layout = undefined,
      labelType = 'cash-history',
    } = {},
  ) {
    this.graph = graph;
    this.crawler = crawler;
    this.container = container;
    this.cytoscape = null;
    // Holds cytoscape id selectors (e.g. `#3` or `#3-5`), not raw ids.
    this.highlightedIds = new Set();
    this.labelType = labelType;
    this.layout = layout;

    this.backgroundColor = '#ffffdb';
    this.borderColor = '#3a3a3a';
    this.highlightBackgroundColor = '#ffe4d9';
    this.highlightColor = '#c70000';
  }

  /** Two-line label: the node's cash on the first line, its history on the second. */
  cashHistoryLabel(id) {
    return `${this.crawler.cash[id].toFixed(0)}\n${this.crawler.history[id].toFixed(0)}`;
  }

  /** Removes the `highlight` class from the tracked elements and forgets them. */
  clearHighlights() {
    this.selectHighlightedElements().removeClass('highlight');
    this.highlightedIds = new Set();
  }

  /** Destroys the underlying cytoscape instance. */
  destroy() {
    this.cytoscape.destroy();
  }

  /**
   * Marks the given element ids (node ids or `from-to` edge ids) as highlighted.
   * An empty list is a no-op, so an empty selector is never handed to cytoscape.
   */
  highlightIds(ids) {
    if (ids.length === 0) {
      return;
    }
    ids.forEach((id) => {
      this.highlightedIds.add(`#${id}`);
    });
    this.selectHighlightedElements().addClass('highlight');
  }

  /** Highlights a single node. */
  highlightNode(id) {
    this.highlightIds([id]);
  }

  /** Highlights the edges going from `fromId` to each id in `toIds`. */
  highlightEdges(fromId, toIds) {
    if (toIds.length === 0) {
      return;
    }
    this.highlightIds(
      toIds.map(toId => `${fromId}-${toId}`),
    );
  }

  /** Highlights every node in `ids`. */
  highlightNodes(ids) {
    if (ids.length === 0) {
      return;
    }
    this.highlightIds(ids);
  }

  /** Importance of the node formatted with three decimals (falsy importance renders as 0.000). */
  importanceLabel(id) {
    return (this.crawler.importance(id) || 0).toFixed(3);
  }

  /**
   * Builds the cytoscape element list from the graph and creates the cytoscape instance.
   * Elements that are currently tracked as highlighted are created with the `highlight` class.
   */
  mount() {
    const elements = [];
    this.graph.forEach(({ id, links }) => {
      // Nodes.
      elements.push({
        data: {
          id: id.toString(),
          label: this.renderLabel(id),
        },
        // highlightedIds stores `#`-prefixed selectors, so the lookup must use the same form.
        classes: this.highlightedIds.has(`#${id}`) ? 'highlight' : undefined,
      });

      // Edges.
      links.forEach((toId) => {
        // Links to id 0 are never drawn (the graph itself has no element for it).
        if (toId.toString() === '0') {
          return;
        }
        elements.push({
          data: {
            id: `${id}-${toId}`,
            source: `${id}`,
            target: `${toId}`,
          },
          classes: this.highlightedIds.has(`#${id}-${toId}`) ? 'highlight' : undefined,
        });
      });
    });

    // eslint-disable-next-line no-undef
    this.cytoscape = cytoscape({
      autoungrabify: true,
      container: this.container,
      elements,
      style: [
        {
          selector: 'core',
          style: {
            // Hide background circle when clicking on canvas.
            'active-bg-size': 0,
          },
        },
        {
          selector: 'node',
          style: {
            'background-color': this.backgroundColor,
            'border-color': this.borderColor,
            'border-width': 2,
            color: this.borderColor,
            width: 60,
            height: 60,
            content: 'data(label)',
            'overlay-opacity': 0,
            'text-halign': 'center',
            'text-valign': 'center',
            'text-wrap': 'wrap',
          },
        },
        {
          selector: 'edge',
          style: {
            'curve-style': 'bezier',
            'line-color': this.borderColor,
            'target-arrow-color': this.borderColor,
            'target-arrow-shape': 'triangle',
            width: 2,
          },
        },
        {
          selector: 'node.highlight',
          style: {
            color: this.highlightColor,
            'background-color': this.highlightBackgroundColor,
            'border-color': this.highlightColor,
            'border-width': 4,
          },
        },
        {
          selector: 'edge.highlight',
          style: {
            'line-color': this.highlightColor,
            'target-arrow-color': this.highlightColor,
            width: 4,
          },
        },
      ],
      layout: this.layout,
      userPanningEnabled: false,
    });
  }

  /** Label for a node according to the current `labelType`. */
  renderLabel(id) {
    return this.labelType === 'cash-history' ?
      this.cashHistoryLabel(id) : this.importanceLabel(id);
  }

  /** Queries cytoscape with the comma-joined selectors of all tracked highlights. */
  selectHighlightedElements() {
    return this.cytoscape.$(
      Array.from(this.highlightedIds).join(', '),
    );
  }

  /** Switches the label type; call `updateLabels()` to refresh already-mounted nodes. */
  setLabelType(labelType) {
    this.labelType = labelType;
  }

  /** Re-renders the `label` data field of every node in the graph. */
  updateLabels() {
    this.graph.forEach(({ id }) => {
      this.cytoscape.$(`#${id}`).data('label', this.renderLabel(id));
    });
  }
}


/**
 * Cash/history bookkeeping for crawling a graph: each visit hands the visited node's cash to
 * its linked nodes and moves that cash into the node's history.
 */
export class GraphCrawler {
  /**
   * @param {Array} graph - Array of `{ id, links }` entries that also provides
   *   `getLinkedNodes(id)`.
   * @param {object} [options]
   * @param {number} [options.totalCash=100] - Split evenly between all nodes at start.
   * @param {boolean} [options.virtual=false] - When true, a node with id 0 is tracked as well
   *   and every other node additionally hands a share of its cash to it.
   * @param {string} [options.strategy='random'] - `'random'` picks a random node in `next()`;
   *   any other value picks the node holding the most cash.
   */
  constructor(graph, {
    totalCash = 100,
    virtual = false,
    strategy = 'random',
  } = {}) {
    this.graph = graph;
    this.virtual = virtual;
    this.strategy = strategy;
    this.previousId = null;

    this.totalHistory = 0;
    this.totalCash = totalCash;

    const nodes = graph.map(({ id }) => id);
    if (virtual && !nodes.includes(0)) {
      nodes.push(0);
    }

    this.history = {};
    this.cash = {};
    nodes.forEach((id) => {
      this.history[id] = 0;
      this.cash[id] = totalCash / nodes.length;
    });
  }

  /** `1 / totalHistory`; `Infinity` until some history has been recorded. */
  errorBound() {
    return 1 / this.totalHistory;
  }

  /** `(history + cash)` of the node relative to `(totalHistory + totalCash)`. */
  importance(id) {
    return (this.history[id] + this.cash[id]) / (this.totalHistory + this.totalCash);
  }

  /**
   * Chooses the id of the next node to visit.
   *
   * Ids are returned as strings because they are read back from object keys.
   */
  next() {
    if (this.strategy === 'random') {
      const ids = Object.keys(this.cash);
      const nontrivialIds = ids.filter(id => this.cash[id] > 0);
      const candidateIds = nontrivialIds.length > 0 ? nontrivialIds : ids;

      // If we have a choice, avoid picking the same element.
      let id = candidateIds[Math.floor(Math.random() * candidateIds.length)];
      if (id === this.previousId && candidateIds.length > 1) {
        id = candidateIds[(candidateIds.indexOf(id) + 1) % candidateIds.length];
      }
      this.previousId = id;
      return id;
    }

    // Return the node with most cash (the last one wins on ties because of `>=`).
    let max = -1;
    let maxId = null;
    Object.entries(this.cash).forEach(([id, cash]) => {
      if (cash >= max) {
        max = cash;
        maxId = id;
      }
    });
    return maxId;
  }

  /**
   * Splits the node's current cash evenly between its linked nodes (plus node 0 in virtual
   * mode). The node's own cash is left untouched here; `updateHistory` resets it.
   *
   * If the node has no targets at all nothing is distributed.
   */
  allocateCash(id) {
    const cash = this.cash[id];
    // Work on a copy: getLinkedNodes may return the graph's own `links` array, and appending
    // the virtual node to it would permanently alter the graph on every visit.
    const linkedNodes = [...this.graph.getLinkedNodes(id)];
    // Ids coming from `next()` are strings, so compare by string to recognise node 0 itself
    // and avoid giving it a link to itself.
    if (this.virtual && String(id) !== '0') {
      linkedNodes.push(0);
    }

    const change = cash / linkedNodes.length;
    linkedNodes.forEach((linkedNode) => {
      this.cash[linkedNode] += change;
    });
  }

  /** Moves the node's cash into its history (and the global total) and zeroes its cash. */
  updateHistory(id) {
    const cash = this.cash[id];
    this.totalHistory += cash;
    this.history[id] += cash;
    this.cash[id] = 0;
  }

  /** Visits a node: distributes its cash, then records it as history. */
  visitNode(id) {
    this.allocateCash(id);
    this.updateHistory(id);
  }
}

// NOTE: the next comment line is residue of the surrounding multi-file dump, kept verbatim
// (header and first lines of the following index.html section). It is not part of graph.js.
// -------------------------------------------------------------------------------- /articles/aopic-algorithm/src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Minimal Frontend App 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /articles/aopic-algorithm/src/index.jsx: -------------------------------------------------------------------------------- 1 | import 'babel-polyfill'; 2 | import React from 'react'; 3 | import ReactDOM from 'react-dom'; 4 | 5 | import AOPICPlayer from './components/AOPICPlayer'; 6 | 7 | 8 | // Add a getLinkedNodes method to a node-link specification. Use this only for toy graphs. 9 | const makeGraph = (nodes) => { 10 | const graphById = {}; 11 | nodes.forEach(({ id, links }) => { 12 | graphById[id] = links; 13 | }); 14 | 15 | // eslint-disable-next-line no-param-reassign 16 | nodes.getLinkedNodes = function getLinkedNodes(nodeId) { 17 | if (nodeId === 0 || nodeId === '0') { 18 | return this.map(({ id }) => id); 19 | } 20 | return graphById[nodeId]; 21 | }; 22 | 23 | return nodes; 24 | }; 25 | 26 | 27 | const smallGraph = makeGraph([{ 28 | id: 1, 29 | links: [5], 30 | }, { 31 | id: 2, 32 | links: [1, 5], 33 | }, { 34 | id: 3, 35 | links: [6], 36 | }, { 37 | id: 4, 38 | links: [1, 7], 39 | }, { 40 | id: 5, 41 | links: [4, 3, 9], 42 | }, { 43 | id: 6, 44 | links: [3, 9], 45 | }, { 46 | id: 7, 47 | links: [5, 8], 48 | }, { 49 | id: 8, 50 | links: [5], 51 | }, { 52 | id: 9, 53 | links: [5], 54 | }]); 55 | 56 | const smallLayout = { 57 | name: 'grid', 58 | rows: 3, 59 | }; 60 | 61 | 62 | ReactDOM.render( 63 | , 64 | document.getElementById('small-aopic-example'), 65 | ); 66 | 67 | 68 | const scaleFreeGraph = makeGraph([{ 69 | id: 1, 70 | links: [2, 5], 71 | }, { 72 | id: 2, 73 | links: [3, 1, 5, 7, 11], 74 | }, { 75 | id: 3, 76 | links: [1, 2, 7, 18], 77 | }, { 78 | id: 4, 79 | links: [2], 80 | }, { 81 | id: 5, 82 | links: [2], 83 | }, { 84 | id: 6, 85 | links: [3], 86 | }, { 87 | id: 7, 88 | links: [], 89 | }, { 90 | id: 8, 91 | links: [5, 9], 92 | }, { 93 | id: 9, 94 | links: [5, 3, 2, 4, 7], 95 | }, { 96 | id: 10, 97 | links: [3, 9], 98 | }, { 99 | id: 11, 100 | links: [5], 
101 | }, { 102 | id: 12, 103 | links: [5], 104 | }, { 105 | id: 13, 106 | links: [11], 107 | }, { 108 | id: 14, 109 | links: [12, 9], 110 | }, { 111 | id: 15, 112 | links: [3], 113 | }, { 114 | id: 16, 115 | links: [1], 116 | }, { 117 | id: 17, 118 | links: [5], 119 | }, { 120 | id: 18, 121 | links: [], 122 | }, { 123 | id: 19, 124 | links: [2], 125 | }, { 126 | id: 20, 127 | links: [5], 128 | }, { 129 | id: 21, 130 | links: [20], 131 | }, { 132 | id: 22, 133 | links: [5], 134 | }, { 135 | id: 23, 136 | links: [9], 137 | }, { 138 | id: 24, 139 | links: [2], 140 | }]); 141 | 142 | const scaleFreeLayout = { 143 | name: 'cose', 144 | }; 145 | 146 | ReactDOM.render( 147 | , 155 | document.getElementById('scale-free-aopic-example'), 156 | ); 157 | -------------------------------------------------------------------------------- /articles/aopic-algorithm/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | 3 | const HtmlWebpackPlugin = require('html-webpack-plugin'); 4 | const UglifyJSWebpackPlugin = require('uglifyjs-webpack-plugin'); 5 | 6 | 7 | const config = { 8 | devServer: { 9 | clientLogLevel: 'info', 10 | contentBase: './public', 11 | historyApiFallback: true, 12 | overlay: { 13 | errors: true, 14 | warnings: false, 15 | }, 16 | port: 3000, 17 | publicPath: '/', 18 | stats: { 19 | modules: false, 20 | chunks: false, 21 | }, 22 | }, 23 | devtool: 'cheap-module-source-map', 24 | entry: path.resolve(__dirname, 'src', 'index.jsx'), 25 | module: { 26 | rules: [ 27 | { 28 | test: /\.(js|jsx)$/, 29 | exclude: /node_modules/, 30 | enforce: 'pre', 31 | loader: 'eslint-loader', 32 | }, 33 | { 34 | test: /\.(js|jsx)$/, 35 | exclude: /node_modules/, 36 | loader: 'babel-loader', 37 | }, 38 | { 39 | test: /\.css$/, 40 | exclude: /node_modules/, 41 | use: [ 42 | 'style-loader', 43 | { 44 | loader: 'css-loader', 45 | options: { 46 | camelCase: true, 47 | modules: true, 48 | }, 49 | }, 50 | ], 51 
| }, 52 | ], 53 | }, 54 | output: { 55 | filename: 'bundle.js', 56 | path: path.resolve(__dirname, 'build'), 57 | publicPath: '/', 58 | }, 59 | plugins: [ 60 | new HtmlWebpackPlugin({ 61 | inject: true, 62 | template: './src/index.html', 63 | }), 64 | ...(process.env.NODE_ENV === 'production' ? [new UglifyJSWebpackPlugin()] : []), 65 | ], 66 | resolve: { 67 | modules: ['./src', './node_modules'], 68 | extensions: [ 69 | '.js', 70 | '.jsx', 71 | '.react.js', 72 | ], 73 | }, 74 | watchOptions: { 75 | ignored: /build/, 76 | }, 77 | }; 78 | 79 | 80 | module.exports = config; 81 | -------------------------------------------------------------------------------- /articles/chrome-extensions-with-selenium/README.md: -------------------------------------------------------------------------------- 1 | # Using Google Chrome Extensions with Selenium 2 | 3 | [Using Google Chrome Extensions with Selenium](https://intoli.com/blog/chrome-extensions-with-selenium/) demonstrates how to launch Google Chrome with a custom extension using Python and Selenium. 4 | 5 | A custom extension that replaces every webpage that is visited with a "Successfully Installed!" message is used to verify that the extension is loading properly. 6 | This extension is defined in the [extension](extension) subdirectory. 7 | 8 | - [extension/manifest.json](extension/manifest.json) - The manifest for the extension. 9 | - [extension/content.js](extension/content.js) - The injected JavaScript. 10 | 11 | The actual code for launching Google Chrome with the unpackaged extension installed is in: 12 | 13 | - [launch_chrome.py](launch_chrome.py) - A script to launch Chrome with the extension installed and print out the test results. 14 | -------------------------------------------------------------------------------- /articles/chrome-extensions-with-selenium/extension/content.js: -------------------------------------------------------------------------------- 1 | // Wait for the DOM to completely load. 
2 | document.addEventListener("DOMContentLoaded", () => { 3 | // Overwrite the contents of the body. 4 | document.body.innerHTML = '

Successfully Installed!

'; 5 | }); 6 | -------------------------------------------------------------------------------- /articles/chrome-extensions-with-selenium/extension/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 2, 3 | "name": "Chrome Extensions With Selenium", 4 | "version": "1.0.0", 5 | "content_scripts": [ 6 | { 7 | "matches": ["*://*/*"], 8 | "js": ["content.js"], 9 | "run_at": "document_start" 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /articles/chrome-extensions-with-selenium/launch_chrome.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.common.exceptions import NoSuchElementException 3 | 4 | 5 | # Configure the necessary command-line option. 6 | options = webdriver.ChromeOptions() 7 | # Note that `chrome-extension` is the path to the unpackaged extension. 8 | options.add_argument('--load-extension=chrome-extension') 9 | 10 | # Navigate to any page... well, not just *any* page... 11 | driver = webdriver.Chrome(chrome_options=options) 12 | driver.get('https://intoli.com') 13 | 14 | # Check if the extension worked and log the result. 15 | try: 16 | header = driver.find_element_by_id('successfully-installed') 17 | print('Success! :-)') 18 | except NoSuchElementException: 19 | print('Failure! :-(') 20 | finally: 21 | # Clean up. 
22 | driver.quit() 23 | -------------------------------------------------------------------------------- /articles/circleci-artifacts/README.md: -------------------------------------------------------------------------------- 1 | # Extending CircleCI's API with a Custom Microservice on AWS Lambda 2 | 3 | [Extending CircleCI's API with a Custom Microservice on AWS Lambda](https://intoli.com/blog/circleci-artifacts/) is a tutorial that describes the process of setting up a [nodejs](https://nodejs.org/) [express](https://expressjs.com/) app as an API using Amazon's [Lambda service](https://aws.amazon.com/lambda/). 4 | The guide is comprehensive and covers everything from writing the initial express app to deploying it as an API on a custom domain name. 5 | The actual purpose of the service that is developed is to provide a mechanism to access the latest version of a build artifact from [CircleCI](https://circleci.com/). 6 | The finished API is provided free of charge to any open source projects that would like to use it, and it's accessible using the following URL pattern. 7 | 8 | ``` 9 | https://circleci.intoli.com/artifacts/github-username/repo-name/path/to/the/artifact 10 | ``` 11 | 12 | All of the resources required to deploy your own version of the proxy app are included inside of this directory. 13 | The JavaScript dependencies are included in [package.json](package.json) and [yarn.lock](yarn.lock), and they can be installed by running the following. 14 | 15 | ```bash 16 | yarn install 17 | ``` 18 | 19 | The app itself is defined in [app.js](app.js). 20 | You can run this locally on your own machine by invoking it directly with node. 21 | 22 | ```bash 23 | node app.js 24 | ``` 25 | 26 | A little bit of glue is required in order to get the script working on Lambda, and this is provided by the [lambda.js](lambda.js) file which exports a Lambda handler that will proxy requests to the app. 27 | 28 | You'll need to package the app before deploying it. 
29 | This can be done using the `zip` command. 30 | 31 | ```bash 32 | zip -r circleci-artifacts.zip app.js lambda.js node_modules/ package.json 33 | ``` 34 | 35 | The above command will create a `circleci-artifacts.zip` file that contains everything necessary to run the app on Amazon Lambda. 36 | 37 | The [deploy-app.sh](deploy-app.sh) script walks through all of the steps necessary to actually deploy the app. 38 | Note, however, that it isn't really meant to be run directly. 39 | There is a point where you will need to confirm domain ownership before proceeding. 40 | You'll also need to replace `example.com` with your own domain name. 41 | 42 | The last two pieces of supporting materials are [circleci-artifacts-role-policy-document.json](circleci-artifacts-role-policy-document.json) and [circleci-artifacts-policy.json](circleci-artifacts-policy.json). 43 | These are used by the commands in [deploy-app.sh](deploy-app.sh) to specify the AWS role and policy for the service. 44 | 45 | If any of this is confusing, then be sure to check out the original [Extending CircleCI's API with a Custom Microservice on AWS Lambda](https://intoli.com/blog/circleci-artifacts/) article. 46 | This directory is meant to be a supplement to the longer explanations there rather than a replacement. 47 | -------------------------------------------------------------------------------- /articles/circleci-artifacts/app.js: -------------------------------------------------------------------------------- 1 | const https = require('https'); 2 | 3 | const express = require('express'); 4 | const app = express(); 5 | 6 | 7 | app.get('/artifacts/:username/:project/*', (req, res) => { 8 | // Mandatory positional arguments. 9 | const file = req.params[0]; 10 | const { project, username } = req.params; 11 | 12 | // Optional query string parameters. 
13 | const branch = req.query.branch || 'master'; 14 | const build = req.query.build || 'latest'; 15 | const filter = req.query.filter || 'successful'; 16 | const vcsType = req.query.vcsType || 'github'; 17 | 18 | // Construct the request options for hitting CircleCI's API. 19 | const requestOptions = { 20 | hostname: 'circleci.com', 21 | path: `/api/v1.1/project/${vcsType}/${username}/${project}` + 22 | `/${build}/artifacts?branch=${branch}&filter=${filter}`, 23 | port: 443, 24 | method: 'GET', 25 | headers: { 26 | 'Accept': 'application/json', 27 | }, 28 | }; 29 | 30 | // Make the request. 31 | https.get(requestOptions, response => { 32 | // Accumulate the response body. 33 | let body = ''; 34 | response.setEncoding('utf8'); 35 | response.on('data', data => body += data); 36 | 37 | // Process the complete response. 38 | response.on('end', () => { 39 | try { 40 | // Loop through and try to find the specified artifact. 41 | const artifacts = JSON.parse(body); 42 | for (let i = 0; i < artifacts.length; i++) { 43 | const artifact = artifacts[i]; 44 | if (artifact.path === file) { 45 | // Redirect to the artifact URL if we can find it. 46 | return res.redirect(303, artifact.url); 47 | } 48 | } 49 | // Return a 404 if there are no matching artifacts. 50 | return res.status(404).send('Not found.'); 51 | } catch (e) { 52 | console.error(e); 53 | return res.status(500).send(`Something went wrong: ${e.message}`); 54 | } 55 | }); 56 | }); 57 | }); 58 | 59 | 60 | // Run the app when the file is being run as a script. 61 | if (!module.parent) { 62 | app.listen(3000, () => console.log('Listening on port 3000!')) 63 | } 64 | 65 | // Export the app for use with lambda. 
66 | module.exports = app; 67 | -------------------------------------------------------------------------------- /articles/circleci-artifacts/circleci-artifacts-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "apigateway:*" 8 | ], 9 | "Resource": "arn:aws:apigateway:*::/*" 10 | }, 11 | { 12 | "Effect": "Allow", 13 | "Action": [ 14 | "execute-api:Invoke" 15 | ], 16 | "Resource": "arn:aws:execute-api:*:*:*" 17 | }, 18 | { 19 | "Effect": "Allow", 20 | "Action": [ 21 | "lambda:*" 22 | ], 23 | "Resource": "*" 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /articles/circleci-artifacts/circleci-artifacts-role-policy-document.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Principal": { 7 | "Service": [ 8 | "apigateway.amazonaws.com", 9 | "lambda.amazonaws.com" 10 | ] 11 | }, 12 | "Action": "sts:AssumeRole" 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /articles/circleci-artifacts/deploy-app.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | 4 | # Create the initial role. 5 | response="$(aws iam create-role \ 6 | --role-name CircleciArtifactsRole \ 7 | --assume-role-policy-document file://circleci-artifacts-role-policy-document.json)" 8 | # Echo the response in the terminal. 9 | echo "${response}" 10 | # Store the role ARN for future usage. 11 | role_arn="$(jq -r .Role.Arn <<< "${response}")" 12 | 13 | 14 | # Attach the policy. 
15 | aws iam put-role-policy \ 16 | --role-name CircleciArtifactsRole \ 17 | --policy-name CircleciArtifactsPolicy \ 18 | --policy-document file://circleci-artifacts-policy.json 19 | 20 | 21 | # Create the lambda function. 22 | response="$(aws lambda create-function \ 23 | --function-name CircleciArtifactsFunction \ 24 | --zip-file fileb://circleci-artifacts.zip \ 25 | --handler lambda.handler \ 26 | --runtime nodejs6.10 \ 27 | --role "${role_arn}")" 28 | # Echo the response in the terminal. 29 | echo "${response}" 30 | # Store the function ARN for future usage. 31 | function_arn="$(jq -r .FunctionArn <<< "${response}")" 32 | 33 | 34 | # Create a new API. 35 | response="$(aws apigateway create-rest-api \ 36 | --name CircleciArtifactsApi \ 37 | --endpoint-configuration types=REGIONAL)" 38 | # Echo the response in the terminal. 39 | echo "${response}" 40 | # Store the API ID for future usage. 41 | api_id="$(jq -r .id <<< "${response}")" 42 | 43 | 44 | # Fetch the API resources. 45 | response="$(aws apigateway get-resources \ 46 | --rest-api-id "${api_id}")" 47 | # Echo the response in the terminal. 48 | echo "${response}" 49 | # Store the root resource ID for future usage. 50 | root_resource_id="$(jq -r .items[0].id <<< "${response}")" 51 | 52 | 53 | # Create a new API resource. 54 | response="$(aws apigateway create-resource \ 55 | --rest-api-id "${api_id}" \ 56 | --parent-id "${root_resource_id}" \ 57 | --path-part '{proxy+}')" 58 | # Echo the response in the terminal. 59 | echo "${response}" 60 | # Store the proxy resource ID for future usage. 61 | proxy_resource_id="$(jq -r .id <<< "${response}")" 62 | 63 | 64 | # Allow GET methods on the resource. 65 | aws apigateway put-method \ 66 | --rest-api-id "${api_id}" \ 67 | --resource-id "${proxy_resource_id}" \ 68 | --http-method GET \ 69 | --authorization-type NONE 70 | 71 | 72 | # Integrate the endpoint with the Lambda function. 
73 | aws apigateway put-integration \ 74 | --rest-api-id "${api_id}" \ 75 | --resource-id "${proxy_resource_id}" \ 76 | --http-method GET \ 77 | --integration-http-method POST \ 78 | --type AWS_PROXY \ 79 | --uri "arn:aws:apigateway:us-east-2:lambda:path/2015-03-31/functions/${function_arn}/invocations" \ 80 | --credentials "${role_arn}" 81 | 82 | 83 | # Deploy the API. 84 | aws apigateway create-deployment \ 85 | --rest-api-id "${api_id}" \ 86 | --stage-name v1 87 | 88 | 89 | # Request a certificate. 90 | response="$(aws acm request-certificate \ 91 | --domain-name circleci.example.com \ 92 | --validation-method DNS \ 93 | --idempotency-token 1111)" 94 | # Echo the response in the terminal. 95 | echo "${response}" 96 | # Store the certificate ID for future usage. 97 | certificate_arn="$(jq -r .CertificateArn <<< "${response}")" 98 | 99 | 100 | echo NOTE: You must actually verify your domain ownership before doing the next steps, exiting... 101 | exit 0 102 | 103 | 104 | # Create an API Gateway domain name. 105 | aws apigateway create-domain-name \ 106 | --domain-name circleci.example.com \ 107 | --endpoint-configuration types=REGIONAL \ 108 | --regional-certificate-arn "${certificate_arn}" 109 | 110 | 111 | # Map the domain to the API. 
112 | aws apigateway create-base-path-mapping \ 113 | --domain-name circleci.example.com \ 114 | --rest-api-id "${api_id}" \ 115 | --stage v1 116 | -------------------------------------------------------------------------------- /articles/circleci-artifacts/lambda.js: -------------------------------------------------------------------------------- 1 | const awsServerlessExpress = require('aws-serverless-express'); 2 | const app = require('./app'); 3 | const server = awsServerlessExpress.createServer(app); 4 | 5 | exports.handler = (event, context) => ( 6 | awsServerlessExpress.proxy(server, event, context) 7 | ); 8 | -------------------------------------------------------------------------------- /articles/circleci-artifacts/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "aws-serverless-express": "^3.0.2", 4 | "express": "^4.16.2" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /articles/clear-the-chrome-browser-cache/README.md: -------------------------------------------------------------------------------- 1 | # How to Clear the Chrome Browser Cache With Selenium WebDriver/ChromeDriver 2 | 3 | [How to Clear the Chrome Browser Cache With Selenium WebDriver/ChromeDriver](https://intoli.com/blog/clear-the-chrome-browser-cache/) develops a method to clear the Chrome browser cache in Selenium. 4 | 5 | - [clear_chrome_cache.py](clear_chrome_cache.py) - Defines the `clear_cache()` method. 
6 | -------------------------------------------------------------------------------- /articles/clear-the-chrome-browser-cache/clear_chrome_cache.py: -------------------------------------------------------------------------------- 1 | from selenium.webdriver.support.ui import WebDriverWait 2 | 3 | 4 | def get_clear_browsing_button(driver): 5 | """Find the "CLEAR BROWSING BUTTON" on the Chrome settings page.""" 6 | return driver.find_element_by_css_selector('* /deep/ #clearBrowsingDataConfirm') 7 | 8 | 9 | def clear_cache(driver, timeout=60): 10 | """Clear the cookies and cache for the ChromeDriver instance.""" 11 | # navigate to the settings page 12 | driver.get('chrome://settings/clearBrowserData') 13 | 14 | # wait for the button to appear 15 | wait = WebDriverWait(driver, timeout) 16 | wait.until(get_clear_browsing_button) 17 | 18 | # click the button to clear the cache 19 | get_clear_browsing_button(driver).click() 20 | 21 | # wait for the button to be gone before returning 22 | wait.until_not(get_clear_browsing_button) 23 | -------------------------------------------------------------------------------- /articles/clear-the-firefox-browser-cache/README.md: -------------------------------------------------------------------------------- 1 | # How to Clear the Firefox Browser Cache With Selenium WebDriver/geckodriver 2 | 3 | [How to Clear the Firefox Browser Cache With Selenium WebDriver/geckodriver](https://intoli.com/blog/clear-the-firefox-browser-cache/) shows how to clear the Firefox site data, including the cache and cookies, with Selenium. 4 | There are two versions of the script described in detail in this article. 5 | 6 | 1. [clear_firefox_61_cache.py](clear_firefox_61_cache.py) - Written for Firefox 61 released on June 26, 2018. The script will keep working until Firefox's slow-changing preferences page is modified by Mozilla. 7 | 2. [clear_firefox_57_cache.py](clear_firefox_57_cache.py) - Written for Firefox 57 released on November 14, 2017. 
The script should work on versions of Firefox with a similar preferences page. 8 | 9 | The scripts both work in the same way: they visit `about:preferences#privacy` and automate interactions with the interface there to clear the cache. 10 | To use either script, simply use the `clear_firefox_cache()` utility found in either script. 11 | See the [evaluate-clear-cache.py](evaluate-clear-cache.py) script for a complete usage example (it assumes Firefox 61, at least). 12 | 13 | You'll need to have `geckodriver` installed on your system (on Linux, this is done by installing it from your package manager) as well as Selenium. 14 | If you're using [clear_firefox_61_cache.py](clear_firefox_61_cache.py), make sure that you have Selenium version v3.14.0 or above installed. 15 | You can install it globally via `pip` with: 16 | 17 | ```bash 18 | pip install --user selenium 19 | ``` 20 | 21 | or upgrade your existing version with 22 | 23 | ```bash 24 | pip install --user -U selenium 25 | ``` 26 | 27 | Then, run the script with 28 | 29 | ```bash 30 | python evaluate-clear-cache.py 31 | ``` 32 | -------------------------------------------------------------------------------- /articles/clear-the-firefox-browser-cache/clear_firefox_57_cache.py: -------------------------------------------------------------------------------- 1 | from selenium.webdriver.common.alert import Alert 2 | from selenium.webdriver.support import expected_conditions as EC 3 | from selenium.webdriver.support.ui import WebDriverWait 4 | 5 | 6 | def get_clear_cache_button(driver): 7 | return driver.find_element_by_css_selector('#clearCacheButton') 8 | 9 | 10 | def get_clear_site_data_button(driver): 11 | return driver.find_element_by_css_selector('#clearSiteDataButton') 12 | 13 | 14 | def clear_firefox_cache(driver, timeout=10): 15 | driver.get('about:preferences#privacy') 16 | wait = WebDriverWait(driver, timeout) 17 | 18 | # Click the "Clear Now" button under "Cached Web Content" 19 | 
wait.until(get_clear_cache_button) 20 | get_clear_cache_button(driver).click() 21 | 22 | # Click the "Clear All Data" button under "Site Data" and accept the alert 23 | wait.until(get_clear_site_data_button) 24 | get_clear_site_data_button(driver).click() 25 | 26 | wait.until(EC.alert_is_present()) 27 | alert = Alert(driver) 28 | alert.accept() 29 | -------------------------------------------------------------------------------- /articles/clear-the-firefox-browser-cache/clear_firefox_61_cache.py: -------------------------------------------------------------------------------- 1 | from selenium.webdriver.common.alert import Alert 2 | from selenium.webdriver.support import expected_conditions as EC 3 | from selenium.webdriver.support.ui import WebDriverWait 4 | 5 | 6 | dialog_selector = '#dialogOverlay-0 > groupbox:nth-child(1) > browser:nth-child(2)' 7 | 8 | accept_dialog_script = ( 9 | f"const browser = document.querySelector('{dialog_selector}');" + 10 | "browser.contentDocument.documentElement.querySelector('#clearButton').click();" 11 | ) 12 | 13 | 14 | def get_clear_site_data_button(driver): 15 | return driver.find_element_by_css_selector('#clearSiteDataButton') 16 | 17 | 18 | def get_clear_site_data_dialog(driver): 19 | return driver.find_element_by_css_selector(dialog_selector) 20 | 21 | 22 | def get_clear_site_data_confirmation_button(driver): 23 | return driver.find_element_by_css_selector('#clearButton') 24 | 25 | 26 | def clear_firefox_cache(driver, timeout=10): 27 | driver.get('about:preferences#privacy') 28 | wait = WebDriverWait(driver, timeout) 29 | 30 | # Click the "Clear Data..." button under "Cookies and Site Data". 31 | wait.until(get_clear_site_data_button) 32 | get_clear_site_data_button(driver).click() 33 | 34 | # Accept the "Clear Data" dialog by clicking on the "Clear" button. 35 | wait.until(get_clear_site_data_dialog) 36 | driver.execute_script(accept_dialog_script) 37 | 38 | # Accept the confirmation alert. 
39 | wait.until(EC.alert_is_present()) 40 | alert = Alert(driver) 41 | alert.accept() 42 | -------------------------------------------------------------------------------- /articles/clear-the-firefox-browser-cache/clear_firefox_cache.py: -------------------------------------------------------------------------------- 1 | from selenium.webdriver.common.alert import Alert 2 | from selenium.webdriver.support import expected_conditions as EC 3 | from selenium.webdriver.support.ui import WebDriverWait 4 | 5 | 6 | def get_clear_cache_button(driver): 7 | return driver.find_element_by_css_selector('#clearCacheButton') 8 | 9 | 10 | def get_clear_site_data_button(driver): 11 | return driver.find_element_by_css_selector('#clearSiteDataButton') 12 | 13 | 14 | def clear_firefox_cache(driver, timeout=10): 15 | driver.get('about:preferences#privacy') 16 | wait = WebDriverWait(driver, timeout) 17 | 18 | # Click the "Clear Now" button under "Cached Web Content" 19 | wait.until(get_clear_cache_button) 20 | get_clear_cache_button(driver).click() 21 | 22 | # Click the "Clear All Data" button under "Site Data" and accept the alert 23 | wait.until(get_clear_site_data_button) 24 | get_clear_site_data_button(driver).click() 25 | 26 | wait.until(EC.alert_is_present()) 27 | alert = Alert(driver) 28 | alert.accept() 29 | -------------------------------------------------------------------------------- /articles/clear-the-firefox-browser-cache/evaluate-clear-cache.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | from selenium import webdriver 3 | from clear_firefox_61_cache import clear_firefox_cache 4 | 5 | # Visit a website that places data in local storage 6 | driver = webdriver.Firefox() 7 | driver.get('https://overstock.com') 8 | 9 | # Navigate to the preferences page to see that the cache is not empty. 
10 | driver.get('about:preferences#privacy') 11 | sleep(5) 12 | 13 | # Clear the cache and hang around to manually confirm that it worked. 14 | clear_firefox_cache(driver) 15 | sleep(5) 16 | 17 | driver.quit() 18 | -------------------------------------------------------------------------------- /articles/email-spy/README.md: -------------------------------------------------------------------------------- 1 | # Email Spy 2 | 3 | [Email Spy](https://intoli.com/blog/email-spy/) is an open source browser extension that we developed for finding contact emails for various domains as you browse. 4 | This one was large enough to get its own repository--so there aren't any supplementary materials--but you can check out the full source code [here](https://github.com/sangaline/email-spy). 5 | -------------------------------------------------------------------------------- /articles/fantasy-football-for-hackers/README.md: -------------------------------------------------------------------------------- 1 | # Fantasy Football for Hackers 2 | 3 | [Fantasy Football for Hackers](https://intoli.com/blog/fantasy-football-for-hackers/) walks through the process of scraping Fantasy Football projections, calculating player and team points given custom league rules, and then simulating league dynamics to develop baseline subtracted projections. 4 | 5 | 6 | - [points.py](points.py) - Lays out how to calculate the expected player and team points given projections. 7 | - [scrape-projections.py](scrape-projections.py) - Defines methods for scraping weekly projections from [FantasySharks.com](https://fantasysharks.com). 8 | - [simulation.py](simulation.py) - Develops abstractions for players, teams, and leagues that can be used in simulations to generate baselines for players. 
# Points awarded to an individual player per unit of each projected statistic.
player_rules = {
    'pass yds': 0.04,  # Pass Yards
    'pass tds': 4,     # Pass Touchdowns
    'int': -2,         # Interceptions
    'rush yds': 0.1,   # Rush Yards
    'rush tds': 6,     # Rush Touchdowns
    'rec yds': 0.1,    # Reception Yards
    'rec tds': 6,      # Reception Touchdowns
    'fum': -2,         # Fumbles
    '10-19 fgm': 3,    # 10-19 Yard Field Goal
    '20-29 fgm': 3,    # 20-29 Yard Field Goal
    '30-39 fgm': 3,    # 30-39 Yard Field Goal
    '40-49 fgm': 3,    # 40-49 Yard Field Goal
    '50+ fgm': 5,      # 50+ Yard Field Goal
    'xpm': 1,          # Extra Point
}


def calculate_player_points(performance):
    """Score an individual player's projection.

    Every statistic named in `player_rules` is read from `performance`
    (missing ones count as 0), converted with `float()`, and weighted.
    """
    return sum(
        float(performance.get(stat, 0)) * weight
        for stat, weight in player_rules.items()
    )


# Points awarded to a team defense per unit of each projected statistic.
team_rules = {
    'scks': 1,   # Sacks
    'int': 2,    # Interceptions
    'fum': 2,    # Fumbles
    'deftd': 6,  # Defensive Touchdowns
    'safts': 2,  # Safeties
}


def calculate_team_points(performance):
    """Score a team defense's projection.

    Unlike the player version, every statistic in `team_rules` as well as
    'pts agn' must be present in `performance`; a missing one raises
    KeyError.
    """
    stat_points = sum(
        float(performance[stat]) * weight
        for stat, weight in team_rules.items()
    )

    # special brackets for "Points Against"
    allowed = float(performance['pts agn'])
    if allowed == 0:
        bracket_bonus = 10
    elif allowed < 7:
        bracket_bonus = 7
    elif allowed < 14:
        bracket_bonus = 2
    else:
        bracket_bonus = 0

    return stat_points + bracket_bonus


def calculate_points(performance):
    """Score a projection, treating position 'D' as a team defense."""
    scorer = (calculate_team_points if performance['position'] == 'D'
              else calculate_player_points)
    return scorer(performance)
def fetch_projections_page(week, position_id):
    """Download the raw projections page for one week and one position.

    Args:
        week: Week number; asserted to lie between 1 and 17 inclusive.
        position_id: Value placed in the `Position` query parameter.

    Returns:
        The response body as returned by `response.read()`.
    """
    assert 1 <= week <= 17, f'Invalid week: {week}'

    endpoint = 'https://www.fantasysharks.com/apps/bert/forecasts/projections.php'
    query = f'League=-1&Position={position_id}&scoring=1&Segment={595 + week}&uid=4'

    request = urllib.request.Request(
        f'{endpoint}?{query}',
        headers={'User-Agent': 'projection-scraper 0.1'},
    )
    with urllib.request.urlopen(request) as response:
        return response.read()


def scrape_projections():
    """Yield one projection dictionary per table row, week and position.

    Each dictionary holds 'id', 'week' and 'position' plus one entry per
    table column, keyed by the lower-cased column header and holding the
    cell text. Five seconds are slept before every page request.
    """
    position_ids = {'RB': 2, 'WR': 4, 'TE': 5, 'QB': 1, 'D': 6, 'K': 7}
    for week in range(1, 17):
        for position, position_id in position_ids.items():
            time.sleep(5)  # be polite
            page = fetch_projections_page(week, position_id)
            soup = BeautifulSoup(page, 'lxml')

            table = soup.find('table', id='toolData')
            headers = [cell.text for cell in table.find('tr').find_all('th')]

            for row in table.find_all('tr'):
                values = [cell.text for cell in row.find_all('td')]

                # exclude repeated header rows and the "Tier N" rows
                if len(values) != len(headers):
                    continue

                # extract Fantasy Shark's player id
                href = row.find('a')['href']
                player_id = int(href.split('=')[-1].strip())

                # yield a dictionary of this player's weekly projection
                projection = {'id': player_id, 'week': week, 'position': position}
                projection.update(
                    (header.lower(), value)
                    for header, value in zip(headers, values)
                )
                yield projection
class Team:
    """A fantasy roster that can pick its best starting lineup for a week.

    Players are stored by id; the class only relies on each player exposing
    `id`, `position` and `week_points(week)`.
    """

    # positions that may fill a 'FLEX' starting slot
    allowed_flex_positions = ['RB', 'TE', 'WR']
    # roster size limit asserted by `add_player`
    maximum_players = 18
    # one entry per starting slot; 'FLEX' slots are filled after the others
    starting_positions = ['K', 'D', 'FLEX', 'QB', 'RB', 'RB', 'TE', 'WR', 'WR']
    # weeks that `season_points` sums over
    weeks = list(range(1, 17))

    def __init__(self):
        self.players_by_id = {}

    def add_player(self, player):
        """Add `player` to the roster; asserts the roster is not yet full."""
        assert self.player_count() < self.maximum_players
        self.players_by_id[player.id] = player

    def remove_player(self, player):
        """Drop `player` from the roster; KeyError if it is not on it."""
        del self.players_by_id[player.id]

    def clear_players(self):
        """Empty the roster."""
        self.players_by_id = {}

    def players(self):
        """Return a view of the players currently on the roster."""
        return self.players_by_id.values()

    def player_count(self):
        return len(self.players_by_id)

    def team_full(self):
        return self.player_count() == self.maximum_players

    @staticmethod
    def _take_best(ranked_players, positions):
        """Remove and return the first player in `ranked_players` whose
        position is in `positions`, or None when there is none."""
        for index, player in enumerate(ranked_players):
            if player.position in positions:
                del ranked_players[index]
                return player
        return None

    def starters(self, week):
        """Return the highest scoring lineup for `week`.

        Each entry of `starting_positions` is filled with the best remaining
        player of that position; 'FLEX' slots are filled afterwards with the
        best remaining player from `allowed_flex_positions`. A slot nobody
        can fill is left empty, so the result may be shorter than
        `starting_positions`.
        """
        ranked_players = sorted(self.players_by_id.values(),
                                key=lambda player: player.week_points(week),
                                reverse=True)
        lineup = []
        flex_slots = 0
        for position in self.starting_positions:
            # we'll handle flex players later
            if position == 'FLEX':
                flex_slots += 1
                continue
            # find the best player with this position
            starter = self._take_best(ranked_players, (position,))
            if starter is not None:
                lineup.append(starter)

        # Exactly one player per flex slot. The previous loop had no `break`,
        # so it kept consuming flex-eligible bench players for a single slot.
        for _ in range(flex_slots):
            starter = self._take_best(ranked_players, self.allowed_flex_positions)
            if starter is not None:
                lineup.append(starter)

        return lineup

    def season_points(self):
        """Sum `week_points` over every week in `self.weeks`."""
        return sum(self.week_points(week) for week in self.weeks)

    def week_points(self, week):
        """Sum the points of this week's starters."""
        return sum(player.week_points(week) for player in self.starters(week))


class League:
    """A group of teams drawing from one shared pool of players."""

    number_of_teams = 12
    team_class = Team

    def __init__(self, players):
        self.teams = [self.team_class() for _ in range(self.number_of_teams)]
        # Materialize once so a one-shot iterable (e.g. a generator) fills
        # both lists instead of leaving `available_players` empty.
        self.all_players = list(players)
        self.available_players = list(self.all_players)

    def clear_teams(self):
        """Return every player to the available pool and empty all rosters."""
        self.available_players = list(self.all_players)
        for team in self.teams:
            team.clear_players()

    def calculate_baselines(self):
        """Map each position to the best total of any still-available player.

        A player's total is the sum of `week_points` over the first team's
        `weeks`.
        """
        totals_by_position = defaultdict(list)
        for player in self.available_players:
            total = sum(player.week_points(week) for week in self.teams[0].weeks)
            totals_by_position[player.position].append(total)
        return {position: max(totals)
                for position, totals in totals_by_position.items()}

    def optimize_teams(self, same_positions=False):
        """Swap rostered players for available ones until no swap helps.

        Teams are visited in random order. For every rostered player, the
        available player that raises the team's `season_points()` the most
        takes its place, and the dropped player becomes available. Passes
        repeat until one completes without any swap.

        Args:
            same_positions: When true, a player may only be replaced by an
                available player of the same position.
        """
        optimal = False
        while not optimal:
            optimal = True
            for team in sorted(self.teams, key=lambda t: random.random()):
                for original_player in list(team.players()):
                    # find the best trade with available players
                    original_points = team.season_points()
                    team.remove_player(original_player)
                    best_player, best_points = original_player, original_points
                    for new_player in self.available_players:
                        same_position = new_player.position == original_player.position
                        if same_positions and not same_position:
                            continue
                        # don't bother computing if the new player is never
                        # better than the original in any week
                        if same_position and not any(
                                new_player.week_points(week) > original_player.week_points(week)
                                for week in team.weeks):
                            continue

                        team.add_player(new_player)
                        new_points = team.season_points()
                        if new_points > best_points:
                            best_points = new_points
                            best_player = new_player
                        team.remove_player(new_player)

                    # update the team if an available player is better
                    if best_player != original_player:
                        optimal = False
                        self.available_players.append(original_player)
                        self.available_players.remove(best_player)
                        team.add_player(best_player)
                    else:
                        team.add_player(original_player)

    def fill_teams_greedily(self):
        """Draft rosters round by round, each pick maximizing season points.

        In every round the teams pick in random order, each taking the
        available player that gives it the highest `season_points()`.
        """
        self.clear_teams()
        for _ in range(self.team_class.maximum_players):
            for team in sorted(self.teams, key=lambda t: random.random()):
                best_player, best_points = None, None
                for new_player in self.available_players:
                    team.add_player(new_player)
                    new_points = team.season_points()
                    if best_player is None or new_points > best_points:
                        best_points = new_points
                        best_player = new_player
                    team.remove_player(new_player)
                team.add_player(best_player)
                self.available_players.remove(best_player)

    def randomize_teams(self):
        """Fill every roster with players drawn at random from the pool."""
        self.clear_teams()
        for team in self.teams:
            while not team.team_full():
                index = random.randint(0, len(self.available_players) - 1)
                team.add_player(self.available_players.pop(index))

    def set_weeks(self, weeks):
        """Make every team score over `weeks` instead of the class default."""
        for team in self.teams:
            team.weeks = weeks
# `webdriver` supplies the base class below; without this import the module
# fails with a NameError as soon as it is loaded.
from selenium import webdriver


class FirefoxProfileWithWebExtensionSupport(webdriver.FirefoxProfile):
    """A `FirefoxProfile` that can also read add-ons described by `manifest.json`.

    The stock add-on detail lookup is tried first; only when it raises
    `AddonFormatError` is the add-on directory's `manifest.json` consulted.
    """

    def _addon_details(self, addon_path):
        """Return the id, version, name and unpack flag of the add-on.

        Args:
            addon_path: Directory of the add-on; `manifest.json` is read
                from it when the stock lookup fails.

        Raises:
            AddonFormatError: If the stock lookup fails and the manifest
                cannot be opened or lacks one of the required keys.
        """
        try:
            return super()._addon_details(addon_path)
        except AddonFormatError:
            try:
                with open(os.path.join(addon_path, 'manifest.json'), 'r') as f:
                    manifest = json.load(f)
                    return {
                        'id': manifest['applications']['gecko']['id'],
                        'version': manifest['version'],
                        'name': manifest['name'],
                        'unpack': False,
                    }
            except (IOError, KeyError) as e:
                # report manifest problems with the same exception type the
                # stock implementation uses, keeping the original traceback
                raise AddonFormatError(str(e), sys.exc_info()[2])
8 | 9 | The direct Selenium, Puppeteer, and Marionette tests are defined in: 10 | 11 | - [marionette-execute-async-script.py](marionette-execute-async-script.py) - The Marionette test script. 12 | - [puppeteer-evaluate-on-new-document.js](puppeteer-evaluate-on-new-document.js) - The Puppeteer test script. 13 | - [selenium-execute-async-script.py](selenium-execute-async-script.py) - The Selenium test script. 14 | 15 | The Web Extension for script injection is then defined in the [extension](extension) subdirectory. 16 | 17 | - [./extension/injected-javascript.js](./extension/injected-javascript.js) - The script to be injected. 18 | - [./extension/manifest.json](./extension/manifest.json) - The manifest for the extension. 19 | 20 | The script for performing the extension test is then located in: 21 | 22 | - [selenium-custom-web-extension.py](selenium-custom-web-extension.py) - Launches Chrome and Firefox with the extension loaded and performs the test. 23 | 24 | Finally, there is a test that uses [mitmproxy](https://mitmproxy.org/) to inject a script tag. 25 | This consists of two parts: 26 | 27 | - [mitm-injector.py](mitm-injector.py) - The injection script. 28 | - [selenium-mitmproxy.py](selenium-mitmproxy.py) - The test script that goes through the proxy.
// Capture the moment this script starts running, then show it in the
// `#injected-time` element as soon as the DOM can be queried.
{
  const injectedAt = Date.now();

  const showInjectedTime = () => {
    document.getElementById("injected-time").innerHTML = injectedAt;
  };

  // While the document is still loading the element may not exist yet, so
  // wait for the DOM; otherwise it can be written to immediately.
  const stillLoading = document.readyState === "loading";
  if (stillLoading) {
    document.addEventListener("DOMContentLoaded", showInjectedTime);
  } else {
    showInjectedTime();
  }
}
def response(flow):
    """mitmproxy `response` hook: prepend the injected script to HTML pages.

    Only responses with status 200 whose `Content-Type` media type is
    `text/html` are modified. Parameters such as `; charset=utf-8` and a
    missing header are tolerated; the previous exact comparison raised
    KeyError without the header and skipped pages that declare a charset.

    Args:
        flow: The flow whose `response` may be rewritten in place.
    """
    # Only process 200 responses of HTML content.
    if flow.response.status_code != 200:
        return
    content_type = flow.response.headers.get('Content-Type', '')
    media_type = content_type.split(';', 1)[0].strip().lower()
    if media_type != 'text/html':
        return

    # Inject a script tag containing the JavaScript.
    html = BeautifulSoup(flow.response.text, 'lxml')
    container = html.head or html.body
    if container:
        script = html.new_tag('script', type='text/javascript')
        script.string = injected_javascript
        # first child, so it is placed ahead of the page's own markup
        container.insert(0, script)
        flow.response.text = str(html)

        ctx.log.info('Successfully injected the `injected-javascript.js` script.')
# This must be the developer edition to use an unsigned extension.
firefox_binary = '/usr/bin/firefox-developer-edition'
extension_directory = 'extension'


# Patch in support for WebExtensions in Firefox.
# See: https://intoli.com/blog/firefox-extensions-with-selenium/
class FirefoxProfileWithWebExtensionSupport(webdriver.FirefoxProfile):
    """A `FirefoxProfile` that falls back to `manifest.json` for add-on details."""

    def _addon_details(self, addon_path):
        """Return id, version, name and unpack flag for the add-on at `addon_path`.

        The stock lookup is tried first; if it raises `AddonFormatError` the
        values are read from the add-on's `manifest.json`, and problems with
        that file are reported as `AddonFormatError` too.
        """
        try:
            return super()._addon_details(addon_path)
        except AddonFormatError:
            try:
                with open(os.path.join(addon_path, 'manifest.json'), 'r') as f:
                    manifest = json.load(f)
                    return {
                        'id': manifest['applications']['gecko']['id'],
                        'version': manifest['version'],
                        'name': manifest['name'],
                        'unpack': False,
                    }
            except (IOError, KeyError) as e:
                raise AddonFormatError(str(e), sys.exc_info()[2])
webdriver.FirefoxProfile = FirefoxProfileWithWebExtensionSupport


# Loop through the four different configurations.
for browser in ['chrome', 'firefox']:
    for mode in ['headless', 'graphical']:
        # Set up the driver with the appropriate settings.
        if browser == 'chrome':
            options = webdriver.ChromeOptions()
            if mode == 'headless':
                options.add_argument('headless')
            options.add_argument(f'load-extension={extension_directory}')
            driver = webdriver.Chrome(chrome_options=options)
        elif browser == 'firefox':
            # Firefox picks up headless mode from the environment.
            if mode == 'headless':
                os.environ['MOZ_HEADLESS'] = '1'
            elif mode == 'graphical':
                os.environ.pop('MOZ_HEADLESS', None)
            profile = webdriver.FirefoxProfile()
            profile.add_extension(extension_directory)
            driver = webdriver.Firefox(profile, firefox_binary=firefox_binary)

        try:
            # Navigate to the test page and let the extension do its thing.
            driver.get('https://intoli.com/blog/javascript-injection/test-page.html')

            # Save the results as an image.
            os.makedirs('img', exist_ok=True)
            filename = os.path.join('img',
                f'selenium-custom-web-extension-{browser}-{mode}-results.png')
            driver.get_screenshot_as_file(filename)
            print(f'Saved "{filename}".')
        finally:
            # Cleanup the driver before the next test, even when the
            # navigation or the screenshot failed, so no browser is left
            # running.
            driver.quit()
# The proxy settings.
proxy_host = 'localhost'
proxy_port = 8080

# Loop through the four different configurations.
for browser in ['chrome', 'firefox']:
    for mode in ['headless', 'graphical']:
        # Set up the driver with the appropriate settings.
        if browser == 'chrome':
            # Enable headless mode.
            options = webdriver.ChromeOptions()
            if mode == 'headless':
                options.add_argument('--headless')

            # Specify the proxy.
            options.add_argument('--proxy-server=%s:%s' % (proxy_host, proxy_port))

            # Launch Chrome.
            driver = webdriver.Chrome(chrome_options=options)

        elif browser == 'firefox':
            # Enable headless mode.
            if mode == 'headless':
                os.environ['MOZ_HEADLESS'] = '1'
            elif mode == 'graphical':
                os.environ.pop('MOZ_HEADLESS', None)

            firefox_profile = webdriver.FirefoxProfile()
            # Specify to use manual proxy configuration.
            firefox_profile.set_preference('network.proxy.type', 1)
            # Set the host/port for both plain HTTP and TLS traffic. The HTTP
            # port preference is `http_port`; the `https_port` name used
            # before is not a Firefox preference, which left the HTTP proxy
            # without a port.
            firefox_profile.set_preference('network.proxy.http', proxy_host)
            firefox_profile.set_preference('network.proxy.http_port', proxy_port)
            firefox_profile.set_preference('network.proxy.ssl', proxy_host)
            firefox_profile.set_preference('network.proxy.ssl_port', proxy_port)

            # Launch Firefox.
            driver = webdriver.Firefox(firefox_profile=firefox_profile)

        try:
            # Navigate to the test page; the proxy is what injects the JavaScript.
            driver.get('https://intoli.com/blog/javascript-injection/test-page.html')

            # Save the results as an image.
            os.makedirs('img', exist_ok=True)
            filename = os.path.join('img',
                f'selenium-mitmproxy-{browser}-{mode}-results.png')
            driver.get_screenshot_as_file(filename)
            print(f'Saved "{filename}".')
        finally:
            # Cleanup the driver before the next test, even when the
            # navigation or the screenshot failed.
            driver.quit()

Inlined Script Time:

15 |

Injected Script Time:

16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /articles/keras-weight-transfer/README.md: -------------------------------------------------------------------------------- 1 | # How to Run a Keras Model in the Browser with Keras.js 2 | 3 | [How to Run a Keras Model in the Browser with Keras.js](https://intoli.com/blog/keras-weight-transfer) is a worked-out end-to-end example explaining how to export weights from a [Keras](https://keras.io/) model, and then import and use them in the browser via [keras-js](https://github.com/transcranial/keras-js). 4 | Since the article was originally written, the `keras-js` project has improved its utilities and documentation, so the only difficulty is in using compatible versions of the packages involved in this process. 5 | 6 | 7 | ## Export the Weights 8 | 9 | The model in question ([neural-net/mnist-cnn.py](neural-net/mnist-cnn.py)) is a version of [Keras's sample MNIST classifier](https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py) modified to train quickly (by restricting the data and limiting the training to one epoch). 10 | To get started, you need to first export the weights from this model. 11 | Clone this repo, then `cd` to the [neural-net](neural-net/) folder, and start and activate a new virtualenv: 12 | 13 | ```bash 14 | cd neural-net 15 | virtualenv env 16 | .
env/bin/activate 17 | ``` 18 | 19 | Install the Python requirements: 20 | 21 | ```bash 22 | pip install -r requirements.txt 23 | ``` 24 | 25 | Train and save the model to `model.h5`: 26 | 27 | ```bash 28 | python ./mnist-cnn.py 29 | ``` 30 | 31 | Download [a compatible version of the model preparation script and its dependency](https://github.com/transcranial/keras-js/tree/a5e6d2cc330ec8d979310bd17a47f07882fac778/python) from the keras-js repo: 32 | 33 | ```bash 34 | bash ./download-encoder.sh 35 | ``` 36 | 37 | Finally, prepare the model with: 38 | 39 | ```bash 40 | python ./encoder.py -q model.h5 41 | ``` 42 | 43 | This will produce a `model.bin` file that can be used in the `filepath` option of a [`keras-js` Model](https://transcranial.github.io/keras-js-docs/usage/). 44 | I used Python 3.6 for this example, but things should work with Python 2 as well. 45 | 46 | 47 | ## Set Up the Frontend 48 | 49 | To actually use these files, you need to run the [frontend/src/index.js](frontend/src/index.js) script in the browser. 50 | The included [webpack](https://webpack.js.org/) config can help you get started. 51 | First, install the project's JavaScript build and runtime requirements with 52 | 53 | ```bash 54 | cd frontend/ 55 | yarn install 56 | ``` 57 | 58 | Make sure that `model.bin` from above exists, and execute 59 | 60 | ```bash 61 | yarn watch 62 | ``` 63 | 64 | to start a live-reloading development server accessible at `localhost:3000`. 65 | Visiting that address in a browser like Chrome should go from showing `Loading...` to 66 | 67 | ```literal 68 | Predicted 3 with probability 0.297.
69 | ``` 70 | -------------------------------------------------------------------------------- /articles/keras-weight-transfer/frontend/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | "env", 4 | "stage-2" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /articles/keras-weight-transfer/frontend/.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "babel-eslint", 3 | "extends": "airbnb", 4 | "env": { 5 | "browser": true, 6 | "es6": true, 7 | "node": true 8 | }, 9 | "settings": { 10 | "import/resolver": { 11 | "webpack": { 12 | "config": "./webpack.config.js" 13 | } 14 | } 15 | }, 16 | "rules": { 17 | "class-methods-use-this": "off", 18 | "function-paren-newline": "off", 19 | "object-curly-newline": ["error", { 20 | "consistent": true, 21 | "minProperties": 5 22 | }] 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /articles/keras-weight-transfer/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "lint": "eslint --ignore-path .gitignore --ext js,jsx src", 4 | "watch": "NODE_ENV=development webpack-dev-server --config webpack.config.js --hot" 5 | }, 6 | "devDependencies": { 7 | "babel-core": "^6.26.0", 8 | "babel-eslint": "^8.2.2", 9 | "babel-loader": "^7.1.3", 10 | "babel-plugin-import": "^1.6.5", 11 | "babel-preset-env": "^1.6.1", 12 | "babel-preset-stage-2": "^6.24.1", 13 | "copy-webpack-plugin": "^4.5.0", 14 | "eslint": "^4.18.2", 15 | "eslint-config-airbnb": "^16.1.0", 16 | "eslint-loader": "^2.0.0", 17 | "eslint-plugin-import": "^2.9.0", 18 | "eslint-plugin-jsx-a11y": "^6.0.3", 19 | "eslint-plugin-react": "^7.7.0", 20 | "html-webpack-plugin": "^3.0.4", 21 | "url-loader": "^1.0.1", 22 | "webpack": "3.10.0", 23 | "webpack-cli": "2.0.9", 24 | "webpack-dev-server": 
"2.11.2" 25 | }, 26 | "dependencies": { 27 | "keras-js": "1.0.3" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /articles/keras-weight-transfer/frontend/src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Keras Weight Transfer 7 | 8 | 9 | 10 | 11 |
// Show a placeholder once the DOM has been parsed.
const showLoadingMessage = () => {
  document.write('Loading...');
};
document.addEventListener('DOMContentLoaded', showLoadingMessage);

// Make sure to copy model.bin to the public directory.
const model = new Model({ filepath: 'model.bin' });

// Returns the entry of `output` with the highest probability; the earliest
// entry wins a tie because only a strictly greater value replaces the best.
const findMostLikely = output => Object.entries(output).reduce(
  (best, [digit, probability]) => (
    probability > best.probability ? { digit, probability } : best
  ),
  { digit: null, probability: -1 },
);

// Feeds the bundled sample to the model.
const predictSample = () => model.predict({
  input: new Float32Array(sample),
});

// Writes the winning digit and its probability into the page.
const reportPrediction = ({ output }) => {
  const { digit, probability } = findMostLikely(output);
  document.write(
    `Predicted ${digit} with probability ${probability.toFixed(3)}.`,
  );
};

// Perform a prediction once `model.ready()` resolves and write the result to
// the document; failures are logged to the console.
model.ready()
  .then(predictSample)
  .then(reportPrediction)
  .catch((error) => {
    console.log(error);
  });
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4509804, 0.4745098, 0.9137255, 0.85490197, 0.4745098, 0.4745098, 0.4745098, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.22352941, 0.94509804, 0.9843137, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.93333334, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05882353, 0.36862746, 0.67058825, 0.9411765, 0.99215686, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.46666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.16078432, 0.9882353, 0.9882353, 0.9882353, 0.99215686, 0.85490197, 0.6745098, 0.6745098, 0.53333336, 0.15294118, 0.7254902, 0.9882353, 0.9882353, 0.46666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.11764706, 0.7254902, 0.7254902, 0.7254902, 0.20784314, 0.12156863, 0.0, 0.0, 0.0, 0.05882353, 0.7607843, 0.9882353, 0.9882353, 0.46666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.19607843, 0.78431374, 0.9882353, 0.9882353, 0.9764706, 0.36862746, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13333334, 0.6509804, 0.9882353, 0.9882353, 0.9137255, 0.29411766, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.047058824, 0.3019608, 0.93333334, 0.9882353, 0.9882353, 0.83137256, 0.3254902, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03137255, 0.05490196, 0.5764706, 0.7490196, 0.9882353, 0.9882353, 0.972549, 0.8235294, 0.12941177, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57254905, 0.9882353, 0.99215686, 0.9882353, 0.9882353, 0.9882353, 0.7254902, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5764706, 0.99215686, 1.0, 0.99215686, 0.99215686, 0.99215686, 0.85490197, 0.37254903, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33333334, 0.5686275, 0.57254905, 0.5686275, 0.94509804, 0.9882353, 0.9882353, 0.9764706, 0.29803923, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.28627452, 0.92941177, 0.9882353, 0.9882353, 0.30980393, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8352941, 0.9882353, 0.9882353, 0.30980393, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09803922, 0.8745098, 0.9882353, 0.9882353, 0.30980393, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.11372549, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09019608, 0.77254903, 0.9882353, 0.9882353, 0.9882353, 0.30980393, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.078431375, 0.654902, 0.84313726, 0.5137255, 0.11764706, 0.0, 0.0, 0.0, 0.0, 0.08627451, 0.16078432, 0.78431374, 0.9882353, 0.9882353, 0.94509804, 0.72156864, 0.09803922, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4745098, 0.9882353, 0.9882353, 0.9882353, 0.8901961, 0.627451, 0.627451, 0.627451, 0.627451, 0.8156863, 0.99215686, 0.9882353, 0.9882353, 0.8980392, 0.3764706, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.6313726, 0.98039216, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.99215686, 0.9372549, 0.7411765, 0.06666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.41960785, 0.6313726, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.47058824, 0.023529412, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
const path = require('path');

const CopyWebpackPlugin = require('copy-webpack-plugin');
const HtmlWebpackPlugin = require('html-webpack-plugin');


// Directory that receives the bundle and the copied model, and that the
// development server uses as its content base.
const buildDirectory = path.join(__dirname, 'build');

// Development server settings.
const devServer = {
  clientLogLevel: 'info',
  contentBase: buildDirectory,
  historyApiFallback: true,
  overlay: {
    errors: true,
    warnings: false,
  },
  port: 3000,
  publicPath: '/',
  stats: {
    modules: false,
    chunks: false,
  },
};

// Lint project sources before the other loaders run (`enforce: 'pre'`).
const lintRule = {
  test: /\.(js|jsx)$/,
  exclude: /node_modules/,
  enforce: 'pre',
  loader: 'eslint-loader',
};

// Transpile project sources with Babel.
const transpileRule = {
  test: /\.(js|jsx)$/,
  exclude: /node_modules/,
  loader: 'babel-loader',
};

const plugins = [
  // Build the page from the HTML template.
  new HtmlWebpackPlugin({
    inject: true,
    template: './src/index.html',
  }),
  // Copy the encoded model produced in ../neural-net into the build directory.
  new CopyWebpackPlugin([
    {
      from: '../neural-net/model.bin',
      to: buildDirectory,
    },
  ]),
];

const config = {
  devServer,
  devtool: 'cheap-module-source-map',
  entry: path.join(__dirname, 'src', 'index.js'),
  // NOTE(review): presumably keeps the `fs` module required by a dependency
  // out of the browser bundle — confirm against the keras-js setup notes.
  externals: {
    fs: 'empty',
  },
  module: {
    rules: [lintRule, transpileRule],
  },
  output: {
    filename: 'bundle.js',
    path: buildDirectory,
    publicPath: '/',
  },
  plugins,
  watchOptions: {
    ignored: /build/,
  },
};


module.exports = config;
22 | 23 | LICENSE 24 | 25 | The MIT License (MIT) 26 | 27 | Permission is hereby granted, free of charge, to any person obtaining a copy 28 | of this software and associated documentation files (the "Software"), to deal 29 | in the Software without restriction, including without limitation the rights 30 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 31 | copies of the Software, and to permit persons to whom the Software is 32 | furnished to do so, subject to the following conditions: 33 | 34 | The above copyright notice and this permission notice shall be included in all 35 | copies or substantial portions of the Software. 36 | 37 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 38 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 39 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 40 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 41 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 42 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 43 | SOFTWARE. 44 | 45 | --- 46 | 47 | This is a modification of a Keras example CNN script [1] for the purposes of the 48 | 'How to Run a Keras Model in the Browser with Keras.js' article [2] published on the Intoli blog. 49 | 50 | [1]: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py 51 | [2]: https://intoli.com/blog/keras-weight-transfer/ 52 | """ 53 | from __future__ import print_function 54 | import keras 55 | from keras.datasets import mnist 56 | from keras.models import Sequential 57 | from keras.layers import Dense, Dropout, Flatten 58 | from keras.layers import Conv2D, MaxPooling2D 59 | from keras import backend as K 60 | 61 | batch_size = 128 62 | num_classes = 10 63 | epochs = 1 # Note that this script uses only one epoch. 64 | 65 | # Load, restrict, and prepare data. 
66 | img_rows, img_cols = 28, 28 67 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 68 | 69 | x_train = x_train[:1280] 70 | y_train = y_train[:1280] 71 | 72 | x_test = x_test[:512] 73 | y_test = y_test[:512] 74 | 75 | if K.image_data_format() == 'channels_first': 76 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 77 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 78 | input_shape = (1, img_rows, img_cols) 79 | else: 80 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 81 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 82 | input_shape = (img_rows, img_cols, 1) 83 | 84 | x_train = x_train.astype('float32') 85 | x_test = x_test.astype('float32') 86 | x_train /= 255 87 | x_test /= 255 88 | 89 | print('x_train shape:', x_train.shape) 90 | print(x_train.shape[0], 'train samples') 91 | print(x_test.shape[0], 'test samples') 92 | print(epochs, 'epochs') 93 | 94 | y_train = keras.utils.to_categorical(y_train, num_classes) 95 | y_test = keras.utils.to_categorical(y_test, num_classes) 96 | 97 | # Define, compile, and train model. 
98 | model = Sequential() 99 | model.add(Conv2D(32, kernel_size=(3, 3), 100 | activation='relu', 101 | input_shape=input_shape)) 102 | model.add(Conv2D(64, (3, 3), activation='relu')) 103 | model.add(MaxPooling2D(pool_size=(2, 2))) 104 | model.add(Dropout(0.25)) 105 | model.add(Flatten()) 106 | model.add(Dense(128, activation='relu')) 107 | model.add(Dropout(0.5)) 108 | model.add(Dense(num_classes, activation='softmax')) 109 | 110 | model.compile(loss=keras.losses.categorical_crossentropy, 111 | optimizer=keras.optimizers.Adadelta(), 112 | metrics=['accuracy']) 113 | 114 | model.fit(x_train, y_train, 115 | batch_size=batch_size, 116 | epochs=epochs, 117 | verbose=1, 118 | validation_data=(x_test, y_test)) 119 | score = model.evaluate(x_test, y_test, verbose=0) 120 | 121 | print('Test loss:', score[0]) 122 | print('Test accuracy:', score[1]) 123 | print('-' * 80) 124 | 125 | # Export the trained model. 126 | model.save('model.h5') 127 | -------------------------------------------------------------------------------- /articles/keras-weight-transfer/neural-net/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.1.10 2 | astor==0.6.2 3 | bleach==1.5.0 4 | gast==0.2.0 5 | grpcio==1.10.0 6 | h5py==2.7.1 7 | html5lib==0.9999999 8 | Keras==2.1.2 9 | Markdown==2.6.11 10 | numpy==1.14.1 11 | protobuf==3.5.1 12 | PyYAML==3.12 13 | scipy==1.0.0 14 | six==1.11.0 15 | tensorboard==1.6.0 16 | tensorflow==1.6.0 17 | termcolor==1.1.0 18 | Werkzeug==0.14.1 19 | -------------------------------------------------------------------------------- /articles/making-chrome-headless-undetectable/README.md: -------------------------------------------------------------------------------- 1 | # Making Chrome Headless Undetectable 2 | 3 | [Making Chrome Headless Undetectable](https://intoli.com/blog/making-chrome-headless-undetectable/) is a response to a set of JavaScript based tests that were floating around the internet as a way to 
block users of headless browsers. 4 | It shows that these tests have high false positive rates and can be easily bypassed. 5 | 6 | The tests were implemented as a web page that displays the results in a visual table. 7 | The code for the tests is located in: 8 | 9 | - [chrome-headless-test.html](chrome-headless-test.html) - The page that defines the results table and imports the test script. 10 | - [chrome-headless-test.js](chrome-headless-test.js) - The associated JavaScript that performs the actual tests and populates the table. 11 | 12 | The tests are then bypassed by injecting JavaScript into the page before it loads. 13 | 14 | - [injected-test-bypasses.js](injected-test-bypasses.js) - The test bypasses that are developed in the article. 15 | - [inject.py](inject.py) - A [mitmproxy](https://mitmproxy.org/) script for injecting `injected-test-bypasses.js`. 16 | - [test-headless.js](test-headless.js) - A browser automation script written using the [Chrome DevTools Protocol](https://chromedevtools.github.io/devtools-protocol/) which visits the test page and records a screenshot of the results. 17 | 18 | Details for running the proxy and installing the dependencies can be found in [the original article](https://intoli.com/blog/making-chrome-headless-undetectable/). 19 | -------------------------------------------------------------------------------- /articles/making-chrome-headless-undetectable/chrome-headless-test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Chrome Headless Detection 4 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 |
Test NameResult
User Agent
Plugins Length
Languages
WebGL Vendor
WebGL Renderer
Hairline Feature
Broken Image Dimensions
// Headless-Chrome detection tests.
//
// Each test writes the value it observed into its cell of the results table
// (looked up by element id) and adds the `failed` class to that cell when the
// value matches the headless fingerprint being checked. The values come from
// the browser and are not markup, so they are written with `textContent`.

// User-Agent Test
const userAgentElement = document.getElementById('user-agent');
userAgentElement.textContent = window.navigator.userAgent;
if (/HeadlessChrome/.test(window.navigator.userAgent)) {
  userAgentElement.classList.add('failed');
}

// Plugins Length Test
const pluginsLengthElement = document.getElementById('plugins-length');
pluginsLengthElement.textContent = navigator.plugins.length;
if (navigator.plugins.length === 0) {
  pluginsLengthElement.classList.add('failed');
}

// Languages Test
const languagesElement = document.getElementById('languages');
languagesElement.textContent = navigator.languages;
if (!navigator.languages || navigator.languages.length === 0) {
  languagesElement.classList.add('failed');
}

// WebGL Tests
const canvas = document.createElement('canvas');
// `experimental-webgl` is the legacy identifier for the WebGL 1 context; the
// previously used `webgl-experimental` is not a recognized context type.
const gl = canvas.getContext('webgl') || canvas.getContext('experimental-webgl');
// The extension is optional: `getExtension` returns null when it is not
// available, in which case both WebGL cells are left empty instead of
// throwing and aborting the remaining tests.
const debugInfo = gl ? gl.getExtension('WEBGL_debug_renderer_info') : null;
if (debugInfo) {
  // WebGL Vendor Test
  const webGLVendorElement = document.getElementById('webgl-vendor');
  const vendor = gl.getParameter(debugInfo.UNMASKED_VENDOR_WEBGL);
  webGLVendorElement.textContent = vendor;
  if (vendor === 'Brian Paul') {
    webGLVendorElement.classList.add('failed');
  }

  // WebGL Renderer Test
  const webGLRendererElement = document.getElementById('webgl-renderer');
  const renderer = gl.getParameter(debugInfo.UNMASKED_RENDERER_WEBGL);
  webGLRendererElement.textContent = renderer;
  if (renderer === 'Mesa OffScreen') {
    webGLRendererElement.classList.add('failed');
  }
}

// Hairline Feature Test (`Modernizr` is a global that must already be defined
// when this script runs).
const hairlineFeatureElement = document.getElementById('hairline-feature');
if (Modernizr.hairline) {
  hairlineFeatureElement.textContent = 'present';
} else {
  hairlineFeatureElement.textContent = 'missing';
  hairlineFeatureElement.classList.add('failed');
}

// Broken Image Dimensions Test
const brokenImageDimensionsElement = document.getElementById('broken-image-dimensions');
const body = document.body;
const image = document.createElement('img');
// The handler runs once the image below fails to load; a 0x0 size for the
// broken image is treated as the headless fingerprint.
image.onerror = function() {
  brokenImageDimensionsElement.textContent = `${image.width}x${image.height}`;
  if (image.width === 0 && image.height === 0) {
    brokenImageDimensionsElement.classList.add('failed');
  }
};
body.appendChild(image);
image.src = 'https://intoli.com/nonexistent-image.png';
// Test bypasses that are injected into a page before its own scripts run.
//
// Everything lives inside an immediately-invoked function so that the helper
// bindings below neither leak into nor collide with the global scope of the
// page that the script is injected into.
(function() {
  //
  // Bypass the Languages Test.
  //

  // Overwrite the `languages` property to use a custom getter.
  Object.defineProperty(navigator, 'languages', {
    get: function() {
      return ['en-US', 'en'];
    },
  });


  //
  // Bypass the Plugins Test.
  //

  // Overwrite the `plugins` property to use a custom getter.
  Object.defineProperty(navigator, 'plugins', {
    get: function() {
      // This just needs to have `length > 0`, but we could mock the plugins too.
      return [1, 2, 3, 4, 5];
    },
  });


  //
  // Bypass the WebGL test.
  //

  // Numeric values of the WEBGL_debug_renderer_info constants.
  const UNMASKED_VENDOR_WEBGL = 37445;
  const UNMASKED_RENDERER_WEBGL = 37446;

  // The method lives on the prototype, not on the constructor itself, and it
  // has to be invoked with the rendering context as `this` when delegating.
  const originalGetParameter = WebGLRenderingContext.prototype.getParameter;
  WebGLRenderingContext.prototype.getParameter = function getParameter(parameter) {
    if (parameter === UNMASKED_VENDOR_WEBGL) {
      return 'Intel Open Source Technology Center';
    }
    if (parameter === UNMASKED_RENDERER_WEBGL) {
      return 'Mesa DRI Intel(R) Ivybridge Mobile ';
    }

    // Every other parameter is answered by the real implementation.
    return originalGetParameter.call(this, parameter);
  };


  //
  // Bypass the Broken Image Test.
  //

  ['height', 'width'].forEach((property) => {
    // Store the existing descriptor.
    const imageDescriptor = Object.getOwnPropertyDescriptor(HTMLImageElement.prototype, property);

    // Redefine the property with a patched descriptor.
    Object.defineProperty(HTMLImageElement.prototype, property, {
      ...imageDescriptor,
      get: function() {
        // Return an arbitrary non-zero dimension if the image failed to load.
        if (this.complete && this.naturalHeight === 0) {
          return 20;
        }
        // Otherwise, return the actual dimension.
        return imageDescriptor.get.apply(this);
      },
    });
  });


  //
  // Bypass the Retina/HiDPI Hairline Feature Test.
  //

  // Store the existing descriptor.
  const elementDescriptor = Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'offsetHeight');

  // Redefine the property on `HTMLDivElement.prototype` with a patched descriptor.
  Object.defineProperty(HTMLDivElement.prototype, 'offsetHeight', {
    ...elementDescriptor,
    get: function() {
      // Report a height of 1 for the div whose id is `modernizr`.
      if (this.id === 'modernizr') {
        return 1;
      }
      return elementDescriptor.get.apply(this);
    },
  });
}());
console.error(`Error saving screenshot: ${err}`); 36 | } else { 37 | console.log(`"${filename}" written successfully.`); 38 | } 39 | client.close(); 40 | }); 41 | }, 1000); // 1 second delay for the tests to complete 42 | }).on('error', err => { 43 | console.error(`Error connecting to Chrome: ${err}`); 44 | }); 45 | -------------------------------------------------------------------------------- /articles/neural-network-initialization/README.md: -------------------------------------------------------------------------------- 1 | # Understanding Neural Network Weight Initialization 2 | 3 | This folder contains scripts for producing the plots used in the [Understanding Neural Network Weight Initialization](https://intoli.com/blog/neural-network-initialization/) article published on the [Intoli blog](https://intoli.com/blog/): 4 | 5 | - [plot-activation-layers.py](plot-activation-layers.py) visualizes the distribution of activations over 5 hidden layers of a Multi-Layer Perceptron using three different initializations. 6 | The script uses ReLU activations, although the article also includes a plot generated by changing `activation = 'relu'` to `activation = 'linear'` on line 52. 7 | ![ReLU MLP Activations under Three Initializations](images/relu-output-progression-violinplot.png) 8 | 9 | - [plot-loss-progression.py](plot-loss-progression.py) visualizes training loss over time as the network is trained using three different initializations. 10 | ![Loss over Time under Three Initializations](images/training-losses.png) 11 | 12 | To run the scripts, first grab the files from this folder: 13 | 14 | ```bash 15 | git clone https://github.com/Intoli/intoli-article-materials.git 16 | cd intoli-article-materials/articles/neural-network-initialization 17 | ``` 18 | 19 | Then, create a virtualenv and install the dependencies: 20 | 21 | ```bash 22 | virtualenv env 23 | .
env/bin/activate 24 | pip install -r requirements.txt 25 | ``` 26 | 27 | You may also need to choose a Matplotlib backend in order to successfully produce plots from a virtualenv. 28 | On macOS, this could be done with 29 | 30 | ```bash 31 | echo "backend: TkAgg" >> ~/.matplotlib/matplotlibrc 32 | ``` 33 | 34 | while on Linux you might have luck with 35 | 36 | ```bash 37 | echo "backend: Agg" >> ~/.matplotlib/matplotlibrc 38 | ``` 39 | 40 | Note that the scripts do not save files to disk and simply show the plot in a Matplotlib window. 41 | To make the plots just run the scripts using Python from the virtualenv: 42 | 43 | ```bash 44 | python plot-activation-layers.py 45 | ``` 46 | 47 | Note that [plot-loss-progression.py](plot-loss-progression.py) takes quite a while to run, since it trains a neural network on 10000 MNIST images three times. 48 | Also, if you use Python 3.6, TensorFlow might issue a runtime warning about having "compiletime version 3.5," but the scripts should still work. 
49 | -------------------------------------------------------------------------------- /articles/neural-network-initialization/images/relu-output-progression-violinplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intoli/intoli-article-materials/b01010ddc769ac20ce492bce478ee49c859c5db3/articles/neural-network-initialization/images/relu-output-progression-violinplot.png -------------------------------------------------------------------------------- /articles/neural-network-initialization/images/training-losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intoli/intoli-article-materials/b01010ddc769ac20ce492bce478ee49c859c5db3/articles/neural-network-initialization/images/training-losses.png -------------------------------------------------------------------------------- /articles/neural-network-initialization/plot-activation-layers.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import pandas as pd 5 | import seaborn as sns 6 | from keras import initializers 7 | from keras.datasets import mnist 8 | 9 | from utils import ( 10 | compile_model, 11 | create_mlp_model, 12 | get_activations, 13 | grid_axes_it, 14 | ) 15 | 16 | 17 | seed = 10 18 | 19 | # Number of points to plot 20 | n_train = 1000 21 | n_test = 100 22 | n_classes = 10 23 | 24 | # Network params 25 | n_hidden_layers = 5 26 | dim_layer = 100 27 | batch_size = n_train 28 | epochs = 1 29 | 30 | # Load and prepare MNIST dataset. 
31 | n_train = 60000 32 | n_test = 10000 33 | 34 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 35 | num_classes = len(np.unique(y_test)) 36 | data_dim = 28 * 28 37 | 38 | x_train = x_train.reshape(60000, 784).astype('float32')[:n_train] 39 | x_test = x_test.reshape(10000, 784).astype('float32')[:n_train] 40 | x_train /= 255 41 | x_test /= 255 42 | 43 | y_train = keras.utils.to_categorical(y_train, num_classes) 44 | y_test = keras.utils.to_categorical(y_test, num_classes) 45 | 46 | # Run the data through a few MLP models and save the activations from 47 | # each layer into a Pandas DataFrame. 48 | rows = [] 49 | sigmas = [0.10, 0.14, 0.28] 50 | for stddev in sigmas: 51 | init = initializers.RandomNormal(mean=0.0, stddev=stddev, seed=seed) 52 | activation = 'relu' 53 | 54 | model = create_mlp_model( 55 | n_hidden_layers, 56 | dim_layer, 57 | (data_dim,), 58 | n_classes, 59 | init, 60 | 'zeros', 61 | activation 62 | ) 63 | compile_model(model) 64 | output_elts = get_activations(model, x_test) 65 | n_layers = len(model.layers) 66 | i_output_layer = n_layers - 1 67 | 68 | for i, out in enumerate(output_elts[:-1]): 69 | if i > 0 and i != i_output_layer: 70 | for out_i in out.ravel()[::20]: 71 | rows.append([i, stddev, out_i]) 72 | 73 | df = pd.DataFrame(rows, columns=['Hidden Layer', 'Standard Deviation', 'Output']) 74 | 75 | # Plot previously saved activations from the 5 hidden layers 76 | # using different initialization schemes. 
import keras
import numpy as np
import seaborn as sns
from keras import initializers
from keras.datasets import mnist
from matplotlib import pyplot as plt

from utils import (
    get_init_id,
    grid_axes_it,
    compile_model,
    create_cnn_model,
    LossHistory,
)


sns.set_style('white')
sns.set_palette('colorblind')

batch_size = 128
num_classes = 10
epochs = 12

# The x-axis ticks drawn at the end of this script are laid out for a
# 12-epoch run. Check this up front rather than after all three trainings.
assert epochs == 12

# Load MNIST training data.
img_rows, img_cols = 28, 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Train the CNN under three different initialization schemes,
# and record loss over time.
inits = [
    initializers.Zeros(),
    initializers.RandomNormal(mean=0.0, stddev=0.4, seed=10),
    initializers.VarianceScaling(
        scale=2.0, mode='fan_in', distribution='normal', seed=10
    ),
]

# Both dictionaries are keyed by the identifier that `get_init_id` returns
# for the initializer.
loss_histories = {}
models = {}

for init in inits:
    init_id = get_init_id(init)

    print("Training CNN with initializer:")
    print(' ' + str(init))
    print(' ' + str(init.get_config()))

    model = create_cnn_model(input_shape, num_classes, kernel_initializer=init)
    compile_model(model)

    loss_history = LossHistory()
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test),
              callbacks=[loss_history])

    loss_histories[init_id] = loss_history
    models[init_id] = model


# Plot the loss over time for three initialization schemes.
# Each case is (key into `loss_histories`, plot title, line color). The titles
# are raw strings so that the TeX backslashes are not read as string escapes.
colors = sns.color_palette('colorblind', 6)
cases = [
    (
        'Zeros|',
        'Loss with Initial Weights Set to Zero',
        colors[3],
    ),
    (
        'RandomNormal|mean-0.0__stddev-0.4',
        r'Loss with Initial Weights Drawn from $N(0, \sigma = 0.4)$',
        colors[1],
    ),
    (
        'VarianceScaling|scale-2.0__mode-fan_in__distribution-normal',
        r'Loss with Initial Weights Drawn from $N(0, \sigma \sim \sqrt{2/n_i})$',
        colors[2],
    ),
]

# Number of interleaved sub-series the recorded losses are split into.
n_steps = 12
# Number of x-axis ticks per plot.
n_ticks = 6


def get_label(x, n_points):
    """Return the epoch label for tick position x on an axis n_points long."""
    if x == 0.0:
        return ''
    # Round before truncating so that float error cannot turn 2.0 into 1.
    return str(int(round(x / n_points * epochs)))


# `grid_axes_it` creates its own figure when none is passed in, so no figure
# is opened here; one opened beforehand would be left as an empty window.
axes = grid_axes_it(len(cases), 3)

for i, (case_id, label, color) in enumerate(cases):
    ax = next(axes)
    # presumably one recorded loss per training batch; see LossHistory in utils
    case_loss = loss_histories[case_id].losses

    # Split the history into `n_steps` interleaved sub-series: sub-series k
    # holds every `n_steps`-th loss starting at offset k.
    pseqs = [
        [float(x) for x in case_loss[step::n_steps]]
        for step in range(n_steps)
    ]

    # The sub-series differ in length when the number of losses is not a
    # multiple of `n_steps`; cut them to the shortest for a rectangular array.
    mlen = min(len(pseq) for pseq in pseqs)
    seqs = np.array([pseq[:mlen] for pseq in pseqs])

    sns.tsplot(seqs, ax=ax, color=color)

    xticks = [tick * mlen / float(n_ticks) for tick in range(n_ticks)]
    ax.set_xticks(xticks)
    ax.set_xticklabels([get_label(x, mlen) for x in xticks])

    ax.set_xlabel("Epoch", fontsize=14)
    if i == 0:
        ax.set_ylabel("Loss", fontsize=14)
    ax.set_title(label, fontsize=15)


plt.tight_layout()
plt.show()
def grid_axes_it(n_plots, n_cols=3, enumerate=False, fig=None):
    """
    Iterate through Axes objects on a grid with n_cols columns and as many
    rows as needed to accommodate n_plots many plots.

    Subplots are created with plt.subplot, i.e. on pyplot's current figure.
    Because this is a generator, nothing (including the optional figure
    creation) happens until the first Axes is requested.

    Args:
        n_plots: Number of plots to plot onto figure.
        n_cols: Number of columns to divide the figure into.
        enumerate: Unused; kept only so existing callers keep working.
        fig: Optional figure reference. When falsy, a new figure is created
            whose size is the default figure size scaled by the grid shape.

    Yields:
        n_plots many Axes objects on a grid.
    """
    # Ceiling division that stays an int: true division would produce a float
    # row count (and a wrong one whenever n_plots is not a multiple of n_cols).
    n_rows = n_plots // n_cols + int(n_plots % n_cols > 0)

    if not fig:
        default_figsize = rcParamsDefault['figure.figsize']
        fig = plt.figure(figsize=(
            default_figsize[0] * n_cols,
            default_figsize[1] * n_rows
        ))

    for i in range(1, n_plots + 1):
        ax = plt.subplot(n_rows, n_cols, i)
        yield ax
def get_init_id(init):
    """
    Returns string ID summarizing initialization scheme and its parameters.

    The ID has the form '<name>|<params>', where <params> joins a 'key-value'
    entry for every item of init.get_config() except 'seed' with '__'.

    Args:
        init: Instance of some initializer from keras.initializers.

    Returns:
        The ID string, e.g. 'RandomNormal|mean-0.0__stddev-0.4'.
    """
    description = str(init)
    try:
        # Third dot-separated component of the string form, cut at the first
        # space (for a repr like '<pkg.module.Name object at ...>' -> 'Name').
        init_name = description.split('.')[2].split(' ')[0]
    except IndexError:
        # Fewer than three dotted components: fall back to the first word.
        init_name = description.split(' ')[0].replace('.', '_')

    # The seed does not characterize the scheme, so it is left out of the ID.
    init_params = '__'.join(
        '{k}-{v}'.format(k=k, v=v)
        for k, v in init.get_config().items()
        if k != 'seed'
    )

    return '|'.join([init_name, init_params])
4 | The script's implementation details are described in the [Implementing a Custom Waiting Action in Nightmare JS](https://intoli.com/blog/nightmare-network-idle/) article published on the [Intoli blog](https://intoli.com/blog/). 5 | 6 | To run the script, you need to have [Node.js](https://nodejs.org/en/) and [yarn](https://yarnpkg.com/en/) installed. 7 | With that out of the way, download the contents of this directory to disk. 8 | 9 | ```bash 10 | git clone https://github.com/Intoli/intoli-article-materials.git 11 | cd intoli-article-materials/articles/nightmare-network-idle 12 | ``` 13 | 14 | Then install the dependencies with 15 | 16 | ```bash 17 | yarn install 18 | ``` 19 | 20 | The mocha test script [test.js](test.js) runs the custom action a few times. 21 | Run the test with 22 | 23 | ```bash 24 | yarn run test 25 | ``` 26 | -------------------------------------------------------------------------------- /articles/nightmare-network-idle/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nightmare-network-idle", 3 | "version": "1.0.0", 4 | "description": "A script which uses Puppeteer to scrape pages with infinite scroll.", 5 | "repository": { 6 | "type": "git", 7 | "url": "https://github.com/Intoli/intoli-article-materials.git" 8 | }, 9 | "scripts": { 10 | "test": "./node_modules/.bin/mocha" 11 | }, 12 | "keywords": [ 13 | "testing", 14 | "javascript", 15 | "nightmare" 16 | ], 17 | "author": "Andre Perunicic / Intoli, LLC", 18 | "license": "BSD-2-Clause", 19 | "bugs": { 20 | "url": "https://github.com/Intoli/intoli-article-materials/issues" 21 | }, 22 | "homepage": "https://intoli.com/blog/nightmare-network-idle/", 23 | "dependencies": { 24 | "mocha": "^5.0.0", 25 | "nightmare": "^2.10.0" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /articles/nightmare-network-idle/test.js: 
const Nightmare = require('nightmare');

Nightmare.action('waitUntilNetworkIdle',
  // The first callback defines the action on Electron's end,
  // making some internal objects available.
  function (name, options, parent, win, renderer, done) {

    // `parent` is Electron's reference to the object that
    // passes messages between Electron and Nightmare.
    parent.respondTo('waitUntilNetworkIdle', (waitTime, finish) => {
      let lastRequestTime = Date.now();

      // Every response resets the idle timer.
      const onResponse = () => {
        lastRequestTime = Date.now();
      };

      // win.webContents allows us to control the internal
      // Electron BrowserWindow instance.
      win.webContents.on('did-get-response-details', onResponse);

      const check = () => {
        const elapsedTime = Date.now() - lastRequestTime;
        if (elapsedTime >= waitTime) {
          // Detach the listener so that repeated uses of the action
          // don't pile up handlers on the same window.
          win.webContents.removeListener('did-get-response-details', onResponse);
          finish(); // Complete the action.
        } else {
          // Only wait for the remainder of the idle period.
          setTimeout(check, waitTime - elapsedTime);
        }
      };
      setTimeout(check, waitTime);
    });

    done(); // Complete the action's *creation*.
  },
  // The second callback runs on Nightmare's end and determines
  // the action's interface.
  function (waitTime, done) {
    // This is necessary because the action will only work if
    // action arguments are specified before `done`, and because
    // we wish to support calls without arguments.
    if (!done) {
      done = waitTime;
      waitTime = 500;
    }

    // `this.child` is Nightmare's reference to the object that
    // passes messages between Electron and Nightmare.
    this.child.call('waitUntilNetworkIdle', waitTime, done);
  });

module.exports = Nightmare;
5 | -------------------------------------------------------------------------------- /articles/not-possible-to-block-chrome-headless/README.md: -------------------------------------------------------------------------------- 1 | # It is *not* possible to detect and block Chrome headless 2 | 3 | [It is *not* possible to detect and block Chrome headless](https://intoli.com/blog/not-possible-to-block-chrome-headless/) is our second installment of techniques to bypass the user-hostile practice of blocking users based on characteristics of their web browsers (see also: [Making Chrome Headless Undetectable](https://intoli.com/blog/making-chrome-headless-undetectable/)). 4 | The test suite is implemented in [chrome-headless-test.html](chrome-headless-test.html) and [chrome-headless-test.js](chrome-headless-test.js). 5 | You can visit the live test page at [https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html](https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html) to see how your current browser would fair. 6 | The results should look something like this, where red indicates a headless Chrome indicator. 7 | 8 | ![Headless Chromium Results](img/headless-initial-results.png) 9 | 10 | The test results used in the article are generated using two scripts: [test-headless-initial.js](test-headless-initial.js) and [test-headless-final.js](test-headless-final.js). 11 | These both use [Puppeteer](https://github.com/GoogleChrome/puppeteer) as a browser automation framework to visit the test page and take a screenshot of the results. 12 | The Puppeteer dependency is included in the [package.json](package.json) file and you can install the dependencies by running 13 | 14 | ```bash 15 | yarn install 16 | ``` 17 | 18 | in this directory. 19 | You can then run the [test-headless-initial.js](test-headless-inital.js) script, which doesn't include any bypasses, with the following command. 
20 | 21 | ```bash 22 | node test-headless-initial.js 23 | ``` 24 | 25 | This will create the [headless-initial-results.png](img/headless-initial-results.png) that you can see above. 26 | 27 | To run the tests with the bypasses, you simply need to change the name of the script to [test-headless-final.js](test-headless-final.js). 28 | 29 | ```bash 30 | node test-headless-final.js 31 | ``` 32 | 33 | This will create a second [headless-final-results.png](img/headless-final-results.png) image which looks like this. 34 | 35 | ![Headless Chromium Results with Bypasses](img/headless-final-results.png) 36 | 37 | As you can see, all of the tests have been bypassed! 38 | You can peruse the [test-headless-final.js](test-headless-final.js) source code to see how the bypasses are implemented, or visit [the original article](https://intoli.com/blog/not-possible-to-block-chrome-headless/) for a more in-depth explanation of how they work. 39 | -------------------------------------------------------------------------------- /articles/not-possible-to-block-chrome-headless/chrome-headless-test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Chrome Headless Detection (Round II) 4 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 |
Test NameResult
User Agent (Old)
WebDriver (New)missing (passed)
Chrome (New)present (passed)
Permissions (New)
Plugins Length (Old)
Languages (Old)
// Each test writes its observation into the matching result cell and adds the
// `failed` class when the value looks like headless Chrome. The values are
// plain text, so they are written with `textContent` rather than `innerHTML`.

// User-Agent Test
const userAgentElement = document.getElementById('user-agent-result');
userAgentElement.textContent = navigator.userAgent;
if (/HeadlessChrome/.test(navigator.userAgent)) {
  userAgentElement.classList.add('failed');
}

// Webdriver Test
const webdriverElement = document.getElementById('webdriver-result');
if (navigator.webdriver) {
  webdriverElement.classList.add('failed');
  webdriverElement.textContent = 'present (failed)';
}

// Chrome Test
const chromeElement = document.getElementById('chrome-result');
if (!window.chrome) {
  chromeElement.classList.add('failed');
  chromeElement.textContent = 'missing (failed)';
}

// Permissions Test
const permissionsElement = document.getElementById('permissions-result');
(async () => {
  try {
    const permissionStatus = await navigator.permissions.query({ name: 'notifications' });
    permissionsElement.textContent = permissionStatus.state;
    // The two APIs disagreeing about the same permission is the indicator.
    if (Notification.permission === 'denied' && permissionStatus.state === 'prompt') {
      permissionsElement.classList.add('failed');
    }
  } catch (error) {
    // The Permissions or Notification API is unavailable or the query was
    // rejected; report it instead of leaving an unhandled rejection.
    permissionsElement.textContent = 'unavailable';
    console.error('Permissions test could not run:', error);
  }
})();

// Plugins Length Test
const pluginsLengthElement = document.getElementById('plugins-length-result');
pluginsLengthElement.textContent = navigator.plugins.length;
if (navigator.plugins.length === 0) {
  pluginsLengthElement.classList.add('failed');
}

// Languages Test
const languagesElement = document.getElementById('languages-result');
languagesElement.textContent = navigator.languages;
if (!navigator.languages || navigator.languages.length === 0) {
  languagesElement.classList.add('failed');
}
// Registers one bypass per check performed by the headless test page.
// `page` only needs `setUserAgent(string)` and `evaluateOnNewDocument(fn)`;
// every function handed to `evaluateOnNewDocument` is self-contained because
// it runs inside the page rather than in this script.
const preparePageForTests = async (page) => {
  // Pass the User-Agent Test.
  // Note the trailing space: the two halves are separate UA tokens.
  const userAgent = 'Mozilla/5.0 (X11; Linux x86_64) ' +
    'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36';
  await page.setUserAgent(userAgent);

  // Pass the Webdriver Test.
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'webdriver', {
      get: () => false,
    });
  });

  // Pass the Chrome Test.
  await page.evaluateOnNewDocument(() => {
    // The test page checks `window.chrome`, so that is what gets mocked.
    // We can mock this in as much depth as we need for the test.
    window.chrome = {
      runtime: {},
      // etc.
    };
  });

  // Pass the Permissions Test.
  await page.evaluateOnNewDocument(() => {
    const { permissions } = window.navigator;
    // Keep the original bound to its owner; calling it detached would throw.
    const originalQuery = permissions.query.bind(permissions);
    permissions.query = (parameters) => (
      parameters.name === 'notifications' ?
        // Report the same state as `Notification.permission` so the two agree.
        Promise.resolve({ state: Notification.permission }) :
        originalQuery(parameters)
    );
  });

  // Pass the Plugins Length Test.
  await page.evaluateOnNewDocument(() => {
    // Overwrite the `plugins` property to use a custom getter.
    Object.defineProperty(navigator, 'plugins', {
      // This just needs to have `length > 0` for the current test,
      // but we could mock the plugins too if necessary.
      get: () => [1, 2, 3, 4, 5],
    });
  });

  // Pass the Languages Test.
  await page.evaluateOnNewDocument(() => {
    // Overwrite the `languages` property to use a custom getter.
    Object.defineProperty(navigator, 'languages', {
      get: () => ['en-US', 'en'],
    });
  });
};
// We'll use Puppeteer as our browser automation framework.
const puppeteer = require('puppeteer');

// This is where we'll put the code to get around the tests.
const preparePageForTests = async (page) => {
  // TODO: Not implemented yet.
};

// Visits the test page in headless Chrome and saves a screenshot of the
// results to `headless-initial-results.png` in the working directory.
(async () => {
  // Launch the browser in headless mode and set up a page.
  const browser = await puppeteer.launch({
    args: ['--no-sandbox'],
    headless: true,
  });

  try {
    const page = await browser.newPage();

    // Prepare for the tests (not yet implemented).
    await preparePageForTests(page);

    // Navigate to the page that will perform the tests.
    const testUrl = 'https://intoli.com/blog/' +
      'not-possible-to-block-chrome-headless/chrome-headless-test.html';
    await page.goto(testUrl);

    // Save a screenshot of the results.
    await page.screenshot({ path: 'headless-initial-results.png' });
  } finally {
    // Clean up, even when navigation or the screenshot failed, so that no
    // browser process is left behind.
    await browser.close();
  }
})().catch((error) => {
  // Surface the failure and signal it through the exit code.
  console.error(error);
  process.exitCode = 1;
});
13 | 14 | ```bash 15 | # Or: `npm install` 16 | yarn install 17 | ``` 18 | 19 | This will install the [Mocha](https://github.com/mochajs/mocha) test runner, a few [Babel](https://babeljs.io/)-related packages, [Power Assert](https://github.com/power-assert-js/power-assert), and the [Power Assert Babel Preset](https://github.com/power-assert-js/babel-preset-power-assert) inside of `node_modules/`. 20 | 21 | 22 | ## The Babel Configuration 23 | 24 | The key to the tests being transformed when the tests are run is the Babel configuration. 25 | It's located in [.babelrc](.babelrc), and it tells Babel to use the Power Assert preset when `NODE_ENV` is set to `testing`. 26 | It also specifies that the [Babel env preset](https://babeljs.io/docs/plugins/preset-env/) should be used to target Node v6.10. 27 | The non-testing Babel configuration can be customized freely without impacting the use of Power Assert. 28 | 29 | It's also worth mentioning that Mocha needs to be configured to use Babel in order for the tests to be transformed. 30 | This is accomplished using [Babel register](https://babeljs.io/docs/usage/babel-register/) and the Mocha `--require` option. 31 | 32 | ```bash 33 | NODE_ENV=testing mocha --exit --require babel-register" 34 | ``` 35 | 36 | This same command is included in [package.json](package.json) as a script, so it's equivalent to running `yarn test`. 37 | 38 | 39 | ## The Tests 40 | 41 | The tests themselves are all located in [test/test-assertion-errors.js](test/test-assertion-errors.js). 42 | There's nothing specific to Power-Assert in the tests, they're just generic tests that use Node's `assert` module. 43 | They are each purposely designed to fail however, so that you can easily see what the error messages generated by Power Assert look like. 
// Note that all of these tests are designed to fail, so we can see the error messages!
// Each case uses nothing but Node's `assert`; the comment above every assertion
// shows the Jest matcher that would express the same check.
describe('Power Assert Testing Examples', () => {
  it('check that an unexpected substring is not found', () => {
    const result = 'Hello World';
    const unexpectedSubstring = 'World';

    // Fails because `result` does contain 'World'.
    // Jest Equivalent: expect(result).toEqual(expect.not.stringContaining(unexpectedSubstring));
    assert(!result.includes(unexpectedSubstring));
  });

  it('check that no members of an array are included in another array', () => {
    const result = ['Hello', 'World'];
    const unexpectedMembers = ['Evan', 'World'];
    // Fails on the second member: 'World' is present in `result`.
    // Jest Equivalent: expect(result).toEqual(expect.not.arrayContaining(unexpectedMembers));
    unexpectedMembers.forEach(member =>
      assert(!result.includes(member))
    );
  });

  it('check that a regular expression matches a string', () => {
    const regex = /^Hello World!/;
    const result = 'Hello World';
    // Fails because the pattern requires a trailing '!' that `result` lacks.
    // Jest Equivalent: expect(result).toEqual(expect.stringMatching(regex));
    assert(regex.test(result));
  });

  it('check that an array contains at least one number', () => {
    const result = ['Hello', 'World'];
    // Fails because every member is a string.
    // Jest Equivalent: expect(result).toContainEqual(expect.any(Number));
    assert(result.some(member => typeof member === 'number'));
  });

  it('check for deep equality between two objects', () => {
    const expectedResult = { 'a': [1, 2], 'b': [1, 2] }
    const result = { 'a': [1, 2], 'b': [1, 2, 3] }
    // Fails because the `b` arrays differ in length.
    // Jest Equivalent: expect(result).toEqual(expectedResult);
    assert.deepEqual(result, expectedResult);
  });
});
10 | The package additionally contains a `SliceString` class that introduces the same syntax for strings, and a `range()` method that works in the same way as the one from Python. 11 | You can find installation and usage instructions in [the GitHub repository for the project](https://github.com/intoli/slice). 12 | 13 | The article begins by constructing a crude implementation of negative indexing that doesn't use proxies. 14 | This isn't particularly useful in practice, but it serves to demonstrate the advantages of proxies over more primitive methods. 15 | 16 | - [primitive-negative-indexing.js](primitive-negative-indexing.js) - An implementation of negative indexing in JavaScript that doesn't use proxies. 17 | 18 | 19 | After that, it moves on to explore how slicing works in Python. 20 | The main code examples from that section have been condensed into these two files. 21 | 22 | - [slice-probe.py](slice-probe.py) - Implements the `SliceProbe` class that is used to understand how slicing works in Python. 23 | The class prints out the keys that are passed to a class when brackets are used for object access, and the `slice-probe.py` file uses this behavior to prove what the keys are for various slices. 24 | - [fizz-buzz.py](fizz-buzz.py) - A Fizz Buzz solution that uses slicing instead of explicit iteration or recursion. 25 | This primarily demonstrated the power and flexibility of the extended slice syntax. 26 | 27 | Finally, a `Slice` class is developed to provide the underlying slicing functionality, and a `SliceArray` class is developed which wraps `Slice` with the double bracket syntactic sugar using proxies. 28 | The implementation of these two classes can be found in these two files, respectively. 29 | 30 | - [slice.js](slice.js) - Implements the `Slice` class. 31 | - [slice-array.js](slice-array.js) - Implements the `SliceArray` class. 
/**
 * Wraps an array in a plain object that exposes Python-style negative indices
 * without using proxies.
 *
 * Every valid non-negative index (`0` through `length - 1`) and every valid
 * negative index (`-1` through `-length`) that exists at wrap time is defined
 * as an accessor property that reads from and writes to the original array.
 *
 * @param {Array} array - The array to wrap; it is accessed by reference, not copied.
 * @returns {Object} An object whose index properties forward to `array`.
 */
function wrapArray(array) {
  var wrappedArray = {};

  // Defines a getter/setter pair on the wrapper for a single key. The real
  // index is resolved lazily so that negative keys keep counting back from
  // the array's current end.
  function defineAccessor(key, resolveIndex) {
    Object.defineProperty(wrappedArray, key, {
      get: function() {
        return array[resolveIndex()];
      },
      set: function(value) {
        array[resolveIndex()] = value;
      },
    });
  }

  // Binds the loop variable for the accessors created in one iteration.
  function defineIndexPair(position) {
    // Normal array indexing: `array[0]`, `array[1]`, etc.
    defineAccessor(String(position), function() {
      return position;
    });
    // Negative indexing counts back from the end: `-1` is the last element
    // and `-length` is the first one. There is deliberately no `-0` key.
    var offset = position + 1;
    defineAccessor(String(-offset), function() {
      return array.length - offset;
    });
  }

  for (var i = 0; i < array.length; i++) {
    defineIndexPair(i);
  }
  return wrappedArray;
}
/**
 * An `Array` subclass that supports negative indexing (`array[-1]`) and a
 * double bracket slice syntax (`array[[start, stop, step]]`), where commas
 * play the role of Python's colons.
 *
 * The constructor returns a `Proxy` around the new instance; all of the
 * special behavior lives in the proxy's `get` and `set` traps.
 */
class SliceArray extends Array {
  constructor(...args) {
    super(...args);

    // Helper method that constructs either a `get` or `set` trap.
    // `action` is the name of the matching `Reflect`/`Slice` method. The third
    // trap argument is the assigned value for `set`; for `get` it is the
    // receiver, which `Reflect.get()` accepts in the same position.
    const constructTrap = action => (target, name, value) => {
      // The inner brackets of `array[[1,,2]]` are stringified to `1,,2` by the
      // property access, so normalize that to the Python-like key `1::2`.
      const key = (name || '').toString()
        .replace(/\s/g, '') // Remove all whitespace.
        .replace(/,/g, ':'); // Replace commas with colons.

      // Handle negative indices.
      if (/^-\d+$/.test(key)) {
        return Reflect[action](target, target.length + parseInt(key, 10), value);
      }

      // Handle slices.
      if (/^(-?\d+)?(:(-?\d+)?(:(-?\d+)?)?)$/.test(key)) {
        // Omitted parts are passed as `null` rather than `undefined`. `Slice`
        // treats `undefined` stop and step as its one-argument `Slice(stop)`
        // shorthand, which would turn `start:` into `:start`.
        const [start = null, stop = null, step = null] = key.split(':')
          .map(part => (part.length ? part : null));
        const slice = new Slice(start, stop, step);
        return slice[action](target, value);
      }

      // Fall back to the array's own properties.
      return Reflect[action](target, name, value);
    };

    return new Proxy(this, {
      get: constructTrap('get'),
      set: constructTrap('set'),
    });
  }
}
/**
 * A JavaScript counterpart of Python's `slice` object.
 *
 * A slice stores optional `start`, `stop`, and `step` values (numbers or
 * numerical strings; `null`/`undefined` mean "omitted") and can read from or
 * write to any array-like target that supports `slice()` and `splice()`.
 */
class Slice {
  /**
   * @param {?(number|string)} start - First index, or the stop index when it
   *   is the only argument supplied (the `Slice(stop)` signature).
   * @param {?(number|string)} [stop] - Index at which the slice ends (exclusive).
   * @param {?(number|string)} [step] - Distance between indices; defaults to 1.
   */
  constructor(start, stop, step) {
    // Support the `Slice(stop)` signature. This is decided by the number of
    // arguments actually passed, as in Python, so an explicit
    // `new Slice(1, undefined, undefined)` still means `1:` and not `:1`.
    if (arguments.length === 1) {
      [start, stop] = [undefined, start];
    }

    // Support numerical strings.
    this.start = start == null ? start : parseInt(start, 10);
    this.stop = stop == null ? stop : parseInt(stop, 10);
    this.step = step == null ? step : parseInt(step, 10);
  }

  /**
   * Computes the concrete, in-range indices that this slice selects.
   *
   * @param {Array} array - The array the slice is applied to (only `length` is read).
   * @returns {number[]} The selected indices, in iteration order.
   * @throws {Error} If the step is zero.
   */
  indices(array) {
    const { length } = array;

    // Set the default step to `1`.
    const step = this.step == null ? 1 : this.step;
    if (step === 0) {
      throw new Error('slice step cannot be zero');
    }

    // Handle negative indices while preserving `null` values.
    const start = this.start < 0 ? this.start + length : this.start;
    const stop = this.stop < 0 ? this.stop + length : this.stop;

    // Both bounds are clamped to the array, so the loops below only ever
    // visit indices that end up in the result, however large `stop` is.
    const indices = [];
    if (step > 0) {
      const firstIndex = start == null ? 0 : Math.max(start, 0);
      const endIndex = stop == null ? length : Math.min(stop, length);
      for (let index = firstIndex; index < endIndex; index += step) {
        indices.push(index);
      }
    } else {
      const firstIndex = start == null ? length - 1 : Math.min(start, length - 1);
      const endIndex = stop == null ? -1 : Math.max(stop, -1);
      for (let index = firstIndex; index > endIndex; index += step) {
        indices.push(index);
      }
    }

    return indices;
  }

  /**
   * Dispatches to `set()` when `values` is truthy and to `get()` otherwise.
   *
   * @param {Array} array - The array to read from or write to.
   * @param {Array} [values] - Replacement values, if assigning.
   * @returns {Array} The selected items, or the modified array.
   */
  apply(array, values) {
    return values ? this.set(array, values) : this.get(array);
  }

  /**
   * Returns the items of `array` selected by this slice.
   *
   * @param {Array} array - The array to read from.
   * @returns {Array} A new array containing the selected items.
   */
  get(array) {
    // We can use the built in `Array.slice()` method for this special case.
    if (this.step == null || this.step === 1) {
      const start = this.start == null ? undefined : this.start;
      const stop = this.stop == null ? undefined : this.stop;
      return array.slice(start, stop);
    }

    return this.indices(array)
      .map(index => array[index]);
  }

  /**
   * Assigns `values` to the part of `array` selected by this slice, in place.
   *
   * @param {Array} array - The array to modify.
   * @param {Array} values - The replacement values.
   * @returns {Array} The same `array` instance, after modification.
   * @throws {Error} If an extended slice (step other than 1) is assigned a
   *   sequence whose length differs from the number of selected indices.
   */
  set(array, values) {
    // We can insert arrays of any length for unextended slices.
    if (this.step == null || this.step === 1) {
      const { length } = array;
      // Resolves an optional, possibly negative bound to a position within
      // `[0, length]`, using `fallback` when the bound was omitted.
      const resolveBound = (bound, fallback) => {
        if (bound == null) {
          return fallback;
        }
        const absolute = bound < 0 ? bound + length : bound;
        return Math.min(Math.max(absolute, 0), length);
      };
      const start = resolveBound(this.start, 0);
      const stop = resolveBound(this.stop, length);
      // An empty or inverted range deletes nothing and inserts at `start`.
      const deleteCount = Math.max(stop - start, 0);
      array.splice(start, deleteCount, ...values);
      return array;
    }

    // Otherwise, the lengths must match and we need to do them one-by-one.
    const indices = this.indices(array);
    if (indices.length !== values.length) {
      throw new Error(
        `attempt to assign sequence of size ${values.length} ` +
        `to extended slice of size ${indices.length}`
      );
    }
    indices.forEach((arrayIndex, valuesIndex) => {
      array[arrayIndex] = values[valuesIndex];
    });
    return array;
  }
}
#! /usr/bin/env python

import sys

from selenium import webdriver


# Construct a ChromeOptions instance to configure.
options = webdriver.ChromeOptions()

# Specify that we'll run in headless mode.
options.add_argument('headless')

# Set the window size.
options.add_argument('window-size=1200x600')

# Initialize the driver.
driver = webdriver.Chrome(chrome_options=options)

try:
    # Navigate to Facebook.
    driver.get('https://facebook.com')

    # Wait up to 10 seconds for the elements to become available.
    driver.implicitly_wait(10)

    # Use CSS selectors to grab the login inputs.
    email_input = driver.find_element_by_css_selector('input[type=email]')
    password_input = driver.find_element_by_css_selector('input[type=password]')
    login_button = driver.find_element_by_css_selector('input[value="Log In"]')

    # Parse the command-line options. The credentials are kept in their own
    # variables so that they don't overwrite the input elements located above.
    if len(sys.argv) == 3:
        email, password = sys.argv[1:]
    else:
        print('You probably want to specify your email address and password as arguments.')
        email, password = 'evan@intoli.com', 'hunter2'

    # Enter our credentials.
    email_input.send_keys(email)
    password_input.send_keys(password)

    # Save a screenshot of the page with our email/password entered.
    driver.get_screenshot_as_file('main-page-with-information-entered.png')

    # Login.
    login_button.click()

    # Navigate to Evan's profile.
    driver.get('https://www.facebook.com/profile.php?id=100009447446864')

    # Take another screenshot.
    driver.get_screenshot_as_file('evans-profile.png')

    # Cycle through the posts and print out the authors and content.
    posts = driver.find_elements_by_css_selector('#stream_pagelet .fbUserContent')
    for post in posts:
        try:
            author = post.find_elements_by_css_selector('a[data-hovercard*=user]')[-1].get_attribute('innerHTML')
            content = post.find_elements_by_css_selector('div.userContent')[-1].get_attribute('innerHTML')
        except IndexError:
            # It's an advertisement, so there is nothing to print for it.
            continue
        print(f'{author}: "{content}"')
finally:
    # Always shut the headless browser down, even if scraping failed part way.
    driver.quit()
7 | - [extension/manifest.json](extension/manifest.json) - The manifest for the extension that overwrites the `window.navigator.language` property. 8 | - [extension/sandbox-breakout.js](extension/sandbox-breakout.js) - The implementation of the code which breaks out of the sandbox and overwrites `window.navigator.language`. 9 | 10 | A Chrome browser instance can be launched to run the tests with the following command. 11 | 12 | ```bash 13 | google-chrome --load-extension=./extension/ language-test.html 14 | ``` 15 | 16 | If the sandbox breakout works as expected, this should open a webpage that displays the text `xx-XX`. 17 | You can see the [original article](https://intoli.com/blog/sandbox-breakout/) for details about how things work. 18 | 19 | 20 | ## The runInPageContext() Method 21 | 22 | This is defined in [extension/sandbox-breakout.js](extension/sandbox-breakout.js), but the portion of code that you're most likely interested in is this. 23 | 24 | ```javascript 25 | // Breaks out of the content script context by injecting a specially 26 | // constructed script tag and injecting it into the page. 27 | const runInPageContext = (method, ...args) => { 28 | // The stringified method which will be parsed as a function object. 29 | const stringifiedMethod = method instanceof Function 30 | ? method.toString() 31 | : `() => { ${method} }`; 32 | 33 | // The stringified arguments for the method as JS code that will reconstruct the array. 34 | const stringifiedArgs = JSON.stringify(args); 35 | 36 | // The full content of the script tag. 37 | const scriptContent = ` 38 | // Parse and run the method with its arguments. 39 | (${stringifiedMethod})(...${stringifiedArgs}); 40 | 41 | // Remove the script element to cover our tracks. 42 | document.currentScript.parentElement 43 | .removeChild(document.currentScript); 44 | `; 45 | 46 | // Create a script tag and inject it into the document.
// Overwrite the `navigator.language` property to return a custom value.
// The override is installed as a getter on whichever `navigator` object is
// visible in the context where this function runs.
const overwriteLanguage = (language) => {
  Object.defineProperty(navigator, 'language', {
    get: () => language,
  });
};


// Breaks out of the content script context by building a script tag and
// injecting it into the page, where its content is evaluated.
//
// `method` is either a function or a string of JavaScript code. Functions are
// serialized with `toString()`, so they can't rely on variables from this
// script's scope. Any additional `args` are serialized with `JSON.stringify()`
// and passed to the method when the injected script runs.
const runInPageContext = (method, ...args) => {
  // The stringified method which will be parsed as a function object.
  // Raw code strings are wrapped in an arrow function so that both forms can
  // be invoked in the same way below.
  const stringifiedMethod = method instanceof Function
    ? method.toString()
    : `() => { ${method} }`;

  // The stringified arguments for the method as JS code that will reconstruct the array.
  const stringifiedArgs = JSON.stringify(args);

  // The full content of the script tag.
  const scriptContent = `
    // Parse and run the method with its arguments.
    (${stringifiedMethod})(...${stringifiedArgs});

    // Remove the script element to cover our tracks.
    document.currentScript.parentElement
      .removeChild(document.currentScript);
  `;

  // Create a script tag and inject it into the document.
  // It is prepended to the root element, so it becomes that element's first child.
  const scriptElement = document.createElement('script');
  scriptElement.innerHTML = scriptContent;
  document.documentElement.prepend(scriptElement);
};


// This won't work, it's sandboxed from the page context.
overwriteLanguage('xx-XX');

// This will work, it breaks out of the sandbox.
runInPageContext(overwriteLanguage, 'xx-XX');

Please Wait...

4 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /articles/scrape-infinite-scroll/README.md: -------------------------------------------------------------------------------- 1 | # Using Puppeteer to Scrape Websites with Infinite Scrolling 2 | 3 | This directory is centered around [scrape-infinite-scroll.js](scrape-infinite-scroll.js), which uses Puppeteer to scrape infinite scroll items from a [demo page](https://intoli.com/blog/scrape-infinite-scroll/demo.html) set up for it. 4 | The script's implementation details are described in the [Using Puppeteer to Scrape Websites with Infinite Scrolling](https://intoli.com/blog/scrape-infinite-scroll/) article published on the Intoli blog. 5 | Customizing the script should be straightforward after reading this article. 6 | 7 | To run the script, you need to have [Node.js](https://nodejs.org/en/) installed, which you can do using [nvm](https://github.com/creationix/nvm). 8 | With that out of the way, download the contents of this directory to disk. 
9 | 10 | ```bash 11 | git clone https://github.com/Intoli/intoli-article-materials.git 12 | cd intoli-article-materials/articles/scrape-infinite-scroll 13 | ``` 14 | 15 | And then install Puppeteer with 16 | 17 | ```bash 18 | npm install 19 | ``` 20 | 21 | Finally, run the script with 22 | 23 | ```bash 24 | node scrape-infinite-scroll.js 25 | ``` 26 | -------------------------------------------------------------------------------- /articles/scrape-infinite-scroll/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "scrape-infinite-scroll", 3 | "version": "1.0.0", 4 | "description": "A script which uses Puppeteer to scrape pages with infinite scroll.", 5 | "main": "scrape-infinite-scroll.js", 6 | "repository": { 7 | "type": "git", 8 | "url": "https://github.com/Intoli/intoli-article-materials.git" 9 | }, 10 | "keywords": [ 11 | "scraping", 12 | "javascript", 13 | "puppeteer", 14 | "headless", 15 | "chrome" 16 | ], 17 | "author": "Andre Perunicic / Intoli, LLC", 18 | "license": "BSD-2-Clause", 19 | "bugs": { 20 | "url": "https://github.com/Intoli/intoli-article-materials/issues" 21 | }, 22 | "homepage": "https://intoli.com/blog/scrape-infinite-scroll/", 23 | "dependencies": { 24 | "puppeteer": "^1.0.0" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /articles/scrape-infinite-scroll/scrape-infinite-scroll.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const puppeteer = require('puppeteer'); 3 | 4 | /** 5 | * This function is injected into the page and used to scrape items from it. 
/**
 * This function is injected into the page and used to scrape items from it.
 * It is evaluated in the page context, so it can only use what the page itself provides.
 * @returns {string[]} The text of each item box that is currently in the document.
 */
function extractItems() {
  const extractedElements = document.querySelectorAll('#boxes > div.box');
  return Array.from(extractedElements, element => element.innerText);
}

/**
 * Scrolls and extracts content from a page.
 * @param {object} page - A loaded Puppeteer Page instance.
 * @param {function} extractItems - Item extraction function that is injected into the page.
 * @param {number} itemTargetCount - The target number of items to extract before stopping.
 * @param {number} scrollDelay - The time (in milliseconds) to wait between scrolls.
 * @returns {Promise<Array>} The items extracted so far; this may be fewer than
 *   `itemTargetCount` if scrolling had to stop early.
 */
async function scrapeInfiniteScrollItems(page, extractItems, itemTargetCount, scrollDelay = 1000) {
  let items = [];
  try {
    while (items.length < itemTargetCount) {
      items = await page.evaluate(extractItems);
      // Don't scroll and wait for more content once we already have enough.
      if (items.length >= itemTargetCount) {
        break;
      }
      const previousHeight = await page.evaluate('document.body.scrollHeight');
      await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
      await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
      await page.waitFor(scrollDelay);
    }
  } catch (error) {
    // Scraping is best-effort: a failure here (for example, the page never
    // growing any further) ends the loop and whatever was collected is returned.
    console.warn(`Stopped scrolling after extracting ${items.length} items: ${error.message}`);
  }
  return items;
}

(async () => {
  // Set up browser and page.
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1280, height: 926 });

    // Navigate to the demo page.
    await page.goto('https://intoli.com/blog/scrape-infinite-scroll/demo.html');

    // Scroll and extract items from the page.
    const items = await scrapeInfiniteScrollItems(page, extractItems, 100);

    // Save extracted items to a file.
    fs.writeFileSync('./items.txt', items.join('\n') + '\n');
  } finally {
    // Close the browser, even if something above failed.
    await browser.close();
  }
})().catch((error) => {
  // Report the failure instead of leaving an unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});
Webpack to Render Markdown in React Apps 2 | 3 | [Using Webpack to Render Markdown in React Apps](https://intoli.com/blog/webpack-markdown-setup) is a short article describing the concrete steps you need to take in order to automatically render Markdown documents through Webpack. 4 | This folder contains a working example of the configuration described in that tutorial. 5 | The code is organized as follows: 6 | 7 | - [webpack.config.js](webpack.config.js) - Contains the loader setup which makes Markdown rendering and code highlighting possible. 8 | - [src/](src/) - Contains a web app built by the above Webpack config. 9 | Its entry point, [src/index.jsx](src/index.jsx), shows how to load a React component that accepts imported Markdown content. 10 | - [src/article.md](src/article.md) - The Markdown file that's rendered by this app is a listing of various Markdown features, and serves to show off how a wide range of elements get rendered. 11 | 12 | 13 | ## Running This Example 14 | 15 | First, clone this repository and navigate to this article's directory: 16 | 17 | ```bash 18 | git clone https://github.com/intoli/intoli-article-materials.git 19 | cd intoli-article-materials/articles/webpack-markdown-setup 20 | ``` 21 | 22 | Then, install the project's dependencies via Yarn 23 | 24 | ```bash 25 | yarn install 26 | ``` 27 | 28 | With the basic setup out of the way, you can start the app. 29 | The default script is run via 30 | 31 | ```bash 32 | yarn start 33 | ``` 34 | 35 | and it starts a hot reloading server that will re-render the app on any changes in real time. 36 | You can view the app at `http://localhost:3000` (customizable in the Webpack config). 
37 | -------------------------------------------------------------------------------- /articles/webpack-markdown-setup/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "scripts": { 3 | "lint": "eslint --ignore-path .gitignore --ext js,jsx src", 4 | "start": "NODE_ENV=development webpack-dev-server --config webpack.config.js --hot" 5 | }, 6 | "devDependencies": { 7 | "babel-core": "^6.26.0", 8 | "babel-eslint": "^8.2.2", 9 | "babel-loader": "^7.1.3", 10 | "babel-preset-env": "^1.6.1", 11 | "babel-preset-react": "^6.24.1", 12 | "babel-preset-stage-2": "^6.24.1", 13 | "css-loader": "^0.28.11", 14 | "eslint": "^4.18.2", 15 | "eslint-config-airbnb": "^16.1.0", 16 | "eslint-loader": "^2.0.0", 17 | "eslint-plugin-import": "^2.9.0", 18 | "eslint-plugin-jsx-a11y": "^6.0.3", 19 | "eslint-plugin-react": "^7.7.0", 20 | "highlight.js": "^9.12.0", 21 | "html-loader": "^0.5.5", 22 | "html-webpack-plugin": "^3.0.4", 23 | "markdown-loader": "^2.0.2", 24 | "style-loader": "^0.21.0", 25 | "webpack": "^3.10.0", 26 | "webpack-cli": "^2.0.10", 27 | "webpack-dev-server": "^2.11.2" 28 | }, 29 | "dependencies": { 30 | "babel-polyfill": "^6.26.0", 31 | "prop-types": "^15.6.1", 32 | "react": "^16.3.2", 33 | "react-dom": "^16.3.2" 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /articles/webpack-markdown-setup/src/components/Markdown/gruvbox-dark.css: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Gruvbox style (dark) (c) Pavel Pertsev (original style at https://github.com/morhetz/gruvbox) 4 | 5 | */ 6 | 7 | .hljs { 8 | display: block; 9 | overflow-x: auto; 10 | padding: 0.5em; 11 | background: #282828; 12 | } 13 | 14 | .hljs, 15 | .hljs-subst { 16 | color: #ebdbb2; 17 | } 18 | 19 | /* Gruvbox Red */ 20 | .hljs-deletion, 21 | .hljs-formula, 22 | .hljs-keyword, 23 | .hljs-link, 24 | .hljs-selector-tag { 25 | color: #fb4934; 26 | } 27 | 28 
| /* Gruvbox Blue */ 29 | .hljs-built_in, 30 | .hljs-emphasis, 31 | .hljs-name, 32 | .hljs-quote, 33 | .hljs-strong, 34 | .hljs-title, 35 | .hljs-variable { 36 | color: #83a598; 37 | } 38 | 39 | /* Gruvbox Yellow */ 40 | .hljs-attr, 41 | .hljs-params, 42 | .hljs-template-tag, 43 | .hljs-type { 44 | color: #fabd2f; 45 | } 46 | 47 | /* Gruvbox Purple */ 48 | .hljs-builtin-name, 49 | .hljs-doctag, 50 | .hljs-literal, 51 | .hljs-number { 52 | color: #8f3f71; 53 | } 54 | 55 | /* Gruvbox Orange */ 56 | .hljs-code, 57 | .hljs-meta, 58 | .hljs-regexp, 59 | .hljs-selector-id, 60 | .hljs-template-variable { 61 | color: #fe8019; 62 | } 63 | 64 | /* Gruvbox Green */ 65 | .hljs-addition, 66 | .hljs-meta-string, 67 | .hljs-section, 68 | .hljs-selector-attr, 69 | .hljs-selector-class, 70 | .hljs-string, 71 | .hljs-symbol { 72 | color: #b8bb26; 73 | } 74 | 75 | /* Gruvbox Aqua */ 76 | .hljs-attribute, 77 | .hljs-bullet, 78 | .hljs-class, 79 | .hljs-function, 80 | .hljs-function .hljs-keyword, 81 | .hljs-meta-keyword, 82 | .hljs-selector-pseudo, 83 | .hljs-tag { 84 | color: #8ec07c; 85 | } 86 | 87 | /* Gruvbox Gray */ 88 | .hljs-comment { 89 | color: #928374; 90 | } 91 | 92 | /* Gruvbox Purple */ 93 | .hljs-link_label, 94 | .hljs-literal, 95 | .hljs-number { 96 | color: #d3869b; 97 | } 98 | 99 | .hljs-comment, 100 | .hljs-emphasis { 101 | font-style: italic; 102 | } 103 | 104 | .hljs-section, 105 | .hljs-strong, 106 | .hljs-tag { 107 | font-weight: bold; 108 | } 109 | -------------------------------------------------------------------------------- /articles/webpack-markdown-setup/src/components/Markdown/index.jsx: -------------------------------------------------------------------------------- 1 | import PropTypes from 'prop-types'; 2 | import React from 'react'; 3 | 4 | // Set the rendered code theme. 5 | import './gruvbox-dark.css'; 6 | // Customize the way markdown is rendered. 
7 | import './markdown.css'; 8 | 9 | 10 | const wrapMarkup = html => ({ 11 | __html: html, 12 | }); 13 | 14 | 15 | const Markdown = ({ content }) => ( 16 | // eslint-disable-next-line react/no-danger 17 |
18 | ); 19 | 20 | Markdown.propTypes = { 21 | content: PropTypes.string.isRequired, 22 | }; 23 | 24 | 25 | export default Markdown; 26 | -------------------------------------------------------------------------------- /articles/webpack-markdown-setup/src/components/Markdown/markdown.css: -------------------------------------------------------------------------------- 1 | .markdown { 2 | margin: auto; 3 | width: 800px; 4 | } 5 | 6 | .markdown table { 7 | margin-bottom: 13px; 8 | } 9 | 10 | .markdown table td, 11 | .markdown table th { 12 | padding: 6px 13px; 13 | text-align: left; 14 | } 15 | 16 | .markdown table th { 17 | background: #d0dae5; 18 | } 19 | 20 | .markdown table tr { 21 | border-top: 1px solid #dfe2e5; 22 | } 23 | 24 | .markdown table tr:nth-child(even) { 25 | background: #f6f8fa; 26 | } 27 | 28 | .markdown pre { 29 | margin-bottom: -7px; 30 | } 31 | 32 | .markdown pre code { 33 | padding: 0; 34 | } 35 | 36 | .markdown code { 37 | background: #ededed; 38 | font-size: 13px; 39 | padding: 1px 2px; 40 | } 41 | -------------------------------------------------------------------------------- /articles/webpack-markdown-setup/src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Markdown Webpack Setup Demo 7 | 8 | 9 | 10 |
11 | 12 | 13 | -------------------------------------------------------------------------------- /articles/webpack-markdown-setup/src/index.jsx: -------------------------------------------------------------------------------- 1 | import 'babel-polyfill'; 2 | import React from 'react'; 3 | import ReactDOM from 'react-dom'; 4 | 5 | import Markdown from './components/Markdown'; 6 | import content from './article.md'; 7 | 8 | 9 | ReactDOM.render( 10 | , 11 | document.getElementById('app'), 12 | ); 13 | -------------------------------------------------------------------------------- /articles/webpack-markdown-setup/webpack.config.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | 3 | const highlight = require('highlight.js'); 4 | const HtmlWebpackPlugin = require('html-webpack-plugin'); 5 | 6 | 7 | const isProduction = process.env.NODE_ENV === 'production'; 8 | 9 | const config = { 10 | devServer: { 11 | clientLogLevel: 'info', 12 | contentBase: './frontend', 13 | historyApiFallback: true, 14 | overlay: { 15 | errors: true, 16 | warnings: false, 17 | }, 18 | port: 3000, 19 | publicPath: '/', 20 | stats: { 21 | modules: false, 22 | chunks: false, 23 | }, 24 | }, 25 | devtool: 'cheap-module-source-map', 26 | entry: path.resolve(__dirname, 'src', 'index.jsx'), 27 | module: { 28 | rules: [ 29 | { 30 | test: /\.(js|jsx)$/, 31 | exclude: /node_modules/, 32 | enforce: 'pre', 33 | loader: 'eslint-loader', 34 | }, 35 | { 36 | test: /\.(js|jsx)$/, 37 | exclude: /node_modules/, 38 | loader: 'babel-loader', 39 | }, 40 | { 41 | test: /\.css$/, 42 | exclude: /node_modules/, 43 | use: [ 44 | { 45 | loader: 'style-loader', 46 | options: { 47 | sourceMap: !isProduction, 48 | }, 49 | }, 50 | { 51 | loader: 'css-loader', 52 | options: { 53 | importLoaders: 0, 54 | sourceMap: !isProduction, 55 | }, 56 | }, 57 | ], 58 | }, 59 | { 60 | test: /\.(md)$/, 61 | use: [ 62 | 'html-loader', 63 | { 64 | loader: 'markdown-loader', 
65 | options: { 66 | highlight: (code, lang) => { 67 | if (!lang || ['text', 'literal', 'nohighlight'].includes(lang)) { 68 | return `
${code}
`; 69 | } 70 | const html = highlight.highlight(lang, code).value; 71 | return `${html}`; 72 | }, 73 | }, 74 | }, 75 | ], 76 | }, 77 | ], 78 | }, 79 | output: { 80 | filename: 'bundle.js', 81 | path: path.resolve(__dirname, 'build'), 82 | publicPath: '/', 83 | }, 84 | plugins: [ 85 | new HtmlWebpackPlugin({ 86 | inject: true, 87 | template: './src/index.html', 88 | }), 89 | ], 90 | resolve: { 91 | extensions: [ 92 | '.js', 93 | '.jsx', 94 | ], 95 | }, 96 | watchOptions: { 97 | ignored: /build/, 98 | }, 99 | }; 100 | 101 | 102 | module.exports = config; 103 | -------------------------------------------------------------------------------- /articles/youtube-mp3-downloader/.gitignore: -------------------------------------------------------------------------------- 1 | exodus/ 2 | node_modules/ 3 | .tern-port 4 | -------------------------------------------------------------------------------- /articles/youtube-mp3-downloader/README.md: -------------------------------------------------------------------------------- 1 | # Building a YouTube MP3 Downloader with Exodus, FFmpeg, and AWS Lambda 2 | 3 | [Running FFmpeg on AWS Lambda for 1.9% the cost of AWS Elastic Transcoder](https://intoli.com/blog/transcoding-on-aws-lambda) and [Building a YouTube MP3 Downloader with Exodus, FFmpeg, and AWS Lambda](https://intoli.com/blog/youtube-mp3-downloader) form a two part tutorial for building a practical bookmarklet that uses [AWS Lambda](https://aws.amazon.com/lambda/) to convert YouTube videos to MP3 files and then downloads them. 4 | The project consists of two Lambda functions: 5 | 6 | - `YoutubeMP3TranscoderFunction` - Defined in [transcoder.js](transcoder.js), this function first downloads a configurable media file, converts it to an MP3 using a bundled version of [FFmpeg](https://www.ffmpeg.org), and then uploads the MP3 to an S3 bucket. 7 | The behavior of the Lambda function can be controlled, by specifying the following keys in the invocation event. 
8 | - `filename` - The filename to use in the MP3 file's [Content-Disposition header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition) when a user downloads it. 9 | This determines the filename that will be suggested to the user when they save it to their computer. 10 | - `logKey` - An S3 key where the output of FFmpeg will be placed for logging purposes. 11 | - `mp3Key` - An S3 key where the converted MP3 file will be placed. 12 | - `s3Bucket` - The S3 bucket where the log and MP3 files will be placed. 13 | - `url` - The URL where the input audio/video file can be downloaded from. 14 | - `YoutubeMP3DownloaderFunction` - Defined in [app.js](app.js) and [lambda.js](lambda.js), this function is designed to integrate with [API Gateway](https://aws.amazon.com/api-gateway/) using [aws-serverless-express](https://github.com/awslabs/aws-serverless-express). 15 | This function will serve up the YouTube MP3 Downloader's HTML download page as well as provide the internal API methods that it uses behind the scenes. 16 | 17 | Note that you'll need to perform the deployment steps on Linux because we'll be bundling a locally installed version of FFmpeg. 18 | If you use either Windows or macOS, then you'll need to work inside of a virtual machine running Linux. 19 | You could alternatively spin up an EC2 instance and work on the remote machine. 20 | 21 | Before deploying the Lambda functions, it will be necessary to install several dependencies. 22 | You'll first need to make sure that `aws-cli`, `jq`, `git`, `node`, `npm`, `yarn`, `python`, `pip`, and `ffmpeg` are all available. 23 | All of these should be available in your system package manager, and you likely already have most of them installed. 24 | 25 | After that, you'll need to install [Exodus](https://github.com/intoli/exodus). 26 | This can be done by running the following. 27 | 28 | ```bash 29 | # Install the `exodus_bundler` package.
30 | pip install --user exodus_bundler 31 | 32 | # Make sure that `exodus` is in your `PATH`. 33 | export PATH="${HOME}/.local/bin/:${PATH}" 34 | ``` 35 | 36 | You might also want to add the `export PATH="${HOME}/.local/bin/:${PATH}"` line to your `~/.bashrc` file, so that the `exodus` command will be in your path in the future. 37 | 38 | Next, you'll need to clone the [intoli-article-materials repository](https://github.com/intoli/intoli-article-materials), move into this article's directory, and install the Node dependencies. 39 | 40 | ```bash 41 | # Clone the repository and move into the directory. 42 | git clone https://github.com/intoli/intoli-article-materials.git 43 | cd intoli-article-materials/articles/youtube-mp3-downloader/ 44 | 45 | # Install the node dependencies from `package.json` and `yarn.lock`. 46 | yarn install 47 | ``` 48 | 49 | After the Node dependencies finish installing, you must create a local Exodus bundle for FFmpeg. 50 | The following command will create a local directory called `exodus` that includes FFmpeg as well as all of its dependencies. 51 | 52 | ```bash 53 | # Create an `ffmpeg` bundle and extract it in the current directory. 54 | exodus --tarball ffmpeg | tar -zx 55 | ``` 56 | 57 | At this stage, you're very close to being ready to deploy everything. 58 | The last thing that you need to do is to customize the names of the S3 bucket, the Lambda functions, and other AWS resources which must have unique names. 59 | These are defined at the top of the [deploy-everything.sh](deploy-everything.sh) and [app.js](app.js) files. 60 | After setting these to unique values, you can simply run 61 | 62 | ```bash 63 | ./deploy-everything.sh 64 | ``` 65 | 66 | to deploy all of the AWS resources. 67 | This will echo out a lot of information about the AWS resources being created, and then at the end you should see something like this. 68 | 69 | ``` 70 | Now just create a bookmarklet with the following contents!
71 | javascript:window.open(`https://osacfvxuq7.execute-api.us-east-2.amazonaws.com/v1/${window.location.href}`); 72 | ``` 73 | 74 | Then just create the bookmarklet, navigate to a video on YouTube, and click the bookmarklet to try it out! 75 | -------------------------------------------------------------------------------- /articles/youtube-mp3-downloader/app.js: -------------------------------------------------------------------------------- 1 | const AWS = require('aws-sdk'); 2 | const express = require('express'); 3 | const nunjucks = require('nunjucks'); 4 | const ytdl = require('ytdl-core'); 5 | 6 | const apiStage = 'v1'; 7 | const transcoderFunctionName = 'YoutubeMp3TranscoderFunction'; 8 | const lambda = new AWS.Lambda({ region: 'us-east-2' }); 9 | const s3 = new AWS.S3({ signatureVersion: 'v4' }); 10 | // You'll need to change this to point towards your own bucket. 11 | const s3Bucket = 'youtube-mp3-downloader'; 12 | 13 | const app = express(); 14 | nunjucks.configure('.', { express: app }); 15 | const router = express.Router(); 16 | 17 | 18 | router.get('/transcode/:videoId', (req, res) => { 19 | const timestamp = Date.now().toString(); 20 | const { videoId } = req.params; 21 | const videoUrl = `https://www.youtube.com/watch?v=${videoId}`; 22 | 23 | // Get information on the available video file formats. 24 | Promise.resolve().then(() => new Promise((resolve, revoke) => { 25 | ytdl.getInfo(videoUrl, (error, info) => error ? revoke(error) : resolve(info)) 26 | })) 27 | // Choose the best format and construct the Lambda event. 28 | .then(({ formats, title }) => { 29 | // We'll just pick the largest audio source file size for simplicity here, 30 | // you could prioritize things based on bitrate, file format, etc. if you wanted to. 
31 | const format = formats 32 | .filter(format => format.audioEncoding != null) 33 | .filter(format => format.clen != null) 34 | .sort((a, b) => parseInt(b.clen, 10) - parseInt(a.clen, 10))[0]; 35 | 36 | return { 37 | filename: `${title}.mp3`, 38 | logKey: `log/${timestamp} - ${title}.log`, 39 | mp3Key: `mp3/${timestamp} - ${title}.mp3`, 40 | s3Bucket, 41 | url: format.url, 42 | }; 43 | }) 44 | // Trigger the actual conversion in the other Lambda function. 45 | .then(lambdaEvent => new Promise((resolve, revoke) => { 46 | lambda.invoke({ 47 | FunctionName: transcoderFunctionName, 48 | InvocationType: 'Event', 49 | Payload: JSON.stringify(lambdaEvent), 50 | }, (error, data) => error ? revoke(error) : resolve(lambdaEvent)); 51 | })) 52 | // Send a response 53 | .then(({ logKey, mp3Key }) => { 54 | res.status(200).send(JSON.stringify({ logKey, mp3Key })); 55 | }) 56 | // Handle errors. 57 | .catch((error) => { 58 | return res.status(500).send(`Something went wrong: ${error.message}`); 59 | }); 60 | }); 61 | 62 | 63 | router.get('/signed-url/:logKey/:mp3Key', (req, res) => { 64 | const logKey = decodeURIComponent(req.params.logKey); 65 | const mp3Key = decodeURIComponent(req.params.mp3Key); 66 | s3.headObject({ 67 | Bucket: s3Bucket, 68 | Key: logKey, 69 | }, (error) => { 70 | if (error && error.code === 'NotFound') { 71 | res.status(200).send(JSON.stringify({ url: null })); 72 | } else { 73 | s3.getSignedUrl('getObject', { 74 | Bucket: s3Bucket, 75 | Expires: 3600, 76 | Key: mp3Key, 77 | }, (error, url) => { 78 | res.status(200).send(JSON.stringify({ url })); 79 | }); 80 | } 81 | }); 82 | }); 83 | 84 | 85 | router.get('/*', (req, res) => { 86 | // Handle extracting the path from the original URL. 87 | const originalUrl = module.parent ? req.originalUrl.slice(1) : 88 | req.originalUrl.slice(`/${apiStage}/`.length); 89 | const path = decodeURIComponent(originalUrl); 90 | 91 | // Handle full youtube URLs or just the video ID. 
92 | const urlPrefixes = ['https://', 'http://', 'www.youtube.com', 'youtube.com']; 93 | let videoId, videoUrl; 94 | if (urlPrefixes.some(prefix => path.startsWith(prefix))) { 95 | videoUrl = path; 96 | videoId = videoUrl.match(/v=([^&]*)/)[1]; 97 | } else { 98 | videoId = path; 99 | videoUrl = `https://www.youtube.com/watch?v=${videoId}`; 100 | } 101 | 102 | // Render the download page template. 103 | res.render('download.html', { apiStage, videoId, videoUrl }); 104 | }); 105 | 106 | // Run the app when the file is being run as a script. 107 | if (!module.parent) { 108 | app.use(`/${apiStage}/`, router); 109 | app.listen(3000, () => console.log('Listening on port 3000!')) 110 | } else { 111 | app.use('/', router); 112 | } 113 | 114 | // Export the app for use with lambda. 115 | module.exports = app; 116 | -------------------------------------------------------------------------------- /articles/youtube-mp3-downloader/deploy-everything.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # You'll need to configure these settings, they must be unique. 4 | # Be sure to also change the bucket name in `app.js`! 5 | export bucket_name="youtube-mp3-downloader" 6 | 7 | export role_name="YoutubeMp3DownloaderRole" 8 | export policy_name="YoutubeMp3DownloaderPolicy" 9 | 10 | export transcoder_function_name="YoutubeMp3TranscoderFunction" 11 | export downloader_function_name="YoutubeMp3DownloaderFunction" 12 | export downloader_api_name="YoutubeDownloaderApi" 13 | 14 | 15 | # Make a new S3 bucket. 16 | aws s3 mb "s3://${bucket_name}" 17 | 18 | 19 | # Create a new role. 
20 | read -r -d '' role_policy_document <<'EOF' 21 | { 22 | "Version": "2012-10-17", 23 | "Statement": [ 24 | { 25 | "Effect": "Allow", 26 | "Principal": { 27 | "Service": [ 28 | "apigateway.amazonaws.com", 29 | "lambda.amazonaws.com" 30 | ] 31 | }, 32 | "Action": "sts:AssumeRole" 33 | } 34 | ] 35 | } 36 | EOF 37 | response="$(aws iam create-role \ 38 | --role-name "${role_name}" \ 39 | --assume-role-policy-document "${role_policy_document}")" 40 | echo "${response}" 41 | role_arn="$(jq -r .Role.Arn <<< "${response}")" 42 | 43 | 44 | # Assign a role to the policy. 45 | read -r -d '' policy_document < 2 | 3 | 4 | 5 | 6 | Youtube MP3 Downloader 7 | 9 | 17 | 18 | 19 |
20 | 21 |
22 |
23 | 30 |
31 |
32 | 0% 33 |
34 |
35 |
36 |
37 |
38 | 39 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /articles/youtube-mp3-downloader/lambda.js: -------------------------------------------------------------------------------- 1 | const awsServerlessExpress = require('aws-serverless-express'); 2 | const app = require('./app'); 3 | const server = awsServerlessExpress.createServer(app); 4 | 5 | exports.handler = (event, context) => ( 6 | awsServerlessExpress.proxy(server, event, context) 7 | ); 8 | -------------------------------------------------------------------------------- /articles/youtube-mp3-downloader/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "aws-sdk": "^2.222.1", 4 | "aws-serverless-express": "^3.1.3", 5 | "express": "^4.16.3", 6 | "nunjucks": "^3.1.2", 7 | "request": "^2.85.0", 8 | "tempy": "^0.2.1", 9 | "ytdl": "^0.10.1", 10 | "ytdl-core": "^0.20.2" 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /articles/youtube-mp3-downloader/transcoder.js: -------------------------------------------------------------------------------- 1 | const child_process = require('child_process'); 2 | const fs = require('fs'); 3 | const path = require('path'); 4 | 5 | const AWS = require('aws-sdk'); 6 | const request = require('request'); 7 | const tempy = require('tempy'); 8 | 9 | const s3 = new AWS.S3(); 10 | 11 | 12 | exports.handler = (event, context, callback) => { 13 | // We're going to do the transcoding asynchronously, so we callback immediately. 14 | callback(); 15 | 16 | // Extract the event parameters. 17 | const { mp3Key, url } = event; 18 | const filename = event.filename || path.basename(mp3Key); 19 | const logKey = event.logKey || `${mp3Key}.log`; 20 | const s3Bucket = event.s3Bucket || 'youtube-mp3-downloader'; 21 | 22 | // Create temporary input/output filenames that we can clean up afterwards. 
23 | const inputFilename = tempy.file(); 24 | const mp3Filename = tempy.file({ extension: 'mp3' }); 25 | 26 | // Download the source file. 27 | Promise.resolve().then(() => new Promise((resolve, revoke) => { 28 | const writeStream = fs.createWriteStream(inputFilename); 29 | writeStream.on('finish', resolve); 30 | writeStream.on('error', revoke); 31 | request(url).pipe(writeStream); 32 | })) 33 | // Perform the actual transcoding. 34 | .then(() => { 35 | // Use the Exodus ffmpeg bundled executable. 36 | const ffmpeg = path.resolve(__dirname, 'exodus', 'bin', 'ffmpeg'); 37 | 38 | // Convert the FLV file to an MP3 file using ffmpeg. 39 | const ffmpegArgs = [ 40 | '-i', inputFilename, 41 | '-vn', // Disable the video stream in the output. 42 | '-acodec', 'libmp3lame', // Use Lame for the mp3 encoding. 43 | '-ac', '2', // Set 2 audio channels. 44 | '-q:a', '6', // Set the quality to be roughly 128 kb/s. 45 | mp3Filename, 46 | ]; 47 | const process = child_process.spawnSync(ffmpeg, ffmpegArgs); 48 | return process.stdout.toString() + process.stderr.toString(); 49 | }) 50 | // Upload the generated MP3 to S3. 51 | .then(logContent => new Promise((resolve, revoke) => { 52 | s3.putObject({ 53 | Body: fs.createReadStream(mp3Filename), 54 | Bucket: s3Bucket, 55 | Key: mp3Key, 56 | ContentDisposition: `attachment; filename="${filename.replace('"', '\'')}"`, 57 | ContentType: 'audio/mpeg', 58 | }, (error) => { 59 | if (error) { 60 | revoke(error); 61 | } else { 62 | // Update a log of the FFmpeg output. 63 | const logFilename = path.basename(logKey); 64 | s3.putObject({ 65 | Body: logContent, 66 | Bucket: s3Bucket, 67 | ContentType: 'text/plain', 68 | ContentDisposition: `inline; filename="${logFilename.replace('"', '\'')}"`, 69 | Key: logKey, 70 | }, resolve); 71 | } 72 | }) 73 | })) 74 | .catch(console.error) 75 | // Delete the temporary files. 
76 | .then(() => { 77 | [inputFilename, mp3Filename].forEach((filename) => { 78 | if (fs.existsSync(filename)) { 79 | fs.unlinkSync(filename); 80 | } 81 | }); 82 | }); 83 | }; 84 | --------------------------------------------------------------------------------