';
5 | });
6 |
--------------------------------------------------------------------------------
/articles/chrome-extensions-with-selenium/extension/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": 2,
3 | "name": "Chrome Extensions With Selenium",
4 | "version": "1.0.0",
5 | "content_scripts": [
6 | {
7 | "matches": ["*://*/*"],
8 | "js": ["content.js"],
9 | "run_at": "document_start"
10 | }
11 | ]
12 | }
13 |
--------------------------------------------------------------------------------
/articles/chrome-extensions-with-selenium/launch_chrome.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.common.exceptions import NoSuchElementException
3 |
4 |
5 | # Configure the necessary command-line option.
6 | options = webdriver.ChromeOptions()
7 | # Note that `chrome-extension` is the path to the unpacked extension.
8 | options.add_argument('--load-extension=chrome-extension')
9 |
10 | # Navigate to any page... well, not just *any* page...
11 | driver = webdriver.Chrome(chrome_options=options)
12 | driver.get('https://intoli.com')
13 |
14 | # Check if the extension worked and log the result.
15 | try:
16 | header = driver.find_element_by_id('successfully-installed')
17 | print('Success! :-)')
18 | except NoSuchElementException:
19 | print('Failure! :-(')
20 | finally:
21 | # Clean up.
22 | driver.quit()
23 |
--------------------------------------------------------------------------------
/articles/circleci-artifacts/README.md:
--------------------------------------------------------------------------------
1 | # Extending CircleCI's API with a Custom Microservice on AWS Lambda
2 |
3 | [Extending CircleCI's API with a Custom Microservice on AWS Lambda](https://intoli.com/blog/circleci-artifacts/) is a tutorial that describes the process of setting up a [nodejs](https://nodejs.org/) [express](https://expressjs.com/) app as an API using Amazon's [Lambda service](https://aws.amazon.com/lambda/).
4 | The guide is comprehensive and covers everything from writing the initial express app to deploying it as an API on a custom domain name.
5 | The service itself provides a mechanism for accessing the latest version of a build artifact from [CircleCI](https://circleci.com/).
6 | The finished API is provided free of charge to any open source projects that would like to use it, and it's accessible using the following URL pattern.
7 |
8 | ```
9 | https://circleci.intoli.com/artifacts/github-username/repo-name/path/to/the/artifact
10 | ```
11 |
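The optional query string parameters handled in [app.js](app.js) (`branch`, `build`, `filter`, and `vcsType`, defaulting to `master`, `latest`, `successful`, and `github`) can be appended to pin down a specific build. For example, with a hypothetical repository and build number:

```
https://circleci.intoli.com/artifacts/github-username/repo-name/path/to/the/artifact?branch=develop&build=123&filter=successful
```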
12 | All of the resources required to deploy your own version of the proxy app are included inside of this directory.
13 | The JavaScript dependencies are included in [package.json](package.json) and [yarn.lock](yarn.lock), and they can be installed by running the following.
14 |
15 | ```bash
16 | yarn install
17 | ```
18 |
19 | The app itself is defined in [app.js](app.js).
20 | You can run this locally on your own machine by invoking it directly with node.
21 |
22 | ```bash
23 | node app.js
24 | ```
25 |
26 | A little bit of glue is required in order to get the script working on Lambda, and this is provided by the [lambda.js](lambda.js) file which exports a Lambda handler that will proxy requests to the app.
27 |
28 | You'll need to package the app before deploying it.
29 | This can be done using the `zip` command.
30 |
31 | ```bash
32 | zip -r circleci-artifacts.zip app.js lambda.js node_modules/ package.json
33 | ```
34 |
35 | The above command will create a `circleci-artifacts.zip` file that contains everything necessary to run the app on Amazon Lambda.
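If you rebuild the zip later, you can push the new code to an existing function without repeating the rest of the deployment. A minimal sketch, assuming the `CircleciArtifactsFunction` name used in [deploy-app.sh](deploy-app.sh):

```bash
# Upload the rebuilt bundle to the existing Lambda function.
aws lambda update-function-code \
    --function-name CircleciArtifactsFunction \
    --zip-file fileb://circleci-artifacts.zip
```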
36 |
37 | The [deploy-app.sh](deploy-app.sh) script walks through all of the steps necessary to actually deploy the app.
38 | Note, however, that it isn't really meant to be run directly.
39 | There is a point where you will need to confirm domain ownership before proceeding.
40 | You'll also need to replace `example.com` with your own domain name.
41 |
42 | The last two pieces of supporting materials are [circleci-artifacts-role-policy-document.json](circleci-artifacts-role-policy-document.json) and [circleci-artifacts-policy.json](circleci-artifacts-policy.json).
43 | These are used by the commands in [deploy-app.sh](deploy-app.sh) to specify the AWS role and policy for the service.
44 |
45 | If any of this is confusing, be sure to check out the original [Extending CircleCI's API with a Custom Microservice on AWS Lambda](https://intoli.com/blog/circleci-artifacts/) article.
46 | This directory is meant to be a supplement to the longer explanations there rather than a replacement.
47 |
--------------------------------------------------------------------------------
/articles/circleci-artifacts/app.js:
--------------------------------------------------------------------------------
1 | const https = require('https');
2 |
3 | const express = require('express');
4 | const app = express();
5 |
6 |
7 | app.get('/artifacts/:username/:project/*', (req, res) => {
8 | // Mandatory positional arguments.
9 | const file = req.params[0];
10 | const { project, username } = req.params;
11 |
12 | // Optional query string parameters.
13 | const branch = req.query.branch || 'master';
14 | const build = req.query.build || 'latest';
15 | const filter = req.query.filter || 'successful';
16 | const vcsType = req.query.vcsType || 'github';
17 |
18 | // Construct the request options for hitting CircleCI's API.
19 | const requestOptions = {
20 | hostname: 'circleci.com',
21 | path: `/api/v1.1/project/${vcsType}/${username}/${project}` +
22 | `/${build}/artifacts?branch=${branch}&filter=${filter}`,
23 | port: 443,
24 | method: 'GET',
25 | headers: {
26 | 'Accept': 'application/json',
27 | },
28 | };
29 |
30 | // Make the request.
31 | https.get(requestOptions, response => {
32 | // Accumulate the response body.
33 | let body = '';
34 | response.setEncoding('utf8');
35 | response.on('data', data => body += data);
36 |
37 | // Process the complete response.
38 | response.on('end', () => {
39 | try {
40 | // Loop through and try to find the specified artifact.
41 | const artifacts = JSON.parse(body);
42 | for (let i = 0; i < artifacts.length; i++) {
43 | const artifact = artifacts[i];
44 | if (artifact.path === file) {
45 | // Redirect to the artifact URL if we can find it.
46 | return res.redirect(303, artifact.url);
47 | }
48 | }
49 | // Return a 404 if there are no matching artifacts.
50 | return res.status(404).send('Not found.');
51 | } catch (e) {
52 | console.error(e);
53 | return res.status(500).send(`Something went wrong: ${e.message}`);
54 | }
55 | });
56 | });
57 | });
58 |
59 |
60 | // Run the app when the file is being run as a script.
61 | if (!module.parent) {
62 | app.listen(3000, () => console.log('Listening on port 3000!'))
63 | }
64 |
65 | // Export the app for use with lambda.
66 | module.exports = app;
67 |
--------------------------------------------------------------------------------
/articles/circleci-artifacts/circleci-artifacts-policy.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Effect": "Allow",
6 | "Action": [
7 | "apigateway:*"
8 | ],
9 | "Resource": "arn:aws:apigateway:*::/*"
10 | },
11 | {
12 | "Effect": "Allow",
13 | "Action": [
14 | "execute-api:Invoke"
15 | ],
16 | "Resource": "arn:aws:execute-api:*:*:*"
17 | },
18 | {
19 | "Effect": "Allow",
20 | "Action": [
21 | "lambda:*"
22 | ],
23 | "Resource": "*"
24 | }
25 | ]
26 | }
27 |
--------------------------------------------------------------------------------
/articles/circleci-artifacts/circleci-artifacts-role-policy-document.json:
--------------------------------------------------------------------------------
1 | {
2 | "Version": "2012-10-17",
3 | "Statement": [
4 | {
5 | "Effect": "Allow",
6 | "Principal": {
7 | "Service": [
8 | "apigateway.amazonaws.com",
9 | "lambda.amazonaws.com"
10 | ]
11 | },
12 | "Action": "sts:AssumeRole"
13 | }
14 | ]
15 | }
16 |
--------------------------------------------------------------------------------
/articles/circleci-artifacts/deploy-app.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 |
3 |
4 | # Create the initial role.
5 | response="$(aws iam create-role \
6 | --role-name CircleciArtifactsRole \
7 | --assume-role-policy-document file://circleci-artifacts-role-policy-document.json)"
8 | # Echo the response in the terminal.
9 | echo "${response}"
10 | # Store the role ARN for future usage.
11 | role_arn="$(jq -r .Role.Arn <<< "${response}")"
12 |
13 |
14 | # Attach the policy.
15 | aws iam put-role-policy \
16 | --role-name CircleciArtifactsRole \
17 | --policy-name CircleciArtifactsPolicy \
18 | --policy-document file://circleci-artifacts-policy.json
19 |
20 |
21 | # Create the lambda function.
22 | response="$(aws lambda create-function \
23 | --function-name CircleciArtifactsFunction \
24 | --zip-file fileb://circleci-artifacts.zip \
25 | --handler lambda.handler \
26 | --runtime nodejs6.10 \
27 | --role "${role_arn}")"
28 | # Echo the response in the terminal.
29 | echo "${response}"
30 | # Store the function ARN for future usage.
31 | function_arn="$(jq -r .FunctionArn <<< "${response}")"
32 |
33 |
34 | # Create a new API.
35 | response="$(aws apigateway create-rest-api \
36 | --name CircleciArtifactsApi \
37 | --endpoint-configuration types=REGIONAL)"
38 | # Echo the response in the terminal.
39 | echo "${response}"
40 | # Store the API ID for future usage.
41 | api_id="$(jq -r .id <<< "${response}")"
42 |
43 |
44 | # Fetch the API resources.
45 | response="$(aws apigateway get-resources \
46 | --rest-api-id "${api_id}")"
47 | # Echo the response in the terminal.
48 | echo "${response}"
49 | # Store the root resource ID for future usage.
50 | root_resource_id="$(jq -r .items[0].id <<< "${response}")"
51 |
52 |
53 | # Create a new API resource.
54 | response="$(aws apigateway create-resource \
55 | --rest-api-id "${api_id}" \
56 | --parent-id "${root_resource_id}" \
57 | --path-part '{proxy+}')"
58 | # Echo the response in the terminal.
59 | echo "${response}"
60 | # Store the proxy resource ID for future usage.
61 | proxy_resource_id="$(jq -r .id <<< "${response}")"
62 |
63 |
64 | # Allow GET methods on the resource.
65 | aws apigateway put-method \
66 | --rest-api-id "${api_id}" \
67 | --resource-id "${proxy_resource_id}" \
68 | --http-method GET \
69 | --authorization-type NONE
70 |
71 |
72 | # Integrate the endpoint with the Lambda function.
73 | aws apigateway put-integration \
74 | --rest-api-id "${api_id}" \
75 | --resource-id "${proxy_resource_id}" \
76 | --http-method GET \
77 | --integration-http-method POST \
78 | --type AWS_PROXY \
79 | --uri "arn:aws:apigateway:us-east-2:lambda:path/2015-03-31/functions/${function_arn}/invocations" \
80 | --credentials "${role_arn}"
81 |
82 |
83 | # Deploy the API.
84 | aws apigateway create-deployment \
85 | --rest-api-id "${api_id}" \
86 | --stage-name v1
87 |
88 |
89 | # Request a certificate.
90 | response="$(aws acm request-certificate \
91 | --domain-name circleci.example.com \
92 | --validation-method DNS \
93 | --idempotency-token 1111)"
94 | # Echo the response in the terminal.
95 | echo "${response}"
96 | # Store the certificate ID for future usage.
97 | certificate_arn="$(jq -r .CertificateArn <<< "${response}")"
98 |
99 |
100 | echo "NOTE: You must actually verify your domain ownership before doing the next steps, exiting..."
101 | exit 0
102 |
103 |
104 | # Create an API Gateway domain name.
105 | aws apigateway create-domain-name \
106 | --domain-name circleci.example.com \
107 | --endpoint-configuration types=REGIONAL \
108 | --regional-certificate-arn "${certificate_arn}"
109 |
110 |
111 | # Map the domain to the API.
112 | aws apigateway create-base-path-mapping \
113 | --domain-name circleci.example.com \
114 | --rest-api-id "${api_id}" \
115 | --stage v1
116 |
--------------------------------------------------------------------------------
/articles/circleci-artifacts/lambda.js:
--------------------------------------------------------------------------------
1 | const awsServerlessExpress = require('aws-serverless-express');
2 | const app = require('./app');
3 | const server = awsServerlessExpress.createServer(app);
4 |
5 | exports.handler = (event, context) => (
6 | awsServerlessExpress.proxy(server, event, context)
7 | );
8 |
--------------------------------------------------------------------------------
/articles/circleci-artifacts/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "aws-serverless-express": "^3.0.2",
4 | "express": "^4.16.2"
5 | }
6 | }
7 |
--------------------------------------------------------------------------------
/articles/clear-the-chrome-browser-cache/README.md:
--------------------------------------------------------------------------------
1 | # How to Clear the Chrome Browser Cache With Selenium WebDriver/ChromeDriver
2 |
3 | [How to Clear the Chrome Browser Cache With Selenium WebDriver/ChromeDriver](https://intoli.com/blog/clear-the-chrome-browser-cache/) develops a method to clear the Chrome browser cache in Selenium.
4 |
5 | - [clear_chrome_cache.py](clear_chrome_cache.py) - Defines the `clear_cache()` method.
6 |
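A minimal usage sketch (not part of the article), assuming `chromedriver` is on your `PATH` and the script is importable as `clear_chrome_cache`:

```python
from selenium import webdriver

from clear_chrome_cache import clear_cache

# Launch Chrome, load a page so there is something to clear, then clear it.
driver = webdriver.Chrome()
driver.get('https://intoli.com')
clear_cache(driver)
driver.quit()
```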
--------------------------------------------------------------------------------
/articles/clear-the-chrome-browser-cache/clear_chrome_cache.py:
--------------------------------------------------------------------------------
1 | from selenium.webdriver.support.ui import WebDriverWait
2 |
3 |
4 | def get_clear_browsing_button(driver):
5 | """Find the "CLEAR BROWSING BUTTON" on the Chrome settings page."""
6 | return driver.find_element_by_css_selector('* /deep/ #clearBrowsingDataConfirm')
7 |
8 |
9 | def clear_cache(driver, timeout=60):
10 | """Clear the cookies and cache for the ChromeDriver instance."""
11 | # navigate to the settings page
12 | driver.get('chrome://settings/clearBrowserData')
13 |
14 | # wait for the button to appear
15 | wait = WebDriverWait(driver, timeout)
16 | wait.until(get_clear_browsing_button)
17 |
18 | # click the button to clear the cache
19 | get_clear_browsing_button(driver).click()
20 |
21 | # wait for the button to be gone before returning
22 | wait.until_not(get_clear_browsing_button)
23 |
--------------------------------------------------------------------------------
/articles/clear-the-firefox-browser-cache/README.md:
--------------------------------------------------------------------------------
1 | # How to Clear the Firefox Browser Cache With Selenium WebDriver/geckodriver
2 |
3 | [How to Clear the Firefox Browser Cache With Selenium WebDriver/geckodriver](https://intoli.com/blog/clear-the-firefox-browser-cache/) shows how to clear the Firefox site data, including the cache and cookies, with Selenium.
4 | There are two versions of the script, both described in detail in the article.
5 |
6 | 1. [clear_firefox_61_cache.py](clear_firefox_61_cache.py) - Written for Firefox 61 released on June 26, 2018. The script will keep working until Firefox's slow-changing preferences page is modified by Mozilla.
7 | 2. [clear_firefox_57_cache.py](clear_firefox_57_cache.py) - Written for Firefox 57 released on November 14, 2017. The script should work on versions of Firefox with a similar preferences page.
8 |
9 | The scripts both work in the same way: they visit `about:preferences#privacy` and automate interactions with the interface there to clear the cache.
10 | To use either one, simply call the `clear_firefox_cache()` function that it defines.
11 | See the [evaluate-clear-cache.py](evaluate-clear-cache.py) script for a complete usage example (it assumes Firefox 61, at least).
12 |
13 | You'll need to have `geckodriver` installed on your system (on Linux, it's typically available from your package manager), as well as Selenium.
14 | If you're using [clear_firefox_61_cache.py](clear_firefox_61_cache.py), make sure that you have Selenium v3.14.0 or above installed.
15 | You can install it globally via `pip` with:
16 |
17 | ```bash
18 | pip install --user selenium
19 | ```
20 |
21 | or upgrade your existing version with
22 |
23 | ```bash
24 | pip install --user -U selenium
25 | ```
26 |
27 | Then, run the script with
28 |
29 | ```bash
30 | python evaluate-clear-cache.py
31 | ```
32 |
--------------------------------------------------------------------------------
/articles/clear-the-firefox-browser-cache/clear_firefox_57_cache.py:
--------------------------------------------------------------------------------
1 | from selenium.webdriver.common.alert import Alert
2 | from selenium.webdriver.support import expected_conditions as EC
3 | from selenium.webdriver.support.ui import WebDriverWait
4 |
5 |
6 | def get_clear_cache_button(driver):
7 | return driver.find_element_by_css_selector('#clearCacheButton')
8 |
9 |
10 | def get_clear_site_data_button(driver):
11 | return driver.find_element_by_css_selector('#clearSiteDataButton')
12 |
13 |
14 | def clear_firefox_cache(driver, timeout=10):
15 | driver.get('about:preferences#privacy')
16 | wait = WebDriverWait(driver, timeout)
17 |
18 | # Click the "Clear Now" button under "Cached Web Content"
19 | wait.until(get_clear_cache_button)
20 | get_clear_cache_button(driver).click()
21 |
22 | # Click the "Clear All Data" button under "Site Data" and accept the alert
23 | wait.until(get_clear_site_data_button)
24 | get_clear_site_data_button(driver).click()
25 |
26 | wait.until(EC.alert_is_present())
27 | alert = Alert(driver)
28 | alert.accept()
29 |
--------------------------------------------------------------------------------
/articles/clear-the-firefox-browser-cache/clear_firefox_61_cache.py:
--------------------------------------------------------------------------------
1 | from selenium.webdriver.common.alert import Alert
2 | from selenium.webdriver.support import expected_conditions as EC
3 | from selenium.webdriver.support.ui import WebDriverWait
4 |
5 |
6 | dialog_selector = '#dialogOverlay-0 > groupbox:nth-child(1) > browser:nth-child(2)'
7 |
8 | accept_dialog_script = (
9 | f"const browser = document.querySelector('{dialog_selector}');" +
10 | "browser.contentDocument.documentElement.querySelector('#clearButton').click();"
11 | )
12 |
13 |
14 | def get_clear_site_data_button(driver):
15 | return driver.find_element_by_css_selector('#clearSiteDataButton')
16 |
17 |
18 | def get_clear_site_data_dialog(driver):
19 | return driver.find_element_by_css_selector(dialog_selector)
20 |
21 |
22 | def get_clear_site_data_confirmation_button(driver):
23 | return driver.find_element_by_css_selector('#clearButton')
24 |
25 |
26 | def clear_firefox_cache(driver, timeout=10):
27 | driver.get('about:preferences#privacy')
28 | wait = WebDriverWait(driver, timeout)
29 |
30 | # Click the "Clear Data..." button under "Cookies and Site Data".
31 | wait.until(get_clear_site_data_button)
32 | get_clear_site_data_button(driver).click()
33 |
34 | # Accept the "Clear Data" dialog by clicking on the "Clear" button.
35 | wait.until(get_clear_site_data_dialog)
36 | driver.execute_script(accept_dialog_script)
37 |
38 | # Accept the confirmation alert.
39 | wait.until(EC.alert_is_present())
40 | alert = Alert(driver)
41 | alert.accept()
42 |
--------------------------------------------------------------------------------
/articles/clear-the-firefox-browser-cache/clear_firefox_cache.py:
--------------------------------------------------------------------------------
1 | from selenium.webdriver.common.alert import Alert
2 | from selenium.webdriver.support import expected_conditions as EC
3 | from selenium.webdriver.support.ui import WebDriverWait
4 |
5 |
6 | def get_clear_cache_button(driver):
7 | return driver.find_element_by_css_selector('#clearCacheButton')
8 |
9 |
10 | def get_clear_site_data_button(driver):
11 | return driver.find_element_by_css_selector('#clearSiteDataButton')
12 |
13 |
14 | def clear_firefox_cache(driver, timeout=10):
15 | driver.get('about:preferences#privacy')
16 | wait = WebDriverWait(driver, timeout)
17 |
18 | # Click the "Clear Now" button under "Cached Web Content"
19 | wait.until(get_clear_cache_button)
20 | get_clear_cache_button(driver).click()
21 |
22 | # Click the "Clear All Data" button under "Site Data" and accept the alert
23 | wait.until(get_clear_site_data_button)
24 | get_clear_site_data_button(driver).click()
25 |
26 | wait.until(EC.alert_is_present())
27 | alert = Alert(driver)
28 | alert.accept()
29 |
--------------------------------------------------------------------------------
/articles/clear-the-firefox-browser-cache/evaluate-clear-cache.py:
--------------------------------------------------------------------------------
1 | from time import sleep
2 | from selenium import webdriver
3 | from clear_firefox_61_cache import clear_firefox_cache
4 |
5 | # Visit a website that places data in local storage
6 | driver = webdriver.Firefox()
7 | driver.get('https://overstock.com')
8 |
9 | # Navigate to the preferences page to see that the cache is not empty.
10 | driver.get('about:preferences#privacy')
11 | sleep(5)
12 |
13 | # Clear the cache and hang around to manually confirm that it worked.
14 | clear_firefox_cache(driver)
15 | sleep(5)
16 |
17 | driver.quit()
18 |
--------------------------------------------------------------------------------
/articles/email-spy/README.md:
--------------------------------------------------------------------------------
1 | # Email Spy
2 |
3 | [Email Spy](https://intoli.com/blog/email-spy/) is an open source browser extension that we developed for finding contact emails for various domains as you browse.
4 | This one was large enough to get its own repository, so there aren't any supplementary materials here, but you can check out the full source code [here](https://github.com/sangaline/email-spy).
5 |
--------------------------------------------------------------------------------
/articles/fantasy-football-for-hackers/README.md:
--------------------------------------------------------------------------------
1 | # Fantasy Football for Hackers
2 |
3 | [Fantasy Football for Hackers](https://intoli.com/blog/fantasy-football-for-hackers/) walks through the process of scraping Fantasy Football projections, calculating player and team points given custom league rules, and then simulating league dynamics to develop baseline-subtracted projections.
4 |
5 |
6 | - [points.py](points.py) - Lays out how to calculate the expected player and team points given projections.
7 | - [scrape-projections.py](scrape-projections.py) - Defines methods for scraping weekly projections from [FantasySharks.com](https://fantasysharks.com).
8 | - [simulation.py](simulation.py) - Develops abstractions for players, teams, and leagues that can be used in simulations to generate baselines for players.
9 |
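As a rough sketch (not from the article) of how the first two pieces fit together, you could score every scraped weekly projection like this, assuming the scraped column names line up with the scoring-rule keys and that `scrape-projections.py` is importable as `scrape_projections` (e.g. after renaming it):

```python
from points import calculate_points
from scrape_projections import scrape_projections  # hypothetical module name for scrape-projections.py

# Print the expected fantasy points for each scraped weekly projection.
for projection in scrape_projections():
    points = calculate_points(projection)
    print(projection['week'], projection['position'], projection['id'], round(points, 2))
```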
--------------------------------------------------------------------------------
/articles/fantasy-football-for-hackers/points.py:
--------------------------------------------------------------------------------
1 | player_rules = {
2 | 'pass yds': 0.04, # Pass Yards
3 | 'pass tds': 4, # Pass Touchdowns
4 | 'int': -2, # Interceptions
5 | 'rush yds': 0.1, # Rush Yards
6 | 'rush tds': 6, # Rush Touchdowns
7 | 'rec yds': 0.1, # Reception Yards
8 | 'rec tds': 6, # Reception Touchdowns
9 | 'fum': -2, # Fumbles
10 | '10-19 fgm': 3, # 10-19 Yard Field Goal
11 | '20-29 fgm': 3, # 20-29 Yard Field Goal
12 | '30-39 fgm': 3, # 30-39 Yard Field Goal
13 | '40-49 fgm': 3, # 40-49 Yard Field Goal
14 | '50+ fgm': 5, # 50+ Yard Field Goal
15 | 'xpm': 1, # Extra Point
16 | }
17 |
18 | def calculate_player_points(performance):
19 | points = 0
20 | for rule, value in player_rules.items():
21 | points += float(performance.get(rule, 0))*value
22 | return points
23 |
24 |
25 | team_rules = {
26 | 'scks': 1, # Sacks
27 | 'int': 2, # Interceptions
28 | 'fum': 2, # Fumbles
29 | 'deftd': 6, # Defensive Touchdowns
30 | 'safts': 2, # Safeties
31 | }
32 |
33 | def calculate_team_points(performance):
34 | points = 0
35 | for rule, value in team_rules.items():
36 | points += float(performance[rule])*value
37 |
38 | # special brackets for "Points Against"
39 | points_against = float(performance['pts agn'])
40 | if points_against == 0:
41 | points += 10
42 | elif points_against < 7:
43 | points += 7
44 | elif points_against < 14:
45 | points += 2
46 |
47 | return points
48 |
49 |
50 | def calculate_points(performance):
51 | if performance['position'] == 'D':
52 | return calculate_team_points(performance)
53 | return calculate_player_points(performance)
54 |
--------------------------------------------------------------------------------
/articles/fantasy-football-for-hackers/scrape-projections.py:
--------------------------------------------------------------------------------
1 | import time
2 | import urllib.request
3 |
4 | from bs4 import BeautifulSoup
5 |
6 |
7 | def fetch_projections_page(week, position_id):
8 | assert 1 <= week <= 17, f'Invalid week: {week}'
9 |
10 | base_url = 'https://www.fantasysharks.com/apps/bert/forecasts/projections.php'
11 | url = f'{base_url}?League=-1&Position={position_id}&scoring=1&Segment={595 + week}&uid=4'
12 |
13 | request = urllib.request.Request(url)
14 | request.add_header('User-Agent', 'projection-scraper 0.1')
15 | with urllib.request.urlopen(request) as response:
16 | return response.read()
17 |
18 |
19 | def scrape_projections():
20 | for week in range(1, 17):
21 | position_map = { 'RB': 2, 'WR': 4, 'TE': 5, 'QB': 1, 'D': 6, 'K': 7 }
22 | for position, position_id in position_map.items():
23 | time.sleep(5) # be polite
24 | html = fetch_projections_page(week, position_map[position])
25 | soup = BeautifulSoup(html, 'lxml')
26 |
27 | table = soup.find('table', id='toolData')
28 | header_row = table.find('tr')
29 | column_names = [th.text for th in header_row.find_all('th')]
30 |
31 | for row in table.find_all('tr'):
32 | column_entries = [tr.text for tr in row.find_all('td')]
33 |
34 | # exclude repeated header rows and the "Tier N" rows
35 | if len(column_entries) != len(column_names):
36 | continue
37 |
38 | # extract Fantasy Shark's player id
39 | player_link = row.find('a')
40 | player_id = int(player_link['href'].split('=')[-1].strip())
41 |
42 | # yield a dictionary of this player's weekly projection
43 | player = { 'id': player_id, 'week': week, 'position': position }
44 | for key, entry in zip(column_names, column_entries):
45 | player[key.lower()] = entry
46 | yield player
47 |
--------------------------------------------------------------------------------
/articles/fantasy-football-for-hackers/simulation.py:
--------------------------------------------------------------------------------
1 | from collections import defaultdict
2 | import random
from points import calculate_points  # scoring helpers defined in points.py
3 |
4 |
5 | class Player:
6 | def __init__(self, id, position, name, team):
7 | self.id = id
8 | self.position = position
9 | self.name = name
10 | self.team = team
11 | self.points_per_week = [0]*18
12 |
13 | def add_projection(self, projection):
14 | assert self.id == projection['id']
15 | self.points_per_week[projection['week']] = calculate_points(projection)
16 |
17 | def season_points(self):
18 | return sum(self.points_per_week)
19 |
20 | def week_points(self, week):
21 | assert 1 <= week <= 17
22 | return self.points_per_week[week]
23 |
24 |
25 | class Team:
26 | allowed_flex_positions = ['RB', 'TE', 'WR']
27 | maximum_players = 18
28 | starting_positions = ['K', 'D', 'FLEX', 'QB', 'RB', 'RB', 'TE', 'WR', 'WR']
29 | weeks = list(range(1, 17))
30 |
31 | def __init__(self):
32 | self.players_by_id = {}
33 |
34 | def add_player(self, player):
35 | assert self.player_count() < self.maximum_players
36 | self.players_by_id[player.id] = player
37 |
38 | def remove_player(self, player):
39 | del self.players_by_id[player.id]
40 |
41 | def clear_players(self):
42 | self.players_by_id = {}
43 |
44 | def players(self):
45 | return self.players_by_id.values()
46 |
47 | def player_count(self):
48 | return len(self.players_by_id)
49 |
50 | def team_full(self):
51 | return self.player_count() == self.maximum_players
52 |
53 | def starters(self, week):
54 | remaining_players = sorted(self.players_by_id.values(),
55 | key=lambda player: player.week_points(week), reverse=True)
56 | starters = []
57 | flex_count = 0
58 | for position in self.starting_positions:
59 | # we'll handle flex players later
60 | if position == 'FLEX':
61 | flex_count += 1
62 | continue
63 | # find the best player with this position
64 | for i, player in enumerate(remaining_players):
65 | if player.position == position:
66 | starters.append(player)
67 | del remaining_players[i]
68 | break
69 |
70 | # do the same for flex players
71 | for i in range(flex_count):
72 | for j, player in enumerate(remaining_players):
73 | if player.position in self.allowed_flex_positions:
74 | starters.append(player)
75 | del remaining_players[j]
break  # move on to the next flex slot
76 |
77 | return starters
78 |
79 | def season_points(self):
80 | return sum((self.week_points(week) for week in self.weeks))
81 |
82 | def week_points(self, week):
83 | return sum((player.week_points(week) for player in self.starters(week)))
84 |
85 |
86 | class League:
87 | number_of_teams = 12
88 | team_class = Team
89 |
90 | def __init__(self, players):
91 | self.teams = [self.team_class() for i in range(self.number_of_teams)]
92 | self.all_players = [player for player in players]
93 | self.available_players = [player for player in players]
94 |
95 |
96 | def clear_teams(self):
97 | self.available_players = [player for player in self.all_players]
98 | for team in self.teams:
99 | team.clear_players()
100 |
101 | def calculate_baselines(self):
102 | projections = defaultdict(list)
103 | for player in self.available_players:
104 | points = sum((player.week_points(week) for week in self.teams[0].weeks))
105 | projections[player.position].append(points)
106 | return { position: max(points) for position, points in projections.items() }
107 |
108 | def optimize_teams(self, same_positions=False):
109 | # cycle through and pick up available players
110 | optimal = False
111 | trades = 0
112 | while not optimal:
113 | optimal = True
114 | for team in sorted(self.teams, key=lambda t: random.random()):
115 | for original_player in list(team.players()):
116 | # find the best trade with available players
117 | original_points = team.season_points()
118 | team.remove_player(original_player)
119 | best_player, best_points = original_player, original_points
120 | for new_player in self.available_players:
121 | if same_positions and new_player.position != original_player.position:
122 | continue
123 | # don't bother computing if the new player is strictly worse
124 | if new_player.position == original_player.position:
125 | for week in team.weeks:
126 | if new_player.week_points(week) > original_player.week_points(week):
127 | break
128 | else:
129 | # strictly worse
130 | continue
131 |
132 | team.add_player(new_player)
133 | new_points = team.season_points()
134 | if new_points > best_points:
135 | best_points = new_points
136 | best_player = new_player
137 | team.remove_player(new_player)
138 |
139 | # update the team if an available player is better
140 | if best_player != original_player:
141 | optimal = False
142 | trades += 1
143 | self.available_players.append(original_player)
144 | self.available_players.remove(best_player)
145 | team.add_player(best_player)
146 | else:
147 | team.add_player(original_player)
148 |
149 | def fill_teams_greedily(self):
150 | self.clear_teams()
151 | for i in range(self.team_class.maximum_players):
152 | for team in sorted(self.teams, key=lambda t: random.random()):
153 | best_player, best_points = None, None
154 | for new_player in self.available_players:
155 | team.add_player(new_player)
156 | new_points = team.season_points()
157 | if not best_player or new_points > best_points:
158 | best_points = new_points
159 | best_player = new_player
160 | team.remove_player(new_player)
161 | team.add_player(best_player)
162 | self.available_players.remove(best_player)
163 |
164 | def randomize_teams(self):
165 | self.clear_teams()
166 | for team in self.teams:
167 | while not team.team_full():
168 | index = random.randint(0, len(self.available_players) - 1)
169 | team.add_player(self.available_players.pop(index))
170 |
171 | def set_weeks(self, weeks):
172 | for team in self.teams:
173 | team.weeks = weeks
174 |
--------------------------------------------------------------------------------
/articles/firefox-extensions-with-selenium/README.md:
--------------------------------------------------------------------------------
1 | # Using Firefox WebExtensions with Selenium
2 |
3 | [Using Firefox WebExtensions with Selenium](https://intoli.com/blog/firefox-extensions-with-selenium/) develops a modified version of Selenium's `webdriver.FirefoxProfile` class that supports the newer Web Extensions add-on format.
4 |
5 | - [firefox_profile.py](firefox_profile.py) - Defines the `FirefoxProfileWithWebExtensionSupport` class.
6 |
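A minimal usage sketch (not part of the article), assuming an unpacked WebExtension lives in an `extension/` directory whose `manifest.json` includes an `applications.gecko.id` entry:

```python
from selenium import webdriver

from firefox_profile import FirefoxProfileWithWebExtensionSupport

profile = FirefoxProfileWithWebExtensionSupport()
# Point this at the unpacked extension directory; an unsigned extension
# may require Firefox Developer Edition or Nightly.
profile.add_extension('extension')
driver = webdriver.Firefox(profile)
```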
--------------------------------------------------------------------------------
/articles/firefox-extensions-with-selenium/firefox_profile.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import sys
4 |
5 | from selenium import webdriver
from selenium.webdriver.firefox.firefox_profile import AddonFormatError
6 |
7 |
8 | class FirefoxProfileWithWebExtensionSupport(webdriver.FirefoxProfile):
9 | def _addon_details(self, addon_path):
10 | try:
11 | return super()._addon_details(addon_path)
12 | except AddonFormatError:
13 | try:
14 | with open(os.path.join(addon_path, 'manifest.json'), 'r') as f:
15 | manifest = json.load(f)
16 | return {
17 | 'id': manifest['applications']['gecko']['id'],
18 | 'version': manifest['version'],
19 | 'name': manifest['name'],
20 | 'unpack': False,
21 | }
22 | except (IOError, KeyError) as e:
23 | raise AddonFormatError(str(e), sys.exc_info()[2])
24 |
--------------------------------------------------------------------------------
/articles/javascript-injection/README.md:
--------------------------------------------------------------------------------
1 | # JavaScript Injection with Selenium, Puppeteer, and Marionette in Chrome and Firefox
2 |
3 | [JavaScript Injection with Selenium, Puppeteer, and Marionette in Chrome and Firefox](https://intoli.com/blog/javascript-injection/) benchmarks a variety of JavaScript injection methods to determine whether the injected code executes before or after code in the webpage being visited.
4 |
5 | The test itself is defined in:
6 |
7 | - [test-page.html](test-page.html) - The page that displays the timing results.
8 |
9 | The direct Selenium, Puppeteer, and Marionette tests are defined in:
10 |
11 | - [marionette-execute-async-script.py](marionette-execute-async-script.py) - The Marionette test script.
12 | - [puppeteer-evaluate-on-new-document.js](puppeteer-evaluate-on-new-document.js) - The Puppeteer test script.
13 | - [selenium-execute-async-script.py](selenium-execute-async-script.py) - The Selenium test script.
14 |
15 | The Web Extension for script injection is then defined in the [extension](extension) subdirectory.
16 |
17 | - [./extension/injected-javascript.js](./extension/injected-javascript.js) - The script to be injected.
18 | - [./extension/manifest.json](./extension/manifest.json) - The manifest for the extension.
19 |
20 | The script for performing the extension test is then located in:
21 |
22 | - [selenium-custom-web-extension.py](selenium-custom-web-extension.py) - Launches Chrome and Firefox with the extension loaded and performs the test.
23 |
24 | Finally, there is a test that uses [mitmproxy](https://mitmproxy.org/) to inject a script tag.
25 | This consists of two parts:
26 |
27 | - [mitm-injector.py](mitm-injector.py) - The injection script.
28 | - [selenium-mitmproxy.py](selenium-mitmproxy.py) - The test script that goes through the proxy.
29 |
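To reproduce the proxy test, the rough sequence is to start mitmproxy with the injection addon and then run the Selenium script from a second terminal (mitmproxy's default port of 8080 matches the `proxy_port` setting in [selenium-mitmproxy.py](selenium-mitmproxy.py)):

```bash
# Terminal 1: run mitmproxy (here the headless mitmdump) with the injection addon.
mitmdump -s mitm-injector.py

# Terminal 2: run the Selenium test that routes Chrome and Firefox through the proxy.
python selenium-mitmproxy.py
```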
--------------------------------------------------------------------------------
/articles/javascript-injection/extension/injected-javascript.js:
--------------------------------------------------------------------------------
1 | ((time) => {
2 | const handleDocumentLoaded = () => {
3 | document.getElementById("injected-time").innerHTML = time;
4 | };
5 | if (document.readyState === "loading") {
6 | document.addEventListener("DOMContentLoaded", handleDocumentLoaded);
7 | } else {
8 | handleDocumentLoaded();
9 | }
10 | })(Date.now());
11 |
--------------------------------------------------------------------------------
/articles/javascript-injection/extension/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": 2,
3 | "name": "JavaScript Injection Test Extension",
4 | "version": "1.0.0",
5 | "applications": {
6 | "gecko": {
7 | "id": "javascript-injection@intoli.com"
8 | }
9 | },
10 | "content_scripts": [
11 | {
12 | "matches": ["*://*/*"],
13 | "js": ["injected-javascript.js"],
14 | "run_at": "document_start"
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/articles/javascript-injection/marionette-execute-async-script.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python2
2 |
3 | import os
4 | import time
5 |
6 | from marionette_driver.marionette import Marionette
7 |
8 |
9 | # The JavaScript that we want to inject.
10 | # `arguments[0]` is how Selenium passes in the callback for `execute_async_script()`.
11 | injected_javascript = (
12 | 'const time = Date.now();'
13 | 'const callback = arguments[0];'
14 | 'const handleDocumentLoaded = () => {'
15 | ' document.getElementById("injected-time").innerHTML = time;'
16 | ' callback();'
17 | '};'
18 | 'if (document.readyState === "loading") {'
19 | ' document.addEventListener("DOMContentLoaded", handleDocumentLoaded);'
20 | '} else {'
21 | ' handleDocumentLoaded();'
22 | '}'
23 | )
24 |
25 | # The location of the Firefox binary, will depend on system.
26 | # Be careful to use the actual binary and not a wrapper script.
27 | binary = '/usr/lib/firefox/firefox'
28 |
29 | # Loop through the four different configurations.
30 | for mode in ['headless', 'graphical']:
31 | # Set up the client with the appropriate settings.
32 | if mode == 'headless':
33 | os.environ['MOZ_HEADLESS'] = '1'
34 | else:
35 | os.environ.pop('MOZ_HEADLESS', None)
36 | client = Marionette('localhost', bin=binary, port=2828)
37 | client.start_session()
38 |
39 |
40 | # Navigate to the test page and inject the JavaScript.
41 | client.navigate('https://intoli.com/blog/javascript-injection/test-page.html')
42 | client.execute_async_script(injected_javascript)
43 |
44 | # Save the results as an image.
45 | filename = os.path.join('img',
46 | 'marionette-execute-async-scripy-firefox-%s-results.png' % mode)
47 | with open(filename, 'wb') as f:
48 | f.write(client.screenshot(format='binary'))
49 | print 'Saved "%s".' % filename
50 |
51 | # Cleanup the client before the next test.
52 | client.cleanup()
53 |
--------------------------------------------------------------------------------
/articles/javascript-injection/mitm-injector.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | from mitmproxy import ctx
3 |
4 |
5 | # Load in the javascript to inject.
6 | with open('extension/injected-javascript.js', 'r') as f:
7 | injected_javascript = f.read()
8 |
9 | def response(flow):
10 | # Only process 200 responses of HTML content.
11 | if flow.response.headers.get('Content-Type', '') != 'text/html':
12 | return
13 | if not flow.response.status_code == 200:
14 | return
15 |
16 | # Inject a script tag containing the JavaScript.
17 | html = BeautifulSoup(flow.response.text, 'lxml')
18 | container = html.head or html.body
19 | if container:
20 | script = html.new_tag('script', type='text/javascript')
21 | script.string = injected_javascript
22 | container.insert(0, script)
23 | flow.response.text = str(html)
24 |
25 | ctx.log.info('Successfully injected the `injected-javascript.js` script.')
26 |
--------------------------------------------------------------------------------
/articles/javascript-injection/puppeteer-evaluate-on-new-document.js:
--------------------------------------------------------------------------------
1 | const puppeteer = require('puppeteer');
2 |
3 | const runTest = async (mode) => {
4 | const browser = await puppeteer.launch({
5 | args: ['--no-sandbox'],
6 | headless: mode === 'headless',
7 | });
8 | const page = await browser.newPage();
9 | await page.evaluateOnNewDocument(() => {
10 | const time = Date.now();
11 | const handleDocumentLoaded = () => {
12 | document.getElementById("injected-time").innerHTML = time;
13 | };
14 | if (document.readyState === "loading") {
15 | document.addEventListener("DOMContentLoaded", handleDocumentLoaded);
16 | } else {
17 | handleDocumentLoaded();
18 | }
19 | });
20 | await page.goto('https://intoli.com/blog/javascript-injection/test-page.html');
21 | const filename = `img/puppeteer-evaluate-on-new-document-chrome-${mode}.png`;
22 | await page.screenshot({ path: filename });
23 | console.log(`Saved "${filename}".`);
24 |
25 | await browser.close();
26 | };
27 |
28 | (async () => {
29 | await runTest('headless');
30 | await runTest('graphical');
31 | })();
32 |
--------------------------------------------------------------------------------
/articles/javascript-injection/selenium-custom-web-extension.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import sys
4 |
5 | from selenium import webdriver
6 | from selenium.webdriver.firefox.firefox_profile import AddonFormatError
7 |
8 |
9 | # This must be the developer edition to use an unsigned extension.
10 | firefox_binary = '/usr/bin/firefox-developer-edition'
11 | extension_directory = 'extension'
12 |
13 |
14 | # Patch in support for WebExtensions in Firefox.
15 | # See: https://intoli.com/blog/firefox-extensions-with-selenium/
16 | class FirefoxProfileWithWebExtensionSupport(webdriver.FirefoxProfile):
17 | def _addon_details(self, addon_path):
18 | try:
19 | return super()._addon_details(addon_path)
20 | except AddonFormatError:
21 | try:
22 | with open(os.path.join(addon_path, 'manifest.json'), 'r') as f:
23 | manifest = json.load(f)
24 | return {
25 | 'id': manifest['applications']['gecko']['id'],
26 | 'version': manifest['version'],
27 | 'name': manifest['name'],
28 | 'unpack': False,
29 | }
30 | except (IOError, KeyError) as e:
31 | raise AddonFormatError(str(e), sys.exc_info()[2])
32 | webdriver.FirefoxProfile = FirefoxProfileWithWebExtensionSupport
33 |
34 |
35 | # Loop through the four different configurations.
36 | for browser in ['chrome', 'firefox']:
37 | for mode in ['headless', 'graphical']:
38 | # Set up the driver with the appropriate settings.
39 | if browser == 'chrome':
40 | options = webdriver.ChromeOptions()
41 | if mode == 'headless':
42 | options.add_argument('headless')
43 | options.add_argument(f'load-extension={extension_directory}')
44 | driver = webdriver.Chrome(chrome_options=options)
45 | elif browser == 'firefox':
46 | if mode == 'headless':
47 | os.environ['MOZ_HEADLESS'] = '1'
48 | elif mode == 'graphical':
49 | os.environ.pop('MOZ_HEADLESS', None)
50 | profile = webdriver.FirefoxProfile()
51 | profile.add_extension(extension_directory)
52 | driver = webdriver.Firefox(profile, firefox_binary=firefox_binary)
53 |
54 | # Navigate to the test page and let the extension do its thing.
55 | driver.get('https://intoli.com/blog/javascript-injection/test-page.html')
56 |
57 | # Save the results as an image.
58 | os.makedirs('img', exist_ok=True)
59 | filename = os.path.join('img',
60 | f'selenium-custom-web-extension-{browser}-{mode}-results.png')
61 | driver.get_screenshot_as_file(filename)
62 | print(f'Saved "{filename}".')
63 |
64 | # Cleanup the driver before the next test.
65 | driver.quit()
66 |
--------------------------------------------------------------------------------
/articles/javascript-injection/selenium-execute-async-script.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from selenium import webdriver
4 |
5 |
6 | # The JavaScript that we want to inject.
7 | # `arguments[0]` is how Selenium passes in the callback for `execute_async_script()`.
8 | injected_javascript = (
9 | 'const time = Date.now();'
10 | 'const callback = arguments[0];'
11 | 'const handleDocumentLoaded = () => {'
12 | ' document.getElementById("injected-time").innerHTML = time;'
13 | ' callback();'
14 | '};'
15 | 'if (document.readyState === "loading") {'
16 | ' document.addEventListener("DOMContentLoaded", handleDocumentLoaded);'
17 | '} else {'
18 | ' handleDocumentLoaded();'
19 | '}'
20 | )
21 |
22 |
23 | # Loop through the four different configurations.
24 | for browser in ['chrome', 'firefox']:
25 | for mode in ['headless', 'graphical']:
26 | # Set up the driver with the appropriate settings.
27 | if browser == 'chrome':
28 | options = webdriver.ChromeOptions()
29 | if mode == 'headless':
30 | options.add_argument('headless')
31 | driver = webdriver.Chrome(chrome_options=options)
32 | elif browser == 'firefox':
33 | if mode == 'headless':
34 | os.environ['MOZ_HEADLESS'] = '1'
35 | elif mode == 'graphical':
36 | os.environ.pop('MOZ_HEADLESS', None)
37 | driver = webdriver.Firefox()
38 |
39 | # Navigate to the test page and inject the JavaScript.
40 | driver.get('https://intoli.com/blog/javascript-injection/test-page.html')
41 | driver.execute_async_script(injected_javascript)
42 |
43 | # Save the results as an image.
44 | os.makedirs('img', exist_ok=True)
45 | filename = os.path.join('img',
46 | f'selenium-execute-async-script-{browser}-{mode}-results.png')
47 | driver.get_screenshot_as_file(filename)
48 | print(f'Saved "{filename}".')
49 |
50 | # Cleanup the driver before the next test.
51 | driver.quit()
52 |
--------------------------------------------------------------------------------
/articles/javascript-injection/selenium-mitmproxy.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from selenium import webdriver
4 |
5 |
6 | # The proxy settings.
7 | proxy_host = 'localhost'
8 | proxy_port = 8080
9 |
10 | # Loop through the four different configurations.
11 | for browser in ['chrome', 'firefox']:
12 | for mode in ['headless', 'graphical']:
13 | # Set up the driver with the appropriate settings.
14 | if browser == 'chrome':
15 | # Enable headless mode.
16 | options = webdriver.ChromeOptions()
17 | if mode == 'headless':
18 | options.add_argument('--headless')
19 |
20 | # Specify the proxy.
21 | options.add_argument('--proxy-server=%s:%s' % (proxy_host, proxy_port))
22 |
23 | # Launch Chrome.
24 | driver = webdriver.Chrome(chrome_options=options)
25 |
26 | elif browser == 'firefox':
27 | # Enable headless mode.
28 | if mode == 'headless':
29 | os.environ['MOZ_HEADLESS'] = '1'
30 | elif mode == 'graphical':
31 | os.environ.pop('MOZ_HEADLESS', None)
32 |
33 | firefox_profile = webdriver.FirefoxProfile()
34 | # Specify to use manual proxy configuration.
35 | firefox_profile.set_preference('network.proxy.type', 1)
36 | # Set the host/port.
37 | firefox_profile.set_preference('network.proxy.http', proxy_host)
38 | firefox_profile.set_preference('network.proxy.http_port', proxy_port)
39 | firefox_profile.set_preference('network.proxy.ssl', proxy_host)
40 | firefox_profile.set_preference('network.proxy.ssl_port', proxy_port)
41 |
42 | # Launch Firefox.
43 | driver = webdriver.Firefox(firefox_profile=firefox_profile)
44 |
45 | # Navigate to the test page and inject the JavaScript.
46 | driver.get('https://intoli.com/blog/javascript-injection/test-page.html')
47 |
48 | # Save the results as an image.
49 | os.makedirs('img', exist_ok=True)
50 | filename = os.path.join('img',
51 | f'selenium-mitmproxy-{browser}-{mode}-results.png')
52 | driver.get_screenshot_as_file(filename)
53 | print(f'Saved "{filename}".')
54 |
55 | # Cleanup the driver before the next test.
56 | driver.quit()
57 |
--------------------------------------------------------------------------------
/articles/javascript-injection/test-page.html:
--------------------------------------------------------------------------------
(test-page.html: the original HTML markup did not survive extraction. The page inlines a script that records `Date.now()` as it runs and displays two labeled fields, "Inlined Script Time:" and "Injected Script Time:"; the latter is presumably backed by the element with id `injected-time` that the injection scripts in this article populate.)
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/README.md:
--------------------------------------------------------------------------------
1 | # How to Run a Keras Model in the Browser with Keras.js
2 |
3 | [How to Run a Keras Model in the Browser with Keras.js](https://intoli.com/blog/keras-weight-transfer) is a worked out end-to-end example explaining how to export weights from a [Keras](https://keras.io/) model, and then import and use them in the browser via [keras-js](https://github.com/transcranial/keras-js).
4 | Since the article was originally written, the `keras-js` project has improved its utilities and documentation, so the main remaining difficulty is using compatible versions of the packages involved in the process.
5 |
6 |
7 | ## Export the Weights
8 |
9 | The model in question ([neural-net/mnist-cnn.py](neural-net/mnist-cnn.py)) is a version of [Keras's sample MNIST classifier](https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py) modified to train quickly (by restricting the data and limiting the training to one epoch).
10 | To get started, you need to first export the weights from this model.
11 | Clone this repo, then `cd` to the [neural-net](neural-net/) folder, and start and activate a new virtualenv:
12 |
13 | ```bash
14 | cd neural-net
15 | virtualenv env
16 | . env/bin/activate
17 | ```
18 |
19 | Install the python requirements:
20 |
21 | ```bash
22 | pip install -r requirements.txt
23 | ```
24 |
25 | Train and save the model to `model.h5`:
26 |
27 | ```bash
28 | python ./mnist-cnn.py
29 | ```
30 |
31 | Download [a compatible version of the model preparation script and its dependency](https://github.com/transcranial/keras-js/tree/a5e6d2cc330ec8d979310bd17a47f07882fac778/python) from the keras-js repo:
32 |
33 | ```bash
34 | bash ./download-encoder.sh
35 | ```
36 |
37 | Finally, prepare the model with:
38 |
39 | ```bash
40 | python ./encoder.py -q model.h5
41 | ```
42 |
43 | This will produce a `model.bin` file that can be used in the `filepath` option of a [`keras-js` Model](https://transcranial.github.io/keras-js-docs/usage/).
44 | I used Python 3.6 for this example, but things should work with Python 2 as well.
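On the JavaScript side, [frontend/src/index.js](frontend/src/index.js) loads the exported file roughly like this (trimmed here for reference):

```javascript
import { Model } from 'keras-js';
import sample from './sample';

// `model.bin` is the file produced by encoder.py above.
const model = new Model({ filepath: 'model.bin' });
model.ready()
  .then(() => model.predict({ input: new Float32Array(sample) }))
  .then(({ output }) => console.log(output));
```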
45 |
46 |
47 | ## Set Up the Frontend
48 |
49 | To actually use these files, you need to run the [frontend/src/index.js](frontend/src/index.js) script in the browser.
50 | The included [webpack](https://webpack.js.org/) config can help you get started.
51 | First, install the project's JavaScript build and runtime requirements with
52 |
53 | ```bash
54 | cd frontend/
55 | yarn install
56 | ```
57 |
58 | Make sure that `model.bin` from above exists, and execute
59 |
60 | ```bash
61 | yarn watch
62 | ```
63 |
64 | to start a live-reloading development server accessible at `localhost:3000`.
65 | When you visit that address in a browser like Chrome, the page should go from showing `Loading...` to
66 |
67 | ```literal
68 | Predicted 3 with probability 0.297.
69 | ```
70 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/frontend/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": [
3 | "env",
4 | "stage-2"
5 | ]
6 | }
7 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/frontend/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "parser": "babel-eslint",
3 | "extends": "airbnb",
4 | "env": {
5 | "browser": true,
6 | "es6": true,
7 | "node": true
8 | },
9 | "settings": {
10 | "import/resolver": {
11 | "webpack": {
12 | "config": "./webpack.config.js"
13 | }
14 | }
15 | },
16 | "rules": {
17 | "class-methods-use-this": "off",
18 | "function-paren-newline": "off",
19 | "object-curly-newline": ["error", {
20 | "consistent": true,
21 | "minProperties": 5
22 | }]
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "scripts": {
3 | "lint": "eslint --ignore-path .gitignore --ext js,jsx src",
4 | "watch": "NODE_ENV=development webpack-dev-server --config webpack.config.js --hot"
5 | },
6 | "devDependencies": {
7 | "babel-core": "^6.26.0",
8 | "babel-eslint": "^8.2.2",
9 | "babel-loader": "^7.1.3",
10 | "babel-plugin-import": "^1.6.5",
11 | "babel-preset-env": "^1.6.1",
12 | "babel-preset-stage-2": "^6.24.1",
13 | "copy-webpack-plugin": "^4.5.0",
14 | "eslint": "^4.18.2",
15 | "eslint-config-airbnb": "^16.1.0",
16 | "eslint-loader": "^2.0.0",
17 | "eslint-plugin-import": "^2.9.0",
18 | "eslint-plugin-jsx-a11y": "^6.0.3",
19 | "eslint-plugin-react": "^7.7.0",
20 | "html-webpack-plugin": "^3.0.4",
21 | "url-loader": "^1.0.1",
22 | "webpack": "3.10.0",
23 | "webpack-cli": "2.0.9",
24 | "webpack-dev-server": "2.11.2"
25 | },
26 | "dependencies": {
27 | "keras-js": "1.0.3"
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/frontend/src/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Keras Weight Transfer
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/frontend/src/index.js:
--------------------------------------------------------------------------------
1 | import { Model } from 'keras-js';
2 | import sample from './sample';
3 |
4 |
5 | document.addEventListener('DOMContentLoaded', () => {
6 | document.write('Loading...');
7 | });
8 |
9 | // Make sure to copy model.bin to the public directory.
10 | const model = new Model({
11 | filepath: 'model.bin',
12 | });
13 |
14 | // Perform a prediction and write the results to the console.
15 | model.ready()
16 | .then(() => model.predict({
17 | input: new Float32Array(sample),
18 | }))
19 | .then(({ output }) => {
20 | let predictionProbability = -1;
21 | let predictedDigit = null;
22 | Object.entries(output).forEach(([digit, probability]) => {
23 | if (probability > predictionProbability) {
24 | predictionProbability = probability;
25 | predictedDigit = digit;
26 | }
27 | });
28 | document.write(
29 | `Predicted ${predictedDigit} with probability ${predictionProbability.toFixed(3)}.`,
30 | );
31 | })
32 | .catch((error) => {
33 | console.log(error);
34 | });
35 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/frontend/src/sample.js:
--------------------------------------------------------------------------------
1 | // eslint-disable-next-line max-len
2 | export default [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4509804, 0.4745098, 0.9137255, 0.85490197, 0.4745098, 0.4745098, 0.4745098, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.22352941, 0.94509804, 0.9843137, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.93333334, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05882353, 0.36862746, 0.67058825, 0.9411765, 0.99215686, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.46666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.16078432, 0.9882353, 0.9882353, 0.9882353, 0.99215686, 0.85490197, 0.6745098, 0.6745098, 0.53333336, 0.15294118, 0.7254902, 0.9882353, 0.9882353, 0.46666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.11764706, 0.7254902, 0.7254902, 0.7254902, 0.20784314, 0.12156863, 0.0, 0.0, 0.0, 0.05882353, 0.7607843, 0.9882353, 0.9882353, 0.46666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.19607843, 0.78431374, 0.9882353, 0.9882353, 0.9764706, 0.36862746, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13333334, 0.6509804, 0.9882353, 0.9882353, 0.9137255, 0.29411766, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.047058824, 0.3019608, 0.93333334, 0.9882353, 0.9882353, 0.83137256, 0.3254902, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03137255, 0.05490196, 0.5764706, 0.7490196, 0.9882353, 0.9882353, 0.972549, 0.8235294, 0.12941177, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.57254905, 0.9882353, 0.99215686, 0.9882353, 0.9882353, 0.9882353, 0.7254902, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5764706, 0.99215686, 1.0, 0.99215686, 0.99215686, 0.99215686, 0.85490197, 0.37254903, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.33333334, 0.5686275, 0.57254905, 0.5686275, 0.94509804, 0.9882353, 0.9882353, 0.9764706, 0.29803923, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.28627452, 0.92941177, 0.9882353, 0.9882353, 0.30980393, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8352941, 0.9882353, 0.9882353, 0.30980393, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09803922, 0.8745098, 0.9882353, 0.9882353, 0.30980393, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.11372549, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09019608, 0.77254903, 
0.9882353, 0.9882353, 0.9882353, 0.30980393, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.078431375, 0.654902, 0.84313726, 0.5137255, 0.11764706, 0.0, 0.0, 0.0, 0.0, 0.08627451, 0.16078432, 0.78431374, 0.9882353, 0.9882353, 0.94509804, 0.72156864, 0.09803922, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4745098, 0.9882353, 0.9882353, 0.9882353, 0.8901961, 0.627451, 0.627451, 0.627451, 0.627451, 0.8156863, 0.99215686, 0.9882353, 0.9882353, 0.8980392, 0.3764706, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.6313726, 0.98039216, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.99215686, 0.9372549, 0.7411765, 0.06666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.41960785, 0.6313726, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.9882353, 0.47058824, 0.023529412, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
3 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/frontend/webpack.config.js:
--------------------------------------------------------------------------------
1 | const path = require('path');
2 |
3 | const CopyWebpackPlugin = require('copy-webpack-plugin');
4 | const HtmlWebpackPlugin = require('html-webpack-plugin');
5 |
6 |
7 | const config = {
8 | devServer: {
9 | clientLogLevel: 'info',
10 | contentBase: path.join(__dirname, 'build'),
11 | historyApiFallback: true,
12 | overlay: {
13 | errors: true,
14 | warnings: false,
15 | },
16 | port: 3000,
17 | publicPath: '/',
18 | stats: {
19 | modules: false,
20 | chunks: false,
21 | },
22 | },
23 | devtool: 'cheap-module-source-map',
24 | entry: path.join(__dirname, 'src', 'index.js'),
25 | externals: {
26 | fs: 'empty',
27 | },
28 | module: {
29 | rules: [
30 | {
31 | test: /\.(js|jsx)$/,
32 | exclude: /node_modules/,
33 | enforce: 'pre',
34 | loader: 'eslint-loader',
35 | },
36 | {
37 | test: /\.(js|jsx)$/,
38 | exclude: /node_modules/,
39 | loader: 'babel-loader',
40 | },
41 | ],
42 | },
43 | output: {
44 | filename: 'bundle.js',
45 | path: path.resolve(__dirname, 'build'),
46 | publicPath: '/',
47 | },
48 | plugins: [
49 | new HtmlWebpackPlugin({
50 | inject: true,
51 | template: './src/index.html',
52 | }),
53 | new CopyWebpackPlugin([
54 | {
55 | from: '../neural-net/model.bin',
56 | to: path.join(__dirname, 'build'),
57 | },
58 | ]),
59 | ],
60 | watchOptions: {
61 | ignored: /build/,
62 | },
63 | };
64 |
65 |
66 | module.exports = config;
67 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/neural-net/download-encoder.sh:
--------------------------------------------------------------------------------
1 | # Download encoder.py and its dependency from a commit of keras-js compatible with the code in this
2 | # folder. These are used for preparing an exported Keras model for keras-js. Run with
3 | #
4 | # python encoder.py -p model.h5
5 | #
6 | # to produce an ingestable model.bin file.
7 |
8 | curl https://raw.githubusercontent.com/transcranial/keras-js/a5e6d2cc330ec8d979310bd17a47f07882fac778/python/encoder.py -o encoder.py
9 | curl https://raw.githubusercontent.com/transcranial/keras-js/a5e6d2cc330ec8d979310bd17a47f07882fac778/python/model_pb2.py -o model_pb2.py
10 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/neural-net/mnist-cnn.py:
--------------------------------------------------------------------------------
1 | """
2 | COPYRIGHT
3 |
4 | All contributions by François Chollet:
5 | Copyright (c) 2015, François Chollet.
6 | All rights reserved.
7 |
8 | All contributions by Google:
9 | Copyright (c) 2015, Google, Inc.
10 | All rights reserved.
11 |
12 | All contributions by Microsoft:
13 | Copyright (c) 2017, Microsoft, Inc.
14 | All rights reserved.
15 |
16 | All other contributions:
17 | Copyright (c) 2015 - 2017, the respective contributors.
18 | All rights reserved.
19 |
20 | Each contributor holds copyright over their respective contributions.
21 | The project versioning (Git) records all such contribution source information.
22 |
23 | LICENSE
24 |
25 | The MIT License (MIT)
26 |
27 | Permission is hereby granted, free of charge, to any person obtaining a copy
28 | of this software and associated documentation files (the "Software"), to deal
29 | in the Software without restriction, including without limitation the rights
30 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
31 | copies of the Software, and to permit persons to whom the Software is
32 | furnished to do so, subject to the following conditions:
33 |
34 | The above copyright notice and this permission notice shall be included in all
35 | copies or substantial portions of the Software.
36 |
37 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
43 | SOFTWARE.
44 |
45 | ---
46 |
47 | This is a modification of a Keras example CNN script [1] for the purposes of the
48 | 'How to Run a Keras Model in the Browser with Keras.js' article [2] published on the Intoli blog.
49 |
50 | [1]: https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py
51 | [2]: https://intoli.com/blog/keras-weight-transfer/
52 | """
53 | from __future__ import print_function
54 | import keras
55 | from keras.datasets import mnist
56 | from keras.models import Sequential
57 | from keras.layers import Dense, Dropout, Flatten
58 | from keras.layers import Conv2D, MaxPooling2D
59 | from keras import backend as K
60 |
61 | batch_size = 128
62 | num_classes = 10
63 | epochs = 1 # Note that this script uses only one epoch.
64 |
65 | # Load, restrict, and prepare data.
66 | img_rows, img_cols = 28, 28
67 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
68 |
69 | x_train = x_train[:1280]
70 | y_train = y_train[:1280]
71 |
72 | x_test = x_test[:512]
73 | y_test = y_test[:512]
74 |
75 | if K.image_data_format() == 'channels_first':
76 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
77 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
78 | input_shape = (1, img_rows, img_cols)
79 | else:
80 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
81 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
82 | input_shape = (img_rows, img_cols, 1)
83 |
84 | x_train = x_train.astype('float32')
85 | x_test = x_test.astype('float32')
86 | x_train /= 255
87 | x_test /= 255
88 |
89 | print('x_train shape:', x_train.shape)
90 | print(x_train.shape[0], 'train samples')
91 | print(x_test.shape[0], 'test samples')
92 | print(epochs, 'epochs')
93 |
94 | y_train = keras.utils.to_categorical(y_train, num_classes)
95 | y_test = keras.utils.to_categorical(y_test, num_classes)
96 |
97 | # Define, compile, and train model.
98 | model = Sequential()
99 | model.add(Conv2D(32, kernel_size=(3, 3),
100 | activation='relu',
101 | input_shape=input_shape))
102 | model.add(Conv2D(64, (3, 3), activation='relu'))
103 | model.add(MaxPooling2D(pool_size=(2, 2)))
104 | model.add(Dropout(0.25))
105 | model.add(Flatten())
106 | model.add(Dense(128, activation='relu'))
107 | model.add(Dropout(0.5))
108 | model.add(Dense(num_classes, activation='softmax'))
109 |
110 | model.compile(loss=keras.losses.categorical_crossentropy,
111 | optimizer=keras.optimizers.Adadelta(),
112 | metrics=['accuracy'])
113 |
114 | model.fit(x_train, y_train,
115 | batch_size=batch_size,
116 | epochs=epochs,
117 | verbose=1,
118 | validation_data=(x_test, y_test))
119 | score = model.evaluate(x_test, y_test, verbose=0)
120 |
121 | print('Test loss:', score[0])
122 | print('Test accuracy:', score[1])
123 | print('-' * 80)
124 |
125 | # Export the trained model.
126 | model.save('model.h5')
127 |
--------------------------------------------------------------------------------
/articles/keras-weight-transfer/neural-net/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.1.10
2 | astor==0.6.2
3 | bleach==1.5.0
4 | gast==0.2.0
5 | grpcio==1.10.0
6 | h5py==2.7.1
7 | html5lib==0.9999999
8 | Keras==2.1.2
9 | Markdown==2.6.11
10 | numpy==1.14.1
11 | protobuf==3.5.1
12 | PyYAML==3.12
13 | scipy==1.0.0
14 | six==1.11.0
15 | tensorboard==1.6.0
16 | tensorflow==1.6.0
17 | termcolor==1.1.0
18 | Werkzeug==0.14.1
19 |
--------------------------------------------------------------------------------
/articles/making-chrome-headless-undetectable/README.md:
--------------------------------------------------------------------------------
1 | # Making Chrome Headless Undetectable
2 |
3 | [Making Chrome Headless Undetectable](https://intoli.com/blog/making-chrome-headless-undetectable/) is a response to a set of JavaScript-based tests that were floating around the internet as a way to block users of headless browsers.
4 | It shows that these tests have high false positive rates and can be easily bypassed.
5 |
6 | The tests were implemented as a web page that displays the results in a visual table.
7 | The code for the tests is located in:
8 |
9 | - [chrome-headless-test.html](chrome-headless-test.html) - The page that defines the results table and imports the test script.
10 | - [chrome-headless-test.js](chrome-headless-test.js) - The associated JavaScript that performs the actual tests and populates the table.
11 |
12 | The tests are then bypassed by injecting JavaScript into the page before it loads.
13 |
14 | - [injected-test-bypasses.js](injected-test-bypasses.js) - The test bypasses that are developed in the article.
15 | - [inject.py](inject.py) - A [mitmproxy](https://mitmproxy.org/) script for injecting `injected-test-bypasses.js`.
16 | - [test-headless.js](test-headless.js) - A browser automation script written using the [Chrome DevTools Protocol](https://chromedevtools.github.io/devtools-protocol/) which visits the test page and records a screenshot of the results.
17 |
18 | Details for running the proxy and installing the dependencies can be found in [the original article](https://intoli.com/blog/making-chrome-headless-undetectable/).
19 |
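For convenience, a typical local run looks something like the following; this is only a sketch, and the port, Chrome binary name, and flags may need to be adjusted for your system.

```bash
# Start mitmproxy with the injection script (it listens on port 8080 by default).
mitmproxy -s inject.py

# In another terminal, launch headless Chrome through the proxy with remote debugging enabled.
google-chrome --headless --remote-debugging-port=9222 --proxy-server=localhost:8080

# Finally, run the automation script to capture a screenshot of the test results.
node test-headless.js
```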
--------------------------------------------------------------------------------
/articles/making-chrome-headless-undetectable/chrome-headless-test.html:
--------------------------------------------------------------------------------
[HTML markup not preserved. The page, titled "Chrome Headless Detection", consists of a `<style>` block and a results table with "Test Name" and "Result" columns and one row per test: User Agent, Plugins Length, Languages, WebGL Vendor, WebGL Renderer, Hairline Feature, and Broken Image Dimensions. The result cells are populated by the chrome-headless-test.js script, which the page loads along with Modernizr for the hairline test.]
--------------------------------------------------------------------------------
/articles/making-chrome-headless-undetectable/chrome-headless-test.js:
--------------------------------------------------------------------------------
1 | // User-Agent Test
2 | const userAgentElement = document.getElementById('user-agent');
3 | userAgentElement.innerHTML = window.navigator.userAgent;
4 | if (/HeadlessChrome/.test(window.navigator.userAgent)) {
5 | userAgentElement.classList.add('failed');
6 | }
7 |
8 | // Plugins Length Test
9 | const pluginsLengthElement = document.getElementById('plugins-length');
10 | pluginsLengthElement.innerHTML = navigator.plugins.length;
11 | if (navigator.plugins.length === 0) {
12 | pluginsLengthElement.classList.add('failed');
13 | }
14 |
15 | // Languages Test
16 | const languagesElement = document.getElementById('languages');
17 | languagesElement.innerHTML = navigator.languages;
18 | if (!navigator.languages || navigator.languages.length === 0) {
19 | languagesElement.classList.add('failed');
20 | }
21 |
22 | // WebGL Tests
23 | const canvas = document.createElement('canvas');
24 | const gl = canvas.getContext('webgl') || canvas.getContext('experimental-webgl');
25 | if (gl) {
26 | const debugInfo = gl.getExtension('WEBGL_debug_renderer_info');
27 |
28 | // WebGL Vendor Test
29 | const webGLVendorElement = document.getElementById('webgl-vendor');
30 | const vendor = gl.getParameter(debugInfo.UNMASKED_VENDOR_WEBGL);
31 | webGLVendorElement.innerHTML = vendor;
32 | if (vendor === 'Brian Paul') {
33 | webGLVendorElement.classList.add('failed');
34 | }
35 |
36 | // WebGL Renderer Test
37 | const webGLRendererElement = document.getElementById('webgl-renderer');
38 | const renderer = gl.getParameter(debugInfo.UNMASKED_RENDERER_WEBGL);
39 | webGLRendererElement.innerHTML = renderer;
40 | if (renderer === 'Mesa OffScreen') {
41 | webGLRendererElement.classList.add('failed');
42 | }
43 | }
44 |
45 | // Hairline Feature Test
46 | const hairlineFeatureElement = document.getElementById('hairline-feature');
47 | if (Modernizr.hairline) {
48 | hairlineFeatureElement.innerHTML = 'present';
49 | } else {
50 | hairlineFeatureElement.innerHTML = 'missing';
51 | hairlineFeatureElement.classList.add('failed');
52 | }
53 |
54 | // Broken Image Dimensions Test
55 | const brokenImageDimensionsElement = document.getElementById('broken-image-dimensions');
56 | const body = document.body;
57 | const image = document.createElement('img');
58 | image.onerror = function(){
59 | brokenImageDimensionsElement.innerHTML = `${image.width}x${image.height}`;
60 | if (image.width == 0 && image.height == 0) {
61 | brokenImageDimensionsElement.classList.add('failed');
62 | }
63 | };
64 | body.appendChild(image);
65 | image.src = 'https://intoli.com/nonexistent-image.png';
66 |
--------------------------------------------------------------------------------
/articles/making-chrome-headless-undetectable/inject.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | from mitmproxy import ctx
3 |
4 |
5 | # load in the javascript to inject
6 | with open('injected-test-bypasses.js', 'r') as f:
7 | content_js = f.read()
8 |
9 |
10 | def response(flow):
11 | # only process 200 responses of html content
12 | if flow.response.headers['Content-Type'] != 'text/html':
13 | return
14 | if not flow.response.status_code == 200:
15 | return
16 |
17 | # inject the script tag
18 | html = BeautifulSoup(flow.response.text, 'lxml')
19 | container = html.head or html.body
20 | if container:
21 | script = html.new_tag('script', type='text/javascript')
22 | script.string = content_js
23 | container.insert(0, script)
24 | flow.response.text = str(html)
25 |
26 | ctx.log.info('Successfully injected the injected-test-bypasses.js script.')
27 |
--------------------------------------------------------------------------------
/articles/making-chrome-headless-undetectable/injected-test-bypasses.js:
--------------------------------------------------------------------------------
1 | //
2 | // Bypass the Languages Test.
3 | //
4 |
5 | // Overwrite the `languages` property to use a custom getter.
6 | Object.defineProperty(navigator, 'languages', {
7 | get: function() {
8 | return ['en-US', 'en'];
9 | },
10 | });
11 |
12 |
13 | //
14 | // Bypass the Plugins Test.
15 | //
16 |
17 | // Overwrite the `plugins` property to use a custom getter.
18 | Object.defineProperty(navigator, 'plugins', {
19 | get: function() {
20 | // This just needs to have `length > 0`, but we could mock the plugins too.
21 | return [1, 2, 3, 4, 5];
22 | },
23 | });
24 |
25 |
26 | //
27 | // Bypass the WebGL test.
28 | //
29 |
30 | const getParameter = WebGLRenderingContext.prototype.getParameter;
31 | WebGLRenderingContext.prototype.getParameter = function(parameter) {
32 | // UNMASKED_VENDOR_WEBGL
33 | if (parameter === 37445) {
34 | return 'Intel Open Source Technology Center';
35 | }
36 | // UNMASKED_RENDERER_WEBGL
37 | if (parameter === 37446) {
38 | return 'Mesa DRI Intel(R) Ivybridge Mobile ';
39 | }
40 |
41 | return getParameter.call(this, parameter);
42 | };
43 |
44 |
45 | //
46 | // Bypass the Broken Image Test.
47 | //
48 |
49 | ['height', 'width'].forEach(property => {
50 | // Store the existing descriptor.
51 | const imageDescriptor = Object.getOwnPropertyDescriptor(HTMLImageElement.prototype, property);
52 |
53 | // Redefine the property with a patched descriptor.
54 | Object.defineProperty(HTMLImageElement.prototype, property, {
55 | ...imageDescriptor,
56 | get: function() {
57 | // Return an arbitrary non-zero dimension if the image failed to load.
58 | if (this.complete && this.naturalHeight == 0) {
59 | return 20;
60 | }
61 | // Otherwise, return the actual dimension.
62 | return imageDescriptor.get.apply(this);
63 | },
64 | });
65 | });
66 |
67 |
68 | //
69 | // Bypass the Retina/HiDPI Hairline Feature Test.
70 | //
71 |
72 | // Store the existing descriptor.
73 | const elementDescriptor = Object.getOwnPropertyDescriptor(HTMLElement.prototype, 'offsetHeight');
74 |
75 | // Redefine the property with a patched descriptor.
76 | Object.defineProperty(HTMLDivElement.prototype, 'offsetHeight', {
77 | ...elementDescriptor,
78 | get: function() {
79 | if (this.id === 'modernizr') {
80 | return 1;
81 | }
82 | return elementDescriptor.get.apply(this);
83 | },
84 | });
85 |
--------------------------------------------------------------------------------
/articles/making-chrome-headless-undetectable/test-headless.js:
--------------------------------------------------------------------------------
1 | const CDP = require('chrome-remote-interface');
2 | const fs = require('fs');
3 |
4 | // global settings
5 | const filename = 'headless-results.png';
6 | const url = 'https://intoli.com/blog/making-chrome-headless-undetectable/chrome-headless-test.html';
7 | const userAgent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36'
8 |
9 | CDP(async function(client) {
10 | const {Network, Page, Security} = client;
11 | await Page.enable();
12 | await Network.enable();
13 | await Network.setUserAgentOverride({userAgent});
14 |
15 | // ignore all certificate errors to support mitmproxy certificates
16 | await Security.enable();
17 | await Security.setOverrideCertificateErrors({override: true});
18 | Security.certificateError(({eventId}) => {
19 | Security.handleCertificateError({
20 | eventId,
21 | action: 'continue'
22 | });
23 | });
24 |
25 | // navigate to the page and wait for it to load
26 | await Page.navigate({url});
27 | await Page.loadEventFired();
28 |
29 | setTimeout(async function() {
30 | // save the screenshot
31 | const screenshot = await Page.captureScreenshot({format: 'png'});
32 | const buffer = new Buffer(screenshot.data, 'base64');
33 | fs.writeFile(filename, buffer, 'base64', function(err) {
34 | if (err) {
35 | console.error(`Error saving screenshot: ${err}`);
36 | } else {
37 | console.log(`"${filename}" written successfully.`);
38 | }
39 | client.close();
40 | });
41 | }, 1000); // 1 second delay for the tests to complete
42 | }).on('error', err => {
43 | console.error(`Error connecting to Chrome: ${err}`);
44 | });
45 |
--------------------------------------------------------------------------------
/articles/neural-network-initialization/README.md:
--------------------------------------------------------------------------------
1 | # Understanding Neural Network Weight Initialization
2 |
3 | This folder contains scripts for producing the plots used in the [Understanding Neural Network Weight Initialization](https://intoli.com/blog/neural-network-initialization/) article published on the [Intoli blog](https://intoli.com/blog/):
4 |
5 | - [plot-activation-layers.py](plot-activation-layers.py) visualizes the distribution of activations over 5 hidden layers of a Multi-Layer Perceptron using three different initializations.
6 | The script uses ReLu activations, although the article also includes a plot generated by changing `activation = 'relu'` to `activation = 'linear'` on line 52.
7 | 
8 |
9 | - [plot-loss-progression.py](plot-loss-progression.py) visualizes training loss over time as the network is trained using three different initializations.
10 | 
11 |
12 | To run the scripts, first grab the files from this folder:
13 |
14 | ```bash
15 | git clone https://github.com/Intoli/intoli-article-materials.git
16 | cd intoli-article-materials/articles/neural-network-initialization
17 | ```
18 |
19 | Then, create a virtualenv and install the dependencies:
20 |
21 | ```bash
22 | virtualenv env
23 | . env/bin/activate
24 | pip install -r requirements.txt
25 | ```
26 |
27 | You may also need to choose a Matplotlib backend in order to successfully produce plots from a virtualenv.
28 | On macOS, this could be done with
29 |
30 | ```bash
31 | echo "backend: TkAgg" >> ~/.matplotlib/matplotlibrc
32 | ```
33 |
34 | while on Linux you might have luck with
35 |
36 | ```bash
37 | echo "backend: Agg" >> ~/.matplotlib/matplotlibrc
38 | ```
39 |
40 | Note that the scripts do not save files to disk and simply show the plot in a Matplotlib window.
41 | To make the plots just run the scripts using Python from the virtualenv:
42 |
43 | ```bash
44 | python plot-activation-layers.py
45 | ```
46 |
47 | Note that [plot-loss-progression.py](plot-loss-progression.py) takes quite a while to run, since it trains a neural network on 10000 MNIST images three times.
48 | Also, if you use Python 3.6, TensorFlow might issue a runtime warning about having "compiletime version 3.5," but the scripts should still work.
49 |
--------------------------------------------------------------------------------
/articles/neural-network-initialization/images/relu-output-progression-violinplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intoli/intoli-article-materials/b01010ddc769ac20ce492bce478ee49c859c5db3/articles/neural-network-initialization/images/relu-output-progression-violinplot.png
--------------------------------------------------------------------------------
/articles/neural-network-initialization/images/training-losses.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intoli/intoli-article-materials/b01010ddc769ac20ce492bce478ee49c859c5db3/articles/neural-network-initialization/images/training-losses.png
--------------------------------------------------------------------------------
/articles/neural-network-initialization/plot-activation-layers.py:
--------------------------------------------------------------------------------
1 | import keras
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | import pandas as pd
5 | import seaborn as sns
6 | from keras import initializers
7 | from keras.datasets import mnist
8 |
9 | from utils import (
10 | compile_model,
11 | create_mlp_model,
12 | get_activations,
13 | grid_axes_it,
14 | )
15 |
16 |
17 | seed = 10
18 |
19 | # Number of points to plot
20 | n_train = 1000
21 | n_test = 100
22 | n_classes = 10
23 |
24 | # Network params
25 | n_hidden_layers = 5
26 | dim_layer = 100
27 | batch_size = n_train
28 | epochs = 1
29 |
30 | # Load and prepare MNIST dataset.
31 | n_train = 60000
32 | n_test = 10000
33 |
34 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
35 | num_classes = len(np.unique(y_test))
36 | data_dim = 28 * 28
37 |
38 | x_train = x_train.reshape(60000, 784).astype('float32')[:n_train]
39 | x_test = x_test.reshape(10000, 784).astype('float32')[:n_test]
40 | x_train /= 255
41 | x_test /= 255
42 |
43 | y_train = keras.utils.to_categorical(y_train, num_classes)
44 | y_test = keras.utils.to_categorical(y_test, num_classes)
45 |
46 | # Run the data through a few MLP models and save the activations from
47 | # each layer into a Pandas DataFrame.
48 | rows = []
49 | sigmas = [0.10, 0.14, 0.28]
50 | for stddev in sigmas:
51 | init = initializers.RandomNormal(mean=0.0, stddev=stddev, seed=seed)
52 | activation = 'relu'
53 |
54 | model = create_mlp_model(
55 | n_hidden_layers,
56 | dim_layer,
57 | (data_dim,),
58 | n_classes,
59 | init,
60 | 'zeros',
61 | activation
62 | )
63 | compile_model(model)
64 | output_elts = get_activations(model, x_test)
65 | n_layers = len(model.layers)
66 | i_output_layer = n_layers - 1
67 |
68 | for i, out in enumerate(output_elts[:-1]):
69 | if i > 0 and i != i_output_layer:
70 | for out_i in out.ravel()[::20]:
71 | rows.append([i, stddev, out_i])
72 |
73 | df = pd.DataFrame(rows, columns=['Hidden Layer', 'Standard Deviation', 'Output'])
74 |
75 | # Plot previously saved activations from the 5 hidden layers
76 | # using different initialization schemes.
77 | fig = plt.figure(figsize=(12, 6))
78 | axes = grid_axes_it(len(sigmas), 1, fig=fig)
79 | for sig in sigmas:
80 | ax = next(axes)
81 | ddf = df[df['Standard Deviation'] == sig]
82 | sns.violinplot(x='Hidden Layer', y='Output', data=ddf, ax=ax, scale='count', inner=None)
83 |
84 | ax.set_xlabel('')
85 | ax.set_ylabel('')
86 |
87 | ax.set_title('Weights Drawn from $N(\mu = 0, \sigma = {%.2f})$' % sig, fontsize=13)
88 |
89 | if sig == sigmas[1]:
90 | ax.set_ylabel("ReLu Neuron Outputs")
91 | if sig != sigmas[-1]:
92 | ax.set_xticklabels(())
93 | else:
94 | ax.set_xlabel("Hidden Layer")
95 |
96 | plt.tight_layout()
97 | plt.show()
98 |
--------------------------------------------------------------------------------
/articles/neural-network-initialization/plot-loss-progression.py:
--------------------------------------------------------------------------------
1 | import keras
2 | import numpy as np
3 | import seaborn as sns
4 | from keras import initializers
5 | from keras.datasets import mnist
6 | from matplotlib import pyplot as plt
7 |
8 | from utils import (
9 | get_init_id,
10 | grid_axes_it,
11 | compile_model,
12 | create_cnn_model,
13 | LossHistory,
14 | )
15 |
16 |
17 | sns.set_style('white')
18 | sns.set_palette('colorblind')
19 |
20 | batch_size = 128
21 | num_classes = 10
22 | epochs = 12
23 |
24 | # Load MNIST training data.
25 | img_rows, img_cols = 28, 28
26 |
27 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
28 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
29 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
30 | input_shape = (img_rows, img_cols, 1)
31 |
32 | x_train = x_train.astype('float32')
33 | x_test = x_test.astype('float32')
34 | x_train /= 255
35 | x_test /= 255
36 |
37 | y_train = keras.utils.to_categorical(y_train, num_classes)
38 | y_test = keras.utils.to_categorical(y_test, num_classes)
39 |
40 | print('x_train shape:', x_train.shape)
41 | print(x_train.shape[0], 'train samples')
42 | print(x_test.shape[0], 'test samples')
43 |
44 | # Train the CNN under three different initialization schemes,
45 | # and record loss over time.
46 | inits = [
47 | initializers.Zeros(),
48 | initializers.RandomNormal(mean=0.0, stddev=0.4, seed=10),
49 | initializers.VarianceScaling(
50 | scale=2.0, mode='fan_in', distribution='normal', seed=10
51 | ),
52 | ]
53 |
54 | loss_histories = {}
55 | models = {}
56 |
57 | for i, init in enumerate(inits):
58 | init_id = get_init_id(init)
59 |
60 | print("Training CNN with initializer:")
61 | print(' ' + str(init))
62 | print(' ' + str(init.get_config()))
63 |
64 | model = create_cnn_model(input_shape, num_classes, kernel_initializer=init)
65 | compile_model(model)
66 |
67 | loss_history = LossHistory()
68 | model.fit(x_train, y_train,
69 | batch_size=batch_size,
70 | epochs=epochs,
71 | verbose=1,
72 | validation_data=(x_test, y_test),
73 | callbacks=[loss_history])
74 |
75 | losses = loss_history.losses
76 |
77 | loss_histories[init_id] = loss_history
78 | models[init_id] = model
79 |
80 |
81 | # Plot the loss over time for three initialization schemes.
82 | colors = sns.color_palette('colorblind', 6)
83 | cases = [
84 | (
85 | 'Zeros|',
86 | 'Loss with Initial Weights Set to Zero',
87 | colors[3],
88 | ),
89 | (
90 | 'RandomNormal|mean-0.0__stddev-0.4',
91 | 'Loss with Initial Weights Drawn from $N(0, \sigma = 0.4)$',
92 | colors[1],
93 | ),
94 | (
95 | 'VarianceScaling|scale-2.0__mode-fan_in__distribution-normal',
96 | 'Loss with Initial Weights Drawn from $N(0, \sigma \sim \sqrt{2/n_i})$',
97 | colors[2],
98 | ),
99 | ]
100 |
101 | plt.figure(figsize=(12, 6))
102 | axes = grid_axes_it(3, 3)
103 |
104 | for i, (case_id, label, color) in enumerate(cases):
105 | ax = next(axes)
106 | case_loss = loss_histories[case_id].losses
107 | n_steps = 12
108 | pseqs = []
109 | for step in range(n_steps):
110 | seq = [float(x) for x in case_loss[step::n_steps]]
111 | pseqs.append(seq)
112 |
113 | mlen = max([len(x) for x in pseqs])
114 | seqs = [np.array(seq[:mlen]) for seq in pseqs]
115 |
116 | sns.tsplot(np.array(seqs), ax=ax, color=color)
117 |
118 | # These plotting methods assume that there are 12 epochs in order to draw the xticks correctly.
119 | assert epochs == 12
120 |
121 | def get_label(x):
122 | if x == 0.0:
123 | return ''
124 | else:
125 | return str(int(x / len(seqs[0]) * 12))
126 |
127 | xticks = [x * len(seq) / 6.0 for x in range(6)]
128 | ax.set_xticks(xticks)
129 | ax.set_xticklabels([get_label(x) for x in xticks])
130 |
131 | if i < 100:
132 | ax.set_xlabel("Epoch", fontsize=14)
133 | if i == 0:
134 | ax.set_ylabel("Loss", fontsize=14)
135 | ax.set_title(label, fontsize=15)
136 |
137 |
138 | plt.tight_layout()
139 | plt.show()
140 |
--------------------------------------------------------------------------------
/articles/neural-network-initialization/requirements.txt:
--------------------------------------------------------------------------------
1 | appnope==0.1.0
2 | bleach==1.5.0
3 | cycler==0.10.0
4 | decorator==4.2.1
5 | entrypoints==0.2.3
6 | enum34==1.1.6
7 | html5lib==0.9999999
8 | ipykernel==4.8.0
9 | ipython==6.2.1
10 | ipython-genutils==0.2.0
11 | ipywidgets==7.1.1
12 | jedi==0.11.1
13 | Jinja2==2.10
14 | jsonschema==2.6.0
15 | jupyter==1.0.0
16 | jupyter-client==5.2.2
17 | jupyter-console==5.2.0
18 | jupyter-core==4.4.0
19 | Keras==2.1.3
20 | Markdown==2.6.11
21 | MarkupSafe==1.0
22 | matplotlib==2.1.2
23 | mistune==0.8.3
24 | nbconvert==5.3.1
25 | nbformat==4.4.0
26 | notebook==5.3.1
27 | numpy==1.14.0
28 | pandas==0.22.0
29 | pandocfilters==1.4.2
30 | parso==0.1.1
31 | pexpect==4.3.1
32 | pickleshare==0.7.4
33 | prompt-toolkit==1.0.15
34 | protobuf==3.5.1
35 | ptyprocess==0.5.2
36 | Pygments==2.2.0
37 | pyparsing==2.2.0
38 | python-dateutil==2.6.1
39 | pytz==2017.3
40 | PyYAML==3.12
41 | pyzmq==16.0.4
42 | qtconsole==4.3.1
43 | scipy==1.0.0
44 | seaborn==0.8.1
45 | Send2Trash==1.4.2
46 | simplegeneric==0.8.1
47 | six==1.11.0
48 | tensorflow==1.4.1
49 | tensorflow-tensorboard==0.4.0
50 | terminado==0.8.1
51 | testpath==0.3.1
52 | tornado==4.5.3
53 | traitlets==4.3.2
54 | wcwidth==0.1.7
55 | webencodings==0.5.1
56 | Werkzeug==0.14.1
57 | widgetsnbextension==3.1.3
58 |
--------------------------------------------------------------------------------
/articles/neural-network-initialization/utils.py:
--------------------------------------------------------------------------------
1 | import keras
2 | from keras.models import Sequential
3 | from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
4 | from keras import backend as K
5 |
6 | from matplotlib import pyplot as plt
7 | from matplotlib import rcParamsDefault
8 |
9 |
10 | def grid_axes_it(n_plots, n_cols=3, enumerate=False, fig=None):
11 | """
12 | Iterate through Axes objects on a grid with n_cols columns and as many
13 | rows as needed to accommodate n_plots many plots.
14 |
15 | Args:
16 | n_plots: Number of plots to plot onto figure.
17 | n_cols: Number of columns to divide the figure into.
18 | fig: Optional figure reference.
19 |
20 | Yields:
21 | n_plots many Axes objects on a grid.
22 | """
23 | n_rows = n_plots // n_cols + int(n_plots % n_cols > 0)
24 |
25 | if not fig:
26 | default_figsize = rcParamsDefault['figure.figsize']
27 | fig = plt.figure(figsize=(
28 | default_figsize[0] * n_cols,
29 | default_figsize[1] * n_rows
30 | ))
31 |
32 | for i in range(1, n_plots + 1):
33 | ax = plt.subplot(n_rows, n_cols, i)
34 | yield ax
35 |
36 |
37 | def create_mlp_model(
38 | n_hidden_layers,
39 | dim_layer,
40 | input_shape,
41 | n_classes,
42 | kernel_initializer,
43 | bias_initializer,
44 | activation,
45 | ):
46 | """Create Multi-Layer Perceptron with given parameters."""
47 | model = Sequential()
48 | model.add(Dense(dim_layer, input_shape=input_shape, kernel_initializer=kernel_initializer,
49 | bias_initializer=bias_initializer))
50 | for i in range(n_hidden_layers):
51 | model.add(Dense(dim_layer, activation=activation, kernel_initializer=kernel_initializer,
52 | bias_initializer=bias_initializer))
53 | model.add(Dense(n_classes, activation='softmax', kernel_initializer=kernel_initializer,
54 | bias_initializer=bias_initializer))
55 | return model
56 |
57 |
58 | def create_cnn_model(input_shape, num_classes, kernel_initializer='glorot_uniform',
59 | bias_initializer='zeros'):
60 | """Create CNN model similar to
61 | https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py."""
62 | model = Sequential()
63 | model.add(Conv2D(32, kernel_size=(3, 3),
64 | activation='relu',
65 | input_shape=input_shape,
66 | kernel_initializer=kernel_initializer,
67 | bias_initializer=bias_initializer))
68 | model.add(Conv2D(64, (3, 3), activation='relu',
69 | kernel_initializer=kernel_initializer,
70 | bias_initializer=bias_initializer))
71 | model.add(MaxPooling2D(pool_size=(2, 2)))
72 | model.add(Dropout(0.25))
73 | model.add(Flatten())
74 | model.add(Dense(128, activation='relu',
75 | kernel_initializer=kernel_initializer,
76 | bias_initializer=bias_initializer))
77 | model.add(Dropout(0.5))
78 | model.add(Dense(num_classes, activation='softmax',
79 | kernel_initializer=kernel_initializer,
80 | bias_initializer=bias_initializer))
81 | return model
82 |
83 |
84 | def compile_model(model):
85 | model.compile(loss=keras.losses.categorical_crossentropy,
86 | optimizer=keras.optimizers.RMSprop(),
87 | metrics=['accuracy'])
88 | return model
89 |
90 |
91 | def get_init_id(init):
92 | """
93 | Returns string ID summarizing initialization scheme and its parameters.
94 |
95 | Args:
96 | init: Instance of some initializer from keras.initializers.
97 | """
98 | try:
99 | init_name = str(init).split('.')[2].split(' ')[0]
100 | except:
101 | init_name = str(init).split(' ')[0].replace('.', '_')
102 |
103 | param_list = []
104 | config = init.get_config()
105 | for k, v in config.items():
106 | if k == 'seed':
107 | continue
108 | param_list.append('{k}-{v}'.format(k=k, v=v))
109 | init_params = '__'.join(param_list)
110 |
111 | return '|'.join([init_name, init_params])
112 |
113 |
114 | def get_activations(model, x, mode=0.0):
115 | """Extract activations with given model and input vector x."""
116 | outputs = [layer.output for layer in model.layers]
117 | activations = K.function([model.input], outputs)
118 | output_elts = activations([x, mode])
119 | return output_elts
120 |
121 |
122 | class LossHistory(keras.callbacks.Callback):
123 | """A custom keras callback for recording losses during network training."""
124 |
125 | def on_train_begin(self, logs={}):
126 | self.losses = []
127 | self.epoch_losses = []
128 | self.epoch_val_losses = []
129 |
130 | def on_batch_end(self, batch, logs={}):
131 | self.losses.append(logs.get('loss'))
132 |
133 | def on_epoch_end(self, epoch, logs={}):
134 | self.epoch_losses.append(logs.get('loss'))
135 | self.epoch_val_losses.append(logs.get('val_loss'))
136 |
--------------------------------------------------------------------------------
/articles/nightmare-network-idle/README.md:
--------------------------------------------------------------------------------
1 | # Implementing a Custom Waiting Action in Nightmare JS
2 |
3 | This directory contains a custom [Nightmare](http://www.nightmarejs.org/) action defined in [waitUntilNetworkIdle.js](waitUntilNetworkIdle.js) which waits until there have been no incoming responses for a given amount of time.
4 | The script's implementation details are described in the [Implementing a Custom Waiting Action in Nightmare JS](https://intoli.com/blog/nightmare-network-idle/) article published on the [Intoli blog](https://intoli.com/blog/).
5 |
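As a quick illustration of what that looks like in practice, here's a minimal usage sketch based on the pattern in [test.js](test.js); the URL and wait time are arbitrary placeholders.

```javascript
// Requiring the action module returns Nightmare with waitUntilNetworkIdle attached.
const Nightmare = require('./waitUntilNetworkIdle.js');

const nightmare = new Nightmare({ show: false });

nightmare
  .goto('https://example.com')
  // Resolves once no responses have arrived for at least 1000 ms.
  .waitUntilNetworkIdle(1000)
  .evaluate(() => document.body.innerText)
  .end()
  .then(text => console.log(text))
  .catch(console.error);
```
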
6 | To run the script, you need to have [Node.js](https://nodejs.org/en/) and [yarn](https://yarnpkg.com/en/) installed.
7 | With that out of the way, download the contents of this directory to disk.
8 |
9 | ```bash
10 | git clone https://github.com/Intoli/intoli-article-materials.git
11 | cd intoli-article-materials/articles/nightmare-network-idle
12 | ```
13 |
14 | Then install the dependencies with
15 |
16 | ```bash
17 | yarn install
18 | ```
19 |
20 | The mocha test script [test.js](test.js) runs the custom action a few times.
21 | Run the test with
22 |
23 | ```bash
24 | yarn run test
25 | ```
26 |
--------------------------------------------------------------------------------
/articles/nightmare-network-idle/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "nightmare-network-idle",
3 | "version": "1.0.0",
4 | "description": "A script which uses Puppeteer to scrape pages with infinite scroll.",
5 | "repository": {
6 | "type": "git",
7 | "url": "https://github.com/Intoli/intoli-article-materials.git"
8 | },
9 | "scripts": {
10 | "test": "./node_modules/.bin/mocha"
11 | },
12 | "keywords": [
13 | "testing",
14 | "javascript",
15 | "nightmare"
16 | ],
17 | "author": "Andre Perunicic / Intoli, LLC",
18 | "license": "BSD-2-Clause",
19 | "bugs": {
20 | "url": "https://github.com/Intoli/intoli-article-materials/issues"
21 | },
22 | "homepage": "https://intoli.com/blog/nightmare-network-idle/",
23 | "dependencies": {
24 | "mocha": "^5.0.0",
25 | "nightmare": "^2.10.0"
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/articles/nightmare-network-idle/test.js:
--------------------------------------------------------------------------------
1 | const Nightmare = require('./waitUntilNetworkIdle.js');
2 | const assert = require('assert');
3 |
4 | describe('waitUntilNetworkIdle', function() {
5 | const waitTimes = [500, 1500, 5000];
6 | let startTime;
7 | waitTimes.forEach(function(waitTime) {
8 | it(`should wait for at least ${waitTime} ms after the last response`,
9 | function(done) {
10 | this.timeout(20000);
11 |
12 | const nightmare = new Nightmare({ show: true });
13 | startTime = Date.now();
14 |
15 | nightmare
16 | .on('did-get-response-details', () => {
17 | startTime = Date.now();
18 | })
19 | .goto('https://intoli.com/blog/nightmare-network-idle/demo.html')
20 | .waitUntilNetworkIdle(waitTime)
21 | .evaluate(() => {
22 | const body = document.querySelector('body');
23 | return body.innerText;
24 | })
25 | .end()
26 | .then((result) => {
27 | const elapsedTime = Date.now() - startTime;
28 |
29 | // Verify the requests completed as expected.
30 | assert.equal(result, 'All three requests received.');
31 |
32 | // Verify that the action caused Nightmare to wait long enough.
33 | assert(elapsedTime >= waitTime, 'Wait period too short');
34 |
35 | done();
36 | })
37 | .catch(done)
38 | });
39 | });
40 | });
41 |
--------------------------------------------------------------------------------
/articles/nightmare-network-idle/waitUntilNetworkIdle.js:
--------------------------------------------------------------------------------
1 | const Nightmare = require('nightmare');
2 |
3 | Nightmare.action('waitUntilNetworkIdle',
4 | // The first callback defines the action on Electron's end,
5 | // making some internal objects available.
6 | function (name, options, parent, win, renderer, done) {
7 |
8 | // `parent` is Electron's reference to the object that
9 | // passes messages between Electron and Nightmare.
10 | parent.respondTo('waitUntilNetworkIdle', (waitTime, done) => {
11 | let lastRequestTime = Date.now();
12 |
13 | // win.webContents allows us to control the internal
14 | // Electron BrowserWindow instance.
15 | win.webContents.on('did-get-response-details', () => {
16 | lastRequestTime = Date.now();
17 | });
18 |
19 | const check = () => {
20 | const now = Date.now();
21 | const elapsedTime = now - lastRequestTime;
22 | if (elapsedTime >= waitTime) {
23 | done(); // Complete the action.
24 | } else {
25 | setTimeout(check, waitTime - elapsedTime);
26 | }
27 | }
28 | setTimeout(check, waitTime);
29 | });
30 |
31 | done(); // Complete the action's *creation*.
32 | },
33 | // The second callback runs on Nightmare's end and determines
34 | // the action's interface.
35 | function (waitTime, done) {
36 | // This is necessary because the action will only work if
37 | // action arguments are specified before `done`, and because
38 | // we wish to support calls without arguments.
39 | if (!done) {
40 | done = waitTime;
41 | waitTime = 500;
42 | }
43 |
44 | // `this.child` is Nightmare's reference to the object that
45 | // passes messages between Electron and Nightmare.
46 | this.child.call('waitUntilNetworkIdle', waitTime, done);
47 | });
48 |
49 | module.exports = Nightmare;
50 |
--------------------------------------------------------------------------------
/articles/node-package-manager-benchmarks/README.md:
--------------------------------------------------------------------------------
1 | # Node Package Manager Benchmarks
2 |
3 | The article is actually called [Why I Still Don't Use Yarn](https://intoli.com/blog/node-package-manager-benchmarks/), but it really centers around benchmarking `yarn`, `npm`, and `pnpm`.
4 | The associated benchmark code is in its own repository called [node-package-manager-benchmarks](https://github.com/sangaline/node-package-manager-benchmarks).
5 |
--------------------------------------------------------------------------------
/articles/not-possible-to-block-chrome-headless/README.md:
--------------------------------------------------------------------------------
1 | # It is *not* possible to detect and block Chrome headless
2 |
3 | [It is *not* possible to detect and block Chrome headless](https://intoli.com/blog/not-possible-to-block-chrome-headless/) is our second installment of techniques to bypass the user-hostile practice of blocking users based on characteristics of their web browsers (see also: [Making Chrome Headless Undetectable](https://intoli.com/blog/making-chrome-headless-undetectable/)).
4 | The test suite is implemented in [chrome-headless-test.html](chrome-headless-test.html) and [chrome-headless-test.js](chrome-headless-test.js).
5 | You can visit the live test page at [https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html](https://intoli.com/blog/not-possible-to-block-chrome-headless/chrome-headless-test.html) to see how your current browser would fare.
6 | The results should look something like this, where red indicates a check that identifies headless Chrome.
7 |
8 | 
9 |
10 | The test results used in the article are generated using two scripts: [test-headless-initial.js](test-headless-initial.js) and [test-headless-final.js](test-headless-final.js).
11 | These both use [Puppeteer](https://github.com/GoogleChrome/puppeteer) as a browser automation framework to visit the test page and take a screenshot of the results.
12 | The Puppeteer dependency is included in the [package.json](package.json) file and you can install the dependencies by running
13 |
14 | ```bash
15 | yarn install
16 | ```
17 |
18 | in this directory.
19 | You can then run the [test-headless-initial.js](test-headless-initial.js) script, which doesn't include any bypasses, with the following command.
20 |
21 | ```bash
22 | node test-headless-initial.js
23 | ```
24 |
25 | This will create the [headless-initial-results.png](img/headless-initial-results.png) that you can see above.
26 |
27 | To run the tests with the bypasses, you simply need to change the name of the script to [test-headless-final.js](test-headless-final.js).
28 |
29 | ```bash
30 | node test-headless-final.js
31 | ```
32 |
33 | This will create a second [headless-final-results.png](img/headless-final-results.png) image which looks like this.
34 |
35 | 
36 |
37 | As you can see, all of the tests have been bypassed!
38 | You can peruse the [test-headless-final.js](test-headless-final.js) source code to see how the bypasses are implemented, or visit [the original article](https://intoli.com/blog/not-possible-to-block-chrome-headless/) for a more in-depth explanation of how they work.
39 |
--------------------------------------------------------------------------------
/articles/not-possible-to-block-chrome-headless/chrome-headless-test.html:
--------------------------------------------------------------------------------
[HTML markup not preserved. The page, titled "Chrome Headless Detection (Round II)", consists of a `<style>` block and a results table with "Test Name" and "Result" columns and rows for User Agent (Old), WebDriver (New) (default text "missing (passed)"), Chrome (New) (default text "present (passed)"), Permissions (New), Plugins Length (Old), and Languages (Old). The result cells are populated by the chrome-headless-test.js script included on the page.]
--------------------------------------------------------------------------------
/articles/not-possible-to-block-chrome-headless/chrome-headless-test.js:
--------------------------------------------------------------------------------
1 | // User-Agent Test
2 | const userAgentElement = document.getElementById('user-agent-result');
3 | userAgentElement.innerHTML = navigator.userAgent;
4 | if (/HeadlessChrome/.test(navigator.userAgent)) {
5 | userAgentElement.classList.add('failed');
6 | }
7 |
8 | // Webdriver Test
9 | const webdriverElement = document.getElementById('webdriver-result');
10 | if (navigator.webdriver) {
11 | webdriverElement.classList.add('failed');
12 | webdriverElement.innerHTML = 'present (failed)';
13 | }
14 |
15 | // Chrome Test
16 | const chromeElement = document.getElementById('chrome-result');
17 | if (!window.chrome) {
18 | chromeElement.classList.add('failed');
19 | chromeElement.innerHTML = 'missing (failed)';
20 | }
21 |
22 | // Permissions Test
23 | const permissionsElement = document.getElementById('permissions-result');
24 | (async () => {
25 | const permissionStatus = await navigator.permissions.query({ name: 'notifications' });
26 | permissionsElement.innerHTML = permissionStatus.state;
27 | if(Notification.permission === 'denied' && permissionStatus.state === 'prompt') {
28 | permissionsElement.classList.add('failed');
29 | }
30 | })();
31 |
32 | // Plugins Length Test
33 | const pluginsLengthElement = document.getElementById('plugins-length-result');
34 | pluginsLengthElement.innerHTML = navigator.plugins.length;
35 | if (navigator.plugins.length === 0) {
36 | pluginsLengthElement.classList.add('failed');
37 | }
38 |
39 | // Languages Test
40 | const languagesElement = document.getElementById('languages-result');
41 | languagesElement.innerHTML = navigator.languages;
42 | if (!navigator.languages || navigator.languages.length === 0) {
43 | languagesElement.classList.add('failed');
44 | }
45 |
--------------------------------------------------------------------------------
/articles/not-possible-to-block-chrome-headless/img/headless-final-results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intoli/intoli-article-materials/b01010ddc769ac20ce492bce478ee49c859c5db3/articles/not-possible-to-block-chrome-headless/img/headless-final-results.png
--------------------------------------------------------------------------------
/articles/not-possible-to-block-chrome-headless/img/headless-initial-results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/intoli/intoli-article-materials/b01010ddc769ac20ce492bce478ee49c859c5db3/articles/not-possible-to-block-chrome-headless/img/headless-initial-results.png
--------------------------------------------------------------------------------
/articles/not-possible-to-block-chrome-headless/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "puppeteer": "^1.0.0"
4 | }
5 | }
6 |
--------------------------------------------------------------------------------
/articles/not-possible-to-block-chrome-headless/test-headless-final.js:
--------------------------------------------------------------------------------
1 | // We'll use Puppeteer as our browser automation framework.
2 | const puppeteer = require('puppeteer');
3 |
4 | // This is where we'll put the code to get around the tests.
5 | const preparePageForTests = async (page) => {
6 | // Pass the User-Agent Test.
7 | const userAgent = 'Mozilla/5.0 (X11; Linux x86_64)' +
8 | ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.39 Safari/537.36';
9 | await page.setUserAgent(userAgent);
10 |
11 | // Pass the Webdriver Test.
12 | await page.evaluateOnNewDocument(() => {
13 | Object.defineProperty(navigator, 'webdriver', {
14 | get: () => false,
15 | });
16 | });
17 |
18 | // Pass the Chrome Test.
19 | await page.evaluateOnNewDocument(() => {
20 | // We can mock this in as much depth as we need for the test.
21 | window.navigator.chrome = {
22 | runtime: {},
23 | // etc.
24 | };
25 | });
26 |
27 | // Pass the Permissions Test.
28 | await page.evaluateOnNewDocument(() => {
29 | const originalQuery = window.navigator.permissions.query;
30 | return window.navigator.permissions.query = (parameters) => (
31 | parameters.name === 'notifications' ?
32 | Promise.resolve({ state: Notification.permission }) :
33 | originalQuery(parameters)
34 | );
35 | });
36 |
37 | // Pass the Plugins Length Test.
38 | await page.evaluateOnNewDocument(() => {
39 | // Overwrite the `plugins` property to use a custom getter.
40 | Object.defineProperty(navigator, 'plugins', {
41 | // This just needs to have `length > 0` for the current test,
42 | // but we could mock the plugins too if necessary.
43 | get: () => [1, 2, 3, 4, 5],
44 | });
45 | });
46 |
47 | // Pass the Languages Test.
48 | await page.evaluateOnNewDocument(() => {
49 | // Overwrite the `languages` property to use a custom getter.
50 | Object.defineProperty(navigator, 'languages', {
51 | get: () => ['en-US', 'en'],
52 | });
53 | });
54 | }
55 |
56 | (async () => {
57 | // Launch the browser in headless mode and set up a page.
58 | const browser = await puppeteer.launch({
59 | args: ['--no-sandbox'],
60 | headless: true,
61 | });
62 | const page = await browser.newPage();
63 |
64 | // Prepare for the tests by installing the bypasses.
65 | await preparePageForTests(page);
66 |
67 | // Navigate to the page that will perform the tests.
68 | const testUrl = 'https://intoli.com/blog/' +
69 | 'not-possible-to-block-chrome-headless/chrome-headless-test.html';
70 | await page.goto(testUrl);
71 |
72 | // Save a screenshot of the results.
73 | await page.screenshot({path: 'headless-final-results.png'});
74 |
75 | // Clean up.
76 | await browser.close()
77 | })();
78 |
--------------------------------------------------------------------------------
/articles/not-possible-to-block-chrome-headless/test-headless-initial.js:
--------------------------------------------------------------------------------
1 | // We'll use Puppeteer as our browser automation framework.
2 | const puppeteer = require('puppeteer');
3 |
4 | // This is where we'll put the code to get around the tests.
5 | const preparePageForTests = async (page) => {
6 | // TODO: Not implemented yet.
7 | }
8 |
9 | (async () => {
10 | // Launch the browser in headless mode and set up a page.
11 | const browser = await puppeteer.launch({
12 | args: ['--no-sandbox'],
13 | headless: true,
14 | });
15 | const page = await browser.newPage();
16 |
17 | // Prepare for the tests (not yet implemented).
18 | await preparePageForTests(page);
19 |
20 | // Navigate to the page that will perform the tests.
21 | const testUrl = 'https://intoli.com/blog/' +
22 | 'not-possible-to-block-chrome-headless/chrome-headless-test.html';
23 | await page.goto(testUrl);
24 |
25 | // Save a screenshot of the results.
26 | await page.screenshot({path: 'headless-initial-results.png'});
27 |
28 | // Clean up.
29 | await browser.close()
30 | })();
31 |
--------------------------------------------------------------------------------
/articles/power-assert/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "env": {
3 | "testing": {
4 | "presets": [
5 | "power-assert"
6 | ]
7 | }
8 | },
9 | "presets": [["env", {
10 | "targets": {
11 | "node": "6.10"
12 | }
13 | }]]
14 | }
15 |
--------------------------------------------------------------------------------
/articles/power-assert/README.md:
--------------------------------------------------------------------------------
1 | # No API Is the Best API — The elegant power of Power Assert
2 |
3 | In [No API Is the Best API — The elegant power of Power Assert](https://intoli.com/blog/power-assert), we take a look at how [Power Assert](https://github.com/power-assert-js/power-assert) can be used to automatically generate contextual error messages when using Node's [assert](https://nodejs.org/api/assert.html) module for assertions.
4 | This allows you to get the best of both worlds; you can use a very simple assertion API while still taking advantage of rich and useful assertion messages.
5 | Power Assert accomplishes this by transforming your tests before they run and using the code itself to determine the relevant information to display.
6 | You can check out [the original article](https://intoli.com/blog/power-assert) for more details, but here we'll just focus on the project configuration and running the tests.
7 |
8 |
9 | ## Installing Dependencies
10 |
11 | The project dependencies are listed in [package.json](package.json) and [yarn.lock](yarn.lock).
12 | You can install them by running the following.
13 |
14 | ```bash
15 | # Or: `npm install`
16 | yarn install
17 | ```
18 |
19 | This will install the [Mocha](https://github.com/mochajs/mocha) test runner, a few [Babel](https://babeljs.io/)-related packages, [Power Assert](https://github.com/power-assert-js/power-assert), and the [Power Assert Babel Preset](https://github.com/power-assert-js/babel-preset-power-assert) inside of `node_modules/`.
20 |
21 |
22 | ## The Babel Configuration
23 |
24 | The key to the tests being transformed when the tests are run is the Babel configuration.
25 | It's located in [.babelrc](.babelrc), and it tells Babel to use the Power Assert preset when `NODE_ENV` is set to `testing`.
26 | It also specifies that the [Babel env preset](https://babeljs.io/docs/plugins/preset-env/) should be used to target Node v6.10.
27 | The non-testing Babel configuration can be customized freely without impacting the use of Power Assert.
28 |
29 | It's also worth mentioning that Mocha needs to be configured to use Babel in order for the tests to be transformed.
30 | This is accomplished using [Babel register](https://babeljs.io/docs/usage/babel-register/) and the Mocha `--require` option.
31 |
32 | ```bash
33 | NODE_ENV=testing mocha --exit --require babel-register
34 | ```
35 |
36 | This same command is included in [package.json](package.json) as a script, so it's equivalent to running `yarn test`.
37 |
38 |
39 | ## The Tests
40 |
41 | The tests themselves are all located in [test/test-assertion-errors.js](test/test-assertion-errors.js).
42 | There's nothing specific to Power Assert in the tests; they're just generic tests that use Node's `assert` module.
43 | Each one is purposely designed to fail, however, so that you can easily see what the error messages generated by Power Assert look like.
44 |
--------------------------------------------------------------------------------
/articles/power-assert/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "scripts": {
3 | "test": "NODE_ENV=testing mocha --exit --require babel-register"
4 | },
5 | "devDependencies": {
6 | "babel": "^6.23.0",
7 | "babel-core": "^6.26.3",
8 | "babel-preset-env": "^1.7.0",
9 | "babel-preset-power-assert": "^2.0.0",
10 | "babel-register": "^6.26.0",
11 | "mocha": "^5.2.0",
12 | "power-assert": "^1.5.0"
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/articles/power-assert/test/test-assertion-errors.js:
--------------------------------------------------------------------------------
1 | import assert from 'assert';
2 |
3 |
4 | // Note that all of these tests are designed to fail, so we can see the error messages!
5 | describe('Power Assert Testing Examples', () => {
6 | it('check that an unexpected substring is not found', () => {
7 | const result = 'Hello World';
8 | const unexpectedSubstring = 'World';
9 |
10 | // Jest Equivalent: expect(result).toEqual(expect.not.stringContaining(unexpectedSubstring));
11 | assert(!result.includes(unexpectedSubstring));
12 | });
13 |
14 | it('check that no members of an array are included in another array', () => {
15 | const result = ['Hello', 'World'];
16 | const unexpectedMembers = ['Evan', 'World'];
17 | // Jest Equivalent: expect(result).toEqual(expect.not.arrayContaining(unexpectedMembers));
18 | unexpectedMembers.forEach(member =>
19 | assert(!result.includes(member))
20 | );
21 | });
22 |
23 | it('check that a regular expression matches a string', () => {
24 | const regex = /^Hello World!/;
25 | const result = 'Hello World';
26 | // Jest Equivalent: expect(result).toEqual(expect.stringMatching(regex));
27 | assert(regex.test(result));
28 | });
29 |
30 | it('check that an array contains at least one number', () => {
31 | const result = ['Hello', 'World'];
32 | // Jest Equivalent: expect(result).toContainEqual(expect.any(Number));
33 | assert(result.some(member => typeof member === 'number'));
34 | });
35 |
36 | it('check for deep equality between two objects', () => {
37 | const expectedResult = { 'a': [1, 2], 'b': [1, 2] }
38 | const result = { 'a': [1, 2], 'b': [1, 2, 3] }
39 | // Jest Equivalent: expect(result).toEqual(expectedResult);
40 | assert.deepEqual(result, expectedResult);
41 | });
42 | });
43 |
--------------------------------------------------------------------------------
/articles/python-slicing-in-javascript/README.md:
--------------------------------------------------------------------------------
1 | # Recreating Python's Slice Syntax in JavaScript Using ES6 Proxies
2 |
3 | [Recreating Python's Slice Syntax in JavaScript Using ES6 Proxies](https://intoli.com/blog/python-slicing-in-javascript) explores how [Proxies](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Proxy) work in JavaScript, and uses them to build a `SliceArray` class that supports a variant of Python's negative indexing and extended slice syntax.
4 | It's not possible to implement Python's syntax exactly because the behavior of colons can't be modified in JavaScript.
5 | Instead, a double bracket syntax is introduced where double brackets are used for access and colons are replaced with commas.
6 | For example, you could write `array[::-1]` in Python to reverse an array, while the equivalent in JavaScript would be `array[[,,-1]]`.
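
To make that concrete, here's a minimal sketch of the double bracket syntax using the `SliceArray` class implemented in [slice-array.js](slice-array.js) in this directory (the exact `require` path depends on where you place the file):

```javascript
// A minimal sketch using the `SliceArray` class from slice-array.js in this directory.
const SliceArray = require('./slice-array');

const array = new SliceArray(1, 2, 3, 4, 5);

// Negative indices count back from the end, just like in Python.
console.log(array[-1]);       // 5

// Double brackets with commas stand in for Python's colon-based slices.
console.log(array[[1, 3]]);   // [2, 3] -- the equivalent of `array[1:3]` in Python
console.log(array[[,,-1]]);   // [5, 4, 3, 2, 1] -- the equivalent of `array[::-1]`
```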
7 |
8 | The code from the article has since been improved and released as an npm package called [Slice](https://github.com/intoli/slice).
9 | If you're interested in using extended slice syntax and negative indexing in your own project, then it's highly recommended that you use the package there instead of the original code from the article.
10 | The package additionally contains a `SliceString` class that introduces the same syntax for strings, and a `range()` method that works in the same way as the one from Python.
11 | You can find installation and usage instructions in [the GitHub repository for the project](https://github.com/intoli/slice).
12 |
13 | The article begins by constructing a crude implementation of negative indexing that doesn't use proxies.
14 | This isn't particularly useful in practice, but it serves to demonstrate the advantages of proxies over more primitive methods.
15 |
16 | - [primitive-negative-indexing.js](primitive-negative-indexing.js) - An implementation of negative indexing in JavaScript that doesn't use proxies.
17 |
18 |
19 | After that, it moves on to explore how slicing works in Python.
20 | The main code examples from that section have been condensed into these two files.
21 |
22 | - [slice-probe.py](slice-probe.py) - Implements the `SliceProbe` class that is used to understand how slicing works in Python.
23 | The class simply returns whatever key is passed in when brackets are used for object access, and the `slice-probe.py` file prints these keys out to show what they are for various slices.
24 | - [fizz-buzz.py](fizz-buzz.py) - A Fizz Buzz solution that uses slicing instead of explicit iteration or recursion.
25 | This primarily demonstrates the power and flexibility of the extended slice syntax.
26 |
27 | Finally, a `Slice` class is developed to provide the underlying slicing functionality, and a `SliceArray` class is developed which wraps `Slice` with the double bracket syntactic sugar using proxies.
28 | The implementation of these two classes can be found in these two files, respectively.
29 |
30 | - [slice.js](slice.js) - Implements the `Slice` class.
31 | - [slice-array.js](slice-array.js) - Implements the `SliceArray` class.
32 |
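
To get a feel for how the two classes fit together, here's a short sketch of using `Slice` directly, without the `SliceArray` proxy sugar; the constructor arguments mirror Python's `slice(start, stop, step)`.

```javascript
// A sketch of using the `Slice` class directly, without the `SliceArray` proxy sugar.
const Slice = require('./slice');

const letters = ['a', 'b', 'c', 'd', 'e'];

// The arguments mirror Python's `slice(start, stop, step)`; this is `letters[::2]`.
const everyOther = new Slice(null, null, 2);
console.log(everyOther.get(letters));                        // ['a', 'c', 'e']

// `set()` assigns values into the positions selected by the slice.
console.log(everyOther.set([...letters], ['x', 'y', 'z']));  // ['x', 'b', 'y', 'd', 'z']
```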
--------------------------------------------------------------------------------
/articles/python-slicing-in-javascript/fizz-buzz.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 |
4 | # Populate a list from 1 through 100.
5 | outputs = list(range(1, 100 + 1))
6 |
7 | # Replace every 3rd element with 'Fizz'.
8 | outputs[(3 - 1)::3] = (100 // 3) * ['Fizz']
9 | # Replace every 5th element with 'Buzz'.
10 | outputs[(5 - 1)::5] = (100 // 5) * ['Buzz']
11 | # Replace every (3 * 5)th element with 'Fizz Buzz'.
12 | outputs[((3 * 5) - 1)::(3 * 5)] = (100 // (3 * 5)) * ['Fizz Buzz']
13 |
14 | # Congrats on your new job! Please report to HR for orientation.
15 | print(outputs)
16 |
--------------------------------------------------------------------------------
/articles/python-slicing-in-javascript/primitive-negative-indexing.js:
--------------------------------------------------------------------------------
1 | function wrapArray(array) {
2 | var wrappedArray = {};
3 | for (var i = 0; i < array.length; i++) {
4 | (function(i) {
5 | // Normal array indexing: `array[0]`, `array[1]`, etc.
6 | Object.defineProperty(wrappedArray, i.toString(), {
7 | get: function() {
8 | return array[i];
9 | },
10 | set: function(value) {
11 | array[i] = value;
12 | },
13 | });
14 | // Fancy negative slice indexing to count back from the end.
15 | Object.defineProperty(wrappedArray, '-' + i.toString(), {
16 | get: function() {
17 | return array[array.length - i];
18 | },
19 | set: function(value) {
20 | array[array.length - i] = value;
21 | },
22 | });
23 | })(i);
24 | }
25 | return wrappedArray;
26 | }
27 |
28 |
29 | // Wrap an array of 5 elements.
30 | var array = wrapArray([0, 1, 2, 3, 4]);
31 |
32 | // Outputs: 1
33 | console.log(array[1]);
34 |
35 | // Outputs: 4
36 | console.log(array[-1]);
37 |
38 | // Outputs: 'three'
39 | array[-2] = 'three';
40 | console.log(array[3]);
41 |
--------------------------------------------------------------------------------
/articles/python-slicing-in-javascript/slice-array.js:
--------------------------------------------------------------------------------
1 | const Slice = require('./slice');
2 |
3 | class SliceArray extends Array {
4 | constructor(...args) {
5 | super(...args);
6 |
7 | // Helper method that constructs either a `get` or `set` trap.
8 | const constructTrap = action => (target, name, value) => {
9 | const key = (name || '').toString()
10 | .replace(/\s/g, '') // Remove all whitespace.
11 | .replace(/,/g, ':'); // Replace commas with colons.
12 |
13 | // Handle negative indices.
14 | if (/^-\d+$/.test(key)) {
15 | return Reflect[action](target, this.length + parseInt(key, 10), value);
16 | }
17 |
18 | // Handle slices.
19 | if (/^(-?\d+)?(:(-?\d+)?(:(-?\d+)?)?)$/.test(key)) {
20 | const [start, stop, step] = key.split(':').map(part => part.length ? part : undefined);
21 | const slice = new Slice(start, stop, step);
22 | return slice[action](target, value);
23 | }
24 |
25 | // Fall back to the array's own properties.
26 | return Reflect[action](target, name, value);
27 | };
28 |
29 | return new Proxy(this, {
30 | get: constructTrap('get'),
31 | set: constructTrap('set'),
32 | });
33 | }
34 | }
35 |
36 | module.exports = SliceArray;
37 |
--------------------------------------------------------------------------------
/articles/python-slicing-in-javascript/slice-probe.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 |
4 | class SliceProbe:
5 | """Simple class that overrides `[]` access to return the key."""
6 | def __getitem__(self, key):
7 | return key
8 |
9 |
10 | # Create an instance of the class to use for probing.
11 | probe = SliceProbe()
12 |
13 |
14 | # Outputs: 1
15 | print(probe[1])
16 |
17 | # Outputs: -2
18 | print(probe[-2])
19 |
20 | # Outputs: slice(None, 1, None)
21 | print(probe[:1])
22 |
23 | # Outputs: slice(1, None, None)
24 | print(probe[1:])
25 |
26 | # Outputs: slice(1, 2, None)
27 | print(probe[1:2])
28 |
29 | # Outputs: slice(1, -2, None)
30 | print(probe[1:-2])
31 |
32 | # Outputs: slice(None, None, 2)
33 | print(probe[::2])
34 |
35 | # Outputs: slice(1, None, -4)
36 | print(probe[1::-4])
37 |
38 | # Outputs: slice(1, 2, 3)
39 | print(probe[1:2:3])
40 |
--------------------------------------------------------------------------------
/articles/python-slicing-in-javascript/slice.js:
--------------------------------------------------------------------------------
1 | class Slice {
2 | constructor(start, stop, step) {
3 | // Support the `Slice(stop)` signature.
4 | if (stop === undefined && step === undefined) {
5 | [start, stop] = [stop, start];
6 | }
7 |
8 | // Support numerical strings.
9 | this.start = start == null ? start : parseInt(start, 10);
10 | this.stop = stop == null ? stop : parseInt(stop, 10);
11 | this.step = step == null ? step : parseInt(step, 10);
12 | }
13 |
14 | indices(array) {
15 | // Handle negative indices while preserving `null` values.
16 | const start = this.start < 0 ? this.start + array.length : this.start;
17 | const stop = this.stop < 0 ? this.stop + array.length : this.stop;
18 |
19 | // Set the default step to `1`.
20 | const step = this.step == null ? 1 : this.step;
21 | if (step === 0) {
22 | throw new Error('slice step cannot be zero');
23 | }
24 |
25 | // Find the starting index, and construct a check for if an index should be included.
26 | let currentIndex;
27 | let indexIsValid;
28 | if (step > 0) {
29 | currentIndex = start == null ? 0 : Math.max(start, 0);
30 | const maximumPossibleIndex = stop == null ? array.length - 1 : stop - 1;
31 | indexIsValid = (index) => index <= maximumPossibleIndex;
32 | } else {
33 | currentIndex = start == null ? array.length - 1 : Math.min(start, array.length - 1);
34 | const minimumPossibleIndex = stop == null ? 0 : stop + 1;
35 | indexIsValid = (index) => index >= minimumPossibleIndex;
36 | }
37 |
38 | // Loop through and add indices until we've completed the loop.
39 | const indices = [];
40 | while (indexIsValid(currentIndex)) {
41 | if (currentIndex >= 0 && currentIndex < array.length) {
42 | indices.push(currentIndex);
43 | }
44 | currentIndex += step;
45 | }
46 |
47 | return indices;
48 | };
49 |
50 | apply(array, values) {
51 | return values ? this.set(array, values) : this.get(array);
52 | }
53 |
54 | get(array) {
55 | // We can use the built in `Array.slice()` method for this special case.
56 | if (this.step == null || this.step === 1) {
57 | const start = this.start == null ? undefined : this.start;
58 | const stop = this.stop == null ? undefined : this.stop;
59 | return array.slice(start, stop);
60 |
61 | }
62 |
63 | return this.indices(array)
64 | .map(index => array[index]);
65 | }
66 |
67 | set(array, values) {
68 | // We can insert arrays of any length for unextended slices.
69 | if (this.step == null || this.step === 1) {
70 | const start = this.start < 0 ? this.start + array.length : this.start;
71 | const stop = this.stop < 0 ? this.stop + array.length : this.stop;
72 | const deleteCount = this.stop == null ? array.length : stop - start;
73 | array.splice(start, deleteCount, ...values);
74 | return array;
75 | }
76 |
77 | // Otherwise, the lengths must match and we need to do them one-by-one.
78 | const indices = this.indices(array);
79 | if (indices.length !== values.length) {
80 | throw new Error(
81 | `attempt to assign sequence of size ${values.length} ` +
82 | `to extended slice of size ${indices.length}`
83 | );
84 | }
85 | this.indices(array)
86 | .forEach((arrayIndex, valuesIndex) => array[arrayIndex] = values[valuesIndex]);
87 | return array;
88 | }
89 | };
90 |
91 | module.exports = Slice;
92 |
--------------------------------------------------------------------------------
/articles/running-selenium-with-headless-chrome-in-ruby/README.md:
--------------------------------------------------------------------------------
1 | # Running Selenium with Headless Chrome in Ruby
2 |
3 | [Running Selenium with Headless Chrome in Ruby](https://intoli.com/blog/running-selenium-with-headless-chrome-in-ruby/) demonstrates how to use headless Chrome in Ruby with Selenium.
4 |
5 | - [take-screenshot.rb](take-screenshot.rb) - A simple script to launch headless Chrome and save a screenshot.
6 |
--------------------------------------------------------------------------------
/articles/running-selenium-with-headless-chrome-in-ruby/take-screenshot.rb:
--------------------------------------------------------------------------------
1 | require "selenium-webdriver"
2 |
3 |
4 | # Configure the driver to run in headless mode.
5 | options = Selenium::WebDriver::Chrome::Options.new
6 | options.add_argument('--headless')
7 | driver = Selenium::WebDriver.for :chrome, options: options
8 |
9 | # Navigate to a really super awesome blog.
10 | driver.navigate.to "https://intoli.com/blog/"
11 |
12 | # Resize the window and take a screenshot.
13 | driver.manage.window.resize_to(800, 800)
14 | driver.save_screenshot "intoli-screenshot.png"
15 |
--------------------------------------------------------------------------------
/articles/running-selenium-with-headless-chrome/README.md:
--------------------------------------------------------------------------------
1 | # Running Selenium with Headless Chrome
2 |
3 | [Running Selenium with Headless Chrome](https://intoli.com/blog/running-selenium-with-headless-chrome/) illustrates how to run the Google Chrome browser in its new headless mode using Selenium in Python.
4 | The [scrape-facebook-posts.py](scrape-facebook-posts.py) script launches a headless Chrome session, navigates to Facebook, logs in, takes a screenshot, and prints out author and content information from posts.
5 |
--------------------------------------------------------------------------------
/articles/running-selenium-with-headless-chrome/scrape-facebook-posts.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | import sys
4 |
5 | from selenium import webdriver
6 |
7 |
8 | # Construct a ChromeOptions instance to configure.
9 | options = webdriver.ChromeOptions()
10 |
11 | # Specify that we'll run in headless mode.
12 | options.add_argument('headless')
13 |
14 | # Set the window size.
15 | options.add_argument('window-size=1200x600')
16 |
17 | # Initialize the driver.
18 | driver = webdriver.Chrome(chrome_options=options)
19 |
20 | # Navigate to Facebook.
21 | driver.get('https://facebook.com')
22 |
23 | # Wait up to 10 seconds for the elements to become available.
24 | driver.implicitly_wait(10)
25 |
26 | # Use CSS selectors to grab the login inputs.
27 | email_input = driver.find_element_by_css_selector('input[type=email]')
28 | password_input = driver.find_element_by_css_selector('input[type=password]')
29 | login = driver.find_element_by_css_selector('input[value="Log In"]')
30 |
31 | # Parse the command-line options.
32 | if len(sys.argv) == 3:
33 | email, password = sys.argv[1:]
34 | else:
35 | print('You probably want to specify your email address and password as arguments.')
36 | email, password = 'evan@intoli.com', 'hunter2'
37 |
38 | # Enter our credentials.
39 | email_input.send_keys(email)
40 | password_input.send_keys(password)
41 |
42 | # Save a screenshot of the page with our email/password entered.
43 | driver.get_screenshot_as_file('main-page-with-information-entered.png')
44 |
45 | # Login.
46 | login.click()
47 |
48 | # Navigate to Evan's profile.
49 | driver.get('https://www.facebook.com/profile.php?id=100009447446864')
50 |
51 | # Take another screenshot.
52 | driver.get_screenshot_as_file('evans-profile.png')
53 |
54 | # Cycle through the posts and print out the authors and content.
55 | posts = driver.find_elements_by_css_selector('#stream_pagelet .fbUserContent')
56 | for post in posts:
57 | try:
58 | author = post.find_elements_by_css_selector('a[data-hovercard*=user]')[-1].get_attribute('innerHTML')
59 | content = post.find_elements_by_css_selector('div.userContent')[-1].get_attribute('innerHTML')
60 | except IndexError:
61 | # It's an advertisement.
62 | continue
63 | print(f'{author}: "{content}"')
64 |
--------------------------------------------------------------------------------
/articles/sandbox-breakout/README.md:
--------------------------------------------------------------------------------
1 | # Breaking Out of the Chrome/WebExtension Sandbox
2 |
3 | [Breaking Out of the Chrome/WebExtension Sandbox](https://intoli.com/blog/sandbox-breakout/) is a guide to breaking out of the content script context of a browser extension so that you can interact with the page context directly.
4 | There are three supplemental materials for the article that are included here:
5 |
6 | - [language-test.html](language-test.html) - A simple test page that populates a header element with the current value of `window.navigator.language`.
7 | - [extension/manifest.json](extension/manifest.json) - The manifest for the extension that overwrites the `window.navigator.language` property.
8 | - [extension/sandbox-breakout.js](extension/sandbox-breakout.js) - The implementation of the code which breaks out of the sandbox and overwrites `window.navigator.language`.
9 |
10 | A Chrome browser instance can be launched to run the tests with the following command.
11 |
12 | ```bash
13 | google-chrome --load-extension=./extension/ language-test.html
14 | ```
15 |
16 | If the sandbox breakout works as expected, this should open a webpage that displays the text `xx-XX`.
17 | You can see the [original article](https://intoli.com/blog/sandbox-breakout/) for details about how things work.
18 |
19 |
20 | ## The runInPageContext() Method
21 |
22 | This is defined in [extension/sandbox-breakout.js](extension/sandbox-breakout.js), but the portion of code that you're most likely interested in is this.
23 |
24 | ```javascript
25 | // Breaks out of the content script context by constructing a special
26 | // script tag and injecting it into the page.
27 | const runInPageContext = (method, ...args) => {
28 | // The stringified method which will be parsed as a function object.
29 | const stringifiedMethod = method instanceof Function
30 | ? method.toString()
31 | : `() => { ${method} }`;
32 |
33 | // The stringified arguments for the method as JS code that will reconstruct the array.
34 | const stringifiedArgs = JSON.stringify(args);
35 |
36 | // The full content of the script tag.
37 | const scriptContent = `
38 | // Parse and run the method with its arguments.
39 | (${stringifiedMethod})(...${stringifiedArgs});
40 |
41 | // Remove the script element to cover our tracks.
42 | document.currentScript.parentElement
43 | .removeChild(document.currentScript);
44 | `;
45 |
46 | // Create a script tag and inject it into the document.
47 | const scriptElement = document.createElement('script');
48 | scriptElement.innerHTML = scriptContent;
49 | document.documentElement.prepend(scriptElement);
50 | };
51 | ```
52 |
53 | This function can be called from an extension's content script context in order to evaluate JavaScript code in the corresponding page context.
54 | The first argument can be either a string containing JavaScript code or a function object.
55 | If it is a function object, then any additional arguments will be passed to the function when it is evaluated.
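
For example, the bundled content script invokes it with a function object and a single argument, but you could just as easily hand it a string of code; the second call here is purely illustrative.

```javascript
// Passing a function object along with an argument for it, as the bundled content script does.
runInPageContext(overwriteLanguage, 'xx-XX');

// Passing a string of JavaScript code to evaluate in the page context instead.
runInPageContext('console.log("Hello from the page context!");');
```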
56 |
--------------------------------------------------------------------------------
/articles/sandbox-breakout/extension/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": 2,
3 | "name": "Content Script Sandbox Breakout Extension",
4 | "version": "1.0.0",
5 | "applications": {
6 | "gecko": {
7 | "id": "sandbox-breakout@intoli.com"
8 | }
9 | },
10 | "content_scripts": [
11 | {
12 | "matches": [""],
13 | "js": ["sandbox-breakout.js"],
14 | "run_at": "document_start"
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/articles/sandbox-breakout/extension/sandbox-breakout.js:
--------------------------------------------------------------------------------
1 | // Overwrite the `navigator.language` property to return a custom value.
2 | const overwriteLanguage = (language) => {
3 | Object.defineProperty(navigator, 'language', {
4 | get: () => language,
5 | });
6 | };
7 |
8 |
9 | // Breaks out of the content script context by constructing a special
10 | // script tag and injecting it into the page.
11 | const runInPageContext = (method, ...args) => {
12 | // The stringified method which will be parsed as a function object.
13 | const stringifiedMethod = method instanceof Function
14 | ? method.toString()
15 | : `() => { ${method} }`;
16 |
17 | // The stringified arguments for the method as JS code that will reconstruct the array.
18 | const stringifiedArgs = JSON.stringify(args);
19 |
20 | // The full content of the script tag.
21 | const scriptContent = `
22 | // Parse and run the method with its arguments.
23 | (${stringifiedMethod})(...${stringifiedArgs});
24 |
25 | // Remove the script element to cover our tracks.
26 | document.currentScript.parentElement
27 | .removeChild(document.currentScript);
28 | `;
29 |
30 | // Create a script tag and inject it into the document.
31 | const scriptElement = document.createElement('script');
32 | scriptElement.innerHTML = scriptContent;
33 | document.documentElement.prepend(scriptElement);
34 | };
35 |
36 |
37 | // This won't work, it's sandboxed from the page context.
38 | overwriteLanguage('xx-XX');
39 |
40 | // This will work, it breaks out of the sandbox.
41 | runInPageContext(overwriteLanguage, 'xx-XX');
42 |
--------------------------------------------------------------------------------
/articles/sandbox-breakout/language-test.html:
--------------------------------------------------------------------------------
1 | <html>
2 | <body>
3 | <!-- The element id here is an assumed name; the page simply displays `navigator.language` in the header. -->
4 | <h1 id="language">Please Wait...</h1>
5 | <script>
6 | document.getElementById('language').innerHTML = window.navigator.language;
7 | </script>
8 | </body>
9 | </html>
10 |
--------------------------------------------------------------------------------
/articles/scrape-infinite-scroll/README.md:
--------------------------------------------------------------------------------
1 | # Using Puppeteer to Scrape Websites with Infinite Scrolling
2 |
3 | This directory is centered around [scrape-infinite-scroll.js](scrape-infinite-scroll.js), which uses Puppeteer to scrape infinite scroll items from a [demo page](https://intoli.com/blog/scrape-infinite-scroll/demo.html) set up for it.
4 | The script's implementation details are described in the [Using Puppeteer to Scrape Websites with Infinite Scrolling](https://intoli.com/blog/scrape-infinite-scroll/) article published on the Intoli blog.
5 | Customizing the script should be straightforward after reading this article.
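
The main things you'd typically change are the `extractItems()` function and the item target count passed to `scrapeInfiniteScrollItems()`. As a rough sketch, here's how `extractItems()` might look for a hypothetical page where each result is an `article.result` element and you want the link targets instead of the text (the selector is made up for illustration).

```javascript
// A sketch of adapting `extractItems()` to a hypothetical page where each result
// is an `article.result` element and you want the link targets instead of the text.
function extractItems() {
  const extractedElements = document.querySelectorAll('article.result a');
  const items = [];
  for (let element of extractedElements) {
    items.push(element.href);
  }
  return items;
}
```

The target item count is the third argument passed to `scrapeInfiniteScrollItems()` near the bottom of the script.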
6 |
7 | To run the script, you need to have [Node.js](https://nodejs.org/en/) installed, which you can do using [nvm](https://github.com/creationix/nvm).
8 | With that out of the way, download the contents of this directory to disk.
9 |
10 | ```bash
11 | git clone https://github.com/Intoli/intoli-article-materials.git
12 | cd intoli-article-materials/articles/scrape-infinite-scroll
13 | ```
14 |
15 | And then install Puppeteer with
16 |
17 | ```bash
18 | npm install
19 | ```
20 |
21 | Finally, run the script with
22 |
23 | ```bash
24 | node scrape-infinite-scroll.js
25 | ```
26 |
--------------------------------------------------------------------------------
/articles/scrape-infinite-scroll/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "scrape-infinite-scroll",
3 | "version": "1.0.0",
4 | "description": "A script which uses Puppeteer to scrape pages with infinite scroll.",
5 | "main": "scrape-infinite-scroll.js",
6 | "repository": {
7 | "type": "git",
8 | "url": "https://github.com/Intoli/intoli-article-materials.git"
9 | },
10 | "keywords": [
11 | "scraping",
12 | "javascript",
13 | "puppeteer",
14 | "headless",
15 | "chrome"
16 | ],
17 | "author": "Andre Perunicic / Intoli, LLC",
18 | "license": "BSD-2-Clause",
19 | "bugs": {
20 | "url": "https://github.com/Intoli/intoli-article-materials/issues"
21 | },
22 | "homepage": "https://intoli.com/blog/scrape-infinite-scroll/",
23 | "dependencies": {
24 | "puppeteer": "^1.0.0"
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/articles/scrape-infinite-scroll/scrape-infinite-scroll.js:
--------------------------------------------------------------------------------
1 | const fs = require('fs');
2 | const puppeteer = require('puppeteer');
3 |
4 | /**
5 | * This function is injected into the page and used to scrape items from it.
6 | */
7 | function extractItems() {
8 | const extractedElements = document.querySelectorAll('#boxes > div.box');
9 | const items = [];
10 | for (let element of extractedElements) {
11 | items.push(element.innerText);
12 | }
13 | return items;
14 | }
15 |
16 | /**
17 | * Scrolls and extracts content from a page.
18 | * @param {object} page - A loaded Puppeteer Page instance.
19 | * @param {function} extractItems - Item extraction function that is injected into the page.
20 | * @param {number} itemTargetCount - The target number of items to extract before stopping.
21 | * @param {number} scrollDelay - The time (in milliseconds) to wait between scrolls.
22 | */
23 | async function scrapeInfiniteScrollItems(page, extractItems, itemTargetCount, scrollDelay = 1000) {
24 | let items = [];
25 | try {
26 | let previousHeight;
27 | while (items.length < itemTargetCount) {
28 | items = await page.evaluate(extractItems);
29 | previousHeight = await page.evaluate('document.body.scrollHeight');
30 | await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');
31 | await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
32 | await page.waitFor(scrollDelay);
33 | }
34 | } catch(e) { }
35 | return items;
36 | }
37 |
38 | (async () => {
39 | // Set up browser and page.
40 | const browser = await puppeteer.launch({
41 | headless: false,
42 | args: ['--no-sandbox', '--disable-setuid-sandbox'],
43 | });
44 | const page = await browser.newPage();
45 | await page.setViewport({ width: 1280, height: 926 });
46 |
47 | // Navigate to the demo page.
48 | await page.goto('https://intoli.com/blog/scrape-infinite-scroll/demo.html');
49 |
50 | // Scroll and extract items from the page.
51 | const items = await scrapeInfiniteScrollItems(page, extractItems, 100);
52 |
53 | // Save extracted items to a file.
54 | fs.writeFileSync('./items.txt', items.join('\n') + '\n');
55 |
56 | // Close the browser.
57 | await browser.close();
58 | })();
59 |
--------------------------------------------------------------------------------
/articles/steam-scraper/README.md:
--------------------------------------------------------------------------------
1 | # Scraping User-Submitted Reviews from the Steam Store
2 |
3 | [Scraping User-Submitted Reviews from the Steam Store](https://intoli.com/blog/steam-scraper/) is a Scrapy tutorial that we produced in conjunction with our friends over at [Scraping Hub](https://scrapinghub.com/).
4 | It explores building an advanced Scrapy spider that bypasses access checkpoints on the [Steam website](https://store.steampowered.com/) in order to download user reviews.
5 | The full code of the scrapers can be found in the [steam-scraper](https://github.com/prncc/steam-scraper) repository.
6 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": [
3 | "env",
4 | "react",
5 | "stage-2"
6 | ]
7 | }
8 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "parser": "babel-eslint",
3 | "extends": "airbnb",
4 | "env": {
5 | "browser": true,
6 | "es6": true,
7 | "node": true
8 | },
9 | "settings": {
10 | "import/resolver": {
11 | "webpack": {
12 | "config": "./webpack.config.js"
13 | }
14 | }
15 | },
16 | "rules": {
17 | "class-methods-use-this": "off",
18 | "function-paren-newline": "off",
19 | "object-curly-newline": ["error", {
20 | "consistent": true,
21 | "minProperties": 5
22 | }]
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/README.md:
--------------------------------------------------------------------------------
1 | # Using Webpack to Render Markdown in React Apps
2 |
3 | [Using Webpack to Render Markdown in React Apps](https://intoli.com/blog/webpack-markdown-setup) is a short article describing the concrete steps you need to take in order to automatically render Markdown documents through Webpack.
4 | This folder contains a working example of the configuration described in that tutorial.
5 | The code is organized as follows:
6 |
7 | - [webpack.config.js](webpack.config.js) - Contains the loader setup which makes Markdown rendering and code highlighting possible.
8 | - [src/](src/) - Contains a web app built by the above Webpack config.
9 | Its entry point, [src/index.jsx](src/index.jsx), shows how to load a React component that accepts imported Markdown content (a trimmed-down sketch of this pattern follows the list below).
10 | - [src/article.md](src/article.md) - The Markdown file that's rendered by this app is a listing of various Markdown features, and serves to show off how a wide range of elements get rendered.
11 |
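
The pattern itself is simple: once the loaders are in place, a Markdown file can be imported like any other module and handed to the component as a prop. A trimmed-down sketch of the entry point looks something like this (the real [src/index.jsx](src/index.jsx) also pulls in `babel-polyfill`).

```jsx
import React from 'react';
import ReactDOM from 'react-dom';

import Markdown from './components/Markdown';
// Thanks to the Webpack loader chain, this import resolves to a string of rendered HTML.
import content from './article.md';

ReactDOM.render(<Markdown content={content} />, document.getElementById('app'));
```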
12 |
13 | ## Running This Example
14 |
15 | First, clone this repository and navigate to this article's directory:
16 |
17 | ```bash
18 | git clone https://github.com/intoli/intoli-article-materials.git
19 | cd intoli-article-materials/articles/webpack-markdown-setup
20 | ```
21 |
22 | Then, install the project's dependencies via Yarn
23 |
24 | ```bash
25 | yarn install
26 | ```
27 |
28 | With the basic setup out of the way, you can start the app.
29 | The default script is run via
30 |
31 | ```bash
32 | yarn start
33 | ```
34 |
35 | and it starts a hot-reloading server that will re-render the app on any changes in real time.
36 | You can view the app at `http://localhost:3000` (customizable in the Webpack config).
37 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "scripts": {
3 | "lint": "eslint --ignore-path .gitignore --ext js,jsx src",
4 | "start": "NODE_ENV=development webpack-dev-server --config webpack.config.js --hot"
5 | },
6 | "devDependencies": {
7 | "babel-core": "^6.26.0",
8 | "babel-eslint": "^8.2.2",
9 | "babel-loader": "^7.1.3",
10 | "babel-preset-env": "^1.6.1",
11 | "babel-preset-react": "^6.24.1",
12 | "babel-preset-stage-2": "^6.24.1",
13 | "css-loader": "^0.28.11",
14 | "eslint": "^4.18.2",
15 | "eslint-config-airbnb": "^16.1.0",
16 | "eslint-loader": "^2.0.0",
17 | "eslint-plugin-import": "^2.9.0",
18 | "eslint-plugin-jsx-a11y": "^6.0.3",
19 | "eslint-plugin-react": "^7.7.0",
20 | "highlight.js": "^9.12.0",
21 | "html-loader": "^0.5.5",
22 | "html-webpack-plugin": "^3.0.4",
23 | "markdown-loader": "^2.0.2",
24 | "style-loader": "^0.21.0",
25 | "webpack": "^3.10.0",
26 | "webpack-cli": "^2.0.10",
27 | "webpack-dev-server": "^2.11.2"
28 | },
29 | "dependencies": {
30 | "babel-polyfill": "^6.26.0",
31 | "prop-types": "^15.6.1",
32 | "react": "^16.3.2",
33 | "react-dom": "^16.3.2"
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/src/components/Markdown/gruvbox-dark.css:
--------------------------------------------------------------------------------
1 | /*
2 |
3 | Gruvbox style (dark) (c) Pavel Pertsev (original style at https://github.com/morhetz/gruvbox)
4 |
5 | */
6 |
7 | .hljs {
8 | display: block;
9 | overflow-x: auto;
10 | padding: 0.5em;
11 | background: #282828;
12 | }
13 |
14 | .hljs,
15 | .hljs-subst {
16 | color: #ebdbb2;
17 | }
18 |
19 | /* Gruvbox Red */
20 | .hljs-deletion,
21 | .hljs-formula,
22 | .hljs-keyword,
23 | .hljs-link,
24 | .hljs-selector-tag {
25 | color: #fb4934;
26 | }
27 |
28 | /* Gruvbox Blue */
29 | .hljs-built_in,
30 | .hljs-emphasis,
31 | .hljs-name,
32 | .hljs-quote,
33 | .hljs-strong,
34 | .hljs-title,
35 | .hljs-variable {
36 | color: #83a598;
37 | }
38 |
39 | /* Gruvbox Yellow */
40 | .hljs-attr,
41 | .hljs-params,
42 | .hljs-template-tag,
43 | .hljs-type {
44 | color: #fabd2f;
45 | }
46 |
47 | /* Gruvbox Purple */
48 | .hljs-builtin-name,
49 | .hljs-doctag,
50 | .hljs-literal,
51 | .hljs-number {
52 | color: #8f3f71;
53 | }
54 |
55 | /* Gruvbox Orange */
56 | .hljs-code,
57 | .hljs-meta,
58 | .hljs-regexp,
59 | .hljs-selector-id,
60 | .hljs-template-variable {
61 | color: #fe8019;
62 | }
63 |
64 | /* Gruvbox Green */
65 | .hljs-addition,
66 | .hljs-meta-string,
67 | .hljs-section,
68 | .hljs-selector-attr,
69 | .hljs-selector-class,
70 | .hljs-string,
71 | .hljs-symbol {
72 | color: #b8bb26;
73 | }
74 |
75 | /* Gruvbox Aqua */
76 | .hljs-attribute,
77 | .hljs-bullet,
78 | .hljs-class,
79 | .hljs-function,
80 | .hljs-function .hljs-keyword,
81 | .hljs-meta-keyword,
82 | .hljs-selector-pseudo,
83 | .hljs-tag {
84 | color: #8ec07c;
85 | }
86 |
87 | /* Gruvbox Gray */
88 | .hljs-comment {
89 | color: #928374;
90 | }
91 |
92 | /* Gruvbox Purple */
93 | .hljs-link_label,
94 | .hljs-literal,
95 | .hljs-number {
96 | color: #d3869b;
97 | }
98 |
99 | .hljs-comment,
100 | .hljs-emphasis {
101 | font-style: italic;
102 | }
103 |
104 | .hljs-section,
105 | .hljs-strong,
106 | .hljs-tag {
107 | font-weight: bold;
108 | }
109 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/src/components/Markdown/index.jsx:
--------------------------------------------------------------------------------
1 | import PropTypes from 'prop-types';
2 | import React from 'react';
3 |
4 | // Set the rendered code theme.
5 | import './gruvbox-dark.css';
6 | // Customize the way markdown is rendered.
7 | import './markdown.css';
8 |
9 |
10 | const wrapMarkup = html => ({
11 | __html: html,
12 | });
13 |
14 |
15 | const Markdown = ({ content }) => (
16 | // eslint-disable-next-line react/no-danger
17 | <div className="markdown" dangerouslySetInnerHTML={wrapMarkup(content)} />
18 | );
19 |
20 | Markdown.propTypes = {
21 | content: PropTypes.string.isRequired,
22 | };
23 |
24 |
25 | export default Markdown;
26 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/src/components/Markdown/markdown.css:
--------------------------------------------------------------------------------
1 | .markdown {
2 | margin: auto;
3 | width: 800px;
4 | }
5 |
6 | .markdown table {
7 | margin-bottom: 13px;
8 | }
9 |
10 | .markdown table td,
11 | .markdown table th {
12 | padding: 6px 13px;
13 | text-align: left;
14 | }
15 |
16 | .markdown table th {
17 | background: #d0dae5;
18 | }
19 |
20 | .markdown table tr {
21 | border-top: 1px solid #dfe2e5;
22 | }
23 |
24 | .markdown table tr:nth-child(even) {
25 | background: #f6f8fa;
26 | }
27 |
28 | .markdown pre {
29 | margin-bottom: -7px;
30 | }
31 |
32 | .markdown pre code {
33 | padding: 0;
34 | }
35 |
36 | .markdown code {
37 | background: #ededed;
38 | font-size: 13px;
39 | padding: 1px 2px;
40 | }
41 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/src/index.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html>
3 | <head>
4 | <meta charset="utf-8">
5 | <title>Markdown Webpack Setup Demo</title>
6 | </head>
7 | <body>
8 | <!-- Mount point targeted by `src/index.jsx`. -->
9 | <div id="app"></div>
10 | </body>
11 | </html>
12 |
13 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/src/index.jsx:
--------------------------------------------------------------------------------
1 | import 'babel-polyfill';
2 | import React from 'react';
3 | import ReactDOM from 'react-dom';
4 |
5 | import Markdown from './components/Markdown';
6 | import content from './article.md';
7 |
8 |
9 | ReactDOM.render(
10 | <Markdown content={content} />,
11 | document.getElementById('app'),
12 | );
13 |
--------------------------------------------------------------------------------
/articles/webpack-markdown-setup/webpack.config.js:
--------------------------------------------------------------------------------
1 | const path = require('path');
2 |
3 | const highlight = require('highlight.js');
4 | const HtmlWebpackPlugin = require('html-webpack-plugin');
5 |
6 |
7 | const isProduction = process.env.NODE_ENV === 'production';
8 |
9 | const config = {
10 | devServer: {
11 | clientLogLevel: 'info',
12 | contentBase: './frontend',
13 | historyApiFallback: true,
14 | overlay: {
15 | errors: true,
16 | warnings: false,
17 | },
18 | port: 3000,
19 | publicPath: '/',
20 | stats: {
21 | modules: false,
22 | chunks: false,
23 | },
24 | },
25 | devtool: 'cheap-module-source-map',
26 | entry: path.resolve(__dirname, 'src', 'index.jsx'),
27 | module: {
28 | rules: [
29 | {
30 | test: /\.(js|jsx)$/,
31 | exclude: /node_modules/,
32 | enforce: 'pre',
33 | loader: 'eslint-loader',
34 | },
35 | {
36 | test: /\.(js|jsx)$/,
37 | exclude: /node_modules/,
38 | loader: 'babel-loader',
39 | },
40 | {
41 | test: /\.css$/,
42 | exclude: /node_modules/,
43 | use: [
44 | {
45 | loader: 'style-loader',
46 | options: {
47 | sourceMap: !isProduction,
48 | },
49 | },
50 | {
51 | loader: 'css-loader',
52 | options: {
53 | importLoaders: 0,
54 | sourceMap: !isProduction,
55 | },
56 | },
57 | ],
58 | },
59 | {
60 | test: /\.(md)$/,
61 | use: [
62 | 'html-loader',
63 | {
64 | loader: 'markdown-loader',
65 | options: {
66 | highlight: (code, lang) => {
67 | if (!lang || ['text', 'literal', 'nohighlight'].includes(lang)) {
68 | return `<pre class="hljs">${code}</pre>`;
69 | }
70 | const html = highlight.highlight(lang, code).value;
71 | return `<span class="hljs">${html}</span>`;
72 | },
73 | },
74 | },
75 | ],
76 | },
77 | ],
78 | },
79 | output: {
80 | filename: 'bundle.js',
81 | path: path.resolve(__dirname, 'build'),
82 | publicPath: '/',
83 | },
84 | plugins: [
85 | new HtmlWebpackPlugin({
86 | inject: true,
87 | template: './src/index.html',
88 | }),
89 | ],
90 | resolve: {
91 | extensions: [
92 | '.js',
93 | '.jsx',
94 | ],
95 | },
96 | watchOptions: {
97 | ignored: /build/,
98 | },
99 | };
100 |
101 |
102 | module.exports = config;
103 |
--------------------------------------------------------------------------------
/articles/youtube-mp3-downloader/.gitignore:
--------------------------------------------------------------------------------
1 | exodus/
2 | node_modules/
3 | .tern-port
4 |
--------------------------------------------------------------------------------
/articles/youtube-mp3-downloader/README.md:
--------------------------------------------------------------------------------
1 | # Building a YouTube MP3 Downloader with Exodus, FFmpeg, and AWS Lambda
2 |
3 | [Running FFmpeg on AWS Lambda for 1.9% the cost of AWS Elastic Transcoder](https://intoli.com/blog/transcoding-on-aws-lambda) and [Building a YouTube MP3 Downloader with Exodus, FFmpeg, and AWS Lambda](https://intoli.com/blog/youtube-mp3-downloader) form a two part tutorial for building a practical bookmarklet that uses [AWS Lambda](https://aws.amazon.com/lambda/) to convert YouTube videos to MP3 files and then downloads them.
4 | The project consists of two Lambda functions:
5 |
6 | - `YoutubeMP3TranscoderFunction` - Defined in [transcoder.js](transcoder.js), this function first downloads a configurable media file, converts it to an MP3 using a bundled version of [FFmpeg](https://www.ffmpeg.org), and then uploads the MP3 to an S3 bucket.
7 | The behavior of the Lambda function can be controlled by specifying the following keys in the invocation event (an example event is sketched after this list).
8 | - `filename` - The filename to use in the MP3 file's [Content-Disposition header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition) when a user downloads it.
9 | This determines the filename that will be suggested to the user when they save it to their computer.
10 | - `logKey` - An S3 key where the output of FFmpeg will be placed for logging purposes.
11 | - `mp3Key` - An S3 key where the converted MP3 file will be placed.
12 | - `s3Bucket` - The S3 bucket where the log and MP3 files will be placed.
13 | - `url` - The URL where the input audio/video file can be downloaded from.
14 | - `YoutubeMP3DownloaderFunction` - Defined in [app.js](app.js) and [lambda.js](lambda.js), this function is designed to integrate with [API Gateway](https://aws.amazon.com/api-gateway/) using [aws-serverless-express](https://github.com/awslabs/aws-serverless-express) (a minimal sketch of that glue appears just below).
15 | This function will serve up the YouTube MP3 Downloader's HTML download page as well as provide the internal API methods that it uses behind the scenes.
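
As a concrete reference for the transcoder's interface, an invocation event is just an object with the keys listed above; all of the values in this sketch are made up for illustration.

```javascript
// An illustrative invocation event for the transcoder function; the key names match
// the list above, but all of the values here are made up.
const exampleEvent = {
  filename: 'Some Video Title.mp3',
  logKey: 'log/1234567890 - Some Video Title.log',
  mp3Key: 'mp3/1234567890 - Some Video Title.mp3',
  s3Bucket: 'youtube-mp3-downloader',
  url: 'https://example.com/some/source/audio/file',
};
```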
16 |
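The [lambda.js](lambda.js) glue mentioned above follows the standard [aws-serverless-express](https://github.com/awslabs/aws-serverless-express) pattern; a minimal version looks roughly like this.

```javascript
const awsServerlessExpress = require('aws-serverless-express');

const app = require('./app');

// Wrap the express app in a server that aws-serverless-express can proxy to.
const server = awsServerlessExpress.createServer(app);

// The Lambda handler just forwards API Gateway events to the express app.
exports.handler = (event, context) => awsServerlessExpress.proxy(server, event, context);
```
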
17 | Note that you'll need to perform the deployment steps on Linux because we'll be bundling a locally installed version of FFmpeg.
18 | If you use either Windows or macOS, then you'll need to work inside of a virtual machine running Linux.
19 | You could alternatively spin up an EC2 instance, and work on the remote machine.
20 |
21 | Before deploying the Lambda function, it will be necessary to install several dependencies.
22 | You'll first need to make sure that `aws-cli`, `jq`, `git`, `node`, `npm`, `yarn`, `python`, `pip`, and `ffmpeg` are all available.
23 | All of these should be available in your system package manager, and you already likely have most of them installed.
24 |
25 | After that, you'll need to install [Exodus](https://github.com/intoli/exodus).
26 | This can be done by running the following.
27 |
28 | ```bash
29 | # Install the `exodus_bundler` package.
30 | pip install --user exodus_bundler
31 |
32 | # Make sure that `exodus` is in your `PATH`.
33 | export PATH="${HOME}/.local/bin/:${PATH}"
34 | ```
35 |
36 | You might also want to add the `export PATH="${HOME}/.local/bin/:${PATH}"` line to your `~/.bashrc` file, so that the `exodus` command will be in your path in the future.
37 |
38 | Next, you'll need to clone the [intoli-article-materials repository](https://github.com/intoli/intoli-article-materials), move into this article's directory, and install the Node dependencies.
39 |
40 | ```bash
41 | # Clone the repository and move into the directory.
42 | git clone https://github.com/intoli/intoli-article-materials.git
43 | cd intoli-article-materials/articles/youtube-mp3-downloader/
44 |
45 | # Install the node dependencies from `package.json` and `yarn.lock`.
46 | yarn install
47 | ```
48 |
49 | After the Node dependencies finish installing, you must create a local Exodus bundle for FFmpeg.
50 | The following command will create a local directory called `exodus` that includes FFmpeg as well as all of its dependencies.
51 |
52 | ```bash
53 | # Create an `ffmpeg` bundle and extract it in the current directory.
54 | exodus --tarball ffmpeg | tar -zx
55 | ```
56 |
57 | At this stage, you're very close to being ready to deploy everything.
58 | The last thing that you need to do is to customize the names of the S3 bucket, the Lambda functions, and other AWS resources which must have unique names.
59 | These are defined at the top of the [deploy-everything.sh](deploy-everything.sh) and [app.js](app.js) files.
60 | After setting these to unique values, you can simply run
61 |
62 | ```bash
63 | ./deploy-everything.sh
64 | ```
65 |
66 | to deploy all of the AWS resources.
67 | This will echo out a lot of information about the AWS resources being created, and then at the end you should see something like this.
68 |
69 | ```
70 | Now just create a bookmarklet with the following contents!
71 | javascript:window.open(`https://osacfvxuq7.execute-api.us-east-2.amazonaws.com/v1/${window.location.href}`);
72 | ```
73 |
74 | Then just create the bookmarklet, navigate to a video on YouTube, and click the bookmarklet to try it out!
75 |
--------------------------------------------------------------------------------
/articles/youtube-mp3-downloader/app.js:
--------------------------------------------------------------------------------
1 | const AWS = require('aws-sdk');
2 | const express = require('express');
3 | const nunjucks = require('nunjucks');
4 | const ytdl = require('ytdl-core');
5 |
6 | const apiStage = 'v1';
7 | const transcoderFunctionName = 'YoutubeMp3TranscoderFunction';
8 | const lambda = new AWS.Lambda({ region: 'us-east-2' });
9 | const s3 = new AWS.S3({ signatureVersion: 'v4' });
10 | // You'll need to change this to point towards your own bucket.
11 | const s3Bucket = 'youtube-mp3-downloader';
12 |
13 | const app = express();
14 | nunjucks.configure('.', { express: app });
15 | const router = express.Router();
16 |
17 |
18 | router.get('/transcode/:videoId', (req, res) => {
19 | const timestamp = Date.now().toString();
20 | const { videoId } = req.params;
21 | const videoUrl = `https://www.youtube.com/watch?v=${videoId}`;
22 |
23 | // Get information on the available video file formats.
24 | Promise.resolve().then(() => new Promise((resolve, revoke) => {
25 | ytdl.getInfo(videoUrl, (error, info) => error ? revoke(error) : resolve(info))
26 | }))
27 | // Choose the best format and construct the Lambda event.
28 | .then(({ formats, title }) => {
29 | // We'll just pick the largest audio source file size for simplicity here,
30 | // you could prioritize things based on bitrate, file format, etc. if you wanted to.
31 | const format = formats
32 | .filter(format => format.audioEncoding != null)
33 | .filter(format => format.clen != null)
34 | .sort((a, b) => parseInt(b.clen, 10) - parseInt(a.clen, 10))[0];
35 |
36 | return {
37 | filename: `${title}.mp3`,
38 | logKey: `log/${timestamp} - ${title}.log`,
39 | mp3Key: `mp3/${timestamp} - ${title}.mp3`,
40 | s3Bucket,
41 | url: format.url,
42 | };
43 | })
44 | // Trigger the actual conversion in the other Lambda function.
45 | .then(lambdaEvent => new Promise((resolve, revoke) => {
46 | lambda.invoke({
47 | FunctionName: transcoderFunctionName,
48 | InvocationType: 'Event',
49 | Payload: JSON.stringify(lambdaEvent),
50 | }, (error, data) => error ? revoke(error) : resolve(lambdaEvent));
51 | }))
52 | // Send a response
53 | .then(({ logKey, mp3Key }) => {
54 | res.status(200).send(JSON.stringify({ logKey, mp3Key }));
55 | })
56 | // Handle errors.
57 | .catch((error) => {
58 | return res.status(500).send(`Something went wrong: ${error.message}`);
59 | });
60 | });
61 |
62 |
63 | router.get('/signed-url/:logKey/:mp3Key', (req, res) => {
64 | const logKey = decodeURIComponent(req.params.logKey);
65 | const mp3Key = decodeURIComponent(req.params.mp3Key);
66 | s3.headObject({
67 | Bucket: s3Bucket,
68 | Key: logKey,
69 | }, (error) => {
70 | if (error && error.code === 'NotFound') {
71 | res.status(200).send(JSON.stringify({ url: null }));
72 | } else {
73 | s3.getSignedUrl('getObject', {
74 | Bucket: s3Bucket,
75 | Expires: 3600,
76 | Key: mp3Key,
77 | }, (error, url) => {
78 | res.status(200).send(JSON.stringify({ url }));
79 | });
80 | }
81 | });
82 | });
83 |
84 |
85 | router.get('/*', (req, res) => {
86 | // Handle extracting the path from the original URL.
87 | const originalUrl = module.parent ? req.originalUrl.slice(1) :
88 | req.originalUrl.slice(`/${apiStage}/`.length);
89 | const path = decodeURIComponent(originalUrl);
90 |
91 | // Handle full youtube URLs or just the video ID.
92 | const urlPrefixes = ['https://', 'http://', 'www.youtube.com', 'youtube.com'];
93 | let videoId, videoUrl;
94 | if (urlPrefixes.some(prefix => path.startsWith(prefix))) {
95 | videoUrl = path;
96 | videoId = videoUrl.match(/v=([^&]*)/)[1];
97 | } else {
98 | videoId = path;
99 | videoUrl = `https://www.youtube.com/watch?v=${videoId}`;
100 | }
101 |
102 | // Render the download page template.
103 | res.render('download.html', { apiStage, videoId, videoUrl });
104 | });
105 |
106 | // Run the app when the file is being run as a script.
107 | if (!module.parent) {
108 | app.use(`/${apiStage}/`, router);
109 | app.listen(3000, () => console.log('Listening on port 3000!'))
110 | } else {
111 | app.use('/', router);
112 | }
113 |
114 | // Export the app for use with lambda.
115 | module.exports = app;
116 |
--------------------------------------------------------------------------------
/articles/youtube-mp3-downloader/deploy-everything.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 |
3 | # You'll need to configure these settings, they must be unique.
4 | # Be sure to also change the bucket name in `app.js`!
5 | export bucket_name="youtube-mp3-downloader"
6 |
7 | export role_name="YoutubeMp3DownloaderRole"
8 | export policy_name="YoutubeMp3DownloaderPolicy"
9 |
10 | export transcoder_function_name="YoutubeMp3TranscoderFunction"
11 | export downloader_function_name="YoutubeMp3DownloaderFunction"
12 | export downloader_api_name="YoutubeDownloaderApi"
13 |
14 |
15 | # Make a new S3 bucket.
16 | aws s3 mb "s3://${bucket_name}"
17 |
18 |
19 | # Create a new role.
20 | read -r -d '' role_policy_document <<'EOF'
21 | {
22 | "Version": "2012-10-17",
23 | "Statement": [
24 | {
25 | "Effect": "Allow",
26 | "Principal": {
27 | "Service": [
28 | "apigateway.amazonaws.com",
29 | "lambda.amazonaws.com"
30 | ]
31 | },
32 | "Action": "sts:AssumeRole"
33 | }
34 | ]
35 | }
36 | EOF
37 | response="$(aws iam create-role \
38 | --role-name "${role_name}" \
39 | --assume-role-policy-document "${role_policy_document}")"
40 | echo "${response}"
41 | role_arn="$(jq -r .Role.Arn <<< "${response}")"
42 |
43 |
44 | # Assign a role to the policy.
45 | read -r -d '' policy_document <<EOF

--------------------------------------------------------------------------------
/articles/youtube-mp3-downloader/download.html:
--------------------------------------------------------------------------------
2 |
3 |
4 |
5 |
6 | Youtube MP3 Downloader
7 |
9 |
17 |
18 |
19 |