├── groove ├── groove-python │ ├── groove │ │ ├── __init__.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_fingerprint.py │ │ │ ├── test_auth.py │ │ │ ├── test_tape.py │ │ │ ├── test_end_proxy.py │ │ │ ├── mock_server.py │ │ │ └── test_cache.py │ │ ├── .DS_Store │ │ ├── assets │ │ │ ├── .DS_Store │ │ │ └── __init__.py │ │ ├── cli.py │ │ ├── models.py │ │ ├── enums.py │ │ ├── dialer.py │ │ └── tape.py │ ├── .DS_Store │ ├── pyproject.toml │ ├── benchmark_end_proxy.py │ ├── README.md │ └── build.py ├── setup.sh ├── groove-node │ ├── src │ │ ├── index.ts │ │ ├── install-ca.ts │ │ ├── dialer.ts │ │ ├── __tests__ │ │ │ └── index.test.ts │ │ ├── tape.ts │ │ └── utilities.ts │ ├── lifecycle │ │ ├── uninstall.sh │ │ └── install.sh │ ├── .eslintrc.cjs │ ├── jest.config.js │ ├── tsconfig.json │ ├── scripts │ │ └── build.ts │ ├── esbuild-hook.js │ ├── package.json │ └── README.md ├── groove_entrypoint.sh ├── README.md ├── build.sh ├── proxy │ ├── cache │ │ ├── utilities.go │ │ ├── utilities_test.go │ │ ├── invalidator_test.go │ │ └── lru_test.go │ ├── cache_key_test.go │ ├── headers.go │ ├── go.mod │ ├── LICENSE │ ├── utilities.go │ ├── optimized_storage.go │ ├── cache_key.go │ ├── archives.go │ ├── end_proxy.go │ ├── cert.go │ └── main.go └── groove.Dockerfile ├── proxy-benchmarks ├── proxy_benchmarks │ ├── __init__.py │ ├── cli │ │ ├── __init__.py │ │ ├── main.py │ │ ├── ssl_validity.py │ │ └── speed.py │ ├── proxies │ │ ├── __init__.py │ │ ├── martian.py │ │ ├── base.py │ │ ├── gomitmproxy.py │ │ ├── node_http_proxy.py │ │ ├── goproxy.py │ │ └── mitmproxy.py │ ├── tests │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_ssl_validity.py │ │ ├── test_fingerprinting.py │ │ ├── test_speed.py │ │ └── test_load.py │ ├── assets │ │ ├── speed-test │ │ │ ├── locust │ │ │ │ ├── data │ │ │ │ │ └── .gitkeep │ │ │ │ ├── README.md │ │ │ │ ├── http_locust.conf │ │ │ │ ├── https_locust.conf │ │ │ │ ├── http_baseline_locust.conf │ │ │ │ ├── 
https_baseline_locust.conf │ │ │ │ ├── no_proxy_load_test.py │ │ │ │ └── proxy_load_test.py │ │ │ ├── server │ │ │ │ ├── go.mod │ │ │ │ ├── go.sum │ │ │ │ ├── pyproject.toml │ │ │ │ ├── main.go │ │ │ │ └── setup.sh │ │ │ └── .DS_Store │ │ ├── proxies │ │ │ ├── .DS_Store │ │ │ ├── gomitmproxy-mimic │ │ │ │ ├── README.md │ │ │ │ ├── go.mod │ │ │ │ ├── setup.sh │ │ │ │ ├── go.sum │ │ │ │ └── transport.go │ │ │ ├── gomitmproxy │ │ │ │ ├── .DS_Store │ │ │ │ ├── go.mod │ │ │ │ ├── setup.sh │ │ │ │ └── go.sum │ │ │ ├── goproxy │ │ │ │ ├── go.mod │ │ │ │ ├── go.sum │ │ │ │ ├── setup.sh │ │ │ │ ├── cert.go │ │ │ │ ├── LICENSE │ │ │ │ ├── optimized_storage.go │ │ │ │ └── main.go │ │ │ ├── martian │ │ │ │ ├── go.mod │ │ │ │ ├── setup.sh │ │ │ │ └── main.go │ │ │ ├── node_http_proxy │ │ │ │ ├── package.json │ │ │ │ ├── setup.sh │ │ │ │ ├── setup.js │ │ │ │ └── index.js │ │ │ ├── goproxy-mimic │ │ │ │ ├── go.mod │ │ │ │ ├── setup.sh │ │ │ │ ├── cert.go │ │ │ │ ├── LICENSE │ │ │ │ ├── optimized_storage.go │ │ │ │ ├── go.sum │ │ │ │ ├── main.go │ │ │ │ └── transport.go │ │ │ └── mitmproxy │ │ │ │ └── setup.sh │ │ └── __init__.py │ ├── process.py │ ├── io.py │ ├── requests.py │ └── load_test.py ├── benchmark_entrypoint.sh ├── setup.sh ├── docker-compose.yml ├── pyproject.toml ├── benchmark.Dockerfile ├── results_certificate_speed.csv ├── results_load_test.csv ├── results_load_test_mimick.csv └── README.md ├── .DS_Store ├── .gitignore ├── LICENSE ├── README.md └── .github └── workflows └── test-benchmarks.yml /groove/groove-python/groove/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /groove/groove-python/groove/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/proxies/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/speed-test/locust/data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /groove/setup.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | set -e 3 | 4 | (cd proxy && go install) 5 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piercefreeman/grooveproxy/HEAD/.DS_Store -------------------------------------------------------------------------------- /groove/groove-python/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piercefreeman/grooveproxy/HEAD/groove/groove-python/.DS_Store -------------------------------------------------------------------------------- /groove/groove-python/groove/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piercefreeman/grooveproxy/HEAD/groove/groove-python/groove/.DS_Store -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/speed-test/locust/README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | poetry run locust --config=no_proxy_load_locust.conf 3 | ``` 4 | -------------------------------------------------------------------------------- /groove/groove-node/src/index.ts: -------------------------------------------------------------------------------- 1 | export * from './proxy'; 2 | export * from './dialer'; 3 | export * from './tape'; 4 | export * from './utilities'; 5 | -------------------------------------------------------------------------------- /groove/groove-python/groove/assets/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piercefreeman/grooveproxy/HEAD/groove/groove-python/groove/assets/.DS_Store -------------------------------------------------------------------------------- /groove/groove-node/lifecycle/uninstall.sh: 
-------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | 3 | npm_bin=$(npm bin) 4 | 5 | rm $npm_bin/grooveproxy 6 | 7 | echo "Uninstall completed." 8 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/speed-test/server/go.mod: -------------------------------------------------------------------------------- 1 | module speed_test_server 2 | 3 | go 1.18 4 | 5 | require github.com/google/uuid v1.3.0 // indirect 6 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piercefreeman/grooveproxy/HEAD/proxy-benchmarks/proxy_benchmarks/assets/proxies/.DS_Store -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/speed-test/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piercefreeman/grooveproxy/HEAD/proxy-benchmarks/proxy_benchmarks/assets/speed-test/.DS_Store -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/gomitmproxy-mimic/README.md: -------------------------------------------------------------------------------- 1 | ## gomitmproxy-mimic 2 | 3 | Forked version of gomitmproxy to introduce Hello Client TLS mimicking support. 
4 | -------------------------------------------------------------------------------- /proxy-benchmarks/benchmark_entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [ "$1" = "test" ]; then 4 | exec poetry run pytest -s proxy_benchmarks/tests "${@:2}" 5 | else 6 | exec "$@" 7 | fi 8 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/gomitmproxy/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/piercefreeman/grooveproxy/HEAD/proxy-benchmarks/proxy_benchmarks/assets/proxies/gomitmproxy/.DS_Store -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy/go.mod: -------------------------------------------------------------------------------- 1 | module proxy_benchmarks/goproxy 2 | 3 | go 1.18 4 | 5 | require github.com/elazarl/goproxy v0.0.0-20220901064549-fbd10ff4f5a1 // indirect 6 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/speed-test/locust/http_locust.conf: -------------------------------------------------------------------------------- 1 | locustfile = proxy_load_test.py 2 | headless = true 3 | users = 1000 4 | spawn-rate = 50 5 | 6 | csv = data/http 7 | csv-full-history = true 8 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/speed-test/locust/https_locust.conf: -------------------------------------------------------------------------------- 1 | locustfile = proxy_load_test.py 2 | headless = true 3 | users = 1000 4 | spawn-rate = 50 5 | 6 | csv = data/https 7 | csv-full-history = true 8 | -------------------------------------------------------------------------------- 
def get_asset_path(asset_name: str) -> Path:
    """
    Build the path of an asset shipped inside this package.

    The package's resource root is looked up with `files(__name__)`, the
    asset name is joined onto it and the result is wrapped in a `Path`.

    :param asset_name: name of the asset relative to this package
    :return: `Path` built from the joined resource location

    """
    package_root = files(__name__)
    asset_location = package_root / asset_name
    return Path(asset_location)
def install_ca() -> int:
    """
    Run the bundled `grooveproxy` binary with the `install-ca` subcommand.

    The executable is located through `get_asset_path("grooveproxy")` and
    started as a child process (argument list, no shell).

    :return: exit status of the child process. Previously the status was
        dropped, so a failed install looked like a success. Returning it
        lets a console-script wrapper that forwards the return value to
        `sys.exit` report the failure (presumably how the `install-ca`
        script entry point is invoked - confirm against the packaging).

    """
    completed = run([
        str(get_asset_path("grooveproxy")),
        "install-ca",
    ])
    return completed.returncode
@pytest.fixture(scope="session")
def cli_object():
    """
    Session-scoped dictionary handed to tests that need console output.

    Holds a rich `Console` under "console" and, under "divider", a run of
    dashes as wide as that console.

    """
    rich_console = Console()
    dash_rule = "-" * rich_console.width
    return {"console": rich_console, "divider": dash_rule}
**/certificate-generation-test/*.json 16 | 17 | .pytest_cache 18 | 19 | build 20 | dist 21 | **/assets/grooveproxy 22 | 23 | # Exclude built python files 24 | **/*.so 25 | __pycache__ 26 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/speed-test/server/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "speed-test-server" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Pierce Freeman "] 6 | readme = "README.md" 7 | packages = [{include = "speed_test_server"}] 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.10" 11 | 12 | 13 | [build-system] 14 | requires = ["poetry-core"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/node_http_proxy/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "node_http_proxy", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "setup": "node setup.js", 8 | "test": "echo \"Error: no test specified\" && exit 1" 9 | }, 10 | "keywords": [], 11 | "author": "", 12 | "license": "ISC", 13 | "dependencies": { 14 | "@bjowes/http-mitm-proxy": "^0.9.4", 15 | "chalk": "^4.1.2", 16 | "commander": "^9.4.1" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /groove/groove-node/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["./src/**/*.ts"], 3 | "exclude": ["./src/**/__tests__"], 4 | "compilerOptions": { 5 | "lib": ["es2020"], 6 | "module": "commonjs", 7 | "target": "es2020", 8 | 9 | "rootDir": "./", 10 | "outDir": "build", 11 | 12 | "strict": true, 13 | "sourceMap": true, 14 | "esModuleInterop": true, 15 | "skipLibCheck": true, 16 | 
"forceConsistentCasingInFileNames": true, 17 | "declaration": true, 18 | "resolveJsonModule": true 19 | } 20 | } -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy/go.sum: -------------------------------------------------------------------------------- 1 | github.com/elazarl/goproxy v0.0.0-20220901064549-fbd10ff4f5a1 h1:ecIiM5NYeEOhy5trm8xel6wpUhYH+QWteUKnwcbCMl4= 2 | github.com/elazarl/goproxy v0.0.0-20220901064549-fbd10ff4f5a1/go.mod h1:Ro8st/ElPeALwNFlcTpWmkr6IoMFfkjXAvTHpevnDsM= 3 | github.com/elazarl/goproxy/ext v0.0.0-20190711103511-473e67f1d7d2/go.mod h1:gNh8nYJoAm43RfaxurUnxr+N1PwuFV3ZMl/efxlIlY8= 4 | github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc= 5 | -------------------------------------------------------------------------------- /groove/groove_entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [ "$1" = "test-python" ]; then 4 | cd groove-python && exec poetry run pytest -s groove/tests "${@:2}" 5 | elif [ "$1" = "test-node" ]; then 6 | cd groove-node && exec npm run test "${@:2}" 7 | elif [ "$1" = "test-go" ]; then 8 | # Run on all sub-packages 9 | cd proxy && exec go test -v ./... "${@:2}" 10 | elif [ "$1" = "race-go" ]; then 11 | # Run on all sub-packages 12 | cd proxy && exec go test -v -race ./... "${@:2}" 13 | else 14 | exec "$@" 15 | fi 16 | -------------------------------------------------------------------------------- /groove/README.md: -------------------------------------------------------------------------------- 1 | # groove 2 | 3 | ## Development 4 | 5 | Groove is a separate golang executable but it has Python and Node APIs that you often want to co-develop. 
When developing locally you can make use of the `build.sh` script that will manually build the go project and place it into the necessary bin paths for Python and Node to start using it. 6 | 7 | If you'd like to continuously compile changes to the golang proxy in the background: 8 | 9 | ``` 10 | cd groove-python && poetry run watchmedo shell-command --command="cd ../ && bash ./build.sh" ../proxy 11 | ``` 12 | -------------------------------------------------------------------------------- /groove/groove-node/lifecycle/install.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | 3 | # Consider providing prebuilt go executables and dynamically downloading like: 4 | # https://github.com/sanathkr/go-npm 5 | current_directory=$(pwd) 6 | tmp_directory=$(mktemp -d) 7 | npm_bin=$(npm bin) 8 | 9 | mkdir -p $npm_bin 10 | 11 | echo "Building in $tmp_directory..." 12 | 13 | cd $tmp_directory 14 | 15 | git clone https://github.com/piercefreeman/grooveproxy.git 16 | cd grooveproxy/groove/proxy 17 | go build -o $npm_bin/grooveproxy 18 | 19 | echo "Build and npm install complete." 
/**
 * Install the Groove certificate authority.
 *
 * Resolves the proxy executable through `Groove.getExecutablePath()` and
 * runs `<executable> install-ca` in a child process. On failure the
 * child's stderr is printed and the process exits with a non-zero status.
 */
const main = async () => {
  const proxy = new Groove({});
  const executable = await proxy.getExecutablePath();

  try {
    await promisify(exec)(`${executable} install-ca`);
  } catch (error: any) {
    console.log(`Error while installing CA: ${error.stderr}`)
    // A bare process.exit() reports success (exit code 0) unless
    // process.exitCode was set; signal the failure explicitly so callers
    // and shell scripts can detect it.
    process.exit(1)
  }
  console.log("CA Install Success")
}

main();
# Root click group. Before any subcommand runs it stores a shared rich
# console and a console-wide dash divider on the click context object.
# (Kept as a comment rather than a docstring: click would show a docstring
# as the group's help text.)
@group()
@pass_context
def main(ctx):
    shared_console = Console(soft_wrap=True)
    ctx.obj = {
        "console": shared_console,
        "divider": "-" * shared_console.width,
    }


# Register the subcommands on the group.
main.add_command(fingerprint)
main.add_command(load_test)
main.add_command(speed_test)
main.add_command(basic_ssl_test)
:= gob.NewDecoder(bytes.NewReader(readBuffer)) 24 | err := decoder.Decode(obj) 25 | if err != nil { 26 | log.Println(fmt.Errorf("Failed to decode cache entry %w", err)) 27 | return err 28 | } 29 | return nil 30 | } 31 | -------------------------------------------------------------------------------- /groove/proxy/cache/utilities_test.go: -------------------------------------------------------------------------------- 1 | package cache 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | type TestSimpleObject struct { 8 | Value string 9 | } 10 | 11 | func TestEncodeDecodeObject(t *testing.T) { 12 | object := &TestSimpleObject{ 13 | Value: "test", 14 | } 15 | 16 | encodedObject, err := objectToBytes(object) 17 | 18 | if err != nil { 19 | t.Fatalf("Error encoding object: %s", err) 20 | } 21 | 22 | var objectRecovered TestSimpleObject 23 | err = objectFromBytes(encodedObject, &objectRecovered) 24 | 25 | if err != nil { 26 | t.Fatalf("Error getting object: %s", err) 27 | } 28 | 29 | if objectRecovered.Value != object.Value { 30 | t.Fatalf("Recovered object does not match original (%s vs. %s)", objectRecovered.Value, object.Value) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy/setup.sh: -------------------------------------------------------------------------------- 1 | #! 
def terminate_all(process: Popen):
    """
    The normal `.terminate` just kills the shell. If subprocesses have been
    spawned from the shell (which is normal within our go processes) then these
    won't be cleaned up and will be left hanging.

    This function sends SIGTERM to every descendant of `process` and then to
    `process` itself. It is a no-op when the process has already exited, and
    it tolerates the root or any descendant disappearing while the signals
    are being delivered.

    :param process: the `Popen` handle whose process tree should be terminated

    """
    # psutil is already a dependency of this module; the exception type is
    # imported locally so this function is self-contained.
    from psutil import NoSuchProcess

    # `returncode` is only refreshed by poll()/wait(), so reading the
    # attribute alone misses a process that exited without being polled.
    if process.poll() is not None:
        # No-op if process has already exited
        return

    try:
        root = PsutilProcess(process.pid)
        descendants = root.children(recursive=True)
    except NoSuchProcess:
        # The process went away between the poll and the lookup
        return

    # Children first, then the root - same order as before
    for target in (*descendants, root):
        try:
            target.send_signal(SIGTERM)
        except NoSuchProcess:
            # Already gone; nothing left to signal
            continue
def is_docker() -> bool:
    """
    Report whether this process is running inside our Docker image.

    The image is assumed to inject `DOCKER=1` at build time; an unset variable
    or any other value means "not Docker".

    """
    docker_flag = getenv("DOCKER")
    return docker_flag == "1"


def wrap_command_with_sudo(command: list[str]):
    """
    Return `command`, prefixed with `sudo` when privilege elevation is needed.

    On a local machine we can use sudo to elevate permissions. Inside a docker
    container we can't prompt for a password, so the command is returned
    untouched and the calling process is expected to already be in the
    appropriate process group.

    """
    return command if is_docker() else ["sudo", *command]
/bin/bash 2 | set -e 3 | 4 | go install 5 | 6 | openssl genrsa -out ca.key 2048 7 | openssl req -new -x509 -key ca.key -out ca.crt -subj "/C=US/ST=CA/L= /O= /OU= /CN=GoMitmProxy/emailAddress= " 8 | 9 | if [ "$(uname)" == "Darwin" ]; then 10 | # Mac OS X platform 11 | sudo security add-trusted-cert -d -p ssl -p basic -k /Library/Keychains/System.keychain ./ca.crt 12 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 13 | # GNU/Linux 14 | cp ./ca.crt /usr/local/share/ca-certificates/gomitmproxy-mimic-ca.crt 15 | sudo update-ca-certificates 16 | certutil -d sql:$HOME/.pki/nssdb -A -t "C,," -n "gomitmproxy-mimic" -i /usr/local/share/ca-certificates/gomitmproxy-mimic-ca.crt 17 | fi 18 | 19 | mkdir -p ssl 20 | cp ca.crt ssl/ca.crt 21 | cp ca.key ssl/ca.key 22 | -------------------------------------------------------------------------------- /groove/groove-python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "groove" 3 | version = "1.0.4" 4 | description = "" 5 | authors = ["Pierce Freeman "] 6 | readme = "README.md" 7 | include = ["proxy"] 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.10" 11 | pydantic = "^1.10.2" 12 | requests = "^2.28.1" 13 | beautifulsoup4 = "^4.11.1" 14 | pyhumps = "^3.8.0" 15 | 16 | 17 | [tool.poetry.group.dev.dependencies] 18 | playwright = "^1.27.1" 19 | pytest = "^7.1.3" 20 | fastapi = "^0.85.1" 21 | uvicorn = "^0.18.3" 22 | isort = "^5.10.1" 23 | watchdog = "^2.1.9" 24 | 25 | [tool.poetry.scripts] 26 | install-ca = "groove.cli:install_ca" 27 | 28 | [tool.poetry.build] 29 | # Custom builder until Poetry expands their build plugin support: https://github.com/python-poetry/poetry/issues/2740 30 | generate-setup-file = false 31 | script = "build.py" 32 | 33 | [build-system] 34 | requires = ["poetry-core"] 35 | build-backend = "poetry.core.masonry.api" 36 | -------------------------------------------------------------------------------- 
@events.init.add_listener
def on_locust_init(environment, **_kwargs):
    """
    Locust init hook: turn on the stats response-times cache whenever this
    process is a distributed master or worker runner.

    Any other runner type leaves the stats configuration untouched.
    """
    # Workaround carried over from the original code (its issue reference was
    # not preserved).
    distributed_runners = (MasterRunner, WorkerRunner)
    if not isinstance(environment.runner, distributed_runners):
        return
    environment.stats.use_response_times_cache = True
#class WebsiteUser(FastHttpUser): 26 | class WebsiteUser(HttpUser): 27 | @task 28 | def index(self): 29 | self.client.get( 30 | "/handle", 31 | verify=load_test_certificate, 32 | ) 33 | -------------------------------------------------------------------------------- /proxy-benchmarks/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | benchmark: 5 | build: 6 | context: . 7 | dockerfile: benchmark.Dockerfile 8 | cap_add: 9 | # Required for synthetic 127.0.0.2 ports 10 | - NET_ADMIN 11 | volumes: 12 | - ./proxy_benchmarks:/app/proxy_benchmarks 13 | # ignore the ssl files since these are separately generated in the docker image 14 | # and mounted to the relevant system paths to validate 15 | - /app/proxy_benchmarks/assets/proxies/gomitmproxy/ssl 16 | - /app/proxy_benchmarks/assets/proxies/gomitmproxy-mimic/ssl 17 | - /app/proxy_benchmarks/assets/proxies/goproxy/ssl 18 | - /app/proxy_benchmarks/assets/proxies/goproxy-mimic/ssl 19 | - /app/proxy_benchmarks/assets/proxies/martian/ssl 20 | - /app/proxy_benchmarks/assets/proxies/mitmproxy/ssl 21 | - /app/proxy_benchmarks/assets/proxies/node_http_proxy/.http-mitm-proxy 22 | - /app/proxy_benchmarks/assets/speed-test/server/ssl 23 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/mitmproxy/setup.sh: -------------------------------------------------------------------------------- 1 | #! 
export const DefaultLocalPassthroughDialer: DialerDefinition = {
  /*
   * Proxy generally static assets to the open internet, with high priority
   */
  priority: 1000,
  requestRequires: {
    // The dot in front of the extension group is escaped so that it only
    // matches a literal "." (e.g. "app.js"); unescaped it matched any
    // character, so URLs merely containing "js", "ico", ... also qualified.
    urlRegex: ".*?\\.(?:txt|json|css|less|js|mjs|cjs|gif|ico|jpe?g|svg|png|webp|mkv|mp4|mpe?g|webm|eot|ttf|woff2?)",
    resourceTypes: ["script", "image", "stylesheet", "media", "font"],
  }
}
@pytest.fixture(scope="function")
def session(proxy):
    """
    Yield a `requests` session whose HTTP and HTTPS traffic is routed through
    the running proxy fixture.

    The session is used as a context manager so its pooled connections are
    closed once the test finishes.
    """
    with Session() as session:
        session.proxies = {
            "http": proxy.base_url_proxy,
            "https": proxy.base_url_proxy,
        }
        yield session
def test_tls_addons(proxy: Groove, context):
    """
    Test that our TLS payload has a field listing the installed signature extensions. We know
    that Google specifically checks for ALPS (ApplicationSettingsExtension) so we try to render
    the homepage here.

    """
    page = context.new_page()

    proxy.tape_start()

    try:
        page.goto("https://www.google.com:443/")
    except PlaywrightError as e:
        if "net::ERR_EMPTY_RESPONSE" in e.message:
            # Keep the browser error attached as the cause for easier debugging
            raise ProxyFailureError() from e
        # NOTE(review): any other Playwright error is still ignored here, as in
        # the original; the tape assertions below decide the outcome.

    # Get the page
    tape_session = proxy.tape_get()

    assert len(tape_session.records) > 1

    main_page = [
        record
        for record in tape_session.records
        if record.request.url.strip("/") == "https://www.google.com:443"
    ]

    # Fail with a readable message instead of an IndexError on the next line
    assert main_page, "no tape record found for the Google homepage request"
    assert len(main_page[0].response.body) > 1000
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /groove/groove-node/src/__tests__/index.test.ts: -------------------------------------------------------------------------------- 1 | const { Groove, CacheModeEnum } = require("../index"); 2 | const { fetchWithProxy } = require("../utilities"); 3 | 4 | describe('testing proxy client', () => { 5 | let proxy : typeof Groove | null = null; 6 | 7 | beforeAll(async () => { 8 | proxy = new Groove({}); 9 | await proxy.launch(); 10 | }); 11 | 12 | afterAll(() => { 13 | proxy.stop(); 14 | }); 15 | 16 | test('tape should record and edit', async () => { 17 | await proxy.setCacheMode(CacheModeEnum.OFF); 18 | await proxy.tapeStart(); 19 | 20 | const contents = await fetchWithProxy( 21 | "https://freeman.vc", proxy 22 | ); 23 | expect(contents.length).toBeGreaterThanOrEqual(100); 24 | await proxy.tapeStop(); 25 | 26 | const tape = await proxy.tapeGet(); 27 | 28 | tape.records[0].response.body = Buffer.from("Hello world"); 29 | await proxy.tapeLoad(tape); 30 | 31 | const contentsUpdated = await fetchWithProxy( 32 | "https://freeman.vc", proxy 33 | ) 34 | expect(contentsUpdated).toBe("Hello world"); 35 | }); 36 | }); 37 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/tests/test_ssl_validity.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from 
@pytest.mark.ssl
@pytest.mark.parametrize(
    "proxy",
    [
        GoProxy(MimicTypeEnum.STANDARD),
        GoProxy(MimicTypeEnum.MIMIC),
        GoMitmProxy(MimicTypeEnum.STANDARD),
        GoMitmProxy(MimicTypeEnum.MIMIC),
        MartianProxy(),
        MitmProxy(),
        NodeHttpProxy(),
    ],
)
def test_ssl_validity(cli_object, proxy):
    """
    Run the ssl-validity routine once per proxy implementation, using a
    headless Chrome request that is not kept open afterwards.

    The test makes no assertions of its own; presumably `execute_raw` raises
    when validation fails - confirm against its implementation.
    """
    request = ChromeRequest(headless=True, keep_open=False)

    # Each parametrized case exercises exactly one proxy.
    execute_raw(
        cli_object,
        inspect_browser=False,
        request=request,
        proxies=[proxy],
    )
// Names of the request headers that setupHeadersMiddleware reads into a
// HeaderDefinition.
//
// Don't prefix these with `Proxy-` - chromium appears to have specific
// manipulation routines when a header is prefixed with `Proxy-`
const (
	ProxyResourceType   = "Resource-Type"
	ProxyTapeIdentifier = "Tape-ID"
)
// setCA replaces goproxy's built-in certificate authority with the key pair
// stored in the files caCert (certificate) and caKey (private key).
//
// It returns an error when the pair cannot be loaded or when its first
// certificate cannot be parsed; in that case goproxy's globals are untouched.
func setCA(caCert string, caKey string) error {
	// Override the default support: https://github.com/elazarl/goproxy/blob/fbd10ff4f5a16de73dca5030fc12245548f76141/https.go#L32
	goproxyCa, err := tls.LoadX509KeyPair(caCert, caKey)
	if err != nil {
		return err
	}
	// Populate Leaf with the parsed form of the first certificate in the pair.
	if goproxyCa.Leaf, err = x509.ParseCertificate(goproxyCa.Certificate[0]); err != nil {
		return err
	}
	// Swap the package-level CA and rebuild every predefined CONNECT action
	// with a TLS config derived from the new CA.
	goproxy.GoproxyCa = goproxyCa
	goproxy.OkConnect = &goproxy.ConnectAction{Action: goproxy.ConnectAccept, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)}
	goproxy.MitmConnect = &goproxy.ConnectAction{Action: goproxy.ConnectMitm, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)}
	goproxy.HTTPMitmConnect = &goproxy.ConnectAction{Action: goproxy.ConnectHTTPMitm, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)}
	goproxy.RejectConnect = &goproxy.ConnectAction{Action: goproxy.ConnectReject, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)}
	return nil
}
"github.com/piercefreeman/goproxy" 9 | ) 10 | 11 | func setCA(caCert string, caKey string) error { 12 | // Override the default support: https://github.com/elazarl/goproxy/blob/fbd10ff4f5a16de73dca5030fc12245548f76141/https.go#L32 13 | goproxyCa, err := tls.LoadX509KeyPair(caCert, caKey) 14 | if err != nil { 15 | return err 16 | } 17 | if goproxyCa.Leaf, err = x509.ParseCertificate(goproxyCa.Certificate[0]); err != nil { 18 | return err 19 | } 20 | goproxy.GoproxyCa = goproxyCa 21 | goproxy.OkConnect = &goproxy.ConnectAction{Action: goproxy.ConnectAccept, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)} 22 | goproxy.MitmConnect = &goproxy.ConnectAction{Action: goproxy.ConnectMitm, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)} 23 | goproxy.HTTPMitmConnect = &goproxy.ConnectAction{Action: goproxy.ConnectHTTPMitm, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)} 24 | goproxy.RejectConnect = &goproxy.ConnectAction{Action: goproxy.ConnectReject, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)} 25 | return nil 26 | } 27 | -------------------------------------------------------------------------------- /groove/groove-node/esbuild-hook.js: -------------------------------------------------------------------------------- 1 | const Module = require("module"); 2 | const { transformSync } = require("esbuild"); 3 | const sourceMapSupport = require("source-map-support"); 4 | 5 | const cache = {}; 6 | 7 | function esbuildHook(code, filepath) { 8 | const result = transformSync(code, { 9 | target: "node16", 10 | sourcemap: "both", 11 | loader: "ts", 12 | format: "cjs", 13 | sourcefile: filepath, 14 | }); 15 | 16 | cache[filepath] = { 17 | url: filepath, 18 | code: result.code, 19 | map: result.map, 20 | }; 21 | 22 | return result.code; 23 | } 24 | 25 | sourceMapSupport.install({ 26 | environment: "node", 27 | retrieveFile(pathOrUrl) { 28 | const file = cache[pathOrUrl]; 29 | if (file) { 30 | return file.code; 31 | } else { 32 | return ""; 33 | } 34 | }, 35 | }); 36 | 37 | const 
defaultLoader = Module._extensions[".js"]; 38 | 39 | Module._extensions[".ts"] = function (mod, filename) { 40 | if (filename.includes("node_modules")) { 41 | return defaultLoader(mod, filename); 42 | } 43 | 44 | const defaultCompile = mod._compile; 45 | mod._compile = function (code) { 46 | mod._compile = defaultCompile; 47 | return mod._compile(esbuildHook(code, filename), filename); 48 | }; 49 | 50 | defaultLoader(mod, filename); 51 | }; -------------------------------------------------------------------------------- /groove/groove-node/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@piercefreeman/groove", 3 | "version": "1.0.4", 4 | "description": "", 5 | "main": "build/cjs/index.js", 6 | "module": "build/esm/index.js", 7 | "types": "build/src/index.d.ts", 8 | "directories": { 9 | "test": "__tests__" 10 | }, 11 | "scripts": { 12 | "prepublish": "tsc", 13 | "postinstall": "./lifecycle/install.sh", 14 | "preuninstall": "./lifecycle/uninstall.sh", 15 | "build": "npm run type:dts && npm run build:main", 16 | "build:main": "node -r ./esbuild-hook ./scripts/build", 17 | "type:dts": "tsc --emitDeclarationOnly", 18 | "lint": "eslint .", 19 | "prepare": "npm run build", 20 | "test": "jest ." 
21 | }, 22 | "bin": { 23 | "install-ca": "build/cjs/install-ca.js" 24 | }, 25 | "author": "", 26 | "license": "MIT", 27 | "devDependencies": { 28 | "@types/jest": "^29.2.0", 29 | "@types/node": "^18.11.0", 30 | "@typescript-eslint/eslint-plugin": "^5.40.1", 31 | "@typescript-eslint/parser": "^5.40.1", 32 | "eslint": "^8.25.0", 33 | "jest": "^29.2.1", 34 | "prettier": "^2.7.1", 35 | "ts-jest": "^29.0.3", 36 | "typescript": "^4.8.4" 37 | }, 38 | "dependencies": { 39 | "@types/node-fetch": "^2.6.2", 40 | "esbuild": "^0.15.12", 41 | "form-data": "^4.0.0", 42 | "https-proxy-agent": "^5.0.1", 43 | "node-fetch": "^2.6.7" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /proxy-benchmarks/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "proxy-benchmarks" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Pierce Freeman "] 6 | readme = "README.md" 7 | packages = [{include = "proxy_benchmarks"}] 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.10" 11 | click = "^8.1.3" 12 | pyja3 = "^1.0.0" 13 | psutil = "^5.9.2" 14 | requests = "^2.28.1" 15 | mitmproxy = "^8.1.1" 16 | playwright = "^1.27.0" 17 | rich = "^12.6.0" 18 | locust = "^2.12.1" 19 | ConfigArgParse = "^1.5.3" 20 | pandas = "^1.5.0" 21 | tqdm = "^4.64.1" 22 | 23 | [tool.poetry.scripts] 24 | benchmark = "proxy_benchmarks.cli.main:main" 25 | 26 | [tool.poetry.group.dev.dependencies] 27 | isort = "^5.10.1" 28 | pytest = "^7.1.3" 29 | 30 | [build-system] 31 | requires = ["poetry-core"] 32 | build-backend = "poetry.core.masonry.api" 33 | 34 | [tool.isort] 35 | combine_as_imports = true 36 | force_grid_wrap = 4 37 | float_to_top = true 38 | include_trailing_comma = true 39 | known_first_party = "proxy_benchmarks" 40 | extra_standard_library = "pkg_resources" 41 | skip = "__init__.py" 42 | 43 | line_length = 120 44 | lines_after_imports = 2 45 | multi_line_output = 3 46 | 47 | 
# pytest 7 only reads pyproject.toml settings from the `ini_options` sub-table;
# a bare `[tool.pytest]` table is ignored, which left these markers unregistered.
[tool.pytest.ini_options]
markers = [
    "fingerprint: mark a test as a fingerprint test",
    "load: mark a test as a load test",
    "speed: mark a test as a speed test",
    "ssl: mark a test as an ssl test",
]
@events.init.add_listener
def on_locust_init(environment, **_kwargs):
    """
    Locust init hook: turn on the stats response-times cache whenever this
    process is a distributed master or worker runner.

    Any other runner type leaves the stats configuration untouched.
    """
    # Workaround carried over from the original code (its issue reference was
    # not preserved).
    distributed_runners = (MasterRunner, WorkerRunner)
    if not isinstance(environment.runner, distributed_runners):
        return
    environment.stats.use_response_times_cache = True
class DialerDefinition(GrooveModelBase):
    """
    Description of one dialer: its priority, an optional upstream proxy and an
    optional request filter.
    """

    # Rank of this dialer. The bundled defaults in this module use 1 for
    # "low priority" and 1000 for "high priority", so larger values rank higher.
    priority: int
    # Upstream proxy to send traffic through; the bundled defaults pass None
    # when proxying straight to the open internet.
    proxy: ProxyDefinition | None = None
    # URL regex plus resource types; presumably the dialer is only used for
    # requests matching both - confirm against the Go proxy.
    request_requires: RequestRequiresDefinition | None = None
priority=1, 28 | proxy=None, 29 | request_requires=None, 30 | ) 31 | 32 | class DefaultLocalPassthroughDialer(DialerDefinition): 33 | """ 34 | Proxy generally static assets to the open internet, with high priority 35 | """ 36 | def __init__(self): 37 | super().__init__( 38 | priority=1000, 39 | proxy=None, 40 | request_requires=RequestRequiresDefinition( 41 | url_regex=".*?.(?:txt|json|css|less|js|mjs|cjs|gif|ico|jpe?g|svg|png|webp|mkv|mp4|mpe?g|webm|eot|ttf|woff2?)", 42 | resource_types=["script", "image", "stylesheet", "media", "font"], 43 | ), 44 | ) 45 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/proxies/martian.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | from subprocess import Popen 3 | from time import sleep 4 | 5 | from proxy_benchmarks.assets import get_asset_path 6 | from proxy_benchmarks.process import terminate_all 7 | from proxy_benchmarks.proxies.base import CertificateAuthority, ProxyBase 8 | 9 | 10 | class MartianProxy(ProxyBase): 11 | def __init__(self): 12 | super().__init__(port=6014) 13 | 14 | @contextmanager 15 | def launch(self): 16 | current_extension_path = get_asset_path("proxies/martian") 17 | process = Popen(["go", "run", ".", "--port", str(self.port)], cwd=current_extension_path) 18 | 19 | self.wait_for_launch() 20 | # Requires a bit more time to load than our other proxies 21 | sleep(2) 22 | 23 | try: 24 | yield process 25 | finally: 26 | terminate_all(process) 27 | 28 | # Wait for the socket to close 29 | self.wait_for_close() 30 | 31 | @property 32 | def certificate_authority(self) -> CertificateAuthority: 33 | return CertificateAuthority( 34 | public=get_asset_path("proxies/martian/ssl/ca.crt"), 35 | key=get_asset_path("proxies/martian/ssl/ca.key"), 36 | ) 37 | 38 | @property 39 | def short_name(self) -> str: 40 | return "martian" 41 | 42 | def __repr__(self) -> str: 43 | 
return f"MartianProxy(port={self.port})" 44 | -------------------------------------------------------------------------------- /groove/proxy/go.mod: -------------------------------------------------------------------------------- 1 | module grooveproxy 2 | 3 | go 1.18 4 | 5 | // replace github.com/piercefreeman/goproxy => /Users/piercefreeman/projects/goproxy 6 | 7 | require ( 8 | github.com/gin-gonic/gin v1.8.1 9 | github.com/google/uuid v1.3.0 10 | github.com/piercefreeman/goproxy v0.0.7 11 | github.com/pquerna/cachecontrol v0.1.0 12 | github.com/refraction-networking/utls v1.2.0 13 | golang.org/x/net v0.1.0 14 | ) 15 | 16 | require ( 17 | github.com/andybalholm/brotli v1.0.4 // indirect 18 | github.com/gin-contrib/sse v0.1.0 // indirect 19 | github.com/go-playground/locales v0.14.0 // indirect 20 | github.com/go-playground/universal-translator v0.18.0 // indirect 21 | github.com/go-playground/validator/v10 v10.10.0 // indirect 22 | github.com/goccy/go-json v0.9.7 // indirect 23 | github.com/google/btree v1.0.0 // indirect 24 | github.com/json-iterator/go v1.1.12 // indirect 25 | github.com/klauspost/compress v1.15.12 // indirect 26 | github.com/leodido/go-urn v1.2.1 // indirect 27 | github.com/mattn/go-isatty v0.0.14 // indirect 28 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect 29 | github.com/modern-go/reflect2 v1.0.2 // indirect 30 | github.com/pelletier/go-toml/v2 v2.0.1 // indirect 31 | github.com/peterbourgon/diskv/v3 v3.0.1 // indirect 32 | github.com/ugorji/go/codec v1.2.7 // indirect 33 | golang.org/x/crypto v0.1.0 // indirect 34 | golang.org/x/sys v0.1.0 // indirect 35 | golang.org/x/text v0.4.0 // indirect 36 | google.golang.org/protobuf v1.28.0 // indirect 37 | gopkg.in/yaml.v2 v2.4.0 // indirect 38 | ) 39 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/node_http_proxy/index.js: 
-------------------------------------------------------------------------------- 1 | const Proxy = require('@bjowes/http-mitm-proxy'); 2 | const { program } = require('commander'); 3 | const chalk = require('chalk'); 4 | 5 | // Allow self-signed certificates during fetch, since the local harness launches 6 | // a web service that generates its own self-signed certificates. 7 | process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0"; 8 | 9 | program 10 | .option('--port ') 11 | 12 | program.parse(); 13 | const { port } = program.opts(); 14 | 15 | var proxy = Proxy(); 16 | 17 | proxy.onError(function(ctx, err) { 18 | const url = ctx && ctx.clientToProxyRequest ? ctx.clientToProxyRequest.url : ""; 19 | console.error(`Proxy error on ${url}:`, err); 20 | if (err.code === "ERR_SSL_SSLV3_ALERT_CERTIFICATE_UNKNOWN") { 21 | console.log(chalk.red("SSL certification failed.\nIt's likely you haven't installed the root certificate on your machine.")); 22 | 23 | // This will add a `NodeMITMProxyCA` cert to your local desktop keychain 24 | console.log(chalk.red("MacOS: security add-trusted-cert -r trustRoot -k ~/Library/Keychains/login.keychain-db ./.http-mitm-proxy/certs/ca.pem")); 25 | } 26 | }); 27 | 28 | 29 | const exitOnSignal = (signal) => { 30 | process.on(signal, () => { 31 | console.log('\nCaught ' + signal + ', exiting'); 32 | proxy.close(); 33 | process.exit(1); 34 | }); 35 | } 36 | 37 | exitOnSignal('SIGINT'); 38 | exitOnSignal('SIGTERM'); 39 | 40 | console.log(`Will launch proxy on port ${port}`) 41 | 42 | proxy.listen({port: port}); 43 | -------------------------------------------------------------------------------- /groove/proxy/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Elazar Leibovich. All rights reserved. 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Elazar Leibovich. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Elazar Leibovich. All rights reserved. 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Elazar Leibovich. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy-mimic/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Elazar Leibovich. All rights reserved. 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Elazar Leibovich. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy/optimized_storage.go: -------------------------------------------------------------------------------- 1 | // https://github.com/elazarl/goproxy/blob/ac903b0f516b4b07599ab573d837cbaccb26feba/examples/goproxy-certstorage/optimized_storage.go 2 | // Has per-host locks to avoid situation, when multiple concurrent requests to a host without ready to use certificate will 3 | // generate the same certificate multiple times. 4 | package main 5 | 6 | import ( 7 | "crypto/tls" 8 | "fmt" 9 | "sync" 10 | ) 11 | 12 | type OptimizedCertStore struct { 13 | certs map[string]*tls.Certificate 14 | locks map[string]*sync.Mutex 15 | sync.Mutex 16 | } 17 | 18 | func NewOptimizedCertStore() *OptimizedCertStore { 19 | return &OptimizedCertStore{ 20 | certs: map[string]*tls.Certificate{}, 21 | locks: map[string]*sync.Mutex{}, 22 | } 23 | } 24 | 25 | func (s *OptimizedCertStore) Fetch(host string, genCert func() (*tls.Certificate, error)) (*tls.Certificate, error) { 26 | fmt.Printf("Fetching certificate for %s\n", host) 27 | 28 | hostLock := s.hostLock(host) 29 | hostLock.Lock() 30 | defer hostLock.Unlock() 31 | 32 | cert, ok := s.certs[host] 33 | var err error 34 | if !ok { 35 | fmt.Printf("cache miss: %s\n", host) 36 | 37 | cert, err = genCert() 38 | if err != nil { 39 | return nil, err 40 | } 41 | s.certs[host] = cert 42 | } else { 43 | fmt.Printf("cache hit: %s\n", host) 44 | } 45 | return cert, nil 46 | } 47 | 48 | func (s *OptimizedCertStore) hostLock(host string) *sync.Mutex { 49 | s.Lock() 50 | defer s.Unlock() 51 | 52 | lock, ok := s.locks[host] 53 | if !ok { 54 | lock = &sync.Mutex{} 55 | s.locks[host] = lock 56 | } 57 | return lock 58 | } 59 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/tests/test_fingerprinting.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from proxy_benchmarks.cli.fingerprinting import compare_dynamic_raw 4 | from proxy_benchmarks.enums import MimicTypeEnum 5 | from proxy_benchmarks.proxies.gomitmproxy import GoMitmProxy 6 | from proxy_benchmarks.proxies.goproxy import GoProxy 7 | from proxy_benchmarks.proxies.martian import MartianProxy 8 | from proxy_benchmarks.proxies.mitmproxy import MitmProxy 9 | from proxy_benchmarks.proxies.node_http_proxy import NodeHttpProxy 10 | from proxy_benchmarks.requests import ChromeRequest 11 | 12 | 13 | @pytest.mark.fingerprint 14 | @pytest.mark.parametrize( 15 | "proxy", 16 | [ 17 | GoProxy(MimicTypeEnum.STANDARD), 18 | GoProxy(MimicTypeEnum.MIMIC), 19 | GoMitmProxy(MimicTypeEnum.STANDARD), 20 | GoMitmProxy(MimicTypeEnum.MIMIC), 21 | MartianProxy(), 22 | MitmProxy(), 23 | NodeHttpProxy(), 24 | ], 25 | ) 26 | def test_fingerprint_independent(cli_object, proxy): 27 | """ 28 | Ensure that we can benchmark each of the fingerprints against a baseline SSL connection 29 | """ 30 | compare_dynamic_raw( 31 | cli_object, 32 | ChromeRequest(headless=True), 33 | [proxy] 34 | ) 35 | 36 | 37 | def test_fingerprint_multiple(cli_object): 38 | """ 39 | Ensure that we can compare standard and mimic values with one another 40 | """ 41 | compare_dynamic_raw( 42 | cli_object, 43 | ChromeRequest(headless=True), 44 | [ 45 | GoProxy(MimicTypeEnum.STANDARD), 46 | GoProxy(MimicTypeEnum.MIMIC), 47 | ] 48 | ) 49 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy-mimic/optimized_storage.go: -------------------------------------------------------------------------------- 1 | // https://github.com/elazarl/goproxy/blob/ac903b0f516b4b07599ab573d837cbaccb26feba/examples/goproxy-certstorage/optimized_storage.go 2 | // Has per-host locks to avoid situation, when multiple concurrent requests to a host 
without ready to use certificate will 3 | // generate the same certificate multiple times. 4 | package main 5 | 6 | import ( 7 | "crypto/tls" 8 | "fmt" 9 | "sync" 10 | ) 11 | 12 | type OptimizedCertStore struct { 13 | certs map[string]*tls.Certificate 14 | locks map[string]*sync.Mutex 15 | sync.Mutex 16 | } 17 | 18 | func NewOptimizedCertStore() *OptimizedCertStore { 19 | return &OptimizedCertStore{ 20 | certs: map[string]*tls.Certificate{}, 21 | locks: map[string]*sync.Mutex{}, 22 | } 23 | } 24 | 25 | func (s *OptimizedCertStore) Fetch(host string, genCert func() (*tls.Certificate, error)) (*tls.Certificate, error) { 26 | fmt.Printf("Fetching certificate for %s\n", host) 27 | 28 | hostLock := s.hostLock(host) 29 | hostLock.Lock() 30 | defer hostLock.Unlock() 31 | 32 | cert, ok := s.certs[host] 33 | var err error 34 | if !ok { 35 | fmt.Printf("cache miss: %s\n", host) 36 | 37 | cert, err = genCert() 38 | if err != nil { 39 | return nil, err 40 | } 41 | s.certs[host] = cert 42 | } else { 43 | fmt.Printf("cache hit: %s\n", host) 44 | } 45 | return cert, nil 46 | } 47 | 48 | func (s *OptimizedCertStore) hostLock(host string) *sync.Mutex { 49 | s.Lock() 50 | defer s.Unlock() 51 | 52 | lock, ok := s.locks[host] 53 | if !ok { 54 | lock = &sync.Mutex{} 55 | s.locks[host] = lock 56 | } 57 | return lock 58 | } 59 | -------------------------------------------------------------------------------- /groove/proxy/utilities.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "net/http" 5 | ) 6 | 7 | func reverseSlice[T any](s []T) { 8 | // https://github.com/golang/go/wiki/SliceTricks#reversing 9 | // https://eli.thegreenplace.net/2021/generic-functions-on-slices-with-go-type-parameters/ 10 | for left, right := 0, len(s)-1; left < right; left, right = left+1, right-1 { 11 | s[left], s[right] = s[right], s[left] 12 | } 13 | } 14 | 15 | func filterSlice[T any](s []T, f func(T) bool) []T { 16 | filtered := 
make([]T, 0) 17 | 18 | for _, value := range s { 19 | if f(value) { 20 | filtered = append(filtered, value) 21 | } 22 | } 23 | 24 | return filtered 25 | } 26 | 27 | func contains[T comparable](s []T, e T) bool { 28 | for _, v := range s { 29 | if v == e { 30 | return true 31 | } 32 | } 33 | return false 34 | } 35 | 36 | func getRedirectHistory(response *http.Response) ([]*http.Request, []*http.Response) { 37 | // The eventually resolved response payload carries alongside all of the request 38 | // history - this function reassembles it 39 | requestHistory := make([]*http.Request, 0) 40 | responseHistory := make([]*http.Response, 0) 41 | 42 | for response != nil { 43 | request := response.Request 44 | requestHistory = append(requestHistory, request) 45 | responseHistory = append(responseHistory, response) 46 | response = request.Response 47 | } 48 | 49 | // The response order is actually reversed from what we expect 50 | // The last request that eventually made the response comes first in the slice 51 | reverseSlice(requestHistory) 52 | reverseSlice(responseHistory) 53 | 54 | return requestHistory, responseHistory 55 | } 56 | -------------------------------------------------------------------------------- /groove/groove-python/groove/tape.py: -------------------------------------------------------------------------------- 1 | from base64 import b64decode, b64encode 2 | from gzip import compress, decompress 3 | from json import dumps, loads 4 | 5 | from pydantic import validator 6 | 7 | from groove.models import GrooveModelBase 8 | 9 | 10 | class TapeRequest(GrooveModelBase): 11 | url: str 12 | method: str 13 | headers: dict[str, list[str]] 14 | body: bytes 15 | 16 | @validator("body") 17 | def validate_body(cls, value): 18 | return b64decode(value) 19 | 20 | 21 | class TapeResponse(GrooveModelBase): 22 | status: int 23 | headers: dict[str, list[str]] 24 | body: bytes 25 | 26 | @validator("body") 27 | def validate_body(cls, value): 28 | return b64decode(value) 29 | 
30 | 31 | class TapeRecord(GrooveModelBase): 32 | request: TapeRequest 33 | response: TapeResponse 34 | 35 | class Config: 36 | json_encoders = { 37 | # Assume that body bytes should always be encoded as base64 strings 38 | bytes: lambda value: b64encode(value).decode(), 39 | } 40 | 41 | 42 | class TapeSession(GrooveModelBase): 43 | records: list[TapeRecord] 44 | 45 | @classmethod 46 | def from_server(cls, data: bytes): 47 | raw_records = loads(decompress(data)) 48 | return cls(records=raw_records) 49 | 50 | def to_server(self) -> bytes: 51 | return compress( 52 | dumps([ 53 | # json_encoders doesn't operate on list items, must iterate manually 54 | # https://github.com/pydantic/pydantic/issues/4085 55 | loads(record.json()) 56 | for record in self.records 57 | ]).encode() 58 | ) -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/tests/test_speed.py: -------------------------------------------------------------------------------- 1 | from tempfile import TemporaryDirectory 2 | 3 | import pytest 4 | 5 | from proxy_benchmarks.cli.speed import execute_raw 6 | from proxy_benchmarks.enums import MimicTypeEnum 7 | from proxy_benchmarks.proxies.gomitmproxy import GoMitmProxy 8 | from proxy_benchmarks.proxies.goproxy import GoProxy 9 | from proxy_benchmarks.proxies.martian import MartianProxy 10 | from proxy_benchmarks.proxies.mitmproxy import MitmProxy 11 | from proxy_benchmarks.proxies.node_http_proxy import NodeHttpProxy 12 | from pathlib import Path 13 | 14 | 15 | @pytest.mark.speed 16 | @pytest.mark.parametrize( 17 | "proxy", 18 | [ 19 | GoProxy(MimicTypeEnum.STANDARD), 20 | GoProxy(MimicTypeEnum.MIMIC), 21 | GoMitmProxy(MimicTypeEnum.STANDARD), 22 | MartianProxy(), 23 | MitmProxy(), 24 | NodeHttpProxy(), 25 | ], 26 | ) 27 | def test_speed_simple(cli_object, proxy): 28 | with TemporaryDirectory() as directory: 29 | directory = Path(directory) 30 | 31 | execute_raw( 32 | cli_object, 33 | 
data_path=directory, 34 | samples=5, 35 | proxies=[proxy], 36 | ) 37 | 38 | @pytest.mark.speed 39 | @pytest.mark.xfail(reason="crash because of http/2 protocol") 40 | @pytest.mark.parametrize( 41 | "proxy", 42 | [ 43 | GoMitmProxy(MimicTypeEnum.MIMIC), 44 | ] 45 | ) 46 | def test_speed_simple_broken(cli_object, proxy): 47 | with TemporaryDirectory() as directory: 48 | directory = Path(directory) 49 | 50 | execute_raw( 51 | cli_object, 52 | data_path=directory, 53 | samples=5, 54 | proxies=[proxy], 55 | ) 56 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/speed-test/server/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/tls" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "net/http" 9 | "os" 10 | "os/signal" 11 | "strconv" 12 | "time" 13 | 14 | "github.com/google/uuid" 15 | ) 16 | 17 | func main() { 18 | var ( 19 | port = flag.Int("port", 3010, "http port to listen on") 20 | tlsPort = flag.Int("tls-port", 3011, "tls port to listen on") 21 | ) 22 | flag.Parse() 23 | 24 | http.HandleFunc("/handle", func(w http.ResponseWriter, r *http.Request) { 25 | id := uuid.New() 26 | 27 | time.Sleep(1 * time.Second) 28 | 29 | w.WriteHeader(http.StatusOK) 30 | w.Write([]byte("Request handled.\nValue:" + id.String())) 31 | }) 32 | 33 | fmt.Printf("Will launch speed test server on port %d and tls on %d\n", *port, *tlsPort) 34 | 35 | go func() { 36 | err := http.ListenAndServe(":"+strconv.Itoa(*port), nil) 37 | if err != nil { 38 | log.Fatal(err) 39 | } 40 | }() 41 | 42 | go func() { 43 | // generate a `Certificate` struct 44 | cert, _ := tls.LoadX509KeyPair("ssl/cert.crt", "ssl/cert.key") 45 | 46 | // create a custom server with `TLSConfig` 47 | s := &http.Server{ 48 | Addr: ":" + strconv.Itoa(*tlsPort), 49 | Handler: nil, // use `http.DefaultServeMux` 50 | TLSConfig: &tls.Config{ 51 | Certificates: []tls.Certificate{cert}, 52 | }, 53 
| } 54 | 55 | //err := http.ListenAndServeTLS(":"+strconv.Itoa(tlsPort), "cert.crt", "cert.key", nil) 56 | err := s.ListenAndServeTLS("", "") 57 | if err != nil { 58 | log.Fatal(err) 59 | } 60 | }() 61 | 62 | sigc := make(chan os.Signal, 1) 63 | signal.Notify(sigc, os.Interrupt) 64 | 65 | <-sigc 66 | 67 | log.Println("speed test: shutting down") 68 | os.Exit(0) 69 | } 70 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/proxies/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from contextlib import contextmanager 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | from time import sleep 6 | 7 | from proxy_benchmarks.networking import is_socket_bound 8 | 9 | 10 | @dataclass 11 | class CertificateAuthority: 12 | public: Path 13 | key: Path 14 | 15 | 16 | class ProxyBase(ABC): 17 | def __init__(self, port): 18 | self.port = port 19 | 20 | @abstractmethod 21 | @contextmanager 22 | def launch(self): 23 | pass 24 | 25 | def wait_for_launch(self, timeout=20): 26 | # Wait for the socket to open 27 | while not is_socket_bound(self.port) and timeout > 0: 28 | print("Waiting for proxy port to open...") 29 | sleep(1) 30 | timeout -= 1 31 | if timeout == 0: 32 | raise TimeoutError("Timed out waiting for proxy to open") 33 | 34 | def wait_for_close(self, timeout=20): 35 | # Wait for the socket to open 36 | while is_socket_bound(self.port) and timeout > 0: 37 | print("Waiting for proxy port to close...") 38 | sleep(1) 39 | timeout -= 1 40 | if timeout == 0: 41 | raise TimeoutError("Timed out waiting for proxy to close") 42 | 43 | @property 44 | @abstractmethod 45 | def certificate_authority(self) -> CertificateAuthority: 46 | """ 47 | Root CA that's used to generate the different client hosts. 
48 | """ 49 | pass 50 | 51 | @property 52 | @abstractmethod 53 | def short_name(self) -> str: 54 | pass 55 | 56 | @abstractmethod 57 | def __repr__(self) -> str: 58 | pass 59 | -------------------------------------------------------------------------------- /groove/proxy/optimized_storage.go: -------------------------------------------------------------------------------- 1 | // https://github.com/elazarl/goproxy/blob/ac903b0f516b4b07599ab573d837cbaccb26feba/examples/goproxy-certstorage/optimized_storage.go 2 | // Has per-host locks to avoid situation, when multiple concurrent requests to a host without ready to use certificate will 3 | // generate the same certificate multiple times. 4 | package main 5 | 6 | import ( 7 | "crypto/tls" 8 | "fmt" 9 | "sync" 10 | ) 11 | 12 | type OptimizedCertStore struct { 13 | certs map[string]*tls.Certificate 14 | locks map[string]*sync.Mutex 15 | certLock *sync.RWMutex 16 | 17 | sync.Mutex 18 | } 19 | 20 | func NewOptimizedCertStore() *OptimizedCertStore { 21 | return &OptimizedCertStore{ 22 | certs: map[string]*tls.Certificate{}, 23 | locks: map[string]*sync.Mutex{}, 24 | certLock: &sync.RWMutex{}, 25 | } 26 | } 27 | 28 | func (s *OptimizedCertStore) Fetch(host string, genCert func() (*tls.Certificate, error)) (*tls.Certificate, error) { 29 | fmt.Printf("Fetching certificate for %s\n", host) 30 | 31 | hostLock := s.hostLock(host) 32 | hostLock.Lock() 33 | defer hostLock.Unlock() 34 | 35 | s.certLock.RLock() 36 | cert, ok := s.certs[host] 37 | s.certLock.RUnlock() 38 | var err error 39 | if !ok { 40 | fmt.Printf("cache miss: %s\n", host) 41 | 42 | cert, err = genCert() 43 | if err != nil { 44 | return nil, err 45 | } 46 | s.certLock.Lock() 47 | s.certs[host] = cert 48 | s.certLock.Unlock() 49 | } else { 50 | fmt.Printf("cache hit: %s\n", host) 51 | } 52 | return cert, nil 53 | } 54 | 55 | func (s *OptimizedCertStore) hostLock(host string) *sync.Mutex { 56 | // Only one host lock should be generated at one time 57 | s.Lock() 58 | 
defer s.Unlock() 59 | 60 | lock, ok := s.locks[host] 61 | if !ok { 62 | lock = &sync.Mutex{} 63 | s.locks[host] = lock 64 | } 65 | return lock 66 | } 67 | -------------------------------------------------------------------------------- /groove/proxy/cache_key.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/base64" 5 | "net/http" 6 | "net/url" 7 | "sort" 8 | ) 9 | 10 | type FlatQuery struct { 11 | // By default url.Values() is a map of string to slice of strings 12 | // This isn't compatible with sorting so we need to flatten them 13 | key string 14 | value string 15 | } 16 | 17 | type QueryArray []FlatQuery 18 | 19 | func (s QueryArray) Len() int { 20 | return len(s) 21 | } 22 | func (s QueryArray) Swap(i, j int) { 23 | s[i], s[j] = s[j], s[i] 24 | } 25 | func (s QueryArray) Less(i, j int) bool { 26 | queryPairI := s[i] 27 | queryPairJ := s[j] 28 | // Order by key first; only fall back to the value when the keys are equal 29 | if queryPairI.key < queryPairJ.key { 30 | return true 31 | } else if queryPairI.key > queryPairJ.key { 32 | return false 33 | } 34 | 35 | return queryPairI.value < queryPairJ.value 36 | } 37 | 38 | func newQueryArray(values url.Values) QueryArray { 39 | /* 40 | * Convert a url.Values into a QueryArray 41 | */ 42 | var queryArray QueryArray 43 | for key, keyValues := range values { 44 | for _, value := range keyValues { 45 | queryArray = append(queryArray, FlatQuery{key, value}) 46 | } 47 | } 48 | return queryArray 49 | } 50 | 51 | func getCacheKey(request *http.Request) string { 52 | /* 53 | * Generates a key based upon a request 54 | * TODO: Add heuristics for stripping a URL of parameters that are likely to change 55 | * - Host 56 | * - Path 57 | * - Method 58 | */ 59 | urlBase := request.URL.Hostname() + request.URL.Path 60 | method := request.Method 61 | 62 | // Sort arguments to align them across cache requests with same parameters 63 | queryArray := newQueryArray(request.URL.Query()) 64 | sort.Sort(queryArray) 65 | 66 | str
:= method + "-" + urlBase + "-" 67 | for _, queryPair := range queryArray { 68 | str += queryPair.key + "=" + queryPair.value + "&" 69 | } 70 | 71 | return base64.StdEncoding.EncodeToString([]byte(str)) 72 | } 73 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/speed-test/server/setup.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | set -e 3 | 4 | go install 5 | 6 | # Create a custom openssl config that sets the subject of the certificate to localhost 7 | if [ "$(uname)" == "Darwin" ]; then 8 | # Mac OS X platform 9 | FILE=/etc/ssl/openssl.cnf 10 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 11 | # GNU/Linux 12 | FILE=/usr/lib/ssl/openssl.cnf 13 | fi 14 | 15 | if [ -f "$FILE" ]; then 16 | echo "$FILE exists." 17 | else 18 | echo "$FILE does not exist." 19 | exit 1 20 | fi 21 | 22 | cat $FILE > openssl_config.conf 23 | 24 | # Use printf instead of echo since linux doesn't render \n properly 25 | printf '\n[SAN]\nsubjectAltName=DNS:localhost,IP:127.0.0.1,IP:127.0.0.2' >> openssl_config.conf 26 | 27 | cat openssl_config.conf 28 | 29 | # https://serverfault.com/questions/880804/can-not-get-rid-of-neterr-cert-common-name-invalid-error-in-chrome-with-self 30 | openssl genrsa -out cert.key 2048 31 | openssl req \ 32 | -newkey rsa:2048 \ 33 | -x509 \ 34 | -nodes \ 35 | -keyout cert.key \ 36 | -new \ 37 | -out cert.crt \ 38 | -subj /CN=SpeedTestServer \ 39 | -reqexts SAN \ 40 | -extensions SAN \ 41 | -config openssl_config.conf \ 42 | -sha256 \ 43 | -days 3650 44 | 45 | if [ "$(uname)" == "Darwin" ]; then 46 | # Mac OS X platform 47 | sudo security add-trusted-cert -d -p ssl -p basic -k /Library/Keychains/System.keychain ./cert.crt 48 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then 49 | # GNU/Linux 50 | cp ./cert.crt /usr/local/share/ca-certificates/speed-test-server.crt 51 | sudo update-ca-certificates 52 | 
# Per-variant settings: the asset sub-directory holding the Go project and the
# port that variant listens on.
proxy_configurations = {
    MimicTypeEnum.STANDARD: {
        "project_path": "gomitmproxy",
        "port": 6010,
    },
    MimicTypeEnum.MIMIC: {
        "project_path": "gomitmproxy-mimic",
        "port": 6011,
    },
}


class GoMitmProxy(ProxyBase):
    """Benchmark wrapper that runs one of the gomitmproxy Go projects."""

    def __init__(self, proxy_type: MimicTypeEnum):
        """Select the project directory and port for ``proxy_type``."""
        settings = proxy_configurations[proxy_type]

        super().__init__(port=settings["port"])
        self.project_path = settings["project_path"]

    @contextmanager
    def launch(self):
        """Run the proxy with ``go run`` and yield the spawned process.

        On exit the process is terminated and the method waits for the
        proxy's socket to close.
        """
        project_directory = get_asset_path(f"proxies/{self.project_path}")
        command = ["go", "run", ".", "--port", str(self.port)]
        process = Popen(command, cwd=project_directory)

        self.wait_for_launch()
        # Requires a bit more time to load than our other proxies
        sleep(2)

        try:
            yield process
        finally:
            terminate_all(process)

            # Wait for the socket to close
            self.wait_for_close(60)

    @property
    def certificate_authority(self) -> CertificateAuthority:
        """CA certificate and key paths inside the project's ``ssl`` folder."""
        public_path = get_asset_path(f"proxies/{self.project_path}/ssl/ca.crt")
        key_path = get_asset_path(f"proxies/{self.project_path}/ssl/ca.key")
        return CertificateAuthority(public=public_path, key=key_path)

    @property
    def short_name(self) -> str:
        """Identifier of this proxy variant; same as the project directory name."""
        return self.project_path

    def __repr__(self) -> str:
        return f"GoMitmProxy(port={self.port},version={self.project_path})"
/** A request captured on (or to be replayed from) a tape. */
export interface TapeRequest {
    url: string
    method: string
    // Header name -> list of values. Type arguments restored to mirror the
    // proxy's archived request headers (map[string][]string) - TODO confirm.
    headers: Record<string, string[]>
    body: Buffer
}

/** The response recorded for a TapeRequest. */
export interface TapeResponse {
    status: number
    // Header name -> list of values, same shape as TapeRequest.headers.
    headers: Record<string, string[]>
    body: Buffer
}

/** One request/response pair on a tape. */
export interface TapeRecord {
    request: TapeRequest
    response: TapeResponse
}

// JSON shape exchanged with the proxy: identical to TapeRecord except that
// both bodies travel as base64 strings instead of Buffers.
interface WireTapeRecord {
    request: Omit<TapeRequest, "body"> & { body: string }
    response: Omit<TapeResponse, "body"> & { body: string }
}

/**
 * An ordered list of tape records plus the conversions to and from the
 * gzip-compressed JSON blob exchanged with the proxy server.
 */
export class TapeSession {
    records: TapeRecord[];

    constructor(records?: TapeRecord[]) {
        this.records = records || [];
    }

    /**
     * Replace `records` with the contents of a blob produced by the server
     * (or by `toServer`).
     *
     * @param contents gzip-compressed JSON array of records whose bodies are
     *                 base64 strings.
     * @throws if the blob is not valid gzip or not valid JSON.
     */
    async readFromServer(contents: Buffer) {
        // un-gzip and un-json the blob
        const uncompressed = gunzipSync(contents);
        // A `null` payload is treated as "no records" rather than crashing
        // on `.map` (presumably what an empty tape serializes to - TODO confirm).
        const jsonPayload: WireTapeRecord[] = JSON.parse(uncompressed.toString()) || [];

        // Un-base64 the bodies
        this.records = jsonPayload.map((record) => {
            return {
                ...record,
                request: {
                    ...record.request,
                    body: Buffer.from(record.request.body, 'base64'),
                },
                response: {
                    ...record.response,
                    body: Buffer.from(record.response.body, 'base64'),
                }
            }
        });
    }

    /**
     * Serialize `records` into the blob format the server accepts.
     *
     * @returns gzip-compressed JSON array with base64-encoded bodies.
     */
    toServer() : Buffer {
        // Base64 the bodies
        const jsonPayload: WireTapeRecord[] = this.records.map((record) => {
            return {
                ...record,
                request: {
                    ...record.request,
                    body: record.request.body.toString('base64'),
                },
                response: {
                    ...record.response,
                    body: record.response.body.toString('base64'),
                }
            }
        });

        // Json and gzip the blob
        const compressed = gzipSync(JSON.stringify(jsonPayload));
        return compressed;
    }
}
class NodeHttpProxy(ProxyBase):
    """Benchmark wrapper around the Node proxy in ``proxies/node_http_proxy``."""

    def __init__(self):
        super().__init__(port=6016)

    @contextmanager
    def launch(self):
        """Start the Node proxy and yield the spawned process.

        On exit the process is terminated, every file in the proxy's
        ``certs`` and ``keys`` folders except the CA files is deleted, and
        the method waits for the proxy to close.
        """
        project_directory = get_asset_path("proxies/node_http_proxy")
        # We need to launch with node and not npm, otherwise it won't receive the shutdown signal
        # and shutdown will time out
        command = ["node", "index.js", "--port", str(self.port)]
        process = Popen(command, cwd=project_directory)

        self.wait_for_launch()
        sleep(1)

        try:
            yield process
        finally:
            terminate_all(process)

            # Delete the content on disk, keeping only the CA material
            certificates_path = get_asset_path("proxies/node_http_proxy/.http-mitm-proxy/certs")
            keys_path = get_asset_path("proxies/node_http_proxy/.http-mitm-proxy/keys")
            filename_whitelist = {"ca.private.key", "ca.public.key", "ca.pem"}

            for directory in (certificates_path, keys_path):
                for entry in directory.iterdir():
                    if entry.name in filename_whitelist:
                        continue
                    print(f"Will remove: {entry}")
                    entry.unlink()

            self.wait_for_close()

    @property
    def certificate_authority(self) -> CertificateAuthority:
        """CA certificate and private key paths under ``.http-mitm-proxy``."""
        public_path = get_asset_path("proxies/node_http_proxy/.http-mitm-proxy/certs/ca.pem")
        key_path = get_asset_path("proxies/node_http_proxy/.http-mitm-proxy/keys/ca.private.key")
        return CertificateAuthority(public=public_path, key=key_path)

    @property
    def short_name(self) -> str:
        """Fixed identifier for this proxy."""
        return "node_http_proxy"

    def __repr__(self) -> str:
        return f"NodeHttpProxy(port={self.port})"
"true" 35 | process = Popen(["go", "run", ".", "--port", str(self.port), f"-v={verbose}"], cwd=current_extension_path) 36 | 37 | # Wait for the proxy to spin up 38 | self.wait_for_launch() 39 | 40 | # Requires a bit more time to load than our other proxies 41 | sleep(2) 42 | 43 | try: 44 | yield process 45 | finally: 46 | terminate_all(process) 47 | 48 | # Wait for the socket to close 49 | self.wait_for_close(60) 50 | 51 | @property 52 | def certificate_authority(self) -> CertificateAuthority: 53 | return CertificateAuthority( 54 | public=get_asset_path(f"proxies/{self.project_path}/ssl/ca.crt"), 55 | key=get_asset_path(f"proxies/{self.project_path}/ssl/ca.key"), 56 | ) 57 | 58 | @property 59 | def short_name(self) -> str: 60 | return self.project_path 61 | 62 | def __repr__(self) -> str: 63 | return f"GoProxy(port={self.port},version={self.project_path})" 64 | -------------------------------------------------------------------------------- /groove/groove.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV DOCKER "1" 4 | ENV PYTHONUNBUFFERED "1" 5 | ENV NODE_VERSION v16.14.0 6 | ENV NVM_DIR /usr/local/nvm 7 | 8 | RUN apt-get -y update \ 9 | && apt-get -y install python3 python3.10-venv curl gcc python3-dev sudo ca-certificates golang-go git 10 | 11 | # Install node. We need to source the nvm executable via `. nvm.sh` to allow the script to work inside sh, which 12 | # is the default shell during docker build 13 | RUN mkdir -p $NVM_DIR \ 14 | && curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash \ 15 | && . 
$NVM_DIR/nvm.sh \ 16 | && nvm install $NODE_VERSION 17 | 18 | RUN curl -sSL https://install.python-poetry.org | python3 - 19 | 20 | # Required to add poetry and node executables to the path 21 | ENV PATH="/root/.local/bin:/$NVM_DIR/versions/node/$NODE_VERSION/bin:$PATH" 22 | 23 | WORKDIR /app 24 | 25 | # Install python dependencies 26 | ADD groove-python/poetry.lock groove-python/poetry.lock 27 | ADD groove-python/pyproject.toml groove-python/pyproject.toml 28 | RUN cd groove-python && poetry install --no-root 29 | 30 | ADD . /app 31 | ADD ./groove_entrypoint.sh /app/benchmark_entrypoint.sh 32 | 33 | # Mount the scripts, don't perform any additional installation 34 | RUN cd groove-python && poetry install --no-interaction 35 | 36 | # This has to be done after application files are moved over, because it relies on the tsconfig 37 | # and other raw file paths. Ideally npm dependencies would be installed alongside the python raw 38 | # ones before code is moved over 39 | RUN cd groove-node && npm install && npm run build 40 | 41 | # Install the certificate management tools that Chromium uses on Linux 42 | # This is required to add our custom certificates 43 | # https://chromium.googlesource.com/chromium/src/+/master/docs/linux/cert_management.md 44 | RUN apt-get install -y libnss3-tools 45 | RUN mkdir -p $HOME/.pki/nssdb 46 | 47 | # Install the dependent packages 48 | RUN ./setup.sh 49 | RUN ./build.sh 50 | 51 | # Install the root certificates 52 | RUN cd proxy && go run . 
install-ca 53 | 54 | RUN cd groove-python && poetry run playwright install-deps chromium 55 | RUN cd groove-python && poetry run playwright install chromium 56 | 57 | ENTRYPOINT [ "/app/groove_entrypoint.sh" ] 58 | -------------------------------------------------------------------------------- /groove/groove-node/README.md: -------------------------------------------------------------------------------- 1 | # Groove 2 | 3 | Node APIs for Groove, a proxy server built for web crawling and unit test mocking. Highlights of its primary features: 4 | 5 | - HTTP and HTTPS support over HTTP/1 and HTTP/2. 6 | - Local CA certificate generation and installation on Mac and Linux to support system curl and Chromium. 7 | - Different tiers of caching support - from disabling completely to aggressively maintaining all body archives. 8 | - Limit outbound requests of the same URL to 1 concurrent request to save on bandwidth if requests are already inflight. 9 | - Record and replay requests made to outgoing servers. Recreate testing flows in unit tests while separating them from crawling business logic. 10 | - 3rd party proxy support for commercial proxies. 11 | - Custom TLS Client Hello support to maintain a Chromium-like TLS handshake while intercepting requests and re-forwarding packets. 12 | 13 | For more information, see the [GitHub](https://github.com/piercefreeman/grooveproxy) project. 14 | 15 | ## Usage 16 | 17 | Add groove to your project and generate the local certificates. 
18 | 19 | ``` 20 | npm install @piercefreeman/groove 21 | npx @piercefreeman/groove install-ca 22 | ``` 23 | 24 | ```javascript 25 | import { Groove, TapeSession, fetchWithProxy } from '@piercefreeman/groove' 26 | 27 | const main = async () => { 28 | const proxy = new Groove( 29 | commandTimeout?; 30 | port?; 31 | controlPort?; 32 | proxyServer?; 33 | proxyUsername?; 34 | proxyPassword?; 35 | ) 36 | await proxy.launch() 37 | 38 | const mockedSession = new TapeSession( 39 | [ 40 | { 41 | request: { 42 | url: "https://example.com:443/", 43 | method: "GET", 44 | headers: {}, 45 | body: Buffer.from(""), 46 | }, 47 | response: { 48 | status: 200, 49 | headers: {}, 50 | body: Buffer.from("Test response") 51 | } 52 | } 53 | ] 54 | ) 55 | 56 | await proxy.tapeLoad(mockedSession); 57 | 58 | const response = await fetchWithProxy("https://example.com", proxy); 59 | console.log(response) // "Test response" 60 | } 61 | ``` 62 | -------------------------------------------------------------------------------- /groove/groove-python/benchmark_end_proxy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Benchmark the load performance of a 3rd party proxy provider 3 | 4 | """ 5 | from time import time 6 | 7 | from click import command, option, secho 8 | from groove.enums import CacheModeEnum 9 | from playwright.sync_api import sync_playwright 10 | 11 | from groove.dialer import (DefaultLocalPassthroughDialer, DialerDefinition, 12 | ProxyDefinition) 13 | from groove.proxy import Groove 14 | 15 | 16 | def handle(route, request): 17 | resource_type = request.resource_type 18 | 19 | # override headers 20 | headers = { 21 | **request.headers, 22 | "Resource-Type": resource_type, 23 | } 24 | route.continue_(headers=headers) 25 | 26 | 27 | @command() 28 | @option("--url", required=True) 29 | @option("--proxy-server", required=True) 30 | @option("--proxy-username", required=True) 31 | @option("--proxy-password", required=True) 32 | def
benchmark(url, proxy_server, proxy_username, proxy_password): 33 | groove = Groove(port=6040, control_port=6041) 34 | 35 | with groove.launch(): 36 | with sync_playwright() as p: 37 | groove.set_cache_mode(CacheModeEnum.OFF) 38 | groove.dialer_load( 39 | [ 40 | DefaultLocalPassthroughDialer(), 41 | DialerDefinition( 42 | priority=DefaultLocalPassthroughDialer().priority - 1, 43 | proxy=ProxyDefinition( 44 | url=proxy_server, 45 | username=proxy_username, 46 | password=proxy_password, 47 | ), 48 | ) 49 | ] 50 | ) 51 | 52 | browser = p.chromium.launch( 53 | headless=False, 54 | ) 55 | 56 | context = browser.new_context( 57 | proxy={ 58 | "server": groove.base_url_proxy, 59 | } 60 | ) 61 | 62 | page = context.new_page() 63 | 64 | page.route("**/*", handle) 65 | 66 | start = time() 67 | page.goto(url, timeout=60000) 68 | end = time() 69 | print(f"Time taken: {end - start}") 70 | 71 | browser.close() 72 | 73 | if __name__ == '__main__': 74 | benchmark() 75 | -------------------------------------------------------------------------------- /proxy-benchmarks/results_certificate_speed.csv: -------------------------------------------------------------------------------- 1 | ,proxy,cold_start,cold_start,cold_start,cold_start,cold_start,cold_start,cold_start,cold_start,warm_start,warm_start,warm_start,warm_start,warm_start,warm_start,warm_start,warm_start,difference,difference,difference,difference,difference,difference,difference,difference 2 | ,,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max 3 | 
0,gomitmproxy,100.0,1.0496425485610958,0.00879036595403604,1.0289888381958,1.044404983520507,1.0503998994827266,1.0550725460052486,1.083940029144287,100.0,1.0207015419006344,0.0054636224502961055,1.011942148208618,1.0173577070236202,1.020434498786926,1.0240057706832881,1.041516065597534,100.0,0.028941006660461364,0.008554006896741738,0.007764816284180132,0.022738754749297985,0.030244946479797474,0.0351329445838931,0.05309605598449707 4 | 1,goproxy,100.0,1.215417215824127,0.09625686371339456,1.109206914901733,1.1294960379600523,1.140260577201843,1.3195731043815608,1.333560228347778,100.0,1.0201748442649838,0.003752586200017458,1.011085033416748,1.0173680186271663,1.021618008613586,1.0230638980865474,1.026111125946045,100.0,0.195242371559143,0.096283889886155,0.09007072448730491,0.10887652635574324,0.11922502517700195,0.299478471279144,0.31599688529968306 5 | 2,martian,100.0,1.0522930335998533,0.01513513339338744,1.029561042785644,1.0440703630447388,1.0526983737945554,1.0587595701217645,1.166044950485229,100.0,1.0196023631095883,0.008804890046724134,0.9448549747467041,1.0169251561164856,1.0205835103988645,1.0242233872413635,1.029875040054321,100.0,0.03269067049026481,0.01673337243365261,0.009275913238525169,0.024302005767822488,0.031364917755127064,0.038413822650909424,0.15472888946533203 6 | 3,mitmproxy,100.0,1.0964409613609312,0.012799810091654706,1.073225021362304,1.0915142893791192,1.0974068641662595,1.0991045236587522,1.153535842895507,100.0,1.0336298251152036,0.019191992628070878,1.017199039459228,1.0291442871093746,1.032397031784057,1.034441471099853,1.211155891418457,100.0,0.06281113624572758,0.019788189050755366,-0.10944890975952193,0.06084465980529735,0.0649429559707645,0.06749248504638655,0.11164188385009699 7 | 
4,node_http_proxy,100.0,1.1461002588272091,0.03284950095399102,1.083567857742309,1.1246507167816158,1.1414339542388916,1.163746416568756,1.318521976470947,100.0,1.0241687130928037,0.00578730663361507,1.013131141662597,1.0193566679954524,1.025245070457458,1.0283074378967285,1.045683860778808,100.0,0.12193154573440551,0.0313346536082107,0.0654597282409668,0.10139590501785267,0.11684358119964555,0.13757568597793574,0.29004907608032204 8 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/gomitmproxy/go.sum: -------------------------------------------------------------------------------- 1 | github.com/AdguardTeam/golibs v0.4.0 h1:4VX6LoOqFe9p9Gf55BeD8BvJD6M6RDYmgEiHrENE9KU= 2 | github.com/AdguardTeam/golibs v0.4.0/go.mod h1:skKsDKIBB7kkFflLJBpfGX+G8QFTx0WKUzB6TIgtUj4= 3 | github.com/AdguardTeam/gomitmproxy v0.2.1 h1:p9gr8Er1TYvf+7ic81Ax1sZ62UNCsMTZNbm7tC59S9o= 4 | github.com/AdguardTeam/gomitmproxy v0.2.1/go.mod h1:Qdv0Mktnzer5zpdpi5rAwixNJzW2FN91LjKJCkVbYGU= 5 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 6 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 7 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 8 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 9 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 10 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 11 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 12 | github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 13 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 14 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 15 | github.com/pmezard/go-difflib 
v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 16 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 17 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 18 | github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= 19 | github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= 20 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 21 | golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= 22 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 23 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 24 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 25 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 26 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 27 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= 28 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 29 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy-mimic/go.sum: -------------------------------------------------------------------------------- 1 | github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= 2 | github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= 3 | github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc= 4 | github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= 5 | github.com/piercefreeman/goproxy v0.0.3 h1:6hAyj4plFuu3+lgt5u5ZozcXjPSJ0QwtxuhMkFJchsc= 6 | github.com/piercefreeman/goproxy 
v0.0.3/go.mod h1:l5HLvLDhJ/BRwyAg1MZI7X0wKuyrFqCX7flLI11zsqg= 7 | github.com/refraction-networking/utls v1.1.3 h1:K9opY+iKxcGvHOBG2019wFEVtsNFh0f5WqHyc2i3iU0= 8 | github.com/refraction-networking/utls v1.1.3/go.mod h1:+D89TUtA8+NKVFj1IXWr0p3tSdX1+SqUB7rL0QnGqyg= 9 | github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc= 10 | golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa h1:idItI2DDfCokpg0N51B2VtiLdJ4vAuXC9fnCb2gACo4= 11 | golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 12 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 13 | golang.org/x/net v0.0.0-20211111160137-58aab5ef257a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 14 | golang.org/x/net v0.0.0-20221012135044-0b7e1fb9d458 h1:MgJ6t2zo8v0tbmLCueaCbF1RM+TtB0rs3Lv8DGtOIpY= 15 | golang.org/x/net v0.0.0-20221012135044-0b7e1fb9d458/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= 16 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 17 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 18 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 19 | golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 h1:WIoqL4EROvwiPdUtaip4VcDdpZ4kha7wBWZrbVKCIZg= 20 | golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 21 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 22 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 23 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 24 | golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= 25 | golang.org/x/text 
v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 26 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 27 | -------------------------------------------------------------------------------- /proxy-benchmarks/results_load_test.csv: -------------------------------------------------------------------------------- 1 | ,Type,Name,Request Count,Failure Count,Median Response Time,Average Response Time,Min Response Time,Max Response Time,Average Content Size,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100%,proxy,protocol 2 | 0,GET,/handle,49500,0,1000.4473339649849,1006.3756309931952,1000.4473339649849,1090.5747080105357,59.0,831.2566455713992,0.0,1000,1000,1000,1000,1000,1000,1000,1000,1100,1100,1100,baseline,http 3 | 0,GET,/handle,49500,0,1000.6045830086805,1009.8150941664863,1000.6045830086805,1247.5687920232303,59.0,826.0450596737427,0.0,1000,1000,1000,1000,1000,1000,1100,1200,1200,1200,1200,baseline,https 4 | 0,GET,/handle,32879,0,1200.0,1500.5476697827937,1002.9480829834938,24037.6951249782,59.0,547.9979381488608,0.0,1200,1300,1300,1300,1300,1400,6800,12000,24000,24000,24000,mitmproxy,http 5 | 0,GET,/handle,35412,0,1400.0,1403.9605547386368,1006.1023749876767,3472.1471249940805,59.0,590.1054150496846,0.0,1400,1400,1500,1500,1500,1600,1700,1800,3500,3500,3500,mitmproxy,https 6 | 0,GET,/handle,15613,684,1100.0,3102.3485121692393,1002.2186249843799,27834.054457955062,56.415230897329145,260.2023815945621,11.399374176050758,1100,1100,1200,1300,7700,21000,26000,26000,27000,28000,28000,node_http_proxy,http 7 | 0,GET,/handle,11946,152,1200.0,3537.6253651541215,1006.0942910495214,41292.19812504016,58.24928846475808,199.0978639116583,2.5333061539069193,1200,1400,2100,3200,8200,21000,22000,26000,40000,41000,41000,node_http_proxy,https 8 | 
0,GET,/handle,27512,2,1100.0,1741.6814580679684,1000.9505829657428,8930.587625014596,58.9957109624891,458.72150233351147,0.033347012382488474,1100,1400,1600,1900,3600,5200,6600,6900,8600,8900,8900,gomitmproxy,http 9 | 0,GET,/handle,20702,1359,1200.0,2119.0302954133613,297.47037502238527,34138.19887494901,55.126895952081924,345.04796979855536,22.650960822927097,1200,1300,1300,1500,2400,4000,24000,31000,34000,34000,34000,gomitmproxy,https 10 | 0,GET,/handle,25359,9,1000.6525840144604,1417.6377039526644,1000.6525840144604,9463.470916962251,58.97906068851295,487.681960584702,0.17308007592027752,1000,1100,1500,1800,2300,2900,4200,5600,8500,9300,9500,martian,http 11 | 0,GET,/handle,22742,685,1800.0,2033.0576934022638,120.986666996032,7519.9379999539815,57.22289156626506,380.8180183001155,11.470422237955287,1800,2000,2200,2400,3000,4100,5600,7000,7400,7500,7500,martian,https 12 | 0,GET,/handle,36753,0,1100.0,1340.76236831164,1001.1865830165334,3370.086500013713,59.0,615.3236016568449,0.0,1100,1300,1600,1700,1900,2100,2200,2300,3300,3400,3400,goproxy,http 13 | 0,GET,/handle,29438,1070,1600.0,1656.032863465346,142.88012497127056,8962.757707980927,56.855492900332905,489.9096807090814,17.807030313157046,1600,1800,2000,2100,2200,2300,2500,2600,2900,8900,9000,goproxy,https 14 | -------------------------------------------------------------------------------- /.github/workflows/test-benchmarks.yml: -------------------------------------------------------------------------------- 1 | name: Test benchmarks 2 | 3 | on: 4 | push: 5 | paths: 6 | - 'proxy-benchmarks/**/*' 7 | - '.github/workflows/*benchmarks.*' 8 | 9 | env: 10 | IMAGE: piercefreeman/proxy-benchmarks 11 | # Update the `run_benchmarking_tests` alongside these tags 12 | FINGERPRINT_TAG: "fingerprint" 13 | LOAD_TAG: "load" 14 | SPEED_TAG: "speed" 15 | SSL_TAG: "ssl" 16 | 17 | jobs: 18 | build: 19 | name: Build testing base 20 | runs-on: ubuntu-latest 21 | steps: 22 | - name: Check out the repo 23 | uses: actions/checkout@v3 
24 | 25 | - name: Set up QEMU 26 | uses: docker/setup-qemu-action@v2 27 | 28 | - name: Set up Docker Buildx 29 | uses: docker/setup-buildx-action@v2 30 | 31 | - name: Login to Docker Hub 32 | uses: docker/login-action@v2 33 | with: 34 | username: ${{ secrets.DOCKERHUB_USERNAME }} 35 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 36 | 37 | - name: Build and push 38 | uses: docker/build-push-action@v3 39 | with: 40 | file: proxy-benchmarks/benchmark.Dockerfile 41 | context: proxy-benchmarks 42 | push: true 43 | tags: ${{ env.IMAGE }}:${{ github.sha }} 44 | 45 | run_benchmarking_tests: 46 | name: Run benchmarking tests 47 | runs-on: ubuntu-latest 48 | strategy: 49 | matrix: 50 | # Ideally these would be imported from the env variable, but matrix format 51 | # doesn't support variable filling 52 | tag: [fingerprint, load, speed, ssl] 53 | needs: build 54 | steps: 55 | - name: Login to Docker Hub 56 | uses: docker/login-action@v2 57 | with: 58 | username: ${{ secrets.DOCKERHUB_USERNAME }} 59 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 60 | 61 | - name: Pull image 62 | run: docker pull ${{ env.IMAGE }}:${{ github.sha }} 63 | 64 | - name: Run test 65 | run: docker run --cap-add NET_ADMIN ${{ env.IMAGE }}:${{ github.sha }} test -m ${{ matrix.tag }} 66 | 67 | run_remaining: 68 | name: Run remaining tests 69 | runs-on: ubuntu-latest 70 | needs: build 71 | steps: 72 | - name: Login to Docker Hub 73 | uses: docker/login-action@v2 74 | with: 75 | username: ${{ secrets.DOCKERHUB_USERNAME }} 76 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 77 | 78 | - name: Pull image 79 | run: docker pull ${{ env.IMAGE }}:${{ github.sha }} 80 | 81 | - name: Run test 82 | run: 83 | docker run --cap-add NET_ADMIN ${{ env.IMAGE }}:${{ github.sha }} test -m "not ${{ env.FINGERPRINT_TAG }} and not ${{ env.LOAD_TAG }} and not ${{ env.SPEED_TAG }} and not ${{ env.SSL_TAG }}" 84 | -------------------------------------------------------------------------------- 
class MitmProxy(ProxyBase):
    """
    Benchmark wrapper that runs `mitmdump` as a subprocess on a fixed port.

    The script handed to mitmdump via `-s` is this very module, so whatever
    module-level `addons` list this file defines is what mitmdump loads.

    """

    def __init__(self):
        super().__init__(port=6015)

    @contextmanager
    def launch(self):
        """
        Start mitmdump, yield the `Popen` handle, and tear it down on exit.

        The readiness wait happens inside the `try` block so that a proxy which
        never comes up is still terminated instead of being left running.

        """
        current_extension_path = Path(__file__).resolve()
        certificate_directory = get_asset_path("proxies/mitmproxy/ssl")

        process = Popen(
            # NOTE: Even though our local testing server validates in the system keychain, mitmdump appears to
            # do a separate validation and throws a 502 bad gateway error when using locally signed certificates.
            ["poetry", "run", "mitmdump", "-s", str(current_extension_path), "--listen-port", str(self.port), "--set", f"confdir={certificate_directory}", "--ssl-insecure"],
        )

        try:
            # NOTE(review): wait_for_launch / wait_for_close are not defined in this
            # class; presumably they come from ProxyBase and poll the port - confirm.
            self.wait_for_launch()
            # Extra fixed grace period after the readiness check.
            sleep(1)

            yield process
        finally:
            terminate_all(process)

            # TODO: Remove certificates from launch so we can explicitly test new
            # credential generation. (Not implemented: nothing is deleted here.)

            self.wait_for_close()

    @property
    def certificate_authority(self) -> CertificateAuthority:
        """Locations of the CA certificate and key inside the mitmproxy asset directory."""
        return CertificateAuthority(
            public=get_asset_path("proxies/mitmproxy/ssl/mitmproxy-ca.crt"),
            key=get_asset_path("proxies/mitmproxy/ssl/mitmproxy-ca.key"),
        )

    @property
    def short_name(self) -> str:
        """Identifier used to label this proxy."""
        return "mitmproxy"

    def __repr__(self) -> str:
        return f"MitmProxy(port={self.port})"
25 | /// response metadata 26 | //redirected bool 27 | Status int `json:"status"` 28 | Headers map[string][]string `json:"headers"` 29 | Body []byte `json:"body"` 30 | } 31 | 32 | func requestToArchivedRequest(request *http.Request) *ArchivedRequest { 33 | requestBody, err := io.ReadAll(request.Body) 34 | 35 | if err != nil { 36 | log.Println("Unable to read request body stream.") 37 | return nil 38 | } 39 | 40 | // Allow other clients to consume these bodies again 41 | request.Body = ioutil.NopCloser(bytes.NewReader(requestBody)) 42 | 43 | return &ArchivedRequest{ 44 | // last url accessed - how do we get the first 45 | Url: request.URL.String(), 46 | Method: request.Method, 47 | Headers: request.Header, 48 | Body: requestBody, 49 | } 50 | } 51 | 52 | func responseToArchivedResponse(response *http.Response) *ArchivedResponse { 53 | responseBody, err := io.ReadAll(response.Body) 54 | 55 | if err != nil { 56 | log.Println("Unable to read response body stream.") 57 | return nil 58 | } 59 | 60 | // Allow other clients to consume these bodies again 61 | response.Body = ioutil.NopCloser(bytes.NewReader(responseBody)) 62 | 63 | return &ArchivedResponse{ 64 | Status: response.StatusCode, 65 | Headers: response.Header, 66 | Body: responseBody, 67 | } 68 | } 69 | 70 | func archivedResponseToResponse(request *http.Request, archivedResponse *ArchivedResponse) *http.Response { 71 | // Format the archived response as a full http response 72 | resp := &http.Response{} 73 | resp.Request = request 74 | resp.TransferEncoding = request.TransferEncoding 75 | resp.Header = make(http.Header) 76 | for key, valueList := range archivedResponse.Headers { 77 | for _, value := range valueList { 78 | resp.Header.Add(key, value) 79 | } 80 | } 81 | resp.StatusCode = archivedResponse.Status 82 | resp.Status = http.StatusText(archivedResponse.Status) 83 | resp.ContentLength = int64(len(archivedResponse.Body)) 84 | resp.Body = ioutil.NopCloser(bytes.NewReader(archivedResponse.Body)) 85 | return 
interface FetchTimeoutConfiguration extends RequestInit {
    timeout?: number;
}

/**
 * Fetch `url` with node-fetch, forwarding the optional `timeout` (milliseconds).
 *
 * We rely on the non-standard `timeout` option of node-fetch 2.x rather than an
 * AbortController signal: the 2.x signal type definitions are incompatible, and
 * 3.x (which fixed them) is ESM-only, which we don't currently use.
 *
 * An earlier version also armed a `setTimeout` that aborted a controller which
 * was never passed to `fetch`. It had no effect on the request and was left
 * pending whenever `fetch` rejected, so it has been removed.
 */
export const fetchWithTimeout = async (url: string, options: FetchTimeoutConfiguration) => {
    const { timeout } = options;

    return fetch(url, {
        ...options,
        timeout,
    });
}

/** Resolve after `ms` milliseconds. */
export const sleep = (ms: number) => {
    return new Promise(resolve => setTimeout(resolve, ms))
}

/**
 * Collect every chunk emitted by `stream` into a single Buffer.
 *
 * Right now we can't typehint the stream because node's built-in ReadableStream
 * typehint is missing `.on` properties.
 */
export const streamToBuffer = (stream: any): Promise<Buffer> => {
    return new Promise<Buffer>((resolve, reject) => {
        const chunks: Uint8Array[] = [];

        stream.on("data", (chunk: Uint8Array) => chunks.push(chunk));
        stream.on("end", () => resolve(Buffer.concat(chunks)));
        stream.on("error", (err: Error) => reject(err));
    });
}

/**
 * Helper method to fetch through the proxy while respecting the locally signed
 * certificates. Resolves with the response body decoded as a string.
 *
 * @param {string} url - URL to request
 * @param {import('./index').Groove} proxy - proxy instance
 * @param {RequestOptions?} configuration - anything that would normally be
 *   passed to `https.request`; overrides the defaults set here
 */
export const fetchWithProxy = async (url: string, proxy: any, configuration: any): Promise<string> => {
    const agent = new HttpsProxyAgent(proxy.baseUrlProxy);

    return new Promise<string>((resolve, reject) => {
        const outgoing = request(
            url,
            {
                agent,
                ca: proxy.certificate,
                ...(configuration || {}),
            },
            (response: any) => {
                let data = "";

                response.on("data", (chunk: any) => {
                    data = data + chunk.toString();
                });

                response.on("end", () => {
                    resolve(data);
                });

                response.on("error", (error: Error) => {
                    reject(error);
                });
            });

        // Connection-level failures (proxy unreachable, TLS rejection, ...) are
        // emitted on the request object, not the response. Without a listener
        // they surface as an unhandled 'error' event and this promise never settles.
        outgoing.on("error", (error: Error) => {
            reject(error);
        });

        outgoing.end();
    });
}
# NOTE(review): a boolean flag whose default is already True is suspicious - confirm
# what passing `--inspect-browser` actually does under the installed click version.
@command()
@option("--inspect-browser", is_flag=True, default=True)
@pass_obj
def basic_ssl_test(obj, inspect_browser: bool):
    """
    Walk through the different proxy servers and test their SSL validity separately.

    :param inspect_browser: If true, the run pauses for the user to press enter
        before continuing. This allows you to fully inspect the certificate in the
        Chrome inspector and debugging console.

    """
    proxies: list[ProxyBase] = [
        GoProxy(MimicTypeEnum.STANDARD),
        GoProxy(MimicTypeEnum.MIMIC),
        MitmProxy(),
        NodeHttpProxy(),
        GoMitmProxy(MimicTypeEnum.STANDARD),
        GoMitmProxy(MimicTypeEnum.MIMIC),
        MartianProxy(),
    ]

    # A visible (non-headless) browser is used so the certificate can be inspected by hand.
    request = ChromeRequest(headless=False, keep_open=inspect_browser)
    execute_raw(obj, inspect_browser, request, proxies)


def execute_raw(obj, inspect_browser: bool, request: RequestBase, proxies: list[ProxyBase]):
    """
    Launch the load server, then issue one HTTPS request through each proxy in turn.

    :param obj: Mapping that provides the `console` and `divider` entries used for output.
    :param inspect_browser: If true, wait for the user before testing. Entering anything
        other than a bare enter aborts the run.
    :param request: Client used to issue the request through each proxy.
    :param proxies: Proxies to launch and test, one at a time.

    """
    console = obj["console"]
    divider = obj["divider"]

    with run_load_server() as load_server_definition:
        synthetic_ip_addresses = SyntheticHosts(
            [
                SyntheticHostDefinition(
                    name="load-server",
                    http_port=load_server_definition["http"],
                    https_port=load_server_definition["https"],
                )
            ]
        ).configure()
        # Only one host was configured, so the first value is the load server's address.
        synthetic_ip_address = next(iter(synthetic_ip_addresses.values()))
        print("\nSynthetic IP", synthetic_ip_address)

        if inspect_browser:
            print("Waiting for manual client access...")
            if input(" > Press enter when ready...") != "":
                return

        for proxy in proxies:
            with proxy.launch():
                # The banner previously carried a stray ")" after the proxy name.
                console.print(f"{divider}\nTesting {request} with proxy {proxy}\n{divider}", style="bold blue")
                request.handle_request(
                    f"https://{synthetic_ip_address}/handle",
                    proxy=f"http://localhost:{proxy.port}",
                )
7 | - Different tiers of caching support - from disabling completely to aggressively maintaining all body archives. 8 | - Limit outbound requests of the same URL to 1 concurrent request to save on bandwidth if requests are already inflight. 9 | - Record and replay requests made to outgoing servers. Recreate testing flows in unit tests while separating them from crawling business logic. 10 | - 3rd party proxy support for commercial proxies. 11 | - Custom TLS Client Hello support to maintain a Chromium-like TLS handshake while intercepting requests and re-forwarding packets. 12 | 13 | For more information, see the [GitHub](https://github.com/piercefreeman/grooveproxy) project. 14 | 15 | ## Usage 16 | 17 | Add groove to your project and install the local certificates that allow for HTTPS certificate generation: 18 | 19 | ``` 20 | pip install groove 21 | install-ca 22 | ``` 23 | 24 | Instantiating Groove with the default parameters is usually fine for most deployments. To ensure we clean up resources once you're finished with the proxy, wrap your code in the `launch` context manager. 
25 | 26 | ```python 27 | from groove.proxy import Groove 28 | from requests import get 29 | from pathlib import Path 30 | 31 | proxy = Groove() 32 | with proxy.launch(): 33 | response = get( 34 | "https://www.example.com", 35 | proxies={ 36 | "http": proxy.base_url_proxy, 37 | "https": proxy.base_url_proxy, 38 | }, 39 | verify=str(Path("~/.grooveproxy/ca.crt").expanduser()), 40 | ) 41 | assert response.status_code == 200 42 | ``` 43 | 44 | Create a fully fake outbound for testing: 45 | 46 | ```python 47 | from groove.proxy import Groove 48 | from groove.tape import TapeRecord, TapeRequest, TapeResponse, TapeSession 49 | from requests import get 50 | from pathlib import Path 51 | 52 | records = [ 53 | TapeRecord( 54 | request=TapeRequest( 55 | url="https://example.com:443/", 56 | method="GET", 57 | headers={}, 58 | body=b"", 59 | ), 60 | response=TapeResponse( 61 | status=200, 62 | headers={}, 63 | body=b64encode("Test response".encode()) 64 | ), 65 | ) 66 | ] 67 | 68 | proxy = Groove() 69 | with proxy.launch(): 70 | proxy.tape_load( 71 | TapeSession( 72 | records=records 73 | ) 74 | ) 75 | 76 | response = get( 77 | "https://www.example.com", 78 | proxies={ 79 | "http": proxy.base_url_proxy, 80 | "https": proxy.base_url_proxy, 81 | }, 82 | verify=str(Path("~/.grooveproxy/ca.crt").expanduser()) 83 | ) 84 | assert response.content == b"Test response" 85 | ``` 86 | -------------------------------------------------------------------------------- /groove/groove-python/groove/tests/test_auth.py: -------------------------------------------------------------------------------- 1 | from base64 import b64encode 2 | 3 | import pytest 4 | from bs4 import BeautifulSoup 5 | from playwright._impl._api_types import Error as PlaywrightError 6 | from requests import get 7 | 8 | from groove.assets import get_asset_path 9 | from groove.proxy import Groove 10 | from groove.tape import TapeRecord, TapeRequest, TapeResponse, TapeSession 11 | 12 | AUTH_USERNAME = "test-username" 13 | 
@pytest.mark.xfail()
def test_auth_requests():
    """
    Ensure a `requests` client can reach a taped response through the proxy
    when the credentials are embedded in the proxy URL.
    """
    proxy = Groove(port=6040, control_port=6041, auth_username=AUTH_USERNAME, auth_password=AUTH_PASSWORD)

    record = TapeRecord(
        request=TapeRequest(
            url="https://freeman.vc:443/",
            method="GET",
            headers={},
            body=b"",
        ),
        response=TapeResponse(
            status=200,
            headers={},
            body=b64encode(b"Test content")
        ),
    )

    with proxy.launch():
        proxy.tape_load(
            TapeSession(
                records=[record]
            )
        )

        response = get(
            "https://freeman.vc",
            proxies={
                "http": f"http://{proxy.auth_username}:{proxy.auth_password}@localhost:{proxy.port}",
                "https": f"http://{proxy.auth_username}:{proxy.auth_password}@localhost:{proxy.port}",
            },
            verify=get_asset_path("ssl/ca.crt"),
        )
        assert response.ok
        # Compare the parsed document's text, the same way the Chromium test does.
        # Calling `.strip()` on the soup object itself is not a string operation,
        # and the xfail marker would hide the resulting error.
        assert BeautifulSoup(response.content, features="html.parser").text.strip() == "Test content"


@pytest.mark.xfail()
def test_auth_chromium(browser):
    """
    Ensure a Chromium context can reach a taped response through the proxy
    when the credentials are supplied via the browser's proxy settings.
    """
    proxy = Groove(port=6040, control_port=6041, auth_username=AUTH_USERNAME, auth_password=AUTH_PASSWORD)

    record = TapeRecord(
        request=TapeRequest(
            url="https://freeman.vc:443/",
            method="GET",
            headers={},
            body=b"",
        ),
        response=TapeResponse(
            status=200,
            headers={},
            body=b64encode(b"Test content")
        ),
    )

    with proxy.launch():
        proxy.tape_load(
            TapeSession(
                records=[record]
            )
        )

        # Hand the proxy address and credentials to the browser context
        context = browser.new_context(
            proxy={
                "server": proxy.base_url_proxy,
                "username": proxy.auth_username,
                "password": proxy.auth_password,
            },
        )
        page = context.new_page()
        page.goto("https://freeman.vc", timeout=5000)
        assert BeautifulSoup(page.content(), features="html.parser").text.strip() == "Test content"
// getCredentials loads the PEM key pair found at the paths *cert and *key and
// returns the parsed first certificate of that pair together with its private
// key. Any load or parse failure is returned unchanged.
func getCredentials(cert *string, key *string) (*x509.Certificate, interface{}, error) {
	pair, loadErr := tls.LoadX509KeyPair(*cert, *key)
	if loadErr != nil {
		return nil, nil, loadErr
	}

	// Only the first certificate of the loaded chain is parsed and returned.
	parsed, parseErr := x509.ParseCertificate(pair.Certificate[0])
	if parseErr != nil {
		return nil, nil, parseErr
	}

	return parsed, pair.PrivateKey, nil
}
class GoExtensionBuilder(build_ext):
    """
    `build_ext` command that builds `GoExtension` entries with `go build` and
    defers every other extension to the stock distutils behaviour.

    All expected failures are re-raised as `BuildFailed`, with the underlying
    exception attached as the cause.
    """

    def run(self):
        """Run the build, converting platform / missing-file errors into `BuildFailed`."""
        try:
            build_ext.run(self)
        except (DistutilsPlatformError, FileNotFoundError) as err:
            raise BuildFailed("File not found. Could not compile extension.") from err

    def build_extension(self, ext):
        """
        Build one extension.

        For a `GoExtension`, `go build` is executed inside the extension's source
        directory (resolved relative to this file) and writes its output to the
        path distutils assigns to the extension.
        """
        try:
            if not isinstance(ext, GoExtension):
                build_ext.build_extension(self, ext)
                return

            extension_root = Path(__file__).parent.resolve() / ext.path
            output_path = Path(self.get_ext_fullpath(ext.name)).absolute()
            # Don't rely on the toolchain to create the destination directory.
            output_path.parent.mkdir(parents=True, exist_ok=True)

            result = run(["go", "build", "-o", str(output_path)], cwd=extension_root)
            if result.returncode != 0:
                raise CompileError("Go build failed")
        except (CCompilerError, DistutilsExecError, DistutilsPlatformError, ValueError) as err:
            raise BuildFailed('Could not compile C extension.') from err
69 | for output in cmd.get_outputs(): 70 | relative_extension = Path(output).relative_to(cmd.build_lib) 71 | copyfile(output, relative_extension) 72 | mode = stat(relative_extension).st_mode 73 | mode |= (mode & 0o444) >> 2 74 | chmod(relative_extension, mode) 75 | 76 | 77 | if __name__ == "__main__": 78 | build({}) 79 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/requests.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from playwright.sync_api import sync_playwright 4 | from requests import get 5 | 6 | from proxy_benchmarks.io import is_docker 7 | 8 | 9 | class RequestBase(ABC): 10 | @property 11 | @abstractmethod 12 | def short_name(self) -> str: 13 | pass 14 | 15 | @abstractmethod 16 | def handle_request(self, url: str, proxy: str | None): 17 | pass 18 | 19 | 20 | class PythonRequest(RequestBase): 21 | def handle_request(self, url: str, proxy: str | None): 22 | response = get( 23 | url, 24 | proxies={ 25 | "http": proxy, 26 | "https": proxy, 27 | } if proxy else None, 28 | verify=False, 29 | ) 30 | assert response.ok 31 | 32 | @property 33 | def short_name(self) -> str: 34 | return "python" 35 | 36 | def __repr__(self) -> str: 37 | return "PythonRequest()" 38 | 39 | 40 | class ChromeRequest(RequestBase): 41 | def __init__(self, headless, keep_open: bool = False): 42 | """ 43 | :param headless: Whether to open the browser in headless mode. 44 | :param keep_open: Useful for debugging. Can optionally stop every time a 45 | page loads to better inspect the outgoing network requests and certificates. 
46 | 47 | """ 48 | self.headless = headless 49 | self.keep_open = keep_open 50 | 51 | def handle_request(self, url: str, proxy: str | None): 52 | with sync_playwright() as p: 53 | browser = p.chromium.launch( 54 | headless=self.headless, 55 | ) 56 | payload = { 57 | **({ 58 | "proxy": { 59 | "server": proxy, 60 | } 61 | } if proxy else {}), 62 | # We explicitly don't set `ignore_https_errors=True` because we expect 63 | # that the setup pipeline will correctly configure our proxy certificates 64 | # and our test server certificates 65 | } 66 | 67 | context = browser.new_context( 68 | **payload 69 | ) 70 | page = context.new_page() 71 | page_load_exception = None 72 | try: 73 | response = page.goto(url) 74 | except Exception as e: 75 | print("Exception encountered:", e) 76 | page_load_exception = e 77 | 78 | if self.keep_open: 79 | # TODO: Update coloring in case it's only available in the scrollback history 80 | if input("Press any key to continue..."): 81 | pass 82 | 83 | # Wait until after halting the browser to throw 84 | if page_load_exception: 85 | raise page_load_exception 86 | 87 | assert response.ok 88 | browser.close() 89 | 90 | @property 91 | def short_name(self) -> str: 92 | return "chrome_headless" if self.headless else "chrome_headfull" 93 | 94 | def __repr__(self) -> str: 95 | return f"ChromeRequest(headless={self.headless})" 96 | -------------------------------------------------------------------------------- /proxy-benchmarks/results_load_test_mimick.csv: -------------------------------------------------------------------------------- 1 | ,proxy,protocol,Type,Name,Request Count,Failure Count,Median Response Time,Average Response Time,Min Response Time,Max Response Time,Average Content Size,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,98%,99%,99.9%,99.99%,100% 2 | 
0,baseline,http,GET,/handle,49500,0,1000.488999998197,1011.4510293835457,1000.488999998197,1081.3865839736536,59.0,827.6833889677449,0.0,1000,1000,1000,1000,1000,1000,1000,1000,1100,1100,1100 3 | 0,baseline,https,GET,/handle,49500,0,1000.496040971484,1009.40118661796,1000.496040971484,1091.0654999897815,59.0,829.1644239206194,0.0,1000,1000,1000,1000,1000,1000,1000,1000,1100,1100,1100 4 | 0,mitmproxy,http,GET,/handle,40503,0,1200.0,1227.6686545363298,1002.384457970038,1707.092874974478,59.0,675.7549613347611,0.0,1200,1300,1300,1300,1300,1400,1600,1600,1700,1700,1700 5 | 0,mitmproxy,https,GET,/handle,36790,0,1400.0,1351.4674173770281,1003.3034160151146,3133.232207968831,59.0,613.6178274163591,0.0,1400,1400,1400,1400,1400,1500,1600,1600,3000,3100,3100 6 | 0,node_http_proxy,http,GET,/handle,15674,663,1100.0,3065.3146587986166,1002.1431249915622,27246.74249999225,56.504338394793926,262.44935649417545,11.10143698836534,1100,1200,1200,1300,7800,21000,26000,26000,27000,27000,27000 7 | 0,node_http_proxy,https,GET,/handle,11574,211,1200.0,3590.5202838633436,1005.6092090089805,40349.59366597468,57.9243995161569,192.93927813012266,3.517382727272843,1200,1300,2200,4600,8100,21000,26000,26000,34000,40000,40000 8 | 0,gomitmproxy,http,GET,/handle,29436,182,1100.0,1698.8464600524396,260.4702090029605,12059.617916995194,58.63520858812338,490.2044151654043,3.030887469768433,1100,1400,1600,2000,2800,6000,7300,11000,11000,11000,12000 9 | 0,gomitmproxy,https,GET,/handle,19521,719,1200.0,2230.5731829602005,51.64074996719137,30900.85354197072,56.82690435940782,325.3983924557211,11.985115730529351,1200,1300,1600,1900,2700,4500,22000,26000,31000,31000,31000 10 | 0,gomitmproxy-mimic,http,GET,/handle,27247,2,1001.0444169747643,1309.9179519289937,1001.0444169747643,5001.709542004392,58.995669247990605,536.5251729495114,0.03938233001427764,1000,1100,1200,1600,2200,2700,3200,3500,4300,5000,5000 11 | 
0,gomitmproxy-mimic,https,GET,/handle,23627,554,1100.0,1859.4644568309495,163.9345419825986,30077.215208031703,57.61658272315571,393.88923621774103,9.235816517739389,1100,1300,1600,1800,2200,4600,7900,26000,30000,30000,30000 12 | 0,martian,http,GET,/handle,20068,114,1000.0,1329.82356559779,279.85450002597645,6339.614583994262,58.66483954554515,492.27718713707947,2.796471962010517,1000,1100,1100,1200,2100,3200,3700,4300,6200,6300,6300 13 | 0,martian,https,GET,/handle,29733,253,1100.0,1672.9227807401173,192.39174999529496,8052.101582987234,58.49796522382538,495.34732070917516,4.214942055608963,1100,1700,2000,2000,2800,4400,5400,7600,8000,8100,8100 14 | 0,goproxy,http,GET,/handle,37855,14,1000.0,1298.910843795451,18.259250035043806,3297.2259999951348,59.00443798705587,633.77917199949,0.23439198013453602,1000,1100,1600,1700,2100,2200,2300,2700,3200,3300,3300 15 | 0,goproxy,https,GET,/handle,28728,1479,1700.0,1669.9074147774024,65.36229199264199,8985.648249974474,55.9625104427736,485.85205305978195,25.01305995806939,1700,1900,2000,2100,2200,2400,2800,3000,3300,3800,9000 16 | 0,goproxy-mimic,http,GET,/handle,10102,56,1800.0,2162.403625331737,1022.3960420116782,13115.720833011437,59.02217382696496,256.2232025670275,1.4203622395321263,1800,1900,2000,2000,3000,4700,12000,12000,13000,13000,13000 17 | 0,goproxy-mimic,https,GET,/handle,23867,2519,2000.0,2020.258533450883,110.40208296617493,10503.495333017781,52.77295009846231,403.9685645480247,42.63614254395082,2000,2100,2300,2400,3100,4300,5000,5400,6300,9500,11000 18 | -------------------------------------------------------------------------------- /groove/proxy/end_proxy.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "crypto/tls" 6 | "encoding/base64" 7 | "errors" 8 | "fmt" 9 | "io" 10 | "io/ioutil" 11 | "net" 12 | "net/http" 13 | "net/url" 14 | "strings" 15 | ) 16 | 17 | var defaultTLSConfig = &tls.Config{ 18 | InsecureSkipVerify: true, 
19 | } 20 | 21 | const ProxyAuthHeader = "Proxy-Authorization" 22 | 23 | func SetBasicAuth(username, password string, req *http.Request) { 24 | req.Header.Set(ProxyAuthHeader, fmt.Sprintf("Basic %s", basicAuth(username, password))) 25 | } 26 | 27 | func basicAuth(username, password string) string { 28 | return base64.StdEncoding.EncodeToString([]byte(username + ":" + password)) 29 | } 30 | 31 | func NewConnectDialToProxyWithHandler(https_proxy string, connectReqHandler func(req *http.Request)) func(network, addr string) (net.Conn, error) { 32 | /* 33 | * This is a modified version of the goproxy.ConnectDialToProxyWithHandler to use the raw 34 | * network dialer instead of a modified version. 35 | */ 36 | u, err := url.Parse(https_proxy) 37 | if err != nil { 38 | return nil 39 | } 40 | if u.Scheme == "" || u.Scheme == "http" { 41 | if strings.IndexRune(u.Host, ':') == -1 { 42 | u.Host += ":80" 43 | } 44 | return func(network, addr string) (net.Conn, error) { 45 | connectReq := &http.Request{ 46 | Method: "CONNECT", 47 | URL: &url.URL{Opaque: addr}, 48 | Host: addr, 49 | Header: make(http.Header), 50 | } 51 | if connectReqHandler != nil { 52 | connectReqHandler(connectReq) 53 | } 54 | c, err := net.Dial(network, u.Host) 55 | if err != nil { 56 | return nil, err 57 | } 58 | connectReq.Write(c) 59 | // Read response. 60 | // Okay to use and discard buffered reader here, because 61 | // TLS server will not speak until spoken to. 
62 | br := bufio.NewReader(c) 63 | resp, err := http.ReadResponse(br, connectReq) 64 | if err != nil { 65 | c.Close() 66 | return nil, err 67 | } 68 | defer resp.Body.Close() 69 | if resp.StatusCode != 200 { 70 | resp, err := ioutil.ReadAll(resp.Body) 71 | if err != nil { 72 | return nil, err 73 | } 74 | c.Close() 75 | return nil, errors.New("proxy refused connection" + string(resp)) 76 | } 77 | return c, nil 78 | } 79 | } 80 | if u.Scheme == "https" || u.Scheme == "wss" { 81 | if strings.IndexRune(u.Host, ':') == -1 { 82 | u.Host += ":443" 83 | } 84 | return func(network, addr string) (net.Conn, error) { 85 | c, err := net.Dial(network, u.Host) 86 | if err != nil { 87 | return nil, err 88 | } 89 | // TODO: Upgrade to utls dependency 90 | c = tls.Client(c, defaultTLSConfig) 91 | connectReq := &http.Request{ 92 | Method: "CONNECT", 93 | URL: &url.URL{Opaque: addr}, 94 | Host: addr, 95 | Header: make(http.Header), 96 | } 97 | if connectReqHandler != nil { 98 | connectReqHandler(connectReq) 99 | } 100 | connectReq.Write(c) 101 | // Read response. 102 | // Okay to use and discard buffered reader here, because 103 | // TLS server will not speak until spoken to. 
104 | br := bufio.NewReader(c) 105 | resp, err := http.ReadResponse(br, connectReq) 106 | if err != nil { 107 | c.Close() 108 | return nil, err 109 | } 110 | defer resp.Body.Close() 111 | if resp.StatusCode != 200 { 112 | body, err := ioutil.ReadAll(io.LimitReader(resp.Body, 500)) 113 | if err != nil { 114 | return nil, err 115 | } 116 | c.Close() 117 | return nil, errors.New("proxy refused connection" + string(body)) 118 | } 119 | return c, nil 120 | } 121 | } 122 | return nil 123 | } 124 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy/main.go: -------------------------------------------------------------------------------- 1 | // https://github.com/elazarl/goproxy/blob/master/examples/goproxy-transparent/transparent.go 2 | package main 3 | 4 | import ( 5 | "bufio" 6 | "context" 7 | "flag" 8 | "fmt" 9 | "log" 10 | "net" 11 | "net/http" 12 | "os" 13 | "os/signal" 14 | "regexp" 15 | "strconv" 16 | 17 | "github.com/elazarl/goproxy" 18 | ) 19 | 20 | func orPanic(err error) { 21 | if err != nil { 22 | panic(err) 23 | } 24 | } 25 | 26 | func main() { 27 | var ( 28 | verbose = flag.Bool("v", true, "should every proxy request be logged to stdout") 29 | port = flag.Int("port", 8080, "proxy http listen address") 30 | ) 31 | flag.Parse() 32 | 33 | log.Printf("Verbose: %v", *verbose) 34 | 35 | // Set our own CA instead of the one that's default bundled with the proxy 36 | setCA("ssl/ca.crt", "ssl/ca.key") 37 | 38 | proxy := goproxy.NewProxyHttpServer() 39 | proxy.Verbose = *verbose 40 | 41 | // Our other implementations cache the certificates for some length of time, so we do the 42 | // same here for equality in benchmarking 43 | proxy.CertStore = NewOptimizedCertStore() 44 | 45 | if proxy.Verbose { 46 | log.Printf("Server starting up! 
- configured to listen on http interface %d", *port) 47 | } 48 | 49 | proxy.NonproxyHandler = http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 50 | if req.Host == "" { 51 | fmt.Fprintln(w, "Cannot handle requests without Host header, e.g., HTTP 1.0") 52 | return 53 | } 54 | req.URL.Scheme = "http" 55 | req.URL.Host = req.Host 56 | proxy.ServeHTTP(w, req) 57 | }) 58 | proxy.OnRequest(goproxy.ReqHostMatches(regexp.MustCompile("^.*$"))). 59 | HandleConnect(goproxy.AlwaysMitm) 60 | proxy.OnRequest(goproxy.ReqHostMatches(regexp.MustCompile("^.*:80$"))). 61 | HijackConnect(func(req *http.Request, client net.Conn, ctx *goproxy.ProxyCtx) { 62 | defer func() { 63 | if e := recover(); e != nil { 64 | ctx.Logf("error connecting to remote: %v", e) 65 | client.Write([]byte("HTTP/1.1 500 Cannot reach destination\r\n\r\n")) 66 | } 67 | client.Close() 68 | }() 69 | clientBuf := bufio.NewReadWriter(bufio.NewReader(client), bufio.NewWriter(client)) 70 | 71 | remote, err := connectDial(req.Context(), proxy, "tcp", req.URL.Host) 72 | orPanic(err) 73 | remoteBuf := bufio.NewReadWriter(bufio.NewReader(remote), bufio.NewWriter(remote)) 74 | for { 75 | req, err := http.ReadRequest(clientBuf.Reader) 76 | orPanic(err) 77 | orPanic(req.Write(remoteBuf)) 78 | orPanic(remoteBuf.Flush()) 79 | resp, err := http.ReadResponse(remoteBuf.Reader, req) 80 | orPanic(err) 81 | orPanic(resp.Write(clientBuf.Writer)) 82 | orPanic(clientBuf.Flush()) 83 | } 84 | }) 85 | 86 | go func() { 87 | log.Fatalln(http.ListenAndServe(":"+strconv.Itoa(*port), proxy)) 88 | }() 89 | 90 | sigc := make(chan os.Signal, 1) 91 | signal.Notify(sigc, os.Interrupt) 92 | 93 | <-sigc 94 | 95 | log.Println("goproxy: shutting down") 96 | os.Exit(0) 97 | } 98 | 99 | // copied/converted from https.go 100 | func dial(ctx context.Context, proxy *goproxy.ProxyHttpServer, network, addr string) (c net.Conn, err error) { 101 | if proxy.Tr.DialContext != nil { 102 | return proxy.Tr.DialContext(ctx, network, addr) 103 | } 104 
| var d net.Dialer 105 | return d.DialContext(ctx, network, addr) 106 | } 107 | 108 | // copied/converted from https.go 109 | func connectDial(ctx context.Context, proxy *goproxy.ProxyHttpServer, network, addr string) (c net.Conn, err error) { 110 | if proxy.ConnectDial == nil { 111 | return dial(ctx, proxy, network, addr) 112 | } 113 | return proxy.ConnectDial(network, addr) 114 | } 115 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy-mimic/main.go: -------------------------------------------------------------------------------- 1 | // https://github.com/elazarl/goproxy/blob/master/examples/goproxy-transparent/transparent.go 2 | package main 3 | 4 | import ( 5 | "bufio" 6 | "context" 7 | "flag" 8 | "fmt" 9 | "log" 10 | "net" 11 | "net/http" 12 | "os" 13 | "os/signal" 14 | "regexp" 15 | "strconv" 16 | 17 | "github.com/piercefreeman/goproxy" 18 | ) 19 | 20 | func orPanic(err error) { 21 | if err != nil { 22 | panic(err) 23 | } 24 | } 25 | 26 | func main() { 27 | var ( 28 | verbose = flag.Bool("v", true, "should every proxy request be logged to stdout") 29 | port = flag.Int("port", 8080, "proxy http listen address") 30 | ) 31 | flag.Parse() 32 | 33 | log.Printf("Verbose: %v", *verbose) 34 | 35 | // Set our own CA instead of the one that's default bundled with the proxy 36 | setCA("ssl/ca.crt", "ssl/ca.key") 37 | 38 | proxy := goproxy.NewProxyHttpServer() 39 | proxy.Verbose = *verbose 40 | 41 | // Our other implementations cache the certificates for some length of time, so we do the 42 | // same here for equality in benchmarking 43 | proxy.CertStore = NewOptimizedCertStore() 44 | 45 | // Fingerprint mimic logic 46 | proxy.RoundTripper = newRoundTripper() 47 | 48 | if proxy.Verbose { 49 | log.Printf("Server starting up! 
- configured to listen on http interface %d", *port) 50 | } 51 | 52 | proxy.NonproxyHandler = http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { 53 | if req.Host == "" { 54 | fmt.Fprintln(w, "Cannot handle requests without Host header, e.g., HTTP 1.0") 55 | return 56 | } 57 | req.URL.Scheme = "http" 58 | req.URL.Host = req.Host 59 | proxy.ServeHTTP(w, req) 60 | }) 61 | proxy.OnRequest(goproxy.ReqHostMatches(regexp.MustCompile("^.*$"))). 62 | HandleConnect(goproxy.AlwaysMitm) 63 | proxy.OnRequest(goproxy.ReqHostMatches(regexp.MustCompile("^.*:80$"))). 64 | HijackConnect(func(req *http.Request, client net.Conn, ctx *goproxy.ProxyCtx) { 65 | defer func() { 66 | if e := recover(); e != nil { 67 | ctx.Logf("error connecting to remote: %v", e) 68 | client.Write([]byte("HTTP/1.1 500 Cannot reach destination\r\n\r\n")) 69 | } 70 | client.Close() 71 | }() 72 | clientBuf := bufio.NewReadWriter(bufio.NewReader(client), bufio.NewWriter(client)) 73 | 74 | remote, err := connectDial(req.Context(), proxy, "tcp", req.URL.Host) 75 | orPanic(err) 76 | remoteBuf := bufio.NewReadWriter(bufio.NewReader(remote), bufio.NewWriter(remote)) 77 | for { 78 | req, err := http.ReadRequest(clientBuf.Reader) 79 | orPanic(err) 80 | orPanic(req.Write(remoteBuf)) 81 | orPanic(remoteBuf.Flush()) 82 | resp, err := http.ReadResponse(remoteBuf.Reader, req) 83 | orPanic(err) 84 | orPanic(resp.Write(clientBuf.Writer)) 85 | orPanic(clientBuf.Flush()) 86 | } 87 | }) 88 | 89 | go func() { 90 | log.Fatalln(http.ListenAndServe(":"+strconv.Itoa(*port), proxy)) 91 | }() 92 | 93 | sigc := make(chan os.Signal, 1) 94 | signal.Notify(sigc, os.Interrupt) 95 | 96 | <-sigc 97 | 98 | log.Println("goproxy: shutting down") 99 | os.Exit(0) 100 | } 101 | 102 | // copied/converted from https.go 103 | func dial(ctx context.Context, proxy *goproxy.ProxyHttpServer, network, addr string) (c net.Conn, err error) { 104 | if proxy.Tr.DialContext != nil { 105 | return proxy.Tr.DialContext(ctx, network, addr) 106 | } 
107 | var d net.Dialer 108 | return d.DialContext(ctx, network, addr) 109 | } 110 | 111 | // copied/converted from https.go 112 | func connectDial(ctx context.Context, proxy *goproxy.ProxyHttpServer, network, addr string) (c net.Conn, err error) { 113 | if proxy.ConnectDial == nil { 114 | return dial(ctx, proxy, network, addr) 115 | } 116 | return proxy.ConnectDial(network, addr) 117 | } 118 | -------------------------------------------------------------------------------- /groove/groove-python/groove/tests/test_tape.py: -------------------------------------------------------------------------------- 1 | from base64 import b64encode 2 | from uuid import uuid4 3 | 4 | from bs4 import BeautifulSoup 5 | from functools import partial 6 | from requests import get 7 | 8 | from groove.tape import TapeRecord, TapeRequest, TapeResponse, TapeSession 9 | from groove.tests.mock_server import MockPageDefinition, mock_server 10 | 11 | 12 | def test_tape_global(proxy, browser): 13 | """ 14 | Ensure the basic tape functions work correctly 15 | """ 16 | proxy.tape_start() 17 | 18 | # Explicitly use different contexts because Chromium will cache this page client side 19 | context = browser.new_context( 20 | proxy={ 21 | "server": proxy.base_url_proxy, 22 | } 23 | ) 24 | page = context.new_page() 25 | page.goto("https://freeman.vc") 26 | page.close() 27 | 28 | modified_records = 0 29 | session = proxy.tape_get() 30 | assert len(session.records) > 0 31 | 32 | for record in session.records: 33 | if record.request.url == "https://freeman.vc:443/": 34 | record.response.body = "Mocked content".encode() 35 | modified_records += 1 36 | assert modified_records == 1 37 | 38 | proxy.tape_load(session) 39 | 40 | context = browser.new_context( 41 | proxy={ 42 | "server": proxy.base_url_proxy, 43 | } 44 | ) 45 | page = context.new_page() 46 | page.goto("https://freeman.vc") 47 | 48 | assert BeautifulSoup(page.content(), features="html.parser").text.strip() == "Mocked content" 49 | 50 | 51 | def 
test_tape_id(proxy, session): 52 | """ 53 | Ensure tapes can be recorded separately 54 | """ 55 | proxy.tape_start() 56 | 57 | with mock_server([ 58 | MockPageDefinition( 59 | "/test1", 60 | content=f"Request 1" 61 | ), 62 | MockPageDefinition( 63 | "/test2", 64 | content=f"Request 2" 65 | ), 66 | ]) as mock_url: 67 | response1 = session.get(f"{mock_url}/test1", headers={"Tape-ID": "Tape1"}) 68 | assert response1.ok 69 | response2 = session.get(f"{mock_url}/test2", headers={"Tape-ID": "Tape2"}) 70 | assert response2.ok 71 | 72 | session1 = proxy.tape_get("Tape1") 73 | session2 = proxy.tape_get("Tape2") 74 | assert len(session1.records) == 1 75 | assert len(session2.records) == 1 76 | 77 | assert session1.records[0].request.url == f"{mock_url}/test1" 78 | assert session2.records[0].request.url == f"{mock_url}/test2" 79 | 80 | 81 | def test_multiple_requests(proxy, context): 82 | """ 83 | Ensure mocked requests resolve in the same order 84 | """ 85 | response_1 = str(uuid4()) 86 | response_2 = str(uuid4()) 87 | 88 | records = [ 89 | TapeRecord( 90 | request=TapeRequest( 91 | url="https://freeman.vc:443/", 92 | method="GET", 93 | headers={}, 94 | body=b"", 95 | ), 96 | response=TapeResponse( 97 | status=200, 98 | headers={}, 99 | body=b64encode(response.encode()) 100 | ), 101 | ) 102 | for response in [response_1, response_2] 103 | ] 104 | 105 | proxy.tape_load( 106 | TapeSession( 107 | records=records 108 | ) 109 | ) 110 | 111 | page = context.new_page() 112 | 113 | page.goto("https://freeman.vc") 114 | assert BeautifulSoup(page.content(), features="html.parser").text.strip() == response_1 115 | 116 | page.goto("https://freeman.vc") 117 | assert BeautifulSoup(page.content(), features="html.parser").text.strip() == response_2 118 | -------------------------------------------------------------------------------- /groove/groove-python/groove/tests/test_end_proxy.py: -------------------------------------------------------------------------------- 1 | from base64 import 
b64encode 2 | 3 | import pytest 4 | from bs4 import BeautifulSoup 5 | 6 | from groove.dialer import DialerDefinition, ProxyDefinition 7 | from groove.proxy import Groove 8 | from groove.tape import TapeRecord, TapeRequest, TapeResponse, TapeSession 9 | 10 | AUTH_USERNAME = "test-username" 11 | AUTH_PASSWORD = "test-password" 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "end_proxy,middle_proxy", 16 | [ 17 | ( 18 | # Unauthenticated end proxy 19 | Groove(port=6040, control_port=6041), 20 | Groove(port=6010, control_port=6011), 21 | ), 22 | #( 23 | # # Authenticated end proxy 24 | # # Currently failing because of Chromium not sending Auth headers on every request 25 | # Groove(port=6040, control_port=6041, auth_username=AUTH_USERNAME, auth_password=AUTH_PASSWORD), 26 | # Groove(port=6010, control_port=6011, proxy_username=AUTH_USERNAME, proxy_password=AUTH_PASSWORD), 27 | #) 28 | ] 29 | ) 30 | def test_end_proxy(end_proxy, middle_proxy, browser): 31 | """ 32 | Ensure the proxy can forward to an end proxy 33 | """ 34 | record = TapeRecord( 35 | request=TapeRequest( 36 | url="https://freeman.vc:443/", 37 | method="GET", 38 | headers={}, 39 | body=b"", 40 | ), 41 | response=TapeResponse( 42 | status=200, 43 | headers={}, 44 | body=b64encode(b"Test content") 45 | ), 46 | ) 47 | 48 | with middle_proxy.launch(): 49 | with end_proxy.launch(): 50 | # Route everything to the proxy 51 | middle_proxy.dialer_load( 52 | [ 53 | DialerDefinition( 54 | priority=1, 55 | proxy=ProxyDefinition( 56 | url=end_proxy.base_url_proxy 57 | ) 58 | ) 59 | ] 60 | ) 61 | 62 | end_proxy.tape_load( 63 | TapeSession( 64 | records=[ 65 | # Double requests for the two proxy requests 66 | record, 67 | record, 68 | ] 69 | ) 70 | ) 71 | 72 | proxy_payload = { 73 | "server": end_proxy.base_url_proxy, 74 | **( 75 | { 76 | "username": end_proxy.auth_username, 77 | "password": end_proxy.auth_password, 78 | } 79 | if end_proxy.auth_username and end_proxy.auth_password 80 | else {} 81 | ), 82 | } 83 | 
def load_config(path: str) -> dict:
    """
    Parse the config file at `path` and return its key/value pairs.

    :param path: location of the config file; it is handed directly to `open`
    :return: the mapping produced by `DefaultConfigFileParser.parse`
        (callers index it by option name, e.g. `config["csv"]`)

    """
    config = DefaultConfigFileParser()
    with open(path) as file:
        return config.parse(file)
def run_load_test(
    url: str,
    config_path: str,
    run_time_seconds: int = 60,
    spawn_processes: int = 5,
    proxy: ProxyBase | None = None
) -> LoadTestResults:
    """
    Run a distributed locust load test against `url` and return the locations of its CSV reports.

    One locust master plus `spawn_processes` workers are launched from the bundled
    locust project. The call blocks until the master exits, then every spawned
    process is terminated - also when launching or waiting fails part-way through.

    :param url: host passed to locust via `--host`
    :param config_path: locust config file, relative to the locust project directory
    :param run_time_seconds: duration passed to the master via `--run-time`
    :param spawn_processes: number of worker processes the master waits for
    :param proxy: optional proxy whose port and CA files are exported to the workers
    :return: paths of the stats / history / exceptions / failures CSV files, built
        from the `csv` prefix in the config
    :raises KeyError: if the config has no `csv` entry

    """
    locust_project_path = get_asset_path("speed-test/locust")
    locust_config = load_config(locust_project_path / config_path)

    env = {
        **environ,
        # Environment values are converted with str(), the same way the proxy
        # certificate paths below are.
        "LOAD_TEST_CERTIFICATE": str(get_asset_path("speed-test/server/ssl/cert.crt")),
        "LOAD_TEST_CERTIFICATE_KEY": str(get_asset_path("speed-test/server/ssl/cert.key")),
    }

    if proxy:
        assert proxy.certificate_authority.public.exists()
        assert proxy.certificate_authority.key.exists()

        env["PROXY_PORT"] = str(proxy.port)
        # Even though these certificates are in the system keychain, python still needs
        # them explicitly specified. Since the load tester is written in python, these need
        # to be passed into the harness.
        env["PROXY_CERTIFICATE"] = str(proxy.certificate_authority.public)
        env["PROXY_CERTIFICATE_KEY"] = str(proxy.certificate_authority.key)

    # Bound before the `try` so the cleanup below never hits an unbound name and
    # can still reap whatever was started if a later launch raises.
    main_process: Popen | None = None
    worker_processes: list[Popen] = []

    try:
        # Launch the coordination server
        # This will wait to launch until N processes have connected
        main_process = Popen(
            ["poetry", "run", "locust", "--run-time", f"{run_time_seconds}s", "--master", "--expect-workers", str(spawn_processes), f"--config={config_path}", f"--host={url}"],
            cwd=locust_project_path,
            env=env
        )

        # Appended one by one (rather than built in a comprehension) so workers
        # started before a failing launch are still tracked for cleanup.
        for _ in range(spawn_processes):
            worker_processes.append(
                Popen(
                    ["poetry", "run", "locust", "--worker", f"--config={config_path}", f"--host={url}"],
                    cwd=locust_project_path,
                    env=env
                )
            )

        main_process.wait()

    finally:
        if main_process is not None:
            terminate_all(main_process)
        for process in worker_processes:
            terminate_all(process)

    # Path prefix to the csv files, relative to the locust project
    csv_prefix = locust_config["csv"]

    return LoadTestResults(
        stats=locust_project_path / f"{csv_prefix}_stats.csv",
        stats_history=locust_project_path / f"{csv_prefix}_stats_history.csv",
        exceptions=locust_project_path / f"{csv_prefix}_exceptions.csv",
        failures=locust_project_path / f"{csv_prefix}_failures.csv",
    )
https://github.com/elazarl/goproxy/blob/fbd10ff4f5a16de73dca5030fc12245548f76141/https.go#L32 20 | goproxyCa, err := tls.LoadX509KeyPair(caCert, caKey) 21 | if err != nil { 22 | return err 23 | } 24 | if goproxyCa.Leaf, err = x509.ParseCertificate(goproxyCa.Certificate[0]); err != nil { 25 | return err 26 | } 27 | goproxy.GoproxyCa = goproxyCa 28 | goproxy.OkConnect = &goproxy.ConnectAction{Action: goproxy.ConnectAccept, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)} 29 | goproxy.MitmConnect = &goproxy.ConnectAction{Action: goproxy.ConnectMitm, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)} 30 | goproxy.HTTPMitmConnect = &goproxy.ConnectAction{Action: goproxy.ConnectHTTPMitm, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)} 31 | goproxy.RejectConnect = &goproxy.ConnectAction{Action: goproxy.ConnectReject, TLSConfig: goproxy.TLSConfigFromCA(&goproxyCa)} 32 | return nil 33 | } 34 | 35 | func getLocalCAPaths() (localPath string, localCAPath string, localCAKey string) { 36 | user, err := user.Current() 37 | if err != nil { 38 | log.Fatal(fmt.Errorf("Unable to resolve current user: %w", err)) 39 | } 40 | 41 | localPath = path.Join(user.HomeDir, ".grooveproxy") 42 | localCAPath = path.Join(localPath, "ca.crt") 43 | localCAKey = path.Join(localPath, "ca.key") 44 | 45 | return localPath, localCAPath, localCAKey 46 | } 47 | 48 | func installCA() { 49 | /* 50 | * Determine if a certificate has already been generated for the proxy and if not will create 51 | * one in the user's home directory under `.grooveproxy/{ca.crt,ca.key}`. 
52 | */ 53 | localPath, localCAPath, localCAKey := getLocalCAPaths() 54 | 55 | // Ensure this folder is created 56 | if err := os.MkdirAll(localPath, os.ModePerm); err != nil { 57 | log.Fatal(err) 58 | } 59 | 60 | // Check for existing CA certificates 61 | if _, err := os.Stat(localCAPath); err == nil { 62 | log.Fatal(fmt.Errorf("CA certificate already generated, remove to regenerate:\n `rm %s && rm %s`\n", localCAPath, localCAKey)) 63 | } 64 | 65 | cmd := exec.Command("openssl", "genrsa", "-out", "ca.key", "2048") 66 | cmd.Dir = localPath 67 | if _, err := cmd.Output(); err != nil { 68 | log.Fatal(err) 69 | } 70 | 71 | cmd = exec.Command("openssl", "req", "-new", "-x509", "-key", "ca.key", "-out", "ca.crt", "-subj", "/C=US/ST=CA/L= /O= /OU= /CN=GrooveProxy/emailAddress= ") 72 | cmd.Dir = localPath 73 | if _, err := cmd.Output(); err != nil { 74 | log.Fatal(err) 75 | } 76 | 77 | switch { 78 | case runtime.GOOS == "linux": 79 | installCALinux(localCAPath) 80 | case runtime.GOOS == "darwin": 81 | installCADarwin(localCAPath) 82 | default: 83 | log.Fatal("Unknown OS, can't perform local installation") 84 | } 85 | 86 | log.Println("Certificate generation completed.") 87 | } 88 | 89 | func installCALinux(caPath string) { 90 | // System installation path 91 | user, err := user.Current() 92 | if err != nil { 93 | log.Fatal(fmt.Errorf("Unable to resolve current user: %w", err)) 94 | } 95 | 96 | systemPath := "/usr/local/share/ca-certificates/grooveproxy-ca.crt" 97 | cmd := exec.Command("sudo", "cp", caPath, systemPath) 98 | if _, err = cmd.Output(); err != nil { 99 | log.Fatal(err) 100 | } 101 | 102 | cmd = exec.Command("sudo", "update-ca-certificates") 103 | if _, err = cmd.Output(); err != nil { 104 | log.Fatal(err) 105 | } 106 | 107 | // Chrome / Chromium doesn't respect the system certificate store on Ubuntu 108 | // Instead use 109 | // https://chromium.googlesource.com/chromium/src/+/master/docs/linux/cert_management.md 110 | certUtilPath := 
// installCADarwin marks the certificate at caPath as trusted for the "ssl" and
// "basic" policies in the macOS system keychain by invoking `security` through
// sudo. The process is aborted when the command fails; its combined output is
// logged alongside the error so the reason is visible.
func installCADarwin(caPath string) {
	cmd := exec.Command("sudo", "security", "add-trusted-cert", "-d", "-p", "ssl", "-p", "basic", "-k", "/Library/Keychains/System.keychain", caPath)
	if out, err := cmd.CombinedOutput(); err != nil {
		log.Fatalf("unable to trust %s in the system keychain: %v\n%s", caPath, err, out)
	}
}
v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= 15 | github.com/piercefreeman/gomitmproxy v0.0.2 h1:TpoMkPfU68glWpqM6y6DZOVbUxHG+0rHl0Qc/YBlln0= 16 | github.com/piercefreeman/gomitmproxy v0.0.2/go.mod h1:eR64D6SYi7yK9SIF39fjt4fz7QGXG4STq9gNkdaCBgg= 17 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 18 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 19 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 20 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 21 | github.com/refraction-networking/utls v1.1.3 h1:K9opY+iKxcGvHOBG2019wFEVtsNFh0f5WqHyc2i3iU0= 22 | github.com/refraction-networking/utls v1.1.3/go.mod h1:+D89TUtA8+NKVFj1IXWr0p3tSdX1+SqUB7rL0QnGqyg= 23 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 24 | github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= 25 | github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= 26 | golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa h1:idItI2DDfCokpg0N51B2VtiLdJ4vAuXC9fnCb2gACo4= 27 | golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 28 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 29 | golang.org/x/net v0.0.0-20211111160137-58aab5ef257a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 30 | golang.org/x/net v0.0.0-20221012135044-0b7e1fb9d458 h1:MgJ6t2zo8v0tbmLCueaCbF1RM+TtB0rs3Lv8DGtOIpY= 31 | golang.org/x/net v0.0.0-20221012135044-0b7e1fb9d458/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk= 32 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 33 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 34 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 35 | golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10 h1:WIoqL4EROvwiPdUtaip4VcDdpZ4kha7wBWZrbVKCIZg= 36 | golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 37 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 38 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 39 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 40 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 41 | golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= 42 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 43 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 44 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 45 | gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 46 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 47 | gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= 48 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 49 | -------------------------------------------------------------------------------- /proxy-benchmarks/README.md: -------------------------------------------------------------------------------- 1 | # proxy-benchmarks 2 | Benchmark open man-in-the-middle proxies, for use in web crawling and unit test construction. 3 | 4 | ## Background 5 | 6 | This benchmarking is geared to find a proxy application for use in web crawl caching and test harnesses. 
As such, we need the following functionality: 7 | 8 | - HTTP + HTTPS support. HTTP/2 is optional 9 | - Client overrides of defined webpages, so we are able to return a mocked version of the page at test time 10 | - Capture client requests for later use in tests. I sometimes refer to this as "replaying the tape," assuming that a cassette started rolling at request time. 11 | 12 | Based on these requirements, especially the https support, we can't leverage normal forward proxies. Standard proxies rely on issuing a TCP handshake between the client and upstream server via http connect tunneling, so the proxy server doesn't have access to the request or response content. This has the benefit of ensuring the security of client connections. It also has the positive upshot of providing "transparency" to the proxy, so if destination servers rely on fingerprinting the TCP handshake of the client, it will appear as if the requests are coming from a valid origin. 13 | 14 | The mark of a good mitm proxy, which routes from local -> proxy -> remote: 15 | 1. Fast; minimal computational processing between receiving a request and forwarding it along 16 | 2. Concurrent; ability to handle multiple requests in parallel 17 | 3. Transparent; appearing as if requests came from the source 18 | 19 | Transparency in reconstructing requests: 20 | 1. Maintain TLS Fingerprints from client. We use [ja3](https://github.com/salesforce/ja3) to check for fingerprint identity. 21 | 22 | Tested on OS X with Python 3.10. 23 | 24 | ## Proxies 25 | 26 | Our proxies need additional setup when running locally. For convenience we have a setup script that handles this across all proxies. This script will install the necessary dependencies across node, go, and python to power the necessary proxies. It will also generate root certificates for the MITM proxy handlers. Expect to see multiple popups to trust these credentials. 
27 | 28 | ``` 29 | ./setup.sh 30 | ``` 31 | 32 | ### mitmproxy 33 | 34 | `mitmproxy` will automatically create a root certificate to authorize requests. To then trust this certificate locally, follow the instructions here: https://docs.mitmproxy.org/stable/concepts-certificates/ 35 | 36 | ## Requests 37 | 38 | ### Playwright 39 | 40 | Install the browsers that we want to test through the proxy. 41 | 42 | ``` 43 | poetry run playwright install chromium 44 | ``` 45 | 46 | ## Benchmarking Harness 47 | 48 | MITM proxies re-issue the commands that clients give them. Test whether fingerprints of the proxies align with their originally issuing browsers. 49 | 50 | ``` 51 | poetry run benchmark fingerprint execute [--output-directory ./fingerprint-capture] 52 | ``` 53 | 54 | View a more specific breakdown of the Ja3 fingerprint differences between the proxy and baseline (will execute the baseline comparison by default). 55 | 56 | ``` 57 | poetry run benchmark fingerprint compare-dynamic --proxy gomitmproxy 58 | ``` 59 | 60 | Conduct a load test of each proxy server, separately over http and https connections since https has additional overhead of having to manage the server->proxy certificate decryption and the proxy->client re-encryption. 61 | 62 | ``` 63 | poetry run benchmark load-test execute --data-path ./load-test 64 | poetry run benchmark load-test analyze --data-path ./load-test 65 | ``` 66 | 67 | Conduct a speed test of the MITM host certificate generation process. In the wild we expect this to happen relatively frequently (every time we visit a new host) whereas in our load test this was completely excluded, because all proxies have a method to cache previously generated certificates either in memory or on disk. 68 | 69 | ``` 70 | poetry run benchmark speed-test execute --data-path ./speed-test 71 | poetry run benchmark speed-test analyze --data-path ./speed-test 72 | ``` 73 | 74 | ## Debugging 75 | 76 | Q. I'm seeing an `ERR_CERT_AUTHORITY_INVALID` during tests. 
@dataclass
class MockPageDefinition:
    # Describes one response the mock server should serve for a given
    # (url, method) pair. Several definitions may share a url/method; the
    # server then hands them out sequentially, one per request.

    # Route to mount on the mock server, e.g. "/test"
    url: str
    # HTTP status code returned with the response
    status_code: int = 200
    # Value used as the response media type
    content_type: str = "text/html"

    # Lower-case HTTP verb; looked up as an attribute on the FastAPI app
    method: str = "get"
    # Extra response headers, if any
    headers: Optional[dict[str, str]] = None

    # Optional function callable - one should be specified
    # `content` is a static response body; `handler` is mounted directly
    # as the route callable and takes precedence over `content`.
    content: Optional[str] = None
    handler: Optional[Any] = None
@contextmanager
def mock_server(
    payloads: list[MockPageDefinition],
    port=6012,
    setup_callback=None,
    allow_repeat_access: bool = True,
):
    """
    Sets up a test server parameterized by MockPageDefinition in the background.

    The server runs in a separate process for the lifetime of the `with` block and
    is killed (and reaped) when the block exits, whether normally or via an exception.

    :param payloads: Page definitions forwarded to `launch_server` in the child process.
    :param port: Port the child process serves on; also used to build the yielded URL.
    :param setup_callback: If specified, will be passed the FastAPI server object once created. This function
        will be called in the separate process.
    :param allow_repeat_access: Allow the same URL to be accessed multiple times within the
        same mock session. If false will throw an error if already accessed. Only applies to entries
        that have one definition per URL. If multiple definitions are given per URL, will assume
        that you intend to iterate them.
    :return: Yields the base URL of the mock server (`http://localhost:{port}`).

    """
    process_ready_semaphore = Semaphore(0)

    process = Process(
        target=launch_server,
        args=(payloads, port, setup_callback, allow_repeat_access, process_ready_semaphore),
    )
    process.start()

    try:
        # Block until the child releases the semaphore. The wait lives inside the
        # `try` so that an interruption here still tears the child process down.
        process_ready_semaphore.acquire()
        print("Launched mock server...")
        # The child signals readiness just before it starts serving, so give it
        # a brief moment to actually bind the port.
        sleep(0.1)
        yield f"http://localhost:{port}"
    finally:
        process.kill()
        # Reap the child so repeated test runs do not accumulate zombie processes.
        process.join()
test.label) 31 | 32 | test.lruCache.Set(testKey, &testObject) 33 | objectRecovered, err := test.lruCache.Get(testKey) 34 | 35 | if err != nil { 36 | t.Fatalf("Error getting object: %s", err) 37 | } 38 | 39 | if (*objectRecovered)[0] != 97 { 40 | t.Fatalf("Recovered object does not match original") 41 | } 42 | } 43 | } 44 | 45 | func TestCacheStorageRace(t *testing.T) { 46 | /* 47 | * Race condition for cache storage 48 | */ 49 | invalidator := &CacheInvalidator{} 50 | 51 | cacheDirectory, err := ioutil.TempDir("", "") 52 | if err != nil { 53 | t.Fatalf("Error creating temp dir: %s", err) 54 | } 55 | 56 | var tests = []struct { 57 | lruCache *LRUCache 58 | label string 59 | spawns int 60 | }{ 61 | {invalidator.buildMemoryCache(1), "memory", 500}, 62 | {invalidator.buildDiskCache(1, cacheDirectory), "disk", 500}, 63 | } 64 | 65 | // Explicitly try to create conflicts on one key 66 | testKey := "testKey" 67 | 68 | for _, test := range tests { 69 | log.Printf("TestCacheStorage: Testing cache: %s", test.label) 70 | 71 | for i := 0; i < test.spawns; i++ { 72 | log.Printf("Spawning: %d", i) 73 | // Create a new object for each spawn since we pass a pointer 74 | go func(lruCache *LRUCache) { 75 | testObject := []byte{97} 76 | lruCache.Set(testKey, &testObject) 77 | lruCache.Get(testKey) 78 | }(test.lruCache) 79 | } 80 | } 81 | } 82 | 83 | func TestInvalidateCache(t *testing.T) { 84 | invalidator := &CacheInvalidator{} 85 | 86 | cacheDirectory, err := ioutil.TempDir("", "") 87 | if err != nil { 88 | t.Fatalf("Error creating temp dir: %s", err) 89 | } 90 | 91 | var tests = []struct { 92 | lruCache *LRUCache 93 | label string 94 | }{ 95 | // Set max size equal to 0, this should mean nothing is cached 96 | {invalidator.buildMemoryCache(0), "memory"}, 97 | {invalidator.buildDiskCache(0, cacheDirectory), "disk"}, 98 | } 99 | 100 | testKey := "testKey" 101 | testObject := []byte{97} 102 | 103 | for _, test := range tests { 104 | log.Printf("TestInvalidateCache: Testing cache: 
%s", test.label) 105 | 106 | test.lruCache.Set(testKey, &testObject) 107 | _, err := test.lruCache.Get(testKey) 108 | 109 | if err == nil { 110 | t.Fatalf("Object should not have been saved: %s", err) 111 | } 112 | } 113 | } 114 | 115 | func TestLimitedCacheSize(t *testing.T) { 116 | invalidator := &CacheInvalidator{} 117 | 118 | cacheDirectory, err := ioutil.TempDir("", "") 119 | if err != nil { 120 | t.Fatalf("Error creating temp dir: %s", err) 121 | } 122 | 123 | var tests = []struct { 124 | lruCache *LRUCache 125 | label string 126 | }{ 127 | // Allow one object 128 | {invalidator.buildMemoryCache(1), "memory"}, 129 | {invalidator.buildDiskCache(1, cacheDirectory), "disk"}, 130 | } 131 | 132 | testKey1 := "testKey-1" 133 | testKey2 := "testKey-2" 134 | testObject1 := []byte{97} 135 | testObject2 := []byte{98} 136 | 137 | for _, test := range tests { 138 | log.Printf("TestLimitedCacheSize: Testing cache: %s", test.label) 139 | 140 | test.lruCache.Set(testKey1, &testObject1) 141 | test.lruCache.Set(testKey2, &testObject2) 142 | 143 | _, err := test.lruCache.Get(testKey1) 144 | if err == nil { 145 | t.Fatalf("Key should have been invalidated: %s", err) 146 | } 147 | 148 | _, err = test.lruCache.Get(testKey2) 149 | if err != nil { 150 | t.Fatalf("Key should have saved: %s", err) 151 | } 152 | } 153 | } 154 | 155 | func TestDiskWrite(t *testing.T) { 156 | invalidator := &CacheInvalidator{} 157 | 158 | cacheDirectory, err := ioutil.TempDir("", "") 159 | if err != nil { 160 | t.Fatalf("Error creating temp dir: %s", err) 161 | } 162 | 163 | diskCache := invalidator.buildDiskCache(1, cacheDirectory) 164 | 165 | testKey1 := "testKey-1" 166 | testObject1 := []byte{97} 167 | 168 | diskCache.Set(testKey1, &testObject1) 169 | 170 | expectedLocation := blockTransform(testKey1) 171 | 172 | // Check the disk location for the file contents 173 | pathArgs := []string{cacheDirectory} 174 | pathArgs = append(pathArgs, expectedLocation...) 
@speed_test.command()
@option("--samples", type=int, default=100)
@option("--data-path", type=ClickPath(dir_okay=True, file_okay=False), required=True)
@pass_obj
def execute(obj, samples, data_path):
    """
    Benchmark speed of certificate generation.

    """
    # The docstring above doubles as the command's help text, so it is kept
    # short. This command only picks the default set of proxies to measure;
    # the measuring itself is done by `execute_raw`.
    execute_raw(
        obj,
        samples,
        data_path,
        [
            MitmProxy(),
            NodeHttpProxy(),
            GoMitmProxy(MimicTypeEnum.STANDARD),
            MartianProxy(),
            GoProxy(MimicTypeEnum.STANDARD),
        ],
    )
@speed_test.command()
@option("--data-path", type=ClickPath(dir_okay=True, file_okay=False), required=True)
def analyze(data_path):
    """
    Summarize the raw samples in `raw.json` under the data path.

    Prints the status-code counts per proxy and writes the distribution of cold start,
    warm start and their difference to `results_certificate_speed.csv` in the current
    working directory.

    """
    data_path = Path(data_path).expanduser()

    df = pd.read_json(data_path / "raw.json")
    df = df.assign(
        difference=df.cold_start - df.warm_start,
    )

    # Confirm basic success statistics
    # Columns are selected with a list: selecting several columns from a groupby
    # with a bare tuple is rejected by pandas 2.x.
    print(df.groupby("proxy")[["cold_start_status", "warm_start_status"]].value_counts())

    distribution_df = df.groupby("proxy")[["cold_start", "warm_start", "difference"]].describe().reset_index()
    distribution_df.to_csv("results_certificate_speed.csv")
def test_cache_standard(proxy, context):
    """
    Ensure the cache respects server headers
    """
    proxy.set_cache_mode(CacheModeEnum.STANDARD)

    cached_body = str(uuid4())
    unreachable_body = str(uuid4())

    # Two sequential definitions for the same URL, both marked cacheable
    definitions = [
        MockPageDefinition(
            "/test",
            content=f"{body}",
            headers={
                "Cache-Control": "max-age=604800",
            },
        )
        for body in (cached_body, unreachable_body)
    ]

    with mock_server(definitions) as mock_url:
        page = context.new_page()

        # We should never hit the second definition because of the first requests' headers,
        # so both navigations are expected to render the first body.
        for _ in range(2):
            page.goto(f"{mock_url}/test")
            rendered = BeautifulSoup(page.content(), features="html.parser").text.strip()
            assert rendered == cached_body
| 96 | page.goto(f"{mock_url}/test") 97 | assert BeautifulSoup(page.content(), features="html.parser").text.strip() == request1_content 98 | 99 | def test_cache_aggressive_get(proxy, context): 100 | """ 101 | Ensure the aggressive cache will cache GET request but not POST requests 102 | """ 103 | # Clear previous cache records, if they exist 104 | proxy.set_cache_mode(CacheModeEnum.AGGRESSIVE_GET) 105 | 106 | request1_content = str(uuid4()) 107 | request2_content = str(uuid4()) 108 | request3_content = str(uuid4()) 109 | 110 | with mock_server([ 111 | MockPageDefinition( 112 | "/test", 113 | content=f"{request1_content}
" 114 | ), 115 | MockPageDefinition( 116 | "/test", 117 | content=f"{request2_content}" 118 | ), 119 | MockPageDefinition( 120 | "/test", 121 | content=f"{request3_content}", 122 | method="post", 123 | ), 124 | ]) as mock_url: 125 | page = context.new_page() 126 | page.goto(f"{mock_url}/test") 127 | assert BeautifulSoup(page.content(), features="html.parser").text.strip() == request1_content 128 | 129 | # GET requests should cache 130 | page.goto(f"{mock_url}/test") 131 | assert BeautifulSoup(page.content(), features="html.parser").text.strip() == request1_content 132 | 133 | # But POST requests should not; issue via form submission 134 | page.click("#submit-button") 135 | assert BeautifulSoup(page.content(), features="html.parser").text.strip() == request3_content 136 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/gomitmproxy-mimic/transport.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 Yawning Angel 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "crypto/tls" 22 | "errors" 23 | "fmt" 24 | "net" 25 | "net/http" 26 | "net/url" 27 | "strings" 28 | "sync" 29 | 30 | utls "github.com/refraction-networking/utls" 31 | "golang.org/x/net/http2" 32 | ) 33 | 34 | var errProtocolNegotiated = errors.New("meek_lite: protocol negotiated") 35 | 36 | type roundTripper struct { 37 | sync.Mutex 38 | 39 | transport http.RoundTripper 40 | 41 | initConn net.Conn 42 | } 43 | 44 | func (rt *roundTripper) RoundTrip(req *http.Request) (*http.Response, error) { 45 | // Note: This isn't protected with a lock, since the meeklite ioWorker 46 | // serializes RoundTripper requests. 47 | // 48 | // This also assumes that req.URL.Host will remain constant for the 49 | // lifetime of the roundTripper, which is a valid assumption for meeklite. 50 | if rt.transport == nil { 51 | if err := rt.getTransport(req); err != nil { 52 | return nil, err 53 | } 54 | } 55 | return rt.transport.RoundTrip(req) 56 | } 57 | 58 | func (rt *roundTripper) getTransport(req *http.Request) error { 59 | switch strings.ToLower(req.URL.Scheme) { 60 | case "http": 61 | rt.transport = &http.Transport{Dial: net.Dial} 62 | return nil 63 | case "https": 64 | default: 65 | return fmt.Errorf("meek_lite: invalid URL scheme: '%v'", req.URL.Scheme) 66 | } 67 | 68 | _, err := rt.dialTLS("tcp", getDialTLSAddr(req.URL)) 69 | switch err { 70 | case errProtocolNegotiated: 71 | case nil: 72 | // Should never happen. 73 | panic("meek_lite: dialTLS returned no error when determining transport") 74 | default: 75 | return err 76 | } 77 | 78 | return nil 79 | } 80 | 81 | func (rt *roundTripper) dialTLS(network, addr string) (net.Conn, error) { 82 | // Unlike rt.transport, this is protected by a critical section 83 | // since past the initial manual call from getTransport, the HTTP 84 | // client will be the caller. 
85 | rt.Lock() 86 | defer rt.Unlock() 87 | 88 | // If we have the connection from when we determined the HTTPS 89 | // transport to use, return that. 90 | if conn := rt.initConn; conn != nil { 91 | rt.initConn = nil 92 | return conn, nil 93 | } 94 | 95 | rawConn, err := net.Dial(network, addr) 96 | if err != nil { 97 | return nil, err 98 | } 99 | 100 | var host string 101 | if host, _, err = net.SplitHostPort(addr); err != nil { 102 | host = addr 103 | } 104 | 105 | // TODO: Make this configurable. What "works" is host dependent. 106 | // * HelloChrome_Auto - Failures in a stand alone testcase against google.com 107 | // * HelloFirefox_Auto - Fails with the azure bridge, incompatible group. 108 | // * HelloIOS_Auto - Seems to work. 109 | // 110 | // Since HelloChrome_Auto works with azure, that's what'll be used for 111 | // now, since that's what the overwelming vast majority of people will 112 | // use. 113 | conn := utls.UClient(rawConn, &utls.Config{ServerName: host}, utls.HelloChrome_Auto) 114 | if err = conn.Handshake(); err != nil { 115 | conn.Close() 116 | return nil, err 117 | } 118 | 119 | if rt.transport != nil { 120 | return conn, nil 121 | } 122 | 123 | // No http.Transport constructed yet, create one based on the results 124 | // of ALPN. 125 | switch conn.ConnectionState().NegotiatedProtocol { 126 | case http2.NextProtoTLS: 127 | // The remote peer is speaking HTTP 2 + TLS. 128 | rt.transport = &http2.Transport{DialTLS: rt.dialTLSHTTP2} 129 | default: 130 | // Assume the remote peer is speaking HTTP 1.x + TLS. 131 | rt.transport = &http.Transport{DialTLS: rt.dialTLS} 132 | } 133 | 134 | // Stash the connection just established for use servicing the 135 | // actual request (should be near-immediate). 
136 | rt.initConn = conn 137 | 138 | return nil, errProtocolNegotiated 139 | } 140 | 141 | func (rt *roundTripper) dialTLSHTTP2(network, addr string, cfg *tls.Config) (net.Conn, error) { 142 | return rt.dialTLS(network, addr) 143 | } 144 | 145 | func getDialTLSAddr(u *url.URL) string { 146 | host, port, err := net.SplitHostPort(u.Host) 147 | if err == nil { 148 | return net.JoinHostPort(host, port) 149 | } 150 | 151 | return net.JoinHostPort(u.Host, u.Scheme) 152 | } 153 | 154 | func newRoundTripper() http.RoundTripper { 155 | return &roundTripper{} 156 | } 157 | 158 | func init() { 159 | // Attempt to increase compatibility, there's an encrypted link 160 | // underneath, and this doesn't (shouldn't) affect the external 161 | // fingerprint. 162 | utls.EnableWeakCiphers() 163 | } 164 | -------------------------------------------------------------------------------- /proxy-benchmarks/proxy_benchmarks/assets/proxies/goproxy-mimic/transport.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 Yawning Angel 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | package main 19 | 20 | import ( 21 | "crypto/tls" 22 | "errors" 23 | "fmt" 24 | "net" 25 | "net/http" 26 | "net/url" 27 | "strings" 28 | "sync" 29 | 30 | utls "github.com/refraction-networking/utls" 31 | "golang.org/x/net/http2" 32 | ) 33 | 34 | var errProtocolNegotiated = errors.New("meek_lite: protocol negotiated") 35 | 36 | type roundTripper struct { 37 | sync.Mutex 38 | 39 | transport http.RoundTripper 40 | 41 | initConn net.Conn 42 | } 43 | 44 | func (rt *roundTripper) RoundTrip(req *http.Request) (*http.Response, error) { 45 | // Note: This isn't protected with a lock, since the meeklite ioWorker 46 | // serializes RoundTripper requests. 47 | // 48 | // This also assumes that req.URL.Host will remain constant for the 49 | // lifetime of the roundTripper, which is a valid assumption for meeklite. 50 | if rt.transport == nil { 51 | if err := rt.getTransport(req); err != nil { 52 | return nil, err 53 | } 54 | } 55 | return rt.transport.RoundTrip(req) 56 | } 57 | 58 | func (rt *roundTripper) getTransport(req *http.Request) error { 59 | switch strings.ToLower(req.URL.Scheme) { 60 | case "http": 61 | rt.transport = &http.Transport{Dial: net.Dial} 62 | return nil 63 | case "https": 64 | default: 65 | return fmt.Errorf("meek_lite: invalid URL scheme: '%v'", req.URL.Scheme) 66 | } 67 | 68 | _, err := rt.dialTLS("tcp", getDialTLSAddr(req.URL)) 69 | switch err { 70 | case errProtocolNegotiated: 71 | case nil: 72 | // Should never happen. 73 | panic("meek_lite: dialTLS returned no error when determining transport") 74 | default: 75 | return err 76 | } 77 | 78 | return nil 79 | } 80 | 81 | func (rt *roundTripper) dialTLS(network, addr string) (net.Conn, error) { 82 | // Unlike rt.transport, this is protected by a critical section 83 | // since past the initial manual call from getTransport, the HTTP 84 | // client will be the caller. 
85 | rt.Lock() 86 | defer rt.Unlock() 87 | 88 | // If we have the connection from when we determined the HTTPS 89 | // transport to use, return that. 90 | if conn := rt.initConn; conn != nil { 91 | rt.initConn = nil 92 | return conn, nil 93 | } 94 | 95 | rawConn, err := net.Dial(network, addr) 96 | if err != nil { 97 | return nil, err 98 | } 99 | 100 | var host string 101 | if host, _, err = net.SplitHostPort(addr); err != nil { 102 | host = addr 103 | } 104 | 105 | // TODO: Make this configurable. What "works" is host dependent. 106 | // * HelloChrome_Auto - Failures in a stand alone testcase against google.com 107 | // * HelloFirefox_Auto - Fails with the azure bridge, incompatible group. 108 | // * HelloIOS_Auto - Seems to work. 109 | // 110 | // Since HelloChrome_Auto works with azure, that's what'll be used for 111 | // now, since that's what the overwelming vast majority of people will 112 | // use. 113 | conn := utls.UClient(rawConn, &utls.Config{ServerName: host}, utls.HelloChrome_Auto) 114 | if err = conn.Handshake(); err != nil { 115 | conn.Close() 116 | return nil, err 117 | } 118 | 119 | if rt.transport != nil { 120 | return conn, nil 121 | } 122 | 123 | // No http.Transport constructed yet, create one based on the results 124 | // of ALPN. 125 | switch conn.ConnectionState().NegotiatedProtocol { 126 | case http2.NextProtoTLS: 127 | // The remote peer is speaking HTTP 2 + TLS. 128 | rt.transport = &http2.Transport{DialTLS: rt.dialTLSHTTP2} 129 | default: 130 | // Assume the remote peer is speaking HTTP 1.x + TLS. 131 | rt.transport = &http.Transport{DialTLS: rt.dialTLS} 132 | } 133 | 134 | // Stash the connection just established for use servicing the 135 | // actual request (should be near-immediate). 
// main is the entry point of the groove proxy binary.
//
// It handles the "install-ca" subcommand, otherwise parses the flags, loads
// the CA, wires the recorder, cache, dialer session and middleware into a
// goproxy server, starts the control API and the proxy listener in
// background goroutines, and then blocks until an interrupt signal arrives.
func main() {
	// The subcommand is checked on the raw arguments before any flags are
	// defined or parsed.
	if len(os.Args) > 1 {
		command := os.Args[1]
		if command == "install-ca" {
			installCA()
			return
		} else {
			// Assume other requests will be handled by the regular proxy - passthrough
		}
	}

	var (
		verbose     = flag.Bool("v", true, "should every proxy request be logged to stdout")
		port        = flag.Int("port", 6010, "proxy http listen address")
		controlPort = flag.Int("control-port", 6011, "control API listen address")

		// Location to CA
		caCertificate = flag.String("ca-certificate", "", "Path to CA Certificate")
		caKey         = flag.String("ca-key", "", "Path to CA Key")

		// Cache size (in memory)
		cacheMemorySize = flag.Int("cache-memory-mb", 25, "cache memory size")

		// Require authentication to access this proxy
		//authUsername = flag.String("auth-username", "", "Require authentication to the current server")
		//authPassword = flag.String("auth-password", "", "Require authentication to the current server")
	)
	flag.Parse()

	log.Printf("Verbose: %v", *verbose)

	// Both the certificate and the key must be supplied; if either is
	// missing the locally stored CA paths are used instead.
	if len(*caCertificate) == 0 || len(*caKey) == 0 {
		log.Println("Falling back to default CA certificate")
		_, localCAPath, localCAKey := getLocalCAPaths()
		if err := setCA(localCAPath, localCAKey); err != nil {
			log.Fatal(fmt.Errorf("Error setting CA: %w", err))
		}
	} else {
		// Set our own CA instead of the one that's default bundled with the proxy
		if err := setCA(*caCertificate, *caKey); err != nil {
			log.Fatal(fmt.Errorf("Error setting CA: %w", err))
		}
	}

	recorder := NewRecorder()
	// NOTE(review): the flag is an int converted straight to uint64, so a
	// negative value would wrap to a very large size - confirm whether
	// NewCache guards against that.
	cache := NewCache(uint64(*cacheMemorySize))

	proxy := goproxy.NewProxyHttpServer()
	proxy.Verbose = *verbose

	// If specified, protect the proxy with an auth login
	// @pierce - Currently failing on MITM because of repeat CONNECTs, some without auth
	/*if len(*authUsername) > 0 && len(*authPassword) > 0 {
		log.Println("Protect proxy with username and password")
		auth.ProxyBasicMitm(proxy, "my_realm", func(user, pwd string) bool {
			return user == *authUsername && pwd == *authPassword
		})
	} else {
		log.Println("Creating unauthenticated proxy")
	}*/

	// Our other implementations cache the certificates for some length of time, so we do the
	// same here for equality in benchmarking
	proxy.CertStore = NewOptimizedCertStore()

	dialerSession := NewDialerSession()

	// Default the session to a full passthrough from local -> Internet
	// This will get overridden by clients when they provide values
	dialerSession.DialerDefinitions = append(
		dialerSession.DialerDefinitions,
		NewDialerDefinition(0, nil, nil),
	)

	roundTripper := NewCustomRoundTripper(dialerSession)

	// Static function to run a new dial without the context of a particular request
	// In theory we could just make this static at launch time but we keep it dynamic
	// in case the `next` logic changes inflight (ie. clients add more proxies with
	// different priorities, etc) - there are some slight performance impacts to this
	// approach but it's likely negligible given the overall network latencies
	proxy.ConnectDial = func(network, addr string) (net.Conn, error) {
		context := dialerSession.NewDialerContext(nil)
		dialDefinition := dialerSession.NextDialer(context)
		return dialDefinition.Dial(network, addr)
	}

	controller := createController(recorder, cache, dialerSession)

	// Cast the custom roundtripper implementation to a standard http.RoundTripper
	proxy.RoundTripper = http.RoundTripper(roundTripper)

	if proxy.Verbose {
		log.Printf("Server starting up! - configured to listen on http interface %d", *port)
	}

	// Middleware is registered in this order: headers, recorder, cache.
	setupHeadersMiddleware(proxy)
	setupRecorderMiddleware(proxy, recorder)
	setupCacheMiddleware(proxy, cache, recorder)

	// Requests that reach the listener without being proxy requests are
	// rewritten into http:// requests for their Host header and fed back
	// through the proxy.
	proxy.NonproxyHandler = http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
		if req.Host == "" {
			fmt.Fprintln(w, "Cannot handle requests without Host header, e.g., HTTP 1.0")
			return
		}
		req.URL.Scheme = "http"
		req.URL.Host = req.Host
		proxy.ServeHTTP(w, req)
	})

	// The pattern matches every host, so all CONNECT requests are handled
	// with goproxy.AlwaysMitm.
	proxy.OnRequest(goproxy.ReqHostMatches(regexp.MustCompile("^.*$"))).
		HandleConnect(goproxy.AlwaysMitm)

	// Control API listener; whatever Run returns is not inspected here.
	go func() {
		controller.Run(":" + strconv.Itoa(*controlPort))
	}()

	go func() {
		// Host on TLS so clients can use http/2 multiplexing - required for the requests
		// that block the system lock
		// NOTE(review): the comment above mentions TLS, but http.ListenAndServe
		// serves plain HTTP on this port - confirm which is intended.
		log.Fatalln(http.ListenAndServe(":"+strconv.Itoa(*port), proxy))
	}()

	// Block the main goroutine until an interrupt is received. Only
	// os.Interrupt is registered with signal.Notify.
	sigc := make(chan os.Signal, 1)
	signal.Notify(sigc, os.Interrupt)

	<-sigc

	log.Println("groove: shutting down")
	os.Exit(0)
}